GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_pipeline.c
1
/*
2
* Copyright © 2019 Raspberry Pi
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include "vk_util.h"
25
26
#include "v3dv_debug.h"
27
#include "v3dv_private.h"
28
29
#include "vk_format_info.h"
30
31
#include "common/v3d_debug.h"
32
33
#include "compiler/nir/nir_builder.h"
34
#include "nir/nir_serialize.h"
35
36
#include "util/u_atomic.h"
37
38
#include "vulkan/util/vk_format.h"
39
40
static VkResult
41
compute_vpm_config(struct v3dv_pipeline *pipeline);
42
43
void
44
v3dv_print_v3d_key(struct v3d_key *key,
45
uint32_t v3d_key_size)
46
{
47
struct mesa_sha1 ctx;
48
unsigned char sha1[20];
49
char sha1buf[41];
50
51
_mesa_sha1_init(&ctx);
52
53
_mesa_sha1_update(&ctx, key, v3d_key_size);
54
55
_mesa_sha1_final(&ctx, sha1);
56
_mesa_sha1_format(sha1buf, sha1);
57
58
fprintf(stderr, "key %p: %s\n", key, sha1buf);
59
}
60
61
static void
62
pipeline_compute_sha1_from_nir(nir_shader *nir,
63
unsigned char sha1[20])
64
{
65
assert(nir);
66
struct blob blob;
67
blob_init(&blob);
68
69
nir_serialize(&blob, nir, false);
70
if (!blob.out_of_memory)
71
_mesa_sha1_compute(blob.data, blob.size, sha1);
72
73
blob_finish(&blob);
74
}
75
76
void
77
v3dv_shader_module_internal_init(struct v3dv_device *device,
78
struct vk_shader_module *module,
79
nir_shader *nir)
80
{
81
vk_object_base_init(&device->vk, &module->base,
82
VK_OBJECT_TYPE_SHADER_MODULE);
83
module->nir = nir;
84
module->size = 0;
85
86
pipeline_compute_sha1_from_nir(nir, module->sha1);
87
}
88
89
void
90
v3dv_shader_variant_destroy(struct v3dv_device *device,
91
struct v3dv_shader_variant *variant)
92
{
93
/* The assembly BO is shared by all variants in the pipeline, so it can't
94
* be freed here and should be freed with the pipeline
95
*/
96
ralloc_free(variant->prog_data.base);
97
vk_free(&device->vk.alloc, variant);
98
}
99
100
static void
101
destroy_pipeline_stage(struct v3dv_device *device,
102
struct v3dv_pipeline_stage *p_stage,
103
const VkAllocationCallbacks *pAllocator)
104
{
105
if (!p_stage)
106
return;
107
108
ralloc_free(p_stage->nir);
109
vk_free2(&device->vk.alloc, pAllocator, p_stage);
110
}
111
112
static void
113
pipeline_free_stages(struct v3dv_device *device,
114
struct v3dv_pipeline *pipeline,
115
const VkAllocationCallbacks *pAllocator)
116
{
117
assert(pipeline);
118
119
/* FIXME: we can't just loop over the mesa stages because of the binning
* stages; it would be good to find an alternative.
*/
122
destroy_pipeline_stage(device, pipeline->vs, pAllocator);
123
destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
124
destroy_pipeline_stage(device, pipeline->gs, pAllocator);
125
destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
126
destroy_pipeline_stage(device, pipeline->fs, pAllocator);
127
destroy_pipeline_stage(device, pipeline->cs, pAllocator);
128
129
pipeline->vs = NULL;
130
pipeline->vs_bin = NULL;
131
pipeline->gs = NULL;
132
pipeline->gs_bin = NULL;
133
pipeline->fs = NULL;
134
pipeline->cs = NULL;
135
}
136
137
static void
138
v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
139
struct v3dv_device *device,
140
const VkAllocationCallbacks *pAllocator)
141
{
142
if (!pipeline)
143
return;
144
145
pipeline_free_stages(device, pipeline, pAllocator);
146
147
if (pipeline->shared_data) {
148
v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
149
pipeline->shared_data = NULL;
150
}
151
152
if (pipeline->spill.bo) {
153
assert(pipeline->spill.size_per_thread > 0);
154
v3dv_bo_free(device, pipeline->spill.bo);
155
}
156
157
if (pipeline->default_attribute_values) {
158
v3dv_bo_free(device, pipeline->default_attribute_values);
159
pipeline->default_attribute_values = NULL;
160
}
161
162
vk_object_free(&device->vk, pAllocator, pipeline);
163
}
164
165
VKAPI_ATTR void VKAPI_CALL
166
v3dv_DestroyPipeline(VkDevice _device,
167
VkPipeline _pipeline,
168
const VkAllocationCallbacks *pAllocator)
169
{
170
V3DV_FROM_HANDLE(v3dv_device, device, _device);
171
V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
172
173
if (!pipeline)
174
return;
175
176
v3dv_destroy_pipeline(pipeline, device, pAllocator);
177
}
178
179
static const struct spirv_to_nir_options default_spirv_options = {
180
.caps = {
181
.device_group = true,
182
.variable_pointers = true,
183
.subgroup_basic = true,
184
},
185
.ubo_addr_format = nir_address_format_32bit_index_offset,
186
.ssbo_addr_format = nir_address_format_32bit_index_offset,
187
.phys_ssbo_addr_format = nir_address_format_64bit_global,
188
.push_const_addr_format = nir_address_format_logical,
189
.shared_addr_format = nir_address_format_32bit_offset,
190
.frag_coord_is_sysval = false,
191
};
192
193
const nir_shader_compiler_options v3dv_nir_options = {
194
.lower_add_sat = true,
195
.lower_all_io_to_temps = true,
196
.lower_extract_byte = true,
197
.lower_extract_word = true,
198
.lower_insert_byte = true,
199
.lower_insert_word = true,
200
.lower_bitfield_insert_to_shifts = true,
201
.lower_bitfield_extract_to_shifts = true,
202
.lower_bitfield_reverse = true,
203
.lower_bit_count = true,
204
.lower_cs_local_id_from_index = true,
205
.lower_ffract = true,
206
.lower_fmod = true,
207
.lower_pack_unorm_2x16 = true,
208
.lower_pack_snorm_2x16 = true,
209
.lower_unpack_unorm_2x16 = true,
210
.lower_unpack_snorm_2x16 = true,
211
.lower_pack_unorm_4x8 = true,
212
.lower_pack_snorm_4x8 = true,
213
.lower_unpack_unorm_4x8 = true,
214
.lower_unpack_snorm_4x8 = true,
215
.lower_pack_half_2x16 = true,
216
.lower_unpack_half_2x16 = true,
217
/* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and
* still get the tests to pass, since that might produce slightly better code.
*/
220
.lower_uadd_carry = true,
221
.lower_usub_borrow = true,
222
/* FIXME: check if we can use multop + umul24 to implement mul2x32_64
223
* without lowering.
224
*/
225
.lower_mul_2x32_64 = true,
226
.lower_fdiv = true,
227
.lower_find_lsb = true,
228
.lower_ffma16 = true,
229
.lower_ffma32 = true,
230
.lower_ffma64 = true,
231
.lower_flrp32 = true,
232
.lower_fpow = true,
233
.lower_fsat = true,
234
.lower_fsqrt = true,
235
.lower_ifind_msb = true,
236
.lower_isign = true,
237
.lower_ldexp = true,
238
.lower_mul_high = true,
239
.lower_wpos_pntc = true,
240
.lower_rotate = true,
241
.lower_to_scalar = true,
242
.lower_device_index_to_zero = true,
243
.has_fsub = true,
244
.has_isub = true,
245
.vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
246
* needs to be supported */
247
.lower_interpolate_at = true,
248
.max_unroll_iterations = 16,
249
.divergence_analysis_options =
250
nir_divergence_multiple_workgroup_per_compute_subgroup
251
};
252
253
const nir_shader_compiler_options *
254
v3dv_pipeline_get_nir_options(void)
255
{
256
return &v3dv_nir_options;
257
}
258
259
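/* Convenience wrapper used by nir_optimize(): runs a NIR pass, ORs its
* result into the enclosing 'progress' flag and evaluates to whether this
* particular pass made progress.
*/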
#define OPT(pass, ...) ({ \
260
bool this_progress = false; \
261
NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
262
if (this_progress) \
263
progress = true; \
264
this_progress; \
265
})
266
267
static void
268
nir_optimize(nir_shader *nir,
269
struct v3dv_pipeline_stage *stage,
270
bool allow_copies)
271
{
272
bool progress;
273
274
do {
275
progress = false;
276
OPT(nir_split_array_vars, nir_var_function_temp);
277
OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
278
OPT(nir_opt_deref);
279
OPT(nir_lower_vars_to_ssa);
280
if (allow_copies) {
281
/* Only run this pass in the first call to nir_optimize. Later calls
282
* assume that we've lowered away any copy_deref instructions and we
283
* don't want to introduce any more.
284
*/
285
OPT(nir_opt_find_array_copies);
286
}
287
OPT(nir_opt_copy_prop_vars);
288
OPT(nir_opt_dead_write_vars);
289
OPT(nir_opt_combine_stores, nir_var_all);
290
291
OPT(nir_lower_alu_to_scalar, NULL, NULL);
292
293
OPT(nir_copy_prop);
294
OPT(nir_lower_phis_to_scalar, false);
295
296
OPT(nir_copy_prop);
297
OPT(nir_opt_dce);
298
OPT(nir_opt_cse);
299
OPT(nir_opt_combine_stores, nir_var_all);
300
301
/* Passing 0 to the peephole select pass causes it to convert
302
* if-statements that contain only move instructions in the branches
303
* regardless of the count.
304
*
305
* Passing 1 to the peephole select pass causes it to convert
306
* if-statements that contain at most a single ALU instruction (total)
307
* in both branches.
308
*/
309
OPT(nir_opt_peephole_select, 0, false, false);
310
OPT(nir_opt_peephole_select, 8, false, true);
311
312
OPT(nir_opt_intrinsics);
313
OPT(nir_opt_idiv_const, 32);
314
OPT(nir_opt_algebraic);
315
OPT(nir_opt_constant_folding);
316
317
OPT(nir_opt_dead_cf);
318
319
OPT(nir_opt_if, false);
320
OPT(nir_opt_conditional_discard);
321
322
OPT(nir_opt_remove_phis);
323
OPT(nir_opt_undef);
324
OPT(nir_lower_pack);
325
} while (progress);
326
327
OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
328
}
329
330
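/* Stage-independent NIR lowering and cleanup run right after the shader has
* been translated from SPIR-V (or cloned from an internal NIR module) and
* before any pipeline-layout specific lowering.
*/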
static void
331
preprocess_nir(nir_shader *nir,
332
struct v3dv_pipeline_stage *stage)
333
{
334
/* Make sure we lower variable initializers on output variables so that
335
* nir_remove_dead_variables below sees the corresponding stores
336
*/
337
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
338
339
/* Now that we've deleted all but the main function, we can go ahead and
340
* lower the rest of the variable initializers.
341
*/
342
NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
343
344
/* Split member structs. We do this before lower_io_to_temporaries so that
345
* it doesn't lower system values to temporaries by accident.
346
*/
347
NIR_PASS_V(nir, nir_split_var_copies);
348
NIR_PASS_V(nir, nir_split_per_member_structs);
349
350
if (nir->info.stage == MESA_SHADER_FRAGMENT)
351
NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
352
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
353
NIR_PASS_V(nir, nir_lower_input_attachments,
354
&(nir_input_attachment_options) {
355
.use_fragcoord_sysval = false,
356
});
357
}
358
359
NIR_PASS_V(nir, nir_lower_explicit_io,
360
nir_var_mem_push_const,
361
nir_address_format_32bit_offset);
362
363
NIR_PASS_V(nir, nir_lower_explicit_io,
364
nir_var_mem_ubo | nir_var_mem_ssbo,
365
nir_address_format_32bit_index_offset);
366
367
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in |
368
nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
369
NULL);
370
371
NIR_PASS_V(nir, nir_propagate_invariant, false);
372
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
373
nir_shader_get_entrypoint(nir), true, false);
374
375
NIR_PASS_V(nir, nir_lower_system_values);
376
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
377
378
NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
379
380
NIR_PASS_V(nir, nir_normalize_cubemap_coords);
381
382
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
383
384
NIR_PASS_V(nir, nir_split_var_copies);
385
NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);
386
387
nir_optimize(nir, stage, true);
388
389
NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
390
391
/* Lower a bunch of stuff */
392
NIR_PASS_V(nir, nir_lower_var_copies);
393
394
NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
395
396
NIR_PASS_V(nir, nir_lower_indirect_derefs,
397
nir_var_function_temp, 2);
398
399
NIR_PASS_V(nir, nir_lower_array_deref_of_vec,
400
nir_var_mem_ubo | nir_var_mem_ssbo,
401
nir_lower_direct_array_deref_of_vec_load);
402
403
NIR_PASS_V(nir, nir_lower_frexp);
404
405
/* Get rid of split copies */
406
nir_optimize(nir, stage, false);
407
}
408
409
/* FIXME: This is basically the same code as in anv, tu and radv. Move it to a
* common place?
*/
412
static struct nir_spirv_specialization*
413
vk_spec_info_to_nir_spirv(const VkSpecializationInfo *spec_info,
414
uint32_t *out_num_spec_entries)
415
{
416
if (spec_info == NULL || spec_info->mapEntryCount == 0)
417
return NULL;
418
419
uint32_t num_spec_entries = spec_info->mapEntryCount;
420
struct nir_spirv_specialization *spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
421
422
for (uint32_t i = 0; i < num_spec_entries; i++) {
423
VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
424
const void *data = spec_info->pData + entry.offset;
425
assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
426
427
spec_entries[i].id = spec_info->pMapEntries[i].constantID;
428
switch (entry.size) {
429
case 8:
430
spec_entries[i].value.u64 = *(const uint64_t *)data;
431
break;
432
case 4:
433
spec_entries[i].value.u32 = *(const uint32_t *)data;
434
break;
435
case 2:
436
spec_entries[i].value.u16 = *(const uint16_t *)data;
437
break;
438
case 1:
439
spec_entries[i].value.u8 = *(const uint8_t *)data;
440
break;
441
default:
442
assert(!"Invalid spec constant size");
443
break;
444
}
445
}
446
447
*out_num_spec_entries = num_spec_entries;
448
return spec_entries;
449
}
450
451
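/* Produces the NIR for a pipeline stage: SPIR-V modules are translated with
* spirv_to_nir using the stage's specialization constants, while internal
* NIR modules are cloned. The resulting shader is then preprocessed.
*/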
static nir_shader *
452
shader_module_compile_to_nir(struct v3dv_device *device,
453
struct v3dv_pipeline_stage *stage)
454
{
455
nir_shader *nir;
456
const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
457
458
if (!stage->module->nir) {
459
uint32_t *spirv = (uint32_t *) stage->module->data;
460
assert(stage->module->size % 4 == 0);
461
462
if (V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV)
463
v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
464
465
uint32_t num_spec_entries = 0;
466
struct nir_spirv_specialization *spec_entries =
467
vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);
468
const struct spirv_to_nir_options spirv_options = default_spirv_options;
469
nir = spirv_to_nir(spirv, stage->module->size / 4,
470
spec_entries, num_spec_entries,
471
broadcom_shader_stage_to_gl(stage->stage),
472
stage->entrypoint,
473
&spirv_options, nir_options);
474
assert(nir);
475
nir_validate_shader(nir, "after spirv_to_nir");
476
free(spec_entries);
477
} else {
478
/* For NIR modules created by the driver we can't consume the NIR
* directly, we need to clone it first, since ownership of the NIR code
* (as with SPIR-V code for SPIR-V shaders) belongs to the creator
* of the module, and modules can be destroyed immediately after being used
* to create pipelines.
*/
484
nir = nir_shader_clone(NULL, stage->module->nir);
485
nir_validate_shader(nir, "nir module");
486
}
487
assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));
488
489
if (V3D_DEBUG & (V3D_DEBUG_NIR |
490
v3d_debug_flag_for_shader_stage(
491
broadcom_shader_stage_to_gl(stage->stage)))) {
492
fprintf(stderr, "Initial form: %s prog %d NIR:\n",
493
broadcom_shader_stage_name(stage->stage),
494
stage->program_id);
495
nir_print_shader(nir, stderr);
496
fprintf(stderr, "\n");
497
}
498
499
/* We have to lower away local variable initializers right before we
500
* inline functions. That way they get properly initialized at the top
501
* of the function and not at the top of its caller.
502
*/
503
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
504
NIR_PASS_V(nir, nir_lower_returns);
505
NIR_PASS_V(nir, nir_inline_functions);
506
NIR_PASS_V(nir, nir_opt_deref);
507
508
/* Pick off the single entrypoint that we want */
509
foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
510
if (func->is_entrypoint)
511
func->name = ralloc_strdup(func, "main");
512
else
513
exec_node_remove(&func->node);
514
}
515
assert(exec_list_length(&nir->functions) == 1);
516
517
/* Vulkan uses the separate-shader linking model */
518
nir->info.separate_shader = true;
519
520
preprocess_nir(nir, stage);
521
522
return nir;
523
}
524
525
static int
526
type_size_vec4(const struct glsl_type *type, bool bindless)
527
{
528
return glsl_count_attribute_slots(type, false);
529
}
530
531
/* FIXME: the number of parameters for this function is somewhat large.
* Perhaps rethink.
*/
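/* Returns the index in the descriptor map for the given (set, binding,
* array_index) tuple, adding a new entry if it is not present yet. If an
* existing entry was recorded with a different return size, it is promoted
* to a 32-bit return size.
*/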
534
static unsigned
535
descriptor_map_add(struct v3dv_descriptor_map *map,
536
int set,
537
int binding,
538
int array_index,
539
int array_size,
540
uint8_t return_size)
541
{
542
assert(array_index < array_size);
543
assert(return_size == 16 || return_size == 32);
544
545
unsigned index = 0;
546
for (unsigned i = 0; i < map->num_desc; i++) {
547
if (set == map->set[i] &&
548
binding == map->binding[i] &&
549
array_index == map->array_index[i]) {
550
assert(array_size == map->array_size[i]);
551
if (return_size != map->return_size[index]) {
552
/* It the return_size is different it means that the same sampler
553
* was used for operations with different precision
554
* requirement. In this case we need to ensure that we use the
555
* larger one.
556
*/
557
map->return_size[index] = 32;
558
}
559
return index;
560
}
561
index++;
562
}
563
564
assert(index == map->num_desc);
565
566
map->set[map->num_desc] = set;
567
map->binding[map->num_desc] = binding;
568
map->array_index[map->num_desc] = array_index;
569
map->array_size[map->num_desc] = array_size;
570
map->return_size[map->num_desc] = return_size;
571
map->num_desc++;
572
573
return index;
574
}
575
576
577
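/* Lowers nir_intrinsic_load_push_constant to nir_intrinsic_load_uniform,
* which is how the backend expects push constant data to be accessed.
*/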
static void
578
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
579
struct v3dv_pipeline *pipeline)
580
{
581
assert(instr->intrinsic == nir_intrinsic_load_push_constant);
582
instr->intrinsic = nir_intrinsic_load_uniform;
583
}
584
585
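/* Returns the descriptor map (sampler, texture, UBO or SSBO map) that
* entries of the given descriptor type should be added to for the given
* shader stage.
*/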
static struct v3dv_descriptor_map*
586
pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
587
VkDescriptorType desc_type,
588
gl_shader_stage gl_stage,
589
bool is_sampler)
590
{
591
enum broadcom_shader_stage broadcom_stage =
592
gl_shader_stage_to_broadcom(gl_stage);
593
594
assert(pipeline->shared_data &&
595
pipeline->shared_data->maps[broadcom_stage]);
596
597
switch(desc_type) {
598
case VK_DESCRIPTOR_TYPE_SAMPLER:
599
return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
600
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
601
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
602
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
603
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
604
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
605
return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
606
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
607
return is_sampler ?
608
&pipeline->shared_data->maps[broadcom_stage]->sampler_map :
609
&pipeline->shared_data->maps[broadcom_stage]->texture_map;
610
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
611
return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
612
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
613
return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
614
default:
615
unreachable("Descriptor type unknown or not having a descriptor map");
616
}
617
}
618
619
/* Gathers info from the intrinsic (set and binding) and then lowers it so it
* can be used by the v3d_compiler */
621
static void
622
lower_vulkan_resource_index(nir_builder *b,
623
nir_intrinsic_instr *instr,
624
nir_shader *shader,
625
struct v3dv_pipeline *pipeline,
626
const struct v3dv_pipeline_layout *layout)
627
{
628
assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
629
630
nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
631
632
unsigned set = nir_intrinsic_desc_set(instr);
633
unsigned binding = nir_intrinsic_binding(instr);
634
struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
635
struct v3dv_descriptor_set_binding_layout *binding_layout =
636
&set_layout->binding[binding];
637
unsigned index = 0;
638
const VkDescriptorType desc_type = nir_intrinsic_desc_type(instr);
639
640
switch (desc_type) {
641
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
642
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
643
struct v3dv_descriptor_map *descriptor_map =
644
pipeline_get_descriptor_map(pipeline, desc_type, shader->info.stage, false);
645
646
if (!const_val)
647
unreachable("non-constant vulkan_resource_index array index");
648
649
index = descriptor_map_add(descriptor_map, set, binding,
650
const_val->u32,
651
binding_layout->array_size,
652
32 /* return_size: doesn't really apply for this case */);
653
654
if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
655
/* skip index 0 which is used for push constants */
656
index++;
657
}
658
break;
659
}
660
661
default:
662
unreachable("unsupported desc_type for vulkan_resource_index");
663
break;
664
}
665
666
/* Since we use the deref pass, both vulkan_resource_index and
667
* vulkan_load_descriptor return a vec2 providing an index and
668
* offset. Our backend compiler only cares about the index part.
669
*/
670
nir_ssa_def_rewrite_uses(&instr->dest.ssa,
671
nir_imm_ivec2(b, index, 0));
672
nir_instr_remove(&instr->instr);
673
}
674
675
/* Returns return_size, so it can be used for the case where there is no
* sampler object
*/
678
static uint8_t
679
lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
680
nir_shader *shader,
681
struct v3dv_pipeline *pipeline,
682
const struct v3dv_pipeline_layout *layout)
683
{
684
nir_ssa_def *index = NULL;
685
unsigned base_index = 0;
686
unsigned array_elements = 1;
687
nir_tex_src *src = &instr->src[src_idx];
688
bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
689
690
/* We compute first the offsets */
691
nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
692
while (deref->deref_type != nir_deref_type_var) {
693
assert(deref->parent.is_ssa);
694
nir_deref_instr *parent =
695
nir_instr_as_deref(deref->parent.ssa->parent_instr);
696
697
assert(deref->deref_type == nir_deref_type_array);
698
699
if (nir_src_is_const(deref->arr.index) && index == NULL) {
700
/* We're still building a direct index */
701
base_index += nir_src_as_uint(deref->arr.index) * array_elements;
702
} else {
703
if (index == NULL) {
704
/* We used to be direct but not anymore */
705
index = nir_imm_int(b, base_index);
706
base_index = 0;
707
}
708
709
index = nir_iadd(b, index,
710
nir_imul(b, nir_imm_int(b, array_elements),
711
nir_ssa_for_src(b, deref->arr.index, 1)));
712
}
713
714
array_elements *= glsl_get_length(parent->type);
715
716
deref = parent;
717
}
718
719
if (index)
720
index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
721
722
/* We have the offsets, we apply them, rewriting the source or removing
723
* instr if needed
724
*/
725
if (index) {
726
nir_instr_rewrite_src(&instr->instr, &src->src,
727
nir_src_for_ssa(index));
728
729
src->src_type = is_sampler ?
730
nir_tex_src_sampler_offset :
731
nir_tex_src_texture_offset;
732
} else {
733
nir_tex_instr_remove_src(instr, src_idx);
734
}
735
736
uint32_t set = deref->var->data.descriptor_set;
737
uint32_t binding = deref->var->data.binding;
738
/* FIXME: this is a really simplified check for the precision to be used
* for the sampling. Right now we are only checking the variables used
* in the operation itself, but there are other cases that we could use to
* infer the precision requirement.
*/
743
bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
744
deref->var->data.precision == GLSL_PRECISION_LOW;
745
struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
746
struct v3dv_descriptor_set_binding_layout *binding_layout =
747
&set_layout->binding[binding];
748
749
/* For input attachments, the shader includes the attachment_idx. As we are
750
* treating them as a texture, we only want the base_index
751
*/
752
uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
753
deref->var->data.index + base_index :
754
base_index;
755
756
uint8_t return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
757
758
struct v3dv_descriptor_map *map =
759
pipeline_get_descriptor_map(pipeline, binding_layout->type,
760
shader->info.stage, is_sampler);
761
int desc_index =
762
descriptor_map_add(map,
763
deref->var->data.descriptor_set,
764
deref->var->data.binding,
765
array_index,
766
binding_layout->array_size,
767
return_size);
768
769
if (is_sampler)
770
instr->sampler_index = desc_index;
771
else
772
instr->texture_index = desc_index;
773
774
return return_size;
775
}
776
777
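/* Rewrites the texture and sampler deref sources of a nir_tex_instr into the
* flat texture/sampler indices used by the v3d compiler. If the instruction
* has no sampler deref, it is assigned one of the reserved "no sampler"
* indices according to its return size.
*/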
static bool
778
lower_sampler(nir_builder *b, nir_tex_instr *instr,
779
nir_shader *shader,
780
struct v3dv_pipeline *pipeline,
781
const struct v3dv_pipeline_layout *layout)
782
{
783
uint8_t return_size = 0;
784
785
int texture_idx =
786
nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
787
788
if (texture_idx >= 0)
789
return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader,
790
pipeline, layout);
791
792
int sampler_idx =
793
nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
794
795
if (sampler_idx >= 0)
796
lower_tex_src_to_offset(b, instr, sampler_idx, shader, pipeline, layout);
797
798
if (texture_idx < 0 && sampler_idx < 0)
799
return false;
800
801
/* If we don't have a sampler, we assign it the idx we reserve for this
802
* case, and we ensure that it is using the correct return size.
803
*/
804
if (sampler_idx < 0) {
805
instr->sampler_index = return_size == 16 ?
806
V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
807
}
808
809
return true;
810
}
811
812
/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
813
static void
814
lower_image_deref(nir_builder *b,
815
nir_intrinsic_instr *instr,
816
nir_shader *shader,
817
struct v3dv_pipeline *pipeline,
818
const struct v3dv_pipeline_layout *layout)
819
{
820
nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
821
nir_ssa_def *index = NULL;
822
unsigned array_elements = 1;
823
unsigned base_index = 0;
824
825
while (deref->deref_type != nir_deref_type_var) {
826
assert(deref->parent.is_ssa);
827
nir_deref_instr *parent =
828
nir_instr_as_deref(deref->parent.ssa->parent_instr);
829
830
assert(deref->deref_type == nir_deref_type_array);
831
832
if (nir_src_is_const(deref->arr.index) && index == NULL) {
833
/* We're still building a direct index */
834
base_index += nir_src_as_uint(deref->arr.index) * array_elements;
835
} else {
836
if (index == NULL) {
837
/* We used to be direct but not anymore */
838
index = nir_imm_int(b, base_index);
839
base_index = 0;
840
}
841
842
index = nir_iadd(b, index,
843
nir_imul(b, nir_imm_int(b, array_elements),
844
nir_ssa_for_src(b, deref->arr.index, 1)));
845
}
846
847
array_elements *= glsl_get_length(parent->type);
848
849
deref = parent;
850
}
851
852
if (index)
853
index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
854
855
uint32_t set = deref->var->data.descriptor_set;
856
uint32_t binding = deref->var->data.binding;
857
struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
858
struct v3dv_descriptor_set_binding_layout *binding_layout =
859
&set_layout->binding[binding];
860
861
uint32_t array_index = deref->var->data.index + base_index;
862
863
assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
864
binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
865
866
struct v3dv_descriptor_map *map =
867
pipeline_get_descriptor_map(pipeline, binding_layout->type,
868
shader->info.stage, false);
869
870
int desc_index =
871
descriptor_map_add(map,
872
deref->var->data.descriptor_set,
873
deref->var->data.binding,
874
array_index,
875
binding_layout->array_size,
876
32 /* return_size: doesn't apply for textures */);
877
878
/* Note: we don't need to do anything here in relation to the precision and
879
* the output size because for images we can infer that info from the image
880
* intrinsic, that includes the image format (see
881
* NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
882
*/
883
884
index = nir_imm_int(b, desc_index);
885
886
nir_rewrite_image_intrinsic(instr, index, false);
887
}
888
889
static bool
890
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
891
nir_shader *shader,
892
struct v3dv_pipeline *pipeline,
893
const struct v3dv_pipeline_layout *layout)
894
{
895
switch (instr->intrinsic) {
896
case nir_intrinsic_load_layer_id:
897
/* FIXME: if layered rendering gets supported, this would need a real
898
* lowering
899
*/
900
nir_ssa_def_rewrite_uses(&instr->dest.ssa,
901
nir_imm_int(b, 0));
902
nir_instr_remove(&instr->instr);
903
return true;
904
905
case nir_intrinsic_load_push_constant:
906
lower_load_push_constant(b, instr, pipeline);
907
return true;
908
909
case nir_intrinsic_vulkan_resource_index:
910
lower_vulkan_resource_index(b, instr, shader, pipeline, layout);
911
return true;
912
913
case nir_intrinsic_load_vulkan_descriptor: {
914
/* Loading the descriptor happens as part of load/store instructions,
915
* so for us this is a no-op.
916
*/
917
nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
918
nir_instr_remove(&instr->instr);
919
return true;
920
}
921
922
case nir_intrinsic_image_deref_load:
923
case nir_intrinsic_image_deref_store:
924
case nir_intrinsic_image_deref_atomic_add:
925
case nir_intrinsic_image_deref_atomic_imin:
926
case nir_intrinsic_image_deref_atomic_umin:
927
case nir_intrinsic_image_deref_atomic_imax:
928
case nir_intrinsic_image_deref_atomic_umax:
929
case nir_intrinsic_image_deref_atomic_and:
930
case nir_intrinsic_image_deref_atomic_or:
931
case nir_intrinsic_image_deref_atomic_xor:
932
case nir_intrinsic_image_deref_atomic_exchange:
933
case nir_intrinsic_image_deref_atomic_comp_swap:
934
case nir_intrinsic_image_deref_size:
935
case nir_intrinsic_image_deref_samples:
936
lower_image_deref(b, instr, shader, pipeline, layout);
937
return true;
938
939
default:
940
return false;
941
}
942
}
943
944
static bool
945
lower_impl(nir_function_impl *impl,
946
nir_shader *shader,
947
struct v3dv_pipeline *pipeline,
948
const struct v3dv_pipeline_layout *layout)
949
{
950
nir_builder b;
951
nir_builder_init(&b, impl);
952
bool progress = false;
953
954
nir_foreach_block(block, impl) {
955
nir_foreach_instr_safe(instr, block) {
956
b.cursor = nir_before_instr(instr);
957
switch (instr->type) {
958
case nir_instr_type_tex:
959
progress |=
960
lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout);
961
break;
962
case nir_instr_type_intrinsic:
963
progress |=
964
lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader,
965
pipeline, layout);
966
break;
967
default:
968
break;
969
}
970
}
971
}
972
973
return progress;
974
}
975
976
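/* Walks every instruction in the shader and lowers the pipeline-layout
* dependent ones (texture/sampler derefs, image derefs and Vulkan
* resource/descriptor intrinsics) to the flat indices expected by the
* v3d compiler.
*/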
static bool
977
lower_pipeline_layout_info(nir_shader *shader,
978
struct v3dv_pipeline *pipeline,
979
const struct v3dv_pipeline_layout *layout)
980
{
981
bool progress = false;
982
983
nir_foreach_function(function, shader) {
984
if (function->impl)
985
progress |= lower_impl(function->impl, shader, pipeline, layout);
986
}
987
988
return progress;
989
}
990
991
992
static void
993
lower_fs_io(nir_shader *nir)
994
{
995
/* Our backend doesn't handle array fragment shader outputs */
996
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
997
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
998
999
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
1000
MESA_SHADER_FRAGMENT);
1001
1002
nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1003
MESA_SHADER_FRAGMENT);
1004
1005
NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
1006
type_size_vec4, 0);
1007
}
1008
1009
static void
1010
lower_gs_io(struct nir_shader *nir)
1011
{
1012
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
1013
1014
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
1015
MESA_SHADER_GEOMETRY);
1016
1017
nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1018
MESA_SHADER_GEOMETRY);
1019
}
1020
1021
static void
1022
lower_vs_io(struct nir_shader *nir)
1023
{
1024
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
1025
1026
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
1027
MESA_SHADER_VERTEX);
1028
1029
nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1030
MESA_SHADER_VERTEX);
1031
1032
/* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
1033
* overlaps with v3d_nir_lower_io. Need further research though.
1034
*/
1035
}
1036
1037
static void
1038
shader_debug_output(const char *message, void *data)
1039
{
1040
/* FIXME: We probably don't want to debug anything extra here, and in fact
* the compiler doesn't use this callback much, only as an alternative
* way to dump the shaderdb stats, which you can already get using
* V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
* compiler to remove that callback.
*/
1046
}
1047
1048
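/* Fills in the stage-independent part of the v3d compiler key: texture
* swizzles, sampler return sizes, user clip plane enables, robust buffer
* access and whether this is the last geometry stage.
*/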
static void
1049
pipeline_populate_v3d_key(struct v3d_key *key,
1050
const struct v3dv_pipeline_stage *p_stage,
1051
uint32_t ucp_enables,
1052
bool robust_buffer_access)
1053
{
1054
assert(p_stage->pipeline->shared_data &&
1055
p_stage->pipeline->shared_data->maps[p_stage->stage]);
1056
1057
/* The following values are defaults used at pipeline creation time. We use
* 32 bit as the default return size.
*/
1060
struct v3dv_descriptor_map *sampler_map =
1061
&p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
1062
struct v3dv_descriptor_map *texture_map =
1063
&p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
1064
1065
key->num_tex_used = texture_map->num_desc;
1066
assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
1067
for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
1068
key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
1069
key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
1070
key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
1071
key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
1072
}
1073
1074
key->num_samplers_used = sampler_map->num_desc;
1075
assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
1076
for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
1077
sampler_idx++) {
1078
key->sampler[sampler_idx].return_size =
1079
sampler_map->return_size[sampler_idx];
1080
1081
key->sampler[sampler_idx].return_channels =
1082
key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
1083
}
1084
1085
switch (p_stage->stage) {
1086
case BROADCOM_SHADER_VERTEX:
1087
case BROADCOM_SHADER_VERTEX_BIN:
1088
key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
1089
break;
1090
case BROADCOM_SHADER_GEOMETRY:
1091
case BROADCOM_SHADER_GEOMETRY_BIN:
1092
/* FIXME: true as long as we don't implement tessellation shaders */
1093
key->is_last_geometry_stage = true;
1094
break;
1095
case BROADCOM_SHADER_FRAGMENT:
1096
case BROADCOM_SHADER_COMPUTE:
1097
key->is_last_geometry_stage = false;
1098
break;
1099
default:
1100
unreachable("unsupported shader stage");
1101
}
1102
1103
/* Vulkan doesn't have fixed function state for user clip planes. Instead,
1104
* shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1105
* takes care of adding a single compact array variable at
1106
* VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1107
*
1108
* The only lowering we are interested is specific to the fragment shader,
1109
* where we want to emit discards to honor writes to gl_ClipDistance[] in
1110
* previous stages. This is done via nir_lower_clip_fs() so we only set up
1111
* the ucp enable mask for that stage.
1112
*/
1113
key->ucp_enables = ucp_enables;
1114
1115
key->robust_buffer_access = robust_buffer_access;
1116
1117
key->environment = V3D_ENVIRONMENT_VULKAN;
1118
}
1119
1120
/* FIXME: anv maps directly to the HW primitive type. Perhaps eventually we
* should do the same. For now we use prim_mode, which is the one already
* used on v3d.
*/
1123
static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
1124
[VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
1125
[VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
1126
[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
1127
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
1128
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
1129
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
1130
[VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
1131
[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
1132
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
1133
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
1134
};
1135
1136
static const enum pipe_logicop vk_to_pipe_logicop[] = {
1137
[VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1138
[VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1139
[VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1140
[VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1141
[VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1142
[VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1143
[VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1144
[VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1145
[VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1146
[VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1147
[VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1148
[VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1149
[VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1150
[VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1151
[VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1152
[VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1153
};
1154
1155
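/* Fills in the fragment shader specific part of the compiler key from the
* pipeline create info: topology, logic op, multisample state and the
* format/swizzle of each color attachment in the subpass.
*/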
static void
1156
pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
1157
const VkGraphicsPipelineCreateInfo *pCreateInfo,
1158
const struct v3dv_pipeline_stage *p_stage,
1159
bool has_geometry_shader,
1160
uint32_t ucp_enables)
1161
{
1162
assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
1163
1164
memset(key, 0, sizeof(*key));
1165
1166
const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1167
pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);
1168
1169
const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1170
pCreateInfo->pInputAssemblyState;
1171
uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1172
1173
key->is_points = (topology == PIPE_PRIM_POINTS);
1174
key->is_lines = (topology >= PIPE_PRIM_LINES &&
1175
topology <= PIPE_PRIM_LINE_STRIP);
1176
key->has_gs = has_geometry_shader;
1177
1178
const VkPipelineColorBlendStateCreateInfo *cb_info =
1179
pCreateInfo->pColorBlendState;
1180
1181
key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1182
vk_to_pipe_logicop[cb_info->logicOp] :
1183
PIPE_LOGICOP_COPY;
1184
1185
const bool raster_enabled =
1186
!pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1187
1188
/* Multisample rasterization state must be ignored if rasterization
1189
* is disabled.
1190
*/
1191
const VkPipelineMultisampleStateCreateInfo *ms_info =
1192
raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1193
if (ms_info) {
1194
assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1195
ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1196
key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1197
1198
if (key->msaa) {
1199
key->sample_coverage =
1200
p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1201
key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1202
key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1203
}
1204
}
1205
1206
/* This is intended for V3D versions before 4.1, otherwise we just use the
1207
* tile buffer load/store swap R/B bit.
1208
*/
1209
key->swap_color_rb = 0;
1210
1211
const struct v3dv_render_pass *pass =
1212
v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1213
const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
1214
for (uint32_t i = 0; i < subpass->color_count; i++) {
1215
const uint32_t att_idx = subpass->color_attachments[i].attachment;
1216
if (att_idx == VK_ATTACHMENT_UNUSED)
1217
continue;
1218
1219
key->cbufs |= 1 << i;
1220
1221
VkFormat fb_format = pass->attachments[att_idx].desc.format;
1222
enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1223
1224
/* If logic operations are enabled then we might emit color reads and we
1225
* need to know the color buffer format and swizzle for that
1226
*/
1227
if (key->logicop_func != PIPE_LOGICOP_COPY) {
1228
key->color_fmt[i].format = fb_pipe_format;
1229
key->color_fmt[i].swizzle =
1230
v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format);
1231
}
1232
1233
const struct util_format_description *desc =
1234
vk_format_description(fb_format);
1235
1236
if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1237
desc->channel[0].size == 32) {
1238
key->f32_color_rb |= 1 << i;
1239
}
1240
1241
if (p_stage->nir->info.fs.untyped_color_outputs) {
1242
if (util_format_is_pure_uint(fb_pipe_format))
1243
key->uint_color_rb |= 1 << i;
1244
else if (util_format_is_pure_sint(fb_pipe_format))
1245
key->int_color_rb |= 1 << i;
1246
}
1247
1248
if (key->is_points) {
1249
/* FIXME: The mask would need to be computed based on the shader
* inputs. On gallium it is done at st_atom_rasterizer
* (sprite_coord_enable). anv seems (need to confirm) to do that in
* genX_pipeline (PointSpriteTextureCoordinateEnable). It would also be
* better to have tests to guide filling the mask.
*/
1255
key->point_sprite_mask = 0;
1256
1257
/* Vulkan mandates upper left. */
1258
key->point_coord_upper_left = true;
1259
}
1260
}
1261
}
1262
1263
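/* The outputs a stage needs to produce are exactly the inputs consumed by
* the next stage, so copy the next stage's input slots into the current
* stage's used-output list.
*/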
static void
1264
setup_stage_outputs_from_next_stage_inputs(
1265
uint8_t next_stage_num_inputs,
1266
struct v3d_varying_slot *next_stage_input_slots,
1267
uint8_t *num_used_outputs,
1268
struct v3d_varying_slot *used_output_slots,
1269
uint32_t size_of_used_output_slots)
1270
{
1271
*num_used_outputs = next_stage_num_inputs;
1272
memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
1273
}
1274
1275
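/* Fills in the geometry shader specific part of the compiler key, linking
* its outputs against the fragment shader inputs (or none for the binning
* variant, as transform feedback is not supported).
*/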
static void
1276
pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
1277
const VkGraphicsPipelineCreateInfo *pCreateInfo,
1278
const struct v3dv_pipeline_stage *p_stage)
1279
{
1280
assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
1281
p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
1282
1283
memset(key, 0, sizeof(*key));
1284
1285
const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1286
pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1287
1288
struct v3dv_pipeline *pipeline = p_stage->pipeline;
1289
1290
key->per_vertex_point_size =
1291
p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1292
1293
key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1294
1295
assert(key->base.is_last_geometry_stage);
1296
if (key->is_coord) {
1297
/* Output varyings in the last binning shader are only used for transform
1298
* feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1299
*/
1300
key->num_used_outputs = 0;
1301
} else {
1302
struct v3dv_shader_variant *fs_variant =
1303
pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1304
1305
STATIC_ASSERT(sizeof(key->used_outputs) ==
1306
sizeof(fs_variant->prog_data.fs->input_slots));
1307
1308
setup_stage_outputs_from_next_stage_inputs(
1309
fs_variant->prog_data.fs->num_inputs,
1310
fs_variant->prog_data.fs->input_slots,
1311
&key->num_used_outputs,
1312
key->used_outputs,
1313
sizeof(key->used_outputs));
1314
}
1315
}
1316
1317
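/* Fills in the vertex shader specific part of the compiler key, linking its
* outputs against the next stage (GS or FS, render or binning variant) and
* recording which vertex attributes need a red/blue swap.
*/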
static void
1318
pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
1319
const VkGraphicsPipelineCreateInfo *pCreateInfo,
1320
const struct v3dv_pipeline_stage *p_stage)
1321
{
1322
assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
1323
p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
1324
1325
memset(key, 0, sizeof(*key));
1326
1327
const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1328
pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1329
1330
struct v3dv_pipeline *pipeline = p_stage->pipeline;
1331
1332
/* Vulkan specifies the point size per vertex, so this is true if the
* primitives are points (as on ES2).
*/
1335
const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1336
pCreateInfo->pInputAssemblyState;
1337
uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1338
1339
/* FIXME: PRIM_POINTS is not enough; in gallium the full check is
* PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
1341
key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
1342
1343
key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1344
1345
if (key->is_coord) { /* Binning VS*/
1346
if (key->base.is_last_geometry_stage) {
1347
/* Output varyings in the last binning shader are only used for
1348
* transform feedback. Set to 0 as VK_EXT_transform_feedback is not
1349
* supported.
1350
*/
1351
key->num_used_outputs = 0;
1352
} else {
1353
/* Linking against GS binning program */
1354
assert(pipeline->gs);
1355
struct v3dv_shader_variant *gs_bin_variant =
1356
pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
1357
1358
STATIC_ASSERT(sizeof(key->used_outputs) ==
1359
sizeof(gs_bin_variant->prog_data.gs->input_slots));
1360
1361
setup_stage_outputs_from_next_stage_inputs(
1362
gs_bin_variant->prog_data.gs->num_inputs,
1363
gs_bin_variant->prog_data.gs->input_slots,
1364
&key->num_used_outputs,
1365
key->used_outputs,
1366
sizeof(key->used_outputs));
1367
}
1368
} else { /* Render VS */
1369
if (pipeline->gs) {
1370
/* Linking against GS render program */
1371
struct v3dv_shader_variant *gs_variant =
1372
pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
1373
1374
STATIC_ASSERT(sizeof(key->used_outputs) ==
1375
sizeof(gs_variant->prog_data.gs->input_slots));
1376
1377
setup_stage_outputs_from_next_stage_inputs(
1378
gs_variant->prog_data.gs->num_inputs,
1379
gs_variant->prog_data.gs->input_slots,
1380
&key->num_used_outputs,
1381
key->used_outputs,
1382
sizeof(key->used_outputs));
1383
} else {
1384
/* Linking against FS program */
1385
struct v3dv_shader_variant *fs_variant =
1386
pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1387
1388
STATIC_ASSERT(sizeof(key->used_outputs) ==
1389
sizeof(fs_variant->prog_data.fs->input_slots));
1390
1391
setup_stage_outputs_from_next_stage_inputs(
1392
fs_variant->prog_data.fs->num_inputs,
1393
fs_variant->prog_data.fs->input_slots,
1394
&key->num_used_outputs,
1395
key->used_outputs,
1396
sizeof(key->used_outputs));
1397
}
1398
}
1399
1400
const VkPipelineVertexInputStateCreateInfo *vi_info =
1401
pCreateInfo->pVertexInputState;
1402
for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1403
const VkVertexInputAttributeDescription *desc =
1404
&vi_info->pVertexAttributeDescriptions[i];
1405
assert(desc->location < MAX_VERTEX_ATTRIBS);
1406
if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
1407
key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1408
}
1409
}
1410
1411
/**
1412
* Creates the initial form of the pipeline stage for a binning shader by
1413
* cloning the render shader and flagging it as a coordinate shader.
1414
*
1415
* Returns NULL if it was not able to allocate the object, so it should be
1416
* handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1417
*/
1418
static struct v3dv_pipeline_stage *
1419
pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
1420
const VkAllocationCallbacks *pAllocator)
1421
{
1422
struct v3dv_device *device = src->pipeline->device;
1423
1424
struct v3dv_pipeline_stage *p_stage =
1425
vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1426
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1427
1428
if (p_stage == NULL)
1429
return NULL;
1430
1431
assert(src->stage == BROADCOM_SHADER_VERTEX ||
1432
src->stage == BROADCOM_SHADER_GEOMETRY);
1433
1434
enum broadcom_shader_stage bin_stage =
1435
src->stage == BROADCOM_SHADER_VERTEX ?
1436
BROADCOM_SHADER_VERTEX_BIN :
1437
BROADCOM_SHADER_GEOMETRY_BIN;
1438
1439
p_stage->pipeline = src->pipeline;
1440
p_stage->stage = bin_stage;
1441
p_stage->entrypoint = src->entrypoint;
1442
p_stage->module = src->module;
1443
p_stage->nir = src->nir ? nir_shader_clone(NULL, src->nir) : NULL;
1444
p_stage->spec_info = src->spec_info;
1445
memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1446
1447
return p_stage;
1448
}
1449
1450
/**
1451
* Returns false if it was not able to allocate or map the assembly bo memory.
1452
*/
1453
static bool
1454
upload_assembly(struct v3dv_pipeline *pipeline)
1455
{
1456
uint32_t total_size = 0;
1457
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1458
struct v3dv_shader_variant *variant =
1459
pipeline->shared_data->variants[stage];
1460
1461
if (variant != NULL)
1462
total_size += variant->qpu_insts_size;
1463
}
1464
1465
struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1466
"pipeline shader assembly", true);
1467
if (!bo) {
1468
fprintf(stderr, "failed to allocate memory for shader\n");
1469
return false;
1470
}
1471
1472
bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1473
if (!ok) {
1474
fprintf(stderr, "failed to map source shader buffer\n");
1475
return false;
1476
}
1477
1478
uint32_t offset = 0;
1479
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1480
struct v3dv_shader_variant *variant =
1481
pipeline->shared_data->variants[stage];
1482
1483
if (variant != NULL) {
1484
variant->assembly_offset = offset;
1485
1486
memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1487
offset += variant->qpu_insts_size;
1488
1489
/* We don't need qpu_insts anymore. */
1490
free(variant->qpu_insts);
1491
variant->qpu_insts = NULL;
1492
}
1493
}
1494
assert(total_size == offset);
1495
1496
pipeline->shared_data->assembly_bo = bo;
1497
1498
return true;
1499
}
1500
1501
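/* Computes the pipeline cache hash for a graphics pipeline from the SHA-1 of
* every active shader stage plus the pipeline key.
*/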
static void
1502
pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
1503
struct v3dv_pipeline_key *key,
1504
unsigned char *sha1_out)
1505
{
1506
struct mesa_sha1 ctx;
1507
_mesa_sha1_init(&ctx);
1508
1509
/* We need to include all shader stages in the sha1 key as linking may modify
* the shader code in any stage. An alternative would be to use the
* serialized NIR, but that seems like overkill.
*/
1513
_mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
1514
sizeof(pipeline->vs->shader_sha1));
1515
1516
if (pipeline->gs) {
1517
_mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
1518
sizeof(pipeline->gs->shader_sha1));
1519
}
1520
1521
_mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
1522
sizeof(pipeline->fs->shader_sha1));
1523
1524
_mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1525
1526
_mesa_sha1_final(&ctx, sha1_out);
1527
}
1528
1529
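/* Computes the pipeline cache hash for a compute pipeline from the compute
* shader's SHA-1 plus the pipeline key.
*/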
static void
1530
pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
1531
struct v3dv_pipeline_key *key,
1532
unsigned char *sha1_out)
1533
{
1534
struct mesa_sha1 ctx;
1535
_mesa_sha1_init(&ctx);
1536
1537
_mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
1538
sizeof(pipeline->cs->shader_sha1));
1539
1540
_mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1541
1542
_mesa_sha1_final(&ctx, sha1_out);
1543
}
1544
1545
/* Checks that the pipeline has enough spill size to use for any of its
* variants
*/
1548
static void
1549
pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1550
{
1551
uint32_t max_spill_size = 0;
1552
1553
for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1554
struct v3dv_shader_variant *variant =
1555
pipeline->shared_data->variants[stage];
1556
1557
if (variant != NULL) {
1558
max_spill_size = MAX2(variant->prog_data.base->spill_size,
1559
max_spill_size);
1560
}
1561
}
1562
1563
if (max_spill_size > 0) {
1564
struct v3dv_device *device = pipeline->device;
1565
1566
/* The TIDX register we use for choosing the area to access
* for scratch space is: (core << 6) | (qpu << 2) | thread.
* Even at minimum threadcount in a particular shader, that
* means we still multiply the number of QPUs by 4.
*/
1571
const uint32_t total_spill_size =
1572
4 * device->devinfo.qpu_count * max_spill_size;
1573
if (pipeline->spill.bo) {
1574
assert(pipeline->spill.size_per_thread > 0);
1575
v3dv_bo_free(device, pipeline->spill.bo);
1576
}
1577
pipeline->spill.bo =
1578
v3dv_bo_alloc(device, total_spill_size, "spill", true);
1579
pipeline->spill.size_per_thread = max_spill_size;
1580
}
1581
}
1582
1583
/**
* Creates a new shader variant. Note that prog_data is not const,
* as it is assumed that the caller will provide a pointer that the
* shader_variant will own.
*
* Creation doesn't include allocating a BO to store the contents of qpu_insts,
* as we will try to share the same bo for several shader variants. Also note
* that qpu_insts being NULL is valid, for example if we are creating the
* shader_variants from the cache, so we can just upload the assembly of all
* the shader stages at once.
*/
1594
struct v3dv_shader_variant *
1595
v3dv_shader_variant_create(struct v3dv_device *device,
1596
enum broadcom_shader_stage stage,
1597
struct v3d_prog_data *prog_data,
1598
uint32_t prog_data_size,
1599
uint32_t assembly_offset,
1600
uint64_t *qpu_insts,
1601
uint32_t qpu_insts_size,
1602
VkResult *out_vk_result)
1603
{
1604
struct v3dv_shader_variant *variant =
1605
vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1606
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1607
1608
if (variant == NULL) {
1609
*out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1610
return NULL;
1611
}
1612
1613
variant->stage = stage;
1614
variant->prog_data_size = prog_data_size;
1615
variant->prog_data.base = prog_data;
1616
1617
variant->assembly_offset = assembly_offset;
1618
variant->qpu_insts_size = qpu_insts_size;
1619
variant->qpu_insts = qpu_insts;
1620
1621
*out_vk_result = VK_SUCCESS;
1622
1623
return variant;
1624
}
1625
1626
/* For a given key, returns the compiled version of the shader. Returns a
* new reference to the shader_variant to the caller, or NULL.
*
* If this function returns NULL it means that something went wrong:
*   * Not enough memory: this is one of the possible outcomes defined by
*     vkCreateXXXPipelines. out_vk_result will return the proper oom error.
*   * Compilation error: hypothetically this shouldn't happen, as the spec
*     states that vkShaderModule needs to be created with valid SPIR-V, so
*     any compilation failure is a driver bug. In practice, something as
*     common as failing to register allocate can lead to a compilation
*     failure. In that case the only option (for any driver) is
*     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
*     error.
*/
1640
static struct v3dv_shader_variant *
1641
pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1642
struct v3d_key *key,
1643
size_t key_size,
1644
const VkAllocationCallbacks *pAllocator,
1645
VkResult *out_vk_result)
1646
{
1647
struct v3dv_pipeline *pipeline = p_stage->pipeline;
1648
struct v3dv_physical_device *physical_device =
1649
&pipeline->device->instance->physicalDevice;
1650
const struct v3d_compiler *compiler = physical_device->compiler;
1651
1652
if (V3D_DEBUG & (V3D_DEBUG_NIR |
1653
v3d_debug_flag_for_shader_stage
1654
(broadcom_shader_stage_to_gl(p_stage->stage)))) {
1655
fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1656
broadcom_shader_stage_name(p_stage->stage),
1657
p_stage->program_id);
1658
nir_print_shader(p_stage->nir, stderr);
1659
fprintf(stderr, "\n");
1660
}
1661
1662
uint64_t *qpu_insts;
1663
uint32_t qpu_insts_size;
1664
struct v3d_prog_data *prog_data;
1665
uint32_t prog_data_size =
1666
v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));
1667
1668
qpu_insts = v3d_compile(compiler,
1669
key, &prog_data,
1670
p_stage->nir,
1671
shader_debug_output, NULL,
1672
p_stage->program_id, 0,
1673
&qpu_insts_size);
1674
1675
struct v3dv_shader_variant *variant = NULL;
1676
1677
if (!qpu_insts) {
1678
fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1679
gl_shader_stage_name(p_stage->stage),
1680
p_stage->program_id);
1681
*out_vk_result = VK_ERROR_UNKNOWN;
1682
} else {
1683
variant =
1684
v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1685
prog_data, prog_data_size,
1686
0, /* assembly_offset, no final value yet */
1687
qpu_insts, qpu_insts_size,
1688
out_vk_result);
1689
}
1690
/* At this point we no longer need the NIR shader, but all the temporary
* p_stage structs used during pipeline creation are freed when we finish
* it, so let's not worry about freeing the NIR here.
*/
1694
1695
return variant;
1696
}
1697
1698
/* FIXME: copied and pasted from the st (gallium state tracker) code; move to a common place? */
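/* Note on the conventions used in the passes below: NIR_PASS(progress, ...)
* records in `progress` whether the pass reported making changes, while
* NIR_PASS_V runs a pass unconditionally without tracking progress. The
* do/while loop therefore iterates the whole pass list to a fixed point,
* stopping once a complete round makes no further progress.
*/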
1699
static void
1700
st_nir_opts(nir_shader *nir)
1701
{
1702
bool progress;
1703
1704
do {
1705
progress = false;
1706
1707
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1708
1709
/* Linking deals with unused inputs/outputs, but here we can remove
* things local to the shader in the hope that we can clean up other
* things. This pass will also remove variables with only stores, so we
* might be able to make progress after it.
*/
1714
NIR_PASS(progress, nir, nir_remove_dead_variables,
1715
(nir_variable_mode)(nir_var_function_temp |
1716
nir_var_shader_temp |
1717
nir_var_mem_shared),
1718
NULL);
1719
1720
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
1721
NIR_PASS(progress, nir, nir_opt_dead_write_vars);
1722
1723
if (nir->options->lower_to_scalar) {
1724
NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
1725
NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
1726
}
1727
1728
NIR_PASS_V(nir, nir_lower_alu);
1729
NIR_PASS_V(nir, nir_lower_pack);
1730
NIR_PASS(progress, nir, nir_copy_prop);
1731
NIR_PASS(progress, nir, nir_opt_remove_phis);
1732
NIR_PASS(progress, nir, nir_opt_dce);
1733
if (nir_opt_trivial_continues(nir)) {
1734
progress = true;
1735
NIR_PASS(progress, nir, nir_copy_prop);
1736
NIR_PASS(progress, nir, nir_opt_dce);
1737
}
1738
NIR_PASS(progress, nir, nir_opt_if, false);
1739
NIR_PASS(progress, nir, nir_opt_dead_cf);
1740
NIR_PASS(progress, nir, nir_opt_cse);
1741
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
1742
1743
NIR_PASS(progress, nir, nir_opt_algebraic);
1744
NIR_PASS(progress, nir, nir_opt_constant_folding);
1745
1746
NIR_PASS(progress, nir, nir_opt_undef);
1747
NIR_PASS(progress, nir, nir_opt_conditional_discard);
1748
} while (progress);
1749
}
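/* Summary of the cross-stage linking done below (descriptive only):
* scalarize the producer outputs / consumer inputs when the backend wants
* scalar IO, split IO arrays into elements, optimize both shaders locally,
* then let nir_link_opt_varyings and nir_remove_unused_varyings drop or
* simplify varyings that the consumer no longer needs, and finally re-run
* the local optimizations and dead-variable removal so the code that just
* became dead is cleaned up on both sides.
*/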
1750
1751
static void
1752
link_shaders(nir_shader *producer, nir_shader *consumer)
1753
{
1754
assert(producer);
1755
assert(consumer);
1756
1757
if (producer->options->lower_to_scalar) {
1758
NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1759
NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1760
}
1761
1762
nir_lower_io_arrays_to_elements(producer, consumer);
1763
1764
st_nir_opts(producer);
1765
st_nir_opts(consumer);
1766
1767
if (nir_link_opt_varyings(producer, consumer))
1768
st_nir_opts(consumer);
1769
1770
NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1771
NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1772
1773
if (nir_remove_unused_varyings(producer, consumer)) {
1774
NIR_PASS_V(producer, nir_lower_global_vars_to_local);
1775
NIR_PASS_V(consumer, nir_lower_global_vars_to_local);
1776
1777
st_nir_opts(producer);
1778
st_nir_opts(consumer);
1779
1780
/* Optimizations can cause varyings to become unused.
1781
* nir_compact_varyings() depends on all dead varyings being removed so
1782
* we need to call nir_remove_dead_variables() again here.
1783
*/
1784
NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1785
NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1786
}
1787
}
1788
1789
static void
1790
pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1791
struct v3dv_pipeline_stage *p_stage,
1792
struct v3dv_pipeline_layout *layout)
1793
{
1794
assert(pipeline->shared_data &&
1795
pipeline->shared_data->maps[p_stage->stage]);
1796
1797
nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1798
1799
/* We add this because we need a valid sampler for nir_lower_tex to do
* unpacking of the texture operation result, even for the case where there
* is no sampler state.
*
* We add two of those: one for the case where we need a 16-bit return size,
* and another for the case where we need a 32-bit return size.
*/
1806
UNUSED unsigned index =
1807
descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1808
-1, -1, -1, 0, 16);
1809
assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1810
1811
index =
1812
descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1813
-2, -2, -2, 0, 32);
1814
assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1815
1816
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1817
NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);
1818
}
1819
1820
/**
1821
* The SPIR-V compiler will insert a sized compact array for
1822
* VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1823
* where the size of the array determines the number of active clip planes.
1824
*/
1825
static uint32_t
1826
get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1827
{
1828
assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1829
const nir_shader *shader = p_stage->nir;
1830
assert(shader);
1831
1832
nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1833
if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1834
assert(var->data.compact);
1835
return (1 << glsl_get_length(var->type)) - 1;
1836
}
1837
}
1838
return 0;
1839
}
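/* Worked example: if the vertex shader declares gl_ClipDistance[4], the
* SPIR-V compiler emits a compact VARYING_SLOT_CLIP_DIST0 array of length 4,
* so the function above returns (1 << 4) - 1 = 0xf, i.e. user clip planes
* 0-3 enabled.
*/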
1840
1841
static nir_shader *
1842
pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1843
struct v3dv_pipeline *pipeline,
1844
struct v3dv_pipeline_cache *cache)
1845
{
1846
nir_shader *nir = NULL;
1847
1848
nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1849
&v3dv_nir_options,
1850
p_stage->shader_sha1);
1851
1852
if (nir) {
1853
assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1854
return nir;
1855
}
1856
1857
nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1858
1859
if (nir) {
1860
struct v3dv_pipeline_cache *default_cache =
1861
&pipeline->device->default_pipeline_cache;
1862
1863
v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1864
p_stage->shader_sha1);
1865
1866
/* Ensure that the NIR is also in the default cache, as the cmd_buffer could
* need to change the current variant.
*/
1869
if (default_cache != cache) {
1870
v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1871
p_stage->shader_sha1);
1872
}
1873
return nir;
1874
}
1875
1876
/* FIXME: this shouldn't happen, raise error? */
1877
return NULL;
1878
}
1879
1880
static void
1881
pipeline_hash_shader(const struct vk_shader_module *module,
1882
const char *entrypoint,
1883
gl_shader_stage stage,
1884
const VkSpecializationInfo *spec_info,
1885
unsigned char *sha1_out)
1886
{
1887
struct mesa_sha1 ctx;
1888
_mesa_sha1_init(&ctx);
1889
1890
_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
1891
_mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
1892
_mesa_sha1_update(&ctx, &stage, sizeof(stage));
1893
if (spec_info) {
1894
_mesa_sha1_update(&ctx, spec_info->pMapEntries,
1895
spec_info->mapEntryCount *
1896
sizeof(*spec_info->pMapEntries));
1897
_mesa_sha1_update(&ctx, spec_info->pData,
1898
spec_info->dataSize);
1899
}
1900
1901
_mesa_sha1_final(&ctx, sha1_out);
1902
}
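/* Note (descriptive): the per-stage SHA1 above covers the shader module
* contents (via module->sha1), the entrypoint name, the stage, and the
* specialization constants (both the map entries and the raw data), so two
* pipeline stages only share a NIR cache entry when all of those match.
*/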
1903
1904
static VkResult
1905
pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1906
const VkAllocationCallbacks *pAllocator,
1907
const VkGraphicsPipelineCreateInfo *pCreateInfo)
1908
{
1909
assert(pipeline->vs_bin != NULL);
1910
if (pipeline->vs_bin->nir == NULL) {
1911
assert(pipeline->vs->nir);
1912
pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);
1913
}
1914
1915
VkResult vk_result;
1916
struct v3d_vs_key key;
1917
pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);
1918
pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1919
pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),
1920
pAllocator, &vk_result);
1921
if (vk_result != VK_SUCCESS)
1922
return vk_result;
1923
1924
pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
1925
pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1926
pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
1927
pAllocator, &vk_result);
1928
1929
return vk_result;
1930
}
1931
1932
static VkResult
1933
pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1934
const VkAllocationCallbacks *pAllocator,
1935
const VkGraphicsPipelineCreateInfo *pCreateInfo)
1936
{
1937
assert(pipeline->gs);
1938
1939
assert(pipeline->gs_bin != NULL);
1940
if (pipeline->gs_bin->nir == NULL) {
1941
assert(pipeline->gs->nir);
1942
pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
1943
}
1944
1945
VkResult vk_result;
1946
struct v3d_gs_key key;
1947
pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
1948
pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1949
pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
1950
pAllocator, &vk_result);
1951
if (vk_result != VK_SUCCESS)
1952
return vk_result;
1953
1954
pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
1955
pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1956
pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
1957
pAllocator, &vk_result);
1958
1959
return vk_result;
1960
}
1961
1962
static VkResult
1963
pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1964
const VkAllocationCallbacks *pAllocator,
1965
const VkGraphicsPipelineCreateInfo *pCreateInfo)
1966
{
1967
struct v3dv_pipeline_stage *p_stage = pipeline->fs;
1970
1971
struct v3d_fs_key key;
1972
1973
pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
1974
pipeline->gs != NULL,
1975
get_ucp_enable_mask(pipeline->vs));
1976
1977
VkResult vk_result;
1978
pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1979
pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),
1980
pAllocator, &vk_result);
1981
1982
return vk_result;
1983
}
1984
1985
static void
1986
pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1987
struct v3dv_pipeline_key *key,
1988
const VkGraphicsPipelineCreateInfo *pCreateInfo)
1989
{
1990
memset(key, 0, sizeof(*key));
1991
key->robust_buffer_access =
1992
pipeline->device->features.robustBufferAccess;
1993
1994
const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1995
pCreateInfo->pInputAssemblyState;
1996
key->topology = vk_to_pipe_prim_type[ia_info->topology];
1997
1998
const VkPipelineColorBlendStateCreateInfo *cb_info =
1999
pCreateInfo->pColorBlendState;
2000
key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
2001
vk_to_pipe_logicop[cb_info->logicOp] :
2002
PIPE_LOGICOP_COPY;
2003
2004
const bool raster_enabled =
2005
!pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2006
2007
/* Multisample rasterization state must be ignored if rasterization
2008
* is disabled.
2009
*/
2010
const VkPipelineMultisampleStateCreateInfo *ms_info =
2011
raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2012
if (ms_info) {
2013
assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
2014
ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
2015
key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
2016
2017
if (key->msaa) {
2018
key->sample_coverage =
2019
pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
2020
key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
2021
key->sample_alpha_to_one = ms_info->alphaToOneEnable;
2022
}
2023
}
2024
2025
const struct v3dv_render_pass *pass =
2026
v3dv_render_pass_from_handle(pCreateInfo->renderPass);
2027
const struct v3dv_subpass *subpass = pipeline->subpass;
2028
for (uint32_t i = 0; i < subpass->color_count; i++) {
2029
const uint32_t att_idx = subpass->color_attachments[i].attachment;
2030
if (att_idx == VK_ATTACHMENT_UNUSED)
2031
continue;
2032
2033
key->cbufs |= 1 << i;
2034
2035
VkFormat fb_format = pass->attachments[att_idx].desc.format;
2036
enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
2037
2038
/* If logic operations are enabled then we might emit color reads and we
2039
* need to know the color buffer format and swizzle for that
2040
*/
2041
if (key->logicop_func != PIPE_LOGICOP_COPY) {
2042
key->color_fmt[i].format = fb_pipe_format;
2043
key->color_fmt[i].swizzle = v3dv_get_format_swizzle(pipeline->device,
2044
fb_format);
2045
}
2046
2047
const struct util_format_description *desc =
2048
vk_format_description(fb_format);
2049
2050
if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2051
desc->channel[0].size == 32) {
2052
key->f32_color_rb |= 1 << i;
2053
}
2054
}
2055
2056
const VkPipelineVertexInputStateCreateInfo *vi_info =
2057
pCreateInfo->pVertexInputState;
2058
for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2059
const VkVertexInputAttributeDescription *desc =
2060
&vi_info->pVertexAttributeDescriptions[i];
2061
assert(desc->location < MAX_VERTEX_ATTRIBS);
2062
if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
2063
key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2064
}
2065
2066
}
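/* Worked example for some of the key bits above (illustrative): with 4x MSAA
* and a pSampleMask of 0x3, pipeline->sample_mask != 0xf and sample_coverage
* is set; a vertex attribute at location 2 using VK_FORMAT_B8G8R8A8_UNORM
* sets bit (VERT_ATTRIB_GENERIC0 + 2) in va_swap_rb_mask so the compiler
* knows to swap the red/blue channels when reading it.
*/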
2067
2068
static void
2069
pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2070
struct v3dv_pipeline_key *key,
2071
const VkComputePipelineCreateInfo *pCreateInfo)
2072
{
2073
/* We use the same pipeline key for graphics and compute, but we don't need
* to add a field to flag compute keys, because this key is not used alone
* to search the cache: we also use, for example, the SPIR-V or the
* serialized NIR, which already identifies compute shaders.
*/
2078
memset(key, 0, sizeof(*key));
2079
key->robust_buffer_access =
2080
pipeline->device->features.robustBufferAccess;
2081
}
2082
2083
static struct v3dv_pipeline_shared_data *
2084
v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2085
struct v3dv_pipeline *pipeline,
2086
bool is_graphics_pipeline)
2087
{
2088
/* We create new_entry using the device alloc. Right now shared_data is
* referenced and unreferenced by both the pipeline and the pipeline cache,
* so we can't ensure that the cache or pipeline alloc will still be
* available on the last unref.
*/
2093
struct v3dv_pipeline_shared_data *new_entry =
2094
vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2095
sizeof(struct v3dv_pipeline_shared_data), 8,
2096
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2097
2098
if (new_entry == NULL)
2099
return NULL;
2100
2101
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2102
/* We don't need specific descriptor maps for binning stages; we use the
* map for the render stage.
*/
2105
if (broadcom_shader_stage_is_binning(stage))
2106
continue;
2107
2108
if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2109
(!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2110
continue;
2111
}
2112
2113
if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs)
2114
continue;
2115
2116
struct v3dv_descriptor_maps *new_maps =
2117
vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2118
sizeof(struct v3dv_descriptor_maps), 8,
2119
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2120
2121
if (new_maps == NULL)
2122
goto fail;
2123
2124
new_entry->maps[stage] = new_maps;
2125
}
2126
2127
new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2128
new_entry->maps[BROADCOM_SHADER_VERTEX];
2129
2130
new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2131
new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2132
2133
new_entry->ref_cnt = 1;
2134
memcpy(new_entry->sha1_key, sha1_key, 20);
2135
2136
return new_entry;
2137
2138
fail:
2139
if (new_entry != NULL) {
2140
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2141
if (new_entry->maps[stage] != NULL)
2142
vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2143
}
2144
}
2145
2146
vk_free(&pipeline->device->vk.alloc, new_entry);
2147
2148
return NULL;
2149
}
2150
2151
/*
* This compiles a pipeline. Note that it also allocates internal objects, but
* if some allocations succeed and others fail, the method does not free the
* successful ones.
*
* This is done to simplify the code: what we do in this case is just call
* the pipeline destroy method, which handles freeing the internal objects
* that were allocated. We just need to be careful to set the objects that
* were not allocated to NULL.
*/
2161
static VkResult
2162
pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2163
struct v3dv_pipeline_cache *cache,
2164
const VkGraphicsPipelineCreateInfo *pCreateInfo,
2165
const VkAllocationCallbacks *pAllocator)
2166
{
2167
struct v3dv_device *device = pipeline->device;
2168
struct v3dv_physical_device *physical_device =
2169
&device->instance->physicalDevice;
2170
2171
/* First pass to get some common info from the shader, and create the
2172
* individual pipeline_stage objects
2173
*/
2174
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2175
const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2176
gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2177
2178
struct v3dv_pipeline_stage *p_stage =
2179
vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2180
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2181
2182
if (p_stage == NULL)
2183
return VK_ERROR_OUT_OF_HOST_MEMORY;
2184
2185
/* Note that we are assigning program_id slightly differently than
* v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
* would have different program_ids, while v3d would have the same for
* both. For v3dv it is more natural to have an id this way, as right
* now we are using it for debugging, not for shader-db.
*/
2191
p_stage->program_id =
2192
p_atomic_inc_return(&physical_device->next_program_id);
2193
2194
p_stage->pipeline = pipeline;
2195
p_stage->stage = gl_shader_stage_to_broadcom(stage);
2196
p_stage->entrypoint = sinfo->pName;
2197
p_stage->module = vk_shader_module_from_handle(sinfo->module);
2198
p_stage->spec_info = sinfo->pSpecializationInfo;
2199
2200
pipeline_hash_shader(p_stage->module,
2201
p_stage->entrypoint,
2202
stage,
2203
p_stage->spec_info,
2204
p_stage->shader_sha1);
2205
2206
pipeline->active_stages |= sinfo->stage;
2207
2208
/* We will try to get the compiled shader variant directly, so let's not
* worry about getting the NIR shader for now.
*/
2211
p_stage->nir = NULL;
2212
2213
switch(stage) {
2214
case MESA_SHADER_VERTEX:
2215
pipeline->vs = p_stage;
2216
pipeline->vs_bin =
2217
pipeline_stage_create_binning(pipeline->vs, pAllocator);
2218
if (pipeline->vs_bin == NULL)
2219
return VK_ERROR_OUT_OF_HOST_MEMORY;
2220
break;
2221
2222
case MESA_SHADER_GEOMETRY:
2223
pipeline->has_gs = true;
2224
pipeline->gs = p_stage;
2225
pipeline->gs_bin =
2226
pipeline_stage_create_binning(pipeline->gs, pAllocator);
2227
if (pipeline->gs_bin == NULL)
2228
return VK_ERROR_OUT_OF_HOST_MEMORY;
2229
break;
2230
2231
case MESA_SHADER_FRAGMENT:
2232
pipeline->fs = p_stage;
2233
break;
2234
2235
default:
2236
unreachable("not supported shader stage");
2237
}
2238
}
2239
2240
/* Add a no-op fragment shader if needed */
2241
if (!pipeline->fs) {
2242
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2243
&v3dv_nir_options,
2244
"noop_fs");
2245
2246
struct v3dv_pipeline_stage *p_stage =
2247
vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2248
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2249
2250
if (p_stage == NULL)
2251
return VK_ERROR_OUT_OF_HOST_MEMORY;
2252
2253
p_stage->pipeline = pipeline;
2254
p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2255
p_stage->entrypoint = "main";
2256
p_stage->module = 0;
2257
p_stage->nir = b.shader;
2258
pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
2259
p_stage->program_id =
2260
p_atomic_inc_return(&physical_device->next_program_id);
2261
2262
pipeline->fs = p_stage;
2263
pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
2264
}
2265
2266
/* First we try to get the variants from the pipeline cache */
2267
struct v3dv_pipeline_key pipeline_key;
2268
pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2269
unsigned char pipeline_sha1[20];
2270
pipeline_hash_graphics(pipeline, &pipeline_key, pipeline_sha1);
2271
2272
pipeline->shared_data =
2273
v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1);
2274
2275
if (pipeline->shared_data != NULL) {
2276
/* A correct pipeline must have at least a VS and FS */
2277
assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2278
assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2279
assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2280
assert(!pipeline->gs ||
2281
pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2282
assert(!pipeline->gs ||
2283
pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2284
goto success;
2285
}
2286
2287
/* Otherwise we try to get the NIR shaders (either from the original SPIR-V
* shader or the pipeline cache) and compile them.
*/
2290
pipeline->shared_data =
2291
v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline, true);
2292
2293
if (!pipeline->vs->nir)
2294
pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2295
if (pipeline->gs && !pipeline->gs->nir)
2296
pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
2297
if (!pipeline->fs->nir)
2298
pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
2299
2300
/* Linking + pipeline lowerings */
2301
if (pipeline->gs) {
2302
link_shaders(pipeline->gs->nir, pipeline->fs->nir);
2303
link_shaders(pipeline->vs->nir, pipeline->gs->nir);
2304
} else {
2305
link_shaders(pipeline->vs->nir, pipeline->fs->nir);
2306
}
2307
2308
pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
2309
lower_fs_io(pipeline->fs->nir);
2310
2311
if (pipeline->gs) {
2312
pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
2313
lower_gs_io(pipeline->vs->nir);
2314
}
2315
2316
pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
2317
lower_vs_io(pipeline->vs->nir);
2318
2319
/* Compiling to vir */
2320
VkResult vk_result;
2321
2322
/* We should have gotten either all the variants or none from the cache */
2323
assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2324
vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
2325
if (vk_result != VK_SUCCESS)
2326
return vk_result;
2327
2328
assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2329
!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2330
2331
if (pipeline->gs) {
2332
vk_result =
2333
pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2334
if (vk_result != VK_SUCCESS)
2335
return vk_result;
2336
}
2337
2338
assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2339
!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2340
2341
vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2342
if (vk_result != VK_SUCCESS)
2343
return vk_result;
2344
2345
if (!upload_assembly(pipeline))
2346
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2347
2348
v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2349
2350
success:
2351
/* Since we have the variants in the pipeline shared data we can now free
2352
* the pipeline stages.
2353
*/
2354
pipeline_free_stages(device, pipeline, pAllocator);
2355
2356
pipeline_check_spill_size(pipeline);
2357
2358
return compute_vpm_config(pipeline);
2359
}
2360
2361
static VkResult
2362
compute_vpm_config(struct v3dv_pipeline *pipeline)
2363
{
2364
struct v3dv_shader_variant *vs_variant =
pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
struct v3dv_shader_variant *vs_bin_variant =
pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;
2370
2371
struct v3d_gs_prog_data *gs = NULL;
2372
struct v3d_gs_prog_data *gs_bin = NULL;
2373
if (pipeline->has_gs) {
2374
struct v3dv_shader_variant *gs_variant =
2375
pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2376
struct v3dv_shader_variant *gs_bin_variant =
2377
pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2378
gs = gs_variant->prog_data.gs;
2379
gs_bin = gs_bin_variant->prog_data.gs;
2380
}
2381
2382
if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2383
vs_bin, vs, gs_bin, gs,
2384
&pipeline->vpm_cfg_bin,
2385
&pipeline->vpm_cfg)) {
2386
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2387
}
2388
2389
return VK_SUCCESS;
2390
}
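/* Note (descriptive, with some assumptions): v3d_compute_vpm_config sizes
* the Vertex Pipe Memory (VPM) segments for the bin and render variants
* from their prog_data. If it can't find a valid configuration (presumably
* because the shaders' VPM requirements don't fit), the only thing we can
* do is report VK_ERROR_OUT_OF_DEVICE_MEMORY.
*/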
2391
2392
static unsigned
2393
v3dv_dynamic_state_mask(VkDynamicState state)
2394
{
2395
switch(state) {
2396
case VK_DYNAMIC_STATE_VIEWPORT:
2397
return V3DV_DYNAMIC_VIEWPORT;
2398
case VK_DYNAMIC_STATE_SCISSOR:
2399
return V3DV_DYNAMIC_SCISSOR;
2400
case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2401
return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2402
case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2403
return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2404
case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2405
return V3DV_DYNAMIC_STENCIL_REFERENCE;
2406
case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2407
return V3DV_DYNAMIC_BLEND_CONSTANTS;
2408
case VK_DYNAMIC_STATE_DEPTH_BIAS:
2409
return V3DV_DYNAMIC_DEPTH_BIAS;
2410
case VK_DYNAMIC_STATE_LINE_WIDTH:
2411
return V3DV_DYNAMIC_LINE_WIDTH;
2412
2413
/* Depth bounds testing is not available in V3D 4.2, so here we just
* ignore this dynamic state. We already assert at pipeline creation
* time that depth bounds testing is not enabled.
*/
2417
case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2418
return 0;
2419
2420
default:
2421
unreachable("Unhandled dynamic state");
2422
}
2423
}
2424
2425
static void
2426
pipeline_init_dynamic_state(
2427
struct v3dv_pipeline *pipeline,
2428
const VkPipelineDynamicStateCreateInfo *pDynamicState,
2429
const VkPipelineViewportStateCreateInfo *pViewportState,
2430
const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2431
const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2432
const VkPipelineRasterizationStateCreateInfo *pRasterizationState)
2433
{
2434
pipeline->dynamic_state = default_dynamic_state;
2435
struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2436
2437
/* Create a mask of enabled dynamic states */
2438
uint32_t dynamic_states = 0;
2439
if (pDynamicState) {
2440
uint32_t count = pDynamicState->dynamicStateCount;
2441
for (uint32_t s = 0; s < count; s++) {
2442
dynamic_states |=
2443
v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2444
}
2445
}
2446
2447
/* For any pipeline states that are not dynamic, set the dynamic state
2448
* from the static pipeline state.
2449
*/
2450
if (pViewportState) {
2451
if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2452
dynamic->viewport.count = pViewportState->viewportCount;
2453
typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2454
pViewportState->viewportCount);
2455
2456
for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2457
v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
2458
dynamic->viewport.scale[i],
2459
dynamic->viewport.translate[i]);
2460
}
2461
}
2462
2463
if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2464
dynamic->scissor.count = pViewportState->scissorCount;
2465
typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2466
pViewportState->scissorCount);
2467
}
2468
}
2469
2470
if (pDepthStencilState) {
2471
if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2472
dynamic->stencil_compare_mask.front =
2473
pDepthStencilState->front.compareMask;
2474
dynamic->stencil_compare_mask.back =
2475
pDepthStencilState->back.compareMask;
2476
}
2477
2478
if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2479
dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2480
dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2481
}
2482
2483
if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2484
dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2485
dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2486
}
2487
}
2488
2489
if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2490
memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2491
sizeof(dynamic->blend_constants));
2492
}
2493
2494
if (pRasterizationState) {
2495
if (pRasterizationState->depthBiasEnable &&
2496
!(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2497
dynamic->depth_bias.constant_factor =
2498
pRasterizationState->depthBiasConstantFactor;
2499
dynamic->depth_bias.depth_bias_clamp =
2500
pRasterizationState->depthBiasClamp;
2501
dynamic->depth_bias.slope_factor =
2502
pRasterizationState->depthBiasSlopeFactor;
2503
}
2504
if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2505
dynamic->line_width = pRasterizationState->lineWidth;
2506
}
2507
2508
pipeline->dynamic_state.mask = dynamic_states;
2509
}
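/* Note (descriptive): states listed in pDynamicState are only recorded in
* the mask above; their values are not baked into the pipeline and are
* expected to be provided at command buffer recording time through the
* corresponding vkCmdSet* calls, as per standard Vulkan behavior.
*/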
2510
2511
static bool
2512
stencil_op_is_no_op(const VkStencilOpState *stencil)
2513
{
2514
return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2515
stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2516
}
2517
2518
static void
2519
enable_depth_bias(struct v3dv_pipeline *pipeline,
2520
const VkPipelineRasterizationStateCreateInfo *rs_info)
2521
{
2522
pipeline->depth_bias.enabled = false;
2523
pipeline->depth_bias.is_z16 = false;
2524
2525
if (!rs_info || !rs_info->depthBiasEnable)
2526
return;
2527
2528
/* Check the depth/stencil attachment description for the subpass used with
2529
* this pipeline.
2530
*/
2531
assert(pipeline->pass && pipeline->subpass);
2532
struct v3dv_render_pass *pass = pipeline->pass;
2533
struct v3dv_subpass *subpass = pipeline->subpass;
2534
2535
if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2536
return;
2537
2538
assert(subpass->ds_attachment.attachment < pass->attachment_count);
2539
struct v3dv_render_pass_attachment *att =
2540
&pass->attachments[subpass->ds_attachment.attachment];
2541
2542
if (att->desc.format == VK_FORMAT_D16_UNORM)
2543
pipeline->depth_bias.is_z16 = true;
2544
2545
pipeline->depth_bias.enabled = true;
2546
}
2547
2548
static void
2549
pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2550
const VkPipelineDepthStencilStateCreateInfo *ds_info)
2551
{
2552
if (!ds_info || !ds_info->depthTestEnable) {
2553
pipeline->ez_state = V3D_EZ_DISABLED;
2554
return;
2555
}
2556
2557
switch (ds_info->depthCompareOp) {
2558
case VK_COMPARE_OP_LESS:
2559
case VK_COMPARE_OP_LESS_OR_EQUAL:
2560
pipeline->ez_state = V3D_EZ_LT_LE;
2561
break;
2562
case VK_COMPARE_OP_GREATER:
2563
case VK_COMPARE_OP_GREATER_OR_EQUAL:
2564
pipeline->ez_state = V3D_EZ_GT_GE;
2565
break;
2566
case VK_COMPARE_OP_NEVER:
2567
case VK_COMPARE_OP_EQUAL:
2568
pipeline->ez_state = V3D_EZ_UNDECIDED;
2569
break;
2570
default:
2571
pipeline->ez_state = V3D_EZ_DISABLED;
2572
break;
2573
}
2574
2575
/* If stencil is enabled and is not a no-op, we need to disable EZ */
2576
if (ds_info->stencilTestEnable &&
2577
(!stencil_op_is_no_op(&ds_info->front) ||
2578
!stencil_op_is_no_op(&ds_info->back))) {
2579
pipeline->ez_state = V3D_EZ_DISABLED;
2580
}
2581
}
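/* Worked example: depthCompareOp = VK_COMPARE_OP_LESS with stencil testing
* disabled keeps early-Z enabled as V3D_EZ_LT_LE; enabling stencil with,
* say, front.compareOp = VK_COMPARE_OP_EQUAL (not a no-op according to
* stencil_op_is_no_op above) forces V3D_EZ_DISABLED instead.
*/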
2582
2583
static bool
2584
pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
2585
{
2586
for (uint8_t i = 0; i < pipeline->va_count; i++) {
2587
if (vk_format_is_int(pipeline->va[i].vk_format))
2588
return true;
2589
}
2590
return false;
2591
}
2592
2593
/* @pipeline can be NULL. In that case we assume all the attributes have a
* float format (we create a single all-float BO once and reuse it for all
* all-float pipelines); otherwise we look at the actual type of each
* attribute used with the specific pipeline passed in.
*/
2598
struct v3dv_bo *
2599
v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2600
struct v3dv_pipeline *pipeline)
2601
{
2602
uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
2603
struct v3dv_bo *bo;
2604
2605
bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
2606
2607
if (!bo) {
2608
fprintf(stderr, "failed to allocate memory for the default "
2609
"attribute values\n");
2610
return NULL;
2611
}
2612
2613
bool ok = v3dv_bo_map(device, bo, size);
if (!ok) {
fprintf(stderr, "failed to map default attribute values buffer\n");
return NULL;
}
2618
2619
uint32_t *attrs = bo->map;
2620
uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
2621
for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
2622
attrs[i * 4 + 0] = 0;
2623
attrs[i * 4 + 1] = 0;
2624
attrs[i * 4 + 2] = 0;
2625
VkFormat attr_format =
2626
pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
2627
if (i < va_count && vk_format_is_int(attr_format)) {
2628
attrs[i * 4 + 3] = 1;
2629
} else {
2630
attrs[i * 4 + 3] = fui(1.0);
2631
}
2632
}
2633
2634
v3dv_bo_unmap(device, bo);
2635
2636
return bo;
2637
}
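/* Worked example: each of the MAX_VERTEX_ATTRIBS slots gets a default value
* of (0, 0, 0, 1). For integer attributes the w component is the integer 1,
* while for float attributes it is fui(1.0), i.e. the IEEE-754 bit pattern
* 0x3f800000, since the BO stores raw 32-bit words.
*/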
2638
2639
static void
2640
pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2641
const VkPipelineMultisampleStateCreateInfo *ms_info)
2642
{
2643
pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2644
2645
/* Ignore pSampleMask if we are not enabling multisampling. The hardware
2646
* requires this to be 0xf or 0x0 if using a single sample.
2647
*/
2648
if (ms_info && ms_info->pSampleMask &&
2649
ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2650
pipeline->sample_mask &= ms_info->pSampleMask[0];
2651
}
2652
}
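/* Worked example (assuming V3D_MAX_SAMPLES == 4, the only multisample count
* supported here): the default mask is 0xf; a 4x multisampled pipeline with
* pSampleMask[0] == 0x5 ends up with pipeline->sample_mask == 0x5, while a
* single-sampled pipeline keeps 0xf regardless of pSampleMask, as the
* comment above requires.
*/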
2653
2654
static void
2655
pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2656
const VkPipelineMultisampleStateCreateInfo *ms_info)
2657
{
2658
pipeline->sample_rate_shading =
2659
ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2660
ms_info->sampleShadingEnable;
2661
}
2662
2663
static VkResult
2664
pipeline_init(struct v3dv_pipeline *pipeline,
2665
struct v3dv_device *device,
2666
struct v3dv_pipeline_cache *cache,
2667
const VkGraphicsPipelineCreateInfo *pCreateInfo,
2668
const VkAllocationCallbacks *pAllocator)
2669
{
2670
VkResult result = VK_SUCCESS;
2671
2672
pipeline->device = device;
2673
2674
V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2675
pipeline->layout = layout;
2676
2677
V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2678
assert(pCreateInfo->subpass < render_pass->subpass_count);
2679
pipeline->pass = render_pass;
2680
pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2681
2682
const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2683
pCreateInfo->pInputAssemblyState;
2684
pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
2685
2686
/* If rasterization is not enabled, various CreateInfo structs must be
2687
* ignored.
2688
*/
2689
const bool raster_enabled =
2690
!pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2691
2692
const VkPipelineViewportStateCreateInfo *vp_info =
2693
raster_enabled ? pCreateInfo->pViewportState : NULL;
2694
2695
const VkPipelineDepthStencilStateCreateInfo *ds_info =
2696
raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2697
2698
const VkPipelineRasterizationStateCreateInfo *rs_info =
2699
raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2700
2701
const VkPipelineColorBlendStateCreateInfo *cb_info =
2702
raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2703
2704
const VkPipelineMultisampleStateCreateInfo *ms_info =
2705
raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2706
2707
pipeline_init_dynamic_state(pipeline,
2708
pCreateInfo->pDynamicState,
2709
vp_info, ds_info, cb_info, rs_info);
2710
2711
/* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2712
* feature and it shouldn't be used by any pipeline.
2713
*/
2714
assert(!ds_info || !ds_info->depthBoundsTestEnable);
2715
2716
v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
2717
rs_info, ms_info);
2718
2719
pipeline_set_ez_state(pipeline, ds_info);
2720
enable_depth_bias(pipeline, rs_info);
2721
pipeline_set_sample_mask(pipeline, ms_info);
2722
pipeline_set_sample_rate_shading(pipeline, ms_info);
2723
2724
pipeline->primitive_restart =
2725
pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
2726
2727
result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
2728
2729
if (result != VK_SUCCESS) {
2730
/* The caller will destroy the pipeline, and we didn't allocate any
* extra info, so we don't need to do anything else.
*/
2733
return result;
2734
}
2735
2736
v3dv_X(device, pipeline_pack_compile_state)(pipeline,
2737
pCreateInfo->pVertexInputState);
2738
2739
if (pipeline_has_integer_vertex_attrib(pipeline)) {
2740
pipeline->default_attribute_values =
2741
v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
2742
if (!pipeline->default_attribute_values)
2743
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2744
} else {
2745
pipeline->default_attribute_values = NULL;
2746
}
2747
2748
return result;
2749
}
2750
2751
static VkResult
2752
graphics_pipeline_create(VkDevice _device,
2753
VkPipelineCache _cache,
2754
const VkGraphicsPipelineCreateInfo *pCreateInfo,
2755
const VkAllocationCallbacks *pAllocator,
2756
VkPipeline *pPipeline)
2757
{
2758
V3DV_FROM_HANDLE(v3dv_device, device, _device);
2759
V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
2760
2761
struct v3dv_pipeline *pipeline;
2762
VkResult result;
2763
2764
/* Use the default pipeline cache if none is specified */
2765
if (cache == NULL && device->instance->default_pipeline_cache_enabled)
2766
cache = &device->default_pipeline_cache;
2767
2768
pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
2769
VK_OBJECT_TYPE_PIPELINE);
2770
2771
if (pipeline == NULL)
2772
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2773
2774
result = pipeline_init(pipeline, device, cache,
2775
pCreateInfo,
2776
pAllocator);
2777
2778
if (result != VK_SUCCESS) {
2779
v3dv_destroy_pipeline(pipeline, device, pAllocator);
2780
return result;
2781
}
2782
2783
*pPipeline = v3dv_pipeline_to_handle(pipeline);
2784
2785
return VK_SUCCESS;
2786
}
2787
2788
VKAPI_ATTR VkResult VKAPI_CALL
2789
v3dv_CreateGraphicsPipelines(VkDevice _device,
2790
VkPipelineCache pipelineCache,
2791
uint32_t count,
2792
const VkGraphicsPipelineCreateInfo *pCreateInfos,
2793
const VkAllocationCallbacks *pAllocator,
2794
VkPipeline *pPipelines)
2795
{
2796
V3DV_FROM_HANDLE(v3dv_device, device, _device);
2797
VkResult result = VK_SUCCESS;
2798
2799
if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
2800
mtx_lock(&device->pdevice->mutex);
2801
2802
for (uint32_t i = 0; i < count; i++) {
2803
VkResult local_result;
2804
2805
local_result = graphics_pipeline_create(_device,
2806
pipelineCache,
2807
&pCreateInfos[i],
2808
pAllocator,
2809
&pPipelines[i]);
2810
2811
if (local_result != VK_SUCCESS) {
2812
result = local_result;
2813
pPipelines[i] = VK_NULL_HANDLE;
2814
}
2815
}
2816
2817
if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
2818
mtx_unlock(&device->pdevice->mutex);
2819
2820
return result;
2821
}
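/* Note (descriptive): as the Vulkan spec allows, creation is attempted for
* every element of pCreateInfos even after a failure; failed entries are set
* to VK_NULL_HANDLE and an error code from one of the failed creations is
* returned to the caller.
*/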
2822
2823
static void
2824
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
2825
{
2826
assert(glsl_type_is_vector_or_scalar(type));
2827
2828
uint32_t comp_size = glsl_type_is_boolean(type)
? 4 : glsl_get_bit_size(type) / 8;
unsigned length = glsl_get_vector_elements(type);
*size = comp_size * length;
*align = comp_size * (length == 3 ? 4 : length);
2833
}
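/* Worked example: for a vec3 of 32-bit floats comp_size is 4 and length is
* 3, so *size = 12 while *align = 16 (vec3 gets vec4 alignment in shared
* memory); booleans always count as 4 bytes per component.
*/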
2834
2835
static void
2836
lower_cs_shared(struct nir_shader *nir)
2837
{
2838
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
2839
nir_var_mem_shared, shared_type_info);
2840
NIR_PASS_V(nir, nir_lower_explicit_io,
2841
nir_var_mem_shared, nir_address_format_32bit_offset);
2842
}
2843
2844
static VkResult
2845
pipeline_compile_compute(struct v3dv_pipeline *pipeline,
2846
struct v3dv_pipeline_cache *cache,
2847
const VkComputePipelineCreateInfo *info,
2848
const VkAllocationCallbacks *alloc)
2849
{
2850
struct v3dv_device *device = pipeline->device;
2851
struct v3dv_physical_device *physical_device =
2852
&device->instance->physicalDevice;
2853
2854
const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
2855
gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2856
2857
struct v3dv_pipeline_stage *p_stage =
2858
vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
2859
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2860
if (!p_stage)
2861
return VK_ERROR_OUT_OF_HOST_MEMORY;
2862
2863
p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2864
p_stage->pipeline = pipeline;
2865
p_stage->stage = gl_shader_stage_to_broadcom(stage);
2866
p_stage->entrypoint = sinfo->pName;
2867
p_stage->module = vk_shader_module_from_handle(sinfo->module);
2868
p_stage->spec_info = sinfo->pSpecializationInfo;
2869
2870
pipeline_hash_shader(p_stage->module,
2871
p_stage->entrypoint,
2872
stage,
2873
p_stage->spec_info,
2874
p_stage->shader_sha1);
2875
2876
/* We first try to get the variant directly from the cache */
2877
p_stage->nir = NULL;
2878
2879
pipeline->cs = p_stage;
2880
pipeline->active_stages |= sinfo->stage;
2881
2882
struct v3dv_pipeline_key pipeline_key;
2883
pipeline_populate_compute_key(pipeline, &pipeline_key, info);
2884
unsigned char pipeline_sha1[20];
2885
pipeline_hash_compute(pipeline, &pipeline_key, pipeline_sha1);
2886
2887
pipeline->shared_data =
2888
v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1);
2889
2890
if (pipeline->shared_data != NULL) {
2891
assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
2892
goto success;
2893
}
2894
2895
pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline_sha1,
2896
pipeline,
2897
false);
2898
2899
/* If not found on cache, compile it */
2900
p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
2901
assert(p_stage->nir);
2902
2903
st_nir_opts(p_stage->nir);
2904
pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
2905
lower_cs_shared(p_stage->nir);
2906
2907
VkResult result = VK_SUCCESS;
2908
2909
struct v3d_key key;
2910
memset(&key, 0, sizeof(key));
2911
pipeline_populate_v3d_key(&key, p_stage, 0,
2912
pipeline->device->features.robustBufferAccess);
2913
pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
2914
pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
2915
alloc, &result);
2916
2917
if (result != VK_SUCCESS)
2918
return result;
2919
2920
if (!upload_assembly(pipeline))
2921
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2922
2923
v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2924
/* As the variants are now in pipeline->shared_data, we don't need the
* pipeline_stages after compiling.
*/
2927
pipeline_free_stages(device, pipeline, alloc);
2928
2929
success:
2930
pipeline_check_spill_size(pipeline);
2931
2932
return VK_SUCCESS;
2933
}
2934
2935
static VkResult
2936
compute_pipeline_init(struct v3dv_pipeline *pipeline,
2937
struct v3dv_device *device,
2938
struct v3dv_pipeline_cache *cache,
2939
const VkComputePipelineCreateInfo *info,
2940
const VkAllocationCallbacks *alloc)
2941
{
2942
V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
2943
2944
pipeline->device = device;
2945
pipeline->layout = layout;
2946
2947
VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
2948
2949
return result;
2950
}
2951
2952
static VkResult
2953
compute_pipeline_create(VkDevice _device,
2954
VkPipelineCache _cache,
2955
const VkComputePipelineCreateInfo *pCreateInfo,
2956
const VkAllocationCallbacks *pAllocator,
2957
VkPipeline *pPipeline)
2958
{
2959
V3DV_FROM_HANDLE(v3dv_device, device, _device);
2960
V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
2961
2962
struct v3dv_pipeline *pipeline;
2963
VkResult result;
2964
2965
/* Use the default pipeline cache if none is specified */
2966
if (cache == NULL && device->instance->default_pipeline_cache_enabled)
2967
cache = &device->default_pipeline_cache;
2968
2969
pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
2970
VK_OBJECT_TYPE_PIPELINE);
2971
if (pipeline == NULL)
2972
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2973
2974
result = compute_pipeline_init(pipeline, device, cache,
2975
pCreateInfo, pAllocator);
2976
if (result != VK_SUCCESS) {
2977
v3dv_destroy_pipeline(pipeline, device, pAllocator);
2978
return result;
2979
}
2980
2981
*pPipeline = v3dv_pipeline_to_handle(pipeline);
2982
2983
return VK_SUCCESS;
2984
}
2985
2986
VKAPI_ATTR VkResult VKAPI_CALL
2987
v3dv_CreateComputePipelines(VkDevice _device,
2988
VkPipelineCache pipelineCache,
2989
uint32_t createInfoCount,
2990
const VkComputePipelineCreateInfo *pCreateInfos,
2991
const VkAllocationCallbacks *pAllocator,
2992
VkPipeline *pPipelines)
2993
{
2994
V3DV_FROM_HANDLE(v3dv_device, device, _device);
2995
VkResult result = VK_SUCCESS;
2996
2997
if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
2998
mtx_lock(&device->pdevice->mutex);
2999
3000
for (uint32_t i = 0; i < createInfoCount; i++) {
3001
VkResult local_result;
3002
local_result = compute_pipeline_create(_device,
3003
pipelineCache,
3004
&pCreateInfos[i],
3005
pAllocator,
3006
&pPipelines[i]);
3007
3008
if (local_result != VK_SUCCESS) {
3009
result = local_result;
3010
pPipelines[i] = VK_NULL_HANDLE;
3011
}
3012
}
3013
3014
if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3015
mtx_unlock(&device->pdevice->mutex);
3016
3017
return result;
3018
}
3019
3020