CoCalc -- v3dv_pipeline.c

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_pipeline.c
⁴⁵⁶⁰ views
1
/*
2
 * Copyright © 2019 Raspberry Pi
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 */
23

24
#include "vk_util.h"
25

26
#include "v3dv_debug.h"
27
#include "v3dv_private.h"
28

29
#include "vk_format_info.h"
30

31
#include "common/v3d_debug.h"
32

33
#include "compiler/nir/nir_builder.h"
34
#include "nir/nir_serialize.h"
35

36
#include "util/u_atomic.h"
37

38
#include "vulkan/util/vk_format.h"
39

40
static VkResult
41
compute_vpm_config(struct v3dv_pipeline *pipeline);
42

43
void
44
v3dv_print_v3d_key(struct v3d_key *key,
45
                   uint32_t v3d_key_size)
46
{
47
   struct mesa_sha1 ctx;
48
   unsigned char sha1[20];
49
   char sha1buf[41];
50

51
   _mesa_sha1_init(&ctx);
52

53
   _mesa_sha1_update(&ctx, key, v3d_key_size);
54

55
   _mesa_sha1_final(&ctx, sha1);
56
   _mesa_sha1_format(sha1buf, sha1);
57

58
   fprintf(stderr, "key %p: %s\n", key, sha1buf);
59
}
60

61
static void
62
pipeline_compute_sha1_from_nir(nir_shader *nir,
63
                               unsigned char sha1[20])
64
{
65
   assert(nir);
66
   struct blob blob;
67
   blob_init(&blob);
68

69
   nir_serialize(&blob, nir, false);
70
   if (!blob.out_of_memory)
71
      _mesa_sha1_compute(blob.data, blob.size, sha1);
72

73
   blob_finish(&blob);
74
}
75

76
void
77
v3dv_shader_module_internal_init(struct v3dv_device *device,
78
                                 struct vk_shader_module *module,
79
                                 nir_shader *nir)
80
{
81
   vk_object_base_init(&device->vk, &module->base,
82
                       VK_OBJECT_TYPE_SHADER_MODULE);
83
   module->nir = nir;
84
   module->size = 0;
85

86
   pipeline_compute_sha1_from_nir(nir, module->sha1);
87
}
88

89
void
90
v3dv_shader_variant_destroy(struct v3dv_device *device,
91
                            struct v3dv_shader_variant *variant)
92
{
93
   /* The assembly BO is shared by all variants in the pipeline, so it can't
94
    * be freed here and should be freed with the pipeline
95
    */
96
   ralloc_free(variant->prog_data.base);
97
   vk_free(&device->vk.alloc, variant);
98
}
99

100
static void
101
destroy_pipeline_stage(struct v3dv_device *device,
102
                       struct v3dv_pipeline_stage *p_stage,
103
                       const VkAllocationCallbacks *pAllocator)
104
{
105
   if (!p_stage)
106
      return;
107

108
   ralloc_free(p_stage->nir);
109
   vk_free2(&device->vk.alloc, pAllocator, p_stage);
110
}
111

112
static void
113
pipeline_free_stages(struct v3dv_device *device,
114
                     struct v3dv_pipeline *pipeline,
115
                     const VkAllocationCallbacks *pAllocator)
116
{
117
   assert(pipeline);
118

119
   /* FIXME: we can't just use a loop over mesa stage due the bin, would be
120
    * good to find an alternative.
121
    */
122
   destroy_pipeline_stage(device, pipeline->vs, pAllocator);
123
   destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
124
   destroy_pipeline_stage(device, pipeline->gs, pAllocator);
125
   destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
126
   destroy_pipeline_stage(device, pipeline->fs, pAllocator);
127
   destroy_pipeline_stage(device, pipeline->cs, pAllocator);
128

129
   pipeline->vs = NULL;
130
   pipeline->vs_bin = NULL;
131
   pipeline->gs = NULL;
132
   pipeline->gs_bin = NULL;
133
   pipeline->fs = NULL;
134
   pipeline->cs = NULL;
135
}
136

137
static void
138
v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
139
                      struct v3dv_device *device,
140
                      const VkAllocationCallbacks *pAllocator)
141
{
142
   if (!pipeline)
143
      return;
144

145
   pipeline_free_stages(device, pipeline, pAllocator);
146

147
   if (pipeline->shared_data) {
148
      v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
149
      pipeline->shared_data = NULL;
150
   }
151

152
   if (pipeline->spill.bo) {
153
      assert(pipeline->spill.size_per_thread > 0);
154
      v3dv_bo_free(device, pipeline->spill.bo);
155
   }
156

157
   if (pipeline->default_attribute_values) {
158
      v3dv_bo_free(device, pipeline->default_attribute_values);
159
      pipeline->default_attribute_values = NULL;
160
   }
161

162
   vk_object_free(&device->vk, pAllocator, pipeline);
163
}
164

165
VKAPI_ATTR void VKAPI_CALL
166
v3dv_DestroyPipeline(VkDevice _device,
167
                     VkPipeline _pipeline,
168
                     const VkAllocationCallbacks *pAllocator)
169
{
170
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
171
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
172

173
   if (!pipeline)
174
      return;
175

176
   v3dv_destroy_pipeline(pipeline, device, pAllocator);
177
}
178

179
static const struct spirv_to_nir_options default_spirv_options =  {
180
   .caps = {
181
      .device_group = true,
182
      .variable_pointers = true,
183
      .subgroup_basic = true,
184
    },
185
   .ubo_addr_format = nir_address_format_32bit_index_offset,
186
   .ssbo_addr_format = nir_address_format_32bit_index_offset,
187
   .phys_ssbo_addr_format = nir_address_format_64bit_global,
188
   .push_const_addr_format = nir_address_format_logical,
189
   .shared_addr_format = nir_address_format_32bit_offset,
190
   .frag_coord_is_sysval = false,
191
};
192

193
const nir_shader_compiler_options v3dv_nir_options = {
194
   .lower_add_sat = true,
195
   .lower_all_io_to_temps = true,
196
   .lower_extract_byte = true,
197
   .lower_extract_word = true,
198
   .lower_insert_byte = true,
199
   .lower_insert_word = true,
200
   .lower_bitfield_insert_to_shifts = true,
201
   .lower_bitfield_extract_to_shifts = true,
202
   .lower_bitfield_reverse = true,
203
   .lower_bit_count = true,
204
   .lower_cs_local_id_from_index = true,
205
   .lower_ffract = true,
206
   .lower_fmod = true,
207
   .lower_pack_unorm_2x16 = true,
208
   .lower_pack_snorm_2x16 = true,
209
   .lower_unpack_unorm_2x16 = true,
210
   .lower_unpack_snorm_2x16 = true,
211
   .lower_pack_unorm_4x8 = true,
212
   .lower_pack_snorm_4x8 = true,
213
   .lower_unpack_unorm_4x8 = true,
214
   .lower_unpack_snorm_4x8 = true,
215
   .lower_pack_half_2x16 = true,
216
   .lower_unpack_half_2x16 = true,
217
   /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and
218
    * get the tests to pass since it might produce slightly better code.
219
    */
220
   .lower_uadd_carry = true,
221
   .lower_usub_borrow = true,
222
   /* FIXME: check if we can use multop + umul24 to implement mul2x32_64
223
    * without lowering.
224
    */
225
   .lower_mul_2x32_64 = true,
226
   .lower_fdiv = true,
227
   .lower_find_lsb = true,
228
   .lower_ffma16 = true,
229
   .lower_ffma32 = true,
230
   .lower_ffma64 = true,
231
   .lower_flrp32 = true,
232
   .lower_fpow = true,
233
   .lower_fsat = true,
234
   .lower_fsqrt = true,
235
   .lower_ifind_msb = true,
236
   .lower_isign = true,
237
   .lower_ldexp = true,
238
   .lower_mul_high = true,
239
   .lower_wpos_pntc = true,
240
   .lower_rotate = true,
241
   .lower_to_scalar = true,
242
   .lower_device_index_to_zero = true,
243
   .has_fsub = true,
244
   .has_isub = true,
245
   .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
246
                                   * needs to be supported */
247
   .lower_interpolate_at = true,
248
   .max_unroll_iterations = 16,
249
   .divergence_analysis_options =
250
      nir_divergence_multiple_workgroup_per_compute_subgroup
251
};
252

253
const nir_shader_compiler_options *
254
v3dv_pipeline_get_nir_options(void)
255
{
256
   return &v3dv_nir_options;
257
}
258

259
#define OPT(pass, ...) ({                                  \
260
   bool this_progress = false;                             \
261
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
262
   if (this_progress)                                      \
263
      progress = true;                                     \
264
   this_progress;                                          \
265
})
266

267
static void
268
nir_optimize(nir_shader *nir,
269
             struct v3dv_pipeline_stage *stage,
270
             bool allow_copies)
271
{
272
   bool progress;
273

274
   do {
275
      progress = false;
276
      OPT(nir_split_array_vars, nir_var_function_temp);
277
      OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
278
      OPT(nir_opt_deref);
279
      OPT(nir_lower_vars_to_ssa);
280
      if (allow_copies) {
281
         /* Only run this pass in the first call to nir_optimize.  Later calls
282
          * assume that we've lowered away any copy_deref instructions and we
283
          * don't want to introduce any more.
284
          */
285
         OPT(nir_opt_find_array_copies);
286
      }
287
      OPT(nir_opt_copy_prop_vars);
288
      OPT(nir_opt_dead_write_vars);
289
      OPT(nir_opt_combine_stores, nir_var_all);
290

291
      OPT(nir_lower_alu_to_scalar, NULL, NULL);
292

293
      OPT(nir_copy_prop);
294
      OPT(nir_lower_phis_to_scalar, false);
295

296
      OPT(nir_copy_prop);
297
      OPT(nir_opt_dce);
298
      OPT(nir_opt_cse);
299
      OPT(nir_opt_combine_stores, nir_var_all);
300

301
      /* Passing 0 to the peephole select pass causes it to convert
302
       * if-statements that contain only move instructions in the branches
303
       * regardless of the count.
304
       *
305
       * Passing 1 to the peephole select pass causes it to convert
306
       * if-statements that contain at most a single ALU instruction (total)
307
       * in both branches.
308
       */
309
      OPT(nir_opt_peephole_select, 0, false, false);
310
      OPT(nir_opt_peephole_select, 8, false, true);
311

312
      OPT(nir_opt_intrinsics);
313
      OPT(nir_opt_idiv_const, 32);
314
      OPT(nir_opt_algebraic);
315
      OPT(nir_opt_constant_folding);
316

317
      OPT(nir_opt_dead_cf);
318

319
      OPT(nir_opt_if, false);
320
      OPT(nir_opt_conditional_discard);
321

322
      OPT(nir_opt_remove_phis);
323
      OPT(nir_opt_undef);
324
      OPT(nir_lower_pack);
325
   } while (progress);
326

327
   OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
328
}
329

330
static void
331
preprocess_nir(nir_shader *nir,
332
               struct v3dv_pipeline_stage *stage)
333
{
334
   /* Make sure we lower variable initializers on output variables so that
335
    * nir_remove_dead_variables below sees the corresponding stores
336
    */
337
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
338

339
   /* Now that we've deleted all but the main function, we can go ahead and
340
    * lower the rest of the variable initializers.
341
    */
342
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
343

344
   /* Split member structs.  We do this before lower_io_to_temporaries so that
345
    * it doesn't lower system values to temporaries by accident.
346
    */
347
   NIR_PASS_V(nir, nir_split_var_copies);
348
   NIR_PASS_V(nir, nir_split_per_member_structs);
349

350
   if (nir->info.stage == MESA_SHADER_FRAGMENT)
351
      NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
352
   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
353
      NIR_PASS_V(nir, nir_lower_input_attachments,
354
                 &(nir_input_attachment_options) {
355
                    .use_fragcoord_sysval = false,
356
                       });
357
   }
358

359
   NIR_PASS_V(nir, nir_lower_explicit_io,
360
              nir_var_mem_push_const,
361
              nir_address_format_32bit_offset);
362

363
   NIR_PASS_V(nir, nir_lower_explicit_io,
364
              nir_var_mem_ubo | nir_var_mem_ssbo,
365
              nir_address_format_32bit_index_offset);
366

367
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in |
368
              nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
369
              NULL);
370

371
   NIR_PASS_V(nir, nir_propagate_invariant, false);
372
   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
373
              nir_shader_get_entrypoint(nir), true, false);
374

375
   NIR_PASS_V(nir, nir_lower_system_values);
376
   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
377

378
   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
379

380
   NIR_PASS_V(nir, nir_normalize_cubemap_coords);
381

382
   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
383

384
   NIR_PASS_V(nir, nir_split_var_copies);
385
   NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);
386

387
   nir_optimize(nir, stage, true);
388

389
   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
390

391
   /* Lower a bunch of stuff */
392
   NIR_PASS_V(nir, nir_lower_var_copies);
393

394
   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
395

396
   NIR_PASS_V(nir, nir_lower_indirect_derefs,
397
              nir_var_function_temp, 2);
398

399
   NIR_PASS_V(nir, nir_lower_array_deref_of_vec,
400
              nir_var_mem_ubo | nir_var_mem_ssbo,
401
              nir_lower_direct_array_deref_of_vec_load);
402

403
   NIR_PASS_V(nir, nir_lower_frexp);
404

405
   /* Get rid of split copies */
406
   nir_optimize(nir, stage, false);
407
}
408

409
/* FIXME: This is basically the same code at anv, tu and radv. Move to common
410
 * place?
411
 */
412
static struct nir_spirv_specialization*
413
vk_spec_info_to_nir_spirv(const VkSpecializationInfo *spec_info,
414
                          uint32_t *out_num_spec_entries)
415
{
416
   if (spec_info == NULL || spec_info->mapEntryCount == 0)
417
      return NULL;
418

419
   uint32_t num_spec_entries = spec_info->mapEntryCount;
420
   struct nir_spirv_specialization *spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
421

422
   for (uint32_t i = 0; i < num_spec_entries; i++) {
423
      VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
424
      const void *data = spec_info->pData + entry.offset;
425
      assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
426

427
      spec_entries[i].id = spec_info->pMapEntries[i].constantID;
428
      switch (entry.size) {
429
      case 8:
430
         spec_entries[i].value.u64 = *(const uint64_t *)data;
431
         break;
432
      case 4:
433
         spec_entries[i].value.u32 = *(const uint32_t *)data;
434
         break;
435
      case 2:
436
         spec_entries[i].value.u16 = *(const uint16_t *)data;
437
         break;
438
      case 1:
439
         spec_entries[i].value.u8 = *(const uint8_t *)data;
440
         break;
441
      default:
442
         assert(!"Invalid spec constant size");
443
         break;
444
      }
445
   }
446

447
   *out_num_spec_entries = num_spec_entries;
448
   return spec_entries;
449
}
450

451
static nir_shader *
452
shader_module_compile_to_nir(struct v3dv_device *device,
453
                             struct v3dv_pipeline_stage *stage)
454
{
455
   nir_shader *nir;
456
   const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
457

458
   if (!stage->module->nir) {
459
      uint32_t *spirv = (uint32_t *) stage->module->data;
460
      assert(stage->module->size % 4 == 0);
461

462
      if (V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV)
463
         v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
464

465
      uint32_t num_spec_entries = 0;
466
      struct nir_spirv_specialization *spec_entries =
467
         vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);
468
      const struct spirv_to_nir_options spirv_options = default_spirv_options;
469
      nir = spirv_to_nir(spirv, stage->module->size / 4,
470
                         spec_entries, num_spec_entries,
471
                         broadcom_shader_stage_to_gl(stage->stage),
472
                         stage->entrypoint,
473
                         &spirv_options, nir_options);
474
      assert(nir);
475
      nir_validate_shader(nir, "after spirv_to_nir");
476
      free(spec_entries);
477
   } else {
478
      /* For NIR modules created by the driver we can't consume the NIR
479
       * directly, we need to clone it first, since ownership of the NIR code
480
       * (as with SPIR-V code for SPIR-V shaders), belongs to the creator
481
       * of the module and modules can be destroyed immediately after been used
482
       * to create pipelines.
483
       */
484
      nir = nir_shader_clone(NULL, stage->module->nir);
485
      nir_validate_shader(nir, "nir module");
486
   }
487
   assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));
488

489
   if (V3D_DEBUG & (V3D_DEBUG_NIR |
490
                    v3d_debug_flag_for_shader_stage(
491
                       broadcom_shader_stage_to_gl(stage->stage)))) {
492
      fprintf(stderr, "Initial form: %s prog %d NIR:\n",
493
              broadcom_shader_stage_name(stage->stage),
494
              stage->program_id);
495
      nir_print_shader(nir, stderr);
496
      fprintf(stderr, "\n");
497
   }
498

499
   /* We have to lower away local variable initializers right before we
500
    * inline functions.  That way they get properly initialized at the top
501
    * of the function and not at the top of its caller.
502
    */
503
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
504
   NIR_PASS_V(nir, nir_lower_returns);
505
   NIR_PASS_V(nir, nir_inline_functions);
506
   NIR_PASS_V(nir, nir_opt_deref);
507

508
   /* Pick off the single entrypoint that we want */
509
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
510
      if (func->is_entrypoint)
511
         func->name = ralloc_strdup(func, "main");
512
      else
513
         exec_node_remove(&func->node);
514
   }
515
   assert(exec_list_length(&nir->functions) == 1);
516

517
   /* Vulkan uses the separate-shader linking model */
518
   nir->info.separate_shader = true;
519

520
   preprocess_nir(nir, stage);
521

522
   return nir;
523
}
524

525
static int
526
type_size_vec4(const struct glsl_type *type, bool bindless)
527
{
528
   return glsl_count_attribute_slots(type, false);
529
}
530

531
/* FIXME: the number of parameters for this method is somewhat big. Perhaps
532
 * rethink.
533
 */
534
static unsigned
535
descriptor_map_add(struct v3dv_descriptor_map *map,
536
                   int set,
537
                   int binding,
538
                   int array_index,
539
                   int array_size,
540
                   uint8_t return_size)
541
{
542
   assert(array_index < array_size);
543
   assert(return_size == 16 || return_size == 32);
544

545
   unsigned index = 0;
546
   for (unsigned i = 0; i < map->num_desc; i++) {
547
      if (set == map->set[i] &&
548
          binding == map->binding[i] &&
549
          array_index == map->array_index[i]) {
550
         assert(array_size == map->array_size[i]);
551
         if (return_size != map->return_size[index]) {
552
            /* It the return_size is different it means that the same sampler
553
             * was used for operations with different precision
554
             * requirement. In this case we need to ensure that we use the
555
             * larger one.
556
             */
557
            map->return_size[index] = 32;
558
         }
559
         return index;
560
      }
561
      index++;
562
   }
563

564
   assert(index == map->num_desc);
565

566
   map->set[map->num_desc] = set;
567
   map->binding[map->num_desc] = binding;
568
   map->array_index[map->num_desc] = array_index;
569
   map->array_size[map->num_desc] = array_size;
570
   map->return_size[map->num_desc] = return_size;
571
   map->num_desc++;
572

573
   return index;
574
}
575

576

577
static void
578
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
579
                         struct v3dv_pipeline *pipeline)
580
{
581
   assert(instr->intrinsic == nir_intrinsic_load_push_constant);
582
   instr->intrinsic = nir_intrinsic_load_uniform;
583
}
584

585
static struct v3dv_descriptor_map*
586
pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
587
                            VkDescriptorType desc_type,
588
                            gl_shader_stage gl_stage,
589
                            bool is_sampler)
590
{
591
   enum broadcom_shader_stage broadcom_stage =
592
      gl_shader_stage_to_broadcom(gl_stage);
593

594
   assert(pipeline->shared_data &&
595
          pipeline->shared_data->maps[broadcom_stage]);
596

597
   switch(desc_type) {
598
   case VK_DESCRIPTOR_TYPE_SAMPLER:
599
      return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
600
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
601
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
602
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
603
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
604
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
605
      return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
606
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
607
      return is_sampler ?
608
         &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
609
         &pipeline->shared_data->maps[broadcom_stage]->texture_map;
610
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
611
      return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
612
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
613
      return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
614
   default:
615
      unreachable("Descriptor type unknown or not having a descriptor map");
616
   }
617
}
618

619
/* Gathers info from the intrinsic (set and binding) and then lowers it so it
620
 * could be used by the v3d_compiler */
621
static void
622
lower_vulkan_resource_index(nir_builder *b,
623
                            nir_intrinsic_instr *instr,
624
                            nir_shader *shader,
625
                            struct v3dv_pipeline *pipeline,
626
                            const struct v3dv_pipeline_layout *layout)
627
{
628
   assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
629

630
   nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
631

632
   unsigned set = nir_intrinsic_desc_set(instr);
633
   unsigned binding = nir_intrinsic_binding(instr);
634
   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
635
   struct v3dv_descriptor_set_binding_layout *binding_layout =
636
      &set_layout->binding[binding];
637
   unsigned index = 0;
638
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(instr);
639

640
   switch (desc_type) {
641
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
642
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
643
      struct v3dv_descriptor_map *descriptor_map =
644
         pipeline_get_descriptor_map(pipeline, desc_type, shader->info.stage, false);
645

646
      if (!const_val)
647
         unreachable("non-constant vulkan_resource_index array index");
648

649
      index = descriptor_map_add(descriptor_map, set, binding,
650
                                 const_val->u32,
651
                                 binding_layout->array_size,
652
                                 32 /* return_size: doesn't really apply for this case */);
653

654
      if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
655
         /* skip index 0 which is used for push constants */
656
         index++;
657
      }
658
      break;
659
   }
660

661
   default:
662
      unreachable("unsupported desc_type for vulkan_resource_index");
663
      break;
664
   }
665

666
   /* Since we use the deref pass, both vulkan_resource_index and
667
    * vulkan_load_descriptor return a vec2 providing an index and
668
    * offset. Our backend compiler only cares about the index part.
669
    */
670
   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
671
                            nir_imm_ivec2(b, index, 0));
672
   nir_instr_remove(&instr->instr);
673
}
674

675
/* Returns return_size, so it could be used for the case of not having a
676
 * sampler object
677
 */
678
static uint8_t
679
lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
680
                        nir_shader *shader,
681
                        struct v3dv_pipeline *pipeline,
682
                        const struct v3dv_pipeline_layout *layout)
683
{
684
   nir_ssa_def *index = NULL;
685
   unsigned base_index = 0;
686
   unsigned array_elements = 1;
687
   nir_tex_src *src = &instr->src[src_idx];
688
   bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
689

690
   /* We compute first the offsets */
691
   nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
692
   while (deref->deref_type != nir_deref_type_var) {
693
      assert(deref->parent.is_ssa);
694
      nir_deref_instr *parent =
695
         nir_instr_as_deref(deref->parent.ssa->parent_instr);
696

697
      assert(deref->deref_type == nir_deref_type_array);
698

699
      if (nir_src_is_const(deref->arr.index) && index == NULL) {
700
         /* We're still building a direct index */
701
         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
702
      } else {
703
         if (index == NULL) {
704
            /* We used to be direct but not anymore */
705
            index = nir_imm_int(b, base_index);
706
            base_index = 0;
707
         }
708

709
         index = nir_iadd(b, index,
710
                          nir_imul(b, nir_imm_int(b, array_elements),
711
                                   nir_ssa_for_src(b, deref->arr.index, 1)));
712
      }
713

714
      array_elements *= glsl_get_length(parent->type);
715

716
      deref = parent;
717
   }
718

719
   if (index)
720
      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
721

722
   /* We have the offsets, we apply them, rewriting the source or removing
723
    * instr if needed
724
    */
725
   if (index) {
726
      nir_instr_rewrite_src(&instr->instr, &src->src,
727
                            nir_src_for_ssa(index));
728

729
      src->src_type = is_sampler ?
730
         nir_tex_src_sampler_offset :
731
         nir_tex_src_texture_offset;
732
   } else {
733
      nir_tex_instr_remove_src(instr, src_idx);
734
   }
735

736
   uint32_t set = deref->var->data.descriptor_set;
737
   uint32_t binding = deref->var->data.binding;
738
   /* FIXME: this is a really simplified check for the precision to be used
739
    * for the sampling. Right now we are ony checking for the variables used
740
    * on the operation itself, but there are other cases that we could use to
741
    * infer the precision requirement.
742
    */
743
   bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
744
                            deref->var->data.precision == GLSL_PRECISION_LOW;
745
   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
746
   struct v3dv_descriptor_set_binding_layout *binding_layout =
747
      &set_layout->binding[binding];
748

749
   /* For input attachments, the shader includes the attachment_idx. As we are
750
    * treating them as a texture, we only want the base_index
751
    */
752
   uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
753
      deref->var->data.index + base_index :
754
      base_index;
755

756
   uint8_t return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
757

758
   struct v3dv_descriptor_map *map =
759
      pipeline_get_descriptor_map(pipeline, binding_layout->type,
760
                                  shader->info.stage, is_sampler);
761
   int desc_index =
762
      descriptor_map_add(map,
763
                         deref->var->data.descriptor_set,
764
                         deref->var->data.binding,
765
                         array_index,
766
                         binding_layout->array_size,
767
                         return_size);
768

769
   if (is_sampler)
770
      instr->sampler_index = desc_index;
771
   else
772
      instr->texture_index = desc_index;
773

774
   return return_size;
775
}
776

777
static bool
778
lower_sampler(nir_builder *b, nir_tex_instr *instr,
779
              nir_shader *shader,
780
              struct v3dv_pipeline *pipeline,
781
              const struct v3dv_pipeline_layout *layout)
782
{
783
   uint8_t return_size = 0;
784

785
   int texture_idx =
786
      nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
787

788
   if (texture_idx >= 0)
789
      return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader,
790
                                            pipeline, layout);
791

792
   int sampler_idx =
793
      nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
794

795
   if (sampler_idx >= 0)
796
      lower_tex_src_to_offset(b, instr, sampler_idx, shader, pipeline, layout);
797

798
   if (texture_idx < 0 && sampler_idx < 0)
799
      return false;
800

801
   /* If we don't have a sampler, we assign it the idx we reserve for this
802
    * case, and we ensure that it is using the correct return size.
803
    */
804
   if (sampler_idx < 0) {
805
      instr->sampler_index = return_size == 16 ?
806
         V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
807
   }
808

809
   return true;
810
}
811

812
/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
813
static void
814
lower_image_deref(nir_builder *b,
815
                  nir_intrinsic_instr *instr,
816
                  nir_shader *shader,
817
                  struct v3dv_pipeline *pipeline,
818
                  const struct v3dv_pipeline_layout *layout)
819
{
820
   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
821
   nir_ssa_def *index = NULL;
822
   unsigned array_elements = 1;
823
   unsigned base_index = 0;
824

825
   while (deref->deref_type != nir_deref_type_var) {
826
      assert(deref->parent.is_ssa);
827
      nir_deref_instr *parent =
828
         nir_instr_as_deref(deref->parent.ssa->parent_instr);
829

830
      assert(deref->deref_type == nir_deref_type_array);
831

832
      if (nir_src_is_const(deref->arr.index) && index == NULL) {
833
         /* We're still building a direct index */
834
         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
835
      } else {
836
         if (index == NULL) {
837
            /* We used to be direct but not anymore */
838
            index = nir_imm_int(b, base_index);
839
            base_index = 0;
840
         }
841

842
         index = nir_iadd(b, index,
843
                          nir_imul(b, nir_imm_int(b, array_elements),
844
                                   nir_ssa_for_src(b, deref->arr.index, 1)));
845
      }
846

847
      array_elements *= glsl_get_length(parent->type);
848

849
      deref = parent;
850
   }
851

852
   if (index)
853
      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
854

855
   uint32_t set = deref->var->data.descriptor_set;
856
   uint32_t binding = deref->var->data.binding;
857
   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
858
   struct v3dv_descriptor_set_binding_layout *binding_layout =
859
      &set_layout->binding[binding];
860

861
   uint32_t array_index = deref->var->data.index + base_index;
862

863
   assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
864
          binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
865

866
   struct v3dv_descriptor_map *map =
867
      pipeline_get_descriptor_map(pipeline, binding_layout->type,
868
                                  shader->info.stage, false);
869

870
   int desc_index =
871
      descriptor_map_add(map,
872
                         deref->var->data.descriptor_set,
873
                         deref->var->data.binding,
874
                         array_index,
875
                         binding_layout->array_size,
876
                         32 /* return_size: doesn't apply for textures */);
877

878
   /* Note: we don't need to do anything here in relation to the precision and
879
    * the output size because for images we can infer that info from the image
880
    * intrinsic, that includes the image format (see
881
    * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
882
    */
883

884
   index = nir_imm_int(b, desc_index);
885

886
   nir_rewrite_image_intrinsic(instr, index, false);
887
}
888

889
static bool
890
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
891
                nir_shader *shader,
892
                struct v3dv_pipeline *pipeline,
893
                const struct v3dv_pipeline_layout *layout)
894
{
895
   switch (instr->intrinsic) {
896
   case nir_intrinsic_load_layer_id:
897
      /* FIXME: if layered rendering gets supported, this would need a real
898
       * lowering
899
       */
900
      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
901
                               nir_imm_int(b, 0));
902
      nir_instr_remove(&instr->instr);
903
      return true;
904

905
   case nir_intrinsic_load_push_constant:
906
      lower_load_push_constant(b, instr, pipeline);
907
      return true;
908

909
   case nir_intrinsic_vulkan_resource_index:
910
      lower_vulkan_resource_index(b, instr, shader, pipeline, layout);
911
      return true;
912

913
   case nir_intrinsic_load_vulkan_descriptor: {
914
      /* Loading the descriptor happens as part of load/store instructions,
915
       * so for us this is a no-op.
916
       */
917
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
918
      nir_instr_remove(&instr->instr);
919
      return true;
920
   }
921

922
   case nir_intrinsic_image_deref_load:
923
   case nir_intrinsic_image_deref_store:
924
   case nir_intrinsic_image_deref_atomic_add:
925
   case nir_intrinsic_image_deref_atomic_imin:
926
   case nir_intrinsic_image_deref_atomic_umin:
927
   case nir_intrinsic_image_deref_atomic_imax:
928
   case nir_intrinsic_image_deref_atomic_umax:
929
   case nir_intrinsic_image_deref_atomic_and:
930
   case nir_intrinsic_image_deref_atomic_or:
931
   case nir_intrinsic_image_deref_atomic_xor:
932
   case nir_intrinsic_image_deref_atomic_exchange:
933
   case nir_intrinsic_image_deref_atomic_comp_swap:
934
   case nir_intrinsic_image_deref_size:
935
   case nir_intrinsic_image_deref_samples:
936
      lower_image_deref(b, instr, shader, pipeline, layout);
937
      return true;
938

939
   default:
940
      return false;
941
   }
942
}
943

944
static bool
945
lower_impl(nir_function_impl *impl,
946
           nir_shader *shader,
947
           struct v3dv_pipeline *pipeline,
948
           const struct v3dv_pipeline_layout *layout)
949
{
950
   nir_builder b;
951
   nir_builder_init(&b, impl);
952
   bool progress = false;
953

954
   nir_foreach_block(block, impl) {
955
      nir_foreach_instr_safe(instr, block) {
956
         b.cursor = nir_before_instr(instr);
957
         switch (instr->type) {
958
         case nir_instr_type_tex:
959
            progress |=
960
               lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout);
961
            break;
962
         case nir_instr_type_intrinsic:
963
            progress |=
964
               lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader,
965
                               pipeline, layout);
966
            break;
967
         default:
968
            break;
969
         }
970
      }
971
   }
972

973
   return progress;
974
}
975

976
static bool
977
lower_pipeline_layout_info(nir_shader *shader,
978
                           struct v3dv_pipeline *pipeline,
979
                           const struct v3dv_pipeline_layout *layout)
980
{
981
   bool progress = false;
982

983
   nir_foreach_function(function, shader) {
984
      if (function->impl)
985
         progress |= lower_impl(function->impl, shader, pipeline, layout);
986
   }
987

988
   return progress;
989
}
990

991

992
static void
993
lower_fs_io(nir_shader *nir)
994
{
995
   /* Our backend doesn't handle array fragment shader outputs */
996
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
997
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
998

999
   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
1000
                               MESA_SHADER_FRAGMENT);
1001

1002
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1003
                               MESA_SHADER_FRAGMENT);
1004

1005
   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
1006
              type_size_vec4, 0);
1007
}
1008

1009
static void
1010
lower_gs_io(struct nir_shader *nir)
1011
{
1012
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
1013

1014
   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
1015
                               MESA_SHADER_GEOMETRY);
1016

1017
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1018
                               MESA_SHADER_GEOMETRY);
1019
}
1020

1021
static void
1022
lower_vs_io(struct nir_shader *nir)
1023
{
1024
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
1025

1026
   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
1027
                               MESA_SHADER_VERTEX);
1028

1029
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1030
                               MESA_SHADER_VERTEX);
1031

1032
   /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
1033
    * overlaps with v3d_nir_lower_io. Need further research though.
1034
    */
1035
}
1036

1037
static void
1038
shader_debug_output(const char *message, void *data)
1039
{
1040
   /* FIXME: We probably don't want to debug anything extra here, and in fact
1041
    * the compiler is not using this callback too much, only as an alternative
1042
    * way to debug out the shaderdb stats, that you can already get using
1043
    * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
1044
    * compiler to remove that callback.
1045
    */
1046
}
1047

1048
static void
1049
pipeline_populate_v3d_key(struct v3d_key *key,
1050
                          const struct v3dv_pipeline_stage *p_stage,
1051
                          uint32_t ucp_enables,
1052
                          bool robust_buffer_access)
1053
{
1054
   assert(p_stage->pipeline->shared_data &&
1055
          p_stage->pipeline->shared_data->maps[p_stage->stage]);
1056

1057
   /* The following values are default values used at pipeline create. We use
1058
    * there 32 bit as default return size.
1059
    */
1060
   struct v3dv_descriptor_map *sampler_map =
1061
      &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
1062
   struct v3dv_descriptor_map *texture_map =
1063
      &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
1064

1065
   key->num_tex_used = texture_map->num_desc;
1066
   assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
1067
   for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
1068
      key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
1069
      key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
1070
      key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
1071
      key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
1072
   }
1073

1074
   key->num_samplers_used = sampler_map->num_desc;
1075
   assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
1076
   for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
1077
        sampler_idx++) {
1078
      key->sampler[sampler_idx].return_size =
1079
         sampler_map->return_size[sampler_idx];
1080

1081
      key->sampler[sampler_idx].return_channels =
1082
         key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
1083
   }
1084

1085
   switch (p_stage->stage) {
1086
   case BROADCOM_SHADER_VERTEX:
1087
   case BROADCOM_SHADER_VERTEX_BIN:
1088
      key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
1089
      break;
1090
   case BROADCOM_SHADER_GEOMETRY:
1091
   case BROADCOM_SHADER_GEOMETRY_BIN:
1092
      /* FIXME: while we don't implement tessellation shaders */
1093
      key->is_last_geometry_stage = true;
1094
      break;
1095
   case BROADCOM_SHADER_FRAGMENT:
1096
   case BROADCOM_SHADER_COMPUTE:
1097
      key->is_last_geometry_stage = false;
1098
      break;
1099
   default:
1100
      unreachable("unsupported shader stage");
1101
   }
1102

1103
   /* Vulkan doesn't have fixed function state for user clip planes. Instead,
1104
    * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1105
    * takes care of adding a single compact array variable at
1106
    * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1107
    *
1108
    * The only lowering we are interested is specific to the fragment shader,
1109
    * where we want to emit discards to honor writes to gl_ClipDistance[] in
1110
    * previous stages. This is done via nir_lower_clip_fs() so we only set up
1111
    * the ucp enable mask for that stage.
1112
    */
1113
   key->ucp_enables = ucp_enables;
1114

1115
   key->robust_buffer_access = robust_buffer_access;
1116

1117
   key->environment = V3D_ENVIRONMENT_VULKAN;
1118
}
1119

1120
/* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the
1121
 * same. For not using prim_mode that is the one already used on v3d
1122
 */
1123
static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
1124
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
1125
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
1126
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
1127
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
1128
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
1129
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
1130
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
1131
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
1132
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
1133
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
1134
};
1135

1136
static const enum pipe_logicop vk_to_pipe_logicop[] = {
1137
   [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1138
   [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1139
   [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1140
   [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1141
   [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1142
   [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1143
   [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1144
   [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1145
   [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1146
   [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1147
   [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1148
   [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1149
   [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1150
   [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1151
   [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1152
   [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1153
};
1154

1155
static void
1156
pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
1157
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
1158
                             const struct v3dv_pipeline_stage *p_stage,
1159
                             bool has_geometry_shader,
1160
                             uint32_t ucp_enables)
1161
{
1162
   assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
1163

1164
   memset(key, 0, sizeof(*key));
1165

1166
   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1167
   pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);
1168

1169
   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1170
      pCreateInfo->pInputAssemblyState;
1171
   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1172

1173
   key->is_points = (topology == PIPE_PRIM_POINTS);
1174
   key->is_lines = (topology >= PIPE_PRIM_LINES &&
1175
                    topology <= PIPE_PRIM_LINE_STRIP);
1176
   key->has_gs = has_geometry_shader;
1177

1178
   const VkPipelineColorBlendStateCreateInfo *cb_info =
1179
      pCreateInfo->pColorBlendState;
1180

1181
   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1182
                       vk_to_pipe_logicop[cb_info->logicOp] :
1183
                       PIPE_LOGICOP_COPY;
1184

1185
   const bool raster_enabled =
1186
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1187

1188
   /* Multisample rasterization state must be ignored if rasterization
1189
    * is disabled.
1190
    */
1191
   const VkPipelineMultisampleStateCreateInfo *ms_info =
1192
      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1193
   if (ms_info) {
1194
      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1195
             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1196
      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1197

1198
      if (key->msaa) {
1199
         key->sample_coverage =
1200
            p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1201
         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1202
         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1203
      }
1204
   }
1205

1206
   /* This is intended for V3D versions before 4.1, otherwise we just use the
1207
    * tile buffer load/store swap R/B bit.
1208
    */
1209
   key->swap_color_rb = 0;
1210

1211
   const struct v3dv_render_pass *pass =
1212
      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1213
   const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
1214
   for (uint32_t i = 0; i < subpass->color_count; i++) {
1215
      const uint32_t att_idx = subpass->color_attachments[i].attachment;
1216
      if (att_idx == VK_ATTACHMENT_UNUSED)
1217
         continue;
1218

1219
      key->cbufs |= 1 << i;
1220

1221
      VkFormat fb_format = pass->attachments[att_idx].desc.format;
1222
      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1223

1224
      /* If logic operations are enabled then we might emit color reads and we
1225
       * need to know the color buffer format and swizzle for that
1226
       */
1227
      if (key->logicop_func != PIPE_LOGICOP_COPY) {
1228
         key->color_fmt[i].format = fb_pipe_format;
1229
         key->color_fmt[i].swizzle =
1230
            v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format);
1231
      }
1232

1233
      const struct util_format_description *desc =
1234
         vk_format_description(fb_format);
1235

1236
      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1237
          desc->channel[0].size == 32) {
1238
         key->f32_color_rb |= 1 << i;
1239
      }
1240

1241
      if (p_stage->nir->info.fs.untyped_color_outputs) {
1242
         if (util_format_is_pure_uint(fb_pipe_format))
1243
            key->uint_color_rb |= 1 << i;
1244
         else if (util_format_is_pure_sint(fb_pipe_format))
1245
            key->int_color_rb |= 1 << i;
1246
      }
1247

1248
      if (key->is_points) {
1249
         /* FIXME: The mask would need to be computed based on the shader
1250
          * inputs. On gallium it is done at st_atom_rasterizer
1251
          * (sprite_coord_enable). anv seems (need to confirm) to do that on
1252
          * genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also
1253
          * better to have tests to guide filling the mask.
1254
          */
1255
         key->point_sprite_mask = 0;
1256

1257
         /* Vulkan mandates upper left. */
1258
         key->point_coord_upper_left = true;
1259
      }
1260
   }
1261
}
1262

1263
static void
1264
setup_stage_outputs_from_next_stage_inputs(
1265
   uint8_t next_stage_num_inputs,
1266
   struct v3d_varying_slot *next_stage_input_slots,
1267
   uint8_t *num_used_outputs,
1268
   struct v3d_varying_slot *used_output_slots,
1269
   uint32_t size_of_used_output_slots)
1270
{
1271
   *num_used_outputs = next_stage_num_inputs;
1272
   memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
1273
}
1274

1275
static void
1276
pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
1277
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
1278
                             const struct v3dv_pipeline_stage *p_stage)
1279
{
1280
   assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
1281
          p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
1282

1283
   memset(key, 0, sizeof(*key));
1284

1285
   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1286
   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1287

1288
   struct v3dv_pipeline *pipeline = p_stage->pipeline;
1289

1290
   key->per_vertex_point_size =
1291
      p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1292

1293
   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1294

1295
   assert(key->base.is_last_geometry_stage);
1296
   if (key->is_coord) {
1297
      /* Output varyings in the last binning shader are only used for transform
1298
       * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1299
       */
1300
      key->num_used_outputs = 0;
1301
   } else {
1302
      struct v3dv_shader_variant *fs_variant =
1303
         pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1304

1305
      STATIC_ASSERT(sizeof(key->used_outputs) ==
1306
                    sizeof(fs_variant->prog_data.fs->input_slots));
1307

1308
      setup_stage_outputs_from_next_stage_inputs(
1309
         fs_variant->prog_data.fs->num_inputs,
1310
         fs_variant->prog_data.fs->input_slots,
1311
         &key->num_used_outputs,
1312
         key->used_outputs,
1313
         sizeof(key->used_outputs));
1314
   }
1315
}
1316

1317
static void
1318
pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
1319
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
1320
                             const struct v3dv_pipeline_stage *p_stage)
1321
{
1322
   assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
1323
          p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
1324

1325
   memset(key, 0, sizeof(*key));
1326

1327
   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1328
   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1329

1330
   struct v3dv_pipeline *pipeline = p_stage->pipeline;
1331

1332
   /* Vulkan specifies a point size per vertex, so true for if the prim are
1333
    * points, like on ES2)
1334
    */
1335
   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1336
      pCreateInfo->pInputAssemblyState;
1337
   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1338

1339
   /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
1340
    * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
1341
   key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
1342

1343
   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1344

1345
   if (key->is_coord) { /* Binning VS*/
1346
      if (key->base.is_last_geometry_stage) {
1347
         /* Output varyings in the last binning shader are only used for
1348
          * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
1349
          * supported.
1350
          */
1351
         key->num_used_outputs = 0;
1352
      } else {
1353
         /* Linking against GS binning program */
1354
         assert(pipeline->gs);
1355
         struct v3dv_shader_variant *gs_bin_variant =
1356
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
1357

1358
         STATIC_ASSERT(sizeof(key->used_outputs) ==
1359
                       sizeof(gs_bin_variant->prog_data.gs->input_slots));
1360

1361
         setup_stage_outputs_from_next_stage_inputs(
1362
            gs_bin_variant->prog_data.gs->num_inputs,
1363
            gs_bin_variant->prog_data.gs->input_slots,
1364
            &key->num_used_outputs,
1365
            key->used_outputs,
1366
            sizeof(key->used_outputs));
1367
      }
1368
   } else { /* Render VS */
1369
      if (pipeline->gs) {
1370
         /* Linking against GS render program */
1371
         struct v3dv_shader_variant *gs_variant =
1372
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
1373

1374
         STATIC_ASSERT(sizeof(key->used_outputs) ==
1375
                       sizeof(gs_variant->prog_data.gs->input_slots));
1376

1377
         setup_stage_outputs_from_next_stage_inputs(
1378
            gs_variant->prog_data.gs->num_inputs,
1379
            gs_variant->prog_data.gs->input_slots,
1380
            &key->num_used_outputs,
1381
            key->used_outputs,
1382
            sizeof(key->used_outputs));
1383
      } else {
1384
         /* Linking against FS program */
1385
         struct v3dv_shader_variant *fs_variant =
1386
            pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1387

1388
         STATIC_ASSERT(sizeof(key->used_outputs) ==
1389
                       sizeof(fs_variant->prog_data.fs->input_slots));
1390

1391
         setup_stage_outputs_from_next_stage_inputs(
1392
            fs_variant->prog_data.fs->num_inputs,
1393
            fs_variant->prog_data.fs->input_slots,
1394
            &key->num_used_outputs,
1395
            key->used_outputs,
1396
            sizeof(key->used_outputs));
1397
      }
1398
   }
1399

1400
   const VkPipelineVertexInputStateCreateInfo *vi_info =
1401
      pCreateInfo->pVertexInputState;
1402
   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1403
      const VkVertexInputAttributeDescription *desc =
1404
         &vi_info->pVertexAttributeDescriptions[i];
1405
      assert(desc->location < MAX_VERTEX_ATTRIBS);
1406
      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
1407
         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1408
   }
1409
}
1410

1411
/**
1412
 * Creates the initial form of the pipeline stage for a binning shader by
1413
 * cloning the render shader and flagging it as a coordinate shader.
1414
 *
1415
 * Returns NULL if it was not able to allocate the object, so it should be
1416
 * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1417
 */
1418
static struct v3dv_pipeline_stage *
1419
pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
1420
                              const VkAllocationCallbacks *pAllocator)
1421
{
1422
   struct v3dv_device *device = src->pipeline->device;
1423

1424
   struct v3dv_pipeline_stage *p_stage =
1425
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1426
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1427

1428
   if (p_stage == NULL)
1429
      return NULL;
1430

1431
   assert(src->stage == BROADCOM_SHADER_VERTEX ||
1432
          src->stage == BROADCOM_SHADER_GEOMETRY);
1433

1434
   enum broadcom_shader_stage bin_stage =
1435
      src->stage == BROADCOM_SHADER_VERTEX ?
1436
         BROADCOM_SHADER_VERTEX_BIN :
1437
         BROADCOM_SHADER_GEOMETRY_BIN;
1438

1439
   p_stage->pipeline = src->pipeline;
1440
   p_stage->stage = bin_stage;
1441
   p_stage->entrypoint = src->entrypoint;
1442
   p_stage->module = src->module;
1443
   p_stage->nir = src->nir ? nir_shader_clone(NULL, src->nir) : NULL;
1444
   p_stage->spec_info = src->spec_info;
1445
   memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1446

1447
   return p_stage;
1448
}
1449

1450
/**
1451
 * Returns false if it was not able to allocate or map the assembly bo memory.
1452
 */
1453
static bool
1454
upload_assembly(struct v3dv_pipeline *pipeline)
1455
{
1456
   uint32_t total_size = 0;
1457
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1458
      struct v3dv_shader_variant *variant =
1459
         pipeline->shared_data->variants[stage];
1460

1461
      if (variant != NULL)
1462
         total_size += variant->qpu_insts_size;
1463
   }
1464

1465
   struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1466
                                      "pipeline shader assembly", true);
1467
   if (!bo) {
1468
      fprintf(stderr, "failed to allocate memory for shader\n");
1469
      return false;
1470
   }
1471

1472
   bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1473
   if (!ok) {
1474
      fprintf(stderr, "failed to map source shader buffer\n");
1475
      return false;
1476
   }
1477

1478
   uint32_t offset = 0;
1479
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1480
      struct v3dv_shader_variant *variant =
1481
         pipeline->shared_data->variants[stage];
1482

1483
      if (variant != NULL) {
1484
         variant->assembly_offset = offset;
1485

1486
         memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1487
         offset += variant->qpu_insts_size;
1488

1489
         /* We dont need qpu_insts anymore. */
1490
         free(variant->qpu_insts);
1491
         variant->qpu_insts = NULL;
1492
      }
1493
   }
1494
   assert(total_size == offset);
1495

1496
   pipeline->shared_data->assembly_bo = bo;
1497

1498
   return true;
1499
}
1500

1501
static void
1502
pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
1503
                       struct v3dv_pipeline_key *key,
1504
                       unsigned char *sha1_out)
1505
{
1506
   struct mesa_sha1 ctx;
1507
   _mesa_sha1_init(&ctx);
1508

1509
   /* We need to include all shader stages in the sha1 key as linking may modify
1510
    * the shader code in any stage. An alternative would be to use the
1511
    * serialized NIR, but that seems like an overkill.
1512
    */
1513
   _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
1514
                     sizeof(pipeline->vs->shader_sha1));
1515

1516
   if (pipeline->gs) {
1517
      _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
1518
                        sizeof(pipeline->gs->shader_sha1));
1519
   }
1520

1521
   _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
1522
                     sizeof(pipeline->fs->shader_sha1));
1523

1524
   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1525

1526
   _mesa_sha1_final(&ctx, sha1_out);
1527
}
1528

1529
static void
1530
pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
1531
                      struct v3dv_pipeline_key *key,
1532
                      unsigned char *sha1_out)
1533
{
1534
   struct mesa_sha1 ctx;
1535
   _mesa_sha1_init(&ctx);
1536

1537
   _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
1538
                     sizeof(pipeline->cs->shader_sha1));
1539

1540
   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1541

1542
   _mesa_sha1_final(&ctx, sha1_out);
1543
}
1544

1545
/* Checks that the pipeline has enough spill size to use for any of their
1546
 * variants
1547
 */
1548
static void
1549
pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1550
{
1551
   uint32_t max_spill_size = 0;
1552

1553
   for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1554
      struct v3dv_shader_variant *variant =
1555
         pipeline->shared_data->variants[stage];
1556

1557
      if (variant != NULL) {
1558
         max_spill_size = MAX2(variant->prog_data.base->spill_size,
1559
                               max_spill_size);
1560
      }
1561
   }
1562

1563
   if (max_spill_size > 0) {
1564
      struct v3dv_device *device = pipeline->device;
1565

1566
      /* The TIDX register we use for choosing the area to access
1567
       * for scratch space is: (core << 6) | (qpu << 2) | thread.
1568
       * Even at minimum threadcount in a particular shader, that
1569
       * means we still multiply by qpus by 4.
1570
       */
1571
      const uint32_t total_spill_size =
1572
         4 * device->devinfo.qpu_count * max_spill_size;
1573
      if (pipeline->spill.bo) {
1574
         assert(pipeline->spill.size_per_thread > 0);
1575
         v3dv_bo_free(device, pipeline->spill.bo);
1576
      }
1577
      pipeline->spill.bo =
1578
         v3dv_bo_alloc(device, total_spill_size, "spill", true);
1579
      pipeline->spill.size_per_thread = max_spill_size;
1580
   }
1581
}
1582

1583
/**
1584
 * Creates a new shader_variant_create. Note that for prog_data is not const,
1585
 * so it is assumed that the caller will prove a pointer that the
1586
 * shader_variant will own.
1587
 *
1588
 * Creation doesn't include allocate a BD to store the content of qpu_insts,
1589
 * as we will try to share the same bo for several shader variants. Also note
1590
 * that qpu_ints being NULL is valid, for example if we are creating the
1591
 * shader_variants from the cache, so we can just upload the assembly of all
1592
 * the shader stages at once.
1593
 */
1594
struct v3dv_shader_variant *
1595
v3dv_shader_variant_create(struct v3dv_device *device,
1596
                           enum broadcom_shader_stage stage,
1597
                           struct v3d_prog_data *prog_data,
1598
                           uint32_t prog_data_size,
1599
                           uint32_t assembly_offset,
1600
                           uint64_t *qpu_insts,
1601
                           uint32_t qpu_insts_size,
1602
                           VkResult *out_vk_result)
1603
{
1604
   struct v3dv_shader_variant *variant =
1605
      vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1606
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1607

1608
   if (variant == NULL) {
1609
      *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1610
      return NULL;
1611
   }
1612

1613
   variant->stage = stage;
1614
   variant->prog_data_size = prog_data_size;
1615
   variant->prog_data.base = prog_data;
1616

1617
   variant->assembly_offset = assembly_offset;
1618
   variant->qpu_insts_size = qpu_insts_size;
1619
   variant->qpu_insts = qpu_insts;
1620

1621
   *out_vk_result = VK_SUCCESS;
1622

1623
   return variant;
1624
}
1625

1626
/* For a given key, it returns the compiled version of the shader.  Returns a
1627
 * new reference to the shader_variant to the caller, or NULL.
1628
 *
1629
 * If the method returns NULL it means that something wrong happened:
1630
 *   * Not enough memory: this is one of the possible outcomes defined by
1631
 *     vkCreateXXXPipelines. out_vk_result will return the proper oom error.
1632
 *   * Compilation error: hypothetically this shouldn't happen, as the spec
1633
 *     states that vkShaderModule needs to be created with a valid SPIR-V, so
1634
 *     any compilation failure is a driver bug. In the practice, something as
1635
 *     common as failing to register allocate can lead to a compilation
1636
 *     failure. In that case the only option (for any driver) is
1637
 *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1638
 *     error.
1639
 */
1640
static struct v3dv_shader_variant *
1641
pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1642
                                struct v3d_key *key,
1643
                                size_t key_size,
1644
                                const VkAllocationCallbacks *pAllocator,
1645
                                VkResult *out_vk_result)
1646
{
1647
   struct v3dv_pipeline *pipeline = p_stage->pipeline;
1648
   struct v3dv_physical_device *physical_device =
1649
      &pipeline->device->instance->physicalDevice;
1650
   const struct v3d_compiler *compiler = physical_device->compiler;
1651

1652
   if (V3D_DEBUG & (V3D_DEBUG_NIR |
1653
                    v3d_debug_flag_for_shader_stage
1654
                    (broadcom_shader_stage_to_gl(p_stage->stage)))) {
1655
      fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1656
              broadcom_shader_stage_name(p_stage->stage),
1657
              p_stage->program_id);
1658
      nir_print_shader(p_stage->nir, stderr);
1659
      fprintf(stderr, "\n");
1660
   }
1661

1662
   uint64_t *qpu_insts;
1663
   uint32_t qpu_insts_size;
1664
   struct v3d_prog_data *prog_data;
1665
   uint32_t prog_data_size =
1666
      v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));
1667

1668
   qpu_insts = v3d_compile(compiler,
1669
                           key, &prog_data,
1670
                           p_stage->nir,
1671
                           shader_debug_output, NULL,
1672
                           p_stage->program_id, 0,
1673
                           &qpu_insts_size);
1674

1675
   struct v3dv_shader_variant *variant = NULL;
1676

1677
   if (!qpu_insts) {
1678
      fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1679
              gl_shader_stage_name(p_stage->stage),
1680
              p_stage->program_id);
1681
      *out_vk_result = VK_ERROR_UNKNOWN;
1682
   } else {
1683
      variant =
1684
         v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1685
                                    prog_data, prog_data_size,
1686
                                    0, /* assembly_offset, no final value yet */
1687
                                    qpu_insts, qpu_insts_size,
1688
                                    out_vk_result);
1689
   }
1690
   /* At this point we don't need anymore the nir shader, but we are freeing
1691
    * all the temporary p_stage structs used during the pipeline creation when
1692
    * we finish it, so let's not worry about freeing the nir here.
1693
    */
1694

1695
   return variant;
1696
}
1697

1698
/* FIXME: C&P from st, common place? */
1699
static void
1700
st_nir_opts(nir_shader *nir)
1701
{
1702
   bool progress;
1703

1704
   do {
1705
      progress = false;
1706

1707
      NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1708

1709
      /* Linking deals with unused inputs/outputs, but here we can remove
1710
       * things local to the shader in the hopes that we can cleanup other
1711
       * things. This pass will also remove variables with only stores, so we
1712
       * might be able to make progress after it.
1713
       */
1714
      NIR_PASS(progress, nir, nir_remove_dead_variables,
1715
               (nir_variable_mode)(nir_var_function_temp |
1716
                                   nir_var_shader_temp |
1717
                                   nir_var_mem_shared),
1718
               NULL);
1719

1720
      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
1721
      NIR_PASS(progress, nir, nir_opt_dead_write_vars);
1722

1723
      if (nir->options->lower_to_scalar) {
1724
         NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
1725
         NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
1726
      }
1727

1728
      NIR_PASS_V(nir, nir_lower_alu);
1729
      NIR_PASS_V(nir, nir_lower_pack);
1730
      NIR_PASS(progress, nir, nir_copy_prop);
1731
      NIR_PASS(progress, nir, nir_opt_remove_phis);
1732
      NIR_PASS(progress, nir, nir_opt_dce);
1733
      if (nir_opt_trivial_continues(nir)) {
1734
         progress = true;
1735
         NIR_PASS(progress, nir, nir_copy_prop);
1736
         NIR_PASS(progress, nir, nir_opt_dce);
1737
      }
1738
      NIR_PASS(progress, nir, nir_opt_if, false);
1739
      NIR_PASS(progress, nir, nir_opt_dead_cf);
1740
      NIR_PASS(progress, nir, nir_opt_cse);
1741
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
1742

1743
      NIR_PASS(progress, nir, nir_opt_algebraic);
1744
      NIR_PASS(progress, nir, nir_opt_constant_folding);
1745

1746
      NIR_PASS(progress, nir, nir_opt_undef);
1747
      NIR_PASS(progress, nir, nir_opt_conditional_discard);
1748
   } while (progress);
1749
}
1750

1751
static void
1752
link_shaders(nir_shader *producer, nir_shader *consumer)
1753
{
1754
   assert(producer);
1755
   assert(consumer);
1756

1757
   if (producer->options->lower_to_scalar) {
1758
      NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1759
      NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1760
   }
1761

1762
   nir_lower_io_arrays_to_elements(producer, consumer);
1763

1764
   st_nir_opts(producer);
1765
   st_nir_opts(consumer);
1766

1767
   if (nir_link_opt_varyings(producer, consumer))
1768
      st_nir_opts(consumer);
1769

1770
   NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1771
   NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1772

1773
   if (nir_remove_unused_varyings(producer, consumer)) {
1774
      NIR_PASS_V(producer, nir_lower_global_vars_to_local);
1775
      NIR_PASS_V(consumer, nir_lower_global_vars_to_local);
1776

1777
      st_nir_opts(producer);
1778
      st_nir_opts(consumer);
1779

1780
      /* Optimizations can cause varyings to become unused.
1781
       * nir_compact_varyings() depends on all dead varyings being removed so
1782
       * we need to call nir_remove_dead_variables() again here.
1783
       */
1784
      NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1785
      NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1786
   }
1787
}
1788

1789
static void
1790
pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1791
                   struct v3dv_pipeline_stage *p_stage,
1792
                   struct v3dv_pipeline_layout *layout)
1793
{
1794
   assert(pipeline->shared_data &&
1795
          pipeline->shared_data->maps[p_stage->stage]);
1796

1797
   nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1798

1799
   /* We add this because we need a valid sampler for nir_lower_tex to do
1800
    * unpacking of the texture operation result, even for the case where there
1801
    * is no sampler state.
1802
    *
1803
    * We add two of those, one for the case we need a 16bit return_size, and
1804
    * another for the case we need a 32bit return size.
1805
    */
1806
   UNUSED unsigned index =
1807
      descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1808
                         -1, -1, -1, 0, 16);
1809
   assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1810

1811
   index =
1812
      descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1813
                         -2, -2, -2, 0, 32);
1814
   assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1815

1816
   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1817
   NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);
1818
}
1819

1820
/**
1821
 * The SPIR-V compiler will insert a sized compact array for
1822
 * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1823
 * where the size of the array determines the number of active clip planes.
1824
 */
1825
static uint32_t
1826
get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1827
{
1828
   assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1829
   const nir_shader *shader = p_stage->nir;
1830
   assert(shader);
1831

1832
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1833
      if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1834
         assert(var->data.compact);
1835
         return (1 << glsl_get_length(var->type)) - 1;
1836
      }
1837
   }
1838
   return 0;
1839
}
1840

1841
static nir_shader *
1842
pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1843
                       struct v3dv_pipeline *pipeline,
1844
                       struct v3dv_pipeline_cache *cache)
1845
{
1846
   nir_shader *nir = NULL;
1847

1848
   nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1849
                                            &v3dv_nir_options,
1850
                                            p_stage->shader_sha1);
1851

1852
   if (nir) {
1853
      assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1854
      return nir;
1855
   }
1856

1857
   nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1858

1859
   if (nir) {
1860
      struct v3dv_pipeline_cache *default_cache =
1861
         &pipeline->device->default_pipeline_cache;
1862

1863
      v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1864
                                     p_stage->shader_sha1);
1865

1866
      /* Ensure that the variant is on the default cache, as cmd_buffer could
1867
       * need to change the current variant
1868
       */
1869
      if (default_cache != cache) {
1870
         v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1871
                                        p_stage->shader_sha1);
1872
      }
1873
      return nir;
1874
   }
1875

1876
   /* FIXME: this shouldn't happen, raise error? */
1877
   return NULL;
1878
}
1879

1880
static void
1881
pipeline_hash_shader(const struct vk_shader_module *module,
1882
                     const char *entrypoint,
1883
                     gl_shader_stage stage,
1884
                     const VkSpecializationInfo *spec_info,
1885
                     unsigned char *sha1_out)
1886
{
1887
   struct mesa_sha1 ctx;
1888
   _mesa_sha1_init(&ctx);
1889

1890
   _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
1891
   _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
1892
   _mesa_sha1_update(&ctx, &stage, sizeof(stage));
1893
   if (spec_info) {
1894
      _mesa_sha1_update(&ctx, spec_info->pMapEntries,
1895
                        spec_info->mapEntryCount *
1896
                        sizeof(*spec_info->pMapEntries));
1897
      _mesa_sha1_update(&ctx, spec_info->pData,
1898
                        spec_info->dataSize);
1899
   }
1900

1901
   _mesa_sha1_final(&ctx, sha1_out);
1902
}
1903

1904
static VkResult
1905
pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1906
                               const VkAllocationCallbacks *pAllocator,
1907
                               const VkGraphicsPipelineCreateInfo *pCreateInfo)
1908
{
1909
   assert(pipeline->vs_bin != NULL);
1910
   if (pipeline->vs_bin->nir == NULL) {
1911
      assert(pipeline->vs->nir);
1912
      pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);
1913
   }
1914

1915
   VkResult vk_result;
1916
   struct v3d_vs_key key;
1917
   pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);
1918
   pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1919
      pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),
1920
                                      pAllocator, &vk_result);
1921
   if (vk_result != VK_SUCCESS)
1922
      return vk_result;
1923

1924
   pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
1925
   pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1926
      pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
1927
                                      pAllocator, &vk_result);
1928

1929
   return vk_result;
1930
}
1931

1932
static VkResult
1933
pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1934
                                 const VkAllocationCallbacks *pAllocator,
1935
                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1936
{
1937
   assert(pipeline->gs);
1938

1939
   assert(pipeline->gs_bin != NULL);
1940
   if (pipeline->gs_bin->nir == NULL) {
1941
      assert(pipeline->gs->nir);
1942
      pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
1943
   }
1944

1945
   VkResult vk_result;
1946
   struct v3d_gs_key key;
1947
   pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
1948
   pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1949
      pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
1950
                                      pAllocator, &vk_result);
1951
   if (vk_result != VK_SUCCESS)
1952
      return vk_result;
1953

1954
   pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
1955
   pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1956
      pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
1957
                                      pAllocator, &vk_result);
1958

1959
   return vk_result;
1960
}
1961

1962
static VkResult
1963
pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1964
                                 const VkAllocationCallbacks *pAllocator,
1965
                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1966
{
1967
   struct v3dv_pipeline_stage *p_stage = pipeline->vs;
1968

1969
   p_stage = pipeline->fs;
1970

1971
   struct v3d_fs_key key;
1972

1973
   pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
1974
                                pipeline->gs != NULL,
1975
                                get_ucp_enable_mask(pipeline->vs));
1976

1977
   VkResult vk_result;
1978
   pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1979
      pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),
1980
                                      pAllocator, &vk_result);
1981

1982
   return vk_result;
1983
}
1984

1985
static void
1986
pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1987
                               struct v3dv_pipeline_key *key,
1988
                               const VkGraphicsPipelineCreateInfo *pCreateInfo)
1989
{
1990
   memset(key, 0, sizeof(*key));
1991
   key->robust_buffer_access =
1992
      pipeline->device->features.robustBufferAccess;
1993

1994
   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1995
      pCreateInfo->pInputAssemblyState;
1996
   key->topology = vk_to_pipe_prim_type[ia_info->topology];
1997

1998
   const VkPipelineColorBlendStateCreateInfo *cb_info =
1999
      pCreateInfo->pColorBlendState;
2000
   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
2001
      vk_to_pipe_logicop[cb_info->logicOp] :
2002
      PIPE_LOGICOP_COPY;
2003

2004
   const bool raster_enabled =
2005
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2006

2007
   /* Multisample rasterization state must be ignored if rasterization
2008
    * is disabled.
2009
    */
2010
   const VkPipelineMultisampleStateCreateInfo *ms_info =
2011
      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2012
   if (ms_info) {
2013
      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
2014
             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
2015
      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
2016

2017
      if (key->msaa) {
2018
         key->sample_coverage =
2019
            pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
2020
         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
2021
         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
2022
      }
2023
   }
2024

2025
   const struct v3dv_render_pass *pass =
2026
      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
2027
   const struct v3dv_subpass *subpass = pipeline->subpass;
2028
   for (uint32_t i = 0; i < subpass->color_count; i++) {
2029
      const uint32_t att_idx = subpass->color_attachments[i].attachment;
2030
      if (att_idx == VK_ATTACHMENT_UNUSED)
2031
         continue;
2032

2033
      key->cbufs |= 1 << i;
2034

2035
      VkFormat fb_format = pass->attachments[att_idx].desc.format;
2036
      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
2037

2038
      /* If logic operations are enabled then we might emit color reads and we
2039
       * need to know the color buffer format and swizzle for that
2040
       */
2041
      if (key->logicop_func != PIPE_LOGICOP_COPY) {
2042
         key->color_fmt[i].format = fb_pipe_format;
2043
         key->color_fmt[i].swizzle = v3dv_get_format_swizzle(pipeline->device,
2044
                                                             fb_format);
2045
      }
2046

2047
      const struct util_format_description *desc =
2048
         vk_format_description(fb_format);
2049

2050
      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2051
          desc->channel[0].size == 32) {
2052
         key->f32_color_rb |= 1 << i;
2053
      }
2054
   }
2055

2056
   const VkPipelineVertexInputStateCreateInfo *vi_info =
2057
      pCreateInfo->pVertexInputState;
2058
   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2059
      const VkVertexInputAttributeDescription *desc =
2060
         &vi_info->pVertexAttributeDescriptions[i];
2061
      assert(desc->location < MAX_VERTEX_ATTRIBS);
2062
      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
2063
         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2064
   }
2065

2066
}
2067

2068
static void
2069
pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2070
                              struct v3dv_pipeline_key *key,
2071
                              const VkComputePipelineCreateInfo *pCreateInfo)
2072
{
2073
   /* We use the same pipeline key for graphics and compute, but we don't need
2074
    * to add a field to flag compute keys because this key is not used alone
2075
    * to search in the cache, we also use the SPIR-V or the serialized NIR for
2076
    * example, which already flags compute shaders.
2077
    */
2078
   memset(key, 0, sizeof(*key));
2079
   key->robust_buffer_access =
2080
      pipeline->device->features.robustBufferAccess;
2081
}
2082

2083
static struct v3dv_pipeline_shared_data *
2084
v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2085
                                    struct v3dv_pipeline *pipeline,
2086
                                    bool is_graphics_pipeline)
2087
{
2088
   /* We create new_entry using the device alloc. Right now shared_data is ref
2089
    * and unref by both the pipeline and the pipeline cache, so we can't
2090
    * ensure that the cache or pipeline alloc will be available on the last
2091
    * unref.
2092
    */
2093
   struct v3dv_pipeline_shared_data *new_entry =
2094
      vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2095
                 sizeof(struct v3dv_pipeline_shared_data), 8,
2096
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2097

2098
   if (new_entry == NULL)
2099
      return NULL;
2100

2101
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2102
      /* We don't need specific descriptor maps for binning stages we use the
2103
       * map for the render stage.
2104
       */
2105
      if (broadcom_shader_stage_is_binning(stage))
2106
         continue;
2107

2108
      if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2109
          (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2110
         continue;
2111
      }
2112

2113
      if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs)
2114
         continue;
2115

2116
      struct v3dv_descriptor_maps *new_maps =
2117
         vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2118
                    sizeof(struct v3dv_descriptor_maps), 8,
2119
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2120

2121
      if (new_maps == NULL)
2122
         goto fail;
2123

2124
      new_entry->maps[stage] = new_maps;
2125
   }
2126

2127
   new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2128
      new_entry->maps[BROADCOM_SHADER_VERTEX];
2129

2130
   new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2131
      new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2132

2133
   new_entry->ref_cnt = 1;
2134
   memcpy(new_entry->sha1_key, sha1_key, 20);
2135

2136
   return new_entry;
2137

2138
fail:
2139
   if (new_entry != NULL) {
2140
      for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2141
         if (new_entry->maps[stage] != NULL)
2142
            vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2143
      }
2144
   }
2145

2146
   vk_free(&pipeline->device->vk.alloc, new_entry);
2147

2148
   return NULL;
2149
}
2150

2151
/*
2152
 * It compiles a pipeline. Note that it also allocate internal object, but if
2153
 * some allocations success, but other fails, the method is not freeing the
2154
 * successful ones.
2155
 *
2156
 * This is done to simplify the code, as what we do in this case is just call
2157
 * the pipeline destroy method, and this would handle freeing the internal
2158
 * objects allocated. We just need to be careful setting to NULL the objects
2159
 * not allocated.
2160
 */
2161
static VkResult
2162
pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2163
                          struct v3dv_pipeline_cache *cache,
2164
                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
2165
                          const VkAllocationCallbacks *pAllocator)
2166
{
2167
   struct v3dv_device *device = pipeline->device;
2168
   struct v3dv_physical_device *physical_device =
2169
      &device->instance->physicalDevice;
2170

2171
   /* First pass to get some common info from the shader, and create the
2172
    * individual pipeline_stage objects
2173
    */
2174
   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2175
      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2176
      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2177

2178
      struct v3dv_pipeline_stage *p_stage =
2179
         vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2180
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2181

2182
      if (p_stage == NULL)
2183
         return VK_ERROR_OUT_OF_HOST_MEMORY;
2184

2185
      /* Note that we are assigning program_id slightly differently that
2186
       * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
2187
       * would have a different program_id, while v3d would have the same for
2188
       * both. For the case of v3dv, it is more natural to have an id this way,
2189
       * as right now we are using it for debugging, not for shader-db.
2190
       */
2191
      p_stage->program_id =
2192
         p_atomic_inc_return(&physical_device->next_program_id);
2193

2194
      p_stage->pipeline = pipeline;
2195
      p_stage->stage = gl_shader_stage_to_broadcom(stage);
2196
      p_stage->entrypoint = sinfo->pName;
2197
      p_stage->module = vk_shader_module_from_handle(sinfo->module);
2198
      p_stage->spec_info = sinfo->pSpecializationInfo;
2199

2200
      pipeline_hash_shader(p_stage->module,
2201
                           p_stage->entrypoint,
2202
                           stage,
2203
                           p_stage->spec_info,
2204
                           p_stage->shader_sha1);
2205

2206
      pipeline->active_stages |= sinfo->stage;
2207

2208
      /* We will try to get directly the compiled shader variant, so let's not
2209
       * worry about getting the nir shader for now.
2210
       */
2211
      p_stage->nir = NULL;
2212

2213
      switch(stage) {
2214
      case MESA_SHADER_VERTEX:
2215
         pipeline->vs = p_stage;
2216
         pipeline->vs_bin =
2217
            pipeline_stage_create_binning(pipeline->vs, pAllocator);
2218
         if (pipeline->vs_bin == NULL)
2219
            return VK_ERROR_OUT_OF_HOST_MEMORY;
2220
         break;
2221

2222
      case MESA_SHADER_GEOMETRY:
2223
         pipeline->has_gs = true;
2224
         pipeline->gs = p_stage;
2225
         pipeline->gs_bin =
2226
            pipeline_stage_create_binning(pipeline->gs, pAllocator);
2227
         if (pipeline->gs_bin == NULL)
2228
            return VK_ERROR_OUT_OF_HOST_MEMORY;
2229
         break;
2230

2231
      case MESA_SHADER_FRAGMENT:
2232
         pipeline->fs = p_stage;
2233
         break;
2234

2235
      default:
2236
         unreachable("not supported shader stage");
2237
      }
2238
   }
2239

2240
   /* Add a no-op fragment shader if needed */
2241
   if (!pipeline->fs) {
2242
      nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2243
                                                     &v3dv_nir_options,
2244
                                                     "noop_fs");
2245

2246
      struct v3dv_pipeline_stage *p_stage =
2247
         vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2248
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2249

2250
      if (p_stage == NULL)
2251
         return VK_ERROR_OUT_OF_HOST_MEMORY;
2252

2253
      p_stage->pipeline = pipeline;
2254
      p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2255
      p_stage->entrypoint = "main";
2256
      p_stage->module = 0;
2257
      p_stage->nir = b.shader;
2258
      pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
2259
      p_stage->program_id =
2260
         p_atomic_inc_return(&physical_device->next_program_id);
2261

2262
      pipeline->fs = p_stage;
2263
      pipeline->active_stages |= MESA_SHADER_FRAGMENT;
2264
   }
2265

2266
   /* First we try to get the variants from the pipeline cache */
2267
   struct v3dv_pipeline_key pipeline_key;
2268
   pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2269
   unsigned char pipeline_sha1[20];
2270
   pipeline_hash_graphics(pipeline, &pipeline_key, pipeline_sha1);
2271

2272
   pipeline->shared_data =
2273
      v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1);
2274

2275
   if (pipeline->shared_data != NULL) {
2276
      /* A correct pipeline must have at least a VS and FS */
2277
      assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2278
      assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2279
      assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2280
      assert(!pipeline->gs ||
2281
             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2282
      assert(!pipeline->gs ||
2283
             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2284
      goto success;
2285
   }
2286

2287
   /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2288
    * shader or the pipeline cache) and compile.
2289
    */
2290
   pipeline->shared_data =
2291
      v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline, true);
2292

2293
   if (!pipeline->vs->nir)
2294
      pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2295
   if (pipeline->gs && !pipeline->gs->nir)
2296
      pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
2297
   if (!pipeline->fs->nir)
2298
      pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
2299

2300
   /* Linking + pipeline lowerings */
2301
   if (pipeline->gs) {
2302
      link_shaders(pipeline->gs->nir, pipeline->fs->nir);
2303
      link_shaders(pipeline->vs->nir, pipeline->gs->nir);
2304
   } else {
2305
      link_shaders(pipeline->vs->nir, pipeline->fs->nir);
2306
   }
2307

2308
   pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
2309
   lower_fs_io(pipeline->fs->nir);
2310

2311
   if (pipeline->gs) {
2312
      pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
2313
      lower_gs_io(pipeline->vs->nir);
2314
   }
2315

2316
   pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
2317
   lower_vs_io(pipeline->vs->nir);
2318

2319
   /* Compiling to vir */
2320
   VkResult vk_result;
2321

2322
   /* We should have got all the variants or no variants from the cache */
2323
   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2324
   vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
2325
   if (vk_result != VK_SUCCESS)
2326
      return vk_result;
2327

2328
   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2329
          !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2330

2331
   if (pipeline->gs) {
2332
      vk_result =
2333
         pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2334
      if (vk_result != VK_SUCCESS)
2335
         return vk_result;
2336
   }
2337

2338
   assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2339
          !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2340

2341
   vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2342
   if (vk_result != VK_SUCCESS)
2343
      return vk_result;
2344

2345
   if (!upload_assembly(pipeline))
2346
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2347

2348
   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2349

2350
 success:
2351
   /* Since we have the variants in the pipeline shared data we can now free
2352
    * the pipeline stages.
2353
    */
2354
   pipeline_free_stages(device, pipeline, pAllocator);
2355

2356
   pipeline_check_spill_size(pipeline);
2357

2358
   return compute_vpm_config(pipeline);
2359
}
2360

2361
static VkResult
2362
compute_vpm_config(struct v3dv_pipeline *pipeline)
2363
{
2364
   struct v3dv_shader_variant *vs_variant =
2365
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2366
   struct v3dv_shader_variant *vs_bin_variant =
2367
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2368
   struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2369
   struct v3d_vs_prog_data *vs_bin =vs_bin_variant->prog_data.vs;
2370

2371
   struct v3d_gs_prog_data *gs = NULL;
2372
   struct v3d_gs_prog_data *gs_bin = NULL;
2373
   if (pipeline->has_gs) {
2374
      struct v3dv_shader_variant *gs_variant =
2375
         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2376
      struct v3dv_shader_variant *gs_bin_variant =
2377
         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2378
      gs = gs_variant->prog_data.gs;
2379
      gs_bin = gs_bin_variant->prog_data.gs;
2380
   }
2381

2382
   if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2383
                               vs_bin, vs, gs_bin, gs,
2384
                               &pipeline->vpm_cfg_bin,
2385
                               &pipeline->vpm_cfg)) {
2386
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2387
   }
2388

2389
   return VK_SUCCESS;
2390
}
2391

2392
static unsigned
2393
v3dv_dynamic_state_mask(VkDynamicState state)
2394
{
2395
   switch(state) {
2396
   case VK_DYNAMIC_STATE_VIEWPORT:
2397
      return V3DV_DYNAMIC_VIEWPORT;
2398
   case VK_DYNAMIC_STATE_SCISSOR:
2399
      return V3DV_DYNAMIC_SCISSOR;
2400
   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2401
      return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2402
   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2403
      return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2404
   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2405
      return V3DV_DYNAMIC_STENCIL_REFERENCE;
2406
   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2407
      return V3DV_DYNAMIC_BLEND_CONSTANTS;
2408
   case VK_DYNAMIC_STATE_DEPTH_BIAS:
2409
      return V3DV_DYNAMIC_DEPTH_BIAS;
2410
   case VK_DYNAMIC_STATE_LINE_WIDTH:
2411
      return V3DV_DYNAMIC_LINE_WIDTH;
2412

2413
   /* Depth bounds testing is not available in in V3D 4.2 so here we are just
2414
    * ignoring this dynamic state. We are already asserting at pipeline creation
2415
    * time that depth bounds testing is not enabled.
2416
    */
2417
   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2418
      return 0;
2419

2420
   default:
2421
      unreachable("Unhandled dynamic state");
2422
   }
2423
}
2424

2425
static void
2426
pipeline_init_dynamic_state(
2427
   struct v3dv_pipeline *pipeline,
2428
   const VkPipelineDynamicStateCreateInfo *pDynamicState,
2429
   const VkPipelineViewportStateCreateInfo *pViewportState,
2430
   const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2431
   const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2432
   const VkPipelineRasterizationStateCreateInfo *pRasterizationState)
2433
{
2434
   pipeline->dynamic_state = default_dynamic_state;
2435
   struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2436

2437
   /* Create a mask of enabled dynamic states */
2438
   uint32_t dynamic_states = 0;
2439
   if (pDynamicState) {
2440
      uint32_t count = pDynamicState->dynamicStateCount;
2441
      for (uint32_t s = 0; s < count; s++) {
2442
         dynamic_states |=
2443
            v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2444
      }
2445
   }
2446

2447
   /* For any pipeline states that are not dynamic, set the dynamic state
2448
    * from the static pipeline state.
2449
    */
2450
   if (pViewportState) {
2451
      if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2452
         dynamic->viewport.count = pViewportState->viewportCount;
2453
         typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2454
                      pViewportState->viewportCount);
2455

2456
         for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2457
            v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
2458
                                        dynamic->viewport.scale[i],
2459
                                        dynamic->viewport.translate[i]);
2460
         }
2461
      }
2462

2463
      if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2464
         dynamic->scissor.count = pViewportState->scissorCount;
2465
         typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2466
                      pViewportState->scissorCount);
2467
      }
2468
   }
2469

2470
   if (pDepthStencilState) {
2471
      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2472
         dynamic->stencil_compare_mask.front =
2473
            pDepthStencilState->front.compareMask;
2474
         dynamic->stencil_compare_mask.back =
2475
            pDepthStencilState->back.compareMask;
2476
      }
2477

2478
      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2479
         dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2480
         dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2481
      }
2482

2483
      if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2484
         dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2485
         dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2486
      }
2487
   }
2488

2489
   if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2490
      memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2491
             sizeof(dynamic->blend_constants));
2492
   }
2493

2494
   if (pRasterizationState) {
2495
      if (pRasterizationState->depthBiasEnable &&
2496
          !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2497
         dynamic->depth_bias.constant_factor =
2498
            pRasterizationState->depthBiasConstantFactor;
2499
         dynamic->depth_bias.depth_bias_clamp =
2500
            pRasterizationState->depthBiasClamp;
2501
         dynamic->depth_bias.slope_factor =
2502
            pRasterizationState->depthBiasSlopeFactor;
2503
      }
2504
      if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2505
         dynamic->line_width = pRasterizationState->lineWidth;
2506
   }
2507

2508
   pipeline->dynamic_state.mask = dynamic_states;
2509
}
2510

2511
static bool
2512
stencil_op_is_no_op(const VkStencilOpState *stencil)
2513
{
2514
   return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2515
          stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2516
}
2517

2518
static void
2519
enable_depth_bias(struct v3dv_pipeline *pipeline,
2520
                  const VkPipelineRasterizationStateCreateInfo *rs_info)
2521
{
2522
   pipeline->depth_bias.enabled = false;
2523
   pipeline->depth_bias.is_z16 = false;
2524

2525
   if (!rs_info || !rs_info->depthBiasEnable)
2526
      return;
2527

2528
   /* Check the depth/stencil attachment description for the subpass used with
2529
    * this pipeline.
2530
    */
2531
   assert(pipeline->pass && pipeline->subpass);
2532
   struct v3dv_render_pass *pass = pipeline->pass;
2533
   struct v3dv_subpass *subpass = pipeline->subpass;
2534

2535
   if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2536
      return;
2537

2538
   assert(subpass->ds_attachment.attachment < pass->attachment_count);
2539
   struct v3dv_render_pass_attachment *att =
2540
      &pass->attachments[subpass->ds_attachment.attachment];
2541

2542
   if (att->desc.format == VK_FORMAT_D16_UNORM)
2543
      pipeline->depth_bias.is_z16 = true;
2544

2545
   pipeline->depth_bias.enabled = true;
2546
}
2547

2548
static void
2549
pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2550
                      const VkPipelineDepthStencilStateCreateInfo *ds_info)
2551
{
2552
   if (!ds_info || !ds_info->depthTestEnable) {
2553
      pipeline->ez_state = V3D_EZ_DISABLED;
2554
      return;
2555
   }
2556

2557
   switch (ds_info->depthCompareOp) {
2558
   case VK_COMPARE_OP_LESS:
2559
   case VK_COMPARE_OP_LESS_OR_EQUAL:
2560
      pipeline->ez_state = V3D_EZ_LT_LE;
2561
      break;
2562
   case VK_COMPARE_OP_GREATER:
2563
   case VK_COMPARE_OP_GREATER_OR_EQUAL:
2564
      pipeline->ez_state = V3D_EZ_GT_GE;
2565
      break;
2566
   case VK_COMPARE_OP_NEVER:
2567
   case VK_COMPARE_OP_EQUAL:
2568
      pipeline->ez_state = V3D_EZ_UNDECIDED;
2569
      break;
2570
   default:
2571
      pipeline->ez_state = V3D_EZ_DISABLED;
2572
      break;
2573
   }
2574

2575
   /* If stencil is enabled and is not a no-op, we need to disable EZ */
2576
   if (ds_info->stencilTestEnable &&
2577
       (!stencil_op_is_no_op(&ds_info->front) ||
2578
        !stencil_op_is_no_op(&ds_info->back))) {
2579
         pipeline->ez_state = V3D_EZ_DISABLED;
2580
   }
2581
}
2582

2583
static bool
2584
pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
2585
{
2586
   for (uint8_t i = 0; i < pipeline->va_count; i++) {
2587
      if (vk_format_is_int(pipeline->va[i].vk_format))
2588
         return true;
2589
   }
2590
   return false;
2591
}
2592

2593
/* @pipeline can be NULL. We assume in that case that all the attributes have
2594
 * a float format (we only create an all-float BO once and we reuse it with
2595
 * all float pipelines), otherwise we look at the actual type of each
2596
 * attribute used with the specific pipeline passed in.
2597
 */
2598
struct v3dv_bo *
2599
v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2600
                                              struct v3dv_pipeline *pipeline)
2601
{
2602
   uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
2603
   struct v3dv_bo *bo;
2604

2605
   bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
2606

2607
   if (!bo) {
2608
      fprintf(stderr, "failed to allocate memory for the default "
2609
              "attribute values\n");
2610
      return NULL;
2611
   }
2612

2613
   bool ok = v3dv_bo_map(device, bo, size);
2614
   if (!ok) {
2615
      fprintf(stderr, "failed to map default attribute values buffer\n");
2616
      return false;
2617
   }
2618

2619
   uint32_t *attrs = bo->map;
2620
   uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
2621
   for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
2622
      attrs[i * 4 + 0] = 0;
2623
      attrs[i * 4 + 1] = 0;
2624
      attrs[i * 4 + 2] = 0;
2625
      VkFormat attr_format =
2626
         pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
2627
      if (i < va_count && vk_format_is_int(attr_format)) {
2628
         attrs[i * 4 + 3] = 1;
2629
      } else {
2630
         attrs[i * 4 + 3] = fui(1.0);
2631
      }
2632
   }
2633

2634
   v3dv_bo_unmap(device, bo);
2635

2636
   return bo;
2637
}
2638

2639
static void
2640
pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2641
                         const VkPipelineMultisampleStateCreateInfo *ms_info)
2642
{
2643
   pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2644

2645
   /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2646
    * requires this to be 0xf or 0x0 if using a single sample.
2647
    */
2648
   if (ms_info && ms_info->pSampleMask &&
2649
       ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2650
      pipeline->sample_mask &= ms_info->pSampleMask[0];
2651
   }
2652
}
2653

2654
static void
2655
pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2656
                                 const VkPipelineMultisampleStateCreateInfo *ms_info)
2657
{
2658
   pipeline->sample_rate_shading =
2659
      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2660
      ms_info->sampleShadingEnable;
2661
}
2662

2663
static VkResult
2664
pipeline_init(struct v3dv_pipeline *pipeline,
2665
              struct v3dv_device *device,
2666
              struct v3dv_pipeline_cache *cache,
2667
              const VkGraphicsPipelineCreateInfo *pCreateInfo,
2668
              const VkAllocationCallbacks *pAllocator)
2669
{
2670
   VkResult result = VK_SUCCESS;
2671

2672
   pipeline->device = device;
2673

2674
   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2675
   pipeline->layout = layout;
2676

2677
   V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2678
   assert(pCreateInfo->subpass < render_pass->subpass_count);
2679
   pipeline->pass = render_pass;
2680
   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2681

2682
   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2683
      pCreateInfo->pInputAssemblyState;
2684
   pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
2685

2686
   /* If rasterization is not enabled, various CreateInfo structs must be
2687
    * ignored.
2688
    */
2689
   const bool raster_enabled =
2690
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2691

2692
   const VkPipelineViewportStateCreateInfo *vp_info =
2693
      raster_enabled ? pCreateInfo->pViewportState : NULL;
2694

2695
   const VkPipelineDepthStencilStateCreateInfo *ds_info =
2696
      raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2697

2698
   const VkPipelineRasterizationStateCreateInfo *rs_info =
2699
      raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2700

2701
   const VkPipelineColorBlendStateCreateInfo *cb_info =
2702
      raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2703

2704
   const VkPipelineMultisampleStateCreateInfo *ms_info =
2705
      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2706

2707
   pipeline_init_dynamic_state(pipeline,
2708
                               pCreateInfo->pDynamicState,
2709
                               vp_info, ds_info, cb_info, rs_info);
2710

2711
   /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2712
    * feature and it shouldn't be used by any pipeline.
2713
    */
2714
   assert(!ds_info || !ds_info->depthBoundsTestEnable);
2715

2716
   v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
2717
                                       rs_info, ms_info);
2718

2719
   pipeline_set_ez_state(pipeline, ds_info);
2720
   enable_depth_bias(pipeline, rs_info);
2721
   pipeline_set_sample_mask(pipeline, ms_info);
2722
   pipeline_set_sample_rate_shading(pipeline, ms_info);
2723

2724
   pipeline->primitive_restart =
2725
      pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
2726

2727
   result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
2728

2729
   if (result != VK_SUCCESS) {
2730
      /* Caller would already destroy the pipeline, and we didn't allocate any
2731
       * extra info. We don't need to do anything else.
2732
       */
2733
      return result;
2734
   }
2735

2736
   v3dv_X(device, pipeline_pack_compile_state)(pipeline,
2737
                                               pCreateInfo->pVertexInputState);
2738

2739
   if (pipeline_has_integer_vertex_attrib(pipeline)) {
2740
      pipeline->default_attribute_values =
2741
         v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
2742
      if (!pipeline->default_attribute_values)
2743
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2744
   } else {
2745
      pipeline->default_attribute_values = NULL;
2746
   }
2747

2748
   return result;
2749
}
2750

2751
static VkResult
2752
graphics_pipeline_create(VkDevice _device,
2753
                         VkPipelineCache _cache,
2754
                         const VkGraphicsPipelineCreateInfo *pCreateInfo,
2755
                         const VkAllocationCallbacks *pAllocator,
2756
                         VkPipeline *pPipeline)
2757
{
2758
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
2759
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
2760

2761
   struct v3dv_pipeline *pipeline;
2762
   VkResult result;
2763

2764
   /* Use the default pipeline cache if none is specified */
2765
   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
2766
      cache = &device->default_pipeline_cache;
2767

2768
   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
2769
                               VK_OBJECT_TYPE_PIPELINE);
2770

2771
   if (pipeline == NULL)
2772
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2773

2774
   result = pipeline_init(pipeline, device, cache,
2775
                          pCreateInfo,
2776
                          pAllocator);
2777

2778
   if (result != VK_SUCCESS) {
2779
      v3dv_destroy_pipeline(pipeline, device, pAllocator);
2780
      return result;
2781
   }
2782

2783
   *pPipeline = v3dv_pipeline_to_handle(pipeline);
2784

2785
   return VK_SUCCESS;
2786
}
2787

2788
VKAPI_ATTR VkResult VKAPI_CALL
2789
v3dv_CreateGraphicsPipelines(VkDevice _device,
2790
                             VkPipelineCache pipelineCache,
2791
                             uint32_t count,
2792
                             const VkGraphicsPipelineCreateInfo *pCreateInfos,
2793
                             const VkAllocationCallbacks *pAllocator,
2794
                             VkPipeline *pPipelines)
2795
{
2796
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
2797
   VkResult result = VK_SUCCESS;
2798

2799
   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
2800
      mtx_lock(&device->pdevice->mutex);
2801

2802
   for (uint32_t i = 0; i < count; i++) {
2803
      VkResult local_result;
2804

2805
      local_result = graphics_pipeline_create(_device,
2806
                                              pipelineCache,
2807
                                              &pCreateInfos[i],
2808
                                              pAllocator,
2809
                                              &pPipelines[i]);
2810

2811
      if (local_result != VK_SUCCESS) {
2812
         result = local_result;
2813
         pPipelines[i] = VK_NULL_HANDLE;
2814
      }
2815
   }
2816

2817
   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
2818
      mtx_unlock(&device->pdevice->mutex);
2819

2820
   return result;
2821
}
2822

2823
static void
2824
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
2825
{
2826
   assert(glsl_type_is_vector_or_scalar(type));
2827

2828
   uint32_t comp_size = glsl_type_is_boolean(type)
2829
      ? 4 : glsl_get_bit_size(type) / 8;
2830
   unsigned length = glsl_get_vector_elements(type);
2831
   *size = comp_size * length,
2832
   *align = comp_size * (length == 3 ? 4 : length);
2833
}
2834

2835
static void
2836
lower_cs_shared(struct nir_shader *nir)
2837
{
2838
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
2839
              nir_var_mem_shared, shared_type_info);
2840
   NIR_PASS_V(nir, nir_lower_explicit_io,
2841
              nir_var_mem_shared, nir_address_format_32bit_offset);
2842
}
2843

2844
static VkResult
2845
pipeline_compile_compute(struct v3dv_pipeline *pipeline,
2846
                         struct v3dv_pipeline_cache *cache,
2847
                         const VkComputePipelineCreateInfo *info,
2848
                         const VkAllocationCallbacks *alloc)
2849
{
2850
   struct v3dv_device *device = pipeline->device;
2851
   struct v3dv_physical_device *physical_device =
2852
      &device->instance->physicalDevice;
2853

2854
   const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
2855
   gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2856

2857
   struct v3dv_pipeline_stage *p_stage =
2858
      vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
2859
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2860
   if (!p_stage)
2861
      return VK_ERROR_OUT_OF_HOST_MEMORY;
2862

2863
   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2864
   p_stage->pipeline = pipeline;
2865
   p_stage->stage = gl_shader_stage_to_broadcom(stage);
2866
   p_stage->entrypoint = sinfo->pName;
2867
   p_stage->module = vk_shader_module_from_handle(sinfo->module);
2868
   p_stage->spec_info = sinfo->pSpecializationInfo;
2869

2870
   pipeline_hash_shader(p_stage->module,
2871
                        p_stage->entrypoint,
2872
                        stage,
2873
                        p_stage->spec_info,
2874
                        p_stage->shader_sha1);
2875

2876
   /* We try to get directly the variant first from the cache */
2877
   p_stage->nir = NULL;
2878

2879
   pipeline->cs = p_stage;
2880
   pipeline->active_stages |= sinfo->stage;
2881

2882
   struct v3dv_pipeline_key pipeline_key;
2883
   pipeline_populate_compute_key(pipeline, &pipeline_key, info);
2884
   unsigned char pipeline_sha1[20];
2885
   pipeline_hash_compute(pipeline, &pipeline_key, pipeline_sha1);
2886

2887
   pipeline->shared_data =
2888
      v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1);
2889

2890
   if (pipeline->shared_data != NULL) {
2891
      assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
2892
      goto success;
2893
   }
2894

2895
   pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline_sha1,
2896
                                                               pipeline,
2897
                                                               false);
2898

2899
   /* If not found on cache, compile it */
2900
   p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
2901
   assert(p_stage->nir);
2902

2903
   st_nir_opts(p_stage->nir);
2904
   pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
2905
   lower_cs_shared(p_stage->nir);
2906

2907
   VkResult result = VK_SUCCESS;
2908

2909
   struct v3d_key key;
2910
   memset(&key, 0, sizeof(key));
2911
   pipeline_populate_v3d_key(&key, p_stage, 0,
2912
                             pipeline->device->features.robustBufferAccess);
2913
   pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
2914
      pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
2915
                                      alloc, &result);
2916

2917
   if (result != VK_SUCCESS)
2918
      return result;
2919

2920
   if (!upload_assembly(pipeline))
2921
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2922

2923
   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2924
   /* As we got the variants in pipeline->shared_data, after compiling we
2925
    * don't need the pipeline_stages
2926
    */
2927
   pipeline_free_stages(device, pipeline, alloc);
2928

2929
 success:
2930
   pipeline_check_spill_size(pipeline);
2931

2932
   return VK_SUCCESS;
2933
}
2934

2935
static VkResult
2936
compute_pipeline_init(struct v3dv_pipeline *pipeline,
2937
                      struct v3dv_device *device,
2938
                      struct v3dv_pipeline_cache *cache,
2939
                      const VkComputePipelineCreateInfo *info,
2940
                      const VkAllocationCallbacks *alloc)
2941
{
2942
   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
2943

2944
   pipeline->device = device;
2945
   pipeline->layout = layout;
2946

2947
   VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
2948

2949
   return result;
2950
}
2951

2952
static VkResult
2953
compute_pipeline_create(VkDevice _device,
2954
                         VkPipelineCache _cache,
2955
                         const VkComputePipelineCreateInfo *pCreateInfo,
2956
                         const VkAllocationCallbacks *pAllocator,
2957
                         VkPipeline *pPipeline)
2958
{
2959
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
2960
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
2961

2962
   struct v3dv_pipeline *pipeline;
2963
   VkResult result;
2964

2965
   /* Use the default pipeline cache if none is specified */
2966
   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
2967
      cache = &device->default_pipeline_cache;
2968

2969
   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
2970
                               VK_OBJECT_TYPE_PIPELINE);
2971
   if (pipeline == NULL)
2972
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2973

2974
   result = compute_pipeline_init(pipeline, device, cache,
2975
                                  pCreateInfo, pAllocator);
2976
   if (result != VK_SUCCESS) {
2977
      v3dv_destroy_pipeline(pipeline, device, pAllocator);
2978
      return result;
2979
   }
2980

2981
   *pPipeline = v3dv_pipeline_to_handle(pipeline);
2982

2983
   return VK_SUCCESS;
2984
}
2985

2986
VKAPI_ATTR VkResult VKAPI_CALL
2987
v3dv_CreateComputePipelines(VkDevice _device,
2988
                            VkPipelineCache pipelineCache,
2989
                            uint32_t createInfoCount,
2990
                            const VkComputePipelineCreateInfo *pCreateInfos,
2991
                            const VkAllocationCallbacks *pAllocator,
2992
                            VkPipeline *pPipelines)
2993
{
2994
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
2995
   VkResult result = VK_SUCCESS;
2996

2997
   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
2998
      mtx_lock(&device->pdevice->mutex);
2999

3000
   for (uint32_t i = 0; i < createInfoCount; i++) {
3001
      VkResult local_result;
3002
      local_result = compute_pipeline_create(_device,
3003
                                              pipelineCache,
3004
                                              &pCreateInfos[i],
3005
                                              pAllocator,
3006
                                              &pPipelines[i]);
3007

3008
      if (local_result != VK_SUCCESS) {
3009
         result = local_result;
3010
         pPipelines[i] = VK_NULL_HANDLE;
3011
      }
3012
   }
3013

3014
   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3015
      mtx_unlock(&device->pdevice->mutex);
3016

3017
   return result;
3018
}
3019

3020
Product

Resources

Company