Path: blob/21.2-virgl/src/compiler/nir/nir_linking_helpers.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it just deletes unused things.
 */

/**
 * Returns the bits in the inputs_read, or outputs_written
 * bitfield corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return ((1ull << slots) - 1) << location;
}
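
/* For illustration: a non-arrayed "float foo[3]" varying at
 * VARYING_SLOT_VAR2 counts as three consecutive slots, so the mask is
 *
 *    ((1ull << 3) - 1) << VARYING_SLOT_VAR2
 *
 * i.e. the VAR2, VAR3 and VAR4 bits of inputs_read/outputs_written.
 */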

static uint8_t
get_num_components(nir_variable *var)
{
   if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
      return 4;

   return glsl_get_vector_elements(glsl_without_array(var->type));
}

static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            for (unsigned i = 0; i < get_num_components(var); i++) {
               if (var->data.patch) {
                  patches_read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               } else {
                  read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               }
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead-code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
 *                                      read, patches_read) ||
 *                                      progress;
 *
 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
 * representing each .location_frac used. Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader,
                          nir_variable_mode mode,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   assert(mode == nir_var_shader_in || mode == nir_var_shader_out);

   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is invalid, make it a global variable instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_shader_out_variable(var, producer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            patches_written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         } else {
            written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         }
      }
   }

   nir_foreach_shader_in_variable(var, consumer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            patches_read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         } else {
            read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         }
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
                                        patches_written) || progress;

   return progress;
}
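
/* A rough usage sketch (hypothetical caller): demoted variables end up in
 * nir_var_shader_temp, so a linker typically follows this pass with the
 * lowering and DCE that actually delete them, e.g.:
 *
 *    if (nir_remove_unused_varyings(producer, consumer)) {
 *       nir_lower_global_vars_to_local(producer);
 *       nir_lower_global_vars_to_local(consumer);
 *    }
 */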

static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bitsizes
    * other than 32-bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_mediump;
};

/* Packing arrays and dual slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(nir_shader *shader,
                                nir_variable_mode mode,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type))
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
            glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit =
               glsl_type_is_32bit(glsl_without_array(type));
            comps[location + i].is_mediump =
               var->data.precision == GLSL_PRECISION_MEDIUM ||
               var->data.precision == GLSL_PRECISION_LOW;
         }
      }
   }
}
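
/* Worked example: a dvec3 with location_frac == 0 has elements == 3,
 * dmul == 2 and dual_slot == true, i.e. 6 components over 2 slots. The
 * first slot takes num_comps = 4 - 0 = 4 (mask 0b1111), leaving
 * comps_slot2 = 6 - 4 = 2, so the second slot gets mask 0b0011.
 */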

struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

static void
remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   const gl_shader_stage stage = shader->info.stage;
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable_with_modes(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these so we need to copy the existing mask
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool is_mediump;
   bool is_intra_stage_only;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* We want to try to group together TCS outputs that are only read by other
    * TCS invocations and not consumed by the following stage.
    */
   if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
      return comp1->is_intra_stage_only ? 1 : -1;

   /* Group mediump varyings together. */
   if (comp1->is_mediump != comp2->is_mediump)
      return comp1->is_mediump ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   const struct nir_variable_data *const data1 = &comp1->var->data;
   const struct nir_variable_data *const data2 = &comp2->var->data;
   if (data1->location != data2->location)
      return data1->location - data2->location;
   return (int)data1->location_frac - (int)data2->location_frac;
}

static void
gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_shader_out_variable(var, producer) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(deref, nir_var_shader_in))
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx-1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
                in_var->data.per_view) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
            vc_info->is_mediump = !producer->options->linker_ignore_precision &&
               (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
                in_var->data.precision == GLSL_PRECISION_LOW);
            vc_info->is_intra_stage_only = false;
            vc_info->initialised = true;
         }
      }
   }

   /* Walk over the shader and populate the varying component info array
    * for varyings which are read by other TCS instances but are not consumed
    * by the TES.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      impl = nir_shader_get_entrypoint(producer);

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            /* We only remap things that aren't builtins. */
            nir_variable *out_var = nir_deref_instr_get_variable(deref);
            if (out_var->data.location < VARYING_SLOT_VAR0)
               continue;

            unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
            if (location >= MAX_VARYINGS_INCL_PATCH)
               continue;

            unsigned var_info_idx =
               store_varying_info_idx[location][out_var->data.location_frac];
            if (!var_info_idx) {
               /* Something went wrong, the shader interfaces didn't match, so
                * abandon packing. This can happen for example when the
                * inputs are scalars but the outputs are struct members.
                */
               *varying_comp_info_size = 0;
               break;
            }

            struct varying_component *vc_info =
               &(*varying_comp_info)[var_info_idx-1];

            if (!vc_info->initialised) {
               const struct glsl_type *type = out_var->type;
               if (nir_is_arrayed_io(out_var, producer->info.stage)) {
                  assert(glsl_type_is_array(type));
                  type = glsl_get_array_element(type);
               }

               vc_info->var = out_var;
               vc_info->interp_type =
                  get_interp_type(out_var, type, default_to_smooth_interp);
               vc_info->interp_loc = get_interp_loc(out_var);
               vc_info->is_32bit = glsl_type_is_32bit(type);
               vc_info->is_patch = out_var->data.patch;
               vc_info->is_mediump = !producer->options->linker_ignore_precision &&
                  (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
                   out_var->data.precision == GLSL_PRECISION_LOW);
               vc_info->is_intra_stage_only = true;
               vc_info->initialised = true;
            }
         }
      }
   }

   for (unsigned i = 0; i < *varying_comp_info_size; i++) {
      struct varying_component *vc_info = &(*varying_comp_info)[i];
      if (!vc_info->initialised) {
         /* Something went wrong, the shader interfaces didn't match, so
          * abandon packing. This can happen for example when the outputs are
          * scalars but the inputs are struct members.
          */
         *varying_comp_info_size = 0;
         break;
      }
   }
}

static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* We can only pack varyings with matching interpolation types,
          * interpolation loc must match also.
          * TODO: i965 can handle interpolation locations that don't match,
          * but the radeonsi nir backend handles everything as vec4s and so
          * expects this to be the same for all components. We could make this
          * check driver specific or drop it if NIR ever becomes the only
          * radeonsi backend.
          * TODO2: The radeonsi comment above is not true. Only "flat" is per
          * vec4 (128-bit granularity), all other interpolation qualifiers are
          * per component (16-bit granularity for float16, 32-bit granularity
          * otherwise). Each vec4 (128 bits) must be either vec4 or f16vec8.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
             assigned_comps[tmp_cursor].interp_loc != info->interp_loc ||
             assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
      assigned_comps[tmp_cursor].is_mediump = info->is_mediump;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}
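
/* Worked example: with cursor == 0 and comp == 0, if slot 0 already has
 * comps == 0b0011 with a matching interpolation type and loc, the while
 * loop advances tmp_comp to 2 and the varying lands in slot 0, component 2.
 * If instead comps == 0b1111, tmp_comp reaches 4, is reset to 0, and the
 * scan moves on to slot 1.
 */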

/* If there are empty components in the slot compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(producer, consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs. When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING);

         /* Check if we failed to assign a remap location. This can happen
          * if for example there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(consumer, nir_var_shader_in, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(producer, nir_var_shader_out, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};

   get_unmoveable_components_masks(producer, nir_var_shader_out,
                                   assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(consumer, nir_var_shader_in,
                                   assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}
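
/* Usage sketch (hypothetical caller): per the comment above, the expected
 * call order in a linker is roughly
 *
 *    nir_remove_unused_varyings(producer, consumer);
 *    ... DCE that deletes the demoted globals ...
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 */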

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_shader_in_variable(var, consumer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_shader_out_variable(var, producer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_shader_in_variable(var, consumer) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst);

         progress = true;
      }
   }

   return progress;
}

static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);

         progress = true;
      }
   }

   return progress;
}

/* The GLSL ES 3.20 spec says:
 *
 * "The precision of a vertex output does not need to match the precision of
 * the corresponding fragment input. The minimum precision at which vertex
 * outputs are interpolated is the minimum of the vertex output precision and
 * the fragment input precision, with the exception that for highp,
 * implementations do not have to support full IEEE 754 precision." (9.1 "Input
 * Output Matching by Name in Linked Programs")
 *
 * To implement this, when linking shaders we will take the minimum precision
 * qualifier (allowing drivers to interpolate at lower precision). For
 * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
 * requires we use the *last* specified precision if there is a conflict.
 *
 * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
 * NONE, we'll return the other precision, since there is no conflict.
 * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
 * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
 * "backwards". For non-fragment stages, we'll pick the latter precision to
 * comply with the spec. (Note that the order matters.)
 *
 * For streamout, "Variables declared with lowp or mediump precision are
 * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
 * of OpenGL ES 3.2 specification). So drivers should promote them for the
 * transform feedback memory store, but not the output store.
 */
static unsigned
nir_link_precision(unsigned producer, unsigned consumer, bool fs)
{
   if (producer == GLSL_PRECISION_NONE)
      return consumer;
   else if (consumer == GLSL_PRECISION_NONE)
      return producer;
   else
      return fs ? MAX2(producer, consumer) : consumer;
}
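
/* Worked example: linking a highp vertex output with a mediump fragment
 * input gives
 *
 *    nir_link_precision(GLSL_PRECISION_HIGH, GLSL_PRECISION_MEDIUM, true)
 *       == GLSL_PRECISION_MEDIUM
 *
 * because MEDIUM has the larger raw value in the "backwards" ordering
 * described above. With fs == false the consumer's (i.e. the *last*
 * specified) precision wins.
 */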

void
nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
{
   bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;

   nir_foreach_shader_out_variable(producer_var, producer) {
      /* Skip if the slot is not assigned */
      if (producer_var->data.location < 0)
         continue;

      nir_variable *consumer_var = nir_find_variable_with_location(consumer,
            nir_var_shader_in, producer_var->data.location);

      /* Skip if the variable will be eliminated */
      if (!consumer_var)
         continue;

      /* Now we have a pair of variables. Let's pick the smaller precision. */
      unsigned precision_1 = producer_var->data.precision;
      unsigned precision_2 = consumer_var->data.precision;
      unsigned minimum = nir_link_precision(precision_1, precision_2, frag);

      /* Propagate the new precision */
      producer_var->data.precision = consumer_var->data.precision = minimum;
   }
}

bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_constant_input(consumer, intr);
      } else {
         struct hash_entry *entry =
            _mesa_hash_table_search(varying_values, intr->src[1].ssa);
         if (entry) {
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
                                       in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}
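
/* Usage sketch (hypothetical caller): replacing a constant or duplicated
 * input can leave other varyings dead, so this pass is best run before the
 * removal/compaction passes, e.g.:
 *
 *    if (nir_link_opt_varyings(vs, fs))
 *       ... re-run DCE on fs ...
 *    nir_remove_unused_varyings(vs, fs);
 */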

/* TODO any better helper somewhere to sort a list? */
static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.location > new_var->data.location) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

static void
sort_varyings(nir_shader *shader, nir_variable_mode mode,
              struct exec_list *sorted_list)
{
   exec_list_make_empty(sorted_list);
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      exec_node_remove(&var->node);
      insert_sorted(sorted_list, var);
   }
}

void
nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
                            unsigned *size, gl_shader_stage stage)
{
   unsigned location = 0;
   unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
   uint64_t processed_locs[2] = {0};

   struct exec_list io_vars;
   sort_varyings(shader, mode, &io_vars);

   int UNUSED last_loc = 0;
   bool last_partial = false;
   nir_foreach_variable_in_list(var, &io_vars) {
      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

      int base;
      if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
         base = VERT_ATTRIB_GENERIC0;
      else if (var->data.mode == nir_var_shader_out &&
               stage == MESA_SHADER_FRAGMENT)
         base = FRAG_RESULT_DATA0;
      else
         base = VARYING_SLOT_VAR0;

      unsigned var_size, driver_size;
      if (var->data.compact) {
         /* If we are inside a partial compact,
          * don't allow another compact to be in this slot
          * if it starts at component 0.
          */
         if (last_partial && var->data.location_frac == 0) {
            location++;
         }

         /* compact variables must be arrays of scalars */
         assert(!var->data.per_view);
         assert(glsl_type_is_array(type));
         assert(glsl_type_is_scalar(glsl_get_array_element(type)));
         unsigned start = 4 * location + var->data.location_frac;
         unsigned end = start + glsl_get_length(type);
         var_size = driver_size = end / 4 - location;
         last_partial = end % 4 != 0;
      } else {
         /* Compact variables bypass the normal varying compacting pass,
          * which means they cannot be in the same vec4 slot as a normal
          * variable. If part of the current slot is taken up by a compact
          * variable, we need to go to the next one.
          */
         if (last_partial) {
            location++;
            last_partial = false;
         }

         /* per-view variables have an extra array dimension, which is ignored
          * when counting user-facing slots (var->data.location), but *not*
          * with driver slots (var->data.driver_location). That is, each user
          * slot maps to multiple driver slots.
          */
         driver_size = glsl_count_attribute_slots(type, false);
         if (var->data.per_view) {
            assert(glsl_type_is_array(type));
            var_size =
               glsl_count_attribute_slots(glsl_get_array_element(type), false);
         } else {
            var_size = driver_size;
         }
      }

      /* Builtins don't allow component packing so we only need to worry about
       * user defined varyings sharing the same location.
       */
      bool processed = false;
      if (var->data.location >= base) {
         unsigned glsl_location = var->data.location - base;

         for (unsigned i = 0; i < var_size; i++) {
            if (processed_locs[var->data.index] &
                ((uint64_t)1 << (glsl_location + i)))
               processed = true;
            else
               processed_locs[var->data.index] |=
                  ((uint64_t)1 << (glsl_location + i));
         }
      }

      /* Because component packing allows varyings to share the same location
       * we may already have processed this location.
       */
      if (processed) {
         /* TODO handle overlapping per-view variables */
         assert(!var->data.per_view);
         unsigned driver_location = assigned_locations[var->data.location];
         var->data.driver_location = driver_location;

         /* An array may be packed such that it crosses multiple other arrays
          * or variables; we need to make sure we have allocated the elements
          * consecutively if the previously processed var was shorter than
          * the current array we are processing.
          *
          * NOTE: The code below assumes the var list is ordered in ascending
          * location order.
          */
         assert(last_loc <= var->data.location);
         last_loc = var->data.location;
         unsigned last_slot_location = driver_location + var_size;
         if (last_slot_location > location) {
            unsigned num_unallocated_slots = last_slot_location - location;
            unsigned first_unallocated_slot = var_size - num_unallocated_slots;
            for (unsigned i = first_unallocated_slot; i < var_size; i++) {
               assigned_locations[var->data.location + i] = location;
               location++;
            }
         }
         continue;
      }

      for (unsigned i = 0; i < var_size; i++) {
         assigned_locations[var->data.location + i] = location + i;
      }

      var->data.driver_location = location;
      location += driver_size;
   }

   if (last_partial)
      location++;

   exec_list_append(&shader->variables, &io_vars);
   *size = location;
}
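
/* Usage sketch (hypothetical driver code): drivers that consume NIR
 * directly typically call this once per mode, e.g.
 *
 *    unsigned num_inputs = 0;
 *    nir_assign_io_var_locations(nir, nir_var_shader_in, &num_inputs,
 *                                nir->info.stage);
 *
 * and then use num_inputs to size the stage's input state.
 */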

static uint64_t
get_linked_variable_location(unsigned location, bool patch)
{
   if (!patch)
      return location;

   /* Reserve locations 0...3 for special patch variables
    * like tess factors and bounding boxes, and the generic patch
    * variables will come after them.
    */
   if (location >= VARYING_SLOT_PATCH0)
      return location - VARYING_SLOT_PATCH0 + 4;
   else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
            location <= VARYING_SLOT_BOUNDING_BOX1)
      return location - VARYING_SLOT_TESS_LEVEL_OUTER;
   else
      unreachable("Unsupported variable in get_linked_variable_location.");
}
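
/* For illustration: with this mapping VARYING_SLOT_TESS_LEVEL_OUTER becomes
 * linked location 0, VARYING_SLOT_TESS_LEVEL_INNER becomes 1, the bounding
 * box slots become 2 and 3, and VARYING_SLOT_PATCH0 + n becomes 4 + n.
 */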

static uint64_t
get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
{
   const struct glsl_type *type = variable->type;

   if (nir_is_arrayed_io(variable, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   if (variable->data.compact) {
      unsigned component_count = variable->data.location_frac + glsl_get_length(type);
      slots = DIV_ROUND_UP(component_count, 4);
   }

   uint64_t mask = u_bit_consecutive64(0, slots);
   return mask;
}

nir_linked_io_var_info
nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
{
   assert(producer);
   assert(consumer);

   uint64_t producer_output_mask = 0;
   uint64_t producer_patch_output_mask = 0;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         producer_patch_output_mask |= mask << loc;
      else
         producer_output_mask |= mask << loc;
   }

   uint64_t consumer_input_mask = 0;
   uint64_t consumer_patch_input_mask = 0;

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         consumer_patch_input_mask |= mask << loc;
      else
         consumer_input_mask |= mask << loc;
   }

   uint64_t io_mask = producer_output_mask | consumer_input_mask;
   uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_linked_io_var_info result = {
      .num_linked_io_vars = util_bitcount64(io_mask),
      .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
   };

   return result;
}
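
/* Usage sketch (hypothetical caller): for a TCS/TES pair a driver might do
 *
 *    nir_linked_io_var_info io =
 *       nir_assign_linked_io_var_locations(tcs, tes);
 *
 * and use io.num_linked_io_vars / io.num_linked_patch_io_vars to size the
 * per-vertex and per-patch slot spaces shared by the two stages.
 */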