GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/compiler/nir/nir_linking_helpers.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it just deletes unused things.
 */
/**
 * Returns the bits in the inputs_read, or outputs_written
 * bitfield corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return ((1ull << slots) - 1) << location;
}
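/* Worked example (illustrative, not part of the original file): for a
 * hypothetical non-patch vertex output declared as vec4 v[2] at
 * VARYING_SLOT_VAR1, glsl_count_attribute_slots() returns 2, so the helper
 * above returns ((1ull << 2) - 1) << VARYING_SLOT_VAR1, i.e. two consecutive
 * bits starting at the VAR1 slot.
 */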
static uint8_t
get_num_components(nir_variable *var)
{
   if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
      return 4;

   return glsl_get_vector_elements(glsl_without_array(var->type));
}

static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            for (unsigned i = 0; i < get_num_components(var); i++) {
               if (var->data.patch) {
                  patches_read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               } else {
                  read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               }
            }
         }
      }
   }
}
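/* Illustrative note (not part of the original file): this matters for a
 * hypothetical TCS that reads another invocation's output, e.g.
 *
 *    out float foo[];
 *    ...
 *    barrier();
 *    float other = foo[(gl_InvocationID + 1) % gl_PatchVerticesIn];
 *
 * where foo is never read by the TES; the read[] / patches_read[] masks
 * gathered here keep such outputs from being demoted later on.
 */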

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
 *                                      read, patches_read) ||
 *                                      progress;
 *
 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
 * representing each .location_frac used.  Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader,
                          nir_variable_mode mode,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   assert(mode == nir_var_shader_in || mode == nir_var_shader_out);

   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is invalid, make it a global variable instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_shader_out_variable(var, producer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            patches_written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         } else {
            written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         }
      }
   }

   nir_foreach_shader_in_variable(var, consumer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            patches_read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         } else {
            read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         }
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
                                        patches_written) || progress;

   return progress;
}
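/* Illustrative usage sketch (not part of the original file): a linker would
 * typically run this pass on each adjacent pair of stages, e.g.
 *
 *    bool progress = nir_remove_unused_varyings(vs_nir, fs_nir);
 *
 * where vs_nir/fs_nir are hypothetical producer/consumer shaders, and then
 * follow up with dead-variable/dead-code elimination so that the demoted
 * nir_var_shader_temp variables actually disappear.
 */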
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bitsizes
    * other than 32-bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}
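/* Illustrative note (not part of the original file): under this rule a
 * hypothetical scalar float or int varying can be packed, while a vec2, a
 * 16-bit scalar, or any array/struct/matrix keeps its existing location.
 */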
struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_mediump;
};

/* Packing arrays and dual slot varyings is difficult so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(nir_shader *shader,
                                nir_variable_mode mode,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type))
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
            glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit =
               glsl_type_is_32bit(glsl_without_array(type));
            comps[location + i].is_mediump =
               var->data.precision == GLSL_PRECISION_MEDIUM ||
               var->data.precision == GLSL_PRECISION_LOW;
         }
      }
   }
}
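/* Worked example (illustrative, not part of the original file): for a
 * hypothetical dvec3 output at location_frac 0, elements = 3, dmul = 2 and
 * slots = 2, so the loop above marks all four components of the first slot
 * (num_comps = 4) and comps_slot2 = 6 - 4 = 2 components of the second slot,
 * matching the ARB_enhanced_layouts rule that a dvec3 fills slot N and the
 * xy half of slot N + 1.
 */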

struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}
static void
remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   const gl_shader_stage stage = shader->info.stage;
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable_with_modes(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these so we need to copy the existing mask
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}
struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool is_mediump;
   bool is_intra_stage_only;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* We want to try to group together TCS outputs that are only read by other
    * TCS invocations and not consumed by the following stage.
    */
   if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
      return comp1->is_intra_stage_only ? 1 : -1;

   /* Group mediump varyings together. */
   if (comp1->is_mediump != comp2->is_mediump)
      return comp1->is_mediump ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   const struct nir_variable_data *const data1 = &comp1->var->data;
   const struct nir_variable_data *const data2 = &comp2->var->data;
   if (data1->location != data2->location)
      return data1->location - data2->location;
   return (int)data1->location_frac - (int)data2->location_frac;
}
static void
gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_shader_out_variable(var, producer) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(deref, nir_var_shader_in))
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx-1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
                in_var->data.per_view) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
            vc_info->is_mediump = !producer->options->linker_ignore_precision &&
               (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
                in_var->data.precision == GLSL_PRECISION_LOW);
            vc_info->is_intra_stage_only = false;
            vc_info->initialised = true;
         }
      }
   }
   /* Walk over the shader and populate the varying component info array
    * for varyings which are read by other TCS instances but are not consumed
    * by the TES.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      impl = nir_shader_get_entrypoint(producer);

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            /* We only remap things that aren't builtins. */
            nir_variable *out_var = nir_deref_instr_get_variable(deref);
            if (out_var->data.location < VARYING_SLOT_VAR0)
               continue;

            unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
            if (location >= MAX_VARYINGS_INCL_PATCH)
               continue;

            unsigned var_info_idx =
               store_varying_info_idx[location][out_var->data.location_frac];
            if (!var_info_idx) {
               /* Something went wrong, the shader interfaces didn't match, so
                * abandon packing. This can happen for example when the
                * inputs are scalars but the outputs are struct members.
                */
               *varying_comp_info_size = 0;
               break;
            }

            struct varying_component *vc_info =
               &(*varying_comp_info)[var_info_idx-1];

            if (!vc_info->initialised) {
               const struct glsl_type *type = out_var->type;
               if (nir_is_arrayed_io(out_var, producer->info.stage)) {
                  assert(glsl_type_is_array(type));
                  type = glsl_get_array_element(type);
               }

               vc_info->var = out_var;
               vc_info->interp_type =
                  get_interp_type(out_var, type, default_to_smooth_interp);
               vc_info->interp_loc = get_interp_loc(out_var);
               vc_info->is_32bit = glsl_type_is_32bit(type);
               vc_info->is_patch = out_var->data.patch;
               vc_info->is_mediump = !producer->options->linker_ignore_precision &&
                  (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
                   out_var->data.precision == GLSL_PRECISION_LOW);
               vc_info->is_intra_stage_only = true;
               vc_info->initialised = true;
            }
         }
      }
   }

   for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
      struct varying_component *vc_info = &(*varying_comp_info)[i];
      if (!vc_info->initialised) {
         /* Something went wrong, the shader interfaces didn't match, so
          * abandon packing. This can happen for example when the outputs are
          * scalars but the inputs are struct members.
          */
         *varying_comp_info_size = 0;
         break;
      }
   }
}
static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* We can only pack varyings with matching interpolation types,
          * interpolation loc must match also.
          * TODO: i965 can handle interpolation locations that don't match,
          * but the radeonsi nir backend handles everything as vec4s and so
          * expects this to be the same for all components. We could make this
          * check driver specific or drop it if NIR ever becomes the only
          * radeonsi backend.
          * TODO2: The radeonsi comment above is not true. Only "flat" is per
          * vec4 (128-bit granularity), all other interpolation qualifiers are
          * per component (16-bit granularity for float16, 32-bit granularity
          * otherwise). Each vec4 (128 bits) must be either vec4 or f16vec8.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
             assigned_comps[tmp_cursor].interp_loc != info->interp_loc ||
             assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
      assigned_comps[tmp_cursor].is_mediump = info->is_mediump;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}
/* If there are empty components in the slot compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(producer, consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs.  When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING);

         /* Check if we failed to assign a remap location. This can happen if
          * for example there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(consumer, nir_var_shader_in, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(producer, nir_var_shader_out, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}
/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings.  At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this.  Therefore, the total set of valid slots is the OR of the two
 * sets of varyings;  this accounts for varyings which one side may need
 * to read/write even if the other doesn't.  This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};

   get_unmoveable_components_masks(producer, nir_var_shader_out,
                                   assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(consumer, nir_var_shader_in,
                                   assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}
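/* Illustrative calling sequence (a sketch, not part of the original file):
 * per the comment above, a hypothetical linker would run the passes in the
 * order
 *
 *    nir_remove_unused_varyings(producer, consumer);
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 *
 * so that inputs_read/outputs_written are up to date before compaction.
 */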
/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_shader_in_variable(var, consumer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_shader_out_variable(var, producer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}
static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_shader_in_variable(var, consumer) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}
static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst);

         progress = true;
      }
   }

   return progress;
}
static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);

         progress = true;
      }
   }

   return progress;
}
/* The GLSL ES 3.20 spec says:
 *
 * "The precision of a vertex output does not need to match the precision of
 * the corresponding fragment input. The minimum precision at which vertex
 * outputs are interpolated is the minimum of the vertex output precision and
 * the fragment input precision, with the exception that for highp,
 * implementations do not have to support full IEEE 754 precision." (9.1 "Input
 * Output Matching by Name in Linked Programs")
 *
 * To implement this, when linking shaders we will take the minimum precision
 * qualifier (allowing drivers to interpolate at lower precision). For
 * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
 * requires we use the *last* specified precision if there is a conflict.
 *
 * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
 * NONE, we'll return the other precision, since there is no conflict.
 * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
 * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
 * "backwards". For non-fragment stages, we'll pick the latter precision to
 * comply with the spec. (Note that the order matters.)
 *
 * For streamout, "Variables declared with lowp or mediump precision are
 * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
 * of OpenGL ES 3.2 specification). So drivers should promote them for
 * the transform feedback memory store, but not the output store.
 */

static unsigned
nir_link_precision(unsigned producer, unsigned consumer, bool fs)
{
   if (producer == GLSL_PRECISION_NONE)
      return consumer;
   else if (consumer == GLSL_PRECISION_NONE)
      return producer;
   else
      return fs ? MAX2(producer, consumer) : consumer;
}
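/* Worked example (illustrative, not part of the original file): with the
 * (NONE, HIGH, MEDIUM, LOW) ordering described above, a highp vertex output
 * feeding a mediump fragment input gives
 * MAX2(GLSL_PRECISION_HIGH, GLSL_PRECISION_MEDIUM) == GLSL_PRECISION_MEDIUM,
 * i.e. the lower of the two precisions, while for a non-fragment consumer the
 * consumer's own precision is simply kept.
 */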
void
nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
{
   bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;

   nir_foreach_shader_out_variable(producer_var, producer) {
      /* Skip if the slot is not assigned */
      if (producer_var->data.location < 0)
         continue;

      nir_variable *consumer_var = nir_find_variable_with_location(consumer,
            nir_var_shader_in, producer_var->data.location);

      /* Skip if the variable will be eliminated */
      if (!consumer_var)
         continue;

      /* Now we have a pair of variables. Let's pick the smaller precision. */
      unsigned precision_1 = producer_var->data.precision;
      unsigned precision_2 = consumer_var->data.precision;
      unsigned minimum = nir_link_precision(precision_1, precision_2, frag);

      /* Propagate the new precision */
      producer_var->data.precision = consumer_var->data.precision = minimum;
   }
}
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_constant_input(consumer, intr);
      } else {
         struct hash_entry *entry =
               _mesa_hash_table_search(varying_values, intr->src[1].ssa);
         if (entry) {
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
                                       in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}
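/* Illustrative sketch (not part of the original file): given a hypothetical
 * shader pair such as
 *
 *    // vertex shader                     // fragment shader
 *    out float v;                         in float v;
 *    void main() { v = 1.0; ... }         void main() { ... uses v ... }
 *
 * the pass above sees the store of a load_const in the producer's last block
 * and rewrites the consumer's loads of v to use the constant 1.0 directly;
 * stores of non-constant values are instead tracked in the hash table so
 * that duplicate outputs can reuse a single input.
 */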
/* TODO any better helper somewhere to sort a list? */

static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.location > new_var->data.location) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

static void
sort_varyings(nir_shader *shader, nir_variable_mode mode,
              struct exec_list *sorted_list)
{
   exec_list_make_empty(sorted_list);
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      exec_node_remove(&var->node);
      insert_sorted(sorted_list, var);
   }
}
void
nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
                            unsigned *size, gl_shader_stage stage)
{
   unsigned location = 0;
   unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
   uint64_t processed_locs[2] = {0};

   struct exec_list io_vars;
   sort_varyings(shader, mode, &io_vars);

   int UNUSED last_loc = 0;
   bool last_partial = false;
   nir_foreach_variable_in_list(var, &io_vars) {
      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

      int base;
      if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
         base = VERT_ATTRIB_GENERIC0;
      else if (var->data.mode == nir_var_shader_out &&
               stage == MESA_SHADER_FRAGMENT)
         base = FRAG_RESULT_DATA0;
      else
         base = VARYING_SLOT_VAR0;

      unsigned var_size, driver_size;
      if (var->data.compact) {
         /* If we are inside a partial compact,
          * don't allow another compact to be in this slot
          * if it starts at component 0.
          */
         if (last_partial && var->data.location_frac == 0) {
            location++;
         }

         /* compact variables must be arrays of scalars */
         assert(!var->data.per_view);
         assert(glsl_type_is_array(type));
         assert(glsl_type_is_scalar(glsl_get_array_element(type)));
         unsigned start = 4 * location + var->data.location_frac;
         unsigned end = start + glsl_get_length(type);
         var_size = driver_size = end / 4 - location;
         last_partial = end % 4 != 0;
      } else {
         /* Compact variables bypass the normal varying compacting pass,
          * which means they cannot be in the same vec4 slot as a normal
          * variable. If part of the current slot is taken up by a compact
          * variable, we need to go to the next one.
          */
         if (last_partial) {
            location++;
            last_partial = false;
         }

         /* per-view variables have an extra array dimension, which is ignored
          * when counting user-facing slots (var->data.location), but *not*
          * with driver slots (var->data.driver_location). That is, each user
          * slot maps to multiple driver slots.
          */
         driver_size = glsl_count_attribute_slots(type, false);
         if (var->data.per_view) {
            assert(glsl_type_is_array(type));
            var_size =
               glsl_count_attribute_slots(glsl_get_array_element(type), false);
         } else {
            var_size = driver_size;
         }
      }

1249
* user defined varyings sharing the same location.
1250
*/
1251
bool processed = false;
1252
if (var->data.location >= base) {
1253
unsigned glsl_location = var->data.location - base;
1254
1255
for (unsigned i = 0; i < var_size; i++) {
1256
if (processed_locs[var->data.index] &
1257
((uint64_t)1 << (glsl_location + i)))
1258
processed = true;
1259
else
1260
processed_locs[var->data.index] |=
1261
((uint64_t)1 << (glsl_location + i));
1262
}
1263
}
1264
1265
/* Because component packing allows varyings to share the same location
1266
* we may have already have processed this location.
1267
*/
1268
if (processed) {
1269
/* TODO handle overlapping per-view variables */
1270
assert(!var->data.per_view);
1271
unsigned driver_location = assigned_locations[var->data.location];
1272
var->data.driver_location = driver_location;
1273
1274
/* An array may be packed such that is crosses multiple other arrays
1275
* or variables, we need to make sure we have allocated the elements
1276
* consecutively if the previously proccessed var was shorter than
1277
* the current array we are processing.
1278
*
1279
* NOTE: The code below assumes the var list is ordered in ascending
1280
* location order.
1281
*/
1282
assert(last_loc <= var->data.location);
1283
last_loc = var->data.location;
1284
unsigned last_slot_location = driver_location + var_size;
1285
if (last_slot_location > location) {
1286
unsigned num_unallocated_slots = last_slot_location - location;
1287
unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1288
for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1289
assigned_locations[var->data.location + i] = location;
1290
location++;
1291
}
1292
}
1293
continue;
1294
}
1295
1296
for (unsigned i = 0; i < var_size; i++) {
1297
assigned_locations[var->data.location + i] = location + i;
1298
}
1299
1300
var->data.driver_location = location;
1301
location += driver_size;
1302
}
1303
1304
if (last_partial)
1305
location++;
1306
1307
exec_list_append(&shader->variables, &io_vars);
1308
*size = location;
1309
}
1310
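/* Illustrative usage sketch (not part of the original file): a hypothetical
 * driver would typically call this once per I/O mode, e.g.
 *
 *    unsigned num_inputs = 0, num_outputs = 0;
 *    nir_assign_io_var_locations(nir, nir_var_shader_in, &num_inputs,
 *                                nir->info.stage);
 *    nir_assign_io_var_locations(nir, nir_var_shader_out, &num_outputs,
 *                                nir->info.stage);
 *
 * after which each variable's data.driver_location holds a packed slot index
 * and num_inputs/num_outputs hold the total number of driver slots used.
 */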
static uint64_t
get_linked_variable_location(unsigned location, bool patch)
{
   if (!patch)
      return location;

   /* Reserve locations 0...3 for special patch variables
    * like tess factors and bounding boxes, and the generic patch
    * variables will come after them.
    */
   if (location >= VARYING_SLOT_PATCH0)
      return location - VARYING_SLOT_PATCH0 + 4;
   else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
            location <= VARYING_SLOT_BOUNDING_BOX1)
      return location - VARYING_SLOT_TESS_LEVEL_OUTER;
   else
      unreachable("Unsupported variable in get_linked_variable_location.");
}

static uint64_t
get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
{
   const struct glsl_type *type = variable->type;

   if (nir_is_arrayed_io(variable, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   if (variable->data.compact) {
      unsigned component_count = variable->data.location_frac + glsl_get_length(type);
      slots = DIV_ROUND_UP(component_count, 4);
   }

   uint64_t mask = u_bit_consecutive64(0, slots);
   return mask;
}
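/* Worked example (illustrative, not part of the original file): for patch
 * variables the helper above maps VARYING_SLOT_TESS_LEVEL_OUTER to linked
 * location 0 and VARYING_SLOT_PATCH0 to linked location 4, so e.g.
 * VARYING_SLOT_PATCH0 + 2 lands at linked location 6, after the four slots
 * reserved for tess factors and bounding boxes.
 */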
nir_linked_io_var_info
nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
{
   assert(producer);
   assert(consumer);

   uint64_t producer_output_mask = 0;
   uint64_t producer_patch_output_mask = 0;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         producer_patch_output_mask |= mask << loc;
      else
         producer_output_mask |= mask << loc;
   }

   uint64_t consumer_input_mask = 0;
   uint64_t consumer_patch_input_mask = 0;

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         consumer_patch_input_mask |= mask << loc;
      else
         consumer_input_mask |= mask << loc;
   }

   uint64_t io_mask = producer_output_mask | consumer_input_mask;
   uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_linked_io_var_info result = {
      .num_linked_io_vars = util_bitcount64(io_mask),
      .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
   };

   return result;
}