Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/compiler/nir/nir.c
4549 views
1
/*
2
* Copyright © 2014 Intel Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*
23
* Authors:
24
* Connor Abbott ([email protected])
25
*
26
*/
27
28
#include "nir.h"
29
#include "nir_builder.h"
30
#include "nir_control_flow_private.h"
31
#include "nir_worklist.h"
32
#include "util/half_float.h"
33
#include <limits.h>
34
#include <assert.h>
35
#include <math.h>
36
#include "util/u_math.h"
37
#include "util/u_qsort.h"
38
39
#include "main/menums.h" /* BITFIELD64_MASK */
40
41
42
/** Return true if the component mask "mask" with bit size "old_bit_size" can
43
* be re-interpreted to be used with "new_bit_size".
44
*/
45
bool
46
nir_component_mask_can_reinterpret(nir_component_mask_t mask,
47
unsigned old_bit_size,
48
unsigned new_bit_size)
49
{
50
assert(util_is_power_of_two_nonzero(old_bit_size));
51
assert(util_is_power_of_two_nonzero(new_bit_size));
52
53
if (old_bit_size == new_bit_size)
54
return true;
55
56
if (old_bit_size == 1 || new_bit_size == 1)
57
return false;
58
59
if (old_bit_size > new_bit_size) {
60
unsigned ratio = old_bit_size / new_bit_size;
61
return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS;
62
}
63
64
unsigned iter = mask;
65
while (iter) {
66
int start, count;
67
u_bit_scan_consecutive_range(&iter, &start, &count);
68
start *= old_bit_size;
69
count *= old_bit_size;
70
if (start % new_bit_size != 0)
71
return false;
72
if (count % new_bit_size != 0)
73
return false;
74
}
75
return true;
76
}
77
78
/** Re-interprets a component mask "mask" with bit size "old_bit_size" so that
79
* it can be used can be used with "new_bit_size".
80
*/
81
nir_component_mask_t
82
nir_component_mask_reinterpret(nir_component_mask_t mask,
83
unsigned old_bit_size,
84
unsigned new_bit_size)
85
{
86
assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size));
87
88
if (old_bit_size == new_bit_size)
89
return mask;
90
91
nir_component_mask_t new_mask = 0;
92
unsigned iter = mask;
93
while (iter) {
94
int start, count;
95
u_bit_scan_consecutive_range(&iter, &start, &count);
96
start = start * old_bit_size / new_bit_size;
97
count = count * old_bit_size / new_bit_size;
98
new_mask |= BITFIELD_RANGE(start, count);
99
}
100
return new_mask;
101
}
102
103
nir_shader *
104
nir_shader_create(void *mem_ctx,
105
gl_shader_stage stage,
106
const nir_shader_compiler_options *options,
107
shader_info *si)
108
{
109
nir_shader *shader = rzalloc(mem_ctx, nir_shader);
110
111
exec_list_make_empty(&shader->variables);
112
113
shader->options = options;
114
115
if (si) {
116
assert(si->stage == stage);
117
shader->info = *si;
118
} else {
119
shader->info.stage = stage;
120
}
121
122
exec_list_make_empty(&shader->functions);
123
124
shader->num_inputs = 0;
125
shader->num_outputs = 0;
126
shader->num_uniforms = 0;
127
128
return shader;
129
}
130
131
static nir_register *
132
reg_create(void *mem_ctx, struct exec_list *list)
133
{
134
nir_register *reg = ralloc(mem_ctx, nir_register);
135
136
list_inithead(&reg->uses);
137
list_inithead(&reg->defs);
138
list_inithead(&reg->if_uses);
139
140
reg->num_components = 0;
141
reg->bit_size = 32;
142
reg->num_array_elems = 0;
143
144
exec_list_push_tail(list, &reg->node);
145
146
return reg;
147
}
148
149
nir_register *
150
nir_local_reg_create(nir_function_impl *impl)
151
{
152
nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
153
reg->index = impl->reg_alloc++;
154
155
return reg;
156
}
157
158
void
159
nir_reg_remove(nir_register *reg)
160
{
161
exec_node_remove(&reg->node);
162
}
163
164
void
165
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
166
{
167
switch (var->data.mode) {
168
case nir_var_function_temp:
169
assert(!"nir_shader_add_variable cannot be used for local variables");
170
return;
171
172
case nir_var_shader_temp:
173
case nir_var_shader_in:
174
case nir_var_shader_out:
175
case nir_var_uniform:
176
case nir_var_mem_ubo:
177
case nir_var_mem_ssbo:
178
case nir_var_mem_shared:
179
case nir_var_system_value:
180
case nir_var_mem_push_const:
181
case nir_var_mem_constant:
182
case nir_var_shader_call_data:
183
case nir_var_ray_hit_attrib:
184
break;
185
186
case nir_var_mem_global:
187
assert(!"nir_shader_add_variable cannot be used for global memory");
188
return;
189
190
default:
191
assert(!"invalid mode");
192
return;
193
}
194
195
exec_list_push_tail(&shader->variables, &var->node);
196
}
197
198
nir_variable *
199
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
200
const struct glsl_type *type, const char *name)
201
{
202
nir_variable *var = rzalloc(shader, nir_variable);
203
var->name = ralloc_strdup(var, name);
204
var->type = type;
205
var->data.mode = mode;
206
var->data.how_declared = nir_var_declared_normally;
207
208
if ((mode == nir_var_shader_in &&
209
shader->info.stage != MESA_SHADER_VERTEX &&
210
shader->info.stage != MESA_SHADER_KERNEL) ||
211
(mode == nir_var_shader_out &&
212
shader->info.stage != MESA_SHADER_FRAGMENT))
213
var->data.interpolation = INTERP_MODE_SMOOTH;
214
215
if (mode == nir_var_shader_in || mode == nir_var_uniform)
216
var->data.read_only = true;
217
218
nir_shader_add_variable(shader, var);
219
220
return var;
221
}
222
223
nir_variable *
224
nir_local_variable_create(nir_function_impl *impl,
225
const struct glsl_type *type, const char *name)
226
{
227
nir_variable *var = rzalloc(impl->function->shader, nir_variable);
228
var->name = ralloc_strdup(var, name);
229
var->type = type;
230
var->data.mode = nir_var_function_temp;
231
232
nir_function_impl_add_variable(impl, var);
233
234
return var;
235
}
236
237
nir_variable *
238
nir_find_variable_with_location(nir_shader *shader,
239
nir_variable_mode mode,
240
unsigned location)
241
{
242
assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
243
nir_foreach_variable_with_modes(var, shader, mode) {
244
if (var->data.location == location)
245
return var;
246
}
247
return NULL;
248
}
249
250
nir_variable *
251
nir_find_variable_with_driver_location(nir_shader *shader,
252
nir_variable_mode mode,
253
unsigned location)
254
{
255
assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp);
256
nir_foreach_variable_with_modes(var, shader, mode) {
257
if (var->data.driver_location == location)
258
return var;
259
}
260
return NULL;
261
}
262
263
/* Annoyingly, qsort_r is not in the C standard library and, in particular, we
264
* can't count on it on MSV and Android. So we stuff the CMP function into
265
* each array element. It's a bit messy and burns more memory but the list of
266
* variables should hever be all that long.
267
*/
268
struct var_cmp {
269
nir_variable *var;
270
int (*cmp)(const nir_variable *, const nir_variable *);
271
};
272
273
static int
274
var_sort_cmp(const void *_a, const void *_b, void *_cmp)
275
{
276
const struct var_cmp *a = _a;
277
const struct var_cmp *b = _b;
278
assert(a->cmp == b->cmp);
279
return a->cmp(a->var, b->var);
280
}
281
282
void
283
nir_sort_variables_with_modes(nir_shader *shader,
284
int (*cmp)(const nir_variable *,
285
const nir_variable *),
286
nir_variable_mode modes)
287
{
288
unsigned num_vars = 0;
289
nir_foreach_variable_with_modes(var, shader, modes) {
290
++num_vars;
291
}
292
struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars);
293
unsigned i = 0;
294
nir_foreach_variable_with_modes_safe(var, shader, modes) {
295
exec_node_remove(&var->node);
296
vars[i++] = (struct var_cmp){
297
.var = var,
298
.cmp = cmp,
299
};
300
}
301
assert(i == num_vars);
302
303
util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp);
304
305
for (i = 0; i < num_vars; i++)
306
exec_list_push_tail(&shader->variables, &vars[i].var->node);
307
308
ralloc_free(vars);
309
}
310
311
nir_function *
312
nir_function_create(nir_shader *shader, const char *name)
313
{
314
nir_function *func = ralloc(shader, nir_function);
315
316
exec_list_push_tail(&shader->functions, &func->node);
317
318
func->name = ralloc_strdup(func, name);
319
func->shader = shader;
320
func->num_params = 0;
321
func->params = NULL;
322
func->impl = NULL;
323
func->is_entrypoint = false;
324
325
return func;
326
}
327
328
/* NOTE: if the instruction you are copying a src to is already added
329
* to the IR, use nir_instr_rewrite_src() instead.
330
*/
331
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
332
{
333
dest->is_ssa = src->is_ssa;
334
if (src->is_ssa) {
335
dest->ssa = src->ssa;
336
} else {
337
dest->reg.base_offset = src->reg.base_offset;
338
dest->reg.reg = src->reg.reg;
339
if (src->reg.indirect) {
340
dest->reg.indirect = ralloc(mem_ctx, nir_src);
341
nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
342
} else {
343
dest->reg.indirect = NULL;
344
}
345
}
346
}
347
348
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
349
{
350
/* Copying an SSA definition makes no sense whatsoever. */
351
assert(!src->is_ssa);
352
353
dest->is_ssa = false;
354
355
dest->reg.base_offset = src->reg.base_offset;
356
dest->reg.reg = src->reg.reg;
357
if (src->reg.indirect) {
358
dest->reg.indirect = ralloc(instr, nir_src);
359
nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
360
} else {
361
dest->reg.indirect = NULL;
362
}
363
}
364
365
void
366
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
367
nir_alu_instr *instr)
368
{
369
nir_src_copy(&dest->src, &src->src, &instr->instr);
370
dest->abs = src->abs;
371
dest->negate = src->negate;
372
for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
373
dest->swizzle[i] = src->swizzle[i];
374
}
375
376
void
377
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
378
nir_alu_instr *instr)
379
{
380
nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
381
dest->write_mask = src->write_mask;
382
dest->saturate = src->saturate;
383
}
384
385
bool
386
nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn)
387
{
388
static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
389
STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS);
390
391
const nir_alu_src *src = &alu->src[srcn];
392
unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn);
393
394
return src->src.is_ssa && (src->src.ssa->num_components == num_components) &&
395
!src->abs && !src->negate &&
396
(memcmp(src->swizzle, trivial_swizzle, num_components) == 0);
397
}
398
399
400
static void
401
cf_init(nir_cf_node *node, nir_cf_node_type type)
402
{
403
exec_node_init(&node->node);
404
node->parent = NULL;
405
node->type = type;
406
}
407
408
nir_function_impl *
409
nir_function_impl_create_bare(nir_shader *shader)
410
{
411
nir_function_impl *impl = ralloc(shader, nir_function_impl);
412
413
impl->function = NULL;
414
415
cf_init(&impl->cf_node, nir_cf_node_function);
416
417
exec_list_make_empty(&impl->body);
418
exec_list_make_empty(&impl->registers);
419
exec_list_make_empty(&impl->locals);
420
impl->reg_alloc = 0;
421
impl->ssa_alloc = 0;
422
impl->num_blocks = 0;
423
impl->valid_metadata = nir_metadata_none;
424
impl->structured = true;
425
426
/* create start & end blocks */
427
nir_block *start_block = nir_block_create(shader);
428
nir_block *end_block = nir_block_create(shader);
429
start_block->cf_node.parent = &impl->cf_node;
430
end_block->cf_node.parent = &impl->cf_node;
431
impl->end_block = end_block;
432
433
exec_list_push_tail(&impl->body, &start_block->cf_node.node);
434
435
start_block->successors[0] = end_block;
436
_mesa_set_add(end_block->predecessors, start_block);
437
return impl;
438
}
439
440
nir_function_impl *
441
nir_function_impl_create(nir_function *function)
442
{
443
assert(function->impl == NULL);
444
445
nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
446
447
function->impl = impl;
448
impl->function = function;
449
450
return impl;
451
}
452
453
nir_block *
454
nir_block_create(nir_shader *shader)
455
{
456
nir_block *block = rzalloc(shader, nir_block);
457
458
cf_init(&block->cf_node, nir_cf_node_block);
459
460
block->successors[0] = block->successors[1] = NULL;
461
block->predecessors = _mesa_pointer_set_create(block);
462
block->imm_dom = NULL;
463
/* XXX maybe it would be worth it to defer allocation? This
464
* way it doesn't get allocated for shader refs that never run
465
* nir_calc_dominance? For example, state-tracker creates an
466
* initial IR, clones that, runs appropriate lowering pass, passes
467
* to driver which does common lowering/opt, and then stores ref
468
* which is later used to do state specific lowering and futher
469
* opt. Do any of the references not need dominance metadata?
470
*/
471
block->dom_frontier = _mesa_pointer_set_create(block);
472
473
exec_list_make_empty(&block->instr_list);
474
475
return block;
476
}
477
478
static inline void
479
src_init(nir_src *src)
480
{
481
src->is_ssa = false;
482
src->reg.reg = NULL;
483
src->reg.indirect = NULL;
484
src->reg.base_offset = 0;
485
}
486
487
nir_if *
488
nir_if_create(nir_shader *shader)
489
{
490
nir_if *if_stmt = ralloc(shader, nir_if);
491
492
if_stmt->control = nir_selection_control_none;
493
494
cf_init(&if_stmt->cf_node, nir_cf_node_if);
495
src_init(&if_stmt->condition);
496
497
nir_block *then = nir_block_create(shader);
498
exec_list_make_empty(&if_stmt->then_list);
499
exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
500
then->cf_node.parent = &if_stmt->cf_node;
501
502
nir_block *else_stmt = nir_block_create(shader);
503
exec_list_make_empty(&if_stmt->else_list);
504
exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
505
else_stmt->cf_node.parent = &if_stmt->cf_node;
506
507
return if_stmt;
508
}
509
510
nir_loop *
511
nir_loop_create(nir_shader *shader)
512
{
513
nir_loop *loop = rzalloc(shader, nir_loop);
514
515
cf_init(&loop->cf_node, nir_cf_node_loop);
516
/* Assume that loops are divergent until proven otherwise */
517
loop->divergent = true;
518
519
nir_block *body = nir_block_create(shader);
520
exec_list_make_empty(&loop->body);
521
exec_list_push_tail(&loop->body, &body->cf_node.node);
522
body->cf_node.parent = &loop->cf_node;
523
524
body->successors[0] = body;
525
_mesa_set_add(body->predecessors, body);
526
527
return loop;
528
}
529
530
static void
531
instr_init(nir_instr *instr, nir_instr_type type)
532
{
533
instr->type = type;
534
instr->block = NULL;
535
exec_node_init(&instr->node);
536
}
537
538
static void
539
dest_init(nir_dest *dest)
540
{
541
dest->is_ssa = false;
542
dest->reg.reg = NULL;
543
dest->reg.indirect = NULL;
544
dest->reg.base_offset = 0;
545
}
546
547
static void
548
alu_dest_init(nir_alu_dest *dest)
549
{
550
dest_init(&dest->dest);
551
dest->saturate = false;
552
dest->write_mask = 0xf;
553
}
554
555
static void
556
alu_src_init(nir_alu_src *src)
557
{
558
src_init(&src->src);
559
src->abs = src->negate = false;
560
for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
561
src->swizzle[i] = i;
562
}
563
564
nir_alu_instr *
565
nir_alu_instr_create(nir_shader *shader, nir_op op)
566
{
567
unsigned num_srcs = nir_op_infos[op].num_inputs;
568
/* TODO: don't use rzalloc */
569
nir_alu_instr *instr =
570
rzalloc_size(shader,
571
sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
572
573
instr_init(&instr->instr, nir_instr_type_alu);
574
instr->op = op;
575
alu_dest_init(&instr->dest);
576
for (unsigned i = 0; i < num_srcs; i++)
577
alu_src_init(&instr->src[i]);
578
579
return instr;
580
}
581
582
nir_deref_instr *
583
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
584
{
585
nir_deref_instr *instr =
586
rzalloc_size(shader, sizeof(nir_deref_instr));
587
588
instr_init(&instr->instr, nir_instr_type_deref);
589
590
instr->deref_type = deref_type;
591
if (deref_type != nir_deref_type_var)
592
src_init(&instr->parent);
593
594
if (deref_type == nir_deref_type_array ||
595
deref_type == nir_deref_type_ptr_as_array)
596
src_init(&instr->arr.index);
597
598
dest_init(&instr->dest);
599
600
return instr;
601
}
602
603
nir_jump_instr *
604
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
605
{
606
nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
607
instr_init(&instr->instr, nir_instr_type_jump);
608
src_init(&instr->condition);
609
instr->type = type;
610
instr->target = NULL;
611
instr->else_target = NULL;
612
return instr;
613
}
614
615
nir_load_const_instr *
616
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
617
unsigned bit_size)
618
{
619
nir_load_const_instr *instr =
620
rzalloc_size(shader, sizeof(*instr) + num_components * sizeof(*instr->value));
621
instr_init(&instr->instr, nir_instr_type_load_const);
622
623
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
624
625
return instr;
626
}
627
628
nir_intrinsic_instr *
629
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
630
{
631
unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
632
/* TODO: don't use rzalloc */
633
nir_intrinsic_instr *instr =
634
rzalloc_size(shader,
635
sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
636
637
instr_init(&instr->instr, nir_instr_type_intrinsic);
638
instr->intrinsic = op;
639
640
if (nir_intrinsic_infos[op].has_dest)
641
dest_init(&instr->dest);
642
643
for (unsigned i = 0; i < num_srcs; i++)
644
src_init(&instr->src[i]);
645
646
return instr;
647
}
648
649
nir_call_instr *
650
nir_call_instr_create(nir_shader *shader, nir_function *callee)
651
{
652
const unsigned num_params = callee->num_params;
653
nir_call_instr *instr =
654
rzalloc_size(shader, sizeof(*instr) +
655
num_params * sizeof(instr->params[0]));
656
657
instr_init(&instr->instr, nir_instr_type_call);
658
instr->callee = callee;
659
instr->num_params = num_params;
660
for (unsigned i = 0; i < num_params; i++)
661
src_init(&instr->params[i]);
662
663
return instr;
664
}
665
666
static int8_t default_tg4_offsets[4][2] =
667
{
668
{ 0, 1 },
669
{ 1, 1 },
670
{ 1, 0 },
671
{ 0, 0 },
672
};
673
674
nir_tex_instr *
675
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
676
{
677
nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
678
instr_init(&instr->instr, nir_instr_type_tex);
679
680
dest_init(&instr->dest);
681
682
instr->num_srcs = num_srcs;
683
instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
684
for (unsigned i = 0; i < num_srcs; i++)
685
src_init(&instr->src[i].src);
686
687
instr->texture_index = 0;
688
instr->sampler_index = 0;
689
memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));
690
691
return instr;
692
}
693
694
void
695
nir_tex_instr_add_src(nir_tex_instr *tex,
696
nir_tex_src_type src_type,
697
nir_src src)
698
{
699
nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
700
tex->num_srcs + 1);
701
702
for (unsigned i = 0; i < tex->num_srcs; i++) {
703
new_srcs[i].src_type = tex->src[i].src_type;
704
nir_instr_move_src(&tex->instr, &new_srcs[i].src,
705
&tex->src[i].src);
706
}
707
708
ralloc_free(tex->src);
709
tex->src = new_srcs;
710
711
tex->src[tex->num_srcs].src_type = src_type;
712
nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
713
tex->num_srcs++;
714
}
715
716
void
717
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
718
{
719
assert(src_idx < tex->num_srcs);
720
721
/* First rewrite the source to NIR_SRC_INIT */
722
nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);
723
724
/* Now, move all of the other sources down */
725
for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
726
tex->src[i-1].src_type = tex->src[i].src_type;
727
nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
728
}
729
tex->num_srcs--;
730
}
731
732
bool
733
nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
734
{
735
if (tex->op != nir_texop_tg4)
736
return false;
737
return memcmp(tex->tg4_offsets, default_tg4_offsets,
738
sizeof(tex->tg4_offsets)) != 0;
739
}
740
741
nir_phi_instr *
742
nir_phi_instr_create(nir_shader *shader)
743
{
744
nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
745
instr_init(&instr->instr, nir_instr_type_phi);
746
747
dest_init(&instr->dest);
748
exec_list_make_empty(&instr->srcs);
749
return instr;
750
}
751
752
nir_parallel_copy_instr *
753
nir_parallel_copy_instr_create(nir_shader *shader)
754
{
755
nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
756
instr_init(&instr->instr, nir_instr_type_parallel_copy);
757
758
exec_list_make_empty(&instr->entries);
759
760
return instr;
761
}
762
763
nir_ssa_undef_instr *
764
nir_ssa_undef_instr_create(nir_shader *shader,
765
unsigned num_components,
766
unsigned bit_size)
767
{
768
nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
769
instr_init(&instr->instr, nir_instr_type_ssa_undef);
770
771
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
772
773
return instr;
774
}
775
776
static nir_const_value
777
const_value_float(double d, unsigned bit_size)
778
{
779
nir_const_value v;
780
memset(&v, 0, sizeof(v));
781
switch (bit_size) {
782
case 16: v.u16 = _mesa_float_to_half(d); break;
783
case 32: v.f32 = d; break;
784
case 64: v.f64 = d; break;
785
default:
786
unreachable("Invalid bit size");
787
}
788
return v;
789
}
790
791
static nir_const_value
792
const_value_int(int64_t i, unsigned bit_size)
793
{
794
nir_const_value v;
795
memset(&v, 0, sizeof(v));
796
switch (bit_size) {
797
case 1: v.b = i & 1; break;
798
case 8: v.i8 = i; break;
799
case 16: v.i16 = i; break;
800
case 32: v.i32 = i; break;
801
case 64: v.i64 = i; break;
802
default:
803
unreachable("Invalid bit size");
804
}
805
return v;
806
}
807
808
nir_const_value
809
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
810
{
811
const int64_t max_int = (1ull << (bit_size - 1)) - 1;
812
const int64_t min_int = -max_int - 1;
813
switch (binop) {
814
case nir_op_iadd:
815
return const_value_int(0, bit_size);
816
case nir_op_fadd:
817
return const_value_float(0, bit_size);
818
case nir_op_imul:
819
return const_value_int(1, bit_size);
820
case nir_op_fmul:
821
return const_value_float(1, bit_size);
822
case nir_op_imin:
823
return const_value_int(max_int, bit_size);
824
case nir_op_umin:
825
return const_value_int(~0ull, bit_size);
826
case nir_op_fmin:
827
return const_value_float(INFINITY, bit_size);
828
case nir_op_imax:
829
return const_value_int(min_int, bit_size);
830
case nir_op_umax:
831
return const_value_int(0, bit_size);
832
case nir_op_fmax:
833
return const_value_float(-INFINITY, bit_size);
834
case nir_op_iand:
835
return const_value_int(~0ull, bit_size);
836
case nir_op_ior:
837
return const_value_int(0, bit_size);
838
case nir_op_ixor:
839
return const_value_int(0, bit_size);
840
default:
841
unreachable("Invalid reduction operation");
842
}
843
}
844
845
nir_function_impl *
846
nir_cf_node_get_function(nir_cf_node *node)
847
{
848
while (node->type != nir_cf_node_function) {
849
node = node->parent;
850
}
851
852
return nir_cf_node_as_function(node);
853
}
854
855
/* Reduces a cursor by trying to convert everything to after and trying to
856
* go up to block granularity when possible.
857
*/
858
static nir_cursor
859
reduce_cursor(nir_cursor cursor)
860
{
861
switch (cursor.option) {
862
case nir_cursor_before_block:
863
if (exec_list_is_empty(&cursor.block->instr_list)) {
864
/* Empty block. After is as good as before. */
865
cursor.option = nir_cursor_after_block;
866
}
867
return cursor;
868
869
case nir_cursor_after_block:
870
return cursor;
871
872
case nir_cursor_before_instr: {
873
nir_instr *prev_instr = nir_instr_prev(cursor.instr);
874
if (prev_instr) {
875
/* Before this instruction is after the previous */
876
cursor.instr = prev_instr;
877
cursor.option = nir_cursor_after_instr;
878
} else {
879
/* No previous instruction. Switch to before block */
880
cursor.block = cursor.instr->block;
881
cursor.option = nir_cursor_before_block;
882
}
883
return reduce_cursor(cursor);
884
}
885
886
case nir_cursor_after_instr:
887
if (nir_instr_next(cursor.instr) == NULL) {
888
/* This is the last instruction, switch to after block */
889
cursor.option = nir_cursor_after_block;
890
cursor.block = cursor.instr->block;
891
}
892
return cursor;
893
894
default:
895
unreachable("Inavlid cursor option");
896
}
897
}
898
899
bool
900
nir_cursors_equal(nir_cursor a, nir_cursor b)
901
{
902
/* Reduced cursors should be unique */
903
a = reduce_cursor(a);
904
b = reduce_cursor(b);
905
906
return a.block == b.block && a.option == b.option;
907
}
908
909
static bool
910
add_use_cb(nir_src *src, void *state)
911
{
912
nir_instr *instr = state;
913
914
src->parent_instr = instr;
915
list_addtail(&src->use_link,
916
src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);
917
918
return true;
919
}
920
921
static bool
922
add_ssa_def_cb(nir_ssa_def *def, void *state)
923
{
924
nir_instr *instr = state;
925
926
if (instr->block && def->index == UINT_MAX) {
927
nir_function_impl *impl =
928
nir_cf_node_get_function(&instr->block->cf_node);
929
930
def->index = impl->ssa_alloc++;
931
932
impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
933
}
934
935
return true;
936
}
937
938
static bool
939
add_reg_def_cb(nir_dest *dest, void *state)
940
{
941
nir_instr *instr = state;
942
943
if (!dest->is_ssa) {
944
dest->reg.parent_instr = instr;
945
list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
946
}
947
948
return true;
949
}
950
951
static void
952
add_defs_uses(nir_instr *instr)
953
{
954
nir_foreach_src(instr, add_use_cb, instr);
955
nir_foreach_dest(instr, add_reg_def_cb, instr);
956
nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
957
}
958
959
void
960
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
961
{
962
switch (cursor.option) {
963
case nir_cursor_before_block:
964
/* Only allow inserting jumps into empty blocks. */
965
if (instr->type == nir_instr_type_jump)
966
assert(exec_list_is_empty(&cursor.block->instr_list));
967
968
instr->block = cursor.block;
969
add_defs_uses(instr);
970
exec_list_push_head(&cursor.block->instr_list, &instr->node);
971
break;
972
case nir_cursor_after_block: {
973
/* Inserting instructions after a jump is illegal. */
974
nir_instr *last = nir_block_last_instr(cursor.block);
975
assert(last == NULL || last->type != nir_instr_type_jump);
976
(void) last;
977
978
instr->block = cursor.block;
979
add_defs_uses(instr);
980
exec_list_push_tail(&cursor.block->instr_list, &instr->node);
981
break;
982
}
983
case nir_cursor_before_instr:
984
assert(instr->type != nir_instr_type_jump);
985
instr->block = cursor.instr->block;
986
add_defs_uses(instr);
987
exec_node_insert_node_before(&cursor.instr->node, &instr->node);
988
break;
989
case nir_cursor_after_instr:
990
/* Inserting instructions after a jump is illegal. */
991
assert(cursor.instr->type != nir_instr_type_jump);
992
993
/* Only allow inserting jumps at the end of the block. */
994
if (instr->type == nir_instr_type_jump)
995
assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
996
997
instr->block = cursor.instr->block;
998
add_defs_uses(instr);
999
exec_node_insert_after(&cursor.instr->node, &instr->node);
1000
break;
1001
}
1002
1003
if (instr->type == nir_instr_type_jump)
1004
nir_handle_add_jump(instr->block);
1005
1006
nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node);
1007
impl->valid_metadata &= ~nir_metadata_instr_index;
1008
}
1009
1010
bool
1011
nir_instr_move(nir_cursor cursor, nir_instr *instr)
1012
{
1013
/* If the cursor happens to refer to this instruction (either before or
1014
* after), don't do anything.
1015
*/
1016
if ((cursor.option == nir_cursor_before_instr ||
1017
cursor.option == nir_cursor_after_instr) &&
1018
cursor.instr == instr)
1019
return false;
1020
1021
nir_instr_remove(instr);
1022
nir_instr_insert(cursor, instr);
1023
return true;
1024
}
1025
1026
static bool
1027
src_is_valid(const nir_src *src)
1028
{
1029
return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
1030
}
1031
1032
static bool
1033
remove_use_cb(nir_src *src, void *state)
1034
{
1035
(void) state;
1036
1037
if (src_is_valid(src))
1038
list_del(&src->use_link);
1039
1040
return true;
1041
}
1042
1043
static bool
1044
remove_def_cb(nir_dest *dest, void *state)
1045
{
1046
(void) state;
1047
1048
if (!dest->is_ssa)
1049
list_del(&dest->reg.def_link);
1050
1051
return true;
1052
}
1053
1054
static void
1055
remove_defs_uses(nir_instr *instr)
1056
{
1057
nir_foreach_dest(instr, remove_def_cb, instr);
1058
nir_foreach_src(instr, remove_use_cb, instr);
1059
}
1060
1061
void nir_instr_remove_v(nir_instr *instr)
1062
{
1063
remove_defs_uses(instr);
1064
exec_node_remove(&instr->node);
1065
1066
if (instr->type == nir_instr_type_jump) {
1067
nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
1068
nir_handle_remove_jump(instr->block, jump_instr->type);
1069
}
1070
}
1071
1072
static bool nir_instr_free_and_dce_live_cb(nir_ssa_def *def, void *state)
1073
{
1074
bool *live = state;
1075
1076
if (!nir_ssa_def_is_unused(def)) {
1077
*live = true;
1078
return false;
1079
} else {
1080
return true;
1081
}
1082
}
1083
1084
static bool nir_instr_free_and_dce_is_live(nir_instr *instr)
1085
{
1086
/* Note: don't have to worry about jumps because they don't have dests to
1087
* become unused.
1088
*/
1089
if (instr->type == nir_instr_type_intrinsic) {
1090
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1091
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1092
if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
1093
return true;
1094
}
1095
1096
bool live = false;
1097
nir_foreach_ssa_def(instr, nir_instr_free_and_dce_live_cb, &live);
1098
return live;
1099
}
1100
1101
/**
1102
* Frees an instruction and any SSA defs that it used that are now dead,
1103
* returning a nir_cursor where the instruction previously was.
1104
*/
1105
nir_cursor
1106
nir_instr_free_and_dce(nir_instr *instr)
1107
{
1108
nir_instr_worklist *worklist = nir_instr_worklist_create();
1109
1110
nir_instr_worklist_add_ssa_srcs(worklist, instr);
1111
nir_cursor c = nir_instr_remove(instr);
1112
1113
struct exec_list to_free;
1114
exec_list_make_empty(&to_free);
1115
1116
nir_instr *dce_instr;
1117
while ((dce_instr = nir_instr_worklist_pop_head(worklist))) {
1118
if (!nir_instr_free_and_dce_is_live(dce_instr)) {
1119
nir_instr_worklist_add_ssa_srcs(worklist, dce_instr);
1120
1121
/* If we're removing the instr where our cursor is, then we have to
1122
* point the cursor elsewhere.
1123
*/
1124
if ((c.option == nir_cursor_before_instr ||
1125
c.option == nir_cursor_after_instr) &&
1126
c.instr == dce_instr)
1127
c = nir_instr_remove(dce_instr);
1128
else
1129
nir_instr_remove(dce_instr);
1130
exec_list_push_tail(&to_free, &dce_instr->node);
1131
}
1132
}
1133
1134
struct exec_node *node;
1135
while ((node = exec_list_pop_head(&to_free))) {
1136
nir_instr *removed_instr = exec_node_data(nir_instr, node, node);
1137
ralloc_free(removed_instr);
1138
}
1139
1140
nir_instr_worklist_destroy(worklist);
1141
1142
return c;
1143
}
1144
1145
/*@}*/
1146
1147
void
1148
nir_index_local_regs(nir_function_impl *impl)
1149
{
1150
unsigned index = 0;
1151
foreach_list_typed(nir_register, reg, node, &impl->registers) {
1152
reg->index = index++;
1153
}
1154
impl->reg_alloc = index;
1155
}
1156
1157
struct foreach_ssa_def_state {
1158
nir_foreach_ssa_def_cb cb;
1159
void *client_state;
1160
};
1161
1162
static inline bool
1163
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
1164
{
1165
struct foreach_ssa_def_state *state = void_state;
1166
1167
if (dest->is_ssa)
1168
return state->cb(&dest->ssa, state->client_state);
1169
else
1170
return true;
1171
}
1172
1173
bool
1174
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
1175
{
1176
switch (instr->type) {
1177
case nir_instr_type_alu:
1178
case nir_instr_type_deref:
1179
case nir_instr_type_tex:
1180
case nir_instr_type_intrinsic:
1181
case nir_instr_type_phi:
1182
case nir_instr_type_parallel_copy: {
1183
struct foreach_ssa_def_state foreach_state = {cb, state};
1184
return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
1185
}
1186
1187
case nir_instr_type_load_const:
1188
return cb(&nir_instr_as_load_const(instr)->def, state);
1189
case nir_instr_type_ssa_undef:
1190
return cb(&nir_instr_as_ssa_undef(instr)->def, state);
1191
case nir_instr_type_call:
1192
case nir_instr_type_jump:
1193
return true;
1194
default:
1195
unreachable("Invalid instruction type");
1196
}
1197
}
1198
1199
nir_ssa_def *
1200
nir_instr_ssa_def(nir_instr *instr)
1201
{
1202
switch (instr->type) {
1203
case nir_instr_type_alu:
1204
assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
1205
return &nir_instr_as_alu(instr)->dest.dest.ssa;
1206
1207
case nir_instr_type_deref:
1208
assert(nir_instr_as_deref(instr)->dest.is_ssa);
1209
return &nir_instr_as_deref(instr)->dest.ssa;
1210
1211
case nir_instr_type_tex:
1212
assert(nir_instr_as_tex(instr)->dest.is_ssa);
1213
return &nir_instr_as_tex(instr)->dest.ssa;
1214
1215
case nir_instr_type_intrinsic: {
1216
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1217
if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
1218
assert(intrin->dest.is_ssa);
1219
return &intrin->dest.ssa;
1220
} else {
1221
return NULL;
1222
}
1223
}
1224
1225
case nir_instr_type_phi:
1226
assert(nir_instr_as_phi(instr)->dest.is_ssa);
1227
return &nir_instr_as_phi(instr)->dest.ssa;
1228
1229
case nir_instr_type_parallel_copy:
1230
unreachable("Parallel copies are unsupported by this function");
1231
1232
case nir_instr_type_load_const:
1233
return &nir_instr_as_load_const(instr)->def;
1234
1235
case nir_instr_type_ssa_undef:
1236
return &nir_instr_as_ssa_undef(instr)->def;
1237
1238
case nir_instr_type_call:
1239
case nir_instr_type_jump:
1240
return NULL;
1241
}
1242
1243
unreachable("Invalid instruction type");
1244
}
1245
1246
bool
1247
nir_foreach_phi_src_leaving_block(nir_block *block,
1248
nir_foreach_src_cb cb,
1249
void *state)
1250
{
1251
for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
1252
if (block->successors[i] == NULL)
1253
continue;
1254
1255
nir_foreach_instr(instr, block->successors[i]) {
1256
if (instr->type != nir_instr_type_phi)
1257
break;
1258
1259
nir_phi_instr *phi = nir_instr_as_phi(instr);
1260
nir_foreach_phi_src(phi_src, phi) {
1261
if (phi_src->pred == block) {
1262
if (!cb(&phi_src->src, state))
1263
return false;
1264
}
1265
}
1266
}
1267
}
1268
1269
return true;
1270
}
1271
1272
nir_const_value
1273
nir_const_value_for_float(double f, unsigned bit_size)
1274
{
1275
nir_const_value v;
1276
memset(&v, 0, sizeof(v));
1277
1278
switch (bit_size) {
1279
case 16:
1280
v.u16 = _mesa_float_to_half(f);
1281
break;
1282
case 32:
1283
v.f32 = f;
1284
break;
1285
case 64:
1286
v.f64 = f;
1287
break;
1288
default:
1289
unreachable("Invalid bit size");
1290
}
1291
1292
return v;
1293
}
1294
1295
double
1296
nir_const_value_as_float(nir_const_value value, unsigned bit_size)
1297
{
1298
switch (bit_size) {
1299
case 16: return _mesa_half_to_float(value.u16);
1300
case 32: return value.f32;
1301
case 64: return value.f64;
1302
default:
1303
unreachable("Invalid bit size");
1304
}
1305
}
1306
1307
nir_const_value *
1308
nir_src_as_const_value(nir_src src)
1309
{
1310
if (!src.is_ssa)
1311
return NULL;
1312
1313
if (src.ssa->parent_instr->type != nir_instr_type_load_const)
1314
return NULL;
1315
1316
nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1317
1318
return load->value;
1319
}
1320
1321
/**
1322
* Returns true if the source is known to be dynamically uniform. Otherwise it
1323
* returns false which means it may or may not be dynamically uniform but it
1324
* can't be determined.
1325
*/
1326
bool
1327
nir_src_is_dynamically_uniform(nir_src src)
1328
{
1329
if (!src.is_ssa)
1330
return false;
1331
1332
/* Constants are trivially dynamically uniform */
1333
if (src.ssa->parent_instr->type == nir_instr_type_load_const)
1334
return true;
1335
1336
/* As are uniform variables */
1337
if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
1338
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
1339
if (intr->intrinsic == nir_intrinsic_load_uniform &&
1340
nir_src_is_dynamically_uniform(intr->src[0]))
1341
return true;
1342
}
1343
1344
/* Operating together dynamically uniform expressions produces a
1345
* dynamically uniform result
1346
*/
1347
if (src.ssa->parent_instr->type == nir_instr_type_alu) {
1348
nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
1349
for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
1350
if (!nir_src_is_dynamically_uniform(alu->src[i].src))
1351
return false;
1352
}
1353
1354
return true;
1355
}
1356
1357
/* XXX: this could have many more tests, such as when a sampler function is
1358
* called with dynamically uniform arguments.
1359
*/
1360
return false;
1361
}
1362
1363
static void
1364
src_remove_all_uses(nir_src *src)
1365
{
1366
for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
1367
if (!src_is_valid(src))
1368
continue;
1369
1370
list_del(&src->use_link);
1371
}
1372
}
1373
1374
static void
1375
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
1376
{
1377
for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
1378
if (!src_is_valid(src))
1379
continue;
1380
1381
if (parent_instr) {
1382
src->parent_instr = parent_instr;
1383
if (src->is_ssa)
1384
list_addtail(&src->use_link, &src->ssa->uses);
1385
else
1386
list_addtail(&src->use_link, &src->reg.reg->uses);
1387
} else {
1388
assert(parent_if);
1389
src->parent_if = parent_if;
1390
if (src->is_ssa)
1391
list_addtail(&src->use_link, &src->ssa->if_uses);
1392
else
1393
list_addtail(&src->use_link, &src->reg.reg->if_uses);
1394
}
1395
}
1396
}
1397
1398
void
1399
nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
1400
{
1401
assert(!src_is_valid(src) || src->parent_instr == instr);
1402
1403
src_remove_all_uses(src);
1404
*src = new_src;
1405
src_add_all_uses(src, instr, NULL);
1406
}
1407
1408
void
1409
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
1410
{
1411
assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);
1412
1413
src_remove_all_uses(dest);
1414
src_remove_all_uses(src);
1415
*dest = *src;
1416
*src = NIR_SRC_INIT;
1417
src_add_all_uses(dest, dest_instr, NULL);
1418
}
1419
1420
void
1421
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
1422
{
1423
nir_src *src = &if_stmt->condition;
1424
assert(!src_is_valid(src) || src->parent_if == if_stmt);
1425
1426
src_remove_all_uses(src);
1427
*src = new_src;
1428
src_add_all_uses(src, NULL, if_stmt);
1429
}
1430
1431
void
1432
nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
1433
{
1434
if (dest->is_ssa) {
1435
/* We can only overwrite an SSA destination if it has no uses. */
1436
assert(nir_ssa_def_is_unused(&dest->ssa));
1437
} else {
1438
list_del(&dest->reg.def_link);
1439
if (dest->reg.indirect)
1440
src_remove_all_uses(dest->reg.indirect);
1441
}
1442
1443
/* We can't re-write with an SSA def */
1444
assert(!new_dest.is_ssa);
1445
1446
nir_dest_copy(dest, &new_dest, instr);
1447
1448
dest->reg.parent_instr = instr;
1449
list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
1450
1451
if (dest->reg.indirect)
1452
src_add_all_uses(dest->reg.indirect, instr, NULL);
1453
}
1454
1455
/* note: does *not* take ownership of 'name' */
1456
void
1457
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
1458
unsigned num_components,
1459
unsigned bit_size)
1460
{
1461
def->parent_instr = instr;
1462
list_inithead(&def->uses);
1463
list_inithead(&def->if_uses);
1464
def->num_components = num_components;
1465
def->bit_size = bit_size;
1466
def->divergent = true; /* This is the safer default */
1467
1468
if (instr->block) {
1469
nir_function_impl *impl =
1470
nir_cf_node_get_function(&instr->block->cf_node);
1471
1472
def->index = impl->ssa_alloc++;
1473
1474
impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
1475
} else {
1476
def->index = UINT_MAX;
1477
}
1478
}
1479
1480
/* note: does *not* take ownership of 'name' */
1481
void
1482
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
1483
unsigned num_components, unsigned bit_size,
1484
const char *name)
1485
{
1486
dest->is_ssa = true;
1487
nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size);
1488
}
1489
1490
void
1491
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa)
1492
{
1493
assert(def != new_ssa);
1494
nir_foreach_use_safe(use_src, def)
1495
nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa);
1496
1497
nir_foreach_if_use_safe(use_src, def)
1498
nir_if_rewrite_condition_ssa(use_src->parent_if, use_src, new_ssa);
1499
}
1500
1501
void
1502
nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src)
1503
{
1504
if (new_src.is_ssa) {
1505
nir_ssa_def_rewrite_uses(def, new_src.ssa);
1506
} else {
1507
nir_foreach_use_safe(use_src, def)
1508
nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
1509
1510
nir_foreach_if_use_safe(use_src, def)
1511
nir_if_rewrite_condition(use_src->parent_if, new_src);
1512
}
1513
}
1514
1515
static bool
1516
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1517
{
1518
assert(start->block == end->block);
1519
1520
if (between->block != start->block)
1521
return false;
1522
1523
/* Search backwards looking for "between" */
1524
while (start != end) {
1525
if (between == end)
1526
return true;
1527
1528
end = nir_instr_prev(end);
1529
assert(end);
1530
}
1531
1532
return false;
1533
}
1534
1535
/* Replaces all uses of the given SSA def with the given source but only if
1536
* the use comes after the after_me instruction. This can be useful if you
1537
* are emitting code to fix up the result of some instruction: you can freely
1538
* use the result in that code and then call rewrite_uses_after and pass the
1539
* last fixup instruction as after_me and it will replace all of the uses you
1540
* want without touching the fixup code.
1541
*
1542
* This function assumes that after_me is in the same block as
1543
* def->parent_instr and that after_me comes after def->parent_instr.
1544
*/
1545
void
1546
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
1547
nir_instr *after_me)
1548
{
1549
if (def == new_ssa)
1550
return;
1551
1552
nir_foreach_use_safe(use_src, def) {
1553
assert(use_src->parent_instr != def->parent_instr);
1554
/* Since def already dominates all of its uses, the only way a use can
1555
* not be dominated by after_me is if it is between def and after_me in
1556
* the instruction list.
1557
*/
1558
if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
1559
nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa);
1560
}
1561
1562
nir_foreach_if_use_safe(use_src, def) {
1563
nir_if_rewrite_condition_ssa(use_src->parent_if,
1564
&use_src->parent_if->condition,
1565
new_ssa);
1566
}
1567
}
1568
1569
nir_component_mask_t
1570
nir_ssa_def_components_read(const nir_ssa_def *def)
1571
{
1572
nir_component_mask_t read_mask = 0;
1573
nir_foreach_use(use, def) {
1574
if (use->parent_instr->type == nir_instr_type_alu) {
1575
nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
1576
nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
1577
int src_idx = alu_src - &alu->src[0];
1578
assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1579
read_mask |= nir_alu_instr_src_read_mask(alu, src_idx);
1580
} else {
1581
return (1 << def->num_components) - 1;
1582
}
1583
}
1584
1585
if (!list_is_empty(&def->if_uses))
1586
read_mask |= 1;
1587
1588
return read_mask;
1589
}
1590
1591
nir_block *
1592
nir_block_unstructured_next(nir_block *block)
1593
{
1594
if (block == NULL) {
1595
/* nir_foreach_block_unstructured_safe() will call this function on a
1596
* NULL block after the last iteration, but it won't use the result so
1597
* just return NULL here.
1598
*/
1599
return NULL;
1600
}
1601
1602
nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1603
if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function)
1604
return NULL;
1605
1606
if (cf_next && cf_next->type == nir_cf_node_block)
1607
return nir_cf_node_as_block(cf_next);
1608
1609
return nir_block_cf_tree_next(block);
1610
}
1611
1612
nir_block *
1613
nir_unstructured_start_block(nir_function_impl *impl)
1614
{
1615
return nir_start_block(impl);
1616
}
1617
1618
nir_block *
1619
nir_block_cf_tree_next(nir_block *block)
1620
{
1621
if (block == NULL) {
1622
/* nir_foreach_block_safe() will call this function on a NULL block
1623
* after the last iteration, but it won't use the result so just return
1624
* NULL here.
1625
*/
1626
return NULL;
1627
}
1628
1629
assert(nir_cf_node_get_function(&block->cf_node)->structured);
1630
1631
nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1632
if (cf_next)
1633
return nir_cf_node_cf_tree_first(cf_next);
1634
1635
nir_cf_node *parent = block->cf_node.parent;
1636
1637
switch (parent->type) {
1638
case nir_cf_node_if: {
1639
/* Are we at the end of the if? Go to the beginning of the else */
1640
nir_if *if_stmt = nir_cf_node_as_if(parent);
1641
if (block == nir_if_last_then_block(if_stmt))
1642
return nir_if_first_else_block(if_stmt);
1643
1644
assert(block == nir_if_last_else_block(if_stmt));
1645
}
1646
FALLTHROUGH;
1647
1648
case nir_cf_node_loop:
1649
return nir_cf_node_as_block(nir_cf_node_next(parent));
1650
1651
case nir_cf_node_function:
1652
return NULL;
1653
1654
default:
1655
unreachable("unknown cf node type");
1656
}
1657
}
1658
1659
nir_block *
1660
nir_block_cf_tree_prev(nir_block *block)
1661
{
1662
if (block == NULL) {
1663
/* do this for consistency with nir_block_cf_tree_next() */
1664
return NULL;
1665
}
1666
1667
assert(nir_cf_node_get_function(&block->cf_node)->structured);
1668
1669
nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1670
if (cf_prev)
1671
return nir_cf_node_cf_tree_last(cf_prev);
1672
1673
nir_cf_node *parent = block->cf_node.parent;
1674
1675
switch (parent->type) {
1676
case nir_cf_node_if: {
1677
/* Are we at the beginning of the else? Go to the end of the if */
1678
nir_if *if_stmt = nir_cf_node_as_if(parent);
1679
if (block == nir_if_first_else_block(if_stmt))
1680
return nir_if_last_then_block(if_stmt);
1681
1682
assert(block == nir_if_first_then_block(if_stmt));
1683
}
1684
FALLTHROUGH;
1685
1686
case nir_cf_node_loop:
1687
return nir_cf_node_as_block(nir_cf_node_prev(parent));
1688
1689
case nir_cf_node_function:
1690
return NULL;
1691
1692
default:
1693
unreachable("unknown cf node type");
1694
}
1695
}
1696
1697
nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
1698
{
1699
switch (node->type) {
1700
case nir_cf_node_function: {
1701
nir_function_impl *impl = nir_cf_node_as_function(node);
1702
return nir_start_block(impl);
1703
}
1704
1705
case nir_cf_node_if: {
1706
nir_if *if_stmt = nir_cf_node_as_if(node);
1707
return nir_if_first_then_block(if_stmt);
1708
}
1709
1710
case nir_cf_node_loop: {
1711
nir_loop *loop = nir_cf_node_as_loop(node);
1712
return nir_loop_first_block(loop);
1713
}
1714
1715
case nir_cf_node_block: {
1716
return nir_cf_node_as_block(node);
1717
}
1718
1719
default:
1720
unreachable("unknown node type");
1721
}
1722
}
1723
1724
nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
1725
{
1726
switch (node->type) {
1727
case nir_cf_node_function: {
1728
nir_function_impl *impl = nir_cf_node_as_function(node);
1729
return nir_impl_last_block(impl);
1730
}
1731
1732
case nir_cf_node_if: {
1733
nir_if *if_stmt = nir_cf_node_as_if(node);
1734
return nir_if_last_else_block(if_stmt);
1735
}
1736
1737
case nir_cf_node_loop: {
1738
nir_loop *loop = nir_cf_node_as_loop(node);
1739
return nir_loop_last_block(loop);
1740
}
1741
1742
case nir_cf_node_block: {
1743
return nir_cf_node_as_block(node);
1744
}
1745
1746
default:
1747
unreachable("unknown node type");
1748
}
1749
}
1750
1751
nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
1752
{
1753
if (node->type == nir_cf_node_block)
1754
return nir_block_cf_tree_next(nir_cf_node_as_block(node));
1755
else if (node->type == nir_cf_node_function)
1756
return NULL;
1757
else
1758
return nir_cf_node_as_block(nir_cf_node_next(node));
1759
}
1760
1761
nir_if *
1762
nir_block_get_following_if(nir_block *block)
1763
{
1764
if (exec_node_is_tail_sentinel(&block->cf_node.node))
1765
return NULL;
1766
1767
if (nir_cf_node_is_last(&block->cf_node))
1768
return NULL;
1769
1770
nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1771
1772
if (next_node->type != nir_cf_node_if)
1773
return NULL;
1774
1775
return nir_cf_node_as_if(next_node);
1776
}
1777
1778
nir_loop *
1779
nir_block_get_following_loop(nir_block *block)
1780
{
1781
if (exec_node_is_tail_sentinel(&block->cf_node.node))
1782
return NULL;
1783
1784
if (nir_cf_node_is_last(&block->cf_node))
1785
return NULL;
1786
1787
nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1788
1789
if (next_node->type != nir_cf_node_loop)
1790
return NULL;
1791
1792
return nir_cf_node_as_loop(next_node);
1793
}
1794
1795
static int
1796
compare_block_index(const void *p1, const void *p2)
1797
{
1798
const nir_block *block1 = *((const nir_block **) p1);
1799
const nir_block *block2 = *((const nir_block **) p2);
1800
1801
return (int) block1->index - (int) block2->index;
1802
}
1803
1804
nir_block **
1805
nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx)
1806
{
1807
nir_block **preds =
1808
ralloc_array(mem_ctx, nir_block *, block->predecessors->entries);
1809
1810
unsigned i = 0;
1811
set_foreach(block->predecessors, entry)
1812
preds[i++] = (nir_block *) entry->key;
1813
assert(i == block->predecessors->entries);
1814
1815
qsort(preds, block->predecessors->entries, sizeof(nir_block *),
1816
compare_block_index);
1817
1818
return preds;
1819
}
1820
1821
void
1822
nir_index_blocks(nir_function_impl *impl)
1823
{
1824
unsigned index = 0;
1825
1826
if (impl->valid_metadata & nir_metadata_block_index)
1827
return;
1828
1829
nir_foreach_block_unstructured(block, impl) {
1830
block->index = index++;
1831
}
1832
1833
/* The end_block isn't really part of the program, which is why its index
1834
* is >= num_blocks.
1835
*/
1836
impl->num_blocks = impl->end_block->index = index;
1837
}
1838
1839
static bool
1840
index_ssa_def_cb(nir_ssa_def *def, void *state)
1841
{
1842
unsigned *index = (unsigned *) state;
1843
def->index = (*index)++;
1844
1845
return true;
1846
}
1847
1848
/**
1849
* The indices are applied top-to-bottom which has the very nice property
1850
* that, if A dominates B, then A->index <= B->index.
1851
*/
1852
void
1853
nir_index_ssa_defs(nir_function_impl *impl)
1854
{
1855
unsigned index = 0;
1856
1857
impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
1858
1859
nir_foreach_block_unstructured(block, impl) {
1860
nir_foreach_instr(instr, block)
1861
nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
1862
}
1863
1864
impl->ssa_alloc = index;
1865
}
1866
1867
/**
1868
* The indices are applied top-to-bottom which has the very nice property
1869
* that, if A dominates B, then A->index <= B->index.
1870
*/
1871
unsigned
1872
nir_index_instrs(nir_function_impl *impl)
1873
{
1874
unsigned index = 0;
1875
1876
nir_foreach_block(block, impl) {
1877
block->start_ip = index++;
1878
1879
nir_foreach_instr(instr, block)
1880
instr->index = index++;
1881
1882
block->end_ip = index++;
1883
}
1884
1885
return index;
1886
}
1887
1888
unsigned
1889
nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes)
1890
{
1891
unsigned count = 0;
1892
nir_foreach_variable_with_modes(var, shader, modes)
1893
var->index = count++;
1894
return count;
1895
}
1896
1897
unsigned
1898
nir_function_impl_index_vars(nir_function_impl *impl)
1899
{
1900
unsigned count = 0;
1901
nir_foreach_function_temp_variable(var, impl)
1902
var->index = count++;
1903
return count;
1904
}
1905
1906
static nir_instr *
1907
cursor_next_instr(nir_cursor cursor)
1908
{
1909
switch (cursor.option) {
1910
case nir_cursor_before_block:
1911
for (nir_block *block = cursor.block; block;
1912
block = nir_block_cf_tree_next(block)) {
1913
nir_instr *instr = nir_block_first_instr(block);
1914
if (instr)
1915
return instr;
1916
}
1917
return NULL;
1918
1919
case nir_cursor_after_block:
1920
cursor.block = nir_block_cf_tree_next(cursor.block);
1921
if (cursor.block == NULL)
1922
return NULL;
1923
1924
cursor.option = nir_cursor_before_block;
1925
return cursor_next_instr(cursor);
1926
1927
case nir_cursor_before_instr:
1928
return cursor.instr;
1929
1930
case nir_cursor_after_instr:
1931
if (nir_instr_next(cursor.instr))
1932
return nir_instr_next(cursor.instr);
1933
1934
cursor.option = nir_cursor_after_block;
1935
cursor.block = cursor.instr->block;
1936
return cursor_next_instr(cursor);
1937
}
1938
1939
unreachable("Inavlid cursor option");
1940
}
1941
1942
ASSERTED static bool
1943
dest_is_ssa(nir_dest *dest, void *_state)
1944
{
1945
(void) _state;
1946
return dest->is_ssa;
1947
}
1948
1949
bool
nir_function_impl_lower_instructions(nir_function_impl *impl,
                                     nir_instr_filter_cb filter,
                                     nir_lower_instr_cb lower,
                                     void *cb_data)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_metadata preserved = nir_metadata_block_index |
                            nir_metadata_dominance;

   bool progress = false;
   nir_cursor iter = nir_before_cf_list(&impl->body);
   nir_instr *instr;
   while ((instr = cursor_next_instr(iter)) != NULL) {
      if (filter && !filter(instr, cb_data)) {
         iter = nir_after_instr(instr);
         continue;
      }

      assert(nir_foreach_dest(instr, dest_is_ssa, NULL));
      nir_ssa_def *old_def = nir_instr_ssa_def(instr);
      struct list_head old_uses, old_if_uses;
      if (old_def != NULL) {
         /* We're about to ask the callback to generate a replacement for instr.
          * Save off the uses from instr's SSA def so we know what uses to
          * rewrite later. If we use nir_ssa_def_rewrite_uses, it fails in the
          * case where the generated replacement code uses the result of instr
          * itself. If we use nir_ssa_def_rewrite_uses_after (which is the
          * normal solution to this problem), it doesn't work well if control-
          * flow is inserted as part of the replacement, doesn't handle cases
          * where the replacement is something consumed by instr, and suffers
          * from performance issues. This is the only way to 100% guarantee
          * that we rewrite the correct set efficiently.
          */

         list_replace(&old_def->uses, &old_uses);
         list_inithead(&old_def->uses);
         list_replace(&old_def->if_uses, &old_if_uses);
         list_inithead(&old_def->if_uses);
      }

      b.cursor = nir_after_instr(instr);
      nir_ssa_def *new_def = lower(&b, instr, cb_data);
      if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
          new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
         assert(old_def != NULL);
         if (new_def->parent_instr->block != instr->block)
            preserved = nir_metadata_none;

         nir_src new_src = nir_src_for_ssa(new_def);
         list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link)
            nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);

         list_for_each_entry_safe(nir_src, use_src, &old_if_uses, use_link)
            nir_if_rewrite_condition(use_src->parent_if, new_src);

         if (nir_ssa_def_is_unused(old_def)) {
            iter = nir_instr_free_and_dce(instr);
         } else {
            iter = nir_after_instr(instr);
         }
         progress = true;
      } else {
         /* We didn't end up lowering after all. Put the uses back */
         if (old_def) {
            list_replace(&old_uses, &old_def->uses);
            list_replace(&old_if_uses, &old_def->if_uses);
         }
         if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) {
            /* Only instructions without a return value can be removed like this */
            assert(!old_def);
            iter = nir_instr_free_and_dce(instr);
            progress = true;
         } else
            iter = nir_after_instr(instr);

         if (new_def == NIR_LOWER_INSTR_PROGRESS)
            progress = true;
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, preserved);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_shader_lower_instructions(nir_shader *shader,
                              nir_instr_filter_cb filter,
                              nir_lower_instr_cb lower,
                              void *cb_data)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl &&
          nir_function_impl_lower_instructions(function->impl,
                                               filter, lower, cb_data))
         progress = true;
   }

   return progress;
}

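/* Hedged usage sketch (illustrative only, not part of this file): a pass
 * built on nir_shader_lower_instructions() supplies a filter callback that
 * picks the instructions of interest and a lower callback that returns the
 * replacement SSA def (or NIR_LOWER_INSTR_PROGRESS /
 * NIR_LOWER_INSTR_PROGRESS_REPLACE). A hypothetical pass that rewrites frcp
 * as a division might look roughly like:
 *
 *    static bool
 *    filter_frcp(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu(instr)->op == nir_op_frcp;
 *    }
 *
 *    static nir_ssa_def *
 *    lower_frcp(nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       nir_ssa_def *src = nir_ssa_for_alu_src(b, alu, 0);
 *       return nir_fdiv(b, nir_imm_float(b, 1.0f), src);
 *    }
 *
 *    bool progress =
 *       nir_shader_lower_instructions(shader, filter_frcp, lower_frcp, NULL);
 *
 * The framework positions b->cursor after the original instruction before
 * invoking the lower callback, so the callback only needs to build the
 * replacement value.
 */
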
nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
   switch (val) {
   case SYSTEM_VALUE_VERTEX_ID:
      return nir_intrinsic_load_vertex_id;
   case SYSTEM_VALUE_INSTANCE_ID:
      return nir_intrinsic_load_instance_id;
   case SYSTEM_VALUE_DRAW_ID:
      return nir_intrinsic_load_draw_id;
   case SYSTEM_VALUE_BASE_INSTANCE:
      return nir_intrinsic_load_base_instance;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      return nir_intrinsic_load_vertex_id_zero_base;
   case SYSTEM_VALUE_IS_INDEXED_DRAW:
      return nir_intrinsic_load_is_indexed_draw;
   case SYSTEM_VALUE_FIRST_VERTEX:
      return nir_intrinsic_load_first_vertex;
   case SYSTEM_VALUE_BASE_VERTEX:
      return nir_intrinsic_load_base_vertex;
   case SYSTEM_VALUE_INVOCATION_ID:
      return nir_intrinsic_load_invocation_id;
   case SYSTEM_VALUE_FRAG_COORD:
      return nir_intrinsic_load_frag_coord;
   case SYSTEM_VALUE_POINT_COORD:
      return nir_intrinsic_load_point_coord;
   case SYSTEM_VALUE_LINE_COORD:
      return nir_intrinsic_load_line_coord;
   case SYSTEM_VALUE_FRONT_FACE:
      return nir_intrinsic_load_front_face;
   case SYSTEM_VALUE_SAMPLE_ID:
      return nir_intrinsic_load_sample_id;
   case SYSTEM_VALUE_SAMPLE_POS:
      return nir_intrinsic_load_sample_pos;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      return nir_intrinsic_load_sample_mask_in;
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      return nir_intrinsic_load_local_invocation_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      return nir_intrinsic_load_local_invocation_index;
   case SYSTEM_VALUE_WORKGROUP_ID:
      return nir_intrinsic_load_workgroup_id;
   case SYSTEM_VALUE_NUM_WORKGROUPS:
      return nir_intrinsic_load_num_workgroups;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      return nir_intrinsic_load_primitive_id;
   case SYSTEM_VALUE_TESS_COORD:
      return nir_intrinsic_load_tess_coord;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      return nir_intrinsic_load_tess_level_outer;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      return nir_intrinsic_load_tess_level_inner;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT:
      return nir_intrinsic_load_tess_level_outer_default;
   case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT:
      return nir_intrinsic_load_tess_level_inner_default;
   case SYSTEM_VALUE_VERTICES_IN:
      return nir_intrinsic_load_patch_vertices_in;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      return nir_intrinsic_load_helper_invocation;
   case SYSTEM_VALUE_COLOR0:
      return nir_intrinsic_load_color0;
   case SYSTEM_VALUE_COLOR1:
      return nir_intrinsic_load_color1;
   case SYSTEM_VALUE_VIEW_INDEX:
      return nir_intrinsic_load_view_index;
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      return nir_intrinsic_load_subgroup_size;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      return nir_intrinsic_load_subgroup_invocation;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      return nir_intrinsic_load_subgroup_eq_mask;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      return nir_intrinsic_load_subgroup_ge_mask;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      return nir_intrinsic_load_subgroup_gt_mask;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      return nir_intrinsic_load_subgroup_le_mask;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      return nir_intrinsic_load_subgroup_lt_mask;
   case SYSTEM_VALUE_NUM_SUBGROUPS:
      return nir_intrinsic_load_num_subgroups;
   case SYSTEM_VALUE_SUBGROUP_ID:
      return nir_intrinsic_load_subgroup_id;
   case SYSTEM_VALUE_WORKGROUP_SIZE:
      return nir_intrinsic_load_workgroup_size;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_global_invocation_id;
   case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_base_global_invocation_id;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
      return nir_intrinsic_load_global_invocation_index;
   case SYSTEM_VALUE_WORK_DIM:
      return nir_intrinsic_load_work_dim;
   case SYSTEM_VALUE_USER_DATA_AMD:
      return nir_intrinsic_load_user_data_amd;
   case SYSTEM_VALUE_RAY_LAUNCH_ID:
      return nir_intrinsic_load_ray_launch_id;
   case SYSTEM_VALUE_RAY_LAUNCH_SIZE:
      return nir_intrinsic_load_ray_launch_size;
   case SYSTEM_VALUE_RAY_WORLD_ORIGIN:
      return nir_intrinsic_load_ray_world_origin;
   case SYSTEM_VALUE_RAY_WORLD_DIRECTION:
      return nir_intrinsic_load_ray_world_direction;
   case SYSTEM_VALUE_RAY_OBJECT_ORIGIN:
      return nir_intrinsic_load_ray_object_origin;
   case SYSTEM_VALUE_RAY_OBJECT_DIRECTION:
      return nir_intrinsic_load_ray_object_direction;
   case SYSTEM_VALUE_RAY_T_MIN:
      return nir_intrinsic_load_ray_t_min;
   case SYSTEM_VALUE_RAY_T_MAX:
      return nir_intrinsic_load_ray_t_max;
   case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD:
      return nir_intrinsic_load_ray_object_to_world;
   case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT:
      return nir_intrinsic_load_ray_world_to_object;
   case SYSTEM_VALUE_RAY_HIT_KIND:
      return nir_intrinsic_load_ray_hit_kind;
   case SYSTEM_VALUE_RAY_FLAGS:
      return nir_intrinsic_load_ray_flags;
   case SYSTEM_VALUE_RAY_GEOMETRY_INDEX:
      return nir_intrinsic_load_ray_geometry_index;
   case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX:
      return nir_intrinsic_load_ray_instance_custom_index;
   case SYSTEM_VALUE_FRAG_SHADING_RATE:
      return nir_intrinsic_load_frag_shading_rate;
   default:
      unreachable("system value does not directly correspond to intrinsic");
   }
}

gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_vertex_id:
      return SYSTEM_VALUE_VERTEX_ID;
   case nir_intrinsic_load_instance_id:
      return SYSTEM_VALUE_INSTANCE_ID;
   case nir_intrinsic_load_draw_id:
      return SYSTEM_VALUE_DRAW_ID;
   case nir_intrinsic_load_base_instance:
      return SYSTEM_VALUE_BASE_INSTANCE;
   case nir_intrinsic_load_vertex_id_zero_base:
      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
   case nir_intrinsic_load_first_vertex:
      return SYSTEM_VALUE_FIRST_VERTEX;
   case nir_intrinsic_load_is_indexed_draw:
      return SYSTEM_VALUE_IS_INDEXED_DRAW;
   case nir_intrinsic_load_base_vertex:
      return SYSTEM_VALUE_BASE_VERTEX;
   case nir_intrinsic_load_invocation_id:
      return SYSTEM_VALUE_INVOCATION_ID;
   case nir_intrinsic_load_frag_coord:
      return SYSTEM_VALUE_FRAG_COORD;
   case nir_intrinsic_load_point_coord:
      return SYSTEM_VALUE_POINT_COORD;
   case nir_intrinsic_load_line_coord:
      return SYSTEM_VALUE_LINE_COORD;
   case nir_intrinsic_load_front_face:
      return SYSTEM_VALUE_FRONT_FACE;
   case nir_intrinsic_load_sample_id:
      return SYSTEM_VALUE_SAMPLE_ID;
   case nir_intrinsic_load_sample_pos:
      return SYSTEM_VALUE_SAMPLE_POS;
   case nir_intrinsic_load_sample_mask_in:
      return SYSTEM_VALUE_SAMPLE_MASK_IN;
   case nir_intrinsic_load_local_invocation_id:
      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
   case nir_intrinsic_load_local_invocation_index:
      return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
   case nir_intrinsic_load_num_workgroups:
      return SYSTEM_VALUE_NUM_WORKGROUPS;
   case nir_intrinsic_load_workgroup_id:
      return SYSTEM_VALUE_WORKGROUP_ID;
   case nir_intrinsic_load_primitive_id:
      return SYSTEM_VALUE_PRIMITIVE_ID;
   case nir_intrinsic_load_tess_coord:
      return SYSTEM_VALUE_TESS_COORD;
   case nir_intrinsic_load_tess_level_outer:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
   case nir_intrinsic_load_tess_level_inner:
      return SYSTEM_VALUE_TESS_LEVEL_INNER;
   case nir_intrinsic_load_tess_level_outer_default:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
   case nir_intrinsic_load_tess_level_inner_default:
      return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
   case nir_intrinsic_load_patch_vertices_in:
      return SYSTEM_VALUE_VERTICES_IN;
   case nir_intrinsic_load_helper_invocation:
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_color0:
      return SYSTEM_VALUE_COLOR0;
   case nir_intrinsic_load_color1:
      return SYSTEM_VALUE_COLOR1;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
   case nir_intrinsic_load_subgroup_size:
      return SYSTEM_VALUE_SUBGROUP_SIZE;
   case nir_intrinsic_load_subgroup_invocation:
      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
   case nir_intrinsic_load_subgroup_le_mask:
      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
   case nir_intrinsic_load_num_subgroups:
      return SYSTEM_VALUE_NUM_SUBGROUPS;
   case nir_intrinsic_load_subgroup_id:
      return SYSTEM_VALUE_SUBGROUP_ID;
   case nir_intrinsic_load_workgroup_size:
      return SYSTEM_VALUE_WORKGROUP_SIZE;
   case nir_intrinsic_load_global_invocation_id:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
   case nir_intrinsic_load_base_global_invocation_id:
      return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
   case nir_intrinsic_load_global_invocation_index:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
   case nir_intrinsic_load_work_dim:
      return SYSTEM_VALUE_WORK_DIM;
   case nir_intrinsic_load_user_data_amd:
      return SYSTEM_VALUE_USER_DATA_AMD;
   case nir_intrinsic_load_barycentric_model:
      return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL;
   case nir_intrinsic_load_gs_header_ir3:
      return SYSTEM_VALUE_GS_HEADER_IR3;
   case nir_intrinsic_load_tcs_header_ir3:
      return SYSTEM_VALUE_TCS_HEADER_IR3;
   case nir_intrinsic_load_ray_launch_id:
      return SYSTEM_VALUE_RAY_LAUNCH_ID;
   case nir_intrinsic_load_ray_launch_size:
      return SYSTEM_VALUE_RAY_LAUNCH_SIZE;
   case nir_intrinsic_load_ray_world_origin:
      return SYSTEM_VALUE_RAY_WORLD_ORIGIN;
   case nir_intrinsic_load_ray_world_direction:
      return SYSTEM_VALUE_RAY_WORLD_DIRECTION;
   case nir_intrinsic_load_ray_object_origin:
      return SYSTEM_VALUE_RAY_OBJECT_ORIGIN;
   case nir_intrinsic_load_ray_object_direction:
      return SYSTEM_VALUE_RAY_OBJECT_DIRECTION;
   case nir_intrinsic_load_ray_t_min:
      return SYSTEM_VALUE_RAY_T_MIN;
   case nir_intrinsic_load_ray_t_max:
      return SYSTEM_VALUE_RAY_T_MAX;
   case nir_intrinsic_load_ray_object_to_world:
      return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD;
   case nir_intrinsic_load_ray_world_to_object:
      return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT;
   case nir_intrinsic_load_ray_hit_kind:
      return SYSTEM_VALUE_RAY_HIT_KIND;
   case nir_intrinsic_load_ray_flags:
      return SYSTEM_VALUE_RAY_FLAGS;
   case nir_intrinsic_load_ray_geometry_index:
      return SYSTEM_VALUE_RAY_GEOMETRY_INDEX;
   case nir_intrinsic_load_ray_instance_custom_index:
      return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX;
   case nir_intrinsic_load_frag_shading_rate:
      return SYSTEM_VALUE_FRAG_SHADING_RATE;
   default:
      unreachable("intrinsic doesn't produce a system value");
   }
}

/* OpenGL utility method that remaps the location attributes if they are
 * doubles. Not needed for Vulkan because the input location count for
 * doubles differs between Vulkan and OpenGL.
 *
 * The bitfield returned in dual_slot has one bit for each double input slot
 * in the original OpenGL single-slot input numbering. The mapping from old
 * locations to new locations is:
 *
 *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
 */
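/* A minimal worked example of the formula above (hypothetical layout, not
 * taken from a real shader): if dual_slot == 0x6, i.e. a double-precision
 * input covers old slots 1 and 2, then an attribute at old location 1 keeps
 * its location (1 + util_bitcount(0x6 & BITFIELD64_MASK(1)) == 1 + 0), while
 * an attribute at old location 3 is remapped to
 * 3 + util_bitcount(0x6 & BITFIELD64_MASK(3)) == 3 + 2 == 5.
 */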
void
nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);

   *dual_slot = 0;
   nir_foreach_shader_in_variable(var, shader) {
      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
         unsigned slots = glsl_count_attribute_slots(var->type, true);
         *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
      }
   }

   nir_foreach_shader_in_variable(var, shader) {
      var->data.location +=
         util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
   }
}

/* Returns an attribute mask that has been re-compacted using the given
 * dual_slot mask.
 */
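/* Small illustrative example (hypothetical values): with dual_slot == 0x2
 * and attribs == 0x18 (bits 3 and 4 set), the loop below shifts every bit
 * above slot 1 down by one, producing 0xc (bits 2 and 3 set).
 */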
uint64_t
nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
{
   while (dual_slot) {
      unsigned loc = u_bit_scan64(&dual_slot);
      /* mask of all bits up to and including loc */
      uint64_t mask = BITFIELD64_MASK(loc + 1);
      attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
   }
   return attribs;
}

void
nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src,
                            bool bindless)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);

   /* Image intrinsics only have one of these */
   assert(!nir_intrinsic_has_src_type(intrin) ||
          !nir_intrinsic_has_dest_type(intrin));

   nir_alu_type data_type = nir_type_invalid;
   if (nir_intrinsic_has_src_type(intrin))
      data_type = nir_intrinsic_src_type(intrin);
   if (nir_intrinsic_has_dest_type(intrin))
      data_type = nir_intrinsic_dest_type(intrin);

   switch (intrin->intrinsic) {
#define CASE(op) \
   case nir_intrinsic_image_deref_##op: \
      intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
                                   : nir_intrinsic_image_##op; \
      break;
   CASE(load)
   CASE(sparse_load)
   CASE(store)
   CASE(atomic_add)
   CASE(atomic_imin)
   CASE(atomic_umin)
   CASE(atomic_imax)
   CASE(atomic_umax)
   CASE(atomic_and)
   CASE(atomic_or)
   CASE(atomic_xor)
   CASE(atomic_exchange)
   CASE(atomic_comp_swap)
   CASE(atomic_fadd)
   CASE(atomic_fmin)
   CASE(atomic_fmax)
   CASE(atomic_inc_wrap)
   CASE(atomic_dec_wrap)
   CASE(size)
   CASE(samples)
   CASE(load_raw_intel)
   CASE(store_raw_intel)
#undef CASE
   default:
      unreachable("Unhandled image intrinsic");
   }

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
   nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
   nir_intrinsic_set_access(intrin, access | var->data.access);
   nir_intrinsic_set_format(intrin, var->data.image.format);
   if (nir_intrinsic_has_src_type(intrin))
      nir_intrinsic_set_src_type(intrin, data_type);
   if (nir_intrinsic_has_dest_type(intrin))
      nir_intrinsic_set_dest_type(intrin, data_type);

   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                         nir_src_for_ssa(src));
}

unsigned
nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr)
{
   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
   int coords = glsl_get_sampler_dim_coordinate_components(dim);
   if (dim == GLSL_SAMPLER_DIM_CUBE)
      return coords;
   else
      return coords + nir_intrinsic_image_array(instr);
}

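/* For example, a 2D array image yields 2 + 1 == 3 coordinate components,
 * while a cube image yields 3 and ignores the array flag (handled above by
 * the GLSL_SAMPLER_DIM_CUBE special case).
 */
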
nir_src *
nir_get_shader_call_payload_src(nir_intrinsic_instr *call)
{
   switch (call->intrinsic) {
   case nir_intrinsic_trace_ray:
   case nir_intrinsic_rt_trace_ray:
      return &call->src[10];
   case nir_intrinsic_execute_callable:
   case nir_intrinsic_rt_execute_callable:
      return &call->src[1];
   default:
      unreachable("Not a call intrinsic");
      return NULL;
   }
}

nir_binding nir_chase_binding(nir_src rsrc)
{
   nir_binding res = {0};
   if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
      const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type);
      bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type);
      while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) {
         nir_deref_instr *deref = nir_src_as_deref(rsrc);

         if (deref->deref_type == nir_deref_type_var) {
            res.success = true;
            res.var = deref->var;
            res.desc_set = deref->var->data.descriptor_set;
            res.binding = deref->var->data.binding;
            return res;
         } else if (deref->deref_type == nir_deref_type_array && is_image) {
            if (res.num_indices == ARRAY_SIZE(res.indices))
               return (nir_binding){0};
            res.indices[res.num_indices++] = deref->arr.index;
         }

         rsrc = deref->parent;
      }
   }

   /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions
    * when removing the offset from addresses. We also consider nir_op_is_vec()
    * instructions to skip trimming of vec2_index_32bit_offset addresses after
    * lowering ALU to scalar.
    */
   while (true) {
      nir_alu_instr *alu = nir_src_as_alu_instr(rsrc);
      nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
      if (alu && alu->op == nir_op_mov) {
         for (unsigned i = 0; i < alu->dest.dest.ssa.num_components; i++) {
            if (alu->src[0].swizzle[i] != i)
               return (nir_binding){0};
         }
         rsrc = alu->src[0].src;
      } else if (alu && nir_op_is_vec(alu->op)) {
         for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
            if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa)
               return (nir_binding){0};
         }
         rsrc = alu->src[0].src;
      } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) {
         /* The caller might want to be aware that only the first invocation's
          * value of the index is used.
          */
         res.read_first_invocation = true;
         rsrc = intrin->src[0];
      } else {
         break;
      }
   }

   if (nir_src_is_const(rsrc)) {
      /* GL binding model after deref lowering */
      res.success = true;
      res.binding = nir_src_as_uint(rsrc);
      return res;
   }

   /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc);
   if (!intrin)
      return (nir_binding){0};

   /* skip load_vulkan_descriptor */
   if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
      intrin = nir_src_as_intrinsic(intrin->src[0]);
      if (!intrin)
         return (nir_binding){0};
   }

   if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return (nir_binding){0};

   assert(res.num_indices == 0);
   res.success = true;
   res.desc_set = nir_intrinsic_desc_set(intrin);
   res.binding = nir_intrinsic_binding(intrin);
   res.num_indices = 1;
   res.indices[0] = intrin->src[0];
   return res;
}

nir_variable *nir_get_binding_variable(nir_shader *shader, nir_binding binding)
{
   nir_variable *binding_var = NULL;
   unsigned count = 0;

   if (!binding.success)
      return NULL;

   if (binding.var)
      return binding.var;

   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) {
      if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) {
         binding_var = var;
         count++;
      }
   }

   /* Be conservative if another variable is using the same binding/desc_set
    * because the access mask might be different and we can't get it reliably.
    */
   if (count > 1)
      return NULL;

   return binding_var;
}

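/* Hedged usage sketch (illustrative only): a backend that wants to know which
 * UBO/SSBO variable an access ultimately refers to can combine the two
 * helpers above, e.g.:
 *
 *    nir_binding binding = nir_chase_binding(intrin->src[0]);
 *    nir_variable *var = nir_get_binding_variable(shader, binding);
 *    if (var) {
 *       // inspect var->data.access, var->data.binding, ...
 *    }
 *
 * where "intrin" is assumed to be a load_ubo/load_ssbo-style intrinsic whose
 * first source is the resource.
 */
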
bool
nir_alu_instr_is_copy(nir_alu_instr *instr)
{
   assert(instr->src[0].src.is_ssa);

   if (instr->op == nir_op_mov) {
      return !instr->dest.saturate &&
             !instr->src[0].abs &&
             !instr->src[0].negate;
   } else if (nir_op_is_vec(instr->op)) {
      for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; i++) {
         if (instr->src[i].abs || instr->src[i].negate)
            return false;
      }
      return !instr->dest.saturate;
   } else {
      return false;
   }
}

nir_ssa_scalar
nir_ssa_scalar_chase_movs(nir_ssa_scalar s)
{
   while (nir_ssa_scalar_is_alu(s)) {
      nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
      if (!nir_alu_instr_is_copy(alu))
         break;

      if (alu->op == nir_op_mov) {
         s.def = alu->src[0].src.ssa;
         s.comp = alu->src[0].swizzle[s.comp];
      } else {
         assert(nir_op_is_vec(alu->op));
         s.def = alu->src[s.comp].src.ssa;
         s.comp = alu->src[s.comp].swizzle[0];
      }
   }

   return s;
}

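/* Illustrative example (hypothetical SSA, no abs/negate/saturate): if
 * c = vec2(a.y, b.x) and d = mov c.yx, then chasing component 0 of d first
 * follows the mov to c.y and then the vec2 to b.x, so the returned scalar
 * is {b, 0}.
 */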