PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/compiler/nir/nir.h
1
/*
2
* Copyright © 2014 Connor Abbott
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*
23
* Authors:
24
* Connor Abbott ([email protected])
25
*
26
*/
27
28
#ifndef NIR_H
29
#define NIR_H
30
31
#include "util/hash_table.h"
32
#include "compiler/glsl/list.h"
33
#include "GL/gl.h" /* GLenum */
34
#include "util/list.h"
35
#include "util/log.h"
36
#include "util/ralloc.h"
37
#include "util/set.h"
38
#include "util/bitscan.h"
39
#include "util/bitset.h"
40
#include "util/compiler.h"
41
#include "util/enum_operators.h"
42
#include "util/macros.h"
43
#include "util/format/u_format.h"
44
#include "compiler/nir_types.h"
45
#include "compiler/shader_enums.h"
46
#include "compiler/shader_info.h"
47
#define XXH_INLINE_ALL
48
#include "util/xxhash.h"
49
#include <stdio.h>
50
51
#ifndef NDEBUG
52
#include "util/debug.h"
53
#endif /* NDEBUG */
54
55
#include "nir_opcodes.h"
56
57
#if defined(_WIN32) && !defined(snprintf)
58
#define snprintf _snprintf
59
#endif
60
61
#ifdef __cplusplus
62
extern "C" {
63
#endif
64
65
#define NIR_FALSE 0u
66
#define NIR_TRUE (~0u)
67
#define NIR_MAX_VEC_COMPONENTS 16
68
#define NIR_MAX_MATRIX_COLUMNS 4
69
#define NIR_STREAM_PACKED (1 << 8)
70
typedef uint16_t nir_component_mask_t;
71
72
static inline bool
73
nir_num_components_valid(unsigned num_components)
74
{
75
return (num_components >= 1 &&
76
num_components <= 5) ||
77
num_components == 8 ||
78
num_components == 16;
79
}
80
81
bool nir_component_mask_can_reinterpret(nir_component_mask_t mask,
82
unsigned old_bit_size,
83
unsigned new_bit_size);
84
nir_component_mask_t
85
nir_component_mask_reinterpret(nir_component_mask_t mask,
86
unsigned old_bit_size,
87
unsigned new_bit_size);
88
89
/** Defines a cast function
90
*
91
* This macro defines a cast function from in_type to out_type where
92
* out_type is some structure type that contains a field of type in_type.
93
*
94
* Note that you have to be a bit careful as the generated cast function
95
* destroys constness.
96
*/
97
#define NIR_DEFINE_CAST(name, in_type, out_type, field, \
98
type_field, type_value) \
99
static inline out_type * \
100
name(const in_type *parent) \
101
{ \
102
assert(parent && parent->type_field == type_value); \
103
return exec_node_data(out_type, parent, field); \
104
}
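/* Illustrative sketch: a typical instantiation of this macro, assuming a
 * struct (here nir_alu_instr) that embeds its nir_instr in a field named
 * "instr":
 *
 *    NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr,
 *                    instr, type, nir_instr_type_alu)
 *
 * The generated nir_instr_as_alu() asserts that the instruction really is
 * an ALU instruction and then performs the container-of downcast.
 */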
105
106
struct nir_function;
107
struct nir_shader;
108
struct nir_instr;
109
struct nir_builder;
110
111
112
/**
113
* Description of built-in state associated with a uniform
114
*
115
* \sa nir_variable::state_slots
116
*/
117
typedef struct {
118
gl_state_index16 tokens[STATE_LENGTH];
119
uint16_t swizzle;
120
} nir_state_slot;
121
122
typedef enum {
123
nir_var_shader_in = (1 << 0),
124
nir_var_shader_out = (1 << 1),
125
nir_var_shader_temp = (1 << 2),
126
nir_var_function_temp = (1 << 3),
127
nir_var_uniform = (1 << 4),
128
nir_var_mem_ubo = (1 << 5),
129
nir_var_system_value = (1 << 6),
130
nir_var_mem_ssbo = (1 << 7),
131
nir_var_mem_shared = (1 << 8),
132
nir_var_mem_global = (1 << 9),
133
nir_var_mem_generic = (nir_var_shader_temp |
134
nir_var_function_temp |
135
nir_var_mem_shared |
136
nir_var_mem_global),
137
nir_var_mem_push_const = (1 << 10), /* not actually used for variables */
138
nir_var_mem_constant = (1 << 11),
139
/** Incoming call or ray payload data for ray-tracing shaders */
140
nir_var_shader_call_data = (1 << 12),
141
/** Ray hit attributes */
142
nir_var_ray_hit_attrib = (1 << 13),
143
nir_var_read_only_modes = nir_var_shader_in | nir_var_uniform |
144
nir_var_system_value | nir_var_mem_constant |
145
nir_var_mem_ubo,
146
/** Modes where vector derefs can be indexed as arrays */
147
nir_var_vec_indexable_modes = nir_var_mem_ubo | nir_var_mem_ssbo |
148
nir_var_mem_shared | nir_var_mem_global |
149
nir_var_mem_push_const,
150
nir_num_variable_modes = 14,
151
nir_var_all = (1 << nir_num_variable_modes) - 1,
152
} nir_variable_mode;
153
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode)
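/* Illustrative sketch: nir_variable_mode values are single bits (apart from
 * the convenience unions such as nir_var_mem_generic), so mode sets are
 * combined and tested with ordinary bit operations:
 *
 *    nir_variable_mode io_modes = nir_var_shader_in | nir_var_shader_out;
 *    bool is_io = (var->data.mode & io_modes) != 0;
 */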
154
155
/**
156
* Rounding modes.
157
*/
158
typedef enum {
159
nir_rounding_mode_undef = 0,
160
nir_rounding_mode_rtne = 1, /* round to nearest even */
161
nir_rounding_mode_ru = 2, /* round up */
162
nir_rounding_mode_rd = 3, /* round down */
163
nir_rounding_mode_rtz = 4, /* round towards zero */
164
} nir_rounding_mode;
165
166
typedef union {
167
bool b;
168
float f32;
169
double f64;
170
int8_t i8;
171
uint8_t u8;
172
int16_t i16;
173
uint16_t u16;
174
int32_t i32;
175
uint32_t u32;
176
int64_t i64;
177
uint64_t u64;
178
} nir_const_value;
179
180
#define nir_const_value_to_array(arr, c, components, m) \
181
{ \
182
for (unsigned i = 0; i < components; ++i) \
183
arr[i] = c[i].m; \
184
} while (false)
185
186
static inline nir_const_value
187
nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size)
188
{
189
nir_const_value v;
190
memset(&v, 0, sizeof(v));
191
192
switch (bit_size) {
193
case 1: v.b = x; break;
194
case 8: v.u8 = x; break;
195
case 16: v.u16 = x; break;
196
case 32: v.u32 = x; break;
197
case 64: v.u64 = x; break;
198
default:
199
unreachable("Invalid bit size");
200
}
201
202
return v;
203
}
204
205
static inline nir_const_value
206
nir_const_value_for_int(int64_t i, unsigned bit_size)
207
{
208
nir_const_value v;
209
memset(&v, 0, sizeof(v));
210
211
assert(bit_size <= 64);
212
if (bit_size < 64) {
213
assert(i >= (-(1ll << (bit_size - 1))));
214
assert(i < (1ll << (bit_size - 1)));
215
}
216
217
return nir_const_value_for_raw_uint(i, bit_size);
218
}
219
220
static inline nir_const_value
221
nir_const_value_for_uint(uint64_t u, unsigned bit_size)
222
{
223
nir_const_value v;
224
memset(&v, 0, sizeof(v));
225
226
assert(bit_size <= 64);
227
if (bit_size < 64)
228
assert(u < (1ull << bit_size));
229
230
return nir_const_value_for_raw_uint(u, bit_size);
231
}
232
233
static inline nir_const_value
234
nir_const_value_for_bool(bool b, unsigned bit_size)
235
{
236
/* Booleans use a 0/-1 convention */
237
return nir_const_value_for_int(-(int)b, bit_size);
238
}
239
240
/* This one isn't inline because it requires half-float conversion */
241
nir_const_value nir_const_value_for_float(double b, unsigned bit_size);
242
243
static inline int64_t
244
nir_const_value_as_int(nir_const_value value, unsigned bit_size)
245
{
246
switch (bit_size) {
247
/* int1_t uses 0/-1 convention */
248
case 1: return -(int)value.b;
249
case 8: return value.i8;
250
case 16: return value.i16;
251
case 32: return value.i32;
252
case 64: return value.i64;
253
default:
254
unreachable("Invalid bit size");
255
}
256
}
257
258
static inline uint64_t
259
nir_const_value_as_uint(nir_const_value value, unsigned bit_size)
260
{
261
switch (bit_size) {
262
case 1: return value.b;
263
case 8: return value.u8;
264
case 16: return value.u16;
265
case 32: return value.u32;
266
case 64: return value.u64;
267
default:
268
unreachable("Invalid bit size");
269
}
270
}
271
272
static inline bool
273
nir_const_value_as_bool(nir_const_value value, unsigned bit_size)
274
{
275
int64_t i = nir_const_value_as_int(value, bit_size);
276
277
/* Booleans of any size use 0/-1 convention */
278
assert(i == 0 || i == -1);
279
280
return i;
281
}
282
283
/* This one isn't inline because it requires half-float conversion */
284
double nir_const_value_as_float(nir_const_value value, unsigned bit_size);
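/* Illustrative sketch: round-tripping an immediate through nir_const_value.
 * The constructors range-check and store the value for the requested bit
 * size, and the accessors reinterpret the raw bits, so:
 *
 *    nir_const_value v = nir_const_value_for_int(-5, 16);
 *    assert(nir_const_value_as_int(v, 16) == -5);
 *    assert(nir_const_value_as_uint(v, 16) == 0xfffb);
 *
 *    nir_const_value t = nir_const_value_for_bool(true, 32);
 *    assert(nir_const_value_as_bool(t, 32));
 */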
285
286
typedef struct nir_constant {
287
/**
288
* Value of the constant.
289
*
290
* The field used to back the values supplied by the constant is determined
291
* by the type associated with the \c nir_variable. Constants may be
292
* scalars, vectors, or matrices.
293
*/
294
nir_const_value values[NIR_MAX_VEC_COMPONENTS];
295
296
/* We could get this from var->type, but it makes clone *much* easier to
297
* not have to care about the type.
298
*/
299
unsigned num_elements;
300
301
/* Array elements / Structure Fields */
302
struct nir_constant **elements;
303
} nir_constant;
304
305
/**
306
* \brief Layout qualifiers for gl_FragDepth.
307
*
308
* The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
309
* with a layout qualifier.
310
*/
311
typedef enum {
312
nir_depth_layout_none, /**< No depth layout is specified. */
313
nir_depth_layout_any,
314
nir_depth_layout_greater,
315
nir_depth_layout_less,
316
nir_depth_layout_unchanged
317
} nir_depth_layout;
318
319
/**
320
* Enum keeping track of how a variable was declared.
321
*/
322
typedef enum {
323
/**
324
* Normal declaration.
325
*/
326
nir_var_declared_normally = 0,
327
328
/**
329
* Variable is implicitly generated by the compiler and should not be
330
* visible via the API.
331
*/
332
nir_var_hidden,
333
} nir_var_declaration_type;
334
335
/**
336
* Either a uniform, global variable, shader input, or shader output. Based on
337
* ir_variable - it should be easy to translate between the two.
338
*/
339
340
typedef struct nir_variable {
341
struct exec_node node;
342
343
/**
344
* Declared type of the variable
345
*/
346
const struct glsl_type *type;
347
348
/**
349
* Declared name of the variable
350
*/
351
char *name;
352
353
struct nir_variable_data {
354
/**
355
* Storage class of the variable.
356
*
357
* \sa nir_variable_mode
358
*/
359
unsigned mode:14;
360
361
/**
362
* Is the variable read-only?
363
*
364
* This is set for variables declared as \c const, shader inputs,
365
* and uniforms.
366
*/
367
unsigned read_only:1;
368
unsigned centroid:1;
369
unsigned sample:1;
370
unsigned patch:1;
371
unsigned invariant:1;
372
373
/**
374
* Precision qualifier.
375
*
376
* In desktop GLSL we do not care about precision qualifiers at all; in
377
* fact, the spec says that precision qualifiers are ignored.
378
*
379
* To make things easy, we make it so that this field is always
380
* GLSL_PRECISION_NONE on desktop shaders. This way all the variables
381
* have the same precision value and the checks we add in the compiler
382
* for this field will never break a desktop shader compile.
383
*/
384
unsigned precision:2;
385
386
/**
387
* Can this variable be coalesced with another?
388
*
389
* This is set by nir_lower_io_to_temporaries to say that any
390
* copies involving this variable should stay put. Propagating it can
391
* duplicate the resulting load/store, which is not wanted, and may
392
* result in a load/store of the variable with an indirect offset which
393
* the backend may not be able to handle.
394
*/
395
unsigned cannot_coalesce:1;
396
397
/**
398
* When separate shader programs are enabled, only input/outputs between
399
* the stages of a multi-stage separate program can be safely removed
400
* from the shader interface. Other input/outputs must remain active.
401
*
402
* This is also used to make sure xfb varyings that are unused by the
403
* fragment shader are not removed.
404
*/
405
unsigned always_active_io:1;
406
407
/**
408
* Interpolation mode for shader inputs / outputs
409
*
410
* \sa glsl_interp_mode
411
*/
412
unsigned interpolation:3;
413
414
/**
415
* If non-zero, then this variable may be packed along with other variables
416
* into a single varying slot, so this offset should be applied when
417
* accessing components. For example, an offset of 1 means that the x
418
* component of this variable is actually stored in component y of the
419
* location specified by \c location.
420
*/
421
unsigned location_frac:2;
422
423
/**
424
* If true, this variable represents an array of scalars that should
425
* be tightly packed. In other words, consecutive array elements
426
* should be stored one component apart, rather than one slot apart.
427
*/
428
unsigned compact:1;
429
430
/**
431
* Whether this is a fragment shader output implicitly initialized with
432
* the previous contents of the specified render target at the
433
* framebuffer location corresponding to this shader invocation.
434
*/
435
unsigned fb_fetch_output:1;
436
437
/**
438
* Non-zero if this variable is considered bindless as defined by
439
* ARB_bindless_texture.
440
*/
441
unsigned bindless:1;
442
443
/**
444
* Was an explicit binding set in the shader?
445
*/
446
unsigned explicit_binding:1;
447
448
/**
449
* Was the location explicitly set in the shader?
450
*
451
* If the location is explicitly set in the shader, it \b cannot be changed
452
* by the linker or by the API (e.g., calls to \c glBindAttribLocation have
453
* no effect).
454
*/
455
unsigned explicit_location:1;
456
457
/**
458
* Was a transfer feedback buffer set in the shader?
459
*/
460
unsigned explicit_xfb_buffer:1;
461
462
/**
463
* Was a transfer feedback stride set in the shader?
464
*/
465
unsigned explicit_xfb_stride:1;
466
467
/**
468
* Was an explicit offset set in the shader?
469
*/
470
unsigned explicit_offset:1;
471
472
/**
473
* Layout of the matrix. Uses glsl_matrix_layout values.
474
*/
475
unsigned matrix_layout:2;
476
477
/**
478
* Non-zero if this variable was created by lowering a named interface
479
* block.
480
*/
481
unsigned from_named_ifc_block:1;
482
483
/**
484
* How the variable was declared. See nir_var_declaration_type.
485
*
486
* This is used to detect variables generated by the compiler, so should
487
* not be visible via the API.
488
*/
489
unsigned how_declared:2;
490
491
/**
492
* Is this variable per-view? If so, we know it must be an array with
493
* size corresponding to the number of views.
494
*/
495
unsigned per_view:1;
496
497
/**
498
* \brief Layout qualifier for gl_FragDepth. See nir_depth_layout.
499
*
500
* This is not equal to \c ir_depth_layout_none if and only if this
501
* variable is \c gl_FragDepth and a layout qualifier is specified.
502
*/
503
unsigned depth_layout:3;
504
505
/**
506
* Vertex stream output identifier.
507
*
508
* For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i]
509
* indicate the stream of the i-th component.
510
*/
511
unsigned stream:9;
512
513
/**
514
* See gl_access_qualifier.
515
*
516
* Access flags for memory variables (SSBO/global), image uniforms, and
517
* bindless images in uniforms/inputs/outputs.
518
*/
519
unsigned access:8;
520
521
/**
522
* Descriptor set binding for sampler or UBO.
523
*/
524
unsigned descriptor_set:5;
525
526
/**
527
* output index for dual source blending.
528
*/
529
unsigned index;
530
531
/**
532
* Initial binding point for a sampler or UBO.
533
*
534
* For array types, this represents the binding point for the first element.
535
*/
536
unsigned binding;
537
538
/**
539
* Storage location of the base of this variable
540
*
541
* The precise meaning of this field depends on the nature of the variable.
542
*
543
* - Vertex shader input: one of the values from \c gl_vert_attrib.
544
* - Vertex shader output: one of the values from \c gl_varying_slot.
545
* - Geometry shader input: one of the values from \c gl_varying_slot.
546
* - Geometry shader output: one of the values from \c gl_varying_slot.
547
* - Fragment shader input: one of the values from \c gl_varying_slot.
548
* - Fragment shader output: one of the values from \c gl_frag_result.
549
* - Uniforms: Per-stage uniform slot number for default uniform block.
550
* - Uniforms: Index within the uniform block definition for UBO members.
551
* - Non-UBO Uniforms: uniform slot number.
552
* - Other: This field is not currently used.
553
*
554
* If the variable is a uniform, shader input, or shader output, and the
555
* slot has not been assigned, the value will be -1.
556
*/
557
int location;
558
559
/**
560
* The actual location of the variable in the IR. Only valid for inputs,
561
* outputs, and uniforms (including samplers and images).
562
*/
563
unsigned driver_location;
564
565
/**
566
* Location an atomic counter or transform feedback is stored at.
567
*/
568
unsigned offset;
569
570
union {
571
struct {
572
/** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */
573
enum pipe_format format;
574
} image;
575
576
struct {
577
/**
578
* For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode
579
*/
580
unsigned is_inline_sampler : 1;
581
unsigned addressing_mode : 3;
582
unsigned normalized_coordinates : 1;
583
unsigned filter_mode : 1;
584
} sampler;
585
586
struct {
587
/**
588
* Transform feedback buffer.
589
*/
590
uint16_t buffer:2;
591
592
/**
593
* Transform feedback stride.
594
*/
595
uint16_t stride;
596
} xfb;
597
};
598
} data;
599
600
/**
601
* Identifier for this variable generated by nir_index_vars() that is unique
602
* among other variables in the same exec_list.
603
*/
604
unsigned index;
605
606
/* Number of nir_variable_data members */
607
uint16_t num_members;
608
609
/**
610
* Built-in state that backs this uniform
611
*
612
* Once set at variable creation, \c state_slots must remain invariant.
613
* This is because, ideally, this array would be shared by all clones of
614
* this variable in the IR tree. In other words, we'd really like for it
615
* to be a fly-weight.
616
*
617
* If the variable is not a uniform, \c num_state_slots will be zero and
618
* \c state_slots will be \c NULL.
619
*/
620
/*@{*/
621
uint16_t num_state_slots; /**< Number of state slots used */
622
nir_state_slot *state_slots; /**< State descriptors. */
623
/*@}*/
624
625
/**
626
* Constant expression assigned in the initializer of the variable
627
*
628
* This field should only be used temporarily by creators of NIR shaders
629
* and then nir_lower_variable_initializers can be used to get rid of them.
630
* Most of the rest of NIR ignores this field or asserts that it's NULL.
631
*/
632
nir_constant *constant_initializer;
633
634
/**
635
* Global variable assigned in the initializer of the variable
636
* This field should only be used temporarily by creators of NIR shaders
637
* and then nir_lower_variable_initializers can be used to get rid of them.
638
* Most of the rest of NIR ignores this field or asserts that it's NULL.
639
*/
640
struct nir_variable *pointer_initializer;
641
642
/**
643
* For variables that are in an interface block or are an instance of an
644
* interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
645
*
646
* \sa ir_variable::location
647
*/
648
const struct glsl_type *interface_type;
649
650
/**
651
* Description of per-member data for per-member struct variables
652
*
653
* This is used for variables which are actually an amalgamation of
654
* multiple entities such as a struct of built-in values or a struct of
655
* inputs each with their own layout specifier. This is only allowed on
656
* variables with a struct or array of array of struct type.
657
*/
658
struct nir_variable_data *members;
659
} nir_variable;
660
661
static inline bool
662
_nir_shader_variable_has_mode(nir_variable *var, unsigned modes)
663
{
664
/* This isn't a shader variable */
665
assert(!(modes & nir_var_function_temp));
666
return var->data.mode & modes;
667
}
668
669
#define nir_foreach_variable_in_list(var, var_list) \
670
foreach_list_typed(nir_variable, var, node, var_list)
671
672
#define nir_foreach_variable_in_list_safe(var, var_list) \
673
foreach_list_typed_safe(nir_variable, var, node, var_list)
674
675
#define nir_foreach_variable_in_shader(var, shader) \
676
nir_foreach_variable_in_list(var, &(shader)->variables)
677
678
#define nir_foreach_variable_in_shader_safe(var, shader) \
679
nir_foreach_variable_in_list_safe(var, &(shader)->variables)
680
681
#define nir_foreach_variable_with_modes(var, shader, modes) \
682
nir_foreach_variable_in_shader(var, shader) \
683
if (_nir_shader_variable_has_mode(var, modes))
684
685
#define nir_foreach_variable_with_modes_safe(var, shader, modes) \
686
nir_foreach_variable_in_shader_safe(var, shader) \
687
if (_nir_shader_variable_has_mode(var, modes))
688
689
#define nir_foreach_shader_in_variable(var, shader) \
690
nir_foreach_variable_with_modes(var, shader, nir_var_shader_in)
691
692
#define nir_foreach_shader_in_variable_safe(var, shader) \
693
nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in)
694
695
#define nir_foreach_shader_out_variable(var, shader) \
696
nir_foreach_variable_with_modes(var, shader, nir_var_shader_out)
697
698
#define nir_foreach_shader_out_variable_safe(var, shader) \
699
nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out)
700
701
#define nir_foreach_uniform_variable(var, shader) \
702
nir_foreach_variable_with_modes(var, shader, nir_var_uniform)
703
704
#define nir_foreach_uniform_variable_safe(var, shader) \
705
nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform)
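/* Illustrative sketch: the _safe variants allow removing the current
 * variable while iterating. A pass that drops unused uniforms might look
 * roughly like this (is_used() is a hypothetical predicate, not part of
 * this header):
 *
 *    nir_foreach_uniform_variable_safe(var, shader) {
 *       if (!is_used(shader, var))
 *          exec_node_remove(&var->node);
 *    }
 */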
706
707
static inline bool
708
nir_variable_is_global(const nir_variable *var)
709
{
710
return var->data.mode != nir_var_function_temp;
711
}
712
713
typedef struct nir_register {
714
struct exec_node node;
715
716
unsigned num_components; /** < number of vector components */
717
unsigned num_array_elems; /** < size of array (0 for no array) */
718
719
/* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
720
uint8_t bit_size;
721
722
/**
723
* True if this register may have different values in different SIMD
724
* invocations of the shader.
725
*/
726
bool divergent;
727
728
/** generic register index. */
729
unsigned index;
730
731
/** set of nir_srcs where this register is used (read from) */
732
struct list_head uses;
733
734
/** set of nir_dests where this register is defined (written to) */
735
struct list_head defs;
736
737
/** set of nir_ifs where this register is used as a condition */
738
struct list_head if_uses;
739
} nir_register;
740
741
#define nir_foreach_register(reg, reg_list) \
742
foreach_list_typed(nir_register, reg, node, reg_list)
743
#define nir_foreach_register_safe(reg, reg_list) \
744
foreach_list_typed_safe(nir_register, reg, node, reg_list)
745
746
typedef enum PACKED {
747
nir_instr_type_alu,
748
nir_instr_type_deref,
749
nir_instr_type_call,
750
nir_instr_type_tex,
751
nir_instr_type_intrinsic,
752
nir_instr_type_load_const,
753
nir_instr_type_jump,
754
nir_instr_type_ssa_undef,
755
nir_instr_type_phi,
756
nir_instr_type_parallel_copy,
757
} nir_instr_type;
758
759
typedef struct nir_instr {
760
struct exec_node node;
761
struct nir_block *block;
762
nir_instr_type type;
763
764
/* A temporary for optimization and analysis passes to use for storing
765
* flags. For instance, DCE uses this to store the "dead/live" info.
766
*/
767
uint8_t pass_flags;
768
769
/** generic instruction index. */
770
uint32_t index;
771
} nir_instr;
772
773
static inline nir_instr *
774
nir_instr_next(nir_instr *instr)
775
{
776
struct exec_node *next = exec_node_get_next(&instr->node);
777
if (exec_node_is_tail_sentinel(next))
778
return NULL;
779
else
780
return exec_node_data(nir_instr, next, node);
781
}
782
783
static inline nir_instr *
784
nir_instr_prev(nir_instr *instr)
785
{
786
struct exec_node *prev = exec_node_get_prev(&instr->node);
787
if (exec_node_is_head_sentinel(prev))
788
return NULL;
789
else
790
return exec_node_data(nir_instr, prev, node);
791
}
792
793
static inline bool
794
nir_instr_is_first(const nir_instr *instr)
795
{
796
return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node));
797
}
798
799
static inline bool
800
nir_instr_is_last(const nir_instr *instr)
801
{
802
return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node));
803
}
804
805
typedef struct nir_ssa_def {
806
/** Instruction which produces this SSA value. */
807
nir_instr *parent_instr;
808
809
/** set of nir_instrs where this register is used (read from) */
810
struct list_head uses;
811
812
/** set of nir_ifs where this register is used as a condition */
813
struct list_head if_uses;
814
815
/** generic SSA definition index. */
816
unsigned index;
817
818
uint8_t num_components;
819
820
/* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
821
uint8_t bit_size;
822
823
/**
824
* True if this SSA value may have different values in different SIMD
825
* invocations of the shader. This is set by nir_divergence_analysis.
826
*/
827
bool divergent;
828
} nir_ssa_def;
829
830
struct nir_src;
831
832
typedef struct {
833
nir_register *reg;
834
struct nir_src *indirect; /** < NULL for no indirect offset */
835
unsigned base_offset;
836
837
/* TODO use-def chain goes here */
838
} nir_reg_src;
839
840
typedef struct {
841
nir_instr *parent_instr;
842
struct list_head def_link;
843
844
nir_register *reg;
845
struct nir_src *indirect; /** < NULL for no indirect offset */
846
unsigned base_offset;
847
848
/* TODO def-use chain goes here */
849
} nir_reg_dest;
850
851
struct nir_if;
852
853
typedef struct nir_src {
854
union {
855
/** Instruction that consumes this value as a source. */
856
nir_instr *parent_instr;
857
struct nir_if *parent_if;
858
};
859
860
struct list_head use_link;
861
862
union {
863
nir_reg_src reg;
864
nir_ssa_def *ssa;
865
};
866
867
bool is_ssa;
868
} nir_src;
869
870
static inline nir_src
871
nir_src_init(void)
872
{
873
nir_src src = { { NULL } };
874
return src;
875
}
876
877
#define NIR_SRC_INIT nir_src_init()
878
879
#define nir_foreach_use(src, reg_or_ssa_def) \
880
list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
881
882
#define nir_foreach_use_safe(src, reg_or_ssa_def) \
883
list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
884
885
#define nir_foreach_if_use(src, reg_or_ssa_def) \
886
list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
887
888
#define nir_foreach_if_use_safe(src, reg_or_ssa_def) \
889
list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
890
891
typedef struct {
892
union {
893
nir_reg_dest reg;
894
nir_ssa_def ssa;
895
};
896
897
bool is_ssa;
898
} nir_dest;
899
900
static inline nir_dest
901
nir_dest_init(void)
902
{
903
nir_dest dest = { { { NULL } } };
904
return dest;
905
}
906
907
#define NIR_DEST_INIT nir_dest_init()
908
909
#define nir_foreach_def(dest, reg) \
910
list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
911
912
#define nir_foreach_def_safe(dest, reg) \
913
list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link)
914
915
static inline nir_src
916
nir_src_for_ssa(nir_ssa_def *def)
917
{
918
nir_src src = NIR_SRC_INIT;
919
920
src.is_ssa = true;
921
src.ssa = def;
922
923
return src;
924
}
925
926
static inline nir_src
927
nir_src_for_reg(nir_register *reg)
928
{
929
nir_src src = NIR_SRC_INIT;
930
931
src.is_ssa = false;
932
src.reg.reg = reg;
933
src.reg.indirect = NULL;
934
src.reg.base_offset = 0;
935
936
return src;
937
}
938
939
static inline nir_dest
940
nir_dest_for_reg(nir_register *reg)
941
{
942
nir_dest dest = NIR_DEST_INIT;
943
944
dest.reg.reg = reg;
945
946
return dest;
947
}
948
949
static inline unsigned
950
nir_src_bit_size(nir_src src)
951
{
952
return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size;
953
}
954
955
static inline unsigned
956
nir_src_num_components(nir_src src)
957
{
958
return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components;
959
}
960
961
static inline bool
962
nir_src_is_const(nir_src src)
963
{
964
return src.is_ssa &&
965
src.ssa->parent_instr->type == nir_instr_type_load_const;
966
}
967
968
static inline bool
969
nir_src_is_undef(nir_src src)
970
{
971
return src.is_ssa &&
972
src.ssa->parent_instr->type == nir_instr_type_ssa_undef;
973
}
974
975
static inline bool
976
nir_src_is_divergent(nir_src src)
977
{
978
return src.is_ssa ? src.ssa->divergent : src.reg.reg->divergent;
979
}
980
981
static inline unsigned
982
nir_dest_bit_size(nir_dest dest)
983
{
984
return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size;
985
}
986
987
static inline unsigned
988
nir_dest_num_components(nir_dest dest)
989
{
990
return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components;
991
}
992
993
static inline bool
994
nir_dest_is_divergent(nir_dest dest)
995
{
996
return dest.is_ssa ? dest.ssa.divergent : dest.reg.reg->divergent;
997
}
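/* Illustrative sketch: these accessors hide whether a value is an SSA def
 * or a register. For any source or destination,
 *
 *    unsigned bits  = nir_src_bit_size(alu->src[0].src);
 *    unsigned comps = nir_dest_num_components(alu->dest.dest);
 *
 * return the same answers whether is_ssa is true or false.
 */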
998
999
/* Are all components the same, ie. .xxxx */
1000
static inline bool
1001
nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
1002
{
1003
for (unsigned i = 1; i < nr_comp; i++)
1004
if (swiz[i] != swiz[0])
1005
return false;
1006
return true;
1007
}
1008
1009
/* Are all components sequential, ie. .yzw */
1010
static inline bool
1011
nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
1012
{
1013
for (unsigned i = 1; i < nr_comp; i++)
1014
if (swiz[i] != (swiz[0] + i))
1015
return false;
1016
return true;
1017
}
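/* Illustrative sketch: for a 3-component source,
 *
 *    uint8_t s1[3] = { 2, 2, 2 };   // nir_is_same_comp_swizzle(s1, 3) -> true
 *    uint8_t s2[3] = { 1, 2, 3 };   // nir_is_sequential_comp_swizzle(s2, 3) -> true
 *    uint8_t s3[3] = { 0, 2, 1 };   // both helpers return false
 */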
1018
1019
void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
1020
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
1021
1022
typedef struct {
1023
/** Base source */
1024
nir_src src;
1025
1026
/**
1027
* \name input modifiers
1028
*/
1029
/*@{*/
1030
/**
1031
* For inputs interpreted as floating point, flips the sign bit. For
1032
* inputs interpreted as integers, performs the two's complement negation.
1033
*/
1034
bool negate;
1035
1036
/**
1037
* Clears the sign bit for floating point values, and computes the integer
1038
* absolute value for integers. Note that the negate modifier acts after
1039
* the absolute value modifier, therefore if both are set then all inputs
1040
* will become negative.
1041
*/
1042
bool abs;
1043
/*@}*/
1044
1045
/**
1046
* For each input component, says which component of the register it is
1047
* chosen from.
1048
*
1049
* Note that which elements of the swizzle are used and which are ignored
1050
* are based on the write mask for most opcodes - for example, a statement
1051
* like "foo.xzw = bar.zyx" would have a writemask of 1101b and a swizzle
1052
* of {2, x, 1, 0} where x means "don't care."
1053
*/
1054
uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
1055
} nir_alu_src;
1056
1057
typedef struct {
1058
/** Base destination */
1059
nir_dest dest;
1060
1061
/**
1062
* Saturate output modifier
1063
*
1064
* Only valid for opcodes that output floating-point numbers. Clamps the
1065
* output to between 0.0 and 1.0 inclusive.
1066
*/
1067
bool saturate;
1068
1069
/**
1070
* Write-mask
1071
*
1072
* Ignored if dest.is_ssa is true
1073
*/
1074
unsigned write_mask : NIR_MAX_VEC_COMPONENTS;
1075
} nir_alu_dest;
1076
1077
/** NIR sized and unsized types
1078
*
1079
* The values in this enum are carefully chosen so that the sized type is
1080
* just the unsized type OR the number of bits.
1081
*/
1082
typedef enum PACKED {
1083
nir_type_invalid = 0, /* Not a valid type */
1084
nir_type_int = 2,
1085
nir_type_uint = 4,
1086
nir_type_bool = 6,
1087
nir_type_float = 128,
1088
nir_type_bool1 = 1 | nir_type_bool,
1089
nir_type_bool8 = 8 | nir_type_bool,
1090
nir_type_bool16 = 16 | nir_type_bool,
1091
nir_type_bool32 = 32 | nir_type_bool,
1092
nir_type_int1 = 1 | nir_type_int,
1093
nir_type_int8 = 8 | nir_type_int,
1094
nir_type_int16 = 16 | nir_type_int,
1095
nir_type_int32 = 32 | nir_type_int,
1096
nir_type_int64 = 64 | nir_type_int,
1097
nir_type_uint1 = 1 | nir_type_uint,
1098
nir_type_uint8 = 8 | nir_type_uint,
1099
nir_type_uint16 = 16 | nir_type_uint,
1100
nir_type_uint32 = 32 | nir_type_uint,
1101
nir_type_uint64 = 64 | nir_type_uint,
1102
nir_type_float16 = 16 | nir_type_float,
1103
nir_type_float32 = 32 | nir_type_float,
1104
nir_type_float64 = 64 | nir_type_float,
1105
} nir_alu_type;
1106
1107
#define NIR_ALU_TYPE_SIZE_MASK 0x79
1108
#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86
1109
1110
static inline unsigned
1111
nir_alu_type_get_type_size(nir_alu_type type)
1112
{
1113
return type & NIR_ALU_TYPE_SIZE_MASK;
1114
}
1115
1116
static inline nir_alu_type
1117
nir_alu_type_get_base_type(nir_alu_type type)
1118
{
1119
return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK);
1120
}
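/* Illustrative sketch: a sized type is just the base type OR'd with its bit
 * width, so the two helpers above decompose it again:
 *
 *    nir_alu_type_get_type_size(nir_type_float32)  == 32
 *    nir_alu_type_get_base_type(nir_type_float32)  == nir_type_float
 *    (nir_type_uint | 16)                          == nir_type_uint16
 */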
1121
1122
static inline nir_alu_type
1123
nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
1124
{
1125
switch (base_type) {
1126
case GLSL_TYPE_BOOL:
1127
return nir_type_bool1;
1128
break;
1129
case GLSL_TYPE_UINT:
1130
return nir_type_uint32;
1131
break;
1132
case GLSL_TYPE_INT:
1133
return nir_type_int32;
1134
break;
1135
case GLSL_TYPE_UINT16:
1136
return nir_type_uint16;
1137
break;
1138
case GLSL_TYPE_INT16:
1139
return nir_type_int16;
1140
break;
1141
case GLSL_TYPE_UINT8:
1142
return nir_type_uint8;
1143
case GLSL_TYPE_INT8:
1144
return nir_type_int8;
1145
case GLSL_TYPE_UINT64:
1146
return nir_type_uint64;
1147
break;
1148
case GLSL_TYPE_INT64:
1149
return nir_type_int64;
1150
break;
1151
case GLSL_TYPE_FLOAT:
1152
return nir_type_float32;
1153
break;
1154
case GLSL_TYPE_FLOAT16:
1155
return nir_type_float16;
1156
break;
1157
case GLSL_TYPE_DOUBLE:
1158
return nir_type_float64;
1159
break;
1160
1161
case GLSL_TYPE_SAMPLER:
1162
case GLSL_TYPE_IMAGE:
1163
case GLSL_TYPE_ATOMIC_UINT:
1164
case GLSL_TYPE_STRUCT:
1165
case GLSL_TYPE_INTERFACE:
1166
case GLSL_TYPE_ARRAY:
1167
case GLSL_TYPE_VOID:
1168
case GLSL_TYPE_SUBROUTINE:
1169
case GLSL_TYPE_FUNCTION:
1170
case GLSL_TYPE_ERROR:
1171
return nir_type_invalid;
1172
}
1173
1174
unreachable("unknown type");
1175
}
1176
1177
static inline nir_alu_type
1178
nir_get_nir_type_for_glsl_type(const struct glsl_type *type)
1179
{
1180
return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type));
1181
}
1182
1183
static inline enum glsl_base_type
1184
nir_get_glsl_base_type_for_nir_type(nir_alu_type base_type)
1185
{
1186
switch (base_type) {
1187
case nir_type_bool1:
1188
return GLSL_TYPE_BOOL;
1189
case nir_type_uint32:
1190
return GLSL_TYPE_UINT;
1191
case nir_type_int32:
1192
return GLSL_TYPE_INT;
1193
case nir_type_uint16:
1194
return GLSL_TYPE_UINT16;
1195
case nir_type_int16:
1196
return GLSL_TYPE_INT16;
1197
case nir_type_uint8:
1198
return GLSL_TYPE_UINT8;
1199
case nir_type_int8:
1200
return GLSL_TYPE_INT8;
1201
case nir_type_uint64:
1202
return GLSL_TYPE_UINT64;
1203
case nir_type_int64:
1204
return GLSL_TYPE_INT64;
1205
case nir_type_float32:
1206
return GLSL_TYPE_FLOAT;
1207
case nir_type_float16:
1208
return GLSL_TYPE_FLOAT16;
1209
case nir_type_float64:
1210
return GLSL_TYPE_DOUBLE;
1211
1212
default: unreachable("Not a sized nir_alu_type");
1213
}
1214
}
1215
1216
nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
1217
nir_rounding_mode rnd);
1218
1219
static inline nir_op
1220
nir_op_vec(unsigned components)
1221
{
1222
switch (components) {
1223
case 1: return nir_op_mov;
1224
case 2: return nir_op_vec2;
1225
case 3: return nir_op_vec3;
1226
case 4: return nir_op_vec4;
1227
case 5: return nir_op_vec5;
1228
case 8: return nir_op_vec8;
1229
case 16: return nir_op_vec16;
1230
default: unreachable("bad component count");
1231
}
1232
}
1233
1234
static inline bool
1235
nir_op_is_vec(nir_op op)
1236
{
1237
switch (op) {
1238
case nir_op_mov:
1239
case nir_op_vec2:
1240
case nir_op_vec3:
1241
case nir_op_vec4:
1242
case nir_op_vec5:
1243
case nir_op_vec8:
1244
case nir_op_vec16:
1245
return true;
1246
default:
1247
return false;
1248
}
1249
}
1250
1251
static inline bool
1252
nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
1253
{
1254
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
1255
(32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
1256
(64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
1257
}
1258
1259
static inline bool
1260
nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
1261
{
1262
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) ||
1263
(32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) ||
1264
(64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64);
1265
}
1266
1267
static inline bool
1268
nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size)
1269
{
1270
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) ||
1271
(32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) ||
1272
(64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64);
1273
}
1274
1275
static inline bool
1276
nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size)
1277
{
1278
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
1279
(32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
1280
(64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
1281
}
1282
1283
static inline bool
1284
nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size)
1285
{
1286
return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
1287
(32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
1288
(64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
1289
}
1290
1291
static inline bool
1292
nir_has_any_rounding_mode_rtz(unsigned execution_mode)
1293
{
1294
return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
1295
(execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
1296
(execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
1297
}
1298
1299
static inline bool
1300
nir_has_any_rounding_mode_rtne(unsigned execution_mode)
1301
{
1302
return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
1303
(execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
1304
(execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
1305
}
1306
1307
static inline nir_rounding_mode
1308
nir_get_rounding_mode_from_float_controls(unsigned execution_mode,
1309
nir_alu_type type)
1310
{
1311
if (nir_alu_type_get_base_type(type) != nir_type_float)
1312
return nir_rounding_mode_undef;
1313
1314
unsigned bit_size = nir_alu_type_get_type_size(type);
1315
1316
if (nir_is_rounding_mode_rtz(execution_mode, bit_size))
1317
return nir_rounding_mode_rtz;
1318
if (nir_is_rounding_mode_rtne(execution_mode, bit_size))
1319
return nir_rounding_mode_rtne;
1320
return nir_rounding_mode_undef;
1321
}
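/* Illustrative sketch: with float controls requesting RTZ for fp16 only,
 * i.e. execution_mode == FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16:
 *
 *    nir_get_rounding_mode_from_float_controls(mode, nir_type_float16)
 *       == nir_rounding_mode_rtz
 *    nir_get_rounding_mode_from_float_controls(mode, nir_type_float32)
 *       == nir_rounding_mode_undef
 *    nir_get_rounding_mode_from_float_controls(mode, nir_type_int32)
 *       == nir_rounding_mode_undef   (non-float types never get a mode)
 */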
1322
1323
static inline bool
1324
nir_has_any_rounding_mode_enabled(unsigned execution_mode)
1325
{
1326
bool result =
1327
nir_has_any_rounding_mode_rtne(execution_mode) ||
1328
nir_has_any_rounding_mode_rtz(execution_mode);
1329
return result;
1330
}
1331
1332
typedef enum {
1333
/**
1334
* Operation where the first two sources are commutative.
1335
*
1336
* For 2-source operations, this is just mathematical commutativity. Some
1337
* 3-source operations, like ffma, are only commutative in the first two
1338
* sources.
1339
*/
1340
NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),
1341
1342
/**
1343
* Operation is associative
1344
*/
1345
NIR_OP_IS_ASSOCIATIVE = (1 << 1),
1346
} nir_op_algebraic_property;
1347
1348
/* vec16 is the widest ALU op in NIR, making the max number of inputs to ALU
1349
* instructions the same as NIR_MAX_VEC_COMPONENTS.
1350
*/
1351
#define NIR_ALU_MAX_INPUTS NIR_MAX_VEC_COMPONENTS
1352
1353
typedef struct nir_op_info {
1354
/** Name of the NIR ALU opcode */
1355
const char *name;
1356
1357
/** Number of inputs (sources) */
1358
uint8_t num_inputs;
1359
1360
/**
1361
* The number of components in the output
1362
*
1363
* If non-zero, this is the size of the output and input sizes are
1364
* explicitly given; swizzle and writemask are still in effect, but if
1365
* the output component is masked out, then the input component may
1366
* still be in use.
1367
*
1368
* If zero, the opcode acts in the standard, per-component manner; the
1369
* operation is performed on each component (except the ones that are
1370
* masked out) with the input being taken from the input swizzle for
1371
* that component.
1372
*
1373
* The size of some of the inputs may be given (i.e. non-zero) even
1374
* though output_size is zero; in that case, the inputs with a zero
1375
* size act per-component, while the inputs with non-zero size don't.
1376
*/
1377
uint8_t output_size;
1378
1379
/**
1380
* The type of vector that the instruction outputs. Note that the
1381
* saturate modifier is only allowed on outputs with the float type.
1382
*/
1383
nir_alu_type output_type;
1384
1385
/**
1386
* The number of components in each input
1387
*
1388
* See nir_op_infos::output_size for more detail about the relationship
1389
* between input and output sizes.
1390
*/
1391
uint8_t input_sizes[NIR_ALU_MAX_INPUTS];
1392
1393
/**
1394
* The type of vector that each input takes. Note that negate and
1395
* absolute value are only allowed on inputs with int or float type and
1396
* behave differently on the two.
1397
*/
1398
nir_alu_type input_types[NIR_ALU_MAX_INPUTS];
1399
1400
/** Algebraic properties of this opcode */
1401
nir_op_algebraic_property algebraic_properties;
1402
1403
/** Whether this represents a numeric conversion opcode */
1404
bool is_conversion;
1405
} nir_op_info;
1406
1407
/** Metadata for each nir_op, indexed by opcode */
1408
extern const nir_op_info nir_op_infos[nir_num_opcodes];
1409
1410
typedef struct nir_alu_instr {
1411
/** Base instruction */
1412
nir_instr instr;
1413
1414
/** Opcode */
1415
nir_op op;
1416
1417
/** Indicates that this ALU instruction generates an exact value
1418
*
1419
* This is kind of a mixture of GLSL "precise" and "invariant" and not
1420
* really equivalent to either. This indicates that the value generated by
1421
* this operation is high-precision and any code transformations that touch
1422
* it must ensure that the resulting value is bit-for-bit identical to the
1423
* original.
1424
*/
1425
bool exact:1;
1426
1427
/**
1428
* Indicates that this instruction does not cause signed integer wrapping
1429
* to occur, in the form of overflow or underflow.
1430
*/
1431
bool no_signed_wrap:1;
1432
1433
/**
1434
* Indicates that this instruction does not cause unsigned integer wrapping
1435
* to occur, in the form of overflow or underflow.
1436
*/
1437
bool no_unsigned_wrap:1;
1438
1439
/** Destination */
1440
nir_alu_dest dest;
1441
1442
/** Sources
1443
*
1444
* The size of the array is given by nir_op_info::num_inputs.
1445
*/
1446
nir_alu_src src[];
1447
} nir_alu_instr;
1448
1449
void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
1450
nir_alu_instr *instr);
1451
void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
1452
nir_alu_instr *instr);
1453
1454
bool nir_alu_instr_is_copy(nir_alu_instr *instr);
1455
1456
/* is this source channel used? */
1457
static inline bool
1458
nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
1459
unsigned channel)
1460
{
1461
if (nir_op_infos[instr->op].input_sizes[src] > 0)
1462
return channel < nir_op_infos[instr->op].input_sizes[src];
1463
1464
return (instr->dest.write_mask >> channel) & 1;
1465
}
1466
1467
static inline nir_component_mask_t
1468
nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
1469
{
1470
nir_component_mask_t read_mask = 0;
1471
for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
1472
if (!nir_alu_instr_channel_used(instr, src, c))
1473
continue;
1474
1475
read_mask |= (1 << instr->src[src].swizzle[c]);
1476
}
1477
return read_mask;
1478
}
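/* Illustrative sketch: for an instruction that writes .xz (write_mask
 * 0b0101) and whose source 0 has swizzle { 3, 0, 3, 1, ... }, only swizzle
 * channels 0 and 2 are consulted, both of which select component 3, so
 * nir_alu_instr_src_read_mask(instr, 0) == 0b1000.
 */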
1479
1480
/**
1481
* Get the number of channels used for a source
1482
*/
1483
static inline unsigned
1484
nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
1485
{
1486
if (nir_op_infos[instr->op].input_sizes[src] > 0)
1487
return nir_op_infos[instr->op].input_sizes[src];
1488
1489
return nir_dest_num_components(instr->dest.dest);
1490
}
1491
1492
static inline bool
1493
nir_alu_instr_is_comparison(const nir_alu_instr *instr)
1494
{
1495
switch (instr->op) {
1496
case nir_op_flt:
1497
case nir_op_fge:
1498
case nir_op_feq:
1499
case nir_op_fneu:
1500
case nir_op_ilt:
1501
case nir_op_ult:
1502
case nir_op_ige:
1503
case nir_op_uge:
1504
case nir_op_ieq:
1505
case nir_op_ine:
1506
case nir_op_i2b1:
1507
case nir_op_f2b1:
1508
case nir_op_inot:
1509
return true;
1510
default:
1511
return false;
1512
}
1513
}
1514
1515
bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2,
1516
nir_alu_type full_type);
1517
1518
bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
1519
unsigned src1, unsigned src2);
1520
1521
bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
1522
const nir_alu_instr *alu2,
1523
unsigned src1, unsigned src2);
1524
1525
bool nir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn);
1526
1527
typedef enum {
1528
nir_deref_type_var,
1529
nir_deref_type_array,
1530
nir_deref_type_array_wildcard,
1531
nir_deref_type_ptr_as_array,
1532
nir_deref_type_struct,
1533
nir_deref_type_cast,
1534
} nir_deref_type;
1535
1536
typedef struct {
1537
nir_instr instr;
1538
1539
/** The type of this deref instruction */
1540
nir_deref_type deref_type;
1541
1542
/** Bitmask of the modes the underlying variable might have
1543
*
1544
* For OpenCL-style generic pointers, we may not know exactly what mode it
1545
* is at any given point in time in the compile process. This bitfield
1546
* contains the set of modes which it MAY be.
1547
*
1548
* Generally, this field should not be accessed directly. Use one of the
1549
* nir_deref_mode_ helpers instead.
1550
*/
1551
nir_variable_mode modes;
1552
1553
/** The dereferenced type of the resulting pointer value */
1554
const struct glsl_type *type;
1555
1556
union {
1557
/** Variable being dereferenced if deref_type is a deref_var */
1558
nir_variable *var;
1559
1560
/** Parent deref if deref_type is not deref_var */
1561
nir_src parent;
1562
};
1563
1564
/** Additional deref parameters */
1565
union {
1566
struct {
1567
nir_src index;
1568
} arr;
1569
1570
struct {
1571
unsigned index;
1572
} strct;
1573
1574
struct {
1575
unsigned ptr_stride;
1576
unsigned align_mul;
1577
unsigned align_offset;
1578
} cast;
1579
};
1580
1581
/** Destination to store the resulting "pointer" */
1582
nir_dest dest;
1583
} nir_deref_instr;
1584
1585
/** Returns true if deref might have one of the given modes
1586
*
1587
* For multi-mode derefs, this returns true if any of the possible modes for
1588
* the deref match any of the specified modes. This function returning true
1589
* does NOT mean that the deref definitely has one of those modes. It simply
1590
* means that, with the best information we have at the time, it might.
1591
*/
1592
static inline bool
1593
nir_deref_mode_may_be(const nir_deref_instr *deref, nir_variable_mode modes)
1594
{
1595
assert(!(modes & ~nir_var_all));
1596
assert(deref->modes != 0);
1597
return deref->modes & modes;
1598
}
1599
1600
/** Returns true if deref must have one of the given modes
1601
*
1602
* For multi-mode derefs, this returns true if NIR can prove that the given
1603
* deref has one of the specified modes. This function returning false does
1604
* NOT mean that the deref doesn't have one of the given modes. It very well may
1605
* have one of those modes, we just don't have enough information to prove
1606
* that it does for sure.
1607
*/
1608
static inline bool
1609
nir_deref_mode_must_be(const nir_deref_instr *deref, nir_variable_mode modes)
1610
{
1611
assert(!(modes & ~nir_var_all));
1612
assert(deref->modes != 0);
1613
return !(deref->modes & ~modes);
1614
}
1615
1616
/** Returns true if deref has the given mode
1617
*
1618
* This returns true if the deref has exactly the mode specified. If the
1619
* deref may have that mode but may also have a different mode (i.e. modes has
1620
* multiple bits set), this will assert-fail.
1621
*
1622
* If you're confused about which nir_deref_mode_ helper to use, use this one
1623
* or nir_deref_mode_is_one_of below.
1624
*/
1625
static inline bool
1626
nir_deref_mode_is(const nir_deref_instr *deref, nir_variable_mode mode)
1627
{
1628
assert(util_bitcount(mode) == 1 && (mode & nir_var_all));
1629
assert(deref->modes != 0);
1630
1631
/* This is only for "simple" cases so, if modes might interact with this
1632
* deref then the deref has to have a single mode.
1633
*/
1634
if (nir_deref_mode_may_be(deref, mode)) {
1635
assert(util_bitcount(deref->modes) == 1);
1636
assert(deref->modes == mode);
1637
}
1638
1639
return deref->modes == mode;
1640
}
1641
1642
/** Returns true if deref has one of the given modes
1643
*
1644
* This returns true if the deref has exactly one possible mode and that mode
1645
* is one of the modes specified. If the deref may have one of those modes
1646
* but may also have a different mode (i.e. modes has multiple bits set), this
1647
* will assert-fail.
1648
*/
1649
static inline bool
1650
nir_deref_mode_is_one_of(const nir_deref_instr *deref, nir_variable_mode modes)
1651
{
1652
/* This is only for "simple" cases so, if modes might interact with this
1653
* deref then the deref has to have a single mode.
1654
*/
1655
if (nir_deref_mode_may_be(deref, modes)) {
1656
assert(util_bitcount(deref->modes) == 1);
1657
assert(nir_deref_mode_must_be(deref, modes));
1658
}
1659
1660
return nir_deref_mode_may_be(deref, modes);
1661
}
1662
1663
/** Returns true if deref's possible modes lie in the given set of modes
1664
*
1665
* This returns true if the deref's modes lie in the given set of modes. If
1666
* the deref's modes overlap with the specified modes but aren't entirely
1667
* contained in the specified set of modes, this will assert-fail. In
1668
* particular, if this is used in a generic pointers scenario, the specified
1669
* modes has to contain all or none of the possible generic pointer modes.
1670
*
1671
* This is intended mostly for mass-lowering of derefs which might have
1672
* generic pointers.
1673
*/
1674
static inline bool
1675
nir_deref_mode_is_in_set(const nir_deref_instr *deref, nir_variable_mode modes)
1676
{
1677
if (nir_deref_mode_may_be(deref, modes))
1678
assert(nir_deref_mode_must_be(deref, modes));
1679
1680
return nir_deref_mode_may_be(deref, modes);
1681
}
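/* Illustrative sketch of picking a helper: a lowering pass that only cares
 * about UBO/SSBO derefs but must also tolerate OpenCL-style generic
 * pointers would typically gate on
 *
 *    nir_deref_mode_is_in_set(deref, nir_var_mem_ubo | nir_var_mem_ssbo)
 *
 * whereas a pass that knows its derefs carry a single mode can simply use
 * nir_deref_mode_is(deref, nir_var_mem_ssbo).
 */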
1682
1683
static inline nir_deref_instr *nir_src_as_deref(nir_src src);
1684
1685
static inline nir_deref_instr *
1686
nir_deref_instr_parent(const nir_deref_instr *instr)
1687
{
1688
if (instr->deref_type == nir_deref_type_var)
1689
return NULL;
1690
else
1691
return nir_src_as_deref(instr->parent);
1692
}
1693
1694
static inline nir_variable *
1695
nir_deref_instr_get_variable(const nir_deref_instr *instr)
1696
{
1697
while (instr->deref_type != nir_deref_type_var) {
1698
if (instr->deref_type == nir_deref_type_cast)
1699
return NULL;
1700
1701
instr = nir_deref_instr_parent(instr);
1702
}
1703
1704
return instr->var;
1705
}
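/* Illustrative sketch: for a deref chain of the form var[i].field, the
 * helper above follows parent links through the array and struct derefs
 * back to the nir_deref_type_var at the root and returns its nir_variable;
 * it returns NULL as soon as it hits a cast, since a cast has no variable
 * behind it.
 */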
1706
1707
bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
1708
bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);
1709
bool nir_deref_instr_has_complex_use(nir_deref_instr *instr);
1710
1711
bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);
1712
1713
unsigned nir_deref_instr_array_stride(nir_deref_instr *instr);
1714
1715
typedef struct {
1716
nir_instr instr;
1717
1718
struct nir_function *callee;
1719
1720
unsigned num_params;
1721
nir_src params[];
1722
} nir_call_instr;
1723
1724
#include "nir_intrinsics.h"
1725
1726
#define NIR_INTRINSIC_MAX_CONST_INDEX 5
1727
1728
/** Represents an intrinsic
1729
*
1730
* An intrinsic is an instruction type for handling things that are
1731
* more-or-less regular operations but don't just consume and produce SSA
1732
* values like ALU operations do. Intrinsics are not for things that have
1733
* special semantic meaning such as phi nodes and parallel copies.
1734
* Examples of intrinsics include variable load/store operations, system
1735
* value loads, and the like. Even though texturing more-or-less falls
1736
* under this category, texturing is its own instruction type because
1737
* trying to represent texturing with intrinsics would lead to a
1738
* combinatorial explosion of intrinsic opcodes.
1739
*
1740
* By having a single instruction type for handling a lot of different
1741
* cases, optimization passes can look for intrinsics and, for the most
1742
* part, completely ignore them. Each intrinsic type also has a few
1743
* possible flags that govern whether or not they can be reordered or
1744
* eliminated. That way passes like dead code elimination can still work
1745
* on intrinsics without understanding the meaning of each.
1746
*
1747
* Each intrinsic has some number of constant indices, some number of
1748
* variables, and some number of sources. What these sources, variables,
1749
* and indices mean depends on the intrinsic and is documented with the
1750
* intrinsic declaration in nir_intrinsics.h. Intrinsics and texture
1751
* instructions are the only types of instruction that can operate on
1752
* variables.
1753
*/
1754
typedef struct {
1755
nir_instr instr;
1756
1757
nir_intrinsic_op intrinsic;
1758
1759
nir_dest dest;
1760
1761
/** number of components if this is a vectorized intrinsic
1762
*
1763
* Similarly to ALU operations, some intrinsics are vectorized.
1764
* An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
1765
* For vectorized intrinsics, the num_components field specifies the
1766
* number of destination components and the number of source components
1767
* for all sources with nir_intrinsic_infos.src_components[i] == 0.
1768
*/
1769
uint8_t num_components;
1770
1771
int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];
1772
1773
nir_src src[];
1774
} nir_intrinsic_instr;
1775
1776
static inline nir_variable *
1777
nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i)
1778
{
1779
return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
1780
}
1781
1782
typedef enum {
1783
/* Memory ordering. */
1784
NIR_MEMORY_ACQUIRE = 1 << 0,
1785
NIR_MEMORY_RELEASE = 1 << 1,
1786
NIR_MEMORY_ACQ_REL = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE,
1787
1788
/* Memory visibility operations. */
1789
NIR_MEMORY_MAKE_AVAILABLE = 1 << 2,
1790
NIR_MEMORY_MAKE_VISIBLE = 1 << 3,
1791
} nir_memory_semantics;
1792
1793
typedef enum {
1794
NIR_SCOPE_NONE,
1795
NIR_SCOPE_INVOCATION,
1796
NIR_SCOPE_SUBGROUP,
1797
NIR_SCOPE_SHADER_CALL,
1798
NIR_SCOPE_WORKGROUP,
1799
NIR_SCOPE_QUEUE_FAMILY,
1800
NIR_SCOPE_DEVICE,
1801
} nir_scope;
1802
1803
/**
1804
* \name NIR intrinsics semantic flags
1805
*
1806
* information about what the compiler can do with the intrinsics.
1807
*
1808
* \sa nir_intrinsic_info::flags
1809
*/
1810
typedef enum {
1811
/**
1812
* Whether the intrinsic can be safely eliminated if its output
1813
* value is not being used.
1814
*/
1815
NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),
1816
1817
/**
1818
* Whether the intrinsic can be reordered with respect to any other
1819
* intrinsic, i.e. whether the only reordering dependencies of the
1820
* intrinsic are due to the register reads/writes.
1821
*/
1822
NIR_INTRINSIC_CAN_REORDER = (1 << 1),
1823
} nir_intrinsic_semantic_flag;
1824
1825
/**
1826
* Maximum valid value for a nir align_mul value (in intrinsics or derefs).
1827
*
1828
* Offsets can be signed, so this is the largest power of two in int32_t.
1829
*/
1830
#define NIR_ALIGN_MUL_MAX 0x40000000
1831
1832
typedef struct nir_io_semantics {
1833
unsigned location:7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */
1834
unsigned num_slots:6; /* max 32, may be pessimistic with const indexing */
1835
unsigned dual_source_blend_index:1;
1836
unsigned fb_fetch_output:1; /* for GL_KHR_blend_equation_advanced */
1837
unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */
1838
unsigned medium_precision:1; /* GLSL mediump qualifier */
1839
unsigned per_view:1;
1840
unsigned high_16bits:1; /* whether accessing low or high half of the slot */
1841
unsigned _pad:6;
1842
} nir_io_semantics;
1843
1844
#define NIR_INTRINSIC_MAX_INPUTS 11
1845
1846
typedef struct {
1847
const char *name;
1848
1849
uint8_t num_srcs; /** < number of register/SSA inputs */
1850
1851
/** number of components of each input register
1852
*
1853
* If this value is 0, the number of components is given by the
1854
* num_components field of nir_intrinsic_instr. If this value is -1, the
1855
* intrinsic consumes however many components are provided and it is not
1856
* validated at all.
1857
*/
1858
int8_t src_components[NIR_INTRINSIC_MAX_INPUTS];
1859
1860
bool has_dest;
1861
1862
/** number of components of the output register
1863
*
1864
* If this value is 0, the number of components is given by the
1865
* num_components field of nir_intrinsic_instr.
1866
*/
1867
uint8_t dest_components;
1868
1869
/** bitfield of legal bit sizes */
1870
uint8_t dest_bit_sizes;
1871
1872
/** source which the destination bit size must match
1873
*
1874
* Some intrinsics, such as subgroup intrinsics, are data manipulation
1875
* intrinsics and they have similar bit-size rules to ALU ops. This enables
1876
* validation to validate a bit more and enables auto-generated builder code
1877
* to properly determine destination bit sizes automatically.
1878
*/
1879
int8_t bit_size_src;
1880
1881
/** the number of constant indices used by the intrinsic */
1882
uint8_t num_indices;
1883
1884
/** list of indices */
1885
uint8_t indices[NIR_INTRINSIC_MAX_CONST_INDEX];
1886
1887
/** indicates the usage of intr->const_index[n] */
1888
uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
1889
1890
/** semantic flags for calls to this intrinsic */
1891
nir_intrinsic_semantic_flag flags;
1892
} nir_intrinsic_info;
1893
1894
extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
1895
1896
static inline unsigned
1897
nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
1898
{
1899
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1900
assert(srcn < info->num_srcs);
1901
if (info->src_components[srcn] > 0)
1902
return info->src_components[srcn];
1903
else if (info->src_components[srcn] == 0)
1904
return intr->num_components;
1905
else
1906
return nir_src_num_components(intr->src[srcn]);
1907
}
1908
1909
static inline unsigned
1910
nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
1911
{
1912
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
1913
if (!info->has_dest)
1914
return 0;
1915
else if (info->dest_components)
1916
return info->dest_components;
1917
else
1918
return intr->num_components;
1919
}
1920
1921
/**
1922
* Helper to copy const_index[] from src to dst, without assuming they
1923
* match in order.
1924
*/
1925
static inline void
1926
nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
1927
{
1928
if (src->intrinsic == dst->intrinsic) {
1929
memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
1930
return;
1931
}
1932
1933
const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
1934
const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];
1935
1936
for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
1937
if (src_info->index_map[i] == 0)
1938
continue;
1939
1940
/* require that dst instruction also uses the same const_index[]: */
1941
assert(dst_info->index_map[i] > 0);
1942
1943
dst->const_index[dst_info->index_map[i] - 1] =
1944
src->const_index[src_info->index_map[i] - 1];
1945
}
1946
}
1947
1948
#include "nir_intrinsics_indices.h"
1949
1950
static inline void
1951
nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
1952
unsigned align_mul, unsigned align_offset)
1953
{
1954
assert(util_is_power_of_two_nonzero(align_mul));
1955
assert(align_offset < align_mul);
1956
nir_intrinsic_set_align_mul(intrin, align_mul);
1957
nir_intrinsic_set_align_offset(intrin, align_offset);
1958
}
1959
1960
/** Returns a simple alignment for a load/store intrinsic offset
1961
*
1962
* Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL
1963
* and ALIGN_OFFSET parameters, this helper takes both into account and
1964
* provides a single simple alignment parameter. The offset X is guaranteed
1965
* to satisfy X % align == 0.
1966
*/
1967
static inline unsigned
1968
nir_intrinsic_align(const nir_intrinsic_instr *intrin)
1969
{
1970
const unsigned align_mul = nir_intrinsic_align_mul(intrin);
1971
const unsigned align_offset = nir_intrinsic_align_offset(intrin);
1972
assert(align_offset < align_mul);
1973
return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
1974
}
1975
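/* Worked example: with align_mul == 16 and align_offset == 4 the offset is
 * known to be of the form 16 * k + 4, so the strongest simple guarantee is
 * 4-byte alignment (ffs(4) - 1 == 2, and 1 << 2 == 4). With align_offset == 0
 * the full align_mul of 16 is returned.
 */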
1976
static inline bool
1977
nir_intrinsic_has_align(const nir_intrinsic_instr *intrin)
1978
{
1979
return nir_intrinsic_has_align_mul(intrin) &&
1980
nir_intrinsic_has_align_offset(intrin);
1981
}
1982
1983
unsigned
1984
nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);
1985
1986
/* Converts a image_deref_* intrinsic into a image_* one */
1987
void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
1988
nir_ssa_def *handle, bool bindless);
1989
1990
/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */
1991
static inline bool
1992
nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
1993
{
1994
if (instr->intrinsic == nir_intrinsic_load_deref) {
1995
nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
1996
return nir_deref_mode_is_in_set(deref, nir_var_read_only_modes) ||
1997
(nir_intrinsic_access(instr) & ACCESS_CAN_REORDER);
1998
} else if (instr->intrinsic == nir_intrinsic_load_ssbo ||
1999
instr->intrinsic == nir_intrinsic_bindless_image_load ||
2000
instr->intrinsic == nir_intrinsic_image_deref_load ||
2001
instr->intrinsic == nir_intrinsic_image_load) {
2002
return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER;
2003
} else {
2004
const nir_intrinsic_info *info =
2005
&nir_intrinsic_infos[instr->intrinsic];
2006
return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
2007
(info->flags & NIR_INTRINSIC_CAN_REORDER);
2008
}
2009
}
2010
2011
bool nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr);
2012
2013
/**
2014
* \group texture information
2015
*
2016
* This gives semantic information about textures which is useful to the
2017
* frontend, the backend, and lowering passes, but not the optimizer.
2018
*/
2019
2020
typedef enum {
2021
nir_tex_src_coord,
2022
nir_tex_src_projector,
2023
nir_tex_src_comparator, /* shadow comparator */
2024
nir_tex_src_offset,
2025
nir_tex_src_bias,
2026
nir_tex_src_lod,
2027
nir_tex_src_min_lod,
2028
nir_tex_src_ms_index, /* MSAA sample index */
2029
nir_tex_src_ms_mcs, /* MSAA compression value */
2030
nir_tex_src_ddx,
2031
nir_tex_src_ddy,
2032
nir_tex_src_texture_deref, /* < deref pointing to the texture */
2033
nir_tex_src_sampler_deref, /* < deref pointing to the sampler */
2034
nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
2035
nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
2036
nir_tex_src_texture_handle, /* < bindless texture handle */
2037
nir_tex_src_sampler_handle, /* < bindless sampler handle */
2038
nir_tex_src_plane, /* < selects plane for planar textures */
2039
nir_num_tex_src_types
2040
} nir_tex_src_type;
2041
2042
typedef struct {
2043
nir_src src;
2044
nir_tex_src_type src_type;
2045
} nir_tex_src;
2046
2047
typedef enum {
2048
nir_texop_tex, /**< Regular texture look-up */
2049
nir_texop_txb, /**< Texture look-up with LOD bias */
2050
nir_texop_txl, /**< Texture look-up with explicit LOD */
2051
nir_texop_txd, /**< Texture look-up with partial derivatives */
2052
nir_texop_txf, /**< Texel fetch with explicit LOD */
2053
nir_texop_txf_ms, /**< Multisample texture fetch */
2054
nir_texop_txf_ms_fb, /**< Multisample texture fetch from framebuffer */
2055
nir_texop_txf_ms_mcs, /**< Multisample compression value fetch */
2056
nir_texop_txs, /**< Texture size */
2057
nir_texop_lod, /**< Texture lod query */
2058
nir_texop_tg4, /**< Texture gather */
2059
nir_texop_query_levels, /**< Texture levels query */
2060
nir_texop_texture_samples, /**< Texture samples query */
2061
nir_texop_samples_identical, /**< Query whether all samples are definitely
2062
* identical.
2063
*/
2064
nir_texop_tex_prefetch, /**< Regular texture look-up, eligible for pre-dispatch */
2065
nir_texop_fragment_fetch, /**< Multisample fragment color texture fetch */
2066
nir_texop_fragment_mask_fetch,/**< Multisample fragment mask texture fetch */
2067
} nir_texop;
2068
2069
typedef struct {
2070
nir_instr instr;
2071
2072
enum glsl_sampler_dim sampler_dim;
2073
nir_alu_type dest_type;
2074
2075
nir_texop op;
2076
nir_dest dest;
2077
nir_tex_src *src;
2078
unsigned num_srcs, coord_components;
2079
bool is_array, is_shadow;
2080
2081
/**
2082
* If is_shadow is true, whether this is the old-style shadow that outputs 4
2083
* components or the new-style shadow that outputs 1 component.
2084
*/
2085
bool is_new_style_shadow;
2086
2087
/**
2088
* If this texture instruction should return a sparse residency code. The
2089
* code is in the last component of the result.
2090
*/
2091
bool is_sparse;
2092
2093
/* gather component selector */
2094
unsigned component : 2;
2095
2096
/* Validation needs to know this for gradient component count */
2097
unsigned array_is_lowered_cube : 1;
2098
2099
/* gather offsets */
2100
int8_t tg4_offsets[4][2];
2101
2102
/* True if the texture index or handle is not dynamically uniform */
2103
bool texture_non_uniform;
2104
2105
/* True if the sampler index or handle is not dynamically uniform */
2106
bool sampler_non_uniform;
2107
2108
/** The texture index
2109
*
2110
* If this texture instruction has a nir_tex_src_texture_offset source,
2111
* then the texture index is given by texture_index + texture_offset.
2112
*/
2113
unsigned texture_index;
2114
2115
/** The sampler index
2116
*
2117
* The following operations do not require a sampler and, as such, this
2118
* field should be ignored:
2119
* - nir_texop_txf
2120
* - nir_texop_txf_ms
2121
* - nir_texop_txs
2122
* - nir_texop_query_levels
2123
* - nir_texop_texture_samples
2124
* - nir_texop_samples_identical
2125
*
2126
* If this texture instruction has a nir_tex_src_sampler_offset source,
2127
* then the sampler index is given by sampler_index + sampler_offset.
2128
*/
2129
unsigned sampler_index;
2130
} nir_tex_instr;
2131
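/* A minimal construction sketch (assumes "texture_deref", "sampler_deref" and
 * "coord" already exist; destination setup and insertion are omitted):
 *
 *    nir_tex_instr *tex = nir_tex_instr_create(shader, 3);
 *    tex->op = nir_texop_tex;
 *    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
 *    tex->dest_type = nir_type_float32;
 *    tex->coord_components = 2;
 *    tex->src[0].src_type = nir_tex_src_texture_deref;
 *    tex->src[0].src = nir_src_for_ssa(&texture_deref->dest.ssa);
 *    tex->src[1].src_type = nir_tex_src_sampler_deref;
 *    tex->src[1].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
 *    tex->src[2].src_type = nir_tex_src_coord;
 *    tex->src[2].src = nir_src_for_ssa(coord);
 */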
2132
/*
2133
* Returns true if the texture operation requires a sampler as a general rule;
2134
* see the documentation of sampler_index.
2135
*
2136
* Note that a specific hw/driver backend could still require a sampler
2137
* object/configuration packet in any case, for some other reason.
2138
*/
2139
static inline bool
2140
nir_tex_instr_need_sampler(const nir_tex_instr *instr)
2141
{
2142
switch (instr->op) {
2143
case nir_texop_txf:
2144
case nir_texop_txf_ms:
2145
case nir_texop_txs:
2146
case nir_texop_query_levels:
2147
case nir_texop_texture_samples:
2148
case nir_texop_samples_identical:
2149
return false;
2150
default:
2151
return true;
2152
}
2153
}
2154
2155
static inline unsigned
2156
nir_tex_instr_result_size(const nir_tex_instr *instr)
2157
{
2158
switch (instr->op) {
2159
case nir_texop_txs: {
2160
unsigned ret;
2161
switch (instr->sampler_dim) {
2162
case GLSL_SAMPLER_DIM_1D:
2163
case GLSL_SAMPLER_DIM_BUF:
2164
ret = 1;
2165
break;
2166
case GLSL_SAMPLER_DIM_2D:
2167
case GLSL_SAMPLER_DIM_CUBE:
2168
case GLSL_SAMPLER_DIM_MS:
2169
case GLSL_SAMPLER_DIM_RECT:
2170
case GLSL_SAMPLER_DIM_EXTERNAL:
2171
case GLSL_SAMPLER_DIM_SUBPASS:
2172
ret = 2;
2173
break;
2174
case GLSL_SAMPLER_DIM_3D:
2175
ret = 3;
2176
break;
2177
default:
2178
unreachable("not reached");
2179
}
2180
if (instr->is_array)
2181
ret++;
2182
return ret;
2183
}
2184
2185
case nir_texop_lod:
2186
return 2;
2187
2188
case nir_texop_texture_samples:
2189
case nir_texop_query_levels:
2190
case nir_texop_samples_identical:
2191
case nir_texop_fragment_mask_fetch:
2192
return 1;
2193
2194
default:
2195
if (instr->is_shadow && instr->is_new_style_shadow)
2196
return 1;
2197
2198
return 4;
2199
}
2200
}
2201
2202
static inline unsigned
2203
nir_tex_instr_dest_size(const nir_tex_instr *instr)
2204
{
2205
/* One more component is needed for the residency code. */
2206
return nir_tex_instr_result_size(instr) + instr->is_sparse;
2207
}
2208
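/* Worked example: nir_texop_txs on a 2D array texture returns
 * width/height/layers, so nir_tex_instr_result_size() is 2 + 1 == 3. If the
 * instruction also has is_sparse set, nir_tex_instr_dest_size() adds one more
 * component for the residency code, giving 4.
 */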
2209
/* Returns true if this texture operation queries something about the texture
2210
* rather than actually sampling it.
2211
*/
2212
static inline bool
2213
nir_tex_instr_is_query(const nir_tex_instr *instr)
2214
{
2215
switch (instr->op) {
2216
case nir_texop_txs:
2217
case nir_texop_lod:
2218
case nir_texop_texture_samples:
2219
case nir_texop_query_levels:
2220
return true;
2221
case nir_texop_tex:
2222
case nir_texop_txb:
2223
case nir_texop_txl:
2224
case nir_texop_txd:
2225
case nir_texop_txf:
2226
case nir_texop_txf_ms:
2227
case nir_texop_txf_ms_fb:
2228
case nir_texop_txf_ms_mcs:
2229
case nir_texop_tg4:
2230
return false;
2231
default:
2232
unreachable("Invalid texture opcode");
2233
}
2234
}
2235
2236
static inline bool
2237
nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
2238
{
2239
switch (instr->op) {
2240
case nir_texop_tex:
2241
case nir_texop_txb:
2242
case nir_texop_lod:
2243
return true;
2244
default:
2245
return false;
2246
}
2247
}
2248
2249
static inline nir_alu_type
2250
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
2251
{
2252
switch (instr->src[src].src_type) {
2253
case nir_tex_src_coord:
2254
switch (instr->op) {
2255
case nir_texop_txf:
2256
case nir_texop_txf_ms:
2257
case nir_texop_txf_ms_fb:
2258
case nir_texop_txf_ms_mcs:
2259
case nir_texop_samples_identical:
2260
return nir_type_int;
2261
2262
default:
2263
return nir_type_float;
2264
}
2265
2266
case nir_tex_src_lod:
2267
switch (instr->op) {
2268
case nir_texop_txs:
2269
case nir_texop_txf:
2270
case nir_texop_txf_ms:
2271
return nir_type_int;
2272
2273
default:
2274
return nir_type_float;
2275
}
2276
2277
case nir_tex_src_projector:
2278
case nir_tex_src_comparator:
2279
case nir_tex_src_bias:
2280
case nir_tex_src_min_lod:
2281
case nir_tex_src_ddx:
2282
case nir_tex_src_ddy:
2283
return nir_type_float;
2284
2285
case nir_tex_src_offset:
2286
case nir_tex_src_ms_index:
2287
case nir_tex_src_plane:
2288
return nir_type_int;
2289
2290
case nir_tex_src_ms_mcs:
2291
case nir_tex_src_texture_deref:
2292
case nir_tex_src_sampler_deref:
2293
case nir_tex_src_texture_offset:
2294
case nir_tex_src_sampler_offset:
2295
case nir_tex_src_texture_handle:
2296
case nir_tex_src_sampler_handle:
2297
return nir_type_uint;
2298
2299
case nir_num_tex_src_types:
2300
unreachable("nir_num_tex_src_types is not a valid source type");
2301
}
2302
2303
unreachable("Invalid texture source type");
2304
}
2305
2306
static inline unsigned
2307
nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
2308
{
2309
if (instr->src[src].src_type == nir_tex_src_coord)
2310
return instr->coord_components;
2311
2312
/* The MCS value is expected to be a vec4 returned by a txf_ms_mcs */
2313
if (instr->src[src].src_type == nir_tex_src_ms_mcs)
2314
return 4;
2315
2316
if (instr->src[src].src_type == nir_tex_src_ddx ||
2317
instr->src[src].src_type == nir_tex_src_ddy) {
2318
2319
if (instr->is_array && !instr->array_is_lowered_cube)
2320
return instr->coord_components - 1;
2321
else
2322
return instr->coord_components;
2323
}
2324
2325
/* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for
2326
* the offset, since a cube maps to a single face.
2327
*/
2328
if (instr->src[src].src_type == nir_tex_src_offset) {
2329
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
2330
return 2;
2331
else if (instr->is_array)
2332
return instr->coord_components - 1;
2333
else
2334
return instr->coord_components;
2335
}
2336
2337
return 1;
2338
}
2339
2340
static inline int
2341
nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type)
2342
{
2343
for (unsigned i = 0; i < instr->num_srcs; i++)
2344
if (instr->src[i].src_type == type)
2345
return (int) i;
2346
2347
return -1;
2348
}
2349
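/* Illustrative usage: texture sources are not stored in a fixed order, so
 * passes look them up by type, e.g.
 *
 *    int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
 *    if (lod_idx >= 0 && nir_src_is_const(tex->src[lod_idx].src)) {
 *       // explicit LOD with a known constant value
 *    }
 */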
2350
void nir_tex_instr_add_src(nir_tex_instr *tex,
2351
nir_tex_src_type src_type,
2352
nir_src src);
2353
2354
void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx);
2355
2356
bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex);
2357
2358
typedef struct {
2359
nir_instr instr;
2360
2361
nir_ssa_def def;
2362
2363
nir_const_value value[];
2364
} nir_load_const_instr;
2365
2366
typedef enum {
2367
/** Return from a function
2368
*
2369
* This instruction is a classic function return. It jumps to
2370
* nir_function_impl::end_block. No return value is provided in this
2371
* instruction. Instead, the function is expected to write any return
2372
* data to a deref passed in from the caller.
2373
*/
2374
nir_jump_return,
2375
2376
/** Immediately exit the current shader
2377
*
2378
* This instruction is roughly the equivalent of C's "exit()" in that it
2379
* immediately terminates the current shader invocation. From a CFG
2380
* perspective, it looks like a jump to nir_function_impl::end_block but
2381
* it actually jumps to the end block of the shader entrypoint. A halt
2382
* instruction in the shader entrypoint itself is semantically identical
2383
* to a return.
2384
*
2385
* For shaders with built-in I/O, any outputs written prior to a halt
2386
* instruction remain written and any outputs not written prior to the
2387
* halt have undefined values. It does NOT cause an implicit discard of
2388
* written results. If one wants to discard results in a fragment shader,
2389
* for instance, a discard or demote intrinsic is required.
2390
*/
2391
nir_jump_halt,
2392
2393
/** Break out of the inner-most loop
2394
*
2395
* This has the same semantics as C's "break" statement.
2396
*/
2397
nir_jump_break,
2398
2399
/** Jump back to the top of the inner-most loop
2400
*
2401
* This has the same semantics as C's "continue" statement assuming that a
2402
* NIR loop is implemented as "while (1) { body }".
2403
*/
2404
nir_jump_continue,
2405
2406
/** Jumps for unstructured CFG.
2407
*
2408
* Since we can't rely on block ordering within an unstructured CFG, we need to
2409
* place explicit jumps at the end of every block.
2410
*/
2411
nir_jump_goto,
2412
nir_jump_goto_if,
2413
} nir_jump_type;
2414
2415
typedef struct {
2416
nir_instr instr;
2417
nir_jump_type type;
2418
nir_src condition;
2419
struct nir_block *target;
2420
struct nir_block *else_target;
2421
} nir_jump_instr;
2422
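/* Minimal sketch: a structured loop break is just a jump instruction appended
 * to the end of the current block (nir_instr_insert_after_block is one of the
 * insertion helpers declared further down in this header):
 *
 *    nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break);
 *    nir_instr_insert_after_block(block, &brk->instr);
 *
 * Only the goto/goto_if jumps use the target/else_target fields.
 */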
2423
/* creates a new SSA variable in an undefined state */
2424
2425
typedef struct {
2426
nir_instr instr;
2427
nir_ssa_def def;
2428
} nir_ssa_undef_instr;
2429
2430
typedef struct {
2431
struct exec_node node;
2432
2433
/* The predecessor block corresponding to this source */
2434
struct nir_block *pred;
2435
2436
nir_src src;
2437
} nir_phi_src;
2438
2439
#define nir_foreach_phi_src(phi_src, phi) \
2440
foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs)
2441
#define nir_foreach_phi_src_safe(phi_src, phi) \
2442
foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs)
2443
2444
typedef struct {
2445
nir_instr instr;
2446
2447
struct exec_list srcs; /** < list of nir_phi_src */
2448
2449
nir_dest dest;
2450
} nir_phi_instr;
2451
2452
static inline nir_phi_src *
2453
nir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block)
2454
{
2455
nir_foreach_phi_src(src, phi) {
2456
if (src->pred == block)
2457
return src;
2458
}
2459
2460
assert(!"Block is not a predecessor of phi.");
2461
return NULL;
2462
}
2463
2464
typedef struct {
2465
struct exec_node node;
2466
nir_src src;
2467
nir_dest dest;
2468
} nir_parallel_copy_entry;
2469
2470
#define nir_foreach_parallel_copy_entry(entry, pcopy) \
2471
foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
2472
2473
typedef struct {
2474
nir_instr instr;
2475
2476
/* A list of nir_parallel_copy_entrys. The sources of all of the
2477
* entries are copied to the corresponding destinations "in parallel".
2478
* In other words, if we have two entries: a -> b and b -> a, the values
2479
* get swapped.
2480
*/
2481
struct exec_list entries;
2482
} nir_parallel_copy_instr;
2483
2484
NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr,
2485
type, nir_instr_type_alu)
2486
NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr,
2487
type, nir_instr_type_deref)
2488
NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr,
2489
type, nir_instr_type_call)
2490
NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr,
2491
type, nir_instr_type_jump)
2492
NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr,
2493
type, nir_instr_type_tex)
2494
NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr,
2495
type, nir_instr_type_intrinsic)
2496
NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr,
2497
type, nir_instr_type_load_const)
2498
NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr,
2499
type, nir_instr_type_ssa_undef)
2500
NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr,
2501
type, nir_instr_type_phi)
2502
NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
2503
nir_parallel_copy_instr, instr,
2504
type, nir_instr_type_parallel_copy)
2505
2506
2507
#define NIR_DEFINE_SRC_AS_CONST(type, suffix) \
2508
static inline type \
2509
nir_src_comp_as_##suffix(nir_src src, unsigned comp) \
2510
{ \
2511
assert(nir_src_is_const(src)); \
2512
nir_load_const_instr *load = \
2513
nir_instr_as_load_const(src.ssa->parent_instr); \
2514
assert(comp < load->def.num_components); \
2515
return nir_const_value_as_##suffix(load->value[comp], \
2516
load->def.bit_size); \
2517
} \
2518
\
2519
static inline type \
2520
nir_src_as_##suffix(nir_src src) \
2521
{ \
2522
assert(nir_src_num_components(src) == 1); \
2523
return nir_src_comp_as_##suffix(src, 0); \
2524
}
2525
2526
NIR_DEFINE_SRC_AS_CONST(int64_t, int)
2527
NIR_DEFINE_SRC_AS_CONST(uint64_t, uint)
2528
NIR_DEFINE_SRC_AS_CONST(bool, bool)
2529
NIR_DEFINE_SRC_AS_CONST(double, float)
2530
2531
#undef NIR_DEFINE_SRC_AS_CONST
2532
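/* Illustrative usage: these helpers let a pass inspect constant sources
 * without touching the load_const instruction directly, e.g.
 *
 *    if (nir_src_is_const(intrin->src[1]) &&
 *        nir_src_as_uint(intrin->src[1]) == 0) {
 *       // the offset is a known zero; take the direct path
 *    }
 */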
2533
2534
typedef struct {
2535
nir_ssa_def *def;
2536
unsigned comp;
2537
} nir_ssa_scalar;
2538
2539
static inline bool
2540
nir_ssa_scalar_is_const(nir_ssa_scalar s)
2541
{
2542
return s.def->parent_instr->type == nir_instr_type_load_const;
2543
}
2544
2545
static inline nir_const_value
2546
nir_ssa_scalar_as_const_value(nir_ssa_scalar s)
2547
{
2548
assert(s.comp < s.def->num_components);
2549
nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr);
2550
return load->value[s.comp];
2551
}
2552
2553
#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \
2554
static inline type \
2555
nir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \
2556
{ \
2557
return nir_const_value_as_##suffix( \
2558
nir_ssa_scalar_as_const_value(s), s.def->bit_size); \
2559
}
2560
2561
NIR_DEFINE_SCALAR_AS_CONST(int64_t, int)
2562
NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint)
2563
NIR_DEFINE_SCALAR_AS_CONST(bool, bool)
2564
NIR_DEFINE_SCALAR_AS_CONST(double, float)
2565
2566
#undef NIR_DEFINE_SCALAR_AS_CONST
2567
2568
static inline bool
2569
nir_ssa_scalar_is_alu(nir_ssa_scalar s)
2570
{
2571
return s.def->parent_instr->type == nir_instr_type_alu;
2572
}
2573
2574
static inline nir_op
2575
nir_ssa_scalar_alu_op(nir_ssa_scalar s)
2576
{
2577
return nir_instr_as_alu(s.def->parent_instr)->op;
2578
}
2579
2580
static inline nir_ssa_scalar
2581
nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx)
2582
{
2583
nir_ssa_scalar out = { NULL, 0 };
2584
2585
nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
2586
assert(alu_src_idx < nir_op_infos[alu->op].num_inputs);
2587
2588
/* Our component must be written */
2589
assert(s.comp < s.def->num_components);
2590
assert(alu->dest.write_mask & (1u << s.comp));
2591
2592
assert(alu->src[alu_src_idx].src.is_ssa);
2593
out.def = alu->src[alu_src_idx].src.ssa;
2594
2595
if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) {
2596
/* The ALU src is unsized so the source component follows the
2597
* destination component.
2598
*/
2599
out.comp = alu->src[alu_src_idx].swizzle[s.comp];
2600
} else {
2601
/* This is a sized source so all source components work together to
2602
* produce all the destination components. Since we need to return a
2603
* scalar, this only works if the source is a scalar.
2604
*/
2605
assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1);
2606
out.comp = alu->src[alu_src_idx].swizzle[0];
2607
}
2608
assert(out.comp < out.def->num_components);
2609
2610
return out;
2611
}
2612
2613
nir_ssa_scalar nir_ssa_scalar_chase_movs(nir_ssa_scalar s);
2614
2615
/** Returns a nir_ssa_scalar where we've followed the bit-exact mov/vec use chain to the original definition */
2616
static inline nir_ssa_scalar
2617
nir_ssa_scalar_resolved(nir_ssa_def *def, unsigned channel)
2618
{
2619
nir_ssa_scalar s = { def, channel };
2620
return nir_ssa_scalar_chase_movs(s);
2621
}
2622
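/* Minimal sketch: resolve one channel of a def through any mov/vec chain and
 * check whether it ultimately comes from a constant:
 *
 *    nir_ssa_scalar s = nir_ssa_scalar_resolved(def, 0);
 *    if (nir_ssa_scalar_is_const(s)) {
 *       uint64_t v = nir_ssa_scalar_as_uint(s);
 *       // fold using v
 *    }
 */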
2623
2624
typedef struct {
2625
bool success;
2626
2627
nir_variable *var;
2628
unsigned desc_set;
2629
unsigned binding;
2630
unsigned num_indices;
2631
nir_src indices[4];
2632
bool read_first_invocation;
2633
} nir_binding;
2634
2635
nir_binding nir_chase_binding(nir_src rsrc);
2636
nir_variable *nir_get_binding_variable(struct nir_shader *shader, nir_binding binding);
2637
2638
2639
/*
2640
* Control flow
2641
*
2642
* Control flow consists of a tree of control flow nodes, which include
2643
* if-statements and loops. The leaves of the tree are basic blocks, lists of
2644
* instructions that always run start-to-finish. Each basic block also keeps
2645
* track of its successors (blocks which may run immediately after the current
2646
* block) and predecessors (blocks which could have run immediately before the
2647
* current block). Each function also has a start block and an end block
2648
* (which is always empty) that all return statements point to. Together, all the
2649
* blocks with their predecessors and successors make up the control flow
2650
* graph (CFG) of the function. There are helpers that modify the tree of
2651
* control flow nodes while modifying the CFG appropriately; these should be
2652
* used instead of modifying the tree directly.
2653
*/
2654
2655
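/* Illustrative sketch of walking one level of the control flow tree (the cast
 * helpers are defined further down; foreach_list_typed comes from the
 * exec_list header; process_block() is a hypothetical callback):
 *
 *    foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
 *       switch (cf_node->type) {
 *       case nir_cf_node_block:
 *          process_block(nir_cf_node_as_block(cf_node));
 *          break;
 *       case nir_cf_node_if:
 *       case nir_cf_node_loop:
 *          // recurse into then_list/else_list or the loop body
 *          break;
 *       default:
 *          unreachable("unexpected cf_node type at this level");
 *       }
 *    }
 */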
typedef enum {
2656
nir_cf_node_block,
2657
nir_cf_node_if,
2658
nir_cf_node_loop,
2659
nir_cf_node_function
2660
} nir_cf_node_type;
2661
2662
typedef struct nir_cf_node {
2663
struct exec_node node;
2664
nir_cf_node_type type;
2665
struct nir_cf_node *parent;
2666
} nir_cf_node;
2667
2668
typedef struct nir_block {
2669
nir_cf_node cf_node;
2670
2671
struct exec_list instr_list; /** < list of nir_instr */
2672
2673
/** generic block index; generated by nir_index_blocks */
2674
unsigned index;
2675
2676
/*
2677
* Each block can only have up to 2 successors, so we put them in a simple
2678
* array - no need for anything more complicated.
2679
*/
2680
struct nir_block *successors[2];
2681
2682
/* Set of nir_block predecessors in the CFG */
2683
struct set *predecessors;
2684
2685
/*
2686
* this node's immediate dominator in the dominance tree - set to NULL for
2687
* the start block.
2688
*/
2689
struct nir_block *imm_dom;
2690
2691
/* This node's children in the dominance tree */
2692
unsigned num_dom_children;
2693
struct nir_block **dom_children;
2694
2695
/* Set of nir_blocks on the dominance frontier of this block */
2696
struct set *dom_frontier;
2697
2698
/*
2699
* These two indices have the property that dom_{pre,post}_index for each
2700
* child of this block in the dominance tree will always be between
2701
* dom_pre_index and dom_post_index for this block, which makes testing if
2702
* a given block is dominated by another block an O(1) operation.
2703
*/
2704
uint32_t dom_pre_index, dom_post_index;
2705
2706
/**
2707
* Value just before the first nir_instr->index in the block, but after
2708
* the end_ip of any predecessor block.
2709
*/
2710
uint32_t start_ip;
2711
/**
2712
* Value just after the last nir_instr->index in the block, but before the
2713
* start_ip of any successor block.
2714
*/
2715
uint32_t end_ip;
2716
2717
/* SSA def live in and out for this block; used for liveness analysis.
2718
* Indexed by ssa_def->index
2719
*/
2720
BITSET_WORD *live_in;
2721
BITSET_WORD *live_out;
2722
} nir_block;
2723
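/* For example, dom_pre_index/dom_post_index make the O(1) dominance check
 * behind nir_block_dominates() possible: "parent" dominates "child" when the
 * child's interval nests inside the parent's, i.e.
 *
 *    bool dominates = parent->dom_pre_index <= child->dom_pre_index &&
 *                     child->dom_post_index <= parent->dom_post_index;
 */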
2724
static inline bool
2725
nir_block_is_reachable(nir_block *b)
2726
{
2727
/* See also nir_block_dominates */
2728
return b->dom_post_index != 0;
2729
}
2730
2731
static inline nir_instr *
2732
nir_block_first_instr(nir_block *block)
2733
{
2734
struct exec_node *head = exec_list_get_head(&block->instr_list);
2735
return exec_node_data(nir_instr, head, node);
2736
}
2737
2738
static inline nir_instr *
2739
nir_block_last_instr(nir_block *block)
2740
{
2741
struct exec_node *tail = exec_list_get_tail(&block->instr_list);
2742
return exec_node_data(nir_instr, tail, node);
2743
}
2744
2745
static inline bool
2746
nir_block_ends_in_jump(nir_block *block)
2747
{
2748
return !exec_list_is_empty(&block->instr_list) &&
2749
nir_block_last_instr(block)->type == nir_instr_type_jump;
2750
}
2751
2752
#define nir_foreach_instr(instr, block) \
2753
foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
2754
#define nir_foreach_instr_reverse(instr, block) \
2755
foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
2756
#define nir_foreach_instr_safe(instr, block) \
2757
foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
2758
#define nir_foreach_instr_reverse_safe(instr, block) \
2759
foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)
2760
2761
static inline nir_phi_instr *
2762
nir_block_last_phi_instr(nir_block *block)
2763
{
2764
nir_phi_instr *last_phi = NULL;
2765
nir_foreach_instr(instr, block) {
2766
if (instr->type == nir_instr_type_phi)
2767
last_phi = nir_instr_as_phi(instr);
2768
else
2769
return last_phi;
2770
}
2771
return last_phi;
2772
}
2773
2774
typedef enum {
2775
nir_selection_control_none = 0x0,
2776
nir_selection_control_flatten = 0x1,
2777
nir_selection_control_dont_flatten = 0x2,
2778
} nir_selection_control;
2779
2780
typedef struct nir_if {
2781
nir_cf_node cf_node;
2782
nir_src condition;
2783
nir_selection_control control;
2784
2785
struct exec_list then_list; /** < list of nir_cf_node */
2786
struct exec_list else_list; /** < list of nir_cf_node */
2787
} nir_if;
2788
2789
typedef struct {
2790
nir_if *nif;
2791
2792
/** Instruction that generates nif::condition. */
2793
nir_instr *conditional_instr;
2794
2795
/** Block within ::nif that has the break instruction. */
2796
nir_block *break_block;
2797
2798
/** Last block for the then- or else-path that does not contain the break. */
2799
nir_block *continue_from_block;
2800
2801
/** True when ::break_block is in the else-path of ::nif. */
2802
bool continue_from_then;
2803
bool induction_rhs;
2804
2805
/* This is true if the terminator's exact trip count is unknown. For
2806
* example:
2807
*
2808
* for (int i = 0; i < imin(x, 4); i++)
2809
* ...
2810
*
2811
* Here loop analysis would have set a max_trip_count of 4; however, we don't
2812
* know for sure that this is the exact trip count.
2813
*/
2814
bool exact_trip_count_unknown;
2815
2816
struct list_head loop_terminator_link;
2817
} nir_loop_terminator;
2818
2819
typedef struct {
2820
/* Estimated cost (in number of instructions) of the loop */
2821
unsigned instr_cost;
2822
2823
/* Guessed trip count based on array indexing */
2824
unsigned guessed_trip_count;
2825
2826
/* Maximum number of times the loop is run (if known) */
2827
unsigned max_trip_count;
2828
2829
/* Do we know the exact number of times the loop will be run? */
2830
bool exact_trip_count_known;
2831
2832
/* Unroll the loop regardless of its size */
2833
bool force_unroll;
2834
2835
/* Does the loop contain complex loop terminators, continues or other
2836
* complex behaviours? If this is true we can't rely on
2837
* loop_terminator_list to be complete or accurate.
2838
*/
2839
bool complex_loop;
2840
2841
nir_loop_terminator *limiting_terminator;
2842
2843
/* A list of loop_terminators terminating this loop. */
2844
struct list_head loop_terminator_list;
2845
} nir_loop_info;
2846
2847
typedef enum {
2848
nir_loop_control_none = 0x0,
2849
nir_loop_control_unroll = 0x1,
2850
nir_loop_control_dont_unroll = 0x2,
2851
} nir_loop_control;
2852
2853
typedef struct {
2854
nir_cf_node cf_node;
2855
2856
struct exec_list body; /** < list of nir_cf_node */
2857
2858
nir_loop_info *info;
2859
nir_loop_control control;
2860
bool partially_unrolled;
2861
bool divergent;
2862
} nir_loop;
2863
2864
/**
2865
* Various bits of metadata that may be created or required by
2866
* optimization and analysis passes
2867
*/
2868
typedef enum {
2869
nir_metadata_none = 0x0,
2870
2871
/** Indicates that nir_block::index values are valid.
2872
*
2873
* The start block has index 0 and they increase through a natural walk of
2874
* the CFG. nir_function_impl::num_blocks is the number of blocks and
2875
* every block index is in the range [0, nir_function_impl::num_blocks].
2876
*
2877
* A pass can preserve this metadata type if it doesn't touch the CFG.
2878
*/
2879
nir_metadata_block_index = 0x1,
2880
2881
/** Indicates that block dominance information is valid
2882
*
2883
* This includes:
2884
*
2885
* - nir_block::num_dom_children
2886
* - nir_block::dom_children
2887
* - nir_block::dom_frontier
2888
* - nir_block::dom_pre_index
2889
* - nir_block::dom_post_index
2890
*
2891
* A pass can preserve this metadata type if it doesn't touch the CFG.
2892
*/
2893
nir_metadata_dominance = 0x2,
2894
2895
/** Indicates that SSA def data-flow liveness information is valid
2896
*
2897
* This includes:
2898
*
2899
* - nir_block::live_in
2900
* - nir_block::live_out
2901
*
2902
* A pass can preserve this metadata type if it never adds or removes any
2903
* SSA defs (most passes shouldn't preserve this metadata type).
2904
*/
2905
nir_metadata_live_ssa_defs = 0x4,
2906
2907
/** A dummy metadata value to track when a pass forgot to call
2908
* nir_metadata_preserve.
2909
*
2910
* A pass should always clear this value even if it doesn't make any
2911
* progress to indicate that it thought about preserving metadata.
2912
*/
2913
nir_metadata_not_properly_reset = 0x8,
2914
2915
/** Indicates that loop analysis information is valid.
2916
*
2917
* This includes everything pointed to by nir_loop::info.
2918
*
2919
* A pass can preserve this metadata type if it is guaranteed to not affect
2920
* any loop metadata. However, since loop metadata includes things like
2921
* loop counts which depend on arithmetic in the loop, this is very hard to
2922
* determine. Most passes shouldn't preserve this metadata type.
2923
*/
2924
nir_metadata_loop_analysis = 0x10,
2925
2926
/** Indicates that nir_instr::index values are valid.
2927
*
2928
* The start instruction has index 0 and they increase through a natural
2929
* walk of instructions in blocks in the CFG. The indices may have holes
2930
* after passes such as DCE.
2931
*
2932
* A pass can preserve this metadata type if it never adds or moves any
2933
* instructions (most passes shouldn't preserve this metadata type), but
2934
* can preserve it if it only removes instructions.
2935
*/
2936
nir_metadata_instr_index = 0x20,
2937
2938
/** All metadata
2939
*
2940
* This includes all nir_metadata flags except not_properly_reset. Passes
2941
* which do not change the shader in any way should call
2942
*
2943
* nir_metadata_preserve(impl, nir_metadata_all);
2944
*/
2945
nir_metadata_all = ~nir_metadata_not_properly_reset,
2946
} nir_metadata;
2947
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata)
2948
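/* Typical pass idiom (a sketch of common usage, not a requirement of the API):
 *
 *    if (progress) {
 *       // the CFG was not touched, so block indices and dominance survive
 *       nir_metadata_preserve(impl, nir_metadata_block_index |
 *                                   nir_metadata_dominance);
 *    } else {
 *       nir_metadata_preserve(impl, nir_metadata_all);
 *    }
 */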
2949
typedef struct {
2950
nir_cf_node cf_node;
2951
2952
/** pointer to the function of which this is an implementation */
2953
struct nir_function *function;
2954
2955
struct exec_list body; /** < list of nir_cf_node */
2956
2957
nir_block *end_block;
2958
2959
/** list for all local variables in the function */
2960
struct exec_list locals;
2961
2962
/** list of local registers in the function */
2963
struct exec_list registers;
2964
2965
/** next available local register index */
2966
unsigned reg_alloc;
2967
2968
/** next available SSA value index */
2969
unsigned ssa_alloc;
2970
2971
/* total number of basic blocks; only valid when nir_metadata_block_index is valid */
2972
unsigned num_blocks;
2973
2974
/** True if this nir_function_impl uses structured control-flow
2975
*
2976
* Structured nir_function_impls have different validation rules.
2977
*/
2978
bool structured;
2979
2980
nir_metadata valid_metadata;
2981
} nir_function_impl;
2982
2983
#define nir_foreach_function_temp_variable(var, impl) \
2984
foreach_list_typed(nir_variable, var, node, &(impl)->locals)
2985
2986
#define nir_foreach_function_temp_variable_safe(var, impl) \
2987
foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals)
2988
2989
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
2990
nir_start_block(nir_function_impl *impl)
2991
{
2992
return (nir_block *) impl->body.head_sentinel.next;
2993
}
2994
2995
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
2996
nir_impl_last_block(nir_function_impl *impl)
2997
{
2998
return (nir_block *) impl->body.tail_sentinel.prev;
2999
}
3000
3001
static inline nir_cf_node *
3002
nir_cf_node_next(nir_cf_node *node)
3003
{
3004
struct exec_node *next = exec_node_get_next(&node->node);
3005
if (exec_node_is_tail_sentinel(next))
3006
return NULL;
3007
else
3008
return exec_node_data(nir_cf_node, next, node);
3009
}
3010
3011
static inline nir_cf_node *
3012
nir_cf_node_prev(nir_cf_node *node)
3013
{
3014
struct exec_node *prev = exec_node_get_prev(&node->node);
3015
if (exec_node_is_head_sentinel(prev))
3016
return NULL;
3017
else
3018
return exec_node_data(nir_cf_node, prev, node);
3019
}
3020
3021
static inline bool
3022
nir_cf_node_is_first(const nir_cf_node *node)
3023
{
3024
return exec_node_is_head_sentinel(node->node.prev);
3025
}
3026
3027
static inline bool
3028
nir_cf_node_is_last(const nir_cf_node *node)
3029
{
3030
return exec_node_is_tail_sentinel(node->node.next);
3031
}
3032
3033
NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node,
3034
type, nir_cf_node_block)
3035
NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node,
3036
type, nir_cf_node_if)
3037
NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node,
3038
type, nir_cf_node_loop)
3039
NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node,
3040
nir_function_impl, cf_node, type, nir_cf_node_function)
3041
3042
static inline nir_block *
3043
nir_if_first_then_block(nir_if *if_stmt)
3044
{
3045
struct exec_node *head = exec_list_get_head(&if_stmt->then_list);
3046
return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3047
}
3048
3049
static inline nir_block *
3050
nir_if_last_then_block(nir_if *if_stmt)
3051
{
3052
struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list);
3053
return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3054
}
3055
3056
static inline nir_block *
3057
nir_if_first_else_block(nir_if *if_stmt)
3058
{
3059
struct exec_node *head = exec_list_get_head(&if_stmt->else_list);
3060
return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3061
}
3062
3063
static inline nir_block *
3064
nir_if_last_else_block(nir_if *if_stmt)
3065
{
3066
struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list);
3067
return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3068
}
3069
3070
static inline nir_block *
3071
nir_loop_first_block(nir_loop *loop)
3072
{
3073
struct exec_node *head = exec_list_get_head(&loop->body);
3074
return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3075
}
3076
3077
static inline nir_block *
3078
nir_loop_last_block(nir_loop *loop)
3079
{
3080
struct exec_node *tail = exec_list_get_tail(&loop->body);
3081
return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
3082
}
3083
3084
/**
3085
* Return true if this list of cf_nodes contains a single empty block.
3086
*/
3087
static inline bool
3088
nir_cf_list_is_empty_block(struct exec_list *cf_list)
3089
{
3090
if (exec_list_is_singular(cf_list)) {
3091
struct exec_node *head = exec_list_get_head(cf_list);
3092
nir_block *block =
3093
nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
3094
return exec_list_is_empty(&block->instr_list);
3095
}
3096
return false;
3097
}
3098
3099
typedef struct {
3100
uint8_t num_components;
3101
uint8_t bit_size;
3102
} nir_parameter;
3103
3104
typedef struct nir_printf_info {
3105
unsigned num_args;
3106
unsigned *arg_sizes;
3107
unsigned string_size;
3108
char *strings;
3109
} nir_printf_info;
3110
3111
typedef struct nir_function {
3112
struct exec_node node;
3113
3114
const char *name;
3115
struct nir_shader *shader;
3116
3117
unsigned num_params;
3118
nir_parameter *params;
3119
3120
/** The implementation of this function.
3121
*
3122
* If the function is only declared and not implemented, this is NULL.
3123
*/
3124
nir_function_impl *impl;
3125
3126
bool is_entrypoint;
3127
} nir_function;
3128
3129
typedef enum {
3130
nir_lower_imul64 = (1 << 0),
3131
nir_lower_isign64 = (1 << 1),
3132
/** Lower all int64 modulus and division opcodes */
3133
nir_lower_divmod64 = (1 << 2),
3134
/** Lower all 64-bit umul_high and imul_high opcodes */
3135
nir_lower_imul_high64 = (1 << 3),
3136
nir_lower_mov64 = (1 << 4),
3137
nir_lower_icmp64 = (1 << 5),
3138
nir_lower_iadd64 = (1 << 6),
3139
nir_lower_iabs64 = (1 << 7),
3140
nir_lower_ineg64 = (1 << 8),
3141
nir_lower_logic64 = (1 << 9),
3142
nir_lower_minmax64 = (1 << 10),
3143
nir_lower_shift64 = (1 << 11),
3144
nir_lower_imul_2x32_64 = (1 << 12),
3145
nir_lower_extract64 = (1 << 13),
3146
nir_lower_ufind_msb64 = (1 << 14),
3147
nir_lower_bit_count64 = (1 << 15),
3148
nir_lower_subgroup_shuffle64 = (1 << 16),
3149
nir_lower_scan_reduce_bitwise64 = (1 << 17),
3150
nir_lower_scan_reduce_iadd64 = (1 << 18),
3151
nir_lower_vote_ieq64 = (1 << 19),
3152
} nir_lower_int64_options;
3153
3154
typedef enum {
3155
nir_lower_drcp = (1 << 0),
3156
nir_lower_dsqrt = (1 << 1),
3157
nir_lower_drsq = (1 << 2),
3158
nir_lower_dtrunc = (1 << 3),
3159
nir_lower_dfloor = (1 << 4),
3160
nir_lower_dceil = (1 << 5),
3161
nir_lower_dfract = (1 << 6),
3162
nir_lower_dround_even = (1 << 7),
3163
nir_lower_dmod = (1 << 8),
3164
nir_lower_dsub = (1 << 9),
3165
nir_lower_ddiv = (1 << 10),
3166
nir_lower_fp64_full_software = (1 << 11),
3167
} nir_lower_doubles_options;
3168
3169
typedef enum {
3170
nir_divergence_single_prim_per_subgroup = (1 << 0),
3171
nir_divergence_single_patch_per_tcs_subgroup = (1 << 1),
3172
nir_divergence_single_patch_per_tes_subgroup = (1 << 2),
3173
nir_divergence_view_index_uniform = (1 << 3),
3174
nir_divergence_single_frag_shading_rate_per_subgroup = (1 << 4),
3175
nir_divergence_multiple_workgroup_per_compute_subgroup = (1 << 5),
3176
} nir_divergence_options;
3177
3178
/** An instruction filtering callback
3179
*
3180
* Returns true if the instruction should be processed and false otherwise.
3181
*/
3182
typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);
3183
3184
typedef struct nir_shader_compiler_options {
3185
bool lower_fdiv;
3186
bool lower_ffma16;
3187
bool lower_ffma32;
3188
bool lower_ffma64;
3189
bool fuse_ffma16;
3190
bool fuse_ffma32;
3191
bool fuse_ffma64;
3192
bool lower_flrp16;
3193
bool lower_flrp32;
3194
/** Lowers flrp when it does not support doubles */
3195
bool lower_flrp64;
3196
bool lower_fpow;
3197
bool lower_fsat;
3198
bool lower_fsqrt;
3199
bool lower_sincos;
3200
bool lower_fmod;
3201
/** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
3202
bool lower_bitfield_extract;
3203
/** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */
3204
bool lower_bitfield_extract_to_shifts;
3205
/** Lowers bitfield_insert to bfi/bfm */
3206
bool lower_bitfield_insert;
3207
/** Lowers bitfield_insert to compares, and shifts. */
3208
bool lower_bitfield_insert_to_shifts;
3209
/** Lowers bitfield_insert to bfm/bitfield_select. */
3210
bool lower_bitfield_insert_to_bitfield_select;
3211
/** Lowers bitfield_reverse to shifts. */
3212
bool lower_bitfield_reverse;
3213
/** Lowers bit_count to shifts. */
3214
bool lower_bit_count;
3215
/** Lowers ifind_msb to compare and ufind_msb */
3216
bool lower_ifind_msb;
3217
/** Lowers ifind_msb and ufind_msb to reverse variants */
3218
bool lower_find_msb_to_reverse;
3219
/** Lowers find_lsb to ufind_msb and logic ops */
3220
bool lower_find_lsb;
3221
bool lower_uadd_carry;
3222
bool lower_usub_borrow;
3223
/** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
3224
bool lower_mul_high;
3225
/** lowers fneg to fmul(x, -1.0). Driver must call nir_opt_algebraic_late() */
3226
bool lower_fneg;
3227
/** lowers ineg to isub. Driver must call nir_opt_algebraic_late(). */
3228
bool lower_ineg;
3229
3230
/* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */
3231
bool lower_scmp;
3232
3233
/* lower b/fall_equalN/b/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */
3234
bool lower_vector_cmp;
3235
3236
/** enable rules to avoid bit ops */
3237
bool lower_bitops;
3238
3239
/** enables rules to lower isign to imin+imax */
3240
bool lower_isign;
3241
3242
/** enables rules to lower fsign to fsub and flt */
3243
bool lower_fsign;
3244
3245
/** enables rules to lower iabs to ineg+imax */
3246
bool lower_iabs;
3247
3248
/** enable rules that avoid generating umax from signed integer ops */
3249
bool lower_umax;
3250
3251
/** enable rules that avoid generating umin from signed integer ops */
3252
bool lower_umin;
3253
3254
/* lower fdph to fdot4 */
3255
bool lower_fdph;
3256
3257
/** lower fdot to fmul and fsum/fadd. */
3258
bool lower_fdot;
3259
3260
/* Does the native fdot instruction replicate its result for four
3261
* components? If so, then opt_algebraic_late will turn all fdotN
3262
* instructions into fdotN_replicated instructions.
3263
*/
3264
bool fdot_replicates;
3265
3266
/** lowers ffloor to fsub+ffract: */
3267
bool lower_ffloor;
3268
3269
/** lowers ffract to fsub+ffloor: */
3270
bool lower_ffract;
3271
3272
/** lowers fceil to fneg+ffloor+fneg: */
3273
bool lower_fceil;
3274
3275
bool lower_ftrunc;
3276
3277
bool lower_ldexp;
3278
3279
bool lower_pack_half_2x16;
3280
bool lower_pack_unorm_2x16;
3281
bool lower_pack_snorm_2x16;
3282
bool lower_pack_unorm_4x8;
3283
bool lower_pack_snorm_4x8;
3284
bool lower_pack_64_2x32;
3285
bool lower_pack_64_4x16;
3286
bool lower_pack_32_2x16;
3287
bool lower_pack_64_2x32_split;
3288
bool lower_pack_32_2x16_split;
3289
bool lower_unpack_half_2x16;
3290
bool lower_unpack_unorm_2x16;
3291
bool lower_unpack_snorm_2x16;
3292
bool lower_unpack_unorm_4x8;
3293
bool lower_unpack_snorm_4x8;
3294
bool lower_unpack_64_2x32_split;
3295
bool lower_unpack_32_2x16_split;
3296
3297
bool lower_pack_split;
3298
3299
bool lower_extract_byte;
3300
bool lower_extract_word;
3301
bool lower_insert_byte;
3302
bool lower_insert_word;
3303
3304
bool lower_all_io_to_temps;
3305
bool lower_all_io_to_elements;
3306
3307
/* Indicates that the driver only has zero-based vertex id */
3308
bool vertex_id_zero_based;
3309
3310
/**
3311
* If enabled, gl_BaseVertex will be lowered as:
3312
* is_indexed_draw (~0/0) & firstvertex
3313
*/
3314
bool lower_base_vertex;
3315
3316
/**
3317
* If enabled, gl_HelperInvocation will be lowered as:
3318
*
3319
* !((1 << sample_id) & sample_mask_in))
3320
*
3321
* This depends on some possible hw implementation details, which may
3322
* not be true for all hw. In particular that the FS is only executed
3323
* for covered samples or for helper invocations. So, do not blindly
3324
* enable this option.
3325
*
3326
* Note: See also issue #22 in ARB_shader_image_load_store
3327
*/
3328
bool lower_helper_invocation;
3329
3330
/**
3331
* Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
3332
*
3333
* gl_SampleMaskIn == 0 ---> gl_HelperInvocation
3334
* gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
3335
*/
3336
bool optimize_sample_mask_in;
3337
3338
bool lower_cs_local_index_from_id;
3339
bool lower_cs_local_id_from_index;
3340
3341
/* Prevents lowering global_invocation_id to be in terms of workgroup_id */
3342
bool has_cs_global_id;
3343
3344
bool lower_device_index_to_zero;
3345
3346
/* Set if nir_lower_pntc_ytransform() should invert gl_PointCoord.
3347
* Either when frame buffer is flipped or GL_POINT_SPRITE_COORD_ORIGIN
3348
* is GL_LOWER_LEFT.
3349
*/
3350
bool lower_wpos_pntc;
3351
3352
/**
3353
* Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be
3354
* lowered to simple arithmetic.
3355
*
3356
* If this flag is set, the lowering will be applied to all bit-sizes of
3357
* these instructions.
3358
*
3359
* \sa ::lower_hadd64
3360
*/
3361
bool lower_hadd;
3362
3363
/**
3364
* Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions
3365
* should be lowered to simple arithmetic.
3366
*
3367
* If this flag is set, the lowering will be applied to only 64-bit
3368
* versions of these instructions.
3369
*
3370
* \sa ::lower_hadd
3371
*/
3372
bool lower_hadd64;
3373
3374
/**
3375
* Set if nir_op_uadd_sat and nir_op_usub_sat should be lowered to simple
3376
* arithmetic.
3377
*
3378
* If this flag is set, the lowering will be applied to all bit-sizes of
3379
* these instructions.
3380
*
3381
* \sa ::lower_usub_sat64
3382
*/
3383
bool lower_add_sat;
3384
3385
/**
3386
* Set if only 64-bit nir_op_usub_sat should be lowered to simple
3387
* arithmetic.
3388
*
3389
* \sa ::lower_add_sat
3390
*/
3391
bool lower_usub_sat64;
3392
3393
/**
3394
* Should IO be re-vectorized? Some scalar ISAs still operate on vec4's
3395
* for IO purposes and would prefer loads/stores be vectorized.
3396
*/
3397
bool vectorize_io;
3398
bool lower_to_scalar;
3399
nir_instr_filter_cb lower_to_scalar_filter;
3400
3401
/**
3402
* Whether nir_opt_vectorize should only create 16-bit 2D vectors.
3403
*/
3404
bool vectorize_vec2_16bit;
3405
3406
/**
3407
* Should the linker unify inputs_read/outputs_written between adjacent
3408
* shader stages which are linked into a single program?
3409
*/
3410
bool unify_interfaces;
3411
3412
/**
3413
* Should nir_lower_io() create load_interpolated_input intrinsics?
3414
*
3415
* If not, it generates regular load_input intrinsics and interpolation
3416
* information must be inferred from the list of input nir_variables.
3417
*/
3418
bool use_interpolated_input_intrinsics;
3419
3420
3421
/**
3422
* Whether nir_lower_io() will lower interpolateAt functions to
3423
* load_interpolated_input intrinsics.
3424
*
3425
* Unlike use_interpolated_input_intrinsics this will only lower these
3426
* functions and leave input load intrinsics untouched.
3427
*/
3428
bool lower_interpolate_at;
3429
3430
/* Lowers when 32x32->64 bit multiplication is not supported */
3431
bool lower_mul_2x32_64;
3432
3433
/* Lowers when rotate instruction is not supported */
3434
bool lower_rotate;
3435
3436
/**
3437
* Backend supports imul24, and would like to use it (when possible)
3438
* for address/offset calculation. If true, driver should call
3439
* nir_lower_amul(). (If not set, amul will automatically be lowered
3440
* to imul.)
3441
*/
3442
bool has_imul24;
3443
3444
/** Backend supports umul24, if not set umul24 will automatically be lowered
3445
* to imul with masked inputs */
3446
bool has_umul24;
3447
3448
/** Backend supports umad24, if not set umad24 will automatically be lowered
3449
* to imul with masked inputs and iadd */
3450
bool has_umad24;
3451
3452
/* Backend supports fused compare against zero and csel */
3453
bool has_fused_comp_and_csel;
3454
3455
/** Backend supports fsub, if not set fsub will automatically be lowered to
3456
* fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
3457
bool has_fsub;
3458
3459
/** Backend supports isub, if not set isub will automatically be lowered to
3460
* iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */
3461
bool has_isub;
3462
3463
/** Backend supports txs, if not nir_lower_tex(..) uses txs-free variants
3464
* for rect texture lowering. */
3465
bool has_txs;
3466
3467
/* Whether to generate only scoped_barrier intrinsics instead of the set of
3468
* memory and control barrier intrinsics based on GLSL.
3469
*/
3470
bool use_scoped_barrier;
3471
3472
/**
3473
* Is this the Intel vec4 backend?
3474
*
3475
* Used to inhibit algebraic optimizations that are known to be harmful on
3476
* the Intel vec4 backend. This is generally applicable to any
3477
* optimization that might cause more immediate values to be used in
3478
* 3-source (e.g., ffma and flrp) instructions.
3479
*/
3480
bool intel_vec4;
3481
3482
/** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */
3483
bool lower_bfe_with_two_constants;
3484
3485
/** Whether 8-bit ALU is supported. */
3486
bool support_8bit_alu;
3487
3488
/** Whether 16-bit ALU is supported. */
3489
bool support_16bit_alu;
3490
3491
unsigned max_unroll_iterations;
3492
unsigned max_unroll_iterations_aggressive;
3493
3494
bool lower_uniforms_to_ubo;
3495
3496
/* If the precision is ignored, backends that don't handle
3497
* different precisions when passing data between stages and use
3498
* vectorized IO can pack more varyings when linking. */
3499
bool linker_ignore_precision;
3500
3501
nir_lower_int64_options lower_int64_options;
3502
nir_lower_doubles_options lower_doubles_options;
3503
nir_divergence_options divergence_analysis_options;
3504
} nir_shader_compiler_options;
3505
3506
typedef struct nir_shader {
3507
/** list of uniforms (nir_variable) */
3508
struct exec_list variables;
3509
3510
/** Set of driver-specific options for the shader.
3511
*
3512
* The memory for the options is expected to be kept in a single static
3513
* copy by the driver.
3514
*/
3515
const struct nir_shader_compiler_options *options;
3516
3517
/** Various bits of compile-time information about a given shader */
3518
struct shader_info info;
3519
3520
struct exec_list functions; /** < list of nir_function */
3521
3522
/**
3523
* The size of the variable space for load_input_*, load_uniform_*, etc.
3524
* intrinsics. This is in back-end specific units which is likely one of
3525
* bytes, dwords, or vec4s depending on context and back-end.
3526
*/
3527
unsigned num_inputs, num_uniforms, num_outputs;
3528
3529
/** Size in bytes of required scratch space */
3530
unsigned scratch_size;
3531
3532
/** Constant data associated with this shader.
3533
*
3534
* Constant data is loaded through load_constant intrinsics (as compared to
3535
* the NIR load_const instructions which have the constant value inlined
3536
* into them). This is usually generated by nir_opt_large_constants (so
3537
* shaders don't have to load_const into a temporary array when they want
3538
* to indirect on a const array).
3539
*/
3540
void *constant_data;
3541
/** Size of the constant data associated with the shader, in bytes */
3542
unsigned constant_data_size;
3543
3544
unsigned printf_info_count;
3545
nir_printf_info *printf_info;
3546
} nir_shader;
3547
3548
#define nir_foreach_function(func, shader) \
3549
foreach_list_typed(nir_function, func, node, &(shader)->functions)
3550
3551
static inline nir_function_impl *
3552
nir_shader_get_entrypoint(nir_shader *shader)
3553
{
3554
nir_function *func = NULL;
3555
3556
nir_foreach_function(function, shader) {
3557
assert(func == NULL);
3558
if (function->is_entrypoint) {
3559
func = function;
3560
#ifndef NDEBUG
3561
break;
3562
#endif
3563
}
3564
}
3565
3566
if (!func)
3567
return NULL;
3568
3569
assert(func->num_params == 0);
3570
assert(func->impl);
3571
return func->impl;
3572
}

typedef struct nir_liveness_bounds {
   uint32_t start;
   uint32_t end;
} nir_liveness_bounds;

typedef struct nir_instr_liveness {
   /**
    * nir_instr->index for the start and end of a single live interval for SSA
    * defs.  ssa values last used by a nir_if condition will have an interval
    * ending at the first instruction after the last one before the if
    * condition.
    *
    * Indexed by def->index (impl->ssa_alloc elements).
    */
   struct nir_liveness_bounds *defs;
} nir_instr_liveness;

nir_instr_liveness *
nir_live_ssa_defs_per_instr(nir_function_impl *impl);

nir_shader *nir_shader_create(void *mem_ctx,
                              gl_shader_stage stage,
                              const nir_shader_compiler_options *options,
                              shader_info *si);

nir_register *nir_local_reg_create(nir_function_impl *impl);

void nir_reg_remove(nir_register *reg);

/** Adds a variable to the appropriate list in nir_shader */
void nir_shader_add_variable(nir_shader *shader, nir_variable *var);

static inline void
nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
{
   assert(var->data.mode == nir_var_function_temp);
   exec_list_push_tail(&impl->locals, &var->node);
}

/** creates a variable, sets a few defaults, and adds it to the list */
nir_variable *nir_variable_create(nir_shader *shader,
                                  nir_variable_mode mode,
                                  const struct glsl_type *type,
                                  const char *name);
/** creates a local variable and adds it to the list */
nir_variable *nir_local_variable_create(nir_function_impl *impl,
                                        const struct glsl_type *type,
                                        const char *name);

nir_variable *nir_find_variable_with_location(nir_shader *shader,
                                              nir_variable_mode mode,
                                              unsigned location);

nir_variable *nir_find_variable_with_driver_location(nir_shader *shader,
                                                     nir_variable_mode mode,
                                                     unsigned location);

void nir_sort_variables_with_modes(nir_shader *shader,
                                   int (*compar)(const nir_variable *,
                                                 const nir_variable *),
                                   nir_variable_mode modes);

/** creates a function and adds it to the shader's list of functions */
nir_function *nir_function_create(nir_shader *shader, const char *name);

nir_function_impl *nir_function_impl_create(nir_function *func);
/** creates a function_impl that isn't tied to any particular function */
nir_function_impl *nir_function_impl_create_bare(nir_shader *shader);

nir_block *nir_block_create(nir_shader *shader);
nir_if *nir_if_create(nir_shader *shader);
nir_loop *nir_loop_create(nir_shader *shader);

nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);

/** requests that the given pieces of metadata be generated */
void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...);
/** dirties all but the preserved metadata */
void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
/** Preserves all metadata for the given shader */
void nir_shader_preserve_all_metadata(nir_shader *shader);

/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);

nir_deref_instr *nir_deref_instr_create(nir_shader *shader,
                                        nir_deref_type deref_type);

nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);

nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
                                                  unsigned num_components,
                                                  unsigned bit_size);

nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
                                                nir_intrinsic_op op);

nir_call_instr *nir_call_instr_create(nir_shader *shader,
                                      nir_function *callee);

nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);

nir_phi_instr *nir_phi_instr_create(nir_shader *shader);

nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);

nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
                                                unsigned num_components,
                                                unsigned bit_size);

nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size);

/**
 * NIR Cursors and Instruction Insertion API
 * @{
 *
 * A tiny struct representing a point to insert/extract instructions or
 * control flow nodes.  Helps reduce the combinatorial explosion of possible
 * points to insert/extract.
 *
 * \sa nir_control_flow.h
 */
typedef enum {
   nir_cursor_before_block,
   nir_cursor_after_block,
   nir_cursor_before_instr,
   nir_cursor_after_instr,
} nir_cursor_option;

typedef struct {
   nir_cursor_option option;
   union {
      nir_block *block;
      nir_instr *instr;
   };
} nir_cursor;

static inline nir_block *
nir_cursor_current_block(nir_cursor cursor)
{
   if (cursor.option == nir_cursor_before_instr ||
       cursor.option == nir_cursor_after_instr) {
      return cursor.instr->block;
   } else {
      return cursor.block;
   }
}

bool nir_cursors_equal(nir_cursor a, nir_cursor b);

static inline nir_cursor
nir_before_block(nir_block *block)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_before_block;
   cursor.block = block;
   return cursor;
}

static inline nir_cursor
nir_after_block(nir_block *block)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_after_block;
   cursor.block = block;
   return cursor;
}

static inline nir_cursor
nir_before_instr(nir_instr *instr)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_before_instr;
   cursor.instr = instr;
   return cursor;
}

static inline nir_cursor
nir_after_instr(nir_instr *instr)
{
   nir_cursor cursor;
   cursor.option = nir_cursor_after_instr;
   cursor.instr = instr;
   return cursor;
}

static inline nir_cursor
nir_before_block_after_phis(nir_block *block)
{
   nir_phi_instr *last_phi = nir_block_last_phi_instr(block);
   if (last_phi)
      return nir_after_instr(&last_phi->instr);
   else
      return nir_before_block(block);
}

static inline nir_cursor
nir_after_block_before_jump(nir_block *block)
{
   nir_instr *last_instr = nir_block_last_instr(block);
   if (last_instr && last_instr->type == nir_instr_type_jump) {
      return nir_before_instr(last_instr);
   } else {
      return nir_after_block(block);
   }
}

static inline nir_cursor
nir_before_src(nir_src *src, bool is_if_condition)
{
   if (is_if_condition) {
      nir_block *prev_block =
         nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
      assert(!nir_block_ends_in_jump(prev_block));
      return nir_after_block(prev_block);
   } else if (src->parent_instr->type == nir_instr_type_phi) {
#ifndef NDEBUG
      nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
      bool found = false;
      nir_foreach_phi_src(phi_src, cond_phi) {
         if (phi_src->src.ssa == src->ssa) {
            found = true;
            break;
         }
      }
      assert(found);
#endif
      /* The LIST_ENTRY macro is a generic container-of macro, it just happens
       * to have a more specific name.
       */
      nir_phi_src *phi_src = LIST_ENTRY(nir_phi_src, src, src);
      return nir_after_block_before_jump(phi_src->pred);
   } else {
      return nir_before_instr(src->parent_instr);
   }
}

static inline nir_cursor
nir_before_cf_node(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_before_block(nir_cf_node_as_block(node));

   return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
}

static inline nir_cursor
nir_after_cf_node(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_after_block(nir_cf_node_as_block(node));

   return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
}

static inline nir_cursor
nir_after_phis(nir_block *block)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_phi)
         return nir_before_instr(instr);
   }
   return nir_after_block(block);
}

static inline nir_cursor
nir_after_instr_and_phis(nir_instr *instr)
{
   if (instr->type == nir_instr_type_phi)
      return nir_after_phis(instr->block);
   else
      return nir_after_instr(instr);
}

static inline nir_cursor
nir_after_cf_node_and_phis(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_after_block(nir_cf_node_as_block(node));

   nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));

   return nir_after_phis(block);
}

static inline nir_cursor
nir_before_cf_list(struct exec_list *cf_list)
{
   nir_cf_node *first_node = exec_node_data(nir_cf_node,
                                            exec_list_get_head(cf_list), node);
   return nir_before_cf_node(first_node);
}

static inline nir_cursor
nir_after_cf_list(struct exec_list *cf_list)
{
   nir_cf_node *last_node = exec_node_data(nir_cf_node,
                                           exec_list_get_tail(cf_list), node);
   return nir_after_cf_node(last_node);
}

/**
 * Insert a NIR instruction at the given cursor.
 *
 * Note: This does not update the cursor.
 */
void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
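
/* Usage sketch (editorial illustration): cursors pick the insertion point and
 * nir_instr_insert() does the insertion.  Assuming an already-built
 * nir_load_const_instr named "load":
 *
 *    nir_cursor c = nir_after_block_before_jump(block);
 *    nir_instr_insert(c, &load->instr);
 *
 * or, relative to an existing instruction:
 *
 *    nir_instr_insert(nir_before_instr(some_instr), &load->instr);
 */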
3881
3882
bool nir_instr_move(nir_cursor cursor, nir_instr *instr);
3883
3884
static inline void
3885
nir_instr_insert_before(nir_instr *instr, nir_instr *before)
3886
{
3887
nir_instr_insert(nir_before_instr(instr), before);
3888
}
3889
3890
static inline void
3891
nir_instr_insert_after(nir_instr *instr, nir_instr *after)
3892
{
3893
nir_instr_insert(nir_after_instr(instr), after);
3894
}
3895
3896
static inline void
3897
nir_instr_insert_before_block(nir_block *block, nir_instr *before)
3898
{
3899
nir_instr_insert(nir_before_block(block), before);
3900
}
3901
3902
static inline void
3903
nir_instr_insert_after_block(nir_block *block, nir_instr *after)
3904
{
3905
nir_instr_insert(nir_after_block(block), after);
3906
}
3907
3908
static inline void
3909
nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
3910
{
3911
nir_instr_insert(nir_before_cf_node(node), before);
3912
}
3913
3914
static inline void
3915
nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
3916
{
3917
nir_instr_insert(nir_after_cf_node(node), after);
3918
}
3919
3920
static inline void
3921
nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
3922
{
3923
nir_instr_insert(nir_before_cf_list(list), before);
3924
}
3925
3926
static inline void
3927
nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
3928
{
3929
nir_instr_insert(nir_after_cf_list(list), after);
3930
}
3931
3932
void nir_instr_remove_v(nir_instr *instr);
3933
3934
static inline nir_cursor
3935
nir_instr_remove(nir_instr *instr)
3936
{
3937
nir_cursor cursor;
3938
nir_instr *prev = nir_instr_prev(instr);
3939
if (prev) {
3940
cursor = nir_after_instr(prev);
3941
} else {
3942
cursor = nir_before_block(instr->block);
3943
}
3944
nir_instr_remove_v(instr);
3945
return cursor;
3946
}
3947
3948
nir_cursor nir_instr_free_and_dce(nir_instr *instr);
3949
3950
/** @} */
3951
3952
nir_ssa_def *nir_instr_ssa_def(nir_instr *instr);
3953
3954
typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
3955
typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
3956
typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
3957
bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
3958
void *state);
3959
static inline bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
3960
static inline bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
3961
bool nir_foreach_phi_src_leaving_block(nir_block *instr,
3962
nir_foreach_src_cb cb,
3963
void *state);
3964
3965
nir_const_value *nir_src_as_const_value(nir_src src);
3966
3967
#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro) \
3968
static inline c_type * \
3969
nir_src_as_ ## name (nir_src src) \
3970
{ \
3971
return src.is_ssa && src.ssa->parent_instr->type == type_enum \
3972
? cast_macro(src.ssa->parent_instr) : NULL; \
3973
}
3974
3975
NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu)
3976
NIR_SRC_AS_(intrinsic, nir_intrinsic_instr,
3977
nir_instr_type_intrinsic, nir_instr_as_intrinsic)
3978
NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref)
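
/* Usage sketch (editorial illustration): the generated helpers return NULL
 * when the source is not SSA or its parent is not the requested instruction
 * type, so they can be used directly in a condition.  Assuming "alu" is a
 * nir_alu_instr *:
 *
 *    nir_intrinsic_instr *parent = nir_src_as_intrinsic(alu->src[0].src);
 *    if (parent && parent->intrinsic == nir_intrinsic_load_ubo) {
 *       ...
 *    }
 */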

bool nir_src_is_dynamically_uniform(nir_src src);
bool nir_srcs_equal(nir_src src1, nir_src src2);
bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2);

static inline void
nir_instr_rewrite_src_ssa(ASSERTED nir_instr *instr,
                          nir_src *src, nir_ssa_def *new_ssa)
{
   assert(src->parent_instr == instr);
   assert(src->is_ssa && src->ssa);
   list_del(&src->use_link);
   src->ssa = new_ssa;
   list_addtail(&src->use_link, &new_ssa->uses);
}

void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);

static inline void
nir_if_rewrite_condition_ssa(ASSERTED nir_if *if_stmt,
                             nir_src *src, nir_ssa_def *new_ssa)
{
   assert(src->parent_if == if_stmt);
   assert(src->is_ssa && src->ssa);
   list_del(&src->use_link);
   src->ssa = new_ssa;
   list_addtail(&src->use_link, &new_ssa->if_uses);
}

void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
                            nir_dest new_dest);

void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                       unsigned num_components, unsigned bit_size,
                       const char *name);
void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                      unsigned num_components, unsigned bit_size);
static inline void
nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest,
                           const struct glsl_type *type,
                           const char *name)
{
   assert(glsl_type_is_vector_or_scalar(type));
   nir_ssa_dest_init(instr, dest, glsl_get_components(type),
                     glsl_get_bit_size(type), name);
}
void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa);
void nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src);
void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
                                    nir_instr *after_me);

nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def);

static inline bool
nir_ssa_def_is_unused(nir_ssa_def *ssa)
{
   return list_is_empty(&ssa->uses) && list_is_empty(&ssa->if_uses);
}

/** Returns the next block, disregarding structure
 *
 * The ordering is deterministic but has no guarantees beyond that.  In
 * particular, it is not guaranteed to be dominance-preserving.
 */
nir_block *nir_block_unstructured_next(nir_block *block);
nir_block *nir_unstructured_start_block(nir_function_impl *impl);

#define nir_foreach_block_unstructured(block, impl) \
   for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \
        block = nir_block_unstructured_next(block))

#define nir_foreach_block_unstructured_safe(block, impl) \
   for (nir_block *block = nir_unstructured_start_block(impl), \
        *next = nir_block_unstructured_next(block); \
        block != NULL; \
        block = next, next = nir_block_unstructured_next(block))

/*
 * finds the next basic block in source-code order, returns NULL if there is
 * none
 */

nir_block *nir_block_cf_tree_next(nir_block *block);

/* Performs the opposite of nir_block_cf_tree_next() */

nir_block *nir_block_cf_tree_prev(nir_block *block);

/* Gets the first block in a CF node in source-code order */

nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node);

/* Gets the last block in a CF node in source-code order */

nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node);

/* Gets the next block after a CF node in source-code order */

nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node);

/* Macros for loops that visit blocks in source-code order */

#define nir_foreach_block(block, impl) \
   for (nir_block *block = nir_start_block(impl); block != NULL; \
        block = nir_block_cf_tree_next(block))

#define nir_foreach_block_safe(block, impl) \
   for (nir_block *block = nir_start_block(impl), \
        *next = nir_block_cf_tree_next(block); \
        block != NULL; \
        block = next, next = nir_block_cf_tree_next(block))

#define nir_foreach_block_reverse(block, impl) \
   for (nir_block *block = nir_impl_last_block(impl); block != NULL; \
        block = nir_block_cf_tree_prev(block))

#define nir_foreach_block_reverse_safe(block, impl) \
   for (nir_block *block = nir_impl_last_block(impl), \
        *prev = nir_block_cf_tree_prev(block); \
        block != NULL; \
        block = prev, prev = nir_block_cf_tree_prev(block))

#define nir_foreach_block_in_cf_node(block, node) \
   for (nir_block *block = nir_cf_node_cf_tree_first(node); \
        block != nir_cf_node_cf_tree_next(node); \
        block = nir_block_cf_tree_next(block))
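
/* Usage sketch (editorial illustration): the typical shape of a per-impl pass
 * walks blocks in source-code order and then the instructions in each block.
 * process_instr() is a hypothetical helper.
 *
 *    bool progress = false;
 *    nir_foreach_block(block, impl) {
 *       nir_foreach_instr_safe(instr, block) {
 *          progress |= process_instr(instr);
 *       }
 *    }
 */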
4108
4109
/* If the following CF node is an if, this function returns that if.
4110
* Otherwise, it returns NULL.
4111
*/
4112
nir_if *nir_block_get_following_if(nir_block *block);
4113
4114
nir_loop *nir_block_get_following_loop(nir_block *block);
4115
4116
nir_block **nir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx);
4117
4118
void nir_index_local_regs(nir_function_impl *impl);
4119
void nir_index_ssa_defs(nir_function_impl *impl);
4120
unsigned nir_index_instrs(nir_function_impl *impl);
4121
4122
void nir_index_blocks(nir_function_impl *impl);
4123
4124
unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes);
4125
unsigned nir_function_impl_index_vars(nir_function_impl *impl);
4126
4127
void nir_print_shader(nir_shader *shader, FILE *fp);
4128
void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors);
4129
void nir_print_instr(const nir_instr *instr, FILE *fp);
4130
void nir_print_deref(const nir_deref_instr *deref, FILE *fp);
4131
void nir_log_shader_annotated_tagged(enum mesa_log_level level, const char *tag, nir_shader *shader, struct hash_table *annotations);
4132
#define nir_log_shadere(s) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), NULL)
4133
#define nir_log_shaderw(s) nir_log_shader_annotated_tagged(MESA_LOG_WARN, (MESA_LOG_TAG), (s), NULL)
4134
#define nir_log_shaderi(s) nir_log_shader_annotated_tagged(MESA_LOG_INFO, (MESA_LOG_TAG), (s), NULL)
4135
#define nir_log_shader_annotated(s, annotations) nir_log_shader_annotated_tagged(MESA_LOG_ERROR, (MESA_LOG_TAG), (s), annotations)
4136
4137
char *nir_shader_as_str(nir_shader *nir, void *mem_ctx);
4138
char *nir_shader_as_str_annotated(nir_shader *nir, struct hash_table *annotations, void *mem_ctx);
4139
4140
/** Shallow clone of a single instruction. */
4141
nir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig);
4142
4143
/** Shallow clone of a single ALU instruction. */
4144
nir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig);
4145
4146
nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
4147
nir_function_impl *nir_function_impl_clone(nir_shader *shader,
4148
const nir_function_impl *fi);
4149
nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
4150
nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);
4151
4152
void nir_shader_replace(nir_shader *dest, nir_shader *src);
4153
4154
void nir_shader_serialize_deserialize(nir_shader *s);
4155
4156
#ifndef NDEBUG
4157
void nir_validate_shader(nir_shader *shader, const char *when);
4158
void nir_validate_ssa_dominance(nir_shader *shader, const char *when);
4159
void nir_metadata_set_validation_flag(nir_shader *shader);
4160
void nir_metadata_check_validation_flag(nir_shader *shader);
4161
4162
static inline bool
4163
should_skip_nir(const char *name)
4164
{
4165
static const char *list = NULL;
4166
if (!list) {
4167
/* Comma separated list of names to skip. */
4168
list = getenv("NIR_SKIP");
4169
if (!list)
4170
list = "";
4171
}
4172
4173
if (!list[0])
4174
return false;
4175
4176
return comma_separated_list_contains(list, name);
4177
}
4178
4179
static inline bool
4180
should_clone_nir(void)
4181
{
4182
static int should_clone = -1;
4183
if (should_clone < 0)
4184
should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);
4185
4186
return should_clone;
4187
}
4188
4189
static inline bool
4190
should_serialize_deserialize_nir(void)
4191
{
4192
static int test_serialize = -1;
4193
if (test_serialize < 0)
4194
test_serialize = env_var_as_boolean("NIR_TEST_SERIALIZE", false);
4195
4196
return test_serialize;
4197
}
4198
4199
static inline bool
4200
should_print_nir(nir_shader *shader)
4201
{
4202
static int should_print = -1;
4203
if (should_print < 0)
4204
should_print = env_var_as_unsigned("NIR_PRINT", 0);
4205
4206
if (should_print == 1)
4207
return !shader->info.internal;
4208
4209
return should_print;
4210
}
4211
#else
4212
static inline void nir_validate_shader(nir_shader *shader, const char *when) { (void) shader; (void)when; }
4213
static inline void nir_validate_ssa_dominance(nir_shader *shader, const char *when) { (void) shader; (void)when; }
4214
static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
4215
static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
4216
static inline bool should_skip_nir(UNUSED const char *pass_name) { return false; }
4217
static inline bool should_clone_nir(void) { return false; }
4218
static inline bool should_serialize_deserialize_nir(void) { return false; }
4219
static inline bool should_print_nir(nir_shader *shader) { return false; }
4220
#endif /* NDEBUG */
4221
4222
#define _PASS(pass, nir, do_pass) do { \
4223
if (should_skip_nir(#pass)) { \
4224
printf("skipping %s\n", #pass); \
4225
break; \
4226
} \
4227
do_pass \
4228
if (should_clone_nir()) { \
4229
nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
4230
nir_shader_replace(nir, clone); \
4231
} \
4232
if (should_serialize_deserialize_nir()) { \
4233
nir_shader_serialize_deserialize(nir); \
4234
} \
4235
} while (0)
4236
4237
#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir, \
4238
nir_metadata_set_validation_flag(nir); \
4239
if (should_print_nir(nir)) \
4240
printf("%s\n", #pass); \
4241
if (pass(nir, ##__VA_ARGS__)) { \
4242
nir_validate_shader(nir, "after " #pass); \
4243
progress = true; \
4244
if (should_print_nir(nir)) \
4245
nir_print_shader(nir, stdout); \
4246
nir_metadata_check_validation_flag(nir); \
4247
} \
4248
)
4249
4250
#define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir, \
4251
if (should_print_nir(nir)) \
4252
printf("%s\n", #pass); \
4253
pass(nir, ##__VA_ARGS__); \
4254
nir_validate_shader(nir, "after " #pass); \
4255
if (should_print_nir(nir)) \
4256
nir_print_shader(nir, stdout); \
4257
)
4258
4259
#define NIR_SKIP(name) should_skip_nir(#name)
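
/* Usage sketch (editorial illustration): a driver's optimization loop
 * typically wraps passes in NIR_PASS so that the NIR_SKIP, NIR_PRINT,
 * NIR_TEST_CLONE and NIR_TEST_SERIALIZE environment variables keep working,
 * and uses NIR_PASS_V when the progress flag is irrelevant.
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       NIR_PASS(progress, nir, nir_copy_prop);
 *       NIR_PASS(progress, nir, nir_opt_dce);
 *       NIR_PASS(progress, nir, nir_opt_algebraic);
 *    } while (progress);
 *    NIR_PASS_V(nir, nir_lower_system_values);
 */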

/** An instruction filtering callback with writemask
 *
 * Returns true if the instruction should be processed with the associated
 * writemask and false otherwise.
 */
typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
                                              unsigned writemask, const void *);

/** A simple instruction lowering callback
 *
 * Many instruction lowering passes can be written as a simple function which
 * takes an instruction as its input and returns a sequence of instructions
 * that implement the consumed instruction.  This function type represents
 * such a lowering function.  When called, a function with this prototype
 * should either return NULL indicating that no lowering needs to be done or
 * emit a sequence of instructions using the provided builder (whose cursor
 * will already be placed after the instruction to be lowered) and return the
 * resulting nir_ssa_def.
 */
typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
                                           nir_instr *, void *);

/**
 * Special return value for nir_lower_instr_cb when some progress occurred
 * (like changing an input to the instr) that didn't result in a replacement
 * SSA def being generated.
 */
#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1)

/**
 * Special return value for nir_lower_instr_cb when some progress occurred
 * that should remove the current instruction that doesn't create an output
 * (like a store)
 */
#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_ssa_def *)(uintptr_t)2)

/** Iterate over all the instructions in a nir_function_impl and lower them
 * using the provided callbacks
 *
 * This function implements the guts of a standard lowering pass for you.  It
 * iterates over all of the instructions in a nir_function_impl and calls the
 * filter callback on each one.  If the filter callback returns true, it then
 * calls the lowering callback on the instruction.  (Splitting it this way
 * allows us to avoid some save/restore work for instructions we know won't be
 * lowered.)  If the instruction is dead after the lowering is complete, it
 * will be removed.  If new instructions are added, the lowering callback will
 * also be called on them in case multiple lowerings are required.
 *
 * If the callback indicates that the original instruction is replaced (either
 * through a new SSA def or NIR_LOWER_INSTR_PROGRESS_REPLACE), then the
 * instruction is removed along with any now-dead SSA defs it used.
 *
 * The metadata for the nir_function_impl will also be updated.  If any blocks
 * are added (they cannot be removed), dominance and block indices will be
 * invalidated.
 */
bool nir_function_impl_lower_instructions(nir_function_impl *impl,
                                          nir_instr_filter_cb filter,
                                          nir_lower_instr_cb lower,
                                          void *cb_data);
bool nir_shader_lower_instructions(nir_shader *shader,
                                   nir_instr_filter_cb filter,
                                   nir_lower_instr_cb lower,
                                   void *cb_data);
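
/* Usage sketch (editorial illustration): a pass built on
 * nir_shader_lower_instructions supplies a filter and a lowering callback.
 * This hypothetical pair rewrites fsin(x) into plain x purely as a
 * demonstration of the callback shapes, not as a useful lowering.
 *
 *    static bool
 *    is_fsin(const nir_instr *instr, UNUSED const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu &&
 *              nir_instr_as_alu(instr)->op == nir_op_fsin;
 *    }
 *
 *    static nir_ssa_def *
 *    lower_fsin(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
 *    {
 *       nir_alu_instr *alu = nir_instr_as_alu(instr);
 *       return nir_ssa_for_alu_src(b, alu, 0);
 *    }
 *
 *    ... nir_shader_lower_instructions(shader, is_fsin, lower_fsin, NULL);
 */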

void nir_calc_dominance_impl(nir_function_impl *impl);
void nir_calc_dominance(nir_shader *shader);

nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
bool nir_block_dominates(nir_block *parent, nir_block *child);
bool nir_block_is_unreachable(nir_block *block);

void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_dom_tree(nir_shader *shader, FILE *fp);

void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);

void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_cfg(nir_shader *shader, FILE *fp);

void nir_gs_count_vertices_and_primitives(const nir_shader *shader,
                                          int *out_vtxcnt,
                                          int *out_prmcnt,
                                          unsigned num_streams);

bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes);
bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
bool nir_split_var_copies(nir_shader *shader);
bool nir_split_per_member_structs(nir_shader *shader);
bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);

bool nir_lower_returns_impl(nir_function_impl *impl);
bool nir_lower_returns(nir_shader *shader);

void nir_inline_function_impl(struct nir_builder *b,
                              const nir_function_impl *impl,
                              nir_ssa_def **params,
                              struct hash_table *shader_var_remap);
bool nir_inline_functions(nir_shader *shader);

void nir_find_inlinable_uniforms(nir_shader *shader);
void nir_inline_uniforms(nir_shader *shader, unsigned num_uniforms,
                         const uint32_t *uniform_values,
                         const uint16_t *uniform_dw_offsets);

bool nir_propagate_invariant(nir_shader *shader, bool invariant_prim);

void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader);
void nir_lower_deref_copy_instr(struct nir_builder *b,
                                nir_intrinsic_instr *copy);
bool nir_lower_var_copies(nir_shader *shader);

bool nir_opt_memcpy(nir_shader *shader);
bool nir_lower_memcpy(nir_shader *shader);

void nir_fixup_deref_modes(nir_shader *shader);

bool nir_lower_global_vars_to_local(nir_shader *shader);

typedef enum {
   nir_lower_direct_array_deref_of_vec_load     = (1 << 0),
   nir_lower_indirect_array_deref_of_vec_load   = (1 << 1),
   nir_lower_direct_array_deref_of_vec_store    = (1 << 2),
   nir_lower_indirect_array_deref_of_vec_store  = (1 << 3),
} nir_lower_array_deref_of_vec_options;

bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes,
                                  nir_lower_array_deref_of_vec_options options);

bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
                               uint32_t max_lower_array_len);

bool nir_lower_indirect_builtin_uniform_derefs(nir_shader *shader);

bool nir_lower_locals_to_regs(nir_shader *shader);

void nir_lower_io_to_temporaries(nir_shader *shader,
                                 nir_function_impl *entrypoint,
                                 bool outputs, bool inputs);

bool nir_lower_vars_to_scratch(nir_shader *shader,
                               nir_variable_mode modes,
                               int size_threshold,
                               glsl_type_size_align_func size_align);

void nir_lower_clip_halfz(nir_shader *shader);

void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);

void nir_gather_ssa_types(nir_function_impl *impl,
                          BITSET_WORD *float_types,
                          BITSET_WORD *int_types);

void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
                              unsigned *size,
                              int (*type_size)(const struct glsl_type *, bool));

/* Some helpers to do very simple linking */
bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode,
                               uint64_t *used_by_other_stage,
                               uint64_t *used_by_other_stage_patches);
void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                          bool default_to_smooth_interp);
void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer);
bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer);
void nir_link_varying_precision(nir_shader *producer, nir_shader *consumer);

bool nir_lower_amul(nir_shader *shader,
                    int (*type_size)(const struct glsl_type *, bool));

bool nir_lower_ubo_vec4(nir_shader *shader);

void nir_assign_io_var_locations(nir_shader *shader,
                                 nir_variable_mode mode,
                                 unsigned *size,
                                 gl_shader_stage stage);

typedef struct {
   uint8_t num_linked_io_vars;
   uint8_t num_linked_patch_io_vars;
} nir_linked_io_var_info;

nir_linked_io_var_info
nir_assign_linked_io_var_locations(nir_shader *producer,
                                   nir_shader *consumer);

typedef enum {
   /* If set, this causes all 64-bit IO operations to be lowered on-the-fly
    * to 32-bit operations.  This is only valid for nir_var_shader_in/out
    * modes.
    */
   nir_lower_io_lower_64bit_to_32 = (1 << 0),

   /* If set, this forces all non-flat fragment shader inputs to be
    * interpolated as if with the "sample" qualifier.  This requires
    * nir_shader_compiler_options::use_interpolated_input_intrinsics.
    */
   nir_lower_io_force_sample_interpolation = (1 << 1),
} nir_lower_io_options;
bool nir_lower_io(nir_shader *shader,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *, bool),
                  nir_lower_io_options);

bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes);

bool
nir_lower_vars_to_explicit_types(nir_shader *shader,
                                 nir_variable_mode modes,
                                 glsl_type_size_align_func type_info);
void
nir_gather_explicit_io_initializers(nir_shader *shader,
                                    void *dst, size_t dst_size,
                                    nir_variable_mode mode);

bool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes);

typedef enum {
   /**
    * An address format which is a simple 32-bit global GPU address.
    */
   nir_address_format_32bit_global,

   /**
    * An address format which is a simple 64-bit global GPU address.
    */
   nir_address_format_64bit_global,

   /**
    * An address format which is a 64-bit global base address and a 32-bit
    * offset.
    *
    * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base
    * address stored with the low bits in .x and high bits in .y, .z is
    * undefined, and .w is an offset.  This is intended to match
    * 64bit_bounded_global but without the bounds checking.
    */
   nir_address_format_64bit_global_32bit_offset,

   /**
    * An address format which is a bounds-checked 64-bit global GPU address.
    *
    * The address is comprised as a 32-bit vec4 where .xy are a uint64_t base
    * address stored with the low bits in .x and high bits in .y, .z is a
    * size, and .w is an offset.  When the final I/O operation is lowered, .w
    * is checked against .z and the operation is predicated on the result.
    */
   nir_address_format_64bit_bounded_global,

   /**
    * An address format which is comprised of a vec2 where the first
    * component is a buffer index and the second is an offset.
    */
   nir_address_format_32bit_index_offset,

   /**
    * An address format which is a 64-bit value, where the high 32 bits
    * are a buffer index, and the low 32 bits are an offset.
    */
   nir_address_format_32bit_index_offset_pack64,

   /**
    * An address format which is comprised of a vec3 where the first two
    * components specify the buffer and the third is an offset.
    */
   nir_address_format_vec2_index_32bit_offset,

   /**
    * An address format which represents generic pointers with a 62-bit
    * pointer and a 2-bit enum in the top two bits.  The top two bits have
    * the following meanings:
    *
    *  - 0x0: Global memory
    *  - 0x1: Shared memory
    *  - 0x2: Scratch memory
    *  - 0x3: Global memory
    *
    * The redundancy between 0x0 and 0x3 is because of Intel sign-extension of
    * addresses.  Valid global memory addresses may naturally have either 0 or
    * ~0 as their high bits.
    *
    * Shared and scratch pointers are represented as 32-bit offsets with the
    * top 32 bits only being used for the enum.  This allows us to avoid
    * 64-bit address calculations in a bunch of cases.
    */
   nir_address_format_62bit_generic,

   /**
    * An address format which is a simple 32-bit offset.
    */
   nir_address_format_32bit_offset,

   /**
    * An address format which is a simple 32-bit offset cast to 64-bit.
    */
   nir_address_format_32bit_offset_as_64bit,

   /**
    * An address format representing a purely logical addressing model.  In
    * this model, all deref chains must be complete from the dereference
    * operation to the variable.  Cast derefs are not allowed.  These
    * addresses will be 32-bit scalars but the format is immaterial because
    * you can always chase the chain.
    */
   nir_address_format_logical,
} nir_address_format;

static inline unsigned
nir_address_format_bit_size(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:              return 32;
   case nir_address_format_64bit_global:              return 64;
   case nir_address_format_64bit_global_32bit_offset: return 32;
   case nir_address_format_64bit_bounded_global:      return 32;
   case nir_address_format_32bit_index_offset:        return 32;
   case nir_address_format_32bit_index_offset_pack64: return 64;
   case nir_address_format_vec2_index_32bit_offset:   return 32;
   case nir_address_format_62bit_generic:             return 64;
   case nir_address_format_32bit_offset:              return 32;
   case nir_address_format_32bit_offset_as_64bit:     return 64;
   case nir_address_format_logical:                   return 32;
   }
   unreachable("Invalid address format");
}

static inline unsigned
nir_address_format_num_components(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:              return 1;
   case nir_address_format_64bit_global:              return 1;
   case nir_address_format_64bit_global_32bit_offset: return 4;
   case nir_address_format_64bit_bounded_global:      return 4;
   case nir_address_format_32bit_index_offset:        return 2;
   case nir_address_format_32bit_index_offset_pack64: return 1;
   case nir_address_format_vec2_index_32bit_offset:   return 3;
   case nir_address_format_62bit_generic:             return 1;
   case nir_address_format_32bit_offset:              return 1;
   case nir_address_format_32bit_offset_as_64bit:     return 1;
   case nir_address_format_logical:                   return 1;
   }
   unreachable("Invalid address format");
}

static inline const struct glsl_type *
nir_address_format_to_glsl_type(nir_address_format addr_format)
{
   unsigned bit_size = nir_address_format_bit_size(addr_format);
   assert(bit_size == 32 || bit_size == 64);
   return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64,
                           nir_address_format_num_components(addr_format));
}
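
/* Usage sketch (editorial illustration): for a driver that stores SSBO
 * pointers as a {buffer_index, offset} pair, the helpers above report the
 * layout the lowering will use:
 *
 *    nir_address_format fmt = nir_address_format_32bit_index_offset;
 *    unsigned comps = nir_address_format_num_components(fmt);   // 2
 *    unsigned bits  = nir_address_format_bit_size(fmt);         // 32
 *    const struct glsl_type *t = nir_address_format_to_glsl_type(fmt); // uvec2
 */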

const nir_const_value *nir_address_format_null_value(nir_address_format addr_format);

nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
                                nir_address_format addr_format);

nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
                                 nir_address_format addr_format);

nir_ssa_def *nir_explicit_io_address_from_deref(struct nir_builder *b,
                                                nir_deref_instr *deref,
                                                nir_ssa_def *base_addr,
                                                nir_address_format addr_format);

bool nir_get_explicit_deref_align(nir_deref_instr *deref,
                                  bool default_to_type_align,
                                  uint32_t *align_mul,
                                  uint32_t *align_offset);

void nir_lower_explicit_io_instr(struct nir_builder *b,
                                 nir_intrinsic_instr *io_instr,
                                 nir_ssa_def *addr,
                                 nir_address_format addr_format);

bool nir_lower_explicit_io(nir_shader *shader,
                           nir_variable_mode modes,
                           nir_address_format);

bool
nir_lower_shader_calls(nir_shader *shader,
                       nir_address_format address_format,
                       unsigned stack_alignment,
                       nir_shader ***resume_shaders_out,
                       uint32_t *num_resume_shaders_out,
                       void *mem_ctx);

nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
nir_src *nir_get_shader_call_payload_src(nir_intrinsic_instr *call);

bool nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage);

bool nir_lower_regs_to_ssa_impl(nir_function_impl *impl);
bool nir_lower_regs_to_ssa(nir_shader *shader);
bool nir_lower_vars_to_ssa(nir_shader *shader);

bool nir_remove_dead_derefs(nir_shader *shader);
bool nir_remove_dead_derefs_impl(nir_function_impl *impl);

typedef struct nir_remove_dead_variables_options {
   bool (*can_remove_var)(nir_variable *var, void *data);
   void *can_remove_var_data;
} nir_remove_dead_variables_options;

bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes,
                               const nir_remove_dead_variables_options *options);
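
/* Usage sketch (editorial illustration): the options struct lets a driver
 * veto removal of particular variables.  keep_xfb_outputs() is a hypothetical
 * callback that preserves outputs captured by transform feedback (it returns
 * true when a variable may be removed).
 *
 *    static bool
 *    keep_xfb_outputs(nir_variable *var, UNUSED void *data)
 *    {
 *       return var->data.explicit_xfb_buffer == 0;
 *    }
 *
 *    const nir_remove_dead_variables_options opts = {
 *       .can_remove_var = keep_xfb_outputs,
 *    };
 *    nir_remove_dead_variables(shader, nir_var_shader_out, &opts);
 */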

bool nir_lower_variable_initializers(nir_shader *shader,
                                     nir_variable_mode modes);
bool nir_zero_initialize_shared_memory(nir_shader *shader,
                                       const unsigned shared_size,
                                       const unsigned chunk_size);

bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
bool nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
                           const void *_data);
void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
                          bool alpha_to_one,
                          const gl_state_index16 *alpha_ref_state_tokens);
bool nir_lower_alu(nir_shader *shader);

bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask,
                    bool always_precise);

bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
bool nir_lower_bool_to_bitsize(nir_shader *shader);
bool nir_lower_bool_to_float(nir_shader *shader);
bool nir_lower_bool_to_int32(nir_shader *shader);
bool nir_opt_simplify_convert_alu_types(nir_shader *shader);
bool nir_lower_convert_alu_types(nir_shader *shader,
                                 bool (*should_lower)(nir_intrinsic_instr *));
bool nir_lower_constant_convert_alu_types(nir_shader *shader);
bool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader);
bool nir_lower_int_to_float(nir_shader *shader);
bool nir_lower_load_const_to_scalar(nir_shader *shader);
bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
bool nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all);
void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer);
void nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader,
                                                  bool outputs_only);
void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask);
bool nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask);
bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask);
bool nir_vectorize_tess_levels(nir_shader *shader);

bool nir_lower_fragcolor(nir_shader *shader, unsigned max_cbufs);
bool nir_lower_fragcoord_wtrans(nir_shader *shader);
void nir_lower_viewport_transform(nir_shader *shader);
bool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4);

bool nir_lower_is_helper_invocation(nir_shader *shader);

typedef struct nir_lower_subgroups_options {
   uint8_t subgroup_size;
   uint8_t ballot_bit_size;
   uint8_t ballot_components;
   bool lower_to_scalar:1;
   bool lower_vote_trivial:1;
   bool lower_vote_eq:1;
   bool lower_subgroup_masks:1;
   bool lower_shuffle:1;
   bool lower_shuffle_to_32bit:1;
   bool lower_shuffle_to_swizzle_amd:1;
   bool lower_quad:1;
   bool lower_quad_broadcast_dynamic:1;
   bool lower_quad_broadcast_dynamic_to_const:1;
   bool lower_elect:1;
   bool lower_read_invocation_to_cond:1;
} nir_lower_subgroups_options;

bool nir_lower_subgroups(nir_shader *shader,
                         const nir_lower_subgroups_options *options);

bool nir_lower_system_values(nir_shader *shader);

typedef struct nir_lower_compute_system_values_options {
   bool has_base_global_invocation_id:1;
   bool has_base_workgroup_id:1;
   bool shuffle_local_ids_for_quad_derivatives:1;
   bool lower_local_invocation_index:1;
} nir_lower_compute_system_values_options;

bool nir_lower_compute_system_values(nir_shader *shader,
                                     const nir_lower_compute_system_values_options *options);

enum PACKED nir_lower_tex_packing {
   nir_lower_tex_packing_none = 0,
   /* The sampler returns up to 2 32-bit words of half floats or 16-bit signed
    * or unsigned ints based on the sampler type
    */
   nir_lower_tex_packing_16,
   /* The sampler returns 1 32-bit word of 4x8 unorm */
   nir_lower_tex_packing_8,
};

typedef struct nir_lower_tex_options {
   /**
    * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
    * sampler types a texture projector is lowered.
    */
   unsigned lower_txp;

   /**
    * If true, lower away nir_tex_src_offset for all texelfetch instructions.
    */
   bool lower_txf_offset;

   /**
    * If true, lower away nir_tex_src_offset for all rect textures.
    */
   bool lower_rect_offset;

   /**
    * If true, lower rect textures to 2D, using txs to fetch the
    * texture dimensions and dividing the texture coords by the
    * texture dims to normalize.
    */
   bool lower_rect;

   /**
    * If true, convert yuv to rgb.
    */
   unsigned lower_y_uv_external;
   unsigned lower_y_u_v_external;
   unsigned lower_yx_xuxv_external;
   unsigned lower_xy_uxvx_external;
   unsigned lower_ayuv_external;
   unsigned lower_xyuv_external;
   unsigned lower_yuv_external;
   unsigned lower_yu_yv_external;
   unsigned lower_y41x_external;
   unsigned bt709_external;
   unsigned bt2020_external;

   /**
    * To emulate certain texture wrap modes, this can be used
    * to saturate the specified tex coord to [0.0, 1.0].  The
    * bits are according to sampler #, i.e. if, for example:
    *
    *   (conf->saturate_s & (1 << n))
    *
    * is true, then the s coord for sampler n is saturated.
    *
    * Note that clamping must happen *after* projector lowering
    * so any projected texture sample instruction with a clamped
    * coordinate gets automatically lowered, regardless of the
    * 'lower_txp' setting.
    */
   unsigned saturate_s;
   unsigned saturate_t;
   unsigned saturate_r;

   /* Bitmask of textures that need swizzling.
    *
    * If (swizzle_result & (1 << texture_index)), then the swizzle in
    * swizzles[texture_index] is applied to the result of the texturing
    * operation.
    */
   unsigned swizzle_result;

   /* A swizzle for each texture.  Values 0-3 represent x, y, z, or w swizzles
    * while 4 and 5 represent 0 and 1 respectively.
    *
    * Indexed by texture-id.
    */
   uint8_t swizzles[32][4];

   /* Can be used to scale sampled values in range required by the
    * format.
    *
    * Indexed by texture-id.
    */
   float scale_factors[32];

   /**
    * Bitmap of textures that need srgb to linear conversion.  If
    * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components
    * of the texture are lowered to linear.
    */
   unsigned lower_srgb;

   /**
    * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
    */
   bool lower_txd_cube_map;

   /**
    * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl.
    */
   bool lower_txd_3d;

   /**
    * If true, lower nir_texop_txd on shadow samplers (except cube maps)
    * with nir_texop_txl.  Notice that cube map shadow samplers are lowered
    * with lower_txd_cube_map.
    */
   bool lower_txd_shadow;

   /**
    * If true, lower nir_texop_txd on all samplers to a nir_texop_txl.
    * Implies lower_txd_cube_map and lower_txd_shadow.
    */
   bool lower_txd;

   /**
    * If true, lower nir_texop_txb that try to use shadow compare and min_lod
    * at the same time to a nir_texop_lod, some math, and nir_texop_tex.
    */
   bool lower_txb_shadow_clamp;

   /**
    * If true, lower nir_texop_txd on shadow samplers when it uses min_lod
    * with nir_texop_txl.  This includes cube maps.
    */
   bool lower_txd_shadow_clamp;

   /**
    * If true, lower nir_texop_txd when it uses both offset and min_lod
    * with nir_texop_txl.  This includes cube maps.
    */
   bool lower_txd_offset_clamp;

   /**
    * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
    * sampler is bindless.
    */
   bool lower_txd_clamp_bindless_sampler;

   /**
    * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
    * sampler index is not statically determinable to be less than 16.
    */
   bool lower_txd_clamp_if_sampler_index_not_lt_16;

   /**
    * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with
    * 0-lod followed by a nir_ishr.
    */
   bool lower_txs_lod;

   /**
    * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
    * mixed-up tg4 locations.
    */
   bool lower_tg4_broadcom_swizzle;

   /**
    * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls
    */
   bool lower_tg4_offsets;

   /**
    * To lower packed sampler return formats.
    *
    * Indexed by sampler-id.
    */
   enum nir_lower_tex_packing lower_tex_packing[32];
} nir_lower_tex_options;

bool nir_lower_tex(nir_shader *shader,
                   const nir_lower_tex_options *options);
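
/* Usage sketch (editorial illustration): a driver typically zero-initializes
 * the options struct and only sets the lowerings its hardware needs, e.g.
 *
 *    const nir_lower_tex_options tex_opts = {
 *       .lower_txp = ~0u,            // lower projectors for every sampler dim
 *       .lower_rect = true,
 *       .lower_txd_cube_map = true,
 *    };
 *    NIR_PASS_V(shader, nir_lower_tex, &tex_opts);
 */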
4928
4929
bool nir_lower_readonly_images_to_tex(nir_shader *shader, bool per_variable);
4930
4931
enum nir_lower_non_uniform_access_type {
4932
nir_lower_non_uniform_ubo_access = (1 << 0),
4933
nir_lower_non_uniform_ssbo_access = (1 << 1),
4934
nir_lower_non_uniform_texture_access = (1 << 2),
4935
nir_lower_non_uniform_image_access = (1 << 3),
4936
};
4937
4938
/* Given the nir_src used for the resource, return the channels which might be non-uniform. */
4939
typedef nir_component_mask_t (*nir_lower_non_uniform_access_callback)(const nir_src *, void *);
4940
4941
typedef struct nir_lower_non_uniform_access_options {
4942
enum nir_lower_non_uniform_access_type types;
4943
nir_lower_non_uniform_access_callback callback;
4944
void *callback_data;
4945
} nir_lower_non_uniform_access_options;
4946
4947
bool nir_lower_non_uniform_access(nir_shader *shader,
4948
const nir_lower_non_uniform_access_options *options);
4949
4950
typedef struct {
4951
/* If true, a 32-bit division lowering based on NV50LegalizeSSA::handleDIV()
4952
* is used. It is the faster of the two but it is not exact in some cases
4953
* (for example, 1091317713u / 1034u gives 5209173 instead of 1055432).
4954
*
4955
* If false, a lowering based on AMDGPUTargetLowering::LowerUDIVREM() and
4956
* AMDGPUTargetLowering::LowerSDIVREM() is used. It requires more
4957
* instructions than the nv50 path and many of them are integer
4958
* multiplications, so it is probably slower. It should always return the
4959
* correct result, though.
4960
*/
4961
bool imprecise_32bit_lowering;
4962
4963
/* Whether 16-bit floating point arithmetic should be allowed in 8-bit
4964
* division lowering
4965
*/
4966
bool allow_fp16;
4967
} nir_lower_idiv_options;
4968
4969
bool nir_lower_idiv(nir_shader *shader, const nir_lower_idiv_options *options);
4970
4971
typedef struct nir_input_attachment_options {
4972
bool use_fragcoord_sysval;
4973
bool use_layer_id_sysval;
4974
bool use_view_id_for_layer;
4975
} nir_input_attachment_options;
4976
4977
bool nir_lower_input_attachments(nir_shader *shader,
4978
const nir_input_attachment_options *options);
4979
4980
bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables,
4981
bool use_vars,
4982
bool use_clipdist_array,
4983
const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
4984
bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables,
4985
bool use_clipdist_array,
4986
const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
4987
bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables,
4988
bool use_clipdist_array);
4989
bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
4990
bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable);
4991
4992
void nir_lower_point_size_mov(nir_shader *shader,
4993
const gl_state_index16 *pointsize_state_tokens);
4994
4995
bool nir_lower_frexp(nir_shader *nir);
4996
4997
void nir_lower_two_sided_color(nir_shader *shader, bool face_sysval);
4998
4999
bool nir_lower_clamp_color_outputs(nir_shader *shader);
5000
5001
bool nir_lower_flatshade(nir_shader *shader);
5002
5003
void nir_lower_passthrough_edgeflags(nir_shader *shader);
5004
bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
5005
const gl_state_index16 *uniform_state_tokens);
5006
5007
typedef struct nir_lower_wpos_ytransform_options {
5008
gl_state_index16 state_tokens[STATE_LENGTH];
5009
bool fs_coord_origin_upper_left :1;
5010
bool fs_coord_origin_lower_left :1;
5011
bool fs_coord_pixel_center_integer :1;
5012
bool fs_coord_pixel_center_half_integer :1;
5013
} nir_lower_wpos_ytransform_options;
5014
5015
bool nir_lower_wpos_ytransform(nir_shader *shader,
5016
const nir_lower_wpos_ytransform_options *options);
5017
bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading);
5018
5019
bool nir_lower_pntc_ytransform(nir_shader *shader,
5020
const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
5021
5022
bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
5023
5024
bool nir_lower_fb_read(nir_shader *shader);
5025
5026
typedef struct nir_lower_drawpixels_options {
5027
gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
5028
gl_state_index16 scale_state_tokens[STATE_LENGTH];
5029
gl_state_index16 bias_state_tokens[STATE_LENGTH];
5030
unsigned drawpix_sampler;
5031
unsigned pixelmap_sampler;
5032
bool pixel_maps :1;
5033
bool scale_and_bias :1;
5034
} nir_lower_drawpixels_options;
5035
5036
void nir_lower_drawpixels(nir_shader *shader,
5037
const nir_lower_drawpixels_options *options);
5038
5039
typedef struct nir_lower_bitmap_options {
5040
unsigned sampler;
5041
bool swizzle_xxxx;
5042
} nir_lower_bitmap_options;
5043
5044
void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);
5045
5046
bool nir_lower_atomics_to_ssbo(nir_shader *shader);
5047
5048
typedef enum {
5049
nir_lower_int_source_mods = 1 << 0,
5050
nir_lower_float_source_mods = 1 << 1,
5051
nir_lower_64bit_source_mods = 1 << 2,
5052
nir_lower_triop_abs = 1 << 3,
5053
nir_lower_all_source_mods = (1 << 4) - 1
5054
} nir_lower_to_source_mods_flags;
5055
5056
5057
bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options);
5058
5059
typedef enum {
5060
nir_lower_gs_intrinsics_per_stream = 1 << 0,
5061
nir_lower_gs_intrinsics_count_primitives = 1 << 1,
5062
nir_lower_gs_intrinsics_count_vertices_per_primitive = 1 << 2,
5063
nir_lower_gs_intrinsics_overwrite_incomplete = 1 << 3,
5064
} nir_lower_gs_intrinsics_flags;
5065
5066
bool nir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options);
5067
5068
typedef unsigned (*nir_lower_bit_size_callback)(const nir_instr *, void *);
5069
5070
bool nir_lower_bit_size(nir_shader *shader,
5071
nir_lower_bit_size_callback callback,
5072
void *callback_data);
5073
bool nir_lower_64bit_phis(nir_shader *shader);
5074
5075
nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode);
5076
bool nir_lower_int64(nir_shader *shader);
5077
5078
nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode);
5079
bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
5080
nir_lower_doubles_options options);
5081
bool nir_lower_pack(nir_shader *shader);
5082
5083
bool nir_recompute_io_bases(nir_function_impl *impl, nir_variable_mode modes);
5084
bool nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
5085
uint64_t varying_mask, bool use_16bit_slots);
5086
bool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
5087
nir_alu_type types);
5088
bool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes);
5089
bool nir_fold_16bit_sampler_conversions(nir_shader *nir,
5090
unsigned tex_src_types);
5091
5092
typedef struct {
5093
bool legalize_type; /* whether this src should be legalized */
5094
uint8_t bit_size; /* bit_size to enforce */
5095
nir_tex_src_type match_src; /* if bit_size is 0, match bit size of this */
5096
} nir_tex_src_type_constraint, nir_tex_src_type_constraints[nir_num_tex_src_types];
5097
5098
bool nir_legalize_16bit_sampler_srcs(nir_shader *nir,
5099
nir_tex_src_type_constraints constraints);
5100
5101
bool nir_lower_point_size(nir_shader *shader, float min, float max);
5102
5103
void nir_lower_texcoord_replace(nir_shader *s, unsigned coord_replace,
5104
bool point_coord_is_sysval, bool yinvert);
5105
5106
typedef enum {
5107
nir_lower_interpolation_at_sample = (1 << 1),
5108
nir_lower_interpolation_at_offset = (1 << 2),
5109
nir_lower_interpolation_centroid = (1 << 3),
5110
nir_lower_interpolation_pixel = (1 << 4),
5111
nir_lower_interpolation_sample = (1 << 5),
5112
} nir_lower_interpolation_options;
5113
5114
bool nir_lower_interpolation(nir_shader *shader,
5115
nir_lower_interpolation_options options);

bool nir_lower_discard_or_demote(nir_shader *shader,
                                 bool force_correct_quad_ops_after_discard);

bool nir_lower_memory_model(nir_shader *shader);

bool nir_lower_goto_ifs(nir_shader *shader);

bool nir_shader_uses_view_index(nir_shader *shader);
bool nir_can_lower_multiview(nir_shader *shader);
bool nir_lower_multiview(nir_shader *shader, uint32_t view_mask);

bool nir_lower_fp16_casts(nir_shader *shader);
bool nir_normalize_cubemap_coords(nir_shader *shader);

void nir_live_ssa_defs_impl(nir_function_impl *impl);

const BITSET_WORD *nir_get_live_ssa_defs(nir_cursor cursor, void *mem_ctx);

void nir_loop_analyze_impl(nir_function_impl *impl,
                           nir_variable_mode indirect_mask);

bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);

bool nir_repair_ssa_impl(nir_function_impl *impl);
bool nir_repair_ssa(nir_shader *shader);

void nir_convert_loop_to_lcssa(nir_loop *loop);
bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
void nir_divergence_analysis(nir_shader *shader);
bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr);

/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
 * registers. If false, convert all values (even those not involved in a phi
 * node) to registers.
 */
bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);

bool nir_lower_phis_to_regs_block(nir_block *block);
bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);

bool nir_lower_samplers(nir_shader *shader);
bool nir_lower_ssbo(nir_shader *shader);

typedef struct nir_lower_printf_options {
   bool treat_doubles_as_floats : 1;
   unsigned max_buffer_size;
} nir_lower_printf_options;

bool nir_lower_printf(nir_shader *nir, const nir_lower_printf_options *options);
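/* Example (illustrative sketch): lowering printf with doubles demoted to
 * floats and a 1 MiB output buffer.  The buffer size is an assumption for
 * demonstration only.
 *
 *    const nir_lower_printf_options printf_opts = {
 *       .treat_doubles_as_floats = true,
 *       .max_buffer_size = 1024 * 1024,
 *    };
 *    nir_lower_printf(nir, &printf_opts);
 */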

/* This is here for unit tests. */
bool nir_opt_comparison_pre_impl(nir_function_impl *impl);

bool nir_opt_comparison_pre(nir_shader *shader);

typedef struct nir_opt_access_options {
   bool is_vulkan;
   bool infer_non_readable;
} nir_opt_access_options;

bool nir_opt_access(nir_shader *shader, const nir_opt_access_options *options);
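/* Example (illustrative sketch): letting nir_opt_access use Vulkan rules and
 * infer non-readable as well as non-writeable resources.  Whether that is
 * appropriate depends on the API being implemented; the settings here are
 * assumptions for demonstration only.
 *
 *    const nir_opt_access_options access_opts = {
 *       .is_vulkan = true,
 *       .infer_non_readable = true,
 *    };
 *    nir_opt_access(shader, &access_opts);
 */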
bool nir_opt_algebraic(nir_shader *shader);
bool nir_opt_algebraic_before_ffma(nir_shader *shader);
bool nir_opt_algebraic_late(nir_shader *shader);
bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
bool nir_opt_constant_folding(nir_shader *shader);

/* Try to combine a and b into a. Return true if combination was possible,
 * which will result in b being removed by the pass. Return false if
 * combination wasn't possible.
 */
typedef bool (*nir_combine_memory_barrier_cb)(
   nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);

bool nir_opt_combine_memory_barriers(nir_shader *shader,
                                     nir_combine_memory_barrier_cb combine_cb,
                                     void *data);
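/* Example (illustrative sketch): a callback that merges any two barriers by
 * taking the union of their modes and semantics and the wider of their
 * scopes.  Real backends usually apply more restrictive rules; merging
 * unconditionally is an assumption for demonstration only.
 *
 *    static bool
 *    combine_all_barriers_cb(nir_intrinsic_instr *a, nir_intrinsic_instr *b,
 *                            void *data)
 *    {
 *       nir_intrinsic_set_memory_modes(
 *          a, nir_intrinsic_memory_modes(a) | nir_intrinsic_memory_modes(b));
 *       nir_intrinsic_set_memory_semantics(
 *          a, nir_intrinsic_memory_semantics(a) |
 *             nir_intrinsic_memory_semantics(b));
 *       nir_intrinsic_set_memory_scope(
 *          a, MAX2(nir_intrinsic_memory_scope(a),
 *                  nir_intrinsic_memory_scope(b)));
 *       return true;
 *    }
 *
 *    nir_opt_combine_memory_barriers(shader, combine_all_barriers_cb, NULL);
 */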

bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);

bool nir_copy_prop(nir_shader *shader);

bool nir_opt_copy_prop_vars(nir_shader *shader);

bool nir_opt_cse(nir_shader *shader);

bool nir_opt_dce(nir_shader *shader);

bool nir_opt_dead_cf(nir_shader *shader);

bool nir_opt_dead_write_vars(nir_shader *shader);

bool nir_opt_deref_impl(nir_function_impl *impl);
bool nir_opt_deref(nir_shader *shader);

bool nir_opt_find_array_copies(nir_shader *shader);

bool nir_opt_gcm(nir_shader *shader, bool value_number);

bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);

bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue);

bool nir_opt_intrinsics(nir_shader *shader);

bool nir_opt_large_constants(nir_shader *shader,
                             glsl_type_size_align_func size_align,
                             unsigned threshold);

bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);

typedef enum {
   nir_move_const_undef = (1 << 0),
   nir_move_load_ubo    = (1 << 1),
   nir_move_load_input  = (1 << 2),
   nir_move_comparisons = (1 << 3),
   nir_move_copies      = (1 << 4),
   nir_move_load_ssbo   = (1 << 5),
} nir_move_options;

bool nir_can_move_instr(nir_instr *instr, nir_move_options options);

bool nir_opt_sink(nir_shader *shader, nir_move_options options);

bool nir_opt_move(nir_shader *shader, nir_move_options options);
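/* Example (illustrative sketch): a typical sink/move pairing that pushes
 * constants, undefs, copies, comparisons and UBO loads closer to their uses.
 * Which instruction classes are profitable to move is hardware specific;
 * the selection below is an assumption for demonstration only.
 *
 *    nir_move_options move_opts = nir_move_const_undef | nir_move_copies |
 *                                 nir_move_comparisons | nir_move_load_ubo;
 *    nir_opt_sink(shader, move_opts);
 *    nir_opt_move(shader, move_opts);
 */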

bool nir_opt_offsets(nir_shader *shader);

bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
                             bool indirect_load_ok, bool expensive_alu_ok);

bool nir_opt_rematerialize_compares(nir_shader *shader);

bool nir_opt_remove_phis(nir_shader *shader);
bool nir_opt_remove_phis_block(nir_block *block);

bool nir_opt_phi_precision(nir_shader *shader);

bool nir_opt_shrink_vectors(nir_shader *shader, bool shrink_image_store);

bool nir_opt_trivial_continues(nir_shader *shader);

bool nir_opt_undef(nir_shader *shader);

bool nir_lower_undef_to_zero(nir_shader *shader);

bool nir_opt_uniform_atomics(nir_shader *shader);

typedef bool (*nir_opt_vectorize_cb)(const nir_instr *instr, void *data);

bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
                       void *data);
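/* Example (illustrative sketch): a filter that only lets 16-bit ALU
 * instructions be vectorized, e.g. for hardware with packed fp16 math.
 * The 16-bit-only policy is an assumption for demonstration only.
 *
 *    static bool
 *    vectorize_fp16_filter_cb(const nir_instr *instr, void *data)
 *    {
 *       if (instr->type != nir_instr_type_alu)
 *          return false;
 *       return nir_dest_bit_size(nir_instr_as_alu(instr)->dest.dest) == 16;
 *    }
 *
 *    nir_opt_vectorize(shader, vectorize_fp16_filter_cb, NULL);
 */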

bool nir_opt_conditional_discard(nir_shader *shader);
bool nir_opt_move_discards_to_top(nir_shader *shader);

typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
                                              unsigned align_offset,
                                              unsigned bit_size,
                                              unsigned num_components,
                                              nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                                              void *data);

typedef struct {
   nir_should_vectorize_mem_func callback;
   nir_variable_mode modes;
   nir_variable_mode robust_modes;
   void *cb_data;
} nir_load_store_vectorize_options;

bool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options);
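/* Example (illustrative sketch): vectorizing UBO/SSBO/shared access with a
 * callback that accepts merged accesses of at most 128 bits whose alignment
 * covers the element size.  The 128-bit limit and the alignment rule are
 * assumptions for demonstration only.
 *
 *    static bool
 *    mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
 *                     unsigned bit_size, unsigned num_components,
 *                     nir_intrinsic_instr *low, nir_intrinsic_instr *high,
 *                     void *data)
 *    {
 *       return bit_size * num_components <= 128 &&
 *              align_mul >= bit_size / 8 &&
 *              align_offset % (bit_size / 8) == 0;
 *    }
 *
 *    const nir_load_store_vectorize_options vectorize_opts = {
 *       .callback = mem_vectorize_cb,
 *       .modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared,
 *    };
 *    nir_opt_load_store_vectorize(shader, &vectorize_opts);
 */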
5289
5290
void nir_sweep(nir_shader *shader);
5291
5292
void nir_remap_dual_slot_attributes(nir_shader *shader,
5293
uint64_t *dual_slot_inputs);
5294
uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot);
5295
5296
nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
5297
gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
5298
5299
static inline bool
5300
nir_variable_is_in_ubo(const nir_variable *var)
5301
{
5302
return (var->data.mode == nir_var_mem_ubo &&
5303
var->interface_type != NULL);
5304
}
5305
5306
static inline bool
5307
nir_variable_is_in_ssbo(const nir_variable *var)
5308
{
5309
return (var->data.mode == nir_var_mem_ssbo &&
5310
var->interface_type != NULL);
5311
}
5312
5313
static inline bool
5314
nir_variable_is_in_block(const nir_variable *var)
5315
{
5316
return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var);
5317
}
5318
5319
typedef struct nir_unsigned_upper_bound_config {
5320
unsigned min_subgroup_size;
5321
unsigned max_subgroup_size;
5322
unsigned max_workgroup_invocations;
5323
unsigned max_workgroup_count[3];
5324
unsigned max_workgroup_size[3];
5325
5326
uint32_t vertex_attrib_max[32];
5327
} nir_unsigned_upper_bound_config;
5328
5329
uint32_t
5330
nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
5331
nir_ssa_scalar scalar,
5332
const nir_unsigned_upper_bound_config *config);
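/* Example (illustrative sketch): bounding a shader value under assumed device
 * limits.  The limits, the hypothetical "def" whose upper bound is queried,
 * and the use of a pointer hash table as the cache are all assumptions for
 * demonstration only.
 *
 *    const nir_unsigned_upper_bound_config config = {
 *       .min_subgroup_size = 32,
 *       .max_subgroup_size = 64,
 *       .max_workgroup_invocations = 1024,
 *       .max_workgroup_count = { 65535, 65535, 65535 },
 *       .max_workgroup_size = { 1024, 1024, 64 },
 *    };
 *    struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);
 *    nir_ssa_scalar scalar = { .def = def, .comp = 0 };
 *    uint32_t bound = nir_unsigned_upper_bound(shader, range_ht, scalar, &config);
 *    _mesa_hash_table_destroy(range_ht, NULL);
 */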
5333
5334
bool
5335
nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
5336
nir_ssa_scalar ssa, unsigned const_val,
5337
const nir_unsigned_upper_bound_config *config);
5338
5339
#include "nir_inline_helpers.h"
5340
5341
#ifdef __cplusplus
5342
} /* extern "C" */
5343
#endif
5344
5345
#endif /* NIR_H */
5346
5347