/* Extracted from PojavLauncherTeam/mesa, branch 21.2-virgl:
 * src/compiler/nir/nir_divergence_analysis.c
 */
1
/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
24
#include "nir.h"

/* This pass computes for each ssa definition if it is uniform.
 * That is, the variable has the same value for all invocations
 * of the group.
 *
 * This divergence analysis pass expects the shader to be in LCSSA-form.
 *
 * This algorithm implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */
38
39
struct divergence_state {
40
const gl_shader_stage stage;
41
nir_shader *shader;
42
43
/** current control flow state */
44
/* True if some loop-active invocations might take a different control-flow path.
45
* A divergent break does not cause subsequent control-flow to be considered
46
* divergent because those invocations are no longer active in the loop.
47
* For a divergent if, both sides are considered divergent flow because
48
* the other side is still loop-active. */
49
bool divergent_loop_cf;
50
/* True if a divergent continue happened since the loop header */
51
bool divergent_loop_continue;
52
/* True if a divergent break happened since the loop header */
53
bool divergent_loop_break;
54
55
/* True if we visit the block for the fist time */
56
bool first_visit;
57
};
58
59
static bool
60
visit_cf_list(struct exec_list *list, struct divergence_state *state);
61
62
static bool
63
visit_alu(nir_alu_instr *instr)
64
{
65
if (instr->dest.dest.ssa.divergent)
66
return false;
67
68
unsigned num_src = nir_op_infos[instr->op].num_inputs;
69
70
for (unsigned i = 0; i < num_src; i++) {
71
if (instr->src[i].src.ssa->divergent) {
72
instr->dest.dest.ssa.divergent = true;
73
return true;
74
}
75
}
76
77
return false;
78
}
79
80
static bool
81
visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
82
{
83
if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
84
return false;
85
86
if (instr->dest.ssa.divergent)
87
return false;
88
89
nir_divergence_options options = shader->options->divergence_analysis_options;
90
gl_shader_stage stage = shader->info.stage;
91
bool is_divergent = false;
92
switch (instr->intrinsic) {
93
/* Intrinsics which are always uniform */
94
case nir_intrinsic_shader_clock:
95
case nir_intrinsic_ballot:
96
case nir_intrinsic_read_invocation:
97
case nir_intrinsic_read_first_invocation:
98
case nir_intrinsic_vote_any:
99
case nir_intrinsic_vote_all:
100
case nir_intrinsic_vote_feq:
101
case nir_intrinsic_vote_ieq:
102
case nir_intrinsic_load_push_constant:
103
case nir_intrinsic_load_work_dim:
104
case nir_intrinsic_load_num_workgroups:
105
case nir_intrinsic_load_workgroup_size:
106
case nir_intrinsic_load_subgroup_id:
107
case nir_intrinsic_load_num_subgroups:
108
case nir_intrinsic_load_subgroup_size:
109
case nir_intrinsic_load_subgroup_eq_mask:
110
case nir_intrinsic_load_subgroup_ge_mask:
111
case nir_intrinsic_load_subgroup_gt_mask:
112
case nir_intrinsic_load_subgroup_le_mask:
113
case nir_intrinsic_load_subgroup_lt_mask:
114
case nir_intrinsic_first_invocation:
115
case nir_intrinsic_last_invocation:
116
case nir_intrinsic_load_base_instance:
117
case nir_intrinsic_load_base_vertex:
118
case nir_intrinsic_load_first_vertex:
119
case nir_intrinsic_load_draw_id:
120
case nir_intrinsic_load_is_indexed_draw:
121
case nir_intrinsic_load_viewport_scale:
122
case nir_intrinsic_load_user_clip_plane:
123
case nir_intrinsic_load_viewport_x_scale:
124
case nir_intrinsic_load_viewport_y_scale:
125
case nir_intrinsic_load_viewport_z_scale:
126
case nir_intrinsic_load_viewport_offset:
127
case nir_intrinsic_load_viewport_x_offset:
128
case nir_intrinsic_load_viewport_y_offset:
129
case nir_intrinsic_load_viewport_z_offset:
130
case nir_intrinsic_load_blend_const_color_a_float:
131
case nir_intrinsic_load_blend_const_color_b_float:
132
case nir_intrinsic_load_blend_const_color_g_float:
133
case nir_intrinsic_load_blend_const_color_r_float:
134
case nir_intrinsic_load_blend_const_color_rgba:
135
case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
136
case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
137
case nir_intrinsic_load_line_width:
138
case nir_intrinsic_load_aa_line_width:
139
case nir_intrinsic_load_fb_layers_v3d:
140
case nir_intrinsic_load_tcs_num_patches_amd:
141
case nir_intrinsic_load_ring_tess_factors_amd:
142
case nir_intrinsic_load_ring_tess_offchip_amd:
143
case nir_intrinsic_load_ring_tess_factors_offset_amd:
144
case nir_intrinsic_load_ring_tess_offchip_offset_amd:
145
case nir_intrinsic_load_ring_esgs_amd:
146
case nir_intrinsic_load_ring_es2gs_offset_amd:
147
case nir_intrinsic_load_sample_positions_pan:
148
case nir_intrinsic_load_workgroup_num_input_vertices_amd:
149
case nir_intrinsic_load_workgroup_num_input_primitives_amd:
150
case nir_intrinsic_load_shader_query_enabled_amd:
151
case nir_intrinsic_load_cull_front_face_enabled_amd:
152
case nir_intrinsic_load_cull_back_face_enabled_amd:
153
case nir_intrinsic_load_cull_ccw_amd:
154
case nir_intrinsic_load_cull_small_primitives_enabled_amd:
155
case nir_intrinsic_load_cull_any_enabled_amd:
156
case nir_intrinsic_load_cull_small_prim_precision_amd:
157
is_divergent = false;
158
break;
159
160
/* Intrinsics with divergence depending on shader stage and hardware */
161
case nir_intrinsic_load_frag_shading_rate:
162
is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
163
break;
164
case nir_intrinsic_load_input:
165
is_divergent = instr->src[0].ssa->divergent;
166
if (stage == MESA_SHADER_FRAGMENT)
167
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
168
else if (stage == MESA_SHADER_TESS_EVAL)
169
is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
170
else
171
is_divergent = true;
172
break;
173
case nir_intrinsic_load_per_vertex_input:
174
is_divergent = instr->src[0].ssa->divergent ||
175
instr->src[1].ssa->divergent;
176
if (stage == MESA_SHADER_TESS_CTRL)
177
is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
178
if (stage == MESA_SHADER_TESS_EVAL)
179
is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
180
else
181
is_divergent = true;
182
break;
183
case nir_intrinsic_load_input_vertex:
184
is_divergent = instr->src[1].ssa->divergent;
185
assert(stage == MESA_SHADER_FRAGMENT);
186
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
187
break;
188
case nir_intrinsic_load_output:
189
assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT);
190
is_divergent = instr->src[0].ssa->divergent;
191
if (stage == MESA_SHADER_TESS_CTRL)
192
is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
193
else
194
is_divergent = true;
195
break;
196
case nir_intrinsic_load_per_vertex_output:
197
assert(stage == MESA_SHADER_TESS_CTRL);
198
is_divergent = instr->src[0].ssa->divergent ||
199
instr->src[1].ssa->divergent ||
200
!(options & nir_divergence_single_patch_per_tcs_subgroup);
201
break;
202
case nir_intrinsic_load_layer_id:
203
case nir_intrinsic_load_front_face:
204
assert(stage == MESA_SHADER_FRAGMENT);
205
is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
206
break;
207
case nir_intrinsic_load_view_index:
208
assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
209
if (options & nir_divergence_view_index_uniform)
210
is_divergent = false;
211
else if (stage == MESA_SHADER_FRAGMENT)
212
is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
213
break;
214
case nir_intrinsic_load_fs_input_interp_deltas:
215
assert(stage == MESA_SHADER_FRAGMENT);
216
is_divergent = instr->src[0].ssa->divergent;
217
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
218
break;
219
case nir_intrinsic_load_primitive_id:
220
if (stage == MESA_SHADER_FRAGMENT)
221
is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
222
else if (stage == MESA_SHADER_TESS_CTRL)
223
is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
224
else if (stage == MESA_SHADER_TESS_EVAL)
225
is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
226
else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
227
is_divergent = true;
228
else
229
unreachable("Invalid stage for load_primitive_id");
230
break;
231
case nir_intrinsic_load_tess_level_inner:
232
case nir_intrinsic_load_tess_level_outer:
233
if (stage == MESA_SHADER_TESS_CTRL)
234
is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
235
else if (stage == MESA_SHADER_TESS_EVAL)
236
is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
237
else
238
unreachable("Invalid stage for load_primitive_tess_level_*");
239
break;
240
case nir_intrinsic_load_patch_vertices_in:
241
if (stage == MESA_SHADER_TESS_EVAL)
242
is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
243
else
244
assert(stage == MESA_SHADER_TESS_CTRL);
245
break;
246
247
case nir_intrinsic_load_workgroup_id:
248
assert(stage == MESA_SHADER_COMPUTE);
249
is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
250
break;
251
252
/* Clustered reductions are uniform if cluster_size == subgroup_size or
253
* the source is uniform and the operation is invariant.
254
* Inclusive scans are uniform if
255
* the source is uniform and the operation is invariant
256
*/
257
case nir_intrinsic_reduce:
258
if (nir_intrinsic_cluster_size(instr) == 0)
259
return false;
260
FALLTHROUGH;
261
case nir_intrinsic_inclusive_scan: {
262
nir_op op = nir_intrinsic_reduction_op(instr);
263
is_divergent = instr->src[0].ssa->divergent;
264
if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
265
op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
266
op != nir_op_iand && op != nir_op_ior)
267
is_divergent = true;
268
break;
269
}
270
271
case nir_intrinsic_load_ubo:
272
case nir_intrinsic_load_ssbo:
273
is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
274
instr->src[1].ssa->divergent;
275
break;
276
277
case nir_intrinsic_get_ssbo_size:
278
case nir_intrinsic_deref_buffer_array_length:
279
is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
280
break;
281
282
case nir_intrinsic_image_load:
283
case nir_intrinsic_image_deref_load:
284
case nir_intrinsic_bindless_image_load:
285
case nir_intrinsic_image_sparse_load:
286
case nir_intrinsic_image_deref_sparse_load:
287
case nir_intrinsic_bindless_image_sparse_load:
288
is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
289
instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
290
break;
291
292
293
/* Intrinsics with divergence depending on sources */
294
case nir_intrinsic_ballot_bitfield_extract:
295
case nir_intrinsic_ballot_find_lsb:
296
case nir_intrinsic_ballot_find_msb:
297
case nir_intrinsic_ballot_bit_count_reduce:
298
case nir_intrinsic_shuffle_xor:
299
case nir_intrinsic_shuffle_up:
300
case nir_intrinsic_shuffle_down:
301
case nir_intrinsic_quad_broadcast:
302
case nir_intrinsic_quad_swap_horizontal:
303
case nir_intrinsic_quad_swap_vertical:
304
case nir_intrinsic_quad_swap_diagonal:
305
case nir_intrinsic_byte_permute_amd:
306
case nir_intrinsic_load_deref:
307
case nir_intrinsic_load_shared:
308
case nir_intrinsic_load_global:
309
case nir_intrinsic_load_global_constant:
310
case nir_intrinsic_load_uniform:
311
case nir_intrinsic_load_constant:
312
case nir_intrinsic_load_sample_pos_from_id:
313
case nir_intrinsic_load_kernel_input:
314
case nir_intrinsic_load_buffer_amd:
315
case nir_intrinsic_image_samples:
316
case nir_intrinsic_image_deref_samples:
317
case nir_intrinsic_bindless_image_samples:
318
case nir_intrinsic_image_size:
319
case nir_intrinsic_image_deref_size:
320
case nir_intrinsic_bindless_image_size:
321
case nir_intrinsic_copy_deref:
322
case nir_intrinsic_vulkan_resource_index:
323
case nir_intrinsic_vulkan_resource_reindex:
324
case nir_intrinsic_load_vulkan_descriptor:
325
case nir_intrinsic_atomic_counter_read:
326
case nir_intrinsic_atomic_counter_read_deref:
327
case nir_intrinsic_quad_swizzle_amd:
328
case nir_intrinsic_masked_swizzle_amd:
329
case nir_intrinsic_is_sparse_texels_resident:
330
case nir_intrinsic_sparse_residency_code_and:
331
case nir_intrinsic_load_sbt_amd:
332
case nir_intrinsic_bvh64_intersect_ray_amd:
333
case nir_intrinsic_get_ubo_size:
334
case nir_intrinsic_load_ssbo_address: {
335
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
336
for (unsigned i = 0; i < num_srcs; i++) {
337
if (instr->src[i].ssa->divergent) {
338
is_divergent = true;
339
break;
340
}
341
}
342
break;
343
}
344
345
case nir_intrinsic_shuffle:
346
is_divergent = instr->src[0].ssa->divergent &&
347
instr->src[1].ssa->divergent;
348
break;
349
350
/* Intrinsics which are always divergent */
351
case nir_intrinsic_load_color0:
352
case nir_intrinsic_load_color1:
353
case nir_intrinsic_load_param:
354
case nir_intrinsic_load_sample_id:
355
case nir_intrinsic_load_sample_id_no_per_sample:
356
case nir_intrinsic_load_sample_mask_in:
357
case nir_intrinsic_load_interpolated_input:
358
case nir_intrinsic_load_barycentric_pixel:
359
case nir_intrinsic_load_barycentric_centroid:
360
case nir_intrinsic_load_barycentric_sample:
361
case nir_intrinsic_load_barycentric_model:
362
case nir_intrinsic_load_barycentric_at_sample:
363
case nir_intrinsic_load_barycentric_at_offset:
364
case nir_intrinsic_interp_deref_at_offset:
365
case nir_intrinsic_interp_deref_at_sample:
366
case nir_intrinsic_interp_deref_at_centroid:
367
case nir_intrinsic_interp_deref_at_vertex:
368
case nir_intrinsic_load_tess_coord:
369
case nir_intrinsic_load_point_coord:
370
case nir_intrinsic_load_line_coord:
371
case nir_intrinsic_load_frag_coord:
372
case nir_intrinsic_load_sample_pos:
373
case nir_intrinsic_load_vertex_id_zero_base:
374
case nir_intrinsic_load_vertex_id:
375
case nir_intrinsic_load_instance_id:
376
case nir_intrinsic_load_invocation_id:
377
case nir_intrinsic_load_local_invocation_id:
378
case nir_intrinsic_load_local_invocation_index:
379
case nir_intrinsic_load_global_invocation_id:
380
case nir_intrinsic_load_global_invocation_id_zero_base:
381
case nir_intrinsic_load_global_invocation_index:
382
case nir_intrinsic_load_subgroup_invocation:
383
case nir_intrinsic_load_helper_invocation:
384
case nir_intrinsic_is_helper_invocation:
385
case nir_intrinsic_load_scratch:
386
case nir_intrinsic_deref_atomic_add:
387
case nir_intrinsic_deref_atomic_imin:
388
case nir_intrinsic_deref_atomic_umin:
389
case nir_intrinsic_deref_atomic_imax:
390
case nir_intrinsic_deref_atomic_umax:
391
case nir_intrinsic_deref_atomic_and:
392
case nir_intrinsic_deref_atomic_or:
393
case nir_intrinsic_deref_atomic_xor:
394
case nir_intrinsic_deref_atomic_exchange:
395
case nir_intrinsic_deref_atomic_comp_swap:
396
case nir_intrinsic_deref_atomic_fadd:
397
case nir_intrinsic_deref_atomic_fmin:
398
case nir_intrinsic_deref_atomic_fmax:
399
case nir_intrinsic_deref_atomic_fcomp_swap:
400
case nir_intrinsic_ssbo_atomic_add:
401
case nir_intrinsic_ssbo_atomic_imin:
402
case nir_intrinsic_ssbo_atomic_umin:
403
case nir_intrinsic_ssbo_atomic_imax:
404
case nir_intrinsic_ssbo_atomic_umax:
405
case nir_intrinsic_ssbo_atomic_and:
406
case nir_intrinsic_ssbo_atomic_or:
407
case nir_intrinsic_ssbo_atomic_xor:
408
case nir_intrinsic_ssbo_atomic_exchange:
409
case nir_intrinsic_ssbo_atomic_comp_swap:
410
case nir_intrinsic_ssbo_atomic_fadd:
411
case nir_intrinsic_ssbo_atomic_fmax:
412
case nir_intrinsic_ssbo_atomic_fmin:
413
case nir_intrinsic_ssbo_atomic_fcomp_swap:
414
case nir_intrinsic_image_deref_atomic_add:
415
case nir_intrinsic_image_deref_atomic_imin:
416
case nir_intrinsic_image_deref_atomic_umin:
417
case nir_intrinsic_image_deref_atomic_imax:
418
case nir_intrinsic_image_deref_atomic_umax:
419
case nir_intrinsic_image_deref_atomic_and:
420
case nir_intrinsic_image_deref_atomic_or:
421
case nir_intrinsic_image_deref_atomic_xor:
422
case nir_intrinsic_image_deref_atomic_exchange:
423
case nir_intrinsic_image_deref_atomic_comp_swap:
424
case nir_intrinsic_image_deref_atomic_fadd:
425
case nir_intrinsic_image_deref_atomic_fmin:
426
case nir_intrinsic_image_deref_atomic_fmax:
427
case nir_intrinsic_image_atomic_add:
428
case nir_intrinsic_image_atomic_imin:
429
case nir_intrinsic_image_atomic_umin:
430
case nir_intrinsic_image_atomic_imax:
431
case nir_intrinsic_image_atomic_umax:
432
case nir_intrinsic_image_atomic_and:
433
case nir_intrinsic_image_atomic_or:
434
case nir_intrinsic_image_atomic_xor:
435
case nir_intrinsic_image_atomic_exchange:
436
case nir_intrinsic_image_atomic_comp_swap:
437
case nir_intrinsic_image_atomic_fadd:
438
case nir_intrinsic_image_atomic_fmin:
439
case nir_intrinsic_image_atomic_fmax:
440
case nir_intrinsic_bindless_image_atomic_add:
441
case nir_intrinsic_bindless_image_atomic_imin:
442
case nir_intrinsic_bindless_image_atomic_umin:
443
case nir_intrinsic_bindless_image_atomic_imax:
444
case nir_intrinsic_bindless_image_atomic_umax:
445
case nir_intrinsic_bindless_image_atomic_and:
446
case nir_intrinsic_bindless_image_atomic_or:
447
case nir_intrinsic_bindless_image_atomic_xor:
448
case nir_intrinsic_bindless_image_atomic_exchange:
449
case nir_intrinsic_bindless_image_atomic_comp_swap:
450
case nir_intrinsic_bindless_image_atomic_fadd:
451
case nir_intrinsic_bindless_image_atomic_fmin:
452
case nir_intrinsic_bindless_image_atomic_fmax:
453
case nir_intrinsic_shared_atomic_add:
454
case nir_intrinsic_shared_atomic_imin:
455
case nir_intrinsic_shared_atomic_umin:
456
case nir_intrinsic_shared_atomic_imax:
457
case nir_intrinsic_shared_atomic_umax:
458
case nir_intrinsic_shared_atomic_and:
459
case nir_intrinsic_shared_atomic_or:
460
case nir_intrinsic_shared_atomic_xor:
461
case nir_intrinsic_shared_atomic_exchange:
462
case nir_intrinsic_shared_atomic_comp_swap:
463
case nir_intrinsic_shared_atomic_fadd:
464
case nir_intrinsic_shared_atomic_fmin:
465
case nir_intrinsic_shared_atomic_fmax:
466
case nir_intrinsic_shared_atomic_fcomp_swap:
467
case nir_intrinsic_global_atomic_add:
468
case nir_intrinsic_global_atomic_imin:
469
case nir_intrinsic_global_atomic_umin:
470
case nir_intrinsic_global_atomic_imax:
471
case nir_intrinsic_global_atomic_umax:
472
case nir_intrinsic_global_atomic_and:
473
case nir_intrinsic_global_atomic_or:
474
case nir_intrinsic_global_atomic_xor:
475
case nir_intrinsic_global_atomic_exchange:
476
case nir_intrinsic_global_atomic_comp_swap:
477
case nir_intrinsic_global_atomic_fadd:
478
case nir_intrinsic_global_atomic_fmin:
479
case nir_intrinsic_global_atomic_fmax:
480
case nir_intrinsic_global_atomic_fcomp_swap:
481
case nir_intrinsic_atomic_counter_add:
482
case nir_intrinsic_atomic_counter_min:
483
case nir_intrinsic_atomic_counter_max:
484
case nir_intrinsic_atomic_counter_and:
485
case nir_intrinsic_atomic_counter_or:
486
case nir_intrinsic_atomic_counter_xor:
487
case nir_intrinsic_atomic_counter_inc:
488
case nir_intrinsic_atomic_counter_pre_dec:
489
case nir_intrinsic_atomic_counter_post_dec:
490
case nir_intrinsic_atomic_counter_exchange:
491
case nir_intrinsic_atomic_counter_comp_swap:
492
case nir_intrinsic_atomic_counter_add_deref:
493
case nir_intrinsic_atomic_counter_min_deref:
494
case nir_intrinsic_atomic_counter_max_deref:
495
case nir_intrinsic_atomic_counter_and_deref:
496
case nir_intrinsic_atomic_counter_or_deref:
497
case nir_intrinsic_atomic_counter_xor_deref:
498
case nir_intrinsic_atomic_counter_inc_deref:
499
case nir_intrinsic_atomic_counter_pre_dec_deref:
500
case nir_intrinsic_atomic_counter_post_dec_deref:
501
case nir_intrinsic_atomic_counter_exchange_deref:
502
case nir_intrinsic_atomic_counter_comp_swap_deref:
503
case nir_intrinsic_exclusive_scan:
504
case nir_intrinsic_ballot_bit_count_exclusive:
505
case nir_intrinsic_ballot_bit_count_inclusive:
506
case nir_intrinsic_write_invocation_amd:
507
case nir_intrinsic_mbcnt_amd:
508
case nir_intrinsic_lane_permute_16_amd:
509
case nir_intrinsic_elect:
510
case nir_intrinsic_load_tlb_color_v3d:
511
case nir_intrinsic_load_tess_rel_patch_id_amd:
512
case nir_intrinsic_load_gs_vertex_offset_amd:
513
case nir_intrinsic_has_input_vertex_amd:
514
case nir_intrinsic_has_input_primitive_amd:
515
case nir_intrinsic_load_packed_passthrough_primitive_amd:
516
case nir_intrinsic_load_initial_edgeflag_amd:
517
case nir_intrinsic_gds_atomic_add_amd:
518
is_divergent = true;
519
break;
520
521
default:
522
#ifdef NDEBUG
523
is_divergent = true;
524
break;
525
#else
526
nir_print_instr(&instr->instr, stderr);
527
unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
528
#endif
529
}
530
531
instr->dest.ssa.divergent = is_divergent;
532
return is_divergent;
533
}
534
535
static bool
536
visit_tex(nir_tex_instr *instr)
537
{
538
if (instr->dest.ssa.divergent)
539
return false;
540
541
bool is_divergent = false;
542
543
for (unsigned i = 0; i < instr->num_srcs; i++) {
544
switch (instr->src[i].src_type) {
545
case nir_tex_src_sampler_deref:
546
case nir_tex_src_sampler_handle:
547
case nir_tex_src_sampler_offset:
548
is_divergent |= instr->src[i].src.ssa->divergent &&
549
instr->sampler_non_uniform;
550
break;
551
case nir_tex_src_texture_deref:
552
case nir_tex_src_texture_handle:
553
case nir_tex_src_texture_offset:
554
is_divergent |= instr->src[i].src.ssa->divergent &&
555
instr->texture_non_uniform;
556
break;
557
default:
558
is_divergent |= instr->src[i].src.ssa->divergent;
559
break;
560
}
561
}
562
563
instr->dest.ssa.divergent = is_divergent;
564
return is_divergent;
565
}
566
567
static bool
568
visit_load_const(nir_load_const_instr *instr)
569
{
570
return false;
571
}
572
573
static bool
574
visit_ssa_undef(nir_ssa_undef_instr *instr)
575
{
576
return false;
577
}
578
579
static bool
580
nir_variable_mode_is_uniform(nir_variable_mode mode) {
581
switch (mode) {
582
case nir_var_uniform:
583
case nir_var_mem_ubo:
584
case nir_var_mem_ssbo:
585
case nir_var_mem_shared:
586
case nir_var_mem_global:
587
return true;
588
default:
589
return false;
590
}
591
}
592
593
static bool
594
nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
595
{
596
if (nir_variable_mode_is_uniform(var->data.mode))
597
return true;
598
599
nir_divergence_options options = shader->options->divergence_analysis_options;
600
gl_shader_stage stage = shader->info.stage;
601
602
if (stage == MESA_SHADER_FRAGMENT &&
603
(options & nir_divergence_single_prim_per_subgroup) &&
604
var->data.mode == nir_var_shader_in &&
605
var->data.interpolation == INTERP_MODE_FLAT)
606
return true;
607
608
if (stage == MESA_SHADER_TESS_CTRL &&
609
(options & nir_divergence_single_patch_per_tcs_subgroup) &&
610
var->data.mode == nir_var_shader_out && var->data.patch)
611
return true;
612
613
if (stage == MESA_SHADER_TESS_EVAL &&
614
(options & nir_divergence_single_patch_per_tes_subgroup) &&
615
var->data.mode == nir_var_shader_in && var->data.patch)
616
return true;
617
618
return false;
619
}
620
621
static bool
622
visit_deref(nir_shader *shader, nir_deref_instr *deref)
623
{
624
if (deref->dest.ssa.divergent)
625
return false;
626
627
bool is_divergent = false;
628
switch (deref->deref_type) {
629
case nir_deref_type_var:
630
is_divergent = !nir_variable_is_uniform(shader, deref->var);
631
break;
632
case nir_deref_type_array:
633
case nir_deref_type_ptr_as_array:
634
is_divergent = deref->arr.index.ssa->divergent;
635
FALLTHROUGH;
636
case nir_deref_type_struct:
637
case nir_deref_type_array_wildcard:
638
is_divergent |= deref->parent.ssa->divergent;
639
break;
640
case nir_deref_type_cast:
641
is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
642
deref->parent.ssa->divergent;
643
break;
644
}
645
646
deref->dest.ssa.divergent = is_divergent;
647
return is_divergent;
648
}
649
650
static bool
651
visit_jump(nir_jump_instr *jump, struct divergence_state *state)
652
{
653
switch (jump->type) {
654
case nir_jump_continue:
655
if (state->divergent_loop_continue)
656
return false;
657
if (state->divergent_loop_cf)
658
state->divergent_loop_continue = true;
659
return state->divergent_loop_continue;
660
case nir_jump_break:
661
if (state->divergent_loop_break)
662
return false;
663
if (state->divergent_loop_cf)
664
state->divergent_loop_break = true;
665
return state->divergent_loop_break;
666
case nir_jump_halt:
667
/* This totally kills invocations so it doesn't add divergence */
668
break;
669
case nir_jump_return:
670
unreachable("NIR divergence analysis: Unsupported return instruction.");
671
break;
672
case nir_jump_goto:
673
case nir_jump_goto_if:
674
unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
675
break;
676
}
677
return false;
678
}
679
680
static bool
681
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
682
{
683
def->divergent = false;
684
return true;
685
}
686
687
static bool
688
update_instr_divergence(nir_shader *shader, nir_instr *instr)
689
{
690
switch (instr->type) {
691
case nir_instr_type_alu:
692
return visit_alu(nir_instr_as_alu(instr));
693
case nir_instr_type_intrinsic:
694
return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
695
case nir_instr_type_tex:
696
return visit_tex(nir_instr_as_tex(instr));
697
case nir_instr_type_load_const:
698
return visit_load_const(nir_instr_as_load_const(instr));
699
case nir_instr_type_ssa_undef:
700
return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
701
case nir_instr_type_deref:
702
return visit_deref(shader, nir_instr_as_deref(instr));
703
case nir_instr_type_jump:
704
case nir_instr_type_phi:
705
case nir_instr_type_call:
706
case nir_instr_type_parallel_copy:
707
default:
708
unreachable("NIR divergence analysis: Unsupported instruction type.");
709
}
710
}
711
712
static bool
713
visit_block(nir_block *block, struct divergence_state *state)
714
{
715
bool has_changed = false;
716
717
nir_foreach_instr(instr, block) {
718
/* phis are handled when processing the branches */
719
if (instr->type == nir_instr_type_phi)
720
continue;
721
722
if (state->first_visit)
723
nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
724
725
if (instr->type == nir_instr_type_jump)
726
has_changed |= visit_jump(nir_instr_as_jump(instr), state);
727
else
728
has_changed |= update_instr_divergence(state->shader, instr);
729
}
730
731
return has_changed;
732
}
733
734
/* There are 3 types of phi instructions:
735
* (1) gamma: represent the joining point of different paths
736
* created by an “if-then-else” branch.
737
* The resulting value is divergent if the branch condition
738
* or any of the source values is divergent. */
739
static bool
740
visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
741
{
742
if (phi->dest.ssa.divergent)
743
return false;
744
745
unsigned defined_srcs = 0;
746
nir_foreach_phi_src(src, phi) {
747
/* if any source value is divergent, the resulting value is divergent */
748
if (src->src.ssa->divergent) {
749
phi->dest.ssa.divergent = true;
750
return true;
751
}
752
if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
753
defined_srcs++;
754
}
755
}
756
757
/* if the condition is divergent and two sources defined, the definition is divergent */
758
if (defined_srcs > 1 && if_cond_divergent) {
759
phi->dest.ssa.divergent = true;
760
return true;
761
}
762
763
return false;
764
}
765
766
/* There are 3 types of phi instructions:
767
* (2) mu: which only exist at loop headers,
768
* merge initial and loop-carried values.
769
* The resulting value is divergent if any source value
770
* is divergent or a divergent loop continue condition
771
* is associated with a different ssa-def. */
772
static bool
773
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
774
{
775
if (phi->dest.ssa.divergent)
776
return false;
777
778
nir_ssa_def* same = NULL;
779
nir_foreach_phi_src(src, phi) {
780
/* if any source value is divergent, the resulting value is divergent */
781
if (src->src.ssa->divergent) {
782
phi->dest.ssa.divergent = true;
783
return true;
784
}
785
/* if this loop is uniform, we're done here */
786
if (!divergent_continue)
787
continue;
788
/* skip the loop preheader */
789
if (src->pred == preheader)
790
continue;
791
/* skip undef values */
792
if (nir_src_is_undef(src->src))
793
continue;
794
795
/* check if all loop-carried values are from the same ssa-def */
796
if (!same)
797
same = src->src.ssa;
798
else if (same != src->src.ssa) {
799
phi->dest.ssa.divergent = true;
800
return true;
801
}
802
}
803
804
return false;
805
}
806
807
/* There are 3 types of phi instructions:
808
* (3) eta: represent values that leave a loop.
809
* The resulting value is divergent if the source value is divergent
810
* or any loop exit condition is divergent for a value which is
811
* not loop-invariant.
812
* (note: there should be no phi for loop-invariant variables.) */
813
static bool
814
visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
815
{
816
if (phi->dest.ssa.divergent)
817
return false;
818
819
if (divergent_break) {
820
phi->dest.ssa.divergent = true;
821
return true;
822
}
823
824
/* if any source value is divergent, the resulting value is divergent */
825
nir_foreach_phi_src(src, phi) {
826
if (src->src.ssa->divergent) {
827
phi->dest.ssa.divergent = true;
828
return true;
829
}
830
}
831
832
return false;
833
}
834
835
static bool
836
visit_if(nir_if *if_stmt, struct divergence_state *state)
837
{
838
bool progress = false;
839
840
struct divergence_state then_state = *state;
841
then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
842
progress |= visit_cf_list(&if_stmt->then_list, &then_state);
843
844
struct divergence_state else_state = *state;
845
else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
846
progress |= visit_cf_list(&if_stmt->else_list, &else_state);
847
848
/* handle phis after the IF */
849
nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
850
if (instr->type != nir_instr_type_phi)
851
break;
852
853
if (state->first_visit)
854
nir_instr_as_phi(instr)->dest.ssa.divergent = false;
855
progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
856
if_stmt->condition.ssa->divergent);
857
}
858
859
/* join loop divergence information from both branch legs */
860
state->divergent_loop_continue |= then_state.divergent_loop_continue ||
861
else_state.divergent_loop_continue;
862
state->divergent_loop_break |= then_state.divergent_loop_break ||
863
else_state.divergent_loop_break;
864
865
/* A divergent continue makes succeeding loop CF divergent:
866
* not all loop-active invocations participate in the remaining loop-body
867
* which means that a following break might be taken by some invocations, only */
868
state->divergent_loop_cf |= state->divergent_loop_continue;
869
870
return progress;
871
}
872
873
static bool
874
visit_loop(nir_loop *loop, struct divergence_state *state)
875
{
876
bool progress = false;
877
nir_block *loop_header = nir_loop_first_block(loop);
878
nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);
879
880
/* handle loop header phis first: we have no knowledge yet about
881
* the loop's control flow or any loop-carried sources. */
882
nir_foreach_instr(instr, loop_header) {
883
if (instr->type != nir_instr_type_phi)
884
break;
885
886
nir_phi_instr *phi = nir_instr_as_phi(instr);
887
if (!state->first_visit && phi->dest.ssa.divergent)
888
continue;
889
890
nir_foreach_phi_src(src, phi) {
891
if (src->pred == loop_preheader) {
892
phi->dest.ssa.divergent = src->src.ssa->divergent;
893
break;
894
}
895
}
896
progress |= phi->dest.ssa.divergent;
897
}
898
899
/* setup loop state */
900
struct divergence_state loop_state = *state;
901
loop_state.divergent_loop_cf = false;
902
loop_state.divergent_loop_continue = false;
903
loop_state.divergent_loop_break = false;
904
905
/* process loop body until no further changes are made */
906
bool repeat;
907
do {
908
progress |= visit_cf_list(&loop->body, &loop_state);
909
repeat = false;
910
911
/* revisit loop header phis to see if something has changed */
912
nir_foreach_instr(instr, loop_header) {
913
if (instr->type != nir_instr_type_phi)
914
break;
915
916
repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
917
loop_preheader,
918
loop_state.divergent_loop_continue);
919
}
920
921
loop_state.divergent_loop_cf = false;
922
loop_state.first_visit = false;
923
} while (repeat);
924
925
/* handle phis after the loop */
926
nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
927
if (instr->type != nir_instr_type_phi)
928
break;
929
930
if (state->first_visit)
931
nir_instr_as_phi(instr)->dest.ssa.divergent = false;
932
progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
933
loop_state.divergent_loop_break);
934
}
935
936
loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);
937
938
return progress;
939
}
940
941
static bool
942
visit_cf_list(struct exec_list *list, struct divergence_state *state)
943
{
944
bool has_changed = false;
945
946
foreach_list_typed(nir_cf_node, node, node, list) {
947
switch (node->type) {
948
case nir_cf_node_block:
949
has_changed |= visit_block(nir_cf_node_as_block(node), state);
950
break;
951
case nir_cf_node_if:
952
has_changed |= visit_if(nir_cf_node_as_if(node), state);
953
break;
954
case nir_cf_node_loop:
955
has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
956
break;
957
case nir_cf_node_function:
958
unreachable("NIR divergence analysis: Unsupported cf_node type.");
959
}
960
}
961
962
return has_changed;
963
}
964
965
void
966
nir_divergence_analysis(nir_shader *shader)
967
{
968
struct divergence_state state = {
969
.stage = shader->info.stage,
970
.shader = shader,
971
.divergent_loop_cf = false,
972
.divergent_loop_continue = false,
973
.divergent_loop_break = false,
974
.first_visit = true,
975
};
976
977
visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
978
}
979
980
/* Incrementally recompute divergence for a single instruction.
 *
 * Non-phi instructions are always recomputable locally.  Of the phis,
 * only those merging an if/else (gamma phis) can be updated in isolation;
 * loop phis would need the full fixed-point analysis.  Returns false when
 * the instruction could not be updated, true otherwise. */
bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   /* Reset all of this instruction's destinations to uniform before
    * recomputing. */
   nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

   if (instr->type != nir_instr_type_phi) {
      update_instr_divergence(shader, instr);
      return true;
   }

   /* can only update gamma/if phis */
   nir_cf_node *pred_node = nir_cf_node_prev(&instr->block->cf_node);
   if (pred_node == NULL || pred_node->type != nir_cf_node_if)
      return false;

   nir_if *nif = nir_cf_node_as_if(pred_node);
   visit_if_merge_phi(nir_instr_as_phi(instr),
                      nir_src_is_divergent(nif->condition));
   return true;
}