Path: blob/21.2-virgl/src/compiler/nir/nir_divergence_analysis.c
4546 views
/*1* Copyright © 2018 Valve Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22*/2324#include "nir.h"2526/* This pass computes for each ssa definition if it is uniform.27* That is, the variable has the same value for all invocations28* of the group.29*30* This divergence analysis pass expects the shader to be in LCSSA-form.31*32* This algorithm implements "The Simple Divergence Analysis" from33* Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.34* Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),35* ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>36*/3738struct divergence_state {39const gl_shader_stage stage;40nir_shader *shader;4142/** current control flow state */43/* True if some loop-active invocations might take a different control-flow path.44* A divergent break does not cause subsequent control-flow to be considered45* divergent because those invocations are no longer active in the loop.46* For a divergent if, both sides are considered divergent flow because47* the other side is still loop-active. */48bool divergent_loop_cf;49/* True if a divergent continue happened since the loop header */50bool divergent_loop_continue;51/* True if a divergent break happened since the loop header */52bool divergent_loop_break;5354/* True if we visit the block for the fist time */55bool first_visit;56};5758static bool59visit_cf_list(struct exec_list *list, struct divergence_state *state);6061static bool62visit_alu(nir_alu_instr *instr)63{64if (instr->dest.dest.ssa.divergent)65return false;6667unsigned num_src = nir_op_infos[instr->op].num_inputs;6869for (unsigned i = 0; i < num_src; i++) {70if (instr->src[i].src.ssa->divergent) {71instr->dest.dest.ssa.divergent = true;72return true;73}74}7576return false;77}7879static bool80visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)81{82if (!nir_intrinsic_infos[instr->intrinsic].has_dest)83return false;8485if (instr->dest.ssa.divergent)86return false;8788nir_divergence_options options = shader->options->divergence_analysis_options;89gl_shader_stage stage = shader->info.stage;90bool is_divergent = false;91switch (instr->intrinsic) {92/* Intrinsics which are always uniform */93case nir_intrinsic_shader_clock:94case nir_intrinsic_ballot:95case nir_intrinsic_read_invocation:96case nir_intrinsic_read_first_invocation:97case nir_intrinsic_vote_any:98case nir_intrinsic_vote_all:99case nir_intrinsic_vote_feq:100case nir_intrinsic_vote_ieq:101case nir_intrinsic_load_push_constant:102case nir_intrinsic_load_work_dim:103case nir_intrinsic_load_num_workgroups:104case nir_intrinsic_load_workgroup_size:105case nir_intrinsic_load_subgroup_id:106case nir_intrinsic_load_num_subgroups:107case nir_intrinsic_load_subgroup_size:108case nir_intrinsic_load_subgroup_eq_mask:109case nir_intrinsic_load_subgroup_ge_mask:110case nir_intrinsic_load_subgroup_gt_mask:111case nir_intrinsic_load_subgroup_le_mask:112case nir_intrinsic_load_subgroup_lt_mask:113case nir_intrinsic_first_invocation:114case nir_intrinsic_last_invocation:115case nir_intrinsic_load_base_instance:116case nir_intrinsic_load_base_vertex:117case nir_intrinsic_load_first_vertex:118case nir_intrinsic_load_draw_id:119case nir_intrinsic_load_is_indexed_draw:120case nir_intrinsic_load_viewport_scale:121case nir_intrinsic_load_user_clip_plane:122case nir_intrinsic_load_viewport_x_scale:123case nir_intrinsic_load_viewport_y_scale:124case nir_intrinsic_load_viewport_z_scale:125case nir_intrinsic_load_viewport_offset:126case nir_intrinsic_load_viewport_x_offset:127case nir_intrinsic_load_viewport_y_offset:128case nir_intrinsic_load_viewport_z_offset:129case nir_intrinsic_load_blend_const_color_a_float:130case nir_intrinsic_load_blend_const_color_b_float:131case nir_intrinsic_load_blend_const_color_g_float:132case nir_intrinsic_load_blend_const_color_r_float:133case nir_intrinsic_load_blend_const_color_rgba:134case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:135case nir_intrinsic_load_blend_const_color_rgba8888_unorm:136case nir_intrinsic_load_line_width:137case nir_intrinsic_load_aa_line_width:138case nir_intrinsic_load_fb_layers_v3d:139case nir_intrinsic_load_tcs_num_patches_amd:140case nir_intrinsic_load_ring_tess_factors_amd:141case nir_intrinsic_load_ring_tess_offchip_amd:142case nir_intrinsic_load_ring_tess_factors_offset_amd:143case nir_intrinsic_load_ring_tess_offchip_offset_amd:144case nir_intrinsic_load_ring_esgs_amd:145case nir_intrinsic_load_ring_es2gs_offset_amd:146case nir_intrinsic_load_sample_positions_pan:147case nir_intrinsic_load_workgroup_num_input_vertices_amd:148case nir_intrinsic_load_workgroup_num_input_primitives_amd:149case nir_intrinsic_load_shader_query_enabled_amd:150case nir_intrinsic_load_cull_front_face_enabled_amd:151case nir_intrinsic_load_cull_back_face_enabled_amd:152case nir_intrinsic_load_cull_ccw_amd:153case nir_intrinsic_load_cull_small_primitives_enabled_amd:154case nir_intrinsic_load_cull_any_enabled_amd:155case nir_intrinsic_load_cull_small_prim_precision_amd:156is_divergent = false;157break;158159/* Intrinsics with divergence depending on shader stage and hardware */160case nir_intrinsic_load_frag_shading_rate:161is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);162break;163case nir_intrinsic_load_input:164is_divergent = instr->src[0].ssa->divergent;165if (stage == MESA_SHADER_FRAGMENT)166is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);167else if (stage == MESA_SHADER_TESS_EVAL)168is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);169else170is_divergent = true;171break;172case nir_intrinsic_load_per_vertex_input:173is_divergent = instr->src[0].ssa->divergent ||174instr->src[1].ssa->divergent;175if (stage == MESA_SHADER_TESS_CTRL)176is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);177if (stage == MESA_SHADER_TESS_EVAL)178is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);179else180is_divergent = true;181break;182case nir_intrinsic_load_input_vertex:183is_divergent = instr->src[1].ssa->divergent;184assert(stage == MESA_SHADER_FRAGMENT);185is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);186break;187case nir_intrinsic_load_output:188assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT);189is_divergent = instr->src[0].ssa->divergent;190if (stage == MESA_SHADER_TESS_CTRL)191is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);192else193is_divergent = true;194break;195case nir_intrinsic_load_per_vertex_output:196assert(stage == MESA_SHADER_TESS_CTRL);197is_divergent = instr->src[0].ssa->divergent ||198instr->src[1].ssa->divergent ||199!(options & nir_divergence_single_patch_per_tcs_subgroup);200break;201case nir_intrinsic_load_layer_id:202case nir_intrinsic_load_front_face:203assert(stage == MESA_SHADER_FRAGMENT);204is_divergent = !(options & nir_divergence_single_prim_per_subgroup);205break;206case nir_intrinsic_load_view_index:207assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);208if (options & nir_divergence_view_index_uniform)209is_divergent = false;210else if (stage == MESA_SHADER_FRAGMENT)211is_divergent = !(options & nir_divergence_single_prim_per_subgroup);212break;213case nir_intrinsic_load_fs_input_interp_deltas:214assert(stage == MESA_SHADER_FRAGMENT);215is_divergent = instr->src[0].ssa->divergent;216is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);217break;218case nir_intrinsic_load_primitive_id:219if (stage == MESA_SHADER_FRAGMENT)220is_divergent = !(options & nir_divergence_single_prim_per_subgroup);221else if (stage == MESA_SHADER_TESS_CTRL)222is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);223else if (stage == MESA_SHADER_TESS_EVAL)224is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);225else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)226is_divergent = true;227else228unreachable("Invalid stage for load_primitive_id");229break;230case nir_intrinsic_load_tess_level_inner:231case nir_intrinsic_load_tess_level_outer:232if (stage == MESA_SHADER_TESS_CTRL)233is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);234else if (stage == MESA_SHADER_TESS_EVAL)235is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);236else237unreachable("Invalid stage for load_primitive_tess_level_*");238break;239case nir_intrinsic_load_patch_vertices_in:240if (stage == MESA_SHADER_TESS_EVAL)241is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);242else243assert(stage == MESA_SHADER_TESS_CTRL);244break;245246case nir_intrinsic_load_workgroup_id:247assert(stage == MESA_SHADER_COMPUTE);248is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);249break;250251/* Clustered reductions are uniform if cluster_size == subgroup_size or252* the source is uniform and the operation is invariant.253* Inclusive scans are uniform if254* the source is uniform and the operation is invariant255*/256case nir_intrinsic_reduce:257if (nir_intrinsic_cluster_size(instr) == 0)258return false;259FALLTHROUGH;260case nir_intrinsic_inclusive_scan: {261nir_op op = nir_intrinsic_reduction_op(instr);262is_divergent = instr->src[0].ssa->divergent;263if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&264op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&265op != nir_op_iand && op != nir_op_ior)266is_divergent = true;267break;268}269270case nir_intrinsic_load_ubo:271case nir_intrinsic_load_ssbo:272is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||273instr->src[1].ssa->divergent;274break;275276case nir_intrinsic_get_ssbo_size:277case nir_intrinsic_deref_buffer_array_length:278is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);279break;280281case nir_intrinsic_image_load:282case nir_intrinsic_image_deref_load:283case nir_intrinsic_bindless_image_load:284case nir_intrinsic_image_sparse_load:285case nir_intrinsic_image_deref_sparse_load:286case nir_intrinsic_bindless_image_sparse_load:287is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||288instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;289break;290291292/* Intrinsics with divergence depending on sources */293case nir_intrinsic_ballot_bitfield_extract:294case nir_intrinsic_ballot_find_lsb:295case nir_intrinsic_ballot_find_msb:296case nir_intrinsic_ballot_bit_count_reduce:297case nir_intrinsic_shuffle_xor:298case nir_intrinsic_shuffle_up:299case nir_intrinsic_shuffle_down:300case nir_intrinsic_quad_broadcast:301case nir_intrinsic_quad_swap_horizontal:302case nir_intrinsic_quad_swap_vertical:303case nir_intrinsic_quad_swap_diagonal:304case nir_intrinsic_byte_permute_amd:305case nir_intrinsic_load_deref:306case nir_intrinsic_load_shared:307case nir_intrinsic_load_global:308case nir_intrinsic_load_global_constant:309case nir_intrinsic_load_uniform:310case nir_intrinsic_load_constant:311case nir_intrinsic_load_sample_pos_from_id:312case nir_intrinsic_load_kernel_input:313case nir_intrinsic_load_buffer_amd:314case nir_intrinsic_image_samples:315case nir_intrinsic_image_deref_samples:316case nir_intrinsic_bindless_image_samples:317case nir_intrinsic_image_size:318case nir_intrinsic_image_deref_size:319case nir_intrinsic_bindless_image_size:320case nir_intrinsic_copy_deref:321case nir_intrinsic_vulkan_resource_index:322case nir_intrinsic_vulkan_resource_reindex:323case nir_intrinsic_load_vulkan_descriptor:324case nir_intrinsic_atomic_counter_read:325case nir_intrinsic_atomic_counter_read_deref:326case nir_intrinsic_quad_swizzle_amd:327case nir_intrinsic_masked_swizzle_amd:328case nir_intrinsic_is_sparse_texels_resident:329case nir_intrinsic_sparse_residency_code_and:330case nir_intrinsic_load_sbt_amd:331case nir_intrinsic_bvh64_intersect_ray_amd:332case nir_intrinsic_get_ubo_size:333case nir_intrinsic_load_ssbo_address: {334unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;335for (unsigned i = 0; i < num_srcs; i++) {336if (instr->src[i].ssa->divergent) {337is_divergent = true;338break;339}340}341break;342}343344case nir_intrinsic_shuffle:345is_divergent = instr->src[0].ssa->divergent &&346instr->src[1].ssa->divergent;347break;348349/* Intrinsics which are always divergent */350case nir_intrinsic_load_color0:351case nir_intrinsic_load_color1:352case nir_intrinsic_load_param:353case nir_intrinsic_load_sample_id:354case nir_intrinsic_load_sample_id_no_per_sample:355case nir_intrinsic_load_sample_mask_in:356case nir_intrinsic_load_interpolated_input:357case nir_intrinsic_load_barycentric_pixel:358case nir_intrinsic_load_barycentric_centroid:359case nir_intrinsic_load_barycentric_sample:360case nir_intrinsic_load_barycentric_model:361case nir_intrinsic_load_barycentric_at_sample:362case nir_intrinsic_load_barycentric_at_offset:363case nir_intrinsic_interp_deref_at_offset:364case nir_intrinsic_interp_deref_at_sample:365case nir_intrinsic_interp_deref_at_centroid:366case nir_intrinsic_interp_deref_at_vertex:367case nir_intrinsic_load_tess_coord:368case nir_intrinsic_load_point_coord:369case nir_intrinsic_load_line_coord:370case nir_intrinsic_load_frag_coord:371case nir_intrinsic_load_sample_pos:372case nir_intrinsic_load_vertex_id_zero_base:373case nir_intrinsic_load_vertex_id:374case nir_intrinsic_load_instance_id:375case nir_intrinsic_load_invocation_id:376case nir_intrinsic_load_local_invocation_id:377case nir_intrinsic_load_local_invocation_index:378case nir_intrinsic_load_global_invocation_id:379case nir_intrinsic_load_global_invocation_id_zero_base:380case nir_intrinsic_load_global_invocation_index:381case nir_intrinsic_load_subgroup_invocation:382case nir_intrinsic_load_helper_invocation:383case nir_intrinsic_is_helper_invocation:384case nir_intrinsic_load_scratch:385case nir_intrinsic_deref_atomic_add:386case nir_intrinsic_deref_atomic_imin:387case nir_intrinsic_deref_atomic_umin:388case nir_intrinsic_deref_atomic_imax:389case nir_intrinsic_deref_atomic_umax:390case nir_intrinsic_deref_atomic_and:391case nir_intrinsic_deref_atomic_or:392case nir_intrinsic_deref_atomic_xor:393case nir_intrinsic_deref_atomic_exchange:394case nir_intrinsic_deref_atomic_comp_swap:395case nir_intrinsic_deref_atomic_fadd:396case nir_intrinsic_deref_atomic_fmin:397case nir_intrinsic_deref_atomic_fmax:398case nir_intrinsic_deref_atomic_fcomp_swap:399case nir_intrinsic_ssbo_atomic_add:400case nir_intrinsic_ssbo_atomic_imin:401case nir_intrinsic_ssbo_atomic_umin:402case nir_intrinsic_ssbo_atomic_imax:403case nir_intrinsic_ssbo_atomic_umax:404case nir_intrinsic_ssbo_atomic_and:405case nir_intrinsic_ssbo_atomic_or:406case nir_intrinsic_ssbo_atomic_xor:407case nir_intrinsic_ssbo_atomic_exchange:408case nir_intrinsic_ssbo_atomic_comp_swap:409case nir_intrinsic_ssbo_atomic_fadd:410case nir_intrinsic_ssbo_atomic_fmax:411case nir_intrinsic_ssbo_atomic_fmin:412case nir_intrinsic_ssbo_atomic_fcomp_swap:413case nir_intrinsic_image_deref_atomic_add:414case nir_intrinsic_image_deref_atomic_imin:415case nir_intrinsic_image_deref_atomic_umin:416case nir_intrinsic_image_deref_atomic_imax:417case nir_intrinsic_image_deref_atomic_umax:418case nir_intrinsic_image_deref_atomic_and:419case nir_intrinsic_image_deref_atomic_or:420case nir_intrinsic_image_deref_atomic_xor:421case nir_intrinsic_image_deref_atomic_exchange:422case nir_intrinsic_image_deref_atomic_comp_swap:423case nir_intrinsic_image_deref_atomic_fadd:424case nir_intrinsic_image_deref_atomic_fmin:425case nir_intrinsic_image_deref_atomic_fmax:426case nir_intrinsic_image_atomic_add:427case nir_intrinsic_image_atomic_imin:428case nir_intrinsic_image_atomic_umin:429case nir_intrinsic_image_atomic_imax:430case nir_intrinsic_image_atomic_umax:431case nir_intrinsic_image_atomic_and:432case nir_intrinsic_image_atomic_or:433case nir_intrinsic_image_atomic_xor:434case nir_intrinsic_image_atomic_exchange:435case nir_intrinsic_image_atomic_comp_swap:436case nir_intrinsic_image_atomic_fadd:437case nir_intrinsic_image_atomic_fmin:438case nir_intrinsic_image_atomic_fmax:439case nir_intrinsic_bindless_image_atomic_add:440case nir_intrinsic_bindless_image_atomic_imin:441case nir_intrinsic_bindless_image_atomic_umin:442case nir_intrinsic_bindless_image_atomic_imax:443case nir_intrinsic_bindless_image_atomic_umax:444case nir_intrinsic_bindless_image_atomic_and:445case nir_intrinsic_bindless_image_atomic_or:446case nir_intrinsic_bindless_image_atomic_xor:447case nir_intrinsic_bindless_image_atomic_exchange:448case nir_intrinsic_bindless_image_atomic_comp_swap:449case nir_intrinsic_bindless_image_atomic_fadd:450case nir_intrinsic_bindless_image_atomic_fmin:451case nir_intrinsic_bindless_image_atomic_fmax:452case nir_intrinsic_shared_atomic_add:453case nir_intrinsic_shared_atomic_imin:454case nir_intrinsic_shared_atomic_umin:455case nir_intrinsic_shared_atomic_imax:456case nir_intrinsic_shared_atomic_umax:457case nir_intrinsic_shared_atomic_and:458case nir_intrinsic_shared_atomic_or:459case nir_intrinsic_shared_atomic_xor:460case nir_intrinsic_shared_atomic_exchange:461case nir_intrinsic_shared_atomic_comp_swap:462case nir_intrinsic_shared_atomic_fadd:463case nir_intrinsic_shared_atomic_fmin:464case nir_intrinsic_shared_atomic_fmax:465case nir_intrinsic_shared_atomic_fcomp_swap:466case nir_intrinsic_global_atomic_add:467case nir_intrinsic_global_atomic_imin:468case nir_intrinsic_global_atomic_umin:469case nir_intrinsic_global_atomic_imax:470case nir_intrinsic_global_atomic_umax:471case nir_intrinsic_global_atomic_and:472case nir_intrinsic_global_atomic_or:473case nir_intrinsic_global_atomic_xor:474case nir_intrinsic_global_atomic_exchange:475case nir_intrinsic_global_atomic_comp_swap:476case nir_intrinsic_global_atomic_fadd:477case nir_intrinsic_global_atomic_fmin:478case nir_intrinsic_global_atomic_fmax:479case nir_intrinsic_global_atomic_fcomp_swap:480case nir_intrinsic_atomic_counter_add:481case nir_intrinsic_atomic_counter_min:482case nir_intrinsic_atomic_counter_max:483case nir_intrinsic_atomic_counter_and:484case nir_intrinsic_atomic_counter_or:485case nir_intrinsic_atomic_counter_xor:486case nir_intrinsic_atomic_counter_inc:487case nir_intrinsic_atomic_counter_pre_dec:488case nir_intrinsic_atomic_counter_post_dec:489case nir_intrinsic_atomic_counter_exchange:490case nir_intrinsic_atomic_counter_comp_swap:491case nir_intrinsic_atomic_counter_add_deref:492case nir_intrinsic_atomic_counter_min_deref:493case nir_intrinsic_atomic_counter_max_deref:494case nir_intrinsic_atomic_counter_and_deref:495case nir_intrinsic_atomic_counter_or_deref:496case nir_intrinsic_atomic_counter_xor_deref:497case nir_intrinsic_atomic_counter_inc_deref:498case nir_intrinsic_atomic_counter_pre_dec_deref:499case nir_intrinsic_atomic_counter_post_dec_deref:500case nir_intrinsic_atomic_counter_exchange_deref:501case nir_intrinsic_atomic_counter_comp_swap_deref:502case nir_intrinsic_exclusive_scan:503case nir_intrinsic_ballot_bit_count_exclusive:504case nir_intrinsic_ballot_bit_count_inclusive:505case nir_intrinsic_write_invocation_amd:506case nir_intrinsic_mbcnt_amd:507case nir_intrinsic_lane_permute_16_amd:508case nir_intrinsic_elect:509case nir_intrinsic_load_tlb_color_v3d:510case nir_intrinsic_load_tess_rel_patch_id_amd:511case nir_intrinsic_load_gs_vertex_offset_amd:512case nir_intrinsic_has_input_vertex_amd:513case nir_intrinsic_has_input_primitive_amd:514case nir_intrinsic_load_packed_passthrough_primitive_amd:515case nir_intrinsic_load_initial_edgeflag_amd:516case nir_intrinsic_gds_atomic_add_amd:517is_divergent = true;518break;519520default:521#ifdef NDEBUG522is_divergent = true;523break;524#else525nir_print_instr(&instr->instr, stderr);526unreachable("\nNIR divergence analysis: Unhandled intrinsic.");527#endif528}529530instr->dest.ssa.divergent = is_divergent;531return is_divergent;532}533534static bool535visit_tex(nir_tex_instr *instr)536{537if (instr->dest.ssa.divergent)538return false;539540bool is_divergent = false;541542for (unsigned i = 0; i < instr->num_srcs; i++) {543switch (instr->src[i].src_type) {544case nir_tex_src_sampler_deref:545case nir_tex_src_sampler_handle:546case nir_tex_src_sampler_offset:547is_divergent |= instr->src[i].src.ssa->divergent &&548instr->sampler_non_uniform;549break;550case nir_tex_src_texture_deref:551case nir_tex_src_texture_handle:552case nir_tex_src_texture_offset:553is_divergent |= instr->src[i].src.ssa->divergent &&554instr->texture_non_uniform;555break;556default:557is_divergent |= instr->src[i].src.ssa->divergent;558break;559}560}561562instr->dest.ssa.divergent = is_divergent;563return is_divergent;564}565566static bool567visit_load_const(nir_load_const_instr *instr)568{569return false;570}571572static bool573visit_ssa_undef(nir_ssa_undef_instr *instr)574{575return false;576}577578static bool579nir_variable_mode_is_uniform(nir_variable_mode mode) {580switch (mode) {581case nir_var_uniform:582case nir_var_mem_ubo:583case nir_var_mem_ssbo:584case nir_var_mem_shared:585case nir_var_mem_global:586return true;587default:588return false;589}590}591592static bool593nir_variable_is_uniform(nir_shader *shader, nir_variable *var)594{595if (nir_variable_mode_is_uniform(var->data.mode))596return true;597598nir_divergence_options options = shader->options->divergence_analysis_options;599gl_shader_stage stage = shader->info.stage;600601if (stage == MESA_SHADER_FRAGMENT &&602(options & nir_divergence_single_prim_per_subgroup) &&603var->data.mode == nir_var_shader_in &&604var->data.interpolation == INTERP_MODE_FLAT)605return true;606607if (stage == MESA_SHADER_TESS_CTRL &&608(options & nir_divergence_single_patch_per_tcs_subgroup) &&609var->data.mode == nir_var_shader_out && var->data.patch)610return true;611612if (stage == MESA_SHADER_TESS_EVAL &&613(options & nir_divergence_single_patch_per_tes_subgroup) &&614var->data.mode == nir_var_shader_in && var->data.patch)615return true;616617return false;618}619620static bool621visit_deref(nir_shader *shader, nir_deref_instr *deref)622{623if (deref->dest.ssa.divergent)624return false;625626bool is_divergent = false;627switch (deref->deref_type) {628case nir_deref_type_var:629is_divergent = !nir_variable_is_uniform(shader, deref->var);630break;631case nir_deref_type_array:632case nir_deref_type_ptr_as_array:633is_divergent = deref->arr.index.ssa->divergent;634FALLTHROUGH;635case nir_deref_type_struct:636case nir_deref_type_array_wildcard:637is_divergent |= deref->parent.ssa->divergent;638break;639case nir_deref_type_cast:640is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||641deref->parent.ssa->divergent;642break;643}644645deref->dest.ssa.divergent = is_divergent;646return is_divergent;647}648649static bool650visit_jump(nir_jump_instr *jump, struct divergence_state *state)651{652switch (jump->type) {653case nir_jump_continue:654if (state->divergent_loop_continue)655return false;656if (state->divergent_loop_cf)657state->divergent_loop_continue = true;658return state->divergent_loop_continue;659case nir_jump_break:660if (state->divergent_loop_break)661return false;662if (state->divergent_loop_cf)663state->divergent_loop_break = true;664return state->divergent_loop_break;665case nir_jump_halt:666/* This totally kills invocations so it doesn't add divergence */667break;668case nir_jump_return:669unreachable("NIR divergence analysis: Unsupported return instruction.");670break;671case nir_jump_goto:672case nir_jump_goto_if:673unreachable("NIR divergence analysis: Unsupported goto_if instruction.");674break;675}676return false;677}678679static bool680set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)681{682def->divergent = false;683return true;684}685686static bool687update_instr_divergence(nir_shader *shader, nir_instr *instr)688{689switch (instr->type) {690case nir_instr_type_alu:691return visit_alu(nir_instr_as_alu(instr));692case nir_instr_type_intrinsic:693return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));694case nir_instr_type_tex:695return visit_tex(nir_instr_as_tex(instr));696case nir_instr_type_load_const:697return visit_load_const(nir_instr_as_load_const(instr));698case nir_instr_type_ssa_undef:699return visit_ssa_undef(nir_instr_as_ssa_undef(instr));700case nir_instr_type_deref:701return visit_deref(shader, nir_instr_as_deref(instr));702case nir_instr_type_jump:703case nir_instr_type_phi:704case nir_instr_type_call:705case nir_instr_type_parallel_copy:706default:707unreachable("NIR divergence analysis: Unsupported instruction type.");708}709}710711static bool712visit_block(nir_block *block, struct divergence_state *state)713{714bool has_changed = false;715716nir_foreach_instr(instr, block) {717/* phis are handled when processing the branches */718if (instr->type == nir_instr_type_phi)719continue;720721if (state->first_visit)722nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);723724if (instr->type == nir_instr_type_jump)725has_changed |= visit_jump(nir_instr_as_jump(instr), state);726else727has_changed |= update_instr_divergence(state->shader, instr);728}729730return has_changed;731}732733/* There are 3 types of phi instructions:734* (1) gamma: represent the joining point of different paths735* created by an “if-then-else” branch.736* The resulting value is divergent if the branch condition737* or any of the source values is divergent. */738static bool739visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)740{741if (phi->dest.ssa.divergent)742return false;743744unsigned defined_srcs = 0;745nir_foreach_phi_src(src, phi) {746/* if any source value is divergent, the resulting value is divergent */747if (src->src.ssa->divergent) {748phi->dest.ssa.divergent = true;749return true;750}751if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {752defined_srcs++;753}754}755756/* if the condition is divergent and two sources defined, the definition is divergent */757if (defined_srcs > 1 && if_cond_divergent) {758phi->dest.ssa.divergent = true;759return true;760}761762return false;763}764765/* There are 3 types of phi instructions:766* (2) mu: which only exist at loop headers,767* merge initial and loop-carried values.768* The resulting value is divergent if any source value769* is divergent or a divergent loop continue condition770* is associated with a different ssa-def. */771static bool772visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)773{774if (phi->dest.ssa.divergent)775return false;776777nir_ssa_def* same = NULL;778nir_foreach_phi_src(src, phi) {779/* if any source value is divergent, the resulting value is divergent */780if (src->src.ssa->divergent) {781phi->dest.ssa.divergent = true;782return true;783}784/* if this loop is uniform, we're done here */785if (!divergent_continue)786continue;787/* skip the loop preheader */788if (src->pred == preheader)789continue;790/* skip undef values */791if (nir_src_is_undef(src->src))792continue;793794/* check if all loop-carried values are from the same ssa-def */795if (!same)796same = src->src.ssa;797else if (same != src->src.ssa) {798phi->dest.ssa.divergent = true;799return true;800}801}802803return false;804}805806/* There are 3 types of phi instructions:807* (3) eta: represent values that leave a loop.808* The resulting value is divergent if the source value is divergent809* or any loop exit condition is divergent for a value which is810* not loop-invariant.811* (note: there should be no phi for loop-invariant variables.) */812static bool813visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)814{815if (phi->dest.ssa.divergent)816return false;817818if (divergent_break) {819phi->dest.ssa.divergent = true;820return true;821}822823/* if any source value is divergent, the resulting value is divergent */824nir_foreach_phi_src(src, phi) {825if (src->src.ssa->divergent) {826phi->dest.ssa.divergent = true;827return true;828}829}830831return false;832}833834static bool835visit_if(nir_if *if_stmt, struct divergence_state *state)836{837bool progress = false;838839struct divergence_state then_state = *state;840then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;841progress |= visit_cf_list(&if_stmt->then_list, &then_state);842843struct divergence_state else_state = *state;844else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;845progress |= visit_cf_list(&if_stmt->else_list, &else_state);846847/* handle phis after the IF */848nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {849if (instr->type != nir_instr_type_phi)850break;851852if (state->first_visit)853nir_instr_as_phi(instr)->dest.ssa.divergent = false;854progress |= visit_if_merge_phi(nir_instr_as_phi(instr),855if_stmt->condition.ssa->divergent);856}857858/* join loop divergence information from both branch legs */859state->divergent_loop_continue |= then_state.divergent_loop_continue ||860else_state.divergent_loop_continue;861state->divergent_loop_break |= then_state.divergent_loop_break ||862else_state.divergent_loop_break;863864/* A divergent continue makes succeeding loop CF divergent:865* not all loop-active invocations participate in the remaining loop-body866* which means that a following break might be taken by some invocations, only */867state->divergent_loop_cf |= state->divergent_loop_continue;868869return progress;870}871872static bool873visit_loop(nir_loop *loop, struct divergence_state *state)874{875bool progress = false;876nir_block *loop_header = nir_loop_first_block(loop);877nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);878879/* handle loop header phis first: we have no knowledge yet about880* the loop's control flow or any loop-carried sources. */881nir_foreach_instr(instr, loop_header) {882if (instr->type != nir_instr_type_phi)883break;884885nir_phi_instr *phi = nir_instr_as_phi(instr);886if (!state->first_visit && phi->dest.ssa.divergent)887continue;888889nir_foreach_phi_src(src, phi) {890if (src->pred == loop_preheader) {891phi->dest.ssa.divergent = src->src.ssa->divergent;892break;893}894}895progress |= phi->dest.ssa.divergent;896}897898/* setup loop state */899struct divergence_state loop_state = *state;900loop_state.divergent_loop_cf = false;901loop_state.divergent_loop_continue = false;902loop_state.divergent_loop_break = false;903904/* process loop body until no further changes are made */905bool repeat;906do {907progress |= visit_cf_list(&loop->body, &loop_state);908repeat = false;909910/* revisit loop header phis to see if something has changed */911nir_foreach_instr(instr, loop_header) {912if (instr->type != nir_instr_type_phi)913break;914915repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),916loop_preheader,917loop_state.divergent_loop_continue);918}919920loop_state.divergent_loop_cf = false;921loop_state.first_visit = false;922} while (repeat);923924/* handle phis after the loop */925nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {926if (instr->type != nir_instr_type_phi)927break;928929if (state->first_visit)930nir_instr_as_phi(instr)->dest.ssa.divergent = false;931progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),932loop_state.divergent_loop_break);933}934935loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);936937return progress;938}939940static bool941visit_cf_list(struct exec_list *list, struct divergence_state *state)942{943bool has_changed = false;944945foreach_list_typed(nir_cf_node, node, node, list) {946switch (node->type) {947case nir_cf_node_block:948has_changed |= visit_block(nir_cf_node_as_block(node), state);949break;950case nir_cf_node_if:951has_changed |= visit_if(nir_cf_node_as_if(node), state);952break;953case nir_cf_node_loop:954has_changed |= visit_loop(nir_cf_node_as_loop(node), state);955break;956case nir_cf_node_function:957unreachable("NIR divergence analysis: Unsupported cf_node type.");958}959}960961return has_changed;962}963964void965nir_divergence_analysis(nir_shader *shader)966{967struct divergence_state state = {968.stage = shader->info.stage,969.shader = shader,970.divergent_loop_cf = false,971.divergent_loop_continue = false,972.divergent_loop_break = false,973.first_visit = true,974};975976visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);977}978979bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)980{981nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);982983if (instr->type == nir_instr_type_phi) {984nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);985/* can only update gamma/if phis */986if (!prev || prev->type != nir_cf_node_if)987return false;988989nir_if *nif = nir_cf_node_as_if(prev);990991visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));992return true;993}994995update_instr_divergence(shader, instr);996return true;997}99899910001001