/* Source: src/compiler/nir/nir_gather_info.c (Mesa, 21.2-virgl branch) */
/*1* Copyright © 2015 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "nir.h"24#include "nir_deref.h"25#include "main/menums.h"2627static bool28src_is_invocation_id(const nir_src *src)29{30assert(src->is_ssa);31if (src->ssa->parent_instr->type != nir_instr_type_intrinsic)32return false;3334return nir_instr_as_intrinsic(src->ssa->parent_instr)->intrinsic ==35nir_intrinsic_load_invocation_id;36}3738static void39get_deref_info(nir_shader *shader, nir_variable *var, nir_deref_instr *deref,40bool *cross_invocation, bool *indirect)41{42*cross_invocation = false;43*indirect = false;4445const bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);4647nir_deref_path path;48nir_deref_path_init(&path, deref, NULL);49assert(path.path[0]->deref_type == nir_deref_type_var);50nir_deref_instr **p = &path.path[1];5152/* Vertex index is the outermost array index. 
*/53if (is_arrayed) {54assert((*p)->deref_type == nir_deref_type_array);55*cross_invocation = !src_is_invocation_id(&(*p)->arr.index);56p++;57}5859/* We always lower indirect dereferences for "compact" array vars. */60if (!path.path[0]->var->data.compact) {61/* Non-compact array vars: find out if they are indirect. */62for (; *p; p++) {63if ((*p)->deref_type == nir_deref_type_array) {64*indirect |= !nir_src_is_const((*p)->arr.index);65} else if ((*p)->deref_type == nir_deref_type_struct) {66/* Struct indices are always constant. */67} else {68unreachable("Unsupported deref type");69}70}71}7273nir_deref_path_finish(&path);74}7576static void77set_io_mask(nir_shader *shader, nir_variable *var, int offset, int len,78nir_deref_instr *deref, bool is_output_read)79{80for (int i = 0; i < len; i++) {81assert(var->data.location != -1);8283int idx = var->data.location + offset + i;84bool is_patch_generic = var->data.patch &&85idx != VARYING_SLOT_TESS_LEVEL_INNER &&86idx != VARYING_SLOT_TESS_LEVEL_OUTER &&87idx != VARYING_SLOT_BOUNDING_BOX0 &&88idx != VARYING_SLOT_BOUNDING_BOX1;89uint64_t bitfield;9091if (is_patch_generic) {92assert(idx >= VARYING_SLOT_PATCH0 && idx < VARYING_SLOT_TESS_MAX);93bitfield = BITFIELD64_BIT(idx - VARYING_SLOT_PATCH0);94}95else {96assert(idx < VARYING_SLOT_MAX);97bitfield = BITFIELD64_BIT(idx);98}99100bool cross_invocation;101bool indirect;102get_deref_info(shader, var, deref, &cross_invocation, &indirect);103104if (var->data.mode == nir_var_shader_in) {105if (is_patch_generic) {106shader->info.patch_inputs_read |= bitfield;107if (indirect)108shader->info.patch_inputs_read_indirectly |= bitfield;109} else {110shader->info.inputs_read |= bitfield;111if (indirect)112shader->info.inputs_read_indirectly |= bitfield;113}114115if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)116shader->info.tess.tcs_cross_invocation_inputs_read |= bitfield;117118if (shader->info.stage == MESA_SHADER_FRAGMENT) {119shader->info.fs.uses_sample_qualifier |= 
var->data.sample;120}121} else {122assert(var->data.mode == nir_var_shader_out);123if (is_output_read) {124if (is_patch_generic) {125shader->info.patch_outputs_read |= bitfield;126if (indirect)127shader->info.patch_outputs_accessed_indirectly |= bitfield;128} else {129shader->info.outputs_read |= bitfield;130if (indirect)131shader->info.outputs_accessed_indirectly |= bitfield;132}133134if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)135shader->info.tess.tcs_cross_invocation_outputs_read |= bitfield;136} else {137if (is_patch_generic) {138shader->info.patch_outputs_written |= bitfield;139if (indirect)140shader->info.patch_outputs_accessed_indirectly |= bitfield;141} else if (!var->data.read_only) {142shader->info.outputs_written |= bitfield;143if (indirect)144shader->info.outputs_accessed_indirectly |= bitfield;145}146}147148149if (var->data.fb_fetch_output) {150shader->info.outputs_read |= bitfield;151if (shader->info.stage == MESA_SHADER_FRAGMENT)152shader->info.fs.uses_fbfetch_output = true;153}154155if (shader->info.stage == MESA_SHADER_FRAGMENT &&156!is_output_read && var->data.index == 1)157shader->info.fs.color_is_dual_source = true;158}159}160}161162/**163* Mark an entire variable as used. Caller must ensure that the variable164* represents a shader input or output.165*/166static void167mark_whole_variable(nir_shader *shader, nir_variable *var,168nir_deref_instr *deref, bool is_output_read)169{170const struct glsl_type *type = var->type;171172if (nir_is_arrayed_io(var, shader->info.stage)) {173assert(glsl_type_is_array(type));174type = glsl_get_array_element(type);175}176177if (var->data.per_view) {178/* TODO: Per view and Per Vertex are not currently used together. When179* they start to be used (e.g. when adding Primitive Replication for GS180* on Intel), verify that "peeling" the type twice is correct. 
This181* assert ensures we remember it.182*/183assert(!nir_is_arrayed_io(var, shader->info.stage));184assert(glsl_type_is_array(type));185type = glsl_get_array_element(type);186}187188const unsigned slots =189var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)190: glsl_count_attribute_slots(type, false);191192set_io_mask(shader, var, 0, slots, deref, is_output_read);193}194195static unsigned196get_io_offset(nir_deref_instr *deref, nir_variable *var, bool is_arrayed)197{198if (var->data.compact) {199assert(deref->deref_type == nir_deref_type_array);200return nir_src_is_const(deref->arr.index) ?201(nir_src_as_uint(deref->arr.index) + var->data.location_frac) / 4u :202(unsigned)-1;203}204205unsigned offset = 0;206207for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {208if (d->deref_type == nir_deref_type_array) {209if (is_arrayed && nir_deref_instr_parent(d)->deref_type == nir_deref_type_var)210break;211212if (!nir_src_is_const(d->arr.index))213return -1;214215offset += glsl_count_attribute_slots(d->type, false) *216nir_src_as_uint(d->arr.index);217} else if (d->deref_type == nir_deref_type_struct) {218const struct glsl_type *parent_type = nir_deref_instr_parent(d)->type;219for (unsigned i = 0; i < d->strct.index; i++) {220const struct glsl_type *field_type = glsl_get_struct_field(parent_type, i);221offset += glsl_count_attribute_slots(field_type, false);222}223}224}225226return offset;227}228229/**230* Try to mark a portion of the given varying as used. 
Caller must ensure231* that the variable represents a shader input or output.232*233* If the index can't be interpreted as a constant, or some other problem234* occurs, then nothing will be marked and false will be returned.235*/236static bool237try_mask_partial_io(nir_shader *shader, nir_variable *var,238nir_deref_instr *deref, bool is_output_read)239{240const struct glsl_type *type = var->type;241bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);242243if (is_arrayed) {244assert(glsl_type_is_array(type));245type = glsl_get_array_element(type);246}247248/* Per view variables will be considered as a whole. */249if (var->data.per_view)250return false;251252unsigned offset = get_io_offset(deref, var, is_arrayed);253if (offset == -1)254return false;255256const unsigned slots =257var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)258: glsl_count_attribute_slots(type, false);259260if (offset >= slots) {261/* Constant index outside the bounds of the matrix/array. This could262* arise as a result of constant folding of a legal GLSL program.263*264* Even though the spec says that indexing outside the bounds of a265* matrix/array results in undefined behaviour, we don't want to pass266* out-of-range values to set_io_mask() (since this could result in267* slots that don't exist being marked as used), so just let the caller268* mark the whole variable as used.269*/270return false;271}272273unsigned len = glsl_count_attribute_slots(deref->type, false);274set_io_mask(shader, var, offset, len, deref, is_output_read);275return true;276}277278/** Returns true if the given intrinsic writes external memory279*280* Only returns true for writes to globally visible memory, not scratch and281* not shared.282*/283bool284nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr)285{286switch (instr->intrinsic) {287case nir_intrinsic_atomic_counter_inc:288case nir_intrinsic_atomic_counter_inc_deref:289case nir_intrinsic_atomic_counter_add:290case 
nir_intrinsic_atomic_counter_add_deref:291case nir_intrinsic_atomic_counter_pre_dec:292case nir_intrinsic_atomic_counter_pre_dec_deref:293case nir_intrinsic_atomic_counter_post_dec:294case nir_intrinsic_atomic_counter_post_dec_deref:295case nir_intrinsic_atomic_counter_min:296case nir_intrinsic_atomic_counter_min_deref:297case nir_intrinsic_atomic_counter_max:298case nir_intrinsic_atomic_counter_max_deref:299case nir_intrinsic_atomic_counter_and:300case nir_intrinsic_atomic_counter_and_deref:301case nir_intrinsic_atomic_counter_or:302case nir_intrinsic_atomic_counter_or_deref:303case nir_intrinsic_atomic_counter_xor:304case nir_intrinsic_atomic_counter_xor_deref:305case nir_intrinsic_atomic_counter_exchange:306case nir_intrinsic_atomic_counter_exchange_deref:307case nir_intrinsic_atomic_counter_comp_swap:308case nir_intrinsic_atomic_counter_comp_swap_deref:309case nir_intrinsic_bindless_image_atomic_add:310case nir_intrinsic_bindless_image_atomic_and:311case nir_intrinsic_bindless_image_atomic_comp_swap:312case nir_intrinsic_bindless_image_atomic_dec_wrap:313case nir_intrinsic_bindless_image_atomic_exchange:314case nir_intrinsic_bindless_image_atomic_fadd:315case nir_intrinsic_bindless_image_atomic_imax:316case nir_intrinsic_bindless_image_atomic_imin:317case nir_intrinsic_bindless_image_atomic_inc_wrap:318case nir_intrinsic_bindless_image_atomic_or:319case nir_intrinsic_bindless_image_atomic_umax:320case nir_intrinsic_bindless_image_atomic_umin:321case nir_intrinsic_bindless_image_atomic_xor:322case nir_intrinsic_bindless_image_store:323case nir_intrinsic_bindless_image_store_raw_intel:324case nir_intrinsic_global_atomic_add:325case nir_intrinsic_global_atomic_and:326case nir_intrinsic_global_atomic_comp_swap:327case nir_intrinsic_global_atomic_exchange:328case nir_intrinsic_global_atomic_fadd:329case nir_intrinsic_global_atomic_fcomp_swap:330case nir_intrinsic_global_atomic_fmax:331case nir_intrinsic_global_atomic_fmin:332case 
nir_intrinsic_global_atomic_imax:333case nir_intrinsic_global_atomic_imin:334case nir_intrinsic_global_atomic_or:335case nir_intrinsic_global_atomic_umax:336case nir_intrinsic_global_atomic_umin:337case nir_intrinsic_global_atomic_xor:338case nir_intrinsic_image_atomic_add:339case nir_intrinsic_image_atomic_and:340case nir_intrinsic_image_atomic_comp_swap:341case nir_intrinsic_image_atomic_dec_wrap:342case nir_intrinsic_image_atomic_exchange:343case nir_intrinsic_image_atomic_fadd:344case nir_intrinsic_image_atomic_imax:345case nir_intrinsic_image_atomic_imin:346case nir_intrinsic_image_atomic_inc_wrap:347case nir_intrinsic_image_atomic_or:348case nir_intrinsic_image_atomic_umax:349case nir_intrinsic_image_atomic_umin:350case nir_intrinsic_image_atomic_xor:351case nir_intrinsic_image_deref_atomic_add:352case nir_intrinsic_image_deref_atomic_and:353case nir_intrinsic_image_deref_atomic_comp_swap:354case nir_intrinsic_image_deref_atomic_dec_wrap:355case nir_intrinsic_image_deref_atomic_exchange:356case nir_intrinsic_image_deref_atomic_fadd:357case nir_intrinsic_image_deref_atomic_imax:358case nir_intrinsic_image_deref_atomic_imin:359case nir_intrinsic_image_deref_atomic_inc_wrap:360case nir_intrinsic_image_deref_atomic_or:361case nir_intrinsic_image_deref_atomic_umax:362case nir_intrinsic_image_deref_atomic_umin:363case nir_intrinsic_image_deref_atomic_xor:364case nir_intrinsic_image_deref_store:365case nir_intrinsic_image_deref_store_raw_intel:366case nir_intrinsic_image_store:367case nir_intrinsic_image_store_raw_intel:368case nir_intrinsic_ssbo_atomic_add:369case nir_intrinsic_ssbo_atomic_add_ir3:370case nir_intrinsic_ssbo_atomic_and:371case nir_intrinsic_ssbo_atomic_and_ir3:372case nir_intrinsic_ssbo_atomic_comp_swap:373case nir_intrinsic_ssbo_atomic_comp_swap_ir3:374case nir_intrinsic_ssbo_atomic_exchange:375case nir_intrinsic_ssbo_atomic_exchange_ir3:376case nir_intrinsic_ssbo_atomic_fadd:377case nir_intrinsic_ssbo_atomic_fcomp_swap:378case 
nir_intrinsic_ssbo_atomic_fmax:379case nir_intrinsic_ssbo_atomic_fmin:380case nir_intrinsic_ssbo_atomic_imax:381case nir_intrinsic_ssbo_atomic_imax_ir3:382case nir_intrinsic_ssbo_atomic_imin:383case nir_intrinsic_ssbo_atomic_imin_ir3:384case nir_intrinsic_ssbo_atomic_or:385case nir_intrinsic_ssbo_atomic_or_ir3:386case nir_intrinsic_ssbo_atomic_umax:387case nir_intrinsic_ssbo_atomic_umax_ir3:388case nir_intrinsic_ssbo_atomic_umin:389case nir_intrinsic_ssbo_atomic_umin_ir3:390case nir_intrinsic_ssbo_atomic_xor:391case nir_intrinsic_ssbo_atomic_xor_ir3:392case nir_intrinsic_store_global:393case nir_intrinsic_store_global_ir3:394case nir_intrinsic_store_ssbo:395case nir_intrinsic_store_ssbo_ir3:396return true;397398case nir_intrinsic_store_deref:399case nir_intrinsic_deref_atomic_add:400case nir_intrinsic_deref_atomic_imin:401case nir_intrinsic_deref_atomic_umin:402case nir_intrinsic_deref_atomic_imax:403case nir_intrinsic_deref_atomic_umax:404case nir_intrinsic_deref_atomic_and:405case nir_intrinsic_deref_atomic_or:406case nir_intrinsic_deref_atomic_xor:407case nir_intrinsic_deref_atomic_exchange:408case nir_intrinsic_deref_atomic_comp_swap:409case nir_intrinsic_deref_atomic_fadd:410case nir_intrinsic_deref_atomic_fmin:411case nir_intrinsic_deref_atomic_fmax:412case nir_intrinsic_deref_atomic_fcomp_swap:413return nir_deref_mode_may_be(nir_src_as_deref(instr->src[0]),414nir_var_mem_ssbo | nir_var_mem_global);415416default:417return false;418}419}420421static void422gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,423void *dead_ctx)424{425uint64_t slot_mask = 0;426uint16_t slot_mask_16bit = 0;427428if (nir_intrinsic_infos[instr->intrinsic].index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0) {429nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);430431if (semantics.location >= VARYING_SLOT_PATCH0 &&432semantics.location <= VARYING_SLOT_PATCH31) {433/* Generic per-patch I/O. 
*/434assert((shader->info.stage == MESA_SHADER_TESS_EVAL &&435instr->intrinsic == nir_intrinsic_load_input) ||436(shader->info.stage == MESA_SHADER_TESS_CTRL &&437(instr->intrinsic == nir_intrinsic_load_output ||438instr->intrinsic == nir_intrinsic_store_output)));439440semantics.location -= VARYING_SLOT_PATCH0;441}442443if (semantics.location >= VARYING_SLOT_VAR0_16BIT &&444semantics.location <= VARYING_SLOT_VAR15_16BIT) {445/* Convert num_slots from the units of half vectors to full vectors. */446unsigned num_slots = (semantics.num_slots + semantics.high_16bits + 1) / 2;447slot_mask_16bit =448BITFIELD_RANGE(semantics.location - VARYING_SLOT_VAR0_16BIT, num_slots);449} else {450slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots);451assert(util_bitcount64(slot_mask) == semantics.num_slots);452}453}454455switch (instr->intrinsic) {456case nir_intrinsic_demote:457case nir_intrinsic_demote_if:458shader->info.fs.uses_demote = true;459FALLTHROUGH; /* quads with helper lanes only might be discarded entirely */460case nir_intrinsic_discard:461case nir_intrinsic_discard_if:462/* Freedreno uses the discard_if intrinsic to end GS invocations that463* don't produce a vertex, so we only set uses_discard if executing on464* a fragment shader. 
*/465if (shader->info.stage == MESA_SHADER_FRAGMENT)466shader->info.fs.uses_discard = true;467break;468469case nir_intrinsic_terminate:470case nir_intrinsic_terminate_if:471assert(shader->info.stage == MESA_SHADER_FRAGMENT);472shader->info.fs.uses_discard = true;473break;474475case nir_intrinsic_interp_deref_at_centroid:476case nir_intrinsic_interp_deref_at_sample:477case nir_intrinsic_interp_deref_at_offset:478case nir_intrinsic_interp_deref_at_vertex:479case nir_intrinsic_load_deref:480case nir_intrinsic_store_deref:{481nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);482if (nir_deref_mode_is_one_of(deref, nir_var_shader_in |483nir_var_shader_out)) {484nir_variable *var = nir_deref_instr_get_variable(deref);485bool is_output_read = false;486if (var->data.mode == nir_var_shader_out &&487instr->intrinsic == nir_intrinsic_load_deref)488is_output_read = true;489490if (!try_mask_partial_io(shader, var, deref, is_output_read))491mark_whole_variable(shader, var, deref, is_output_read);492493/* We need to track which input_reads bits correspond to a494* dvec3/dvec4 input attribute */495if (shader->info.stage == MESA_SHADER_VERTEX &&496var->data.mode == nir_var_shader_in &&497glsl_type_is_dual_slot(glsl_without_array(var->type))) {498for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, false); i++) {499int idx = var->data.location + i;500shader->info.vs.double_inputs |= BITFIELD64_BIT(idx);501}502}503}504if (nir_intrinsic_writes_external_memory(instr))505shader->info.writes_memory = true;506break;507}508509case nir_intrinsic_load_input:510case nir_intrinsic_load_per_vertex_input:511case nir_intrinsic_load_input_vertex:512case nir_intrinsic_load_interpolated_input:513if (shader->info.stage == MESA_SHADER_TESS_EVAL &&514instr->intrinsic == nir_intrinsic_load_input) {515shader->info.patch_inputs_read |= slot_mask;516if (!nir_src_is_const(*nir_get_io_offset_src(instr)))517shader->info.patch_inputs_read_indirectly |= slot_mask;518} else 
{519shader->info.inputs_read |= slot_mask;520shader->info.inputs_read_16bit |= slot_mask_16bit;521if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {522shader->info.inputs_read_indirectly |= slot_mask;523shader->info.inputs_read_indirectly_16bit |= slot_mask_16bit;524}525}526527if (shader->info.stage == MESA_SHADER_TESS_CTRL &&528instr->intrinsic == nir_intrinsic_load_per_vertex_input &&529!src_is_invocation_id(nir_get_io_vertex_index_src(instr)))530shader->info.tess.tcs_cross_invocation_inputs_read |= slot_mask;531break;532533case nir_intrinsic_load_output:534case nir_intrinsic_load_per_vertex_output:535if (shader->info.stage == MESA_SHADER_TESS_CTRL &&536instr->intrinsic == nir_intrinsic_load_output) {537shader->info.patch_outputs_read |= slot_mask;538if (!nir_src_is_const(*nir_get_io_offset_src(instr)))539shader->info.patch_outputs_accessed_indirectly |= slot_mask;540} else {541shader->info.outputs_read |= slot_mask;542shader->info.outputs_read_16bit |= slot_mask_16bit;543if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {544shader->info.outputs_accessed_indirectly |= slot_mask;545shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;546}547}548549if (shader->info.stage == MESA_SHADER_TESS_CTRL &&550instr->intrinsic == nir_intrinsic_load_per_vertex_output &&551!src_is_invocation_id(nir_get_io_vertex_index_src(instr)))552shader->info.tess.tcs_cross_invocation_outputs_read |= slot_mask;553554if (shader->info.stage == MESA_SHADER_FRAGMENT &&555nir_intrinsic_io_semantics(instr).fb_fetch_output)556shader->info.fs.uses_fbfetch_output = true;557break;558559case nir_intrinsic_store_output:560case nir_intrinsic_store_per_vertex_output:561if (shader->info.stage == MESA_SHADER_TESS_CTRL &&562instr->intrinsic == nir_intrinsic_store_output) {563shader->info.patch_outputs_written |= slot_mask;564if (!nir_src_is_const(*nir_get_io_offset_src(instr)))565shader->info.patch_outputs_accessed_indirectly |= slot_mask;566} else {567shader->info.outputs_written |= 
slot_mask;568shader->info.outputs_written_16bit |= slot_mask_16bit;569if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {570shader->info.outputs_accessed_indirectly |= slot_mask;571shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;572}573}574575if (shader->info.stage == MESA_SHADER_FRAGMENT &&576nir_intrinsic_io_semantics(instr).dual_source_blend_index)577shader->info.fs.color_is_dual_source = true;578break;579580case nir_intrinsic_load_color0:581case nir_intrinsic_load_color1:582shader->info.inputs_read |=583BITFIELD64_BIT(VARYING_SLOT_COL0 <<584(instr->intrinsic == nir_intrinsic_load_color1));585FALLTHROUGH;586case nir_intrinsic_load_subgroup_size:587case nir_intrinsic_load_subgroup_invocation:588case nir_intrinsic_load_subgroup_eq_mask:589case nir_intrinsic_load_subgroup_ge_mask:590case nir_intrinsic_load_subgroup_gt_mask:591case nir_intrinsic_load_subgroup_le_mask:592case nir_intrinsic_load_subgroup_lt_mask:593case nir_intrinsic_load_num_subgroups:594case nir_intrinsic_load_subgroup_id:595case nir_intrinsic_load_vertex_id:596case nir_intrinsic_load_instance_id:597case nir_intrinsic_load_vertex_id_zero_base:598case nir_intrinsic_load_base_vertex:599case nir_intrinsic_load_first_vertex:600case nir_intrinsic_load_is_indexed_draw:601case nir_intrinsic_load_base_instance:602case nir_intrinsic_load_draw_id:603case nir_intrinsic_load_invocation_id:604case nir_intrinsic_load_frag_coord:605case nir_intrinsic_load_frag_shading_rate:606case nir_intrinsic_load_point_coord:607case nir_intrinsic_load_line_coord:608case nir_intrinsic_load_front_face:609case nir_intrinsic_load_sample_id:610case nir_intrinsic_load_sample_pos:611case nir_intrinsic_load_sample_mask_in:612case nir_intrinsic_load_helper_invocation:613case nir_intrinsic_load_tess_coord:614case nir_intrinsic_load_patch_vertices_in:615case nir_intrinsic_load_primitive_id:616case nir_intrinsic_load_tess_level_outer:617case nir_intrinsic_load_tess_level_inner:618case 
nir_intrinsic_load_tess_level_outer_default:619case nir_intrinsic_load_tess_level_inner_default:620case nir_intrinsic_load_local_invocation_id:621case nir_intrinsic_load_local_invocation_index:622case nir_intrinsic_load_global_invocation_id:623case nir_intrinsic_load_base_global_invocation_id:624case nir_intrinsic_load_global_invocation_index:625case nir_intrinsic_load_workgroup_id:626case nir_intrinsic_load_num_workgroups:627case nir_intrinsic_load_workgroup_size:628case nir_intrinsic_load_work_dim:629case nir_intrinsic_load_user_data_amd:630case nir_intrinsic_load_view_index:631case nir_intrinsic_load_barycentric_model:632case nir_intrinsic_load_gs_header_ir3:633case nir_intrinsic_load_tcs_header_ir3:634BITSET_SET(shader->info.system_values_read,635nir_system_value_from_intrinsic(instr->intrinsic));636break;637638case nir_intrinsic_load_barycentric_pixel:639if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||640nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {641BITSET_SET(shader->info.system_values_read,642SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);643} else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {644BITSET_SET(shader->info.system_values_read,645SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);646}647break;648649case nir_intrinsic_load_barycentric_centroid:650if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||651nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {652BITSET_SET(shader->info.system_values_read,653SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);654} else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {655BITSET_SET(shader->info.system_values_read,656SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);657}658break;659660case nir_intrinsic_load_barycentric_sample:661if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||662nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {663BITSET_SET(shader->info.system_values_read,664SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);665} else if 
(nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {666BITSET_SET(shader->info.system_values_read,667SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);668}669if (shader->info.stage == MESA_SHADER_FRAGMENT)670shader->info.fs.uses_sample_qualifier = true;671break;672673case nir_intrinsic_quad_broadcast:674case nir_intrinsic_quad_swap_horizontal:675case nir_intrinsic_quad_swap_vertical:676case nir_intrinsic_quad_swap_diagonal:677case nir_intrinsic_quad_swizzle_amd:678if (shader->info.stage == MESA_SHADER_FRAGMENT)679shader->info.fs.needs_quad_helper_invocations = true;680break;681682case nir_intrinsic_vote_any:683case nir_intrinsic_vote_all:684case nir_intrinsic_vote_feq:685case nir_intrinsic_vote_ieq:686case nir_intrinsic_ballot:687case nir_intrinsic_ballot_bit_count_exclusive:688case nir_intrinsic_ballot_bit_count_inclusive:689case nir_intrinsic_ballot_bitfield_extract:690case nir_intrinsic_ballot_bit_count_reduce:691case nir_intrinsic_ballot_find_lsb:692case nir_intrinsic_ballot_find_msb:693case nir_intrinsic_first_invocation:694case nir_intrinsic_read_invocation:695case nir_intrinsic_read_first_invocation:696case nir_intrinsic_elect:697case nir_intrinsic_reduce:698case nir_intrinsic_inclusive_scan:699case nir_intrinsic_exclusive_scan:700case nir_intrinsic_shuffle:701case nir_intrinsic_shuffle_xor:702case nir_intrinsic_shuffle_up:703case nir_intrinsic_shuffle_down:704case nir_intrinsic_write_invocation_amd:705if (shader->info.stage == MESA_SHADER_FRAGMENT)706shader->info.fs.needs_all_helper_invocations = true;707if (shader->info.stage == MESA_SHADER_COMPUTE)708shader->info.cs.uses_wide_subgroup_intrinsics = true;709break;710711case nir_intrinsic_end_primitive:712case nir_intrinsic_end_primitive_with_counter:713assert(shader->info.stage == MESA_SHADER_GEOMETRY);714shader->info.gs.uses_end_primitive = 1;715FALLTHROUGH;716717case nir_intrinsic_emit_vertex:718case nir_intrinsic_emit_vertex_with_counter:719shader->info.gs.active_stream_mask |= 1 << 
nir_intrinsic_stream_id(instr);720721break;722723case nir_intrinsic_control_barrier:724shader->info.uses_control_barrier = true;725break;726727case nir_intrinsic_scoped_barrier:728shader->info.uses_control_barrier |=729nir_intrinsic_execution_scope(instr) != NIR_SCOPE_NONE;730731shader->info.uses_memory_barrier |=732nir_intrinsic_memory_scope(instr) != NIR_SCOPE_NONE;733break;734735case nir_intrinsic_memory_barrier:736case nir_intrinsic_group_memory_barrier:737case nir_intrinsic_memory_barrier_atomic_counter:738case nir_intrinsic_memory_barrier_buffer:739case nir_intrinsic_memory_barrier_image:740case nir_intrinsic_memory_barrier_shared:741case nir_intrinsic_memory_barrier_tcs_patch:742shader->info.uses_memory_barrier = true;743break;744745default:746if (nir_intrinsic_writes_external_memory(instr))747shader->info.writes_memory = true;748break;749}750}751752static void753gather_tex_info(nir_tex_instr *instr, nir_shader *shader)754{755if (shader->info.stage == MESA_SHADER_FRAGMENT &&756nir_tex_instr_has_implicit_derivative(instr))757shader->info.fs.needs_quad_helper_invocations = true;758759switch (instr->op) {760case nir_texop_tg4:761shader->info.uses_texture_gather = true;762break;763default:764break;765}766}767768static void769gather_alu_info(nir_alu_instr *instr, nir_shader *shader)770{771switch (instr->op) {772case nir_op_fddx:773case nir_op_fddy:774shader->info.uses_fddx_fddy = true;775FALLTHROUGH;776case nir_op_fddx_fine:777case nir_op_fddy_fine:778case nir_op_fddx_coarse:779case nir_op_fddy_coarse:780if (shader->info.stage == MESA_SHADER_FRAGMENT)781shader->info.fs.needs_quad_helper_invocations = true;782break;783default:784break;785}786787const nir_op_info *info = &nir_op_infos[instr->op];788789for (unsigned i = 0; i < info->num_inputs; i++) {790if (nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float)791shader->info.bit_sizes_float |= nir_src_bit_size(instr->src[i].src);792else793shader->info.bit_sizes_int |= 
nir_src_bit_size(instr->src[i].src);794}795if (nir_alu_type_get_base_type(info->output_type) == nir_type_float)796shader->info.bit_sizes_float |= nir_dest_bit_size(instr->dest.dest);797else798shader->info.bit_sizes_int |= nir_dest_bit_size(instr->dest.dest);799}800801static void802gather_info_block(nir_block *block, nir_shader *shader, void *dead_ctx)803{804nir_foreach_instr(instr, block) {805switch (instr->type) {806case nir_instr_type_alu:807gather_alu_info(nir_instr_as_alu(instr), shader);808break;809case nir_instr_type_intrinsic:810gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader, dead_ctx);811break;812case nir_instr_type_tex:813gather_tex_info(nir_instr_as_tex(instr), shader);814break;815case nir_instr_type_call:816assert(!"nir_shader_gather_info only works if functions are inlined");817break;818default:819break;820}821}822}823824void825nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint)826{827shader->info.num_textures = 0;828shader->info.num_images = 0;829shader->info.image_buffers = 0;830shader->info.msaa_images = 0;831shader->info.bit_sizes_float = 0;832shader->info.bit_sizes_int = 0;833834nir_foreach_uniform_variable(var, shader) {835/* Bindless textures and images don't use non-bindless slots.836* Interface blocks imply inputs, outputs, UBO, or SSBO, which can only837* mean bindless.838*/839if (var->data.bindless || var->interface_type)840continue;841842shader->info.num_textures += glsl_type_get_sampler_count(var->type);843844unsigned num_image_slots = glsl_type_get_image_count(var->type);845if (num_image_slots) {846const struct glsl_type *image_type = glsl_without_array(var->type);847848if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_BUF) {849shader->info.image_buffers |=850BITFIELD_RANGE(shader->info.num_images, num_image_slots);851}852if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) {853shader->info.msaa_images |=854BITFIELD_RANGE(shader->info.num_images, 
num_image_slots);855}856shader->info.num_images += num_image_slots;857}858}859860shader->info.inputs_read = 0;861shader->info.outputs_written = 0;862shader->info.outputs_read = 0;863shader->info.inputs_read_16bit = 0;864shader->info.outputs_written_16bit = 0;865shader->info.outputs_read_16bit = 0;866shader->info.inputs_read_indirectly_16bit = 0;867shader->info.outputs_accessed_indirectly_16bit = 0;868shader->info.patch_outputs_read = 0;869shader->info.patch_inputs_read = 0;870shader->info.patch_outputs_written = 0;871BITSET_ZERO(shader->info.system_values_read);872shader->info.inputs_read_indirectly = 0;873shader->info.outputs_accessed_indirectly = 0;874shader->info.patch_inputs_read_indirectly = 0;875shader->info.patch_outputs_accessed_indirectly = 0;876877if (shader->info.stage == MESA_SHADER_VERTEX) {878shader->info.vs.double_inputs = 0;879}880if (shader->info.stage == MESA_SHADER_FRAGMENT) {881shader->info.fs.uses_sample_qualifier = false;882shader->info.fs.uses_discard = false;883shader->info.fs.uses_demote = false;884shader->info.fs.color_is_dual_source = false;885shader->info.fs.uses_fbfetch_output = false;886shader->info.fs.needs_quad_helper_invocations = false;887shader->info.fs.needs_all_helper_invocations = false;888}889if (shader->info.stage == MESA_SHADER_TESS_CTRL) {890shader->info.tess.tcs_cross_invocation_inputs_read = 0;891shader->info.tess.tcs_cross_invocation_outputs_read = 0;892}893894shader->info.writes_memory = shader->info.has_transform_feedback_varyings;895896void *dead_ctx = ralloc_context(NULL);897nir_foreach_block(block, entrypoint) {898gather_info_block(block, shader, dead_ctx);899}900ralloc_free(dead_ctx);901902if (shader->info.stage == MESA_SHADER_FRAGMENT &&903(shader->info.fs.uses_sample_qualifier ||904(BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||905BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS)))) {906/* This shouldn't be cleared because if optimizations remove all907* 
sample-qualified inputs and that pass is run again, the sample908* shading must stay enabled.909*/910shader->info.fs.uses_sample_shading = true;911}912}913914915