Path: blob/21.2-virgl/src/compiler/nir/nir_deref.c
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "util/hash_table.h"

static bool
is_trivial_deref_cast(nir_deref_instr *cast)
{
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (!parent)
      return false;

   return cast->modes == parent->modes &&
          cast->type == parent->type &&
          cast->dest.ssa.num_components == parent->dest.ssa.num_components &&
          cast->dest.ssa.bit_size == parent->dest.ssa.bit_size;
}

void
nir_deref_path_init(nir_deref_path *path,
                    nir_deref_instr *deref, void *mem_ctx)
{
   assert(deref != NULL);

   /* The length of the short path is at most ARRAY_SIZE - 1 because we need
    * room for the NULL terminator.
    */
   static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;

   int count = 0;

   nir_deref_instr **tail = &path->_short_path[max_short_path_len];
   nir_deref_instr **head = tail;

   *tail = NULL;
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
         continue;
      count++;
      if (count <= max_short_path_len)
         *(--head) = d;
   }

   if (count <= max_short_path_len) {
      /* If we're under max_short_path_len, just use the short path. */
      path->path = head;
      goto done;
   }

#ifndef NDEBUG
   /* Just in case someone uses short_path by accident */
   for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
      path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
#endif

   path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
   head = tail = path->path + count;
   *tail = NULL;
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
         continue;
      *(--head) = d;
   }

done:
   assert(head == path->path);
   assert(tail == head + count);
   assert(*tail == NULL);
}
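/* For illustration (hypothetical chain, not tied to any particular shader):
 * given NIR like
 *
 *    %p0 = deref_var &v
 *    %p1 = deref_struct &%p0->field
 *    %p2 = deref_array &%p1[i]
 *
 * nir_deref_path_init(&path, %p2, mem_ctx) yields
 *
 *    path.path[0] = %p0, path.path[1] = %p1, path.path[2] = %p2,
 *    path.path[3] = NULL
 *
 * with trivial casts skipped along the way.  A typical walk, assuming an
 * already-built deref:
 *
 *    nir_deref_path path;
 *    nir_deref_path_init(&path, deref, NULL);
 *    for (nir_deref_instr **p = path.path; *p; p++)
 *       process(*p);   // process() stands in for whatever the pass does
 *    nir_deref_path_finish(&path);
 */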
void
nir_deref_path_finish(nir_deref_path *path)
{
   if (path->path < &path->_short_path[0] ||
       path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
      ralloc_free(path->path);
}

/**
 * Recursively removes unused deref instructions
 */
bool
nir_deref_instr_remove_if_unused(nir_deref_instr *instr)
{
   bool progress = false;

   for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
      /* If anyone is using this deref, leave it alone */
      assert(d->dest.is_ssa);
      if (!nir_ssa_def_is_unused(&d->dest.ssa))
         break;

      nir_instr_remove(&d->instr);
      progress = true;
   }

   return progress;
}

bool
nir_deref_instr_has_indirect(nir_deref_instr *instr)
{
   while (instr->deref_type != nir_deref_type_var) {
      /* Consider casts to be indirects */
      if (instr->deref_type == nir_deref_type_cast)
         return true;

      if ((instr->deref_type == nir_deref_type_array ||
           instr->deref_type == nir_deref_type_ptr_as_array) &&
          !nir_src_is_const(instr->arr.index))
         return true;

      instr = nir_deref_instr_parent(instr);
   }

   return false;
}

bool
nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
{
   for (; instr; instr = nir_deref_instr_parent(instr)) {
      if (instr->deref_type == nir_deref_type_array &&
          nir_src_is_const(instr->arr.index) &&
          nir_src_as_uint(instr->arr.index) >=
             glsl_get_length(nir_deref_instr_parent(instr)->type))
         return true;
   }

   return false;
}
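/* Hypothetical example: with "vec4 arr[4]" and a constant-indexed chain
 * ending in deref_array &arr[5], nir_src_as_uint(index) == 5 is >=
 * glsl_get_length() == 4, so the walk above reports the deref as known
 * out-of-bounds.  A non-constant index never trips this check since
 * nothing is known at compile time.
 */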
bool
nir_deref_instr_has_complex_use(nir_deref_instr *deref)
{
   nir_foreach_use(use_src, &deref->dest.ssa) {
      nir_instr *use_instr = use_src->parent_instr;

      switch (use_instr->type) {
      case nir_instr_type_deref: {
         nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);

         /* A var deref has no sources */
         assert(use_deref->deref_type != nir_deref_type_var);

         /* If a deref shows up in an array index or something like that, it's
          * a complex use.
          */
         if (use_src != &use_deref->parent)
            return true;

         /* Anything that isn't a basic struct or array deref is considered to
          * be a "complex" use.  In particular, we don't allow ptr_as_array
          * because we assume that opt_deref will turn any non-complex
          * ptr_as_array derefs into regular array derefs eventually so passes
          * which only want to handle simple derefs will pick them up in a
          * later pass.
          */
         if (use_deref->deref_type != nir_deref_type_struct &&
             use_deref->deref_type != nir_deref_type_array_wildcard &&
             use_deref->deref_type != nir_deref_type_array)
            return true;

         if (nir_deref_instr_has_complex_use(use_deref))
            return true;

         continue;
      }

      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
         switch (use_intrin->intrinsic) {
         case nir_intrinsic_load_deref:
            assert(use_src == &use_intrin->src[0]);
            continue;

         case nir_intrinsic_copy_deref:
            assert(use_src == &use_intrin->src[0] ||
                   use_src == &use_intrin->src[1]);
            continue;

         case nir_intrinsic_store_deref:
            /* A use in src[1] of a store means we're taking that pointer and
             * writing it to a variable.  Because we have no idea who will
             * read that variable and what they will do with the pointer, it's
             * considered a "complex" use.  A use in src[0], on the other
             * hand, is a simple use because we're just going to dereference
             * it and write a value there.
             */
            if (use_src == &use_intrin->src[0])
               continue;
            return true;

         default:
            return true;
         }
         unreachable("Switch default failed");
      }

      default:
         return true;
      }
   }

   nir_foreach_if_use(use, &deref->dest.ssa)
      return true;

   return false;
}

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

unsigned
nir_deref_instr_array_stride(nir_deref_instr *deref)
{
   switch (deref->deref_type) {
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard: {
      const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
      unsigned stride = glsl_get_explicit_stride(arr_type);

      if ((glsl_type_is_matrix(arr_type) &&
           glsl_matrix_type_is_row_major(arr_type)) ||
          (glsl_type_is_vector(arr_type) && stride == 0))
         stride = type_scalar_size_bytes(arr_type);

      return stride;
   }
   case nir_deref_type_ptr_as_array:
      return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
   case nir_deref_type_cast:
      return deref->cast.ptr_stride;
   default:
      return 0;
   }
}

static unsigned
type_get_array_stride(const struct glsl_type *elem_type,
                      glsl_type_size_align_func size_align)
{
   unsigned elem_size, elem_align;
   size_align(elem_type, &elem_size, &elem_align);
   return ALIGN_POT(elem_size, elem_align);
}

static unsigned
struct_type_get_field_offset(const struct glsl_type *struct_type,
                             glsl_type_size_align_func size_align,
                             unsigned field_idx)
{
   assert(glsl_type_is_struct_or_ifc(struct_type));
   unsigned offset = 0;
   for (unsigned i = 0; i <= field_idx; i++) {
      unsigned elem_size, elem_align;
      size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
      offset = ALIGN_POT(offset, elem_align);
      if (i < field_idx)
         offset += elem_size;
   }
   return offset;
}
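/* Worked example (assumes a size_align callback that reports float as
 * size 4/align 4 and vec2 as size 8/align 8): for
 *
 *    struct { float a; vec2 b; }
 *
 * struct_type_get_field_offset(type, size_align, 1) computes
 * ALIGN_POT(0, 4) = 0, adds a's 4 bytes, then ALIGN_POT(4, 8) = 8, so
 * field b lands at byte offset 8.
 */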
unsigned
nir_deref_instr_get_const_offset(nir_deref_instr *deref,
                                 glsl_type_size_align_func size_align)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   unsigned offset = 0;
   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
      switch ((*p)->deref_type) {
      case nir_deref_type_array:
         offset += nir_src_as_uint((*p)->arr.index) *
                   type_get_array_stride((*p)->type, size_align);
         break;
      case nir_deref_type_struct: {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);
         offset += struct_type_get_field_offset(parent->type, size_align,
                                                (*p)->strct.index);
         break;
      }
      case nir_deref_type_cast:
         /* A cast doesn't contribute to the offset */
         break;
      default:
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}

nir_ssa_def *
nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
                       glsl_type_size_align_func size_align)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   nir_ssa_def *offset = nir_imm_intN_t(b, 0, deref->dest.ssa.bit_size);
   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
      switch ((*p)->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_ptr_as_array: {
         nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
         int stride = type_get_array_stride((*p)->type, size_align);
         offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
         break;
      }
      case nir_deref_type_struct: {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);
         unsigned field_offset =
            struct_type_get_field_offset(parent->type, size_align,
                                         (*p)->strct.index);
         offset = nir_iadd_imm(b, offset, field_offset);
         break;
      }
      case nir_deref_type_cast:
         /* A cast doesn't contribute to the offset */
         break;
      default:
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}
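/* Usage sketch (hypothetical caller, not part of this file): a lowering
 * pass that turns deref-based access into explicit offsets might emit
 *
 *    b->cursor = nir_before_instr(&access->instr);
 *    nir_ssa_def *off = nir_build_deref_offset(b, deref, size_align);
 *
 * and then rewrite the access to use `off`, where size_align is whatever
 * glsl_type_size_align_func matches the memory layout in use.
 */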
bool
nir_remove_dead_derefs_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref &&
             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
            progress = true;
      }
   }

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);

   return progress;
}

bool
nir_remove_dead_derefs(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      if (function->impl && nir_remove_dead_derefs_impl(function->impl))
         progress = true;
   }

   return progress;
}

void
nir_fixup_deref_modes(nir_shader *shader)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->deref_type == nir_deref_type_cast)
               continue;

            nir_variable_mode parent_modes;
            if (deref->deref_type == nir_deref_type_var) {
               parent_modes = deref->var->data.mode;
            } else {
               assert(deref->parent.is_ssa);
               nir_deref_instr *parent =
                  nir_instr_as_deref(deref->parent.ssa->parent_instr);
               parent_modes = parent->modes;
            }

            deref->modes = parent_modes;
         }
      }
   }
}

static bool
modes_may_alias(nir_variable_mode a, nir_variable_mode b)
{
   /* Generic pointers can alias with SSBOs */
   if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
       (b & (nir_var_mem_ssbo | nir_var_mem_global)))
      return true;

   /* Pointers can only alias if they share a mode. */
   return a & b;
}

static bool
deref_path_contains_coherent_decoration(nir_deref_path *path)
{
   assert(path->path[0]->deref_type == nir_deref_type_var);

   if (path->path[0]->var->data.access & ACCESS_COHERENT)
      return true;

   for (nir_deref_instr **p = &path->path[1]; *p; p++) {
      if ((*p)->deref_type != nir_deref_type_struct)
         continue;

      const struct glsl_type *struct_type = (*(p - 1))->type;
      const struct glsl_struct_field *field =
         glsl_get_struct_field_data(struct_type, (*p)->strct.index);
      if (field->memory_coherent)
         return true;
   }

   return false;
}
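/* Examples for modes_may_alias() above (illustrative): ssbo vs. global
 * returns true because generic pointers may point into SSBO memory, while
 * shared vs. ssbo returns false since the two mode sets share no bit.
 */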
nir_deref_compare_result
nir_compare_deref_paths(nir_deref_path *a_path,
                        nir_deref_path *b_path)
{
   if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
      return nir_derefs_do_not_alias;

   if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
      return nir_derefs_may_alias_bit;

   if (a_path->path[0]->deref_type == nir_deref_type_var) {
      if (a_path->path[0]->var != b_path->path[0]->var) {
         /* Shader and function temporaries aren't backed by memory so two
          * distinct variables never alias.
          */
         static const nir_variable_mode temp_var_modes =
            nir_var_shader_temp | nir_var_function_temp;
         if (!(a_path->path[0]->modes & ~temp_var_modes) ||
             !(b_path->path[0]->modes & ~temp_var_modes))
            return nir_derefs_do_not_alias;

         /* If they are both declared coherent or have coherent somewhere in
          * their path (due to a member of an interface being declared
          * coherent), we have to assume that we could have any kind of
          * aliasing.  Otherwise, they could still alias but the client didn't
          * tell us and that's their fault.
          */
         if (deref_path_contains_coherent_decoration(a_path) &&
             deref_path_contains_coherent_decoration(b_path))
            return nir_derefs_may_alias_bit;

         /* Per SPV_KHR_workgroup_memory_explicit_layout and
          * GL_EXT_shared_memory_block, shared blocks alias each other.
          */
         if (a_path->path[0]->modes & nir_var_mem_shared &&
             b_path->path[0]->modes & nir_var_mem_shared &&
             (glsl_type_is_interface(a_path->path[0]->var->type) ||
              glsl_type_is_interface(b_path->path[0]->var->type))) {
            assert(glsl_type_is_interface(a_path->path[0]->var->type) &&
                   glsl_type_is_interface(b_path->path[0]->var->type));
            return nir_derefs_may_alias_bit;
         }

         /* If we can chase the deref all the way back to the variable and
          * they're not the same variable and at least one is not declared
          * coherent, we know they can't possibly alias.
          */
         return nir_derefs_do_not_alias;
      }
   } else {
      assert(a_path->path[0]->deref_type == nir_deref_type_cast);
      /* If they're not exactly the same cast, it's hard to compare them so we
       * just assume they alias.  Comparing casts is tricky as there are lots
       * of things such as mode, type, etc. that have to work out; for now, we
       * just assume nir_opt_deref will combine them and compare the deref
       * instructions.
       *
       * TODO: At some point in the future, we could be clever and understand
       * that a float[] and int[] have the same layout and aliasing structure
       * but double[] and vec3[] do not and we could potentially be a bit
       * smarter here.
       */
      if (a_path->path[0] != b_path->path[0])
         return nir_derefs_may_alias_bit;
   }

   /* Start off assuming they fully compare.  We ignore equality for now.  In
    * the end, we'll determine that by containment.
    */
   nir_deref_compare_result result = nir_derefs_may_alias_bit |
                                     nir_derefs_a_contains_b_bit |
                                     nir_derefs_b_contains_a_bit;

   nir_deref_instr **a_p = &a_path->path[1];
   nir_deref_instr **b_p = &b_path->path[1];
   while (*a_p != NULL && *a_p == *b_p) {
      a_p++;
      b_p++;
   }

   /* We're at either the tail or the divergence point between the two deref
    * paths.  Look to see if either contains a cast or a ptr_as_array deref.
    * If it does we don't know how to safely make any inferences.  Hopefully,
    * nir_opt_deref will clean most of these up and we can start inferring
    * things again.
    *
    * In theory, we could do a bit better.  For instance, we could detect the
    * case where we have exactly one ptr_as_array deref in the chain after the
    * divergence point and it's matched in both chains and the two chains have
    * different constant indices.
    */
   for (nir_deref_instr **t_p = a_p; *t_p; t_p++) {
      if ((*t_p)->deref_type == nir_deref_type_cast ||
          (*t_p)->deref_type == nir_deref_type_ptr_as_array)
         return nir_derefs_may_alias_bit;
   }
   for (nir_deref_instr **t_p = b_p; *t_p; t_p++) {
      if ((*t_p)->deref_type == nir_deref_type_cast ||
          (*t_p)->deref_type == nir_deref_type_ptr_as_array)
         return nir_derefs_may_alias_bit;
   }

   while (*a_p != NULL && *b_p != NULL) {
      nir_deref_instr *a_tail = *(a_p++);
      nir_deref_instr *b_tail = *(b_p++);

      switch (a_tail->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_array_wildcard: {
         assert(b_tail->deref_type == nir_deref_type_array ||
                b_tail->deref_type == nir_deref_type_array_wildcard);

         if (a_tail->deref_type == nir_deref_type_array_wildcard) {
            if (b_tail->deref_type != nir_deref_type_array_wildcard)
               result &= ~nir_derefs_b_contains_a_bit;
         } else if (b_tail->deref_type == nir_deref_type_array_wildcard) {
            if (a_tail->deref_type != nir_deref_type_array_wildcard)
               result &= ~nir_derefs_a_contains_b_bit;
         } else {
            assert(a_tail->deref_type == nir_deref_type_array &&
                   b_tail->deref_type == nir_deref_type_array);
            assert(a_tail->arr.index.is_ssa && b_tail->arr.index.is_ssa);

            if (nir_src_is_const(a_tail->arr.index) &&
                nir_src_is_const(b_tail->arr.index)) {
               /* If they're both direct and have different offsets, they
                * don't even alias much less anything else.
                */
               if (nir_src_as_uint(a_tail->arr.index) !=
                   nir_src_as_uint(b_tail->arr.index))
                  return nir_derefs_do_not_alias;
            } else if (a_tail->arr.index.ssa == b_tail->arr.index.ssa) {
               /* They're the same indirect, continue on */
            } else {
               /* They're not the same index so we can't prove anything about
                * containment.
                */
               result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
            }
         }
         break;
      }

      case nir_deref_type_struct: {
         /* If they're different struct members, they don't even alias */
         if (a_tail->strct.index != b_tail->strct.index)
            return nir_derefs_do_not_alias;
         break;
      }

      default:
         unreachable("Invalid deref type");
      }
   }

   /* If a is longer than b, then it can't contain b */
   if (*a_p != NULL)
      result &= ~nir_derefs_a_contains_b_bit;
   if (*b_p != NULL)
      result &= ~nir_derefs_b_contains_a_bit;

   /* If a contains b and b contains a they must be equal. */
   if ((result & nir_derefs_a_contains_b_bit) && (result & nir_derefs_b_contains_a_bit))
      result |= nir_derefs_equal_bit;

   return result;
}
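/* Illustrative results for paths rooted at the same variable v
 * (hypothetical derefs):
 *
 *    v->a[0] vs. v->a[1]: do-not-alias (distinct constant indices)
 *    v->a[*] vs. v->a[3]: may-alias, and a contains b (wildcard covers 3)
 *    v->a[i] vs. v->a[i]: equal (identical index SSA value)
 */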
nir_deref_compare_result
nir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
{
   if (a == b) {
      return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
             nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
   }

   nir_deref_path a_path, b_path;
   nir_deref_path_init(&a_path, a, NULL);
   nir_deref_path_init(&b_path, b, NULL);
   assert(a_path.path[0]->deref_type == nir_deref_type_var ||
          a_path.path[0]->deref_type == nir_deref_type_cast);
   assert(b_path.path[0]->deref_type == nir_deref_type_var ||
          b_path.path[0]->deref_type == nir_deref_type_cast);

   nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);

   nir_deref_path_finish(&a_path);
   nir_deref_path_finish(&b_path);

   return result;
}

nir_deref_path *nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
{
   if (!deref->_path) {
      deref->_path = ralloc(mem_ctx, nir_deref_path);
      nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
   }
   return deref->_path;
}

nir_deref_compare_result nir_compare_derefs_and_paths(void *mem_ctx,
                                                      nir_deref_and_path *a,
                                                      nir_deref_and_path *b)
{
   if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
      return nir_compare_derefs(a->instr, b->instr);

   return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
                                  nir_get_deref_path(mem_ctx, b));
}
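/* Usage sketch (hypothetical pass): a store-to-load forwarding pass might
 * consume the comparison like
 *
 *    nir_deref_compare_result cmp = nir_compare_derefs(store_deref,
 *                                                      load_deref);
 *    if (cmp & nir_derefs_equal_bit) {
 *       ... forward the stored value to the load ...
 *    } else if (!(cmp & nir_derefs_may_alias_bit)) {
 *       ... the store can't affect the load; keep looking past it ...
 *    }
 */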
struct rematerialize_deref_state {
   bool progress;
   nir_builder builder;
   nir_block *block;
   struct hash_table *cache;
};

static nir_deref_instr *
rematerialize_deref_in_block(nir_deref_instr *deref,
                             struct rematerialize_deref_state *state)
{
   if (deref->instr.block == state->block)
      return deref;

   if (!state->cache) {
      state->cache = _mesa_pointer_hash_table_create(NULL);
   }

   struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref);
   if (cached)
      return cached->data;

   nir_builder *b = &state->builder;
   nir_deref_instr *new_deref =
      nir_deref_instr_create(b->shader, deref->deref_type);
   new_deref->modes = deref->modes;
   new_deref->type = deref->type;

   if (deref->deref_type == nir_deref_type_var) {
      new_deref->var = deref->var;
   } else {
      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (parent) {
         parent = rematerialize_deref_in_block(parent, state);
         new_deref->parent = nir_src_for_ssa(&parent->dest.ssa);
      } else {
         nir_src_copy(&new_deref->parent, &deref->parent, new_deref);
      }
   }

   switch (deref->deref_type) {
   case nir_deref_type_var:
   case nir_deref_type_array_wildcard:
      /* Nothing more to do */
      break;

   case nir_deref_type_cast:
      new_deref->cast.ptr_stride = deref->cast.ptr_stride;
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      assert(!nir_src_as_deref(deref->arr.index));
      nir_src_copy(&new_deref->arr.index, &deref->arr.index, new_deref);
      break;

   case nir_deref_type_struct:
      new_deref->strct.index = deref->strct.index;
      break;

   default:
      unreachable("Invalid deref instruction type");
   }

   nir_ssa_dest_init(&new_deref->instr, &new_deref->dest,
                     deref->dest.ssa.num_components,
                     deref->dest.ssa.bit_size,
                     NULL);
   nir_builder_instr_insert(b, &new_deref->instr);

   return new_deref;
}

static bool
rematerialize_deref_src(nir_src *src, void *_state)
{
   struct rematerialize_deref_state *state = _state;

   nir_deref_instr *deref = nir_src_as_deref(*src);
   if (!deref)
      return true;

   nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
   if (block_deref != deref) {
      nir_instr_rewrite_src(src->parent_instr, src,
                            nir_src_for_ssa(&block_deref->dest.ssa));
      nir_deref_instr_remove_if_unused(deref);
      state->progress = true;
   }

   return true;
}

/** Re-materialize derefs in every block
 *
 * This pass re-materializes deref instructions in every block in which they
 * are used.  After this pass has been run, every use of a deref will be of a
 * deref in the same block as the use.  Also, all unused derefs will be
 * deleted as a side-effect.
 *
 * Derefs used as sources of phi instructions are not rematerialized.
 */
bool
nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
{
   struct rematerialize_deref_state state = { 0 };
   nir_builder_init(&state.builder, impl);

   nir_foreach_block_unstructured(block, impl) {
      state.block = block;

      /* Start each block with a fresh cache */
      if (state.cache)
         _mesa_hash_table_clear(state.cache, NULL);

      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref &&
             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
            continue;

         /* If a deref is used in a phi, we can't rematerialize it, as the new
          * derefs would appear before the phi, which is not valid.
          */
         if (instr->type == nir_instr_type_phi)
            continue;

         state.builder.cursor = nir_before_instr(instr);
         nir_foreach_src(instr, rematerialize_deref_src, &state);
      }

#ifndef NDEBUG
      nir_if *following_if = nir_block_get_following_if(block);
      if (following_if)
         assert(!nir_src_as_deref(following_if->condition));
#endif
   }

   _mesa_hash_table_destroy(state.cache, NULL);

   return state.progress;
}
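/* Before/after sketch (hypothetical SSA names): if block A holds
 *
 *    %d = deref_var &v
 *
 * and a load in block B uses %d, the pass above clones the deref so B
 * becomes
 *
 *    %d2 = deref_var &v
 *    %x  = load_deref %d2
 *
 * leaving %d to be cleaned up if it has no remaining uses.
 */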
static void
nir_deref_instr_fixup_child_types(nir_deref_instr *parent)
{
   nir_foreach_use(use, &parent->dest.ssa) {
      if (use->parent_instr->type != nir_instr_type_deref)
         continue;

      nir_deref_instr *child = nir_instr_as_deref(use->parent_instr);
      switch (child->deref_type) {
      case nir_deref_type_var:
         unreachable("nir_deref_type_var cannot be a child");

      case nir_deref_type_array:
      case nir_deref_type_array_wildcard:
         child->type = glsl_get_array_element(parent->type);
         break;

      case nir_deref_type_ptr_as_array:
         child->type = parent->type;
         break;

      case nir_deref_type_struct:
         child->type = glsl_get_struct_field(parent->type,
                                             child->strct.index);
         break;

      case nir_deref_type_cast:
         /* We stop the recursion here */
         continue;
      }

      /* Recurse into children */
      nir_deref_instr_fixup_child_types(child);
   }
}

static bool
is_trivial_array_deref_cast(nir_deref_instr *cast)
{
   assert(is_trivial_deref_cast(cast));

   nir_deref_instr *parent = nir_src_as_deref(cast->parent);

   if (parent->deref_type == nir_deref_type_array) {
      return cast->cast.ptr_stride ==
             glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
   } else if (parent->deref_type == nir_deref_type_ptr_as_array) {
      return cast->cast.ptr_stride ==
             nir_deref_instr_array_stride(parent);
   } else {
      return false;
   }
}

static bool
is_deref_ptr_as_array(nir_instr *instr)
{
   return instr->type == nir_instr_type_deref &&
          nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
}

static bool
opt_remove_restricting_cast_alignments(nir_deref_instr *cast)
{
   assert(cast->deref_type == nir_deref_type_cast);
   if (cast->cast.align_mul == 0)
      return false;

   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* Don't use any default alignment for this check.  We don't want to fall
    * back to type alignment too early in case we find out later that we're
    * somehow a child of a packed struct.
    */
   uint32_t parent_mul, parent_offset;
   if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
                                     &parent_mul, &parent_offset))
      return false;

   /* If this cast increases the alignment, we want to keep it.
    *
    * There is a possibility that the larger alignment provided by this cast
    * somehow disagrees with the smaller alignment further up the deref chain.
    * In that case, we choose to favor the alignment closer to the actual
    * memory operation which, in this case, is the cast and not its parent so
    * keeping the cast alignment is the right thing to do.
    */
   if (parent_mul < cast->cast.align_mul)
      return false;

   /* If we've gotten here, we have a parent deref with an align_mul at least
    * as large as ours so we can potentially throw away the alignment
    * information on this deref.  There are two cases to consider here:
    *
    * 1. We can chase the deref all the way back to the variable.  In this
    *    case, we have "perfect" knowledge, modulo indirect array derefs.
    *    Unless we've done something wrong in our indirect/wildcard stride
    *    calculations, our knowledge from the deref walk is better than the
    *    client's.
    *
    * 2. We can't chase it all the way back to the variable.  In this case,
    *    because our call to nir_get_explicit_deref_align(parent, ...) above
    *    passes default_to_type_align=false, the only way we can even get
    *    here is if something further up the deref chain has a cast with an
    *    alignment which can only happen if we get an alignment from the
    *    client (most likely a decoration in the SPIR-V).  If the client has
    *    provided us with two conflicting alignments in the deref chain,
    *    that's their fault and we can do whatever we want.
    *
    * In either case, we should be well within our rights, at this point, to
    * throw away the alignment information on this deref.  However, to be
    * "nice" to weird clients, we do one more check.  It really shouldn't
    * happen but it's possible that the parent's alignment offset disagrees
    * with the cast's alignment offset.  In this case, we consider the cast
    * as providing more information (or at least more valid information) and
    * keep it even if the align_mul from the parent is larger.
    */
   assert(cast->cast.align_mul <= parent_mul);
   if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
      return false;

   /* If we got here, the parent has better alignment information than the
    * child and we can get rid of the child alignment information.
    */
   cast->cast.align_mul = 0;
   cast->cast.align_offset = 0;
   return true;
}
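/* Worked example (assumed numbers): suppose the parent chain proves
 * align_mul = 16 with align_offset = 4 and the cast carries align_mul = 8.
 * Then parent_offset % cast->cast.align_mul == 4, so the cast's alignment
 * is dropped only if its align_offset is also 4; otherwise the offsets
 * disagree and the cast is kept as the more trustworthy source of
 * information.
 */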
/**
 * Remove casts that just wrap other casts.
 */
static bool
opt_remove_cast_cast(nir_deref_instr *cast)
{
   nir_deref_instr *first_cast = cast;

   while (true) {
      nir_deref_instr *parent = nir_deref_instr_parent(first_cast);
      if (parent == NULL || parent->deref_type != nir_deref_type_cast)
         break;
      first_cast = parent;
   }
   if (cast == first_cast)
      return false;

   nir_instr_rewrite_src(&cast->instr, &cast->parent,
                         nir_src_for_ssa(first_cast->parent.ssa));
   return true;
}
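/* Illustrative chain (hypothetical SSA names):
 *
 *    %p = deref_cast (T1 *)%base
 *    %q = deref_cast (T2 *)%p
 *
 * opt_remove_cast_cast(%q) rewrites %q's parent to %base directly, so %p
 * can later be removed if it ends up unused.
 */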
/* Restrict variable modes in casts.
 *
 * If we know from something higher up the deref chain that the deref has a
 * specific mode, we can cast to more general and back but we can never cast
 * across modes.  For non-cast derefs, we should only ever do anything here if
 * the parent eventually comes from a cast that we restricted earlier.
 */
static bool
opt_restrict_deref_modes(nir_deref_instr *deref)
{
   if (deref->deref_type == nir_deref_type_var) {
      assert(deref->modes == deref->var->data.mode);
      return false;
   }

   nir_deref_instr *parent = nir_src_as_deref(deref->parent);
   if (parent == NULL || parent->modes == deref->modes)
      return false;

   assert(parent->modes & deref->modes);
   deref->modes &= parent->modes;
   return true;
}

static bool
opt_remove_sampler_cast(nir_deref_instr *cast)
{
   assert(cast->deref_type == nir_deref_type_cast);
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* Strip both types down to their non-array type and bail if there are any
    * discrepancies in array lengths.
    */
   const struct glsl_type *parent_type = parent->type;
   const struct glsl_type *cast_type = cast->type;
   while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
      if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
         return false;
      parent_type = glsl_get_array_element(parent_type);
      cast_type = glsl_get_array_element(cast_type);
   }

   if (glsl_type_is_array(parent_type) || glsl_type_is_array(cast_type))
      return false;

   if (!glsl_type_is_sampler(parent_type) ||
       cast_type != glsl_bare_sampler_type())
      return false;

   /* We're a cast from a more detailed sampler type to a bare sampler */
   nir_ssa_def_rewrite_uses(&cast->dest.ssa,
                            &parent->dest.ssa);
   nir_instr_remove(&cast->instr);

   /* Recursively crawl the deref tree and clean up types */
   nir_deref_instr_fixup_child_types(parent);

   return true;
}

/**
 * Is this casting a struct to a contained struct?
 * struct a { struct b field0 };
 * ssa_5 is struct a;
 * deref_cast (struct b *)ssa_5 (function_temp struct b);
 * converts to
 * deref_struct &ssa_5->field0 (function_temp struct b);
 * This allows subsequent copy propagation to work.
 */
static bool
opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
{
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (!parent)
      return false;

   if (cast->cast.align_mul > 0)
      return false;

   if (!glsl_type_is_struct(parent->type))
      return false;

   if (glsl_get_struct_field_offset(parent->type, 0) != 0)
      return false;

   if (cast->type != glsl_get_struct_field(parent->type, 0))
      return false;

   nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
   nir_ssa_def_rewrite_uses(&cast->dest.ssa, &replace->dest.ssa);
   nir_deref_instr_remove_if_unused(cast);
   return true;
}

static bool
opt_deref_cast(nir_builder *b, nir_deref_instr *cast)
{
   bool progress = false;

   progress |= opt_remove_restricting_cast_alignments(cast);

   if (opt_replace_struct_wrapper_cast(b, cast))
      return true;

   if (opt_remove_sampler_cast(cast))
      return true;

   progress |= opt_remove_cast_cast(cast);
   if (!is_trivial_deref_cast(cast))
      return progress;

   /* If this deref still contains useful alignment information, we don't
    * want to delete it.
    */
   if (cast->cast.align_mul > 0)
      return progress;

   bool trivial_array_cast = is_trivial_array_deref_cast(cast);

   assert(cast->dest.is_ssa);
   assert(cast->parent.is_ssa);

   nir_foreach_use_safe(use_src, &cast->dest.ssa) {
      /* If this isn't a trivial array cast, we can't propagate into
       * ptr_as_array derefs.
       */
      if (is_deref_ptr_as_array(use_src->parent_instr) &&
          !trivial_array_cast)
         continue;

      nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent);
      progress = true;
   }

   /* If-uses would be a bit crazy */
   assert(list_is_empty(&cast->dest.ssa.if_uses));

   if (nir_deref_instr_remove_if_unused(cast))
      progress = true;

   return progress;
}
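/* Illustrative rewrite for opt_deref_ptr_as_array() below (hypothetical
 * SSA names):
 *
 *    %a = deref_array &%base[2]
 *    %b = deref_ptr_as_array &%a[3]
 *
 * becomes a single deref_array on %a's parent with index 2 + 3 = 5.
 */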
static bool
opt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
{
   assert(deref->deref_type == nir_deref_type_ptr_as_array);

   nir_deref_instr *parent = nir_deref_instr_parent(deref);

   if (nir_src_is_const(deref->arr.index) &&
       nir_src_as_int(deref->arr.index) == 0) {
      /* If it's a ptr_as_array deref with an index of 0, it does nothing
       * and we can just replace its uses with its parent, unless it has
       * alignment information.
       *
       * The source of a ptr_as_array deref always has a deref_type of
       * nir_deref_type_array or nir_deref_type_cast.  If it's a cast, it
       * may be trivial and we may be able to get rid of that too.  Any
       * trivial cast of trivial cast cases should be handled already by
       * opt_deref_cast() above.
       */
      if (parent->deref_type == nir_deref_type_cast &&
          parent->cast.align_mul == 0 &&
          is_trivial_deref_cast(parent))
         parent = nir_deref_instr_parent(parent);
      nir_ssa_def_rewrite_uses(&deref->dest.ssa,
                               &parent->dest.ssa);
      nir_instr_remove(&deref->instr);
      return true;
   }

   if (parent->deref_type != nir_deref_type_array &&
       parent->deref_type != nir_deref_type_ptr_as_array)
      return false;

   assert(parent->parent.is_ssa);
   assert(parent->arr.index.is_ssa);
   assert(deref->arr.index.is_ssa);

   nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
                                   deref->arr.index.ssa);

   deref->deref_type = parent->deref_type;
   nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent);
   nir_instr_rewrite_src(&deref->instr, &deref->arr.index,
                         nir_src_for_ssa(new_idx));
   return true;
}

static bool
is_vector_bitcast_deref(nir_deref_instr *cast,
                        nir_component_mask_t mask,
                        bool is_write)
{
   if (cast->deref_type != nir_deref_type_cast)
      return false;

   /* Don't throw away useful alignment information */
   if (cast->cast.align_mul > 0)
      return false;

   /* It has to be a cast of another deref */
   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
   if (parent == NULL)
      return false;

   /* The parent has to be a vector or scalar */
   if (!glsl_type_is_vector_or_scalar(parent->type))
      return false;

   /* Don't bother with 1-bit types */
   unsigned cast_bit_size = glsl_get_bit_size(cast->type);
   unsigned parent_bit_size = glsl_get_bit_size(parent->type);
   if (cast_bit_size == 1 || parent_bit_size == 1)
      return false;

   /* A strided vector type means it's not tightly packed */
   if (glsl_get_explicit_stride(cast->type) ||
       glsl_get_explicit_stride(parent->type))
      return false;

   assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
   assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
   unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
   unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
                           (parent_bit_size / 8);
   if (bytes_used > parent_bytes)
      return false;

   if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
                                                       parent_bit_size))
      return false;

   return true;
}

static nir_ssa_def *
resize_vector(nir_builder *b, nir_ssa_def *data, unsigned num_components)
{
   if (num_components == data->num_components)
      return data;

   unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
   for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
      swiz[i] = i;

   return nir_swizzle(b, data, swiz, num_components);
}

static bool
opt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
{
   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
   nir_component_mask_t read_mask =
      nir_ssa_def_components_read(&load->dest.ssa);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
    * vec4-aligned and so it can just read/write them as vec4s.  This
    * results in a LOT of vec4->vec3 casts on loads and stores.
    */
   if (is_vector_bitcast_deref(deref, read_mask, false)) {
      const unsigned old_num_comps = load->dest.ssa.num_components;
      const unsigned old_bit_size = load->dest.ssa.bit_size;

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
      const unsigned new_bit_size = glsl_get_bit_size(parent->type);

      /* Stomp it to reference the parent */
      nir_instr_rewrite_src(&load->instr, &load->src[0],
                            nir_src_for_ssa(&parent->dest.ssa));
      assert(load->dest.is_ssa);
      load->dest.ssa.bit_size = new_bit_size;
      load->dest.ssa.num_components = new_num_comps;
      load->num_components = new_num_comps;

      b->cursor = nir_after_instr(&load->instr);
      nir_ssa_def *data = &load->dest.ssa;
      if (old_bit_size != new_bit_size)
         data = nir_bitcast_vector(b, &load->dest.ssa, old_bit_size);
      data = resize_vector(b, data, old_num_comps);

      nir_ssa_def_rewrite_uses_after(&load->dest.ssa, data,
                                     data->parent_instr);
      return true;
   }

   return false;
}
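/* Example (illustrative): a load through a cast that views a vec3 parent
 * as a vec4, where only .xyz are read, is rewritten above to load the
 * vec3 parent directly; resize_vector() then swizzles the result back to
 * the old component count so existing users are untouched.
 */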
static bool
opt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
{
   nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
   nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);

   /* LLVM loves to take advantage of the fact that vec3s in OpenCL are
    * vec4-aligned and so it can just read/write them as vec4s.  This
    * results in a LOT of vec4->vec3 casts on loads and stores.
    */
   if (is_vector_bitcast_deref(deref, write_mask, true)) {
      assert(store->src[1].is_ssa);
      nir_ssa_def *data = store->src[1].ssa;

      const unsigned old_bit_size = data->bit_size;

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
      const unsigned new_bit_size = glsl_get_bit_size(parent->type);

      nir_instr_rewrite_src(&store->instr, &store->src[0],
                            nir_src_for_ssa(&parent->dest.ssa));

      /* Restrict things down as needed so the bitcast doesn't fail */
      data = nir_channels(b, data, (1 << util_last_bit(write_mask)) - 1);
      if (old_bit_size != new_bit_size)
         data = nir_bitcast_vector(b, data, new_bit_size);
      data = resize_vector(b, data, new_num_comps);
      nir_instr_rewrite_src(&store->instr, &store->src[1],
                            nir_src_for_ssa(data));
      store->num_components = new_num_comps;

      /* Adjust the write mask */
      write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
                                                  new_bit_size);
      nir_intrinsic_set_write_mask(store, write_mask);
      return true;
   }

   return false;
}

static bool
opt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
{
   nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (deref == NULL)
      return false;

   nir_ssa_def *deref_is = NULL;

   if (nir_deref_mode_must_be(deref, modes))
      deref_is = nir_imm_true(b);

   if (!nir_deref_mode_may_be(deref, modes))
      deref_is = nir_imm_false(b);

   if (deref_is == NULL)
      return false;

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, deref_is);
   nir_instr_remove(&intrin->instr);
   return true;
}
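/* Example (illustrative): if the deref provably has mode
 * nir_var_mem_shared, deref_mode_is(shared) folds to an immediate true;
 * if the deref's modes can't include shared at all, it folds to false.
 * Only when the answer is genuinely unknown is the intrinsic left alone.
 */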
bool
nir_opt_deref_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         b.cursor = nir_before_instr(instr);

         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (opt_restrict_deref_modes(deref))
               progress = true;

            switch (deref->deref_type) {
            case nir_deref_type_ptr_as_array:
               if (opt_deref_ptr_as_array(&b, deref))
                  progress = true;
               break;

            case nir_deref_type_cast:
               if (opt_deref_cast(&b, deref))
                  progress = true;
               break;

            default:
               /* Do nothing */
               break;
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_deref:
               if (opt_load_vec_deref(&b, intrin))
                  progress = true;
               break;

            case nir_intrinsic_store_deref:
               if (opt_store_vec_deref(&b, intrin))
                  progress = true;
               break;

            case nir_intrinsic_deref_mode_is:
               if (opt_known_deref_mode_is(&b, intrin))
                  progress = true;
               break;

            default:
               /* Do nothing */
               break;
            }
            break;
         }

         default:
            /* Do nothing */
            break;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_opt_deref(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(func, shader) {
      if (func->impl && nir_opt_deref_impl(func->impl))
         progress = true;
   }

   return progress;
}