Path: blob/21.2-virgl/src/panfrost/util/nir_mod_helpers.c
4560 views
/*1* Copyright (C) 2020 Collabora, Ltd.2* Copyright (C) 2014 Intel Corporation3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING20* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS21* IN THE SOFTWARE.22*23* Authors:24* Alyssa Rosenzweig <[email protected]>25* Jason Ekstrand ([email protected])26*27*/2829#include "nir.h"30#include "pan_ir.h"3132/* Check if a given ALU source is the result of a particular componentwise 1-op33* ALU source (principally fneg or fabs). If so, return true and rewrite the34* source to be the argument, respecting swizzles as needed. If not (or it35* cannot be proven), return false and leave the source untouched.36*/3738bool39pan_has_source_mod(nir_alu_src *src, nir_op op)40{41if (!src->src.is_ssa || src->src.ssa->parent_instr->type != nir_instr_type_alu)42return false;4344nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr);4546if (alu->op != op)47return false;4849/* This only works for unary ops */50assert(nir_op_infos[op].num_inputs == 1);5152/* If the copied source is not SSA, moving it might not be valid */53if (!alu->src[0].src.is_ssa)54return false;5556/* Okay - we've found the modifier we wanted. Let's construct the new ALU57* src. In a scalar world, this is just psrc, but for vector archs we need58* to respect the swizzle, so we compose.59*/6061nir_alu_src nsrc = {62.src = alu->src[0].src,63};6465for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i) {66/* (a o b)(i) = a(b(i)) ... swizzle composition is intense. */67nsrc.swizzle[i] = alu->src[0].swizzle[src->swizzle[i]];68}6970*src = nsrc;71return true;72}7374/* Check if a given instruction's result will be fed into a75* componentwise 1-op ALU instruction (principally fsat without76* swizzles). If so, return true and rewrite the destination. The77* backend will need to track the new destinations to avoid78* incorrect double-emits. */7980bool81pan_has_dest_mod(nir_dest **odest, nir_op op)82{83/* This only works for unary ops */84assert(nir_op_infos[op].num_inputs == 1);8586/* If not SSA, this might not be legal */87nir_dest *dest = *odest;88if (!dest->is_ssa)89return false;9091/* Check the uses. We want a single use, with the op `op` */92if (!list_is_empty(&dest->ssa.if_uses))93return false;9495if (!list_is_singular(&dest->ssa.uses))96return false;9798nir_src *use = list_first_entry(&dest->ssa.uses, nir_src, use_link);99nir_instr *parent = use->parent_instr;100101/* Check if the op is `op` */102if (parent->type != nir_instr_type_alu)103return false;104105nir_alu_instr *alu = nir_instr_as_alu(parent);106if (alu->op != op)107return false;108109/* We can't do expansions without a move in the middle */110unsigned nr_components = nir_dest_num_components(alu->dest.dest);111112if (nir_dest_num_components(*dest) != nr_components)113return false;114115/* We don't handle swizzles here, so check for the identity */116for (unsigned i = 0; i < nr_components; ++i) {117if (alu->src[0].swizzle[i] != i)118return false;119}120121if (!alu->dest.dest.is_ssa)122return false;123124/* Otherwise, we're good */125*odest = &alu->dest.dest;126return true;127}128129130