Path: blob/21.2-virgl/src/panfrost/bifrost/bi_opt_mod_props.c
4564 views
/*1* Copyright (C) 2021 Collabora, Ltd.2* Copyright (C) 2021 Alyssa Rosenzweig <[email protected]>3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,20* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*/2324#include "compiler.h"25#include "bi_builder.h"2627static bool28bi_takes_fabs(bi_instr *I, bi_index repl, unsigned s)29{30switch (I->op) {31case BI_OPCODE_FCMP_V2F16:32case BI_OPCODE_FMAX_V2F16:33case BI_OPCODE_FMIN_V2F16:34/* Encoding restriction: can't have both abs if equal sources */35return !(I->src[1 - s].abs && bi_is_word_equiv(I->src[1 - s], repl));36case BI_OPCODE_V2F32_TO_V2F16:37/* TODO: Needs both match or lower */38return false;39case BI_OPCODE_FLOG_TABLE_F32:40/* TODO: Need to check mode */41return false;42default:43return bi_opcode_props[I->op].abs & BITFIELD_BIT(s);44}45}4647static bool48bi_takes_fneg(bi_instr *I, unsigned s)49{50switch (I->op) {51case BI_OPCODE_CUBE_SSEL:52case BI_OPCODE_CUBE_TSEL:53case BI_OPCODE_CUBEFACE:54/* TODO: Needs match or lower */55return false;56case BI_OPCODE_FREXPE_F32:57case BI_OPCODE_FREXPE_V2F16:58case BI_OPCODE_FLOG_TABLE_F32:59/* TODO: Need to check mode */60return false;61default:62return bi_opcode_props[I->op].neg & BITFIELD_BIT(s);63}64}6566static bool67bi_is_fabsneg(bi_instr *I)68{69return (I->op == BI_OPCODE_FADD_F32 || I->op == BI_OPCODE_FADD_V2F16) &&70(I->src[1].type == BI_INDEX_CONSTANT && I->src[1].value == 0) &&71(I->clamp == BI_CLAMP_NONE);72}7374static enum bi_swizzle75bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)76{77assert(a <= BI_SWIZZLE_H11);78assert(b <= BI_SWIZZLE_H11);7980bool al = (a & BI_SWIZZLE_H10);81bool ar = (a & BI_SWIZZLE_H01);82bool bl = (b & BI_SWIZZLE_H10);83bool br = (b & BI_SWIZZLE_H01);8485return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) |86((ar ? br : bl) ? BI_SWIZZLE_H01 : 0);87}8889/* Like bi_replace_index, but composes instead of overwrites */9091static inline bi_index92bi_compose_float_index(bi_index old, bi_index repl)93{94/* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise95* -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */96repl.neg = old.neg ^ (repl.neg && !old.abs);9798/* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */99repl.abs |= old.abs;100101/* Use the old swizzle to select from the replacement swizzle */102repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle);103104return repl;105}106107void108bi_opt_mod_prop_forward(bi_context *ctx)109{110bi_instr **lut = calloc(sizeof(bi_instr *), ((ctx->ssa_alloc + 1) << 2));111112bi_foreach_instr_global_safe(ctx, I) {113if (bi_is_ssa(I->dest[0]))114lut[bi_word_node(I->dest[0])] = I;115116bi_foreach_src(I, s) {117if (!bi_is_ssa(I->src[s]))118continue;119120bi_instr *mod = lut[bi_word_node(I->src[s])];121122if (!mod)123continue;124125if (bi_opcode_props[mod->op].size != bi_opcode_props[I->op].size)126continue;127128if (bi_is_fabsneg(mod)) {129if (mod->src[0].abs && !bi_takes_fabs(I, mod->src[0], s))130continue;131132if (mod->src[0].neg && !bi_takes_fneg(I, s))133continue;134135I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);136}137}138}139140free(lut);141}142143/* RSCALE has restrictions on how the clamp may be used, only used for144* specialized transcendental sequences that set the clamp explicitly anyway */145146static bool147bi_takes_clamp(bi_instr *I)148{149switch (I->op) {150case BI_OPCODE_FMA_RSCALE_F32:151case BI_OPCODE_FMA_RSCALE_V2F16:152case BI_OPCODE_FADD_RSCALE_F32:153return false;154default:155return bi_opcode_props[I->op].clamp;156}157}158159/* Treating clamps as functions, compute the composition f circ g. For {NONE,160* SAT, SAT_SIGNED, CLAMP_POS}, anything left- or right-composed with NONE is161* unchanged, anything composed with itself is unchanged, and any two162* nontrivial distinct clamps compose to SAT (left as an exercise) */163164static enum bi_clamp165bi_compose_clamp(enum bi_clamp f, enum bi_clamp g)166{167return (f == BI_CLAMP_NONE) ? g :168(g == BI_CLAMP_NONE) ? f :169(f == g) ? f :170BI_CLAMP_CLAMP_0_1;171}172173static bool174bi_is_fclamp(bi_instr *I)175{176return (I->op == BI_OPCODE_FADD_F32 || I->op == BI_OPCODE_FADD_V2F16) &&177(!I->src[0].abs && !I->src[0].neg) &&178(I->src[1].type == BI_INDEX_CONSTANT && I->src[1].value == 0) &&179(I->clamp != BI_CLAMP_NONE);180}181182static bool183bi_optimizer_clamp(bi_instr *I, bi_instr *use)184{185if (bi_opcode_props[use->op].size != bi_opcode_props[I->op].size) return false;186if (!bi_is_fclamp(use)) return false;187if (!bi_takes_clamp(I)) return false;188if (use->src[0].neg || use->src[0].abs) return false;189190I->clamp = bi_compose_clamp(I->clamp, use->clamp);191I->dest[0] = use->dest[0];192return true;193}194195static bool196bi_is_var_tex(bi_instr *var, bi_instr *tex)197{198return (var->op == BI_OPCODE_LD_VAR_IMM) &&199(tex->op == BI_OPCODE_TEXS_2D_F16 || tex->op == BI_OPCODE_TEXS_2D_F32) &&200(var->register_format == BI_REGISTER_FORMAT_F32) &&201((var->sample == BI_SAMPLE_CENTER && var->update == BI_UPDATE_STORE) ||202(var->sample == BI_SAMPLE_NONE && var->update == BI_UPDATE_RETRIEVE)) &&203(tex->texture_index == tex->sampler_index) &&204(tex->texture_index < 4) &&205(var->index < 8);206}207208static bool209bi_optimizer_var_tex(bi_context *ctx, bi_instr *var, bi_instr *tex)210{211if (!bi_is_var_tex(var, tex)) return false;212213/* Construct the corresponding VAR_TEX intruction */214bi_builder b = bi_init_builder(ctx, bi_after_instr(var));215216bi_instr *I = bi_var_tex_f32_to(&b, tex->dest[0], tex->lod_mode,217var->sample, var->update, tex->texture_index, var->index);218I->skip = tex->skip;219220if (tex->op == BI_OPCODE_TEXS_2D_F16)221I->op = BI_OPCODE_VAR_TEX_F16;222223/* Dead code elimination will clean up for us */224return true;225}226227void228bi_opt_mod_prop_backward(bi_context *ctx)229{230unsigned count = ((ctx->ssa_alloc + 1) << 2);231bi_instr **uses = calloc(count, sizeof(*uses));232BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));233234bi_foreach_instr_global_rev(ctx, I) {235bi_foreach_src(I, s) {236if (bi_is_ssa(I->src[s])) {237unsigned v = bi_word_node(I->src[s]);238239if (uses[v] && uses[v] != I)240BITSET_SET(multiple, v);241else242uses[v] = I;243}244}245246if (!bi_is_ssa(I->dest[0]))247continue;248249bi_instr *use = uses[bi_word_node(I->dest[0])];250251if (!use || BITSET_TEST(multiple, bi_word_node(I->dest[0])))252continue;253254/* Destination has a single use, try to propagate */255bool propagated =256bi_optimizer_clamp(I, use) ||257bi_optimizer_var_tex(ctx, I, use);258259if (propagated) {260bi_remove_instruction(use);261continue;262}263}264265free(uses);266free(multiple);267}268269270