Path: blob/21.2-virgl/src/asahi/compiler/agx_optimizer.c
4564 views
/*1* Copyright (C) 2021 Alyssa Rosenzweig <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#include "agx_compiler.h"24#include "agx_minifloat.h"2526/* AGX peephole optimizer responsible for instruction combining. It operates in27* a forward direction and a backward direction, in each case traversing in28* source order. SSA means the forward pass satisfies the invariant:29*30* Every def is visited before any of its uses.31*32* Dually, the backend pass satisfies the invariant:33*34* Every use of a def is visited before the def.35*36* This means the forward pass can propagate modifiers forward, whereas the37* backwards pass propagates modifiers backward. Consider an example:38*39* 1 = fabs 040* 2 = fround 141* 3 = fsat 142*43* The forwards pass would propagate the fabs to the fround (since we can44* lookup the fabs from the fround source and do the replacement). By contrast45* the backwards pass would propagate the fsat back to the fround (since when46* we see the fround we know it has only a single user, fsat). Propagatable47* instruction have natural directions (like pushforwards and pullbacks).48*49* We are careful to update the tracked state whenever we modify an instruction50* to ensure the passes are linear-time and converge in a single iteration.51*52* Size conversions are worth special discussion. Consider the snippet:53*54* 2 = fadd 0, 155* 3 = f2f16 256* 4 = fround 357*58* A priori, we can move the f2f16 in either direction. But it's not equal --59* if we move it up to the fadd, we get FP16 for two instructions, whereas if60* we push it into the fround, we effectively get FP32 for two instructions. So61* f2f16 is backwards. Likewise, consider62*63* 2 = fadd 0, 164* 3 = f2f32 165* 4 = fround 366*67* This time if we move f2f32 up to the fadd, we get FP32 for two, but if we68* move it down to the fround, we get FP16 to too. So f2f32 is backwards.69*/7071static bool72agx_is_fmov(agx_instr *def)73{74return (def->op == AGX_OPCODE_FADD)75&& agx_is_equiv(def->src[1], agx_negzero());76}7778/* Compose floating-point modifiers with floating-point sources */7980static agx_index81agx_compose_float_src(agx_index to, agx_index from)82{83if (to.abs)84from.neg = false;8586from.abs |= to.abs;87from.neg |= to.neg;8889return from;90}9192static void93agx_optimizer_fmov(agx_instr **defs, agx_instr *ins, unsigned srcs)94{95for (unsigned s = 0; s < srcs; ++s) {96agx_index src = ins->src[s];97if (src.type != AGX_INDEX_NORMAL) continue;9899agx_instr *def = defs[src.value];100if (!agx_is_fmov(def)) continue;101if (def->saturate) continue;102103ins->src[s] = agx_compose_float_src(src, def->src[0]);104}105}106107static void108agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I,109unsigned srcs, bool is_float)110{111for (unsigned s = 0; s < srcs; ++s) {112agx_index src = I->src[s];113if (src.type != AGX_INDEX_NORMAL) continue;114115agx_instr *def = defs[src.value];116if (def->op != AGX_OPCODE_MOV_IMM) continue;117118uint8_t value = def->imm;119bool float_src = is_float;120121/* cmpselsrc takes integer immediates only */122if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false;123124if (float_src) {125bool fp16 = (def->dest[0].size == AGX_SIZE_16);126assert(fp16 || (def->dest[0].size == AGX_SIZE_32));127128float f = fp16 ? _mesa_half_to_float(def->imm) : uif(def->imm);129if (!agx_minifloat_exact(f)) continue;130131value = agx_minifloat_encode(f);132} else if (value != def->imm) {133continue;134}135136I->src[s].type = AGX_INDEX_IMMEDIATE;137I->src[s].value = value;138}139}140141static bool142agx_optimizer_fmov_rev(agx_instr *I, agx_instr *use)143{144if (!agx_is_fmov(use)) return false;145if (use->src[0].neg || use->src[0].abs) return false;146147/* saturate(saturate(x)) = saturate(x) */148I->saturate |= use->saturate;149I->dest[0] = use->dest[0];150return true;151}152153static void154agx_optimizer_forward(agx_context *ctx)155{156agx_instr **defs = calloc(ctx->alloc, sizeof(*defs));157158agx_foreach_instr_global(ctx, I) {159struct agx_opcode_info info = agx_opcodes_info[I->op];160161for (unsigned d = 0; d < info.nr_dests; ++d) {162if (I->dest[d].type == AGX_INDEX_NORMAL)163defs[I->dest[d].value] = I;164}165166/* Propagate fmov down */167if (info.is_float)168agx_optimizer_fmov(defs, I, info.nr_srcs);169170/* Inline immediates if we can. TODO: systematic */171if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE && I->op != AGX_OPCODE_P_EXTRACT && I->op != AGX_OPCODE_P_COMBINE)172agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);173}174175free(defs);176}177178static void179agx_optimizer_backward(agx_context *ctx)180{181agx_instr **uses = calloc(ctx->alloc, sizeof(*uses));182BITSET_WORD *multiple = calloc(BITSET_WORDS(ctx->alloc), sizeof(*multiple));183184agx_foreach_instr_global_rev(ctx, I) {185struct agx_opcode_info info = agx_opcodes_info[I->op];186187for (unsigned s = 0; s < info.nr_srcs; ++s) {188if (I->src[s].type == AGX_INDEX_NORMAL) {189unsigned v = I->src[s].value;190191if (uses[v])192BITSET_SET(multiple, v);193else194uses[v] = I;195}196}197198if (info.nr_dests != 1)199continue;200201if (I->dest[0].type != AGX_INDEX_NORMAL)202continue;203204agx_instr *use = uses[I->dest[0].value];205206if (!use || BITSET_TEST(multiple, I->dest[0].value))207continue;208209/* Destination has a single use, try to propagate */210if (info.is_float && agx_optimizer_fmov_rev(I, use)) {211agx_remove_instruction(use);212continue;213}214}215216free(uses);217free(multiple);218}219220void221agx_optimizer(agx_context *ctx)222{223agx_optimizer_backward(ctx);224agx_optimizer_forward(ctx);225}226227228