Path: blob/21.2-virgl/src/gallium/drivers/vc4/vc4_opt_algebraic.c
4570 views
/*1* Copyright © 2014 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223/**24* @file vc4_opt_algebraic.c25*26* This is the optimization pass for miscellaneous changes to instructions27* where we can simplify the operation by some knowledge about the specific28* operations.29*30* Mostly this will be a matter of turning things into MOVs so that they can31* later be copy-propagated out.32*/3334#include "vc4_qir.h"35#include "util/u_math.h"3637static bool debug;3839static void40dump_from(struct vc4_compile *c, struct qinst *inst)41{42if (!debug)43return;4445fprintf(stderr, "optimizing: ");46qir_dump_inst(c, inst);47fprintf(stderr, "\n");48}4950static void51dump_to(struct vc4_compile *c, struct qinst *inst)52{53if (!debug)54return;5556fprintf(stderr, "to: ");57qir_dump_inst(c, inst);58fprintf(stderr, "\n");59}6061static bool62is_constant_value(struct vc4_compile *c, struct qreg reg,63uint32_t val)64{65if (reg.file == QFILE_UNIF &&66!reg.pack &&67c->uniform_contents[reg.index] == QUNIFORM_CONSTANT &&68c->uniform_data[reg.index] == val) {69return true;70}7172if (reg.file == QFILE_SMALL_IMM && reg.index == val)73return true;7475return false;76}7778static bool79is_zero(struct vc4_compile *c, struct qreg reg)80{81reg = qir_follow_movs(c, reg);82return is_constant_value(c, reg, 0);83}8485static bool86is_1f(struct vc4_compile *c, struct qreg reg)87{88reg = qir_follow_movs(c, reg);89return is_constant_value(c, reg, fui(1.0));90}9192static void93replace_with_mov(struct vc4_compile *c, struct qinst *inst, struct qreg arg)94{95dump_from(c, inst);9697inst->src[0] = arg;98if (qir_has_implicit_tex_uniform(inst))99inst->src[1] = inst->src[qir_get_tex_uniform_src(inst)];100101if (qir_is_mul(inst))102inst->op = QOP_MMOV;103else if (qir_is_float_input(inst))104inst->op = QOP_FMOV;105else106inst->op = QOP_MOV;107dump_to(c, inst);108}109110static bool111replace_x_0_with_x(struct vc4_compile *c,112struct qinst *inst,113int arg)114{115if (!is_zero(c, inst->src[arg]))116return false;117replace_with_mov(c, inst, inst->src[1 - arg]);118return true;119}120121static bool122replace_x_0_with_0(struct vc4_compile *c,123struct qinst *inst,124int arg)125{126if (!is_zero(c, inst->src[arg]))127return false;128replace_with_mov(c, inst, inst->src[arg]);129return true;130}131132static bool133fmul_replace_one(struct vc4_compile *c,134struct qinst *inst,135int arg)136{137if (!is_1f(c, inst->src[arg]))138return false;139replace_with_mov(c, inst, inst->src[1 - arg]);140return true;141}142143bool144qir_opt_algebraic(struct vc4_compile *c)145{146bool progress = false;147148qir_for_each_inst_inorder(inst, c) {149switch (inst->op) {150case QOP_FMIN:151if (is_1f(c, inst->src[1]) &&152inst->src[0].pack >= QPU_UNPACK_8D_REP &&153inst->src[0].pack <= QPU_UNPACK_8D) {154replace_with_mov(c, inst, inst->src[0]);155progress = true;156}157break;158159case QOP_FMAX:160if (is_zero(c, inst->src[1]) &&161inst->src[0].pack >= QPU_UNPACK_8D_REP &&162inst->src[0].pack <= QPU_UNPACK_8D) {163replace_with_mov(c, inst, inst->src[0]);164progress = true;165}166break;167168case QOP_FSUB:169case QOP_SUB:170if (is_zero(c, inst->src[1])) {171replace_with_mov(c, inst, inst->src[0]);172progress = true;173}174break;175176case QOP_ADD:177/* Kernel validation requires that we use an actual178* add instruction.179*/180if (inst->dst.file != QFILE_TEX_S_DIRECT &&181(replace_x_0_with_x(c, inst, 0) ||182replace_x_0_with_x(c, inst, 1))) {183progress = true;184break;185}186break;187188case QOP_FADD:189if (replace_x_0_with_x(c, inst, 0) ||190replace_x_0_with_x(c, inst, 1)) {191progress = true;192break;193}194195/* FADD(a, FSUB(0, b)) -> FSUB(a, b) */196if (inst->src[1].file == QFILE_TEMP &&197c->defs[inst->src[1].index] &&198c->defs[inst->src[1].index]->op == QOP_FSUB) {199struct qinst *fsub = c->defs[inst->src[1].index];200if (is_zero(c, fsub->src[0])) {201dump_from(c, inst);202inst->op = QOP_FSUB;203inst->src[1] = fsub->src[1];204progress = true;205dump_to(c, inst);206break;207}208}209210/* FADD(FSUB(0, b), a) -> FSUB(a, b) */211if (inst->src[0].file == QFILE_TEMP &&212c->defs[inst->src[0].index] &&213c->defs[inst->src[0].index]->op == QOP_FSUB) {214struct qinst *fsub = c->defs[inst->src[0].index];215if (is_zero(c, fsub->src[0])) {216dump_from(c, inst);217inst->op = QOP_FSUB;218inst->src[0] = inst->src[1];219inst->src[1] = fsub->src[1];220dump_to(c, inst);221progress = true;222break;223}224}225break;226227case QOP_FMUL:228if (!inst->dst.pack &&229(replace_x_0_with_0(c, inst, 0) ||230replace_x_0_with_0(c, inst, 1) ||231fmul_replace_one(c, inst, 0) ||232fmul_replace_one(c, inst, 1))) {233progress = true;234break;235}236break;237238case QOP_MUL24:239if (!inst->dst.pack &&240(replace_x_0_with_0(c, inst, 0) ||241replace_x_0_with_0(c, inst, 1))) {242progress = true;243break;244}245break;246247case QOP_AND:248if (replace_x_0_with_0(c, inst, 0) ||249replace_x_0_with_0(c, inst, 1)) {250progress = true;251break;252}253254if (is_constant_value(c, inst->src[0], ~0)) {255replace_with_mov(c, inst, inst->src[1]);256progress = true;257break;258}259if (is_constant_value(c, inst->src[1], ~0)) {260replace_with_mov(c, inst, inst->src[0]);261progress = true;262break;263}264break;265266case QOP_OR:267if (replace_x_0_with_x(c, inst, 0) ||268replace_x_0_with_x(c, inst, 1)) {269progress = true;270break;271}272break;273274case QOP_RCP:275if (is_1f(c, inst->src[0])) {276replace_with_mov(c, inst, inst->src[0]);277progress = true;278break;279}280break;281282default:283break;284}285}286287return progress;288}289290291