/* Path: blob/21.2-virgl/src/broadcom/compiler/vir_opt_constant_alu.c
 * 4564 views
 */
/*1* Copyright © 2021 Raspberry Pi2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223/**24* @file v3d_opt_constant_alu.c25*26* Identified sequences of ALU instructions that operate on constant operands27* and reduces them to a uniform load.28*29* This is useful, for example, to optimize the result of removing leading30* ldunifa instructions in the DCE pass, which can leave a series of constant31* additions that increment the unifa address by 4 for each leading ldunif32* removed. 
It helps turn this:33*34* nop t1; ldunif (0x00000004 / 0.000000)35* nop t2; ldunif (0x00000004 / 0.000000)36* add t3, t1, t237*38* into:39*40* nop t1; ldunif (0x00000004 / 0.000000)41* nop t2; ldunif (0x00000004 / 0.000000)42* nop t4; ldunif (0x00000008 / 0.000000)43* mov t3, t444*45* For best results we want to run copy propagation in between this and46* the combine constants pass: every time we manage to convert an alu to47* a uniform load, we move the uniform to the original alu destination. By48* running copy propagation immediately after we can reuse the uniform as49* source in more follow-up alu instructions, making them constant and allowing50* this pass to continue making progress. However, if we run the small51* immediates optimization before that, that pass can convert some of the movs52* to use small immediates instead of the uniforms and prevent us from making53* the best of this pass, as small immediates don't get copy propagated.54*/5556#include "v3d_compiler.h"5758#include "util/half_float.h"59#include "util/u_math.h"6061static bool62opt_constant_add(struct v3d_compile *c, struct qinst *inst, union fi *values)63{64/* FIXME: handle more add operations */65struct qreg unif = { };66switch (inst->qpu.alu.add.op) {67case V3D_QPU_A_ADD:68c->cursor = vir_after_inst(inst);69unif = vir_uniform_ui(c, values[0].ui + values[1].ui);70break;7172case V3D_QPU_A_VFPACK: {73assert(inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE);7475const uint32_t packed =76(((uint32_t)_mesa_float_to_half(values[1].f)) << 16) |77_mesa_float_to_half(values[0].f);7879c->cursor = vir_after_inst(inst);80unif = vir_uniform_ui(c, packed);81break;82}8384default:85return false;86}8788/* Remove the original ALU instruction and replace it with a uniform89* load. 
If the original instruction loaded an implicit uniform we90* need to replicate that in the new instruction.91*/92struct qreg dst = inst->dst;93struct qinst *mov = vir_MOV_dest(c, dst, unif);94mov->uniform = inst->uniform;95vir_remove_instruction(c, inst);96if (dst.file == QFILE_TEMP)97c->defs[dst.index] = mov;98return true;99}100101static bool102try_opt_constant_alu(struct v3d_compile *c, struct qinst *inst)103{104if(inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)105return false;106107/* If the instruction does anything other than writing the result108* directly to the destination, skip.109*/110if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||111inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {112return false;113}114115if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||116inst->qpu.flags.mc != V3D_QPU_COND_NONE) {117return false;118}119120assert(vir_get_nsrc(inst) <= 2);121union fi values[2];122for (int i = 0; i < vir_get_nsrc(inst); i++) {123if (inst->src[i].file == QFILE_SMALL_IMM &&124v3d_qpu_small_imm_unpack(c->devinfo,125inst->qpu.raddr_b,126&values[i].ui)) {127continue;128}129130if (inst->src[i].file == QFILE_TEMP) {131struct qinst *def = c->defs[inst->src[i].index];132if (!def)133return false;134135if ((def->qpu.sig.ldunif || def->qpu.sig.ldunifrf) &&136c->uniform_contents[def->uniform] == QUNIFORM_CONSTANT) {137values[i].ui = c->uniform_data[def->uniform];138continue;139}140}141142return false;143}144145/* FIXME: handle mul operations */146if (vir_is_add(inst))147return opt_constant_add(c, inst, values);148149return false;150}151152bool153vir_opt_constant_alu(struct v3d_compile *c)154{155bool progress = false;156vir_for_each_block(block, c) {157vir_for_each_inst_safe(inst, block) {158progress = try_opt_constant_alu(c, inst) || progress;159}160}161162return progress;163}164165166