Path: blob/21.2-virgl/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
4570 views
/*1* Copyright © 2014 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223/**24* @file vc4_opt_small_immediates.c25*26* Turns references to small constant uniform values into small immediates27* fields.28*/2930#include "vc4_qir.h"31#include "vc4_qpu.h"3233static bool debug;3435bool36qir_opt_small_immediates(struct vc4_compile *c)37{38bool progress = false;3940qir_for_each_inst_inorder(inst, c) {41/* The small immediate value sits in the raddr B field, so we42* can't have 2 small immediates in one instruction (unless43* they're the same value, but that should be optimized away44* elsewhere).45*/46bool uses_small_imm = false;47for (int i = 0; i < qir_get_nsrc(inst); i++) {48if (inst->src[i].file == QFILE_SMALL_IMM)49uses_small_imm = true;50}51if (uses_small_imm)52continue;5354/* Don't propagate small immediates into the top-end bounds55* checking for indirect UBO loads. The kernel doesn't parse56* small immediates and rejects the shader in this case. UBO57* loads are much more expensive than the uniform load, and58* indirect UBO regions are usually much larger than a small59* immediate, so it's not worth updating the kernel to allow60* optimizing it.61*/62if (inst->op == QOP_MIN_NOIMM)63continue;6465for (int i = 0; i < qir_get_nsrc(inst); i++) {66struct qreg src = qir_follow_movs(c, inst->src[i]);6768if (src.file != QFILE_UNIF ||69src.pack ||70c->uniform_contents[src.index] !=71QUNIFORM_CONSTANT) {72continue;73}7475if (qir_is_tex(inst) &&76i == qir_get_tex_uniform_src(inst)) {77/* No turning the implicit uniform read into78* an immediate.79*/80continue;81}8283uint32_t imm = c->uniform_data[src.index];84uint32_t small_imm = qpu_encode_small_immediate(imm);85if (small_imm == ~0)86continue;8788if (debug) {89fprintf(stderr, "opt_small_immediate() from: ");90qir_dump_inst(c, inst);91fprintf(stderr, "\n");92}93inst->src[i].file = QFILE_SMALL_IMM;94inst->src[i].index = imm;95if (debug) {96fprintf(stderr, "to: ");97qir_dump_inst(c, inst);98fprintf(stderr, "\n");99}100progress = true;101break;102}103}104105return progress;106}107108109