Path: blob/21.2-virgl/src/panfrost/bifrost/bi_opt_cse.c
4564 views
/*1* Copyright (C) 2021 Collabora, Ltd.2* Copyright (C) 2014 Valve Corporation3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,20* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*/2324#include "compiler.h"25#include "bi_builder.h"2627#define XXH_INLINE_ALL28#include "xxhash.h"2930/* This pass handles CSE'ing repeated expressions created in the process of31* translating from NIR. Also, currently this is intra-block only, to make it32* work over multiple block we'd need to bring forward dominance calculation.33*/3435static inline uint32_t36HASH(uint32_t hash, unsigned data)37{38return XXH32(&data, sizeof(data), hash);39}4041static uint32_t42hash_index(uint32_t hash, bi_index index)43{44hash = HASH(hash, index.value);45hash = HASH(hash, index.abs);46hash = HASH(hash, index.neg);47hash = HASH(hash, index.swizzle);48hash = HASH(hash, index.offset);49hash = HASH(hash, index.reg);50hash = HASH(hash, index.type);51return hash;52}5354/* Hash an ALU instruction. */55static uint32_t56hash_instr(const void *data)57{58const bi_instr *I = data;59uint32_t hash = 0;6061hash = HASH(hash, I->op);6263/* Explcitly skip destinations, except for size details */64bi_foreach_dest(I, d) {65hash = HASH(hash, I->dest[d].swizzle);66}6768bi_foreach_src(I, s) {69hash = hash_index(hash, I->src[s]);70}7172/* Explicitly skip branch, regfmt, vecsize, no_spill, tdd, table */73hash = HASH(hash, I->dest_mod);7475/* Explicitly skip other immediates */76hash = HASH(hash, I->shift);7778for (unsigned i = 0; i < ARRAY_SIZE(I->flags); ++i)79hash = HASH(hash, I->flags[i]);8081return hash;82}8384static bool85instrs_equal(const void *_i1, const void *_i2)86{87const bi_instr *i1 = _i1, *i2 = _i2;8889if (i1->op != i2->op)90return false;9192/* Explicitly skip destinations */9394bi_foreach_src(i1, s) {95bi_index s1 = i1->src[s], s2 = i2->src[s];9697if (memcmp(&s1, &s2, sizeof(s1)) != 0)98return false;99}100101if (i1->dest_mod != i2->dest_mod)102return false;103104if (i1->shift != i2->shift)105return false;106107for (unsigned i = 0; i < ARRAY_SIZE(i1->flags); ++i) {108if (i1->flags[i] != i2->flags[i])109return false;110}111112return true;113}114115/* Determines what instructions the above routines have to handle */116117static bool118instr_can_cse(const bi_instr *I)119{120switch (I->op) {121case BI_OPCODE_DTSEL_IMM:122case BI_OPCODE_DISCARD_F32:123return false;124default:125break;126}127128if (bi_opcode_props[I->op].message)129return false;130131if (I->branch_target)132return false;133134/* Refuse to CSE non-SSA destinations since the data flow analysis135* required is nontrivial */136bi_foreach_dest(I, d) {137if (!bi_is_null(I->dest[d]) && !bi_is_ssa(I->dest[d]))138return false;139}140141/* Similar refuse to CSE non-SSA sources */142bi_foreach_src(I, s) {143if (I->src[s].reg || I->src[s].type == BI_INDEX_REGISTER)144return false;145}146147return true;148}149150void151bi_opt_cse(bi_context *ctx)152{153struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal);154155bi_foreach_block(ctx, block) {156bi_index *replacement = calloc(sizeof(bi_index), ((ctx->ssa_alloc + 1) << 2));157_mesa_set_clear(instr_set, NULL);158159bi_foreach_instr_in_block((bi_block *) block, instr) {160/* Rewrite before trying to CSE anything so we converge161* locally in one iteration */162bi_foreach_src(instr, s) {163if (s == 0 && bi_opcode_props[instr->op].sr_read)164continue;165166if (!bi_is_ssa(instr->src[s]))167continue;168169bi_index repl = replacement[bi_word_node(instr->src[s])];170if (!bi_is_null(repl))171instr->src[s] = bi_replace_index(instr->src[s], repl);172}173174if (!instr_can_cse(instr))175continue;176177bool found;178struct set_entry *entry =179_mesa_set_search_or_add(instr_set, instr, &found);180if (found) {181const bi_instr *match = entry->key;182183bi_foreach_dest(instr, d) {184if (!bi_is_null(instr->dest[d]))185replacement[bi_word_node(instr->dest[d])] = match->dest[d];186}187}188}189190free(replacement);191}192193_mesa_set_destroy(instr_set, NULL);194}195196197