Path: blob/21.2-virgl/src/broadcom/compiler/vir_live_variables.c
4564 views
/*1* Copyright © 2012 Intel Corporation2* Copyright © 2016 Broadcom3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING20* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS21* IN THE SOFTWARE.22*/2324#define MAX_INSTRUCTION (1 << 30)2526#include "util/ralloc.h"27#include "util/register_allocate.h"28#include "v3d_compiler.h"2930struct partial_update_state {31struct qinst *insts[4];32uint8_t channels;33};3435static int36vir_reg_to_var(struct qreg reg)37{38if (reg.file == QFILE_TEMP)39return reg.index;4041return -1;42}4344static void45vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,46struct qreg src)47{48int var = vir_reg_to_var(src);49if (var == -1)50return;5152c->temp_start[var] = MIN2(c->temp_start[var], ip);53c->temp_end[var] = MAX2(c->temp_end[var], ip);5455/* The use[] bitset marks when the block makes56* use of a variable without having completely57* defined that variable within the block.58*/59if (!BITSET_TEST(block->def, var))60BITSET_SET(block->use, var);61}6263static struct partial_update_state *64get_partial_update_state(struct hash_table *partial_update_ht,65struct qinst *inst)66{67struct hash_entry *entry =68_mesa_hash_table_search(partial_update_ht,69&inst->dst.index);70if (entry)71return entry->data;7273struct partial_update_state *state =74rzalloc(partial_update_ht, struct partial_update_state);7576_mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);7778return state;79}8081static void82vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,83struct hash_table *partial_update_ht, struct qinst *inst)84{85if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)86return;8788/* The def[] bitset marks when an initialization in a89* block completely screens off previous updates of90* that variable.91*/92int var = vir_reg_to_var(inst->dst);93if (var == -1)94return;9596c->temp_start[var] = MIN2(c->temp_start[var], ip);97c->temp_end[var] = MAX2(c->temp_end[var], ip);9899/* Mark the block as having a (partial) def of the var. */100BITSET_SET(block->defout, var);101102/* If we've already tracked this as a def that screens off previous103* uses, or already used it within the block, there's nothing to do.104*/105if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))106return;107108/* Easy, common case: unconditional full register update.*/109if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&110inst->qpu.flags.mc == V3D_QPU_COND_NONE) &&111inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&112inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {113BITSET_SET(block->def, var);114return;115}116117/* Finally, look at the condition code and packing and mark it as a118* def. We need to make sure that we understand sequences119* instructions like:120*121* mov.zs t0, t1122* mov.zc t0, t2123*124* or:125*126* mmov t0.8a, t1127* mmov t0.8b, t2128* mmov t0.8c, t3129* mmov t0.8d, t4130*131* as defining the temp within the block, because otherwise dst's live132* range will get extended up the control flow to the top of the133* program.134*/135struct partial_update_state *state =136get_partial_update_state(partial_update_ht, inst);137uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */138139if (inst->qpu.flags.ac == V3D_QPU_COND_NONE &&140inst->qpu.flags.mc == V3D_QPU_COND_NONE) {141state->channels |= mask;142} else {143for (int i = 0; i < 4; i++) {144if (!(mask & (1 << i)))145continue;146147/* XXXif (state->insts[i] &&148state->insts[i]->cond ==149qpu_cond_complement(inst->cond))150state->channels |= 1 << i;151else152*/153state->insts[i] = inst;154}155}156157if (state->channels == 0xf)158BITSET_SET(block->def, var);159}160161static void162sf_state_clear(struct hash_table *partial_update_ht)163{164hash_table_foreach(partial_update_ht, entry) {165struct partial_update_state *state = entry->data;166167for (int i = 0; i < 4; i++) {168if (state->insts[i] &&169(state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE ||170state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE))171state->insts[i] = NULL;172}173}174}175176/* Sets up the def/use arrays for when variables are used-before-defined or177* defined-before-used in the block.178*179* Also initializes the temp_start/temp_end to cover just the instruction IPs180* where the variable is used, which will be extended later in181* vir_compute_start_end().182*/183static void184vir_setup_def_use(struct v3d_compile *c)185{186struct hash_table *partial_update_ht =187_mesa_hash_table_create(c, _mesa_hash_int, _mesa_key_int_equal);188int ip = 0;189190vir_for_each_block(block, c) {191block->start_ip = ip;192193_mesa_hash_table_clear(partial_update_ht, NULL);194195vir_for_each_inst(inst, block) {196for (int i = 0; i < vir_get_nsrc(inst); i++)197vir_setup_use(c, block, ip, inst->src[i]);198199vir_setup_def(c, block, ip, partial_update_ht, inst);200201if (false /* XXX inst->uf */)202sf_state_clear(partial_update_ht);203204/* Payload registers: r0/1/2 contain W, centroid W,205* and Z at program start. Register allocation will206* force their nodes to R0/1/2.207*/208if (inst->src[0].file == QFILE_REG) {209switch (inst->src[0].index) {210case 0:211case 1:212case 2:213c->temp_start[inst->dst.index] = 0;214break;215}216}217218ip++;219}220block->end_ip = ip;221}222223_mesa_hash_table_destroy(partial_update_ht, NULL);224}225226static bool227vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)228{229bool cont = false;230231vir_for_each_block_rev(block, c) {232/* Update live_out: Any successor using the variable233* on entrance needs us to have the variable live on234* exit.235*/236vir_for_each_successor(succ, block) {237for (int i = 0; i < bitset_words; i++) {238BITSET_WORD new_live_out = (succ->live_in[i] &239~block->live_out[i]);240if (new_live_out) {241block->live_out[i] |= new_live_out;242cont = true;243}244}245}246247/* Update live_in */248for (int i = 0; i < bitset_words; i++) {249BITSET_WORD new_live_in = (block->use[i] |250(block->live_out[i] &251~block->def[i]));252if (new_live_in & ~block->live_in[i]) {253block->live_in[i] |= new_live_in;254cont = true;255}256}257}258259return cont;260}261262static bool263vir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words)264{265bool cont = false;266267vir_for_each_block_rev(block, c) {268/* Propagate defin/defout down the successors to produce the269* union of blocks with a reachable (partial) definition of270* the var.271*272* This keeps a conditional first write to a reg from273* extending its lifetime back to the start of the program.274*/275vir_for_each_successor(succ, block) {276for (int i = 0; i < bitset_words; i++) {277BITSET_WORD new_def = (block->defout[i] &278~succ->defin[i]);279succ->defin[i] |= new_def;280succ->defout[i] |= new_def;281cont |= new_def;282}283}284}285286return cont;287}288289/**290* Extend the start/end ranges for each variable to account for the291* new information calculated from control flow.292*/293static void294vir_compute_start_end(struct v3d_compile *c, int num_vars)295{296vir_for_each_block(block, c) {297for (int i = 0; i < num_vars; i++) {298if (BITSET_TEST(block->live_in, i) &&299BITSET_TEST(block->defin, i)) {300c->temp_start[i] = MIN2(c->temp_start[i],301block->start_ip);302c->temp_end[i] = MAX2(c->temp_end[i],303block->start_ip);304}305306if (BITSET_TEST(block->live_out, i) &&307BITSET_TEST(block->defout, i)) {308c->temp_start[i] = MIN2(c->temp_start[i],309block->end_ip);310c->temp_end[i] = MAX2(c->temp_end[i],311block->end_ip);312}313}314}315}316317void318vir_calculate_live_intervals(struct v3d_compile *c)319{320int bitset_words = BITSET_WORDS(c->num_temps);321322/* We may be called more than once if we've rearranged the program to323* try to get register allocation to succeed.324*/325if (c->temp_start) {326ralloc_free(c->temp_start);327ralloc_free(c->temp_end);328329vir_for_each_block(block, c) {330ralloc_free(block->def);331ralloc_free(block->use);332ralloc_free(block->live_in);333ralloc_free(block->live_out);334}335}336337c->temp_start = rzalloc_array(c, int, c->num_temps);338c->temp_end = rzalloc_array(c, int, c->num_temps);339340for (int i = 0; i < c->num_temps; i++) {341c->temp_start[i] = MAX_INSTRUCTION;342c->temp_end[i] = -1;343}344345vir_for_each_block(block, c) {346block->def = rzalloc_array(c, BITSET_WORD, bitset_words);347block->defin = rzalloc_array(c, BITSET_WORD, bitset_words);348block->defout = rzalloc_array(c, BITSET_WORD, bitset_words);349block->use = rzalloc_array(c, BITSET_WORD, bitset_words);350block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);351block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);352}353354vir_setup_def_use(c);355356while (vir_live_variables_dataflow(c, bitset_words))357;358359while (vir_live_variables_defin_defout_dataflow(c, bitset_words))360;361362vir_compute_start_end(c, c->num_temps);363364c->live_intervals_valid = true;365}366367368