/* Path: blob/21.2-virgl/src/gallium/drivers/vc4/vc4_qir_live_variables.c
 * 4570 views
 */
/*1* Copyright © 2012 Intel Corporation2* Copyright © 2016 Broadcom3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING20* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS21* IN THE SOFTWARE.22*/2324#define MAX_INSTRUCTION (1 << 30)2526#include "util/ralloc.h"27#include "util/register_allocate.h"28#include "vc4_context.h"29#include "vc4_qir.h"3031struct partial_update_state {32struct qinst *insts[4];33uint8_t channels;34};3536static int37qir_reg_to_var(struct qreg reg)38{39if (reg.file == QFILE_TEMP)40return reg.index;4142return -1;43}4445static void46qir_setup_use(struct vc4_compile *c, struct qblock *block, int ip,47struct qreg src)48{49int var = qir_reg_to_var(src);50if (var == -1)51return;5253c->temp_start[var] = MIN2(c->temp_start[var], ip);54c->temp_end[var] = MAX2(c->temp_end[var], ip);5556/* The use[] bitset marks when the block makes57* use of a variable without having completely58* defined that variable within the block.59*/60if (!BITSET_TEST(block->def, var))61BITSET_SET(block->use, 
var);62}6364static struct partial_update_state *65get_partial_update_state(struct hash_table *partial_update_ht,66struct qinst *inst)67{68struct hash_entry *entry =69_mesa_hash_table_search(partial_update_ht,70&inst->dst.index);71if (entry)72return entry->data;7374struct partial_update_state *state =75rzalloc(partial_update_ht, struct partial_update_state);7677_mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);7879return state;80}8182static void83qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip,84struct hash_table *partial_update_ht, struct qinst *inst)85{86/* The def[] bitset marks when an initialization in a87* block completely screens off previous updates of88* that variable.89*/90int var = qir_reg_to_var(inst->dst);91if (var == -1)92return;9394c->temp_start[var] = MIN2(c->temp_start[var], ip);95c->temp_end[var] = MAX2(c->temp_end[var], ip);9697/* If we've already tracked this as a def, or already used it within98* the block, there's nothing to do.99*/100if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))101return;102103/* Easy, common case: unconditional full register update.104*105* We treat conditioning on the exec mask as the same as not being106* conditional. This makes sure that if the register gets set on107* either side of an if, it is treated as being screened off before108* the if. Otherwise, if there was no intervening def, its live109* interval doesn't extend back to the start of he program, and if too110* many registers did that we'd fail to register allocate.111*/112if ((inst->cond == QPU_COND_ALWAYS ||113inst->cond_is_exec_mask) && !inst->dst.pack) {114BITSET_SET(block->def, var);115return;116}117118/* Finally, look at the condition code and packing and mark it as a119* def. 
We need to make sure that we understand sequences120* instructions like:121*122* mov.zs t0, t1123* mov.zc t0, t2124*125* or:126*127* mmov t0.8a, t1128* mmov t0.8b, t2129* mmov t0.8c, t3130* mmov t0.8d, t4131*132* as defining the temp within the block, because otherwise dst's live133* range will get extended up the control flow to the top of the134* program.135*/136struct partial_update_state *state =137get_partial_update_state(partial_update_ht, inst);138uint8_t mask = qir_channels_written(inst);139140if (inst->cond == QPU_COND_ALWAYS) {141state->channels |= mask;142} else {143for (int i = 0; i < 4; i++) {144if (!(mask & (1 << i)))145continue;146147if (state->insts[i] &&148state->insts[i]->cond ==149qpu_cond_complement(inst->cond))150state->channels |= 1 << i;151else152state->insts[i] = inst;153}154}155156if (state->channels == 0xf)157BITSET_SET(block->def, var);158}159160static void161sf_state_clear(struct hash_table *partial_update_ht)162{163hash_table_foreach(partial_update_ht, entry) {164struct partial_update_state *state = entry->data;165166for (int i = 0; i < 4; i++) {167if (state->insts[i] && state->insts[i]->cond)168state->insts[i] = NULL;169}170}171}172173/* Sets up the def/use arrays for when variables are used-before-defined or174* defined-before-used in the block.175*176* Also initializes the temp_start/temp_end to cover just the instruction IPs177* where the variable is used, which will be extended later in178* qir_compute_start_end().179*/180static void181qir_setup_def_use(struct vc4_compile *c)182{183struct hash_table *partial_update_ht =184_mesa_hash_table_create(c, _mesa_hash_int, _mesa_key_int_equal);185int ip = 0;186187qir_for_each_block(block, c) {188block->start_ip = ip;189190_mesa_hash_table_clear(partial_update_ht, NULL);191192qir_for_each_inst(inst, block) {193for (int i = 0; i < qir_get_nsrc(inst); i++)194qir_setup_use(c, block, ip, inst->src[i]);195196qir_setup_def(c, block, ip, partial_update_ht, inst);197198if 
(inst->sf)199sf_state_clear(partial_update_ht);200201switch (inst->op) {202case QOP_FRAG_Z:203case QOP_FRAG_W:204/* The payload registers have values205* implicitly loaded at the start of the206* program.207*/208if (inst->dst.file == QFILE_TEMP)209c->temp_start[inst->dst.index] = 0;210break;211default:212break;213}214ip++;215}216block->end_ip = ip;217}218219_mesa_hash_table_destroy(partial_update_ht, NULL);220}221222static bool223qir_live_variables_dataflow(struct vc4_compile *c, int bitset_words)224{225bool cont = false;226227qir_for_each_block_rev(block, c) {228/* Update live_out: Any successor using the variable229* on entrance needs us to have the variable live on230* exit.231*/232qir_for_each_successor(succ, block) {233for (int i = 0; i < bitset_words; i++) {234BITSET_WORD new_live_out = (succ->live_in[i] &235~block->live_out[i]);236if (new_live_out) {237block->live_out[i] |= new_live_out;238cont = true;239}240}241}242243/* Update live_in */244for (int i = 0; i < bitset_words; i++) {245BITSET_WORD new_live_in = (block->use[i] |246(block->live_out[i] &247~block->def[i]));248if (new_live_in & ~block->live_in[i]) {249block->live_in[i] |= new_live_in;250cont = true;251}252}253}254255return cont;256}257258/**259* Extend the start/end ranges for each variable to account for the260* new information calculated from control flow.261*/262static void263qir_compute_start_end(struct vc4_compile *c, int num_vars)264{265qir_for_each_block(block, c) {266for (int i = 0; i < num_vars; i++) {267if (BITSET_TEST(block->live_in, i)) {268c->temp_start[i] = MIN2(c->temp_start[i],269block->start_ip);270c->temp_end[i] = MAX2(c->temp_end[i],271block->start_ip);272}273274if (BITSET_TEST(block->live_out, i)) {275c->temp_start[i] = MIN2(c->temp_start[i],276block->end_ip);277c->temp_end[i] = MAX2(c->temp_end[i],278block->end_ip);279}280}281}282}283284void285qir_calculate_live_intervals(struct vc4_compile *c)286{287int bitset_words = BITSET_WORDS(c->num_temps);288289/* If we called this 
function more than once, then we should be290* freeing the previous arrays.291*/292assert(!c->temp_start);293294c->temp_start = rzalloc_array(c, int, c->num_temps);295c->temp_end = rzalloc_array(c, int, c->num_temps);296297for (int i = 0; i < c->num_temps; i++) {298c->temp_start[i] = MAX_INSTRUCTION;299c->temp_end[i] = -1;300}301302qir_for_each_block(block, c) {303block->def = rzalloc_array(c, BITSET_WORD, bitset_words);304block->use = rzalloc_array(c, BITSET_WORD, bitset_words);305block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);306block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);307}308309qir_setup_def_use(c);310311while (qir_live_variables_dataflow(c, bitset_words))312;313314qir_compute_start_end(c, c->num_temps);315316if (vc4_debug & VC4_DEBUG_SHADERDB) {317int last_ip = 0;318for (int i = 0; i < c->num_temps; i++)319last_ip = MAX2(last_ip, c->temp_end[i]);320321int reg_pressure = 0;322int max_reg_pressure = 0;323for (int i = 0; i < last_ip; i++) {324for (int j = 0; j < c->num_temps; j++) {325if (c->temp_start[j] == i)326reg_pressure++;327if (c->temp_end[j] == i)328reg_pressure--;329}330max_reg_pressure = MAX2(max_reg_pressure, reg_pressure);331}332333fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d max temps\n",334qir_get_stage_name(c->stage),335c->program_id, c->variant_id,336max_reg_pressure);337}338}339340341