/* Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a2xx/ir2_ra.c
 * 4574 views
 */
/*1* Copyright (C) 2018 Jonathan Marek <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Jonathan Marek <[email protected]>24*/2526#include "ir2_private.h"2728/* if an instruction has side effects, we should never kill it */29static bool30has_side_effects(struct ir2_instr *instr)31{32if (instr->type == IR2_CF)33return true;34else if (instr->type == IR2_FETCH)35return false;3637switch (instr->alu.scalar_opc) {38case PRED_SETEs ... KILLONEs:39return true;40default:41break;42}4344switch (instr->alu.vector_opc) {45case PRED_SETE_PUSHv ... 
KILLNEv:46return true;47default:48break;49}5051return instr->alu.export >= 0;52}5354/* mark an instruction as required, and all its sources recursively */55static void56set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr)57{58struct ir2_reg *reg;5960/* don't repeat work already done */61if (instr->need_emit)62return;6364instr->need_emit = true;6566ir2_foreach_src (src, instr) {67switch (src->type) {68case IR2_SRC_SSA:69set_need_emit(ctx, &ctx->instr[src->num]);70break;71case IR2_SRC_REG:72/* slow .. */73reg = get_reg_src(ctx, src);74ir2_foreach_instr (instr, ctx) {75if (!instr->is_ssa && instr->reg == reg)76set_need_emit(ctx, instr);77}78break;79default:80break;81}82}83}8485/* get current bit mask of allocated components for a register */86static unsigned87reg_mask(struct ir2_context *ctx, unsigned idx)88{89return ctx->reg_state[idx / 8] >> idx % 8 * 4 & 0xf;90}9192static void93reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c)94{95idx = idx * 4 + c;96ctx->reg_state[idx / 32] |= 1 << idx % 32;97}9899static void100reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c)101{102idx = idx * 4 + c;103ctx->reg_state[idx / 32] &= ~(1 << idx % 32);104}105106void107ra_count_refs(struct ir2_context *ctx)108{109struct ir2_reg *reg;110111/* mark instructions as needed112* need to do this because "substitutions" pass makes many movs not needed113*/114ir2_foreach_instr (instr, ctx) {115if (has_side_effects(instr))116set_need_emit(ctx, instr);117}118119/* compute ref_counts */120ir2_foreach_instr (instr, ctx) {121/* kill non-needed so they can be skipped */122if (!instr->need_emit) {123instr->type = IR2_NONE;124continue;125}126127ir2_foreach_src (src, instr) {128if (src->type == IR2_SRC_CONST)129continue;130131reg = get_reg_src(ctx, src);132for (int i = 0; i < src_ncomp(instr); i++)133reg->comp[swiz_get(src->swizzle, i)].ref_count++;134}135}136}137138void139ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, bool export,140uint8_t 
export_writemask)141{142/* for export, don't allocate anything but set component layout */143if (export) {144for (int i = 0; i < 4; i++)145reg->comp[i].c = i;146return;147}148149unsigned idx = force_idx;150151/* TODO: allocate into the same register if theres room152* note: the blob doesn't do it, so verify that it is indeed better153* also, doing it would conflict with scalar mov insertion154*/155156/* check if already allocated */157for (int i = 0; i < reg->ncomp; i++) {158if (reg->comp[i].alloc)159return;160}161162if (force_idx < 0) {163for (idx = 0; idx < 64; idx++) {164if (reg_mask(ctx, idx) == 0)165break;166}167}168assert(idx != 64); /* TODO ran out of register space.. */169170/* update max_reg value */171ctx->info->max_reg = MAX2(ctx->info->max_reg, (int)idx);172173unsigned mask = reg_mask(ctx, idx);174175for (int i = 0; i < reg->ncomp; i++) {176/* don't allocate never used values */177if (reg->comp[i].ref_count == 0) {178reg->comp[i].c = 7;179continue;180}181182/* TODO */183unsigned c = 1 ? 
i : (ffs(~mask) - 1);184mask |= 1 << c;185reg->comp[i].c = c;186reg_setmask(ctx, idx, c);187reg->comp[i].alloc = true;188}189190reg->idx = idx;191ctx->live_regs[reg->idx] = reg;192}193194/* reduce srcs ref_count and free if needed */195void196ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr)197{198struct ir2_reg *reg;199struct ir2_reg_component *comp;200201ir2_foreach_src (src, instr) {202if (src->type == IR2_SRC_CONST)203continue;204205reg = get_reg_src(ctx, src);206/* XXX use before write case */207208for (int i = 0; i < src_ncomp(instr); i++) {209comp = ®->comp[swiz_get(src->swizzle, i)];210if (!--comp->ref_count && reg->block_idx_free < 0) {211reg_freemask(ctx, reg->idx, comp->c);212comp->alloc = false;213}214}215}216}217218/* free any regs left for a block */219void220ra_block_free(struct ir2_context *ctx, unsigned block)221{222ir2_foreach_live_reg (reg, ctx) {223if (reg->block_idx_free != block)224continue;225226for (int i = 0; i < reg->ncomp; i++) {227if (!reg->comp[i].alloc) /* XXX should never be true? */228continue;229230reg_freemask(ctx, reg->idx, reg->comp[i].c);231reg->comp[i].alloc = false;232}233ctx->live_regs[reg->idx] = NULL;234}235}236237238