/* Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a2xx/ir2_cp.c */
/* 4574 views */
/*1* Copyright (C) 2018 Jonathan Marek <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Jonathan Marek <[email protected]>24*/2526#include "ir2_private.h"2728static bool29is_mov(struct ir2_instr *instr)30{31return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&32instr->src_count == 1;33}3435static void36src_combine(struct ir2_src *src, struct ir2_src b)37{38src->num = b.num;39src->type = b.type;40src->swizzle = swiz_merge(b.swizzle, src->swizzle);41if (!src->abs) /* if we have abs we don't care about previous negate */42src->negate ^= b.negate;43src->abs |= b.abs;44}4546/* cp_src: replace src regs when they refer to a mov instruction47* example:48* ALU: MAXv R7 = C7, C749* ALU: MULADDv R7 = R7, R10, R0.xxxx50* becomes:51* ALU: MULADDv R7 = C7, R10, R0.xxxx52*/53void54cp_src(struct ir2_context *ctx)55{56struct ir2_instr *p;5758ir2_foreach_instr (instr, ctx) {59ir2_foreach_src (src, 
instr) {60/* loop to replace recursively */61do {62if (src->type != IR2_SRC_SSA)63break;6465p = &ctx->instr[src->num];66/* don't work across blocks to avoid possible issues */67if (p->block_idx != instr->block_idx)68break;6970if (!is_mov(p))71break;7273if (p->alu.saturate)74break;7576/* cant apply abs to const src, const src only for alu */77if (p->src[0].type == IR2_SRC_CONST &&78(src->abs || instr->type != IR2_ALU))79break;8081src_combine(src, p->src[0]);82} while (1);83}84}85}8687/* cp_export: replace mov to export when possible88* in the cp_src pass we bypass any mov instructions related89* to the src registers, but for exports for need something different90* example:91* ALU: MAXv R3.x___ = C9.x???, C9.x???92* ALU: MAXv R3._y__ = R0.?x??, C8.?x??93* ALU: MAXv export0 = R3.yyyx, R3.yyyx94* becomes:95* ALU: MAXv export0.___w = C9.???x, C9.???x96* ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx?97*98*/99void100cp_export(struct ir2_context *ctx)101{102struct ir2_instr *c[4], *ins[4];103struct ir2_src *src;104struct ir2_reg *reg;105unsigned ncomp;106107ir2_foreach_instr (instr, ctx) {108if (!is_export(instr)) /* TODO */109continue;110111if (!is_mov(instr))112continue;113114src = &instr->src[0];115116if (src->negate || src->abs) /* TODO handle these cases */117continue;118119if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)120continue;121122reg = get_reg_src(ctx, src);123ncomp = dst_ncomp(instr);124125unsigned reswiz[4] = {};126unsigned num_instr = 0;127128/* fill array c with pointers to instrs that write each component */129if (src->type == IR2_SRC_SSA) {130struct ir2_instr *instr = &ctx->instr[src->num];131132if (instr->type != IR2_ALU)133continue;134135for (int i = 0; i < ncomp; i++)136c[i] = instr;137138ins[num_instr++] = instr;139reswiz[0] = src->swizzle;140} else {141bool ok = true;142unsigned write_mask = 0;143144ir2_foreach_instr (instr, ctx) {145if (instr->is_ssa || instr->reg != reg)146continue;147148/* set by non-ALU */149if (instr->type != IR2_ALU) 
{150ok = false;151break;152}153154/* component written more than once */155if (write_mask & instr->alu.write_mask) {156ok = false;157break;158}159160write_mask |= instr->alu.write_mask;161162/* src pointers for components */163for (int i = 0, j = 0; i < 4; i++) {164unsigned k = swiz_get(src->swizzle, i);165if (instr->alu.write_mask & 1 << k) {166c[i] = instr;167168/* reswiz = compressed src->swizzle */169unsigned x = 0;170for (int i = 0; i < k; i++)171x += !!(instr->alu.write_mask & 1 << i);172173assert(src->swizzle || x == j);174reswiz[num_instr] |= swiz_set(x, j++);175}176}177ins[num_instr++] = instr;178}179if (!ok)180continue;181}182183bool redirect = true;184185/* must all be in same block */186for (int i = 0; i < ncomp; i++)187redirect &= (c[i]->block_idx == instr->block_idx);188189/* no other instr using the value */190ir2_foreach_instr (p, ctx) {191if (p == instr)192continue;193ir2_foreach_src (src, p)194redirect &= reg != get_reg_src(ctx, src);195}196197if (!redirect)198continue;199200/* redirect the instructions writing to the register */201for (int i = 0; i < num_instr; i++) {202struct ir2_instr *p = ins[i];203204p->alu.export = instr->alu.export;205p->alu.write_mask = 0;206p->is_ssa = true;207p->ssa.ncomp = 0;208memset(p->ssa.comp, 0, sizeof(p->ssa.comp));209p->alu.saturate |= instr->alu.saturate;210211switch (p->alu.vector_opc) {212case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:213case DOT2ADDv:214case DOT3v:215case DOT4v:216case CUBEv:217continue;218default:219break;220}221ir2_foreach_src (s, p)222swiz_merge_p(&s->swizzle, reswiz[i]);223}224225for (int i = 0; i < ncomp; i++) {226c[i]->alu.write_mask |= (1 << i);227c[i]->ssa.ncomp++;228}229instr->type = IR2_NONE;230instr->need_emit = false;231}232}233234235