Path: blob/21.2-virgl/src/asahi/compiler/agx_register_allocate.c
7655 views
/*1* Copyright (C) 2021 Alyssa Rosenzweig <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#include "agx_compiler.h"24#include "agx_builder.h"2526/* Trivial register allocator that never frees anything.27*28* TODO: Write a real register allocator.29* TODO: Handle phi nodes.30*/3132/** Returns number of registers read by an instruction. TODO: 16-bit */33static unsigned34agx_read_registers(agx_instr *I, unsigned s)35{36unsigned size = I->src[s].size == AGX_SIZE_32 ? 2 : 1;3738switch (I->op) {39default:40return size;41}42}4344/** Returns number of registers written by an instruction */45static unsigned46agx_write_registers(agx_instr *I, unsigned d)47{48unsigned size = I->dest[d].size == AGX_SIZE_32 ? 2 : 1;4950switch (I->op) {51case AGX_OPCODE_LD_VARY:52case AGX_OPCODE_DEVICE_LOAD:53case AGX_OPCODE_TEXTURE_SAMPLE:54case AGX_OPCODE_LD_TILE:55return 8;56case AGX_OPCODE_LD_VARY_FLAT:57return 6;58case AGX_OPCODE_P_COMBINE:59{60unsigned components = 0;6162for (unsigned i = 0; i < 4; ++i) {63if (!agx_is_null(I->src[i]))64components = i + 1;65}6667return components * size;68}69default:70return size;71}72}7374static unsigned75agx_assign_regs(BITSET_WORD *used_regs, unsigned count, unsigned align, unsigned max)76{77for (unsigned reg = 0; reg < max; reg += align) {78bool conflict = false;7980for (unsigned j = 0; j < count; ++j)81conflict |= BITSET_TEST(used_regs, reg + j);8283if (!conflict) {84for (unsigned j = 0; j < count; ++j)85BITSET_SET(used_regs, reg + j);8687return reg;88}89}9091unreachable("Could not find a free register");92}9394/** Assign registers to SSA values in a block. */9596static void97agx_ra_assign_local(agx_block *block, uint8_t *ssa_to_reg, unsigned max_reg)98{99BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 };100101agx_foreach_predecessor(block, pred) {102for (unsigned i = 0; i < BITSET_WORDS(AGX_NUM_REGS); ++i)103used_regs[i] |= pred->regs_out[i];104}105106BITSET_SET(used_regs, 0); // control flow writes r0l107BITSET_SET(used_regs, 5*2); // TODO: precolouring, don't overwrite vertex ID108BITSET_SET(used_regs, (5*2 + 1));109110agx_foreach_instr_in_block(block, I) {111/* First, free killed sources */112agx_foreach_src(I, s) {113if (I->src[s].type == AGX_INDEX_NORMAL && I->src[s].kill) {114unsigned reg = ssa_to_reg[I->src[s].value];115unsigned count = agx_read_registers(I, s);116117for (unsigned i = 0; i < count; ++i)118BITSET_CLEAR(used_regs, reg + i);119}120}121122/* Next, assign destinations. Always legal in SSA form. */123agx_foreach_dest(I, d) {124if (I->dest[d].type == AGX_INDEX_NORMAL) {125unsigned count = agx_write_registers(I, d);126unsigned align = (I->dest[d].size == AGX_SIZE_16) ? 1 : 2;127unsigned reg = agx_assign_regs(used_regs, count, align, max_reg);128129ssa_to_reg[I->dest[d].value] = reg;130}131}132}133134STATIC_ASSERT(sizeof(block->regs_out) == sizeof(used_regs));135memcpy(block->regs_out, used_regs, sizeof(used_regs));136}137138void139agx_ra(agx_context *ctx)140{141unsigned *alloc = calloc(ctx->alloc, sizeof(unsigned));142143agx_compute_liveness(ctx);144uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t));145agx_foreach_block(ctx, block)146agx_ra_assign_local(block, ssa_to_reg, ctx->max_register);147148/* TODO: Coalesce combines */149150agx_foreach_instr_global_safe(ctx, ins) {151/* Lower away RA pseudo-instructions */152if (ins->op == AGX_OPCODE_P_COMBINE) {153/* TODO: Optimize out the moves! */154assert(ins->dest[0].type == AGX_INDEX_NORMAL);155enum agx_size common_size = ins->dest[0].size;156unsigned base = ssa_to_reg[ins->dest[0].value];157unsigned size = common_size == AGX_SIZE_32 ? 2 : 1;158159/* Move the sources */160agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));161162/* TODO: Eliminate the intermediate copy by handling parallel copies */163for (unsigned i = 0; i < 4; ++i) {164if (agx_is_null(ins->src[i])) continue;165unsigned base = ins->src[i].value;166if (ins->src[i].type == AGX_INDEX_NORMAL)167base = ssa_to_reg[base];168else169assert(ins->src[i].type == AGX_INDEX_REGISTER);170171assert(ins->src[i].size == common_size);172173agx_mov_to(&b, agx_register(124*2 + (i * size), common_size),174agx_register(base, common_size));175}176177for (unsigned i = 0; i < 4; ++i) {178if (agx_is_null(ins->src[i])) continue;179agx_index src = ins->src[i];180181if (src.type == AGX_INDEX_NORMAL)182src = agx_register(alloc[src.value], src.size);183184agx_mov_to(&b, agx_register(base + (i * size), common_size),185agx_register(124*2 + (i * size), common_size));186}187188/* We've lowered away, delete the old */189agx_remove_instruction(ins);190continue;191} else if (ins->op == AGX_OPCODE_P_EXTRACT) {192assert(ins->dest[0].type == AGX_INDEX_NORMAL);193assert(ins->dest[0].size == ins->src[0].size);194unsigned base = ins->src[0].value;195196if (ins->src[0].type != AGX_INDEX_REGISTER) {197assert(ins->src[0].type == AGX_INDEX_NORMAL);198base = alloc[base];199}200201unsigned size = ins->dest[0].size == AGX_SIZE_32 ? 2 : 1;202unsigned left = ssa_to_reg[ins->dest[0].value];203unsigned right = ssa_to_reg[ins->src[0].value] + (size * ins->imm);204205if (left != right) {206agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));207agx_mov_to(&b, agx_register(left, ins->dest[0].size),208agx_register(right, ins->src[0].size));209}210211agx_remove_instruction(ins);212continue;213}214215agx_foreach_src(ins, s) {216if (ins->src[s].type == AGX_INDEX_NORMAL) {217unsigned v = ssa_to_reg[ins->src[s].value];218ins->src[s] = agx_replace_index(ins->src[s], agx_register(v, ins->src[s].size));219}220}221222agx_foreach_dest(ins, d) {223if (ins->dest[d].type == AGX_INDEX_NORMAL) {224unsigned v = ssa_to_reg[ins->dest[d].value];225ins->dest[d] = agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size));226}227}228}229230free(ssa_to_reg);231free(alloc);232}233234235