Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/gp/regalloc.c
4574 views
/*1* Copyright (c) 2017 Lima Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sub license,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the11* next paragraph) shall be included in all copies or substantial portions12* of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22*/2324#include "gpir.h"25#include "util/u_dynarray.h"2627/* Per-register information */2829struct reg_info {30BITSET_WORD *conflicts;31struct util_dynarray conflict_list;3233/* Number of conflicts that must be allocated to physical registers.34*/35unsigned phys_conflicts;3637unsigned node_conflicts;3839/* Number of conflicts that can be allocated to either. */40unsigned total_conflicts;4142int assigned_color;4344bool visited;45};4647struct regalloc_ctx {48unsigned bitset_words, num_nodes_and_regs;49struct reg_info *registers;5051/* Reusable scratch liveness array */52BITSET_WORD *live;5354unsigned *worklist;55unsigned worklist_start, worklist_end;5657unsigned *stack;58unsigned stack_size;5960gpir_compiler *comp;61void *mem_ctx;62};6364/* Liveness analysis */6566static void propagate_liveness_instr(gpir_node *node, BITSET_WORD *live,67gpir_compiler *comp)68{69/* KILL */70if (node->type == gpir_node_type_store) {71if (node->op == gpir_op_store_reg) {72gpir_store_node *store = gpir_node_to_store(node);73BITSET_CLEAR(live, store->reg->index);74}75}7677/* GEN */78if (node->type == gpir_node_type_load) {79if (node->op == gpir_op_load_reg) {80gpir_load_node *load = gpir_node_to_load(node);81BITSET_SET(live, load->reg->index);82}83}84}8586static bool propagate_liveness_block(gpir_block *block, struct regalloc_ctx *ctx)87{88for (unsigned i = 0; i < 2; i++) {89if (block->successors[i]) {90for (unsigned j = 0; j < ctx->bitset_words; j++)91block->live_out[j] |= block->successors[i]->live_in[j];92}93}9495memcpy(ctx->live, block->live_out, ctx->bitset_words * sizeof(BITSET_WORD));9697list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {98propagate_liveness_instr(node, ctx->live, block->comp);99}100101bool changed = false;102for (unsigned i = 0; i < ctx->bitset_words; i++) {103changed |= (block->live_in[i] != ctx->live[i]);104block->live_in[i] = ctx->live[i];105}106return changed;107}108109static void calc_def_block(gpir_block *block)110{111list_for_each_entry(gpir_node, node, &block->node_list, list) {112if (node->op == gpir_op_store_reg) {113gpir_store_node *store = gpir_node_to_store(node);114BITSET_SET(block->def_out, store->reg->index);115}116}117}118119static void calc_liveness(struct regalloc_ctx *ctx)120{121bool changed = true;122while (changed) {123changed = false;124list_for_each_entry_rev(gpir_block, block, &ctx->comp->block_list, list) {125changed |= propagate_liveness_block(block, ctx);126}127}128129list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {130calc_def_block(block);131}132133changed = true;134while (changed) {135changed = false;136list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {137for (unsigned i = 0; i < 2; i++) {138gpir_block *succ = block->successors[i];139if (!succ)140continue;141142for (unsigned j = 0; j < ctx->bitset_words; j++) {143BITSET_WORD new = block->def_out[j] & ~succ->def_out[j];144changed |= (new != 0);145succ->def_out[j] |= block->def_out[j];146}147}148}149}150}151152/* Interference calculation */153154static void add_interference(struct regalloc_ctx *ctx, unsigned i, unsigned j)155{156if (i == j)157return;158159struct reg_info *a = &ctx->registers[i];160struct reg_info *b = &ctx->registers[j];161162if (BITSET_TEST(a->conflicts, j))163return;164165BITSET_SET(a->conflicts, j);166BITSET_SET(b->conflicts, i);167168a->total_conflicts++;169b->total_conflicts++;170if (j < ctx->comp->cur_reg)171a->phys_conflicts++;172else173a->node_conflicts++;174175if (i < ctx->comp->cur_reg)176b->phys_conflicts++;177else178b->node_conflicts++;179180util_dynarray_append(&a->conflict_list, unsigned, j);181util_dynarray_append(&b->conflict_list, unsigned, i);182}183184/* Make the register or node "i" interfere with all the other live registers185* and nodes.186*/187static void add_all_interferences(struct regalloc_ctx *ctx,188unsigned i,189BITSET_WORD *live_nodes,190BITSET_WORD *live_regs)191{192int live_node;193BITSET_FOREACH_SET(live_node, live_nodes, ctx->comp->cur_index) {194add_interference(ctx, i,195live_node + ctx->comp->cur_reg);196}197198int live_reg;199BITSET_FOREACH_SET(live_reg, ctx->live, ctx->comp->cur_index) {200add_interference(ctx, i, live_reg);201}202203}204205static void print_liveness(struct regalloc_ctx *ctx,206BITSET_WORD *live_reg, BITSET_WORD *live_val)207{208if (!(lima_debug & LIMA_DEBUG_GP))209return;210211int live_idx;212BITSET_FOREACH_SET(live_idx, live_reg, ctx->comp->cur_reg) {213printf("reg%d ", live_idx);214}215BITSET_FOREACH_SET(live_idx, live_val, ctx->comp->cur_index) {216printf("%d ", live_idx);217}218printf("\n");219}220221static void calc_interference(struct regalloc_ctx *ctx)222{223BITSET_WORD *live_nodes =224rzalloc_array(ctx->mem_ctx, BITSET_WORD, ctx->comp->cur_index);225226list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {227/* Initialize liveness at the end of the block, but exclude values that228* definitely aren't defined by the end. This helps out with229* partially-defined registers, like:230*231* if (condition) {232* foo = ...;233* }234* if (condition) {235* ... = foo;236* }237*238* If we naively propagated liveness backwards, foo would be live from239* the beginning of the program, but if we're not inside a loop then240* its value is undefined before the first if and we don't have to241* consider it live. Mask out registers like foo here.242*/243for (unsigned i = 0; i < ctx->bitset_words; i++) {244ctx->live[i] = block->live_out[i] & block->def_out[i];245}246247list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {248gpir_debug("processing node %d\n", node->index);249print_liveness(ctx, ctx->live, live_nodes);250if (node->type != gpir_node_type_store &&251node->type != gpir_node_type_branch) {252add_all_interferences(ctx, node->index + ctx->comp->cur_reg,253live_nodes, ctx->live);254255/* KILL */256BITSET_CLEAR(live_nodes, node->index);257} else if (node->op == gpir_op_store_reg) {258gpir_store_node *store = gpir_node_to_store(node);259add_all_interferences(ctx, store->reg->index,260live_nodes, ctx->live);261262/* KILL */263BITSET_CLEAR(ctx->live, store->reg->index);264}265266/* GEN */267if (node->type == gpir_node_type_store) {268gpir_store_node *store = gpir_node_to_store(node);269BITSET_SET(live_nodes, store->child->index);270} else if (node->type == gpir_node_type_alu) {271gpir_alu_node *alu = gpir_node_to_alu(node);272for (int i = 0; i < alu->num_child; i++)273BITSET_SET(live_nodes, alu->children[i]->index);274} else if (node->type == gpir_node_type_branch) {275gpir_branch_node *branch = gpir_node_to_branch(node);276BITSET_SET(live_nodes, branch->cond->index);277} else if (node->op == gpir_op_load_reg) {278gpir_load_node *load = gpir_node_to_load(node);279BITSET_SET(ctx->live, load->reg->index);280}281}282}283}284285/* Register allocation */286287static bool can_simplify(struct regalloc_ctx *ctx, unsigned i)288{289struct reg_info *info = &ctx->registers[i];290if (i < ctx->comp->cur_reg) {291/* Physical regs. */292return info->phys_conflicts + info->node_conflicts < GPIR_PHYSICAL_REG_NUM;293} else {294/* Nodes: if we manage to allocate all of its conflicting physical295* registers, they will take up at most GPIR_PHYSICAL_REG_NUM colors, so296* we can ignore any more than that.297*/298return MIN2(info->phys_conflicts, GPIR_PHYSICAL_REG_NUM) +299info->node_conflicts < GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM;300}301}302303static void push_stack(struct regalloc_ctx *ctx, unsigned i)304{305ctx->stack[ctx->stack_size++] = i;306if (i < ctx->comp->cur_reg)307gpir_debug("pushing reg%u\n", i);308else309gpir_debug("pushing %d\n", i - ctx->comp->cur_reg);310311struct reg_info *info = &ctx->registers[i];312assert(info->visited);313314util_dynarray_foreach(&info->conflict_list, unsigned, conflict) {315struct reg_info *conflict_info = &ctx->registers[*conflict];316if (i < ctx->comp->cur_reg) {317assert(conflict_info->phys_conflicts > 0);318conflict_info->phys_conflicts--;319} else {320assert(conflict_info->node_conflicts > 0);321conflict_info->node_conflicts--;322}323if (!ctx->registers[*conflict].visited && can_simplify(ctx, *conflict)) {324ctx->worklist[ctx->worklist_end++] = *conflict;325ctx->registers[*conflict].visited = true;326}327}328}329330static bool do_regalloc(struct regalloc_ctx *ctx)331{332ctx->worklist_start = 0;333ctx->worklist_end = 0;334ctx->stack_size = 0;335336/* Step 1: find the initially simplifiable registers */337for (int i = 0; i < ctx->comp->cur_reg + ctx->comp->cur_index; i++) {338if (can_simplify(ctx, i)) {339ctx->worklist[ctx->worklist_end++] = i;340ctx->registers[i].visited = true;341}342}343344while (true) {345/* Step 2: push onto the stack whatever we can */346while (ctx->worklist_start != ctx->worklist_end) {347push_stack(ctx, ctx->worklist[ctx->worklist_start++]);348}349350if (ctx->stack_size < ctx->num_nodes_and_regs) {351/* If there are still unsimplifiable nodes left, we need to352* optimistically push a node onto the stack. Choose the one with353* the smallest number of current neighbors, since that's the most354* likely to succeed.355*/356unsigned min_conflicts = UINT_MAX;357unsigned best_reg = 0;358for (unsigned reg = 0; reg < ctx->num_nodes_and_regs; reg++) {359struct reg_info *info = &ctx->registers[reg];360if (info->visited)361continue;362if (info->phys_conflicts + info->node_conflicts < min_conflicts) {363best_reg = reg;364min_conflicts = info->phys_conflicts + info->node_conflicts;365}366}367gpir_debug("optimistic triggered\n");368ctx->registers[best_reg].visited = true;369push_stack(ctx, best_reg);370} else {371break;372}373}374375/* Step 4: pop off the stack and assign colors */376for (int i = ctx->num_nodes_and_regs - 1; i >= 0; i--) {377unsigned idx = ctx->stack[i];378struct reg_info *reg = &ctx->registers[idx];379380unsigned num_available_regs;381if (idx < ctx->comp->cur_reg) {382num_available_regs = GPIR_PHYSICAL_REG_NUM;383} else {384num_available_regs = GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM;385}386387bool found = false;388unsigned start = i % num_available_regs;389for (unsigned j = 0; j < num_available_regs; j++) {390unsigned candidate = (j + start) % num_available_regs;391bool available = true;392util_dynarray_foreach(®->conflict_list, unsigned, conflict_idx) {393struct reg_info *conflict = &ctx->registers[*conflict_idx];394if (conflict->assigned_color >= 0 &&395conflict->assigned_color == (int) candidate) {396available = false;397break;398}399}400401if (available) {402reg->assigned_color = candidate;403found = true;404break;405}406}407408/* TODO: spilling */409if (!found) {410gpir_error("Failed to allocate registers\n");411return false;412}413}414415return true;416}417418static void assign_regs(struct regalloc_ctx *ctx)419{420list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {421list_for_each_entry(gpir_node, node, &block->node_list, list) {422if (node->index >= 0) {423node->value_reg =424ctx->registers[ctx->comp->cur_reg + node->index].assigned_color;425}426427if (node->op == gpir_op_load_reg) {428gpir_load_node *load = gpir_node_to_load(node);429unsigned color = ctx->registers[load->reg->index].assigned_color;430load->index = color / 4;431load->component = color % 4;432}433434if (node->op == gpir_op_store_reg) {435gpir_store_node *store = gpir_node_to_store(node);436unsigned color = ctx->registers[store->reg->index].assigned_color;437store->index = color / 4;438store->component = color % 4;439node->value_reg = color;440}441}442443block->live_out_phys = 0;444445int reg_idx;446BITSET_FOREACH_SET(reg_idx, block->live_out, ctx->comp->cur_reg) {447if (BITSET_TEST(block->def_out, reg_idx)) {448block->live_out_phys |= (1ull << ctx->registers[reg_idx].assigned_color);449}450}451}452}453454static void regalloc_print_result(gpir_compiler *comp)455{456if (!(lima_debug & LIMA_DEBUG_GP))457return;458459int index = 0;460printf("======== regalloc ========\n");461list_for_each_entry(gpir_block, block, &comp->block_list, list) {462list_for_each_entry(gpir_node, node, &block->node_list, list) {463printf("%03d: %d/%d %s ", index++, node->index, node->value_reg,464gpir_op_infos[node->op].name);465gpir_node_foreach_pred(node, dep) {466gpir_node *pred = dep->pred;467printf(" %d/%d", pred->index, pred->value_reg);468}469if (node->op == gpir_op_load_reg) {470gpir_load_node *load = gpir_node_to_load(node);471printf(" -/%d", 4 * load->index + load->component);472printf(" (%d)", load->reg->index);473} else if (node->op == gpir_op_store_reg) {474gpir_store_node *store = gpir_node_to_store(node);475printf(" (%d)", store->reg->index);476}477printf("\n");478}479printf("----------------------------\n");480}481}482483bool gpir_regalloc_prog(gpir_compiler *comp)484{485struct regalloc_ctx ctx;486487ctx.mem_ctx = ralloc_context(NULL);488ctx.num_nodes_and_regs = comp->cur_reg + comp->cur_index;489ctx.bitset_words = BITSET_WORDS(ctx.num_nodes_and_regs);490ctx.live = ralloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);491ctx.worklist = ralloc_array(ctx.mem_ctx, unsigned, ctx.num_nodes_and_regs);492ctx.stack = ralloc_array(ctx.mem_ctx, unsigned, ctx.num_nodes_and_regs);493ctx.comp = comp;494495ctx.registers = rzalloc_array(ctx.mem_ctx, struct reg_info, ctx.num_nodes_and_regs);496for (unsigned i = 0; i < ctx.num_nodes_and_regs; i++) {497ctx.registers[i].conflicts = rzalloc_array(ctx.mem_ctx, BITSET_WORD,498ctx.bitset_words);499util_dynarray_init(&ctx.registers[i].conflict_list, ctx.mem_ctx);500}501502list_for_each_entry(gpir_block, block, &comp->block_list, list) {503block->live_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);504block->live_in = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);505block->def_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);506}507508calc_liveness(&ctx);509calc_interference(&ctx);510if (!do_regalloc(&ctx)) {511ralloc_free(ctx.mem_ctx);512return false;513}514assign_regs(&ctx);515516regalloc_print_result(comp);517ralloc_free(ctx.mem_ctx);518return true;519}520521522