/* Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c (4574 views) */
/*1* Copyright (C) 2009 Nicolai Haehnle.2* Copyright 2011 Tom Stellard <[email protected]>3*4* All Rights Reserved.5*6* Permission is hereby granted, free of charge, to any person obtaining7* a copy of this software and associated documentation files (the8* "Software"), to deal in the Software without restriction, including9* without limitation the rights to use, copy, modify, merge, publish,10* distribute, sublicense, and/or sell copies of the Software, and to11* permit persons to whom the Software is furnished to do so, subject to12* the following conditions:13*14* The above copyright notice and this permission notice (including the15* next paragraph) shall be included in all copies or substantial16* portions of the Software.17*18* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,19* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF20* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.21* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE22* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION23* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION24* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.25*26*/2728#include "radeon_program_pair.h"2930#include <stdio.h>3132#include "main/glheader.h"33#include "util/register_allocate.h"34#include "util/u_memory.h"35#include "util/ralloc.h"3637#include "r300_fragprog_swizzle.h"38#include "radeon_compiler.h"39#include "radeon_compiler_util.h"40#include "radeon_dataflow.h"41#include "radeon_list.h"42#include "radeon_regalloc.h"43#include "radeon_variable.h"4445#define VERBOSE 04647#define DBG(...) 
do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)48495051struct register_info {52struct live_intervals Live[4];5354unsigned int Used:1;55unsigned int Allocated:1;56unsigned int File:3;57unsigned int Index:RC_REGISTER_INDEX_BITS;58unsigned int Writemask;59};6061struct regalloc_state {62struct radeon_compiler * C;6364struct register_info * Input;65unsigned int NumInputs;6667struct register_info * Temporary;68unsigned int NumTemporaries;6970unsigned int Simple;71int LoopEnd;72};7374struct rc_class {75enum rc_reg_class ID;7677unsigned int WritemaskCount;7879/** List of writemasks that belong to this class */80unsigned int Writemasks[3];818283};8485static const struct rc_class rc_class_list [] = {86{RC_REG_CLASS_SINGLE, 3,87{RC_MASK_X,88RC_MASK_Y,89RC_MASK_Z}},90{RC_REG_CLASS_DOUBLE, 3,91{RC_MASK_X | RC_MASK_Y,92RC_MASK_X | RC_MASK_Z,93RC_MASK_Y | RC_MASK_Z}},94{RC_REG_CLASS_TRIPLE, 1,95{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,96RC_MASK_NONE,97RC_MASK_NONE}},98{RC_REG_CLASS_ALPHA, 1,99{RC_MASK_W,100RC_MASK_NONE,101RC_MASK_NONE}},102{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3,103{RC_MASK_X | RC_MASK_W,104RC_MASK_Y | RC_MASK_W,105RC_MASK_Z | RC_MASK_W}},106{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3,107{RC_MASK_X | RC_MASK_Y | RC_MASK_W,108RC_MASK_X | RC_MASK_Z | RC_MASK_W,109RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},110{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1,111{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,112RC_MASK_NONE,113RC_MASK_NONE}},114{RC_REG_CLASS_X, 1,115{RC_MASK_X,116RC_MASK_NONE,117RC_MASK_NONE}},118{RC_REG_CLASS_Y, 1,119{RC_MASK_Y,120RC_MASK_NONE,121RC_MASK_NONE}},122{RC_REG_CLASS_Z, 1,123{RC_MASK_Z,124RC_MASK_NONE,125RC_MASK_NONE}},126{RC_REG_CLASS_XY, 1,127{RC_MASK_X | RC_MASK_Y,128RC_MASK_NONE,129RC_MASK_NONE}},130{RC_REG_CLASS_YZ, 1,131{RC_MASK_Y | RC_MASK_Z,132RC_MASK_NONE,133RC_MASK_NONE}},134{RC_REG_CLASS_XZ, 1,135{RC_MASK_X | RC_MASK_Z,136RC_MASK_NONE,137RC_MASK_NONE}},138{RC_REG_CLASS_XW, 1,139{RC_MASK_X | RC_MASK_W,140RC_MASK_NONE,141RC_MASK_NONE}},142{RC_REG_CLASS_YW, 
1,143{RC_MASK_Y | RC_MASK_W,144RC_MASK_NONE,145RC_MASK_NONE}},146{RC_REG_CLASS_ZW, 1,147{RC_MASK_Z | RC_MASK_W,148RC_MASK_NONE,149RC_MASK_NONE}},150{RC_REG_CLASS_XYW, 1,151{RC_MASK_X | RC_MASK_Y | RC_MASK_W,152RC_MASK_NONE,153RC_MASK_NONE}},154{RC_REG_CLASS_YZW, 1,155{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,156RC_MASK_NONE,157RC_MASK_NONE}},158{RC_REG_CLASS_XZW, 1,159{RC_MASK_X | RC_MASK_Z | RC_MASK_W,160RC_MASK_NONE,161RC_MASK_NONE}}162};163164static void print_live_intervals(struct live_intervals * src)165{166if (!src || !src->Used) {167DBG("(null)");168return;169}170171DBG("(%i,%i)", src->Start, src->End);172}173174static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)175{176if (VERBOSE) {177DBG("overlap_live_intervals: ");178print_live_intervals(a);179DBG(" to ");180print_live_intervals(b);181DBG("\n");182}183184if (!a->Used || !b->Used) {185DBG(" unused interval\n");186return 0;187}188189if (a->Start > b->Start) {190if (a->Start < b->End) {191DBG(" overlap\n");192return 1;193}194} else if (b->Start > a->Start) {195if (b->Start < a->End) {196DBG(" overlap\n");197return 1;198}199} else { /* a->Start == b->Start */200if (a->Start != a->End && b->Start != b->End) {201DBG(" overlap\n");202return 1;203}204}205206DBG(" no overlap\n");207208return 0;209}210211static void scan_read_callback(void * data, struct rc_instruction * inst,212rc_register_file file, unsigned int index, unsigned int mask)213{214struct regalloc_state * s = data;215struct register_info * reg;216unsigned int i;217218if (file != RC_FILE_INPUT)219return;220221s->Input[index].Used = 1;222reg = &s->Input[index];223224for (i = 0; i < 4; i++) {225if (!((mask >> i) & 0x1)) {226continue;227}228reg->Live[i].Used = 1;229reg->Live[i].Start = 0;230reg->Live[i].End =231s->LoopEnd > inst->IP ? 
s->LoopEnd : inst->IP;232}233}234235static void remap_register(void * data, struct rc_instruction * inst,236rc_register_file * file, unsigned int * index)237{238struct regalloc_state * s = data;239const struct register_info * reg;240241if (*file == RC_FILE_TEMPORARY && s->Simple)242reg = &s->Temporary[*index];243else if (*file == RC_FILE_INPUT)244reg = &s->Input[*index];245else246return;247248if (reg->Allocated) {249*index = reg->Index;250}251}252253static void alloc_input_simple(void * data, unsigned int input,254unsigned int hwreg)255{256struct regalloc_state * s = data;257258if (input >= s->NumInputs)259return;260261s->Input[input].Allocated = 1;262s->Input[input].File = RC_FILE_TEMPORARY;263s->Input[input].Index = hwreg;264}265266/* This functions offsets the temporary register indices by the number267* of input registers, because input registers are actually temporaries and268* should not occupy the same space.269*270* This pass is supposed to be used to maintain correct allocation of inputs271* if the standard register allocation is disabled. 
*/272static void do_regalloc_inputs_only(struct regalloc_state * s)273{274for (unsigned i = 0; i < s->NumTemporaries; i++) {275s->Temporary[i].Allocated = 1;276s->Temporary[i].File = RC_FILE_TEMPORARY;277s->Temporary[i].Index = i + s->NumInputs;278}279}280281static unsigned int is_derivative(rc_opcode op)282{283return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);284}285286static int find_class(287const struct rc_class * classes,288unsigned int writemask,289unsigned int max_writemask_count)290{291unsigned int i;292for (i = 0; i < RC_REG_CLASS_COUNT; i++) {293unsigned int j;294if (classes[i].WritemaskCount > max_writemask_count) {295continue;296}297for (j = 0; j < 3; j++) {298if (classes[i].Writemasks[j] == writemask) {299return i;300}301}302}303return -1;304}305306struct variable_get_class_cb_data {307unsigned int * can_change_writemask;308unsigned int conversion_swizzle;309};310311static void variable_get_class_read_cb(312void * userdata,313struct rc_instruction * inst,314struct rc_pair_instruction_arg * arg,315struct rc_pair_instruction_source * src)316{317struct variable_get_class_cb_data * d = userdata;318unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,319d->conversion_swizzle);320if (!r300_swizzle_is_native_basic(new_swizzle)) {321*d->can_change_writemask = 0;322}323}324325static enum rc_reg_class variable_get_class(326struct rc_variable * variable,327const struct rc_class * classes)328{329unsigned int i;330unsigned int can_change_writemask= 1;331unsigned int writemask = rc_variable_writemask_sum(variable);332struct rc_list * readers = rc_variable_readers_union(variable);333int class_index;334335if (!variable->C->is_r500) {336struct rc_class c;337struct rc_variable * var_ptr;338/* The assumption here is that if an instruction has type339* RC_INSTRUCTION_NORMAL then it is a TEX instruction.340* r300 and r400 can't swizzle the result of a TEX lookup. 
*/341for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {342if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {343writemask = RC_MASK_XYZW;344}345}346347/* Check if it is possible to do swizzle packing for r300/r400348* without creating non-native swizzles. */349class_index = find_class(classes, writemask, 3);350if (class_index < 0) {351goto error;352}353c = classes[class_index];354if (c.WritemaskCount == 1) {355goto done;356}357for (i = 0; i < c.WritemaskCount; i++) {358struct rc_variable * var_ptr;359for (var_ptr = variable; var_ptr;360var_ptr = var_ptr->Friend) {361int j;362unsigned int conversion_swizzle =363rc_make_conversion_swizzle(364writemask, c.Writemasks[i]);365struct variable_get_class_cb_data d;366d.can_change_writemask = &can_change_writemask;367d.conversion_swizzle = conversion_swizzle;368/* If we get this far var_ptr->Inst has to369* be a pair instruction. If variable or any370* of its friends are normal instructions,371* then the writemask will be set to RC_MASK_XYZW372* and the function will return before it gets373* here. 
*/374rc_pair_for_all_reads_arg(var_ptr->Inst,375variable_get_class_read_cb, &d);376377for (j = 0; j < var_ptr->ReaderCount; j++) {378unsigned int old_swizzle;379unsigned int new_swizzle;380struct rc_reader r = var_ptr->Readers[j];381if (r.Inst->Type ==382RC_INSTRUCTION_PAIR ) {383old_swizzle = r.U.P.Arg->Swizzle;384} else {385/* Source operands of TEX386* instructions can't be387* swizzle on r300/r400 GPUs.388*/389can_change_writemask = 0;390break;391}392new_swizzle = rc_adjust_channels(393old_swizzle, conversion_swizzle);394if (!r300_swizzle_is_native_basic(395new_swizzle)) {396can_change_writemask = 0;397break;398}399}400if (!can_change_writemask) {401break;402}403}404if (!can_change_writemask) {405break;406}407}408}409410if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {411/* DDX/DDY seem to always fail when their writemasks are412* changed.*/413if (is_derivative(variable->Inst->U.P.RGB.Opcode)414|| is_derivative(variable->Inst->U.P.Alpha.Opcode)) {415can_change_writemask = 0;416}417}418for ( ; readers; readers = readers->Next) {419struct rc_reader * r = readers->Item;420if (r->Inst->Type == RC_INSTRUCTION_PAIR) {421if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {422can_change_writemask = 0;423break;424}425/* DDX/DDY also fail when their swizzles are changed. */426if (is_derivative(r->Inst->U.P.RGB.Opcode)427|| is_derivative(r->Inst->U.P.Alpha.Opcode)) {428can_change_writemask = 0;429break;430}431}432}433434class_index = find_class(classes, writemask,435can_change_writemask ? 
3 : 1);436done:437if (class_index > -1) {438return classes[class_index].ID;439} else {440error:441rc_error(variable->C,442"Could not find class for index=%u mask=%u\n",443variable->Dst.Index, writemask);444return 0;445}446}447448static unsigned int overlap_live_intervals_array(449struct live_intervals * a,450struct live_intervals * b)451{452unsigned int a_chan, b_chan;453for (a_chan = 0; a_chan < 4; a_chan++) {454for (b_chan = 0; b_chan < 4; b_chan++) {455if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {456return 1;457}458}459}460return 0;461}462463static unsigned int reg_get_index(int reg)464{465return reg / RC_MASK_XYZW;466}467468static unsigned int reg_get_writemask(int reg)469{470return (reg % RC_MASK_XYZW) + 1;471}472473static int get_reg_id(unsigned int index, unsigned int writemask)474{475assert(writemask);476if (writemask == 0) {477return 0;478}479return (index * RC_MASK_XYZW) + (writemask - 1);480}481482#if VERBOSE483static void print_reg(int reg)484{485unsigned int index = reg_get_index(reg);486unsigned int mask = reg_get_writemask(reg);487fprintf(stderr, "Temp[%u].%c%c%c%c", index,488mask & RC_MASK_X ? 'x' : '_',489mask & RC_MASK_Y ? 'y' : '_',490mask & RC_MASK_Z ? 'z' : '_',491mask & RC_MASK_W ? 
'w' : '_');492}493#endif494495static void add_register_conflicts(496struct ra_regs * regs,497unsigned int max_temp_regs)498{499unsigned int index, a_mask, b_mask;500for (index = 0; index < max_temp_regs; index++) {501for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {502for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;503b_mask++) {504if (a_mask & b_mask) {505ra_add_reg_conflict(regs,506get_reg_id(index, a_mask),507get_reg_id(index, b_mask));508}509}510}511}512}513514static void do_advanced_regalloc(struct regalloc_state * s)515{516517unsigned int i, input_node, node_count, node_index;518struct ra_class ** node_classes;519struct rc_instruction * inst;520struct rc_list * var_ptr;521struct rc_list * variables;522struct ra_graph * graph;523const struct rc_regalloc_state *ra_state = s->C->regalloc_state;524525/* Get list of program variables */526variables = rc_get_variables(s->C);527node_count = rc_list_count(variables);528node_classes = memory_pool_malloc(&s->C->Pool,529node_count * sizeof(struct ra_class *));530531for (var_ptr = variables, node_index = 0; var_ptr;532var_ptr = var_ptr->Next, node_index++) {533unsigned int class_index;534/* Compute the live intervals */535rc_variable_compute_live_intervals(var_ptr->Item);536537class_index = variable_get_class(var_ptr->Item, rc_class_list);538node_classes[node_index] = ra_state->classes[class_index];539}540541542/* Calculate live intervals for input registers */543for (inst = s->C->Program.Instructions.Next;544inst != &s->C->Program.Instructions;545inst = inst->Next) {546rc_opcode op = rc_get_flow_control_inst(inst);547if (op == RC_OPCODE_BGNLOOP) {548struct rc_instruction * endloop =549rc_match_bgnloop(inst);550if (endloop->IP > s->LoopEnd) {551s->LoopEnd = endloop->IP;552}553}554rc_for_all_reads_mask(inst, scan_read_callback, s);555}556557/* Compute the writemask for inputs. 
*/558for (i = 0; i < s->NumInputs; i++) {559unsigned int chan, writemask = 0;560for (chan = 0; chan < 4; chan++) {561if (s->Input[i].Live[chan].Used) {562writemask |= (1 << chan);563}564}565s->Input[i].Writemask = writemask;566}567568graph = ra_alloc_interference_graph(ra_state->regs,569node_count + s->NumInputs);570571for (node_index = 0; node_index < node_count; node_index++) {572ra_set_node_class(graph, node_index, node_classes[node_index]);573}574575/* Build the interference graph */576for (var_ptr = variables, node_index = 0; var_ptr;577var_ptr = var_ptr->Next,node_index++) {578struct rc_list * a, * b;579unsigned int b_index;580581for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;582b; b = b->Next, b_index++) {583struct rc_variable * var_a = a->Item;584while (var_a) {585struct rc_variable * var_b = b->Item;586while (var_b) {587if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {588ra_add_node_interference(graph,589node_index, b_index);590}591var_b = var_b->Friend;592}593var_a = var_a->Friend;594}595}596}597598/* Add input registers to the interference graph */599for (i = 0, input_node = 0; i< s->NumInputs; i++) {600if (!s->Input[i].Writemask) {601continue;602}603for (var_ptr = variables, node_index = 0;604var_ptr; var_ptr = var_ptr->Next, node_index++) {605struct rc_variable * var = var_ptr->Item;606if (overlap_live_intervals_array(s->Input[i].Live,607var->Live)) {608ra_add_node_interference(graph, node_index,609node_count + input_node);610}611}612/* Manually allocate a register for this input */613ra_set_node_reg(graph, node_count + input_node, get_reg_id(614s->Input[i].Index, s->Input[i].Writemask));615input_node++;616}617618if (!ra_allocate(graph)) {619rc_error(s->C, "Ran out of hardware temporaries\n");620return;621}622623/* Rewrite the registers */624for (var_ptr = variables, node_index = 0; var_ptr;625var_ptr = var_ptr->Next, node_index++) {626int reg = ra_get_node_reg(graph, node_index);627unsigned int writemask = 
reg_get_writemask(reg);628unsigned int index = reg_get_index(reg);629struct rc_variable * var = var_ptr->Item;630631if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {632writemask = rc_variable_writemask_sum(var);633}634635if (var->Dst.File == RC_FILE_INPUT) {636continue;637}638rc_variable_change_dst(var, index, writemask);639}640641ralloc_free(graph);642}643644void rc_init_regalloc_state(struct rc_regalloc_state *s)645{646unsigned i, j, index;647unsigned **ra_q_values;648649/* Pre-computed q values. This array describes the maximum number of650* a class's [row] registers that are in conflict with a single651* register from another class [column].652*653* For example:654* q_values[0][2] is 3, because a register from class 2655* (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from656* class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,657* and T0.z.658*659* q_values[2][0] is 1, because a register from class 0660* (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from661* class 2 (RC_REG_CLASS_TRIPLE) e.g. 
T0.x conflicts with T0.xyz662*663* The q values for each register class [row] will never be greater664* than the maximum number of writemask combinations for that class.665*666* For example:667*668* Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination,669* so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater670* than 1.671*/672const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = {673{1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},674{2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},675{1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},676{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},677{1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},678{2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},679{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},680{1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},681{1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},682{1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},683{1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},684{1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},685{1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},686{1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},687{1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},688{1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},689{1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},690{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},691{1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}692};693694/* Allocate the main ra data structure */695s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW,696true);697698/* Create the register classes */699for (i = 0; i < RC_REG_CLASS_COUNT; i++) {700const struct rc_class *class = &rc_class_list[i];701s->classes[class->ID] = ra_alloc_reg_class(s->regs);702703/* Assign registers to the classes */704for (index = 0; index < R500_PFS_NUM_TEMP_REGS; 
index++) {705for (j = 0; j < class->WritemaskCount; j++) {706int reg_id = get_reg_id(index,707class->Writemasks[j]);708ra_class_add_reg(s->classes[class->ID], reg_id);709}710}711}712713/* Set the q values. The q_values array is indexed based on714* the rc_reg_class ID (RC_REG_CLASS_*) which might be715* different than the ID assigned to that class by ra.716* This why we need to manually construct this list.717*/718ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *));719720for (i = 0; i < RC_REG_CLASS_COUNT; i++) {721ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned));722for (j = 0; j < RC_REG_CLASS_COUNT; j++) {723ra_q_values[i][j] = q_values[i][j];724}725}726727/* Add register conflicts */728add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS);729730ra_set_finalize(s->regs, ra_q_values);731732for (i = 0; i < RC_REG_CLASS_COUNT; i++) {733FREE(ra_q_values[i]);734}735FREE(ra_q_values);736}737738void rc_destroy_regalloc_state(struct rc_regalloc_state *s)739{740ralloc_free(s->regs);741}742743/**744* @param user This parameter should be a pointer to an integer value. If this745* integer value is zero, then a simple register allocator will be used that746* only allocates space for input registers (\sa do_regalloc_inputs_only). 
If747* user is non-zero, then the regular register allocator will be used748* (\sa do_regalloc).749*/750void rc_pair_regalloc(struct radeon_compiler *cc, void *user)751{752struct r300_fragment_program_compiler *c =753(struct r300_fragment_program_compiler*)cc;754struct regalloc_state s;755int * do_full_regalloc = (int*)user;756757memset(&s, 0, sizeof(s));758s.C = cc;759s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;760s.Input = memory_pool_malloc(&cc->Pool,761s.NumInputs * sizeof(struct register_info));762memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));763764s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;765s.Temporary = memory_pool_malloc(&cc->Pool,766s.NumTemporaries * sizeof(struct register_info));767memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));768769rc_recompute_ips(s.C);770771c->AllocateHwInputs(c, &alloc_input_simple, &s);772if (*do_full_regalloc) {773do_advanced_regalloc(&s);774} else {775s.Simple = 1;776do_regalloc_inputs_only(&s);777}778779/* Rewrite inputs and if we are doing the simple allocation, rewrite780* temporaries too. */781for (struct rc_instruction *inst = s.C->Program.Instructions.Next;782inst != &s.C->Program.Instructions;783inst = inst->Next) {784rc_remap_registers(inst, &remap_register, &s);785}786}787788789