Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
4574 views
/*1* Copyright 2010 Tom Stellard <[email protected]>2*3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining6* a copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sublicense, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial15* portions of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,18* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.20* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE21* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION22* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION23* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25*/2627/**28* \file29*/3031#include <stdio.h>32#include "c99_math.h"33#include "radeon_emulate_loops.h"34#include "radeon_compiler.h"35#include "radeon_compiler_util.h"36#include "radeon_dataflow.h"3738#define VERBOSE 03940#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)4142struct const_value {43struct radeon_compiler * C;44struct rc_src_register * Src;45float Value;46int HasValue;47};4849struct count_inst {50struct radeon_compiler * C;51int Index;52rc_swizzle Swz;53float Amount;54int Unknown;55unsigned BranchDepth;56};5758static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,59struct loop_info * loop)60{61unsigned int total_i = rc_recompute_ips(c);62unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;63/* +1 because the program already has one iteration of the loop. */64return 1 + ((c->max_alu_insts - total_i) / loop_i);65}6667static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,68unsigned int iterations)69{70unsigned int i;71struct rc_instruction * ptr;72struct rc_instruction * first = loop->BeginLoop->Next;73struct rc_instruction * last = loop->EndLoop->Prev;74struct rc_instruction * append_to = last;75rc_remove_instruction(loop->BeginLoop);76rc_remove_instruction(loop->EndLoop);77for( i = 1; i < iterations; i++){78for(ptr = first; ptr != last->Next; ptr = ptr->Next){79struct rc_instruction *new = rc_alloc_instruction(c);80memcpy(new, ptr, sizeof(struct rc_instruction));81rc_insert_instruction(append_to, new);82append_to = new;83}84}85}868788static void update_const_value(void * data, struct rc_instruction * inst,89rc_register_file file, unsigned int index, unsigned int mask)90{91struct const_value * value = data;92if(value->Src->File != file ||93value->Src->Index != index ||94!(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){95return;96}97switch(inst->U.I.Opcode){98case RC_OPCODE_MOV:99if(!rc_src_reg_is_immediate(value->C, inst->U.I.SrcReg[0].File,100inst->U.I.SrcReg[0].Index)){101return;102}103value->HasValue = 1;104value->Value =105rc_get_constant_value(value->C,106inst->U.I.SrcReg[0].Index,107inst->U.I.SrcReg[0].Swizzle,108inst->U.I.SrcReg[0].Negate, 0);109break;110}111}112113static void get_incr_amount(void * data, struct rc_instruction * inst,114rc_register_file file, unsigned int index, unsigned int mask)115{116struct count_inst * count_inst = data;117int amnt_src_index;118const struct rc_opcode_info * opcode;119float amount;120121if(file != RC_FILE_TEMPORARY ||122count_inst->Index != index ||123(1 << GET_SWZ(count_inst->Swz,0) != mask)){124return;125}126127/* XXX: Give up if the counter is modified within an IF block. We128* could handle this case with better analysis. */129if (count_inst->BranchDepth > 0) {130count_inst->Unknown = 1;131return;132}133134/* Find the index of the counter register. */135opcode = rc_get_opcode_info(inst->U.I.Opcode);136if(opcode->NumSrcRegs != 2){137count_inst->Unknown = 1;138return;139}140if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&141inst->U.I.SrcReg[0].Index == count_inst->Index &&142inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){143amnt_src_index = 1;144} else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&145inst->U.I.SrcReg[1].Index == count_inst->Index &&146inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){147amnt_src_index = 0;148}149else{150count_inst->Unknown = 1;151return;152}153if(rc_src_reg_is_immediate(count_inst->C,154inst->U.I.SrcReg[amnt_src_index].File,155inst->U.I.SrcReg[amnt_src_index].Index)){156amount = rc_get_constant_value(count_inst->C,157inst->U.I.SrcReg[amnt_src_index].Index,158inst->U.I.SrcReg[amnt_src_index].Swizzle,159inst->U.I.SrcReg[amnt_src_index].Negate, 0);160}161else{162count_inst->Unknown = 1 ;163return;164}165switch(inst->U.I.Opcode){166case RC_OPCODE_ADD:167count_inst->Amount += amount;168break;169case RC_OPCODE_SUB:170if(amnt_src_index == 0){171count_inst->Unknown = 0;172return;173}174count_inst->Amount -= amount;175break;176default:177count_inst->Unknown = 1;178return;179}180}181182/**183* If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless184* of how many iterations they have.185*/186static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop)187{188int end_loops;189int iterations;190struct count_inst count_inst;191float limit_value;192struct rc_src_register * counter;193struct rc_src_register * limit;194struct const_value counter_value;195struct rc_instruction * inst;196197/* Find the counter and the upper limit */198199if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[0].File,200loop->Cond->U.I.SrcReg[0].Index)){201limit = &loop->Cond->U.I.SrcReg[0];202counter = &loop->Cond->U.I.SrcReg[1];203}204else if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[1].File,205loop->Cond->U.I.SrcReg[1].Index)){206limit = &loop->Cond->U.I.SrcReg[1];207counter = &loop->Cond->U.I.SrcReg[0];208}209else{210DBG("No constant limit.\n");211return 0;212}213214/* Find the initial value of the counter */215counter_value.Src = counter;216counter_value.Value = 0.0f;217counter_value.HasValue = 0;218counter_value.C = c;219for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;220inst = inst->Next){221rc_for_all_writes_mask(inst, update_const_value, &counter_value);222}223if(!counter_value.HasValue){224DBG("Initial counter value cannot be determined.\n");225return 0;226}227DBG("Initial counter value is %f\n", counter_value.Value);228/* Determine how the counter is modified each loop */229count_inst.C = c;230count_inst.Index = counter->Index;231count_inst.Swz = counter->Swizzle;232count_inst.Amount = 0.0f;233count_inst.Unknown = 0;234count_inst.BranchDepth = 0;235end_loops = 1;236for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){237switch(inst->U.I.Opcode){238/* XXX In the future we might want to try to unroll nested239* loops here.*/240case RC_OPCODE_BGNLOOP:241end_loops++;242break;243case RC_OPCODE_ENDLOOP:244loop->EndLoop = inst;245end_loops--;246break;247case RC_OPCODE_BRK:248/* Don't unroll loops if it has a BRK instruction249* other one used when testing the main conditional250* of the loop. */251252/* Make sure we haven't entered a nested loops. */253if(inst != loop->Brk && end_loops == 1) {254return 0;255}256break;257case RC_OPCODE_IF:258count_inst.BranchDepth++;259break;260case RC_OPCODE_ENDIF:261count_inst.BranchDepth--;262break;263default:264rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);265if(count_inst.Unknown){266return 0;267}268break;269}270}271/* Infinite loop */272if(count_inst.Amount == 0.0f){273return 0;274}275DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);276/* Calculate the number of iterations of this loop. Keeping this277* simple, since we only support increment and decrement loops.278*/279limit_value = rc_get_constant_value(c, limit->Index, limit->Swizzle,280limit->Negate, 0);281DBG("Limit is %f.\n", limit_value);282/* The iteration calculations are opposite of what you would expect.283* In a normal loop, if the condition is met, then loop continues, but284* with our loops, if the condition is met, the is exited. */285switch(loop->Cond->U.I.Opcode){286case RC_OPCODE_SGE:287case RC_OPCODE_SLE:288iterations = (int) ceilf((limit_value - counter_value.Value) /289count_inst.Amount);290break;291292case RC_OPCODE_SGT:293case RC_OPCODE_SLT:294iterations = (int) floorf((limit_value - counter_value.Value) /295count_inst.Amount) + 1;296break;297default:298return 0;299}300301if (c->max_alu_insts > 0302&& iterations > loop_max_possible_iterations(c, loop)) {303return 0;304}305306DBG("Loop will have %d iterations.\n", iterations);307308/* Prepare loop for unrolling */309rc_remove_instruction(loop->Cond);310rc_remove_instruction(loop->If);311rc_remove_instruction(loop->Brk);312rc_remove_instruction(loop->EndIf);313314unroll_loop(c, loop, iterations);315loop->EndLoop = NULL;316return 1;317}318319/**320* @param c321* @param loop322* @param inst A pointer to a BGNLOOP instruction.323* @return 1 if all of the members of loop where set.324* @return 0 if there was an error and some members of loop are still NULL.325*/326static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,327struct rc_instruction * inst)328{329struct rc_instruction * ptr;330331if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){332rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);333return 0;334}335336memset(loop, 0, sizeof(struct loop_info));337338loop->BeginLoop = inst;339340for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {341342if (ptr == &c->Program.Instructions) {343rc_error(c, "%s: BGNLOOP without an ENDLOOP.\n",344__FUNCTION__);345return 0;346}347348switch(ptr->U.I.Opcode){349case RC_OPCODE_BGNLOOP:350{351/* Nested loop, skip ahead to the end. */352unsigned int loop_depth = 1;353for(ptr = ptr->Next; ptr != &c->Program.Instructions;354ptr = ptr->Next){355if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {356loop_depth++;357} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {358if (!--loop_depth) {359break;360}361}362}363if (ptr == &c->Program.Instructions) {364rc_error(c, "%s: BGNLOOP without an ENDLOOP\n",365__FUNCTION__);366return 0;367}368break;369}370case RC_OPCODE_BRK:371{372struct rc_src_register *src;373if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF374|| ptr->Prev->U.I.Opcode != RC_OPCODE_IF375|| loop->Brk){376continue;377}378loop->Brk = ptr;379loop->If = ptr->Prev;380loop->EndIf = ptr->Next;381src = &loop->If->U.I.SrcReg[0];382383for (loop->Cond = loop->If->Prev;384loop->Cond->U.I.Opcode != RC_OPCODE_BGNLOOP;385loop->Cond = loop->Cond->Prev) {386387const struct rc_dst_register *dst = &loop->Cond->U.I.DstReg;388if (dst->File == src->File &&389dst->Index == src->Index &&390dst->WriteMask & (rc_swizzle_to_writemask(src->Swizzle))) {391if (loop->Cond->U.I.Opcode == RC_OPCODE_CMP) {392src = &loop->Cond->U.I.SrcReg[0];393continue;394}395break;396}397}398399if (loop->Cond->U.I.Opcode == RC_OPCODE_BGNLOOP) {400rc_error(c, "%s: Cannot find condition for if\n", __FUNCTION__);401return 0;402}403break;404}405case RC_OPCODE_ENDLOOP:406loop->EndLoop = ptr;407break;408}409}410411if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf412&& loop->Cond && loop->EndLoop) {413return 1;414}415return 0;416}417418/**419* This function prepares a loop to be unrolled by converting it into an if420* statement. Here is an outline of the conversion process:421* BGNLOOP; -> BGNLOOP;422* <Additional conditional code> -> <Additional conditional code>423* SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];424* IF temp[0]; -> IF temp[0];425* BRK; ->426* ENDIF; -> <Loop Body>427* <Loop Body> -> ENDIF;428* ENDLOOP; -> ENDLOOP429*430* @param inst A pointer to a BGNLOOP instruction.431* @return 1 for success, 0 for failure432*/433static int transform_loop(struct emulate_loop_state * s,434struct rc_instruction * inst)435{436struct loop_info * loop;437438memory_pool_array_reserve(&s->C->Pool, struct loop_info,439s->Loops, s->LoopCount, s->LoopReserved, 1);440441loop = &s->Loops[s->LoopCount++];442443if (!build_loop_info(s->C, loop, inst)) {444rc_error(s->C, "Failed to build loop info\n");445return 0;446}447448if(try_unroll_loop(s->C, loop)){449return 1;450}451452/* Reverse the conditional instruction */453switch(loop->Cond->U.I.Opcode){454case RC_OPCODE_SGE:455loop->Cond->U.I.Opcode = RC_OPCODE_SLT;456break;457case RC_OPCODE_SLT:458loop->Cond->U.I.Opcode = RC_OPCODE_SGE;459break;460case RC_OPCODE_SLE:461loop->Cond->U.I.Opcode = RC_OPCODE_SGT;462break;463case RC_OPCODE_SGT:464loop->Cond->U.I.Opcode = RC_OPCODE_SLE;465break;466case RC_OPCODE_SEQ:467loop->Cond->U.I.Opcode = RC_OPCODE_SNE;468break;469case RC_OPCODE_SNE:470loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;471break;472default:473rc_error(s->C, "loop->Cond is not a conditional.\n");474return 0;475}476477/* Prepare the loop to be emulated */478rc_remove_instruction(loop->Brk);479rc_remove_instruction(loop->EndIf);480rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);481return 1;482}483484void rc_transform_loops(struct radeon_compiler *c, void *user)485{486struct emulate_loop_state * s = &c->loop_state;487struct rc_instruction * ptr;488489memset(s, 0, sizeof(struct emulate_loop_state));490s->C = c;491for(ptr = s->C->Program.Instructions.Next;492ptr != &s->C->Program.Instructions; ptr = ptr->Next) {493if(ptr->Type == RC_INSTRUCTION_NORMAL &&494ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){495if (!transform_loop(s, ptr))496return;497}498}499}500501void rc_unroll_loops(struct radeon_compiler *c, void *user)502{503struct rc_instruction * inst;504struct loop_info loop;505506for(inst = c->Program.Instructions.Next;507inst != &c->Program.Instructions; inst = inst->Next) {508509if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {510if (build_loop_info(c, &loop, inst)) {511try_unroll_loop(c, &loop);512}513}514}515}516517void rc_emulate_loops(struct radeon_compiler *c, void *user)518{519struct emulate_loop_state * s = &c->loop_state;520int i;521/* Iterate backwards of the list of loops so that loops that nested522* loops are unrolled first.523*/524for( i = s->LoopCount - 1; i >= 0; i-- ){525unsigned int iterations;526527if(!s->Loops[i].EndLoop){528continue;529}530iterations = loop_max_possible_iterations(s->C, &s->Loops[i]);531unroll_loop(s->C, &s->Loops[i], iterations);532}533}534535536