Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c
4574 views
/*1* Copyright (C) 2009 Nicolai Haehnle.2*3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining6* a copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sublicense, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial15* portions of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,18* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.20* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE21* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION22* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION23* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25*/2627#include "radeon_dataflow.h"2829#include "radeon_compiler.h"303132struct updatemask_state {33unsigned char Output[RC_REGISTER_MAX_INDEX];34unsigned char Temporary[RC_REGISTER_MAX_INDEX];35unsigned char Address;36unsigned char Special[RC_NUM_SPECIAL_REGISTERS];37};3839struct instruction_state {40unsigned char WriteMask:4;41unsigned char WriteALUResult:1;42unsigned char SrcReg[3];43};4445struct loopinfo {46struct updatemask_state * Breaks;47unsigned int BreakCount;48unsigned int BreaksReserved;49};5051struct branchinfo {52unsigned int HaveElse:1;5354struct updatemask_state StoreEndif;55struct updatemask_state StoreElse;56};5758struct deadcode_state {59struct radeon_compiler * C;60struct instruction_state * Instructions;6162struct updatemask_state R;6364struct branchinfo * BranchStack;65unsigned int BranchStackSize;66unsigned int BranchStackReserved;6768struct loopinfo * LoopStack;69unsigned int LoopStackSize;70unsigned int LoopStackReserved;71};727374static void or_updatemasks(75struct updatemask_state * dst,76struct updatemask_state * a,77struct updatemask_state * b)78{79for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {80dst->Output[i] = a->Output[i] | b->Output[i];81dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];82}8384for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)85dst->Special[i] = a->Special[i] | b->Special[i];8687dst->Address = a->Address | b->Address;88}8990static void push_break(struct deadcode_state *s)91{92struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];93memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,94loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);9596memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));97}9899static void push_loop(struct deadcode_state * s)100{101memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,102s->LoopStackSize, s->LoopStackReserved, 1);103memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));104}105106static void push_branch(struct deadcode_state * s)107{108struct branchinfo * branch;109110memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,111s->BranchStackSize, s->BranchStackReserved, 1);112113branch = &s->BranchStack[s->BranchStackSize++];114branch->HaveElse = 0;115memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));116}117118static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)119{120if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {121if (index >= RC_REGISTER_MAX_INDEX) {122rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);123return 0;124}125126if (file == RC_FILE_OUTPUT)127return &s->R.Output[index];128else129return &s->R.Temporary[index];130} else if (file == RC_FILE_ADDRESS) {131return &s->R.Address;132} else if (file == RC_FILE_SPECIAL) {133if (index >= RC_NUM_SPECIAL_REGISTERS) {134rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);135return 0;136}137138return &s->R.Special[index];139}140141return 0;142}143144static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)145{146unsigned char * pused = get_used_ptr(s, file, index);147if (pused)148*pused |= mask;149}150151static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)152{153const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);154struct instruction_state * insts = &s->Instructions[inst->IP];155unsigned int usedmask = 0;156unsigned int srcmasks[3];157158if (opcode->HasDstReg) {159unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);160if (pused) {161usedmask = *pused & inst->U.I.DstReg.WriteMask;162*pused &= ~usedmask;163}164}165166insts->WriteMask |= usedmask;167168if (inst->U.I.WriteALUResult) {169unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);170if (pused && *pused) {171if (inst->U.I.WriteALUResult == RC_ALURESULT_X)172usedmask |= RC_MASK_X;173else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)174usedmask |= RC_MASK_W;175176*pused = 0;177insts->WriteALUResult = 1;178}179}180181rc_compute_sources_for_writemask(inst, usedmask, srcmasks);182183for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {184unsigned int refmask = 0;185unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];186insts->SrcReg[src] |= newsrcmask;187188for(unsigned int chan = 0; chan < 4; ++chan) {189if (GET_BIT(newsrcmask, chan))190refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);191}192193/* get rid of spurious bits from ZERO, ONE, etc. swizzles */194refmask &= RC_MASK_XYZW;195196if (!refmask)197continue;198199mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);200201if (inst->U.I.SrcReg[src].RelAddr)202mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);203}204}205206static void mark_output_use(void * data, unsigned int index, unsigned int mask)207{208struct deadcode_state * s = data;209210mark_used(s, RC_FILE_OUTPUT, index, mask);211}212213void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)214{215struct deadcode_state s;216unsigned int nr_instructions;217rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;218unsigned int ip;219220memset(&s, 0, sizeof(s));221s.C = c;222223nr_instructions = rc_recompute_ips(c);224s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);225memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);226227dce(c, &s, &mark_output_use);228229for(struct rc_instruction * inst = c->Program.Instructions.Prev;230inst != &c->Program.Instructions;231inst = inst->Prev) {232const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);233234switch(opcode->Opcode){235/* Mark all sources in the loop body as used before doing236* normal deadcode analysis. This is probably not optimal.237*/238case RC_OPCODE_ENDLOOP:239{240int endloops = 1;241struct rc_instruction *ptr;242for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){243opcode = rc_get_opcode_info(ptr->U.I.Opcode);244if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){245endloops--;246continue;247}248if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){249endloops++;250continue;251}252if(opcode->HasDstReg){253int src = 0;254unsigned int srcmasks[3];255rc_compute_sources_for_writemask(ptr,256ptr->U.I.DstReg.WriteMask, srcmasks);257for(src=0; src < opcode->NumSrcRegs; src++){258mark_used(&s,259ptr->U.I.SrcReg[src].File,260ptr->U.I.SrcReg[src].Index,261srcmasks[src]);262}263}264}265push_loop(&s);266break;267}268case RC_OPCODE_BRK:269push_break(&s);270break;271case RC_OPCODE_BGNLOOP:272{273unsigned int i;274struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];275for(i = 0; i < loop->BreakCount; i++) {276or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);277}278break;279}280case RC_OPCODE_CONT:281break;282case RC_OPCODE_ENDIF:283push_branch(&s);284break;285default:286if (opcode->IsFlowControl && s.BranchStackSize) {287struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];288if (opcode->Opcode == RC_OPCODE_IF) {289or_updatemasks(&s.R,290&s.R,291branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);292293s.BranchStackSize--;294} else if (opcode->Opcode == RC_OPCODE_ELSE) {295if (branch->HaveElse) {296rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);297} else {298memcpy(&branch->StoreElse, &s.R, sizeof(s.R));299memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));300branch->HaveElse = 1;301}302} else {303rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);304}305}306}307308update_instruction(&s, inst);309}310311ip = 0;312for(struct rc_instruction * inst = c->Program.Instructions.Next;313inst != &c->Program.Instructions;314inst = inst->Next, ++ip) {315const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);316int dead = 1;317unsigned int srcmasks[3];318unsigned int usemask;319320if (!opcode->HasDstReg) {321dead = 0;322} else {323inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;324if (s.Instructions[ip].WriteMask)325dead = 0;326327if (s.Instructions[ip].WriteALUResult)328dead = 0;329else330inst->U.I.WriteALUResult = RC_ALURESULT_NONE;331}332333if (dead) {334struct rc_instruction * todelete = inst;335inst = inst->Prev;336rc_remove_instruction(todelete);337continue;338}339340usemask = s.Instructions[ip].WriteMask;341342if (inst->U.I.WriteALUResult == RC_ALURESULT_X)343usemask |= RC_MASK_X;344else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)345usemask |= RC_MASK_W;346347rc_compute_sources_for_writemask(inst, usemask, srcmasks);348349for(unsigned int src = 0; src < 3; ++src) {350for(unsigned int chan = 0; chan < 4; ++chan) {351if (!GET_BIT(srcmasks[src], chan))352SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);353}354}355}356357rc_calculate_inputs_outputs(c);358}359360361