/* Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/radeon_optimize.c */
/* 4574 views */
/*1* Copyright (C) 2009 Nicolai Haehnle.2* Copyright 2010 Tom Stellard <[email protected]>3*4* All Rights Reserved.5*6* Permission is hereby granted, free of charge, to any person obtaining7* a copy of this software and associated documentation files (the8* "Software"), to deal in the Software without restriction, including9* without limitation the rights to use, copy, modify, merge, publish,10* distribute, sublicense, and/or sell copies of the Software, and to11* permit persons to whom the Software is furnished to do so, subject to12* the following conditions:13*14* The above copyright notice and this permission notice (including the15* next paragraph) shall be included in all copies or substantial16* portions of the Software.17*18* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,19* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF20* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.21* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE22* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION23* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION24* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.25*26*/2728#include "radeon_dataflow.h"2930#include "radeon_compiler.h"31#include "radeon_compiler_util.h"32#include "radeon_list.h"33#include "radeon_swizzle.h"34#include "radeon_variable.h"3536struct src_clobbered_reads_cb_data {37rc_register_file File;38unsigned int Index;39unsigned int Mask;40struct rc_reader_data * ReaderData;41};4243typedef void (*rc_presub_replace_fn)(struct rc_instruction *,44struct rc_instruction *,45unsigned int);4647static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)48{49struct rc_src_register combine;50combine.File = inner.File;51combine.Index = inner.Index;52combine.RelAddr = inner.RelAddr;53if (outer.Abs) {54combine.Abs = 1;55combine.Negate = outer.Negate;56} else 
{57combine.Abs = inner.Abs;58combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);59combine.Negate ^= outer.Negate;60}61combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);62return combine;63}6465static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,66struct rc_src_register * src)67{68rc_register_file file = src->File;69struct rc_reader_data * reader_data = data;7071if(!rc_inst_can_use_presub(inst,72reader_data->Writer->U.I.PreSub.Opcode,73rc_swizzle_to_writemask(src->Swizzle),74src,75&reader_data->Writer->U.I.PreSub.SrcReg[0],76&reader_data->Writer->U.I.PreSub.SrcReg[1])) {77reader_data->Abort = 1;78return;79}8081/* XXX This could probably be handled better. */82if (file == RC_FILE_ADDRESS) {83reader_data->Abort = 1;84return;85}8687/* These instructions cannot read from the constants file.88* see radeonTransformTEX()89*/90if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&91reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&92(inst->U.I.Opcode == RC_OPCODE_TEX ||93inst->U.I.Opcode == RC_OPCODE_TXB ||94inst->U.I.Opcode == RC_OPCODE_TXP ||95inst->U.I.Opcode == RC_OPCODE_TXD ||96inst->U.I.Opcode == RC_OPCODE_TXL ||97inst->U.I.Opcode == RC_OPCODE_KIL)){98reader_data->Abort = 1;99return;100}101}102103static void src_clobbered_reads_cb(104void * data,105struct rc_instruction * inst,106struct rc_src_register * src)107{108struct src_clobbered_reads_cb_data * sc_data = data;109110if (src->File == sc_data->File111&& src->Index == sc_data->Index112&& (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {113114sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;115}116117if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {118sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;119}120}121122static void is_src_clobbered_scan_write(123void * data,124struct rc_instruction * inst,125rc_register_file file,126unsigned int index,127unsigned int mask)128{129struct src_clobbered_reads_cb_data sc_data;130struct 
rc_reader_data * reader_data = data;131sc_data.File = file;132sc_data.Index = index;133sc_data.Mask = mask;134sc_data.ReaderData = reader_data;135rc_for_all_reads_src(reader_data->Writer,136src_clobbered_reads_cb, &sc_data);137}138139static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)140{141struct rc_reader_data reader_data;142unsigned int i;143144if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||145inst_mov->U.I.WriteALUResult)146return;147148/* Get a list of all the readers of this MOV instruction. */149reader_data.ExitOnAbort = 1;150rc_get_readers(c, inst_mov, &reader_data,151copy_propagate_scan_read, NULL,152is_src_clobbered_scan_write);153154if (reader_data.Abort || reader_data.ReaderCount == 0)155return;156157/* We can propagate SaturateMode if all the readers are MOV instructions158* without a presubtract operation, source negation and absolute.159* In that case, we just move SaturateMode to all readers. */160if (inst_mov->U.I.SaturateMode) {161for (i = 0; i < reader_data.ReaderCount; i++) {162struct rc_instruction * inst = reader_data.Readers[i].Inst;163164if (inst->U.I.Opcode != RC_OPCODE_MOV ||165inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||166inst->U.I.SrcReg[0].Abs ||167inst->U.I.SrcReg[0].Negate) {168return;169}170}171}172173/* Propagate the MOV instruction. 
*/174for (i = 0; i < reader_data.ReaderCount; i++) {175struct rc_instruction * inst = reader_data.Readers[i].Inst;176*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);177178if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)179inst->U.I.PreSub = inst_mov->U.I.PreSub;180if (!inst->U.I.SaturateMode)181inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;182}183184/* Finally, remove the original MOV instruction */185rc_remove_instruction(inst_mov);186}187188/**189* Check if a source register is actually always the same190* swizzle constant.191*/192static int is_src_uniform_constant(struct rc_src_register src,193rc_swizzle * pswz, unsigned int * pnegate)194{195int have_used = 0;196197if (src.File != RC_FILE_NONE) {198*pswz = 0;199return 0;200}201202for(unsigned int chan = 0; chan < 4; ++chan) {203unsigned int swz = GET_SWZ(src.Swizzle, chan);204if (swz < 4) {205*pswz = 0;206return 0;207}208if (swz == RC_SWIZZLE_UNUSED)209continue;210211if (!have_used) {212*pswz = swz;213*pnegate = GET_BIT(src.Negate, chan);214have_used = 1;215} else {216if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {217*pswz = 0;218return 0;219}220}221}222223return 1;224}225226static void constant_folding_mad(struct rc_instruction * inst)227{228rc_swizzle swz = 0;229unsigned int negate= 0;230231if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {232if (swz == RC_SWIZZLE_ZERO) {233inst->U.I.Opcode = RC_OPCODE_MUL;234return;235}236}237238if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {239if (swz == RC_SWIZZLE_ONE) {240inst->U.I.Opcode = RC_OPCODE_ADD;241if (negate)242inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;243inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];244return;245} else if (swz == RC_SWIZZLE_ZERO) {246inst->U.I.Opcode = RC_OPCODE_MOV;247inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];248return;249}250}251252if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {253if (swz == RC_SWIZZLE_ONE) 
{254inst->U.I.Opcode = RC_OPCODE_ADD;255if (negate)256inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;257inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];258return;259} else if (swz == RC_SWIZZLE_ZERO) {260inst->U.I.Opcode = RC_OPCODE_MOV;261inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];262return;263}264}265}266267static void constant_folding_mul(struct rc_instruction * inst)268{269rc_swizzle swz = 0;270unsigned int negate = 0;271272if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {273if (swz == RC_SWIZZLE_ONE) {274inst->U.I.Opcode = RC_OPCODE_MOV;275inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];276if (negate)277inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;278return;279} else if (swz == RC_SWIZZLE_ZERO) {280inst->U.I.Opcode = RC_OPCODE_MOV;281inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;282return;283}284}285286if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {287if (swz == RC_SWIZZLE_ONE) {288inst->U.I.Opcode = RC_OPCODE_MOV;289if (negate)290inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;291return;292} else if (swz == RC_SWIZZLE_ZERO) {293inst->U.I.Opcode = RC_OPCODE_MOV;294inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;295return;296}297}298}299300static void constant_folding_add(struct rc_instruction * inst)301{302rc_swizzle swz = 0;303unsigned int negate = 0;304305if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {306if (swz == RC_SWIZZLE_ZERO) {307inst->U.I.Opcode = RC_OPCODE_MOV;308inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];309return;310}311}312313if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {314if (swz == RC_SWIZZLE_ZERO) {315inst->U.I.Opcode = RC_OPCODE_MOV;316return;317}318}319}320321/**322* Replace 0.0, 1.0 and 0.5 immediate constants by their323* respective swizzles. 
Simplify instructions like ADD dst, src, 0;324*/325static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)326{327const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);328unsigned int i;329330/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */331for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {332struct rc_constant * constant;333struct rc_src_register newsrc;334int have_real_reference;335unsigned int chan;336337/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */338for (chan = 0; chan < 4; ++chan)339if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)340break;341if (chan == 4) {342inst->U.I.SrcReg[src].File = RC_FILE_NONE;343continue;344}345346/* Convert immediates to swizzles. */347if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||348inst->U.I.SrcReg[src].RelAddr ||349inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)350continue;351352constant =353&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];354355if (constant->Type != RC_CONSTANT_IMMEDIATE)356continue;357358newsrc = inst->U.I.SrcReg[src];359have_real_reference = 0;360for (chan = 0; chan < 4; ++chan) {361unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);362unsigned int newswz;363float imm;364float baseimm;365366if (swz >= 4)367continue;368369imm = constant->u.Immediate[swz];370baseimm = imm;371if (imm < 0.0)372baseimm = -baseimm;373374if (baseimm == 0.0) {375newswz = RC_SWIZZLE_ZERO;376} else if (baseimm == 1.0) {377newswz = RC_SWIZZLE_ONE;378} else if (baseimm == 0.5 && c->has_half_swizzles) {379newswz = RC_SWIZZLE_HALF;380} else {381have_real_reference = 1;382continue;383}384385SET_SWZ(newsrc.Swizzle, chan, newswz);386if (imm < 0.0 && !newsrc.Abs)387newsrc.Negate ^= 1 << chan;388}389390if (!have_real_reference) {391newsrc.File = RC_FILE_NONE;392newsrc.Index = 0;393}394395/* don't make the swizzle worse */396if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) 
&&397c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))398continue;399400inst->U.I.SrcReg[src] = newsrc;401}402403/* Simplify instructions based on constants */404if (inst->U.I.Opcode == RC_OPCODE_MAD)405constant_folding_mad(inst);406407/* note: MAD can simplify to MUL or ADD */408if (inst->U.I.Opcode == RC_OPCODE_MUL)409constant_folding_mul(inst);410else if (inst->U.I.Opcode == RC_OPCODE_ADD)411constant_folding_add(inst);412413/* In case this instruction has been converted, make sure all of the414* registers that are no longer used are empty. */415opcode = rc_get_opcode_info(inst->U.I.Opcode);416for(i = opcode->NumSrcRegs; i < 3; i++) {417memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));418}419}420421/**422* If src and dst use the same register, this function returns a writemask that423* indicates which components are read by src. Otherwise zero is returned.424*/425static unsigned int src_reads_dst_mask(struct rc_src_register src,426struct rc_dst_register dst)427{428if (dst.File != src.File || dst.Index != src.Index) {429return 0;430}431return rc_swizzle_to_writemask(src.Swizzle);432}433434/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)435* in any of its channels. Return 0 otherwise. 
*/436static int src_has_const_swz(struct rc_src_register src) {437int chan;438for(chan = 0; chan < 4; chan++) {439unsigned int swz = GET_SWZ(src.Swizzle, chan);440if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF441|| swz == RC_SWIZZLE_ONE) {442return 1;443}444}445return 0;446}447448static void presub_scan_read(449void * data,450struct rc_instruction * inst,451struct rc_src_register * src)452{453struct rc_reader_data * reader_data = data;454rc_presubtract_op * presub_opcode = reader_data->CbData;455456if (!rc_inst_can_use_presub(inst, *presub_opcode,457reader_data->Writer->U.I.DstReg.WriteMask,458src,459&reader_data->Writer->U.I.SrcReg[0],460&reader_data->Writer->U.I.SrcReg[1])) {461reader_data->Abort = 1;462return;463}464}465466static int presub_helper(467struct radeon_compiler * c,468struct rc_instruction * inst_add,469rc_presubtract_op presub_opcode,470rc_presub_replace_fn presub_replace)471{472struct rc_reader_data reader_data;473unsigned int i;474rc_presubtract_op cb_op = presub_opcode;475476reader_data.CbData = &cb_op;477reader_data.ExitOnAbort = 1;478rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,479is_src_clobbered_scan_write);480481if (reader_data.Abort || reader_data.ReaderCount == 0)482return 0;483484for(i = 0; i < reader_data.ReaderCount; i++) {485unsigned int src_index;486struct rc_reader reader = reader_data.Readers[i];487const struct rc_opcode_info * info =488rc_get_opcode_info(reader.Inst->U.I.Opcode);489490for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {491if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)492presub_replace(inst_add, reader.Inst, src_index);493}494}495return 1;496}497498/* This function assumes that inst_add->U.I.SrcReg[0] and499* inst_add->U.I.SrcReg[1] aren't both negative. 
*/500static void presub_replace_add(501struct rc_instruction * inst_add,502struct rc_instruction * inst_reader,503unsigned int src_index)504{505rc_presubtract_op presub_opcode;506if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)507presub_opcode = RC_PRESUB_SUB;508else509presub_opcode = RC_PRESUB_ADD;510511if (inst_add->U.I.SrcReg[1].Negate) {512inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];513inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];514} else {515inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];516inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];517}518inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;519inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;520inst_reader->U.I.PreSub.Opcode = presub_opcode;521inst_reader->U.I.SrcReg[src_index] =522chain_srcregs(inst_reader->U.I.SrcReg[src_index],523inst_reader->U.I.PreSub.SrcReg[0]);524inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;525inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;526}527528static int is_presub_candidate(529struct radeon_compiler * c,530struct rc_instruction * inst)531{532const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);533unsigned int i;534unsigned int is_constant[2] = {0, 0};535536assert(inst->U.I.Opcode == RC_OPCODE_ADD);537538if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE539|| inst->U.I.SaturateMode540|| inst->U.I.WriteALUResult541|| inst->U.I.Omod) {542return 0;543}544545/* If both sources use a constant swizzle, then we can't convert it to546* a presubtract operation. In fact for the ADD and SUB presubtract547* operations neither source can contain a constant swizzle. This548* specific case is checked in peephole_add_presub_add() when549* we make sure the swizzles for both sources are equal, so we550* don't need to worry about it here. 
*/551for (i = 0; i < 2; i++) {552int chan;553for (chan = 0; chan < 4; chan++) {554rc_swizzle swz =555get_swz(inst->U.I.SrcReg[i].Swizzle, chan);556if (swz == RC_SWIZZLE_ONE557|| swz == RC_SWIZZLE_ZERO558|| swz == RC_SWIZZLE_HALF) {559is_constant[i] = 1;560}561}562}563if (is_constant[0] && is_constant[1])564return 0;565566for(i = 0; i < info->NumSrcRegs; i++) {567struct rc_src_register src = inst->U.I.SrcReg[i];568if (src_reads_dst_mask(src, inst->U.I.DstReg))569return 0;570571src.File = RC_FILE_PRESUB;572if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))573return 0;574}575return 1;576}577578static int peephole_add_presub_add(579struct radeon_compiler * c,580struct rc_instruction * inst_add)581{582unsigned dstmask = inst_add->U.I.DstReg.WriteMask;583unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;584unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;585586if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)587return 0;588589/* src0 and src1 can't have absolute values */590if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)591return 0;592593/* presub_replace_add() assumes only one is negative */594if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)595return 0;596597/* if src0 is negative, at least all bits of dstmask have to be set */598if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)599return 0;600601/* if src1 is negative, at least all bits of dstmask have to be set */602if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)603return 0;604605if (!is_presub_candidate(c, inst_add))606return 0;607608if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {609rc_remove_instruction(inst_add);610return 1;611}612return 0;613}614615static void presub_replace_inv(616struct rc_instruction * inst_add,617struct rc_instruction * inst_reader,618unsigned int src_index)619{620/* We must be careful not to modify inst_add, since it621* is possible it will remain part of the 
program.*/622inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];623inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;624inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;625inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],626inst_reader->U.I.PreSub.SrcReg[0]);627628inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;629inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;630}631632/**633* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]634* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source635* of the add instruction must have the constatnt 1 swizzle. This function636* does not check const registers to see if their value is 1.0, so it should637* be called after the constant_folding optimization.638* @return639* 0 if the ADD instruction is still part of the program.640* 1 if the ADD instruction is no longer part of the program.641*/642static int peephole_add_presub_inv(643struct radeon_compiler * c,644struct rc_instruction * inst_add)645{646unsigned int i, swz;647648if (!is_presub_candidate(c, inst_add))649return 0;650651/* Check if src0 is 1. */652/* XXX It would be nice to use is_src_uniform_constant here, but that653* function only works if the register's file is RC_FILE_NONE */654for(i = 0; i < 4; i++ ) {655swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);656if(((1 << i) & inst_add->U.I.DstReg.WriteMask)657&& swz != RC_SWIZZLE_ONE) {658return 0;659}660}661662/* Check src1. 
*/663if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=664inst_add->U.I.DstReg.WriteMask665|| inst_add->U.I.SrcReg[1].Abs666|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY667&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)668|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {669670return 0;671}672673if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {674rc_remove_instruction(inst_add);675return 1;676}677return 0;678}679680struct peephole_mul_cb_data {681struct rc_dst_register * Writer;682unsigned int Clobbered;683};684685static void omod_filter_reader_cb(686void * userdata,687struct rc_instruction * inst,688rc_register_file file,689unsigned int index,690unsigned int mask)691{692struct peephole_mul_cb_data * d = userdata;693if (rc_src_reads_dst_mask(file, mask, index,694d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) {695696d->Clobbered = 1;697}698}699700static void omod_filter_writer_cb(701void * userdata,702struct rc_instruction * inst,703rc_register_file file,704unsigned int index,705unsigned int mask)706{707struct peephole_mul_cb_data * d = userdata;708if (file == d->Writer->File && index == d->Writer->Index &&709(mask & d->Writer->WriteMask)) {710d->Clobbered = 1;711}712}713714static int peephole_mul_omod(715struct radeon_compiler * c,716struct rc_instruction * inst_mul,717struct rc_list * var_list)718{719unsigned int chan = 0, swz, i;720int const_index = -1;721int temp_index = -1;722float const_value;723rc_omod_op omod_op = RC_OMOD_DISABLE;724struct rc_list * writer_list;725struct rc_variable * var;726struct peephole_mul_cb_data cb_data;727unsigned writemask_sum;728729for (i = 0; i < 2; i++) {730unsigned int j;731if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT732&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {733return 0;734}735if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {736if (temp_index != -1) {737/* The instruction has two temp sources */738return 0;739} else {740temp_index = 
i;741continue;742}743}744/* If we get this far Src[i] must be a constant src */745if (inst_mul->U.I.SrcReg[i].Negate) {746return 0;747}748/* The constant src needs to read from the same swizzle */749swz = RC_SWIZZLE_UNUSED;750chan = 0;751for (j = 0; j < 4; j++) {752unsigned int j_swz =753GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);754if (j_swz == RC_SWIZZLE_UNUSED) {755continue;756}757if (swz == RC_SWIZZLE_UNUSED) {758swz = j_swz;759chan = j;760} else if (j_swz != swz) {761return 0;762}763}764765if (const_index != -1) {766/* The instruction has two constant sources */767return 0;768} else {769const_index = i;770}771}772773if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,774inst_mul->U.I.SrcReg[const_index].Index)) {775return 0;776}777const_value = rc_get_constant_value(c,778inst_mul->U.I.SrcReg[const_index].Index,779inst_mul->U.I.SrcReg[const_index].Swizzle,780inst_mul->U.I.SrcReg[const_index].Negate,781chan);782783if (const_value == 2.0f) {784omod_op = RC_OMOD_MUL_2;785} else if (const_value == 4.0f) {786omod_op = RC_OMOD_MUL_4;787} else if (const_value == 8.0f) {788omod_op = RC_OMOD_MUL_8;789} else if (const_value == (1.0f / 2.0f)) {790omod_op = RC_OMOD_DIV_2;791} else if (const_value == (1.0f / 4.0f)) {792omod_op = RC_OMOD_DIV_4;793} else if (const_value == (1.0f / 8.0f)) {794omod_op = RC_OMOD_DIV_8;795} else {796return 0;797}798799writer_list = rc_variable_list_get_writers_one_reader(var_list,800RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);801802if (!writer_list) {803return 0;804}805806cb_data.Clobbered = 0;807cb_data.Writer = &inst_mul->U.I.DstReg;808for (var = writer_list->Item; var; var = var->Friend) {809struct rc_instruction * inst;810const struct rc_opcode_info * info = rc_get_opcode_info(811var->Inst->U.I.Opcode);812if (info->HasTexture) {813return 0;814}815if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {816return 0;817}818for (inst = inst_mul->Prev; inst != var->Inst;819inst = inst->Prev) 
{820rc_for_all_reads_mask(inst, omod_filter_reader_cb,821&cb_data);822rc_for_all_writes_mask(inst, omod_filter_writer_cb,823&cb_data);824if (cb_data.Clobbered) {825break;826}827}828}829830if (cb_data.Clobbered) {831return 0;832}833834/* Rewrite the instructions */835writemask_sum = rc_variable_writemask_sum(writer_list->Item);836for (var = writer_list->Item; var; var = var->Friend) {837struct rc_variable * writer = var;838unsigned conversion_swizzle = rc_make_conversion_swizzle(839writemask_sum,840inst_mul->U.I.DstReg.WriteMask);841writer->Inst->U.I.Omod = omod_op;842writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;843writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;844rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);845writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;846}847848rc_remove_instruction(inst_mul);849850return 1;851}852853/**854* @return855* 0 if inst is still part of the program.856* 1 if inst is no longer part of the program.857*/858static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)859{860switch(inst->U.I.Opcode){861case RC_OPCODE_ADD:862if (c->has_presub) {863if(peephole_add_presub_inv(c, inst))864return 1;865if(peephole_add_presub_add(c, inst))866return 1;867}868break;869default:870break;871}872return 0;873}874875void rc_optimize(struct radeon_compiler * c, void *user)876{877struct rc_instruction * inst = c->Program.Instructions.Next;878struct rc_list * var_list;879while(inst != &c->Program.Instructions) {880struct rc_instruction * cur = inst;881inst = inst->Next;882883constant_folding(c, cur);884885if(peephole(c, cur))886continue;887888if (cur->U.I.Opcode == RC_OPCODE_MOV) {889copy_propagate(c, cur);890/* cur may no longer be part of the program */891}892}893894if (!c->has_omod) {895return;896}897898inst = c->Program.Instructions.Next;899while(inst != &c->Program.Instructions) {900struct rc_instruction * cur = inst;901inst = inst->Next;902if (cur->U.I.Opcode == RC_OPCODE_MUL) 
{903var_list = rc_get_variables(c);904peephole_mul_omod(c, cur, var_list);905}906}907}908909910