/* Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/radeon_compiler_util.c */
/* 4574 views */
/*1* Copyright 2010 Tom Stellard <[email protected]>2*3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining6* a copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sublicense, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial15* portions of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,18* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.20* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE21* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION22* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION23* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25*/2627/**28* \file29*/3031#include "radeon_compiler_util.h"3233#include "radeon_compiler.h"34#include "radeon_dataflow.h"35/**36*/37unsigned int rc_swizzle_to_writemask(unsigned int swz)38{39unsigned int mask = 0;40unsigned int i;4142for(i = 0; i < 4; i++) {43mask |= 1 << GET_SWZ(swz, i);44}45mask &= RC_MASK_XYZW;4647return mask;48}4950rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)51{52if (idx & 0x4)53return idx;54return GET_SWZ(swz, idx);55}5657/**58* The purpose of this function is to standardize the number channels used by59* swizzles. 
All swizzles regardless of what instruction they are a part of60* should have 4 channels initialized with values.61* @param channels The number of channels in initial_value that have a62* meaningful value.63* @return An initialized swizzle that has all of the unused channels set to64* RC_SWIZZLE_UNUSED.65*/66unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels)67{68unsigned int i;69for (i = channels; i < 4; i++) {70SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED);71}72return initial_value;73}7475unsigned int combine_swizzles4(unsigned int src,76rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)77{78unsigned int ret = 0;7980ret |= get_swz(src, swz_x);81ret |= get_swz(src, swz_y) << 3;82ret |= get_swz(src, swz_z) << 6;83ret |= get_swz(src, swz_w) << 9;8485return ret;86}8788unsigned int combine_swizzles(unsigned int src, unsigned int swz)89{90unsigned int ret = 0;9192ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));93ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;94ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;95ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;9697return ret;98}99100/**101* @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W102*/103rc_swizzle rc_mask_to_swizzle(unsigned int mask)104{105switch (mask) {106case RC_MASK_X: return RC_SWIZZLE_X;107case RC_MASK_Y: return RC_SWIZZLE_Y;108case RC_MASK_Z: return RC_SWIZZLE_Z;109case RC_MASK_W: return RC_SWIZZLE_W;110}111return RC_SWIZZLE_UNUSED;112}113114/* Reorder mask bits according to swizzle. 
*/115unsigned swizzle_mask(unsigned swizzle, unsigned mask)116{117unsigned ret = 0;118for (unsigned chan = 0; chan < 4; ++chan) {119unsigned swz = GET_SWZ(swizzle, chan);120if (swz < 4)121ret |= GET_BIT(mask, swz) << chan;122}123return ret;124}125126static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)127{128if (info->HasTexture) {129return 0;130}131switch (info->Opcode) {132case RC_OPCODE_DP2:133case RC_OPCODE_DP3:134case RC_OPCODE_DP4:135case RC_OPCODE_DDX:136case RC_OPCODE_DDY:137return 0;138default:139return 1;140}141}142143/**144* @return A swizzle the results from converting old_swizzle using145* conversion_swizzle146*/147unsigned int rc_adjust_channels(148unsigned int old_swizzle,149unsigned int conversion_swizzle)150{151unsigned int i;152unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);153for (i = 0; i < 4; i++) {154unsigned int new_chan = get_swz(conversion_swizzle, i);155if (new_chan == RC_SWIZZLE_UNUSED) {156continue;157}158SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i));159}160return new_swizzle;161}162163static unsigned int rewrite_writemask(164unsigned int old_mask,165unsigned int conversion_swizzle)166{167unsigned int new_mask = 0;168unsigned int i;169170for (i = 0; i < 4; i++) {171if (!GET_BIT(old_mask, i)172|| GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) {173continue;174}175new_mask |= (1 << GET_SWZ(conversion_swizzle, i));176}177178return new_mask;179}180181/**182* This function rewrites the writemask of sub and adjusts the swizzles183* of all its source registers based on the conversion_swizzle.184* conversion_swizzle represents a mapping of the old writemask to the185* new writemask. 
For a detailed description of how conversion swizzles186* work see rc_rewrite_swizzle().187*/188void rc_pair_rewrite_writemask(189struct rc_pair_sub_instruction * sub,190unsigned int conversion_swizzle)191{192const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);193unsigned int i;194195sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);196197if (!srcs_need_rewrite(info)) {198return ;199}200201for (i = 0; i < info->NumSrcRegs; i++) {202sub->Arg[i].Swizzle =203rc_adjust_channels(sub->Arg[i].Swizzle,204conversion_swizzle);205}206}207208static void normal_rewrite_writemask_cb(209void * userdata,210struct rc_instruction * inst,211struct rc_src_register * src)212{213unsigned int * conversion_swizzle = (unsigned int *)userdata;214src->Swizzle = rc_adjust_channels(src->Swizzle, *conversion_swizzle);215}216217/**218* This function is the same as rc_pair_rewrite_writemask() except it219* operates on normal instructions.220*/221void rc_normal_rewrite_writemask(222struct rc_instruction * inst,223unsigned int conversion_swizzle)224{225struct rc_sub_instruction * sub = &inst->U.I;226const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);227sub->DstReg.WriteMask =228rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle);229230if (info->HasTexture) {231unsigned int i;232assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);233for (i = 0; i < 4; i++) {234unsigned int swz = GET_SWZ(conversion_swizzle, i);235if (swz > 3)236continue;237SET_SWZ(sub->TexSwizzle, swz, i);238}239}240241if (!srcs_need_rewrite(info)) {242return;243}244245rc_for_all_reads_src(inst, normal_rewrite_writemask_cb,246&conversion_swizzle);247}248249/**250* This function replaces each value 'swz' in swizzle with the value of251* GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's252* in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want253* to change all the Y's in swizzle to X, then conversion_swizzle should be254* _X__ (0xfc7). 
If you want to change the Y's to X and the X's to Y, then255* conversion swizzle should be YX__ (0xfc1).256* @param swizzle The swizzle to change257* @param conversion_swizzle Describes the conversion to perform on the swizzle258* @return A converted swizzle259*/260unsigned int rc_rewrite_swizzle(261unsigned int swizzle,262unsigned int conversion_swizzle)263{264unsigned int chan;265unsigned int out_swizzle = swizzle;266267for (chan = 0; chan < 4; chan++) {268unsigned int swz = GET_SWZ(swizzle, chan);269unsigned int new_swz;270if (swz > 3) {271SET_SWZ(out_swizzle, chan, swz);272} else {273new_swz = GET_SWZ(conversion_swizzle, swz);274if (new_swz != RC_SWIZZLE_UNUSED) {275SET_SWZ(out_swizzle, chan, new_swz);276} else {277SET_SWZ(out_swizzle, chan, swz);278}279}280}281return out_swizzle;282}283284/**285* Left multiplication of a register with a swizzle286*/287struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)288{289struct rc_src_register tmp = srcreg;290int i;291tmp.Swizzle = 0;292tmp.Negate = 0;293for(i = 0; i < 4; ++i) {294rc_swizzle swz = GET_SWZ(swizzle, i);295if (swz < 4) {296tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);297tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;298} else {299tmp.Swizzle |= swz << (i*3);300}301}302return tmp;303}304305void reset_srcreg(struct rc_src_register* reg)306{307memset(reg, 0, sizeof(struct rc_src_register));308reg->Swizzle = RC_SWIZZLE_XYZW;309}310311unsigned int rc_src_reads_dst_mask(312rc_register_file src_file,313unsigned int src_idx,314unsigned int src_swz,315rc_register_file dst_file,316unsigned int dst_idx,317unsigned int dst_mask)318{319if (src_file != dst_file || src_idx != dst_idx) {320return RC_MASK_NONE;321}322return dst_mask & rc_swizzle_to_writemask(src_swz);323}324325/**326* @return A bit mask specifying whether this swizzle will select from an RGB327* source, an Alpha source, or both.328*/329unsigned int rc_source_type_swz(unsigned int swizzle)330{331unsigned int 
chan;332unsigned int swz = RC_SWIZZLE_UNUSED;333unsigned int ret = RC_SOURCE_NONE;334335for(chan = 0; chan < 4; chan++) {336swz = GET_SWZ(swizzle, chan);337if (swz == RC_SWIZZLE_W) {338ret |= RC_SOURCE_ALPHA;339} else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y340|| swz == RC_SWIZZLE_Z) {341ret |= RC_SOURCE_RGB;342}343}344return ret;345}346347unsigned int rc_source_type_mask(unsigned int mask)348{349unsigned int ret = RC_SOURCE_NONE;350351if (mask & RC_MASK_XYZ)352ret |= RC_SOURCE_RGB;353354if (mask & RC_MASK_W)355ret |= RC_SOURCE_ALPHA;356357return ret;358}359360struct src_select {361rc_register_file File;362int Index;363unsigned int SrcType;364};365366struct can_use_presub_data {367struct src_select Selects[5];368unsigned int SelectCount;369const struct rc_src_register * ReplaceReg;370unsigned int ReplaceRemoved;371};372373static void can_use_presub_data_add_select(374struct can_use_presub_data * data,375rc_register_file file,376unsigned int index,377unsigned int src_type)378{379struct src_select * select;380381select = &data->Selects[data->SelectCount++];382select->File = file;383select->Index = index;384select->SrcType = src_type;385}386387/**388* This callback function counts the number of sources in inst that are389* different from the sources in can_use_presub_data->RemoveSrcs.390*/391static void can_use_presub_read_cb(392void * userdata,393struct rc_instruction * inst,394struct rc_src_register * src)395{396struct can_use_presub_data * d = userdata;397398if (!d->ReplaceRemoved && src == d->ReplaceReg) {399d->ReplaceRemoved = 1;400return;401}402403if (src->File == RC_FILE_NONE)404return;405406can_use_presub_data_add_select(d, src->File, src->Index,407rc_source_type_swz(src->Swizzle));408}409410unsigned int rc_inst_can_use_presub(411struct rc_instruction * inst,412rc_presubtract_op presub_op,413unsigned int presub_writemask,414const struct rc_src_register * replace_reg,415const struct rc_src_register * presub_src0,416const struct rc_src_register * 
presub_src1)417{418struct can_use_presub_data d;419unsigned int num_presub_srcs;420unsigned int i;421const struct rc_opcode_info * info =422rc_get_opcode_info(inst->U.I.Opcode);423int rgb_count = 0, alpha_count = 0;424unsigned int src_type0, src_type1;425426if (presub_op == RC_PRESUB_NONE) {427return 1;428}429430if (info->HasTexture) {431return 0;432}433434/* We can't use more than one presubtract value in an435* instruction, unless the two prsubtract operations436* are the same and read from the same registers.437* XXX For now we will limit instructions to only one presubtract438* value.*/439if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {440return 0;441}442443memset(&d, 0, sizeof(d));444d.ReplaceReg = replace_reg;445446rc_for_all_reads_src(inst, can_use_presub_read_cb, &d);447448num_presub_srcs = rc_presubtract_src_reg_count(presub_op);449450src_type0 = rc_source_type_swz(presub_src0->Swizzle);451can_use_presub_data_add_select(&d,452presub_src0->File,453presub_src0->Index,454src_type0);455456if (num_presub_srcs > 1) {457src_type1 = rc_source_type_swz(presub_src1->Swizzle);458can_use_presub_data_add_select(&d,459presub_src1->File,460presub_src1->Index,461src_type1);462463/* Even if both of the presub sources read from the same464* register, we still need to use 2 different source selects465* for them, so we need to increment the count to compensate.466*/467if (presub_src0->File == presub_src1->File468&& presub_src0->Index == presub_src1->Index) {469if (src_type0 & src_type1 & RC_SOURCE_RGB) {470rgb_count++;471}472if (src_type0 & src_type1 & RC_SOURCE_ALPHA) {473alpha_count++;474}475}476}477478/* Count the number of source selects for Alpha and RGB. If we479* encounter two of the same source selects then we can ignore the480* first one. 
*/481for (i = 0; i < d.SelectCount; i++) {482unsigned int j;483unsigned int src_type = d.Selects[i].SrcType;484for (j = i + 1; j < d.SelectCount; j++) {485if (d.Selects[i].File == d.Selects[j].File486&& d.Selects[i].Index == d.Selects[j].Index) {487src_type &= ~d.Selects[j].SrcType;488}489}490if (src_type & RC_SOURCE_RGB) {491rgb_count++;492}493494if (src_type & RC_SOURCE_ALPHA) {495alpha_count++;496}497}498499if (rgb_count > 3 || alpha_count > 3) {500return 0;501}502503return 1;504}505506struct max_data {507unsigned int Max;508unsigned int HasFileType;509rc_register_file File;510};511512static void max_callback(513void * userdata,514struct rc_instruction * inst,515rc_register_file file,516unsigned int index,517unsigned int mask)518{519struct max_data * d = (struct max_data*)userdata;520if (file == d->File && (!d->HasFileType || index > d->Max)) {521d->Max = index;522d->HasFileType = 1;523}524}525526/**527* @return The maximum index of the specified register file used by the528* program.529*/530int rc_get_max_index(531struct radeon_compiler * c,532rc_register_file file)533{534struct max_data data;535struct rc_instruction * inst;536data.Max = 0;537data.HasFileType = 0;538data.File = file;539for (inst = c->Program.Instructions.Next;540inst != &c->Program.Instructions;541inst = inst->Next) {542rc_for_all_reads_mask(inst, max_callback, &data);543rc_for_all_writes_mask(inst, max_callback, &data);544}545if (!data.HasFileType) {546return -1;547} else {548return data.Max;549}550}551552static unsigned int get_source_readmask(553struct rc_pair_sub_instruction * sub,554unsigned int source,555unsigned int src_type)556{557unsigned int i;558unsigned int readmask = 0;559const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);560561for (i = 0; i < info->NumSrcRegs; i++) {562if (sub->Arg[i].Source != source563|| src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) {564continue;565}566readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle);567}568return 
readmask;569}570571/**572* This function attempts to remove a source from a pair instructions.573* @param inst574* @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd575* @param source The index of the source to remove576* @param new_readmask A mask representing the components that are read by577* the source that is intended to replace the one you are removing. If you578* want to remove a source only and not replace it, this parameter should be579* zero.580* @return 1 if the source was successfully removed, 0 if it was not581*/582unsigned int rc_pair_remove_src(583struct rc_instruction * inst,584unsigned int src_type,585unsigned int source,586unsigned int new_readmask)587{588unsigned int readmask = 0;589590readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type);591readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type);592593if ((new_readmask & readmask) != readmask)594return 0;595596if (src_type & RC_SOURCE_RGB) {597memset(&inst->U.P.RGB.Src[source], 0,598sizeof(struct rc_pair_instruction_source));599}600601if (src_type & RC_SOURCE_ALPHA) {602memset(&inst->U.P.Alpha.Src[source], 0,603sizeof(struct rc_pair_instruction_source));604}605606return 1;607}608609/**610* @return RC_OPCODE_NOOP if inst is not a flow control instruction.611* @return The opcode of inst if it is a flow control instruction.612*/613rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst)614{615const struct rc_opcode_info * info;616if (inst->Type == RC_INSTRUCTION_NORMAL) {617info = rc_get_opcode_info(inst->U.I.Opcode);618} else {619info = rc_get_opcode_info(inst->U.P.RGB.Opcode);620/*A flow control instruction shouldn't have an alpha621* instruction.*/622assert(!info->IsFlowControl ||623inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);624}625626if (info->IsFlowControl)627return info->Opcode;628else629return RC_OPCODE_NOP;630631}632633/**634* @return The BGNLOOP instruction that starts the loop ended by endloop.635*/636struct rc_instruction * 
rc_match_endloop(struct rc_instruction * endloop)637{638unsigned int endloop_count = 0;639struct rc_instruction * inst;640for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) {641rc_opcode op = rc_get_flow_control_inst(inst);642if (op == RC_OPCODE_ENDLOOP) {643endloop_count++;644} else if (op == RC_OPCODE_BGNLOOP) {645if (endloop_count == 0) {646return inst;647} else {648endloop_count--;649}650}651}652return NULL;653}654655/**656* @return The ENDLOOP instruction that ends the loop started by bgnloop.657*/658struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)659{660unsigned int bgnloop_count = 0;661struct rc_instruction * inst;662for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) {663rc_opcode op = rc_get_flow_control_inst(inst);664if (op == RC_OPCODE_BGNLOOP) {665bgnloop_count++;666} else if (op == RC_OPCODE_ENDLOOP) {667if (bgnloop_count == 0) {668return inst;669} else {670bgnloop_count--;671}672}673}674return NULL;675}676677/**678* @return A conversion swizzle for converting from old_mask->new_mask679*/680unsigned int rc_make_conversion_swizzle(681unsigned int old_mask,682unsigned int new_mask)683{684unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);685unsigned int old_idx;686unsigned int new_idx = 0;687for (old_idx = 0; old_idx < 4; old_idx++) {688if (!GET_BIT(old_mask, old_idx))689continue;690for ( ; new_idx < 4; new_idx++) {691if (GET_BIT(new_mask, new_idx)) {692SET_SWZ(conversion_swizzle, old_idx, new_idx);693new_idx++;694break;695}696}697}698return conversion_swizzle;699}700701/**702* @return 1 if the register contains an immediate value, 0 otherwise.703*/704unsigned int rc_src_reg_is_immediate(705struct radeon_compiler * c,706unsigned int file,707unsigned int index)708{709return file == RC_FILE_CONSTANT &&710c->Program.Constants.Constants[index].Type == RC_CONSTANT_IMMEDIATE;711}712713/**714* @return The immediate value in the specified register.715*/716float rc_get_constant_value(717struct 
radeon_compiler * c,718unsigned int index,719unsigned int swizzle,720unsigned int negate,721unsigned int chan)722{723float base = 1.0f;724int swz = GET_SWZ(swizzle, chan);725if(swz >= 4 || index >= c->Program.Constants.Count ){726rc_error(c, "get_constant_value: Can't find a value.\n");727return 0.0f;728}729if(GET_BIT(negate, chan)){730base = -1.0f;731}732return base *733c->Program.Constants.Constants[index].u.Immediate[swz];734}735736/**737* This function returns the component value (RC_SWIZZLE_*) of the first used738* channel in the swizzle. This is only useful for scalar instructions that are739* known to use only one channel of the swizzle.740*/741unsigned int rc_get_scalar_src_swz(unsigned int swizzle)742{743unsigned int swz, chan;744for (chan = 0; chan < 4; chan++) {745swz = GET_SWZ(swizzle, chan);746if (swz != RC_SWIZZLE_UNUSED) {747break;748}749}750assert(swz != RC_SWIZZLE_UNUSED);751return swz;752}753754755