/* Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/radeon_pair_translate.c
 * 4574 views */
/*1* Copyright (C) 2009 Nicolai Haehnle.2*3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining6* a copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sublicense, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial15* portions of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,18* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.20* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE21* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION22* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION23* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25*/2627#include "radeon_program_pair.h"2829#include "radeon_compiler.h"30#include "radeon_compiler_util.h"3132#include "util/compiler.h"333435/**36* Finally rewrite ADD, MOV, MUL as the appropriate native instruction37* and reverse the order of arguments for CMP.38*/39static void final_rewrite(struct rc_sub_instruction *inst)40{41struct rc_src_register tmp;4243switch(inst->Opcode) {44case RC_OPCODE_ADD:45inst->SrcReg[2] = inst->SrcReg[1];46inst->SrcReg[1].File = RC_FILE_NONE;47inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;48inst->SrcReg[1].Negate = RC_MASK_NONE;49inst->Opcode = RC_OPCODE_MAD;50break;51case RC_OPCODE_CMP:52tmp = inst->SrcReg[2];53inst->SrcReg[2] = inst->SrcReg[0];54inst->SrcReg[0] = tmp;55break;56case RC_OPCODE_MOV:57/* AMD say we should use CMP.58* However, when we transform59* KIL -r0;60* 
into61* CMP tmp, -r0, -r0, 0;62* KIL tmp;63* we get incorrect behaviour on R500 when r0 == 0.0.64* It appears that the R500 KIL hardware treats -0.0 as less65* than zero.66*/67inst->SrcReg[1].File = RC_FILE_NONE;68inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;69inst->SrcReg[2].File = RC_FILE_NONE;70inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;71inst->Opcode = RC_OPCODE_MAD;72break;73case RC_OPCODE_MUL:74inst->SrcReg[2].File = RC_FILE_NONE;75inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;76inst->Opcode = RC_OPCODE_MAD;77break;78default:79/* nothing to do */80break;81}82}838485/**86* Classify an instruction according to which ALUs etc. it needs87*/88static void classify_instruction(struct rc_sub_instruction * inst,89int * needrgb, int * needalpha, int * istranscendent)90{91*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;92*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;93*istranscendent = 0;9495if (inst->WriteALUResult == RC_ALURESULT_X)96*needrgb = 1;97else if (inst->WriteALUResult == RC_ALURESULT_W)98*needalpha = 1;99100switch(inst->Opcode) {101case RC_OPCODE_ADD:102case RC_OPCODE_CMP:103case RC_OPCODE_CND:104case RC_OPCODE_DDX:105case RC_OPCODE_DDY:106case RC_OPCODE_FRC:107case RC_OPCODE_MAD:108case RC_OPCODE_MAX:109case RC_OPCODE_MIN:110case RC_OPCODE_MOV:111case RC_OPCODE_MUL:112break;113case RC_OPCODE_COS:114case RC_OPCODE_EX2:115case RC_OPCODE_LG2:116case RC_OPCODE_RCP:117case RC_OPCODE_RSQ:118case RC_OPCODE_SIN:119*istranscendent = 1;120*needalpha = 1;121break;122case RC_OPCODE_DP4:123*needalpha = 1;124FALLTHROUGH;125case RC_OPCODE_DP3:126*needrgb = 1;127break;128default:129break;130}131}132133static void src_uses(struct rc_src_register src, unsigned int * rgb,134unsigned int * alpha)135{136int j;137for(j = 0; j < 4; ++j) {138unsigned int swz = GET_SWZ(src.Swizzle, j);139if (swz < 3)140*rgb = 1;141else if (swz < 4)142*alpha = 1;143}144}145146/**147* Fill the given ALU instruction's opcodes and source operands into the given pair,148* if 
possible.149*/150static void set_pair_instruction(struct r300_fragment_program_compiler *c,151struct rc_pair_instruction * pair,152struct rc_sub_instruction * inst)153{154int needrgb, needalpha, istranscendent;155const struct rc_opcode_info * opcode;156int i;157158memset(pair, 0, sizeof(struct rc_pair_instruction));159160classify_instruction(inst, &needrgb, &needalpha, &istranscendent);161162if (needrgb) {163if (istranscendent)164pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;165else166pair->RGB.Opcode = inst->Opcode;167if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)168pair->RGB.Saturate = 1;169}170if (needalpha) {171pair->Alpha.Opcode = inst->Opcode;172if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)173pair->Alpha.Saturate = 1;174}175176opcode = rc_get_opcode_info(inst->Opcode);177178/* Presubtract handling:179* We need to make sure that the values used by the presubtract180* operation end up in src0 or src1. */181if(inst->PreSub.Opcode != RC_PRESUB_NONE) {182/* rc_pair_alloc_source() will fill in data for183* pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */184int j;185for(j = 0; j < 3; j++) {186int src_regs;187if(inst->SrcReg[j].File != RC_FILE_PRESUB)188continue;189190src_regs = rc_presubtract_src_reg_count(191inst->PreSub.Opcode);192for(i = 0; i < src_regs; i++) {193unsigned int rgb = 0;194unsigned int alpha = 0;195src_uses(inst->SrcReg[j], &rgb, &alpha);196if(rgb) {197pair->RGB.Src[i].File =198inst->PreSub.SrcReg[i].File;199pair->RGB.Src[i].Index =200inst->PreSub.SrcReg[i].Index;201pair->RGB.Src[i].Used = 1;202}203if(alpha) {204pair->Alpha.Src[i].File =205inst->PreSub.SrcReg[i].File;206pair->Alpha.Src[i].Index =207inst->PreSub.SrcReg[i].Index;208pair->Alpha.Src[i].Used = 1;209}210}211}212}213214for(i = 0; i < opcode->NumSrcRegs; ++i) {215int source;216if (needrgb && !istranscendent) {217unsigned int srcrgb = 0;218unsigned int srcalpha = 0;219unsigned int srcmask = 0;220int j;221/* We don't care about the alpha channel here. 
We only222* want the part of the swizzle that writes to rgb,223* since we are creating an rgb instruction. */224for(j = 0; j < 3; ++j) {225unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);226227if (swz < RC_SWIZZLE_W)228srcrgb = 1;229else if (swz == RC_SWIZZLE_W)230srcalpha = 1;231232if (swz < RC_SWIZZLE_UNUSED)233srcmask |= 1 << j;234}235source = rc_pair_alloc_source(pair, srcrgb, srcalpha,236inst->SrcReg[i].File, inst->SrcReg[i].Index);237if (source < 0) {238rc_error(&c->Base, "Failed to translate "239"rgb instruction.\n");240return;241}242pair->RGB.Arg[i].Source = source;243pair->RGB.Arg[i].Swizzle =244rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);245pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;246pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));247}248if (needalpha) {249unsigned int srcrgb = 0;250unsigned int srcalpha = 0;251unsigned int swz;252if (istranscendent) {253swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);254} else {255swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);256}257258if (swz < 3)259srcrgb = 1;260else if (swz < 4)261srcalpha = 1;262source = rc_pair_alloc_source(pair, srcrgb, srcalpha,263inst->SrcReg[i].File, inst->SrcReg[i].Index);264if (source < 0) {265rc_error(&c->Base, "Failed to translate "266"alpha instruction.\n");267return;268}269pair->Alpha.Arg[i].Source = source;270pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);271pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;272273if (istranscendent) {274pair->Alpha.Arg[i].Negate =275!!(inst->SrcReg[i].Negate &276inst->DstReg.WriteMask);277} else {278pair->Alpha.Arg[i].Negate =279!!(inst->SrcReg[i].Negate & RC_MASK_W);280}281}282}283284/* Destination handling */285if (inst->DstReg.File == RC_FILE_OUTPUT) {286if (inst->DstReg.Index == c->OutputDepth) {287pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);288} else {289for (i = 0; i < 4; i++) {290if (inst->DstReg.Index == c->OutputColor[i]) {291pair->RGB.Target = 
i;292pair->Alpha.Target = i;293pair->RGB.OutputWriteMask |=294inst->DstReg.WriteMask & RC_MASK_XYZ;295pair->Alpha.OutputWriteMask |=296GET_BIT(inst->DstReg.WriteMask, 3);297break;298}299}300}301} else {302if (needrgb) {303pair->RGB.DestIndex = inst->DstReg.Index;304pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;305}306307if (needalpha) {308pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);309if (pair->Alpha.WriteMask) {310pair->Alpha.DestIndex = inst->DstReg.Index;311}312}313}314315if (needrgb) {316pair->RGB.Omod = inst->Omod;317}318if (needalpha) {319pair->Alpha.Omod = inst->Omod;320}321322if (inst->WriteALUResult) {323pair->WriteALUResult = inst->WriteALUResult;324pair->ALUResultCompare = inst->ALUResultCompare;325}326}327328329static void check_opcode_support(struct r300_fragment_program_compiler *c,330struct rc_sub_instruction *inst)331{332const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);333334if (opcode->HasDstReg) {335if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {336rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");337return;338}339}340341for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {342if (inst->SrcReg[i].RelAddr) {343rc_error(&c->Base, "Fragment program does not support relative addressing "344" of source operands.\n");345return;346}347}348}349350351/**352* Translate all ALU instructions into corresponding pair instructions,353* performing no other changes.354*/355void rc_pair_translate(struct radeon_compiler *cc, void *user)356{357struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;358359for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;360inst != &c->Base.Program.Instructions;361inst = inst->Next) {362const struct rc_opcode_info * opcode;363struct rc_sub_instruction copy;364365if (inst->Type != RC_INSTRUCTION_NORMAL)366continue;367368opcode = rc_get_opcode_info(inst->U.I.Opcode);369370if (opcode->HasTexture || 
opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)371continue;372373copy = inst->U.I;374375check_opcode_support(c, ©);376377final_rewrite(©);378inst->Type = RC_INSTRUCTION_PAIR;379set_pair_instruction(c, &inst->U.P, ©);380}381}382383384