Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/r500_fragprog.c
4574 views
/*1* Copyright 2008 Corbin Simpson <[email protected]>2*3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining6* a copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sublicense, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial15* portions of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,18* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.20* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE21* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION22* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION23* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25*/2627#include "r500_fragprog.h"2829#include <stdio.h>3031#include "radeon_compiler_util.h"32#include "radeon_list.h"33#include "radeon_variable.h"34#include "r300_reg.h"3536#include "util/compiler.h"3738/**39* Rewrite IF instructions to use the ALU result special register.40*/41int r500_transform_IF(42struct radeon_compiler * c,43struct rc_instruction * inst_if,44void *data)45{46struct rc_variable * writer;47struct rc_list * writer_list, * list_ptr;48struct rc_list * var_list = rc_get_variables(c);49unsigned int generic_if = 0;50unsigned int alu_chan;5152if (inst_if->U.I.Opcode != RC_OPCODE_IF) {53return 0;54}5556writer_list = rc_variable_list_get_writers(57var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);58if (!writer_list) {59generic_if = 1;60} else {6162/* Make sure it is safe for the writers to write to63* ALU Result */64for (list_ptr = writer_list; list_ptr;65list_ptr = list_ptr->Next) {66struct rc_instruction * inst;67writer = list_ptr->Item;68/* We are going to modify the destination register69* of writer, so if it has a reader other than70* inst_if (aka ReaderCount > 1) we must fall back to71* our generic IF.72* If the writer has a lower IP than inst_if, this73* means that inst_if is above the writer in a loop.74* I'm not sure why this would ever happen, but75* if it does we want to make sure we fall back76* to our generic IF. */77if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) {78generic_if = 1;79break;80}8182/* The ALU Result is not preserved across IF83* instructions, so if there is another IF84* instruction between writer and inst_if, then85* we need to fall back to generic IF. */86for (inst = writer->Inst; inst != inst_if; inst = inst->Next) {87const struct rc_opcode_info * info =88rc_get_opcode_info(inst->U.I.Opcode);89if (info->IsFlowControl) {90generic_if = 1;91break;92}93}94if (generic_if) {95break;96}97}98}99100if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) {101alu_chan = RC_ALURESULT_X;102} else {103alu_chan = RC_ALURESULT_W;104}105if (generic_if) {106struct rc_instruction * inst_mov =107rc_insert_new_instruction(c, inst_if->Prev);108109inst_mov->U.I.Opcode = RC_OPCODE_MOV;110inst_mov->U.I.DstReg.WriteMask = 0;111inst_mov->U.I.DstReg.File = RC_FILE_NONE;112inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;113inst_mov->U.I.WriteALUResult = alu_chan;114inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];115if (alu_chan == RC_ALURESULT_X) {116inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(117inst_mov->U.I.SrcReg[0].Swizzle,118RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,119RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);120} else {121inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(122inst_mov->U.I.SrcReg[0].Swizzle,123RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,124RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z);125}126} else {127rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER;128unsigned int reverse_srcs = 0;129unsigned int preserve_opcode = 0;130for (list_ptr = writer_list; list_ptr;131list_ptr = list_ptr->Next) {132writer = list_ptr->Item;133switch(writer->Inst->U.I.Opcode) {134case RC_OPCODE_SEQ:135compare_func = RC_COMPARE_FUNC_EQUAL;136break;137case RC_OPCODE_SNE:138compare_func = RC_COMPARE_FUNC_NOTEQUAL;139break;140case RC_OPCODE_SLE:141reverse_srcs = 1;142FALLTHROUGH;143case RC_OPCODE_SGE:144compare_func = RC_COMPARE_FUNC_GEQUAL;145break;146case RC_OPCODE_SGT:147reverse_srcs = 1;148FALLTHROUGH;149case RC_OPCODE_SLT:150compare_func = RC_COMPARE_FUNC_LESS;151break;152default:153compare_func = RC_COMPARE_FUNC_NOTEQUAL;154preserve_opcode = 1;155break;156}157if (!preserve_opcode) {158writer->Inst->U.I.Opcode = RC_OPCODE_SUB;159}160writer->Inst->U.I.DstReg.WriteMask = 0;161writer->Inst->U.I.DstReg.File = RC_FILE_NONE;162writer->Inst->U.I.WriteALUResult = alu_chan;163writer->Inst->U.I.ALUResultCompare = compare_func;164if (reverse_srcs) {165struct rc_src_register temp_src;166temp_src = writer->Inst->U.I.SrcReg[0];167writer->Inst->U.I.SrcReg[0] =168writer->Inst->U.I.SrcReg[1];169writer->Inst->U.I.SrcReg[1] = temp_src;170}171}172}173174inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL;175inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;176inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(177RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,178RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);179inst_if->U.I.SrcReg[0].Negate = 0;180181return 1;182}183184static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)185{186unsigned int relevant;187int i;188189if (opcode == RC_OPCODE_TEX ||190opcode == RC_OPCODE_TXB ||191opcode == RC_OPCODE_TXP ||192opcode == RC_OPCODE_TXD ||193opcode == RC_OPCODE_TXL ||194opcode == RC_OPCODE_KIL) {195if (reg.Abs)196return 0;197198if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))199return 0;200201for(i = 0; i < 4; ++i) {202unsigned int swz = GET_SWZ(reg.Swizzle, i);203if (swz == RC_SWIZZLE_UNUSED) {204reg.Negate &= ~(1 << i);205continue;206}207if (swz >= 4)208return 0;209}210211if (reg.Negate)212return 0;213214return 1;215} else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) {216/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;217* if it doesn't fit perfectly into a .xyzw case... */218if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)219return 1;220221return 0;222} else if (reg.File == RC_FILE_INLINE) {223return 1;224} else {225/* ALU instructions support almost everything */226relevant = 0;227for(i = 0; i < 3; ++i) {228unsigned int swz = GET_SWZ(reg.Swizzle, i);229if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)230relevant |= 1 << i;231}232if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))233return 0;234235return 1;236}237}238239/**240* Split source register access.241*242* The only thing we *cannot* do in an ALU instruction is per-component243* negation.244*/245static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,246struct rc_swizzle_split * split)247{248unsigned int negatebase[2] = { 0, 0 };249int i;250251for(i = 0; i < 4; ++i) {252unsigned int swz = GET_SWZ(src.Swizzle, i);253if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))254continue;255negatebase[GET_BIT(src.Negate, i)] |= 1 << i;256}257258split->NumPhases = 0;259260for(i = 0; i <= 1; ++i) {261if (!negatebase[i])262continue;263264split->Phase[split->NumPhases++] = negatebase[i];265}266}267268struct rc_swizzle_caps r500_swizzle_caps = {269.IsNative = r500_swizzle_is_native,270.Split = r500_swizzle_split271};272273static char *toswiz(int swiz_val) {274switch(swiz_val) {275case 0: return "R";276case 1: return "G";277case 2: return "B";278case 3: return "A";279case 4: return "0";280case 5: return "H";281case 6: return "1";282case 7: return "U";283}284return NULL;285}286287static char *toop(int op_val)288{289char *str = NULL;290switch (op_val) {291case 0: str = "MAD"; break;292case 1: str = "DP3"; break;293case 2: str = "DP4"; break;294case 3: str = "D2A"; break;295case 4: str = "MIN"; break;296case 5: str = "MAX"; break;297case 6: str = "Reserved"; break;298case 7: str = "CND"; break;299case 8: str = "CMP"; break;300case 9: str = "FRC"; break;301case 10: str = "SOP"; break;302case 11: str = "MDH"; break;303case 12: str = "MDV"; break;304}305return str;306}307308static char *to_alpha_op(int op_val)309{310char *str = NULL;311switch (op_val) {312case 0: str = "MAD"; break;313case 1: str = "DP"; break;314case 2: str = "MIN"; break;315case 3: str = "MAX"; break;316case 4: str = "Reserved"; break;317case 5: str = "CND"; break;318case 6: str = "CMP"; break;319case 7: str = "FRC"; break;320case 8: str = "EX2"; break;321case 9: str = "LN2"; break;322case 10: str = "RCP"; break;323case 11: str = "RSQ"; break;324case 12: str = "SIN"; break;325case 13: str = "COS"; break;326case 14: str = "MDH"; break;327case 15: str = "MDV"; break;328}329return str;330}331332static char *to_mask(int val)333{334char *str = NULL;335switch(val) {336case 0: str = "NONE"; break;337case 1: str = "R"; break;338case 2: str = "G"; break;339case 3: str = "RG"; break;340case 4: str = "B"; break;341case 5: str = "RB"; break;342case 6: str = "GB"; break;343case 7: str = "RGB"; break;344case 8: str = "A"; break;345case 9: str = "AR"; break;346case 10: str = "AG"; break;347case 11: str = "ARG"; break;348case 12: str = "AB"; break;349case 13: str = "ARB"; break;350case 14: str = "AGB"; break;351case 15: str = "ARGB"; break;352}353return str;354}355356static char *to_texop(int val)357{358switch(val) {359case 0: return "NOP";360case 1: return "LD";361case 2: return "TEXKILL";362case 3: return "PROJ";363case 4: return "LODBIAS";364case 5: return "LOD";365case 6: return "DXDY";366}367return NULL;368}369370void r500FragmentProgramDump(struct radeon_compiler *c, void *user)371{372struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;373struct r500_fragment_program_code *code = &compiler->code->code.r500;374int n, i;375uint32_t inst;376uint32_t inst0;377char *str = NULL;378fprintf(stderr, "R500 Fragment Program:\n--------\n");379380for (n = 0; n < code->inst_end+1; n++) {381inst0 = inst = code->inst[n].inst0;382fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);383switch(inst & 0x3) {384case R500_INST_TYPE_ALU: str = "ALU"; break;385case R500_INST_TYPE_OUT: str = "OUT"; break;386case R500_INST_TYPE_FC: str = "FC"; break;387case R500_INST_TYPE_TEX: str = "TEX"; break;388}389fprintf(stderr,"%s %s %s %s %s ", str,390inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",391inst & R500_INST_LAST ? "LAST" : "",392inst & R500_INST_NOP ? "NOP" : "",393inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");394fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),395to_mask((inst >> 15) & 0xf));396397switch(inst0 & 0x3) {398case R500_INST_TYPE_ALU:399case R500_INST_TYPE_OUT:400fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);401inst = code->inst[n].inst1;402403fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",404inst & 0xff, (inst & (1<<8)) ? 'c' : 't',405(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',406(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',407(inst >> 30));408409fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);410inst = code->inst[n].inst2;411fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",412inst & 0xff, (inst & (1<<8)) ? 'c' : 't',413(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',414(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',415(inst >> 30));416fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);417inst = code->inst[n].inst3;418fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",419(inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),420(inst >> 11) & 0x3,421(inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),422(inst >> 24) & 0x3, (inst >> 29) & 0x3);423424425fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);426inst = code->inst[n].inst4;427fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),428(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",429(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,430(inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,431(inst >> 29) & 0x3,432(inst >> 31) & 0x1);433434fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);435inst = code->inst[n].inst5;436fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),437(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",438(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),439(inst >> 23) & 0x3,440(inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);441break;442case R500_INST_TYPE_FC:443fprintf(stderr, "\t2:FC_INST 0x%08x:", code->inst[n].inst2);444inst = code->inst[n].inst2;445/* JUMP_FUNC JUMP_ANY*/446fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff,447(inst & R500_FC_JUMP_ANY) >> 5);448449/* OP */450switch(inst & 0x7){451case R500_FC_OP_JUMP:452fprintf(stderr, "JUMP");453break;454case R500_FC_OP_LOOP:455fprintf(stderr, "LOOP");456break;457case R500_FC_OP_ENDLOOP:458fprintf(stderr, "ENDLOOP");459break;460case R500_FC_OP_REP:461fprintf(stderr, "REP");462break;463case R500_FC_OP_ENDREP:464fprintf(stderr, "ENDREP");465break;466case R500_FC_OP_BREAKLOOP:467fprintf(stderr, "BREAKLOOP");468break;469case R500_FC_OP_BREAKREP:470fprintf(stderr, "BREAKREP");471break;472case R500_FC_OP_CONTINUE:473fprintf(stderr, "CONTINUE");474break;475}476fprintf(stderr," ");477/* A_OP */478switch(inst & (0x3 << 6)){479case R500_FC_A_OP_NONE:480fprintf(stderr, "NONE");481break;482case R500_FC_A_OP_POP:483fprintf(stderr, "POP");484break;485case R500_FC_A_OP_PUSH:486fprintf(stderr, "PUSH");487break;488}489/* B_OP0 B_OP1 */490for(i=0; i<2; i++){491fprintf(stderr, " ");492switch(inst & (0x3 << (24 + (i * 2)))){493/* R500_FC_B_OP0_NONE494* R500_FC_B_OP1_NONE */495case 0:496fprintf(stderr, "NONE");497break;498case R500_FC_B_OP0_DECR:499case R500_FC_B_OP1_DECR:500fprintf(stderr, "DECR");501break;502case R500_FC_B_OP0_INCR:503case R500_FC_B_OP1_INCR:504fprintf(stderr, "INCR");505break;506}507}508/*POP_CNT B_ELSE */509fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4);510inst = code->inst[n].inst3;511/* JUMP_ADDR */512fprintf(stderr, " %d", inst >> 16);513514if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){515fprintf(stderr, " IGN_UNC");516}517inst = code->inst[n].inst3;518fprintf(stderr, "\n\t3:FC_ADDR 0x%08x:", inst);519fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n",520inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31);521break;522case R500_INST_TYPE_TEX:523inst = code->inst[n].inst1;524fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,525to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",526(inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");527inst = code->inst[n].inst2;528fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,529inst & 127, inst & (1<<7) ? "(rel)" : "",530toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),531toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),532(inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",533toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),534toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));535536fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);537break;538}539fprintf(stderr,"\n");540}541542}543544545