/* Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c */
/* 4574 views */
/*1* Copyright (C) 2005 Ben Skeggs.2*3* Copyright 2008 Corbin Simpson <[email protected]>4* Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.5*6* All Rights Reserved.7*8* Permission is hereby granted, free of charge, to any person obtaining9* a copy of this software and associated documentation files (the10* "Software"), to deal in the Software without restriction, including11* without limitation the rights to use, copy, modify, merge, publish,12* distribute, sublicense, and/or sell copies of the Software, and to13* permit persons to whom the Software is furnished to do so, subject to14* the following conditions:15*16* The above copyright notice and this permission notice (including the17* next paragraph) shall be included in all copies or substantial18* portions of the Software.19*20* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,21* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF22* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.23* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE24* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION25* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION26* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.27*28*/2930/**31* \file32*33* \author Ben Skeggs <[email protected]>34*35* \author Jerome Glisse <[email protected]>36*37* \author Corbin Simpson <[email protected]>38*39*/4041#include "r500_fragprog.h"4243#include "r300_reg.h"4445#include "radeon_program_pair.h"4647#include "util/compiler.h"4849#define PROG_CODE \50struct r500_fragment_program_code *code = &c->code->code.r5005152#define error(fmt, args...) 
do { \53rc_error(&c->Base, "%s::%s(): " fmt "\n", \54__FILE__, __FUNCTION__, ##args); \55} while(0)565758struct branch_info {59int If;60int Else;61int Endif;62};6364struct r500_loop_info {65int BgnLoop;6667int BranchDepth;68int * Brks;69int BrkCount;70int BrkReserved;7172int * Conts;73int ContCount;74int ContReserved;75};7677struct emit_state {78struct radeon_compiler * C;79struct r500_fragment_program_code * Code;8081struct branch_info * Branches;82unsigned int CurrentBranchDepth;83unsigned int BranchesReserved;8485struct r500_loop_info * Loops;86unsigned int CurrentLoopDepth;87unsigned int LoopsReserved;8889unsigned int MaxBranchDepth;9091};9293static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)94{95switch(opcode) {96case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;97case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;98case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;99case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;100case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;101case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;102case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;103default:104error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);105FALLTHROUGH;106case RC_OPCODE_NOP:107FALLTHROUGH;108case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;109case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;110case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;111case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;112}113}114115static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)116{117switch(opcode) {118case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;119case RC_OPCODE_CND: return R500_ALPHA_OP_CND;120case RC_OPCODE_COS: return R500_ALPHA_OP_COS;121case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;122case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;123case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;124case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;125case RC_OPCODE_EX2: return 
R500_ALPHA_OP_EX2;126case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;127case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;128default:129error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);130FALLTHROUGH;131case RC_OPCODE_NOP:132FALLTHROUGH;133case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;134case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;135case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;136case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;137case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;138case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;139}140}141142static unsigned int fix_hw_swizzle(unsigned int swz)143{144switch (swz) {145case RC_SWIZZLE_ZERO:146case RC_SWIZZLE_UNUSED:147swz = 4;148break;149case RC_SWIZZLE_HALF:150swz = 5;151break;152case RC_SWIZZLE_ONE:153swz = 6;154break;155}156157return swz;158}159160static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)161{162unsigned int t = inst->RGB.Arg[arg].Source;163int comp;164t |= inst->RGB.Arg[arg].Negate << 11;165t |= inst->RGB.Arg[arg].Abs << 12;166167for(comp = 0; comp < 3; ++comp)168t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);169170return t;171}172173static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)174{175unsigned int t = inst->Alpha.Arg[i].Source;176t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;177t |= inst->Alpha.Arg[i].Negate << 5;178t |= inst->Alpha.Arg[i].Abs << 6;179return t;180}181182static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)183{184switch(func) {185case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;186case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;187case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;188case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;189default:190rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);191return 0;192}193}194195static void 
use_temporary(struct r500_fragment_program_code* code, unsigned int index)196{197if (index > code->max_temp_idx)198code->max_temp_idx = index;199}200201static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)202{203/* From docs:204* Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.205* MSB = 1 << 7 */206if (!src.Used)207return 1 << 7;208209if (src.File == RC_FILE_CONSTANT) {210return src.Index | R500_RGB_ADDR0_CONST;211} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {212use_temporary(code, src.Index);213return src.Index;214} else if (src.File == RC_FILE_INLINE) {215return src.Index | (1 << 7);216}217218return 0;219}220221/**222* NOP the specified instruction if it is not a texture lookup.223*/224static void alu_nop(struct r300_fragment_program_compiler *c, int ip)225{226PROG_CODE;227228if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {229code->inst[ip].inst0 |= R500_INST_NOP;230}231}232233/**234* Emit a paired ALU instruction.235*/236static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)237{238int ip;239PROG_CODE;240241if (code->inst_end >= c->Base.max_alu_insts-1) {242error("emit_alu: Too many instructions");243return;244}245246ip = ++code->inst_end;247248/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. 
*/249if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||250inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {251if (ip > 0) {252alu_nop(c, ip - 1);253}254}255256code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);257code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);258259if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {260code->inst[ip].inst0 = R500_INST_TYPE_OUT;261if (inst->WriteALUResult) {262error("Cannot write output and ALU result at the same time");263return;264}265} else {266code->inst[ip].inst0 = R500_INST_TYPE_ALU;267}268code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);269270code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);271code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;272code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);273if (inst->Nop) {274code->inst[ip].inst0 |= R500_INST_NOP;275}276if (inst->Alpha.DepthWriteMask) {277code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;278c->code->writes_depth = 1;279}280281code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);282code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);283use_temporary(code, inst->Alpha.DestIndex);284use_temporary(code, inst->RGB.DestIndex);285286if (inst->RGB.Saturate)287code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;288if (inst->Alpha.Saturate)289code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;290291/* Set the presubtract operation. 
*/292switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {293case RC_PRESUB_BIAS:294code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;295break;296case RC_PRESUB_SUB:297code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;298break;299case RC_PRESUB_ADD:300code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;301break;302case RC_PRESUB_INV:303code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;304break;305default:306break;307}308switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {309case RC_PRESUB_BIAS:310code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;311break;312case RC_PRESUB_SUB:313code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;314break;315case RC_PRESUB_ADD:316code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;317break;318case RC_PRESUB_INV:319code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;320break;321default:322break;323}324325/* Set the output modifier */326code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;327code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;328329code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));330code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));331code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));332333code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));334code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));335code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));336337code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;338code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;339code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;340341code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;342code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;343code->inst[ip].inst5 |= translate_arg_alpha(inst, 
2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;344345code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);346code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);347348if (inst->WriteALUResult) {349code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;350351if (inst->WriteALUResult == RC_ALURESULT_X)352code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;353else354code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;355356code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);357}358}359360static unsigned int translate_strq_swizzle(unsigned int swizzle)361{362unsigned int swiz = 0;363int i;364for (i = 0; i < 4; i++)365swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;366return swiz;367}368369/**370* Emit a single TEX instruction371*/372static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)373{374int ip;375PROG_CODE;376377if (code->inst_end >= c->Base.max_alu_insts-1) {378error("emit_tex: Too many instructions");379return 0;380}381382ip = ++code->inst_end;383384code->inst[ip].inst0 = R500_INST_TYPE_TEX385| (inst->DstReg.WriteMask << 11)386| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);387code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)388| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);389390if (inst->TexSrcTarget == RC_TEXTURE_RECT)391code->inst[ip].inst1 |= R500_TEX_UNSCALED;392393switch (inst->Opcode) {394case RC_OPCODE_KIL:395code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;396break;397case RC_OPCODE_TEX:398code->inst[ip].inst1 |= R500_TEX_INST_LD;399break;400case RC_OPCODE_TXB:401code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;402break;403case RC_OPCODE_TXP:404code->inst[ip].inst1 |= R500_TEX_INST_PROJ;405break;406case RC_OPCODE_TXD:407code->inst[ip].inst1 |= R500_TEX_INST_DXDY;408break;409case RC_OPCODE_TXL:410code->inst[ip].inst1 |= R500_TEX_INST_LOD;411break;412default:413error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);414}415416use_temporary(code, 
inst->SrcReg[0].Index);417if (inst->Opcode != RC_OPCODE_KIL)418use_temporary(code, inst->DstReg.Index);419420code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)421| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)422| R500_TEX_DST_ADDR(inst->DstReg.Index)423| (GET_SWZ(inst->TexSwizzle, 0) << 24)424| (GET_SWZ(inst->TexSwizzle, 1) << 26)425| (GET_SWZ(inst->TexSwizzle, 2) << 28)426| (GET_SWZ(inst->TexSwizzle, 3) << 30)427;428429if (inst->Opcode == RC_OPCODE_TXD) {430use_temporary(code, inst->SrcReg[1].Index);431use_temporary(code, inst->SrcReg[2].Index);432433/* DX and DY parameters are specified in a separate register. */434code->inst[ip].inst3 =435R500_DX_ADDR(inst->SrcReg[1].Index) |436(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |437R500_DY_ADDR(inst->SrcReg[2].Index) |438(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);439}440441return 1;442}443444static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)445{446unsigned int newip;447448if (s->Code->inst_end >= s->C->max_alu_insts-1) {449rc_error(s->C, "emit_tex: Too many instructions");450return;451}452453newip = ++s->Code->inst_end;454455/* Currently all loops use the same integer constant to initialize456* the loop variables. 
*/457if(!s->Code->int_constants[0]) {458s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);459s->Code->int_constant_count = 1;460}461s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;462463switch(inst->U.I.Opcode){464struct branch_info * branch;465struct r500_loop_info * loop;466case RC_OPCODE_BGNLOOP:467memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,468s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);469470loop = &s->Loops[s->CurrentLoopDepth++];471memset(loop, 0, sizeof(struct r500_loop_info));472loop->BranchDepth = s->CurrentBranchDepth;473loop->BgnLoop = newip;474475s->Code->inst[newip].inst2 = R500_FC_OP_LOOP476| R500_FC_JUMP_FUNC(0x00)477| R500_FC_IGNORE_UNCOVERED478;479break;480case RC_OPCODE_BRK:481loop = &s->Loops[s->CurrentLoopDepth - 1];482memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,483loop->BrkCount, loop->BrkReserved, 1);484485loop->Brks[loop->BrkCount++] = newip;486s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP487| R500_FC_JUMP_FUNC(0xff)488| R500_FC_B_OP1_DECR489| R500_FC_B_POP_CNT(490s->CurrentBranchDepth - loop->BranchDepth)491| R500_FC_IGNORE_UNCOVERED492;493break;494495case RC_OPCODE_CONT:496loop = &s->Loops[s->CurrentLoopDepth - 1];497memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,498loop->ContCount, loop->ContReserved, 1);499loop->Conts[loop->ContCount++] = newip;500s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE501| R500_FC_JUMP_FUNC(0xff)502| R500_FC_B_OP1_DECR503| R500_FC_B_POP_CNT(504s->CurrentBranchDepth - loop->BranchDepth)505| R500_FC_IGNORE_UNCOVERED506;507break;508509case RC_OPCODE_ENDLOOP:510{511loop = &s->Loops[s->CurrentLoopDepth - 1];512/* Emit ENDLOOP */513s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP514| R500_FC_JUMP_FUNC(0xff)515| R500_FC_JUMP_ANY516| R500_FC_IGNORE_UNCOVERED517;518/* The constant integer at index 0 is used by all loops. 
*/519s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)520| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)521;522523/* Set jump address and int constant for BGNLOOP */524s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)525| R500_FC_JUMP_ADDR(newip)526;527528/* Set jump address for the BRK instructions. */529while(loop->BrkCount--) {530s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =531R500_FC_JUMP_ADDR(newip + 1);532}533534/* Set jump address for CONT instructions. */535while(loop->ContCount--) {536s->Code->inst[loop->Conts[loop->ContCount]].inst3 =537R500_FC_JUMP_ADDR(newip);538}539s->CurrentLoopDepth--;540break;541}542case RC_OPCODE_IF:543if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {544rc_error(s->C, "Branch depth exceeds hardware limit");545return;546}547memory_pool_array_reserve(&s->C->Pool, struct branch_info,548s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);549550branch = &s->Branches[s->CurrentBranchDepth++];551branch->If = newip;552branch->Else = -1;553branch->Endif = -1;554555if (s->CurrentBranchDepth > s->MaxBranchDepth)556s->MaxBranchDepth = s->CurrentBranchDepth;557558/* actual instruction is filled in at ENDIF time */559break;560561case RC_OPCODE_ELSE:562if (!s->CurrentBranchDepth) {563rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);564return;565}566567branch = &s->Branches[s->CurrentBranchDepth - 1];568branch->Else = newip;569570/* actual instruction is filled in at ENDIF time */571break;572573case RC_OPCODE_ENDIF:574if (!s->CurrentBranchDepth) {575rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);576return;577}578579branch = &s->Branches[s->CurrentBranchDepth - 1];580branch->Endif = newip;581582s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP583| R500_FC_A_OP_NONE /* no address stack */584| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */585| R500_FC_B_OP0_DECR /* decrement branch counter if stay */586| R500_FC_B_OP1_NONE /* no branch counter if stay */587| 
R500_FC_B_POP_CNT(1)588;589s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);590s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP591| R500_FC_A_OP_NONE /* no address stack */592| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */593| R500_FC_B_OP0_INCR /* increment branch counter if stay */594| R500_FC_IGNORE_UNCOVERED595;596597if (branch->Else >= 0) {598/* increment branch counter also if jump */599s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;600s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);601602s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP603| R500_FC_A_OP_NONE /* no address stack */604| R500_FC_B_ELSE /* all active pixels want to jump */605| R500_FC_B_OP0_NONE /* no counter op if stay */606| R500_FC_B_OP1_DECR /* decrement branch counter if jump */607| R500_FC_B_POP_CNT(1)608;609s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);610} else {611/* don't touch branch counter on jump */612s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;613s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);614}615616617s->CurrentBranchDepth--;618break;619default:620rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);621}622}623624void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)625{626struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;627struct emit_state s;628struct r500_fragment_program_code *code = &compiler->code->code.r500;629630memset(&s, 0, sizeof(s));631s.C = &compiler->Base;632s.Code = code;633634memset(code, 0, sizeof(*code));635code->max_temp_idx = 1;636code->inst_end = -1;637638for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;639inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;640inst = inst->Next) {641if (inst->Type == RC_INSTRUCTION_NORMAL) {642const struct rc_opcode_info * opcode = 
rc_get_opcode_info(inst->U.I.Opcode);643644if (opcode->IsFlowControl) {645emit_flowcontrol(&s, inst);646} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {647continue;648} else {649emit_tex(compiler, &inst->U.I);650}651} else {652emit_paired(compiler, &inst->U.P);653}654}655656if (code->max_temp_idx >= compiler->Base.max_temp_regs)657rc_error(&compiler->Base, "Too many hardware temporaries used");658659if (compiler->Base.Error)660return;661662if (code->inst_end == -1 ||663(code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {664int ip;665666/* This may happen when dead-code elimination is disabled or667* when most of the fragment program logic is leading to a KIL */668if (code->inst_end >= compiler->Base.max_alu_insts-1) {669rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");670return;671}672673ip = ++code->inst_end;674code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;675}676677/* Make sure TEX_SEM_WAIT is set on the last instruction */678code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;679680/* Enable full flow control mode if we are using loops or have if681* statements nested at least four deep. */682if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {683if (code->max_temp_idx < 1)684code->max_temp_idx = 1;685686code->us_fc_ctrl |= R500_FC_FULL_FC_EN;687}688}689690691