Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c
4574 views
/*1* Copyright (C) 2005 Ben Skeggs.2*3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining6* a copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sublicense, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial15* portions of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,18* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.20* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE21* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION22* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION23* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25*/2627/**28* \file29*30* Emit the r300_fragment_program_code that can be understood by the hardware.31* Input is a pre-transformed radeon_program.32*33* \author Ben Skeggs <[email protected]>34*35* \author Jerome Glisse <[email protected]>36*/3738#include "r300_fragprog.h"3940#include "r300_reg.h"4142#include "radeon_program_pair.h"43#include "r300_fragprog_swizzle.h"4445#include "util/compiler.h"464748struct r300_emit_state {49struct r300_fragment_program_compiler * compiler;5051unsigned current_node : 2;52unsigned node_first_tex : 8;53unsigned node_first_alu : 8;54uint32_t node_flags;55};5657#define PROG_CODE \58struct r300_fragment_program_compiler *c = emit->compiler; \59struct r300_fragment_program_code *code = &c->code->code.r3006061#define error(fmt, args...) do { \62rc_error(&c->Base, "%s::%s(): " fmt "\n", \63__FILE__, __FUNCTION__, ##args); \64} while(0)6566static unsigned int get_msbs_alu(unsigned int bits)67{68return (bits >> 6) & 0x7;69}7071/**72* @param lsbs The number of least significant bits73*/74static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)75{76return (bits >> lsbs) & 0x15;77}7879#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask)8081/**82* Mark a temporary register as used.83*/84static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)85{86if (index > code->pixsize)87code->pixsize = index;88}8990static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)91{92if (!src.Used)93return 0;9495if (src.File == RC_FILE_CONSTANT) {96return src.Index | (1 << 5);97} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {98use_temporary(code, src.Index);99return src.Index & 0x1f;100}101102return 0;103}104105106static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)107{108switch(opcode) {109case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;110case RC_OPCODE_CND: return R300_ALU_OUTC_CND;111case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;112case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;113case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;114default:115error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);116FALLTHROUGH;117case RC_OPCODE_NOP:118FALLTHROUGH;119case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;120case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;121case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;122case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;123}124}125126static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)127{128switch(opcode) {129case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;130case RC_OPCODE_CND: return R300_ALU_OUTA_CND;131case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;132case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;133case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;134case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;135case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;136default:137error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);138FALLTHROUGH;139case RC_OPCODE_NOP:140FALLTHROUGH;141case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;142case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;143case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;144case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;145case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;146}147}148149/**150* Emit one paired ALU instruction.151*/152static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)153{154int ip;155int j;156PROG_CODE;157158if (code->alu.length >= c->Base.max_alu_insts) {159error("Too many ALU instructions");160return 0;161}162163ip = code->alu.length++;164165code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);166code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);167168for(j = 0; j < 3; ++j) {169/* Set the RGB address */170unsigned int src = use_source(code, inst->RGB.Src[j]);171unsigned int arg;172if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)173code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);174175code->alu.inst[ip].rgb_addr |= src << (6*j);176177/* Set the Alpha address */178src = use_source(code, inst->Alpha.Src[j]);179if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)180code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);181182code->alu.inst[ip].alpha_addr |= src << (6*j);183184arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);185arg |= inst->RGB.Arg[j].Abs << 6;186arg |= inst->RGB.Arg[j].Negate << 5;187code->alu.inst[ip].rgb_inst |= arg << (7*j);188189arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);190arg |= inst->Alpha.Arg[j].Abs << 6;191arg |= inst->Alpha.Arg[j].Negate << 5;192code->alu.inst[ip].alpha_inst |= arg << (7*j);193}194195/* Presubtract */196if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {197switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {198case RC_PRESUB_BIAS:199code->alu.inst[ip].rgb_inst |=200R300_ALU_SRCP_1_MINUS_2_SRC0;201break;202case RC_PRESUB_ADD:203code->alu.inst[ip].rgb_inst |=204R300_ALU_SRCP_SRC1_PLUS_SRC0;205break;206case RC_PRESUB_SUB:207code->alu.inst[ip].rgb_inst |=208R300_ALU_SRCP_SRC1_MINUS_SRC0;209break;210case RC_PRESUB_INV:211code->alu.inst[ip].rgb_inst |=212R300_ALU_SRCP_1_MINUS_SRC0;213break;214default:215break;216}217}218219if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {220switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {221case RC_PRESUB_BIAS:222code->alu.inst[ip].alpha_inst |=223R300_ALU_SRCP_1_MINUS_2_SRC0;224break;225case RC_PRESUB_ADD:226code->alu.inst[ip].alpha_inst |=227R300_ALU_SRCP_SRC1_PLUS_SRC0;228break;229case RC_PRESUB_SUB:230code->alu.inst[ip].alpha_inst |=231R300_ALU_SRCP_SRC1_MINUS_SRC0;232break;233case RC_PRESUB_INV:234code->alu.inst[ip].alpha_inst |=235R300_ALU_SRCP_1_MINUS_SRC0;236break;237default:238break;239}240}241242if (inst->RGB.Saturate)243code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;244if (inst->Alpha.Saturate)245code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;246247if (inst->RGB.WriteMask) {248use_temporary(code, inst->RGB.DestIndex);249if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)250code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;251code->alu.inst[ip].rgb_addr |=252((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |253(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);254}255if (inst->RGB.OutputWriteMask) {256code->alu.inst[ip].rgb_addr |=257(inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |258R300_RGB_TARGET(inst->RGB.Target);259emit->node_flags |= R300_RGBA_OUT;260}261262if (inst->Alpha.WriteMask) {263use_temporary(code, inst->Alpha.DestIndex);264if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)265code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;266code->alu.inst[ip].alpha_addr |=267((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |268R300_ALU_DSTA_REG;269}270if (inst->Alpha.OutputWriteMask) {271code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |272R300_ALPHA_TARGET(inst->Alpha.Target);273emit->node_flags |= R300_RGBA_OUT;274}275if (inst->Alpha.DepthWriteMask) {276code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;277emit->node_flags |= R300_W_OUT;278c->code->writes_depth = 1;279}280if (inst->Nop)281code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;282283/* Handle Output Modifier284* According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */285if (inst->RGB.Omod) {286if (inst->RGB.Omod == RC_OMOD_DISABLE) {287rc_error(&c->Base, "RC_OMOD_DISABLE not supported");288}289code->alu.inst[ip].rgb_inst |=290(inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);291}292if (inst->Alpha.Omod) {293if (inst->Alpha.Omod == RC_OMOD_DISABLE) {294rc_error(&c->Base, "RC_OMOD_DISABLE not supported");295}296code->alu.inst[ip].alpha_inst |=297(inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);298}299return 1;300}301302303/**304* Finish the current node without advancing to the next one.305*/306static int finish_node(struct r300_emit_state * emit)307{308struct r300_fragment_program_compiler * c = emit->compiler;309struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;310unsigned alu_offset;311unsigned alu_end;312unsigned tex_offset;313unsigned tex_end;314315unsigned int alu_offset_msbs, alu_end_msbs;316317if (code->alu.length == emit->node_first_alu) {318/* Generate a single NOP for this node */319struct rc_pair_instruction inst;320memset(&inst, 0, sizeof(inst));321if (!emit_alu(emit, &inst))322return 0;323}324325alu_offset = emit->node_first_alu;326alu_end = code->alu.length - alu_offset - 1;327tex_offset = emit->node_first_tex;328tex_end = code->tex.length - tex_offset - 1;329330if (code->tex.length == emit->node_first_tex) {331if (emit->current_node > 0) {332error("Node %i has no TEX instructions", emit->current_node);333return 0;334}335336tex_end = 0;337} else {338if (emit->current_node == 0)339code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;340}341342/* Write the config register.343* Note: The order in which the words for each node are written344* is not correct here and needs to be fixed up once we're entirely345* done346*347* Also note that the register specification from AMD is slightly348* incorrect in its description of this register. */349code->code_addr[emit->current_node] =350((alu_offset << R300_ALU_START_SHIFT)351& R300_ALU_START_MASK)352| ((alu_end << R300_ALU_SIZE_SHIFT)353& R300_ALU_SIZE_MASK)354| ((tex_offset << R300_TEX_START_SHIFT)355& R300_TEX_START_MASK)356| ((tex_end << R300_TEX_SIZE_SHIFT)357& R300_TEX_SIZE_MASK)358| emit->node_flags359| (get_msbs_tex(tex_offset, 5)360<< R400_TEX_START_MSB_SHIFT)361| (get_msbs_tex(tex_end, 5)362<< R400_TEX_SIZE_MSB_SHIFT)363;364365/* Write r400 extended instruction fields. These will be ignored on366* r300 cards. */367alu_offset_msbs = get_msbs_alu(alu_offset);368alu_end_msbs = get_msbs_alu(alu_end);369switch(emit->current_node) {370case 0:371code->r400_code_offset_ext |=372alu_offset_msbs << R400_ALU_START3_MSB_SHIFT373| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;374break;375case 1:376code->r400_code_offset_ext |=377alu_offset_msbs << R400_ALU_START2_MSB_SHIFT378| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;379break;380case 2:381code->r400_code_offset_ext |=382alu_offset_msbs << R400_ALU_START1_MSB_SHIFT383| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;384break;385case 3:386code->r400_code_offset_ext |=387alu_offset_msbs << R400_ALU_START0_MSB_SHIFT388| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;389break;390}391return 1;392}393394395/**396* Begin a block of texture instructions.397* Create the necessary indirection.398*/399static int begin_tex(struct r300_emit_state * emit)400{401PROG_CODE;402403if (code->alu.length == emit->node_first_alu &&404code->tex.length == emit->node_first_tex) {405return 1;406}407408if (emit->current_node == 3) {409error("Too many texture indirections");410return 0;411}412413if (!finish_node(emit))414return 0;415416emit->current_node++;417emit->node_first_tex = code->tex.length;418emit->node_first_alu = code->alu.length;419emit->node_flags = 0;420return 1;421}422423424static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)425{426unsigned int unit;427unsigned int dest;428unsigned int opcode;429PROG_CODE;430431if (code->tex.length >= emit->compiler->Base.max_tex_insts) {432error("Too many TEX instructions");433return 0;434}435436unit = inst->U.I.TexSrcUnit;437dest = inst->U.I.DstReg.Index;438439switch(inst->U.I.Opcode) {440case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;441case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;442case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;443case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;444default:445error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);446return 0;447}448449if (inst->U.I.Opcode == RC_OPCODE_KIL) {450unit = 0;451dest = 0;452} else {453use_temporary(code, dest);454}455456use_temporary(code, inst->U.I.SrcReg[0].Index);457458code->tex.inst[code->tex.length++] =459((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)460& R300_SRC_ADDR_MASK)461| ((dest << R300_DST_ADDR_SHIFT)462& R300_DST_ADDR_MASK)463| (unit << R300_TEX_ID_SHIFT)464| (opcode << R300_TEX_INST_SHIFT)465| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?466R400_SRC_ADDR_EXT_BIT : 0)467| (dest >= R300_PFS_NUM_TEMP_REGS ?468R400_DST_ADDR_EXT_BIT : 0)469;470return 1;471}472473474/**475* Final compilation step: Turn the intermediate radeon_program into476* machine-readable instructions.477*/478void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)479{480struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;481struct r300_emit_state emit;482struct r300_fragment_program_code *code = &compiler->code->code.r300;483unsigned int tex_end;484485memset(&emit, 0, sizeof(emit));486emit.compiler = compiler;487488memset(code, 0, sizeof(struct r300_fragment_program_code));489490for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;491inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;492inst = inst->Next) {493if (inst->Type == RC_INSTRUCTION_NORMAL) {494if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {495begin_tex(&emit);496continue;497}498499emit_tex(&emit, inst);500} else {501emit_alu(&emit, &inst->U.P);502}503}504505if (code->pixsize >= compiler->Base.max_temp_regs)506rc_error(&compiler->Base, "Too many hardware temporaries used.\n");507508if (compiler->Base.Error)509return;510511/* Finish the program */512finish_node(&emit);513514code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */515516/* Set r400 extended instruction fields. These values will be ignored517* on r300 cards. */518code->r400_code_offset_ext |=519(get_msbs_alu(0)520<< R400_ALU_OFFSET_MSB_SHIFT)521| (get_msbs_alu(code->alu.length - 1)522<< R400_ALU_SIZE_MSB_SHIFT);523524tex_end = code->tex.length ? code->tex.length - 1 : 0;525code->code_offset =526((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)527& R300_PFS_CNTL_ALU_OFFSET_MASK)528| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)529& R300_PFS_CNTL_ALU_END_MASK)530| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)531& R300_PFS_CNTL_TEX_OFFSET_MASK)532| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)533& R300_PFS_CNTL_TEX_END_MASK)534| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)535| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)536;537538if (emit.current_node < 3) {539int shift = 3 - emit.current_node;540int i;541for(i = emit.current_node; i >= 0; --i)542code->code_addr[shift + i] = code->code_addr[i];543for(i = 0; i < shift; ++i)544code->code_addr[i] = 0;545}546547if (code->pixsize >= R300_PFS_NUM_TEMP_REGS548|| code->alu.length > R300_PFS_MAX_ALU_INST549|| code->tex.length > R300_PFS_MAX_TEX_INST) {550551code->r390_mode = 1;552}553}554555556