Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
4574 views
#include <strings.h>1#include "pipe/p_context.h"2#include "pipe/p_defines.h"3#include "pipe/p_state.h"4#include "util/compiler.h"5#include "util/u_dynarray.h"6#include "util/u_debug.h"7#include "util/u_memory.h"89#include "pipe/p_shader_tokens.h"10#include "tgsi/tgsi_parse.h"11#include "tgsi/tgsi_dump.h"12#include "tgsi/tgsi_util.h"13#include "tgsi/tgsi_ureg.h"1415#include "draw/draw_context.h"1617#include "nv_object.xml.h"18#include "nouveau_debug.h"19#include "nv30/nv30-40_3d.xml.h"20#include "nv30/nv30_state.h"2122/* TODO (at least...):23* 1. Indexed consts + ARL24* 3. NV_vp11, NV_vp2, NV_vp3 features25* - extra arith opcodes26* - branching27* - texture sampling28* - indexed attribs29* - indexed results30* 4. bugs31*/3233#include "nv30/nv30_vertprog.h"34#include "nv30/nv40_vertprog.h"3536struct nvfx_loop_entry {37unsigned brk_target;38unsigned cont_target;39};4041struct nvfx_vpc {42struct pipe_shader_state pipe;43struct nv30_vertprog *vp;44struct tgsi_shader_info* info;4546struct nv30_vertprog_exec *vpi;4748unsigned r_temps;49unsigned r_temps_discard;50struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];51struct nvfx_reg *r_address;52struct nvfx_reg *r_temp;53struct nvfx_reg *r_const;54struct nvfx_reg r_0_1;5556struct nvfx_reg *imm;57unsigned nr_imm;5859int hpos_idx;60int cvtx_idx;6162unsigned is_nv4x;6364struct util_dynarray label_relocs;65struct util_dynarray loop_stack;66};6768static struct nvfx_reg69temp(struct nvfx_vpc *vpc)70{71int idx = ffs(~vpc->r_temps) - 1;7273if (idx < 0 || (!vpc->is_nv4x && idx >= 16)) {74NOUVEAU_ERR("out of temps!!\n");75return nvfx_reg(NVFXSR_TEMP, 0);76}7778vpc->r_temps |= (1 << idx);79vpc->r_temps_discard |= (1 << idx);80return nvfx_reg(NVFXSR_TEMP, idx);81}8283static inline void84release_temps(struct nvfx_vpc *vpc)85{86vpc->r_temps &= ~vpc->r_temps_discard;87vpc->r_temps_discard = 0;88}8990static struct nvfx_reg91constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w)92{93struct nv30_vertprog *vp = vpc->vp;94struct nv30_vertprog_data *vpd;95int idx;9697if (pipe >= 0) {98for (idx = 0; idx < vp->nr_consts; idx++) {99if (vp->consts[idx].index == pipe)100return nvfx_reg(NVFXSR_CONST, idx);101}102}103104idx = vp->nr_consts++;105vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);106vpd = &vp->consts[idx];107108vpd->index = pipe;109vpd->value[0] = x;110vpd->value[1] = y;111vpd->value[2] = z;112vpd->value[3] = w;113return nvfx_reg(NVFXSR_CONST, idx);114}115116#define arith(s,t,o,d,m,s0,s1,s2) \117nvfx_insn((s), (NVFX_VP_INST_SLOT_##t << 7) | NVFX_VP_INST_##t##_OP_##o, -1, (d), (m), (s0), (s1), (s2))118119static void120emit_src(struct nvfx_vpc *vpc, uint32_t *hw,121int pos, struct nvfx_src src)122{123struct nv30_vertprog *vp = vpc->vp;124uint32_t sr = 0;125struct nvfx_relocation reloc;126127switch (src.reg.type) {128case NVFXSR_TEMP:129sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT));130sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT));131break;132case NVFXSR_INPUT:133sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<134NVFX_VP(SRC_REG_TYPE_SHIFT));135vp->ir |= (1 << src.reg.index);136hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT));137break;138case NVFXSR_CONST:139sr |= (NVFX_VP(SRC_REG_TYPE_CONST) <<140NVFX_VP(SRC_REG_TYPE_SHIFT));141if (src.reg.index < 256 && src.reg.index >= -256) {142reloc.location = vp->nr_insns - 1;143reloc.target = src.reg.index;144util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc);145} else {146hw[1] |= (src.reg.index << NVFX_VP(INST_CONST_SRC_SHIFT)) &147NVFX_VP(INST_CONST_SRC_MASK);148}149break;150case NVFXSR_NONE:151sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<152NVFX_VP(SRC_REG_TYPE_SHIFT));153break;154default:155assert(0);156}157158if (src.negate)159sr |= NVFX_VP(SRC_NEGATE);160161if (src.abs)162hw[0] |= (1 << (21 + pos));163164sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) |165(src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) |166(src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) |167(src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT)));168169if(src.indirect) {170if(src.reg.type == NVFXSR_CONST)171hw[3] |= NVFX_VP(INST_INDEX_CONST);172else if(src.reg.type == NVFXSR_INPUT)173hw[0] |= NVFX_VP(INST_INDEX_INPUT);174else175assert(0);176177if(src.indirect_reg)178hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1);179hw[0] |= src.indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT);180}181182switch (pos) {183case 0:184hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >>185NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT);186hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) <<187NVFX_VP(INST_SRC0L_SHIFT);188break;189case 1:190hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT);191break;192case 2:193hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >>194NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT);195hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) <<196NVFX_VP(INST_SRC2L_SHIFT);197break;198default:199assert(0);200}201}202203static void204emit_dst(struct nvfx_vpc *vpc, uint32_t *hw,205int slot, struct nvfx_reg dst)206{207struct nv30_vertprog *vp = vpc->vp;208209switch (dst.type) {210case NVFXSR_NONE:211if(!vpc->is_nv4x)212hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK;213else {214hw[3] |= NV40_VP_INST_DEST_MASK;215if (slot == 0)216hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;217else218hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;219}220break;221case NVFXSR_TEMP:222if(!vpc->is_nv4x)223hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);224else {225hw[3] |= NV40_VP_INST_DEST_MASK;226if (slot == 0)227hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT);228else229hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT);230}231break;232case NVFXSR_OUTPUT:233/* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */234if(vpc->is_nv4x) {235switch (dst.index) {236case NV30_VP_INST_DEST_CLP(0):237dst.index = NVFX_VP(INST_DEST_FOGC);238vp->or |= (1 << 6);239break;240case NV30_VP_INST_DEST_CLP(1):241dst.index = NVFX_VP(INST_DEST_FOGC);242vp->or |= (1 << 7);243break;244case NV30_VP_INST_DEST_CLP(2):245dst.index = NVFX_VP(INST_DEST_FOGC);246vp->or |= (1 << 8);247break;248case NV30_VP_INST_DEST_CLP(3):249dst.index = NVFX_VP(INST_DEST_PSZ);250vp->or |= (1 << 9);251break;252case NV30_VP_INST_DEST_CLP(4):253dst.index = NVFX_VP(INST_DEST_PSZ);254vp->or |= (1 << 10);255break;256case NV30_VP_INST_DEST_CLP(5):257dst.index = NVFX_VP(INST_DEST_PSZ);258vp->or |= (1 << 11);259break;260case NV40_VP_INST_DEST_COL0: vp->or |= (1 << 0); break;261case NV40_VP_INST_DEST_COL1: vp->or |= (1 << 1); break;262case NV40_VP_INST_DEST_BFC0: vp->or |= (1 << 2); break;263case NV40_VP_INST_DEST_BFC1: vp->or |= (1 << 3); break;264case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;265case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;266}267}268269if(!vpc->is_nv4x) {270hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);271hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK;272273/*XXX: no way this is entirely correct, someone needs to274* figure out what exactly it is.275*/276hw[3] |= 0x800;277} else {278hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);279if (slot == 0) {280hw[0] |= NV40_VP_INST_VEC_RESULT;281hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;282} else {283hw[3] |= NV40_VP_INST_SCA_RESULT;284hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;285}286}287break;288default:289assert(0);290}291}292293static void294nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn)295{296struct nv30_vertprog *vp = vpc->vp;297unsigned slot = insn.op >> 7;298unsigned op = insn.op & 0x7f;299uint32_t *hw;300301vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));302vpc->vpi = &vp->insns[vp->nr_insns - 1];303memset(vpc->vpi, 0, sizeof(*vpc->vpi));304305hw = vpc->vpi->data;306307if (insn.cc_test != NVFX_COND_TR)308hw[0] |= NVFX_VP(INST_COND_TEST_ENABLE);309hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT));310hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) |311(insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) |312(insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) |313(insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT)));314if(insn.cc_update)315hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE);316317if(insn.sat) {318assert(vpc->is_nv4x);319if(vpc->is_nv4x)320hw[0] |= NV40_VP_INST_SATURATE;321}322323if(!vpc->is_nv4x) {324if(slot == 0)325hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);326else {327hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT);328hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT);329}330// hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK);331// hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT));332333if (insn.dst.type == NVFXSR_OUTPUT) {334if (slot)335hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);336else337hw[3] |= (insn.mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);338} else {339if (slot)340hw[3] |= (insn.mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);341else342hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);343}344} else {345if (slot == 0) {346hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);347hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;348hw[3] |= (insn.mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);349} else {350hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);351hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK ;352hw[3] |= (insn.mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);353}354}355356emit_dst(vpc, hw, slot, insn.dst);357emit_src(vpc, hw, 0, insn.src[0]);358emit_src(vpc, hw, 1, insn.src[1]);359emit_src(vpc, hw, 2, insn.src[2]);360361// if(insn.src[0].indirect || op == NVFX_VP_INST_VEC_OP_ARL)362// hw[3] |= NV40_VP_INST_SCA_RESULT;363}364365static inline struct nvfx_src366tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {367struct nvfx_src src;368369switch (fsrc->Register.File) {370case TGSI_FILE_INPUT:371src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index);372break;373case TGSI_FILE_CONSTANT:374if(fsrc->Register.Indirect) {375src.reg = vpc->r_const[0];376src.reg.index = fsrc->Register.Index;377} else {378src.reg = vpc->r_const[fsrc->Register.Index];379}380break;381case TGSI_FILE_IMMEDIATE:382src.reg = vpc->imm[fsrc->Register.Index];383break;384case TGSI_FILE_TEMPORARY:385src.reg = vpc->r_temp[fsrc->Register.Index];386break;387default:388NOUVEAU_ERR("bad src file\n");389src.reg.index = 0;390src.reg.type = -1;391break;392}393394src.abs = fsrc->Register.Absolute;395src.negate = fsrc->Register.Negate;396src.swz[0] = fsrc->Register.SwizzleX;397src.swz[1] = fsrc->Register.SwizzleY;398src.swz[2] = fsrc->Register.SwizzleZ;399src.swz[3] = fsrc->Register.SwizzleW;400src.indirect = 0;401src.indirect_reg = 0;402src.indirect_swz = 0;403404if(fsrc->Register.Indirect) {405if(fsrc->Indirect.File == TGSI_FILE_ADDRESS &&406(fsrc->Register.File == TGSI_FILE_CONSTANT ||407fsrc->Register.File == TGSI_FILE_INPUT)) {408src.indirect = 1;409src.indirect_reg = fsrc->Indirect.Index;410src.indirect_swz = fsrc->Indirect.Swizzle;411} else {412src.reg.index = 0;413src.reg.type = -1;414}415}416417return src;418}419420static inline struct nvfx_reg421tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {422struct nvfx_reg dst;423424switch (fdst->Register.File) {425case TGSI_FILE_NULL:426dst = nvfx_reg(NVFXSR_NONE, 0);427break;428case TGSI_FILE_OUTPUT:429dst = vpc->r_result[fdst->Register.Index];430break;431case TGSI_FILE_TEMPORARY:432dst = vpc->r_temp[fdst->Register.Index];433break;434case TGSI_FILE_ADDRESS:435dst = vpc->r_address[fdst->Register.Index];436break;437default:438NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File);439dst.index = 0;440dst.type = 0;441break;442}443444return dst;445}446447static inline int448tgsi_mask(uint tgsi)449{450int mask = 0;451452if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X;453if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y;454if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z;455if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W;456return mask;457}458459static bool460nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,461unsigned idx, const struct tgsi_full_instruction *finst)462{463struct nvfx_src src[3], tmp;464struct nvfx_reg dst;465struct nvfx_reg final_dst;466struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));467struct nvfx_insn insn;468struct nvfx_relocation reloc;469struct nvfx_loop_entry loop;470bool sat = false;471int mask;472int ai = -1, ci = -1, ii = -1;473int i;474unsigned sub_depth = 0;475476for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {477const struct tgsi_full_src_register *fsrc;478479fsrc = &finst->Src[i];480if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {481src[i] = tgsi_src(vpc, fsrc);482}483}484485for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {486const struct tgsi_full_src_register *fsrc;487488fsrc = &finst->Src[i];489490switch (fsrc->Register.File) {491case TGSI_FILE_INPUT:492if (ai == -1 || ai == fsrc->Register.Index) {493ai = fsrc->Register.Index;494src[i] = tgsi_src(vpc, fsrc);495} else {496src[i] = nvfx_src(temp(vpc));497nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL,498tgsi_src(vpc, fsrc), none, none));499}500break;501case TGSI_FILE_CONSTANT:502if ((ci == -1 && ii == -1) ||503ci == fsrc->Register.Index) {504ci = fsrc->Register.Index;505src[i] = tgsi_src(vpc, fsrc);506} else {507src[i] = nvfx_src(temp(vpc));508nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL,509tgsi_src(vpc, fsrc), none, none));510}511break;512case TGSI_FILE_IMMEDIATE:513if ((ci == -1 && ii == -1) ||514ii == fsrc->Register.Index) {515ii = fsrc->Register.Index;516src[i] = tgsi_src(vpc, fsrc);517} else {518src[i] = nvfx_src(temp(vpc));519nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL,520tgsi_src(vpc, fsrc), none, none));521}522break;523case TGSI_FILE_TEMPORARY:524/* handled above */525break;526default:527NOUVEAU_ERR("bad src file\n");528return false;529}530}531532for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {533if(src[i].reg.type < 0)534return false;535}536537if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS &&538finst->Instruction.Opcode != TGSI_OPCODE_ARL)539return false;540541final_dst = dst = tgsi_dst(vpc, &finst->Dst[0]);542mask = tgsi_mask(finst->Dst[0].Register.WriteMask);543if(finst->Instruction.Saturate) {544assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL);545if (vpc->is_nv4x)546sat = true;547else548if(dst.type != NVFXSR_TEMP)549dst = temp(vpc);550}551552switch (finst->Instruction.Opcode) {553case TGSI_OPCODE_ADD:554nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, src[1]));555break;556case TGSI_OPCODE_ARL:557nvfx_vp_emit(vpc, arith(0, VEC, ARL, dst, mask, src[0], none, none));558break;559case TGSI_OPCODE_CEIL:560tmp = nvfx_src(temp(vpc));561nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, neg(src[0]), none, none));562nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none));563break;564case TGSI_OPCODE_CMP:565insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none);566insn.cc_update = 1;567nvfx_vp_emit(vpc, insn);568569insn = arith(sat, VEC, MOV, dst, mask, src[2], none, none);570insn.cc_test = NVFX_COND_GE;571nvfx_vp_emit(vpc, insn);572573insn = arith(sat, VEC, MOV, dst, mask, src[1], none, none);574insn.cc_test = NVFX_COND_LT;575nvfx_vp_emit(vpc, insn);576break;577case TGSI_OPCODE_COS:578nvfx_vp_emit(vpc, arith(sat, SCA, COS, dst, mask, none, none, src[0]));579break;580case TGSI_OPCODE_DP2:581tmp = nvfx_src(temp(vpc));582nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X | NVFX_VP_MASK_Y, src[0], src[1], none));583nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, swz(tmp, X, X, X, X), none, swz(tmp, Y, Y, Y, Y)));584break;585case TGSI_OPCODE_DP3:586nvfx_vp_emit(vpc, arith(sat, VEC, DP3, dst, mask, src[0], src[1], none));587break;588case TGSI_OPCODE_DP4:589nvfx_vp_emit(vpc, arith(sat, VEC, DP4, dst, mask, src[0], src[1], none));590break;591case TGSI_OPCODE_DST:592nvfx_vp_emit(vpc, arith(sat, VEC, DST, dst, mask, src[0], src[1], none));593break;594case TGSI_OPCODE_EX2:595nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, src[0]));596break;597case TGSI_OPCODE_EXP:598nvfx_vp_emit(vpc, arith(sat, SCA, EXP, dst, mask, none, none, src[0]));599break;600case TGSI_OPCODE_FLR:601nvfx_vp_emit(vpc, arith(sat, VEC, FLR, dst, mask, src[0], none, none));602break;603case TGSI_OPCODE_FRC:604nvfx_vp_emit(vpc, arith(sat, VEC, FRC, dst, mask, src[0], none, none));605break;606case TGSI_OPCODE_LG2:607nvfx_vp_emit(vpc, arith(sat, SCA, LG2, dst, mask, none, none, src[0]));608break;609case TGSI_OPCODE_LIT:610nvfx_vp_emit(vpc, arith(sat, SCA, LIT, dst, mask, none, none, src[0]));611break;612case TGSI_OPCODE_LOG:613nvfx_vp_emit(vpc, arith(sat, SCA, LOG, dst, mask, none, none, src[0]));614break;615case TGSI_OPCODE_LRP:616tmp = nvfx_src(temp(vpc));617nvfx_vp_emit(vpc, arith(0, VEC, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));618nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], tmp));619break;620case TGSI_OPCODE_MAD:621nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], src[2]));622break;623case TGSI_OPCODE_MAX:624nvfx_vp_emit(vpc, arith(sat, VEC, MAX, dst, mask, src[0], src[1], none));625break;626case TGSI_OPCODE_MIN:627nvfx_vp_emit(vpc, arith(sat, VEC, MIN, dst, mask, src[0], src[1], none));628break;629case TGSI_OPCODE_MOV:630nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, src[0], none, none));631break;632case TGSI_OPCODE_MUL:633nvfx_vp_emit(vpc, arith(sat, VEC, MUL, dst, mask, src[0], src[1], none));634break;635case TGSI_OPCODE_NOP:636break;637case TGSI_OPCODE_POW:638tmp = nvfx_src(temp(vpc));639nvfx_vp_emit(vpc, arith(0, SCA, LG2, tmp.reg, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X)));640nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));641nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X)));642break;643case TGSI_OPCODE_RCP:644nvfx_vp_emit(vpc, arith(sat, SCA, RCP, dst, mask, none, none, src[0]));645break;646case TGSI_OPCODE_RSQ:647nvfx_vp_emit(vpc, arith(sat, SCA, RSQ, dst, mask, none, none, abs(src[0])));648break;649case TGSI_OPCODE_SEQ:650nvfx_vp_emit(vpc, arith(sat, VEC, SEQ, dst, mask, src[0], src[1], none));651break;652case TGSI_OPCODE_SGE:653nvfx_vp_emit(vpc, arith(sat, VEC, SGE, dst, mask, src[0], src[1], none));654break;655case TGSI_OPCODE_SGT:656nvfx_vp_emit(vpc, arith(sat, VEC, SGT, dst, mask, src[0], src[1], none));657break;658case TGSI_OPCODE_SIN:659nvfx_vp_emit(vpc, arith(sat, SCA, SIN, dst, mask, none, none, src[0]));660break;661case TGSI_OPCODE_SLE:662nvfx_vp_emit(vpc, arith(sat, VEC, SLE, dst, mask, src[0], src[1], none));663break;664case TGSI_OPCODE_SLT:665nvfx_vp_emit(vpc, arith(sat, VEC, SLT, dst, mask, src[0], src[1], none));666break;667case TGSI_OPCODE_SNE:668nvfx_vp_emit(vpc, arith(sat, VEC, SNE, dst, mask, src[0], src[1], none));669break;670case TGSI_OPCODE_SSG:671nvfx_vp_emit(vpc, arith(sat, VEC, SSG, dst, mask, src[0], none, none));672break;673case TGSI_OPCODE_TRUNC:674tmp = nvfx_src(temp(vpc));675insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none);676insn.cc_update = 1;677nvfx_vp_emit(vpc, insn);678679nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, abs(src[0]), none, none));680nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, tmp, none, none));681682insn = arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none);683insn.cc_test = NVFX_COND_LT;684nvfx_vp_emit(vpc, insn);685break;686case TGSI_OPCODE_IF:687insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none);688insn.cc_update = 1;689nvfx_vp_emit(vpc, insn);690691reloc.location = vpc->vp->nr_insns;692reloc.target = finst->Label.Label + 1;693util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);694695insn = arith(0, SCA, BRA, none.reg, 0, none, none, none);696insn.cc_test = NVFX_COND_EQ;697insn.cc_swz[0] = insn.cc_swz[1] = insn.cc_swz[2] = insn.cc_swz[3] = 0;698nvfx_vp_emit(vpc, insn);699break;700case TGSI_OPCODE_ELSE:701case TGSI_OPCODE_CAL:702reloc.location = vpc->vp->nr_insns;703reloc.target = finst->Label.Label;704util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);705706if(finst->Instruction.Opcode == TGSI_OPCODE_CAL)707insn = arith(0, SCA, CAL, none.reg, 0, none, none, none);708else709insn = arith(0, SCA, BRA, none.reg, 0, none, none, none);710nvfx_vp_emit(vpc, insn);711break;712case TGSI_OPCODE_RET:713if(sub_depth || !vpc->vp->enabled_ucps) {714tmp = none;715tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0;716nvfx_vp_emit(vpc, arith(0, SCA, RET, none.reg, 0, none, none, tmp));717} else {718reloc.location = vpc->vp->nr_insns;719reloc.target = vpc->info->num_instructions;720util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);721nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));722}723break;724case TGSI_OPCODE_BGNSUB:725++sub_depth;726break;727case TGSI_OPCODE_ENDSUB:728--sub_depth;729break;730case TGSI_OPCODE_ENDIF:731/* nothing to do here */732break;733case TGSI_OPCODE_BGNLOOP:734loop.cont_target = idx;735loop.brk_target = finst->Label.Label + 1;736util_dynarray_append(&vpc->loop_stack, struct nvfx_loop_entry, loop);737break;738case TGSI_OPCODE_ENDLOOP:739loop = util_dynarray_pop(&vpc->loop_stack, struct nvfx_loop_entry);740741reloc.location = vpc->vp->nr_insns;742reloc.target = loop.cont_target;743util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);744745nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));746break;747case TGSI_OPCODE_CONT:748loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);749750reloc.location = vpc->vp->nr_insns;751reloc.target = loop.cont_target;752util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);753754nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));755break;756case TGSI_OPCODE_BRK:757loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);758759reloc.location = vpc->vp->nr_insns;760reloc.target = loop.brk_target;761util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);762763nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));764break;765case TGSI_OPCODE_END:766assert(!sub_depth);767if(vpc->vp->enabled_ucps) {768if(idx != (vpc->info->num_instructions - 1)) {769reloc.location = vpc->vp->nr_insns;770reloc.target = vpc->info->num_instructions;771util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);772nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));773}774} else {775if(vpc->vp->nr_insns)776vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;777nvfx_vp_emit(vpc, arith(0, VEC, NOP, none.reg, 0, none, none, none));778vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;779}780break;781default:782NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);783return false;784}785786if(finst->Instruction.Saturate && !vpc->is_nv4x) {787if (!vpc->r_0_1.type)788vpc->r_0_1 = constant(vpc, -1, 0, 1, 0, 0);789nvfx_vp_emit(vpc, arith(0, VEC, MAX, dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), X, X, X, X), none));790nvfx_vp_emit(vpc, arith(0, VEC, MIN, final_dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), Y, Y, Y, Y), none));791}792793release_temps(vpc);794return true;795}796797static bool798nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,799const struct tgsi_full_declaration *fdec)800{801unsigned num_texcoords = vpc->is_nv4x ? 10 : 8;802unsigned idx = fdec->Range.First;803unsigned semantic_index = fdec->Semantic.Index;804int hw = 0, i;805806switch (fdec->Semantic.Name) {807case TGSI_SEMANTIC_POSITION:808hw = NVFX_VP(INST_DEST_POS);809vpc->hpos_idx = idx;810break;811case TGSI_SEMANTIC_CLIPVERTEX:812vpc->r_result[idx] = temp(vpc);813vpc->r_temps_discard = 0;814vpc->cvtx_idx = idx;815return true;816case TGSI_SEMANTIC_COLOR:817if (fdec->Semantic.Index == 0) {818hw = NVFX_VP(INST_DEST_COL0);819} else820if (fdec->Semantic.Index == 1) {821hw = NVFX_VP(INST_DEST_COL1);822} else {823NOUVEAU_ERR("bad colour semantic index\n");824return false;825}826break;827case TGSI_SEMANTIC_BCOLOR:828if (fdec->Semantic.Index == 0) {829hw = NVFX_VP(INST_DEST_BFC0);830} else831if (fdec->Semantic.Index == 1) {832hw = NVFX_VP(INST_DEST_BFC1);833} else {834NOUVEAU_ERR("bad bcolour semantic index\n");835return false;836}837break;838case TGSI_SEMANTIC_FOG:839hw = NVFX_VP(INST_DEST_FOGC);840break;841case TGSI_SEMANTIC_PSIZE:842hw = NVFX_VP(INST_DEST_PSZ);843break;844case TGSI_SEMANTIC_GENERIC:845/* this is really an identifier for VP/FP linkage */846semantic_index += 8;847FALLTHROUGH;848case TGSI_SEMANTIC_TEXCOORD:849for (i = 0; i < num_texcoords; i++) {850if (vpc->vp->texcoord[i] == semantic_index) {851hw = NVFX_VP(INST_DEST_TC(i));852break;853}854}855856if (i == num_texcoords) {857vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0);858return true;859}860break;861case TGSI_SEMANTIC_EDGEFLAG:862vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0);863return true;864default:865NOUVEAU_ERR("bad output semantic\n");866return false;867}868869vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);870return true;871}872873static bool874nvfx_vertprog_prepare(struct nvfx_vpc *vpc)875{876struct tgsi_parse_context p;877int high_const = -1, high_temp = -1, high_addr = -1, nr_imm = 0, i;878879tgsi_parse_init(&p, vpc->pipe.tokens);880while (!tgsi_parse_end_of_tokens(&p)) {881const union tgsi_full_token *tok = &p.FullToken;882883tgsi_parse_token(&p);884switch(tok->Token.Type) {885case TGSI_TOKEN_TYPE_IMMEDIATE:886nr_imm++;887break;888case TGSI_TOKEN_TYPE_DECLARATION:889{890const struct tgsi_full_declaration *fdec;891892fdec = &p.FullToken.FullDeclaration;893switch (fdec->Declaration.File) {894case TGSI_FILE_TEMPORARY:895if (fdec->Range.Last > high_temp) {896high_temp =897fdec->Range.Last;898}899break;900case TGSI_FILE_ADDRESS:901if (fdec->Range.Last > high_addr) {902high_addr =903fdec->Range.Last;904}905break;906case TGSI_FILE_CONSTANT:907if (fdec->Range.Last > high_const) {908high_const =909fdec->Range.Last;910}911break;912case TGSI_FILE_OUTPUT:913if (!nvfx_vertprog_parse_decl_output(vpc, fdec))914return false;915break;916default:917break;918}919}920break;921default:922break;923}924}925tgsi_parse_free(&p);926927if (nr_imm) {928vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_reg));929assert(vpc->imm);930}931932if (++high_temp) {933vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));934for (i = 0; i < high_temp; i++)935vpc->r_temp[i] = temp(vpc);936}937938if (++high_addr) {939vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg));940for (i = 0; i < high_addr; i++)941vpc->r_address[i] = nvfx_reg(NVFXSR_TEMP, i);942}943944if(++high_const) {945vpc->r_const = CALLOC(high_const, sizeof(struct nvfx_reg));946for (i = 0; i < high_const; i++)947vpc->r_const[i] = constant(vpc, i, 0, 0, 0, 0);948}949950vpc->r_temps_discard = 0;951return true;952}953954DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", false)955956bool957_nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp)958{959struct tgsi_parse_context parse;960struct nvfx_vpc *vpc = NULL;961struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));962struct util_dynarray insns;963int i, ucps;964965vp->translated = false;966vp->nr_insns = 0;967vp->nr_consts = 0;968969vpc = CALLOC_STRUCT(nvfx_vpc);970if (!vpc)971return false;972vpc->is_nv4x = (oclass >= NV40_3D_CLASS) ? ~0 : 0;973vpc->vp = vp;974vpc->pipe = vp->pipe;975vpc->info = &vp->info;976vpc->cvtx_idx = -1;977978if (!nvfx_vertprog_prepare(vpc)) {979FREE(vpc);980return false;981}982983/* Redirect post-transform vertex position to a temp if user clip984* planes are enabled. We need to append code to the vtxprog985* to handle clip planes later.986*/987if (vp->enabled_ucps && vpc->cvtx_idx < 0) {988vpc->r_result[vpc->hpos_idx] = temp(vpc);989vpc->r_temps_discard = 0;990vpc->cvtx_idx = vpc->hpos_idx;991}992993util_dynarray_init(&insns, NULL);994995tgsi_parse_init(&parse, vp->pipe.tokens);996while (!tgsi_parse_end_of_tokens(&parse)) {997tgsi_parse_token(&parse);998999switch (parse.FullToken.Token.Type) {1000case TGSI_TOKEN_TYPE_IMMEDIATE:1001{1002const struct tgsi_full_immediate *imm;10031004imm = &parse.FullToken.FullImmediate;1005assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);1006assert(imm->Immediate.NrTokens == 4 + 1);1007vpc->imm[vpc->nr_imm++] =1008constant(vpc, -1,1009imm->u[0].Float,1010imm->u[1].Float,1011imm->u[2].Float,1012imm->u[3].Float);1013}1014break;1015case TGSI_TOKEN_TYPE_INSTRUCTION:1016{1017const struct tgsi_full_instruction *finst;1018unsigned idx = insns.size >> 2;1019util_dynarray_append(&insns, unsigned, vp->nr_insns);1020finst = &parse.FullToken.FullInstruction;1021if (!nvfx_vertprog_parse_instruction(vpc, idx, finst))1022goto out;1023}1024break;1025default:1026break;1027}1028}10291030util_dynarray_append(&insns, unsigned, vp->nr_insns);10311032for(unsigned i = 0; i < vpc->label_relocs.size; i += sizeof(struct nvfx_relocation))1033{1034struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)vpc->label_relocs.data + i);1035struct nvfx_relocation hw_reloc;10361037hw_reloc.location = label_reloc->location;1038hw_reloc.target = ((unsigned*)insns.data)[label_reloc->target];10391040//debug_printf("hw %u -> tgsi %u = hw %u\n", hw_reloc.location, label_reloc->target, hw_reloc.target);10411042util_dynarray_append(&vp->branch_relocs, struct nvfx_relocation, hw_reloc);1043}1044util_dynarray_fini(&insns);1045util_dynarray_trim(&vp->branch_relocs);10461047/* XXX: what if we add a RET before?! make sure we jump here...*/10481049/* Write out HPOS if it was redirected to a temp earlier */1050if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) {1051struct nvfx_reg hpos = nvfx_reg(NVFXSR_OUTPUT,1052NVFX_VP(INST_DEST_POS));1053struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);10541055nvfx_vp_emit(vpc, arith(0, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none));1056}10571058/* Insert code to handle user clip planes */1059ucps = vp->enabled_ucps;1060while (ucps) {1061int i = ffs(ucps) - 1; ucps &= ~(1 << i);1062struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i));1063struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, 512 + i));1064struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->cvtx_idx]);1065unsigned mask;10661067if(vpc->is_nv4x)1068{1069switch (i) {1070case 0: case 3: mask = NVFX_VP_MASK_Y; break;1071case 1: case 4: mask = NVFX_VP_MASK_Z; break;1072case 2: case 5: mask = NVFX_VP_MASK_W; break;1073default:1074NOUVEAU_ERR("invalid clip dist #%d\n", i);1075goto out;1076}1077}1078else1079mask = NVFX_VP_MASK_X;10801081nvfx_vp_emit(vpc, arith(0, VEC, DP4, cdst, mask, htmp, ceqn, none));1082}10831084if (vpc->vp->nr_insns)1085vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;10861087if(debug_get_option_nvfx_dump_vp())1088{1089debug_printf("\n");1090tgsi_dump(vpc->pipe.tokens, 0);10911092debug_printf("\n%s vertex program:\n", vpc->is_nv4x ? "nv4x" : "nv3x");1093for (i = 0; i < vp->nr_insns; i++)1094debug_printf("%3u: %08x %08x %08x %08x\n", i, vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);1095debug_printf("\n");1096}10971098vp->translated = true;10991100out:1101tgsi_parse_free(&parse);1102if (vpc) {1103util_dynarray_fini(&vpc->label_relocs);1104util_dynarray_fini(&vpc->loop_stack);1105FREE(vpc->r_temp);1106FREE(vpc->r_address);1107FREE(vpc->r_const);1108FREE(vpc->imm);1109FREE(vpc);1110}11111112return vp->translated;1113}111411151116