Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
4574 views
/*1* Copyright 2011 Christoph Bumiller2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "tgsi/tgsi_build.h"23#include "tgsi/tgsi_dump.h"24#include "tgsi/tgsi_scan.h"25#include "tgsi/tgsi_util.h"2627#include <set>2829#include "codegen/nv50_ir.h"30#include "codegen/nv50_ir_from_common.h"31#include "codegen/nv50_ir_util.h"3233namespace tgsi {3435class Source;3637static nv50_ir::operation translateOpcode(uint opcode);38static nv50_ir::DataFile translateFile(uint file);39static nv50_ir::TexTarget translateTexture(uint texTarg);40static nv50_ir::SVSemantic translateSysVal(uint sysval);41static nv50_ir::CacheMode translateCacheMode(uint qualifier);4243class Instruction44{45public:46Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }4748class SrcRegister49{50public:51SrcRegister(const struct tgsi_full_src_register *src)52: reg(src->Register),53fsr(src)54{ }5556SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }5758SrcRegister(const struct tgsi_ind_register& ind)59: reg(tgsi_util_get_src_from_ind(&ind)),60fsr(NULL)61{ }6263struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)64{65struct tgsi_src_register reg;66memset(®, 0, sizeof(reg));67reg.Index = off.Index;68reg.File = off.File;69reg.SwizzleX = off.SwizzleX;70reg.SwizzleY = off.SwizzleY;71reg.SwizzleZ = off.SwizzleZ;72return reg;73}7475SrcRegister(const struct tgsi_texture_offset& off) :76reg(offsetToSrc(off)),77fsr(NULL)78{ }7980uint getFile() const { return reg.File; }8182bool is2D() const { return reg.Dimension; }8384bool isIndirect(int dim) const85{86return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;87}8889int getIndex(int dim) const90{91return (dim && fsr) ? fsr->Dimension.Index : reg.Index;92}9394int getSwizzle(int chan) const95{96return tgsi_util_get_src_register_swizzle(®, chan);97}9899int getArrayId() const100{101if (isIndirect(0))102return fsr->Indirect.ArrayID;103return 0;104}105106nv50_ir::Modifier getMod(int chan) const;107108SrcRegister getIndirect(int dim) const109{110assert(fsr && isIndirect(dim));111if (dim)112return SrcRegister(fsr->DimIndirect);113return SrcRegister(fsr->Indirect);114}115116uint32_t getValueU32(int c, const uint32_t *data) const117{118assert(reg.File == TGSI_FILE_IMMEDIATE);119assert(!reg.Absolute);120assert(!reg.Negate);121return data[reg.Index * 4 + getSwizzle(c)];122}123124private:125const struct tgsi_src_register reg;126const struct tgsi_full_src_register *fsr;127};128129class DstRegister130{131public:132DstRegister(const struct tgsi_full_dst_register *dst)133: reg(dst->Register),134fdr(dst)135{ }136137DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }138139uint getFile() const { return reg.File; }140141bool is2D() const { return reg.Dimension; }142143bool isIndirect(int dim) const144{145return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;146}147148int getIndex(int dim) const149{150return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;151}152153unsigned int getMask() const { return reg.WriteMask; }154155bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }156157SrcRegister getIndirect(int dim) const158{159assert(fdr && isIndirect(dim));160if (dim)161return SrcRegister(fdr->DimIndirect);162return SrcRegister(fdr->Indirect);163}164165struct tgsi_full_src_register asSrc()166{167assert(fdr);168return tgsi_full_src_register_from_dst(fdr);169}170171int getArrayId() const172{173if (isIndirect(0))174return fdr->Indirect.ArrayID;175return 0;176}177178private:179const struct tgsi_dst_register reg;180const struct tgsi_full_dst_register *fdr;181};182183inline uint getOpcode() const { return insn->Instruction.Opcode; }184185unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }186unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }187188// mask of used components of source s189unsigned int srcMask(unsigned int s) const;190unsigned int texOffsetMask() const;191192SrcRegister getSrc(unsigned int s) const193{194assert(s < srcCount());195return SrcRegister(&insn->Src[s]);196}197198DstRegister getDst(unsigned int d) const199{200assert(d < dstCount());201return DstRegister(&insn->Dst[d]);202}203204SrcRegister getTexOffset(unsigned int i) const205{206assert(i < TGSI_FULL_MAX_TEX_OFFSETS);207return SrcRegister(insn->TexOffsets[i]);208}209210unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }211212bool checkDstSrcAliasing() const;213214inline nv50_ir::operation getOP() const {215return translateOpcode(getOpcode()); }216217nv50_ir::DataType inferSrcType() const;218nv50_ir::DataType inferDstType() const;219220nv50_ir::CondCode getSetCond() const;221222nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;223224const nv50_ir::TexInstruction::ImgFormatDesc *getImageFormat() const {225return nv50_ir::TexInstruction::translateImgFormat((enum pipe_format)insn->Memory.Format);226}227228nv50_ir::TexTarget getImageTarget() const {229return translateTexture(insn->Memory.Texture);230}231232nv50_ir::CacheMode getCacheMode() const {233if (!insn->Instruction.Memory)234return nv50_ir::CACHE_CA;235return translateCacheMode(insn->Memory.Qualifier);236}237238inline uint getLabel() { return insn->Label.Label; }239240unsigned getSaturate() const { return insn->Instruction.Saturate; }241242void print() const243{244tgsi_dump_instruction(insn, 1);245}246247private:248const struct tgsi_full_instruction *insn;249};250251unsigned int Instruction::texOffsetMask() const252{253const struct tgsi_instruction_texture *tex = &insn->Texture;254assert(insn->Instruction.Texture);255256switch (tex->Texture) {257case TGSI_TEXTURE_BUFFER:258case TGSI_TEXTURE_1D:259case TGSI_TEXTURE_SHADOW1D:260case TGSI_TEXTURE_1D_ARRAY:261case TGSI_TEXTURE_SHADOW1D_ARRAY:262return 0x1;263case TGSI_TEXTURE_2D:264case TGSI_TEXTURE_SHADOW2D:265case TGSI_TEXTURE_2D_ARRAY:266case TGSI_TEXTURE_SHADOW2D_ARRAY:267case TGSI_TEXTURE_RECT:268case TGSI_TEXTURE_SHADOWRECT:269case TGSI_TEXTURE_2D_MSAA:270case TGSI_TEXTURE_2D_ARRAY_MSAA:271return 0x3;272case TGSI_TEXTURE_3D:273return 0x7;274default:275assert(!"Unexpected texture target");276return 0xf;277}278}279280unsigned int Instruction::srcMask(unsigned int s) const281{282unsigned int mask = insn->Dst[0].Register.WriteMask;283284switch (insn->Instruction.Opcode) {285case TGSI_OPCODE_COS:286case TGSI_OPCODE_SIN:287return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);288case TGSI_OPCODE_DP2:289return 0x3;290case TGSI_OPCODE_DP3:291return 0x7;292case TGSI_OPCODE_DP4:293case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */294return 0xf;295case TGSI_OPCODE_DST:296return mask & (s ? 0xa : 0x6);297case TGSI_OPCODE_EX2:298case TGSI_OPCODE_EXP:299case TGSI_OPCODE_LG2:300case TGSI_OPCODE_LOG:301case TGSI_OPCODE_POW:302case TGSI_OPCODE_RCP:303case TGSI_OPCODE_RSQ:304return 0x1;305case TGSI_OPCODE_IF:306case TGSI_OPCODE_UIF:307return 0x1;308case TGSI_OPCODE_LIT:309return 0xb;310case TGSI_OPCODE_TEX2:311case TGSI_OPCODE_TXB2:312case TGSI_OPCODE_TXL2:313return (s == 0) ? 0xf : 0x3;314case TGSI_OPCODE_TEX:315case TGSI_OPCODE_TXB:316case TGSI_OPCODE_TXD:317case TGSI_OPCODE_TXL:318case TGSI_OPCODE_TXP:319case TGSI_OPCODE_TXF:320case TGSI_OPCODE_TG4:321case TGSI_OPCODE_TEX_LZ:322case TGSI_OPCODE_TXF_LZ:323case TGSI_OPCODE_LODQ:324{325const struct tgsi_instruction_texture *tex = &insn->Texture;326327assert(insn->Instruction.Texture);328329mask = 0x7;330if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&331insn->Instruction.Opcode != TGSI_OPCODE_TEX_LZ &&332insn->Instruction.Opcode != TGSI_OPCODE_TXF_LZ &&333insn->Instruction.Opcode != TGSI_OPCODE_TXD)334mask |= 0x8; /* bias, lod or proj */335336switch (tex->Texture) {337case TGSI_TEXTURE_1D:338mask &= 0x9;339break;340case TGSI_TEXTURE_SHADOW1D:341mask &= 0xd;342break;343case TGSI_TEXTURE_1D_ARRAY:344case TGSI_TEXTURE_2D:345case TGSI_TEXTURE_RECT:346mask &= 0xb;347break;348case TGSI_TEXTURE_CUBE_ARRAY:349case TGSI_TEXTURE_SHADOW2D_ARRAY:350case TGSI_TEXTURE_SHADOWCUBE:351case TGSI_TEXTURE_SHADOWCUBE_ARRAY:352mask |= 0x8;353break;354default:355break;356}357}358return mask;359case TGSI_OPCODE_TXQ:360return 1;361case TGSI_OPCODE_D2I:362case TGSI_OPCODE_D2U:363case TGSI_OPCODE_D2F:364case TGSI_OPCODE_DSLT:365case TGSI_OPCODE_DSGE:366case TGSI_OPCODE_DSEQ:367case TGSI_OPCODE_DSNE:368case TGSI_OPCODE_U64SEQ:369case TGSI_OPCODE_U64SNE:370case TGSI_OPCODE_I64SLT:371case TGSI_OPCODE_U64SLT:372case TGSI_OPCODE_I64SGE:373case TGSI_OPCODE_U64SGE:374case TGSI_OPCODE_I642F:375case TGSI_OPCODE_U642F:376switch (util_bitcount(mask)) {377case 1: return 0x3;378case 2: return 0xf;379default:380assert(!"unexpected mask");381return 0xf;382}383case TGSI_OPCODE_I2D:384case TGSI_OPCODE_U2D:385case TGSI_OPCODE_F2D: {386unsigned int x = 0;387if ((mask & 0x3) == 0x3)388x |= 1;389if ((mask & 0xc) == 0xc)390x |= 2;391return x;392}393case TGSI_OPCODE_PK2H:394return 0x3;395case TGSI_OPCODE_UP2H:396return 0x1;397default:398break;399}400401return mask;402}403404nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const405{406nv50_ir::Modifier m(0);407408if (reg.Absolute)409m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS);410if (reg.Negate)411m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG);412return m;413}414415static nv50_ir::DataFile translateFile(uint file)416{417switch (file) {418case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST;419case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT;420case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT;421case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR;422case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS;423case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;424case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;425case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER;426case TGSI_FILE_IMAGE: return nv50_ir::FILE_MEMORY_GLOBAL;427case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;428case TGSI_FILE_SAMPLER:429case TGSI_FILE_NULL:430default:431return nv50_ir::FILE_NULL;432}433}434435static nv50_ir::SVSemantic translateSysVal(uint sysval)436{437switch (sysval) {438case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE;439case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE;440case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID;441case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;442case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID;443case TGSI_SEMANTIC_GRID_SIZE: return nv50_ir::SV_NCTAID;444case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID;445case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID;446case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID;447case TGSI_SEMANTIC_SAMPLEID: return nv50_ir::SV_SAMPLE_INDEX;448case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS;449case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK;450case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID;451case TGSI_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD;452case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER;453case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;454case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;455case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;456case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;457case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;458case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;459case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM;460case TGSI_SEMANTIC_SUBGROUP_INVOCATION: return nv50_ir::SV_LANEID;461case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: return nv50_ir::SV_LANEMASK_EQ;462case TGSI_SEMANTIC_SUBGROUP_LT_MASK: return nv50_ir::SV_LANEMASK_LT;463case TGSI_SEMANTIC_SUBGROUP_LE_MASK: return nv50_ir::SV_LANEMASK_LE;464case TGSI_SEMANTIC_SUBGROUP_GT_MASK: return nv50_ir::SV_LANEMASK_GT;465case TGSI_SEMANTIC_SUBGROUP_GE_MASK: return nv50_ir::SV_LANEMASK_GE;466default:467assert(0);468return nv50_ir::SV_CLOCK;469}470}471472#define NV50_IR_TEX_TARG_CASE(a, b) \473case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;474475static nv50_ir::TexTarget translateTexture(uint tex)476{477switch (tex) {478NV50_IR_TEX_TARG_CASE(1D, 1D);479NV50_IR_TEX_TARG_CASE(2D, 2D);480NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS);481NV50_IR_TEX_TARG_CASE(3D, 3D);482NV50_IR_TEX_TARG_CASE(CUBE, CUBE);483NV50_IR_TEX_TARG_CASE(RECT, RECT);484NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);485NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);486NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY);487NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY);488NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);489NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);490NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);491NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);492NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);493NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);494NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW);495NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);496497case TGSI_TEXTURE_UNKNOWN:498default:499assert(!"invalid texture target");500return nv50_ir::TEX_TARGET_2D;501}502}503504static nv50_ir::CacheMode translateCacheMode(uint qualifier)505{506if (qualifier & TGSI_MEMORY_VOLATILE)507return nv50_ir::CACHE_CV;508if (qualifier & TGSI_MEMORY_COHERENT)509return nv50_ir::CACHE_CG;510return nv50_ir::CACHE_CA;511}512513nv50_ir::DataType Instruction::inferSrcType() const514{515switch (getOpcode()) {516case TGSI_OPCODE_UIF:517case TGSI_OPCODE_AND:518case TGSI_OPCODE_OR:519case TGSI_OPCODE_XOR:520case TGSI_OPCODE_NOT:521case TGSI_OPCODE_SHL:522case TGSI_OPCODE_U2F:523case TGSI_OPCODE_U2D:524case TGSI_OPCODE_U2I64:525case TGSI_OPCODE_UADD:526case TGSI_OPCODE_UDIV:527case TGSI_OPCODE_UMOD:528case TGSI_OPCODE_UMAD:529case TGSI_OPCODE_UMUL:530case TGSI_OPCODE_UMUL_HI:531case TGSI_OPCODE_UMAX:532case TGSI_OPCODE_UMIN:533case TGSI_OPCODE_USEQ:534case TGSI_OPCODE_USGE:535case TGSI_OPCODE_USLT:536case TGSI_OPCODE_USNE:537case TGSI_OPCODE_USHR:538case TGSI_OPCODE_ATOMUADD:539case TGSI_OPCODE_ATOMXCHG:540case TGSI_OPCODE_ATOMCAS:541case TGSI_OPCODE_ATOMAND:542case TGSI_OPCODE_ATOMOR:543case TGSI_OPCODE_ATOMXOR:544case TGSI_OPCODE_ATOMUMIN:545case TGSI_OPCODE_ATOMUMAX:546case TGSI_OPCODE_ATOMDEC_WRAP:547case TGSI_OPCODE_ATOMINC_WRAP:548case TGSI_OPCODE_UBFE:549case TGSI_OPCODE_UMSB:550case TGSI_OPCODE_UP2H:551case TGSI_OPCODE_VOTE_ALL:552case TGSI_OPCODE_VOTE_ANY:553case TGSI_OPCODE_VOTE_EQ:554return nv50_ir::TYPE_U32;555case TGSI_OPCODE_I2F:556case TGSI_OPCODE_I2D:557case TGSI_OPCODE_I2I64:558case TGSI_OPCODE_IDIV:559case TGSI_OPCODE_IMUL_HI:560case TGSI_OPCODE_IMAX:561case TGSI_OPCODE_IMIN:562case TGSI_OPCODE_IABS:563case TGSI_OPCODE_INEG:564case TGSI_OPCODE_ISGE:565case TGSI_OPCODE_ISHR:566case TGSI_OPCODE_ISLT:567case TGSI_OPCODE_ISSG:568case TGSI_OPCODE_MOD:569case TGSI_OPCODE_UARL:570case TGSI_OPCODE_ATOMIMIN:571case TGSI_OPCODE_ATOMIMAX:572case TGSI_OPCODE_IBFE:573case TGSI_OPCODE_IMSB:574return nv50_ir::TYPE_S32;575case TGSI_OPCODE_D2F:576case TGSI_OPCODE_D2I:577case TGSI_OPCODE_D2U:578case TGSI_OPCODE_D2I64:579case TGSI_OPCODE_D2U64:580case TGSI_OPCODE_DABS:581case TGSI_OPCODE_DNEG:582case TGSI_OPCODE_DADD:583case TGSI_OPCODE_DMUL:584case TGSI_OPCODE_DDIV:585case TGSI_OPCODE_DMAX:586case TGSI_OPCODE_DMIN:587case TGSI_OPCODE_DSLT:588case TGSI_OPCODE_DSGE:589case TGSI_OPCODE_DSEQ:590case TGSI_OPCODE_DSNE:591case TGSI_OPCODE_DRCP:592case TGSI_OPCODE_DSQRT:593case TGSI_OPCODE_DMAD:594case TGSI_OPCODE_DFMA:595case TGSI_OPCODE_DFRAC:596case TGSI_OPCODE_DRSQ:597case TGSI_OPCODE_DTRUNC:598case TGSI_OPCODE_DCEIL:599case TGSI_OPCODE_DFLR:600case TGSI_OPCODE_DROUND:601return nv50_ir::TYPE_F64;602case TGSI_OPCODE_U64SEQ:603case TGSI_OPCODE_U64SNE:604case TGSI_OPCODE_U64SLT:605case TGSI_OPCODE_U64SGE:606case TGSI_OPCODE_U64MIN:607case TGSI_OPCODE_U64MAX:608case TGSI_OPCODE_U64ADD:609case TGSI_OPCODE_U64MUL:610case TGSI_OPCODE_U64SHL:611case TGSI_OPCODE_U64SHR:612case TGSI_OPCODE_U64DIV:613case TGSI_OPCODE_U64MOD:614case TGSI_OPCODE_U642F:615case TGSI_OPCODE_U642D:616return nv50_ir::TYPE_U64;617case TGSI_OPCODE_I64ABS:618case TGSI_OPCODE_I64SSG:619case TGSI_OPCODE_I64NEG:620case TGSI_OPCODE_I64SLT:621case TGSI_OPCODE_I64SGE:622case TGSI_OPCODE_I64MIN:623case TGSI_OPCODE_I64MAX:624case TGSI_OPCODE_I64SHR:625case TGSI_OPCODE_I64DIV:626case TGSI_OPCODE_I64MOD:627case TGSI_OPCODE_I642F:628case TGSI_OPCODE_I642D:629return nv50_ir::TYPE_S64;630default:631return nv50_ir::TYPE_F32;632}633}634635nv50_ir::DataType Instruction::inferDstType() const636{637switch (getOpcode()) {638case TGSI_OPCODE_D2U:639case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;640case TGSI_OPCODE_D2I:641case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;642case TGSI_OPCODE_FSEQ:643case TGSI_OPCODE_FSGE:644case TGSI_OPCODE_FSLT:645case TGSI_OPCODE_FSNE:646case TGSI_OPCODE_DSEQ:647case TGSI_OPCODE_DSGE:648case TGSI_OPCODE_DSLT:649case TGSI_OPCODE_DSNE:650case TGSI_OPCODE_I64SLT:651case TGSI_OPCODE_I64SGE:652case TGSI_OPCODE_U64SEQ:653case TGSI_OPCODE_U64SNE:654case TGSI_OPCODE_U64SLT:655case TGSI_OPCODE_U64SGE:656case TGSI_OPCODE_PK2H:657return nv50_ir::TYPE_U32;658case TGSI_OPCODE_I2F:659case TGSI_OPCODE_U2F:660case TGSI_OPCODE_D2F:661case TGSI_OPCODE_I642F:662case TGSI_OPCODE_U642F:663case TGSI_OPCODE_UP2H:664return nv50_ir::TYPE_F32;665case TGSI_OPCODE_I2D:666case TGSI_OPCODE_U2D:667case TGSI_OPCODE_F2D:668case TGSI_OPCODE_I642D:669case TGSI_OPCODE_U642D:670return nv50_ir::TYPE_F64;671case TGSI_OPCODE_I2I64:672case TGSI_OPCODE_U2I64:673case TGSI_OPCODE_F2I64:674case TGSI_OPCODE_D2I64:675return nv50_ir::TYPE_S64;676case TGSI_OPCODE_F2U64:677case TGSI_OPCODE_D2U64:678return nv50_ir::TYPE_U64;679default:680return inferSrcType();681}682}683684nv50_ir::CondCode Instruction::getSetCond() const685{686using namespace nv50_ir;687688switch (getOpcode()) {689case TGSI_OPCODE_SLT:690case TGSI_OPCODE_ISLT:691case TGSI_OPCODE_USLT:692case TGSI_OPCODE_FSLT:693case TGSI_OPCODE_DSLT:694case TGSI_OPCODE_I64SLT:695case TGSI_OPCODE_U64SLT:696return CC_LT;697case TGSI_OPCODE_SLE:698return CC_LE;699case TGSI_OPCODE_SGE:700case TGSI_OPCODE_ISGE:701case TGSI_OPCODE_USGE:702case TGSI_OPCODE_FSGE:703case TGSI_OPCODE_DSGE:704case TGSI_OPCODE_I64SGE:705case TGSI_OPCODE_U64SGE:706return CC_GE;707case TGSI_OPCODE_SGT:708return CC_GT;709case TGSI_OPCODE_SEQ:710case TGSI_OPCODE_USEQ:711case TGSI_OPCODE_FSEQ:712case TGSI_OPCODE_DSEQ:713case TGSI_OPCODE_U64SEQ:714return CC_EQ;715case TGSI_OPCODE_SNE:716case TGSI_OPCODE_FSNE:717case TGSI_OPCODE_DSNE:718case TGSI_OPCODE_U64SNE:719return CC_NEU;720case TGSI_OPCODE_USNE:721return CC_NE;722default:723return CC_ALWAYS;724}725}726727#define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b728729static nv50_ir::operation translateOpcode(uint opcode)730{731switch (opcode) {732NV50_IR_OPCODE_CASE(ARL, SHL);733NV50_IR_OPCODE_CASE(MOV, MOV);734735NV50_IR_OPCODE_CASE(RCP, RCP);736NV50_IR_OPCODE_CASE(RSQ, RSQ);737NV50_IR_OPCODE_CASE(SQRT, SQRT);738739NV50_IR_OPCODE_CASE(MUL, MUL);740NV50_IR_OPCODE_CASE(ADD, ADD);741742NV50_IR_OPCODE_CASE(MIN, MIN);743NV50_IR_OPCODE_CASE(MAX, MAX);744NV50_IR_OPCODE_CASE(SLT, SET);745NV50_IR_OPCODE_CASE(SGE, SET);746NV50_IR_OPCODE_CASE(MAD, MAD);747NV50_IR_OPCODE_CASE(FMA, FMA);748749NV50_IR_OPCODE_CASE(FLR, FLOOR);750NV50_IR_OPCODE_CASE(ROUND, CVT);751NV50_IR_OPCODE_CASE(EX2, EX2);752NV50_IR_OPCODE_CASE(LG2, LG2);753NV50_IR_OPCODE_CASE(POW, POW);754755NV50_IR_OPCODE_CASE(COS, COS);756NV50_IR_OPCODE_CASE(DDX, DFDX);757NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);758NV50_IR_OPCODE_CASE(DDY, DFDY);759NV50_IR_OPCODE_CASE(DDY_FINE, DFDY);760NV50_IR_OPCODE_CASE(KILL, DISCARD);761NV50_IR_OPCODE_CASE(DEMOTE, DISCARD);762763NV50_IR_OPCODE_CASE(SEQ, SET);764NV50_IR_OPCODE_CASE(SGT, SET);765NV50_IR_OPCODE_CASE(SIN, SIN);766NV50_IR_OPCODE_CASE(SLE, SET);767NV50_IR_OPCODE_CASE(SNE, SET);768NV50_IR_OPCODE_CASE(TEX, TEX);769NV50_IR_OPCODE_CASE(TXD, TXD);770NV50_IR_OPCODE_CASE(TXP, TEX);771772NV50_IR_OPCODE_CASE(CAL, CALL);773NV50_IR_OPCODE_CASE(RET, RET);774NV50_IR_OPCODE_CASE(CMP, SLCT);775776NV50_IR_OPCODE_CASE(TXB, TXB);777778NV50_IR_OPCODE_CASE(DIV, DIV);779780NV50_IR_OPCODE_CASE(TXL, TXL);781NV50_IR_OPCODE_CASE(TEX_LZ, TXL);782783NV50_IR_OPCODE_CASE(CEIL, CEIL);784NV50_IR_OPCODE_CASE(I2F, CVT);785NV50_IR_OPCODE_CASE(NOT, NOT);786NV50_IR_OPCODE_CASE(TRUNC, TRUNC);787NV50_IR_OPCODE_CASE(SHL, SHL);788789NV50_IR_OPCODE_CASE(AND, AND);790NV50_IR_OPCODE_CASE(OR, OR);791NV50_IR_OPCODE_CASE(MOD, MOD);792NV50_IR_OPCODE_CASE(XOR, XOR);793NV50_IR_OPCODE_CASE(TXF, TXF);794NV50_IR_OPCODE_CASE(TXF_LZ, TXF);795NV50_IR_OPCODE_CASE(TXQ, TXQ);796NV50_IR_OPCODE_CASE(TXQS, TXQ);797NV50_IR_OPCODE_CASE(TG4, TXG);798NV50_IR_OPCODE_CASE(LODQ, TXLQ);799800NV50_IR_OPCODE_CASE(EMIT, EMIT);801NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);802803NV50_IR_OPCODE_CASE(KILL_IF, DISCARD);804805NV50_IR_OPCODE_CASE(F2I, CVT);806NV50_IR_OPCODE_CASE(FSEQ, SET);807NV50_IR_OPCODE_CASE(FSGE, SET);808NV50_IR_OPCODE_CASE(FSLT, SET);809NV50_IR_OPCODE_CASE(FSNE, SET);810NV50_IR_OPCODE_CASE(IDIV, DIV);811NV50_IR_OPCODE_CASE(IMAX, MAX);812NV50_IR_OPCODE_CASE(IMIN, MIN);813NV50_IR_OPCODE_CASE(IABS, ABS);814NV50_IR_OPCODE_CASE(INEG, NEG);815NV50_IR_OPCODE_CASE(ISGE, SET);816NV50_IR_OPCODE_CASE(ISHR, SHR);817NV50_IR_OPCODE_CASE(ISLT, SET);818NV50_IR_OPCODE_CASE(F2U, CVT);819NV50_IR_OPCODE_CASE(U2F, CVT);820NV50_IR_OPCODE_CASE(UADD, ADD);821NV50_IR_OPCODE_CASE(UDIV, DIV);822NV50_IR_OPCODE_CASE(UMAD, MAD);823NV50_IR_OPCODE_CASE(UMAX, MAX);824NV50_IR_OPCODE_CASE(UMIN, MIN);825NV50_IR_OPCODE_CASE(UMOD, MOD);826NV50_IR_OPCODE_CASE(UMUL, MUL);827NV50_IR_OPCODE_CASE(USEQ, SET);828NV50_IR_OPCODE_CASE(USGE, SET);829NV50_IR_OPCODE_CASE(USHR, SHR);830NV50_IR_OPCODE_CASE(USLT, SET);831NV50_IR_OPCODE_CASE(USNE, SET);832833NV50_IR_OPCODE_CASE(DABS, ABS);834NV50_IR_OPCODE_CASE(DNEG, NEG);835NV50_IR_OPCODE_CASE(DADD, ADD);836NV50_IR_OPCODE_CASE(DMUL, MUL);837NV50_IR_OPCODE_CASE(DDIV, DIV);838NV50_IR_OPCODE_CASE(DMAX, MAX);839NV50_IR_OPCODE_CASE(DMIN, MIN);840NV50_IR_OPCODE_CASE(DSLT, SET);841NV50_IR_OPCODE_CASE(DSGE, SET);842NV50_IR_OPCODE_CASE(DSEQ, SET);843NV50_IR_OPCODE_CASE(DSNE, SET);844NV50_IR_OPCODE_CASE(DRCP, RCP);845NV50_IR_OPCODE_CASE(DSQRT, SQRT);846NV50_IR_OPCODE_CASE(DMAD, MAD);847NV50_IR_OPCODE_CASE(DFMA, FMA);848NV50_IR_OPCODE_CASE(D2I, CVT);849NV50_IR_OPCODE_CASE(D2U, CVT);850NV50_IR_OPCODE_CASE(I2D, CVT);851NV50_IR_OPCODE_CASE(U2D, CVT);852NV50_IR_OPCODE_CASE(DRSQ, RSQ);853NV50_IR_OPCODE_CASE(DTRUNC, TRUNC);854NV50_IR_OPCODE_CASE(DCEIL, CEIL);855NV50_IR_OPCODE_CASE(DFLR, FLOOR);856NV50_IR_OPCODE_CASE(DROUND, CVT);857858NV50_IR_OPCODE_CASE(U64SEQ, SET);859NV50_IR_OPCODE_CASE(U64SNE, SET);860NV50_IR_OPCODE_CASE(U64SLT, SET);861NV50_IR_OPCODE_CASE(U64SGE, SET);862NV50_IR_OPCODE_CASE(I64SLT, SET);863NV50_IR_OPCODE_CASE(I64SGE, SET);864NV50_IR_OPCODE_CASE(I2I64, CVT);865NV50_IR_OPCODE_CASE(U2I64, CVT);866NV50_IR_OPCODE_CASE(F2I64, CVT);867NV50_IR_OPCODE_CASE(F2U64, CVT);868NV50_IR_OPCODE_CASE(D2I64, CVT);869NV50_IR_OPCODE_CASE(D2U64, CVT);870NV50_IR_OPCODE_CASE(I642F, CVT);871NV50_IR_OPCODE_CASE(U642F, CVT);872NV50_IR_OPCODE_CASE(I642D, CVT);873NV50_IR_OPCODE_CASE(U642D, CVT);874875NV50_IR_OPCODE_CASE(I64MIN, MIN);876NV50_IR_OPCODE_CASE(U64MIN, MIN);877NV50_IR_OPCODE_CASE(I64MAX, MAX);878NV50_IR_OPCODE_CASE(U64MAX, MAX);879NV50_IR_OPCODE_CASE(I64ABS, ABS);880NV50_IR_OPCODE_CASE(I64NEG, NEG);881NV50_IR_OPCODE_CASE(U64ADD, ADD);882NV50_IR_OPCODE_CASE(U64MUL, MUL);883NV50_IR_OPCODE_CASE(U64SHL, SHL);884NV50_IR_OPCODE_CASE(I64SHR, SHR);885NV50_IR_OPCODE_CASE(U64SHR, SHR);886887NV50_IR_OPCODE_CASE(IMUL_HI, MUL);888NV50_IR_OPCODE_CASE(UMUL_HI, MUL);889890NV50_IR_OPCODE_CASE(SAMPLE, TEX);891NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);892NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);893NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);894NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);895NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);896NV50_IR_OPCODE_CASE(SAMPLE_I, TXF);897NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF);898NV50_IR_OPCODE_CASE(GATHER4, TXG);899NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);900901NV50_IR_OPCODE_CASE(ATOMUADD, ATOM);902NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM);903NV50_IR_OPCODE_CASE(ATOMCAS, ATOM);904NV50_IR_OPCODE_CASE(ATOMAND, ATOM);905NV50_IR_OPCODE_CASE(ATOMOR, ATOM);906NV50_IR_OPCODE_CASE(ATOMXOR, ATOM);907NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM);908NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM);909NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM);910NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM);911NV50_IR_OPCODE_CASE(ATOMFADD, ATOM);912NV50_IR_OPCODE_CASE(ATOMDEC_WRAP, ATOM);913NV50_IR_OPCODE_CASE(ATOMINC_WRAP, ATOM);914915NV50_IR_OPCODE_CASE(TEX2, TEX);916NV50_IR_OPCODE_CASE(TXB2, TXB);917NV50_IR_OPCODE_CASE(TXL2, TXL);918919NV50_IR_OPCODE_CASE(IBFE, EXTBF);920NV50_IR_OPCODE_CASE(UBFE, EXTBF);921NV50_IR_OPCODE_CASE(BFI, INSBF);922NV50_IR_OPCODE_CASE(BREV, EXTBF);923NV50_IR_OPCODE_CASE(POPC, POPCNT);924NV50_IR_OPCODE_CASE(LSB, BFIND);925NV50_IR_OPCODE_CASE(IMSB, BFIND);926NV50_IR_OPCODE_CASE(UMSB, BFIND);927928NV50_IR_OPCODE_CASE(VOTE_ALL, VOTE);929NV50_IR_OPCODE_CASE(VOTE_ANY, VOTE);930NV50_IR_OPCODE_CASE(VOTE_EQ, VOTE);931932NV50_IR_OPCODE_CASE(BALLOT, VOTE);933NV50_IR_OPCODE_CASE(READ_INVOC, SHFL);934NV50_IR_OPCODE_CASE(READ_FIRST, SHFL);935936NV50_IR_OPCODE_CASE(END, EXIT);937938default:939return nv50_ir::OP_NOP;940}941}942943static uint16_t opcodeToSubOp(uint opcode)944{945switch (opcode) {946case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD;947case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH;948case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS;949case TGSI_OPCODE_ATOMAND: return NV50_IR_SUBOP_ATOM_AND;950case TGSI_OPCODE_ATOMOR: return NV50_IR_SUBOP_ATOM_OR;951case TGSI_OPCODE_ATOMXOR: return NV50_IR_SUBOP_ATOM_XOR;952case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN;953case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN;954case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX;955case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX;956case TGSI_OPCODE_ATOMFADD: return NV50_IR_SUBOP_ATOM_ADD;957case TGSI_OPCODE_ATOMDEC_WRAP: return NV50_IR_SUBOP_ATOM_DEC;958case TGSI_OPCODE_ATOMINC_WRAP: return NV50_IR_SUBOP_ATOM_INC;959case TGSI_OPCODE_IMUL_HI:960case TGSI_OPCODE_UMUL_HI:961return NV50_IR_SUBOP_MUL_HIGH;962case TGSI_OPCODE_VOTE_ALL: return NV50_IR_SUBOP_VOTE_ALL;963case TGSI_OPCODE_VOTE_ANY: return NV50_IR_SUBOP_VOTE_ANY;964case TGSI_OPCODE_VOTE_EQ: return NV50_IR_SUBOP_VOTE_UNI;965default:966return 0;967}968}969970bool Instruction::checkDstSrcAliasing() const971{972if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory973return false;974975for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {976if (insn->Src[s].Register.File == TGSI_FILE_NULL)977break;978if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&979insn->Src[s].Register.Index == insn->Dst[0].Register.Index)980return true;981}982return false;983}984985class Source986{987public:988Source(struct nv50_ir_prog_info *, struct nv50_ir_prog_info_out *, nv50_ir::Program *);989~Source();990991public:992bool scanSource();993unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }994995public:996struct tgsi_shader_info scan;997struct tgsi_full_instruction *insns;998const struct tgsi_token *tokens;999struct nv50_ir_prog_info *info;1000struct nv50_ir_prog_info_out *info_out;10011002nv50_ir::DynArray tempArrays;1003nv50_ir::DynArray immdArrays;10041005typedef nv50_ir::BuildUtil::Location Location;1006// these registers are per-subroutine, cannot be used for parameter passing1007std::set<Location> locals;10081009std::set<int> indirectTempArrays;1010std::map<int, int> indirectTempOffsets;1011std::map<int, std::pair<int, int> > tempArrayInfo;1012std::vector<int> tempArrayId;10131014std::map<int, int> bufferIds;1015std::map<int, int> imageIds;10161017int clipVertexOutput;10181019struct TextureView {1020uint8_t target; // TGSI_TEXTURE_*1021};1022std::vector<TextureView> textureViews;10231024/*1025struct Resource {1026uint8_t target; // TGSI_TEXTURE_*1027bool raw;1028uint8_t slot; // $surface index1029};1030std::vector<Resource> resources;1031*/10321033struct MemoryFile {1034uint8_t mem_type; // TGSI_MEMORY_TYPE_*1035};1036std::vector<MemoryFile> memoryFiles;10371038std::vector<bool> bufferAtomics;10391040struct {1041uint16_t count; /* count of inline immediates */1042uint32_t *data; /* inline immediate data */1043} immd;10441045private:1046int gmemSlot;1047nv50_ir::Program *prog;1048int inferSysValDirection(unsigned sn) const;1049bool scanDeclaration(const struct tgsi_full_declaration *);1050bool scanInstruction(const struct tgsi_full_instruction *);1051void scanInstructionSrc(const Instruction& insn,1052const Instruction::SrcRegister& src,1053unsigned mask);1054void scanProperty(const struct tgsi_full_property *);1055void scanImmediate(const struct tgsi_full_immediate *);10561057inline bool isEdgeFlagPassthrough(const Instruction&) const;1058};10591060Source::Source(struct nv50_ir_prog_info *info, struct nv50_ir_prog_info_out *info_out,1061nv50_ir::Program *prog)1062: insns(NULL), info(info), info_out(info_out), clipVertexOutput(-1),1063gmemSlot(0), prog(prog)1064{1065tokens = (const struct tgsi_token *)info->bin.source;10661067if (info->dbgFlags & NV50_IR_DEBUG_BASIC)1068tgsi_dump(tokens, 0);10691070tgsi_scan_shader(tokens, &scan);10711072immd.count = 0;1073immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);1074}10751076Source::~Source()1077{1078if (insns)1079FREE(insns);10801081if (immd.data)1082FREE(immd.data);1083}10841085bool Source::scanSource()1086{1087unsigned insnCount = 0;1088struct tgsi_parse_context parse;10891090insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *1091sizeof(insns[0]));1092if (!insns)1093return false;10941095textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);1096//resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);1097tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);1098memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);1099bufferAtomics.resize(scan.file_max[TGSI_FILE_BUFFER] + 1);11001101info_out->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;1102info_out->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;1103info_out->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;11041105if (info->type == PIPE_SHADER_FRAGMENT) {1106info_out->prop.fp.writesDepth = scan.writes_z;1107info_out->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase;1108} else1109if (info->type == PIPE_SHADER_GEOMETRY) {1110info_out->prop.gp.instanceCount = 1; // default value1111}11121113info->io.viewportId = -1;11141115tgsi_parse_init(&parse, tokens);1116while (!tgsi_parse_end_of_tokens(&parse)) {1117tgsi_parse_token(&parse);11181119switch (parse.FullToken.Token.Type) {1120case TGSI_TOKEN_TYPE_IMMEDIATE:1121scanImmediate(&parse.FullToken.FullImmediate);1122break;1123case TGSI_TOKEN_TYPE_DECLARATION:1124scanDeclaration(&parse.FullToken.FullDeclaration);1125break;1126case TGSI_TOKEN_TYPE_INSTRUCTION:1127insns[insnCount++] = parse.FullToken.FullInstruction;1128scanInstruction(&parse.FullToken.FullInstruction);1129break;1130case TGSI_TOKEN_TYPE_PROPERTY:1131scanProperty(&parse.FullToken.FullProperty);1132break;1133default:1134INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);1135break;1136}1137}1138tgsi_parse_free(&parse);11391140if (indirectTempArrays.size()) {1141int tempBase = 0;1142for (std::set<int>::const_iterator it = indirectTempArrays.begin();1143it != indirectTempArrays.end(); ++it) {1144std::pair<int, int>& info = tempArrayInfo[*it];1145indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first));1146tempBase += info.second;1147}1148info_out->bin.tlsSpace += tempBase * 16;1149}11501151if (info_out->io.genUserClip > 0) {1152info_out->io.clipDistances = info_out->io.genUserClip;11531154const unsigned int nOut = (info_out->io.genUserClip + 3) / 4;11551156for (unsigned int n = 0; n < nOut; ++n) {1157unsigned int i = info_out->numOutputs++;1158info_out->out[i].id = i;1159info_out->out[i].sn = TGSI_SEMANTIC_CLIPDIST;1160info_out->out[i].si = n;1161info_out->out[i].mask = ((1 << info_out->io.clipDistances) - 1) >> (n * 4);1162}1163}11641165return info->assignSlots(info_out) == 0;1166}11671168void Source::scanProperty(const struct tgsi_full_property *prop)1169{1170switch (prop->Property.PropertyName) {1171case TGSI_PROPERTY_GS_OUTPUT_PRIM:1172info_out->prop.gp.outputPrim = prop->u[0].Data;1173break;1174case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:1175info_out->prop.gp.maxVertices = prop->u[0].Data;1176break;1177case TGSI_PROPERTY_GS_INVOCATIONS:1178info_out->prop.gp.instanceCount = prop->u[0].Data;1179break;1180case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:1181info_out->prop.fp.separateFragData = true;1182break;1183case TGSI_PROPERTY_FS_COORD_ORIGIN:1184case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:1185case TGSI_PROPERTY_FS_DEPTH_LAYOUT:1186case TGSI_PROPERTY_GS_INPUT_PRIM:1187case TGSI_PROPERTY_FS_BLEND_EQUATION_ADVANCED:1188// we don't care1189break;1190case TGSI_PROPERTY_VS_PROHIBIT_UCPS:1191info_out->io.genUserClip = -1;1192break;1193case TGSI_PROPERTY_TCS_VERTICES_OUT:1194info_out->prop.tp.outputPatchSize = prop->u[0].Data;1195break;1196case TGSI_PROPERTY_TES_PRIM_MODE:1197info_out->prop.tp.domain = prop->u[0].Data;1198break;1199case TGSI_PROPERTY_TES_SPACING:1200info_out->prop.tp.partitioning = prop->u[0].Data;1201break;1202case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:1203info_out->prop.tp.winding = prop->u[0].Data;1204break;1205case TGSI_PROPERTY_TES_POINT_MODE:1206if (prop->u[0].Data)1207info_out->prop.tp.outputPrim = PIPE_PRIM_POINTS;1208else1209info_out->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */1210break;1211case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:1212info->prop.cp.numThreads[0] = prop->u[0].Data;1213break;1214case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:1215info->prop.cp.numThreads[1] = prop->u[0].Data;1216break;1217case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:1218info->prop.cp.numThreads[2] = prop->u[0].Data;1219break;1220case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:1221info_out->io.clipDistances = prop->u[0].Data;1222break;1223case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:1224info_out->io.cullDistances = prop->u[0].Data;1225break;1226case TGSI_PROPERTY_NEXT_SHADER:1227/* Do not need to know the next shader stage. */1228break;1229case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:1230info_out->prop.fp.earlyFragTests = prop->u[0].Data;1231break;1232case TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE:1233info_out->prop.fp.postDepthCoverage = prop->u[0].Data;1234break;1235case TGSI_PROPERTY_MUL_ZERO_WINS:1236info->io.mul_zero_wins = prop->u[0].Data;1237break;1238case TGSI_PROPERTY_LAYER_VIEWPORT_RELATIVE:1239info_out->io.layer_viewport_relative = prop->u[0].Data;1240break;1241default:1242INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);1243break;1244}1245}12461247void Source::scanImmediate(const struct tgsi_full_immediate *imm)1248{1249const unsigned n = immd.count++;12501251assert(n < scan.immediate_count);12521253for (int c = 0; c < 4; ++c)1254immd.data[n * 4 + c] = imm->u[c].Uint;1255}12561257int Source::inferSysValDirection(unsigned sn) const1258{1259switch (sn) {1260case TGSI_SEMANTIC_INSTANCEID:1261case TGSI_SEMANTIC_VERTEXID:1262return 1;1263case TGSI_SEMANTIC_LAYER:1264#if 01265case TGSI_SEMANTIC_VIEWPORTINDEX:1266return 0;1267#endif1268case TGSI_SEMANTIC_PRIMID:1269return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;1270default:1271return 0;1272}1273}12741275bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)1276{1277unsigned i, c;1278unsigned sn = TGSI_SEMANTIC_GENERIC;1279unsigned si = 0;1280const unsigned first = decl->Range.First, last = decl->Range.Last;1281const int arrayId = decl->Array.ArrayID;12821283if (decl->Declaration.Semantic) {1284sn = decl->Semantic.Name;1285si = decl->Semantic.Index;1286}12871288if (decl->Declaration.Local || decl->Declaration.File == TGSI_FILE_ADDRESS) {1289for (i = first; i <= last; ++i) {1290for (c = 0; c < 4; ++c) {1291locals.insert(1292Location(decl->Declaration.File, decl->Dim.Index2D, i, c));1293}1294}1295}12961297switch (decl->Declaration.File) {1298case TGSI_FILE_INPUT:1299if (info->type == PIPE_SHADER_VERTEX) {1300// all vertex attributes are equal1301for (i = first; i <= last; ++i) {1302info_out->in[i].sn = TGSI_SEMANTIC_GENERIC;1303info_out->in[i].si = i;1304}1305} else {1306for (i = first; i <= last; ++i, ++si) {1307info_out->in[i].id = i;1308info_out->in[i].sn = sn;1309info_out->in[i].si = si;1310if (info->type == PIPE_SHADER_FRAGMENT) {1311// translate interpolation mode1312switch (decl->Interp.Interpolate) {1313case TGSI_INTERPOLATE_CONSTANT:1314info_out->in[i].flat = 1;1315break;1316case TGSI_INTERPOLATE_COLOR:1317info_out->in[i].sc = 1;1318break;1319case TGSI_INTERPOLATE_LINEAR:1320info_out->in[i].linear = 1;1321break;1322default:1323break;1324}1325if (decl->Interp.Location)1326info_out->in[i].centroid = 1;1327}13281329if (sn == TGSI_SEMANTIC_PATCH)1330info_out->in[i].patch = 1;1331if (sn == TGSI_SEMANTIC_PATCH)1332info_out->numPatchConstants = MAX2(info_out->numPatchConstants, si + 1);1333}1334}1335break;1336case TGSI_FILE_OUTPUT:1337for (i = first; i <= last; ++i, ++si) {1338switch (sn) {1339case TGSI_SEMANTIC_POSITION:1340if (info->type == PIPE_SHADER_FRAGMENT)1341info_out->io.fragDepth = i;1342else1343if (clipVertexOutput < 0)1344clipVertexOutput = i;1345break;1346case TGSI_SEMANTIC_COLOR:1347if (info->type == PIPE_SHADER_FRAGMENT)1348info_out->prop.fp.numColourResults++;1349break;1350case TGSI_SEMANTIC_EDGEFLAG:1351info_out->io.edgeFlagOut = i;1352break;1353case TGSI_SEMANTIC_CLIPVERTEX:1354clipVertexOutput = i;1355break;1356case TGSI_SEMANTIC_CLIPDIST:1357info_out->io.genUserClip = -1;1358break;1359case TGSI_SEMANTIC_SAMPLEMASK:1360info_out->io.sampleMask = i;1361break;1362case TGSI_SEMANTIC_VIEWPORT_INDEX:1363info->io.viewportId = i;1364break;1365case TGSI_SEMANTIC_PATCH:1366info_out->numPatchConstants = MAX2(info_out->numPatchConstants, si + 1);1367FALLTHROUGH;1368case TGSI_SEMANTIC_TESSOUTER:1369case TGSI_SEMANTIC_TESSINNER:1370info_out->out[i].patch = 1;1371break;1372default:1373break;1374}1375info_out->out[i].id = i;1376info_out->out[i].sn = sn;1377info_out->out[i].si = si;1378}1379break;1380case TGSI_FILE_SYSTEM_VALUE:1381switch (sn) {1382case TGSI_SEMANTIC_INSTANCEID:1383info_out->io.instanceId = first;1384break;1385case TGSI_SEMANTIC_VERTEXID:1386info_out->io.vertexId = first;1387break;1388case TGSI_SEMANTIC_BASEVERTEX:1389case TGSI_SEMANTIC_BASEINSTANCE:1390case TGSI_SEMANTIC_DRAWID:1391info_out->prop.vp.usesDrawParameters = true;1392break;1393case TGSI_SEMANTIC_SAMPLEID:1394case TGSI_SEMANTIC_SAMPLEPOS:1395prog->persampleInvocation = true;1396break;1397case TGSI_SEMANTIC_SAMPLEMASK:1398info_out->prop.fp.usesSampleMaskIn = true;1399break;1400default:1401break;1402}1403for (i = first; i <= last; ++i, ++si) {1404info_out->sv[i].sn = sn;1405info_out->sv[i].si = si;1406info_out->sv[i].input = inferSysValDirection(sn);14071408switch (sn) {1409case TGSI_SEMANTIC_TESSOUTER:1410case TGSI_SEMANTIC_TESSINNER:1411info_out->sv[i].patch = 1;1412break;1413}1414}1415break;1416/*1417case TGSI_FILE_RESOURCE:1418for (i = first; i <= last; ++i) {1419resources[i].target = decl->Resource.Resource;1420resources[i].raw = decl->Resource.Raw;1421resources[i].slot = i;1422}1423break;1424*/1425case TGSI_FILE_SAMPLER_VIEW:1426for (i = first; i <= last; ++i)1427textureViews[i].target = decl->SamplerView.Resource;1428break;1429case TGSI_FILE_MEMORY:1430for (i = first; i <= last; ++i)1431memoryFiles[i].mem_type = decl->Declaration.MemType;1432break;1433case TGSI_FILE_NULL:1434case TGSI_FILE_TEMPORARY:1435for (i = first; i <= last; ++i)1436tempArrayId[i] = arrayId;1437if (arrayId)1438tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(1439first, last - first + 1)));1440break;1441case TGSI_FILE_BUFFER:1442for (i = first; i <= last; ++i)1443bufferAtomics[i] = decl->Declaration.Atomic;1444if (info->type == PIPE_SHADER_COMPUTE && info->target < NVISA_GF100_CHIPSET) {1445for (i = first; i <= last; i++) {1446bufferIds.insert(std::make_pair(i, gmemSlot));1447info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 0, .slot = i};1448assert(gmemSlot < 16);1449}1450}1451break;1452case TGSI_FILE_IMAGE:1453if (info->type == PIPE_SHADER_COMPUTE && info->target < NVISA_GF100_CHIPSET) {1454for (i = first; i <= last; i++) {1455imageIds.insert(std::make_pair(i, gmemSlot));1456info_out->prop.cp.gmem[gmemSlot++] = {.valid = 1, .image = 1, .slot = i};1457assert(gmemSlot < 16);1458}1459}1460break;1461case TGSI_FILE_ADDRESS:1462case TGSI_FILE_CONSTANT:1463case TGSI_FILE_IMMEDIATE:1464case TGSI_FILE_SAMPLER:1465break;1466default:1467ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);1468return false;1469}1470return true;1471}14721473inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const1474{1475return insn.getOpcode() == TGSI_OPCODE_MOV &&1476insn.getDst(0).getIndex(0) == info_out->io.edgeFlagOut &&1477insn.getSrc(0).getFile() == TGSI_FILE_INPUT;1478}14791480void Source::scanInstructionSrc(const Instruction& insn,1481const Instruction::SrcRegister& src,1482unsigned mask)1483{1484if (src.getFile() == TGSI_FILE_TEMPORARY) {1485if (src.isIndirect(0))1486indirectTempArrays.insert(src.getArrayId());1487} else1488if (src.getFile() == TGSI_FILE_OUTPUT) {1489if (src.isIndirect(0)) {1490// We don't know which one is accessed, just mark everything for1491// reading. This is an extremely unlikely occurrence.1492for (unsigned i = 0; i < info_out->numOutputs; ++i)1493info_out->out[i].oread = 1;1494} else {1495info_out->out[src.getIndex(0)].oread = 1;1496}1497}1498if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) {1499if (info_out->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS)1500info_out->prop.fp.readsSampleLocations = true;1501}1502if (src.getFile() != TGSI_FILE_INPUT)1503return;15041505if (src.isIndirect(0)) {1506for (unsigned i = 0; i < info_out->numInputs; ++i)1507info_out->in[i].mask = 0xf;1508} else {1509const int i = src.getIndex(0);1510for (unsigned c = 0; c < 4; ++c) {1511if (!(mask & (1 << c)))1512continue;1513int k = src.getSwizzle(c);1514if (k <= TGSI_SWIZZLE_W)1515info_out->in[i].mask |= 1 << k;1516}1517switch (info_out->in[i].sn) {1518case TGSI_SEMANTIC_PSIZE:1519case TGSI_SEMANTIC_PRIMID:1520case TGSI_SEMANTIC_FOG:1521info_out->in[i].mask &= 0x1;1522break;1523case TGSI_SEMANTIC_PCOORD:1524info_out->in[i].mask &= 0x3;1525break;1526default:1527break;1528}1529}1530}15311532bool Source::scanInstruction(const struct tgsi_full_instruction *inst)1533{1534Instruction insn(inst);15351536if (insn.getOpcode() == TGSI_OPCODE_BARRIER)1537info_out->numBarriers = 1;15381539if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)1540info_out->prop.fp.readsFramebuffer = true;15411542if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE)1543info_out->prop.fp.readsSampleLocations = true;15441545if (insn.getOpcode() == TGSI_OPCODE_DEMOTE)1546info_out->prop.fp.usesDiscard = true;15471548if (insn.dstCount()) {1549Instruction::DstRegister dst = insn.getDst(0);15501551if (insn.getOpcode() == TGSI_OPCODE_STORE &&1552dst.getFile() != TGSI_FILE_MEMORY) {1553info_out->io.globalAccess |= 0x2;15541555if (dst.getFile() == TGSI_FILE_INPUT) {1556// TODO: Handle indirect somehow?1557const int i = dst.getIndex(0);1558info_out->in[i].mask |= 1;1559}1560}15611562if (dst.getFile() == TGSI_FILE_OUTPUT) {1563if (dst.isIndirect(0))1564for (unsigned i = 0; i < info_out->numOutputs; ++i)1565info_out->out[i].mask = 0xf;1566else1567info_out->out[dst.getIndex(0)].mask |= dst.getMask();15681569if (info_out->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||1570info_out->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||1571info_out->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER ||1572info_out->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX ||1573info_out->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)1574info_out->out[dst.getIndex(0)].mask &= 1;15751576if (isEdgeFlagPassthrough(insn))1577info_out->io.edgeFlagIn = insn.getSrc(0).getIndex(0);1578} else1579if (dst.getFile() == TGSI_FILE_TEMPORARY) {1580if (dst.isIndirect(0))1581indirectTempArrays.insert(dst.getArrayId());1582} else1583if (dst.getFile() == TGSI_FILE_BUFFER ||1584dst.getFile() == TGSI_FILE_IMAGE ||1585(dst.getFile() == TGSI_FILE_MEMORY &&1586memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {1587info_out->io.globalAccess |= 0x2;1588}1589}15901591if (insn.srcCount() && (1592insn.getSrc(0).getFile() != TGSI_FILE_MEMORY ||1593memoryFiles[insn.getSrc(0).getIndex(0)].mem_type ==1594TGSI_MEMORY_TYPE_GLOBAL)) {1595switch (insn.getOpcode()) {1596case TGSI_OPCODE_ATOMUADD:1597case TGSI_OPCODE_ATOMXCHG:1598case TGSI_OPCODE_ATOMCAS:1599case TGSI_OPCODE_ATOMAND:1600case TGSI_OPCODE_ATOMOR:1601case TGSI_OPCODE_ATOMXOR:1602case TGSI_OPCODE_ATOMUMIN:1603case TGSI_OPCODE_ATOMIMIN:1604case TGSI_OPCODE_ATOMUMAX:1605case TGSI_OPCODE_ATOMIMAX:1606case TGSI_OPCODE_ATOMFADD:1607case TGSI_OPCODE_ATOMDEC_WRAP:1608case TGSI_OPCODE_ATOMINC_WRAP:1609case TGSI_OPCODE_LOAD:1610info_out->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?16110x1 : 0x2;1612break;1613}1614}161516161617for (unsigned s = 0; s < insn.srcCount(); ++s)1618scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));16191620for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s)1621scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask());16221623return true;1624}16251626nv50_ir::TexInstruction::Target1627Instruction::getTexture(const tgsi::Source *code, int s) const1628{1629// XXX: indirect access1630unsigned int r;16311632switch (getSrc(s).getFile()) {1633/*1634case TGSI_FILE_RESOURCE:1635r = getSrc(s).getIndex(0);1636return translateTexture(code->resources.at(r).target);1637*/1638case TGSI_FILE_SAMPLER_VIEW:1639r = getSrc(s).getIndex(0);1640return translateTexture(code->textureViews.at(r).target);1641default:1642return translateTexture(insn->Texture.Texture);1643}1644}16451646} // namespace tgsi16471648namespace {16491650using namespace nv50_ir;16511652class Converter : public ConverterCommon1653{1654public:1655Converter(Program *, const tgsi::Source *, nv50_ir_prog_info_out *);1656~Converter();16571658bool run();16591660private:1661Value *shiftAddress(Value *);1662Value *getVertexBase(int s);1663Value *getOutputBase(int s);1664DataArray *getArrayForFile(unsigned file, int idx);1665Value *fetchSrc(int s, int c);1666Value *fetchDst(int d, int c);1667Value *acquireDst(int d, int c);1668void storeDst(int d, int c, Value *);16691670Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);1671void storeDst(const tgsi::Instruction::DstRegister dst, int c,1672Value *val, Value *ptr);16731674void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;1675Value *applySrcMod(Value *, int s, int c);16761677Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);1678Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);1679Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);16801681bool isSubGroupMask(uint8_t semantic);16821683bool handleInstruction(const struct tgsi_full_instruction *);1684void exportOutputs();1685inline bool isEndOfSubroutine(uint ip);16861687void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);16881689// R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)1690void setTexRS(TexInstruction *, unsigned int& s, int R, int S);1691void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);1692void handleTXF(Value *dst0[4], int R, int L_M);1693void handleTXQ(Value *dst0[4], enum TexQuery, int R);1694void handleFBFETCH(Value *dst0[4]);1695void handleLIT(Value *dst0[4]);16961697// Symbol *getResourceBase(int r);1698void getImageCoords(std::vector<Value *>&, int s);1699int remapImageId(int);1700int remapBufferId(int);17011702void handleLOAD(Value *dst0[4]);1703void handleSTORE();1704void handleATOM(Value *dst0[4], DataType, uint16_t subOp);17051706void handleINTERP(Value *dst0[4]);17071708Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);17091710void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);17111712Value *buildDot(int dim);17131714class BindArgumentsPass : public Pass {1715public:1716BindArgumentsPass(Converter &conv) : conv(conv), sub(NULL) { }17171718private:1719Converter &conv;1720Subroutine *sub;17211722inline const Location *getValueLocation(Subroutine *, Value *);17231724template<typename T> inline void1725updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),1726T (Function::*proto));17271728template<typename T> inline void1729updatePrototype(BitSet *set, void (Function::*updateSet)(),1730T (Function::*proto));17311732protected:1733bool visit(Function *);1734bool visit(BasicBlock *bb) { return false; }1735};17361737private:1738const tgsi::Source *code;17391740uint ip; // instruction pointer17411742tgsi::Instruction tgsi;17431744DataType dstTy;1745DataType srcTy;17461747DataArray tData; // TGSI_FILE_TEMPORARY1748DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays1749DataArray aData; // TGSI_FILE_ADDRESS1750DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)17511752Value *zero;17531754Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)1755uint8_t vtxBaseValid;17561757Stack condBBs; // fork BB, then else clause BB1758Stack joinBBs; // fork BB, for inserting join ops on ENDIF1759Stack loopBBs; // loop headers1760Stack breakBBs; // end of / after loop17611762Value *viewport;1763};17641765Symbol *1766Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)1767{1768const int swz = src.getSwizzle(c);17691770/* TODO: Use Array ID when it's available for the index */1771return makeSym(src.getFile(),1772src.is2D() ? src.getIndex(1) : 0,1773src.getIndex(0), swz,1774src.getIndex(0) * 16 + swz * 4);1775}17761777Symbol *1778Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)1779{1780/* TODO: Use Array ID when it's available for the index */1781return makeSym(dst.getFile(),1782dst.is2D() ? dst.getIndex(1) : 0,1783dst.getIndex(0), c,1784dst.getIndex(0) * 16 + c * 4);1785}17861787Symbol *1788Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)1789{1790Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));17911792sym->reg.fileIndex = fileIdx;17931794if (tgsiFile == TGSI_FILE_MEMORY) {1795switch (code->memoryFiles[fileIdx].mem_type) {1796case TGSI_MEMORY_TYPE_GLOBAL:1797/* No-op this is the default for TGSI_FILE_MEMORY */1798sym->setFile(FILE_MEMORY_GLOBAL);1799break;1800case TGSI_MEMORY_TYPE_SHARED:1801sym->setFile(FILE_MEMORY_SHARED);1802address += info->prop.cp.inputOffset;1803break;1804case TGSI_MEMORY_TYPE_INPUT:1805assert(prog->getType() == Program::TYPE_COMPUTE);1806assert(idx == -1);1807sym->setFile(FILE_SHADER_INPUT);1808address += info->prop.cp.inputOffset;1809break;1810default:1811assert(0); /* TODO: Add support for global and private memory */1812}1813}18141815if (idx >= 0) {1816if (sym->reg.file == FILE_SHADER_INPUT)1817sym->setOffset(info_out->in[idx].slot[c] * 4);1818else1819if (sym->reg.file == FILE_SHADER_OUTPUT)1820sym->setOffset(info_out->out[idx].slot[c] * 4);1821else1822if (sym->reg.file == FILE_SYSTEM_VALUE)1823sym->setSV(tgsi::translateSysVal(info_out->sv[idx].sn), c);1824else1825sym->setOffset(address);1826} else {1827sym->setOffset(address);1828}1829return sym;1830}18311832Value *1833Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)1834{1835operation op;18361837// XXX: no way to know interpolation mode if we don't know what's accessed1838const uint8_t mode = translateInterpMode(&info_out->in[ptr ? 0 :1839src.getIndex(0)], op);18401841Instruction *insn = new_Instruction(func, op, TYPE_F32);18421843insn->setDef(0, getScratch());1844insn->setSrc(0, srcToSym(src, c));1845if (op == OP_PINTERP)1846insn->setSrc(1, fragCoord[3]);1847if (ptr)1848insn->setIndirect(0, 0, ptr);18491850insn->setInterpolate(mode);18511852bb->insertTail(insn);1853return insn->getDef(0);1854}18551856Value *1857Converter::applySrcMod(Value *val, int s, int c)1858{1859Modifier m = tgsi.getSrc(s).getMod(c);1860DataType ty = tgsi.inferSrcType();18611862if (m & Modifier(NV50_IR_MOD_ABS))1863val = mkOp1v(OP_ABS, ty, getScratch(), val);18641865if (m & Modifier(NV50_IR_MOD_NEG))1866val = mkOp1v(OP_NEG, ty, getScratch(), val);18671868return val;1869}18701871Value *1872Converter::getVertexBase(int s)1873{1874assert(s < 5);1875if (!(vtxBaseValid & (1 << s))) {1876const int index = tgsi.getSrc(s).getIndex(1);1877Value *rel = NULL;1878if (tgsi.getSrc(s).isIndirect(1))1879rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);1880vtxBaseValid |= 1 << s;1881vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),1882mkImm(index), rel);1883}1884return vtxBase[s];1885}18861887Value *1888Converter::getOutputBase(int s)1889{1890assert(s < 5);1891if (!(vtxBaseValid & (1 << s))) {1892Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1));1893if (tgsi.getSrc(s).isIndirect(1))1894offset = mkOp2v(OP_ADD, TYPE_U32, getSSA(),1895fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),1896offset);1897vtxBaseValid |= 1 << s;1898vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset);1899}1900return vtxBase[s];1901}19021903Value *1904Converter::fetchSrc(int s, int c)1905{1906Value *res;1907Value *ptr = NULL, *dimRel = NULL;19081909tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);19101911if (src.isIndirect(0))1912ptr = fetchSrc(src.getIndirect(0), 0, NULL);19131914if (src.is2D()) {1915switch (src.getFile()) {1916case TGSI_FILE_OUTPUT:1917dimRel = getOutputBase(s);1918break;1919case TGSI_FILE_INPUT:1920dimRel = getVertexBase(s);1921break;1922case TGSI_FILE_CONSTANT:1923// on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]1924if (src.isIndirect(1))1925dimRel = fetchSrc(src.getIndirect(1), 0, 0);1926break;1927default:1928break;1929}1930}19311932res = fetchSrc(src, c, ptr);19331934if (dimRel)1935res->getInsn()->setIndirect(0, 1, dimRel);19361937return applySrcMod(res, s, c);1938}19391940Value *1941Converter::fetchDst(int d, int c)1942{1943Value *res;1944Value *ptr = NULL, *dimRel = NULL;19451946tgsi::Instruction::DstRegister dst = tgsi.getDst(d);19471948if (dst.isIndirect(0))1949ptr = fetchSrc(dst.getIndirect(0), 0, NULL);19501951if (dst.is2D()) {1952switch (dst.getFile()) {1953case TGSI_FILE_OUTPUT:1954assert(0); // TODO1955dimRel = NULL;1956break;1957case TGSI_FILE_INPUT:1958assert(0); // TODO1959dimRel = NULL;1960break;1961case TGSI_FILE_CONSTANT:1962// on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]1963if (dst.isIndirect(1))1964dimRel = fetchSrc(dst.getIndirect(1), 0, 0);1965break;1966default:1967break;1968}1969}19701971struct tgsi_full_src_register fsr = dst.asSrc();1972tgsi::Instruction::SrcRegister src(&fsr);1973res = fetchSrc(src, c, ptr);19741975if (dimRel)1976res->getInsn()->setIndirect(0, 1, dimRel);19771978return res;1979}19801981Converter::DataArray *1982Converter::getArrayForFile(unsigned file, int idx)1983{1984switch (file) {1985case TGSI_FILE_TEMPORARY:1986return idx == 0 ? &tData : &lData;1987case TGSI_FILE_ADDRESS:1988return &aData;1989case TGSI_FILE_OUTPUT:1990assert(prog->getType() == Program::TYPE_FRAGMENT);1991return &oData;1992default:1993assert(!"invalid/unhandled TGSI source file");1994return NULL;1995}1996}19971998Value *1999Converter::shiftAddress(Value *index)2000{2001if (!index)2002return NULL;2003return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));2004}20052006void2007Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const2008{2009std::map<int, int>::const_iterator it =2010code->indirectTempOffsets.find(arrayId);2011if (it == code->indirectTempOffsets.end())2012return;20132014idx2d = 1;2015idx += it->second;2016}20172018bool2019Converter::isSubGroupMask(uint8_t semantic)2020{2021switch (semantic) {2022case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:2023case TGSI_SEMANTIC_SUBGROUP_LT_MASK:2024case TGSI_SEMANTIC_SUBGROUP_LE_MASK:2025case TGSI_SEMANTIC_SUBGROUP_GT_MASK:2026case TGSI_SEMANTIC_SUBGROUP_GE_MASK:2027return true;2028default:2029return false;2030}2031}20322033Value *2034Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)2035{2036int idx2d = src.is2D() ? src.getIndex(1) : 0;2037int idx = src.getIndex(0);2038const int swz = src.getSwizzle(c);2039Instruction *ld;20402041switch (src.getFile()) {2042case TGSI_FILE_IMMEDIATE:2043assert(!ptr);2044return loadImm(NULL, code->immd.data[idx * 4 + swz]);2045case TGSI_FILE_CONSTANT:2046return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));2047case TGSI_FILE_INPUT:2048if (prog->getType() == Program::TYPE_FRAGMENT) {2049// don't load masked inputs, won't be assigned a slot2050if (!ptr && !(info_out->in[idx].mask & (1 << swz)))2051return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);2052return interpolate(src, c, shiftAddress(ptr));2053} else2054if (prog->getType() == Program::TYPE_GEOMETRY) {2055if (!ptr && info_out->in[idx].sn == TGSI_SEMANTIC_PRIMID)2056return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));2057// XXX: This is going to be a problem with scalar arrays, i.e. when2058// we cannot assume that the address is given in units of vec4.2059//2060// nv50 and nvc0 need different things here, so let the lowering2061// passes decide what to do with the address2062if (ptr)2063return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);2064}2065ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));2066ld->perPatch = info_out->in[idx].patch;2067return ld->getDef(0);2068case TGSI_FILE_OUTPUT:2069assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);2070ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));2071ld->perPatch = info_out->out[idx].patch;2072return ld->getDef(0);2073case TGSI_FILE_SYSTEM_VALUE:2074assert(!ptr);2075if (info_out->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&2076info->prop.cp.numThreads[swz] == 1)2077return loadImm(NULL, 0u);2078if (isSubGroupMask(info_out->sv[idx].sn) && swz > 0)2079return loadImm(NULL, 0u);2080if (info_out->sv[idx].sn == TGSI_SEMANTIC_SUBGROUP_SIZE)2081return loadImm(NULL, 32u);2082ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));2083ld->perPatch = info_out->sv[idx].patch;2084return ld->getDef(0);2085case TGSI_FILE_TEMPORARY: {2086int arrayid = src.getArrayId();2087if (!arrayid)2088arrayid = code->tempArrayId[idx];2089adjustTempIndex(arrayid, idx, idx2d);2090}2091FALLTHROUGH;2092default:2093return getArrayForFile(src.getFile(), idx2d)->load(2094sub.cur->values, idx, swz, shiftAddress(ptr));2095}2096}20972098Value *2099Converter::acquireDst(int d, int c)2100{2101const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);2102const unsigned f = dst.getFile();2103int idx = dst.getIndex(0);2104int idx2d = dst.is2D() ? dst.getIndex(1) : 0;21052106if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY ||2107f == TGSI_FILE_IMAGE)2108return NULL;21092110if (dst.isIndirect(0) ||2111f == TGSI_FILE_SYSTEM_VALUE ||2112(f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))2113return getScratch();21142115if (f == TGSI_FILE_TEMPORARY) {2116int arrayid = dst.getArrayId();2117if (!arrayid)2118arrayid = code->tempArrayId[idx];2119adjustTempIndex(arrayid, idx, idx2d);2120}21212122return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);2123}21242125void2126Converter::storeDst(int d, int c, Value *val)2127{2128const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);21292130if (tgsi.getSaturate()) {2131mkOp1(OP_SAT, dstTy, val, val);2132}21332134Value *ptr = NULL;2135if (dst.isIndirect(0))2136ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL));21372138if (info_out->io.genUserClip > 0 &&2139dst.getFile() == TGSI_FILE_OUTPUT &&2140!dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {2141mkMov(clipVtx[c], val);2142val = clipVtx[c];2143}21442145storeDst(dst, c, val, ptr);2146}21472148void2149Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,2150Value *val, Value *ptr)2151{2152const unsigned f = dst.getFile();2153int idx = dst.getIndex(0);2154int idx2d = dst.is2D() ? dst.getIndex(1) : 0;21552156if (f == TGSI_FILE_SYSTEM_VALUE) {2157assert(!ptr);2158mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);2159} else2160if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {21612162if (ptr || (info_out->out[idx].mask & (1 << c))) {2163/* Save the viewport index into a scratch register so that it can be2164exported at EMIT time */2165if (info_out->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX &&2166prog->getType() == Program::TYPE_GEOMETRY &&2167viewport != NULL)2168mkOp1(OP_MOV, TYPE_U32, viewport, val);2169else2170mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch =2171info_out->out[idx].patch;2172}2173} else2174if (f == TGSI_FILE_TEMPORARY ||2175f == TGSI_FILE_ADDRESS ||2176f == TGSI_FILE_OUTPUT) {2177if (f == TGSI_FILE_TEMPORARY) {2178int arrayid = dst.getArrayId();2179if (!arrayid)2180arrayid = code->tempArrayId[idx];2181adjustTempIndex(arrayid, idx, idx2d);2182}21832184getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);2185} else {2186assert(!"invalid dst file");2187}2188}21892190#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst) \2191for (chan = 0; chan < 4; ++chan) \2192if (!inst.getDst(d).isMasked(chan))21932194Value *2195Converter::buildDot(int dim)2196{2197assert(dim > 0);21982199Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);2200Value *dotp = getScratch();22012202mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1)2203->dnz = info->io.mul_zero_wins;22042205for (int c = 1; c < dim; ++c) {2206src0 = fetchSrc(0, c);2207src1 = fetchSrc(1, c);2208mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp)2209->dnz = info->io.mul_zero_wins;2210}2211return dotp;2212}22132214void2215Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)2216{2217FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);2218join->fixed = 1;2219conv->insertHead(join);22202221assert(!fork->joinAt);2222fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);2223fork->insertBefore(fork->getExit(), fork->joinAt);2224}22252226void2227Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)2228{2229unsigned rIdx = 0, sIdx = 0;22302231if (R >= 0 && tgsi.getSrc(R).getFile() != TGSI_FILE_SAMPLER) {2232// This is the bindless case. We have to get the actual value and pass2233// it in. This will be the complete handle.2234tex->tex.rIndirectSrc = s;2235tex->setSrc(s++, fetchSrc(R, 0));2236tex->setTexture(tgsi.getTexture(code, R), 0xff, 0x1f);2237tex->tex.bindless = true;2238return;2239}22402241if (R >= 0)2242rIdx = tgsi.getSrc(R).getIndex(0);2243if (S >= 0)2244sIdx = tgsi.getSrc(S).getIndex(0);22452246tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);22472248if (tgsi.getSrc(R).isIndirect(0)) {2249tex->tex.rIndirectSrc = s;2250tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));2251}2252if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {2253tex->tex.sIndirectSrc = s;2254tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));2255}2256}22572258void2259Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)2260{2261TexInstruction *tex = new_TexInstruction(func, OP_TXQ);2262tex->tex.query = query;2263unsigned int c, d;22642265for (d = 0, c = 0; c < 4; ++c) {2266if (!dst0[c])2267continue;2268tex->tex.mask |= 1 << c;2269tex->setDef(d++, dst0[c]);2270}2271if (query == TXQ_DIMS)2272tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level2273else2274tex->setSrc((c = 0), zero);22752276setTexRS(tex, ++c, R, -1);22772278bb->insertTail(tex);2279}22802281void2282Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)2283{2284Value *proj = fetchSrc(0, 3);2285Instruction *insn = proj->getUniqueInsn();2286int c;22872288if (insn->op == OP_PINTERP) {2289bb->insertTail(insn = cloneForward(func, insn));2290insn->op = OP_LINTERP;2291insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());2292insn->setSrc(1, NULL);2293proj = insn->getDef(0);2294}2295proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);22962297for (c = 0; c < 4; ++c) {2298if (!(mask & (1 << c)))2299continue;2300if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)2301continue;2302mask &= ~(1 << c);23032304bb->insertTail(insn = cloneForward(func, insn));2305insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());2306insn->setSrc(1, proj);2307dst[c] = insn->getDef(0);2308}2309if (!mask)2310return;23112312proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));23132314for (c = 0; c < 4; ++c)2315if (mask & (1 << c))2316dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);2317}23182319// order of nv50 ir sources: x y z layer lod/bias shadow2320// order of TGSI TEX sources: x y z layer shadow lod/bias2321// lowering will finally set the hw specific order (like array first on nvc0)2322void2323Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)2324{2325Value *arg[4], *src[8];2326Value *lod = NULL, *shd = NULL;2327unsigned int s, c, d;2328TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());23292330TexInstruction::Target tgt = tgsi.getTexture(code, R);23312332for (s = 0; s < tgt.getArgCount(); ++s)2333arg[s] = src[s] = fetchSrc(0, s);23342335if (tgsi.getOpcode() == TGSI_OPCODE_TEX_LZ)2336lod = loadImm(NULL, 0);2337else if (texi->op == OP_TXL || texi->op == OP_TXB)2338lod = fetchSrc(L >> 4, L & 3);23392340if (C == 0x0f)2341C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src23422343if (tgt == TEX_TARGET_CUBE_ARRAY_SHADOW) {2344switch (tgsi.getOpcode()) {2345case TGSI_OPCODE_TG4: shd = fetchSrc(1, 0); break;2346case TGSI_OPCODE_TEX2: shd = fetchSrc(1, 0); break;2347case TGSI_OPCODE_TXB2: shd = fetchSrc(1, 1); break;2348case TGSI_OPCODE_TXL2: shd = fetchSrc(1, 1); break;2349default: assert(!"unexpected opcode with cube array shadow"); break;2350}2351}2352else if (tgt.isShadow())2353shd = fetchSrc(C >> 4, C & 3);23542355if (texi->op == OP_TXD) {2356for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) {2357texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));2358texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));2359}2360}23612362// cube textures don't care about projection value, it's divided out2363if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {2364unsigned int n = tgt.getDim();2365if (shd) {2366arg[n] = shd;2367++n;2368assert(tgt.getDim() == tgt.getArgCount());2369}2370loadProjTexCoords(src, arg, (1 << n) - 1);2371if (shd)2372shd = src[n - 1];2373}23742375for (c = 0, d = 0; c < 4; ++c) {2376if (dst[c]) {2377texi->setDef(d++, dst[c]);2378texi->tex.mask |= 1 << c;2379} else {2380// NOTE: maybe hook up def too, for CSE2381}2382}2383for (s = 0; s < tgt.getArgCount(); ++s)2384texi->setSrc(s, src[s]);2385if (lod)2386texi->setSrc(s++, lod);2387if (shd)2388texi->setSrc(s++, shd);23892390setTexRS(texi, s, R, S);23912392if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)2393texi->tex.levelZero = true;2394if (prog->getType() != Program::TYPE_FRAGMENT &&2395(tgsi.getOpcode() == TGSI_OPCODE_TEX ||2396tgsi.getOpcode() == TGSI_OPCODE_TEX2 ||2397tgsi.getOpcode() == TGSI_OPCODE_TXP))2398texi->tex.levelZero = true;2399if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow())2400texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, code->immd.data);24012402texi->tex.useOffsets = tgsi.getNumTexOffsets();2403for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {2404for (c = 0; c < 3; ++c) {2405texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));2406texi->offset[s][c].setInsn(texi);2407}2408}24092410bb->insertTail(texi);2411}24122413// 1st source: xyz = coordinates, w = lod/sample2414// 2nd source: offset2415void2416Converter::handleTXF(Value *dst[4], int R, int L_M)2417{2418TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());2419int ms;2420unsigned int c, d, s;24212422texi->tex.target = tgsi.getTexture(code, R);24232424ms = texi->tex.target.isMS() ? 1 : 0;2425texi->tex.levelZero = ms; /* MS textures don't have mip-maps */24262427for (c = 0, d = 0; c < 4; ++c) {2428if (dst[c]) {2429texi->setDef(d++, dst[c]);2430texi->tex.mask |= 1 << c;2431}2432}2433for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c)2434texi->setSrc(c, fetchSrc(0, c));2435if (!ms && tgsi.getOpcode() == TGSI_OPCODE_TXF_LZ)2436texi->setSrc(c++, loadImm(NULL, 0));2437else2438texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms24392440setTexRS(texi, c, R, -1);24412442texi->tex.useOffsets = tgsi.getNumTexOffsets();2443for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {2444for (c = 0; c < 3; ++c) {2445texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));2446texi->offset[s][c].setInsn(texi);2447}2448}24492450bb->insertTail(texi);2451}24522453void2454Converter::handleFBFETCH(Value *dst[4])2455{2456TexInstruction *texi = new_TexInstruction(func, OP_TXF);2457unsigned int c, d;24582459texi->tex.target = TEX_TARGET_2D_MS_ARRAY;2460texi->tex.levelZero = 1;2461texi->tex.useOffsets = 0;24622463for (c = 0, d = 0; c < 4; ++c) {2464if (dst[c]) {2465texi->setDef(d++, dst[c]);2466texi->tex.mask |= 1 << c;2467}2468}24692470Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0));2471Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1));2472Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0));2473Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0));24742475mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z;2476mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z;2477texi->setSrc(0, x);2478texi->setSrc(1, y);2479texi->setSrc(2, z);2480texi->setSrc(3, ms);24812482texi->tex.r = texi->tex.s = -1;24832484bb->insertTail(texi);2485}24862487void2488Converter::handleLIT(Value *dst0[4])2489{2490Value *val0 = NULL;2491unsigned int mask = tgsi.getDst(0).getMask();24922493if (mask & (1 << 0))2494loadImm(dst0[0], 1.0f);24952496if (mask & (1 << 3))2497loadImm(dst0[3], 1.0f);24982499if (mask & (3 << 1)) {2500val0 = getScratch();2501mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);2502if (mask & (1 << 1))2503mkMov(dst0[1], val0);2504}25052506if (mask & (1 << 2)) {2507Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);2508Value *val1 = getScratch(), *val3 = getScratch();25092510Value *pos128 = loadImm(NULL, +127.999999f);2511Value *neg128 = loadImm(NULL, -127.999999f);25122513mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);2514mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);2515mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);2516mkOp2(OP_POW, TYPE_F32, val3, val1, val3);25172518mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0);2519}2520}25212522/* Keep this around for now as reference when adding img support2523static inline bool2524isResourceSpecial(const int r)2525{2526return (r == TGSI_RESOURCE_GLOBAL ||2527r == TGSI_RESOURCE_LOCAL ||2528r == TGSI_RESOURCE_PRIVATE ||2529r == TGSI_RESOURCE_INPUT);2530}25312532static inline bool2533isResourceRaw(const tgsi::Source *code, const int r)2534{2535return isResourceSpecial(r) || code->resources[r].raw;2536}25372538static inline nv50_ir::TexTarget2539getResourceTarget(const tgsi::Source *code, int r)2540{2541if (isResourceSpecial(r))2542return nv50_ir::TEX_TARGET_BUFFER;2543return tgsi::translateTexture(code->resources.at(r).target);2544}25452546Symbol *2547Converter::getResourceBase(const int r)2548{2549Symbol *sym = NULL;25502551switch (r) {2552case TGSI_RESOURCE_GLOBAL:2553sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL,2554info->io.auxCBSlot);2555break;2556case TGSI_RESOURCE_LOCAL:2557assert(prog->getType() == Program::TYPE_COMPUTE);2558sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,2559info->prop.cp.sharedOffset);2560break;2561case TGSI_RESOURCE_PRIVATE:2562sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,2563info->bin.tlsSpace);2564break;2565case TGSI_RESOURCE_INPUT:2566assert(prog->getType() == Program::TYPE_COMPUTE);2567sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,2568info->prop.cp.inputOffset);2569break;2570default:2571sym = new_Symbol(prog,2572nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);2573break;2574}2575return sym;2576}25772578void2579Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)2580{2581const int arg =2582TexInstruction::Target(getResourceTarget(code, r)).getArgCount();25832584for (int c = 0; c < arg; ++c)2585coords.push_back(fetchSrc(s, c));25862587// NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk2588if (r == TGSI_RESOURCE_LOCAL ||2589r == TGSI_RESOURCE_PRIVATE ||2590r == TGSI_RESOURCE_INPUT)2591coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),2592coords[0]);2593}25942595static inline int2596partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)2597{2598int n = 0;25992600while (mask) {2601if (mask & 1) {2602size[n]++;2603} else {2604if (size[n])2605comp[n = 1] = size[0] + 1;2606else2607comp[n]++;2608}2609mask >>= 1;2610}2611if (size[0] == 3) {2612n = 1;2613size[0] = (comp[0] == 1) ? 1 : 2;2614size[1] = 3 - size[0];2615comp[1] = comp[0] + size[0];2616}2617return n + 1;2618}2619*/2620void2621Converter::getImageCoords(std::vector<Value *> &coords, int s)2622{2623TexInstruction::Target t =2624TexInstruction::Target(tgsi.getImageTarget());2625const int arg = t.getDim() + (t.isArray() || t.isCube());26262627for (int c = 0; c < arg; ++c)2628coords.push_back(fetchSrc(s, c));26292630if (t.isMS())2631coords.push_back(fetchSrc(s, 3));2632}26332634int2635Converter::remapBufferId(int id)2636{2637std::map<int, int>::const_iterator it = code->bufferIds.find(id);2638if (it != code->bufferIds.end())2639return it->second;2640return id;2641}26422643int2644Converter::remapImageId(int id)2645{2646std::map<int, int>::const_iterator it = code->imageIds.find(id);2647if (it != code->imageIds.end())2648return it->second;2649return id;2650}26512652// For raw loads, granularity is 4 byte.2653// Usage of the texture read mask on OP_SULDP is not allowed.2654void2655Converter::handleLOAD(Value *dst0[4])2656{2657int r = tgsi.getSrc(0).getIndex(0);2658int c;2659std::vector<Value *> off, src, ldv, def;2660Value *ind = NULL;26612662if (tgsi.getSrc(0).isIndirect(0))2663ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);26642665switch (tgsi.getSrc(0).getFile()) {2666case TGSI_FILE_BUFFER:2667r = remapBufferId(r);2668/* fallthrough */2669case TGSI_FILE_MEMORY:2670for (c = 0; c < 4; ++c) {2671if (!dst0[c])2672continue;26732674Value *off;2675Symbol *sym;2676uint32_t src0_component_offset = tgsi.getSrc(0).getSwizzle(c) * 4;26772678if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {2679off = NULL;2680sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,2681tgsi.getSrc(1).getValueU32(0, code->immd.data) +2682src0_component_offset);2683} else {2684// yzw are ignored for buffers2685off = fetchSrc(1, 0);2686sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,2687src0_component_offset);2688}26892690Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);2691if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER &&2692code->bufferAtomics[tgsi.getSrc(0).getIndex(0)])2693ld->cache = nv50_ir::CACHE_CG;2694else2695ld->cache = tgsi.getCacheMode();2696if (ind)2697ld->setIndirect(0, 1, ind);2698}2699break;2700default: {2701r = remapImageId(r);2702getImageCoords(off, 1);2703def.resize(4);27042705for (c = 0; c < 4; ++c) {2706if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))2707def[c] = getScratch();2708else2709def[c] = dst0[c];2710}27112712bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;2713if (bindless)2714ind = fetchSrc(0, 0);27152716TexInstruction *ld =2717mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off);2718ld->tex.mask = tgsi.getDst(0).getMask();2719ld->tex.format = tgsi.getImageFormat();2720ld->cache = tgsi.getCacheMode();2721ld->tex.bindless = bindless;2722if (!bindless)2723ld->tex.r = r;2724if (ind)2725ld->setIndirectR(ind);27262727FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)2728if (dst0[c] != def[c])2729mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);2730break;2731}2732}273327342735/* Keep this around for now as reference when adding img support2736getResourceCoords(off, r, 1);27372738if (isResourceRaw(code, r)) {2739uint8_t mask = 0;2740uint8_t comp[2] = { 0, 0 };2741uint8_t size[2] = { 0, 0 };27422743Symbol *base = getResourceBase(r);27442745// determine the base and size of the at most 2 load ops2746for (c = 0; c < 4; ++c)2747if (!tgsi.getDst(0).isMasked(c))2748mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);27492750int n = partitionLoadStore(comp, size, mask);27512752src = off;27532754def.resize(4); // index by component, the ones we need will be non-NULL2755for (c = 0; c < 4; ++c) {2756if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))2757def[c] = dst0[c];2758else2759if (mask & (1 << c))2760def[c] = getScratch();2761}27622763const bool useLd = isResourceSpecial(r) ||2764(info->io.nv50styleSurfaces &&2765code->resources[r].target == TGSI_TEXTURE_BUFFER);27662767for (int i = 0; i < n; ++i) {2768ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);27692770if (comp[i]) // adjust x component of source address if necessary2771src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),2772off[0], mkImm(comp[i] * 4));2773else2774src[0] = off[0];27752776if (useLd) {2777Instruction *ld =2778mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);2779for (size_t c = 1; c < ldv.size(); ++c)2780ld->setDef(c, ldv[c]);2781} else {2782mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,27830, ldv, src)->dType = typeOfSize(size[i] * 4);2784}2785}2786} else {2787def.resize(4);2788for (c = 0; c < 4; ++c) {2789if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))2790def[c] = getScratch();2791else2792def[c] = dst0[c];2793}27942795mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,2796def, off);2797}2798FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)2799if (dst0[c] != def[c])2800mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);2801*/2802}28032804// For formatted stores, the write mask on OP_SUSTP can be used.2805// Raw stores have to be split.2806void2807Converter::handleSTORE()2808{2809int r = tgsi.getDst(0).getIndex(0);2810int c;2811std::vector<Value *> off, src, dummy;2812Value *ind = NULL;28132814if (tgsi.getDst(0).isIndirect(0))2815ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0);28162817switch (tgsi.getDst(0).getFile()) {2818case TGSI_FILE_BUFFER:2819r = remapBufferId(r);2820/* fallthrough */2821case TGSI_FILE_MEMORY:2822for (c = 0; c < 4; ++c) {2823if (!(tgsi.getDst(0).getMask() & (1 << c)))2824continue;28252826Symbol *sym;2827Value *off;2828if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {2829off = NULL;2830sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,2831tgsi.getSrc(0).getValueU32(0, code->immd.data) + 4 * c);2832} else {2833// yzw are ignored for buffers2834off = fetchSrc(0, 0);2835sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);2836}28372838Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));2839st->cache = tgsi.getCacheMode();2840if (ind)2841st->setIndirect(0, 1, ind);2842}2843break;2844default: {2845r = remapImageId(r);2846getImageCoords(off, 0);2847src = off;28482849FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)2850src.push_back(fetchSrc(1, c));28512852bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE;2853if (bindless)2854ind = fetchDst(0, 0);28552856TexInstruction *st =2857mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src);2858st->tex.mask = tgsi.getDst(0).getMask();2859st->tex.format = tgsi.getImageFormat();2860st->cache = tgsi.getCacheMode();2861st->tex.bindless = bindless;2862if (!bindless)2863st->tex.r = r;2864if (ind)2865st->setIndirectR(ind);28662867break;2868}2869}28702871/* Keep this around for now as reference when adding img support2872getResourceCoords(off, r, 0);2873src = off;2874const int s = src.size();28752876if (isResourceRaw(code, r)) {2877uint8_t comp[2] = { 0, 0 };2878uint8_t size[2] = { 0, 0 };28792880int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());28812882Symbol *base = getResourceBase(r);28832884const bool useSt = isResourceSpecial(r) ||2885(info->io.nv50styleSurfaces &&2886code->resources[r].target == TGSI_TEXTURE_BUFFER);28872888for (int i = 0; i < n; ++i) {2889if (comp[i]) // adjust x component of source address if necessary2890src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),2891off[0], mkImm(comp[i] * 4));2892else2893src[0] = off[0];28942895const DataType stTy = typeOfSize(size[i] * 4);28962897if (useSt) {2898Instruction *st =2899mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));2900for (c = 1; c < size[i]; ++c)2901st->setSrc(1 + c, fetchSrc(1, comp[i] + c));2902st->setIndirect(0, 0, src[0]);2903} else {2904// attach values to be stored2905src.resize(s + size[i]);2906for (c = 0; c < size[i]; ++c)2907src[s + c] = fetchSrc(1, comp[i] + c);2908mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,29090, dummy, src)->setType(stTy);2910}2911}2912} else {2913FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)2914src.push_back(fetchSrc(1, c));29152916mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,2917dummy, src)->tex.mask = tgsi.getDst(0).getMask();2918}2919*/2920}29212922// XXX: These only work on resources with the single-component u32/s32 formats.2923// Therefore the result is replicated. This might not be intended by TGSI, but2924// operating on more than 1 component would produce undefined results because2925// they do not exist.2926void2927Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)2928{2929int r = tgsi.getSrc(0).getIndex(0);2930std::vector<Value *> srcv;2931std::vector<Value *> defv;2932LValue *dst = getScratch();2933Value *ind = NULL;29342935if (tgsi.getSrc(0).isIndirect(0))2936ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);29372938switch (tgsi.getSrc(0).getFile()) {2939case TGSI_FILE_BUFFER:2940r = remapBufferId(r);2941/* fallthrough */2942case TGSI_FILE_MEMORY:2943for (int c = 0; c < 4; ++c) {2944if (!dst0[c])2945continue;29462947Instruction *insn;2948Value *off = fetchSrc(1, c);2949Value *sym;2950if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)2951sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,2952tgsi.getSrc(1).getValueU32(c, code->immd.data));2953else2954sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);2955if (subOp == NV50_IR_SUBOP_ATOM_CAS)2956insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));2957else2958insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));2959if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)2960insn->setIndirect(0, 0, off);2961if (ind)2962insn->setIndirect(0, 1, ind);2963insn->subOp = subOp;2964}2965for (int c = 0; c < 4; ++c)2966if (dst0[c])2967dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov2968break;2969default: {2970r = remapImageId(r);2971getImageCoords(srcv, 1);2972defv.push_back(dst);2973srcv.push_back(fetchSrc(2, 0));29742975if (subOp == NV50_IR_SUBOP_ATOM_CAS)2976srcv.push_back(fetchSrc(3, 0));29772978bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;2979if (bindless)2980ind = fetchSrc(0, 0);29812982TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(),29830, 0, defv, srcv);2984tex->subOp = subOp;2985tex->tex.mask = 1;2986tex->tex.format = tgsi.getImageFormat();2987tex->setType(ty);2988tex->tex.bindless = bindless;2989if (!bindless)2990tex->tex.r = r;2991if (ind)2992tex->setIndirectR(ind);29932994for (int c = 0; c < 4; ++c)2995if (dst0[c])2996dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov2997break;2998}2999}30003001/* Keep this around for now as reference when adding img support3002getResourceCoords(srcv, r, 1);30033004if (isResourceSpecial(r)) {3005assert(r != TGSI_RESOURCE_INPUT);3006Instruction *insn;3007insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0));3008insn->subOp = subOp;3009if (subOp == NV50_IR_SUBOP_ATOM_CAS)3010insn->setSrc(2, fetchSrc(3, 0));3011insn->setIndirect(0, 0, srcv.at(0));3012} else {3013operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP;3014TexTarget targ = getResourceTarget(code, r);3015int idx = code->resources[r].slot;3016defv.push_back(dst);3017srcv.push_back(fetchSrc(2, 0));3018if (subOp == NV50_IR_SUBOP_ATOM_CAS)3019srcv.push_back(fetchSrc(3, 0));3020TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv);3021tex->subOp = subOp;3022tex->tex.mask = 1;3023tex->setType(ty);3024}30253026for (int c = 0; c < 4; ++c)3027if (dst0[c])3028dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov3029*/3030}30313032void3033Converter::handleINTERP(Value *dst[4])3034{3035// Check whether the input is linear. All other attributes ignored.3036Instruction *insn;3037Value *offset = NULL, *ptr = NULL, *w = NULL;3038Symbol *sym[4] = { NULL };3039bool linear;3040operation op = OP_NOP;3041int c, mode = 0;30423043tgsi::Instruction::SrcRegister src = tgsi.getSrc(0);30443045// In some odd cases, in large part due to varying packing, the source3046// might not actually be an input. This is illegal TGSI, but it's easier to3047// account for it here than it is to fix it where the TGSI is being3048// generated. In that case, it's going to be a straight up mov (or sequence3049// of mov's) from the input in question. We follow the mov chain to see3050// which input we need to use.3051if (src.getFile() != TGSI_FILE_INPUT) {3052if (src.isIndirect(0)) {3053ERROR("Ignoring indirect input interpolation\n");3054return;3055}3056FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3057Value *val = fetchSrc(0, c);3058assert(val->defs.size() == 1);3059insn = val->getInsn();3060while (insn->op == OP_MOV) {3061assert(insn->getSrc(0)->defs.size() == 1);3062insn = insn->getSrc(0)->getInsn();3063if (!insn) {3064ERROR("Miscompiling shader due to unhandled INTERP\n");3065return;3066}3067}3068if (insn->op != OP_LINTERP && insn->op != OP_PINTERP) {3069ERROR("Trying to interpolate non-input, this is not allowed.\n");3070return;3071}3072sym[c] = insn->getSrc(0)->asSym();3073assert(sym[c]);3074op = insn->op;3075mode = insn->ipa;3076ptr = insn->getIndirect(0, 0);3077}3078} else {3079if (src.isIndirect(0))3080ptr = shiftAddress(fetchSrc(src.getIndirect(0), 0, NULL));30813082// We can assume that the fixed index will point to an input of the same3083// interpolation type in case of an indirect.3084// TODO: Make use of ArrayID.3085linear = info_out->in[src.getIndex(0)].linear;3086if (linear) {3087op = OP_LINTERP;3088mode = NV50_IR_INTERP_LINEAR;3089} else {3090op = OP_PINTERP;3091mode = NV50_IR_INTERP_PERSPECTIVE;3092}3093}30943095switch (tgsi.getOpcode()) {3096case TGSI_OPCODE_INTERP_CENTROID:3097mode |= NV50_IR_INTERP_CENTROID;3098break;3099case TGSI_OPCODE_INTERP_SAMPLE: {3100// When using a non-MS buffer, we're supposed to always use the center3101// (i.e. sample 0). This adds a SELP which will be always true or false3102// based on a data fixup.3103Value *sample = getScratch();3104mkOp3(OP_SELP, TYPE_U32, sample, mkImm(0), fetchSrc(1, 0), mkImm(0))3105->subOp = 2;31063107insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), sample);3108insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;3109mode |= NV50_IR_INTERP_OFFSET;3110break;3111}3112case TGSI_OPCODE_INTERP_OFFSET: {3113// The input in src1.xy is float, but we need a single 32-bit value3114// where the upper and lower 16 bits are encoded in S0.12 format. We need3115// to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096,3116// and then convert to s32.3117Value *offs[2];3118for (c = 0; c < 2; c++) {3119offs[c] = getScratch();3120mkOp2(OP_MIN, TYPE_F32, offs[c], fetchSrc(1, c), loadImm(NULL, 0.4375f));3121mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));3122mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));3123mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);3124}3125offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(),3126offs[1], mkImm(0x1010), offs[0]);3127mode |= NV50_IR_INTERP_OFFSET;3128break;3129}3130}31313132if (op == OP_PINTERP) {3133if (offset) {3134w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset);3135mkOp1(OP_RCP, TYPE_F32, w, w);3136} else {3137w = fragCoord[3];3138}3139}314031413142FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3143insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c));3144if (op == OP_PINTERP)3145insn->setSrc(1, w);3146if (offset)3147insn->setSrc(op == OP_PINTERP ? 2 : 1, offset);3148if (ptr)3149insn->setIndirect(0, 0, ptr);31503151insn->setInterpolate(mode);3152}3153}31543155bool3156Converter::isEndOfSubroutine(uint ip)3157{3158assert(ip < code->scan.num_instructions);3159tgsi::Instruction insn(&code->insns[ip]);3160return (insn.getOpcode() == TGSI_OPCODE_END ||3161insn.getOpcode() == TGSI_OPCODE_ENDSUB ||3162// does END occur at end of main or the very end ?3163insn.getOpcode() == TGSI_OPCODE_BGNSUB);3164}31653166bool3167Converter::handleInstruction(const struct tgsi_full_instruction *insn)3168{3169Instruction *geni;31703171Value *dst0[4], *rDst0[4];3172Value *src0, *src1, *src2, *src3;3173Value *val0 = NULL, *val1 = NULL;3174int c;31753176tgsi = tgsi::Instruction(insn);31773178bool useScratchDst = tgsi.checkDstSrcAliasing();31793180operation op = tgsi.getOP();3181dstTy = tgsi.inferDstType();3182srcTy = tgsi.inferSrcType();31833184unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;31853186if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {3187for (c = 0; c < 4; ++c) {3188rDst0[c] = acquireDst(0, c);3189dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];3190}3191}31923193switch (tgsi.getOpcode()) {3194case TGSI_OPCODE_ADD:3195case TGSI_OPCODE_UADD:3196case TGSI_OPCODE_AND:3197case TGSI_OPCODE_DIV:3198case TGSI_OPCODE_IDIV:3199case TGSI_OPCODE_UDIV:3200case TGSI_OPCODE_MAX:3201case TGSI_OPCODE_MIN:3202case TGSI_OPCODE_IMAX:3203case TGSI_OPCODE_IMIN:3204case TGSI_OPCODE_UMAX:3205case TGSI_OPCODE_UMIN:3206case TGSI_OPCODE_MOD:3207case TGSI_OPCODE_UMOD:3208case TGSI_OPCODE_MUL:3209case TGSI_OPCODE_UMUL:3210case TGSI_OPCODE_IMUL_HI:3211case TGSI_OPCODE_UMUL_HI:3212case TGSI_OPCODE_OR:3213case TGSI_OPCODE_SHL:3214case TGSI_OPCODE_ISHR:3215case TGSI_OPCODE_USHR:3216case TGSI_OPCODE_XOR:3217FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3218src0 = fetchSrc(0, c);3219src1 = fetchSrc(1, c);3220geni = mkOp2(op, dstTy, dst0[c], src0, src1);3221geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());3222if (op == OP_MUL && dstTy == TYPE_F32)3223geni->dnz = info->io.mul_zero_wins;3224geni->precise = insn->Instruction.Precise;3225}3226break;3227case TGSI_OPCODE_MAD:3228case TGSI_OPCODE_UMAD:3229case TGSI_OPCODE_FMA:3230FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3231src0 = fetchSrc(0, c);3232src1 = fetchSrc(1, c);3233src2 = fetchSrc(2, c);3234geni = mkOp3(op, dstTy, dst0[c], src0, src1, src2);3235if (dstTy == TYPE_F32)3236geni->dnz = info->io.mul_zero_wins;3237geni->precise = insn->Instruction.Precise;3238}3239break;3240case TGSI_OPCODE_MOV:3241case TGSI_OPCODE_CEIL:3242case TGSI_OPCODE_FLR:3243case TGSI_OPCODE_TRUNC:3244case TGSI_OPCODE_RCP:3245case TGSI_OPCODE_SQRT:3246case TGSI_OPCODE_IABS:3247case TGSI_OPCODE_INEG:3248case TGSI_OPCODE_NOT:3249case TGSI_OPCODE_DDX:3250case TGSI_OPCODE_DDY:3251case TGSI_OPCODE_DDX_FINE:3252case TGSI_OPCODE_DDY_FINE:3253FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3254mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));3255break;3256case TGSI_OPCODE_RSQ:3257src0 = fetchSrc(0, 0);3258val0 = getScratch();3259mkOp1(OP_ABS, TYPE_F32, val0, src0);3260mkOp1(OP_RSQ, TYPE_F32, val0, val0);3261FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3262mkMov(dst0[c], val0);3263break;3264case TGSI_OPCODE_ARL:3265case TGSI_OPCODE_ARR:3266FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3267const RoundMode rnd =3268tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M;3269src0 = fetchSrc(0, c);3270mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd;3271}3272break;3273case TGSI_OPCODE_UARL:3274FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3275mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c));3276break;3277case TGSI_OPCODE_POW:3278val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0));3279FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3280mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);3281break;3282case TGSI_OPCODE_EX2:3283case TGSI_OPCODE_LG2:3284val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);3285FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3286mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);3287break;3288case TGSI_OPCODE_COS:3289case TGSI_OPCODE_SIN:3290val0 = getScratch();3291if (mask & 7) {3292mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));3293mkOp1(op, TYPE_F32, val0, val0);3294for (c = 0; c < 3; ++c)3295if (dst0[c])3296mkMov(dst0[c], val0);3297}3298if (dst0[3]) {3299mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));3300mkOp1(op, TYPE_F32, dst0[3], val0);3301}3302break;3303case TGSI_OPCODE_EXP:3304src0 = fetchSrc(0, 0);3305val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);3306if (dst0[1])3307mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);3308if (dst0[0])3309mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);3310if (dst0[2])3311mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);3312if (dst0[3])3313loadImm(dst0[3], 1.0f);3314break;3315case TGSI_OPCODE_LOG:3316src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));3317val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);3318if (dst0[0] || dst0[1])3319val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);3320if (dst0[1]) {3321mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);3322mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);3323mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0)3324->dnz = info->io.mul_zero_wins;3325}3326if (dst0[3])3327loadImm(dst0[3], 1.0f);3328break;3329case TGSI_OPCODE_DP2:3330val0 = buildDot(2);3331FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3332mkMov(dst0[c], val0);3333break;3334case TGSI_OPCODE_DP3:3335val0 = buildDot(3);3336FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3337mkMov(dst0[c], val0);3338break;3339case TGSI_OPCODE_DP4:3340val0 = buildDot(4);3341FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3342mkMov(dst0[c], val0);3343break;3344case TGSI_OPCODE_DST:3345if (dst0[0])3346loadImm(dst0[0], 1.0f);3347if (dst0[1]) {3348src0 = fetchSrc(0, 1);3349src1 = fetchSrc(1, 1);3350mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1)3351->dnz = info->io.mul_zero_wins;3352}3353if (dst0[2])3354mkMov(dst0[2], fetchSrc(0, 2));3355if (dst0[3])3356mkMov(dst0[3], fetchSrc(1, 3));3357break;3358case TGSI_OPCODE_LRP:3359FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3360src0 = fetchSrc(0, c);3361src1 = fetchSrc(1, c);3362src2 = fetchSrc(2, c);3363mkOp3(OP_MAD, TYPE_F32, dst0[c],3364mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2)3365->dnz = info->io.mul_zero_wins;3366}3367break;3368case TGSI_OPCODE_LIT:3369handleLIT(dst0);3370break;3371case TGSI_OPCODE_ISSG:3372case TGSI_OPCODE_SSG:3373FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3374src0 = fetchSrc(0, c);3375val0 = getScratch();3376val1 = getScratch();3377mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero);3378mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero);3379if (srcTy == TYPE_F32)3380mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);3381else3382mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);3383}3384break;3385case TGSI_OPCODE_UCMP:3386srcTy = TYPE_U32;3387FALLTHROUGH;3388case TGSI_OPCODE_CMP:3389FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3390src0 = fetchSrc(0, c);3391src1 = fetchSrc(1, c);3392src2 = fetchSrc(2, c);3393if (src1 == src2)3394mkMov(dst0[c], src1);3395else3396mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,3397srcTy, dst0[c], srcTy, src1, src2, src0);3398}3399break;3400case TGSI_OPCODE_FRC:3401FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3402src0 = fetchSrc(0, c);3403val0 = getScratch();3404mkOp1(OP_FLOOR, TYPE_F32, val0, src0);3405mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);3406}3407break;3408case TGSI_OPCODE_ROUND:3409FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3410mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))3411->rnd = ROUND_NI;3412break;3413case TGSI_OPCODE_SLT:3414case TGSI_OPCODE_SGE:3415case TGSI_OPCODE_SEQ:3416case TGSI_OPCODE_SGT:3417case TGSI_OPCODE_SLE:3418case TGSI_OPCODE_SNE:3419case TGSI_OPCODE_FSEQ:3420case TGSI_OPCODE_FSGE:3421case TGSI_OPCODE_FSLT:3422case TGSI_OPCODE_FSNE:3423case TGSI_OPCODE_ISGE:3424case TGSI_OPCODE_ISLT:3425case TGSI_OPCODE_USEQ:3426case TGSI_OPCODE_USGE:3427case TGSI_OPCODE_USLT:3428case TGSI_OPCODE_USNE:3429FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3430src0 = fetchSrc(0, c);3431src1 = fetchSrc(1, c);3432mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);3433}3434break;3435case TGSI_OPCODE_VOTE_ALL:3436case TGSI_OPCODE_VOTE_ANY:3437case TGSI_OPCODE_VOTE_EQ:3438val0 = new_LValue(func, FILE_PREDICATE);3439FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3440mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, c), zero);3441mkOp1(op, dstTy, val0, val0)3442->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());3443mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0);3444}3445break;3446case TGSI_OPCODE_BALLOT:3447if (!tgsi.getDst(0).isMasked(0)) {3448val0 = new_LValue(func, FILE_PREDICATE);3449mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, 0), zero);3450mkOp1(op, TYPE_U32, dst0[0], val0)->subOp = NV50_IR_SUBOP_VOTE_ANY;3451}3452if (!tgsi.getDst(0).isMasked(1))3453mkMov(dst0[1], zero, TYPE_U32);3454break;3455case TGSI_OPCODE_READ_FIRST:3456// ReadFirstInvocationARB(src) is implemented as3457// ReadInvocationARB(src, findLSB(ballot(true)))3458val0 = getScratch();3459mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;3460mkOp1(OP_BREV, TYPE_U32, val0, val0);3461mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;3462src1 = val0;3463FALLTHROUGH;3464case TGSI_OPCODE_READ_INVOC:3465if (tgsi.getOpcode() == TGSI_OPCODE_READ_INVOC)3466src1 = fetchSrc(1, 0);3467else3468src1 = val0;3469FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3470geni = mkOp3(op, dstTy, dst0[c], fetchSrc(0, c), src1, mkImm(0x1f));3471geni->subOp = NV50_IR_SUBOP_SHFL_IDX;3472}3473break;3474case TGSI_OPCODE_CLOCK:3475// Stick the 32-bit clock into the high dword of the logical result.3476if (!tgsi.getDst(0).isMasked(0))3477mkOp1(OP_MOV, TYPE_U32, dst0[0], zero);3478if (!tgsi.getDst(0).isMasked(1))3479mkOp1(OP_RDSV, TYPE_U32, dst0[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;3480break;3481case TGSI_OPCODE_READ_HELPER:3482if (!tgsi.getDst(0).isMasked(0))3483mkOp1(OP_RDSV, TYPE_U32, dst0[0], mkSysVal(SV_THREAD_KILL, 0))3484->fixed = 1;3485break;3486case TGSI_OPCODE_KILL_IF:3487val0 = new_LValue(func, FILE_PREDICATE);3488mask = 0;3489for (c = 0; c < 4; ++c) {3490const int s = tgsi.getSrc(0).getSwizzle(c);3491if (mask & (1 << s))3492continue;3493mask |= 1 << s;3494mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero);3495mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);3496}3497break;3498case TGSI_OPCODE_KILL:3499case TGSI_OPCODE_DEMOTE:3500// TODO: Should we make KILL exit that invocation? Some old shaders3501// don't like that.3502mkOp(OP_DISCARD, TYPE_NONE, NULL);3503break;3504case TGSI_OPCODE_TEX:3505case TGSI_OPCODE_TEX_LZ:3506case TGSI_OPCODE_TXB:3507case TGSI_OPCODE_TXL:3508case TGSI_OPCODE_TXP:3509case TGSI_OPCODE_LODQ:3510// R S L C Dx Dy3511handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);3512break;3513case TGSI_OPCODE_TXD:3514handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);3515break;3516case TGSI_OPCODE_TG4:3517handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00);3518break;3519case TGSI_OPCODE_TEX2:3520handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00);3521break;3522case TGSI_OPCODE_TXB2:3523case TGSI_OPCODE_TXL2:3524handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00);3525break;3526case TGSI_OPCODE_SAMPLE:3527case TGSI_OPCODE_SAMPLE_B:3528case TGSI_OPCODE_SAMPLE_D:3529case TGSI_OPCODE_SAMPLE_L:3530case TGSI_OPCODE_SAMPLE_C:3531case TGSI_OPCODE_SAMPLE_C_LZ:3532handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);3533break;3534case TGSI_OPCODE_TXF_LZ:3535case TGSI_OPCODE_TXF:3536handleTXF(dst0, 1, 0x03);3537break;3538case TGSI_OPCODE_SAMPLE_I:3539handleTXF(dst0, 1, 0x03);3540break;3541case TGSI_OPCODE_SAMPLE_I_MS:3542handleTXF(dst0, 1, 0x20);3543break;3544case TGSI_OPCODE_TXQ:3545case TGSI_OPCODE_SVIEWINFO:3546handleTXQ(dst0, TXQ_DIMS, 1);3547break;3548case TGSI_OPCODE_TXQS:3549// The TXQ_TYPE query returns samples in its 3rd arg, but we need it to3550// be in .x3551dst0[1] = dst0[2] = dst0[3] = NULL;3552std::swap(dst0[0], dst0[2]);3553handleTXQ(dst0, TXQ_TYPE, 0);3554std::swap(dst0[0], dst0[2]);3555break;3556case TGSI_OPCODE_FBFETCH:3557handleFBFETCH(dst0);3558break;3559case TGSI_OPCODE_F2I:3560case TGSI_OPCODE_F2U:3561FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3562mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;3563break;3564case TGSI_OPCODE_I2F:3565case TGSI_OPCODE_U2F:3566FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3567mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));3568break;3569case TGSI_OPCODE_PK2H:3570val0 = getScratch();3571val1 = getScratch();3572mkCvt(OP_CVT, TYPE_F16, val0, TYPE_F32, fetchSrc(0, 0));3573mkCvt(OP_CVT, TYPE_F16, val1, TYPE_F32, fetchSrc(0, 1));3574FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)3575mkOp3(OP_INSBF, TYPE_U32, dst0[c], val1, mkImm(0x1010), val0);3576break;3577case TGSI_OPCODE_UP2H:3578src0 = fetchSrc(0, 0);3579FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3580geni = mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F16, src0);3581geni->subOp = c & 1;3582}3583break;3584case TGSI_OPCODE_EMIT:3585/* export the saved viewport index */3586if (viewport != NULL) {3587Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32,3588info_out->out[info->io.viewportId].slot[0] * 4);3589mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport);3590}3591/* handle user clip planes for each emitted vertex */3592if (info_out->io.genUserClip > 0)3593handleUserClipPlanes();3594FALLTHROUGH;3595case TGSI_OPCODE_ENDPRIM:3596{3597// get vertex stream (must be immediate)3598unsigned int stream = tgsi.getSrc(0).getValueU32(0, code->immd.data);3599if (stream && op == OP_RESTART)3600break;3601if (info_out->prop.gp.maxVertices == 0)3602break;3603src0 = mkImm(stream);3604mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;3605break;3606}3607case TGSI_OPCODE_IF:3608case TGSI_OPCODE_UIF:3609{3610BasicBlock *ifBB = new BasicBlock(func);36113612bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);3613condBBs.push(bb);3614joinBBs.push(bb);36153616mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);36173618setPosition(ifBB, true);3619}3620break;3621case TGSI_OPCODE_ELSE:3622{3623BasicBlock *elseBB = new BasicBlock(func);3624BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);36253626forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);3627condBBs.push(bb);36283629forkBB->getExit()->asFlow()->target.bb = elseBB;3630if (!bb->isTerminated())3631mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);36323633setPosition(elseBB, true);3634}3635break;3636case TGSI_OPCODE_ENDIF:3637{3638BasicBlock *convBB = new BasicBlock(func);3639BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);3640BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);36413642if (!bb->isTerminated()) {3643// we only want join if none of the clauses ended with CONT/BREAK/RET3644if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)3645insertConvergenceOps(convBB, forkBB);3646mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);3647bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);3648}36493650if (prevBB->getExit()->op == OP_BRA) {3651prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);3652prevBB->getExit()->asFlow()->target.bb = convBB;3653}3654setPosition(convBB, true);3655}3656break;3657case TGSI_OPCODE_BGNLOOP:3658{3659BasicBlock *lbgnBB = new BasicBlock(func);3660BasicBlock *lbrkBB = new BasicBlock(func);36613662loopBBs.push(lbgnBB);3663breakBBs.push(lbrkBB);3664if (loopBBs.getSize() > func->loopNestingBound)3665func->loopNestingBound++;36663667mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);36683669bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);3670setPosition(lbgnBB, true);3671mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);3672}3673break;3674case TGSI_OPCODE_ENDLOOP:3675{3676BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);36773678if (!bb->isTerminated()) {3679mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);3680bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);3681}3682setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);36833684// If the loop never breaks (e.g. only has RET's inside), then there3685// will be no way to get to the break bb. However BGNLOOP will have3686// already made a PREBREAK to it, so it must be in the CFG.3687if (getBB()->cfg.incidentCount() == 0)3688loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE);3689}3690break;3691case TGSI_OPCODE_BRK:3692{3693if (bb->isTerminated())3694break;3695BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);3696mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);3697bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);3698}3699break;3700case TGSI_OPCODE_CONT:3701{3702if (bb->isTerminated())3703break;3704BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);3705mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);3706contBB->explicitCont = true;3707bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);3708}3709break;3710case TGSI_OPCODE_BGNSUB:3711{3712Subroutine *s = getSubroutine(ip);3713BasicBlock *entry = new BasicBlock(s->f);3714BasicBlock *leave = new BasicBlock(s->f);37153716// multiple entrypoints possible, keep the graph connected3717if (prog->getType() == Program::TYPE_COMPUTE)3718prog->main->call.attach(&s->f->call, Graph::Edge::TREE);37193720sub.cur = s;3721s->f->setEntry(entry);3722s->f->setExit(leave);3723setPosition(entry, true);3724return true;3725}3726case TGSI_OPCODE_ENDSUB:3727{3728sub.cur = getSubroutine(prog->main);3729setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);3730return true;3731}3732case TGSI_OPCODE_CAL:3733{3734Subroutine *s = getSubroutine(tgsi.getLabel());3735mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);3736func->call.attach(&s->f->call, Graph::Edge::TREE);3737return true;3738}3739case TGSI_OPCODE_RET:3740{3741if (bb->isTerminated())3742return true;3743BasicBlock *leave = BasicBlock::get(func->cfgExit);37443745if (!isEndOfSubroutine(ip + 1)) {3746// insert a PRERET at the entry if this is an early return3747// (only needed for sharing code in the epilogue)3748BasicBlock *root = BasicBlock::get(func->cfg.getRoot());3749if (root->getEntry() == NULL || root->getEntry()->op != OP_PRERET) {3750BasicBlock *pos = getBB();3751setPosition(root, false);3752mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;3753setPosition(pos, true);3754}3755}3756mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;3757bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);3758}3759break;3760case TGSI_OPCODE_END:3761{3762// attach and generate epilogue code3763BasicBlock *epilogue = BasicBlock::get(func->cfgExit);3764bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);3765setPosition(epilogue, true);3766if (prog->getType() == Program::TYPE_FRAGMENT)3767exportOutputs();3768if ((prog->getType() == Program::TYPE_VERTEX ||3769prog->getType() == Program::TYPE_TESSELLATION_EVAL3770) && info_out->io.genUserClip > 0)3771handleUserClipPlanes();3772mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;3773}3774break;3775case TGSI_OPCODE_SWITCH:3776case TGSI_OPCODE_CASE:3777ERROR("switch/case opcode encountered, should have been lowered\n");3778abort();3779break;3780case TGSI_OPCODE_LOAD:3781handleLOAD(dst0);3782break;3783case TGSI_OPCODE_STORE:3784handleSTORE();3785break;3786case TGSI_OPCODE_BARRIER:3787geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));3788geni->fixed = 1;3789geni->subOp = NV50_IR_SUBOP_BAR_SYNC;3790break;3791case TGSI_OPCODE_MEMBAR:3792{3793uint32_t level = tgsi.getSrc(0).getValueU32(0, code->immd.data);3794geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);3795geni->fixed = 1;3796if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED)))3797geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);3798else3799geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);3800}3801break;3802case TGSI_OPCODE_ATOMUADD:3803case TGSI_OPCODE_ATOMXCHG:3804case TGSI_OPCODE_ATOMCAS:3805case TGSI_OPCODE_ATOMAND:3806case TGSI_OPCODE_ATOMOR:3807case TGSI_OPCODE_ATOMXOR:3808case TGSI_OPCODE_ATOMUMIN:3809case TGSI_OPCODE_ATOMIMIN:3810case TGSI_OPCODE_ATOMUMAX:3811case TGSI_OPCODE_ATOMIMAX:3812case TGSI_OPCODE_ATOMFADD:3813case TGSI_OPCODE_ATOMDEC_WRAP:3814case TGSI_OPCODE_ATOMINC_WRAP:3815handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));3816break;3817case TGSI_OPCODE_RESQ:3818if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {3819Value *ind = NULL;3820if (tgsi.getSrc(0).isIndirect(0))3821ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);3822geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0],3823makeSym(tgsi.getSrc(0).getFile(),3824tgsi.getSrc(0).getIndex(0), -1, 0, 0));3825if (ind)3826geni->setIndirect(0, 1, ind);3827} else {3828TexInstruction *texi = new_TexInstruction(func, OP_SUQ);3829for (int c = 0, d = 0; c < 4; ++c) {3830if (dst0[c]) {3831texi->setDef(d++, dst0[c]);3832texi->tex.mask |= 1 << c;3833}3834}3835if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) {3836texi->tex.r = tgsi.getSrc(0).getIndex(0);3837if (tgsi.getSrc(0).isIndirect(0))3838texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));3839} else {3840texi->tex.bindless = true;3841texi->setIndirectR(fetchSrc(0, 0));3842}3843texi->tex.target = tgsi.getImageTarget();38443845bb->insertTail(texi);3846}3847break;3848case TGSI_OPCODE_IBFE:3849case TGSI_OPCODE_UBFE:3850FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3851src0 = fetchSrc(0, c);3852val0 = getScratch();3853if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE &&3854tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) {3855loadImm(val0, (tgsi.getSrc(2).getValueU32(c, code->immd.data) << 8) |3856tgsi.getSrc(1).getValueU32(c, code->immd.data));3857} else {3858src1 = fetchSrc(1, c);3859src2 = fetchSrc(2, c);3860mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1);3861}3862mkOp2(OP_EXTBF, dstTy, dst0[c], src0, val0);3863}3864break;3865case TGSI_OPCODE_BFI:3866FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3867src0 = fetchSrc(0, c);3868src1 = fetchSrc(1, c);3869src2 = fetchSrc(2, c);3870src3 = fetchSrc(3, c);3871val0 = getScratch();3872mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2);3873mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0);3874}3875break;3876case TGSI_OPCODE_LSB:3877FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3878src0 = fetchSrc(0, c);3879val0 = getScratch();3880mkOp1(OP_BREV, TYPE_U32, val0, src0);3881geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);3882geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;3883}3884break;3885case TGSI_OPCODE_IMSB:3886case TGSI_OPCODE_UMSB:3887FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3888src0 = fetchSrc(0, c);3889mkOp1(OP_BFIND, srcTy, dst0[c], src0);3890}3891break;3892case TGSI_OPCODE_BREV:3893FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3894src0 = fetchSrc(0, c);3895mkOp1(OP_BREV, TYPE_U32, dst0[c], src0);3896}3897break;3898case TGSI_OPCODE_POPC:3899FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3900src0 = fetchSrc(0, c);3901mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0);3902}3903break;3904case TGSI_OPCODE_INTERP_CENTROID:3905case TGSI_OPCODE_INTERP_SAMPLE:3906case TGSI_OPCODE_INTERP_OFFSET:3907handleINTERP(dst0);3908break;3909case TGSI_OPCODE_I642F:3910case TGSI_OPCODE_U642F:3911case TGSI_OPCODE_D2I:3912case TGSI_OPCODE_D2U:3913case TGSI_OPCODE_D2F: {3914int pos = 0;3915FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3916Value *dreg = getSSA(8);3917src0 = fetchSrc(0, pos);3918src1 = fetchSrc(0, pos + 1);3919mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);3920Instruction *cvt = mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);3921if (!isFloatType(dstTy))3922cvt->rnd = ROUND_Z;3923pos += 2;3924}3925break;3926}3927case TGSI_OPCODE_I2I64:3928FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3929dst0[c] = fetchSrc(0, c / 2);3930mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31));3931c++;3932}3933break;3934case TGSI_OPCODE_U2I64:3935FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3936dst0[c] = fetchSrc(0, c / 2);3937dst0[c + 1] = zero;3938c++;3939}3940break;3941case TGSI_OPCODE_F2I64:3942case TGSI_OPCODE_F2U64:3943case TGSI_OPCODE_I2D:3944case TGSI_OPCODE_U2D:3945case TGSI_OPCODE_F2D:3946FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3947Value *dreg = getSSA(8);3948Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));3949if (!isFloatType(dstTy))3950cvt->rnd = ROUND_Z;3951mkSplit(&dst0[c], 4, dreg);3952c++;3953}3954break;3955case TGSI_OPCODE_D2I64:3956case TGSI_OPCODE_D2U64:3957case TGSI_OPCODE_I642D:3958case TGSI_OPCODE_U642D:3959FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3960src0 = getSSA(8);3961Value *dst = getSSA(8), *tmp[2];3962tmp[0] = fetchSrc(0, c);3963tmp[1] = fetchSrc(0, c + 1);3964mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);3965Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0);3966if (!isFloatType(dstTy))3967cvt->rnd = ROUND_Z;3968mkSplit(&dst0[c], 4, dst);3969c++;3970}3971break;3972case TGSI_OPCODE_I64NEG:3973FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3974src0 = getSSA(8);3975Value *dst = getSSA(8), *tmp[2];3976tmp[0] = fetchSrc(0, c);3977tmp[1] = fetchSrc(0, c + 1);3978mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);3979mkOp2(OP_SUB, dstTy, dst, zero, src0);3980mkSplit(&dst0[c], 4, dst);3981c++;3982}3983break;3984case TGSI_OPCODE_I64ABS:3985FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {3986src0 = getSSA(8);3987Value *neg = getSSA(8), *srcComp[2], *negComp[2];3988srcComp[0] = fetchSrc(0, c);3989srcComp[1] = fetchSrc(0, c + 1);3990mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]);3991mkOp2(OP_SUB, dstTy, neg, zero, src0);3992mkSplit(negComp, 4, neg);3993mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32,3994negComp[0], srcComp[0], srcComp[1]);3995mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32,3996negComp[1], srcComp[1], srcComp[1]);3997c++;3998}3999break;4000case TGSI_OPCODE_DABS:4001case TGSI_OPCODE_DNEG:4002case TGSI_OPCODE_DRCP:4003case TGSI_OPCODE_DSQRT:4004case TGSI_OPCODE_DRSQ:4005case TGSI_OPCODE_DTRUNC:4006case TGSI_OPCODE_DCEIL:4007case TGSI_OPCODE_DFLR:4008FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {4009src0 = getSSA(8);4010Value *dst = getSSA(8), *tmp[2];4011tmp[0] = fetchSrc(0, c);4012tmp[1] = fetchSrc(0, c + 1);4013mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);4014mkOp1(op, dstTy, dst, src0);4015mkSplit(&dst0[c], 4, dst);4016c++;4017}4018break;4019case TGSI_OPCODE_DFRAC:4020FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {4021src0 = getSSA(8);4022Value *dst = getSSA(8), *tmp[2];4023tmp[0] = fetchSrc(0, c);4024tmp[1] = fetchSrc(0, c + 1);4025mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);4026mkOp1(OP_FLOOR, TYPE_F64, dst, src0);4027mkOp2(OP_SUB, TYPE_F64, dst, src0, dst);4028mkSplit(&dst0[c], 4, dst);4029c++;4030}4031break;4032case TGSI_OPCODE_U64SEQ:4033case TGSI_OPCODE_U64SNE:4034case TGSI_OPCODE_U64SLT:4035case TGSI_OPCODE_U64SGE:4036case TGSI_OPCODE_I64SLT:4037case TGSI_OPCODE_I64SGE:4038case TGSI_OPCODE_DSLT:4039case TGSI_OPCODE_DSGE:4040case TGSI_OPCODE_DSEQ:4041case TGSI_OPCODE_DSNE: {4042int pos = 0;4043FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {4044Value *tmp[2];40454046src0 = getSSA(8);4047src1 = getSSA(8);4048tmp[0] = fetchSrc(0, pos);4049tmp[1] = fetchSrc(0, pos + 1);4050mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);4051tmp[0] = fetchSrc(1, pos);4052tmp[1] = fetchSrc(1, pos + 1);4053mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);4054mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);4055pos += 2;4056}4057break;4058}4059case TGSI_OPCODE_U64MIN:4060case TGSI_OPCODE_U64MAX:4061case TGSI_OPCODE_I64MIN:4062case TGSI_OPCODE_I64MAX: {4063dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32;4064FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {4065Value *flag = getSSA(1, FILE_FLAGS);4066src0 = fetchSrc(0, c + 1);4067src1 = fetchSrc(1, c + 1);4068geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1);4069geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH;4070geni->setFlagsDef(1, flag);40714072src0 = fetchSrc(0, c);4073src1 = fetchSrc(1, c);4074geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1);4075geni->subOp = NV50_IR_SUBOP_MINMAX_LOW;4076geni->setFlagsSrc(2, flag);40774078c++;4079}4080break;4081}4082case TGSI_OPCODE_U64SHL:4083case TGSI_OPCODE_I64SHR:4084case TGSI_OPCODE_U64SHR:4085FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {4086src0 = getSSA(8);4087Value *dst = getSSA(8), *tmp[2];4088tmp[0] = fetchSrc(0, c);4089tmp[1] = fetchSrc(0, c + 1);4090mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);4091// Theoretically src1 is a 64-bit value but in practice only the low4092// bits matter. The IR expects this to be a 32-bit value.4093src1 = fetchSrc(1, c);4094mkOp2(op, dstTy, dst, src0, src1);4095mkSplit(&dst0[c], 4, dst);4096c++;4097}4098break;4099case TGSI_OPCODE_U64ADD:4100case TGSI_OPCODE_U64MUL:4101case TGSI_OPCODE_DADD:4102case TGSI_OPCODE_DMUL:4103case TGSI_OPCODE_DDIV:4104case TGSI_OPCODE_DMAX:4105case TGSI_OPCODE_DMIN:4106FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {4107src0 = getSSA(8);4108src1 = getSSA(8);4109Value *dst = getSSA(8), *tmp[2];4110tmp[0] = fetchSrc(0, c);4111tmp[1] = fetchSrc(0, c + 1);4112mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);4113tmp[0] = fetchSrc(1, c);4114tmp[1] = fetchSrc(1, c + 1);4115mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);4116mkOp2(op, dstTy, dst, src0, src1);4117mkSplit(&dst0[c], 4, dst);4118c++;4119}4120break;4121case TGSI_OPCODE_DMAD:4122case TGSI_OPCODE_DFMA:4123FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {4124src0 = getSSA(8);4125src1 = getSSA(8);4126src2 = getSSA(8);4127Value *dst = getSSA(8), *tmp[2];4128tmp[0] = fetchSrc(0, c);4129tmp[1] = fetchSrc(0, c + 1);4130mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);4131tmp[0] = fetchSrc(1, c);4132tmp[1] = fetchSrc(1, c + 1);4133mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);4134tmp[0] = fetchSrc(2, c);4135tmp[1] = fetchSrc(2, c + 1);4136mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]);4137mkOp3(op, dstTy, dst, src0, src1, src2);4138mkSplit(&dst0[c], 4, dst);4139c++;4140}4141break;4142case TGSI_OPCODE_DROUND:4143FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {4144src0 = getSSA(8);4145Value *dst = getSSA(8), *tmp[2];4146tmp[0] = fetchSrc(0, c);4147tmp[1] = fetchSrc(0, c + 1);4148mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);4149mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0)4150->rnd = ROUND_NI;4151mkSplit(&dst0[c], 4, dst);4152c++;4153}4154break;4155case TGSI_OPCODE_DSSG:4156FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {4157src0 = getSSA(8);4158Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2];4159tmp[0] = fetchSrc(0, c);4160tmp[1] = fetchSrc(0, c + 1);4161mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);41624163val0 = getScratch();4164val1 = getScratch();4165// The zero is wrong here since it's only 32-bit, but it works out in4166// the end since it gets replaced with $r63.4167mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero);4168mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero);4169mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1);4170mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32);4171mkSplit(&dst0[c], 4, dst);4172c++;4173}4174break;4175case TGSI_OPCODE_I64SSG:4176FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {4177src0 = getSSA(8);4178Value *tmp[2];4179tmp[0] = fetchSrc(0, c);4180tmp[1] = fetchSrc(0, c + 1);4181mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);41824183val0 = getScratch();4184val1 = getScratch();4185mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero);4186mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero);4187mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);4188mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(0, 31));4189c++;4190}4191break;4192default:4193ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());4194assert(0);4195break;4196}41974198if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {4199for (c = 0; c < 4; ++c) {4200if (!dst0[c])4201continue;4202if (dst0[c] != rDst0[c])4203mkMov(rDst0[c], dst0[c]);4204storeDst(0, c, rDst0[c]);4205}4206}4207vtxBaseValid = 0;42084209return true;4210}42114212void4213Converter::exportOutputs()4214{4215if (info->io.alphaRefBase) {4216for (unsigned int i = 0; i < info_out->numOutputs; ++i) {4217if (info_out->out[i].sn != TGSI_SEMANTIC_COLOR ||4218info_out->out[i].si != 0)4219continue;4220const unsigned int c = 3;4221if (!oData.exists(sub.cur->values, i, c))4222continue;4223Value *val = oData.load(sub.cur->values, i, c, NULL);4224if (!val)4225continue;42264227Symbol *ref = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,4228TYPE_U32, info->io.alphaRefBase);4229Value *pred = new_LValue(func, FILE_PREDICATE);4230mkCmp(OP_SET, CC_TR, TYPE_U32, pred, TYPE_F32, val,4231mkLoadv(TYPE_U32, ref, NULL))4232->subOp = 1;4233mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_NOT_P, pred);4234}4235}42364237for (unsigned int i = 0; i < info_out->numOutputs; ++i) {4238for (unsigned int c = 0; c < 4; ++c) {4239if (!oData.exists(sub.cur->values, i, c))4240continue;4241Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,4242info_out->out[i].slot[c] * 4);4243Value *val = oData.load(sub.cur->values, i, c, NULL);4244if (val) {4245if (info_out->out[i].sn == TGSI_SEMANTIC_POSITION)4246mkOp1(OP_SAT, TYPE_F32, val, val);4247mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);4248}4249}4250}4251}42524253Converter::Converter(Program *ir, const tgsi::Source *code, nv50_ir_prog_info_out *info_out)4254: ConverterCommon(ir, code->info, info_out),4255code(code),4256tgsi(NULL),4257tData(this), lData(this), aData(this), oData(this)4258{4259const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);4260const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);4261const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);42624263tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0);4264lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0);4265aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);4266oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);42674268zero = mkImm((uint32_t)0);42694270vtxBaseValid = 0;4271}42724273Converter::~Converter()4274{4275}42764277inline const Converter::Location *4278Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v)4279{4280ValueMap::l_iterator it = s->values.l.find(v);4281return it == s->values.l.end() ? NULL : &it->second;4282}42834284template<typename T> inline void4285Converter::BindArgumentsPass::updateCallArgs(4286Instruction *i, void (Instruction::*setArg)(int, Value *),4287T (Function::*proto))4288{4289Function *g = i->asFlow()->target.fn;4290Subroutine *subg = conv.getSubroutine(g);42914292for (unsigned a = 0; a < (g->*proto).size(); ++a) {4293Value *v = (g->*proto)[a].get();4294const Converter::Location &l = *getValueLocation(subg, v);4295Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx);42964297(i->*setArg)(a, array->acquire(sub->values, l.i, l.c));4298}4299}43004301template<typename T> inline void4302Converter::BindArgumentsPass::updatePrototype(4303BitSet *set, void (Function::*updateSet)(), T (Function::*proto))4304{4305(func->*updateSet)();43064307for (unsigned i = 0; i < set->getSize(); ++i) {4308Value *v = func->getLValue(i);4309const Converter::Location *l = getValueLocation(sub, v);43104311// only include values with a matching TGSI register4312if (set->test(i) && l && !conv.code->locals.count(*l))4313(func->*proto).push_back(v);4314}4315}43164317bool4318Converter::BindArgumentsPass::visit(Function *f)4319{4320sub = conv.getSubroutine(f);43214322for (ArrayList::Iterator bi = f->allBBlocks.iterator();4323!bi.end(); bi.next()) {4324for (Instruction *i = BasicBlock::get(bi)->getFirst();4325i; i = i->next) {4326if (i->op == OP_CALL && !i->asFlow()->builtin) {4327updateCallArgs(i, &Instruction::setSrc, &Function::ins);4328updateCallArgs(i, &Instruction::setDef, &Function::outs);4329}4330}4331}43324333if (func == prog->main /* && prog->getType() != Program::TYPE_COMPUTE */)4334return true;4335updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,4336&Function::buildLiveSets, &Function::ins);4337updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,4338&Function::buildDefSets, &Function::outs);43394340return true;4341}43424343bool4344Converter::run()4345{4346BasicBlock *entry = new BasicBlock(prog->main);4347BasicBlock *leave = new BasicBlock(prog->main);43484349prog->main->setEntry(entry);4350prog->main->setExit(leave);43514352setPosition(entry, true);4353sub.cur = getSubroutine(prog->main);43544355if (info_out->io.genUserClip > 0) {4356for (int c = 0; c < 4; ++c)4357clipVtx[c] = getScratch();4358}43594360switch (prog->getType()) {4361case Program::TYPE_TESSELLATION_CONTROL:4362outBase = mkOp2v(4363OP_SUB, TYPE_U32, getSSA(),4364mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),4365mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));4366break;4367case Program::TYPE_FRAGMENT: {4368Symbol *sv = mkSysVal(SV_POSITION, 3);4369fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);4370mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);4371break;4372}4373default:4374break;4375}43764377if (info->io.viewportId >= 0)4378viewport = getScratch();4379else4380viewport = NULL;43814382for (ip = 0; ip < code->scan.num_instructions; ++ip) {4383if (!handleInstruction(&code->insns[ip]))4384return false;4385}43864387if (!BindArgumentsPass(*this).run(prog))4388return false;43894390return true;4391}43924393} // unnamed namespace43944395namespace nv50_ir {43964397bool4398Program::makeFromTGSI(struct nv50_ir_prog_info *info,4399struct nv50_ir_prog_info_out *info_out)4400{4401tgsi::Source src(info, info_out, this);4402if (!src.scanSource())4403return false;4404tlsSize = info_out->bin.tlsSpace;44054406Converter builder(this, &src, info_out);4407return builder.run();4408}44094410} // namespace nv50_ir441144124413