Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
4574 views
/*1* Copyright 2011 Christoph Bumiller2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "codegen/nv50_ir.h"23#include "codegen/nv50_ir_target_nv50.h"2425namespace nv50_ir {2627#define NV50_OP_ENC_LONG 028#define NV50_OP_ENC_SHORT 129#define NV50_OP_ENC_IMM 230#define NV50_OP_ENC_LONG_ALT 33132class CodeEmitterNV50 : public CodeEmitter33{34public:35CodeEmitterNV50(Program::Type, const TargetNV50 *);3637virtual bool emitInstruction(Instruction *);3839virtual uint32_t getMinEncodingSize(const Instruction *) const;4041virtual void prepareEmission(Function *);4243private:44Program::Type progType;4546const TargetNV50 *targNV50;4748private:49inline void defId(const ValueDef&, const int pos);50inline void srcId(const ValueRef&, const int pos);51inline void srcId(const ValueRef *, const int pos);5253inline void srcAddr16(const ValueRef&, bool adj, const int pos);54inline void srcAddr8(const ValueRef&, const int pos);5556void emitFlagsRd(const Instruction *);57void emitFlagsWr(const Instruction *);5859void emitCondCode(CondCode cc, DataType ty, int pos);6061inline void setARegBits(unsigned int);6263void setAReg16(const Instruction *, int s);64void setImmediate(const Instruction *, int s);6566void setDst(const Value *);67void setDst(const Instruction *, int d);68void setSrcFileBits(const Instruction *, int enc);69void setSrc(const Instruction *, unsigned int s, int slot);7071void emitForm_MAD(const Instruction *);72void emitForm_ADD(const Instruction *);73void emitForm_MUL(const Instruction *);74void emitForm_IMM(const Instruction *);7576void emitLoadStoreSizeLG(DataType ty, int pos);77void emitLoadStoreSizeCS(DataType ty);7879void roundMode_MAD(const Instruction *);80void roundMode_CVT(RoundMode);8182void emitMNeg12(const Instruction *);8384void emitLOAD(const Instruction *);85void emitSTORE(const Instruction *);86void emitMOV(const Instruction *);87void emitRDSV(const Instruction *);88void emitNOP();89void emitINTERP(const Instruction *);90void emitPFETCH(const Instruction *);91void emitOUT(const Instruction *);9293void emitUADD(const Instruction *);94void emitAADD(const Instruction *);95void emitFADD(const Instruction *);96void emitDADD(const Instruction *);97void emitIMUL(const Instruction *);98void emitFMUL(const Instruction *);99void emitDMUL(const Instruction *);100void emitFMAD(const Instruction *);101void emitDMAD(const Instruction *);102void emitIMAD(const Instruction *);103void emitISAD(const Instruction *);104105void emitMINMAX(const Instruction *);106107void emitPreOp(const Instruction *);108void emitSFnOp(const Instruction *, uint8_t subOp);109110void emitShift(const Instruction *);111void emitARL(const Instruction *, unsigned int shl);112void emitLogicOp(const Instruction *);113void emitNOT(const Instruction *);114115void emitCVT(const Instruction *);116void emitSET(const Instruction *);117118void emitTEX(const TexInstruction *);119void emitTXQ(const TexInstruction *);120void emitTEXPREP(const TexInstruction *);121122void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);123124void emitFlow(const Instruction *, uint8_t flowOp);125void emitPRERETEmu(const FlowInstruction *);126void emitBAR(const Instruction *);127128void emitATOM(const Instruction *);129};130131#define SDATA(a) ((a).rep()->reg.data)132#define DDATA(a) ((a).rep()->reg.data)133134void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)135{136assert(src.get());137code[pos / 32] |= SDATA(src).id << (pos % 32);138}139140void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)141{142assert(src->get());143code[pos / 32] |= SDATA(*src).id << (pos % 32);144}145146void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)147{148assert(src.get());149150int32_t offset = SDATA(src).offset;151152assert(!adj || src.get()->reg.size <= 4);153if (adj)154offset /= src.get()->reg.size;155156assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);157158if (offset < 0)159offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;160161code[pos / 32] |= offset << (pos % 32);162}163164void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)165{166assert(src.get());167168uint32_t offset = SDATA(src).offset;169170assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));171172code[pos / 32] |= (offset >> 2) << (pos % 32);173}174175void CodeEmitterNV50::defId(const ValueDef& def, const int pos)176{177assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);178179code[pos / 32] |= DDATA(def).id << (pos % 32);180}181182void183CodeEmitterNV50::roundMode_MAD(const Instruction *insn)184{185switch (insn->rnd) {186case ROUND_M: code[1] |= 1 << 22; break;187case ROUND_P: code[1] |= 2 << 22; break;188case ROUND_Z: code[1] |= 3 << 22; break;189default:190assert(insn->rnd == ROUND_N);191break;192}193}194195void196CodeEmitterNV50::emitMNeg12(const Instruction *i)197{198code[1] |= i->src(0).mod.neg() << 26;199code[1] |= i->src(1).mod.neg() << 27;200}201202void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)203{204uint8_t enc;205206assert(pos >= 32 || pos <= 27);207208switch (cc) {209case CC_LT: enc = 0x1; break;210case CC_LTU: enc = 0x9; break;211case CC_EQ: enc = 0x2; break;212case CC_EQU: enc = 0xa; break;213case CC_LE: enc = 0x3; break;214case CC_LEU: enc = 0xb; break;215case CC_GT: enc = 0x4; break;216case CC_GTU: enc = 0xc; break;217case CC_NE: enc = 0x5; break;218case CC_NEU: enc = 0xd; break;219case CC_GE: enc = 0x6; break;220case CC_GEU: enc = 0xe; break;221case CC_TR: enc = 0xf; break;222case CC_FL: enc = 0x0; break;223224case CC_O: enc = 0x10; break;225case CC_C: enc = 0x11; break;226case CC_A: enc = 0x12; break;227case CC_S: enc = 0x13; break;228case CC_NS: enc = 0x1c; break;229case CC_NA: enc = 0x1d; break;230case CC_NC: enc = 0x1e; break;231case CC_NO: enc = 0x1f; break;232233default:234enc = 0;235assert(!"invalid condition code");236break;237}238if (ty != TYPE_NONE && !isFloatType(ty))239enc &= ~0x8; // unordered only exists for float types240241code[pos / 32] |= enc << (pos % 32);242}243244void245CodeEmitterNV50::emitFlagsRd(const Instruction *i)246{247int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;248249assert(!(code[1] & 0x00003f80));250251if (s >= 0) {252assert(i->getSrc(s)->reg.file == FILE_FLAGS);253emitCondCode(i->cc, TYPE_NONE, 32 + 7);254srcId(i->src(s), 32 + 12);255} else {256code[1] |= 0x0780;257}258}259260void261CodeEmitterNV50::emitFlagsWr(const Instruction *i)262{263assert(!(code[1] & 0x70));264265int flagsDef = i->flagsDef;266267// find flags definition and check that it is the last def268if (flagsDef < 0) {269for (int d = 0; i->defExists(d); ++d)270if (i->def(d).getFile() == FILE_FLAGS)271flagsDef = d;272if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point273WARN("Instruction::flagsDef was not set properly\n");274}275if (flagsDef == 0 && i->defExists(1))276WARN("flags def should not be the primary definition\n");277278if (flagsDef >= 0)279code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;280281}282283void284CodeEmitterNV50::setARegBits(unsigned int u)285{286code[0] |= (u & 3) << 26;287code[1] |= (u & 4);288}289290void291CodeEmitterNV50::setAReg16(const Instruction *i, int s)292{293if (i->srcExists(s)) {294s = i->src(s).indirect[0];295if (s >= 0)296setARegBits(SDATA(i->src(s)).id + 1);297}298}299300void301CodeEmitterNV50::setImmediate(const Instruction *i, int s)302{303const ImmediateValue *imm = i->src(s).get()->asImm();304assert(imm);305306uint32_t u = imm->reg.data.u32;307308if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))309u = ~u;310311code[1] |= 3;312code[0] |= (u & 0x3f) << 16;313code[1] |= (u >> 6) << 2;314}315316void317CodeEmitterNV50::setDst(const Value *dst)318{319const Storage *reg = &dst->join->reg;320321assert(reg->file != FILE_ADDRESS);322323if (reg->data.id < 0 || reg->file == FILE_FLAGS) {324code[0] |= (127 << 2) | 1;325code[1] |= 8;326} else {327int id;328if (reg->file == FILE_SHADER_OUTPUT) {329code[1] |= 8;330id = reg->data.offset / 4;331} else {332id = reg->data.id;333}334code[0] |= id << 2;335}336}337338void339CodeEmitterNV50::setDst(const Instruction *i, int d)340{341if (i->defExists(d)) {342setDst(i->getDef(d));343} else344if (!d) {345code[0] |= 0x01fc; // bit bucket346code[1] |= 0x0008;347}348}349350// 3 * 2 bits:351// 0: r352// 1: a/s353// 2: c354// 3: i355void356CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)357{358uint8_t mode = 0;359360for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {361switch (i->src(s).getFile()) {362case FILE_GPR:363break;364case FILE_MEMORY_SHARED:365case FILE_SHADER_INPUT:366mode |= 1 << (s * 2);367break;368case FILE_MEMORY_CONST:369mode |= 2 << (s * 2);370break;371case FILE_IMMEDIATE:372mode |= 3 << (s * 2);373break;374default:375ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());376assert(0);377break;378}379}380switch (mode) {381case 0x00: // rrr382break;383case 0x01: // arr/grr384if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {385code[0] |= 0x01800000;386if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)387code[1] |= 0x00200000;388} else {389if (enc == NV50_OP_ENC_SHORT)390code[0] |= 0x01000000;391else392code[1] |= 0x00200000;393}394break;395case 0x03: // irr396assert(i->op == OP_MOV);397return;398case 0x0c: // rir399break;400case 0x0d: // gir401assert(progType == Program::TYPE_GEOMETRY ||402progType == Program::TYPE_COMPUTE);403code[0] |= 0x01000000;404if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {405int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;406assert(reg < 3);407code[0] |= (reg + 1) << 26;408}409break;410case 0x08: // rcr411code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;412code[1] |= (i->getSrc(1)->reg.fileIndex << 22);413break;414case 0x09: // acr/gcr415if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {416code[0] |= 0x01800000;417} else {418code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;419code[1] |= 0x00200000;420}421code[1] |= (i->getSrc(1)->reg.fileIndex << 22);422break;423case 0x20: // rrc424code[0] |= 0x01000000;425code[1] |= (i->getSrc(2)->reg.fileIndex << 22);426break;427case 0x21: // arc428code[0] |= 0x01000000;429code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);430assert(progType != Program::TYPE_GEOMETRY);431break;432default:433ERROR("not encodable: %x\n", mode);434assert(0);435break;436}437if (progType != Program::TYPE_COMPUTE)438return;439440if ((mode & 3) == 1) {441const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;442443switch (i->sType) {444case TYPE_U8:445break;446case TYPE_U16:447code[0] |= 1 << pos;448break;449case TYPE_S16:450code[0] |= 2 << pos;451break;452default:453code[0] |= 3 << pos;454assert(i->getSrc(0)->reg.size == 4);455break;456}457}458}459460void461CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)462{463if (Target::operationSrcNr[i->op] <= s)464return;465const Storage *reg = &i->src(s).rep()->reg;466467unsigned int id = (reg->file == FILE_GPR) ?468reg->data.id :469reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here470471switch (slot) {472case 0: code[0] |= id << 9; break;473case 1: code[0] |= id << 16; break;474case 2: code[1] |= id << 14; break;475default:476assert(0);477break;478}479}480481// the default form:482// - long instruction483// - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)484// - address & flags485void486CodeEmitterNV50::emitForm_MAD(const Instruction *i)487{488assert(i->encSize == 8);489code[0] |= 1;490491emitFlagsRd(i);492emitFlagsWr(i);493494setDst(i, 0);495496setSrcFileBits(i, NV50_OP_ENC_LONG);497setSrc(i, 0, 0);498setSrc(i, 1, 1);499setSrc(i, 2, 2);500501if (i->getIndirect(0, 0)) {502assert(!i->srcExists(1) || !i->getIndirect(1, 0));503assert(!i->srcExists(2) || !i->getIndirect(2, 0));504setAReg16(i, 0);505} else if (i->srcExists(1) && i->getIndirect(1, 0)) {506assert(!i->srcExists(2) || !i->getIndirect(2, 0));507setAReg16(i, 1);508} else {509setAReg16(i, 2);510}511}512513// like default form, but 2nd source in slot 2, and no 3rd source514void515CodeEmitterNV50::emitForm_ADD(const Instruction *i)516{517assert(i->encSize == 8);518code[0] |= 1;519520emitFlagsRd(i);521emitFlagsWr(i);522523setDst(i, 0);524525setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);526setSrc(i, 0, 0);527if (i->predSrc != 1)528setSrc(i, 1, 2);529530if (i->getIndirect(0, 0)) {531assert(!i->getIndirect(1, 0));532setAReg16(i, 0);533} else {534setAReg16(i, 1);535}536}537538// default short form (rr, ar, rc, gr)539void540CodeEmitterNV50::emitForm_MUL(const Instruction *i)541{542assert(i->encSize == 4 && !(code[0] & 1));543assert(i->defExists(0));544assert(!i->getPredicate());545546setDst(i, 0);547548setSrcFileBits(i, NV50_OP_ENC_SHORT);549setSrc(i, 0, 0);550setSrc(i, 1, 1);551}552553// usual immediate form554// - 1 to 3 sources where second is immediate (rir, gir)555// - no address or predicate possible556void557CodeEmitterNV50::emitForm_IMM(const Instruction *i)558{559assert(i->encSize == 8);560code[0] |= 1;561562assert(i->defExists(0) && i->srcExists(0));563564setDst(i, 0);565566setSrcFileBits(i, NV50_OP_ENC_IMM);567if (Target::operationSrcNr[i->op] > 1) {568setSrc(i, 0, 0);569setImmediate(i, 1);570// If there is another source, it has to be the same as the dest reg.571} else {572setImmediate(i, 0);573}574}575576void577CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)578{579uint8_t enc;580581switch (ty) {582case TYPE_F32: // fall through583case TYPE_S32: // fall through584case TYPE_U32: enc = 0x6; break;585case TYPE_B128: enc = 0x5; break;586case TYPE_F64: // fall through587case TYPE_S64: // fall through588case TYPE_U64: enc = 0x4; break;589case TYPE_S16: enc = 0x3; break;590case TYPE_U16: enc = 0x2; break;591case TYPE_S8: enc = 0x1; break;592case TYPE_U8: enc = 0x0; break;593default:594enc = 0;595assert(!"invalid load/store type");596break;597}598code[pos / 32] |= enc << (pos % 32);599}600601void602CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)603{604switch (ty) {605case TYPE_U8: break;606case TYPE_U16: code[1] |= 0x4000; break;607case TYPE_S16: code[1] |= 0x8000; break;608case TYPE_F32:609case TYPE_S32:610case TYPE_U32: code[1] |= 0xc000; break;611default:612assert(0);613break;614}615}616617void618CodeEmitterNV50::emitLOAD(const Instruction *i)619{620DataFile sf = i->src(0).getFile();621ASSERTED int32_t offset = i->getSrc(0)->reg.data.offset;622623switch (sf) {624case FILE_SHADER_INPUT:625if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))626code[0] = 0x11800001;627else628// use 'mov' where we can629code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;630code[1] = 0x00200000 | (i->lanes << 14);631if (typeSizeof(i->dType) == 4)632code[1] |= 0x04000000;633break;634case FILE_MEMORY_SHARED:635if (targ->getChipset() >= 0x84) {636assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));637code[0] = 0x10000001;638code[1] = 0x40000000;639640if (typeSizeof(i->dType) == 4)641code[1] |= 0x04000000;642643emitLoadStoreSizeCS(i->sType);644645if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)646code[1] |= 0x00800000;647} else {648assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));649code[0] = 0x10000001;650code[1] = 0x00200000 | (i->lanes << 14);651emitLoadStoreSizeCS(i->sType);652}653break;654case FILE_MEMORY_CONST:655code[0] = 0x10000001;656code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);657if (typeSizeof(i->dType) == 4)658code[1] |= 0x04000000;659emitLoadStoreSizeCS(i->sType);660break;661case FILE_MEMORY_LOCAL:662code[0] = 0xd0000001;663code[1] = 0x40000000;664break;665case FILE_MEMORY_GLOBAL:666code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);667code[1] = 0x80000000;668break;669default:670assert(!"invalid load source file");671break;672}673if (sf == FILE_MEMORY_LOCAL ||674sf == FILE_MEMORY_GLOBAL)675emitLoadStoreSizeLG(i->sType, 21 + 32);676677setDst(i, 0);678679emitFlagsRd(i);680emitFlagsWr(i);681682if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {683srcId(*i->src(0).getIndirect(0), 9);684} else {685setAReg16(i, 0);686srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);687}688}689690void691CodeEmitterNV50::emitSTORE(const Instruction *i)692{693DataFile f = i->getSrc(0)->reg.file;694int32_t offset = i->getSrc(0)->reg.data.offset;695696switch (f) {697case FILE_SHADER_OUTPUT:698code[0] = 0x00000001 | ((offset >> 2) << 9);699code[1] = 0x80c00000;700srcId(i->src(1), 32 + 14);701break;702case FILE_MEMORY_GLOBAL:703code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);704code[1] = 0xa0000000;705emitLoadStoreSizeLG(i->dType, 21 + 32);706srcId(i->src(1), 2);707break;708case FILE_MEMORY_LOCAL:709code[0] = 0xd0000001;710code[1] = 0x60000000;711emitLoadStoreSizeLG(i->dType, 21 + 32);712srcId(i->src(1), 2);713break;714case FILE_MEMORY_SHARED:715code[0] = 0x00000001;716code[1] = 0xe0000000;717if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)718code[1] |= 0x00800000;719switch (typeSizeof(i->dType)) {720case 1:721code[0] |= offset << 9;722code[1] |= 0x00400000;723break;724case 2:725code[0] |= (offset >> 1) << 9;726break;727case 4:728code[0] |= (offset >> 2) << 9;729code[1] |= 0x04200000;730break;731default:732assert(0);733break;734}735srcId(i->src(1), 32 + 14);736break;737default:738assert(!"invalid store destination file");739break;740}741742if (f == FILE_MEMORY_GLOBAL)743srcId(*i->src(0).getIndirect(0), 9);744else745setAReg16(i, 0);746747if (f == FILE_MEMORY_LOCAL)748srcAddr16(i->src(0), false, 9);749750emitFlagsRd(i);751}752753void754CodeEmitterNV50::emitMOV(const Instruction *i)755{756DataFile sf = i->getSrc(0)->reg.file;757DataFile df = i->getDef(0)->reg.file;758759assert(sf == FILE_GPR || df == FILE_GPR);760761if (sf == FILE_FLAGS) {762assert(i->flagsSrc >= 0);763code[0] = 0x00000001;764code[1] = 0x20000000;765defId(i->def(0), 2);766emitFlagsRd(i);767} else768if (sf == FILE_ADDRESS) {769code[0] = 0x00000001;770code[1] = 0x40000000;771defId(i->def(0), 2);772setARegBits(SDATA(i->src(0)).id + 1);773emitFlagsRd(i);774} else775if (df == FILE_FLAGS) {776assert(i->flagsDef >= 0);777code[0] = 0x00000001;778code[1] = 0xa0000000;779srcId(i->src(0), 9);780emitFlagsRd(i);781emitFlagsWr(i);782} else783if (sf == FILE_IMMEDIATE) {784code[0] = 0x10000001;785code[1] = 0x00000003;786emitForm_IMM(i);787788code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;789} else {790if (i->encSize == 4) {791code[0] = 0x10000000;792code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;793defId(i->def(0), 2);794} else {795code[0] = 0x10000001;796code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;797code[1] |= (i->lanes << 14);798setDst(i, 0);799emitFlagsRd(i);800}801srcId(i->src(0), 9);802}803if (df == FILE_SHADER_OUTPUT) {804assert(i->encSize == 8);805code[1] |= 0x8;806}807}808809static inline uint8_t getSRegEncoding(const ValueRef &ref)810{811switch (SDATA(ref).sv.sv) {812case SV_PHYSID: return 0;813case SV_CLOCK: return 1;814case SV_VERTEX_STRIDE: return 3;815// case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index;816case SV_SAMPLE_INDEX: return 8;817default:818assert(!"no sreg for system value");819return 0;820}821}822823void824CodeEmitterNV50::emitRDSV(const Instruction *i)825{826code[0] = 0x00000001;827code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);828defId(i->def(0), 2);829emitFlagsRd(i);830}831832void833CodeEmitterNV50::emitNOP()834{835code[0] = 0xf0000001;836code[1] = 0xe0000000;837}838839void840CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)841{842code[0] = 0xc0000000 | (lane << 16);843code[1] = 0x80000000;844845code[0] |= (quOp & 0x03) << 20;846code[1] |= (quOp & 0xfc) << 20;847848emitForm_ADD(i);849850if (!i->srcExists(1) || i->predSrc == 1)851srcId(i->src(0), 32 + 14);852}853854/* NOTE: This returns the base address of a vertex inside the primitive.855* src0 is an immediate, the index (not offset) of the vertex856* inside the primitive. XXX: signed or unsigned ?857* src1 (may be NULL) should use whatever units the hardware requires858* (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).859*/860void861CodeEmitterNV50::emitPFETCH(const Instruction *i)862{863const uint32_t prim = i->src(0).get()->reg.data.u32;864assert(prim <= 127);865866if (i->def(0).getFile() == FILE_ADDRESS) {867// shl $aX a[] 0868code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);869code[1] = 0xc0200000;870code[0] |= prim << 9;871assert(!i->srcExists(1));872} else873if (i->srcExists(1)) {874// ld b32 $rX a[$aX+base]875code[0] = 0x00000001;876code[1] = 0x04200000 | (0xf << 14);877defId(i->def(0), 2);878code[0] |= prim << 9;879setARegBits(SDATA(i->src(1)).id + 1);880} else {881// mov b32 $rX a[]882code[0] = 0x10000001;883code[1] = 0x04200000 | (0xf << 14);884defId(i->def(0), 2);885code[0] |= prim << 9;886}887emitFlagsRd(i);888}889890void891nv50_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)892{893int ipa = entry->ipa;894int encSize = entry->reg;895int loc = entry->loc;896897if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&898(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {899if (data.force_persample_interp) {900if (encSize == 8)901code[loc + 1] |= 1 << 16;902else903code[loc + 0] |= 1 << 24;904} else {905if (encSize == 8)906code[loc + 1] &= ~(1 << 16);907else908code[loc + 0] &= ~(1 << 24);909}910}911}912913void914CodeEmitterNV50::emitINTERP(const Instruction *i)915{916code[0] = 0x80000000;917918defId(i->def(0), 2);919srcAddr8(i->src(0), 16);920setAReg16(i, 0);921922if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {923code[0] |= 1 << 8;924} else {925if (i->op == OP_PINTERP) {926code[0] |= 1 << 25;927srcId(i->src(1), 9);928}929if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)930code[0] |= 1 << 24;931}932933if (i->encSize == 8) {934if (i->getInterpMode() == NV50_IR_INTERP_FLAT)935code[1] = 4 << 16;936else937code[1] = (code[0] & (3 << 24)) >> (24 - 16);938code[0] &= ~0x03000000;939code[0] |= 1;940emitFlagsRd(i);941}942943addInterp(i->ipa, i->encSize, nv50_interpApply);944}945946void947CodeEmitterNV50::emitMINMAX(const Instruction *i)948{949if (i->dType == TYPE_F64) {950code[0] = 0xe0000000;951code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;952} else {953code[0] = 0x30000000;954code[1] = 0x80000000;955if (i->op == OP_MIN)956code[1] |= 0x20000000;957958switch (i->dType) {959case TYPE_F32: code[0] |= 0x80000000; break;960case TYPE_S32: code[1] |= 0x8c000000; break;961case TYPE_U32: code[1] |= 0x84000000; break;962case TYPE_S16: code[1] |= 0x80000000; break;963case TYPE_U16: break;964default:965assert(0);966break;967}968}969970code[1] |= i->src(0).mod.abs() << 20;971code[1] |= i->src(0).mod.neg() << 26;972code[1] |= i->src(1).mod.abs() << 19;973code[1] |= i->src(1).mod.neg() << 27;974975emitForm_MAD(i);976}977978void979CodeEmitterNV50::emitFMAD(const Instruction *i)980{981const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();982const int neg_add = i->src(2).mod.neg();983984code[0] = 0xe0000000;985986if (i->src(1).getFile() == FILE_IMMEDIATE) {987code[1] = 0;988emitForm_IMM(i);989code[0] |= neg_mul << 15;990code[0] |= neg_add << 22;991if (i->saturate)992code[0] |= 1 << 8;993} else994if (i->encSize == 4) {995emitForm_MUL(i);996code[0] |= neg_mul << 15;997code[0] |= neg_add << 22;998if (i->saturate)999code[0] |= 1 << 8;1000} else {1001code[1] = neg_mul << 26;1002code[1] |= neg_add << 27;1003if (i->saturate)1004code[1] |= 1 << 29;1005emitForm_MAD(i);1006}1007}10081009void1010CodeEmitterNV50::emitDMAD(const Instruction *i)1011{1012const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();1013const int neg_add = i->src(2).mod.neg();10141015assert(i->encSize == 8);1016assert(!i->saturate);10171018code[1] = 0x40000000;1019code[0] = 0xe0000000;10201021code[1] |= neg_mul << 26;1022code[1] |= neg_add << 27;10231024roundMode_MAD(i);10251026emitForm_MAD(i);1027}10281029void1030CodeEmitterNV50::emitFADD(const Instruction *i)1031{1032const int neg0 = i->src(0).mod.neg();1033const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);10341035code[0] = 0xb0000000;10361037assert(!(i->src(0).mod | i->src(1).mod).abs());10381039if (i->src(1).getFile() == FILE_IMMEDIATE) {1040code[1] = 0;1041emitForm_IMM(i);1042code[0] |= neg0 << 15;1043code[0] |= neg1 << 22;1044if (i->saturate)1045code[0] |= 1 << 8;1046} else1047if (i->encSize == 8) {1048code[1] = 0;1049emitForm_ADD(i);1050code[1] |= neg0 << 26;1051code[1] |= neg1 << 27;1052if (i->saturate)1053code[1] |= 1 << 29;1054} else {1055emitForm_MUL(i);1056code[0] |= neg0 << 15;1057code[0] |= neg1 << 22;1058if (i->saturate)1059code[0] |= 1 << 8;1060}1061}10621063void1064CodeEmitterNV50::emitDADD(const Instruction *i)1065{1066const int neg0 = i->src(0).mod.neg();1067const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);10681069assert(!(i->src(0).mod | i->src(1).mod).abs());1070assert(!i->saturate);1071assert(i->encSize == 8);10721073code[1] = 0x60000000;1074code[0] = 0xe0000000;10751076emitForm_ADD(i);10771078code[1] |= neg0 << 26;1079code[1] |= neg1 << 27;1080}10811082void1083CodeEmitterNV50::emitUADD(const Instruction *i)1084{1085const int neg0 = i->src(0).mod.neg();1086const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);10871088code[0] = 0x20000000;10891090if (i->src(1).getFile() == FILE_IMMEDIATE) {1091code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;1092code[1] = 0;1093emitForm_IMM(i);1094} else1095if (i->encSize == 8) {1096code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;1097emitForm_ADD(i);1098} else {1099code[0] |= (typeSizeof(i->dType) == 2) ? 0 : 0x00008000;1100emitForm_MUL(i);1101}1102assert(!(neg0 && neg1));1103code[0] |= neg0 << 28;1104code[0] |= neg1 << 22;11051106if (i->flagsSrc >= 0) {1107// addc == sub | subr1108assert(!(code[0] & 0x10400000) && !i->getPredicate());1109code[0] |= 0x10400000;1110srcId(i->src(i->flagsSrc), 32 + 12);1111}1112}11131114void1115CodeEmitterNV50::emitAADD(const Instruction *i)1116{1117const int s = (i->op == OP_MOV) ? 0 : 1;11181119code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);1120code[1] = 0x20000000;11211122code[0] |= (DDATA(i->def(0)).id + 1) << 2;11231124emitFlagsRd(i);11251126if (s && i->srcExists(0))1127setARegBits(SDATA(i->src(0)).id + 1);1128}11291130void1131CodeEmitterNV50::emitIMUL(const Instruction *i)1132{1133code[0] = 0x40000000;11341135if (i->src(1).getFile() == FILE_IMMEDIATE) {1136if (i->sType == TYPE_S16)1137code[0] |= 0x8100;1138code[1] = 0;1139emitForm_IMM(i);1140} else1141if (i->encSize == 8) {1142code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;1143emitForm_MAD(i);1144} else {1145if (i->sType == TYPE_S16)1146code[0] |= 0x8100;1147emitForm_MUL(i);1148}1149}11501151void1152CodeEmitterNV50::emitFMUL(const Instruction *i)1153{1154const int neg = (i->src(0).mod ^ i->src(1).mod).neg();11551156code[0] = 0xc0000000;11571158if (i->src(1).getFile() == FILE_IMMEDIATE) {1159code[1] = 0;1160emitForm_IMM(i);1161if (neg)1162code[0] |= 0x8000;1163if (i->saturate)1164code[0] |= 1 << 8;1165} else1166if (i->encSize == 8) {1167code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;1168if (neg)1169code[1] |= 0x08000000;1170if (i->saturate)1171code[1] |= 1 << 20;1172emitForm_MAD(i);1173} else {1174emitForm_MUL(i);1175if (neg)1176code[0] |= 0x8000;1177if (i->saturate)1178code[0] |= 1 << 8;1179}1180}11811182void1183CodeEmitterNV50::emitDMUL(const Instruction *i)1184{1185const int neg = (i->src(0).mod ^ i->src(1).mod).neg();11861187assert(!i->saturate);1188assert(i->encSize == 8);11891190code[1] = 0x80000000;1191code[0] = 0xe0000000;11921193if (neg)1194code[1] |= 0x08000000;11951196roundMode_CVT(i->rnd);11971198emitForm_MAD(i);1199}12001201void1202CodeEmitterNV50::emitIMAD(const Instruction *i)1203{1204int mode;1205code[0] = 0x60000000;12061207assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);1208if (!isSignedType(i->sType))1209mode = 0;1210else if (i->saturate)1211mode = 2;1212else1213mode = 1;12141215if (i->src(1).getFile() == FILE_IMMEDIATE) {1216code[1] = 0;1217emitForm_IMM(i);1218code[0] |= (mode & 1) << 8 | (mode & 2) << 14;1219if (i->flagsSrc >= 0) {1220assert(!(code[0] & 0x10400000));1221assert(SDATA(i->src(i->flagsSrc)).id == 0);1222code[0] |= 0x10400000;1223}1224} else1225if (i->encSize == 4) {1226emitForm_MUL(i);1227code[0] |= (mode & 1) << 8 | (mode & 2) << 14;1228if (i->flagsSrc >= 0) {1229assert(!(code[0] & 0x10400000));1230assert(SDATA(i->src(i->flagsSrc)).id == 0);1231code[0] |= 0x10400000;1232}1233} else {1234code[1] = mode << 29;1235emitForm_MAD(i);12361237if (i->flagsSrc >= 0) {1238// add with carry from $cX1239assert(!(code[1] & 0x0c000000) && !i->getPredicate());1240code[1] |= 0xc << 24;1241srcId(i->src(i->flagsSrc), 32 + 12);1242}1243}1244}12451246void1247CodeEmitterNV50::emitISAD(const Instruction *i)1248{1249if (i->encSize == 8) {1250code[0] = 0x50000000;1251switch (i->sType) {1252case TYPE_U32: code[1] = 0x04000000; break;1253case TYPE_S32: code[1] = 0x0c000000; break;1254case TYPE_U16: code[1] = 0x00000000; break;1255case TYPE_S16: code[1] = 0x08000000; break;1256default:1257assert(0);1258break;1259}1260emitForm_MAD(i);1261} else {1262switch (i->sType) {1263case TYPE_U32: code[0] = 0x50008000; break;1264case TYPE_S32: code[0] = 0x50008100; break;1265case TYPE_U16: code[0] = 0x50000000; break;1266case TYPE_S16: code[0] = 0x50000100; break;1267default:1268assert(0);1269break;1270}1271emitForm_MUL(i);1272}1273}12741275static void1276alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data)1277{1278int loc = entry->loc;1279int enc;12801281switch (data.alphatest) {1282case PIPE_FUNC_NEVER: enc = 0x0; break;1283case PIPE_FUNC_LESS: enc = 0x1; break;1284case PIPE_FUNC_EQUAL: enc = 0x2; break;1285case PIPE_FUNC_LEQUAL: enc = 0x3; break;1286case PIPE_FUNC_GREATER: enc = 0x4; break;1287case PIPE_FUNC_NOTEQUAL: enc = 0x5; break;1288case PIPE_FUNC_GEQUAL: enc = 0x6; break;1289default:1290case PIPE_FUNC_ALWAYS: enc = 0xf; break;1291}12921293code[loc + 1] &= ~(0x1f << 14);1294code[loc + 1] |= enc << 14;1295}12961297void1298CodeEmitterNV50::emitSET(const Instruction *i)1299{1300code[0] = 0x30000000;1301code[1] = 0x60000000;13021303switch (i->sType) {1304case TYPE_F64:1305code[0] = 0xe0000000;1306code[1] = 0xe0000000;1307break;1308case TYPE_F32: code[0] |= 0x80000000; break;1309case TYPE_S32: code[1] |= 0x0c000000; break;1310case TYPE_U32: code[1] |= 0x04000000; break;1311case TYPE_S16: code[1] |= 0x08000000; break;1312case TYPE_U16: break;1313default:1314assert(0);1315break;1316}13171318emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);13191320if (i->src(0).mod.neg()) code[1] |= 0x04000000;1321if (i->src(1).mod.neg()) code[1] |= 0x08000000;1322if (i->src(0).mod.abs()) code[1] |= 0x00100000;1323if (i->src(1).mod.abs()) code[1] |= 0x00080000;13241325emitForm_MAD(i);13261327if (i->subOp == 1) {1328addInterp(0, 0, alphatestSet);1329}1330}13311332void1333CodeEmitterNV50::roundMode_CVT(RoundMode rnd)1334{1335switch (rnd) {1336case ROUND_NI: code[1] |= 0x08000000; break;1337case ROUND_M: code[1] |= 0x00020000; break;1338case ROUND_MI: code[1] |= 0x08020000; break;1339case ROUND_P: code[1] |= 0x00040000; break;1340case ROUND_PI: code[1] |= 0x08040000; break;1341case ROUND_Z: code[1] |= 0x00060000; break;1342case ROUND_ZI: code[1] |= 0x08060000; break;1343default:1344assert(rnd == ROUND_N);1345break;1346}1347}13481349void1350CodeEmitterNV50::emitCVT(const Instruction *i)1351{1352const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);1353RoundMode rnd;1354DataType dType;13551356switch (i->op) {1357case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;1358case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;1359case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;1360default:1361rnd = i->rnd;1362break;1363}13641365if (i->op == OP_NEG && i->dType == TYPE_U32)1366dType = TYPE_S32;1367else1368dType = i->dType;13691370code[0] = 0xa0000000;13711372switch (dType) {1373case TYPE_F64:1374switch (i->sType) {1375case TYPE_F64: code[1] = 0xc4404000; break;1376case TYPE_S64: code[1] = 0x44414000; break;1377case TYPE_U64: code[1] = 0x44404000; break;1378case TYPE_F32: code[1] = 0xc4400000; break;1379case TYPE_S32: code[1] = 0x44410000; break;1380case TYPE_U32: code[1] = 0x44400000; break;1381default:1382assert(0);1383break;1384}1385break;1386case TYPE_S64:1387switch (i->sType) {1388case TYPE_F64: code[1] = 0x8c404000; break;1389case TYPE_F32: code[1] = 0x8c400000; break;1390default:1391assert(0);1392break;1393}1394break;1395case TYPE_U64:1396switch (i->sType) {1397case TYPE_F64: code[1] = 0x84404000; break;1398case TYPE_F32: code[1] = 0x84400000; break;1399default:1400assert(0);1401break;1402}1403break;1404case TYPE_F32:1405switch (i->sType) {1406case TYPE_F64: code[1] = 0xc0404000; break;1407case TYPE_S64: code[1] = 0x40414000; break;1408case TYPE_U64: code[1] = 0x40404000; break;1409case TYPE_F32: code[1] = 0xc4004000; break;1410case TYPE_S32: code[1] = 0x44014000; break;1411case TYPE_U32: code[1] = 0x44004000; break;1412case TYPE_F16: code[1] = 0xc4000000; break;1413case TYPE_U16: code[1] = 0x44000000; break;1414case TYPE_S16: code[1] = 0x44010000; break;1415case TYPE_S8: code[1] = 0x44018000; break;1416case TYPE_U8: code[1] = 0x44008000; break;1417default:1418assert(0);1419break;1420}1421break;1422case TYPE_S32:1423switch (i->sType) {1424case TYPE_F64: code[1] = 0x88404000; break;1425case TYPE_F32: code[1] = 0x8c004000; break;1426case TYPE_S32: code[1] = 0x0c014000; break;1427case TYPE_U32: code[1] = 0x0c004000; break;1428case TYPE_F16: code[1] = 0x8c000000; break;1429case TYPE_S16: code[1] = 0x0c010000; break;1430case TYPE_U16: code[1] = 0x0c000000; break;1431case TYPE_S8: code[1] = 0x0c018000; break;1432case TYPE_U8: code[1] = 0x0c008000; break;1433default:1434assert(0);1435break;1436}1437break;1438case TYPE_U32:1439switch (i->sType) {1440case TYPE_F64: code[1] = 0x80404000; break;1441case TYPE_F32: code[1] = 0x84004000; break;1442case TYPE_S32: code[1] = 0x04014000; break;1443case TYPE_U32: code[1] = 0x04004000; break;1444case TYPE_F16: code[1] = 0x84000000; break;1445case TYPE_S16: code[1] = 0x04010000; break;1446case TYPE_U16: code[1] = 0x04000000; break;1447case TYPE_S8: code[1] = 0x04018000; break;1448case TYPE_U8: code[1] = 0x04008000; break;1449default:1450assert(0);1451break;1452}1453break;1454case TYPE_F16:1455switch (i->sType) {1456case TYPE_F16: code[1] = 0xc0000000; break;1457case TYPE_F32: code[1] = 0xc0004000; break;1458default:1459assert(0);1460break;1461}1462break;1463case TYPE_S16:1464switch (i->sType) {1465case TYPE_F32: code[1] = 0x88004000; break;1466case TYPE_S32: code[1] = 0x08014000; break;1467case TYPE_U32: code[1] = 0x08004000; break;1468case TYPE_F16: code[1] = 0x88000000; break;1469case TYPE_S16: code[1] = 0x08010000; break;1470case TYPE_U16: code[1] = 0x08000000; break;1471case TYPE_S8: code[1] = 0x08018000; break;1472case TYPE_U8: code[1] = 0x08008000; break;1473default:1474assert(0);1475break;1476}1477break;1478case TYPE_U16:1479switch (i->sType) {1480case TYPE_F32: code[1] = 0x80004000; break;1481case TYPE_S32: code[1] = 0x00014000; break;1482case TYPE_U32: code[1] = 0x00004000; break;1483case TYPE_F16: code[1] = 0x80000000; break;1484case TYPE_S16: code[1] = 0x00010000; break;1485case TYPE_U16: code[1] = 0x00000000; break;1486case TYPE_S8: code[1] = 0x00018000; break;1487case TYPE_U8: code[1] = 0x00008000; break;1488default:1489assert(0);1490break;1491}1492break;1493case TYPE_S8:1494switch (i->sType) {1495case TYPE_S32: code[1] = 0x08094000; break;1496case TYPE_U32: code[1] = 0x08084000; break;1497case TYPE_F16: code[1] = 0x88080000; break;1498case TYPE_S16: code[1] = 0x08090000; break;1499case TYPE_U16: code[1] = 0x08080000; break;1500case TYPE_S8: code[1] = 0x08098000; break;1501case TYPE_U8: code[1] = 0x08088000; break;1502default:1503assert(0);1504break;1505}1506break;1507case TYPE_U8:1508switch (i->sType) {1509case TYPE_S32: code[1] = 0x00094000; break;1510case TYPE_U32: code[1] = 0x00084000; break;1511case TYPE_F16: code[1] = 0x80080000; break;1512case TYPE_S16: code[1] = 0x00090000; break;1513case TYPE_U16: code[1] = 0x00080000; break;1514case TYPE_S8: code[1] = 0x00098000; break;1515case TYPE_U8: code[1] = 0x00088000; break;1516default:1517assert(0);1518break;1519}1520break;1521default:1522assert(0);1523break;1524}1525if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)1526code[1] |= 0x00004000;15271528roundMode_CVT(rnd);15291530switch (i->op) {1531case OP_ABS: code[1] |= 1 << 20; break;1532case OP_SAT: code[1] |= 1 << 19; break;1533case OP_NEG: code[1] |= 1 << 29; break;1534default:1535break;1536}1537code[1] ^= i->src(0).mod.neg() << 29;1538code[1] |= i->src(0).mod.abs() << 20;1539if (i->saturate)1540code[1] |= 1 << 19;15411542assert(i->op != OP_ABS || !i->src(0).mod.neg());15431544emitForm_MAD(i);1545}15461547void1548CodeEmitterNV50::emitPreOp(const Instruction *i)1549{1550code[0] = 0xb0000000;1551code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;15521553code[1] |= i->src(0).mod.abs() << 20;1554code[1] |= i->src(0).mod.neg() << 26;15551556emitForm_MAD(i);1557}15581559void1560CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)1561{1562code[0] = 0x90000000;15631564if (i->encSize == 4) {1565assert(i->op == OP_RCP);1566assert(!i->saturate);1567code[0] |= i->src(0).mod.abs() << 15;1568code[0] |= i->src(0).mod.neg() << 22;1569emitForm_MUL(i);1570} else {1571code[1] = subOp << 29;1572code[1] |= i->src(0).mod.abs() << 20;1573code[1] |= i->src(0).mod.neg() << 26;1574if (i->saturate) {1575assert(subOp == 6 && i->op == OP_EX2);1576code[1] |= 1 << 27;1577}1578emitForm_MAD(i);1579}1580}15811582void1583CodeEmitterNV50::emitNOT(const Instruction *i)1584{1585code[0] = 0xd0000000;1586code[1] = 0x0002c000;15871588switch (i->sType) {1589case TYPE_U32:1590case TYPE_S32:1591code[1] |= 0x04000000;1592break;1593default:1594break;1595}1596emitForm_MAD(i);1597setSrc(i, 0, 1);1598}15991600void1601CodeEmitterNV50::emitLogicOp(const Instruction *i)1602{1603code[0] = 0xd0000000;1604code[1] = 0;16051606if (i->src(1).getFile() == FILE_IMMEDIATE) {1607switch (i->op) {1608case OP_OR: code[0] |= 0x0100; break;1609case OP_XOR: code[0] |= 0x8000; break;1610default:1611assert(i->op == OP_AND);1612break;1613}1614if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))1615code[0] |= 1 << 22;16161617emitForm_IMM(i);1618} else {1619switch (i->op) {1620case OP_AND: code[1] = 0x00000000; break;1621case OP_OR: code[1] = 0x00004000; break;1622case OP_XOR: code[1] = 0x00008000; break;1623default:1624assert(0);1625break;1626}1627if (typeSizeof(i->dType) == 4)1628code[1] |= 0x04000000;1629if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))1630code[1] |= 1 << 16;1631if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))1632code[1] |= 1 << 17;16331634emitForm_MAD(i);1635}1636}16371638void1639CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)1640{1641code[0] = 0x00000001 | (shl << 16);1642code[1] = 0xc0000000;16431644code[0] |= (DDATA(i->def(0)).id + 1) << 2;16451646setSrcFileBits(i, NV50_OP_ENC_IMM);1647setSrc(i, 0, 0);1648emitFlagsRd(i);1649}16501651void1652CodeEmitterNV50::emitShift(const Instruction *i)1653{1654if (i->def(0).getFile() == FILE_ADDRESS) {1655assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);1656emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);1657} else {1658code[0] = 0x30000001;1659code[1] = (i->op == OP_SHR) ? 0xe0000000 : 0xc0000000;1660if (typeSizeof(i->dType) == 4)1661code[1] |= 0x04000000;1662if (i->op == OP_SHR && isSignedType(i->sType))1663code[1] |= 1 << 27;16641665if (i->src(1).getFile() == FILE_IMMEDIATE) {1666code[1] |= 1 << 20;1667code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;1668defId(i->def(0), 2);1669srcId(i->src(0), 9);1670emitFlagsRd(i);1671} else {1672emitForm_MAD(i);1673}1674}1675}16761677void1678CodeEmitterNV50::emitOUT(const Instruction *i)1679{1680code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;1681code[1] = 0xc0000000;16821683emitFlagsRd(i);1684}16851686void1687CodeEmitterNV50::emitTEX(const TexInstruction *i)1688{1689code[0] = 0xf0000001;1690code[1] = 0x00000000;16911692switch (i->op) {1693case OP_TXB:1694code[1] = 0x20000000;1695break;1696case OP_TXL:1697code[1] = 0x40000000;1698break;1699case OP_TXF:1700code[0] |= 0x01000000;1701break;1702case OP_TXG:1703code[0] |= 0x01000000;1704code[1] = 0x80000000;1705break;1706case OP_TXLQ:1707code[1] = 0x60020000;1708break;1709default:1710assert(i->op == OP_TEX);1711break;1712}17131714code[0] |= i->tex.r << 9;1715code[0] |= i->tex.s << 17;17161717int argc = i->tex.target.getArgCount();17181719if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)1720argc += 1;1721if (i->tex.target.isShadow())1722argc += 1;1723assert(argc <= 4);17241725code[0] |= (argc - 1) << 22;17261727if (i->tex.target.isCube()) {1728code[0] |= 0x08000000;1729} else1730if (i->tex.useOffsets) {1731code[1] |= (i->tex.offset[0] & 0xf) << 24;1732code[1] |= (i->tex.offset[1] & 0xf) << 20;1733code[1] |= (i->tex.offset[2] & 0xf) << 16;1734}17351736code[0] |= (i->tex.mask & 0x3) << 25;1737code[1] |= (i->tex.mask & 0xc) << 12;17381739if (i->tex.liveOnly)1740code[1] |= 1 << 2;1741if (i->tex.derivAll)1742code[1] |= 1 << 3;17431744defId(i->def(0), 2);17451746emitFlagsRd(i);1747}17481749void1750CodeEmitterNV50::emitTXQ(const TexInstruction *i)1751{1752assert(i->tex.query == TXQ_DIMS);17531754code[0] = 0xf0000001;1755code[1] = 0x60000000;17561757code[0] |= i->tex.r << 9;1758code[0] |= i->tex.s << 17;17591760code[0] |= (i->tex.mask & 0x3) << 25;1761code[1] |= (i->tex.mask & 0xc) << 12;17621763defId(i->def(0), 2);17641765emitFlagsRd(i);1766}17671768void1769CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)1770{1771code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);1772code[1] = 0x60010000;17731774code[0] |= (i->tex.mask & 0x3) << 25;1775code[1] |= (i->tex.mask & 0xc) << 12;1776defId(i->def(0), 2);17771778emitFlagsRd(i);1779}17801781void1782CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)1783{1784uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */17851786code[0] = 0x10000003; // bra1787code[1] = 0x00000780; // always17881789switch (i->subOp) {1790case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call1791break;1792case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call1793pos += 8;1794break;1795default:1796assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));1797code[0] = 0x20000003; // call1798code[1] = 0x00000000; // no predicate1799break;1800}1801addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);1802addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);1803}18041805void1806CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)1807{1808const FlowInstruction *f = i->asFlow();1809bool hasPred = false;1810bool hasTarg = false;18111812code[0] = 0x00000003 | (flowOp << 28);1813code[1] = 0x00000000;18141815switch (i->op) {1816case OP_BRA:1817hasPred = true;1818hasTarg = true;1819break;1820case OP_BREAK:1821case OP_BRKPT:1822case OP_DISCARD:1823case OP_RET:1824hasPred = true;1825break;1826case OP_CALL:1827case OP_PREBREAK:1828case OP_JOINAT:1829hasTarg = true;1830break;1831case OP_PRERET:1832hasTarg = true;1833if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {1834emitPRERETEmu(f);1835return;1836}1837break;1838default:1839break;1840}18411842if (hasPred)1843emitFlagsRd(i);18441845if (hasTarg && f) {1846uint32_t pos;18471848if (f->op == OP_CALL) {1849if (f->builtin) {1850pos = targNV50->getBuiltinOffset(f->target.builtin);1851} else {1852pos = f->target.fn->binPos;1853}1854} else {1855pos = f->target.bb->binPos;1856}18571858code[0] |= ((pos >> 2) & 0xffff) << 11;1859code[1] |= ((pos >> 18) & 0x003f) << 14;18601861RelocEntry::Type relocTy;18621863relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;18641865addReloc(relocTy, 0, pos, 0x07fff800, 9);1866addReloc(relocTy, 1, pos, 0x000fc000, -4);1867}1868}18691870void1871CodeEmitterNV50::emitBAR(const Instruction *i)1872{1873ImmediateValue *barId = i->getSrc(0)->asImm();1874assert(barId);18751876code[0] = 0x82000003 | (barId->reg.data.u32 << 21);1877code[1] = 0x00004000;18781879if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)1880code[0] |= 1 << 26;1881}18821883void1884CodeEmitterNV50::emitATOM(const Instruction *i)1885{1886uint8_t subOp;1887switch (i->subOp) {1888case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break;1889case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break;1890case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break;1891case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break;1892case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break;1893case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break;1894case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break;1895case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break;1896case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break;1897case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;1898default:1899assert(!"invalid subop");1900return;1901}1902code[0] = 0xd0000001;1903code[1] = 0xc0c00000 | (subOp << 2);1904if (isSignedType(i->dType))1905code[1] |= 1 << 21;19061907// args1908emitFlagsRd(i);1909if (i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||1910i->subOp == NV50_IR_SUBOP_ATOM_CAS ||1911i->defExists(0)) {1912code[1] |= 0x20000000;1913setDst(i, 0);1914setSrc(i, 1, 1);1915// g[] pointer1916code[0] |= i->getSrc(0)->reg.fileIndex << 23;1917} else {1918srcId(i->src(1), 2);1919// g[] pointer1920code[0] |= i->getSrc(0)->reg.fileIndex << 16;1921}1922if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)1923setSrc(i, 2, 2);19241925srcId(i->getIndirect(0, 0), 9);1926}19271928bool1929CodeEmitterNV50::emitInstruction(Instruction *insn)1930{1931if (!insn->encSize) {1932ERROR("skipping unencodable instruction: "); insn->print();1933return false;1934} else1935if (codeSize + insn->encSize > codeSizeLimit) {1936ERROR("code emitter output buffer too small\n");1937return false;1938}19391940if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {1941INFO("EMIT: "); insn->print();1942}19431944switch (insn->op) {1945case OP_MOV:1946emitMOV(insn);1947break;1948case OP_EXIT:1949case OP_NOP:1950case OP_JOIN:1951emitNOP();1952break;1953case OP_VFETCH:1954case OP_LOAD:1955emitLOAD(insn);1956break;1957case OP_EXPORT:1958case OP_STORE:1959emitSTORE(insn);1960break;1961case OP_PFETCH:1962emitPFETCH(insn);1963break;1964case OP_RDSV:1965emitRDSV(insn);1966break;1967case OP_LINTERP:1968case OP_PINTERP:1969emitINTERP(insn);1970break;1971case OP_ADD:1972case OP_SUB:1973if (insn->dType == TYPE_F64)1974emitDADD(insn);1975else if (isFloatType(insn->dType))1976emitFADD(insn);1977else if (insn->getDef(0)->reg.file == FILE_ADDRESS)1978emitAADD(insn);1979else1980emitUADD(insn);1981break;1982case OP_MUL:1983if (insn->dType == TYPE_F64)1984emitDMUL(insn);1985else if (isFloatType(insn->dType))1986emitFMUL(insn);1987else1988emitIMUL(insn);1989break;1990case OP_MAD:1991case OP_FMA:1992if (insn->dType == TYPE_F64)1993emitDMAD(insn);1994else if (isFloatType(insn->dType))1995emitFMAD(insn);1996else1997emitIMAD(insn);1998break;1999case OP_SAD:2000emitISAD(insn);2001break;2002case OP_NOT:2003emitNOT(insn);2004break;2005case OP_AND:2006case OP_OR:2007case OP_XOR:2008emitLogicOp(insn);2009break;2010case OP_SHL:2011case OP_SHR:2012emitShift(insn);2013break;2014case OP_SET:2015emitSET(insn);2016break;2017case OP_MIN:2018case OP_MAX:2019emitMINMAX(insn);2020break;2021case OP_CEIL:2022case OP_FLOOR:2023case OP_TRUNC:2024case OP_ABS:2025case OP_NEG:2026case OP_SAT:2027emitCVT(insn);2028break;2029case OP_CVT:2030if (insn->def(0).getFile() == FILE_ADDRESS)2031emitARL(insn, 0);2032else2033if (insn->def(0).getFile() == FILE_FLAGS ||2034insn->src(0).getFile() == FILE_FLAGS ||2035insn->src(0).getFile() == FILE_ADDRESS)2036emitMOV(insn);2037else2038emitCVT(insn);2039break;2040case OP_RCP:2041emitSFnOp(insn, 0);2042break;2043case OP_RSQ:2044emitSFnOp(insn, 2);2045break;2046case OP_LG2:2047emitSFnOp(insn, 3);2048break;2049case OP_SIN:2050emitSFnOp(insn, 4);2051break;2052case OP_COS:2053emitSFnOp(insn, 5);2054break;2055case OP_EX2:2056emitSFnOp(insn, 6);2057break;2058case OP_PRESIN:2059case OP_PREEX2:2060emitPreOp(insn);2061break;2062case OP_TEX:2063case OP_TXB:2064case OP_TXL:2065case OP_TXF:2066case OP_TXG:2067case OP_TXLQ:2068emitTEX(insn->asTex());2069break;2070case OP_TXQ:2071emitTXQ(insn->asTex());2072break;2073case OP_TEXPREP:2074emitTEXPREP(insn->asTex());2075break;2076case OP_EMIT:2077case OP_RESTART:2078emitOUT(insn);2079break;2080case OP_DISCARD:2081emitFlow(insn, 0x0);2082break;2083case OP_BRA:2084emitFlow(insn, 0x1);2085break;2086case OP_CALL:2087emitFlow(insn, 0x2);2088break;2089case OP_RET:2090emitFlow(insn, 0x3);2091break;2092case OP_PREBREAK:2093emitFlow(insn, 0x4);2094break;2095case OP_BREAK:2096emitFlow(insn, 0x5);2097break;2098case OP_QUADON:2099emitFlow(insn, 0x6);2100break;2101case OP_QUADPOP:2102emitFlow(insn, 0x7);2103break;2104case OP_JOINAT:2105emitFlow(insn, 0xa);2106break;2107case OP_PRERET:2108emitFlow(insn, 0xd);2109break;2110case OP_QUADOP:2111emitQUADOP(insn, insn->lanes, insn->subOp);2112break;2113case OP_DFDX:2114emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);2115break;2116case OP_DFDY:2117emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);2118break;2119case OP_ATOM:2120emitATOM(insn);2121break;2122case OP_BAR:2123emitBAR(insn);2124break;2125case OP_PHI:2126case OP_UNION:2127case OP_CONSTRAINT:2128ERROR("operation should have been eliminated\n");2129return false;2130case OP_EXP:2131case OP_LOG:2132case OP_SQRT:2133case OP_POW:2134case OP_SELP:2135case OP_SLCT:2136case OP_TXD:2137case OP_PRECONT:2138case OP_CONT:2139case OP_POPCNT:2140case OP_INSBF:2141case OP_EXTBF:2142ERROR("operation should have been lowered\n");2143return false;2144default:2145ERROR("unknown op: %u\n", insn->op);2146return false;2147}2148if (insn->join || insn->op == OP_JOIN)2149code[1] |= 0x2;2150else2151if (insn->exit || insn->op == OP_EXIT)2152code[1] |= 0x1;21532154assert((insn->encSize == 8) == (code[0] & 1));21552156code += insn->encSize / 4;2157codeSize += insn->encSize;2158return true;2159}21602161uint32_t2162CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const2163{2164const Target::OpInfo &info = targ->getOpInfo(i);21652166if (info.minEncSize > 4 || i->dType == TYPE_F64)2167return 8;21682169// check constraints on dst and src operands2170for (int d = 0; i->defExists(d); ++d) {2171if (i->def(d).rep()->reg.data.id > 63 ||2172i->def(d).rep()->reg.file != FILE_GPR)2173return 8;2174}21752176for (int s = 0; i->srcExists(s); ++s) {2177DataFile sf = i->src(s).getFile();2178if (sf != FILE_GPR)2179if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)2180return 8;2181if (i->src(s).rep()->reg.data.id > 63)2182return 8;2183}21842185// check modifiers & rounding2186if (i->join || i->lanes != 0xf || i->exit)2187return 8;2188if (i->op == OP_MUL && i->rnd != ROUND_N)2189return 8;21902191if (i->asTex())2192return 8; // TODO: short tex encoding21932194// check constraints on short MAD2195if (info.srcNr >= 2 && i->srcExists(2)) {2196if (!i->defExists(0) ||2197(i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||2198DDATA(i->def(0)).id != SDATA(i->src(2)).id)2199return 8;2200}22012202return info.minEncSize;2203}22042205// Change the encoding size of an instruction after BBs have been scheduled.2206static void2207makeInstructionLong(Instruction *insn)2208{2209if (insn->encSize == 8)2210return;2211Function *fn = insn->bb->getFunction();2212int n = 0;2213int adj = 4;22142215for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);22162217if (n & 1) {2218adj = 8;2219insn->next->encSize = 8;2220} else2221if (insn->prev && insn->prev->encSize == 4) {2222adj = 8;2223insn->prev->encSize = 8;2224}2225insn->encSize = 8;22262227for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {2228fn->bbArray[i]->binPos += adj;2229}2230fn->binSize += adj;2231insn->bb->binSize += adj;2232}22332234static bool2235trySetExitModifier(Instruction *insn)2236{2237if (insn->op == OP_DISCARD ||2238insn->op == OP_QUADON ||2239insn->op == OP_QUADPOP)2240return false;2241for (int s = 0; insn->srcExists(s); ++s)2242if (insn->src(s).getFile() == FILE_IMMEDIATE)2243return false;2244if (insn->asFlow()) {2245if (insn->op == OP_CALL) // side effects !2246return false;2247if (insn->getPredicate()) // cannot do conditional exit (or can we ?)2248return false;2249insn->op = OP_EXIT;2250}2251insn->exit = 1;2252makeInstructionLong(insn);2253return true;2254}22552256static void2257replaceExitWithModifier(Function *func)2258{2259BasicBlock *epilogue = BasicBlock::get(func->cfgExit);22602261if (!epilogue->getExit() ||2262epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT2263return;22642265if (epilogue->getEntry()->op != OP_EXIT) {2266Instruction *insn = epilogue->getExit()->prev;2267if (!insn || !trySetExitModifier(insn))2268return;2269insn->exit = 1;2270} else {2271for (Graph::EdgeIterator ei = func->cfgExit->incident();2272!ei.end(); ei.next()) {2273BasicBlock *bb = BasicBlock::get(ei.getNode());2274Instruction *i = bb->getExit();22752276if (!i || !trySetExitModifier(i))2277return;2278}2279}22802281int adj = epilogue->getExit()->encSize;2282epilogue->binSize -= adj;2283func->binSize -= adj;2284delete_Instruction(func->getProgram(), epilogue->getExit());22852286// There may be BB's that are laid out after the exit block2287for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {2288func->bbArray[i]->binPos -= adj;2289}2290}22912292void2293CodeEmitterNV50::prepareEmission(Function *func)2294{2295CodeEmitter::prepareEmission(func);22962297replaceExitWithModifier(func);2298}22992300CodeEmitterNV50::CodeEmitterNV50(Program::Type type, const TargetNV50 *target) :2301CodeEmitter(target), progType(type), targNV50(target)2302{2303targ = target; // specialized2304code = NULL;2305codeSize = codeSizeLimit = 0;2306relocInfo = NULL;2307}23082309CodeEmitter *2310TargetNV50::getCodeEmitter(Program::Type type)2311{2312CodeEmitterNV50 *emit = new CodeEmitterNV50(type, this);2313return emit;2314}23152316} // namespace nv50_ir231723182319