// Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
/*1* Copyright 2011 Christoph Bumiller2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "codegen/nv50_ir_target_nvc0.h"2324namespace nv50_ir {2526// Argh, all these assertions ...2728class CodeEmitterNVC0 : public CodeEmitter29{30public:31CodeEmitterNVC0(const TargetNVC0 *, Program::Type);3233virtual bool emitInstruction(Instruction *);34virtual uint32_t getMinEncodingSize(const Instruction *) const;35virtual void prepareEmission(Function *);3637private:38const TargetNVC0 *targNVC0;3940Program::Type progType;4142const bool writeIssueDelays;4344private:45void emitForm_A(const Instruction *, uint64_t);46void emitForm_B(const Instruction *, uint64_t);47void emitForm_S(const Instruction *, uint32_t, bool pred);4849void emitPredicate(const Instruction *);5051void setAddress16(const ValueRef&);52void setAddress24(const ValueRef&);53void setAddressByFile(const ValueRef&);54void setImmediate(const Instruction *, const int s); // needs op already set55void 
setImmediateS8(const ValueRef&);56void setSUConst16(const Instruction *, const int s);57void setSUPred(const Instruction *, const int s);58void setPDSTL(const Instruction *, const int d);5960void emitCondCode(CondCode cc, int pos);61void emitInterpMode(const Instruction *);62void emitLoadStoreType(DataType ty);63void emitSUGType(DataType);64void emitSUAddr(const TexInstruction *);65void emitSUDim(const TexInstruction *);66void emitCachingMode(CacheMode c);6768void emitShortSrc2(const ValueRef&);6970inline uint8_t getSRegEncoding(const ValueRef&);7172void roundMode_A(const Instruction *);73void roundMode_C(const Instruction *);74void roundMode_CS(const Instruction *);7576void emitNegAbs12(const Instruction *);7778void emitNOP(const Instruction *);7980void emitLOAD(const Instruction *);81void emitSTORE(const Instruction *);82void emitMOV(const Instruction *);83void emitATOM(const Instruction *);84void emitMEMBAR(const Instruction *);85void emitCCTL(const Instruction *);8687void emitINTERP(const Instruction *);88void emitAFETCH(const Instruction *);89void emitPFETCH(const Instruction *);90void emitVFETCH(const Instruction *);91void emitEXPORT(const Instruction *);92void emitOUT(const Instruction *);9394void emitUADD(const Instruction *);95void emitFADD(const Instruction *);96void emitDADD(const Instruction *);97void emitUMUL(const Instruction *);98void emitFMUL(const Instruction *);99void emitDMUL(const Instruction *);100void emitIMAD(const Instruction *);101void emitISAD(const Instruction *);102void emitSHLADD(const Instruction *a);103void emitFMAD(const Instruction *);104void emitDMAD(const Instruction *);105void emitMADSP(const Instruction *);106107void emitNOT(Instruction *);108void emitLogicOp(const Instruction *, uint8_t subOp);109void emitPOPC(const Instruction *);110void emitINSBF(const Instruction *);111void emitEXTBF(const Instruction *);112void emitBFIND(const Instruction *);113void emitPERMT(const Instruction *);114void emitShift(const Instruction 
*);115116void emitSFnOp(const Instruction *, uint8_t subOp);117118void emitCVT(Instruction *);119void emitMINMAX(const Instruction *);120void emitPreOp(const Instruction *);121122void emitSET(const CmpInstruction *);123void emitSLCT(const CmpInstruction *);124void emitSELP(const Instruction *);125126void emitTEXBAR(const Instruction *);127void emitTEX(const TexInstruction *);128void emitTEXCSAA(const TexInstruction *);129void emitTXQ(const TexInstruction *);130131void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);132133void emitFlow(const Instruction *);134void emitBAR(const Instruction *);135136void emitSUCLAMPMode(uint16_t);137void emitSUCalc(Instruction *);138void emitSULDGB(const TexInstruction *);139void emitSUSTGx(const TexInstruction *);140141void emitSULDB(const TexInstruction *);142void emitSUSTx(const TexInstruction *);143void emitSULEA(const TexInstruction *);144145void emitVSHL(const Instruction *);146void emitVectorSubOp(const Instruction *);147148void emitPIXLD(const Instruction *);149150void emitSHFL(const Instruction *);151152void emitVOTE(const Instruction *);153154inline void defId(const ValueDef&, const int pos);155inline void defId(const Instruction *, int d, const int pos);156inline void srcId(const ValueRef&, const int pos);157inline void srcId(const ValueRef *, const int pos);158inline void srcId(const Instruction *, int s, const int pos);159inline void srcAddr32(const ValueRef&, int pos, int shr);160161inline bool isLIMM(const ValueRef&, DataType ty);162};163164// for better visibility165#define HEX64(h, l) 0x##h##l##ULL166167#define SDATA(a) ((a).rep()->reg.data)168#define DDATA(a) ((a).rep()->reg.data)169170void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)171{172code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);173}174175void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)176{177code[pos / 32] |= (src ? 
SDATA(*src).id : 63) << (pos % 32);178}179180void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)181{182int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;183code[pos / 32] |= r << (pos % 32);184}185186void187CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)188{189const uint32_t offset = SDATA(src).offset >> shr;190191code[pos / 32] |= offset << (pos % 32);192if (pos && (pos < 32))193code[1] |= offset >> (32 - pos);194}195196void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)197{198code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : 63) << (pos % 32);199}200201void CodeEmitterNVC0::defId(const Instruction *insn, int d, const int pos)202{203if (insn->defExists(d))204defId(insn->def(d), pos);205else206code[pos / 32] |= 63 << (pos % 32);207}208209bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)210{211const ImmediateValue *imm = ref.get()->asImm();212213if (ty == TYPE_F32)214return imm && imm->reg.data.u32 & 0xfff;215else216return imm && (imm->reg.data.s32 > 0x7ffff ||217imm->reg.data.s32 < -0x80000);218}219220void221CodeEmitterNVC0::roundMode_A(const Instruction *insn)222{223switch (insn->rnd) {224case ROUND_M: code[1] |= 1 << 23; break;225case ROUND_P: code[1] |= 2 << 23; break;226case ROUND_Z: code[1] |= 3 << 23; break;227default:228assert(insn->rnd == ROUND_N);229break;230}231}232233void234CodeEmitterNVC0::emitNegAbs12(const Instruction *i)235{236if (i->src(1).mod.abs()) code[0] |= 1 << 6;237if (i->src(0).mod.abs()) code[0] |= 1 << 7;238if (i->src(1).mod.neg()) code[0] |= 1 << 8;239if (i->src(0).mod.neg()) code[0] |= 1 << 9;240}241242void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)243{244uint8_t val;245246switch (cc) {247case CC_LT: val = 0x1; break;248case CC_LTU: val = 0x9; break;249case CC_EQ: val = 0x2; break;250case CC_EQU: val = 0xa; break;251case CC_LE: val = 0x3; break;252case CC_LEU: val = 0xb; break;253case CC_GT: val = 0x4; break;254case CC_GTU: val = 0xc; 
break;255case CC_NE: val = 0x5; break;256case CC_NEU: val = 0xd; break;257case CC_GE: val = 0x6; break;258case CC_GEU: val = 0xe; break;259case CC_TR: val = 0xf; break;260case CC_FL: val = 0x0; break;261262case CC_A: val = 0x14; break;263case CC_NA: val = 0x13; break;264case CC_S: val = 0x15; break;265case CC_NS: val = 0x12; break;266case CC_C: val = 0x16; break;267case CC_NC: val = 0x11; break;268case CC_O: val = 0x17; break;269case CC_NO: val = 0x10; break;270271default:272val = 0;273assert(!"invalid condition code");274break;275}276code[pos / 32] |= val << (pos % 32);277}278279void280CodeEmitterNVC0::emitPredicate(const Instruction *i)281{282if (i->predSrc >= 0) {283assert(i->getPredicate()->reg.file == FILE_PREDICATE);284srcId(i->src(i->predSrc), 10);285if (i->cc == CC_NOT_P)286code[0] |= 0x2000; // negate287} else {288code[0] |= 0x1c00;289}290}291292void293CodeEmitterNVC0::setAddressByFile(const ValueRef& src)294{295switch (src.getFile()) {296case FILE_MEMORY_GLOBAL:297srcAddr32(src, 26, 0);298break;299case FILE_MEMORY_LOCAL:300case FILE_MEMORY_SHARED:301setAddress24(src);302break;303default:304assert(src.getFile() == FILE_MEMORY_CONST);305setAddress16(src);306break;307}308}309310void311CodeEmitterNVC0::setAddress16(const ValueRef& src)312{313Symbol *sym = src.get()->asSym();314315assert(sym);316317code[0] |= (sym->reg.data.offset & 0x003f) << 26;318code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;319}320321void322CodeEmitterNVC0::setAddress24(const ValueRef& src)323{324Symbol *sym = src.get()->asSym();325326assert(sym);327328code[0] |= (sym->reg.data.offset & 0x00003f) << 26;329code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;330}331332void333CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)334{335const ImmediateValue *imm = i->src(s).get()->asImm();336uint32_t u32;337338assert(imm);339u32 = imm->reg.data.u32;340341if ((code[0] & 0xf) == 0x1) {342// double immediate343uint64_t u64 = imm->reg.data.u64;344assert(!(u64 & 
0x00000fffffffffffULL));345assert(!(code[1] & 0xc000));346code[0] |= ((u64 >> 44) & 0x3f) << 26;347code[1] |= 0xc000 | (u64 >> 50);348} else349if ((code[0] & 0xf) == 0x2) {350// LIMM351code[0] |= (u32 & 0x3f) << 26;352code[1] |= u32 >> 6;353} else354if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {355// integer immediate356assert((u32 & 0xfff80000) == 0 || (u32 & 0xfff80000) == 0xfff80000);357assert(!(code[1] & 0xc000));358u32 &= 0xfffff;359code[0] |= (u32 & 0x3f) << 26;360code[1] |= 0xc000 | (u32 >> 6);361} else {362// float immediate363assert(!(u32 & 0x00000fff));364assert(!(code[1] & 0xc000));365code[0] |= ((u32 >> 12) & 0x3f) << 26;366code[1] |= 0xc000 | (u32 >> 18);367}368}369370void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)371{372const ImmediateValue *imm = ref.get()->asImm();373374int8_t s8 = static_cast<int8_t>(imm->reg.data.s32);375376assert(s8 == imm->reg.data.s32);377378code[0] |= (s8 & 0x3f) << 26;379code[0] |= (s8 >> 6) << 8;380}381382void CodeEmitterNVC0::setPDSTL(const Instruction *i, const int d)383{384assert(d < 0 || (i->defExists(d) && i->def(d).getFile() == FILE_PREDICATE));385386uint32_t pred = d >= 0 ? DDATA(i->def(d)).id : 7;387388code[0] |= (pred & 3) << 8;389code[1] |= (pred & 4) << (26 - 2);390}391392void393CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)394{395code[0] = opc;396code[1] = opc >> 32;397398emitPredicate(i);399400defId(i->def(0), 14);401402int s1 = 26;403if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)404s1 = 49;405406for (int s = 0; s < 3 && i->srcExists(s); ++s) {407switch (i->getSrc(s)->reg.file) {408case FILE_MEMORY_CONST:409assert(!(code[1] & 0xc000));410code[1] |= (s == 2) ? 
0x8000 : 0x4000;411code[1] |= i->getSrc(s)->reg.fileIndex << 10;412setAddress16(i->src(s));413break;414case FILE_IMMEDIATE:415assert(s == 1 ||416i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);417assert(!(code[1] & 0xc000));418setImmediate(i, s);419break;420case FILE_GPR:421if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst422break;423srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);424break;425default:426if (i->op == OP_SELP) {427// OP_SELP is used to implement shared+atomics on Fermi.428assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);429srcId(i->src(s), 49);430}431// ignore here, can be predicate or flags, but must not be address432break;433}434}435}436437void438CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)439{440code[0] = opc;441code[1] = opc >> 32;442443emitPredicate(i);444445defId(i->def(0), 14);446447switch (i->src(0).getFile()) {448case FILE_MEMORY_CONST:449assert(!(code[1] & 0xc000));450code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);451setAddress16(i->src(0));452break;453case FILE_IMMEDIATE:454assert(!(code[1] & 0xc000));455setImmediate(i, 0);456break;457case FILE_GPR:458srcId(i->src(0), 26);459break;460default:461// ignore here, can be predicate or flags, but must not be address462break;463}464}465466void467CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)468{469code[0] = opc;470471int ss2a = 0;472if (opc == 0x0d || opc == 0x0e)473ss2a = 2;474475defId(i->def(0), 14);476srcId(i->src(0), 20);477478assert(pred || (i->predSrc < 0));479if (pred)480emitPredicate(i);481482for (int s = 1; s < 3 && i->srcExists(s); ++s) {483if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {484assert(!(code[0] & (0x300 >> ss2a)));485switch (i->src(s).get()->reg.fileIndex) {486case 0: code[0] |= 0x100 >> ss2a; break;487case 1: code[0] |= 0x200 >> ss2a; break;488case 16: code[0] |= 0x300 >> ss2a; break;489default:490ERROR("invalid c[] space for short form\n");491break;492}493if (s == 
1)494code[0] |= i->getSrc(s)->reg.data.offset << 24;495else496code[0] |= i->getSrc(s)->reg.data.offset << 6;497} else498if (i->src(s).getFile() == FILE_IMMEDIATE) {499assert(s == 1);500setImmediateS8(i->src(s));501} else502if (i->src(s).getFile() == FILE_GPR) {503srcId(i->src(s), (s == 1) ? 26 : 8);504}505}506}507508void509CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)510{511if (src.getFile() == FILE_MEMORY_CONST) {512switch (src.get()->reg.fileIndex) {513case 0: code[0] |= 0x100; break;514case 1: code[0] |= 0x200; break;515case 16: code[0] |= 0x300; break;516default:517assert(!"unsupported file index for short op");518break;519}520srcAddr32(src, 20, 2);521} else {522srcId(src, 20);523assert(src.getFile() == FILE_GPR);524}525}526527void528CodeEmitterNVC0::emitNOP(const Instruction *i)529{530code[0] = 0x000001e4;531code[1] = 0x40000000;532emitPredicate(i);533}534535void536CodeEmitterNVC0::emitFMAD(const Instruction *i)537{538bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();539540if (i->encSize == 8) {541if (isLIMM(i->src(1), TYPE_F32)) {542emitForm_A(i, HEX64(20000000, 00000002));543} else {544emitForm_A(i, HEX64(30000000, 00000000));545546if (i->src(2).mod.neg())547code[0] |= 1 << 8;548}549roundMode_A(i);550551if (neg1)552code[0] |= 1 << 9;553554if (i->saturate)555code[0] |= 1 << 5;556557if (i->dnz)558code[0] |= 1 << 7;559else560if (i->ftz)561code[0] |= 1 << 6;562} else {563assert(!i->saturate && !i->src(2).mod.neg());564emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 
0x2e : 0x0e,565false);566if (neg1)567code[0] |= 1 << 4;568}569}570571void572CodeEmitterNVC0::emitDMAD(const Instruction *i)573{574bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();575576emitForm_A(i, HEX64(20000000, 00000001));577578if (i->src(2).mod.neg())579code[0] |= 1 << 8;580581roundMode_A(i);582583if (neg1)584code[0] |= 1 << 9;585586assert(!i->saturate);587assert(!i->ftz);588}589590void591CodeEmitterNVC0::emitFMUL(const Instruction *i)592{593bool neg = (i->src(0).mod ^ i->src(1).mod).neg();594595assert(i->postFactor >= -3 && i->postFactor <= 3);596597if (i->encSize == 8) {598if (isLIMM(i->src(1), TYPE_F32)) {599assert(i->postFactor == 0); // constant folded, hopefully600emitForm_A(i, HEX64(30000000, 00000002));601} else {602emitForm_A(i, HEX64(58000000, 00000000));603roundMode_A(i);604code[1] |= ((i->postFactor > 0) ?605(7 - i->postFactor) : (0 - i->postFactor)) << 17;606}607if (neg)608code[1] ^= 1 << 25; // aliases with LIMM sign bit609610if (i->saturate)611code[0] |= 1 << 5;612613if (i->dnz)614code[0] |= 1 << 7;615else616if (i->ftz)617code[0] |= 1 << 6;618} else {619assert(!neg && !i->saturate && !i->ftz && !i->postFactor);620emitForm_S(i, 0xa8, true);621}622}623624void625CodeEmitterNVC0::emitDMUL(const Instruction *i)626{627bool neg = (i->src(0).mod ^ i->src(1).mod).neg();628629emitForm_A(i, HEX64(50000000, 00000001));630roundMode_A(i);631632if (neg)633code[0] |= 1 << 9;634635assert(!i->saturate);636assert(!i->ftz);637assert(!i->dnz);638assert(!i->postFactor);639}640641void642CodeEmitterNVC0::emitUMUL(const Instruction *i)643{644if (i->encSize == 8) {645if (isLIMM(i->src(1), TYPE_U32)) {646emitForm_A(i, HEX64(10000000, 00000002));647} else {648emitForm_A(i, HEX64(50000000, 00000003));649}650if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)651code[0] |= 1 << 6;652if (i->sType == TYPE_S32)653code[0] |= 1 << 5;654if (i->dType == TYPE_S32)655code[0] |= 1 << 7;656} else {657emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 
0xaa : 0x2a, true);658659if (i->sType == TYPE_S32)660code[0] |= 1 << 6;661}662}663664void665CodeEmitterNVC0::emitFADD(const Instruction *i)666{667if (i->encSize == 8) {668if (isLIMM(i->src(1), TYPE_F32)) {669assert(!i->saturate);670emitForm_A(i, HEX64(28000000, 00000002));671672code[0] |= i->src(0).mod.abs() << 7;673code[0] |= i->src(0).mod.neg() << 9;674675if (i->src(1).mod.abs())676code[1] &= 0xfdffffff;677if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg()))678code[1] ^= 0x02000000;679} else {680emitForm_A(i, HEX64(50000000, 00000000));681682roundMode_A(i);683if (i->saturate)684code[1] |= 1 << 17;685686emitNegAbs12(i);687if (i->op == OP_SUB) code[0] ^= 1 << 8;688}689if (i->ftz)690code[0] |= 1 << 5;691} else {692assert(!i->saturate && i->op != OP_SUB &&693!i->src(0).mod.abs() &&694!i->src(1).mod.neg() && !i->src(1).mod.abs());695696emitForm_S(i, 0x49, true);697698if (i->src(0).mod.neg())699code[0] |= 1 << 7;700}701}702703void704CodeEmitterNVC0::emitDADD(const Instruction *i)705{706assert(i->encSize == 8);707emitForm_A(i, HEX64(48000000, 00000001));708roundMode_A(i);709assert(!i->saturate);710assert(!i->ftz);711emitNegAbs12(i);712if (i->op == OP_SUB)713code[0] ^= 1 << 8;714}715716void717CodeEmitterNVC0::emitUADD(const Instruction *i)718{719uint32_t addOp = 0;720721assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());722723if (i->src(0).mod.neg())724addOp |= 0x200;725if (i->src(1).mod.neg())726addOp |= 0x100;727if (i->op == OP_SUB)728addOp ^= 0x100;729730assert(addOp != 0x300); // would be add-plus-one731732if (i->encSize == 8) {733if (isLIMM(i->src(1), TYPE_U32)) {734emitForm_A(i, HEX64(08000000, 00000002));735if (i->flagsDef >= 0)736code[1] |= 1 << 26; // write carry737} else {738emitForm_A(i, HEX64(48000000, 00000003));739if (i->flagsDef >= 0)740code[1] |= 1 << 16; // write carry741}742code[0] |= addOp;743744if (i->saturate)745code[0] |= 1 << 5;746if (i->flagsSrc >= 0) // add carry747code[0] |= 1 << 6;748} else {749assert(!(addOp & 
0x100));750emitForm_S(i, (addOp >> 3) |751((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);752}753}754755void756CodeEmitterNVC0::emitIMAD(const Instruction *i)757{758uint8_t addOp =759i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);760761assert(i->encSize == 8);762emitForm_A(i, HEX64(20000000, 00000003));763764assert(addOp != 3);765code[0] |= addOp << 8;766767if (isSignedType(i->dType))768code[0] |= 1 << 7;769if (isSignedType(i->sType))770code[0] |= 1 << 5;771772code[1] |= i->saturate << 24;773774if (i->flagsDef >= 0) code[1] |= 1 << 16;775if (i->flagsSrc >= 0) code[1] |= 1 << 23;776777if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)778code[0] |= 1 << 6;779}780781void782CodeEmitterNVC0::emitSHLADD(const Instruction *i)783{784uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();785const ImmediateValue *imm = i->src(1).get()->asImm();786assert(imm);787788code[0] = 0x00000003;789code[1] = 0x40000000 | addOp << 23;790791emitPredicate(i);792793defId(i->def(0), 14);794srcId(i->src(0), 20);795796if (i->flagsDef >= 0)797code[1] |= 1 << 16;798799assert(!(imm->reg.data.u32 & 0xffffffe0));800code[0] |= imm->reg.data.u32 << 5;801802switch (i->src(2).getFile()) {803case FILE_GPR:804srcId(i->src(2), 26);805break;806case FILE_MEMORY_CONST:807code[1] |= 0x4000;808code[1] |= i->getSrc(2)->reg.fileIndex << 10;809setAddress16(i->src(2));810break;811case FILE_IMMEDIATE:812setImmediate(i, 2);813break;814default:815assert(!"bad src2 file");816break;817}818}819820void821CodeEmitterNVC0::emitMADSP(const Instruction *i)822{823assert(targ->getChipset() >= NVISA_GK104_CHIPSET);824825emitForm_A(i, HEX64(00000000, 00000003));826827if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {828code[1] |= 0x01800000;829} else {830code[0] |= (i->subOp & 0x00f) << 7;831code[0] |= (i->subOp & 0x0f0) << 1;832code[0] |= (i->subOp & 0x100) >> 3;833code[0] |= (i->subOp & 0x200) >> 2;834code[1] |= (i->subOp & 0xc00) << 13;835}836837if (i->flagsDef >= 0)838code[1] |= 1 << 
16;839}840841void842CodeEmitterNVC0::emitISAD(const Instruction *i)843{844assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);845assert(i->encSize == 8);846847emitForm_A(i, HEX64(38000000, 00000003));848849if (i->dType == TYPE_S32)850code[0] |= 1 << 5;851}852853void854CodeEmitterNVC0::emitNOT(Instruction *i)855{856assert(i->encSize == 8);857if (i->getPredicate())858i->moveSources(1, 1);859i->setSrc(1, i->src(0));860emitForm_A(i, HEX64(68000000, 000001c3));861}862863void864CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)865{866if (i->def(0).getFile() == FILE_PREDICATE) {867code[0] = 0x00000004 | (subOp << 30);868code[1] = 0x0c000000;869870emitPredicate(i);871872defId(i->def(0), 17);873srcId(i->src(0), 20);874if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;875srcId(i->src(1), 26);876if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;877878if (i->defExists(1)) {879defId(i->def(1), 14);880} else {881code[0] |= 7 << 14;882}883// (a OP b) OP c884if (i->predSrc != 2 && i->srcExists(2)) {885code[1] |= subOp << 21;886srcId(i->src(2), 49);887if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;888} else {889code[1] |= 0x000e0000;890}891} else892if (i->encSize == 8) {893if (isLIMM(i->src(1), TYPE_U32)) {894emitForm_A(i, HEX64(38000000, 00000002));895896if (i->flagsDef >= 0)897code[1] |= 1 << 26;898} else {899emitForm_A(i, HEX64(68000000, 00000003));900901if (i->flagsDef >= 0)902code[1] |= 1 << 16;903}904code[0] |= subOp << 6;905906if (i->flagsSrc >= 0) // carry907code[0] |= 1 << 5;908909if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;910if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;911} else {912emitForm_S(i, (subOp << 5) |913((i->src(1).getFile() == FILE_IMMEDIATE) ? 
0x1d : 0x8d), true);914}915}916917void918CodeEmitterNVC0::emitPOPC(const Instruction *i)919{920emitForm_A(i, HEX64(54000000, 00000004));921922if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;923if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;924}925926void927CodeEmitterNVC0::emitINSBF(const Instruction *i)928{929emitForm_A(i, HEX64(28000000, 00000003));930}931932void933CodeEmitterNVC0::emitEXTBF(const Instruction *i)934{935emitForm_A(i, HEX64(70000000, 00000003));936937if (i->dType == TYPE_S32)938code[0] |= 1 << 5;939if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)940code[0] |= 1 << 8;941}942943void944CodeEmitterNVC0::emitBFIND(const Instruction *i)945{946emitForm_B(i, HEX64(78000000, 00000003));947948if (i->dType == TYPE_S32)949code[0] |= 1 << 5;950if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))951code[0] |= 1 << 8;952if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)953code[0] |= 1 << 6;954}955956void957CodeEmitterNVC0::emitPERMT(const Instruction *i)958{959emitForm_A(i, HEX64(24000000, 00000004));960961code[0] |= i->subOp << 5;962}963964void965CodeEmitterNVC0::emitShift(const Instruction *i)966{967if (i->op == OP_SHR) {968emitForm_A(i, HEX64(58000000, 00000003)969| (isSignedType(i->dType) ? 0x20 : 0x00));970} else {971emitForm_A(i, HEX64(60000000, 00000003));972}973974if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)975code[0] |= 1 << 9;976}977978void979CodeEmitterNVC0::emitPreOp(const Instruction *i)980{981if (i->encSize == 8) {982emitForm_B(i, HEX64(60000000, 00000000));983984if (i->op == OP_PREEX2)985code[0] |= 0x20;986987if (i->src(0).mod.abs()) code[0] |= 1 << 6;988if (i->src(0).mod.neg()) code[0] |= 1 << 8;989} else {990emitForm_S(i, i->op == OP_PREEX2 ? 
0x74000008 : 0x70000008, true);991}992}993994void995CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)996{997if (i->encSize == 8) {998code[0] = 0x00000000 | (subOp << 26);999code[1] = 0xc8000000;10001001emitPredicate(i);10021003defId(i->def(0), 14);1004srcId(i->src(0), 20);10051006assert(i->src(0).getFile() == FILE_GPR);10071008if (i->saturate) code[0] |= 1 << 5;10091010if (i->src(0).mod.abs()) code[0] |= 1 << 7;1011if (i->src(0).mod.neg()) code[0] |= 1 << 9;1012} else {1013emitForm_S(i, 0x80000008 | (subOp << 26), true);10141015assert(!i->src(0).mod.neg());1016if (i->src(0).mod.abs()) code[0] |= 1 << 30;1017}1018}10191020void1021CodeEmitterNVC0::emitMINMAX(const Instruction *i)1022{1023uint64_t op;10241025assert(i->encSize == 8);10261027op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;10281029if (i->ftz)1030op |= 1 << 5;1031else1032if (!isFloatType(i->dType)) {1033op |= isSignedType(i->dType) ? 0x23 : 0x03;1034op |= i->subOp << 6;1035}1036if (i->dType == TYPE_F64)1037op |= 0x01;10381039emitForm_A(i, op);1040emitNegAbs12(i);10411042if (i->flagsDef >= 0)1043code[1] |= 1 << 16;1044}10451046void1047CodeEmitterNVC0::roundMode_C(const Instruction *i)1048{1049switch (i->rnd) {1050case ROUND_M: code[1] |= 1 << 17; break;1051case ROUND_P: code[1] |= 2 << 17; break;1052case ROUND_Z: code[1] |= 3 << 17; break;1053case ROUND_NI: code[0] |= 1 << 7; break;1054case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;1055case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;1056case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;1057case ROUND_N: break;1058default:1059assert(!"invalid round mode");1060break;1061}1062}10631064void1065CodeEmitterNVC0::roundMode_CS(const Instruction *i)1066{1067switch (i->rnd) {1068case ROUND_M:1069case ROUND_MI: code[0] |= 1 << 16; break;1070case ROUND_P:1071case ROUND_PI: code[0] |= 2 << 16; break;1072case ROUND_Z:1073case ROUND_ZI: code[0] |= 3 << 16; 
break;1074default:1075break;1076}1077}10781079void1080CodeEmitterNVC0::emitCVT(Instruction *i)1081{1082const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);1083DataType dType;10841085switch (i->op) {1086case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break;1087case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;1088case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;1089default:1090break;1091}10921093const bool sat = (i->op == OP_SAT) || i->saturate;1094const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();1095const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();10961097if (i->op == OP_NEG && i->dType == TYPE_U32)1098dType = TYPE_S32;1099else1100dType = i->dType;11011102if (i->encSize == 8) {1103emitForm_B(i, HEX64(10000000, 00000004));11041105roundMode_C(i);11061107// cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()1108code[0] |= util_logbase2(typeSizeof(dType)) << 20;1109code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;11101111// for 8/16 source types, the byte/word is in subOp. word 1 is1112// represented as 2.1113if (!isFloatType(i->sType))1114code[1] |= i->subOp << 0x17;1115else1116code[1] |= i->subOp << 0x18;11171118if (sat)1119code[0] |= 0x20;1120if (abs)1121code[0] |= 1 << 6;1122if (neg && i->op != OP_ABS)1123code[0] |= 1 << 8;11241125if (i->ftz)1126code[1] |= 1 << 23;11271128if (isSignedIntType(dType))1129code[0] |= 0x080;1130if (isSignedIntType(i->sType))1131code[0] |= 0x200;11321133if (isFloatType(dType)) {1134if (!isFloatType(i->sType))1135code[1] |= 0x08000000;1136} else {1137if (isFloatType(i->sType))1138code[1] |= 0x04000000;1139else1140code[1] |= 0x0c000000;1141}1142} else {1143if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {1144code[0] = 0x298;1145} else1146if (isFloatType(dType)) {1147if (isFloatType(i->sType))1148code[0] = 0x098;1149else1150code[0] = 0x088 | (isSignedType(i->sType) ? 
(1 << 8) : 0);1151} else {1152assert(isFloatType(i->sType));11531154code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);1155}11561157if (neg) code[0] |= 1 << 16;1158if (sat) code[0] |= 1 << 18;1159if (abs) code[0] |= 1 << 19;11601161roundMode_CS(i);1162}1163}11641165void1166CodeEmitterNVC0::emitSET(const CmpInstruction *i)1167{1168uint32_t hi;1169uint32_t lo = 0;11701171if (i->sType == TYPE_F64)1172lo = 0x1;1173else1174if (!isFloatType(i->sType))1175lo = 0x3;11761177if (isSignedIntType(i->sType))1178lo |= 0x20;1179if (isFloatType(i->dType)) {1180if (isFloatType(i->sType))1181lo |= 0x20;1182else1183lo |= 0x80;1184}11851186switch (i->op) {1187case OP_SET_AND: hi = 0x10000000; break;1188case OP_SET_OR: hi = 0x10200000; break;1189case OP_SET_XOR: hi = 0x10400000; break;1190default:1191hi = 0x100e0000;1192break;1193}1194emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo);11951196if (i->op != OP_SET)1197srcId(i->src(2), 32 + 17);11981199if (i->def(0).getFile() == FILE_PREDICATE) {1200if (i->sType == TYPE_F32)1201code[1] += 0x10000000;1202else1203code[1] += 0x08000000;12041205code[0] &= ~0xfc000;1206defId(i->def(0), 17);1207if (i->defExists(1))1208defId(i->def(1), 14);1209else1210code[0] |= 0x1c000;1211}12121213if (i->ftz)1214code[1] |= 1 << 27;1215if (i->flagsSrc >= 0)1216code[0] |= 1 << 6;12171218emitCondCode(i->setCond, 32 + 23);1219emitNegAbs12(i);1220}12211222void1223CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)1224{1225uint64_t op;12261227switch (i->dType) {1228case TYPE_S32:1229op = HEX64(30000000, 00000023);1230break;1231case TYPE_U32:1232op = HEX64(30000000, 00000003);1233break;1234case TYPE_F32:1235op = HEX64(38000000, 00000000);1236break;1237default:1238assert(!"invalid type for SLCT");1239op = 0;1240break;1241}1242emitForm_A(i, op);12431244CondCode cc = i->setCond;12451246if (i->src(2).mod.neg())1247cc = reverseCondCode(cc);12481249emitCondCode(cc, 32 + 23);12501251if (i->ftz)1252code[0] |= 1 << 5;1253}12541255void1256nvc0_selpFlip(const 
FixupEntry *entry, uint32_t *code, const FixupData& data)1257{1258int loc = entry->loc;1259bool val = false;1260switch (entry->ipa) {1261case 0:1262val = data.force_persample_interp;1263break;1264case 1:1265val = data.msaa;1266break;1267}1268if (val)1269code[loc + 1] |= 1 << 20;1270else1271code[loc + 1] &= ~(1 << 20);1272}12731274void CodeEmitterNVC0::emitSELP(const Instruction *i)1275{1276emitForm_A(i, HEX64(20000000, 00000004));12771278if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))1279code[1] |= 1 << 20;12801281if (i->subOp >= 1) {1282addInterp(i->subOp - 1, 0, nvc0_selpFlip);1283}1284}12851286void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)1287{1288code[0] = 0x00000006 | (i->subOp << 26);1289code[1] = 0xf0000000;1290emitPredicate(i);1291emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);1292}12931294void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)1295{1296code[0] = 0x00000086;1297code[1] = 0xd0000000;12981299code[1] |= i->tex.r;1300code[1] |= i->tex.s << 8;13011302if (i->tex.liveOnly)1303code[0] |= 1 << 9;13041305defId(i->def(0), 14);1306srcId(i->src(0), 20);1307}13081309static inline bool1310isNextIndependentTex(const TexInstruction *i)1311{1312if (!i->next || !isTextureOp(i->next->op))1313return false;1314if (i->getDef(0)->interfers(i->next->getSrc(0)))1315return false;1316return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));1317}13181319void1320CodeEmitterNVC0::emitTEX(const TexInstruction *i)1321{1322code[0] = 0x00000006;13231324if (isNextIndependentTex(i))1325code[0] |= 0x080; // t mode1326else1327code[0] |= 0x100; // p mode13281329if (i->tex.liveOnly)1330code[0] |= 1 << 9;13311332switch (i->op) {1333case OP_TEX: code[1] = 0x80000000; break;1334case OP_TXB: code[1] = 0x84000000; break;1335case OP_TXL: code[1] = 0x86000000; break;1336case OP_TXF: code[1] = 0x90000000; break;1337case OP_TXG: code[1] = 0xa0000000; break;1338case OP_TXLQ: code[1] = 0xb0000000; break;1339case OP_TXD: code[1] = 0xe0000000; 
break;1340default:1341assert(!"invalid texture op");1342break;1343}1344if (i->op == OP_TXF) {1345if (!i->tex.levelZero)1346code[1] |= 0x02000000;1347} else1348if (i->tex.levelZero) {1349code[1] |= 0x02000000;1350}13511352if (i->op != OP_TXD && i->tex.derivAll)1353code[1] |= 1 << 13;13541355defId(i->def(0), 14);1356srcId(i->src(0), 20);13571358emitPredicate(i);13591360if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;13611362code[1] |= i->tex.mask << 14;13631364code[1] |= i->tex.r;1365code[1] |= i->tex.s << 8;1366if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)1367code[1] |= 1 << 18; // in 1st source (with array index)13681369// texture target:1370code[1] |= (i->tex.target.getDim() - 1) << 20;1371if (i->tex.target.isCube())1372code[1] += 2 << 20;1373if (i->tex.target.isArray())1374code[1] |= 1 << 19;1375if (i->tex.target.isShadow())1376code[1] |= 1 << 24;13771378const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)13791380if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {1381// lzero1382if (i->op == OP_TXL)1383code[1] &= ~(1 << 26);1384else1385if (i->op == OP_TXF)1386code[1] &= ~(1 << 25);1387}1388if (i->tex.target == TEX_TARGET_2D_MS ||1389i->tex.target == TEX_TARGET_2D_MS_ARRAY)1390code[1] |= 1 << 23;13911392if (i->tex.useOffsets == 1)1393code[1] |= 1 << 22;1394if (i->tex.useOffsets == 4)1395code[1] |= 1 << 23;13961397srcId(i, src1, 26);1398}13991400void1401CodeEmitterNVC0::emitTXQ(const TexInstruction *i)1402{1403code[0] = 0x00000086;1404code[1] = 0xc0000000;14051406switch (i->tex.query) {1407case TXQ_DIMS: code[1] |= 0 << 22; break;1408case TXQ_TYPE: code[1] |= 1 << 22; break;1409case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;1410case TXQ_FILTER: code[1] |= 3 << 22; break;1411case TXQ_LOD: code[1] |= 4 << 22; break;1412case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break;1413default:1414assert(!"invalid texture query");1415break;1416}14171418code[1] |= i->tex.mask << 14;14191420code[1] |= 
/* Encode a control-flow instruction (branches, calls, exits, the PRE* /
 * JOINAT stack ops, quad on/pop and breakpoints).
 *
 * The switch selects the opcode bits in code[1] and a small 'mask' that says
 * which optional parts apply to the op: bit 0 means the instruction takes a
 * predicate, bit 1 means it carries a branch target.  After that the
 * predicate, the indirect source (if any) and finally the call/branch target
 * are encoded.
 */
void
CodeEmitterNVC0::emitFlow(const Instruction *i)
{
   // NOTE(review): 'f' is dereferenced in the OP_BRA / OP_CALL cases below
   // before the 'if (!f)' check further down; this presumably relies on those
   // ops always being FlowInstructions -- confirm against the IR builder.
   const FlowInstruction *f = i->asFlow();

   unsigned mask; // bit 0: predicate, bit 1: target

   code[0] = 0x00000007;

   switch (i->op) {
   case OP_BRA:
      code[1] = f->absolute ? 0x00000000 : 0x40000000;
      // Bit 0x4000 marks a source taken from constant memory; it is tested
      // again in the indirect handling below.
      if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
         code[0] |= 0x4000;
      mask = 3;
      break;
   case OP_CALL:
      code[1] = f->absolute ? 0x10000000 : 0x50000000;
      if (f->indirect)
         code[0] |= 0x4000; // indirect calls always use c[] source
      mask = 2;
      break;

   // Ops that take a predicate but no target.
   case OP_EXIT: code[1] = 0x80000000; mask = 1; break;
   case OP_RET: code[1] = 0x90000000; mask = 1; break;
   case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
   case OP_BREAK: code[1] = 0xa8000000; mask = 1; break;
   case OP_CONT: code[1] = 0xb0000000; mask = 1; break;

   // Ops that take a target but no predicate.
   case OP_JOINAT: code[1] = 0x60000000; mask = 2; break;
   case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
   case OP_PRECONT: code[1] = 0x70000000; mask = 2; break;
   case OP_PRERET: code[1] = 0x78000000; mask = 2; break;

   // Ops with neither predicate nor target.
   case OP_QUADON: code[1] = 0xc0000000; mask = 0; break;
   case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
   case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break;
   default:
      assert(!"invalid flow operation");
      return;
   }

   if (mask & 1) {
      emitPredicate(i);
      // Without a flags source, bits 5..8 are all set (presumably the
      // "always" condition code -- confirm against emitCondCode).
      if (i->flagsSrc < 0)
         code[0] |= 0x1e0;
   }

   // Everything below needs the flow-specific fields.
   if (!f)
      return;

   if (f->allWarp)
      code[0] |= 1 << 15;
   if (f->limit)
      code[0] |= 1 << 16;

   if (f->indirect) {
      if (code[0] & 0x4000) {
         // Constant-memory source: 16-bit address plus the file index.
         assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
         setAddress16(i->src(0));
         code[1] |= i->getSrc(0)->reg.fileIndex << 10;
         if (f->op == OP_BRA)
            srcId(f->src(0).getIndirect(0), 20);
      } else {
         srcId(f, 0, 20);
      }
   }

   if (f->op == OP_CALL) {
      if (f->indirect) {
         // nothing
      } else
      if (f->builtin) {
         // Builtin calls are absolute; the address is filled in through two
         // relocations, one per code word.
         assert(f->absolute);
         uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
         addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
         addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
      } else {
         // Offset is taken relative to codeSize + 8 (presumably the address
         // following this 8-byte instruction); low 6 bits go to code[0],
         // the next 18 bits to code[1].
         assert(!f->absolute);
         int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
         code[0] |= (pcRel & 0x3f) << 26;
         code[1] |= (pcRel >> 6) & 0x3ffff;
      }
   } else
   if (mask & 2) {
      int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
      // With issue delays enabled, a target on a 64-byte boundary gets 8
      // bytes added (presumably to step over the scheduling word emitted at
      // such boundaries -- confirm against emitInstruction).
      if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
         pcRel += 8;
      // currently we don't want absolute branches
      assert(!f->absolute);
      code[0] |= (pcRel & 0x3f) << 26;
      code[1] |= (pcRel >> 6) & 0x3ffff;
   }
}
*i)1619{1620code[0] = 0x00000006;1621code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);16221623if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)1624code[0] |= 0x200;16251626emitPredicate(i);16271628defId(i->def(0), 14);1629srcId(i->src(0).getIndirect(0), 20);1630}16311632void1633CodeEmitterNVC0::emitPFETCH(const Instruction *i)1634{1635uint32_t prim = i->src(0).get()->reg.data.u32;16361637code[0] = 0x00000006 | ((prim & 0x3f) << 26);1638code[1] = 0x00000000 | (prim >> 6);16391640emitPredicate(i);16411642const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)16431644defId(i->def(0), 14);1645srcId(i, src1, 20);1646}16471648void1649CodeEmitterNVC0::emitVFETCH(const Instruction *i)1650{1651code[0] = 0x00000006;1652code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;16531654if (i->perPatch)1655code[0] |= 0x100;1656if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)1657code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads16581659emitPredicate(i);16601661code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;16621663defId(i->def(0), 14);1664srcId(i->src(0).getIndirect(0), 20);1665srcId(i->src(0).getIndirect(1), 26); // vertex address1666}16671668void1669CodeEmitterNVC0::emitEXPORT(const Instruction *i)1670{1671unsigned int size = typeSizeof(i->dType);16721673code[0] = 0x00000006 | ((size / 4 - 1) << 5);1674code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;16751676assert(!(code[1] & ((size == 12) ? 
15 : (size - 1))));16771678if (i->perPatch)1679code[0] |= 0x100;16801681emitPredicate(i);16821683assert(i->src(1).getFile() == FILE_GPR);16841685srcId(i->src(0).getIndirect(0), 20);1686srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address1687srcId(i->src(1), 26);1688}16891690void1691CodeEmitterNVC0::emitOUT(const Instruction *i)1692{1693code[0] = 0x00000006;1694code[1] = 0x1c000000;16951696emitPredicate(i);16971698defId(i->def(0), 14); // new secret address1699srcId(i->src(0), 20); // old secret address, should be 0 initially17001701assert(i->src(0).getFile() == FILE_GPR);17021703if (i->op == OP_EMIT)1704code[0] |= 1 << 5;1705if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)1706code[0] |= 1 << 6;17071708// vertex stream1709if (i->src(1).getFile() == FILE_IMMEDIATE) {1710unsigned int stream = SDATA(i->src(1)).u32;1711assert(stream < 4);1712if (stream) {1713code[1] |= 0xc000;1714code[0] |= stream << 26;1715} else {1716srcId(NULL, 26);1717}1718} else {1719srcId(i->src(1), 26);1720}1721}17221723void1724CodeEmitterNVC0::emitInterpMode(const Instruction *i)1725{1726if (i->encSize == 8) {1727code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID1728} else {1729if (i->getInterpMode() == NV50_IR_INTERP_SC)1730code[0] |= 0x80;1731assert(i->op == OP_PINTERP && i->getSampleMode() == 0);1732}1733}17341735void1736nvc0_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)1737{1738int ipa = entry->ipa;1739int reg = entry->reg;1740int loc = entry->loc;17411742if (data.flatshade &&1743(ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {1744ipa = NV50_IR_INTERP_FLAT;1745reg = 0x3f;1746} else if (data.force_persample_interp &&1747(ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&1748(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {1749ipa |= NV50_IR_INTERP_CENTROID;1750}1751code[loc + 0] &= ~(0xf << 6);1752code[loc + 0] |= ipa << 6;1753code[loc + 0] &= ~(0x3f << 26);1754code[loc + 0] |= reg << 
26;1755}17561757void1758CodeEmitterNVC0::emitINTERP(const Instruction *i)1759{1760const uint32_t base = i->getSrc(0)->reg.data.offset;17611762if (i->encSize == 8) {1763code[0] = 0x00000000;1764code[1] = 0xc0000000 | (base & 0xffff);17651766if (i->saturate)1767code[0] |= 1 << 5;17681769if (i->op == OP_PINTERP) {1770srcId(i->src(1), 26);1771addInterp(i->ipa, SDATA(i->src(1)).id, nvc0_interpApply);1772} else {1773code[0] |= 0x3f << 26;1774addInterp(i->ipa, 0x3f, nvc0_interpApply);1775}17761777srcId(i->src(0).getIndirect(0), 20);1778} else {1779assert(i->op == OP_PINTERP);1780code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);1781srcId(i->src(1), 20);1782}1783emitInterpMode(i);17841785emitPredicate(i);1786defId(i->def(0), 14);17871788if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)1789srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);1790else1791code[1] |= 0x3f << 17;1792}17931794void1795CodeEmitterNVC0::emitLoadStoreType(DataType ty)1796{1797uint8_t val;17981799switch (ty) {1800case TYPE_U8:1801val = 0x00;1802break;1803case TYPE_S8:1804val = 0x20;1805break;1806case TYPE_F16:1807case TYPE_U16:1808val = 0x40;1809break;1810case TYPE_S16:1811val = 0x60;1812break;1813case TYPE_F32:1814case TYPE_U32:1815case TYPE_S32:1816val = 0x80;1817break;1818case TYPE_F64:1819case TYPE_U64:1820case TYPE_S64:1821val = 0xa0;1822break;1823case TYPE_B128:1824val = 0xc0;1825break;1826default:1827val = 0x80;1828assert(!"invalid type");1829break;1830}1831code[0] |= val;1832}18331834void1835CodeEmitterNVC0::emitCachingMode(CacheMode c)1836{1837uint32_t val;18381839switch (c) {1840case CACHE_CA:1841// case CACHE_WB:1842val = 0x000;1843break;1844case CACHE_CG:1845val = 0x100;1846break;1847case CACHE_CS:1848val = 0x200;1849break;1850case CACHE_CV:1851// case CACHE_WT:1852val = 0x300;1853break;1854default:1855val = 0;1856assert(!"invalid caching mode");1857break;1858}1859code[0] |= val;1860}18611862static inline bool1863uses64bitAddress(const Instruction *ldst)1864{1865return 
ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&1866ldst->src(0).isIndirect(0) &&1867ldst->getIndirect(0, 0)->reg.size == 8;1868}18691870void1871CodeEmitterNVC0::emitSTORE(const Instruction *i)1872{1873uint32_t opc;18741875switch (i->src(0).getFile()) {1876case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;1877case FILE_MEMORY_LOCAL: opc = 0xc8000000; break;1878case FILE_MEMORY_SHARED:1879if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {1880if (targ->getChipset() >= NVISA_GK104_CHIPSET)1881opc = 0xb8000000;1882else1883opc = 0xcc000000;1884} else {1885opc = 0xc9000000;1886}1887break;1888default:1889assert(!"invalid memory file");1890opc = 0;1891break;1892}1893code[0] = 0x00000005;1894code[1] = opc;18951896if (targ->getChipset() >= NVISA_GK104_CHIPSET) {1897// Unlocked store on shared memory can fail.1898if (i->src(0).getFile() == FILE_MEMORY_SHARED &&1899i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {1900assert(i->defExists(0));1901setPDSTL(i, 0);1902}1903}19041905setAddressByFile(i->src(0));1906srcId(i->src(1), 14);1907srcId(i->src(0).getIndirect(0), 20);1908if (uses64bitAddress(i))1909code[1] |= 1 << 26;19101911emitPredicate(i);19121913emitLoadStoreType(i->dType);1914emitCachingMode(i->cache);1915}19161917void1918CodeEmitterNVC0::emitLOAD(const Instruction *i)1919{1920uint32_t opc;19211922code[0] = 0x00000005;19231924switch (i->src(0).getFile()) {1925case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;1926case FILE_MEMORY_LOCAL: opc = 0xc0000000; break;1927case FILE_MEMORY_SHARED:1928if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {1929if (targ->getChipset() >= NVISA_GK104_CHIPSET)1930opc = 0xa8000000;1931else1932opc = 0xc4000000;1933} else {1934opc = 0xc1000000;1935}1936break;1937case FILE_MEMORY_CONST:1938if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {1939emitMOV(i); // not sure if this is any better1940return;1941}1942opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);1943code[0] = 0x00000006 | (i->subOp << 8);1944break;1945default:1946assert(!"invalid 
memory file");1947opc = 0;1948break;1949}1950code[1] = opc;19511952int r = 0, p = -1;1953if (i->src(0).getFile() == FILE_MEMORY_SHARED) {1954if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {1955if (i->def(0).getFile() == FILE_PREDICATE) { // p, #1956r = -1;1957p = 0;1958} else if (i->defExists(1)) { // r, p1959p = 1;1960} else {1961assert(!"Expected predicate dest for load locked");1962}1963}1964}19651966if (r >= 0)1967defId(i->def(r), 14);1968else1969code[0] |= 63 << 14;19701971if (p >= 0) {1972if (targ->getChipset() >= NVISA_GK104_CHIPSET)1973setPDSTL(i, p);1974else1975defId(i->def(p), 32 + 18);1976}19771978setAddressByFile(i->src(0));1979srcId(i->src(0).getIndirect(0), 20);1980if (uses64bitAddress(i))1981code[1] |= 1 << 26;19821983emitPredicate(i);19841985emitLoadStoreType(i->dType);1986emitCachingMode(i->cache);1987}19881989uint8_t1990CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)1991{1992switch (SDATA(ref).sv.sv) {1993case SV_LANEID: return 0x00;1994case SV_PHYSID: return 0x03;1995case SV_VERTEX_COUNT: return 0x10;1996case SV_INVOCATION_ID: return 0x11;1997case SV_YDIR: return 0x12;1998case SV_THREAD_KILL: return 0x13;1999case SV_COMBINED_TID: return 0x20;2000case SV_TID: return 0x21 + SDATA(ref).sv.index;2001case SV_CTAID: return 0x25 + SDATA(ref).sv.index;2002case SV_NTID: return 0x29 + SDATA(ref).sv.index;2003case SV_GRIDID: return 0x2c;2004case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;2005case SV_LBASE: return 0x34;2006case SV_SBASE: return 0x30;2007case SV_LANEMASK_EQ: return 0x38;2008case SV_LANEMASK_LT: return 0x39;2009case SV_LANEMASK_LE: return 0x3a;2010case SV_LANEMASK_GT: return 0x3b;2011case SV_LANEMASK_GE: return 0x3c;2012case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;2013default:2014assert(!"no sreg for system value");2015return 0;2016}2017}20182019void2020CodeEmitterNVC0::emitMOV(const Instruction *i)2021{2022assert(!i->saturate);2023if (i->def(0).getFile() == FILE_PREDICATE) {2024if (i->src(0).getFile() == FILE_GPR) {2025code[0] = 
0xfc01c003;2026code[1] = 0x1a8e0000;2027srcId(i->src(0), 20);2028} else {2029code[0] = 0x0001c004;2030code[1] = 0x0c0e0000;2031if (i->src(0).getFile() == FILE_IMMEDIATE) {2032code[0] |= 7 << 20;2033if (!i->getSrc(0)->reg.data.u32)2034code[0] |= 1 << 23;2035} else {2036srcId(i->src(0), 20);2037}2038}2039defId(i->def(0), 17);2040emitPredicate(i);2041} else2042if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {2043uint8_t sr = getSRegEncoding(i->src(0));20442045if (i->encSize == 8) {2046code[0] = 0x00000004 | (sr << 26);2047code[1] = 0x2c000000;2048} else {2049code[0] = 0x40000008 | (sr << 20);2050}2051defId(i->def(0), 14);20522053emitPredicate(i);2054} else2055if (i->encSize == 8) {2056uint64_t opc;20572058if (i->src(0).getFile() == FILE_IMMEDIATE)2059opc = HEX64(18000000, 000001e2);2060else2061if (i->src(0).getFile() == FILE_PREDICATE)2062opc = HEX64(080e0000, 1c000004);2063else2064opc = HEX64(28000000, 00000004);20652066if (i->src(0).getFile() != FILE_PREDICATE)2067opc |= i->lanes << 5;20682069emitForm_B(i, opc);20702071// Explicitly emit the predicate source as emitForm_B skips it.2072if (i->src(0).getFile() == FILE_PREDICATE)2073srcId(i->src(0), 20);2074} else {2075uint32_t imm;20762077if (i->src(0).getFile() == FILE_IMMEDIATE) {2078imm = SDATA(i->src(0)).u32;2079if (imm & 0xfff00000) {2080assert(!(imm & 0x000fffff));2081code[0] = 0x00000318 | imm;2082} else {2083assert(imm < 0x800 && ((int32_t)imm >= -0x800));2084code[0] = 0x00000118 | (imm << 20);2085}2086} else {2087code[0] = 0x0028;2088emitShortSrc2(i->src(0));2089}2090defId(i->def(0), 14);20912092emitPredicate(i);2093}2094}20952096void2097CodeEmitterNVC0::emitATOM(const Instruction *i)2098{2099const bool hasDst = i->defExists(0);2100const bool casOrExch =2101i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||2102i->subOp == NV50_IR_SUBOP_ATOM_CAS;21032104if (i->dType == TYPE_U64) {2105switch (i->subOp) {2106case NV50_IR_SUBOP_ATOM_ADD:2107code[0] = 0x205;2108if (hasDst)2109code[1] = 0x507e0000;2110else2111code[1] = 
0x10000000;2112break;2113case NV50_IR_SUBOP_ATOM_EXCH:2114code[0] = 0x305;2115code[1] = 0x507e0000;2116break;2117case NV50_IR_SUBOP_ATOM_CAS:2118code[0] = 0x325;2119code[1] = 0x50000000;2120break;2121default:2122assert(!"invalid u64 red op");2123break;2124}2125} else2126if (i->dType == TYPE_U32) {2127switch (i->subOp) {2128case NV50_IR_SUBOP_ATOM_EXCH:2129code[0] = 0x105;2130code[1] = 0x507e0000;2131break;2132case NV50_IR_SUBOP_ATOM_CAS:2133code[0] = 0x125;2134code[1] = 0x50000000;2135break;2136default:2137code[0] = 0x5 | (i->subOp << 5);2138if (hasDst)2139code[1] = 0x507e0000;2140else2141code[1] = 0x10000000;2142break;2143}2144} else2145if (i->dType == TYPE_S32) {2146assert(i->subOp <= 2);2147code[0] = 0x205 | (i->subOp << 5);2148if (hasDst)2149code[1] = 0x587e0000;2150else2151code[1] = 0x18000000;2152} else2153if (i->dType == TYPE_F32) {2154assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);2155code[0] = 0x205;2156if (hasDst)2157code[1] = 0x687e0000;2158else2159code[1] = 0x28000000;2160}21612162emitPredicate(i);21632164srcId(i->src(1), 14);21652166if (hasDst)2167defId(i->def(0), 32 + 11);2168else2169if (casOrExch)2170code[1] |= 63 << 11;21712172if (hasDst || casOrExch) {2173const int32_t offset = SDATA(i->src(0)).offset;2174assert(offset < 0x80000 && offset >= -0x80000);2175code[0] |= offset << 26;2176code[1] |= (offset & 0x1ffc0) >> 6;2177code[1] |= (offset & 0xe0000) << 6;2178} else {2179srcAddr32(i->src(0), 26, 0);2180}2181if (i->getIndirect(0, 0)) {2182srcId(i->getIndirect(0, 0), 20);2183if (i->getIndirect(0, 0)->reg.size == 8)2184code[1] |= 1 << 26;2185} else {2186code[0] |= 63 << 20;2187}21882189if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {2190assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));2191code[1] |= (SDATA(i->src(1)).id + 1) << 17;2192}2193}21942195void2196CodeEmitterNVC0::emitMEMBAR(const Instruction *i)2197{2198switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {2199case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;2200case NV50_IR_SUBOP_MEMBAR_GL: 
code[0] = 0x25; break;2201default:2202code[0] = 0x45;2203assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);2204break;2205}2206code[1] = 0xe0000000;22072208emitPredicate(i);2209}22102211void2212CodeEmitterNVC0::emitCCTL(const Instruction *i)2213{2214code[0] = 0x00000005 | (i->subOp << 5);22152216if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {2217code[1] = 0x98000000;2218srcAddr32(i->src(0), 28, 2);2219} else {2220code[1] = 0xd0000000;2221setAddress24(i->src(0));2222}2223if (uses64bitAddress(i))2224code[1] |= 1 << 26;2225srcId(i->src(0).getIndirect(0), 20);22262227emitPredicate(i);22282229defId(i, 0, 14);2230}22312232void2233CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)2234{2235uint8_t m;2236switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {2237case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;2238case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;2239case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;2240case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;2241case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;2242case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;2243case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;2244case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;2245case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;2246case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;2247case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;2248case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;2249case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;2250case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;2251case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;2252default:2253return;2254}2255code[0] |= m << 5;2256if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)2257code[1] |= 1 << 16;2258}22592260void2261CodeEmitterNVC0::emitSUCalc(Instruction *i)2262{2263ImmediateValue *imm = NULL;2264uint64_t opc;22652266if (i->srcExists(2)) {2267imm = i->getSrc(2)->asImm();2268if (imm)2269i->setSrc(2, NULL); // special case, make emitForm_A not assert2270}22712272switch (i->op) {2273case 
OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;2274case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;2275case OP_SUEAU: opc = HEX64(60000000, 00000004); break;2276default:2277assert(0);2278return;2279}2280emitForm_A(i, opc);22812282if (i->op == OP_SUCLAMP) {2283if (i->dType == TYPE_S32)2284code[0] |= 1 << 9;2285emitSUCLAMPMode(i->subOp);2286}22872288if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)2289code[1] |= 1 << 16;22902291if (i->op != OP_SUEAU) {2292if (i->def(0).getFile() == FILE_PREDICATE) { // p, #2293code[0] |= 63 << 14;2294code[1] |= i->getDef(0)->reg.data.id << 23;2295} else2296if (i->defExists(1)) { // r, p2297assert(i->def(1).getFile() == FILE_PREDICATE);2298code[1] |= i->getDef(1)->reg.data.id << 23;2299} else { // r, #2300code[1] |= 7 << 23;2301}2302}2303if (imm) {2304assert(i->op == OP_SUCLAMP);2305i->setSrc(2, imm);2306code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint62307}2308}23092310void2311CodeEmitterNVC0::emitSUGType(DataType ty)2312{2313switch (ty) {2314case TYPE_S32: code[1] |= 1 << 13; break;2315case TYPE_U8: code[1] |= 2 << 13; break;2316case TYPE_S8: code[1] |= 3 << 13; break;2317default:2318assert(ty == TYPE_U32);2319break;2320}2321}23222323void2324CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)2325{2326const uint32_t offset = i->getSrc(s)->reg.data.offset;23272328assert(i->src(s).getFile() == FILE_MEMORY_CONST);2329assert(offset == (offset & 0xfffc));23302331code[1] |= 1 << 21;2332code[0] |= offset << 24;2333code[1] |= offset >> 8;2334code[1] |= i->getSrc(s)->reg.fileIndex << 8;2335}23362337void2338CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)2339{2340if (!i->srcExists(s) || (i->predSrc == s)) {2341code[1] |= 0x7 << 17;2342} else {2343if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))2344code[1] |= 1 << 20;2345srcId(i->src(s), 32 + 17);2346}2347}23482349void2350CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)2351{2352code[0] = 0x5;2353code[1] = 0xd4000000 | (i->subOp << 
15);23542355emitLoadStoreType(i->dType);2356emitSUGType(i->sType);2357emitCachingMode(i->cache);23582359emitPredicate(i);2360defId(i->def(0), 14); // destination2361srcId(i->src(0), 20); // address2362// format2363if (i->src(1).getFile() == FILE_GPR)2364srcId(i->src(1), 26);2365else2366setSUConst16(i, 1);2367setSUPred(i, 2);2368}23692370void2371CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)2372{2373code[0] = 0x5;2374code[1] = 0xdc000000 | (i->subOp << 15);23752376if (i->op == OP_SUSTP)2377code[1] |= i->tex.mask << 22;2378else2379emitLoadStoreType(i->dType);2380emitSUGType(i->sType);2381emitCachingMode(i->cache);23822383emitPredicate(i);2384srcId(i->src(0), 20); // address2385// format2386if (i->src(1).getFile() == FILE_GPR)2387srcId(i->src(1), 26);2388else2389setSUConst16(i, 1);2390srcId(i->src(3), 14); // values2391setSUPred(i, 2);2392}23932394void2395CodeEmitterNVC0::emitSUAddr(const TexInstruction *i)2396{2397assert(targ->getChipset() < NVISA_GK104_CHIPSET);23982399if (i->tex.rIndirectSrc < 0) {2400code[1] |= 0x00004000;2401code[0] |= i->tex.r << 26;2402} else {2403srcId(i, i->tex.rIndirectSrc, 26);2404}2405}24062407void2408CodeEmitterNVC0::emitSUDim(const TexInstruction *i)2409{2410assert(targ->getChipset() < NVISA_GK104_CHIPSET);24112412code[1] |= (i->tex.target.getDim() - 1) << 12;2413if (i->tex.target.isArray() || i->tex.target.isCube() ||2414i->tex.target.getDim() == 3) {2415// use e2d mode for 3-dim images, arrays and cubes.2416code[1] |= 3 << 12;2417}24182419srcId(i->src(0), 20);2420}24212422void2423CodeEmitterNVC0::emitSULEA(const TexInstruction *i)2424{2425assert(targ->getChipset() < NVISA_GK104_CHIPSET);24262427code[0] = 0x5;2428code[1] = 0xf0000000;24292430emitPredicate(i);2431emitLoadStoreType(i->sType);24322433defId(i->def(0), 14);24342435if (i->defExists(1)) {2436defId(i->def(1), 32 + 22);2437} else {2438code[1] |= 7 << 22;2439}24402441emitSUAddr(i);2442emitSUDim(i);2443}24442445void2446CodeEmitterNVC0::emitSULDB(const TexInstruction 
*i)2447{2448assert(targ->getChipset() < NVISA_GK104_CHIPSET);24492450code[0] = 0x5;2451code[1] = 0xd4000000 | (i->subOp << 15);24522453emitPredicate(i);2454emitLoadStoreType(i->dType);24552456defId(i->def(0), 14);24572458emitCachingMode(i->cache);2459emitSUAddr(i);2460emitSUDim(i);2461}24622463void2464CodeEmitterNVC0::emitSUSTx(const TexInstruction *i)2465{2466assert(targ->getChipset() < NVISA_GK104_CHIPSET);24672468code[0] = 0x5;2469code[1] = 0xdc000000 | (i->subOp << 15);24702471if (i->op == OP_SUSTP)2472code[1] |= i->tex.mask << 17;2473else2474emitLoadStoreType(i->dType);24752476emitPredicate(i);24772478srcId(i->src(1), 14);24792480emitCachingMode(i->cache);2481emitSUAddr(i);2482emitSUDim(i);2483}24842485void2486CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)2487{2488switch (NV50_IR_SUBOP_Vn(i->subOp)) {2489case 0:2490code[1] |= (i->subOp & 0x000f) << 12; // vsrc12491code[1] |= (i->subOp & 0x00e0) >> 5; // vsrc22492code[1] |= (i->subOp & 0x0100) << 7; // vsrc22493code[1] |= (i->subOp & 0x3c00) << 13; // vdst2494break;2495case 1:2496code[1] |= (i->subOp & 0x000f) << 8; // v2src12497code[1] |= (i->subOp & 0x0010) << 11; // v2src12498code[1] |= (i->subOp & 0x01e0) >> 1; // v2src22499code[1] |= (i->subOp & 0x0200) << 6; // v2src22500code[1] |= (i->subOp & 0x3c00) << 2; // v4dst2501code[1] |= (i->mask & 0x3) << 2;2502break;2503case 2:2504code[1] |= (i->subOp & 0x000f) << 8; // v4src12505code[1] |= (i->subOp & 0x01e0) >> 1; // v4src22506code[1] |= (i->subOp & 0x3c00) << 2; // v4dst2507code[1] |= (i->mask & 0x3) << 2;2508code[1] |= (i->mask & 0xc) << 21;2509break;2510default:2511assert(0);2512break;2513}2514}25152516void2517CodeEmitterNVC0::emitVSHL(const Instruction *i)2518{2519uint64_t opc = 0x4;25202521switch (NV50_IR_SUBOP_Vn(i->subOp)) {2522case 0: opc |= 0xe8ULL << 56; break;2523case 1: opc |= 0xb4ULL << 56; break;2524case 2: opc |= 0x94ULL << 56; break;2525default:2526assert(0);2527break;2528}2529if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {2530if 
(isSignedType(i->dType)) opc |= 1ULL << 0x2a;2531if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);2532} else {2533if (isSignedType(i->dType)) opc |= 1ULL << 0x39;2534if (isSignedType(i->sType)) opc |= 1 << 6;2535}2536emitForm_A(i, opc);2537emitVectorSubOp(i);25382539if (i->saturate)2540code[0] |= 1 << 9;2541if (i->flagsDef >= 0)2542code[1] |= 1 << 16;2543}25442545void2546CodeEmitterNVC0::emitPIXLD(const Instruction *i)2547{2548assert(i->encSize == 8);2549emitForm_A(i, HEX64(10000000, 00000006));2550code[0] |= i->subOp << 5;2551code[1] |= 0x00e00000;2552}25532554void2555CodeEmitterNVC0::emitSHFL(const Instruction *i)2556{2557const ImmediateValue *imm;25582559assert(targ->getChipset() >= NVISA_GK104_CHIPSET);25602561code[0] = 0x00000005;2562code[1] = 0x88000000 | (i->subOp << 23);25632564emitPredicate(i);25652566defId(i->def(0), 14);2567srcId(i->src(0), 20);25682569switch (i->src(1).getFile()) {2570case FILE_GPR:2571srcId(i->src(1), 26);2572break;2573case FILE_IMMEDIATE:2574imm = i->getSrc(1)->asImm();2575assert(imm && imm->reg.data.u32 < 0x20);2576code[0] |= imm->reg.data.u32 << 26;2577code[0] |= 1 << 5;2578break;2579default:2580assert(!"invalid src1 file");2581break;2582}25832584switch (i->src(2).getFile()) {2585case FILE_GPR:2586srcId(i->src(2), 49);2587break;2588case FILE_IMMEDIATE:2589imm = i->getSrc(2)->asImm();2590assert(imm && imm->reg.data.u32 < 0x2000);2591code[1] |= imm->reg.data.u32 << 10;2592code[0] |= 1 << 6;2593break;2594default:2595assert(!"invalid src2 file");2596break;2597}25982599setPDSTL(i, i->defExists(1) ? 
1 : -1);2600}26012602void2603CodeEmitterNVC0::emitVOTE(const Instruction *i)2604{2605const ImmediateValue *imm;2606uint32_t u32;26072608code[0] = 0x00000004 | (i->subOp << 5);2609code[1] = 0x48000000;26102611emitPredicate(i);26122613unsigned rp = 0;2614for (int d = 0; i->defExists(d); d++) {2615if (i->def(d).getFile() == FILE_PREDICATE) {2616assert(!(rp & 2));2617rp |= 2;2618defId(i->def(d), 32 + 22);2619} else if (i->def(d).getFile() == FILE_GPR) {2620assert(!(rp & 1));2621rp |= 1;2622defId(i->def(d), 14);2623} else {2624assert(!"Unhandled def");2625}2626}2627if (!(rp & 1))2628code[0] |= 63 << 14;2629if (!(rp & 2))2630code[1] |= 7 << 22;26312632switch (i->src(0).getFile()) {2633case FILE_PREDICATE:2634if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))2635code[0] |= 1 << 23;2636srcId(i->src(0), 20);2637break;2638case FILE_IMMEDIATE:2639imm = i->getSrc(0)->asImm();2640assert(imm);2641u32 = imm->reg.data.u32;2642assert(u32 == 0 || u32 == 1);2643code[0] |= (u32 == 1 ? 0x7 : 0xf) << 20;2644break;2645default:2646assert(!"Unhandled src");2647break;2648}2649}26502651bool2652CodeEmitterNVC0::emitInstruction(Instruction *insn)2653{2654unsigned int size = insn->encSize;26552656if (writeIssueDelays && !(codeSize & 0x3f))2657size += 8;26582659if (!insn->encSize) {2660ERROR("skipping unencodable instruction: "); insn->print();2661return false;2662} else2663if (codeSize + size > codeSizeLimit) {2664ERROR("code emitter output buffer too small\n");2665return false;2666}26672668if (writeIssueDelays) {2669if (!(codeSize & 0x3f)) {2670code[0] = 0x00000007; // cf issue delay "instruction"2671code[1] = 0x20000000;2672code += 2;2673codeSize += 8;2674}2675const unsigned int id = (codeSize & 0x3f) / 8 - 1;2676uint32_t *data = code - (id * 2 + 2);2677if (id <= 2) {2678data[0] |= insn->sched << (id * 8 + 4);2679} else2680if (id == 3) {2681data[0] |= insn->sched << 28;2682data[1] |= insn->sched >> 4;2683} else {2684data[1] |= insn->sched << ((id - 4) * 8 + 4);2685}2686}26872688// assert that 
instructions with multiple defs don't corrupt registers2689for (int d = 0; insn->defExists(d); ++d)2690assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);26912692switch (insn->op) {2693case OP_MOV:2694case OP_RDSV:2695emitMOV(insn);2696break;2697case OP_NOP:2698break;2699case OP_LOAD:2700emitLOAD(insn);2701break;2702case OP_STORE:2703emitSTORE(insn);2704break;2705case OP_LINTERP:2706case OP_PINTERP:2707emitINTERP(insn);2708break;2709case OP_VFETCH:2710emitVFETCH(insn);2711break;2712case OP_EXPORT:2713emitEXPORT(insn);2714break;2715case OP_PFETCH:2716emitPFETCH(insn);2717break;2718case OP_AFETCH:2719emitAFETCH(insn);2720break;2721case OP_EMIT:2722case OP_RESTART:2723emitOUT(insn);2724break;2725case OP_ADD:2726case OP_SUB:2727if (insn->dType == TYPE_F64)2728emitDADD(insn);2729else if (isFloatType(insn->dType))2730emitFADD(insn);2731else2732emitUADD(insn);2733break;2734case OP_MUL:2735if (insn->dType == TYPE_F64)2736emitDMUL(insn);2737else if (isFloatType(insn->dType))2738emitFMUL(insn);2739else2740emitUMUL(insn);2741break;2742case OP_MAD:2743case OP_FMA:2744if (insn->dType == TYPE_F64)2745emitDMAD(insn);2746else if (isFloatType(insn->dType))2747emitFMAD(insn);2748else2749emitIMAD(insn);2750break;2751case OP_SAD:2752emitISAD(insn);2753break;2754case OP_SHLADD:2755emitSHLADD(insn);2756break;2757case OP_NOT:2758emitNOT(insn);2759break;2760case OP_AND:2761emitLogicOp(insn, 0);2762break;2763case OP_OR:2764emitLogicOp(insn, 1);2765break;2766case OP_XOR:2767emitLogicOp(insn, 2);2768break;2769case OP_SHL:2770case OP_SHR:2771emitShift(insn);2772break;2773case OP_SET:2774case OP_SET_AND:2775case OP_SET_OR:2776case OP_SET_XOR:2777emitSET(insn->asCmp());2778break;2779case OP_SELP:2780emitSELP(insn);2781break;2782case OP_SLCT:2783emitSLCT(insn->asCmp());2784break;2785case OP_MIN:2786case OP_MAX:2787emitMINMAX(insn);2788break;2789case OP_ABS:2790case OP_NEG:2791case OP_CEIL:2792case OP_FLOOR:2793case OP_TRUNC:2794case OP_SAT:2795emitCVT(insn);2796break;2797case 
OP_CVT:2798if (insn->def(0).getFile() == FILE_PREDICATE ||2799insn->src(0).getFile() == FILE_PREDICATE)2800emitMOV(insn);2801else2802emitCVT(insn);2803break;2804case OP_RSQ:2805emitSFnOp(insn, 5 + 2 * insn->subOp);2806break;2807case OP_RCP:2808emitSFnOp(insn, 4 + 2 * insn->subOp);2809break;2810case OP_LG2:2811emitSFnOp(insn, 3);2812break;2813case OP_EX2:2814emitSFnOp(insn, 2);2815break;2816case OP_SIN:2817emitSFnOp(insn, 1);2818break;2819case OP_COS:2820emitSFnOp(insn, 0);2821break;2822case OP_PRESIN:2823case OP_PREEX2:2824emitPreOp(insn);2825break;2826case OP_TEX:2827case OP_TXB:2828case OP_TXL:2829case OP_TXD:2830case OP_TXF:2831case OP_TXG:2832case OP_TXLQ:2833emitTEX(insn->asTex());2834break;2835case OP_TXQ:2836emitTXQ(insn->asTex());2837break;2838case OP_TEXBAR:2839emitTEXBAR(insn);2840break;2841case OP_SUBFM:2842case OP_SUCLAMP:2843case OP_SUEAU:2844emitSUCalc(insn);2845break;2846case OP_MADSP:2847emitMADSP(insn);2848break;2849case OP_SULDB:2850if (targ->getChipset() >= NVISA_GK104_CHIPSET)2851emitSULDGB(insn->asTex());2852else2853emitSULDB(insn->asTex());2854break;2855case OP_SUSTB:2856case OP_SUSTP:2857if (targ->getChipset() >= NVISA_GK104_CHIPSET)2858emitSUSTGx(insn->asTex());2859else2860emitSUSTx(insn->asTex());2861break;2862case OP_SULEA:2863emitSULEA(insn->asTex());2864break;2865case OP_ATOM:2866emitATOM(insn);2867break;2868case OP_BRA:2869case OP_CALL:2870case OP_PRERET:2871case OP_RET:2872case OP_DISCARD:2873case OP_EXIT:2874case OP_PRECONT:2875case OP_CONT:2876case OP_PREBREAK:2877case OP_BREAK:2878case OP_JOINAT:2879case OP_BRKPT:2880case OP_QUADON:2881case OP_QUADPOP:2882emitFlow(insn);2883break;2884case OP_QUADOP:2885emitQUADOP(insn, insn->subOp, insn->lanes);2886break;2887case OP_DFDX:2888emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);2889break;2890case OP_DFDY:2891emitQUADOP(insn, insn->src(0).mod.neg() ? 
0x5a : 0xa5, 0x5);2892break;2893case OP_POPCNT:2894emitPOPC(insn);2895break;2896case OP_INSBF:2897emitINSBF(insn);2898break;2899case OP_EXTBF:2900emitEXTBF(insn);2901break;2902case OP_BFIND:2903emitBFIND(insn);2904break;2905case OP_PERMT:2906emitPERMT(insn);2907break;2908case OP_JOIN:2909emitNOP(insn);2910insn->join = 1;2911break;2912case OP_BAR:2913emitBAR(insn);2914break;2915case OP_MEMBAR:2916emitMEMBAR(insn);2917break;2918case OP_CCTL:2919emitCCTL(insn);2920break;2921case OP_VSHL:2922emitVSHL(insn);2923break;2924case OP_PIXLD:2925emitPIXLD(insn);2926break;2927case OP_SHFL:2928emitSHFL(insn);2929break;2930case OP_VOTE:2931emitVOTE(insn);2932break;2933case OP_PHI:2934case OP_UNION:2935case OP_CONSTRAINT:2936ERROR("operation should have been eliminated");2937return false;2938case OP_EXP:2939case OP_LOG:2940case OP_SQRT:2941case OP_POW:2942ERROR("operation should have been lowered\n");2943return false;2944default:2945ERROR("unknown op: %u\n", insn->op);2946return false;2947}29482949if (insn->join) {2950code[0] |= 0x10;2951assert(insn->encSize == 8);2952}29532954code += insn->encSize / 4;2955codeSize += insn->encSize;2956return true;2957}29582959uint32_t2960CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const2961{2962const Target::OpInfo &info = targ->getOpInfo(i);29632964if (writeIssueDelays || info.minEncSize == 8 || 1)2965return 8;29662967if (i->ftz || i->saturate || i->join)2968return 8;2969if (i->rnd != ROUND_N)2970return 8;2971if (i->predSrc >= 0 && i->op == OP_MAD)2972return 8;29732974if (i->op == OP_PINTERP) {2975if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work2976return 8;2977} else2978if (i->op == OP_MOV && i->lanes != 0xf) {2979return 8;2980}29812982for (int s = 0; i->srcExists(s); ++s) {2983if (i->src(s).isIndirect(0))2984return 8;29852986if (i->src(s).getFile() == FILE_MEMORY_CONST) {2987if (SDATA(i->src(s)).offset >= 0x100)2988return 8;2989if (i->getSrc(s)->reg.fileIndex > 1 &&2990i->getSrc(s)->reg.fileIndex != 16)2991return 
8;2992} else2993if (i->src(s).getFile() == FILE_IMMEDIATE) {2994if (i->dType == TYPE_F32) {2995if (SDATA(i->src(s)).u32 >= 0x100)2996return 8;2997} else {2998if (SDATA(i->src(s)).u32 > 0xff)2999return 8;3000}3001}30023003if (i->op == OP_CVT)3004continue;3005if (i->src(s).mod != Modifier(0)) {3006if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))3007if (i->op != OP_RSQ)3008return 8;3009if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))3010if (i->op != OP_ADD || s != 0)3011return 8;3012}3013}30143015return 4;3016}30173018// Simplified, erring on safe side.3019class SchedDataCalculator : public Pass3020{3021public:3022SchedDataCalculator(const Target *targ) : score(NULL), prevData(0),3023prevOp(OP_NOP), targ(targ) { }30243025private:3026struct RegScores3027{3028struct Resource {3029int st[DATA_FILE_COUNT]; // LD to LD delay 33030int ld[DATA_FILE_COUNT]; // ST to ST delay 33031int tex; // TEX to non-TEX delay 17 (0x11)3032int sfu; // SFU to SFU delay 3 (except PRE-ops)3033int imul; // integer MUL to MUL delay 33034} res;3035struct ScoreData {3036int r[256];3037int p[8];3038int c;3039} rd, wr;3040int base;3041int regs;30423043void rebase(const int base)3044{3045const int delta = this->base - base;3046if (!delta)3047return;3048this->base = 0;30493050for (int i = 0; i < regs; ++i) {3051rd.r[i] += delta;3052wr.r[i] += delta;3053}3054for (int i = 0; i < 8; ++i) {3055rd.p[i] += delta;3056wr.p[i] += delta;3057}3058rd.c += delta;3059wr.c += delta;30603061for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {3062res.ld[f] += delta;3063res.st[f] += delta;3064}3065res.sfu += delta;3066res.imul += delta;3067res.tex += delta;3068}3069void wipe(int regs)3070{3071memset(&rd, 0, sizeof(rd));3072memset(&wr, 0, sizeof(wr));3073memset(&res, 0, sizeof(res));3074this->regs = regs;3075}3076int getLatest(const ScoreData& d) const3077{3078int max = 0;3079for (int i = 0; i < regs; ++i)3080if (d.r[i] > max)3081max = d.r[i];3082for (int i = 0; i < 8; ++i)3083if (d.p[i] > max)3084max = d.p[i];3085if (d.c 
> max)3086max = d.c;3087return max;3088}3089inline int getLatestRd() const3090{3091return getLatest(rd);3092}3093inline int getLatestWr() const3094{3095return getLatest(wr);3096}3097inline int getLatest() const3098{3099const int a = getLatestRd();3100const int b = getLatestWr();31013102int max = MAX2(a, b);3103for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {3104max = MAX2(res.ld[f], max);3105max = MAX2(res.st[f], max);3106}3107max = MAX2(res.sfu, max);3108max = MAX2(res.imul, max);3109max = MAX2(res.tex, max);3110return max;3111}3112void setMax(const RegScores *that)3113{3114for (int i = 0; i < regs; ++i) {3115rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);3116wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);3117}3118for (int i = 0; i < 8; ++i) {3119rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);3120wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);3121}3122rd.c = MAX2(rd.c, that->rd.c);3123wr.c = MAX2(wr.c, that->wr.c);31243125for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {3126res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);3127res.st[f] = MAX2(res.st[f], that->res.st[f]);3128}3129res.sfu = MAX2(res.sfu, that->res.sfu);3130res.imul = MAX2(res.imul, that->res.imul);3131res.tex = MAX2(res.tex, that->res.tex);3132}3133void print(int cycle)3134{3135for (int i = 0; i < regs; ++i) {3136if (rd.r[i] > cycle)3137INFO("rd $r%i @ %i\n", i, rd.r[i]);3138if (wr.r[i] > cycle)3139INFO("wr $r%i @ %i\n", i, wr.r[i]);3140}3141for (int i = 0; i < 8; ++i) {3142if (rd.p[i] > cycle)3143INFO("rd $p%i @ %i\n", i, rd.p[i]);3144if (wr.p[i] > cycle)3145INFO("wr $p%i @ %i\n", i, wr.p[i]);3146}3147if (rd.c > cycle)3148INFO("rd $c @ %i\n", rd.c);3149if (wr.c > cycle)3150INFO("wr $c @ %i\n", wr.c);3151if (res.sfu > cycle)3152INFO("sfu @ %i\n", res.sfu);3153if (res.imul > cycle)3154INFO("imul @ %i\n", res.imul);3155if (res.tex > cycle)3156INFO("tex @ %i\n", res.tex);3157}3158};31593160RegScores *score; // for current BB3161std::vector<RegScores> scoreBoards;3162int prevData;3163operation prevOp;31643165const Target 
*targ;31663167bool visit(Function *);3168bool visit(BasicBlock *);31693170void commitInsn(const Instruction *, int cycle);3171int calcDelay(const Instruction *, int cycle) const;3172void setDelay(Instruction *, int delay, Instruction *next);31733174void recordRd(const Value *, const int ready);3175void recordWr(const Value *, const int ready);3176void checkRd(const Value *, int cycle, int& delay) const;3177void checkWr(const Value *, int cycle, int& delay) const;31783179int getCycles(const Instruction *, int origDelay) const;3180};31813182void3183SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)3184{3185if (insn->op == OP_EXIT || insn->op == OP_RET)3186delay = MAX2(delay, 14);31873188if (insn->op == OP_TEXBAR) {3189// TODO: except if results not used before EXIT3190insn->sched = 0xc2;3191} else3192if (insn->op == OP_JOIN || insn->join) {3193insn->sched = 0x00;3194} else3195if (delay >= 0 || prevData == 0x04 ||3196!next || !targ->canDualIssue(insn, next)) {3197insn->sched = static_cast<uint8_t>(MAX2(delay, 0));3198if (prevOp == OP_EXPORT)3199insn->sched |= 0x40;3200else3201insn->sched |= 0x20;3202} else {3203insn->sched = 0x04; // dual-issue3204}32053206if (prevData != 0x04 || prevOp != OP_EXPORT)3207if (insn->sched != 0x04 || insn->op == OP_EXPORT)3208prevOp = insn->op;32093210prevData = insn->sched;3211}32123213int3214SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const3215{3216if (insn->sched & 0x80) {3217int c = (insn->sched & 0x0f) * 2 + 1;3218if (insn->op == OP_TEXBAR && origDelay > 0)3219c += origDelay;3220return c;3221}3222if (insn->sched & 0x60)3223return (insn->sched & 0x1f) + 1;3224return (insn->sched == 0x04) ? 
0 : 32;3225}32263227bool3228SchedDataCalculator::visit(Function *func)3229{3230int regs = targ->getFileSize(FILE_GPR) + 1;3231scoreBoards.resize(func->cfg.getSize());3232for (size_t i = 0; i < scoreBoards.size(); ++i)3233scoreBoards[i].wipe(regs);3234return true;3235}32363237bool3238SchedDataCalculator::visit(BasicBlock *bb)3239{3240Instruction *insn;3241Instruction *next = NULL;32423243int cycle = 0;32443245prevData = 0x00;3246prevOp = OP_NOP;3247score = &scoreBoards.at(bb->getId());32483249for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {3250// back branches will wait until all target dependencies are satisfied3251if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized3252continue;3253BasicBlock *in = BasicBlock::get(ei.getNode());3254if (in->getExit()) {3255if (prevData != 0x04)3256prevData = in->getExit()->sched;3257prevOp = in->getExit()->op;3258}3259score->setMax(&scoreBoards.at(in->getId()));3260}3261if (bb->cfg.incidentCount() > 1)3262prevOp = OP_NOP;32633264#ifdef NVC0_DEBUG_SCHED_DATA3265INFO("=== BB:%i initial scores\n", bb->getId());3266score->print(cycle);3267#endif32683269for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {3270next = insn->next;32713272commitInsn(insn, cycle);3273int delay = calcDelay(next, cycle);3274setDelay(insn, delay, next);3275cycle += getCycles(insn, delay);32763277#ifdef NVC0_DEBUG_SCHED_DATA3278INFO("cycle %i, sched %02x\n", cycle, insn->sched);3279insn->print();3280next->print();3281#endif3282}3283if (!insn)3284return true;3285commitInsn(insn, cycle);32863287int bbDelay = -1;32883289for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {3290BasicBlock *out = BasicBlock::get(ei.getNode());32913292if (ei.getType() != Graph::Edge::BACK) {3293// only test the first instruction of the outgoing block3294next = out->getEntry();3295if (next)3296bbDelay = MAX2(bbDelay, calcDelay(next, cycle));3297} else {3298// wait until all dependencies are satisfied3299const 
int regsFree = score->getLatest();3300next = out->getFirst();3301for (int c = cycle; next && c < regsFree; next = next->next) {3302bbDelay = MAX2(bbDelay, calcDelay(next, c));3303c += getCycles(next, bbDelay);3304}3305next = NULL;3306}3307}3308if (bb->cfg.outgoingCount() != 1)3309next = NULL;3310setDelay(insn, bbDelay, next);3311cycle += getCycles(insn, bbDelay);33123313score->rebase(cycle); // common base for initializing out blocks' scores3314return true;3315}33163317#define NVE4_MAX_ISSUE_DELAY 0x1f3318int3319SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const3320{3321int delay = 0, ready = cycle;33223323for (int s = 0; insn->srcExists(s); ++s)3324checkRd(insn->getSrc(s), cycle, delay);3325// WAR & WAW don't seem to matter3326// for (int s = 0; insn->srcExists(s); ++s)3327// recordRd(insn->getSrc(s), cycle);33283329switch (Target::getOpClass(insn->op)) {3330case OPCLASS_SFU:3331ready = score->res.sfu;3332break;3333case OPCLASS_ARITH:3334if (insn->op == OP_MUL && !isFloatType(insn->dType))3335ready = score->res.imul;3336break;3337case OPCLASS_TEXTURE:3338ready = score->res.tex;3339break;3340case OPCLASS_LOAD:3341ready = score->res.ld[insn->src(0).getFile()];3342break;3343case OPCLASS_STORE:3344ready = score->res.st[insn->src(0).getFile()];3345break;3346default:3347break;3348}3349if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)3350ready = MAX2(ready, score->res.tex);33513352delay = MAX2(delay, ready - cycle);33533354// if can issue next cycle, delay is 0, not 13355return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);3356}33573358void3359SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)3360{3361const int ready = cycle + targ->getLatency(insn);33623363for (int d = 0; insn->defExists(d); ++d)3364recordWr(insn->getDef(d), ready);3365// WAR & WAW don't seem to matter3366// for (int s = 0; insn->srcExists(s); ++s)3367// recordRd(insn->getSrc(s), cycle);33683369switch (Target::getOpClass(insn->op)) {3370case 
OPCLASS_SFU:3371score->res.sfu = cycle + 4;3372break;3373case OPCLASS_ARITH:3374if (insn->op == OP_MUL && !isFloatType(insn->dType))3375score->res.imul = cycle + 4;3376break;3377case OPCLASS_TEXTURE:3378score->res.tex = cycle + 18;3379break;3380case OPCLASS_LOAD:3381if (insn->src(0).getFile() == FILE_MEMORY_CONST)3382break;3383score->res.ld[insn->src(0).getFile()] = cycle + 4;3384score->res.st[insn->src(0).getFile()] = ready;3385break;3386case OPCLASS_STORE:3387score->res.st[insn->src(0).getFile()] = cycle + 4;3388score->res.ld[insn->src(0).getFile()] = ready;3389break;3390case OPCLASS_OTHER:3391if (insn->op == OP_TEXBAR)3392score->res.tex = cycle;3393break;3394default:3395break;3396}33973398#ifdef NVC0_DEBUG_SCHED_DATA3399score->print(cycle);3400#endif3401}34023403void3404SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const3405{3406int ready = cycle;3407int a, b;34083409switch (v->reg.file) {3410case FILE_GPR:3411a = v->reg.data.id;3412b = a + v->reg.size / 4;3413for (int r = a; r < b; ++r)3414ready = MAX2(ready, score->rd.r[r]);3415break;3416case FILE_PREDICATE:3417ready = MAX2(ready, score->rd.p[v->reg.data.id]);3418break;3419case FILE_FLAGS:3420ready = MAX2(ready, score->rd.c);3421break;3422case FILE_SHADER_INPUT:3423case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs3424case FILE_MEMORY_LOCAL:3425case FILE_MEMORY_CONST:3426case FILE_MEMORY_SHARED:3427case FILE_MEMORY_GLOBAL:3428case FILE_SYSTEM_VALUE:3429// TODO: any restrictions here ?3430break;3431case FILE_IMMEDIATE:3432break;3433default:3434assert(0);3435break;3436}3437if (cycle < ready)3438delay = MAX2(delay, ready - cycle);3439}34403441void3442SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const3443{3444int ready = cycle;3445int a, b;34463447switch (v->reg.file) {3448case FILE_GPR:3449a = v->reg.data.id;3450b = a + v->reg.size / 4;3451for (int r = a; r < b; ++r)3452ready = MAX2(ready, score->wr.r[r]);3453break;3454case FILE_PREDICATE:3455ready = MAX2(ready, 
score->wr.p[v->reg.data.id]);3456break;3457default:3458assert(v->reg.file == FILE_FLAGS);3459ready = MAX2(ready, score->wr.c);3460break;3461}3462if (cycle < ready)3463delay = MAX2(delay, ready - cycle);3464}34653466void3467SchedDataCalculator::recordWr(const Value *v, const int ready)3468{3469int a = v->reg.data.id;34703471if (v->reg.file == FILE_GPR) {3472int b = a + v->reg.size / 4;3473for (int r = a; r < b; ++r)3474score->rd.r[r] = ready;3475} else3476// $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)3477if (v->reg.file == FILE_PREDICATE) {3478score->rd.p[a] = ready + 4;3479} else {3480assert(v->reg.file == FILE_FLAGS);3481score->rd.c = ready + 4;3482}3483}34843485void3486SchedDataCalculator::recordRd(const Value *v, const int ready)3487{3488int a = v->reg.data.id;34893490if (v->reg.file == FILE_GPR) {3491int b = a + v->reg.size / 4;3492for (int r = a; r < b; ++r)3493score->wr.r[r] = ready;3494} else3495if (v->reg.file == FILE_PREDICATE) {3496score->wr.p[a] = ready;3497} else3498if (v->reg.file == FILE_FLAGS) {3499score->wr.c = ready;3500}3501}35023503bool3504calculateSchedDataNVC0(const Target *targ, Function *func)3505{3506SchedDataCalculator sched(targ);3507return sched.run(func, true, true);3508}35093510void3511CodeEmitterNVC0::prepareEmission(Function *func)3512{3513CodeEmitter::prepareEmission(func);35143515if (targ->hasSWSched)3516calculateSchedDataNVC0(targ, func);3517}35183519CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target, Program::Type type)3520: CodeEmitter(target),3521targNVC0(target),3522progType(type),3523writeIssueDelays(target->hasSWSched)3524{3525code = NULL;3526codeSize = codeSizeLimit = 0;3527relocInfo = NULL;3528}35293530CodeEmitter *3531TargetNVC0::createCodeEmitterNVC0(Program::Type type)3532{3533CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this, type);3534return emit;3535}35363537CodeEmitter *3538TargetNVC0::getCodeEmitter(Program::Type type)3539{3540if (chipset >= NVISA_GK20A_CHIPSET)3541return 
createCodeEmitterGK110(type);3542return createCodeEmitterNVC0(type);3543}35443545} // namespace nv50_ir354635473548