Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
4574 views
/*1* Copyright 2012 Christoph Bumiller2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "codegen/nv50_ir_target_nvc0.h"2324// CodeEmitter for GK110 encoding of the Fermi/Kepler ISA.2526namespace nv50_ir {2728class CodeEmitterGK110 : public CodeEmitter29{30public:31CodeEmitterGK110(const TargetNVC0 *, Program::Type);3233virtual bool emitInstruction(Instruction *);34virtual uint32_t getMinEncodingSize(const Instruction *) const;35virtual void prepareEmission(Function *);3637private:38const TargetNVC0 *targNVC0;3940Program::Type progType;4142const bool writeIssueDelays;4344private:45void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);46void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);47void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier, int sCount = 3);4849void emitPredicate(const Instruction *);5051void setCAddress14(const ValueRef&);52void setShortImmediate(const Instruction *, const int s);53void setImmediate32(const Instruction *, const int s, Modifier);54void setSUConst16(const Instruction *, const int s);5556void modNegAbsF32_3b(const Instruction *, const int s);5758void emitCondCode(CondCode cc, int pos, uint8_t mask);59void emitInterpMode(const Instruction *);60void emitLoadStoreType(DataType ty, const int pos);61void emitCachingMode(CacheMode c, const int pos);62void emitSUGType(DataType, const int pos);63void emitSUCachingMode(CacheMode c);6465inline uint8_t getSRegEncoding(const ValueRef&);6667void emitRoundMode(RoundMode, const int pos, const int rintPos);68void emitRoundModeF(RoundMode, const int pos);69void emitRoundModeI(RoundMode, const int pos);7071void emitNegAbs12(const Instruction *);7273void emitNOP(const Instruction *);7475void emitLOAD(const Instruction *);76void emitSTORE(const Instruction *);77void emitMOV(const Instruction *);78void emitATOM(const Instruction *);79void emitCCTL(const Instruction *);8081void emitINTERP(const Instruction *);82void emitAFETCH(const Instruction *);83void emitPFETCH(const Instruction *);84void emitVFETCH(const Instruction *);85void emitEXPORT(const Instruction *);86void emitOUT(const Instruction *);8788void emitUADD(const Instruction *);89void emitFADD(const Instruction *);90void emitDADD(const Instruction *);91void emitIMUL(const Instruction *);92void emitFMUL(const Instruction *);93void emitDMUL(const Instruction *);94void emitIMAD(const Instruction *);95void emitISAD(const Instruction *);96void emitSHLADD(const Instruction *);97void emitFMAD(const Instruction *);98void emitDMAD(const Instruction *);99void emitMADSP(const Instruction *i);100101void emitNOT(const Instruction *);102void emitLogicOp(const Instruction *, uint8_t subOp);103void emitPOPC(const Instruction *);104void emitINSBF(const Instruction *);105void emitEXTBF(const Instruction *);106void emitBFIND(const Instruction *);107void emitPERMT(const Instruction *);108void emitShift(const Instruction *);109void emitShift64(const Instruction *);110111void emitSFnOp(const Instruction *, uint8_t subOp);112113void emitCVT(const Instruction *);114void emitMINMAX(const Instruction *);115void emitPreOp(const Instruction *);116117void emitSET(const CmpInstruction *);118void emitSLCT(const CmpInstruction *);119void emitSELP(const Instruction *);120121void emitTEXBAR(const Instruction *);122void emitTEX(const TexInstruction *);123void emitTEXCSAA(const TexInstruction *);124void emitTXQ(const TexInstruction *);125126void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);127128void emitPIXLD(const Instruction *);129130void emitBAR(const Instruction *);131void emitMEMBAR(const Instruction *);132133void emitFlow(const Instruction *);134135void emitSHFL(const Instruction *);136137void emitVOTE(const Instruction *);138139void emitSULDGB(const TexInstruction *);140void emitSUSTGx(const TexInstruction *);141void emitSUCLAMPMode(uint16_t);142void emitSUCalc(Instruction *);143144void emitVSHL(const Instruction *);145void emitVectorSubOp(const Instruction *);146147inline void defId(const ValueDef&, const int pos);148inline void srcId(const ValueRef&, const int pos);149inline void srcId(const ValueRef *, const int pos);150inline void srcId(const Instruction *, int s, const int pos);151152inline void srcAddr32(const ValueRef&, const int pos); // address / 4153154inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false);155};156157#define GK110_GPR_ZERO 255158159#define NEG_(b, s) \160if (i->src(s).mod.neg()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)161#define ABS_(b, s) \162if (i->src(s).mod.abs()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)163164#define NOT_(b, s) if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) \165code[(0x##b) / 32] |= 1 << ((0x##b) % 32)166167#define FTZ_(b) if (i->ftz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)168#define DNZ_(b) if (i->dnz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)169170#define SAT_(b) if (i->saturate) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)171172#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b)173174#define SDATA(a) ((a).rep()->reg.data)175#define DDATA(a) ((a).rep()->reg.data)176177void CodeEmitterGK110::srcId(const ValueRef& src, const int pos)178{179code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32);180}181182void CodeEmitterGK110::srcId(const ValueRef *src, const int pos)183{184code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32);185}186187void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos)188{189int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO;190code[pos / 32] |= r << (pos % 32);191}192193void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos)194{195code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32);196}197198void CodeEmitterGK110::defId(const ValueDef& def, const int pos)199{200code[pos / 32] |= (def.get() && def.getFile() != FILE_FLAGS ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32);201}202203bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod)204{205const ImmediateValue *imm = ref.get()->asImm();206207if (ty == TYPE_F32)208return imm && imm->reg.data.u32 & 0xfff;209else210return imm && (imm->reg.data.s32 > 0x7ffff ||211imm->reg.data.s32 < -0x80000);212}213214void215CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos)216{217bool rint = false;218uint8_t n;219220switch (rnd) {221case ROUND_MI: rint = true; FALLTHROUGH; case ROUND_M: n = 1; break;222case ROUND_PI: rint = true; FALLTHROUGH; case ROUND_P: n = 2; break;223case ROUND_ZI: rint = true; FALLTHROUGH; case ROUND_Z: n = 3; break;224default:225rint = rnd == ROUND_NI;226n = 0;227assert(rnd == ROUND_N || rnd == ROUND_NI);228break;229}230code[pos / 32] |= n << (pos % 32);231if (rint && rintPos >= 0)232code[rintPos / 32] |= 1 << (rintPos % 32);233}234235void236CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos)237{238uint8_t n;239240switch (rnd) {241case ROUND_M: n = 1; break;242case ROUND_P: n = 2; break;243case ROUND_Z: n = 3; break;244default:245n = 0;246assert(rnd == ROUND_N);247break;248}249code[pos / 32] |= n << (pos % 32);250}251252void253CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos)254{255uint8_t n;256257switch (rnd) {258case ROUND_MI: n = 1; break;259case ROUND_PI: n = 2; break;260case ROUND_ZI: n = 3; break;261default:262n = 0;263assert(rnd == ROUND_NI);264break;265}266code[pos / 32] |= n << (pos % 32);267}268269void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)270{271uint8_t n;272273switch (cc) {274case CC_FL: n = 0x00; break;275case CC_LT: n = 0x01; break;276case CC_EQ: n = 0x02; break;277case CC_LE: n = 0x03; break;278case CC_GT: n = 0x04; break;279case CC_NE: n = 0x05; break;280case CC_GE: n = 0x06; break;281case CC_LTU: n = 0x09; break;282case CC_EQU: n = 0x0a; break;283case CC_LEU: n = 0x0b; break;284case CC_GTU: n = 0x0c; break;285case CC_NEU: n = 0x0d; break;286case CC_GEU: n = 0x0e; break;287case CC_TR: n = 0x0f; break;288case CC_NO: n = 0x10; break;289case CC_NC: n = 0x11; break;290case CC_NS: n = 0x12; break;291case CC_NA: n = 0x13; break;292case CC_A: n = 0x14; break;293case CC_S: n = 0x15; break;294case CC_C: n = 0x16; break;295case CC_O: n = 0x17; break;296default:297n = 0;298assert(!"invalid condition code");299break;300}301code[pos / 32] |= (n & mask) << (pos % 32);302}303304void305CodeEmitterGK110::emitPredicate(const Instruction *i)306{307if (i->predSrc >= 0) {308srcId(i->src(i->predSrc), 18);309if (i->cc == CC_NOT_P)310code[0] |= 8 << 18; // negate311assert(i->getPredicate()->reg.file == FILE_PREDICATE);312} else {313code[0] |= 7 << 18;314}315}316317void318CodeEmitterGK110::setCAddress14(const ValueRef& src)319{320const Storage& res = src.get()->asSym()->reg;321const int32_t addr = res.data.offset / 4;322323code[0] |= (addr & 0x01ff) << 23;324code[1] |= (addr & 0x3e00) >> 9;325code[1] |= res.fileIndex << 5;326}327328void329CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s)330{331const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;332const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64;333334if (i->sType == TYPE_F32) {335assert(!(u32 & 0x00000fff));336code[0] |= ((u32 & 0x001ff000) >> 12) << 23;337code[1] |= ((u32 & 0x7fe00000) >> 21);338code[1] |= ((u32 & 0x80000000) >> 4);339} else340if (i->sType == TYPE_F64) {341assert(!(u64 & 0x00000fffffffffffULL));342code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23;343code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53);344code[1] |= ((u64 & 0x8000000000000000ULL) >> 36);345} else {346assert((u32 & 0xfff80000) == 0 || (u32 & 0xfff80000) == 0xfff80000);347code[0] |= (u32 & 0x001ff) << 23;348code[1] |= (u32 & 0x7fe00) >> 9;349code[1] |= (u32 & 0x80000) << 8;350}351}352353void354CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,355Modifier mod)356{357uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32;358359if (mod) {360ImmediateValue imm(i->getSrc(s)->asImm(), i->sType);361mod.applyTo(imm);362u32 = imm.reg.data.u32;363}364365code[0] |= u32 << 23;366code[1] |= u32 >> 9;367}368369void370CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,371Modifier mod, int sCount)372{373code[0] = ctg;374code[1] = opc << 20;375376emitPredicate(i);377378defId(i->def(0), 2);379380for (int s = 0; s < sCount && i->srcExists(s); ++s) {381switch (i->src(s).getFile()) {382case FILE_GPR:383srcId(i->src(s), s ? 42 : 10);384break;385case FILE_IMMEDIATE:386setImmediate32(i, s, mod);387break;388default:389break;390}391}392}393394395void396CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg)397{398code[0] = ctg;399code[1] = opc << 20;400401emitPredicate(i);402403defId(i->def(0), 2);404405switch (i->src(0).getFile()) {406case FILE_MEMORY_CONST:407code[1] |= 0x4 << 28;408setCAddress14(i->src(0));409break;410case FILE_GPR:411code[1] |= 0xc << 28;412srcId(i->src(0), 23);413break;414default:415assert(0);416break;417}418}419420// 0x2 for GPR, c[] and 0x1 for short immediate421void422CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2,423uint32_t opc1)424{425const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE;426427int s1 = 23;428if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST)429s1 = 42;430431if (imm) {432code[0] = 0x1;433code[1] = opc1 << 20;434} else {435code[0] = 0x2;436code[1] = (0xc << 28) | (opc2 << 20);437}438439emitPredicate(i);440441defId(i->def(0), 2);442443for (int s = 0; s < 3 && i->srcExists(s); ++s) {444switch (i->src(s).getFile()) {445case FILE_MEMORY_CONST:446code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28);447setCAddress14(i->src(s));448break;449case FILE_IMMEDIATE:450setShortImmediate(i, s);451break;452case FILE_GPR:453srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10);454break;455default:456if (i->op == OP_SELP) {457assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE);458srcId(i->src(s), 42);459}460// ignore here, can be predicate or flags, but must not be address461break;462}463}464// 0x0 = invalid465// 0xc = rrr466// 0x8 = rrc467// 0x4 = rcr468assert(imm || (code[1] & (0xc << 28)));469}470471inline void472CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s)473{474if (i->src(s).mod.abs()) code[1] &= ~(1 << 27);475if (i->src(s).mod.neg()) code[1] ^= (1 << 27);476}477478void479CodeEmitterGK110::emitNOP(const Instruction *i)480{481code[0] = 0x00003c02;482code[1] = 0x85800000;483484if (i)485emitPredicate(i);486else487code[0] = 0x001c3c02;488}489490void491CodeEmitterGK110::emitFMAD(const Instruction *i)492{493bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();494495if (isLIMM(i->src(1), TYPE_F32)) {496assert(i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id);497498// last source is dst, so force 2 sources499emitForm_L(i, 0x600, 0x0, 0, 2);500501if (i->flagsDef >= 0)502code[1] |= 1 << 23;503504SAT_(3a);505NEG_(3c, 2);506507if (neg1) {508code[1] |= 1 << 27;509}510} else {511emitForm_21(i, 0x0c0, 0x940);512513NEG_(34, 2);514SAT_(35);515RND_(36, F);516517if (code[0] & 0x1) {518if (neg1)519code[1] ^= 1 << 27;520} else521if (neg1) {522code[1] |= 1 << 19;523}524}525526FTZ_(38);527DNZ_(39);528}529530void531CodeEmitterGK110::emitDMAD(const Instruction *i)532{533assert(!i->saturate);534assert(!i->ftz);535536emitForm_21(i, 0x1b8, 0xb38);537538NEG_(34, 2);539RND_(36, F);540541bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();542543if (code[0] & 0x1) {544if (neg1)545code[1] ^= 1 << 27;546} else547if (neg1) {548code[1] |= 1 << 19;549}550}551552void553CodeEmitterGK110::emitMADSP(const Instruction *i)554{555emitForm_21(i, 0x140, 0xa40);556557if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {558code[1] |= 0x00c00000;559} else {560code[1] |= (i->subOp & 0x00f) << 19; // imadp1561code[1] |= (i->subOp & 0x0f0) << 20; // imadp2562code[1] |= (i->subOp & 0x100) << 11; // imadp3563code[1] |= (i->subOp & 0x200) << 15; // imadp3564code[1] |= (i->subOp & 0xc00) << 12; // imadp3565}566567if (i->flagsDef >= 0)568code[1] |= 1 << 18;569}570571void572CodeEmitterGK110::emitFMUL(const Instruction *i)573{574bool neg = (i->src(0).mod ^ i->src(1).mod).neg();575576assert(i->postFactor >= -3 && i->postFactor <= 3);577578if (isLIMM(i->src(1), TYPE_F32)) {579emitForm_L(i, 0x200, 0x2, Modifier(0));580581FTZ_(38);582DNZ_(39);583SAT_(3a);584if (neg)585code[1] ^= 1 << 22;586587assert(i->postFactor == 0);588} else {589emitForm_21(i, 0x234, 0xc34);590code[1] |= ((i->postFactor > 0) ?591(7 - i->postFactor) : (0 - i->postFactor)) << 12;592593RND_(2a, F);594FTZ_(2f);595DNZ_(30);596SAT_(35);597598if (code[0] & 0x1) {599if (neg)600code[1] ^= 1 << 27;601} else602if (neg) {603code[1] |= 1 << 19;604}605}606}607608void609CodeEmitterGK110::emitDMUL(const Instruction *i)610{611bool neg = (i->src(0).mod ^ i->src(1).mod).neg();612613assert(!i->postFactor);614assert(!i->saturate);615assert(!i->ftz);616assert(!i->dnz);617618emitForm_21(i, 0x240, 0xc40);619620RND_(2a, F);621622if (code[0] & 0x1) {623if (neg)624code[1] ^= 1 << 27;625} else626if (neg) {627code[1] |= 1 << 19;628}629}630631void632CodeEmitterGK110::emitIMUL(const Instruction *i)633{634assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());635assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());636637if (isLIMM(i->src(1), TYPE_S32)) {638emitForm_L(i, 0x280, 2, Modifier(0));639640if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)641code[1] |= 1 << 24;642if (i->sType == TYPE_S32)643code[1] |= 3 << 25;644} else {645emitForm_21(i, 0x21c, 0xc1c);646647if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)648code[1] |= 1 << 10;649if (i->sType == TYPE_S32)650code[1] |= 3 << 11;651}652}653654void655CodeEmitterGK110::emitFADD(const Instruction *i)656{657if (isLIMM(i->src(1), TYPE_F32)) {658assert(i->rnd == ROUND_N);659assert(!i->saturate);660661Modifier mod = i->src(1).mod ^662Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0);663664emitForm_L(i, 0x400, 0, mod);665666FTZ_(3a);667NEG_(3b, 0);668ABS_(39, 0);669} else {670emitForm_21(i, 0x22c, 0xc2c);671672FTZ_(2f);673RND_(2a, F);674ABS_(31, 0);675NEG_(33, 0);676SAT_(35);677678if (code[0] & 0x1) {679modNegAbsF32_3b(i, 1);680if (i->op == OP_SUB) code[1] ^= 1 << 27;681} else {682ABS_(34, 1);683NEG_(30, 1);684if (i->op == OP_SUB) code[1] ^= 1 << 16;685}686}687}688689void690CodeEmitterGK110::emitDADD(const Instruction *i)691{692assert(!i->saturate);693assert(!i->ftz);694695emitForm_21(i, 0x238, 0xc38);696RND_(2a, F);697ABS_(31, 0);698NEG_(33, 0);699if (code[0] & 0x1) {700modNegAbsF32_3b(i, 1);701if (i->op == OP_SUB) code[1] ^= 1 << 27;702} else {703NEG_(30, 1);704ABS_(34, 1);705if (i->op == OP_SUB) code[1] ^= 1 << 16;706}707}708709void710CodeEmitterGK110::emitUADD(const Instruction *i)711{712uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg();713714if (i->op == OP_SUB)715addOp ^= 1;716717assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());718719if (isLIMM(i->src(1), TYPE_S32)) {720emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0));721722if (addOp & 2)723code[1] |= 1 << 27;724725assert(i->flagsDef < 0);726assert(i->flagsSrc < 0);727728SAT_(39);729} else {730emitForm_21(i, 0x208, 0xc08);731732assert(addOp != 3); // would be add-plus-one733734code[1] |= addOp << 19;735736if (i->flagsDef >= 0)737code[1] |= 1 << 18; // write carry738if (i->flagsSrc >= 0)739code[1] |= 1 << 14; // add carry740741SAT_(35);742}743}744745void746CodeEmitterGK110::emitIMAD(const Instruction *i)747{748uint8_t addOp =749i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);750751emitForm_21(i, 0x100, 0xa00);752753assert(addOp != 3);754code[1] |= addOp << 26;755756if (i->sType == TYPE_S32)757code[1] |= (1 << 19) | (1 << 24);758759if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)760code[1] |= 1 << 25;761762if (i->flagsDef >= 0) code[1] |= 1 << 18;763if (i->flagsSrc >= 0) code[1] |= 1 << 20;764765SAT_(35);766}767768void769CodeEmitterGK110::emitISAD(const Instruction *i)770{771assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);772773emitForm_21(i, 0x1f4, 0xb74);774775if (i->dType == TYPE_S32)776code[1] |= 1 << 19;777}778779void780CodeEmitterGK110::emitSHLADD(const Instruction *i)781{782uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();783const ImmediateValue *imm = i->src(1).get()->asImm();784assert(imm);785786if (i->src(2).getFile() == FILE_IMMEDIATE) {787code[0] = 0x1;788code[1] = 0xc0c << 20;789} else {790code[0] = 0x2;791code[1] = 0x20c << 20;792}793code[1] |= addOp << 19;794795emitPredicate(i);796797defId(i->def(0), 2);798srcId(i->src(0), 10);799800if (i->flagsDef >= 0)801code[1] |= 1 << 18;802803assert(!(imm->reg.data.u32 & 0xffffffe0));804code[1] |= imm->reg.data.u32 << 10;805806switch (i->src(2).getFile()) {807case FILE_GPR:808assert(code[0] & 0x2);809code[1] |= 0xc << 28;810srcId(i->src(2), 23);811break;812case FILE_MEMORY_CONST:813assert(code[0] & 0x2);814code[1] |= 0x4 << 28;815setCAddress14(i->src(2));816break;817case FILE_IMMEDIATE:818assert(code[0] & 0x1);819setShortImmediate(i, 2);820break;821default:822assert(!"bad src2 file");823break;824}825}826827void828CodeEmitterGK110::emitNOT(const Instruction *i)829{830code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src831code[1] = 0x22003800;832833emitPredicate(i);834835defId(i->def(0), 2);836837switch (i->src(0).getFile()) {838case FILE_GPR:839code[1] |= 0xc << 28;840srcId(i->src(0), 23);841break;842case FILE_MEMORY_CONST:843code[1] |= 0x4 << 28;844setCAddress14(i->src(0));845break;846default:847assert(0);848break;849}850}851852void853CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)854{855if (i->def(0).getFile() == FILE_PREDICATE) {856code[0] = 0x00000002 | (subOp << 27);857code[1] = 0x84800000;858859emitPredicate(i);860861defId(i->def(0), 5);862srcId(i->src(0), 14);863if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 17;864srcId(i->src(1), 32);865if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 3;866867if (i->defExists(1)) {868defId(i->def(1), 2);869} else {870code[0] |= 7 << 2;871}872// (a OP b) OP c873if (i->predSrc != 2 && i->srcExists(2)) {874code[1] |= subOp << 16;875srcId(i->src(2), 42);876if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13;877} else {878code[1] |= 7 << 10;879}880} else881if (isLIMM(i->src(1), TYPE_S32)) {882emitForm_L(i, 0x200, 0, i->src(1).mod);883code[1] |= subOp << 24;884NOT_(3a, 0);885} else {886emitForm_21(i, 0x220, 0xc20);887code[1] |= subOp << 12;888NOT_(2a, 0);889NOT_(2b, 1);890}891}892893void894CodeEmitterGK110::emitPOPC(const Instruction *i)895{896assert(!isLIMM(i->src(1), TYPE_S32, true));897898emitForm_21(i, 0x204, 0xc04);899900NOT_(2a, 0);901if (!(code[0] & 0x1))902NOT_(2b, 1);903}904905void906CodeEmitterGK110::emitINSBF(const Instruction *i)907{908emitForm_21(i, 0x1f8, 0xb78);909}910911void912CodeEmitterGK110::emitEXTBF(const Instruction *i)913{914emitForm_21(i, 0x600, 0xc00);915916if (i->dType == TYPE_S32)917code[1] |= 0x80000;918if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)919code[1] |= 0x800;920}921922void923CodeEmitterGK110::emitBFIND(const Instruction *i)924{925emitForm_C(i, 0x218, 0x2);926927if (i->dType == TYPE_S32)928code[1] |= 0x80000;929if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))930code[1] |= 0x800;931if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)932code[1] |= 0x1000;933}934935void936CodeEmitterGK110::emitPERMT(const Instruction *i)937{938emitForm_21(i, 0x1e0, 0xb60);939940code[1] |= i->subOp << 19;941}942943void944CodeEmitterGK110::emitShift(const Instruction *i)945{946if (i->op == OP_SHR) {947emitForm_21(i, 0x214, 0xc14);948if (isSignedType(i->dType))949code[1] |= 1 << 19;950} else {951emitForm_21(i, 0x224, 0xc24);952}953954if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)955code[1] |= 1 << 10;956}957958void959CodeEmitterGK110::emitShift64(const Instruction *i)960{961if (i->op == OP_SHR) {962emitForm_21(i, 0x27c, 0xc7c);963if (isSignedType(i->sType))964code[1] |= 0x100;965if (i->subOp & NV50_IR_SUBOP_SHIFT_HIGH)966code[1] |= 1 << 19;967} else {968emitForm_21(i, 0xdfc, 0xf7c);969}970code[1] |= 0x200;971972if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP)973code[1] |= 1 << 21;974}975976void977CodeEmitterGK110::emitPreOp(const Instruction *i)978{979emitForm_C(i, 0x248, 0x2);980981if (i->op == OP_PREEX2)982code[1] |= 1 << 10;983984NEG_(30, 0);985ABS_(34, 0);986}987988void989CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp)990{991code[0] = 0x00000002 | (subOp << 23);992code[1] = 0x84000000;993994emitPredicate(i);995996defId(i->def(0), 2);997srcId(i->src(0), 10);998999NEG_(33, 0);1000ABS_(31, 0);1001SAT_(35);1002}10031004void1005CodeEmitterGK110::emitMINMAX(const Instruction *i)1006{1007uint32_t op2, op1;10081009switch (i->dType) {1010case TYPE_U32:1011case TYPE_S32:1012op2 = 0x210;1013op1 = 0xc10;1014break;1015case TYPE_F32:1016op2 = 0x230;1017op1 = 0xc30;1018break;1019case TYPE_F64:1020op2 = 0x228;1021op1 = 0xc28;1022break;1023default:1024assert(0);1025op2 = 0;1026op1 = 0;1027break;1028}1029emitForm_21(i, op2, op1);10301031if (i->dType == TYPE_S32)1032code[1] |= 1 << 19;1033code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt1034code[1] |= i->subOp << 14;1035if (i->flagsDef >= 0)1036code[1] |= i->subOp << 18;10371038FTZ_(2f);1039ABS_(31, 0);1040NEG_(33, 0);1041if (code[0] & 0x1) {1042modNegAbsF32_3b(i, 1);1043} else {1044ABS_(34, 1);1045NEG_(30, 1);1046}1047}10481049void1050CodeEmitterGK110::emitCVT(const Instruction *i)1051{1052const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);1053const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType);1054const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType);10551056bool sat = i->saturate;1057bool abs = i->src(0).mod.abs();1058bool neg = i->src(0).mod.neg();10591060RoundMode rnd = i->rnd;10611062switch (i->op) {1063case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break;1064case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;1065case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;1066case OP_SAT: sat = true; break;1067case OP_NEG: neg = !neg; break;1068case OP_ABS: abs = true; neg = false; break;1069default:1070break;1071}10721073DataType dType;10741075if (i->op == OP_NEG && i->dType == TYPE_U32)1076dType = TYPE_S32;1077else1078dType = i->dType;107910801081uint32_t op;10821083if (f2f) op = 0x254;1084else if (f2i) op = 0x258;1085else if (i2f) op = 0x25c;1086else op = 0x260;10871088emitForm_C(i, op, 0x2);10891090FTZ_(2f);1091if (neg) code[1] |= 1 << 16;1092if (abs) code[1] |= 1 << 20;1093if (sat) code[1] |= 1 << 21;10941095emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1);10961097code[0] |= typeSizeofLog2(dType) << 10;1098code[0] |= typeSizeofLog2(i->sType) << 12;1099code[1] |= i->subOp << 12;11001101if (isSignedIntType(dType))1102code[0] |= 0x4000;1103if (isSignedIntType(i->sType))1104code[0] |= 0x8000;1105}11061107void1108CodeEmitterGK110::emitSET(const CmpInstruction *i)1109{1110uint16_t op1, op2;11111112if (i->def(0).getFile() == FILE_PREDICATE) {1113switch (i->sType) {1114case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break;1115case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break;1116default:1117op2 = 0x1b0;1118op1 = 0xb30;1119break;1120}1121emitForm_21(i, op2, op1);11221123NEG_(2e, 0);1124ABS_(9, 0);1125if (!(code[0] & 0x1)) {1126NEG_(8, 1);1127ABS_(2f, 1);1128} else {1129modNegAbsF32_3b(i, 1);1130}1131FTZ_(32);11321133// normal DST field is negated predicate result1134code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);1135if (i->defExists(1))1136defId(i->def(1), 2);1137else1138code[0] |= 0x1c;1139} else {1140switch (i->sType) {1141case TYPE_F32: op2 = 0x000; op1 = 0x800; break;1142case TYPE_F64: op2 = 0x080; op1 = 0x900; break;1143default:1144op2 = 0x1a8;1145op1 = 0xb28;1146break;1147}1148emitForm_21(i, op2, op1);11491150NEG_(2e, 0);1151ABS_(39, 0);1152if (!(code[0] & 0x1)) {1153NEG_(38, 1);1154ABS_(2f, 1);1155} else {1156modNegAbsF32_3b(i, 1);1157}1158FTZ_(3a);11591160if (i->dType == TYPE_F32) {1161if (isFloatType(i->sType))1162code[1] |= 1 << 23;1163else1164code[1] |= 1 << 15;1165}1166}1167if (i->sType == TYPE_S32)1168code[1] |= 1 << 19;11691170if (i->op != OP_SET) {1171switch (i->op) {1172case OP_SET_AND: code[1] |= 0x0 << 16; break;1173case OP_SET_OR: code[1] |= 0x1 << 16; break;1174case OP_SET_XOR: code[1] |= 0x2 << 16; break;1175default:1176assert(0);1177break;1178}1179srcId(i->src(2), 0x2a);1180} else {1181code[1] |= 0x7 << 10;1182}1183if (i->flagsSrc >= 0)1184code[1] |= 1 << 14;1185emitCondCode(i->setCond,1186isFloatType(i->sType) ? 0x33 : 0x34,1187isFloatType(i->sType) ? 0xf : 0x7);1188}11891190void1191CodeEmitterGK110::emitSLCT(const CmpInstruction *i)1192{1193CondCode cc = i->setCond;1194if (i->src(2).mod.neg())1195cc = reverseCondCode(cc);11961197if (i->dType == TYPE_F32) {1198emitForm_21(i, 0x1d0, 0xb50);1199FTZ_(32);1200emitCondCode(cc, 0x33, 0xf);1201} else {1202emitForm_21(i, 0x1a0, 0xb20);1203emitCondCode(cc, 0x34, 0x7);1204if (i->dType == TYPE_S32)1205code[1] |= 1 << 19;1206}1207}12081209void1210gk110_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)1211{1212int loc = entry->loc;1213bool val = false;1214switch (entry->ipa) {1215case 0:1216val = data.force_persample_interp;1217break;1218case 1:1219val = data.msaa;1220break;1221}1222if (val)1223code[loc + 1] |= 1 << 13;1224else1225code[loc + 1] &= ~(1 << 13);1226}12271228void CodeEmitterGK110::emitSELP(const Instruction *i)1229{1230emitForm_21(i, 0x250, 0x050);12311232if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))1233code[1] |= 1 << 13;12341235if (i->subOp >= 1) {1236addInterp(i->subOp - 1, 0, gk110_selpFlip);1237}1238}12391240void CodeEmitterGK110::emitTEXBAR(const Instruction *i)1241{1242code[0] = 0x0000003e | (i->subOp << 23);1243code[1] = 0x77000000;12441245emitPredicate(i);1246}12471248void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i)1249{1250code[0] = 0x00000002;1251code[1] = 0x76c00000;12521253code[1] |= i->tex.r << 9;1254// code[1] |= i->tex.s << (9 + 8);12551256if (i->tex.liveOnly)1257code[0] |= 0x80000000;12581259defId(i->def(0), 2);1260srcId(i->src(0), 10);1261}12621263static inline bool1264isNextIndependentTex(const TexInstruction *i)1265{1266if (!i->next || !isTextureOp(i->next->op))1267return false;1268if (i->getDef(0)->interfers(i->next->getSrc(0)))1269return false;1270return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));1271}12721273void1274CodeEmitterGK110::emitTEX(const TexInstruction *i)1275{1276const bool ind = i->tex.rIndirectSrc >= 0;12771278if (ind) {1279code[0] = 0x00000002;1280switch (i->op) {1281case OP_TXD:1282code[1] = 0x7e000000;1283break;1284case OP_TXLQ:1285code[1] = 0x7e800000;1286break;1287case OP_TXF:1288code[1] = 0x78000000;1289break;1290case OP_TXG:1291code[1] = 0x7dc00000;1292break;1293default:1294code[1] = 0x7d800000;1295break;1296}1297} else {1298switch (i->op) {1299case OP_TXD:1300code[0] = 0x00000002;1301code[1] = 0x76000000;1302code[1] |= i->tex.r << 9;1303break;1304case OP_TXLQ:1305code[0] = 0x00000002;1306code[1] = 0x76800000;1307code[1] |= i->tex.r << 9;1308break;1309case OP_TXF:1310code[0] = 0x00000002;1311code[1] = 0x70000000;1312code[1] |= i->tex.r << 13;1313break;1314case OP_TXG:1315code[0] = 0x00000001;1316code[1] = 0x70000000;1317code[1] |= i->tex.r << 15;1318break;1319default:1320code[0] = 0x00000001;1321code[1] = 0x60000000;1322code[1] |= i->tex.r << 15;1323break;1324}1325}13261327code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode13281329if (i->tex.liveOnly)1330code[0] |= 0x80000000;13311332switch (i->op) {1333case OP_TEX: break;1334case OP_TXB: code[1] |= 0x2000; break;1335case OP_TXL: code[1] |= 0x3000; break;1336case OP_TXF: break;1337case OP_TXG: break;1338case OP_TXD: break;1339case OP_TXLQ: break;1340default:1341assert(!"invalid texture op");1342break;1343}13441345if (i->op == OP_TXF) {1346if (!i->tex.levelZero)1347code[1] |= 0x1000;1348} else1349if (i->tex.levelZero) {1350code[1] |= 0x1000;1351}13521353if (i->op != OP_TXD && i->tex.derivAll)1354code[1] |= 0x200;13551356emitPredicate(i);13571358code[1] |= i->tex.mask << 2;13591360const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)13611362defId(i->def(0), 2);1363srcId(i->src(0), 10);1364srcId(i, src1, 23);13651366if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13;13671368// texture target:1369code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7;1370if (i->tex.target.isArray())1371code[1] |= 0x40;1372if (i->tex.target.isShadow())1373code[1] |= 0x400;1374if (i->tex.target == TEX_TARGET_2D_MS ||1375i->tex.target == TEX_TARGET_2D_MS_ARRAY)1376code[1] |= 0x800;13771378if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {1379// ?1380}13811382if (i->tex.useOffsets == 1) {1383switch (i->op) {1384case OP_TXF: code[1] |= 0x200; break;1385case OP_TXD: code[1] |= 0x00400000; break;1386default: code[1] |= 0x800; break;1387}1388}1389if (i->tex.useOffsets == 4)1390code[1] |= 0x1000;1391}13921393void1394CodeEmitterGK110::emitTXQ(const TexInstruction *i)1395{1396code[0] = 0x00000002;1397code[1] = 0x75400001;13981399switch (i->tex.query) {1400case TXQ_DIMS: code[0] |= 0x01 << 25; break;1401case TXQ_TYPE: code[0] |= 0x02 << 25; break;1402case TXQ_SAMPLE_POSITION: code[0] |= 0x05 << 25; break;1403case TXQ_FILTER: code[0] |= 0x10 << 25; break;1404case TXQ_LOD: code[0] |= 0x12 << 25; break;1405case TXQ_BORDER_COLOUR: code[0] |= 0x16 << 25; break;1406default:1407assert(!"invalid texture query");1408break;1409}14101411code[1] |= i->tex.mask << 2;1412code[1] |= i->tex.r << 9;1413if (/*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0)1414code[1] |= 0x08000000;14151416defId(i->def(0), 2);1417srcId(i->src(0), 10);14181419emitPredicate(i);1420}14211422void1423CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)1424{1425code[0] = 0x00000002 | ((qOp & 1) << 31);1426code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall14271428defId(i->def(0), 2);1429srcId(i->src(0), 10);1430srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);14311432emitPredicate(i);1433}14341435void1436CodeEmitterGK110::emitPIXLD(const Instruction *i)1437{1438emitForm_L(i, 0x7f4, 2, Modifier(0));1439code[1] |= i->subOp << 2;1440code[1] |= 0x00070000;1441}14421443void1444CodeEmitterGK110::emitBAR(const Instruction *i)1445{1446code[0] = 0x00000002;1447code[1] = 0x85400000;14481449switch (i->subOp) {1450case NV50_IR_SUBOP_BAR_ARRIVE: code[1] |= 0x08; break;1451case NV50_IR_SUBOP_BAR_RED_AND: code[1] |= 0x50; break;1452case NV50_IR_SUBOP_BAR_RED_OR: code[1] |= 0x90; break;1453case NV50_IR_SUBOP_BAR_RED_POPC: code[1] |= 0x10; break;1454default:1455assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);1456break;1457}14581459emitPredicate(i);14601461// barrier id1462if (i->src(0).getFile() == FILE_GPR) {1463srcId(i->src(0), 10);1464} else {1465ImmediateValue *imm = i->getSrc(0)->asImm();1466assert(imm);1467code[0] |= imm->reg.data.u32 << 10;1468code[1] |= 0x8000;1469}14701471// thread count1472if (i->src(1).getFile() == FILE_GPR) {1473srcId(i->src(1), 23);1474} else {1475ImmediateValue *imm = i->getSrc(0)->asImm();1476assert(imm);1477assert(imm->reg.data.u32 <= 0xfff);1478code[0] |= imm->reg.data.u32 << 23;1479code[1] |= imm->reg.data.u32 >> 9;1480code[1] |= 0x4000;1481}14821483if (i->srcExists(2) && (i->predSrc != 2)) {1484srcId(i->src(2), 32 + 10);1485if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))1486code[1] |= 1 << 13;1487} else {1488code[1] |= 7 << 10;1489}1490}14911492void CodeEmitterGK110::emitMEMBAR(const Instruction *i)1493{1494code[0] = 0x00000002 | NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) << 8;1495code[1] = 0x7cc00000;14961497emitPredicate(i);1498}14991500void1501CodeEmitterGK110::emitFlow(const Instruction *i)1502{1503const FlowInstruction *f = i->asFlow();15041505unsigned mask; // bit 0: predicate, bit 1: target15061507code[0] = 0x00000000;15081509switch (i->op) {1510case OP_BRA:1511code[1] = f->absolute ? 0x10800000 : 0x12000000;1512if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)1513code[0] |= 0x80;1514mask = 3;1515break;1516case OP_CALL:1517code[1] = f->absolute ? 0x11000000 : 0x13000000;1518if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)1519code[0] |= 0x80;1520mask = 2;1521break;15221523case OP_EXIT: code[1] = 0x18000000; mask = 1; break;1524case OP_RET: code[1] = 0x19000000; mask = 1; break;1525case OP_DISCARD: code[1] = 0x19800000; mask = 1; break;1526case OP_BREAK: code[1] = 0x1a000000; mask = 1; break;1527case OP_CONT: code[1] = 0x1a800000; mask = 1; break;15281529case OP_JOINAT: code[1] = 0x14800000; mask = 2; break;1530case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break;1531case OP_PRECONT: code[1] = 0x15800000; mask = 2; break;1532case OP_PRERET: code[1] = 0x13800000; mask = 2; break;15331534case OP_QUADON: code[1] = 0x1b800000; mask = 0; break;1535case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break;1536case OP_BRKPT: code[1] = 0x00000000; mask = 0; break;1537default:1538assert(!"invalid flow operation");1539return;1540}15411542if (mask & 1) {1543emitPredicate(i);1544if (i->flagsSrc < 0)1545code[0] |= 0x3c;1546}15471548if (!f)1549return;15501551if (f->allWarp)1552code[0] |= 1 << 9;1553if (f->limit)1554code[0] |= 1 << 8;15551556if (f->op == OP_CALL) {1557if (f->builtin) {1558assert(f->absolute);1559uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);1560addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23);1561addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9);1562} else {1563assert(!f->absolute);1564int32_t pcRel = f->target.fn->binPos - (codeSize + 8);1565code[0] |= (pcRel & 0x1ff) << 23;1566code[1] |= (pcRel >> 9) & 0x7fff;1567}1568} else1569if (mask & 2) {1570int32_t pcRel = f->target.bb->binPos - (codeSize + 8);1571if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))1572pcRel += 8;1573// currently we don't want absolute branches1574assert(!f->absolute);1575code[0] |= (pcRel & 0x1ff) << 23;1576code[1] |= (pcRel >> 9) & 0x7fff;1577}1578}15791580void1581CodeEmitterGK110::emitSHFL(const Instruction *i)1582{1583const ImmediateValue *imm;15841585code[0] = 0x00000002;1586code[1] = 0x78800000 | (i->subOp << 1);15871588emitPredicate(i);15891590defId(i->def(0), 2);1591srcId(i->src(0), 10);15921593switch (i->src(1).getFile()) {1594case FILE_GPR:1595srcId(i->src(1), 23);1596break;1597case FILE_IMMEDIATE:1598imm = i->getSrc(1)->asImm();1599assert(imm && imm->reg.data.u32 < 0x20);1600code[0] |= imm->reg.data.u32 << 23;1601code[0] |= 1 << 31;1602break;1603default:1604assert(!"invalid src1 file");1605break;1606}16071608switch (i->src(2).getFile()) {1609case FILE_GPR:1610srcId(i->src(2), 42);1611break;1612case FILE_IMMEDIATE:1613imm = i->getSrc(2)->asImm();1614assert(imm && imm->reg.data.u32 < 0x2000);1615code[1] |= imm->reg.data.u32 << 5;1616code[1] |= 1;1617break;1618default:1619assert(!"invalid src2 file");1620break;1621}16221623if (!i->defExists(1))1624code[1] |= 7 << 19;1625else {1626assert(i->def(1).getFile() == FILE_PREDICATE);1627defId(i->def(1), 51);1628}1629}16301631void1632CodeEmitterGK110::emitVOTE(const Instruction *i)1633{1634const ImmediateValue *imm;1635uint32_t u32;16361637code[0] = 0x00000002;1638code[1] = 0x86c00000 | (i->subOp << 19);16391640emitPredicate(i);16411642unsigned rp = 0;1643for (int d = 0; i->defExists(d); d++) {1644if (i->def(d).getFile() == FILE_PREDICATE) {1645assert(!(rp & 2));1646rp |= 2;1647defId(i->def(d), 48);1648} else if (i->def(d).getFile() == FILE_GPR) {1649assert(!(rp & 1));1650rp |= 1;1651defId(i->def(d), 2);1652} else {1653assert(!"Unhandled def");1654}1655}1656if (!(rp & 1))1657code[0] |= 255 << 2;1658if (!(rp & 2))1659code[1] |= 7 << 16;16601661switch (i->src(0).getFile()) {1662case FILE_PREDICATE:1663if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))1664code[0] |= 1 << 13;1665srcId(i->src(0), 42);1666break;1667case FILE_IMMEDIATE:1668imm = i->getSrc(0)->asImm();1669assert(imm);1670u32 = imm->reg.data.u32;1671assert(u32 == 0 || u32 == 1);1672code[1] |= (u32 == 1 ? 0x7 : 0xf) << 10;1673break;1674default:1675assert(!"Unhandled src");1676break;1677}1678}16791680void1681CodeEmitterGK110::emitSUGType(DataType ty, const int pos)1682{1683uint8_t n = 0;16841685switch (ty) {1686case TYPE_S32: n = 1; break;1687case TYPE_U8: n = 2; break;1688case TYPE_S8: n = 3; break;1689default:1690assert(ty == TYPE_U32);1691break;1692}1693code[pos / 32] |= n << (pos % 32);1694}16951696void1697CodeEmitterGK110::emitSUCachingMode(CacheMode c)1698{1699uint8_t n = 0;17001701switch (c) {1702case CACHE_CA:1703// case CACHE_WB:1704n = 0;1705break;1706case CACHE_CG:1707n = 1;1708break;1709case CACHE_CS:1710n = 2;1711break;1712case CACHE_CV:1713// case CACHE_WT:1714n = 3;1715break;1716default:1717assert(!"invalid caching mode");1718break;1719}1720code[0] |= (n & 1) << 31;1721code[1] |= (n & 2) >> 1;1722}17231724void1725CodeEmitterGK110::setSUConst16(const Instruction *i, const int s)1726{1727const uint32_t offset = i->getSrc(s)->reg.data.offset;17281729assert(offset == (offset & 0xfffc));17301731code[0] |= offset << 21;1732code[1] |= offset >> 11;1733code[1] |= i->getSrc(s)->reg.fileIndex << 5;1734}17351736void1737CodeEmitterGK110::emitSULDGB(const TexInstruction *i)1738{1739code[0] = 0x00000002;1740code[1] = 0x30000000 | (i->subOp << 14);17411742if (i->src(1).getFile() == FILE_MEMORY_CONST) {1743emitLoadStoreType(i->dType, 0x38);1744emitCachingMode(i->cache, 0x36);17451746// format1747setSUConst16(i, 1);1748} else {1749assert(i->src(1).getFile() == FILE_GPR);1750code[1] |= 0x49800000;17511752emitLoadStoreType(i->dType, 0x21);1753emitSUCachingMode(i->cache);17541755srcId(i->src(1), 23);1756}17571758emitSUGType(i->sType, 0x34);17591760emitPredicate(i);1761defId(i->def(0), 2); // destination1762srcId(i->src(0), 10); // address17631764// surface predicate1765if (!i->srcExists(2) || (i->predSrc == 2)) {1766code[1] |= 0x7 << 10;1767} else {1768if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))1769code[1] |= 1 << 13;1770srcId(i->src(2), 32 + 10);1771}1772}17731774void1775CodeEmitterGK110::emitSUSTGx(const TexInstruction *i)1776{1777assert(i->op == OP_SUSTP);17781779code[0] = 0x00000002;1780code[1] = 0x38000000;17811782if (i->src(1).getFile() == FILE_MEMORY_CONST) {1783code[0] |= i->subOp << 2;17841785if (i->op == OP_SUSTP)1786code[0] |= i->tex.mask << 4;17871788emitSUGType(i->sType, 0x8);1789emitCachingMode(i->cache, 0x36);17901791// format1792setSUConst16(i, 1);1793} else {1794assert(i->src(1).getFile() == FILE_GPR);17951796code[0] |= i->subOp << 23;1797code[1] |= 0x41c00000;17981799if (i->op == OP_SUSTP)1800code[0] |= i->tex.mask << 25;18011802emitSUGType(i->sType, 0x1d);1803emitSUCachingMode(i->cache);18041805srcId(i->src(1), 2);1806}18071808emitPredicate(i);1809srcId(i->src(0), 10); // address1810srcId(i->src(3), 42); // values18111812// surface predicate1813if (!i->srcExists(2) || (i->predSrc == 2)) {1814code[1] |= 0x7 << 18;1815} else {1816if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))1817code[1] |= 1 << 21;1818srcId(i->src(2), 32 + 18);1819}1820}18211822void1823CodeEmitterGK110::emitSUCLAMPMode(uint16_t subOp)1824{1825uint8_t m;1826switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {1827case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;1828case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;1829case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;1830case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;1831case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;1832case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;1833case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;1834case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;1835case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;1836case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;1837case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;1838case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;1839case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;1840case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;1841case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;1842default:1843return;1844}1845code[1] |= m << 20;1846if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)1847code[1] |= 1 << 24;1848}18491850void1851CodeEmitterGK110::emitSUCalc(Instruction *i)1852{1853ImmediateValue *imm = NULL;1854uint64_t opc1, opc2;18551856if (i->srcExists(2)) {1857imm = i->getSrc(2)->asImm();1858if (imm)1859i->setSrc(2, NULL); // special case, make emitForm_21 not assert1860}18611862switch (i->op) {1863case OP_SUCLAMP: opc1 = 0xb00; opc2 = 0x580; break;1864case OP_SUBFM: opc1 = 0xb68; opc2 = 0x1e8; break;1865case OP_SUEAU: opc1 = 0xb6c; opc2 = 0x1ec; break;1866default:1867assert(0);1868return;1869}1870emitForm_21(i, opc2, opc1);18711872if (i->op == OP_SUCLAMP) {1873if (i->dType == TYPE_S32)1874code[1] |= 1 << 19;1875emitSUCLAMPMode(i->subOp);1876}18771878if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)1879code[1] |= 1 << 18;18801881if (i->op != OP_SUEAU) {1882const uint8_t pos = i->op == OP_SUBFM ? 19 : 16;1883if (i->def(0).getFile() == FILE_PREDICATE) { // p, #1884code[0] |= 255 << 2;1885code[1] |= i->getDef(1)->reg.data.id << pos;1886} else1887if (i->defExists(1)) { // r, p1888assert(i->def(1).getFile() == FILE_PREDICATE);1889code[1] |= i->getDef(1)->reg.data.id << pos;1890} else { // r, #1891code[1] |= 7 << pos;1892}1893}18941895if (imm) {1896assert(i->op == OP_SUCLAMP);1897i->setSrc(2, imm);1898code[1] |= (imm->reg.data.u32 & 0x3f) << 10; // sint61899}1900}190119021903void1904CodeEmitterGK110::emitVectorSubOp(const Instruction *i)1905{1906switch (NV50_IR_SUBOP_Vn(i->subOp)) {1907case 0:1908code[1] |= (i->subOp & 0x000f) << 7; // vsrc11909code[1] |= (i->subOp & 0x00e0) >> 6; // vsrc21910code[1] |= (i->subOp & 0x0100) << 13; // vsrc21911code[1] |= (i->subOp & 0x3c00) << 12; // vdst1912break;1913default:1914assert(0);1915break;1916}1917}19181919void1920CodeEmitterGK110::emitVSHL(const Instruction *i)1921{1922code[0] = 0x00000002;1923code[1] = 0xb8000000;19241925assert(NV50_IR_SUBOP_Vn(i->subOp) == 0);19261927if (isSignedType(i->dType)) code[1] |= 1 << 25;1928if (isSignedType(i->sType)) code[1] |= 1 << 19;19291930emitVectorSubOp(i);19311932emitPredicate(i);1933defId(i->def(0), 2);1934srcId(i->src(0), 10);19351936if (i->getSrc(1)->reg.file == FILE_IMMEDIATE) {1937ImmediateValue *imm = i->getSrc(1)->asImm();1938assert(imm);1939code[0] |= (imm->reg.data.u32 & 0x01ff) << 23;1940code[1] |= (imm->reg.data.u32 & 0xfe00) >> 9;1941} else {1942assert(i->getSrc(1)->reg.file == FILE_GPR);1943code[1] |= 1 << 21;1944srcId(i->src(1), 23);1945}1946srcId(i->src(2), 42);19471948if (i->saturate)1949code[0] |= 1 << 22;1950if (i->flagsDef >= 0)1951code[1] |= 1 << 18;1952}19531954void1955CodeEmitterGK110::emitAFETCH(const Instruction *i)1956{1957uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff;19581959code[0] = 0x00000002 | (offset << 23);1960code[1] = 0x7d000000 | (offset >> 9);19611962if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)1963code[1] |= 0x8;19641965emitPredicate(i);19661967defId(i->def(0), 2);1968srcId(i->src(0).getIndirect(0), 10);1969}19701971void1972CodeEmitterGK110::emitPFETCH(const Instruction *i)1973{1974uint32_t prim = i->src(0).get()->reg.data.u32;19751976code[0] = 0x00000002 | ((prim & 0xff) << 23);1977code[1] = 0x7f800000;19781979emitPredicate(i);19801981const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)19821983defId(i->def(0), 2);1984srcId(i, src1, 10);1985}19861987void1988CodeEmitterGK110::emitVFETCH(const Instruction *i)1989{1990unsigned int size = typeSizeof(i->dType);1991uint32_t offset = i->src(0).get()->reg.data.offset;19921993code[0] = 0x00000002 | (offset << 23);1994code[1] = 0x7ec00000 | (offset >> 9);1995code[1] |= (size / 4 - 1) << 18;19961997if (i->perPatch)1998code[1] |= 0x4;1999if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)2000code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads20012002emitPredicate(i);20032004defId(i->def(0), 2);2005srcId(i->src(0).getIndirect(0), 10);2006srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address2007}20082009void2010CodeEmitterGK110::emitEXPORT(const Instruction *i)2011{2012unsigned int size = typeSizeof(i->dType);2013uint32_t offset = i->src(0).get()->reg.data.offset;20142015code[0] = 0x00000002 | (offset << 23);2016code[1] = 0x7f000000 | (offset >> 9);2017code[1] |= (size / 4 - 1) << 18;20182019if (i->perPatch)2020code[1] |= 0x4;20212022emitPredicate(i);20232024assert(i->src(1).getFile() == FILE_GPR);20252026srcId(i->src(0).getIndirect(0), 10);2027srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address2028srcId(i->src(1), 2);2029}20302031void2032CodeEmitterGK110::emitOUT(const Instruction *i)2033{2034assert(i->src(0).getFile() == FILE_GPR);20352036emitForm_21(i, 0x1f0, 0xb70);20372038if (i->op == OP_EMIT)2039code[1] |= 1 << 10;2040if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)2041code[1] |= 1 << 11;2042}20432044void2045CodeEmitterGK110::emitInterpMode(const Instruction *i)2046{2047code[1] |= (i->ipa & 0x3) << 21; // TODO: INTERP_SAMPLEID2048code[1] |= (i->ipa & 0xc) << (19 - 2);2049}20502051void2052gk110_interpApply(const struct FixupEntry *entry, uint32_t *code, const FixupData& data)2053{2054int ipa = entry->ipa;2055int reg = entry->reg;2056int loc = entry->loc;20572058if (data.flatshade &&2059(ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {2060ipa = NV50_IR_INTERP_FLAT;2061reg = 0xff;2062} else if (data.force_persample_interp &&2063(ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&2064(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {2065ipa |= NV50_IR_INTERP_CENTROID;2066}2067code[loc + 1] &= ~(0xf << 19);2068code[loc + 1] |= (ipa & 0x3) << 21;2069code[loc + 1] |= (ipa & 0xc) << (19 - 2);2070code[loc + 0] &= ~(0xff << 23);2071code[loc + 0] |= reg << 23;2072}20732074void2075CodeEmitterGK110::emitINTERP(const Instruction *i)2076{2077const uint32_t base = i->getSrc(0)->reg.data.offset;20782079code[0] = 0x00000002 | (base << 31);2080code[1] = 0x74800000 | (base >> 1);20812082if (i->saturate)2083code[1] |= 1 << 18;20842085if (i->op == OP_PINTERP) {2086srcId(i->src(1), 23);2087addInterp(i->ipa, SDATA(i->src(1)).id, gk110_interpApply);2088} else {2089code[0] |= 0xff << 23;2090addInterp(i->ipa, 0xff, gk110_interpApply);2091}20922093srcId(i->src(0).getIndirect(0), 10);2094emitInterpMode(i);20952096emitPredicate(i);2097defId(i->def(0), 2);20982099if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)2100srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10);2101else2102code[1] |= 0xff << 10;2103}21042105void2106CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos)2107{2108uint8_t n;21092110switch (ty) {2111case TYPE_U8:2112n = 0;2113break;2114case TYPE_S8:2115n = 1;2116break;2117case TYPE_U16:2118n = 2;2119break;2120case TYPE_S16:2121n = 3;2122break;2123case TYPE_F32:2124case TYPE_U32:2125case TYPE_S32:2126n = 4;2127break;2128case TYPE_F64:2129case TYPE_U64:2130case TYPE_S64:2131n = 5;2132break;2133case TYPE_B128:2134n = 6;2135break;2136default:2137n = 0;2138assert(!"invalid ld/st type");2139break;2140}2141code[pos / 32] |= n << (pos % 32);2142}21432144void2145CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos)2146{2147uint8_t n;21482149switch (c) {2150case CACHE_CA:2151// case CACHE_WB:2152n = 0;2153break;2154case CACHE_CG:2155n = 1;2156break;2157case CACHE_CS:2158n = 2;2159break;2160case CACHE_CV:2161// case CACHE_WT:2162n = 3;2163break;2164default:2165n = 0;2166assert(!"invalid caching mode");2167break;2168}2169code[pos / 32] |= n << (pos % 32);2170}21712172void2173CodeEmitterGK110::emitSTORE(const Instruction *i)2174{2175int32_t offset = SDATA(i->src(0)).offset;21762177switch (i->src(0).getFile()) {2178case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break;2179case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break;2180case FILE_MEMORY_SHARED:2181code[0] = 0x00000002;2182if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)2183code[1] = 0x78400000;2184else2185code[1] = 0x7ac00000;2186break;2187default:2188assert(!"invalid memory file");2189break;2190}21912192if (code[0] & 0x2) {2193offset &= 0xffffff;2194emitLoadStoreType(i->dType, 0x33);2195if (i->src(0).getFile() == FILE_MEMORY_LOCAL)2196emitCachingMode(i->cache, 0x2f);2197} else {2198emitLoadStoreType(i->dType, 0x38);2199emitCachingMode(i->cache, 0x3b);2200}2201code[0] |= offset << 23;2202code[1] |= offset >> 9;22032204// Unlocked store on shared memory can fail.2205if (i->src(0).getFile() == FILE_MEMORY_SHARED &&2206i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {2207assert(i->defExists(0));2208defId(i->def(0), 32 + 16);2209}22102211emitPredicate(i);22122213srcId(i->src(1), 2);2214srcId(i->src(0).getIndirect(0), 10);2215if (i->src(0).getFile() == FILE_MEMORY_GLOBAL &&2216i->src(0).isIndirect(0) &&2217i->getIndirect(0, 0)->reg.size == 8)2218code[1] |= 1 << 23;2219}22202221void2222CodeEmitterGK110::emitLOAD(const Instruction *i)2223{2224int32_t offset = SDATA(i->src(0)).offset;22252226switch (i->src(0).getFile()) {2227case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break;2228case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break;2229case FILE_MEMORY_SHARED:2230code[0] = 0x00000002;2231if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)2232code[1] = 0x77400000;2233else2234code[1] = 0x7a400000;2235break;2236case FILE_MEMORY_CONST:2237if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {2238emitMOV(i);2239return;2240}2241offset &= 0xffff;2242code[0] = 0x00000002;2243code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7);2244code[1] |= i->subOp << 15;2245break;2246default:2247assert(!"invalid memory file");2248break;2249}22502251if (code[0] & 0x2) {2252offset &= 0xffffff;2253emitLoadStoreType(i->dType, 0x33);2254if (i->src(0).getFile() == FILE_MEMORY_LOCAL)2255emitCachingMode(i->cache, 0x2f);2256} else {2257emitLoadStoreType(i->dType, 0x38);2258emitCachingMode(i->cache, 0x3b);2259}2260code[0] |= offset << 23;2261code[1] |= offset >> 9;22622263// Locked store on shared memory can fail.2264int r = 0, p = -1;2265if (i->src(0).getFile() == FILE_MEMORY_SHARED &&2266i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {2267if (i->def(0).getFile() == FILE_PREDICATE) { // p, #2268r = -1;2269p = 0;2270} else if (i->defExists(1)) { // r, p2271p = 1;2272} else {2273assert(!"Expected predicate dest for load locked");2274}2275}22762277emitPredicate(i);22782279if (r >= 0)2280defId(i->def(r), 2);2281else2282code[0] |= 255 << 2;22832284if (p >= 0)2285defId(i->def(p), 32 + 16);22862287if (i->getIndirect(0, 0)) {2288srcId(i->src(0).getIndirect(0), 10);2289if (i->getIndirect(0, 0)->reg.size == 8)2290code[1] |= 1 << 23;2291} else {2292code[0] |= 255 << 10;2293}2294}22952296uint8_t2297CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)2298{2299switch (SDATA(ref).sv.sv) {2300case SV_LANEID: return 0x00;2301case SV_PHYSID: return 0x03;2302case SV_VERTEX_COUNT: return 0x10;2303case SV_INVOCATION_ID: return 0x11;2304case SV_YDIR: return 0x12;2305case SV_THREAD_KILL: return 0x13;2306case SV_COMBINED_TID: return 0x20;2307case SV_TID: return 0x21 + SDATA(ref).sv.index;2308case SV_CTAID: return 0x25 + SDATA(ref).sv.index;2309case SV_NTID: return 0x29 + SDATA(ref).sv.index;2310case SV_GRIDID: return 0x2c;2311case SV_NCTAID: return 0x2d + SDATA(ref).sv.index;2312case SV_LBASE: return 0x34;2313case SV_SBASE: return 0x30;2314case SV_LANEMASK_EQ: return 0x38;2315case SV_LANEMASK_LT: return 0x39;2316case SV_LANEMASK_LE: return 0x3a;2317case SV_LANEMASK_GT: return 0x3b;2318case SV_LANEMASK_GE: return 0x3c;2319case SV_CLOCK: return 0x50 + SDATA(ref).sv.index;2320default:2321assert(!"no sreg for system value");2322return 0;2323}2324}23252326void2327CodeEmitterGK110::emitMOV(const Instruction *i)2328{2329if (i->def(0).getFile() == FILE_PREDICATE) {2330if (i->src(0).getFile() == FILE_GPR) {2331// Use ISETP.NE.AND dst, PT, src, RZ, PT2332code[0] = 0x00000002;2333code[1] = 0xdb500000;23342335code[0] |= 0x7 << 2;2336code[0] |= 0xff << 23;2337code[1] |= 0x7 << 10;2338srcId(i->src(0), 10);2339} else2340if (i->src(0).getFile() == FILE_PREDICATE) {2341// Use PSETP.AND.AND dst, PT, src, PT, PT2342code[0] = 0x00000002;2343code[1] = 0x84800000;23442345code[0] |= 0x7 << 2;2346code[1] |= 0x7 << 0;2347code[1] |= 0x7 << 10;23482349srcId(i->src(0), 14);2350} else {2351assert(!"Unexpected source for predicate destination");2352emitNOP(i);2353}2354emitPredicate(i);2355defId(i->def(0), 5);2356} else2357if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {2358code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23);2359code[1] = 0x86400000;2360emitPredicate(i);2361defId(i->def(0), 2);2362} else2363if (i->src(0).getFile() == FILE_IMMEDIATE) {2364code[0] = 0x00000002 | (i->lanes << 14);2365code[1] = 0x74000000;2366emitPredicate(i);2367defId(i->def(0), 2);2368setImmediate32(i, 0, Modifier(0));2369} else2370if (i->src(0).getFile() == FILE_PREDICATE) {2371code[0] = 0x00000002;2372code[1] = 0x84401c07;2373emitPredicate(i);2374defId(i->def(0), 2);2375srcId(i->src(0), 14);2376} else {2377emitForm_C(i, 0x24c, 2);2378code[1] |= i->lanes << 10;2379}2380}23812382static inline bool2383uses64bitAddress(const Instruction *ldst)2384{2385return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&2386ldst->src(0).isIndirect(0) &&2387ldst->getIndirect(0, 0)->reg.size == 8;2388}23892390void2391CodeEmitterGK110::emitATOM(const Instruction *i)2392{2393const bool hasDst = i->defExists(0);2394const bool exch = i->subOp == NV50_IR_SUBOP_ATOM_EXCH;23952396code[0] = 0x00000002;2397if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)2398code[1] = 0x77800000;2399else2400code[1] = 0x68000000;24012402switch (i->subOp) {2403case NV50_IR_SUBOP_ATOM_CAS: break;2404case NV50_IR_SUBOP_ATOM_EXCH: code[1] |= 0x04000000; break;2405default: code[1] |= i->subOp << 23; break;2406}24072408switch (i->dType) {2409case TYPE_U32: break;2410case TYPE_S32: code[1] |= 0x00100000; break;2411case TYPE_U64: code[1] |= 0x00200000; break;2412case TYPE_F32: code[1] |= 0x00300000; break;2413case TYPE_B128: code[1] |= 0x00400000; break; /* TODO: U128 */2414case TYPE_S64: code[1] |= 0x00500000; break;2415default: assert(!"unsupported type"); break;2416}24172418emitPredicate(i);24192420/* TODO: cas: check that src regs line up */2421/* TODO: cas: flip bits if $r255 is used */2422srcId(i->src(1), 23);24232424if (hasDst) {2425defId(i->def(0), 2);2426} else2427if (!exch) {2428code[0] |= 255 << 2;2429}24302431if (hasDst || !exch) {2432const int32_t offset = SDATA(i->src(0)).offset;2433assert(offset < 0x80000 && offset >= -0x80000);2434code[0] |= (offset & 1) << 31;2435code[1] |= (offset & 0xffffe) >> 1;2436} else {2437srcAddr32(i->src(0), 31);2438}24392440if (i->getIndirect(0, 0)) {2441srcId(i->getIndirect(0, 0), 10);2442if (i->getIndirect(0, 0)->reg.size == 8)2443code[1] |= 1 << 19;2444} else {2445code[0] |= 255 << 10;2446}2447}24482449void2450CodeEmitterGK110::emitCCTL(const Instruction *i)2451{2452int32_t offset = SDATA(i->src(0)).offset;24532454code[0] = 0x00000002 | (i->subOp << 2);24552456if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {2457code[1] = 0x7b000000;2458} else {2459code[1] = 0x7c000000;2460offset &= 0xffffff;2461}2462code[0] |= offset << 23;2463code[1] |= offset >> 9;24642465if (uses64bitAddress(i))2466code[1] |= 1 << 23;2467srcId(i->src(0).getIndirect(0), 10);24682469emitPredicate(i);2470}24712472bool2473CodeEmitterGK110::emitInstruction(Instruction *insn)2474{2475const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8;24762477if (insn->encSize != 8) {2478ERROR("skipping unencodable instruction: ");2479insn->print();2480return false;2481} else2482if (codeSize + size > codeSizeLimit) {2483ERROR("code emitter output buffer too small\n");2484return false;2485}24862487if (writeIssueDelays) {2488int id = (codeSize & 0x3f) / 8 - 1;2489if (id < 0) {2490id += 1;2491code[0] = 0x00000000; // cf issue delay "instruction"2492code[1] = 0x08000000;2493code += 2;2494codeSize += 8;2495}2496uint32_t *data = code - (id * 2 + 2);24972498switch (id) {2499case 0: data[0] |= insn->sched << 2; break;2500case 1: data[0] |= insn->sched << 10; break;2501case 2: data[0] |= insn->sched << 18; break;2502case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break;2503case 4: data[1] |= insn->sched << 2; break;2504case 5: data[1] |= insn->sched << 10; break;2505case 6: data[1] |= insn->sched << 18; break;2506default:2507assert(0);2508break;2509}2510}25112512// assert that instructions with multiple defs don't corrupt registers2513for (int d = 0; insn->defExists(d); ++d)2514assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);25152516switch (insn->op) {2517case OP_MOV:2518case OP_RDSV:2519emitMOV(insn);2520break;2521case OP_NOP:2522break;2523case OP_LOAD:2524emitLOAD(insn);2525break;2526case OP_STORE:2527emitSTORE(insn);2528break;2529case OP_LINTERP:2530case OP_PINTERP:2531emitINTERP(insn);2532break;2533case OP_VFETCH:2534emitVFETCH(insn);2535break;2536case OP_EXPORT:2537emitEXPORT(insn);2538break;2539case OP_AFETCH:2540emitAFETCH(insn);2541break;2542case OP_PFETCH:2543emitPFETCH(insn);2544break;2545case OP_EMIT:2546case OP_RESTART:2547emitOUT(insn);2548break;2549case OP_ADD:2550case OP_SUB:2551if (insn->dType == TYPE_F64)2552emitDADD(insn);2553else if (isFloatType(insn->dType))2554emitFADD(insn);2555else2556emitUADD(insn);2557break;2558case OP_MUL:2559if (insn->dType == TYPE_F64)2560emitDMUL(insn);2561else if (isFloatType(insn->dType))2562emitFMUL(insn);2563else2564emitIMUL(insn);2565break;2566case OP_MAD:2567case OP_FMA:2568if (insn->dType == TYPE_F64)2569emitDMAD(insn);2570else if (isFloatType(insn->dType))2571emitFMAD(insn);2572else2573emitIMAD(insn);2574break;2575case OP_MADSP:2576emitMADSP(insn);2577break;2578case OP_SAD:2579emitISAD(insn);2580break;2581case OP_SHLADD:2582emitSHLADD(insn);2583break;2584case OP_NOT:2585emitNOT(insn);2586break;2587case OP_AND:2588emitLogicOp(insn, 0);2589break;2590case OP_OR:2591emitLogicOp(insn, 1);2592break;2593case OP_XOR:2594emitLogicOp(insn, 2);2595break;2596case OP_SHL:2597case OP_SHR:2598if (typeSizeof(insn->sType) == 8)2599emitShift64(insn);2600else2601emitShift(insn);2602break;2603case OP_SET:2604case OP_SET_AND:2605case OP_SET_OR:2606case OP_SET_XOR:2607emitSET(insn->asCmp());2608break;2609case OP_SELP:2610emitSELP(insn);2611break;2612case OP_SLCT:2613emitSLCT(insn->asCmp());2614break;2615case OP_MIN:2616case OP_MAX:2617emitMINMAX(insn);2618break;2619case OP_ABS:2620case OP_NEG:2621case OP_CEIL:2622case OP_FLOOR:2623case OP_TRUNC:2624case OP_SAT:2625emitCVT(insn);2626break;2627case OP_CVT:2628if (insn->def(0).getFile() == FILE_PREDICATE ||2629insn->src(0).getFile() == FILE_PREDICATE)2630emitMOV(insn);2631else2632emitCVT(insn);2633break;2634case OP_RSQ:2635emitSFnOp(insn, 5 + 2 * insn->subOp);2636break;2637case OP_RCP:2638emitSFnOp(insn, 4 + 2 * insn->subOp);2639break;2640case OP_LG2:2641emitSFnOp(insn, 3);2642break;2643case OP_EX2:2644emitSFnOp(insn, 2);2645break;2646case OP_SIN:2647emitSFnOp(insn, 1);2648break;2649case OP_COS:2650emitSFnOp(insn, 0);2651break;2652case OP_PRESIN:2653case OP_PREEX2:2654emitPreOp(insn);2655break;2656case OP_TEX:2657case OP_TXB:2658case OP_TXL:2659case OP_TXD:2660case OP_TXF:2661case OP_TXG:2662case OP_TXLQ:2663emitTEX(insn->asTex());2664break;2665case OP_TXQ:2666emitTXQ(insn->asTex());2667break;2668case OP_TEXBAR:2669emitTEXBAR(insn);2670break;2671case OP_PIXLD:2672emitPIXLD(insn);2673break;2674case OP_BRA:2675case OP_CALL:2676case OP_PRERET:2677case OP_RET:2678case OP_DISCARD:2679case OP_EXIT:2680case OP_PRECONT:2681case OP_CONT:2682case OP_PREBREAK:2683case OP_BREAK:2684case OP_JOINAT:2685case OP_BRKPT:2686case OP_QUADON:2687case OP_QUADPOP:2688emitFlow(insn);2689break;2690case OP_QUADOP:2691emitQUADOP(insn, insn->subOp, insn->lanes);2692break;2693case OP_DFDX:2694emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);2695break;2696case OP_DFDY:2697emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);2698break;2699case OP_POPCNT:2700emitPOPC(insn);2701break;2702case OP_INSBF:2703emitINSBF(insn);2704break;2705case OP_EXTBF:2706emitEXTBF(insn);2707break;2708case OP_BFIND:2709emitBFIND(insn);2710break;2711case OP_PERMT:2712emitPERMT(insn);2713break;2714case OP_JOIN:2715emitNOP(insn);2716insn->join = 1;2717break;2718case OP_BAR:2719emitBAR(insn);2720break;2721case OP_MEMBAR:2722emitMEMBAR(insn);2723break;2724case OP_ATOM:2725emitATOM(insn);2726break;2727case OP_CCTL:2728emitCCTL(insn);2729break;2730case OP_SHFL:2731emitSHFL(insn);2732break;2733case OP_VOTE:2734emitVOTE(insn);2735break;2736case OP_SULDB:2737emitSULDGB(insn->asTex());2738break;2739case OP_SUSTB:2740case OP_SUSTP:2741emitSUSTGx(insn->asTex());2742break;2743case OP_SUBFM:2744case OP_SUCLAMP:2745case OP_SUEAU:2746emitSUCalc(insn);2747break;2748case OP_VSHL:2749emitVSHL(insn);2750break;2751case OP_PHI:2752case OP_UNION:2753case OP_CONSTRAINT:2754ERROR("operation should have been eliminated");2755return false;2756case OP_EXP:2757case OP_LOG:2758case OP_SQRT:2759case OP_POW:2760ERROR("operation should have been lowered\n");2761return false;2762default:2763ERROR("unknown op: %u\n", insn->op);2764return false;2765}27662767if (insn->join)2768code[0] |= 1 << 22;27692770code += 2;2771codeSize += 8;2772return true;2773}27742775uint32_t2776CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const2777{2778// No more short instruction encodings.2779return 8;2780}27812782void2783CodeEmitterGK110::prepareEmission(Function *func)2784{2785const Target *targ = func->getProgram()->getTarget();27862787CodeEmitter::prepareEmission(func);27882789if (targ->hasSWSched)2790calculateSchedDataNVC0(targ, func);2791}27922793CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target, Program::Type type)2794: CodeEmitter(target),2795targNVC0(target),2796progType(type),2797writeIssueDelays(target->hasSWSched)2798{2799code = NULL;2800codeSize = codeSizeLimit = 0;2801relocInfo = NULL;2802}28032804CodeEmitter *2805TargetNVC0::createCodeEmitterGK110(Program::Type type)2806{2807CodeEmitterGK110 *emit = new CodeEmitterGK110(this, type);2808return emit;2809}28102811} // namespace nv50_ir281228132814