Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
4574 views
/*1* Copyright 2014 Red Hat Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*21* Authors: Ben Skeggs <[email protected]>22*/2324#include "codegen/nv50_ir_target_gm107.h"25#include "codegen/nv50_ir_sched_gm107.h"2627//#define GM107_DEBUG_SCHED_DATA2829namespace nv50_ir {3031class CodeEmitterGM107 : public CodeEmitter32{33public:34CodeEmitterGM107(const TargetGM107 *);3536virtual bool emitInstruction(Instruction *);37virtual uint32_t getMinEncodingSize(const Instruction *) const;3839virtual void prepareEmission(Program *);40virtual void prepareEmission(Function *);4142inline void setProgramType(Program::Type pType) { progType = pType; }4344private:45const TargetGM107 *targGM107;4647Program::Type progType;4849const Instruction *insn;50const bool writeIssueDelays;51uint32_t *data;5253private:54inline void emitField(uint32_t *, int, int, uint32_t);55inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }5657inline void emitInsn(uint32_t, 
bool);58inline void emitInsn(uint32_t o) { emitInsn(o, true); }59inline void emitPred();60inline void emitGPR(int, const Value *);61inline void emitGPR(int pos) {62emitGPR(pos, (const Value *)NULL);63}64inline void emitGPR(int pos, const ValueRef &ref) {65emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);66}67inline void emitGPR(int pos, const ValueRef *ref) {68emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);69}70inline void emitGPR(int pos, const ValueDef &def) {71emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);72}73inline void emitSYS(int, const Value *);74inline void emitSYS(int pos, const ValueRef &ref) {75emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);76}77inline void emitPRED(int, const Value *);78inline void emitPRED(int pos) {79emitPRED(pos, (const Value *)NULL);80}81inline void emitPRED(int pos, const ValueRef &ref) {82emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);83}84inline void emitPRED(int pos, const ValueDef &def) {85emitPRED(pos, def.get() ? 
def.rep() : (const Value *)NULL);86}87inline void emitADDR(int, int, int, int, const ValueRef &);88inline void emitCBUF(int, int, int, int, int, const ValueRef &);89inline bool longIMMD(const ValueRef &);90inline void emitIMMD(int, int, const ValueRef &);9192void emitCond3(int, CondCode);93void emitCond4(int, CondCode);94void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }95inline void emitO(int);96inline void emitP(int);97inline void emitSAT(int);98inline void emitCC(int);99inline void emitX(int);100inline void emitABS(int, const ValueRef &);101inline void emitNEG(int, const ValueRef &);102inline void emitNEG2(int, const ValueRef &, const ValueRef &);103inline void emitFMZ(int, int);104inline void emitRND(int, RoundMode, int);105inline void emitRND(int pos) {106emitRND(pos, insn->rnd, -1);107}108inline void emitPDIV(int);109inline void emitINV(int, const ValueRef &);110111void emitEXIT();112void emitBRA();113void emitCAL();114void emitPCNT();115void emitCONT();116void emitPBK();117void emitBRK();118void emitPRET();119void emitRET();120void emitSSY();121void emitSYNC();122void emitSAM();123void emitRAM();124125void emitPSETP();126127void emitMOV();128void emitS2R();129void emitCS2R();130void emitF2F();131void emitF2I();132void emitI2F();133void emitI2I();134void emitSEL();135void emitSHFL();136137void emitDADD();138void emitDMUL();139void emitDFMA();140void emitDMNMX();141void emitDSET();142void emitDSETP();143144void emitFADD();145void emitFMUL();146void emitFFMA();147void emitMUFU();148void emitFMNMX();149void emitRRO();150void emitFCMP();151void emitFSET();152void emitFSETP();153void emitFSWZADD();154155void emitLOP();156void emitNOT();157void emitIADD();158void emitIMUL();159void emitIMAD();160void emitISCADD();161void emitXMAD();162void emitIMNMX();163void emitICMP();164void emitISET();165void emitISETP();166void emitSHL();167void emitSHR();168void emitSHF();169void emitPOPC();170void emitBFI();171void emitBFE();172void emitFLO();173void 
emitPRMT();174175void emitLDSTs(int, DataType);176void emitLDSTc(int);177void emitLDC();178void emitLDL();179void emitLDS();180void emitLD();181void emitSTL();182void emitSTS();183void emitST();184void emitALD();185void emitAST();186void emitISBERD();187void emitAL2P();188void emitIPA();189void emitATOM();190void emitATOMS();191void emitRED();192void emitCCTL();193194void emitPIXLD();195196void emitTEXs(int);197void emitTEX();198void emitTEXS();199void emitTLD();200void emitTLD4();201void emitTXD();202void emitTXQ();203void emitTMML();204void emitDEPBAR();205206void emitNOP();207void emitKIL();208void emitOUT();209210void emitBAR();211void emitMEMBAR();212213void emitVOTE();214215void emitSUTarget();216void emitSUHandle(const int s);217void emitSUSTx();218void emitSULDx();219void emitSUREDx();220};221222/*******************************************************************************223* general instruction layout/fields224******************************************************************************/225226void227CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)228{229if (b >= 0) {230uint32_t m = ((1ULL << s) - 1);231uint64_t d = (uint64_t)(v & m) << b;232assert(!(v & ~m) || (v & ~m) == ~m);233data[1] |= d >> 32;234data[0] |= d;235}236}237238void239CodeEmitterGM107::emitPred()240{241if (insn->predSrc >= 0) {242emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);243emitField(19, 1, insn->cc == CC_NOT_P);244} else {245emitField(16, 3, 7);246}247}248249void250CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)251{252code[0] = 0x00000000;253code[1] = hi;254if (pred)255emitPred();256}257258void259CodeEmitterGM107::emitGPR(int pos, const Value *val)260{261emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?262val->reg.data.id : 255);263}264265void266CodeEmitterGM107::emitSYS(int pos, const Value *val)267{268int id = val ? 
val->reg.data.id : -1;269270switch (id) {271case SV_LANEID : id = 0x00; break;272case SV_VERTEX_COUNT : id = 0x10; break;273case SV_INVOCATION_ID : id = 0x11; break;274case SV_THREAD_KILL : id = 0x13; break;275case SV_INVOCATION_INFO: id = 0x1d; break;276case SV_COMBINED_TID : id = 0x20; break;277case SV_TID : id = 0x21 + val->reg.data.sv.index; break;278case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;279case SV_LANEMASK_EQ : id = 0x38; break;280case SV_LANEMASK_LT : id = 0x39; break;281case SV_LANEMASK_LE : id = 0x3a; break;282case SV_LANEMASK_GT : id = 0x3b; break;283case SV_LANEMASK_GE : id = 0x3c; break;284case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;285default:286assert(!"invalid system value");287id = 0;288break;289}290291emitField(pos, 8, id);292}293294void295CodeEmitterGM107::emitPRED(int pos, const Value *val)296{297emitField(pos, 3, val ? val->reg.data.id : 7);298}299300void301CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,302const ValueRef &ref)303{304const Value *v = ref.get();305assert(!(v->reg.data.offset & ((1 << shr) - 1)));306if (gpr >= 0)307emitGPR(gpr, ref.getIndirect(0));308emitField(off, len, v->reg.data.offset >> shr);309}310311void312CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,313const ValueRef &ref)314{315const Value *v = ref.get();316const Symbol *s = v->asSym();317318assert(!(s->reg.data.offset & ((1 << shr) - 1)));319320emitField(buf, 5, v->reg.fileIndex);321if (gpr >= 0)322emitGPR(gpr, ref.getIndirect(0));323emitField(off, 16, s->reg.data.offset >> shr);324}325326bool327CodeEmitterGM107::longIMMD(const ValueRef &ref)328{329if (ref.getFile() == FILE_IMMEDIATE) {330const ImmediateValue *imm = ref.get()->asImm();331if (isFloatType(insn->sType))332return imm->reg.data.u32 & 0xfff;333else334return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;335}336return false;337}338339void340CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)341{342const 
ImmediateValue *imm = ref.get()->asImm();343uint32_t val = imm->reg.data.u32;344345if (len == 19) {346if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {347assert(!(val & 0x00000fff));348val >>= 12;349} else if (insn->sType == TYPE_F64) {350assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));351val = imm->reg.data.u64 >> 44;352} else {353assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);354}355emitField( 56, 1, (val & 0x80000) >> 19);356emitField(pos, len, (val & 0x7ffff));357} else {358emitField(pos, len, val);359}360}361362/*******************************************************************************363* modifiers364******************************************************************************/365366void367CodeEmitterGM107::emitCond3(int pos, CondCode code)368{369int data = 0;370371switch (code) {372case CC_FL : data = 0x00; break;373case CC_LTU:374case CC_LT : data = 0x01; break;375case CC_EQU:376case CC_EQ : data = 0x02; break;377case CC_LEU:378case CC_LE : data = 0x03; break;379case CC_GTU:380case CC_GT : data = 0x04; break;381case CC_NEU:382case CC_NE : data = 0x05; break;383case CC_GEU:384case CC_GE : data = 0x06; break;385case CC_TR : data = 0x07; break;386default:387assert(!"invalid cond3");388break;389}390391emitField(pos, 3, data);392}393394void395CodeEmitterGM107::emitCond4(int pos, CondCode code)396{397int data = 0;398399switch (code) {400case CC_FL: data = 0x00; break;401case CC_LT: data = 0x01; break;402case CC_EQ: data = 0x02; break;403case CC_LE: data = 0x03; break;404case CC_GT: data = 0x04; break;405case CC_NE: data = 0x05; break;406case CC_GE: data = 0x06; break;407// case CC_NUM: data = 0x07; break;408// case CC_NAN: data = 0x08; break;409case CC_LTU: data = 0x09; break;410case CC_EQU: data = 0x0a; break;411case CC_LEU: data = 0x0b; break;412case CC_GTU: data = 0x0c; break;413case CC_NEU: data = 0x0d; break;414case CC_GEU: data = 0x0e; break;415case CC_TR: data = 0x0f; break;416default:417assert(!"invalid 
cond4");418break;419}420421emitField(pos, 4, data);422}423424void425CodeEmitterGM107::emitO(int pos)426{427emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);428}429430void431CodeEmitterGM107::emitP(int pos)432{433emitField(pos, 1, insn->perPatch);434}435436void437CodeEmitterGM107::emitSAT(int pos)438{439emitField(pos, 1, insn->saturate);440}441442void443CodeEmitterGM107::emitCC(int pos)444{445emitField(pos, 1, insn->flagsDef >= 0);446}447448void449CodeEmitterGM107::emitX(int pos)450{451emitField(pos, 1, insn->flagsSrc >= 0);452}453454void455CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)456{457emitField(pos, 1, ref.mod.abs());458}459460void461CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)462{463emitField(pos, 1, ref.mod.neg());464}465466void467CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)468{469emitField(pos, 1, a.mod.neg() ^ b.mod.neg());470}471472void473CodeEmitterGM107::emitFMZ(int pos, int len)474{475emitField(pos, len, insn->dnz << 1 | insn->ftz);476}477478void479CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)480{481int rm = 0, ri = 0;482switch (rnd) {483case ROUND_NI: ri = 1;484case ROUND_N : rm = 0; break;485case ROUND_MI: ri = 1;486case ROUND_M : rm = 1; break;487case ROUND_PI: ri = 1;488case ROUND_P : rm = 2; break;489case ROUND_ZI: ri = 1;490case ROUND_Z : rm = 3; break;491default:492assert(!"invalid round mode");493break;494}495emitField(rip, 1, ri);496emitField(rmp, 2, rm);497}498499void500CodeEmitterGM107::emitPDIV(int pos)501{502assert(insn->postFactor >= -3 && insn->postFactor <= 3);503if (insn->postFactor > 0)504emitField(pos, 3, 7 - insn->postFactor);505else506emitField(pos, 3, 0 - insn->postFactor);507}508509void510CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)511{512emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));513}514515/*******************************************************************************516* control 
flow517******************************************************************************/518519void520CodeEmitterGM107::emitEXIT()521{522emitInsn (0xe3000000);523emitCond5(0x00, CC_TR);524}525526void527CodeEmitterGM107::emitBRA()528{529const FlowInstruction *insn = this->insn->asFlow();530int gpr = -1;531532if (insn->indirect) {533if (insn->absolute)534emitInsn(0xe2000000); // JMX535else536emitInsn(0xe2500000); // BRX537gpr = 0x08;538} else {539if (insn->absolute)540emitInsn(0xe2100000); // JMP541else542emitInsn(0xe2400000); // BRA543emitField(0x07, 1, insn->allWarp);544}545546emitField(0x06, 1, insn->limit);547emitCond5(0x00, CC_TR);548549if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {550int32_t pos = insn->target.bb->binPos;551if (writeIssueDelays && !(pos & 0x1f))552pos += 8;553if (!insn->absolute)554emitField(0x14, 24, pos - (codeSize + 8));555else556emitField(0x14, 32, pos);557} else {558emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));559emitField(0x05, 1, 1);560}561}562563void564CodeEmitterGM107::emitCAL()565{566const FlowInstruction *insn = this->insn->asFlow();567568if (insn->absolute) {569emitInsn(0xe2200000, 0); // JCAL570} else {571emitInsn(0xe2600000, 0); // CAL572}573574if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {575if (!insn->absolute)576emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));577else {578if (insn->builtin) {579int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);580addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);581addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);582} else {583emitField(0x14, 32, insn->target.bb->binPos);584}585}586} else {587emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));588emitField(0x05, 1, 1);589}590}591592void593CodeEmitterGM107::emitPCNT()594{595const FlowInstruction *insn = this->insn->asFlow();596597emitInsn(0xe2b00000, 0);598599if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {600emitField(0x14, 24, 
insn->target.bb->binPos - (codeSize + 8));601} else {602emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));603emitField(0x05, 1, 1);604}605}606607void608CodeEmitterGM107::emitCONT()609{610emitInsn (0xe3500000);611emitCond5(0x00, CC_TR);612}613614void615CodeEmitterGM107::emitPBK()616{617const FlowInstruction *insn = this->insn->asFlow();618619emitInsn(0xe2a00000, 0);620621if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {622emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));623} else {624emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));625emitField(0x05, 1, 1);626}627}628629void630CodeEmitterGM107::emitBRK()631{632emitInsn (0xe3400000);633emitCond5(0x00, CC_TR);634}635636void637CodeEmitterGM107::emitPRET()638{639const FlowInstruction *insn = this->insn->asFlow();640641emitInsn(0xe2700000, 0);642643if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {644emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));645} else {646emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));647emitField(0x05, 1, 1);648}649}650651void652CodeEmitterGM107::emitRET()653{654emitInsn (0xe3200000);655emitCond5(0x00, CC_TR);656}657658void659CodeEmitterGM107::emitSSY()660{661const FlowInstruction *insn = this->insn->asFlow();662663emitInsn(0xe2900000, 0);664665if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {666emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));667} else {668emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));669emitField(0x05, 1, 1);670}671}672673void674CodeEmitterGM107::emitSYNC()675{676emitInsn (0xf0f80000);677emitCond5(0x00, CC_TR);678}679680void681CodeEmitterGM107::emitSAM()682{683emitInsn(0xe3700000, 0);684}685686void687CodeEmitterGM107::emitRAM()688{689emitInsn(0xe3800000, 0);690}691692/*******************************************************************************693* 
predicate/cc694******************************************************************************/695696void697CodeEmitterGM107::emitPSETP()698{699700emitInsn(0x50900000);701702switch (insn->op) {703case OP_AND: emitField(0x18, 3, 0); break;704case OP_OR: emitField(0x18, 3, 1); break;705case OP_XOR: emitField(0x18, 3, 2); break;706default:707assert(!"unexpected operation");708break;709}710711// emitINV (0x2a);712emitPRED(0x27); // TODO: support 3-arg713emitINV (0x20, insn->src(1));714emitPRED(0x1d, insn->src(1));715emitINV (0x0f, insn->src(0));716emitPRED(0x0c, insn->src(0));717emitPRED(0x03, insn->def(0));718emitPRED(0x00);719}720721/*******************************************************************************722* movement / conversion723******************************************************************************/724725void726CodeEmitterGM107::emitMOV()727{728if (insn->src(0).getFile() != FILE_IMMEDIATE) {729switch (insn->src(0).getFile()) {730case FILE_GPR:731if (insn->def(0).getFile() == FILE_PREDICATE) {732emitInsn(0x5b6a0000);733emitGPR (0x08);734} else {735emitInsn(0x5c980000);736}737emitGPR (0x14, insn->src(0));738break;739case FILE_MEMORY_CONST:740emitInsn(0x4c980000);741emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));742break;743case FILE_IMMEDIATE:744emitInsn(0x38980000);745emitIMMD(0x14, 19, insn->src(0));746break;747case FILE_PREDICATE:748emitInsn(0x50880000);749emitPRED(0x0c, insn->src(0));750emitPRED(0x1d);751emitPRED(0x27);752break;753default:754assert(!"bad src file");755break;756}757if (insn->def(0).getFile() != FILE_PREDICATE &&758insn->src(0).getFile() != FILE_PREDICATE)759emitField(0x27, 4, insn->lanes);760} else {761emitInsn (0x01000000);762emitIMMD (0x14, 32, insn->src(0));763emitField(0x0c, 4, insn->lanes);764}765766if (insn->def(0).getFile() == FILE_PREDICATE) {767emitPRED(0x27);768emitPRED(0x03, insn->def(0));769emitPRED(0x00);770} else {771emitGPR(0x00, 
insn->def(0));772}773}774775void776CodeEmitterGM107::emitS2R()777{778emitInsn(0xf0c80000);779emitSYS (0x14, insn->src(0));780emitGPR (0x00, insn->def(0));781}782783void784CodeEmitterGM107::emitCS2R()785{786emitInsn(0x50c80000);787emitSYS (0x14, insn->src(0));788emitGPR (0x00, insn->def(0));789}790791void792CodeEmitterGM107::emitF2F()793{794RoundMode rnd = insn->rnd;795796switch (insn->op) {797case OP_FLOOR: rnd = ROUND_MI; break;798case OP_CEIL : rnd = ROUND_PI; break;799case OP_TRUNC: rnd = ROUND_ZI; break;800default:801break;802}803804switch (insn->src(0).getFile()) {805case FILE_GPR:806emitInsn(0x5ca80000);807emitGPR (0x14, insn->src(0));808break;809case FILE_MEMORY_CONST:810emitInsn(0x4ca80000);811emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));812break;813case FILE_IMMEDIATE:814emitInsn(0x38a80000);815emitIMMD(0x14, 19, insn->src(0));816break;817default:818assert(!"bad src0 file");819break;820}821822emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);823emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());824emitCC (0x2f);825emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());826emitFMZ (0x2c, 1);827emitField(0x29, 1, insn->subOp);828emitRND (0x27, rnd, 0x2a);829emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));830emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));831emitGPR (0x00, insn->def(0));832}833834void835CodeEmitterGM107::emitF2I()836{837RoundMode rnd = insn->rnd;838839switch (insn->op) {840case OP_FLOOR: rnd = ROUND_M; break;841case OP_CEIL : rnd = ROUND_P; break;842case OP_TRUNC: rnd = ROUND_Z; break;843default:844break;845}846847switch (insn->src(0).getFile()) {848case FILE_GPR:849emitInsn(0x5cb00000);850emitGPR (0x14, insn->src(0));851break;852case FILE_MEMORY_CONST:853emitInsn(0x4cb00000);854emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));855break;856case FILE_IMMEDIATE:857emitInsn(0x38b00000);858emitIMMD(0x14, 19, insn->src(0));859break;860default:861assert(!"bad src0 
file");862break;863}864865emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());866emitCC (0x2f);867emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());868emitFMZ (0x2c, 1);869emitRND (0x27, rnd, 0x2a);870emitField(0x0c, 1, isSignedType(insn->dType));871emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));872emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));873emitGPR (0x00, insn->def(0));874}875876void877CodeEmitterGM107::emitI2F()878{879RoundMode rnd = insn->rnd;880881switch (insn->op) {882case OP_FLOOR: rnd = ROUND_M; break;883case OP_CEIL : rnd = ROUND_P; break;884case OP_TRUNC: rnd = ROUND_Z; break;885default:886break;887}888889switch (insn->src(0).getFile()) {890case FILE_GPR:891emitInsn(0x5cb80000);892emitGPR (0x14, insn->src(0));893break;894case FILE_MEMORY_CONST:895emitInsn(0x4cb80000);896emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));897break;898case FILE_IMMEDIATE:899emitInsn(0x38b80000);900emitIMMD(0x14, 19, insn->src(0));901break;902default:903assert(!"bad src0 file");904break;905}906907emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());908emitCC (0x2f);909emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());910emitField(0x29, 2, insn->subOp);911emitRND (0x27, rnd, -1);912emitField(0x0d, 1, isSignedType(insn->sType));913emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));914emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));915emitGPR (0x00, insn->def(0));916}917918void919CodeEmitterGM107::emitI2I()920{921switch (insn->src(0).getFile()) {922case FILE_GPR:923emitInsn(0x5ce00000);924emitGPR (0x14, insn->src(0));925break;926case FILE_MEMORY_CONST:927emitInsn(0x4ce00000);928emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));929break;930case FILE_IMMEDIATE:931emitInsn(0x38e00000);932emitIMMD(0x14, 19, insn->src(0));933break;934default:935assert(!"bad src0 file");936break;937}938939emitSAT (0x32);940emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());941emitCC 
(0x2f);942emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());943emitField(0x29, 2, insn->subOp);944emitField(0x0d, 1, isSignedType(insn->sType));945emitField(0x0c, 1, isSignedType(insn->dType));946emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));947emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));948emitGPR (0x00, insn->def(0));949}950951void952gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)953{954int loc = entry->loc;955bool val = false;956switch (entry->ipa) {957case 0:958val = data.force_persample_interp;959break;960case 1:961val = data.msaa;962break;963}964if (val)965code[loc + 1] |= 1 << 10;966else967code[loc + 1] &= ~(1 << 10);968}969970void971CodeEmitterGM107::emitSEL()972{973switch (insn->src(1).getFile()) {974case FILE_GPR:975emitInsn(0x5ca00000);976emitGPR (0x14, insn->src(1));977break;978case FILE_MEMORY_CONST:979emitInsn(0x4ca00000);980emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));981break;982case FILE_IMMEDIATE:983emitInsn(0x38a00000);984emitIMMD(0x14, 19, insn->src(1));985break;986default:987assert(!"bad src1 file");988break;989}990991emitINV (0x2a, insn->src(2));992emitPRED(0x27, insn->src(2));993emitGPR (0x08, insn->src(0));994emitGPR (0x00, insn->def(0));995996if (insn->subOp >= 1) {997addInterp(insn->subOp - 1, 0, gm107_selpFlip);998}999}10001001void1002CodeEmitterGM107::emitSHFL()1003{1004int type = 0;10051006emitInsn (0xef100000);10071008switch (insn->src(1).getFile()) {1009case FILE_GPR:1010emitGPR(0x14, insn->src(1));1011break;1012case FILE_IMMEDIATE:1013emitIMMD(0x14, 5, insn->src(1));1014type |= 1;1015break;1016default:1017assert(!"invalid src1 file");1018break;1019}10201021switch (insn->src(2).getFile()) {1022case FILE_GPR:1023emitGPR(0x27, insn->src(2));1024break;1025case FILE_IMMEDIATE:1026emitIMMD(0x22, 13, insn->src(2));1027type |= 2;1028break;1029default:1030assert(!"invalid src2 file");1031break;1032}10331034if (!insn->defExists(1))1035emitPRED(0x30);1036else 
{1037assert(insn->def(1).getFile() == FILE_PREDICATE);1038emitPRED(0x30, insn->def(1));1039}10401041emitField(0x1e, 2, insn->subOp);1042emitField(0x1c, 2, type);1043emitGPR (0x08, insn->src(0));1044emitGPR (0x00, insn->def(0));1045}10461047/*******************************************************************************1048* double1049******************************************************************************/10501051void1052CodeEmitterGM107::emitDADD()1053{1054switch (insn->src(1).getFile()) {1055case FILE_GPR:1056emitInsn(0x5c700000);1057emitGPR (0x14, insn->src(1));1058break;1059case FILE_MEMORY_CONST:1060emitInsn(0x4c700000);1061emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1062break;1063case FILE_IMMEDIATE:1064emitInsn(0x38700000);1065emitIMMD(0x14, 19, insn->src(1));1066break;1067default:1068assert(!"bad src1 file");1069break;1070}1071emitABS(0x31, insn->src(1));1072emitNEG(0x30, insn->src(0));1073emitCC (0x2f);1074emitABS(0x2e, insn->src(0));1075emitNEG(0x2d, insn->src(1));10761077if (insn->op == OP_SUB)1078code[1] ^= 0x00002000;10791080emitGPR(0x08, insn->src(0));1081emitGPR(0x00, insn->def(0));1082}10831084void1085CodeEmitterGM107::emitDMUL()1086{1087switch (insn->src(1).getFile()) {1088case FILE_GPR:1089emitInsn(0x5c800000);1090emitGPR (0x14, insn->src(1));1091break;1092case FILE_MEMORY_CONST:1093emitInsn(0x4c800000);1094emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1095break;1096case FILE_IMMEDIATE:1097emitInsn(0x38800000);1098emitIMMD(0x14, 19, insn->src(1));1099break;1100default:1101assert(!"bad src1 file");1102break;1103}11041105emitNEG2(0x30, insn->src(0), insn->src(1));1106emitCC (0x2f);1107emitRND (0x27);1108emitGPR (0x08, insn->src(0));1109emitGPR (0x00, insn->def(0));1110}11111112void1113CodeEmitterGM107::emitDFMA()1114{1115switch(insn->src(2).getFile()) {1116case FILE_GPR:1117switch (insn->src(1).getFile()) {1118case FILE_GPR:1119emitInsn(0x5b700000);1120emitGPR (0x14, insn->src(1));1121break;1122case 
FILE_MEMORY_CONST:1123emitInsn(0x4b700000);1124emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1125break;1126case FILE_IMMEDIATE:1127emitInsn(0x36700000);1128emitIMMD(0x14, 19, insn->src(1));1129break;1130default:1131assert(!"bad src1 file");1132break;1133}1134emitGPR (0x27, insn->src(2));1135break;1136case FILE_MEMORY_CONST:1137emitInsn(0x53700000);1138emitGPR (0x27, insn->src(1));1139emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));1140break;1141default:1142assert(!"bad src2 file");1143break;1144}11451146emitRND (0x32);1147emitNEG (0x31, insn->src(2));1148emitNEG2(0x30, insn->src(0), insn->src(1));1149emitCC (0x2f);1150emitGPR (0x08, insn->src(0));1151emitGPR (0x00, insn->def(0));1152}11531154void1155CodeEmitterGM107::emitDMNMX()1156{1157switch (insn->src(1).getFile()) {1158case FILE_GPR:1159emitInsn(0x5c500000);1160emitGPR (0x14, insn->src(1));1161break;1162case FILE_MEMORY_CONST:1163emitInsn(0x4c500000);1164emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1165break;1166case FILE_IMMEDIATE:1167emitInsn(0x38500000);1168emitIMMD(0x14, 19, insn->src(1));1169break;1170default:1171assert(!"bad src1 file");1172break;1173}11741175emitABS (0x31, insn->src(1));1176emitNEG (0x30, insn->src(0));1177emitCC (0x2f);1178emitABS (0x2e, insn->src(0));1179emitNEG (0x2d, insn->src(1));1180emitField(0x2a, 1, insn->op == OP_MAX);1181emitPRED (0x27);1182emitGPR (0x08, insn->src(0));1183emitGPR (0x00, insn->def(0));1184}11851186void1187CodeEmitterGM107::emitDSET()1188{1189const CmpInstruction *insn = this->insn->asCmp();11901191switch (insn->src(1).getFile()) {1192case FILE_GPR:1193emitInsn(0x59000000);1194emitGPR (0x14, insn->src(1));1195break;1196case FILE_MEMORY_CONST:1197emitInsn(0x49000000);1198emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1199break;1200case FILE_IMMEDIATE:1201emitInsn(0x32000000);1202emitIMMD(0x14, 19, insn->src(1));1203break;1204default:1205assert(!"bad src1 file");1206break;1207}12081209if (insn->op != OP_SET) {1210switch (insn->op) {1211case OP_SET_AND: 
emitField(0x2d, 2, 0); break;1212case OP_SET_OR : emitField(0x2d, 2, 1); break;1213case OP_SET_XOR: emitField(0x2d, 2, 2); break;1214default:1215assert(!"invalid set op");1216break;1217}1218emitPRED(0x27, insn->src(2));1219} else {1220emitPRED(0x27);1221}12221223emitABS (0x36, insn->src(0));1224emitNEG (0x35, insn->src(1));1225emitField(0x34, 1, insn->dType == TYPE_F32);1226emitCond4(0x30, insn->setCond);1227emitCC (0x2f);1228emitABS (0x2c, insn->src(1));1229emitNEG (0x2b, insn->src(0));1230emitGPR (0x08, insn->src(0));1231emitGPR (0x00, insn->def(0));1232}12331234void1235CodeEmitterGM107::emitDSETP()1236{1237const CmpInstruction *insn = this->insn->asCmp();12381239switch (insn->src(1).getFile()) {1240case FILE_GPR:1241emitInsn(0x5b800000);1242emitGPR (0x14, insn->src(1));1243break;1244case FILE_MEMORY_CONST:1245emitInsn(0x4b800000);1246emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1247break;1248case FILE_IMMEDIATE:1249emitInsn(0x36800000);1250emitIMMD(0x14, 19, insn->src(1));1251break;1252default:1253assert(!"bad src1 file");1254break;1255}12561257if (insn->op != OP_SET) {1258switch (insn->op) {1259case OP_SET_AND: emitField(0x2d, 2, 0); break;1260case OP_SET_OR : emitField(0x2d, 2, 1); break;1261case OP_SET_XOR: emitField(0x2d, 2, 2); break;1262default:1263assert(!"invalid set op");1264break;1265}1266emitPRED(0x27, insn->src(2));1267} else {1268emitPRED(0x27);1269}12701271emitCond4(0x30, insn->setCond);1272emitABS (0x2c, insn->src(1));1273emitNEG (0x2b, insn->src(0));1274emitGPR (0x08, insn->src(0));1275emitABS (0x07, insn->src(0));1276emitNEG (0x06, insn->src(1));1277emitPRED (0x03, insn->def(0));1278if (insn->defExists(1))1279emitPRED(0x00, insn->def(1));1280else1281emitPRED(0x00);1282}12831284/*******************************************************************************1285* float1286******************************************************************************/12871288void1289CodeEmitterGM107::emitFADD()1290{1291if (!longIMMD(insn->src(1))) {1292switch 
(insn->src(1).getFile()) {1293case FILE_GPR:1294emitInsn(0x5c580000);1295emitGPR (0x14, insn->src(1));1296break;1297case FILE_MEMORY_CONST:1298emitInsn(0x4c580000);1299emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1300break;1301case FILE_IMMEDIATE:1302emitInsn(0x38580000);1303emitIMMD(0x14, 19, insn->src(1));1304break;1305default:1306assert(!"bad src1 file");1307break;1308}1309emitSAT(0x32);1310emitABS(0x31, insn->src(1));1311emitNEG(0x30, insn->src(0));1312emitCC (0x2f);1313emitABS(0x2e, insn->src(0));1314emitNEG(0x2d, insn->src(1));1315emitFMZ(0x2c, 1);13161317if (insn->op == OP_SUB)1318code[1] ^= 0x00002000;1319} else {1320emitInsn(0x08000000);1321emitABS(0x39, insn->src(1));1322emitNEG(0x38, insn->src(0));1323emitFMZ(0x37, 1);1324emitABS(0x36, insn->src(0));1325emitNEG(0x35, insn->src(1));1326emitCC (0x34);1327emitIMMD(0x14, 32, insn->src(1));13281329if (insn->op == OP_SUB)1330code[1] ^= 0x00080000;1331}13321333emitGPR(0x08, insn->src(0));1334emitGPR(0x00, insn->def(0));1335}13361337void1338CodeEmitterGM107::emitFMUL()1339{1340if (!longIMMD(insn->src(1))) {1341switch (insn->src(1).getFile()) {1342case FILE_GPR:1343emitInsn(0x5c680000);1344emitGPR (0x14, insn->src(1));1345break;1346case FILE_MEMORY_CONST:1347emitInsn(0x4c680000);1348emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1349break;1350case FILE_IMMEDIATE:1351emitInsn(0x38680000);1352emitIMMD(0x14, 19, insn->src(1));1353break;1354default:1355assert(!"bad src1 file");1356break;1357}1358emitSAT (0x32);1359emitNEG2(0x30, insn->src(0), insn->src(1));1360emitCC (0x2f);1361emitFMZ (0x2c, 2);1362emitPDIV(0x29);1363emitRND (0x27);1364} else {1365emitInsn(0x1e000000);1366emitSAT (0x37);1367emitFMZ (0x35, 2);1368emitCC (0x34);1369emitIMMD(0x14, 32, insn->src(1));1370if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())1371code[1] ^= 0x00080000; /* flip immd sign bit */1372}13731374emitGPR(0x08, insn->src(0));1375emitGPR(0x00, insn->def(0));1376}13771378void1379CodeEmitterGM107::emitFFMA()1380{1381bool isLongIMMD = 
false;
   /* The opcode depends on where src(2) and src(1) live.  With src(2) in a
    * GPR, src(1) may be a GPR, a constant-buffer slot or an immediate; with
    * src(2) in a constant buffer, src(1) is emitted as the GPR at 0x27. */
   switch(insn->src(2).getFile()) {
   case FILE_GPR:
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x59800000);
         emitGPR (0x14, insn->src(1));
         break;
      case FILE_MEMORY_CONST:
         emitInsn(0x49800000);
         emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
         break;
      case FILE_IMMEDIATE:
         if (longIMMD(insn->getSrc(1))) {
            /* The 32-bit immediate form emits no separate src(2) register
             * (see the !isLongIMMD check below), so src(2) must already be
             * the destination register. */
            assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
            isLongIMMD = true;
            emitInsn(0x0c000000);
            emitIMMD(0x14, 32, insn->src(1));
         } else {
            emitInsn(0x32800000);
            emitIMMD(0x14, 19, insn->src(1));
         }
         break;
      default:
         assert(!"bad src1 file");
         break;
      }
      if (!isLongIMMD)
         emitGPR (0x27, insn->src(2));
      break;
   case FILE_MEMORY_CONST:
      emitInsn(0x51800000);
      emitGPR (0x27, insn->src(1));
      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
      break;
   default:
      assert(!"bad src2 file");
      break;
   }

   /* Modifier fields sit at different bit positions in the long-immediate
    * encoding; that form also emits no rounding-mode field. */
   if (isLongIMMD) {
      emitNEG (0x39, insn->src(2));
      emitNEG2(0x38, insn->src(0), insn->src(1));
      emitSAT (0x37);
      emitCC  (0x34);
   } else {
      emitRND (0x33);
      emitSAT (0x32);
      emitNEG (0x31, insn->src(2));
      emitNEG2(0x30, insn->src(0), insn->src(1));
      emitCC  (0x2f);
   }

   emitFMZ(0x35, 2);
   emitGPR(0x08, insn->src(0));
   emitGPR(0x00, insn->def(0));
}

/* Encode MUFU: the IR opcode (and, for RCP/RSQ, insn->subOp) is mapped to
 * the 4-bit function selector emitted at 0x14. */
void
CodeEmitterGM107::emitMUFU()
{
   int mufu = 0;

   switch (insn->op) {
   case OP_COS: mufu = 0; break;
   case OP_SIN: mufu = 1; break;
   case OP_EX2: mufu = 2; break;
   case OP_LG2: mufu = 3; break;
   /* subOp selects an alternate variant two selector values further on */
   case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
   case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
   case OP_SQRT: mufu = 8; break;
   default:
      assert(!"invalid mufu");
      break;
   }

   emitInsn (0x50800000);
   emitSAT  (0x32);
   emitNEG  (0x30, insn->src(0));
   emitABS  (0x2e, insn->src(0));
   emitField(0x14, 4, mufu);
   emitGPR  (0x08, insn->src(0));
   emitGPR  (0x00, 
insn->def(0));1464}14651466void1467CodeEmitterGM107::emitFMNMX()1468{1469switch (insn->src(1).getFile()) {1470case FILE_GPR:1471emitInsn(0x5c600000);1472emitGPR (0x14, insn->src(1));1473break;1474case FILE_MEMORY_CONST:1475emitInsn(0x4c600000);1476emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1477break;1478case FILE_IMMEDIATE:1479emitInsn(0x38600000);1480emitIMMD(0x14, 19, insn->src(1));1481break;1482default:1483assert(!"bad src1 file");1484break;1485}14861487emitField(0x2a, 1, insn->op == OP_MAX);1488emitPRED (0x27);14891490emitABS(0x31, insn->src(1));1491emitNEG(0x30, insn->src(0));1492emitCC (0x2f);1493emitABS(0x2e, insn->src(0));1494emitNEG(0x2d, insn->src(1));1495emitFMZ(0x2c, 1);1496emitGPR(0x08, insn->src(0));1497emitGPR(0x00, insn->def(0));1498}14991500void1501CodeEmitterGM107::emitRRO()1502{1503switch (insn->src(0).getFile()) {1504case FILE_GPR:1505emitInsn(0x5c900000);1506emitGPR (0x14, insn->src(0));1507break;1508case FILE_MEMORY_CONST:1509emitInsn(0x4c900000);1510emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));1511break;1512case FILE_IMMEDIATE:1513emitInsn(0x38900000);1514emitIMMD(0x14, 19, insn->src(0));1515break;1516default:1517assert(!"bad src file");1518break;1519}15201521emitABS (0x31, insn->src(0));1522emitNEG (0x2d, insn->src(0));1523emitField(0x27, 1, insn->op == OP_PREEX2);1524emitGPR (0x00, insn->def(0));1525}15261527void1528CodeEmitterGM107::emitFCMP()1529{1530const CmpInstruction *insn = this->insn->asCmp();1531CondCode cc = insn->setCond;15321533if (insn->src(2).mod.neg())1534cc = reverseCondCode(cc);15351536switch(insn->src(2).getFile()) {1537case FILE_GPR:1538switch (insn->src(1).getFile()) {1539case FILE_GPR:1540emitInsn(0x5ba00000);1541emitGPR (0x14, insn->src(1));1542break;1543case FILE_MEMORY_CONST:1544emitInsn(0x4ba00000);1545emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1546break;1547case FILE_IMMEDIATE:1548emitInsn(0x36a00000);1549emitIMMD(0x14, 19, insn->src(1));1550break;1551default:1552assert(!"bad src1 
file");1553break;1554}1555emitGPR (0x27, insn->src(2));1556break;1557case FILE_MEMORY_CONST:1558emitInsn(0x53a00000);1559emitGPR (0x27, insn->src(1));1560emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));1561break;1562default:1563assert(!"bad src2 file");1564break;1565}15661567emitCond4(0x30, cc);1568emitFMZ (0x2f, 1);1569emitGPR (0x08, insn->src(0));1570emitGPR (0x00, insn->def(0));1571}15721573void1574CodeEmitterGM107::emitFSET()1575{1576const CmpInstruction *insn = this->insn->asCmp();15771578switch (insn->src(1).getFile()) {1579case FILE_GPR:1580emitInsn(0x58000000);1581emitGPR (0x14, insn->src(1));1582break;1583case FILE_MEMORY_CONST:1584emitInsn(0x48000000);1585emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1586break;1587case FILE_IMMEDIATE:1588emitInsn(0x30000000);1589emitIMMD(0x14, 19, insn->src(1));1590break;1591default:1592assert(!"bad src1 file");1593break;1594}15951596if (insn->op != OP_SET) {1597switch (insn->op) {1598case OP_SET_AND: emitField(0x2d, 2, 0); break;1599case OP_SET_OR : emitField(0x2d, 2, 1); break;1600case OP_SET_XOR: emitField(0x2d, 2, 2); break;1601default:1602assert(!"invalid set op");1603break;1604}1605emitPRED(0x27, insn->src(2));1606} else {1607emitPRED(0x27);1608}16091610emitFMZ (0x37, 1);1611emitABS (0x36, insn->src(0));1612emitNEG (0x35, insn->src(1));1613emitField(0x34, 1, insn->dType == TYPE_F32);1614emitCond4(0x30, insn->setCond);1615emitCC (0x2f);1616emitABS (0x2c, insn->src(1));1617emitNEG (0x2b, insn->src(0));1618emitGPR (0x08, insn->src(0));1619emitGPR (0x00, insn->def(0));1620}16211622void1623CodeEmitterGM107::emitFSETP()1624{1625const CmpInstruction *insn = this->insn->asCmp();16261627switch (insn->src(1).getFile()) {1628case FILE_GPR:1629emitInsn(0x5bb00000);1630emitGPR (0x14, insn->src(1));1631break;1632case FILE_MEMORY_CONST:1633emitInsn(0x4bb00000);1634emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1635break;1636case FILE_IMMEDIATE:1637emitInsn(0x36b00000);1638emitIMMD(0x14, 19, 
insn->src(1));1639break;1640default:1641assert(!"bad src1 file");1642break;1643}16441645if (insn->op != OP_SET) {1646switch (insn->op) {1647case OP_SET_AND: emitField(0x2d, 2, 0); break;1648case OP_SET_OR : emitField(0x2d, 2, 1); break;1649case OP_SET_XOR: emitField(0x2d, 2, 2); break;1650default:1651assert(!"invalid set op");1652break;1653}1654emitPRED(0x27, insn->src(2));1655} else {1656emitPRED(0x27);1657}16581659emitCond4(0x30, insn->setCond);1660emitFMZ (0x2f, 1);1661emitABS (0x2c, insn->src(1));1662emitNEG (0x2b, insn->src(0));1663emitGPR (0x08, insn->src(0));1664emitABS (0x07, insn->src(0));1665emitNEG (0x06, insn->src(1));1666emitPRED (0x03, insn->def(0));1667if (insn->defExists(1))1668emitPRED(0x00, insn->def(1));1669else1670emitPRED(0x00);1671}16721673void1674CodeEmitterGM107::emitFSWZADD()1675{1676emitInsn (0x50f80000);1677emitCC (0x2f);1678emitFMZ (0x2c, 1);1679emitRND (0x27);1680emitField(0x26, 1, insn->lanes); /* abused for .ndv */1681emitField(0x1c, 8, insn->subOp);1682if (insn->predSrc != 1)1683emitGPR (0x14, insn->src(1));1684else1685emitGPR (0x14);1686emitGPR (0x08, insn->src(0));1687emitGPR (0x00, insn->def(0));1688}16891690/*******************************************************************************1691* integer1692******************************************************************************/16931694void1695CodeEmitterGM107::emitLOP()1696{1697int lop = 0;16981699switch (insn->op) {1700case OP_AND: lop = 0; break;1701case OP_OR : lop = 1; break;1702case OP_XOR: lop = 2; break;1703default:1704assert(!"invalid lop");1705break;1706}17071708if (!longIMMD(insn->src(1))) {1709switch (insn->src(1).getFile()) {1710case FILE_GPR:1711emitInsn(0x5c400000);1712emitGPR (0x14, insn->src(1));1713break;1714case FILE_MEMORY_CONST:1715emitInsn(0x4c400000);1716emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1717break;1718case FILE_IMMEDIATE:1719emitInsn(0x38400000);1720emitIMMD(0x14, 19, insn->src(1));1721break;1722default:1723assert(!"bad src1 
file");1724break;1725}1726emitPRED (0x30);1727emitCC (0x2f);1728emitX (0x2b);1729emitField(0x29, 2, lop);1730emitINV (0x28, insn->src(1));1731emitINV (0x27, insn->src(0));1732} else {1733emitInsn (0x04000000);1734emitX (0x39);1735emitINV (0x38, insn->src(1));1736emitINV (0x37, insn->src(0));1737emitField(0x35, 2, lop);1738emitCC (0x34);1739emitIMMD (0x14, 32, insn->src(1));1740}17411742emitGPR (0x08, insn->src(0));1743emitGPR (0x00, insn->def(0));1744}17451746/* special-case of emitLOP(): lop pass_b dst 0 ~src */1747void1748CodeEmitterGM107::emitNOT()1749{1750if (!longIMMD(insn->src(0))) {1751switch (insn->src(0).getFile()) {1752case FILE_GPR:1753emitInsn(0x5c400700);1754emitGPR (0x14, insn->src(0));1755break;1756case FILE_MEMORY_CONST:1757emitInsn(0x4c400700);1758emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));1759break;1760case FILE_IMMEDIATE:1761emitInsn(0x38400700);1762emitIMMD(0x14, 19, insn->src(0));1763break;1764default:1765assert(!"bad src1 file");1766break;1767}1768emitPRED (0x30);1769} else {1770emitInsn (0x05600000);1771emitIMMD (0x14, 32, insn->src(1));1772}17731774emitGPR(0x08);1775emitGPR(0x00, insn->def(0));1776}17771778void1779CodeEmitterGM107::emitIADD()1780{1781if (!longIMMD(insn->src(1))) {1782switch (insn->src(1).getFile()) {1783case FILE_GPR:1784emitInsn(0x5c100000);1785emitGPR (0x14, insn->src(1));1786break;1787case FILE_MEMORY_CONST:1788emitInsn(0x4c100000);1789emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1790break;1791case FILE_IMMEDIATE:1792emitInsn(0x38100000);1793emitIMMD(0x14, 19, insn->src(1));1794break;1795default:1796assert(!"bad src1 file");1797break;1798}1799emitSAT(0x32);1800emitNEG(0x31, insn->src(0));1801emitNEG(0x30, insn->src(1));1802emitCC (0x2f);1803emitX (0x2b);1804} else {1805emitInsn(0x1c000000);1806emitNEG (0x38, insn->src(0));1807emitSAT (0x36);1808emitX (0x35);1809emitCC (0x34);1810emitIMMD(0x14, 32, insn->src(1));1811}18121813if (insn->op == OP_SUB)1814code[1] ^= 0x00010000;18151816emitGPR(0x08, 
insn->src(0));1817emitGPR(0x00, insn->def(0));1818}18191820void1821CodeEmitterGM107::emitIMUL()1822{1823if (!longIMMD(insn->src(1))) {1824switch (insn->src(1).getFile()) {1825case FILE_GPR:1826emitInsn(0x5c380000);1827emitGPR (0x14, insn->src(1));1828break;1829case FILE_MEMORY_CONST:1830emitInsn(0x4c380000);1831emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1832break;1833case FILE_IMMEDIATE:1834emitInsn(0x38380000);1835emitIMMD(0x14, 19, insn->src(1));1836break;1837default:1838assert(!"bad src1 file");1839break;1840}1841emitCC (0x2f);1842emitField(0x29, 1, isSignedType(insn->sType));1843emitField(0x28, 1, isSignedType(insn->dType));1844emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);1845} else {1846emitInsn (0x1f000000);1847emitField(0x37, 1, isSignedType(insn->sType));1848emitField(0x36, 1, isSignedType(insn->dType));1849emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);1850emitCC (0x34);1851emitIMMD (0x14, 32, insn->src(1));1852}18531854emitGPR(0x08, insn->src(0));1855emitGPR(0x00, insn->def(0));1856}18571858void1859CodeEmitterGM107::emitIMAD()1860{1861/*XXX: imad32i exists, but not using it as third src overlaps dst */1862switch(insn->src(2).getFile()) {1863case FILE_GPR:1864switch (insn->src(1).getFile()) {1865case FILE_GPR:1866emitInsn(0x5a000000);1867emitGPR (0x14, insn->src(1));1868break;1869case FILE_MEMORY_CONST:1870emitInsn(0x4a000000);1871emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1872break;1873case FILE_IMMEDIATE:1874emitInsn(0x34000000);1875emitIMMD(0x14, 19, insn->src(1));1876break;1877default:1878assert(!"bad src1 file");1879break;1880}1881emitGPR (0x27, insn->src(2));1882break;1883case FILE_MEMORY_CONST:1884emitInsn(0x52000000);1885emitGPR (0x27, insn->src(1));1886emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));1887break;1888default:1889assert(!"bad src2 file");1890break;1891}18921893emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);1894emitField(0x35, 1, isSignedType(insn->sType));1895emitNEG (0x34, 
insn->src(2));1896emitNEG2 (0x33, insn->src(0), insn->src(1));1897emitSAT (0x32);1898emitX (0x31);1899emitField(0x30, 1, isSignedType(insn->dType));1900emitCC (0x2f);1901emitGPR (0x08, insn->src(0));1902emitGPR (0x00, insn->def(0));1903}19041905void1906CodeEmitterGM107::emitISCADD()1907{1908assert(insn->src(1).get()->asImm());19091910switch (insn->src(2).getFile()) {1911case FILE_GPR:1912emitInsn(0x5c180000);1913emitGPR (0x14, insn->src(2));1914break;1915case FILE_MEMORY_CONST:1916emitInsn(0x4c180000);1917emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));1918break;1919case FILE_IMMEDIATE:1920emitInsn(0x38180000);1921emitIMMD(0x14, 19, insn->src(2));1922break;1923default:1924assert(!"bad src1 file");1925break;1926}1927emitNEG (0x31, insn->src(0));1928emitNEG (0x30, insn->src(2));1929emitCC (0x2f);1930emitIMMD(0x27, 5, insn->src(1));1931emitGPR (0x08, insn->src(0));1932emitGPR (0x00, insn->def(0));1933}19341935void1936CodeEmitterGM107::emitXMAD()1937{1938assert(insn->src(0).getFile() == FILE_GPR);19391940bool constbuf = false;1941bool psl_mrg = true;1942bool immediate = false;1943if (insn->src(2).getFile() == FILE_MEMORY_CONST) {1944assert(insn->src(1).getFile() == FILE_GPR);1945constbuf = true;1946psl_mrg = false;1947emitInsn(0x51000000);1948emitGPR(0x27, insn->src(1));1949emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));1950} else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {1951assert(insn->src(2).getFile() == FILE_GPR);1952constbuf = true;1953emitInsn(0x4e000000);1954emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));1955emitGPR(0x27, insn->src(2));1956} else if (insn->src(1).getFile() == FILE_IMMEDIATE) {1957assert(insn->src(2).getFile() == FILE_GPR);1958assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));1959immediate = true;1960emitInsn(0x36000000);1961emitIMMD(0x14, 16, insn->src(1));1962emitGPR(0x27, insn->src(2));1963} else {1964assert(insn->src(1).getFile() == FILE_GPR);1965assert(insn->src(2).getFile() == FILE_GPR);1966emitInsn(0x5b000000);1967emitGPR(0x14, 
insn->src(1));1968emitGPR(0x27, insn->src(2));1969}19701971if (psl_mrg)1972emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);19731974unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);1975cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;1976emitField(0x32, constbuf ? 2 : 3, cmode);19771978emitX(constbuf ? 0x36 : 0x26);1979emitCC(0x2f);19801981emitGPR(0x0, insn->def(0));1982emitGPR(0x8, insn->src(0));19831984// source flags1985if (isSignedType(insn->sType)) {1986uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;1987emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);1988}1989emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);1990if (!immediate) {1991bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);1992emitField(constbuf ? 0x34 : 0x23, 1, h1);1993}1994}19951996void1997CodeEmitterGM107::emitIMNMX()1998{1999switch (insn->src(1).getFile()) {2000case FILE_GPR:2001emitInsn(0x5c200000);2002emitGPR (0x14, insn->src(1));2003break;2004case FILE_MEMORY_CONST:2005emitInsn(0x4c200000);2006emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));2007break;2008case FILE_IMMEDIATE:2009emitInsn(0x38200000);2010emitIMMD(0x14, 19, insn->src(1));2011break;2012default:2013assert(!"bad src1 file");2014break;2015}20162017emitField(0x30, 1, isSignedType(insn->dType));2018emitCC (0x2f);2019emitField(0x2b, 2, insn->subOp);2020emitField(0x2a, 1, insn->op == OP_MAX);2021emitPRED (0x27);2022emitGPR (0x08, insn->src(0));2023emitGPR (0x00, insn->def(0));2024}20252026void2027CodeEmitterGM107::emitICMP()2028{2029const CmpInstruction *insn = this->insn->asCmp();2030CondCode cc = insn->setCond;20312032if (insn->src(2).mod.neg())2033cc = reverseCondCode(cc);20342035switch(insn->src(2).getFile()) {2036case FILE_GPR:2037switch (insn->src(1).getFile()) {2038case FILE_GPR:2039emitInsn(0x5b400000);2040emitGPR (0x14, insn->src(1));2041break;2042case FILE_MEMORY_CONST:2043emitInsn(0x4b400000);2044emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));2045break;2046case 
FILE_IMMEDIATE:2047emitInsn(0x36400000);2048emitIMMD(0x14, 19, insn->src(1));2049break;2050default:2051assert(!"bad src1 file");2052break;2053}2054emitGPR (0x27, insn->src(2));2055break;2056case FILE_MEMORY_CONST:2057emitInsn(0x53400000);2058emitGPR (0x27, insn->src(1));2059emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));2060break;2061default:2062assert(!"bad src2 file");2063break;2064}20652066emitCond3(0x31, cc);2067emitField(0x30, 1, isSignedType(insn->sType));2068emitGPR (0x08, insn->src(0));2069emitGPR (0x00, insn->def(0));2070}20712072void2073CodeEmitterGM107::emitISET()2074{2075const CmpInstruction *insn = this->insn->asCmp();20762077switch (insn->src(1).getFile()) {2078case FILE_GPR:2079emitInsn(0x5b500000);2080emitGPR (0x14, insn->src(1));2081break;2082case FILE_MEMORY_CONST:2083emitInsn(0x4b500000);2084emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));2085break;2086case FILE_IMMEDIATE:2087emitInsn(0x36500000);2088emitIMMD(0x14, 19, insn->src(1));2089break;2090default:2091assert(!"bad src1 file");2092break;2093}20942095if (insn->op != OP_SET) {2096switch (insn->op) {2097case OP_SET_AND: emitField(0x2d, 2, 0); break;2098case OP_SET_OR : emitField(0x2d, 2, 1); break;2099case OP_SET_XOR: emitField(0x2d, 2, 2); break;2100default:2101assert(!"invalid set op");2102break;2103}2104emitPRED(0x27, insn->src(2));2105} else {2106emitPRED(0x27);2107}21082109emitCond3(0x31, insn->setCond);2110emitField(0x30, 1, isSignedType(insn->sType));2111emitCC (0x2f);2112emitField(0x2c, 1, insn->dType == TYPE_F32);2113emitX (0x2b);2114emitGPR (0x08, insn->src(0));2115emitGPR (0x00, insn->def(0));2116}21172118void2119CodeEmitterGM107::emitISETP()2120{2121const CmpInstruction *insn = this->insn->asCmp();21222123switch (insn->src(1).getFile()) {2124case FILE_GPR:2125emitInsn(0x5b600000);2126emitGPR (0x14, insn->src(1));2127break;2128case FILE_MEMORY_CONST:2129emitInsn(0x4b600000);2130emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));2131break;2132case 
FILE_IMMEDIATE:2133emitInsn(0x36600000);2134emitIMMD(0x14, 19, insn->src(1));2135break;2136default:2137assert(!"bad src1 file");2138break;2139}21402141if (insn->op != OP_SET) {2142switch (insn->op) {2143case OP_SET_AND: emitField(0x2d, 2, 0); break;2144case OP_SET_OR : emitField(0x2d, 2, 1); break;2145case OP_SET_XOR: emitField(0x2d, 2, 2); break;2146default:2147assert(!"invalid set op");2148break;2149}2150emitPRED(0x27, insn->src(2));2151} else {2152emitPRED(0x27);2153}21542155emitCond3(0x31, insn->setCond);2156emitField(0x30, 1, isSignedType(insn->sType));2157emitX (0x2b);2158emitGPR (0x08, insn->src(0));2159emitPRED (0x03, insn->def(0));2160if (insn->defExists(1))2161emitPRED(0x00, insn->def(1));2162else2163emitPRED(0x00);2164}21652166void2167CodeEmitterGM107::emitSHL()2168{2169switch (insn->src(1).getFile()) {2170case FILE_GPR:2171emitInsn(0x5c480000);2172emitGPR (0x14, insn->src(1));2173break;2174case FILE_MEMORY_CONST:2175emitInsn(0x4c480000);2176emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));2177break;2178case FILE_IMMEDIATE:2179emitInsn(0x38480000);2180emitIMMD(0x14, 19, insn->src(1));2181break;2182default:2183assert(!"bad src1 file");2184break;2185}21862187emitCC (0x2f);2188emitX (0x2b);2189emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);2190emitGPR (0x08, insn->src(0));2191emitGPR (0x00, insn->def(0));2192}21932194void2195CodeEmitterGM107::emitSHR()2196{2197switch (insn->src(1).getFile()) {2198case FILE_GPR:2199emitInsn(0x5c280000);2200emitGPR (0x14, insn->src(1));2201break;2202case FILE_MEMORY_CONST:2203emitInsn(0x4c280000);2204emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));2205break;2206case FILE_IMMEDIATE:2207emitInsn(0x38280000);2208emitIMMD(0x14, 19, insn->src(1));2209break;2210default:2211assert(!"bad src1 file");2212break;2213}22142215emitField(0x30, 1, isSignedType(insn->dType));2216emitCC (0x2f);2217emitX (0x2c);2218emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);2219emitGPR (0x08, insn->src(0));2220emitGPR (0x00, 
insn->def(0));2221}22222223void2224CodeEmitterGM107::emitSHF()2225{2226unsigned type;22272228switch (insn->src(1).getFile()) {2229case FILE_GPR:2230emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);2231emitGPR(0x14, insn->src(1));2232break;2233case FILE_IMMEDIATE:2234emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);2235emitIMMD(0x14, 19, insn->src(1));2236break;2237default:2238assert(!"bad src1 file");2239break;2240}22412242switch (insn->sType) {2243case TYPE_U64:2244type = 2;2245break;2246case TYPE_S64:2247type = 3;2248break;2249default:2250type = 0;2251break;2252}22532254emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));2255emitX (0x31);2256emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));2257emitCC (0x2f);2258emitGPR (0x27, insn->src(2));2259emitField(0x25, 2, type);2260emitGPR (0x08, insn->src(0));2261emitGPR (0x00, insn->def(0));2262}22632264void2265CodeEmitterGM107::emitPOPC()2266{2267switch (insn->src(0).getFile()) {2268case FILE_GPR:2269emitInsn(0x5c080000);2270emitGPR (0x14, insn->src(0));2271break;2272case FILE_MEMORY_CONST:2273emitInsn(0x4c080000);2274emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));2275break;2276case FILE_IMMEDIATE:2277emitInsn(0x38080000);2278emitIMMD(0x14, 19, insn->src(0));2279break;2280default:2281assert(!"bad src1 file");2282break;2283}22842285emitINV(0x28, insn->src(0));2286emitGPR(0x00, insn->def(0));2287}22882289void2290CodeEmitterGM107::emitBFI()2291{2292switch(insn->src(2).getFile()) {2293case FILE_GPR:2294switch (insn->src(1).getFile()) {2295case FILE_GPR:2296emitInsn(0x5bf00000);2297emitGPR (0x14, insn->src(1));2298break;2299case FILE_MEMORY_CONST:2300emitInsn(0x4bf00000);2301emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));2302break;2303case FILE_IMMEDIATE:2304emitInsn(0x36f00000);2305emitIMMD(0x14, 19, insn->src(1));2306break;2307default:2308assert(!"bad src1 file");2309break;2310}2311emitGPR (0x27, insn->src(2));2312break;2313case FILE_MEMORY_CONST:2314emitInsn(0x53f00000);2315emitGPR 
(0x27, insn->src(1));2316emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));2317break;2318default:2319assert(!"bad src2 file");2320break;2321}23222323emitCC (0x2f);2324emitGPR (0x08, insn->src(0));2325emitGPR (0x00, insn->def(0));2326}23272328void2329CodeEmitterGM107::emitBFE()2330{2331switch (insn->src(1).getFile()) {2332case FILE_GPR:2333emitInsn(0x5c000000);2334emitGPR (0x14, insn->src(1));2335break;2336case FILE_MEMORY_CONST:2337emitInsn(0x4c000000);2338emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));2339break;2340case FILE_IMMEDIATE:2341emitInsn(0x38000000);2342emitIMMD(0x14, 19, insn->src(1));2343break;2344default:2345assert(!"bad src1 file");2346break;2347}23482349emitField(0x30, 1, isSignedType(insn->dType));2350emitCC (0x2f);2351emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);2352emitGPR (0x08, insn->src(0));2353emitGPR (0x00, insn->def(0));2354}23552356void2357CodeEmitterGM107::emitFLO()2358{2359switch (insn->src(0).getFile()) {2360case FILE_GPR:2361emitInsn(0x5c300000);2362emitGPR (0x14, insn->src(0));2363break;2364case FILE_MEMORY_CONST:2365emitInsn(0x4c300000);2366emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));2367break;2368case FILE_IMMEDIATE:2369emitInsn(0x38300000);2370emitIMMD(0x14, 19, insn->src(0));2371break;2372default:2373assert(!"bad src1 file");2374break;2375}23762377emitField(0x30, 1, isSignedType(insn->dType));2378emitCC (0x2f);2379emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);2380emitINV (0x28, insn->src(0));2381emitGPR (0x00, insn->def(0));2382}23832384void2385CodeEmitterGM107::emitPRMT()2386{2387switch (insn->src(1).getFile()) {2388case FILE_GPR:2389emitInsn(0x5bc00000);2390emitGPR (0x14, insn->src(1));2391break;2392case FILE_MEMORY_CONST:2393emitInsn(0x4bc00000);2394emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));2395break;2396case FILE_IMMEDIATE:2397emitInsn(0x36c00000);2398emitIMMD(0x14, 19, insn->src(1));2399break;2400default:2401assert(!"bad src1 file");2402break;2403}24042405emitField(0x30, 3, 
insn->subOp);2406emitGPR (0x27, insn->src(2));2407emitGPR (0x08, insn->src(0));2408emitGPR (0x00, insn->def(0));2409}24102411/*******************************************************************************2412* memory2413******************************************************************************/24142415void2416CodeEmitterGM107::emitLDSTs(int pos, DataType type)2417{2418int data = 0;24192420switch (typeSizeof(type)) {2421case 1: data = isSignedType(type) ? 1 : 0; break;2422case 2: data = isSignedType(type) ? 3 : 2; break;2423case 4: data = 4; break;2424case 8: data = 5; break;2425case 16: data = 6; break;2426default:2427assert(!"bad type");2428break;2429}24302431emitField(pos, 3, data);2432}24332434void2435CodeEmitterGM107::emitLDSTc(int pos)2436{2437int mode = 0;24382439switch (insn->cache) {2440case CACHE_CA: mode = 0; break;2441case CACHE_CG: mode = 1; break;2442case CACHE_CS: mode = 2; break;2443case CACHE_CV: mode = 3; break;2444default:2445assert(!"invalid caching mode");2446break;2447}24482449emitField(pos, 2, mode);2450}24512452void2453CodeEmitterGM107::emitLDC()2454{2455emitInsn (0xef900000);2456emitLDSTs(0x30, insn->dType);2457emitField(0x2c, 2, insn->subOp);2458emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));2459emitGPR (0x00, insn->def(0));2460}24612462void2463CodeEmitterGM107::emitLDL()2464{2465emitInsn (0xef400000);2466emitLDSTs(0x30, insn->dType);2467emitLDSTc(0x2c);2468emitADDR (0x08, 0x14, 24, 0, insn->src(0));2469emitGPR (0x00, insn->def(0));2470}24712472void2473CodeEmitterGM107::emitLDS()2474{2475emitInsn (0xef480000);2476emitLDSTs(0x30, insn->dType);2477emitADDR (0x08, 0x14, 24, 0, insn->src(0));2478emitGPR (0x00, insn->def(0));2479}24802481void2482CodeEmitterGM107::emitLD()2483{2484emitInsn (0x80000000);2485emitPRED (0x3a);2486emitLDSTc(0x38);2487emitLDSTs(0x35, insn->dType);2488emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);2489emitADDR (0x08, 0x14, 32, 0, insn->src(0));2490emitGPR (0x00, 
insn->def(0));2491}24922493void2494CodeEmitterGM107::emitSTL()2495{2496emitInsn (0xef500000);2497emitLDSTs(0x30, insn->dType);2498emitLDSTc(0x2c);2499emitADDR (0x08, 0x14, 24, 0, insn->src(0));2500emitGPR (0x00, insn->src(1));2501}25022503void2504CodeEmitterGM107::emitSTS()2505{2506emitInsn (0xef580000);2507emitLDSTs(0x30, insn->dType);2508emitADDR (0x08, 0x14, 24, 0, insn->src(0));2509emitGPR (0x00, insn->src(1));2510}25112512void2513CodeEmitterGM107::emitST()2514{2515emitInsn (0xa0000000);2516emitPRED (0x3a);2517emitLDSTc(0x38);2518emitLDSTs(0x35, insn->dType);2519emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);2520emitADDR (0x08, 0x14, 32, 0, insn->src(0));2521emitGPR (0x00, insn->src(1));2522}25232524void2525CodeEmitterGM107::emitALD()2526{2527emitInsn (0xefd80000);2528emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);2529emitGPR (0x27, insn->src(0).getIndirect(1));2530emitO (0x20);2531emitP (0x1f);2532emitADDR (0x08, 20, 10, 0, insn->src(0));2533emitGPR (0x00, insn->def(0));2534}25352536void2537CodeEmitterGM107::emitAST()2538{2539emitInsn (0xeff00000);2540emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);2541emitGPR (0x27, insn->src(0).getIndirect(1));2542emitP (0x1f);2543emitADDR (0x08, 20, 10, 0, insn->src(0));2544emitGPR (0x00, insn->src(1));2545}25462547void2548CodeEmitterGM107::emitISBERD()2549{2550emitInsn(0xefd00000);2551emitGPR (0x08, insn->src(0));2552emitGPR (0x00, insn->def(0));2553}25542555void2556CodeEmitterGM107::emitAL2P()2557{2558emitInsn (0xefa00000);2559emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);2560emitPRED (0x2c);2561emitO (0x20);2562emitField(0x14, 11, insn->src(0).get()->reg.data.offset);2563emitGPR (0x08, insn->src(0).getIndirect(0));2564emitGPR (0x00, insn->def(0));2565}25662567void2568gm107_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)2569{2570int ipa = entry->ipa;2571int reg = entry->reg;2572int loc = entry->loc;25732574if (data.flatshade &&2575(ipa & 
NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
      /* flat shading: SC-mode inputs become FLAT and the register
       * operand is replaced by 0xff */
      ipa = NV50_IR_INTERP_FLAT;
      reg = 0xff;
   } else if (data.force_persample_interp &&
              (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
              (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
      ipa |= NV50_IR_INTERP_CENTROID;
   }
   /* Rewrite bits 20..23 of the second word: ipa bits 0-1 go to bits 22-23
    * and ipa bits 2-3 to bits 20-21. */
   code[loc + 1] &= ~(0xf << 0x14);
   code[loc + 1] |= (ipa & 0x3) << 0x16;
   code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
   /* Rewrite the 8-bit register field at bit 20 of the first word. */
   code[loc + 0] &= ~(0xff << 0x14);
   code[loc + 0] |= reg << 0x14;
}

/* Encode IPA (attribute interpolation).
 *
 * The interpolation mode goes to the 2-bit field at 0x36 and the sample
 * mode to the one at 0x34.  The instruction is also registered through
 * addInterp() with gm107_interpApply as the fixup callback.
 */
void
CodeEmitterGM107::emitIPA()
{
   int ipam = 0, ipas = 0;

   switch (insn->getInterpMode()) {
   case NV50_IR_INTERP_LINEAR     : ipam = 0; break;
   case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
   case NV50_IR_INTERP_FLAT       : ipam = 2; break;
   case NV50_IR_INTERP_SC         : ipam = 3; break;
   default:
      assert(!"invalid ipa mode");
      break;
   }

   switch (insn->getSampleMode()) {
   case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
   case NV50_IR_INTERP_CENTROID: ipas = 1; break;
   case NV50_IR_INTERP_OFFSET  : ipas = 2; break;
   default:
      assert(!"invalid ipa sample mode");
      break;
   }

   emitInsn (0xe0000000);
   emitField(0x36, 2, ipam);
   emitField(0x34, 2, ipas);
   emitSAT  (0x33);
   emitField(0x2f, 3, 7);
   emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
   /* Inspect the field emitADDR() just wrote into bits 8..15 of code[0]:
    * anything other than 0xff (presumably "no register" -- confirm) sets
    * the .idx bit. */
   if ((code[0] & 0x0000ff00) != 0x0000ff00)
      code[1] |= 0x00000040; /* .idx */
   emitGPR(0x00, insn->def(0));

   if (insn->op == OP_PINTERP) {
      /* PINTERP has an extra operand in src(1), so the optional offset
       * operand is src(2) */
      emitGPR(0x14, insn->src(1));
      if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
         emitGPR(0x27, insn->src(2));
      addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gm107_interpApply);
   } else {
      if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
         emitGPR(0x27, insn->src(1));
      emitGPR(0x14);
      addInterp(insn->ipa, 0xff, gm107_interpApply);
   }

   /* no offset operand: emit the 0x27 field without one */
   if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
      emitGPR(0x27);
}

void
CodeEmitterGM107::emitATOM()
{
   unsigned dType, subOp;

   if 
(insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {2646switch (insn->dType) {2647case TYPE_U32: dType = 0; break;2648case TYPE_U64: dType = 1; break;2649default: assert(!"unexpected dType"); dType = 0; break;2650}2651subOp = 15;26522653emitInsn (0xee000000);2654} else {2655switch (insn->dType) {2656case TYPE_U32: dType = 0; break;2657case TYPE_S32: dType = 1; break;2658case TYPE_U64: dType = 2; break;2659case TYPE_F32: dType = 3; break;2660case TYPE_B128: dType = 4; break;2661case TYPE_S64: dType = 5; break;2662default: assert(!"unexpected dType"); dType = 0; break;2663}2664if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)2665subOp = 8;2666else2667subOp = insn->subOp;26682669emitInsn (0xed000000);2670}26712672emitField(0x34, 4, subOp);2673emitField(0x31, 3, dType);2674emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);2675emitGPR (0x14, insn->src(1));2676emitADDR (0x08, 0x1c, 20, 0, insn->src(0));2677emitGPR (0x00, insn->def(0));2678}26792680void2681CodeEmitterGM107::emitATOMS()2682{2683unsigned dType, subOp;26842685if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {2686switch (insn->dType) {2687case TYPE_U32: dType = 0; break;2688case TYPE_U64: dType = 1; break;2689default: assert(!"unexpected dType"); dType = 0; break;2690}2691subOp = 4;26922693emitInsn (0xee000000);2694emitField(0x34, 1, dType);2695} else {2696switch (insn->dType) {2697case TYPE_U32: dType = 0; break;2698case TYPE_S32: dType = 1; break;2699case TYPE_U64: dType = 2; break;2700case TYPE_S64: dType = 3; break;2701default: assert(!"unexpected dType"); dType = 0; break;2702}27032704if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)2705subOp = 8;2706else2707subOp = insn->subOp;27082709emitInsn (0xec000000);2710emitField(0x1c, 3, dType);2711}27122713emitField(0x34, 4, subOp);2714emitGPR (0x14, insn->src(1));2715emitADDR (0x08, 0x1e, 22, 2, insn->src(0));2716emitGPR (0x00, insn->def(0));2717}27182719void2720CodeEmitterGM107::emitRED()2721{2722unsigned dType;27232724switch (insn->dType) {2725case TYPE_U32: dType = 0; 
break;2726case TYPE_S32: dType = 1; break;2727case TYPE_U64: dType = 2; break;2728case TYPE_F32: dType = 3; break;2729case TYPE_B128: dType = 4; break;2730case TYPE_S64: dType = 5; break;2731default: assert(!"unexpected dType"); dType = 0; break;2732}27332734emitInsn (0xebf80000);2735emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);2736emitField(0x17, 3, insn->subOp);2737emitField(0x14, 3, dType);2738emitADDR (0x08, 0x1c, 20, 0, insn->src(0));2739emitGPR (0x00, insn->src(1));2740}27412742void2743CodeEmitterGM107::emitCCTL()2744{2745unsigned width;2746if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {2747emitInsn(0xef600000);2748width = 30;2749} else {2750emitInsn(0xef800000);2751width = 22;2752}2753emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);2754emitADDR (0x08, 0x16, width, 2, insn->src(0));2755emitField(0x00, 4, insn->subOp);2756}27572758/*******************************************************************************2759* surface2760******************************************************************************/27612762void2763CodeEmitterGM107::emitPIXLD()2764{2765emitInsn (0xefe80000);2766emitPRED (0x2d);2767emitField(0x1f, 3, insn->subOp);2768emitGPR (0x08, insn->src(0));2769emitGPR (0x00, insn->def(0));2770}27712772/*******************************************************************************2773* texture2774******************************************************************************/27752776void2777CodeEmitterGM107::emitTEXs(int pos)2778{2779int src1 = insn->predSrc == 1 ? 
/* Translate a 4-bit component write mask into the 3-bit mask code used by the
 * scalar texture encodings (see the emitField(0x32, 3, ...) users).
 *
 * Masks 0x0, 0x5 and 0x6 have no entry: they trip the assert in debug builds
 * and yield 0 otherwise, exactly like any out-of-range value.
 */
static uint8_t
getTEXSMask(uint8_t mask)
{
   // Indexed by write mask; a negative entry marks a mask without encoding.
   static const int8_t encoding[16] = {
      /* 0x0 */ -1,  /* 0x1 */ 0x0, /* 0x2 */ 0x1, /* 0x3 */ 0x4,
      /* 0x4 */ 0x2, /* 0x5 */ -1,  /* 0x6 */ -1,  /* 0x7 */ 0x0,
      /* 0x8 */ 0x3, /* 0x9 */ 0x5, /* 0xa */ 0x6, /* 0xb */ 0x1,
      /* 0xc */ 0x7, /* 0xd */ 0x2, /* 0xe */ 0x3, /* 0xf */ 0x4
   };

   if (mask >= 16 || encoding[mask] < 0) {
      assert(!"invalid mask");
      return 0;
   }
   return static_cast<uint8_t>(encoding[mask]);
}
0xc : 0x5;2868case TEX_TARGET_2D_MS:2869assert(tex->tex.levelZero);2870return 0x6;2871case TEX_TARGET_3D:2872assert(tex->tex.levelZero);2873return 0x7;2874case TEX_TARGET_2D_ARRAY:2875assert(tex->tex.levelZero);2876return 0x8;28772878default:2879assert(false);2880return 0x0;2881}2882}28832884void2885CodeEmitterGM107::emitTEX()2886{2887const TexInstruction *insn = this->insn->asTex();2888int lodm = 0;28892890if (!insn->tex.levelZero) {2891switch (insn->op) {2892case OP_TEX: lodm = 0; break;2893case OP_TXB: lodm = 2; break;2894case OP_TXL: lodm = 3; break;2895default:2896assert(!"invalid tex op");2897break;2898}2899} else {2900lodm = 1;2901}29022903if (insn->tex.rIndirectSrc >= 0) {2904emitInsn (0xdeb80000);2905emitField(0x25, 2, lodm);2906emitField(0x24, 1, insn->tex.useOffsets == 1);2907} else {2908emitInsn (0xc0380000);2909emitField(0x37, 2, lodm);2910emitField(0x36, 1, insn->tex.useOffsets == 1);2911emitField(0x24, 13, insn->tex.r);2912}29132914emitField(0x32, 1, insn->tex.target.isShadow());2915emitField(0x31, 1, insn->tex.liveOnly);2916emitField(0x23, 1, insn->tex.derivAll);2917emitField(0x1f, 4, insn->tex.mask);2918emitField(0x1d, 2, insn->tex.target.isCube() ? 
3 :2919insn->tex.target.getDim() - 1);2920emitField(0x1c, 1, insn->tex.target.isArray());2921emitTEXs (0x14);2922emitGPR (0x08, insn->src(0));2923emitGPR (0x00, insn->def(0));2924}29252926void2927CodeEmitterGM107::emitTEXS()2928{2929const TexInstruction *insn = this->insn->asTex();2930assert(!insn->tex.derivAll);29312932switch (insn->op) {2933case OP_TEX:2934case OP_TXL:2935emitInsn (0xd8000000);2936emitField(0x35, 4, getTEXSTarget(insn));2937emitField(0x32, 3, getTEXSMask(insn->tex.mask));2938break;2939case OP_TXF:2940emitInsn (0xda000000);2941emitField(0x35, 4, getTLDSTarget(insn));2942emitField(0x32, 3, getTEXSMask(insn->tex.mask));2943break;2944case OP_TXG:2945assert(insn->tex.useOffsets != 4);2946emitInsn (0xdf000000);2947emitField(0x34, 2, insn->tex.gatherComp);2948emitField(0x33, 1, insn->tex.useOffsets == 1);2949emitField(0x32, 1, insn->tex.target.isShadow());2950break;2951default:2952unreachable("unknown op in emitTEXS()");2953break;2954}29552956emitField(0x31, 1, insn->tex.liveOnly);2957emitField(0x24, 13, insn->tex.r);2958if (insn->defExists(1))2959emitGPR(0x1c, insn->def(1));2960else2961emitGPR(0x1c);2962if (insn->srcExists(1))2963emitGPR(0x14, insn->getSrc(1));2964else2965emitGPR(0x14);2966emitGPR (0x08, insn->src(0));2967emitGPR (0x00, insn->def(0));2968}29692970void2971CodeEmitterGM107::emitTLD()2972{2973const TexInstruction *insn = this->insn->asTex();29742975if (insn->tex.rIndirectSrc >= 0) {2976emitInsn (0xdd380000);2977} else {2978emitInsn (0xdc380000);2979emitField(0x24, 13, insn->tex.r);2980}29812982emitField(0x37, 1, insn->tex.levelZero == 0);2983emitField(0x32, 1, insn->tex.target.isMS());2984emitField(0x31, 1, insn->tex.liveOnly);2985emitField(0x23, 1, insn->tex.useOffsets == 1);2986emitField(0x1f, 4, insn->tex.mask);2987emitField(0x1d, 2, insn->tex.target.isCube() ? 
3 :2988insn->tex.target.getDim() - 1);2989emitField(0x1c, 1, insn->tex.target.isArray());2990emitTEXs (0x14);2991emitGPR (0x08, insn->src(0));2992emitGPR (0x00, insn->def(0));2993}29942995void2996CodeEmitterGM107::emitTLD4()2997{2998const TexInstruction *insn = this->insn->asTex();29993000if (insn->tex.rIndirectSrc >= 0) {3001emitInsn (0xdef80000);3002emitField(0x26, 2, insn->tex.gatherComp);3003emitField(0x25, 2, insn->tex.useOffsets == 4);3004emitField(0x24, 2, insn->tex.useOffsets == 1);3005} else {3006emitInsn (0xc8380000);3007emitField(0x38, 2, insn->tex.gatherComp);3008emitField(0x37, 2, insn->tex.useOffsets == 4);3009emitField(0x36, 2, insn->tex.useOffsets == 1);3010emitField(0x24, 13, insn->tex.r);3011}30123013emitField(0x32, 1, insn->tex.target.isShadow());3014emitField(0x31, 1, insn->tex.liveOnly);3015emitField(0x23, 1, insn->tex.derivAll);3016emitField(0x1f, 4, insn->tex.mask);3017emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :3018insn->tex.target.getDim() - 1);3019emitField(0x1c, 1, insn->tex.target.isArray());3020emitTEXs (0x14);3021emitGPR (0x08, insn->src(0));3022emitGPR (0x00, insn->def(0));3023}30243025void3026CodeEmitterGM107::emitTXD()3027{3028const TexInstruction *insn = this->insn->asTex();30293030if (insn->tex.rIndirectSrc >= 0) {3031emitInsn (0xde780000);3032} else {3033emitInsn (0xde380000);3034emitField(0x24, 13, insn->tex.r);3035}30363037emitField(0x31, 1, insn->tex.liveOnly);3038emitField(0x23, 1, insn->tex.useOffsets == 1);3039emitField(0x1f, 4, insn->tex.mask);3040emitField(0x1d, 2, insn->tex.target.isCube() ? 
3 :3041insn->tex.target.getDim() - 1);3042emitField(0x1c, 1, insn->tex.target.isArray());3043emitTEXs (0x14);3044emitGPR (0x08, insn->src(0));3045emitGPR (0x00, insn->def(0));3046}30473048void3049CodeEmitterGM107::emitTMML()3050{3051const TexInstruction *insn = this->insn->asTex();30523053if (insn->tex.rIndirectSrc >= 0) {3054emitInsn (0xdf600000);3055} else {3056emitInsn (0xdf580000);3057emitField(0x24, 13, insn->tex.r);3058}30593060emitField(0x31, 1, insn->tex.liveOnly);3061emitField(0x23, 1, insn->tex.derivAll);3062emitField(0x1f, 4, insn->tex.mask);3063emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :3064insn->tex.target.getDim() - 1);3065emitField(0x1c, 1, insn->tex.target.isArray());3066emitTEXs (0x14);3067emitGPR (0x08, insn->src(0));3068emitGPR (0x00, insn->def(0));3069}30703071void3072CodeEmitterGM107::emitTXQ()3073{3074const TexInstruction *insn = this->insn->asTex();3075int type = 0;30763077switch (insn->tex.query) {3078case TXQ_DIMS : type = 0x01; break;3079case TXQ_TYPE : type = 0x02; break;3080case TXQ_SAMPLE_POSITION: type = 0x05; break;3081case TXQ_FILTER : type = 0x10; break;3082case TXQ_LOD : type = 0x12; break;3083case TXQ_WRAP : type = 0x14; break;3084case TXQ_BORDER_COLOUR : type = 0x16; break;3085default:3086assert(!"invalid txq query");3087break;3088}30893090if (insn->tex.rIndirectSrc >= 0) {3091emitInsn (0xdf500000);3092} else {3093emitInsn (0xdf480000);3094emitField(0x24, 13, insn->tex.r);3095}30963097emitField(0x31, 1, insn->tex.liveOnly);3098emitField(0x1f, 4, insn->tex.mask);3099emitField(0x16, 6, type);3100emitGPR (0x08, insn->src(0));3101emitGPR (0x00, insn->def(0));3102}31033104void3105CodeEmitterGM107::emitDEPBAR()3106{3107emitInsn (0xf0f00000);3108emitField(0x1d, 1, 1); /* le */3109emitField(0x1a, 3, 5);3110emitField(0x14, 6, insn->subOp);3111emitField(0x00, 6, insn->subOp);3112}31133114/*******************************************************************************3115* 
misc3116******************************************************************************/31173118void3119CodeEmitterGM107::emitNOP()3120{3121emitInsn(0x50b00000);3122}31233124void3125CodeEmitterGM107::emitKIL()3126{3127emitInsn (0xe3300000);3128emitCond5(0x00, CC_TR);3129}31303131void3132CodeEmitterGM107::emitOUT()3133{3134const int cut = insn->op == OP_RESTART || insn->subOp;3135const int emit = insn->op == OP_EMIT;31363137switch (insn->src(1).getFile()) {3138case FILE_GPR:3139emitInsn(0xfbe00000);3140emitGPR (0x14, insn->src(1));3141break;3142case FILE_IMMEDIATE:3143emitInsn(0xf6e00000);3144emitIMMD(0x14, 19, insn->src(1));3145break;3146case FILE_MEMORY_CONST:3147emitInsn(0xebe00000);3148emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));3149break;3150default:3151assert(!"bad src1 file");3152break;3153}31543155emitField(0x27, 2, (cut << 1) | emit);3156emitGPR (0x08, insn->src(0));3157emitGPR (0x00, insn->def(0));3158}31593160void3161CodeEmitterGM107::emitBAR()3162{3163uint8_t subop;31643165emitInsn (0xf0a80000);31663167switch (insn->subOp) {3168case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;3169case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;3170case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;3171case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;3172default:3173subop = 0x80;3174assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);3175break;3176}31773178emitField(0x20, 8, subop);31793180// barrier id3181if (insn->src(0).getFile() == FILE_GPR) {3182emitGPR(0x08, insn->src(0));3183} else {3184ImmediateValue *imm = insn->getSrc(0)->asImm();3185assert(imm);3186emitField(0x08, 8, imm->reg.data.u32);3187emitField(0x2b, 1, 1);3188}31893190// thread count3191if (insn->src(1).getFile() == FILE_GPR) {3192emitGPR(0x14, insn->src(1));3193} else {3194ImmediateValue *imm = insn->getSrc(0)->asImm();3195assert(imm);3196emitField(0x14, 12, imm->reg.data.u32);3197emitField(0x2c, 1, 1);3198}31993200if (insn->srcExists(2) && (insn->predSrc != 2)) {3201emitPRED (0x27, 
insn->src(2));3202emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));3203} else {3204emitField(0x27, 3, 7);3205}3206}32073208void3209CodeEmitterGM107::emitMEMBAR()3210{3211emitInsn (0xef980000);3212emitField(0x08, 2, insn->subOp >> 2);3213}32143215void3216CodeEmitterGM107::emitVOTE()3217{3218const ImmediateValue *imm;3219uint32_t u32;32203221int r = -1, p = -1;3222for (int i = 0; insn->defExists(i); i++) {3223if (insn->def(i).getFile() == FILE_GPR)3224r = i;3225else if (insn->def(i).getFile() == FILE_PREDICATE)3226p = i;3227}32283229emitInsn (0x50d80000);3230emitField(0x30, 2, insn->subOp);3231if (r >= 0)3232emitGPR (0x00, insn->def(r));3233else3234emitGPR (0x00);3235if (p >= 0)3236emitPRED (0x2d, insn->def(p));3237else3238emitPRED (0x2d);32393240switch (insn->src(0).getFile()) {3241case FILE_PREDICATE:3242emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));3243emitPRED (0x27, insn->src(0));3244break;3245case FILE_IMMEDIATE:3246imm = insn->getSrc(0)->asImm();3247assert(imm);3248u32 = imm->reg.data.u32;3249assert(u32 == 0 || u32 == 1);3250emitPRED(0x27);3251emitField(0x2a, 1, u32 == 0);3252break;3253default:3254assert(!"Unhandled src");3255break;3256}3257}32583259void3260CodeEmitterGM107::emitSUTarget()3261{3262const TexInstruction *insn = this->insn->asTex();3263int target = 0;32643265assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);32663267if (insn->tex.target == TEX_TARGET_BUFFER) {3268target = 2;3269} else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {3270target = 4;3271} else if (insn->tex.target == TEX_TARGET_2D ||3272insn->tex.target == TEX_TARGET_RECT) {3273target = 6;3274} else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||3275insn->tex.target == TEX_TARGET_CUBE ||3276insn->tex.target == TEX_TARGET_CUBE_ARRAY) {3277target = 8;3278} else if (insn->tex.target == TEX_TARGET_3D) {3279target = 10;3280} else {3281assert(insn->tex.target == TEX_TARGET_1D);3282}3283emitField(0x20, 4, 
target);3284}32853286void3287CodeEmitterGM107::emitSUHandle(const int s)3288{3289const TexInstruction *insn = this->insn->asTex();32903291assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);32923293if (insn->src(s).getFile() == FILE_GPR) {3294emitGPR(0x27, insn->src(s));3295} else {3296ImmediateValue *imm = insn->getSrc(s)->asImm();3297assert(imm);3298emitField(0x33, 1, 1);3299emitField(0x24, 13, imm->reg.data.u32);3300}3301}33023303void3304CodeEmitterGM107::emitSUSTx()3305{3306const TexInstruction *insn = this->insn->asTex();33073308emitInsn(0xeb200000);3309if (insn->op == OP_SUSTB)3310emitField(0x34, 1, 1);3311emitSUTarget();33123313emitLDSTc(0x18);3314emitField(0x14, 4, 0xf); // rgba3315emitGPR (0x08, insn->src(0));3316emitGPR (0x00, insn->src(1));33173318emitSUHandle(2);3319}33203321void3322CodeEmitterGM107::emitSULDx()3323{3324const TexInstruction *insn = this->insn->asTex();3325int type = 0;33263327emitInsn(0xeb000000);3328if (insn->op == OP_SULDB)3329emitField(0x34, 1, 1);3330emitSUTarget();33313332switch (insn->dType) {3333case TYPE_S8: type = 1; break;3334case TYPE_U16: type = 2; break;3335case TYPE_S16: type = 3; break;3336case TYPE_U32: type = 4; break;3337case TYPE_U64: type = 5; break;3338case TYPE_B128: type = 6; break;3339default:3340assert(insn->dType == TYPE_U8);3341break;3342}3343emitLDSTc(0x18);3344emitField(0x14, 3, type);3345emitGPR (0x00, insn->def(0));3346emitGPR (0x08, insn->src(0));33473348emitSUHandle(1);3349}33503351void3352CodeEmitterGM107::emitSUREDx()3353{3354const TexInstruction *insn = this->insn->asTex();3355uint8_t type = 0, subOp;33563357if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)3358emitInsn(0xeac00000);3359else3360emitInsn(0xea600000);33613362if (insn->op == OP_SUREDB)3363emitField(0x34, 1, 1);3364emitSUTarget();33653366// destination type3367switch (insn->dType) {3368case TYPE_S32: type = 1; break;3369case TYPE_U64: type = 2; break;3370case TYPE_F32: type = 3; break;3371case TYPE_S64: type = 5; 
break;3372default:3373assert(insn->dType == TYPE_U32);3374break;3375}33763377// atomic operation3378if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {3379subOp = 0;3380} else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {3381subOp = 8;3382} else {3383subOp = insn->subOp;3384}33853386emitField(0x24, 3, type);3387emitField(0x1d, 4, subOp);3388emitGPR (0x14, insn->src(1));3389emitGPR (0x08, insn->src(0));3390emitGPR (0x00, insn->def(0));33913392emitSUHandle(2);3393}33943395/*******************************************************************************3396* assembler front-end3397******************************************************************************/33983399bool3400CodeEmitterGM107::emitInstruction(Instruction *i)3401{3402const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;3403bool ret = true;34043405insn = i;34063407if (insn->encSize != 8) {3408ERROR("skipping undecodable instruction: "); insn->print();3409return false;3410} else3411if (codeSize + size > codeSizeLimit) {3412ERROR("code emitter output buffer too small\n");3413return false;3414}34153416if (writeIssueDelays) {3417int n = ((codeSize & 0x1f) / 8) - 1;3418if (n < 0) {3419data = code;3420data[0] = 0x00000000;3421data[1] = 0x00000000;3422code += 2;3423codeSize += 8;3424n++;3425}34263427emitField(data, n * 21, 21, insn->sched);3428}34293430switch (insn->op) {3431case OP_EXIT:3432emitEXIT();3433break;3434case OP_BRA:3435emitBRA();3436break;3437case OP_CALL:3438emitCAL();3439break;3440case OP_PRECONT:3441emitPCNT();3442break;3443case OP_CONT:3444emitCONT();3445break;3446case OP_PREBREAK:3447emitPBK();3448break;3449case OP_BREAK:3450emitBRK();3451break;3452case OP_PRERET:3453emitPRET();3454break;3455case OP_RET:3456emitRET();3457break;3458case OP_JOINAT:3459emitSSY();3460break;3461case OP_JOIN:3462emitSYNC();3463break;3464case OP_QUADON:3465emitSAM();3466break;3467case OP_QUADPOP:3468emitRAM();3469break;3470case OP_MOV:3471emitMOV();3472break;3473case OP_RDSV:3474if 
(targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))3475emitCS2R();3476else3477emitS2R();3478break;3479case OP_ABS:3480case OP_NEG:3481case OP_SAT:3482case OP_FLOOR:3483case OP_CEIL:3484case OP_TRUNC:3485case OP_CVT:3486if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||3487insn->src(0).getFile() == FILE_PREDICATE)) {3488emitMOV();3489} else if (isFloatType(insn->dType)) {3490if (isFloatType(insn->sType))3491emitF2F();3492else3493emitI2F();3494} else {3495if (isFloatType(insn->sType))3496emitF2I();3497else3498emitI2I();3499}3500break;3501case OP_SHFL:3502emitSHFL();3503break;3504case OP_ADD:3505case OP_SUB:3506if (isFloatType(insn->dType)) {3507if (insn->dType == TYPE_F64)3508emitDADD();3509else3510emitFADD();3511} else {3512emitIADD();3513}3514break;3515case OP_MUL:3516if (isFloatType(insn->dType)) {3517if (insn->dType == TYPE_F64)3518emitDMUL();3519else3520emitFMUL();3521} else {3522emitIMUL();3523}3524break;3525case OP_MAD:3526case OP_FMA:3527if (isFloatType(insn->dType)) {3528if (insn->dType == TYPE_F64)3529emitDFMA();3530else3531emitFFMA();3532} else {3533emitIMAD();3534}3535break;3536case OP_SHLADD:3537emitISCADD();3538break;3539case OP_XMAD:3540emitXMAD();3541break;3542case OP_MIN:3543case OP_MAX:3544if (isFloatType(insn->dType)) {3545if (insn->dType == TYPE_F64)3546emitDMNMX();3547else3548emitFMNMX();3549} else {3550emitIMNMX();3551}3552break;3553case OP_SHL:3554if (typeSizeof(insn->sType) == 8)3555emitSHF();3556else3557emitSHL();3558break;3559case OP_SHR:3560if (typeSizeof(insn->sType) == 8)3561emitSHF();3562else3563emitSHR();3564break;3565case OP_POPCNT:3566emitPOPC();3567break;3568case OP_INSBF:3569emitBFI();3570break;3571case OP_EXTBF:3572emitBFE();3573break;3574case OP_BFIND:3575emitFLO();3576break;3577case OP_PERMT:3578emitPRMT();3579break;3580case OP_SLCT:3581if (isFloatType(insn->dType))3582emitFCMP();3583else3584emitICMP();3585break;3586case OP_SET:3587case OP_SET_AND:3588case OP_SET_OR:3589case OP_SET_XOR:3590if 
(insn->def(0).getFile() != FILE_PREDICATE) {3591if (isFloatType(insn->sType))3592if (insn->sType == TYPE_F64)3593emitDSET();3594else3595emitFSET();3596else3597emitISET();3598} else {3599if (isFloatType(insn->sType))3600if (insn->sType == TYPE_F64)3601emitDSETP();3602else3603emitFSETP();3604else3605emitISETP();3606}3607break;3608case OP_SELP:3609emitSEL();3610break;3611case OP_PRESIN:3612case OP_PREEX2:3613emitRRO();3614break;3615case OP_COS:3616case OP_SIN:3617case OP_EX2:3618case OP_LG2:3619case OP_RCP:3620case OP_RSQ:3621case OP_SQRT:3622emitMUFU();3623break;3624case OP_AND:3625case OP_OR:3626case OP_XOR:3627switch (insn->def(0).getFile()) {3628case FILE_GPR: emitLOP(); break;3629case FILE_PREDICATE: emitPSETP(); break;3630default:3631assert(!"invalid bool op");3632}3633break;3634case OP_NOT:3635emitNOT();3636break;3637case OP_LOAD:3638switch (insn->src(0).getFile()) {3639case FILE_MEMORY_CONST : emitLDC(); break;3640case FILE_MEMORY_LOCAL : emitLDL(); break;3641case FILE_MEMORY_SHARED: emitLDS(); break;3642case FILE_MEMORY_GLOBAL: emitLD(); break;3643default:3644assert(!"invalid load");3645emitNOP();3646break;3647}3648break;3649case OP_STORE:3650switch (insn->src(0).getFile()) {3651case FILE_MEMORY_LOCAL : emitSTL(); break;3652case FILE_MEMORY_SHARED: emitSTS(); break;3653case FILE_MEMORY_GLOBAL: emitST(); break;3654default:3655assert(!"invalid store");3656emitNOP();3657break;3658}3659break;3660case OP_ATOM:3661if (insn->src(0).getFile() == FILE_MEMORY_SHARED)3662emitATOMS();3663else3664if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)3665emitRED();3666else3667emitATOM();3668break;3669case OP_CCTL:3670emitCCTL();3671break;3672case OP_VFETCH:3673emitALD();3674break;3675case OP_EXPORT:3676emitAST();3677break;3678case OP_PFETCH:3679emitISBERD();3680break;3681case OP_AFETCH:3682emitAL2P();3683break;3684case OP_LINTERP:3685case OP_PINTERP:3686emitIPA();3687break;3688case OP_PIXLD:3689emitPIXLD();3690break;3691case OP_TEX:3692case OP_TXL:3693if 
(insn->asTex()->tex.scalar)3694emitTEXS();3695else3696emitTEX();3697break;3698case OP_TXB:3699emitTEX();3700break;3701case OP_TXF:3702if (insn->asTex()->tex.scalar)3703emitTEXS();3704else3705emitTLD();3706break;3707case OP_TXG:3708if (insn->asTex()->tex.scalar)3709emitTEXS();3710else3711emitTLD4();3712break;3713case OP_TXD:3714emitTXD();3715break;3716case OP_TXQ:3717emitTXQ();3718break;3719case OP_TXLQ:3720emitTMML();3721break;3722case OP_TEXBAR:3723emitDEPBAR();3724break;3725case OP_QUADOP:3726emitFSWZADD();3727break;3728case OP_NOP:3729emitNOP();3730break;3731case OP_DISCARD:3732emitKIL();3733break;3734case OP_EMIT:3735case OP_RESTART:3736emitOUT();3737break;3738case OP_BAR:3739emitBAR();3740break;3741case OP_MEMBAR:3742emitMEMBAR();3743break;3744case OP_VOTE:3745emitVOTE();3746break;3747case OP_SUSTB:3748case OP_SUSTP:3749emitSUSTx();3750break;3751case OP_SULDB:3752case OP_SULDP:3753emitSULDx();3754break;3755case OP_SUREDB:3756case OP_SUREDP:3757emitSUREDx();3758break;3759default:3760assert(!"invalid opcode");3761emitNOP();3762ret = false;3763break;3764}37653766if (insn->join) {3767/*XXX*/3768}37693770code += 2;3771codeSize += 8;3772return ret;3773}37743775uint32_t3776CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const3777{3778return 8;3779}37803781/*******************************************************************************3782* sched data calculator3783******************************************************************************/37843785inline void3786SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)3787{3788assert(cnt < 16);3789insn->sched |= cnt;3790}37913792inline void3793SchedDataCalculatorGM107::emitYield(Instruction *insn)3794{3795insn->sched |= 1 << 4;3796}37973798inline void3799SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)3800{3801assert(id < 6);3802if ((insn->sched & 0xe0) == 0xe0)3803insn->sched ^= 0xe0;3804insn->sched |= id << 5;3805}38063807inline 
void3808SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)3809{3810assert(id < 6);3811if ((insn->sched & 0x700) == 0x700)3812insn->sched ^= 0x700;3813insn->sched |= id << 8;3814}38153816inline void3817SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)3818{3819assert(id < 6);3820insn->sched |= 1 << (11 + id);3821}38223823inline void3824SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)3825{3826assert(id < 4);3827insn->sched |= 1 << (17 + id);3828}38293830inline void3831SchedDataCalculatorGM107::printSchedInfo(int cycle,3832const Instruction *insn) const3833{3834uint8_t st, yl, wr, rd, wt, ru;38353836st = (insn->sched & 0x00000f) >> 0;3837yl = (insn->sched & 0x000010) >> 4;3838wr = (insn->sched & 0x0000e0) >> 5;3839rd = (insn->sched & 0x000700) >> 8;3840wt = (insn->sched & 0x01f800) >> 11;3841ru = (insn->sched & 0x1e0000) >> 17;38423843INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",3844cycle, st, yl, wr, rd, wt, ru);3845}38463847inline int3848SchedDataCalculatorGM107::getStall(const Instruction *insn) const3849{3850return insn->sched & 0xf;3851}38523853inline int3854SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const3855{3856return (insn->sched & 0x0000e0) >> 5;3857}38583859inline int3860SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const3861{3862return (insn->sched & 0x000700) >> 8;3863}38643865inline int3866SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const3867{3868return (insn->sched & 0x01f800) >> 11;3869}38703871// Emit the reuse flag which allows to make use of the new memory hierarchy3872// introduced since Maxwell, the operand reuse cache.3873//3874// It allows to reduce bank conflicts by caching operands. Each time you issue3875// an instruction, that flag can tell the hw which operands are going to be3876// re-used by the next instruction. 
Note that the next instruction has to use3877// the same GPR id in the same operand slot.3878void3879SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)3880{3881Instruction *next = insn->next;3882BitSet defs(255, 1);38833884if (!targ->isReuseSupported(insn))3885return;38863887for (int d = 0; insn->defExists(d); ++d) {3888const Value *def = insn->def(d).rep();3889if (insn->def(d).getFile() != FILE_GPR)3890continue;3891if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)3892continue;3893defs.set(def->reg.data.id);3894}38953896for (int s = 0; insn->srcExists(s); s++) {3897const Value *src = insn->src(s).rep();3898if (insn->src(s).getFile() != FILE_GPR)3899continue;3900if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)3901continue;3902if (defs.test(src->reg.data.id))3903continue;3904if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)3905continue;3906if (src->reg.data.id != next->getSrc(s)->reg.data.id)3907continue;3908assert(s < 4);3909emitReuse(insn, s);3910}3911}39123913void3914SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)3915{3916int a = v->reg.data.id, b;39173918switch (v->reg.file) {3919case FILE_GPR:3920b = a + v->reg.size / 4;3921for (int r = a; r < b; ++r)3922score->rd.r[r] = ready;3923break;3924case FILE_PREDICATE:3925// To immediately use a predicate set by any instructions, the minimum3926// number of stall counts is 13.3927score->rd.p[a] = cycle + 13;3928break;3929case FILE_FLAGS:3930score->rd.c = ready;3931break;3932default:3933break;3934}3935}39363937void3938SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const3939{3940int a = v->reg.data.id, b;3941int ready = cycle;39423943switch (v->reg.file) {3944case FILE_GPR:3945b = a + v->reg.size / 4;3946for (int r = a; r < b; ++r)3947ready = MAX2(ready, score->rd.r[r]);3948break;3949case FILE_PREDICATE:3950ready = MAX2(ready, score->rd.p[a]);3951break;3952case FILE_FLAGS:3953ready = MAX2(ready, 
score->rd.c);3954break;3955default:3956break;3957}3958if (cycle < ready)3959delay = MAX2(delay, ready - cycle);3960}39613962void3963SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)3964{3965const int ready = cycle + targ->getLatency(insn);39663967for (int d = 0; insn->defExists(d); ++d)3968recordWr(insn->getDef(d), cycle, ready);39693970#ifdef GM107_DEBUG_SCHED_DATA3971score->print(cycle);3972#endif3973}39743975#define GM107_MIN_ISSUE_DELAY 0x13976#define GM107_MAX_ISSUE_DELAY 0xf39773978int3979SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const3980{3981int delay = 0, ready = cycle;39823983for (int s = 0; insn->srcExists(s); ++s)3984checkRd(insn->getSrc(s), cycle, delay);39853986// TODO: make use of getReadLatency()!39873988return MAX2(delay, ready - cycle);3989}39903991void3992SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,3993const Instruction *next)3994{3995const OpClass cl = targ->getOpClass(insn->op);3996int wr, rd;39973998if (insn->op == OP_EXIT ||3999insn->op == OP_BAR ||4000insn->op == OP_MEMBAR) {4001delay = GM107_MAX_ISSUE_DELAY;4002} else4003if (insn->op == OP_QUADON ||4004insn->op == OP_QUADPOP) {4005delay = 0xd;4006} else4007if (cl == OPCLASS_FLOW || insn->join) {4008delay = 0xd;4009}40104011if (!next || !targ->canDualIssue(insn, next)) {4012delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);4013} else {4014delay = 0x0; // dual-issue4015}40164017wr = getWrDepBar(insn);4018rd = getRdDepBar(insn);40194020if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {4021// Barriers take one additional clock cycle to become active on top of4022// the clock consumed by the instruction producing it.4023if (!next || insn->bb != next->bb) {4024delay = 0x2;4025} else {4026int wt = getWtDepBar(next);4027if ((wt & (1 << wr)) | (wt & (1 << rd)))4028delay = 0x2;4029}4030}40314032emitStall(insn, delay);4033}403440354036// Return true when the given instruction needs to emit a read 
dependency4037// barrier (for WaR hazards) because it doesn't operate at a fixed latency, and4038// setting the maximum number of stall counts is not enough.4039bool4040SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const4041{4042BitSet srcs(255, 1), defs(255, 1);4043int a, b;40444045if (!targ->isBarrierRequired(insn))4046return false;40474048// Do not emit a read dependency barrier when the instruction doesn't use4049// any GPR (like st s[0x4] 0x0) as input because it's unnecessary.4050for (int s = 0; insn->srcExists(s); ++s) {4051const Value *src = insn->src(s).rep();4052if (insn->src(s).getFile() != FILE_GPR)4053continue;4054if (src->reg.data.id == 255)4055continue;40564057a = src->reg.data.id;4058b = a + src->reg.size / 4;4059for (int r = a; r < b; ++r)4060srcs.set(r);4061}40624063if (!srcs.popCount())4064return false;40654066// Do not emit a read dependency barrier when the output GPRs are equal to4067// the input GPRs (like rcp $r0 $r0) because a write dependency barrier will4068// be produced and WaR hazards are prevented.4069for (int d = 0; insn->defExists(d); ++d) {4070const Value *def = insn->def(d).rep();4071if (insn->def(d).getFile() != FILE_GPR)4072continue;4073if (def->reg.data.id == 255)4074continue;40754076a = def->reg.data.id;4077b = a + def->reg.size / 4;4078for (int r = a; r < b; ++r)4079defs.set(r);4080}40814082srcs.andNot(defs);4083if (!srcs.popCount())4084return false;40854086return true;4087}40884089// Return true when the given instruction needs to emit a write dependency4090// barrier (for RaW hazards) because it doesn't operate at a fixed latency, and4091// setting the maximum number of stall counts is not enough. 
This is only legal4092// if the instruction output something.4093bool4094SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const4095{4096if (!targ->isBarrierRequired(insn))4097return false;40984099for (int d = 0; insn->defExists(d); ++d) {4100if (insn->def(d).getFile() == FILE_GPR ||4101insn->def(d).getFile() == FILE_FLAGS ||4102insn->def(d).getFile() == FILE_PREDICATE)4103return true;4104}4105return false;4106}41074108// Helper function for findFirstUse() and findFirstDef()4109bool4110SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,4111const Value *val) const4112{4113if (val->reg.file != FILE_GPR &&4114val->reg.file != FILE_PREDICATE &&4115val->reg.file != FILE_FLAGS)4116return false;41174118for (int d = 0; insn->defExists(d); ++d) {4119const Value* def = insn->getDef(d);4120int minGPR = def->reg.data.id;4121int maxGPR = minGPR + def->reg.size / 4 - 1;41224123if (def->reg.file != val->reg.file)4124continue;41254126if (def->reg.file == FILE_GPR) {4127if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR ||4128val->reg.data.id > maxGPR)4129continue;4130return true;4131} else4132if (def->reg.file == FILE_PREDICATE) {4133if (val->reg.data.id != minGPR)4134continue;4135return true;4136} else4137if (def->reg.file == FILE_FLAGS) {4138if (val->reg.data.id != minGPR)4139continue;4140return true;4141}4142}41434144return false;4145}41464147// Find the next instruction inside the same basic block which uses (reads or4148// writes from) the output of the given instruction in order to avoid RaW and4149// WaW hazards.4150Instruction *4151SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const4152{4153Instruction *insn, *next;41544155if (!bari->defExists(0))4156return NULL;41574158for (insn = bari->next; insn != NULL; insn = next) {4159next = insn->next;41604161for (int s = 0; insn->srcExists(s); ++s)4162if (doesInsnWriteTo(bari, insn->getSrc(s)))4163return insn;41644165for (int d = 0; insn->defExists(d); ++d)4166if 
// Dependency barriers:
// This pass is a bit ugly and could probably be improved by performing a
// better allocation.
//
// The main idea is to avoid WaR and RaW hazards by emitting read/write
// dependency barriers using the control codes.
//
// The pass works on a single basic block in two sweeps:
//   1. Walk the instructions in order.  For each one, first make it wait on
//      (and release) every live barrier whose recorded first consumer has
//      been reached, then allocate a write and/or read barrier for the
//      instruction itself if needWrDepBar()/needRdDepBar() say so.
//   2. Walk the instructions again and drop wait bits that refer to a barrier
//      which no earlier instruction in the block has produced (or which has
//      already been waited on).
//
// Six barrier ids (0..5) are tracked.  When none is free, id 5 is reused.
// Always returns true.
bool
SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
{
   std::list<LiveBarUse> live_uses;
   std::list<LiveBarDef> live_defs;
   Instruction *insn, *next;
   BitSet bars(6, 1); // barrier ids currently allocated
   int bar_id;

   for (insn = bb->getEntry(); insn != NULL; insn = next) {
      Instruction *usei = NULL, *defi = NULL;
      bool need_wr_bar, need_rd_bar;

      next = insn->next;

      // Expire old barrier uses: once the first user of a write barrier has
      // been reached (by serial), this instruction waits on the barrier and
      // its id becomes available again.
      for (std::list<LiveBarUse>::iterator it = live_uses.begin();
           it != live_uses.end();) {
         if (insn->serial >= it->usei->serial) {
            int wr = getWrDepBar(it->insn);
            emitWtDepBar(insn, wr);
            bars.clr(wr); // free barrier
            it = live_uses.erase(it);
            continue;
         }
         ++it;
      }

      // Expire old barrier defs: same as above, for read barriers and the
      // first instruction that overwrites one of the protected sources.
      for (std::list<LiveBarDef>::iterator it = live_defs.begin();
           it != live_defs.end();) {
         if (insn->serial >= it->defi->serial) {
            int rd = getRdDepBar(it->insn);
            emitWtDepBar(insn, rd);
            bars.clr(rd); // free barrier
            it = live_defs.erase(it);
            continue;
         }
         ++it;
      }

      need_wr_bar = needWrDepBar(insn);
      need_rd_bar = needRdDepBar(insn);

      if (need_wr_bar) {
         // When the instruction requires to emit a write dependency barrier
         // (all which write something at a variable latency), find the next
         // instruction which reads the outputs (or writes to them,
         // potentially completing before this insn).
         usei = findFirstUse(insn);

         // Allocate and emit a new barrier.
         bar_id = bars.findFreeRange(1);
         if (bar_id == -1)
            bar_id = 5; // no free id: fall back to sharing the last one
         bars.set(bar_id);
         emitWrDepBar(insn, bar_id);
         // Without a user in this block there is nothing to expire later.
         if (usei)
            live_uses.push_back(LiveBarUse(insn, usei));
      }

      if (need_rd_bar) {
         // When the instruction requires to emit a read dependency barrier
         // (all which read something at a variable latency), find the next
         // instruction which will write the inputs.
         defi = findFirstDef(insn);

         // If the write barrier's first user comes no later than the first
         // overwriting instruction, no separate read barrier is emitted.
         if (usei && defi && usei->serial <= defi->serial)
            continue;

         // Allocate and emit a new barrier.
         bar_id = bars.findFreeRange(1);
         if (bar_id == -1)
            bar_id = 5; // no free id: fall back to sharing the last one
         bars.set(bar_id);
         emitRdDepBar(insn, bar_id);
         // Without an overwriting instruction in this block there is nothing
         // to expire later.
         if (defi)
            live_defs.push_back(LiveBarDef(insn, defi));
      }
   }

   // Remove unnecessary barrier waits.
   BitSet alive_bars(6, 1); // barriers produced and not yet waited on
   for (insn = bb->getEntry(); insn != NULL; insn = next) {
      int wr, rd, wt;

      next = insn->next;

      wr = getWrDepBar(insn);
      rd = getRdDepBar(insn);
      wt = getWtDepBar(insn);

      for (int idx = 0; idx < 6; ++idx) {
         if (!(wt & (1 << idx)))
            continue;
         if (!alive_bars.test(idx)) {
            // Nothing outstanding on this barrier: clear the wait bit
            // (the wait mask starts at bit 11 of sched).
            insn->sched &= ~(1 << (11 + idx));
         } else {
            alive_bars.clr(idx);
         }
      }

      // Values of 6 and above do not name one of the six tracked barriers.
      if (wr < 6)
         alive_bars.set(wr);
      if (rd < 6)
         alive_bars.set(rd);
   }

   return true;
}
true;4320}43214322bool4323SchedDataCalculatorGM107::visit(BasicBlock *bb)4324{4325Instruction *insn, *next = NULL;4326int cycle = 0;43274328for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {4329/*XXX*/4330insn->sched = 0x7e0;4331}43324333if (!debug_get_bool_option("NV50_PROG_SCHED", true))4334return true;43354336// Insert read/write dependency barriers for instructions which don't4337// operate at a fixed latency.4338insertBarriers(bb);43394340score = &scoreBoards.at(bb->getId());43414342for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {4343// back branches will wait until all target dependencies are satisfied4344if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized4345continue;4346BasicBlock *in = BasicBlock::get(ei.getNode());4347score->setMax(&scoreBoards.at(in->getId()));4348}43494350#ifdef GM107_DEBUG_SCHED_DATA4351INFO("=== BB:%i initial scores\n", bb->getId());4352score->print(cycle);4353#endif43544355// Because barriers are allocated locally (intra-BB), we have to make sure4356// that all produced barriers have been consumed before entering inside a4357// new basic block. The best way is to do a global allocation pre RA but4358// it's really more difficult, especially because of the phi nodes. 
Anyways,4359// it seems like that waiting on a barrier which has already been consumed4360// doesn't add any additional cost, it's just not elegant!4361Instruction *start = bb->getEntry();4362if (start && bb->cfg.incidentCount() > 0) {4363for (int b = 0; b < 6; b++)4364emitWtDepBar(start, b);4365}43664367for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {4368next = insn->next;43694370commitInsn(insn, cycle);4371int delay = calcDelay(next, cycle);4372setDelay(insn, delay, next);4373cycle += getStall(insn);43744375setReuseFlag(insn);43764377// XXX: The yield flag seems to destroy a bunch of things when it is4378// set on every instruction, need investigation.4379//emitYield(insn);43804381#ifdef GM107_DEBUG_SCHED_DATA4382printSchedInfo(cycle, insn);4383insn->print();4384next->print();4385#endif4386}43874388if (!insn)4389return true;4390commitInsn(insn, cycle);43914392int bbDelay = -1;43934394#ifdef GM107_DEBUG_SCHED_DATA4395fprintf(stderr, "last instruction is : ");4396insn->print();4397fprintf(stderr, "cycle=%d\n", cycle);4398#endif43994400for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {4401BasicBlock *out = BasicBlock::get(ei.getNode());44024403if (ei.getType() != Graph::Edge::BACK) {4404// Only test the first instruction of the outgoing block.4405next = out->getEntry();4406if (next) {4407bbDelay = MAX2(bbDelay, calcDelay(next, cycle));4408} else {4409// When the outgoing BB is empty, make sure to set the number of4410// stall counts needed by the instruction because we don't know the4411// next instruction.4412bbDelay = MAX2(bbDelay, targ->getLatency(insn));4413}4414} else {4415// Wait until all dependencies are satisfied.4416const int regsFree = score->getLatest();4417next = out->getFirst();4418for (int c = cycle; next && c < regsFree; next = next->next) {4419bbDelay = MAX2(bbDelay, calcDelay(next, c));4420c += getStall(next);4421}4422next = NULL;4423}4424}4425if (bb->cfg.outgoingCount() != 1)4426next = 
NULL;4427setDelay(insn, bbDelay, next);4428cycle += getStall(insn);44294430score->rebase(cycle); // common base for initializing out blocks' scores4431return true;4432}44334434/*******************************************************************************4435* main4436******************************************************************************/44374438void4439CodeEmitterGM107::prepareEmission(Function *func)4440{4441SchedDataCalculatorGM107 sched(targGM107);4442CodeEmitter::prepareEmission(func);4443sched.run(func, true, true);4444}44454446static inline uint32_t sizeToBundlesGM107(uint32_t size)4447{4448return (size + 23) / 24;4449}44504451void4452CodeEmitterGM107::prepareEmission(Program *prog)4453{4454for (ArrayList::Iterator fi = prog->allFuncs.iterator();4455!fi.end(); fi.next()) {4456Function *func = reinterpret_cast<Function *>(fi.get());4457func->binPos = prog->binSize;4458prepareEmission(func);44594460// adjust sizes & positions for schedulding info:4461if (prog->getTarget()->hasSWSched) {4462uint32_t adjPos = func->binPos;4463BasicBlock *bb = NULL;4464for (int i = 0; i < func->bbCount; ++i) {4465bb = func->bbArray[i];4466int32_t adjSize = bb->binSize;4467if (adjPos % 32) {4468adjSize -= 32 - adjPos % 32;4469if (adjSize < 0)4470adjSize = 0;4471}4472adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;4473bb->binPos = adjPos;4474bb->binSize = adjSize;4475adjPos += adjSize;4476}4477if (bb)4478func->binSize = adjPos - func->binPos;4479}44804481prog->binSize += func->binSize;4482}4483}44844485CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)4486: CodeEmitter(target),4487targGM107(target),4488progType(Program::TYPE_VERTEX),4489insn(NULL),4490writeIssueDelays(target->hasSWSched),4491data(NULL)4492{4493code = NULL;4494codeSize = codeSizeLimit = 0;4495relocInfo = NULL;4496}44974498CodeEmitter *4499TargetGM107::createCodeEmitterGM107(Program::Type type)4500{4501CodeEmitterGM107 *emit = new 
CodeEmitterGM107(this);4502emit->setProgramType(type);4503return emit;4504}45054506} // namespace nv50_ir450745084509