Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
4574 views
/*1* Copyright 2020 Red Hat Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/21#include "codegen/nv50_ir_emit_gv100.h"22#include "codegen/nv50_ir_sched_gm107.h"2324namespace nv50_ir {2526/*******************************************************************************27* instruction format helpers28******************************************************************************/2930#define FA_NODEF (1 << 0)31#define FA_RRR (1 << 1)32#define FA_RRI (1 << 2)33#define FA_RRC (1 << 3)34#define FA_RIR (1 << 4)35#define FA_RCR (1 << 5)3637#define FA_SRC_MASK 0x0ff38#define FA_SRC_NEG 0x10039#define FA_SRC_ABS 0x2004041#define EMPTY -142#define __(a) (a) // no source modifiers43#define _A(a) ((a) | FA_SRC_ABS)44#define N_(a) ((a) | FA_SRC_NEG)45#define NA(a) ((a) | FA_SRC_NEG | FA_SRC_ABS)4647void48CodeEmitterGV100::emitFormA_I32(int src)49{50emitIMMD(32, 32, insn->src(src));51if (insn->src(src).mod.abs())52code[1] &= 0x7fffffff;53if (insn->src(src).mod.neg())54code[1] ^= 0x80000000;55}5657void58CodeEmitterGV100::emitFormA_RRC(uint16_t op, int src1, int src2)59{60emitInsn(op);61if (src1 >= 0) {62emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));63emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));64emitGPR (64, insn->src(src1 & FA_SRC_MASK));65}66if (src2 >= 0) {67emitNEG (63, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));68emitABS (62, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));69emitCBUF(54, -1, 38, 0, 2, insn->src(src2 & FA_SRC_MASK));70}71}7273void74CodeEmitterGV100::emitFormA_RRI(uint16_t op, int src1, int src2)75{76emitInsn(op);77if (src1 >= 0) {78emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));79emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));80emitGPR (64, insn->src(src1 & FA_SRC_MASK));81}82if (src2 >= 0)83emitFormA_I32(src2 & FA_SRC_MASK);84}8586void87CodeEmitterGV100::emitFormA_RRR(uint16_t op, int src1, int src2)88{89emitInsn(op);90if (src2 >= 0) {91emitNEG (75, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));92emitABS (74, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));93emitGPR (64, insn->src(src2 & FA_SRC_MASK));94}9596if (src1 >= 0) {97emitNEG (63, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));98emitABS (62, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));99emitGPR (32, insn->src(src1 & FA_SRC_MASK));100}101}102103void104CodeEmitterGV100::emitFormA(uint16_t op, uint8_t forms,105int src0, int src1, int src2)106{107switch ((src1 < 0) ? FILE_GPR : insn->src(src1 & FA_SRC_MASK).getFile()) {108case FILE_GPR:109switch ((src2 < 0) ? FILE_GPR : insn->src(src2 & FA_SRC_MASK).getFile()) {110case FILE_GPR:111assert(forms & FA_RRR);112emitFormA_RRR((1 << 9) | op, src1, src2);113break;114case FILE_IMMEDIATE:115assert(forms & FA_RRI);116emitFormA_RRI((2 << 9) | op, src1, src2);117break;118case FILE_MEMORY_CONST:119assert(forms & FA_RRC);120emitFormA_RRC((3 << 9) | op, src1, src2);121break;122default:123assert(!"bad src2 file");124break;125}126break;127case FILE_IMMEDIATE:128assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);129assert(forms & FA_RIR);130emitFormA_RRI((4 << 9) | op, src2, src1);131break;132case FILE_MEMORY_CONST:133assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);134assert(forms & FA_RCR);135emitFormA_RRC((5 << 9) | op, src2, src1);136break;137default:138assert(!"bad src1 file");139break;140}141142if (src0 >= 0) {143assert(insn->src(src0 & FA_SRC_MASK).getFile() == FILE_GPR);144emitABS(73, (src0 & FA_SRC_MASK), (src0 & FA_SRC_ABS));145emitNEG(72, (src0 & FA_SRC_MASK), (src0 & FA_SRC_NEG));146emitGPR(24, insn->src(src0 & FA_SRC_MASK));147}148149if (!(forms & FA_NODEF))150emitGPR(16, insn->def(0));151}152153/*******************************************************************************154* control155******************************************************************************/156157void158CodeEmitterGV100::emitBRA()159{160const FlowInstruction *insn = this->insn->asFlow();161int64_t target = ((int64_t)insn->target.bb->binPos - (codeSize + 0x10)) / 4;162163assert(!insn->indirect && !insn->absolute);164165emitInsn (0x947);166emitField(34, 48, target);167emitPRED (87);168emitField(86, 2, 0); // ./.INC/.DEC169}170171void172CodeEmitterGV100::emitEXIT()173{174emitInsn (0x94d);175emitNOT (90);176emitPRED (87);177emitField(85, 1, 0); // .NO_ATEXIT178emitField(84, 2, 0); // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3179}180181void182CodeEmitterGV100::emitKILL()183{184emitInsn(0x95b);185emitPRED(87);186}187188void189CodeEmitterGV100::emitNOP()190{191emitInsn(0x918);192}193194void195CodeEmitterGV100::emitWARPSYNC()196{197emitFormA(0x148, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);198emitNOT (90);199emitPRED (87);200}201202/*******************************************************************************203* movement / conversion204******************************************************************************/205206void207CodeEmitterGV100::emitCS2R()208{209emitInsn(0x805);210emitSYS (72, insn->src(0));211emitGPR (16, insn->def(0));212}213214void215CodeEmitterGV100::emitF2F()216{217if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)218emitFormA(0x104, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);219else220emitFormA(0x110, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);221emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));222emitFMZ (80, 1);223emitRND (78);224emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));225emitField(60, 2, insn->subOp); // ./.H1/.INVALID2/.INVALID3226}227228void229CodeEmitterGV100::emitF2I()230{231if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)232emitFormA(0x105, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);233else234emitFormA(0x111, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);235emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));236emitFMZ (80, 1);237emitRND (78);238emitField(77, 1, 0); // .NTZ239emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));240emitField(72, 1, isSignedType(insn->dType));241}242243void244CodeEmitterGV100::emitFRND()245{246int subop = 0;247248switch (insn->op) {249case OP_CVT:250switch (insn->rnd) {251case ROUND_NI: subop = 0; break;252case ROUND_MI: subop = 1; break;253case ROUND_PI: subop = 2; break;254case ROUND_ZI: subop = 3; break;255default:256assert(!"invalid FRND mode");257break;258}259break;260case OP_FLOOR: subop = 1; break;261case OP_CEIL : subop = 2; break;262case OP_TRUNC: subop = 3; break;263default:264assert(!"invalid FRND opcode");265break;266}267268if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)269emitFormA(0x107, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);270else271emitFormA(0x113, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);272emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));273emitFMZ (80, 1);274emitField(78, 2, subop);275emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));276}277278void279CodeEmitterGV100::emitI2F()280{281if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)282emitFormA(0x106, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);283else284emitFormA(0x112, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);285emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));286emitRND (78);287emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));288emitField(74, 1, isSignedType(insn->sType));289if (typeSizeof(insn->sType) == 2)290emitField(60, 2, insn->subOp >> 1);291else292emitField(60, 2, insn->subOp); // ./.B1/.B2/.B3293}294295void296CodeEmitterGV100::emitMOV()297{298switch (insn->def(0).getFile()) {299case FILE_GPR:300switch (insn->src(0).getFile()) {301case FILE_GPR:302case FILE_MEMORY_CONST:303case FILE_IMMEDIATE:304emitFormA(0x002, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);305emitField(72, 4, insn->lanes);306break;307case FILE_PREDICATE:308emitInsn (0x807);309emitGPR (16, insn->def(0));310emitGPR (24);311emitField(32, 32, 0xffffffff);312emitField(90, 1, 1);313emitPRED (87, insn->src(0));314break;315case FILE_BARRIER:316case FILE_THREAD_STATE:317emitInsn (0x355);318emitBTS (24, insn->src(0));319emitGPR (16, insn->def(0));320break;321default:322assert(!"bad src file");323break;324}325break;326case FILE_PREDICATE:327emitInsn (0x20c);328emitPRED (87);329emitPRED (84);330emitNOT (71);331emitPRED (68);332emitPRED (81, insn->def(0));333emitCond3(76, CC_NE);334emitGPR (24, insn->src(0));335emitGPR (32);336break;337case FILE_BARRIER:338case FILE_THREAD_STATE:339switch (insn->src(0).getFile()) {340case FILE_GPR:341emitInsn (0x356);342emitGPR (32, insn->src(0));343emitBTS (24, insn->def(0));344break;345case FILE_BARRIER:346emitInsn (0xf56);347emitBTS (24, insn->def(0));348emitBTS (16, insn->src(0));349break;350case FILE_THREAD_STATE:351assert(insn->def(0).getFile() == FILE_BARRIER);352emitInsn (0xf55);353emitBTS (24, insn->src(0));354emitBTS (16, insn->def(0));355break;356default:357assert(!"bad src file");358break;359}360emitField(84, 1, insn->getDef(0)->reg.data.ts == TS_PQUAD_MACTIVE ? 1 : 0);361break;362default:363assert(!"bad dst file");364break;365}366}367368void369CodeEmitterGV100::emitPRMT()370{371emitFormA(0x016, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));372emitField(72, 3, insn->subOp);373}374375void376CodeEmitterGV100::emitS2R()377{378emitInsn(0x919);379emitSYS (72, insn->src(0));380emitGPR (16, insn->def(0));381}382383void384gv100_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)385{386int loc = entry->loc;387bool val = false;388switch (entry->ipa) {389case 0:390val = data.force_persample_interp;391break;392case 1:393val = data.msaa;394break;395}396if (val)397code[loc + 2] |= 1 << 26;398else399code[loc + 2] &= ~(1 << 26);400}401402void403CodeEmitterGV100::emitSEL()404{405emitFormA(0x007, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);406emitNOT (90, insn->src(2));407emitPRED (87, insn->src(2));408if (insn->subOp >= 1)409addInterp(insn->subOp - 1, 0, gv100_selpFlip);410}411412void413CodeEmitterGV100::emitSHFL()414{415switch (insn->src(1).getFile()) {416case FILE_GPR:417switch (insn->src(2).getFile()) {418case FILE_GPR:419emitInsn(0x389);420emitGPR (64, insn->src(2));421break;422case FILE_IMMEDIATE:423emitInsn(0x589);424emitIMMD(40, 13, insn->src(2));425break;426default:427assert(!"bad src2 file");428break;429}430emitGPR(32, insn->src(1));431break;432case FILE_IMMEDIATE:433switch (insn->src(2).getFile()) {434case FILE_GPR:435emitInsn(0x989);436emitGPR (64, insn->src(2));437break;438case FILE_IMMEDIATE:439emitInsn(0xf89);440emitIMMD(40, 13, insn->src(2));441break;442default:443assert(!"bad src2 file");444break;445}446emitIMMD(53, 5, insn->src(1));447break;448default:449assert(!"bad src1 file");450break;451}452453if (insn->defExists(1))454emitPRED(81, insn->def(1));455else456emitPRED(81);457458emitField(58, 2, insn->subOp);459emitGPR (24, insn->src(0));460emitGPR (16, insn->def(0));461}462463/*******************************************************************************464* fp32465******************************************************************************/466467void468CodeEmitterGV100::emitFADD()469{470if (insn->src(1).getFile() == FILE_GPR)471emitFormA(0x021, FA_RRR , NA(0), NA(1), EMPTY);472else473emitFormA(0x021, FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));474emitFMZ (80, 1);475emitRND (78);476emitSAT (77);477}478479void480CodeEmitterGV100::emitFFMA()481{482emitFormA(0x023, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));483emitField(80, 1, insn->ftz);484emitRND (78);485emitSAT (77);486emitField(76, 1, insn->dnz);487}488489void490CodeEmitterGV100::emitFMNMX()491{492emitFormA(0x009, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);493emitField(90, 1, insn->op == OP_MAX);494emitPRED (87);495emitFMZ (80, 1);496}497498void499CodeEmitterGV100::emitFMUL()500{501emitFormA(0x020, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);502emitField(80, 1, insn->ftz);503emitPDIV (84);504emitRND (78);505emitSAT (77);506emitField(76, 1, insn->dnz);507}508509void510CodeEmitterGV100::emitFSET_BF()511{512const CmpInstruction *insn = this->insn->asCmp();513514emitFormA(0x00a, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);515emitFMZ (80, 1);516emitCond4(76, insn->setCond);517518if (insn->op != OP_SET) {519switch (insn->op) {520case OP_SET_AND: emitField(74, 2, 0); break;521case OP_SET_OR : emitField(74, 2, 1); break;522case OP_SET_XOR: emitField(74, 2, 2); break;523default:524assert(!"invalid set op");525break;526}527emitNOT (90, insn->src(2));528emitPRED(87, insn->src(2));529} else {530emitPRED(87);531}532}533534void535CodeEmitterGV100::emitFSETP()536{537const CmpInstruction *insn = this->insn->asCmp();538539emitFormA(0x00b, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);540emitFMZ (80, 1);541emitCond4(76, insn->setCond);542543if (insn->op != OP_SET) {544switch (insn->op) {545case OP_SET_AND: emitField(74, 2, 0); break;546case OP_SET_OR : emitField(74, 2, 1); break;547case OP_SET_XOR: emitField(74, 2, 2); break;548default:549assert(!"invalid set op");550break;551}552emitNOT (90, insn->src(2));553emitPRED(87, insn->src(2));554} else {555emitPRED(87);556}557558if (insn->defExists(1))559emitPRED(84, insn->def(1));560else561emitPRED(84);562emitPRED(81, insn->def(0));563}564565void566CodeEmitterGV100::emitFSWZADD()567{568uint8_t subOp = 0;569570// NP/PN swapped vs SM60571for (int i = 0; i < 4; i++) {572uint8_t p = ((insn->subOp >> (i * 2)) & 3);573if (p == 1 || p == 2)574p ^= 3;575subOp |= p << (i * 2);576}577578emitInsn (0x822);579emitFMZ (80, 1);580emitRND (78);581emitField(77, 1, insn->lanes); /* abused for .ndv */582emitGPR (64, insn->src(1));583emitField(32, 8, subOp);584emitGPR (24, insn->src(0));585emitGPR (16, insn->def(0));586}587588void589CodeEmitterGV100::emitMUFU()590{591int mufu = 0;592593switch (insn->op) {594case OP_COS : mufu = 0; break;595case OP_SIN : mufu = 1; break;596case OP_EX2 : mufu = 2; break;597case OP_LG2 : mufu = 3; break;598case OP_RCP : mufu = 4 + 2 * insn->subOp; break;599case OP_RSQ : mufu = 5 + 2 * insn->subOp; break;600case OP_SQRT: mufu = 8; break;601default:602assert(!"invalid mufu");603break;604}605606emitFormA(0x108, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);607emitField(74, 4, mufu);608}609610/*******************************************************************************611* fp64612******************************************************************************/613614void615CodeEmitterGV100::emitDADD()616{617emitFormA(0x029, FA_RRR | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));618emitRND(78);619}620621void622CodeEmitterGV100::emitDFMA()623{624emitFormA(0x02b, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));625emitRND(78);626}627628void629CodeEmitterGV100::emitDMUL()630{631emitFormA(0x028, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);632emitRND(78);633}634635void636CodeEmitterGV100::emitDSETP()637{638const CmpInstruction *insn = this->insn->asCmp();639640if (insn->src(1).getFile() == FILE_GPR)641emitFormA(0x02a, FA_NODEF | FA_RRR , NA(0), NA(1), EMPTY);642else643emitFormA(0x02a, FA_NODEF | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));644645if (insn->op != OP_SET) {646switch (insn->op) {647case OP_SET_AND: emitField(74, 2, 0); break;648case OP_SET_OR : emitField(74, 2, 1); break;649case OP_SET_XOR: emitField(74, 2, 2); break;650default:651assert(!"invalid set op");652break;653}654emitNOT (90, insn->src(2));655emitPRED(87, insn->src(2));656} else {657emitPRED(87);658}659660if (insn->defExists(1))661emitPRED(84, insn->def(1));662else663emitPRED(84);664emitPRED (81, insn->def(0));665emitCond4(76, insn->setCond);666}667668/*******************************************************************************669* integer670******************************************************************************/671672void673CodeEmitterGV100::emitBMSK()674{675emitFormA(0x01b, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);676emitField(75, 1, insn->subOp); // .C/.W677}678679void680CodeEmitterGV100::emitBREV()681{682emitFormA(0x101, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);683}684685void686CodeEmitterGV100::emitFLO()687{688emitFormA(0x100, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);689emitPRED (81);690emitField(74, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);691emitField(73, 1, isSignedType(insn->dType));692emitNOT (63, insn->src(0));693}694695void696CodeEmitterGV100::emitIABS()697{698emitFormA(0x013, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);699}700701void702CodeEmitterGV100::emitIADD3()703{704// emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), N_(2));705emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), EMPTY);706emitGPR (64); //XXX: fix when switching back to N_(2)707emitPRED (84, NULL); // .CC1708emitPRED (81, insn->flagsDef >= 0 ? insn->getDef(insn->flagsDef) : NULL);709if (insn->flagsSrc >= 0) {710emitField(74, 1, 1); // .X711emitPRED (87, insn->getSrc(insn->flagsSrc));712emitField(77, 4, 0xf); // .X1713}714}715716void717CodeEmitterGV100::emitIMAD()718{719emitFormA(0x024, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));720emitField(73, 1, isSignedType(insn->sType));721}722723void724CodeEmitterGV100::emitIMAD_WIDE()725{726emitFormA(0x025, FA_RRR | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));727emitPRED (81);728emitField(73, 1, isSignedType(insn->sType));729}730731void732CodeEmitterGV100::emitISETP()733{734const CmpInstruction *insn = this->insn->asCmp();735736emitFormA(0x00c, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);737738if (insn->op != OP_SET) {739switch (insn->op) {740case OP_SET_AND: emitField(74, 2, 0); break;741case OP_SET_OR : emitField(74, 2, 1); break;742case OP_SET_XOR: emitField(74, 2, 2); break;743default:744assert(!"invalid set op");745break;746}747emitNOT (90, insn->src(2));748emitPRED(87, insn->src(2));749} else {750emitPRED(87);751}752753//XXX: CC->pred754if (insn->flagsSrc >= 0) {755assert(0);756emitField(68, 4, 6);757} else {758emitNOT (71);759if (!insn->subOp)760emitPRED(68);761}762763if (insn->defExists(1))764emitPRED(84, insn->def(1));765else766emitPRED(84);767emitPRED (81, insn->def(0));768emitCond3(76, insn->setCond);769emitField(73, 1, isSignedType(insn->sType));770771if (insn->subOp) { // .EX772assert(0);773emitField(72, 1, 1);774emitPRED (68, insn->srcExists(3) ? insn->src(3) : insn->src(2));775}776}777778void779CodeEmitterGV100::emitLEA()780{781assert(insn->src(1).get()->asImm());782783emitFormA(0x011, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(2), EMPTY);784emitPRED (81);785emitIMMD (75, 5, insn->src(1));786emitGPR (64);787}788789void790CodeEmitterGV100::emitLOP3_LUT()791{792emitFormA(0x012, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), __(2));793emitField(90, 1, 1);794emitPRED (87);795emitPRED (81);796emitField(80, 1, 0); // .PAND797emitField(72, 8, insn->subOp);798}799800void801CodeEmitterGV100::emitPOPC()802{803emitFormA(0x109, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);804emitNOT (63, insn->src(0));805}806807void808CodeEmitterGV100::emitSGXT()809{810emitFormA(0x01a, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);811emitField(75, 1, 0); // .W812emitField(73, 1, 1); // /.U32813}814815void816CodeEmitterGV100::emitSHF()817{818emitFormA(0x019, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));819emitField(80, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_HI));820emitField(76, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_R));821emitField(75, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_W));822823switch (insn->sType) {824case TYPE_S64: emitField(73, 2, 0); break;825case TYPE_U64: emitField(73, 2, 1); break;826case TYPE_S32: emitField(73, 2, 2); break;827case TYPE_U32:828default:829emitField(73, 2, 3);830break;831}832}833834/*******************************************************************************835* load/stores836******************************************************************************/837838void839CodeEmitterGV100::emitALD()840{841emitInsn (0x321);842emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1);843emitGPR (32, insn->src(0).getIndirect(1));844emitO (79);845emitP (76);846emitADDR (24, 40, 10, 0, insn->src(0));847emitGPR (16, insn->def(0));848}849850void851CodeEmitterGV100::emitAST()852{853emitInsn (0x322);854emitField(74, 2, (typeSizeof(insn->dType) / 4) - 1);855emitGPR (64, insn->src(0).getIndirect(1));856emitP (76);857emitADDR (24, 40, 10, 0, insn->src(0));858emitGPR (32, insn->src(1));859}860861void862CodeEmitterGV100::emitATOM()863{864unsigned subOp, dType;865866if (insn->subOp != NV50_IR_SUBOP_ATOM_CAS) {867emitInsn(0x38a);868869if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)870subOp = 8;871else872subOp = insn->subOp;873emitField(87, 4, subOp);874875switch (insn->dType) {876case TYPE_U32 : dType = 0; break;877case TYPE_S32 : dType = 1; break;878case TYPE_U64 : dType = 2; break;879case TYPE_F32 : dType = 3; break;880case TYPE_B128: dType = 4; break;881case TYPE_S64 : dType = 5; break;882default:883assert(!"unexpected dType");884dType = 0;885break;886}887emitField(73, 3, dType);888} else {889emitInsn(0x38b);890891switch (insn->dType) {892case TYPE_U32: dType = 0; break;893case TYPE_U64: dType = 2; break;894default:895assert(!"unexpected dType");896dType = 0;897break;898}899emitField(73, 3, dType);900emitGPR (64, insn->src(2));901}902903emitPRED (81);904emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID3905emitField(77, 2, 3); // .CTA/.SM/.GPU/.SYS906emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);907emitGPR (32, insn->src(1));908emitADDR (24, 40, 24, 0, insn->src(0));909emitGPR (16, insn->def(0));910}911912void913CodeEmitterGV100::emitATOMS()914{915unsigned dType, subOp;916917if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {918switch (insn->dType) {919case TYPE_U32: dType = 0; break;920case TYPE_S32: dType = 1; break;921case TYPE_U64: dType = 2; break;922default: assert(!"unexpected dType"); dType = 0; break;923}924925emitInsn (0x38d);926emitField(87, 1, 0); // ATOMS.CAS/ATOMS.CAST927emitField(73, 2, dType);928emitGPR (64, insn->src(2));929} else {930emitInsn(0x38c);931932if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)933subOp = 8;934else935subOp = insn->subOp;936emitField(87, 4, subOp);937938switch (insn->dType) {939case TYPE_U32: dType = 0; break;940case TYPE_S32: dType = 1; break;941case TYPE_U64: dType = 2; break;942default: assert(!"unexpected dType"); dType = 0; break;943}944945emitField(73, 2, dType);946}947948emitGPR (32, insn->src(1));949emitADDR (24, 40, 24, 0, insn->src(0));950emitGPR (16, insn->def(0));951}952953void954gv100_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)955{956int ipa = entry->ipa;957int loc = entry->loc;958959if (data.force_persample_interp &&960(ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&961(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {962ipa |= NV50_IR_INTERP_CENTROID;963}964965int sample;966switch (ipa & NV50_IR_INTERP_SAMPLE_MASK) {967case NV50_IR_INTERP_DEFAULT : sample = 0; break;968case NV50_IR_INTERP_CENTROID: sample = 1; break;969case NV50_IR_INTERP_OFFSET : sample = 2; break;970default: unreachable("invalid sample mode");971}972973int interp;974switch (ipa & NV50_IR_INTERP_MODE_MASK) {975case NV50_IR_INTERP_LINEAR :976case NV50_IR_INTERP_PERSPECTIVE: interp = 0; break;977case NV50_IR_INTERP_FLAT : interp = 1; break;978case NV50_IR_INTERP_SC : interp = 2; break;979default: unreachable("invalid ipa mode");980}981982code[loc + 2] &= ~(0xf << 12);983code[loc + 2] |= sample << 12;984code[loc + 2] |= interp << 14;985}986987void988CodeEmitterGV100::emitIPA()989{990emitInsn (0x326);991emitPRED (81, insn->defExists(1) ? insn->def(1) : NULL);992993switch (insn->getInterpMode()) {994case NV50_IR_INTERP_LINEAR :995case NV50_IR_INTERP_PERSPECTIVE: emitField(78, 2, 0); break;996case NV50_IR_INTERP_FLAT : emitField(78, 2, 1); break;997case NV50_IR_INTERP_SC : emitField(78, 2, 2); break;998default:999assert(!"invalid ipa mode");1000break;1001}10021003switch (insn->getSampleMode()) {1004case NV50_IR_INTERP_DEFAULT : emitField(76, 2, 0); break;1005case NV50_IR_INTERP_CENTROID: emitField(76, 2, 1); break;1006case NV50_IR_INTERP_OFFSET : emitField(76, 2, 2); break;1007default:1008assert(!"invalid sample mode");1009break;1010}10111012if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) {1013emitGPR (32);1014addInterp(insn->ipa, 0xff, gv100_interpApply);1015} else {1016emitGPR (32, insn->src(1));1017addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gv100_interpApply);1018}10191020assert(!insn->src(0).isIndirect(0));1021emitADDR (-1, 64, 8, 2, insn->src(0));1022emitGPR (16, insn->def(0));1023}10241025void1026CodeEmitterGV100::emitISBERD()1027{1028emitInsn(0x923);1029emitGPR (24, insn->src(0));1030emitGPR (16, insn->def(0));1031}10321033void1034CodeEmitterGV100::emitLDSTc(int posm, int poso)1035{1036int mode = 0;1037int order = 1;10381039switch (insn->cache) {1040case CACHE_CA: mode = 0; order = 1; break;1041case CACHE_CG: mode = 2; order = 2; break;1042case CACHE_CV: mode = 3; order = 2; break;1043default:1044assert(!"invalid caching mode");1045break;1046}10471048emitField(poso, 2, order);1049emitField(posm, 2, mode);1050}10511052void1053CodeEmitterGV100::emitLDSTs(int pos, DataType type)1054{1055int data = 0;10561057switch (typeSizeof(type)) {1058case 1: data = isSignedType(type) ? 1 : 0; break;1059case 2: data = isSignedType(type) ? 3 : 2; break;1060case 4: data = 4; break;1061case 8: data = 5; break;1062case 16: data = 6; break;1063default:1064assert(!"bad type");1065break;1066}10671068emitField(pos, 3, data);1069}10701071void1072CodeEmitterGV100::emitLD()1073{1074emitInsn (0x980);1075emitField(79, 2, 2); // .CONSTANT/./.STRONG/.MMIO1076emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS1077emitLDSTs(73, insn->dType);1078emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);1079emitADDR (24, 32, 32, 0, insn->src(0));1080emitGPR (16, insn->def(0));1081}10821083void1084CodeEmitterGV100::emitLDC()1085{1086emitFormA(0x182, FA_RCR, EMPTY, __(0), EMPTY);1087emitField(78, 2, insn->subOp);1088emitLDSTs(73, insn->dType);1089emitGPR (24, insn->src(0).getIndirect(0));1090}10911092void1093CodeEmitterGV100::emitLDL()1094{1095emitInsn (0x983);1096emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID71097emitLDSTs(73, insn->dType);1098emitADDR (24, 40, 24, 0, insn->src(0));1099emitGPR (16, insn->def(0));1100}11011102void1103CodeEmitterGV100::emitLDS()1104{1105emitInsn (0x984);1106emitLDSTs(73, insn->dType);1107emitADDR (24, 40, 24, 0, insn->src(0));1108emitGPR (16, insn->def(0));1109}11101111void1112CodeEmitterGV100::emitOUT()1113{1114const int cut = insn->op == OP_RESTART || insn->subOp;1115const int emit = insn->op == OP_EMIT;11161117if (insn->op != OP_FINAL)1118emitFormA(0x124, FA_RRR | FA_RIR, __(0), __(1), EMPTY);1119else1120emitFormA(0x124, FA_RRR | FA_RIR, __(0), EMPTY, EMPTY);1121emitField(78, 2, (cut << 1) | emit);1122}11231124void1125CodeEmitterGV100::emitRED()1126{1127unsigned dType;11281129switch (insn->dType) {1130case TYPE_U32: dType = 0; break;1131case TYPE_S32: dType = 1; break;1132case TYPE_U64: dType = 2; break;1133case TYPE_F32: dType = 3; break;1134case TYPE_B128: dType = 4; break;1135case TYPE_S64: dType = 5; break;1136default: assert(!"unexpected dType"); dType = 0; break;1137}11381139emitInsn (0x98e);1140emitField(87, 3, insn->subOp);1141emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA1142emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID31143emitField(77, 2, 3); // .CTA/.SM/.GPU/.SYS1144emitField(73, 3, dType);1145emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);1146emitGPR (32, insn->src(1));1147emitADDR (24, 40, 24, 0, insn->src(0));1148}11491150void1151CodeEmitterGV100::emitST()1152{1153emitInsn (0x385);1154emitField(79, 2, 2); // .INVALID0/./.STRONG/.MMIO1155emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS1156emitLDSTs(73, insn->dType);1157emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);1158emitGPR (64, insn->src(1));1159emitADDR (24, 32, 32, 0, insn->src(0));1160}11611162void1163CodeEmitterGV100::emitSTL()1164{1165emitInsn (0x387);1166emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID71167emitLDSTs(73, insn->dType);1168emitADDR (24, 40, 24, 0, insn->src(0));1169emitGPR (32, insn->src(1));1170}11711172void1173CodeEmitterGV100::emitSTS()1174{1175emitInsn (0x388);1176emitLDSTs(73, insn->dType);1177emitADDR (24, 40, 24, 0, insn->src(0));1178emitGPR (32, insn->src(1));1179}11801181/*******************************************************************************1182* texture1183******************************************************************************/11841185void1186CodeEmitterGV100::emitTEXs(int pos)1187{1188int src1 = insn->predSrc == 1 ? 2 : 1;1189if (insn->srcExists(src1))1190emitGPR(pos, insn->src(src1));1191else1192emitGPR(pos);1193}11941195void1196CodeEmitterGV100::emitTEX()1197{1198const TexInstruction *insn = this->insn->asTex();1199int lodm = 0;12001201if (!insn->tex.levelZero) {1202switch (insn->op) {1203case OP_TEX: lodm = 0; break;1204case OP_TXB: lodm = 2; break;1205case OP_TXL: lodm = 3; break;1206default:1207assert(!"invalid tex op");1208break;1209}1210} else {1211lodm = 1;1212}12131214if (insn->tex.rIndirectSrc < 0) {1215emitInsn (0xb60);1216emitField(54, 5, prog->driver->io.auxCBSlot);1217emitField(40, 14, insn->tex.r);1218} else {1219emitInsn (0x361);1220emitField(59, 1, 1); // .B1221}1222emitField(90, 1, insn->tex.liveOnly); // .NODEP1223emitField(87, 3, lodm);1224emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA1225emitField(78, 1, insn->tex.target.isShadow()); // .DC1226emitField(77, 1, insn->tex.derivAll); // .NDV1227emitField(76, 1, insn->tex.useOffsets == 1); // .AOFFI1228emitPRED (81);1229emitGPR (64, insn->def(1));1230emitGPR (16, insn->def(0));1231emitGPR (24, insn->src(0));1232emitTEXs (32);1233emitField(63, 1, insn->tex.target.isArray());1234emitField(61, 2, insn->tex.target.isCube() ? 3 :1235insn->tex.target.getDim() - 1);1236emitField(72, 4, insn->tex.mask);1237}12381239void1240CodeEmitterGV100::emitTLD()1241{1242const TexInstruction *insn = this->insn->asTex();12431244if (insn->tex.rIndirectSrc < 0) {1245emitInsn (0xb66);1246emitField(54, 5, prog->driver->io.auxCBSlot);1247emitField(40, 14, insn->tex.r);1248} else {1249emitInsn (0x367);1250emitField(59, 1, 1); // .B1251}1252emitField(90, 1, insn->tex.liveOnly);1253emitField(87, 3, insn->tex.levelZero ? 1 /* .LZ */ : 3 /* .LL */);1254emitPRED (81);1255emitField(78, 1, insn->tex.target.isMS());1256emitField(76, 1, insn->tex.useOffsets == 1);1257emitField(72, 4, insn->tex.mask);1258emitGPR (64, insn->def(1));1259emitField(63, 1, insn->tex.target.isArray());1260emitField(61, 2, insn->tex.target.isCube() ? 3 :1261insn->tex.target.getDim() - 1);1262emitTEXs (32);1263emitGPR (24, insn->src(0));1264emitGPR (16, insn->def(0));1265}12661267void1268CodeEmitterGV100::emitTLD4()1269{1270const TexInstruction *insn = this->insn->asTex();12711272int offsets = 0;1273switch (insn->tex.useOffsets) {1274case 4: offsets = 2; break;1275case 1: offsets = 1; break;1276case 0: offsets = 0; break;1277default: assert(!"invalid offsets count"); break;1278}12791280if (insn->tex.rIndirectSrc < 0) {1281emitInsn (0xb63);1282emitField(54, 5, prog->driver->io.auxCBSlot);1283emitField(40, 14, insn->tex.r);1284} else {1285emitInsn (0x364);1286emitField(59, 1, 1); // .B1287}1288emitField(90, 1, insn->tex.liveOnly);1289emitField(87, 2, insn->tex.gatherComp);1290emitField(84, 1, 1); // !.EF1291emitPRED (81);1292emitField(78, 1, insn->tex.target.isShadow());1293emitField(76, 2, offsets);1294emitField(72, 4, insn->tex.mask);1295emitGPR (64, insn->def(1));1296emitField(63, 1, insn->tex.target.isArray());1297emitField(61, 2, insn->tex.target.isCube() ? 3 :1298insn->tex.target.getDim() - 1);1299emitTEXs (32);1300emitGPR (24, insn->src(0));1301emitGPR (16, insn->def(0));1302}13031304void1305CodeEmitterGV100::emitTMML()1306{1307const TexInstruction *insn = this->insn->asTex();13081309if (insn->tex.rIndirectSrc < 0) {1310emitInsn (0xb69);1311emitField(54, 5, prog->driver->io.auxCBSlot);1312emitField(40, 14, insn->tex.r);1313} else {1314emitInsn (0x36a);1315emitField(59, 1, 1); // .B1316}1317emitField(90, 1, insn->tex.liveOnly);1318emitField(77, 1, insn->tex.derivAll);1319emitField(72, 4, insn->tex.mask);1320emitGPR (64, insn->def(1));1321emitField(63, 1, insn->tex.target.isArray());1322emitField(61, 2, insn->tex.target.isCube() ? 3 :1323insn->tex.target.getDim() - 1);1324emitTEXs (32);1325emitGPR (24, insn->src(0));1326emitGPR (16, insn->def(0));1327}13281329void1330CodeEmitterGV100::emitTXD()1331{1332const TexInstruction *insn = this->insn->asTex();13331334if (insn->tex.rIndirectSrc < 0) {1335emitInsn (0xb6c);1336emitField(54, 5, prog->driver->io.auxCBSlot);1337emitField(40, 14, insn->tex.r);1338} else {1339emitInsn (0x36d);1340emitField(59, 1, 1); // .B1341}1342emitField(90, 1, insn->tex.liveOnly);1343emitPRED (81);1344emitField(76, 1, insn->tex.useOffsets == 1);1345emitField(72, 4, insn->tex.mask);1346emitGPR (64, insn->def(1));1347emitField(63, 1, insn->tex.target.isArray());1348emitField(61, 2, insn->tex.target.isCube() ? 3 :1349insn->tex.target.getDim() - 1);1350emitTEXs (32);1351emitGPR (24, insn->src(0));1352emitGPR (16, insn->def(0));1353}13541355void1356CodeEmitterGV100::emitTXQ()1357{1358const TexInstruction *insn = this->insn->asTex();1359int type = 0;13601361switch (insn->tex.query) {1362case TXQ_DIMS : type = 0x00; break;1363case TXQ_TYPE : type = 0x01; break;1364case TXQ_SAMPLE_POSITION: type = 0x02; break;1365default:1366assert(!"invalid txq query");1367break;1368}13691370if (insn->tex.rIndirectSrc < 0) {1371emitInsn (0xb6f);1372emitField(54, 5, prog->driver->io.auxCBSlot);1373emitField(40, 14, insn->tex.r);1374} else {1375emitInsn (0x370);1376emitField(59, 1, 1); // .B1377}1378emitField(90, 1, insn->tex.liveOnly);1379emitField(72, 4, insn->tex.mask);1380emitGPR (64, insn->def(1));1381emitField(62, 2, type);1382emitGPR (24, insn->src(0));1383emitGPR (16, insn->def(0));1384}13851386/*******************************************************************************1387* surface1388******************************************************************************/13891390void1391CodeEmitterGV100::emitSUHandle(const int s)1392{1393const TexInstruction *insn = this->insn->asTex();13941395assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);13961397if (insn->src(s).getFile() == FILE_GPR) {1398emitGPR(64, insn->src(s));1399} else {1400assert(0);1401//XXX: not done1402ImmediateValue *imm = insn->getSrc(s)->asImm();1403assert(imm);1404emitField(0x33, 1, 1);1405emitField(0x24, 13, imm->reg.data.u32);1406}1407}14081409void1410CodeEmitterGV100::emitSUTarget()1411{1412const TexInstruction *insn = this->insn->asTex();1413int target = 0;14141415assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);14161417if (insn->tex.target == TEX_TARGET_BUFFER) {1418target = 1;1419} else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {1420target = 2;1421} else if (insn->tex.target == TEX_TARGET_2D ||1422insn->tex.target == TEX_TARGET_RECT) {1423target = 3;1424} else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||1425insn->tex.target == TEX_TARGET_CUBE ||1426insn->tex.target == TEX_TARGET_CUBE_ARRAY) {1427target = 4;1428} else if (insn->tex.target == TEX_TARGET_3D) {1429target = 5;1430} else {1431assert(insn->tex.target == TEX_TARGET_1D);1432}1433emitField(61, 3, target);1434}14351436void1437CodeEmitterGV100::emitSUATOM()1438{1439const TexInstruction *insn = this->insn->asTex();1440uint8_t type = 0, subOp;14411442if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)1443emitInsn(0x396); // SUATOM.D.CAS1444else1445emitInsn(0x394); // SUATOM.D14461447emitSUTarget();14481449// destination type1450switch (insn->dType) {1451case TYPE_S32: type = 1; break;1452case TYPE_U64: type = 2; break;1453case TYPE_F32: type = 3; break;1454case TYPE_S64: type = 5; break;1455default:1456assert(insn->dType == TYPE_U32);1457break;1458}14591460// atomic operation1461if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {1462subOp = 0;1463} else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {1464subOp = 8;1465} else {1466subOp = insn->subOp;1467}14681469emitField(87, 4, subOp);1470emitPRED (81);1471emitField(79, 2, 1);1472emitField(73, 3, type);1473emitField(72, 1, 0); // .BA1474emitGPR (32, insn->src(1));1475emitGPR (24, insn->src(0));1476emitGPR (16, insn->def(0));14771478emitSUHandle(2);1479}14801481void1482CodeEmitterGV100::emitSULD()1483{1484const TexInstruction *insn = this->insn->asTex();1485int type = 0;14861487if (insn->op == OP_SULDB) {1488emitInsn(0x99a);1489emitSUTarget();14901491switch (insn->dType) {1492case TYPE_U8: type = 0; break;1493case TYPE_S8: type = 1; break;1494case TYPE_U16: type = 2; break;1495case TYPE_S16: type = 3; break;1496case TYPE_U32: type = 4; break;1497case TYPE_U64: type = 5; break;1498case TYPE_B128: type = 6; break;1499default:1500assert(0);1501break;1502}1503emitField(73, 3, type);1504} else {1505emitInsn(0x998);1506emitSUTarget();1507emitField(72, 4, 0xf); // rgba1508}15091510emitPRED (81);1511emitLDSTc(77, 79);15121513emitGPR (16, insn->def(0));1514emitGPR (24, insn->src(0));15151516emitSUHandle(1);1517}15181519void1520CodeEmitterGV100::emitSUST()1521{1522const TexInstruction *insn = this->insn->asTex();15231524emitInsn(0x99c); // SUST.P1525#if 01526if (insn->op == OP_SUSTB)1527emitField(0x34, 1, 1);1528#endif1529emitSUTarget();15301531emitLDSTc(77, 79);1532emitField(72, 4, 0xf); // rgba1533emitGPR(32, insn->src(1));1534emitGPR(24, insn->src(0));1535emitSUHandle(2);1536}15371538/*******************************************************************************1539* misc1540******************************************************************************/15411542void1543CodeEmitterGV100::emitAL2P()1544{1545emitInsn (0x920);1546emitO (79);1547emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1);1548emitField(40, 11, insn->src(0).get()->reg.data.offset);1549emitGPR (24, insn->src(0).getIndirect(0));1550emitGPR (16, insn->def(0));1551}15521553void1554CodeEmitterGV100::emitBAR()1555{1556uint8_t subop, redop = 0x00;15571558// 801559// 01: DEFER_BLOCKING1560// 78:771561// 00: SYNC1562// 01: ARV1563// 02: RED1564// 03: SCAN1565// 75:741566// 00: RED.POPC1567// 01: RED.AND1568// 02: RED.OR15691570switch (insn->subOp) {1571case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; redop = 0x00; break;1572case NV50_IR_SUBOP_BAR_RED_AND : subop = 0x02; redop = 0x01; break;1573case NV50_IR_SUBOP_BAR_RED_OR : subop = 0x02; redop = 0x02; break;1574case NV50_IR_SUBOP_BAR_ARRIVE : subop = 0x01; break;1575default:1576subop = 0x00;1577assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);1578break;1579}15801581if (insn->src(0).getFile() == FILE_GPR) {1582emitInsn ((1 << 9) | 0x11d);1583emitGPR (32, insn->src(0)); //XXX: nvdisasm shows src0==src11584} else {1585ImmediateValue *imm = insn->getSrc(0)->asImm();1586assert(imm);1587if (insn->src(1).getFile() == FILE_GPR) {1588emitInsn ((4 << 9) | 0x11d);1589emitGPR (32, insn->src(1));1590} else {1591emitInsn ((5 << 9) | 0x11d);1592}1593emitField(54, 4, imm->reg.data.u32);1594}15951596emitField(77, 2, subop);1597emitField(74, 2, redop);15981599if (insn->srcExists(2) && (insn->predSrc != 2)) {1600emitField(90, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));1601emitPRED (87, insn->src(2));1602} else {1603emitField(87, 3, 7);1604}1605}16061607void1608CodeEmitterGV100::emitCCTL()1609{1610if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL)1611emitInsn(0x98f);1612else1613emitInsn(0x990);1614emitField(87, 4, insn->subOp);1615emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);1616emitADDR (24, 32, 32, 0, insn->src(0));1617}16181619void1620CodeEmitterGV100::emitMEMBAR()1621{1622emitInsn (0x992);1623switch (NV50_IR_SUBOP_MEMBAR_SCOPE(insn->subOp)) {1624case NV50_IR_SUBOP_MEMBAR_CTA: emitField(76, 3, 0); break;1625case NV50_IR_SUBOP_MEMBAR_GL : emitField(76, 3, 2); break;1626case NV50_IR_SUBOP_MEMBAR_SYS: emitField(76, 3, 3); break;1627default:1628assert(!"invalid scope");1629break;1630}1631}16321633void1634CodeEmitterGV100::emitPIXLD()1635{1636emitInsn (0x925);1637switch (insn->subOp) {1638case NV50_IR_SUBOP_PIXLD_COVMASK : emitField(78, 3, 1); break; // .COVMASK1639case NV50_IR_SUBOP_PIXLD_SAMPLEID: emitField(78, 3, 3); break; // .MY_INDEX1640default:1641assert(0);1642break;1643}1644emitPRED (71);1645emitGPR (16, insn->def(0));1646}16471648void1649CodeEmitterGV100::emitPLOP3_LUT()1650{1651uint8_t op[2] = {};16521653switch (insn->op) {1654case OP_AND: op[0] = 0xf0 & 0xcc; break;1655case OP_OR : op[0] = 0xf0 | 0xcc; break;1656case OP_XOR: op[0] = 0xf0 ^ 0xcc; break;1657default:1658assert(!"invalid PLOP3");1659break;1660}16611662emitInsn(0x81c);1663emitNOT (90, insn->src(0));1664emitPRED(87, insn->src(0));1665emitPRED(84); // def(1)1666emitPRED(81, insn->def(0));1667emitNOT (80, insn->src(1));1668emitPRED(77, insn->src(1));1669emitField(72, 5, op[0] >> 3);1670emitNOT (71); // src(2)1671emitPRED(68); // src(2)1672emitField(64, 3, op[0] & 7);1673emitField(16, 8, op[1]);1674}16751676void1677CodeEmitterGV100::emitVOTE()1678{1679const ImmediateValue *imm;1680uint32_t u32;16811682int r = -1, p = -1;1683for (int i = 0; insn->defExists(i); i++) {1684if (insn->def(i).getFile() == FILE_GPR)1685r = i;1686else if (insn->def(i).getFile() == FILE_PREDICATE)1687p = i;1688}16891690emitInsn (0x806);1691emitField(72, 2, insn->subOp);1692if (r >= 0)1693emitGPR (16, insn->def(r));1694else1695emitGPR (16);1696if (p >= 0)1697emitPRED (81, insn->def(p));1698else1699emitPRED (81);17001701switch (insn->src(0).getFile()) {1702case FILE_PREDICATE:1703emitField(90, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));1704emitPRED (87, insn->src(0));1705break;1706case FILE_IMMEDIATE:1707imm = insn->getSrc(0)->asImm();1708assert(imm);1709u32 = imm->reg.data.u32;1710assert(u32 == 0 || u32 == 1);1711emitField(90, 1, u32 == 0);1712emitPRED (87);1713break;1714default:1715assert(!"Unhandled src");1716break;1717}1718}17191720bool1721CodeEmitterGV100::emitInstruction(Instruction *i)1722{1723insn = i;17241725switch (insn->op) {1726case OP_ABS:1727assert(!isFloatType(insn->dType));1728emitIABS();1729break;1730case OP_ADD:1731if (isFloatType(insn->dType)) {1732if (insn->dType == TYPE_F32)1733emitFADD();1734else1735emitDADD();1736} else {1737emitIADD3();1738}1739break;1740case OP_AFETCH:1741emitAL2P();1742break;1743case OP_AND:1744case OP_OR:1745case OP_XOR:1746if (insn->def(0).getFile() == FILE_PREDICATE) {1747emitPLOP3_LUT();1748} else {1749assert(!"invalid logop");1750emitNOP();1751}1752break;1753case OP_ATOM:1754if (insn->src(0).getFile() == FILE_MEMORY_SHARED)1755emitATOMS();1756else1757if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)1758emitRED();1759else1760emitATOM();1761break;1762case OP_BAR:1763emitBAR();1764break;1765case OP_BFIND:1766emitFLO();1767break;1768case OP_BMSK:1769emitBMSK();1770break;1771case OP_BREV:1772emitBREV();1773break;1774case OP_BRA:1775case OP_JOIN: //XXX1776emitBRA();1777break;1778case OP_CCTL:1779emitCCTL();1780break;1781case OP_CEIL:1782case OP_CVT:1783case OP_FLOOR:1784case OP_TRUNC:1785if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||1786insn->def(0).getFile() == FILE_BARRIER ||1787insn->def(0).getFile() == FILE_THREAD_STATE ||1788insn->src(0).getFile() == FILE_PREDICATE ||1789insn->src(0).getFile() == FILE_BARRIER ||1790insn->src(0).getFile() == FILE_THREAD_STATE)) {1791emitMOV();1792} else if (isFloatType(insn->dType)) {1793if (isFloatType(insn->sType)) {1794if (insn->sType == insn->dType)1795emitFRND();1796else1797emitF2F();1798} else {1799emitI2F();1800}1801} else {1802if (isFloatType(insn->sType)) {1803emitF2I();1804} else {1805assert(!"I2I");1806emitNOP();1807}1808}1809break;1810case OP_COS:1811case OP_EX2:1812case OP_LG2:1813case OP_RCP:1814case OP_RSQ:1815case OP_SIN:1816case OP_SQRT:1817emitMUFU();1818break;1819case OP_DISCARD:1820emitKILL();1821break;1822case OP_EMIT:1823case OP_FINAL:1824case OP_RESTART:1825emitOUT();1826break;1827case OP_EXIT:1828emitEXIT();1829break;1830case OP_EXPORT:1831emitAST();1832break;1833case OP_FMA:1834case OP_MAD:1835if (isFloatType(insn->dType)) {1836if (insn->dType == TYPE_F32)1837emitFFMA();1838else1839emitDFMA();1840} else {1841if (typeSizeof(insn->dType) != 8)1842emitIMAD();1843else1844emitIMAD_WIDE();1845}1846break;1847case OP_JOINAT: //XXX1848emitNOP();1849break;1850case OP_LINTERP:1851emitIPA();1852break;1853case OP_LOAD:1854switch (insn->src(0).getFile()) {1855case FILE_MEMORY_CONST : emitLDC(); break;1856case FILE_MEMORY_LOCAL : emitLDL(); break;1857case FILE_MEMORY_SHARED: emitLDS(); break;1858case FILE_MEMORY_GLOBAL: emitLD(); break;1859default:1860assert(!"invalid load");1861emitNOP();1862break;1863}1864break;1865case OP_LOP3_LUT:1866emitLOP3_LUT();1867break;1868case OP_MAX:1869case OP_MIN:1870if (isFloatType(insn->dType)) {1871if (insn->dType == TYPE_F32) {1872emitFMNMX();1873} else {1874assert(!"invalid FMNMX");1875emitNOP();1876}1877} else {1878assert(!"invalid MNMX");1879emitNOP();1880}1881break;1882case OP_MEMBAR:1883emitMEMBAR();1884break;1885case OP_MOV:1886emitMOV();1887break;1888case OP_MUL:1889if (isFloatType(insn->dType)) {1890if (insn->dType == TYPE_F32)1891emitFMUL();1892else1893emitDMUL();1894} else {1895assert(!"invalid IMUL");1896emitNOP();1897}1898break;1899case OP_PERMT:1900emitPRMT();1901break;1902case OP_PFETCH:1903emitISBERD();1904break;1905case OP_PIXLD:1906emitPIXLD();1907break;1908case OP_POPCNT:1909emitPOPC();1910break;1911case OP_QUADOP:1912emitFSWZADD();1913break;1914case OP_RDSV:1915if (targ->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))1916emitCS2R();1917else1918emitS2R();1919break;1920case OP_SELP:1921emitSEL();1922break;1923case OP_SET:1924case OP_SET_AND:1925case OP_SET_OR:1926case OP_SET_XOR:1927if (insn->def(0).getFile() != FILE_PREDICATE) {1928if (isFloatType(insn->dType)) {1929if (insn->dType == TYPE_F32) {1930emitFSET_BF();1931} else {1932assert(!"invalid FSET");1933emitNOP();1934}1935} else {1936assert(!"invalid SET");1937emitNOP();1938}1939} else {1940if (isFloatType(insn->sType))1941if (insn->sType == TYPE_F64)1942emitDSETP();1943else1944emitFSETP();1945else1946emitISETP();1947}1948break;1949case OP_SGXT:1950emitSGXT();1951break;1952case OP_SHF:1953emitSHF();1954break;1955case OP_SHFL:1956emitSHFL();1957break;1958case OP_SHLADD:1959emitLEA();1960break;1961case OP_STORE:1962switch (insn->src(0).getFile()) {1963case FILE_MEMORY_LOCAL : emitSTL(); break;1964case FILE_MEMORY_SHARED: emitSTS(); break;1965case FILE_MEMORY_GLOBAL: emitST(); break;1966default:1967assert(!"invalid store");1968emitNOP();1969break;1970}1971break;1972case OP_SULDB:1973case OP_SULDP:1974emitSULD();1975break;1976case OP_SUREDB:1977case OP_SUREDP:1978emitSUATOM();1979break;1980case OP_SUSTB:1981case OP_SUSTP:1982emitSUST();1983break;1984case OP_TEX:1985case OP_TXB:1986case OP_TXL:1987emitTEX();1988break;1989case OP_TXD:1990emitTXD();1991break;1992case OP_TXF:1993emitTLD();1994break;1995case OP_TXG:1996emitTLD4();1997break;1998case OP_TXLQ:1999emitTMML();2000break;2001case OP_TXQ:2002emitTXQ();2003break;2004case OP_VFETCH:2005emitALD();2006break;2007case OP_VOTE:2008emitVOTE();2009break;2010case OP_WARPSYNC:2011emitWARPSYNC();2012break;2013default:2014assert(!"invalid opcode");2015emitNOP();2016break;2017}20182019code[3] &= 0x000001ff;2020code[3] |= insn->sched << 9;2021code += 4;2022codeSize += 16;2023return true;2024}20252026void2027CodeEmitterGV100::prepareEmission(BasicBlock *bb)2028{2029Function *func = bb->getFunction();2030Instruction *i;2031int j;20322033for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);20342035for (; j >= 0; --j) {2036BasicBlock *in = func->bbArray[j];2037Instruction *exit = in->getExit();20382039if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {2040in->binSize -= 16;2041func->binSize -= 16;20422043for (++j; j < func->bbCount; ++j)2044func->bbArray[j]->binPos -= 16;20452046in->remove(exit);2047}2048bb->binPos = in->binPos + in->binSize;2049if (in->binSize) // no more no-op branches to bb2050break;2051}2052func->bbArray[func->bbCount++] = bb;20532054if (!bb->getExit())2055return;20562057for (i = bb->getEntry(); i; i = i->next) {2058i->encSize = getMinEncodingSize(i);2059bb->binSize += i->encSize;2060}20612062assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 16));20632064func->binSize += bb->binSize;2065}20662067void2068CodeEmitterGV100::prepareEmission(Function *func)2069{2070SchedDataCalculatorGM107 sched(targ);2071CodeEmitter::prepareEmission(func);2072sched.run(func, true, true);2073}20742075void2076CodeEmitterGV100::prepareEmission(Program *prog)2077{2078for (ArrayList::Iterator fi = prog->allFuncs.iterator();2079!fi.end(); fi.next()) {2080Function *func = reinterpret_cast<Function *>(fi.get());2081func->binPos = prog->binSize;2082prepareEmission(func);2083prog->binSize += func->binSize;2084}20852086this->prog = prog;2087}20882089CodeEmitterGV100::CodeEmitterGV100(TargetGV100 *target)2090: CodeEmitter(target), prog(NULL), targ(target), insn(NULL)2091{2092code = NULL;2093codeSize = codeSizeLimit = 0;2094relocInfo = NULL;2095}2096};209720982099