Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
4574 views
/*1* Copyright 2011 Christoph Bumiller2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "codegen/nv50_ir.h"23#include "codegen/nv50_ir_target.h"2425namespace nv50_ir {2627const uint8_t Target::operationSrcNr[] =28{290, 0, // NOP, PHI300, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT311, 1, 2, // MOV, LOAD, STORE322, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD333, 3, // SHLADD, XMAD341, 1, 1, // ABS, NEG, NOT352, 2, 2, 3, 2, 2, 3, // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF362, 2, 1, // MAX, MIN, SAT371, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT383, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT391, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2401, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW410, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,420, 0, 0, // PRERET,CONT,BREAK430, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR441, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP451, 1, 1, // EMIT, RESTART, FINAL461, 1, 1, // TEX, TXB, TXL,471, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP481, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA493, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP500, // TEXBAR511, 1, // DFDX, DFDY521, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP532, 3, 2, 1, 1, 2, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT542, // SGXT553, 2, // ATOM, BAR562, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,572, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL583, // SHFL591, // VOTE601, // BUFQ611, // WARPSYNC62063};6465const OpClass Target::operationClass[] =66{67// NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT68OPCLASS_OTHER,69OPCLASS_PSEUDO,70OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,71// MOV; LOAD; STORE72OPCLASS_MOVE,73OPCLASS_LOAD,74OPCLASS_STORE,75// ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD76OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,77OPCLASS_ARITH, OPCLASS_ARITH,78OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,79// ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF80OPCLASS_CONVERT, OPCLASS_CONVERT,81OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,82OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT,83// MAX, MIN84OPCLASS_COMPARE, OPCLASS_COMPARE,85// SAT, CEIL, FLOOR, TRUNC; CVT86OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,87OPCLASS_CONVERT,88// SET(AND,OR,XOR); SELP, SLCT89OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,90OPCLASS_COMPARE, OPCLASS_COMPARE,91// RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW92OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,93OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,94OPCLASS_SFU, OPCLASS_SFU,95// BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN96OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,97OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,98OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,99// DISCARD, EXIT100OPCLASS_FLOW, OPCLASS_FLOW,101// MEMBAR102OPCLASS_CONTROL,103// VFETCH, PFETCH, AFETCH, EXPORT104OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,105// LINTERP, PINTERP106OPCLASS_SFU, OPCLASS_SFU,107// EMIT, RESTART, FINAL108OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL,109// TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP110OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,111OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,112OPCLASS_TEXTURE, OPCLASS_TEXTURE,113// SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA114OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,115OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,116// SUBFM, SUCLAMP, SUEAU, SUQ, MADSP117OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,118// TEXBAR119OPCLASS_OTHER,120// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP121OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,122OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,123// POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT124OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,125OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,126// ATOM, BAR127OPCLASS_ATOMIC, OPCLASS_CONTROL,128// VADD, VAVG, VMIN, VMAX129OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,130// VSAD, VSET, VSHR, VSHL131OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,132// VSEL, CCTL133OPCLASS_VECTOR, OPCLASS_CONTROL,134// SHFL135OPCLASS_OTHER,136// VOTE137OPCLASS_OTHER,138// BUFQ139OPCLASS_OTHER,140// WARPSYNC141OPCLASS_OTHER,142OPCLASS_PSEUDO // LAST143};144145146extern Target *getTargetGV100(unsigned int chipset);147extern Target *getTargetGM107(unsigned int chipset);148extern Target *getTargetNVC0(unsigned int chipset);149extern Target *getTargetNV50(unsigned int chipset);150151Target *Target::create(unsigned int chipset)152{153STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);154STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);155switch (chipset & ~0xf) {156case 0x160:157case 0x140:158return getTargetGV100(chipset);159case 0x110:160case 0x120:161case 0x130:162return getTargetGM107(chipset);163case 0xc0:164case 0xd0:165case 0xe0:166case 0xf0:167case 0x100:168return getTargetNVC0(chipset);169case 0x50:170case 0x80:171case 0x90:172case 0xa0:173return getTargetNV50(chipset);174default:175ERROR("unsupported target: NV%x\n", chipset);176return 0;177}178}179180void Target::destroy(Target *targ)181{182delete targ;183}184185CodeEmitter::CodeEmitter(const Target *target) : targ(target), code(NULL),186codeSize(0), codeSizeLimit(0), relocInfo(NULL), fixupInfo(NULL)187{188}189190void191CodeEmitter::setCodeLocation(void *ptr, uint32_t size)192{193code = reinterpret_cast<uint32_t *>(ptr);194codeSize = 0;195codeSizeLimit = size;196}197198void199CodeEmitter::printBinary() const200{201uint32_t *bin = code - codeSize / 4;202INFO("program binary (%u bytes)", codeSize);203for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {204if ((pos % 8) == 0)205INFO("\n");206INFO("%08x ", bin[pos]);207}208INFO("\n");209}210211static inline uint32_t sizeToBundlesNVE4(uint32_t size)212{213return (size + 55) / 56;214}215216void217CodeEmitter::prepareEmission(Program *prog)218{219for (ArrayList::Iterator fi = prog->allFuncs.iterator();220!fi.end(); fi.next()) {221Function *func = reinterpret_cast<Function *>(fi.get());222func->binPos = prog->binSize;223prepareEmission(func);224225// adjust sizes & positions for scheduling info:226if (prog->getTarget()->hasSWSched) {227uint32_t adjPos = func->binPos;228BasicBlock *bb = NULL;229for (int i = 0; i < func->bbCount; ++i) {230bb = func->bbArray[i];231int32_t adjSize = bb->binSize;232if (adjPos % 64) {233adjSize -= 64 - adjPos % 64;234if (adjSize < 0)235adjSize = 0;236}237adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;238bb->binPos = adjPos;239bb->binSize = adjSize;240adjPos += adjSize;241}242if (bb)243func->binSize = adjPos - func->binPos;244}245246prog->binSize += func->binSize;247}248}249250void251CodeEmitter::prepareEmission(Function *func)252{253func->bbCount = 0;254func->bbArray = new BasicBlock * [func->cfg.getSize()];255256BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;257258for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())259prepareEmission(BasicBlock::get(*it));260}261262void263CodeEmitter::prepareEmission(BasicBlock *bb)264{265Instruction *i, *next;266Function *func = bb->getFunction();267int j;268unsigned int nShort;269270for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);271272for (; j >= 0; --j) {273BasicBlock *in = func->bbArray[j];274Instruction *exit = in->getExit();275276if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {277in->binSize -= 8;278func->binSize -= 8;279280for (++j; j < func->bbCount; ++j)281func->bbArray[j]->binPos -= 8;282283in->remove(exit);284}285bb->binPos = in->binPos + in->binSize;286if (in->binSize) // no more no-op branches to bb287break;288}289func->bbArray[func->bbCount++] = bb;290291if (!bb->getExit())292return;293294// determine encoding size, try to group short instructions295nShort = 0;296for (i = bb->getEntry(); i; i = next) {297next = i->next;298299i->encSize = getMinEncodingSize(i);300if (next && i->encSize < 8)301++nShort;302else303if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {304if (i->isCommutationLegal(i->next)) {305bb->permuteAdjacent(i, next);306next->encSize = 4;307next = i;308i = i->prev;309++nShort;310} else311if (i->isCommutationLegal(i->prev) && next->next) {312bb->permuteAdjacent(i->prev, i);313next->encSize = 4;314next = next->next;315bb->binSize += 4;316++nShort;317} else {318i->encSize = 8;319i->prev->encSize = 8;320bb->binSize += 4;321nShort = 0;322}323} else {324i->encSize = 8;325if (nShort & 1) {326i->prev->encSize = 8;327bb->binSize += 4;328}329nShort = 0;330}331bb->binSize += i->encSize;332}333334if (bb->getExit()->encSize == 4) {335assert(nShort);336bb->getExit()->encSize = 8;337bb->binSize += 4;338339if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {340bb->binSize += 8;341bb->getExit()->prev->encSize = 8;342}343}344assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));345346func->binSize += bb->binSize;347}348349bool350Program::emitBinary(struct nv50_ir_prog_info_out *info)351{352CodeEmitter *emit = target->getCodeEmitter(progType);353354emit->prepareEmission(this);355356if (dbgFlags & NV50_IR_DEBUG_BASIC)357this->print();358359if (!binSize) {360code = NULL;361return false;362}363code = reinterpret_cast<uint32_t *>(MALLOC(binSize));364if (!code)365return false;366emit->setCodeLocation(code, binSize);367info->bin.instructions = 0;368369for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {370Function *fn = reinterpret_cast<Function *>(fi.get());371372assert(emit->getCodeSize() == fn->binPos);373374for (int b = 0; b < fn->bbCount; ++b) {375for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {376emit->emitInstruction(i);377info->bin.instructions++;378if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) &&379(isFloatType(i->sType) || isFloatType(i->dType)))380info->io.fp64 = true;381}382}383}384info->io.fp64 |= fp64;385info->bin.relocData = emit->getRelocInfo();386info->bin.fixupData = emit->getFixupInfo();387388// the nvc0 driver will print the binary itself together with the header389if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)390emit->printBinary();391392delete emit;393return true;394}395396#define RELOC_ALLOC_INCREMENT 8397398bool399CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,400int s)401{402unsigned int n = relocInfo ? relocInfo->count : 0;403404if (!(n % RELOC_ALLOC_INCREMENT)) {405size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);406relocInfo = reinterpret_cast<RelocInfo *>(407REALLOC(relocInfo, n ? size : 0,408size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));409if (!relocInfo)410return false;411if (n == 0)412memset(relocInfo, 0, sizeof(RelocInfo));413}414++relocInfo->count;415416relocInfo->entry[n].data = data;417relocInfo->entry[n].mask = m;418relocInfo->entry[n].offset = codeSize + w * 4;419relocInfo->entry[n].bitPos = s;420relocInfo->entry[n].type = ty;421422return true;423}424425bool426CodeEmitter::addInterp(int ipa, int reg, FixupApply apply)427{428unsigned int n = fixupInfo ? fixupInfo->count : 0;429430if (!(n % RELOC_ALLOC_INCREMENT)) {431size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry);432fixupInfo = reinterpret_cast<FixupInfo *>(433REALLOC(fixupInfo, n ? size : 0,434size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry)));435if (!fixupInfo)436return false;437if (n == 0)438fixupInfo->count = 0;439}440++fixupInfo->count;441442fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2);443444return true;445}446447void448RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const449{450uint32_t value = 0;451452switch (type) {453case TYPE_CODE: value = info->codePos; break;454case TYPE_BUILTIN: value = info->libPos; break;455case TYPE_DATA: value = info->dataPos; break;456default:457assert(0);458break;459}460value += data;461value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);462463binary[offset / 4] &= ~mask;464binary[offset / 4] |= value & mask;465}466467} // namespace nv50_ir468469470#include "codegen/nv50_ir_driver.h"471472extern "C" {473474void475nv50_ir_relocate_code(void *relocData, uint32_t *code,476uint32_t codePos,477uint32_t libPos,478uint32_t dataPos)479{480nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);481482info->codePos = codePos;483info->libPos = libPos;484info->dataPos = dataPos;485486for (unsigned int i = 0; i < info->count; ++i)487info->entry[i].apply(code, info);488}489490void491nv50_ir_apply_fixups(void *fixupData, uint32_t *code,492bool force_persample_interp, bool flatshade,493uint8_t alphatest, bool msaa)494{495nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(496fixupData);497498// force_persample_interp: all non-flat -> per-sample499// flatshade: all color -> flat500// alphatest: PIPE_FUNC_* to use with alphatest501// msaa: false = sample id -> 0 for interpolateAtSample502nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest, msaa);503for (unsigned i = 0; i < info->count; ++i)504info->entry[i].apply(&info->entry[i], code, data);505}506507void508nv50_ir_get_target_library(uint32_t chipset,509const uint32_t **code, uint32_t *size)510{511nv50_ir::Target *targ = nv50_ir::Target::create(chipset);512targ->getBuiltinCode(code, size);513nv50_ir::Target::destroy(targ);514}515516}517518519