// Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
// 4574 views
/*1* Copyright 2011 Christoph Bumiller2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "codegen/nv50_ir.h"23#include "codegen/nv50_ir_target.h"24#include "codegen/nv50_ir_driver.h"2526extern "C" {27#include "nouveau_debug.h"28}2930namespace nv50_ir {3132Modifier::Modifier(operation op)33{34switch (op) {35case OP_NEG: bits = NV50_IR_MOD_NEG; break;36case OP_ABS: bits = NV50_IR_MOD_ABS; break;37case OP_SAT: bits = NV50_IR_MOD_SAT; break;38case OP_NOT: bits = NV50_IR_MOD_NOT; break;39default:40bits = 0;41break;42}43}4445Modifier Modifier::operator*(const Modifier m) const46{47unsigned int a, b, c;4849b = m.bits;50if (this->bits & NV50_IR_MOD_ABS)51b &= ~NV50_IR_MOD_NEG;5253a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);54c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);5556return Modifier(a | c);57}5859ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)60{61indirect[0] = -1;62indirect[1] = -1;63usedAsPtr = 
false;64set(v);65}6667ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)68{69set(ref);70usedAsPtr = ref.usedAsPtr;71}7273ValueRef::~ValueRef()74{75this->set(NULL);76}7778bool ValueRef::getImmediate(ImmediateValue &imm) const79{80const ValueRef *src = this;81Modifier m;82DataType type = src->insn->sType;8384while (src) {85if (src->mod) {86if (src->insn->sType != type)87break;88m *= src->mod;89}90if (src->getFile() == FILE_IMMEDIATE) {91imm = *(src->value->asImm());92// The immediate's type isn't required to match its use, it's93// more of a hint; applying a modifier makes use of that hint.94imm.reg.type = type;95m.applyTo(imm);96return true;97}9899Instruction *insn = src->value->getUniqueInsn();100101if (insn && insn->op == OP_MOV) {102src = &insn->src(0);103if (src->mod)104WARN("OP_MOV with modifier encountered !\n");105} else {106src = NULL;107}108}109return false;110}111112ValueDef::ValueDef(Value *v) : value(NULL), origin(NULL), insn(NULL)113{114set(v);115}116117ValueDef::ValueDef(const ValueDef& def) : value(NULL), origin(NULL), insn(NULL)118{119set(def.get());120}121122ValueDef::~ValueDef()123{124this->set(NULL);125}126127void128ValueRef::set(const ValueRef &ref)129{130this->set(ref.get());131mod = ref.mod;132indirect[0] = ref.indirect[0];133indirect[1] = ref.indirect[1];134}135136void137ValueRef::set(Value *refVal)138{139if (value == refVal)140return;141if (value)142value->uses.erase(this);143if (refVal)144refVal->uses.insert(this);145146value = refVal;147}148149void150ValueDef::set(Value *defVal)151{152if (value == defVal)153return;154if (value)155value->defs.remove(this);156if (defVal)157defVal->defs.push_back(this);158159value = defVal;160}161162// Check if we can replace this definition's value by the value in @rep,163// including the source modifiers, i.e. 
make sure that all uses support164// @rep.mod.165bool166ValueDef::mayReplace(const ValueRef &rep)167{168if (!rep.mod)169return true;170171if (!insn || !insn->bb) // Unbound instruction ?172return false;173174const Target *target = insn->bb->getProgram()->getTarget();175176for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();177++it) {178Instruction *insn = (*it)->getInsn();179int s = -1;180181for (int i = 0; insn->srcExists(i); ++i) {182if (insn->src(i).get() == value) {183// If there are multiple references to us we'd have to check if the184// combination of mods is still supported, but just bail for now.185if (&insn->src(i) != (*it))186return false;187s = i;188}189}190assert(s >= 0); // integrity of uses list191192if (!target->isModSupported(insn, s, rep.mod))193return false;194}195return true;196}197198void199ValueDef::replace(const ValueRef &repVal, bool doSet)200{201assert(mayReplace(repVal));202203if (value == repVal.get())204return;205206while (!value->uses.empty()) {207ValueRef *ref = *value->uses.begin();208ref->set(repVal.get());209ref->mod *= repVal.mod;210}211212if (doSet)213set(repVal.get());214}215216Value::Value()217{218join = this;219memset(®, 0, sizeof(reg));220reg.size = 4;221}222223LValue::LValue(Function *fn, DataFile file)224{225reg.file = file;226reg.size = (file != FILE_PREDICATE) ? 
4 : 1;227reg.data.id = -1;228229compMask = 0;230compound = 0;231ssa = 0;232fixedReg = 0;233noSpill = 0;234235fn->add(this, this->id);236}237238LValue::LValue(Function *fn, LValue *lval)239{240assert(lval);241242reg.file = lval->reg.file;243reg.size = lval->reg.size;244reg.data.id = -1;245246compMask = 0;247compound = 0;248ssa = 0;249fixedReg = 0;250noSpill = 0;251252fn->add(this, this->id);253}254255LValue *256LValue::clone(ClonePolicy<Function>& pol) const257{258LValue *that = new_LValue(pol.context(), reg.file);259260pol.set<Value>(this, that);261262that->reg.size = this->reg.size;263that->reg.type = this->reg.type;264that->reg.data = this->reg.data;265266return that;267}268269bool270LValue::isUniform() const271{272if (defs.size() > 1)273return false;274Instruction *insn = getInsn();275if (!insn)276return false;277// let's not try too hard here for now ...278return !insn->srcExists(1) && insn->getSrc(0)->isUniform();279}280281Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)282{283baseSym = NULL;284285reg.file = f;286reg.fileIndex = fidx;287reg.data.offset = 0;288289prog->add(this, this->id);290}291292Symbol *293Symbol::clone(ClonePolicy<Function>& pol) const294{295Program *prog = pol.context()->getProgram();296297Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);298299pol.set<Value>(this, that);300301that->reg.size = this->reg.size;302that->reg.type = this->reg.type;303that->reg.data = this->reg.data;304305that->baseSym = this->baseSym;306307return that;308}309310bool311Symbol::isUniform() const312{313return314reg.file != FILE_SYSTEM_VALUE &&315reg.file != FILE_MEMORY_LOCAL &&316reg.file != FILE_SHADER_INPUT;317}318319ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)320{321memset(®, 0, sizeof(reg));322323reg.file = FILE_IMMEDIATE;324reg.size = 4;325reg.type = TYPE_U32;326327reg.data.u32 = uval;328329prog->add(this, this->id);330}331332ImmediateValue::ImmediateValue(Program *prog, float fval)333{334memset(®, 0, sizeof(reg));335336reg.file = 
FILE_IMMEDIATE;337reg.size = 4;338reg.type = TYPE_F32;339340reg.data.f32 = fval;341342prog->add(this, this->id);343}344345ImmediateValue::ImmediateValue(Program *prog, double dval)346{347memset(®, 0, sizeof(reg));348349reg.file = FILE_IMMEDIATE;350reg.size = 8;351reg.type = TYPE_F64;352353reg.data.f64 = dval;354355prog->add(this, this->id);356}357358ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)359{360reg = proto->reg;361362reg.type = ty;363reg.size = typeSizeof(ty);364}365366ImmediateValue *367ImmediateValue::clone(ClonePolicy<Function>& pol) const368{369Program *prog = pol.context()->getProgram();370ImmediateValue *that = new_ImmediateValue(prog, 0u);371372pol.set<Value>(this, that);373374that->reg.size = this->reg.size;375that->reg.type = this->reg.type;376that->reg.data = this->reg.data;377378return that;379}380381bool382ImmediateValue::isInteger(const int i) const383{384switch (reg.type) {385case TYPE_S8:386return reg.data.s8 == i;387case TYPE_U8:388return reg.data.u8 == i;389case TYPE_S16:390return reg.data.s16 == i;391case TYPE_U16:392return reg.data.u16 == i;393case TYPE_S32:394case TYPE_U32:395return reg.data.s32 == i; // as if ...396case TYPE_S64:397case TYPE_U64:398return reg.data.s64 == i; // as if ...399case TYPE_F32:400return reg.data.f32 == static_cast<float>(i);401case TYPE_F64:402return reg.data.f64 == static_cast<double>(i);403default:404return false;405}406}407408bool409ImmediateValue::isNegative() const410{411switch (reg.type) {412case TYPE_S8: return reg.data.s8 < 0;413case TYPE_S16: return reg.data.s16 < 0;414case TYPE_S32:415case TYPE_U32: return reg.data.s32 < 0;416case TYPE_F32: return reg.data.u32 & (1 << 31);417case TYPE_F64: return reg.data.u64 & (1ULL << 63);418default:419return false;420}421}422423bool424ImmediateValue::isPow2() const425{426if (reg.type == TYPE_U64 || reg.type == TYPE_S64)427return util_is_power_of_two_or_zero64(reg.data.u64);428else429return 
util_is_power_of_two_or_zero(reg.data.u32);430}431432void433ImmediateValue::applyLog2()434{435switch (reg.type) {436case TYPE_S8:437case TYPE_S16:438case TYPE_S32:439assert(!this->isNegative());440FALLTHROUGH;441case TYPE_U8:442case TYPE_U16:443case TYPE_U32:444reg.data.u32 = util_logbase2(reg.data.u32);445break;446case TYPE_S64:447assert(!this->isNegative());448FALLTHROUGH;449case TYPE_U64:450reg.data.u64 = util_logbase2_64(reg.data.u64);451break;452case TYPE_F32:453reg.data.f32 = log2f(reg.data.f32);454break;455case TYPE_F64:456reg.data.f64 = log2(reg.data.f64);457break;458default:459assert(0);460break;461}462}463464bool465ImmediateValue::compare(CondCode cc, float fval) const466{467if (reg.type != TYPE_F32)468ERROR("immediate value is not of type f32");469470switch (static_cast<CondCode>(cc & 7)) {471case CC_TR: return true;472case CC_FL: return false;473case CC_LT: return reg.data.f32 < fval;474case CC_LE: return reg.data.f32 <= fval;475case CC_GT: return reg.data.f32 > fval;476case CC_GE: return reg.data.f32 >= fval;477case CC_EQ: return reg.data.f32 == fval;478case CC_NE: return reg.data.f32 != fval;479default:480assert(0);481return false;482}483}484485ImmediateValue&486ImmediateValue::operator=(const ImmediateValue &that)487{488this->reg = that.reg;489return (*this);490}491492bool493Value::interfers(const Value *that) const494{495uint32_t idA, idB;496497if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)498return false;499if (this->asImm())500return false;501502if (this->asSym()) {503idA = this->join->reg.data.offset;504idB = that->join->reg.data.offset;505} else {506idA = this->join->reg.data.id * MIN2(this->reg.size, 4);507idB = that->join->reg.data.id * MIN2(that->reg.size, 4);508}509510if (idA < idB)511return (idA + this->reg.size > idB);512else513if (idA > idB)514return (idB + that->reg.size > idA);515else516return (idA == idB);517}518519bool520Value::equals(const Value *that, bool strict) const521{522if (strict)523return this == 
that;524525if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)526return false;527if (that->reg.size != this->reg.size)528return false;529530if (that->reg.data.id != this->reg.data.id)531return false;532533return true;534}535536bool537ImmediateValue::equals(const Value *that, bool strict) const538{539const ImmediateValue *imm = that->asImm();540if (!imm)541return false;542return reg.data.u64 == imm->reg.data.u64;543}544545bool546Symbol::equals(const Value *that, bool strict) const547{548if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)549return false;550assert(that->asSym());551552if (this->baseSym != that->asSym()->baseSym)553return false;554555if (reg.file == FILE_SYSTEM_VALUE)556return (this->reg.data.sv.sv == that->reg.data.sv.sv &&557this->reg.data.sv.index == that->reg.data.sv.index);558return this->reg.data.offset == that->reg.data.offset;559}560561void Instruction::init()562{563next = prev = 0;564serial = 0;565566cc = CC_ALWAYS;567rnd = ROUND_N;568cache = CACHE_CA;569subOp = 0;570571saturate = 0;572join = 0;573exit = 0;574terminator = 0;575ftz = 0;576dnz = 0;577perPatch = 0;578fixed = 0;579encSize = 0;580ipa = 0;581mask = 0;582precise = 0;583584lanes = 0xf;585586postFactor = 0;587588predSrc = -1;589flagsDef = -1;590flagsSrc = -1;591592sched = 0;593bb = NULL;594}595596Instruction::Instruction()597{598init();599600op = OP_NOP;601dType = sType = TYPE_F32;602603id = -1;604}605606Instruction::Instruction(Function *fn, operation opr, DataType ty)607{608init();609610op = opr;611dType = sType = ty;612613fn->add(this, id);614}615616Instruction::~Instruction()617{618if (bb) {619Function *fn = bb->getFunction();620bb->remove(this);621fn->allInsns.remove(id);622}623624for (int s = 0; srcExists(s); ++s)625setSrc(s, NULL);626// must unlink defs too since the list pointers will get deallocated627for (int d = 0; defExists(d); ++d)628setDef(d, NULL);629}630631void632Instruction::setDef(int i, Value *val)633{634int size = 
defs.size();635if (i >= size) {636defs.resize(i + 1);637while (size <= i)638defs[size++].setInsn(this);639}640defs[i].set(val);641}642643void644Instruction::setSrc(int s, Value *val)645{646int size = srcs.size();647if (s >= size) {648srcs.resize(s + 1);649while (size <= s)650srcs[size++].setInsn(this);651}652srcs[s].set(val);653}654655void656Instruction::setSrc(int s, const ValueRef& ref)657{658setSrc(s, ref.get());659srcs[s].mod = ref.mod;660}661662void663Instruction::swapSources(int a, int b)664{665Value *value = srcs[a].get();666Modifier m = srcs[a].mod;667668setSrc(a, srcs[b]);669670srcs[b].set(value);671srcs[b].mod = m;672}673674static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)675{676if (index >= s)677index += delta;678else679if ((delta < 0) && (index >= (s + delta)))680index = -1;681}682683// Moves sources [@s,last_source] by @delta.684// If @delta < 0, sources [@s - abs(@delta), @s) are erased.685void686Instruction::moveSources(const int s, const int delta)687{688if (delta == 0)689return;690assert(s + delta >= 0);691692int k;693694for (k = 0; srcExists(k); ++k) {695for (int i = 0; i < 2; ++i)696moveSourcesAdjustIndex(src(k).indirect[i], s, delta);697}698moveSourcesAdjustIndex(predSrc, s, delta);699moveSourcesAdjustIndex(flagsSrc, s, delta);700if (asTex()) {701TexInstruction *tex = asTex();702moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);703moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);704}705706if (delta > 0) {707--k;708for (int p = k + delta; k >= s; --k, --p)709setSrc(p, src(k));710} else {711int p;712for (p = s; p < k; ++p)713setSrc(p + delta, src(p));714for (; (p + delta) < k; ++p)715setSrc(p + delta, NULL);716}717}718719void720Instruction::takeExtraSources(int s, Value *values[3])721{722values[0] = getIndirect(s, 0);723if (values[0])724setIndirect(s, 0, NULL);725726values[1] = getIndirect(s, 1);727if (values[1])728setIndirect(s, 1, NULL);729730values[2] = getPredicate();731if (values[2])732setPredicate(cc, 
NULL);733}734735void736Instruction::putExtraSources(int s, Value *values[3])737{738if (values[0])739setIndirect(s, 0, values[0]);740if (values[1])741setIndirect(s, 1, values[1]);742if (values[2])743setPredicate(cc, values[2]);744}745746Instruction *747Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const748{749if (!i)750i = new_Instruction(pol.context(), op, dType);751#if !defined(NDEBUG) && defined(__cpp_rtti)752assert(typeid(*i) == typeid(*this));753#endif754755pol.set<Instruction>(this, i);756757i->sType = sType;758759i->rnd = rnd;760i->cache = cache;761i->subOp = subOp;762763i->saturate = saturate;764i->join = join;765i->exit = exit;766i->mask = mask;767i->ftz = ftz;768i->dnz = dnz;769i->ipa = ipa;770i->lanes = lanes;771i->perPatch = perPatch;772773i->postFactor = postFactor;774775for (int d = 0; defExists(d); ++d)776i->setDef(d, pol.get(getDef(d)));777778for (int s = 0; srcExists(s); ++s) {779i->setSrc(s, pol.get(getSrc(s)));780i->src(s).mod = src(s).mod;781}782783i->cc = cc;784i->predSrc = predSrc;785i->flagsDef = flagsDef;786i->flagsSrc = flagsSrc;787788return i;789}790791unsigned int792Instruction::defCount(unsigned int mask, bool singleFile) const793{794unsigned int i, n;795796if (singleFile) {797unsigned int d = ffs(mask);798if (!d)799return 0;800for (i = d--; defExists(i); ++i)801if (getDef(i)->reg.file != getDef(d)->reg.file)802mask &= ~(1 << i);803}804805for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)806n += mask & 1;807return n;808}809810unsigned int811Instruction::srcCount(unsigned int mask, bool singleFile) const812{813unsigned int i, n;814815if (singleFile) {816unsigned int s = ffs(mask);817if (!s)818return 0;819for (i = s--; srcExists(i); ++i)820if (getSrc(i)->reg.file != getSrc(s)->reg.file)821mask &= ~(1 << i);822}823824for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)825n += mask & 1;826return n;827}828829bool830Instruction::setIndirect(int s, int dim, Value *value)831{832assert(this->srcExists(s));833834int p = 
srcs[s].indirect[dim];835if (p < 0) {836if (!value)837return true;838p = srcs.size();839while (p > 0 && !srcExists(p - 1))840--p;841}842setSrc(p, value);843srcs[p].usedAsPtr = (value != 0);844srcs[s].indirect[dim] = value ? p : -1;845return true;846}847848bool849Instruction::setPredicate(CondCode ccode, Value *value)850{851cc = ccode;852853if (!value) {854if (predSrc >= 0) {855srcs[predSrc].set(NULL);856predSrc = -1;857}858return true;859}860861if (predSrc < 0) {862predSrc = srcs.size();863while (predSrc > 0 && !srcExists(predSrc - 1))864--predSrc;865}866867setSrc(predSrc, value);868return true;869}870871bool872Instruction::writesPredicate() const873{874for (int d = 0; defExists(d); ++d)875if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))876return true;877return false;878}879880bool881Instruction::canCommuteDefSrc(const Instruction *i) const882{883for (int d = 0; defExists(d); ++d)884for (int s = 0; i->srcExists(s); ++s)885if (getDef(d)->interfers(i->getSrc(s)))886return false;887return true;888}889890bool891Instruction::canCommuteDefDef(const Instruction *i) const892{893for (int d = 0; defExists(d); ++d)894for (int c = 0; i->defExists(c); ++c)895if (getDef(d)->interfers(i->getDef(c)))896return false;897return true;898}899900bool901Instruction::isCommutationLegal(const Instruction *i) const902{903return canCommuteDefDef(i) &&904canCommuteDefSrc(i) &&905i->canCommuteDefSrc(this);906}907908TexInstruction::TexInstruction(Function *fn, operation op)909: Instruction(fn, op, TYPE_F32), tex()910{911tex.rIndirectSrc = -1;912tex.sIndirectSrc = -1;913914if (op == OP_TXF)915sType = TYPE_U32;916}917918TexInstruction::~TexInstruction()919{920for (int c = 0; c < 3; ++c) {921dPdx[c].set(NULL);922dPdy[c].set(NULL);923}924for (int n = 0; n < 4; ++n)925for (int c = 0; c < 3; ++c)926offset[n][c].set(NULL);927}928929TexInstruction *930TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const931{932TexInstruction *tex = (i ? 
static_cast<TexInstruction *>(i) :933new_TexInstruction(pol.context(), op));934935Instruction::clone(pol, tex);936937tex->tex = this->tex;938939if (op == OP_TXD) {940for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {941tex->dPdx[c].set(dPdx[c]);942tex->dPdy[c].set(dPdy[c]);943}944}945946for (int n = 0; n < tex->tex.useOffsets; ++n)947for (int c = 0; c < 3; ++c)948tex->offset[n][c].set(offset[n][c]);949950return tex;951}952953const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =954{955{ "1D", 1, 1, false, false, false },956{ "2D", 2, 2, false, false, false },957{ "2D_MS", 2, 3, false, false, false },958{ "3D", 3, 3, false, false, false },959{ "CUBE", 2, 3, false, true, false },960{ "1D_SHADOW", 1, 1, false, false, true },961{ "2D_SHADOW", 2, 2, false, false, true },962{ "CUBE_SHADOW", 2, 3, false, true, true },963{ "1D_ARRAY", 1, 2, true, false, false },964{ "2D_ARRAY", 2, 3, true, false, false },965{ "2D_MS_ARRAY", 2, 4, true, false, false },966{ "CUBE_ARRAY", 2, 4, true, true, false },967{ "1D_ARRAY_SHADOW", 1, 2, true, false, true },968{ "2D_ARRAY_SHADOW", 2, 3, true, false, true },969{ "RECT", 2, 2, false, false, false },970{ "RECT_SHADOW", 2, 2, false, false, true },971{ "CUBE_ARRAY_SHADOW", 2, 4, true, true, true },972{ "BUFFER", 1, 1, false, false, false },973};974975const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =976{977{ "NONE", 0, { 0, 0, 0, 0 }, UINT },978979{ "RGBA32F", 4, { 32, 32, 32, 32 }, FLOAT },980{ "RGBA16F", 4, { 16, 16, 16, 16 }, FLOAT },981{ "RG32F", 2, { 32, 32, 0, 0 }, FLOAT },982{ "RG16F", 2, { 16, 16, 0, 0 }, FLOAT },983{ "R11G11B10F", 3, { 11, 11, 10, 0 }, FLOAT },984{ "R32F", 1, { 32, 0, 0, 0 }, FLOAT },985{ "R16F", 1, { 16, 0, 0, 0 }, FLOAT },986987{ "RGBA32UI", 4, { 32, 32, 32, 32 }, UINT },988{ "RGBA16UI", 4, { 16, 16, 16, 16 }, UINT },989{ "RGB10A2UI", 4, { 10, 10, 10, 2 }, UINT },990{ "RGBA8UI", 4, { 8, 8, 8, 8 }, UINT },991{ "RG32UI", 2, { 32, 32, 0, 0 }, UINT },992{ 
"RG16UI", 2, { 16, 16, 0, 0 }, UINT },993{ "RG8UI", 2, { 8, 8, 0, 0 }, UINT },994{ "R32UI", 1, { 32, 0, 0, 0 }, UINT },995{ "R16UI", 1, { 16, 0, 0, 0 }, UINT },996{ "R8UI", 1, { 8, 0, 0, 0 }, UINT },997998{ "RGBA32I", 4, { 32, 32, 32, 32 }, SINT },999{ "RGBA16I", 4, { 16, 16, 16, 16 }, SINT },1000{ "RGBA8I", 4, { 8, 8, 8, 8 }, SINT },1001{ "RG32I", 2, { 32, 32, 0, 0 }, SINT },1002{ "RG16I", 2, { 16, 16, 0, 0 }, SINT },1003{ "RG8I", 2, { 8, 8, 0, 0 }, SINT },1004{ "R32I", 1, { 32, 0, 0, 0 }, SINT },1005{ "R16I", 1, { 16, 0, 0, 0 }, SINT },1006{ "R8I", 1, { 8, 0, 0, 0 }, SINT },10071008{ "RGBA16", 4, { 16, 16, 16, 16 }, UNORM },1009{ "RGB10A2", 4, { 10, 10, 10, 2 }, UNORM },1010{ "RGBA8", 4, { 8, 8, 8, 8 }, UNORM },1011{ "RG16", 2, { 16, 16, 0, 0 }, UNORM },1012{ "RG8", 2, { 8, 8, 0, 0 }, UNORM },1013{ "R16", 1, { 16, 0, 0, 0 }, UNORM },1014{ "R8", 1, { 8, 0, 0, 0 }, UNORM },10151016{ "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },1017{ "RGBA8_SNORM", 4, { 8, 8, 8, 8 }, SNORM },1018{ "RG16_SNORM", 2, { 16, 16, 0, 0 }, SNORM },1019{ "RG8_SNORM", 2, { 8, 8, 0, 0 }, SNORM },1020{ "R16_SNORM", 1, { 16, 0, 0, 0 }, SNORM },1021{ "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM },10221023{ "BGRA8", 4, { 8, 8, 8, 8 }, UNORM, true },1024};10251026const struct TexInstruction::ImgFormatDesc *1027TexInstruction::translateImgFormat(enum pipe_format format)1028{10291030#define FMT_CASE(a, b) \1031case PIPE_FORMAT_ ## a: return &formatTable[nv50_ir::FMT_ ## b]10321033switch (format) {1034FMT_CASE(NONE, NONE);10351036FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);1037FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);1038FMT_CASE(R32G32_FLOAT, RG32F);1039FMT_CASE(R16G16_FLOAT, RG16F);1040FMT_CASE(R11G11B10_FLOAT, R11G11B10F);1041FMT_CASE(R32_FLOAT, R32F);1042FMT_CASE(R16_FLOAT, R16F);10431044FMT_CASE(R32G32B32A32_UINT, RGBA32UI);1045FMT_CASE(R16G16B16A16_UINT, RGBA16UI);1046FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);1047FMT_CASE(R8G8B8A8_UINT, RGBA8UI);1048FMT_CASE(R32G32_UINT, RG32UI);1049FMT_CASE(R16G16_UINT, 
RG16UI);1050FMT_CASE(R8G8_UINT, RG8UI);1051FMT_CASE(R32_UINT, R32UI);1052FMT_CASE(R16_UINT, R16UI);1053FMT_CASE(R8_UINT, R8UI);10541055FMT_CASE(R32G32B32A32_SINT, RGBA32I);1056FMT_CASE(R16G16B16A16_SINT, RGBA16I);1057FMT_CASE(R8G8B8A8_SINT, RGBA8I);1058FMT_CASE(R32G32_SINT, RG32I);1059FMT_CASE(R16G16_SINT, RG16I);1060FMT_CASE(R8G8_SINT, RG8I);1061FMT_CASE(R32_SINT, R32I);1062FMT_CASE(R16_SINT, R16I);1063FMT_CASE(R8_SINT, R8I);10641065FMT_CASE(R16G16B16A16_UNORM, RGBA16);1066FMT_CASE(R10G10B10A2_UNORM, RGB10A2);1067FMT_CASE(R8G8B8A8_UNORM, RGBA8);1068FMT_CASE(R16G16_UNORM, RG16);1069FMT_CASE(R8G8_UNORM, RG8);1070FMT_CASE(R16_UNORM, R16);1071FMT_CASE(R8_UNORM, R8);10721073FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);1074FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);1075FMT_CASE(R16G16_SNORM, RG16_SNORM);1076FMT_CASE(R8G8_SNORM, RG8_SNORM);1077FMT_CASE(R16_SNORM, R16_SNORM);1078FMT_CASE(R8_SNORM, R8_SNORM);10791080FMT_CASE(B8G8R8A8_UNORM, BGRA8);10811082default:1083assert(!"Unexpected format");1084return &formatTable[nv50_ir::FMT_NONE];1085}1086}10871088void1089TexInstruction::setIndirectR(Value *v)1090{1091int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;1092if (p >= 0) {1093tex.rIndirectSrc = p;1094setSrc(p, v);1095srcs[p].usedAsPtr = !!v;1096}1097}10981099void1100TexInstruction::setIndirectS(Value *v)1101{1102int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;1103if (p >= 0) {1104tex.sIndirectSrc = p;1105setSrc(p, v);1106srcs[p].usedAsPtr = !!v;1107}1108}11091110CmpInstruction::CmpInstruction(Function *fn, operation op)1111: Instruction(fn, op, TYPE_F32)1112{1113setCond = CC_ALWAYS;1114}11151116CmpInstruction *1117CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const1118{1119CmpInstruction *cmp = (i ? 
static_cast<CmpInstruction *>(i) :1120new_CmpInstruction(pol.context(), op));1121cmp->dType = dType;1122Instruction::clone(pol, cmp);1123cmp->setCond = setCond;1124return cmp;1125}11261127FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)1128: Instruction(fn, op, TYPE_NONE)1129{1130if (op == OP_CALL)1131target.fn = reinterpret_cast<Function *>(targ);1132else1133target.bb = reinterpret_cast<BasicBlock *>(targ);11341135if (op == OP_BRA ||1136op == OP_CONT || op == OP_BREAK ||1137op == OP_RET || op == OP_EXIT)1138terminator = 1;1139else1140if (op == OP_JOIN)1141terminator = targ ? 1 : 0;11421143allWarp = absolute = limit = builtin = indirect = 0;1144}11451146FlowInstruction *1147FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const1148{1149FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :1150new_FlowInstruction(pol.context(), op, NULL));11511152Instruction::clone(pol, flow);1153flow->allWarp = allWarp;1154flow->absolute = absolute;1155flow->limit = limit;1156flow->builtin = builtin;11571158if (builtin)1159flow->target.builtin = target.builtin;1160else1161if (op == OP_CALL)1162flow->target.fn = target.fn;1163else1164if (target.bb)1165flow->target.bb = pol.get<BasicBlock>(target.bb);11661167return flow;1168}11691170Program::Program(Type type, Target *arch)1171: progType(type),1172target(arch),1173tlsSize(0),1174mem_Instruction(sizeof(Instruction), 6),1175mem_CmpInstruction(sizeof(CmpInstruction), 4),1176mem_TexInstruction(sizeof(TexInstruction), 4),1177mem_FlowInstruction(sizeof(FlowInstruction), 4),1178mem_LValue(sizeof(LValue), 8),1179mem_Symbol(sizeof(Symbol), 7),1180mem_ImmediateValue(sizeof(ImmediateValue), 7),1181driver(NULL),1182driver_out(NULL)1183{1184code = NULL;1185binSize = 0;11861187maxGPR = -1;1188fp64 = false;1189persampleInvocation = false;11901191main = new Function(this, "MAIN", ~0);1192calls.insert(&main->call);11931194dbgFlags = 0;1195optLevel = 0;11961197targetPriv = 
NULL;1198}11991200Program::~Program()1201{1202for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())1203delete reinterpret_cast<Function *>(it.get());12041205for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())1206releaseValue(reinterpret_cast<Value *>(it.get()));1207}12081209void Program::releaseInstruction(Instruction *insn)1210{1211// TODO: make this not suck so much12121213insn->~Instruction();12141215if (insn->asCmp())1216mem_CmpInstruction.release(insn);1217else1218if (insn->asTex())1219mem_TexInstruction.release(insn);1220else1221if (insn->asFlow())1222mem_FlowInstruction.release(insn);1223else1224mem_Instruction.release(insn);1225}12261227void Program::releaseValue(Value *value)1228{1229value->~Value();12301231if (value->asLValue())1232mem_LValue.release(value);1233else1234if (value->asImm())1235mem_ImmediateValue.release(value);1236else1237if (value->asSym())1238mem_Symbol.release(value);1239}124012411242} // namespace nv50_ir12431244extern "C" {12451246static void1247nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,1248struct nv50_ir_prog_info_out *info_out)1249{1250info_out->target = info->target;1251info_out->type = info->type;1252if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {1253info_out->prop.tp.domain = PIPE_PRIM_MAX;1254info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;1255}1256if (info->type == PIPE_SHADER_GEOMETRY) {1257info_out->prop.gp.instanceCount = 1;1258info_out->prop.gp.maxVertices = 1;1259}1260if (info->type == PIPE_SHADER_COMPUTE) {1261info->prop.cp.numThreads[0] =1262info->prop.cp.numThreads[1] =1263info->prop.cp.numThreads[2] = 1;1264}1265info_out->bin.smemSize = info->bin.smemSize;1266info_out->io.genUserClip = info->io.genUserClip;1267info_out->io.instanceId = 0xff;1268info_out->io.vertexId = 0xff;1269info_out->io.edgeFlagIn = 0xff;1270info_out->io.edgeFlagOut = 0xff;1271info_out->io.fragDepth = 0xff;1272info_out->io.sampleMask = 
0xff;1273}12741275int1276nv50_ir_generate_code(struct nv50_ir_prog_info *info,1277struct nv50_ir_prog_info_out *info_out)1278{1279int ret = 0;12801281nv50_ir::Program::Type type;12821283nv50_ir_init_prog_info(info, info_out);12841285#define PROG_TYPE_CASE(a, b) \1286case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break12871288switch (info->type) {1289PROG_TYPE_CASE(VERTEX, VERTEX);1290PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);1291PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);1292PROG_TYPE_CASE(GEOMETRY, GEOMETRY);1293PROG_TYPE_CASE(FRAGMENT, FRAGMENT);1294PROG_TYPE_CASE(COMPUTE, COMPUTE);1295default:1296INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);1297return -1;1298}1299INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);13001301nv50_ir::Target *targ = nv50_ir::Target::create(info->target);1302if (!targ)1303return -1;13041305nv50_ir::Program *prog = new nv50_ir::Program(type, targ);1306if (!prog) {1307nv50_ir::Target::destroy(targ);1308return -1;1309}1310prog->driver = info;1311prog->driver_out = info_out;1312prog->dbgFlags = info->dbgFlags;1313prog->optLevel = info->optLevel;13141315switch (info->bin.sourceRep) {1316case PIPE_SHADER_IR_NIR:1317ret = prog->makeFromNIR(info, info_out) ? 0 : -2;1318break;1319case PIPE_SHADER_IR_TGSI:1320ret = prog->makeFromTGSI(info, info_out) ? 
0 : -2;1321break;1322default:1323ret = -1;1324break;1325}1326if (ret < 0)1327goto out;1328if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)1329prog->print();13301331targ->parseDriverInfo(info, info_out);1332prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);13331334prog->convertToSSA();13351336if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)1337prog->print();13381339prog->optimizeSSA(info->optLevel);1340prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);13411342if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)1343prog->print();13441345if (!prog->registerAllocation()) {1346ret = -4;1347goto out;1348}1349prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);13501351prog->optimizePostRA(info->optLevel);13521353if (!prog->emitBinary(info_out)) {1354ret = -5;1355goto out;1356}13571358out:1359INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);13601361info_out->bin.maxGPR = prog->maxGPR;1362info_out->bin.code = prog->code;1363info_out->bin.codeSize = prog->binSize;1364info_out->bin.tlsSpace = prog->tlsSize;13651366delete prog;1367nv50_ir::Target::destroy(targ);13681369return ret;1370}13711372} // extern "C"137313741375