Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
4574 views
/*1* Copyright 2017 Red Hat Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*21* Authors: Karol Herbst <[email protected]>22*/2324#include "compiler/nir/nir.h"2526#include "util/u_debug.h"2728#include "codegen/nv50_ir.h"29#include "codegen/nv50_ir_from_common.h"30#include "codegen/nv50_ir_lowering_helper.h"31#include "codegen/nv50_ir_util.h"32#include "tgsi/tgsi_from_mesa.h"3334#if __cplusplus >= 201103L35#include <unordered_map>36#else37#include <tr1/unordered_map>38#endif39#include <cstring>40#include <list>41#include <vector>4243namespace {4445#if __cplusplus >= 201103L46using std::hash;47using std::unordered_map;48#else49using std::tr1::hash;50using std::tr1::unordered_map;51#endif5253using namespace nv50_ir;5455int56type_size(const struct glsl_type *type, bool bindless)57{58return glsl_count_attribute_slots(type, false);59}6061static void62function_temp_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)63{64assert(glsl_type_is_vector_or_scalar(type));6566unsigned comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;67unsigned length = glsl_get_vector_elements(type);6869*size = comp_size * length;70*align = 0x10;71}7273class Converter : public ConverterCommon74{75public:76Converter(Program *, nir_shader *, nv50_ir_prog_info *, nv50_ir_prog_info_out *);7778bool run();79private:80typedef std::vector<LValue*> LValues;81typedef unordered_map<unsigned, LValues> NirDefMap;82typedef unordered_map<unsigned, nir_load_const_instr*> ImmediateMap;83typedef unordered_map<unsigned, BasicBlock*> NirBlockMap;8485CacheMode convert(enum gl_access_qualifier);86TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow);87LValues& convert(nir_alu_dest *);88BasicBlock* convert(nir_block *);89LValues& convert(nir_dest *);90SVSemantic convert(nir_intrinsic_op);91Value* convert(nir_load_const_instr*, uint8_t);92LValues& convert(nir_register *);93LValues& convert(nir_ssa_def *);9495Value* getSrc(nir_alu_src *, uint8_t component = 0);96Value* getSrc(nir_register *, uint8_t);97Value* getSrc(nir_src *, uint8_t, bool indirect = false);98Value* getSrc(nir_ssa_def *, uint8_t);99100// returned value is the constant part of the given source (either the101// nir_src or the selected source component of an intrinsic). Even though102// this is mostly an optimization to be able to skip indirects in a few103// cases, sometimes we require immediate values or set some fileds on104// instructions (e.g. tex) in order for codegen to consume those.105// If the found value has not a constant part, the Value gets returned106// through the Value parameter.107uint32_t getIndirect(nir_src *, uint8_t, Value *&);108// isScalar indicates that the addressing is scalar, vec4 addressing is109// assumed otherwise110uint32_t getIndirect(nir_intrinsic_instr *, uint8_t s, uint8_t c, Value *&,111bool isScalar = false);112113uint32_t getSlotAddress(nir_intrinsic_instr *, uint8_t idx, uint8_t slot);114115void setInterpolate(nv50_ir_varying *,116uint8_t,117bool centroid,118unsigned semantics);119120Instruction *loadFrom(DataFile, uint8_t, DataType, Value *def, uint32_t base,121uint8_t c, Value *indirect0 = NULL,122Value *indirect1 = NULL, bool patch = false);123void storeTo(nir_intrinsic_instr *, DataFile, operation, DataType,124Value *src, uint8_t idx, uint8_t c, Value *indirect0 = NULL,125Value *indirect1 = NULL);126127bool isFloatType(nir_alu_type);128bool isSignedType(nir_alu_type);129bool isResultFloat(nir_op);130bool isResultSigned(nir_op);131132DataType getDType(nir_alu_instr *);133DataType getDType(nir_intrinsic_instr *);134DataType getDType(nir_intrinsic_instr *, bool isSigned);135DataType getDType(nir_op, uint8_t);136137DataFile getFile(nir_intrinsic_op);138139std::vector<DataType> getSTypes(nir_alu_instr *);140DataType getSType(nir_src &, bool isFloat, bool isSigned);141142operation getOperation(nir_intrinsic_op);143operation getOperation(nir_op);144operation getOperation(nir_texop);145operation preOperationNeeded(nir_op);146147int getSubOp(nir_intrinsic_op);148int getSubOp(nir_op);149150CondCode getCondCode(nir_op);151152bool assignSlots();153bool parseNIR();154155bool visit(nir_alu_instr *);156bool visit(nir_block *);157bool visit(nir_cf_node *);158bool visit(nir_function *);159bool visit(nir_if *);160bool visit(nir_instr *);161bool visit(nir_intrinsic_instr *);162bool visit(nir_jump_instr *);163bool visit(nir_load_const_instr*);164bool visit(nir_loop *);165bool visit(nir_ssa_undef_instr *);166bool visit(nir_tex_instr *);167168// tex stuff169Value* applyProjection(Value *src, Value *proj);170unsigned int getNIRArgCount(TexInstruction::Target&);171172nir_shader *nir;173174NirDefMap ssaDefs;175NirDefMap regDefs;176ImmediateMap immediates;177NirBlockMap blocks;178unsigned int curLoopDepth;179unsigned int curIfDepth;180181BasicBlock *exit;182Value *zero;183Instruction *immInsertPos;184185int clipVertexOutput;186187union {188struct {189Value *position;190} fp;191};192};193194Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info,195nv50_ir_prog_info_out *info_out)196: ConverterCommon(prog, info, info_out),197nir(nir),198curLoopDepth(0),199curIfDepth(0),200exit(NULL),201immInsertPos(NULL),202clipVertexOutput(-1)203{204zero = mkImm((uint32_t)0);205}206207BasicBlock *208Converter::convert(nir_block *block)209{210NirBlockMap::iterator it = blocks.find(block->index);211if (it != blocks.end())212return it->second;213214BasicBlock *bb = new BasicBlock(func);215blocks[block->index] = bb;216return bb;217}218219bool220Converter::isFloatType(nir_alu_type type)221{222return nir_alu_type_get_base_type(type) == nir_type_float;223}224225bool226Converter::isSignedType(nir_alu_type type)227{228return nir_alu_type_get_base_type(type) == nir_type_int;229}230231bool232Converter::isResultFloat(nir_op op)233{234const nir_op_info &info = nir_op_infos[op];235if (info.output_type != nir_type_invalid)236return isFloatType(info.output_type);237238ERROR("isResultFloat not implemented for %s\n", nir_op_infos[op].name);239assert(false);240return true;241}242243bool244Converter::isResultSigned(nir_op op)245{246switch (op) {247// there is no umul and we get wrong results if we treat all muls as signed248case nir_op_imul:249case nir_op_inot:250return false;251default:252const nir_op_info &info = nir_op_infos[op];253if (info.output_type != nir_type_invalid)254return isSignedType(info.output_type);255ERROR("isResultSigned not implemented for %s\n", nir_op_infos[op].name);256assert(false);257return true;258}259}260261DataType262Converter::getDType(nir_alu_instr *insn)263{264if (insn->dest.dest.is_ssa)265return getDType(insn->op, insn->dest.dest.ssa.bit_size);266else267return getDType(insn->op, insn->dest.dest.reg.reg->bit_size);268}269270DataType271Converter::getDType(nir_intrinsic_instr *insn)272{273bool isSigned;274switch (insn->intrinsic) {275case nir_intrinsic_shared_atomic_imax:276case nir_intrinsic_shared_atomic_imin:277case nir_intrinsic_ssbo_atomic_imax:278case nir_intrinsic_ssbo_atomic_imin:279isSigned = true;280break;281default:282isSigned = false;283break;284}285286return getDType(insn, isSigned);287}288289DataType290Converter::getDType(nir_intrinsic_instr *insn, bool isSigned)291{292if (insn->dest.is_ssa)293return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned);294else295return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned);296}297298DataType299Converter::getDType(nir_op op, uint8_t bitSize)300{301DataType ty = typeOfSize(bitSize / 8, isResultFloat(op), isResultSigned(op));302if (ty == TYPE_NONE) {303ERROR("couldn't get Type for op %s with bitSize %u\n", nir_op_infos[op].name, bitSize);304assert(false);305}306return ty;307}308309std::vector<DataType>310Converter::getSTypes(nir_alu_instr *insn)311{312const nir_op_info &info = nir_op_infos[insn->op];313std::vector<DataType> res(info.num_inputs);314315for (uint8_t i = 0; i < info.num_inputs; ++i) {316if (info.input_types[i] != nir_type_invalid) {317res[i] = getSType(insn->src[i].src, isFloatType(info.input_types[i]), isSignedType(info.input_types[i]));318} else {319ERROR("getSType not implemented for %s idx %u\n", info.name, i);320assert(false);321res[i] = TYPE_NONE;322break;323}324}325326return res;327}328329DataType330Converter::getSType(nir_src &src, bool isFloat, bool isSigned)331{332uint8_t bitSize;333if (src.is_ssa)334bitSize = src.ssa->bit_size;335else336bitSize = src.reg.reg->bit_size;337338DataType ty = typeOfSize(bitSize / 8, isFloat, isSigned);339if (ty == TYPE_NONE) {340const char *str;341if (isFloat)342str = "float";343else if (isSigned)344str = "int";345else346str = "uint";347ERROR("couldn't get Type for %s with bitSize %u\n", str, bitSize);348assert(false);349}350return ty;351}352353DataFile354Converter::getFile(nir_intrinsic_op op)355{356switch (op) {357case nir_intrinsic_load_global:358case nir_intrinsic_store_global:359case nir_intrinsic_load_global_constant:360return FILE_MEMORY_GLOBAL;361case nir_intrinsic_load_scratch:362case nir_intrinsic_store_scratch:363return FILE_MEMORY_LOCAL;364case nir_intrinsic_load_shared:365case nir_intrinsic_store_shared:366return FILE_MEMORY_SHARED;367case nir_intrinsic_load_kernel_input:368return FILE_SHADER_INPUT;369default:370ERROR("couldn't get DateFile for op %s\n", nir_intrinsic_infos[op].name);371assert(false);372}373return FILE_NULL;374}375376operation377Converter::getOperation(nir_op op)378{379switch (op) {380// basic ops with float and int variants381case nir_op_fabs:382case nir_op_iabs:383return OP_ABS;384case nir_op_fadd:385case nir_op_iadd:386return OP_ADD;387case nir_op_iand:388return OP_AND;389case nir_op_ifind_msb:390case nir_op_ufind_msb:391return OP_BFIND;392case nir_op_fceil:393return OP_CEIL;394case nir_op_fcos:395return OP_COS;396case nir_op_f2f32:397case nir_op_f2f64:398case nir_op_f2i32:399case nir_op_f2i64:400case nir_op_f2u32:401case nir_op_f2u64:402case nir_op_i2f32:403case nir_op_i2f64:404case nir_op_i2i32:405case nir_op_i2i64:406case nir_op_u2f32:407case nir_op_u2f64:408case nir_op_u2u32:409case nir_op_u2u64:410return OP_CVT;411case nir_op_fddx:412case nir_op_fddx_coarse:413case nir_op_fddx_fine:414return OP_DFDX;415case nir_op_fddy:416case nir_op_fddy_coarse:417case nir_op_fddy_fine:418return OP_DFDY;419case nir_op_fdiv:420case nir_op_idiv:421case nir_op_udiv:422return OP_DIV;423case nir_op_fexp2:424return OP_EX2;425case nir_op_ffloor:426return OP_FLOOR;427case nir_op_ffma:428return OP_FMA;429case nir_op_flog2:430return OP_LG2;431case nir_op_fmax:432case nir_op_imax:433case nir_op_umax:434return OP_MAX;435case nir_op_pack_64_2x32_split:436return OP_MERGE;437case nir_op_fmin:438case nir_op_imin:439case nir_op_umin:440return OP_MIN;441case nir_op_fmod:442case nir_op_imod:443case nir_op_umod:444case nir_op_frem:445case nir_op_irem:446return OP_MOD;447case nir_op_fmul:448case nir_op_imul:449case nir_op_imul_high:450case nir_op_umul_high:451return OP_MUL;452case nir_op_fneg:453case nir_op_ineg:454return OP_NEG;455case nir_op_inot:456return OP_NOT;457case nir_op_ior:458return OP_OR;459case nir_op_fpow:460return OP_POW;461case nir_op_frcp:462return OP_RCP;463case nir_op_frsq:464return OP_RSQ;465case nir_op_fsat:466return OP_SAT;467case nir_op_feq32:468case nir_op_ieq32:469case nir_op_fge32:470case nir_op_ige32:471case nir_op_uge32:472case nir_op_flt32:473case nir_op_ilt32:474case nir_op_ult32:475case nir_op_fneu32:476case nir_op_ine32:477return OP_SET;478case nir_op_ishl:479return OP_SHL;480case nir_op_ishr:481case nir_op_ushr:482return OP_SHR;483case nir_op_fsin:484return OP_SIN;485case nir_op_fsqrt:486return OP_SQRT;487case nir_op_ftrunc:488return OP_TRUNC;489case nir_op_ixor:490return OP_XOR;491default:492ERROR("couldn't get operation for op %s\n", nir_op_infos[op].name);493assert(false);494return OP_NOP;495}496}497498operation499Converter::getOperation(nir_texop op)500{501switch (op) {502case nir_texop_tex:503return OP_TEX;504case nir_texop_lod:505return OP_TXLQ;506case nir_texop_txb:507return OP_TXB;508case nir_texop_txd:509return OP_TXD;510case nir_texop_txf:511case nir_texop_txf_ms:512return OP_TXF;513case nir_texop_tg4:514return OP_TXG;515case nir_texop_txl:516return OP_TXL;517case nir_texop_query_levels:518case nir_texop_texture_samples:519case nir_texop_txs:520return OP_TXQ;521default:522ERROR("couldn't get operation for nir_texop %u\n", op);523assert(false);524return OP_NOP;525}526}527528operation529Converter::getOperation(nir_intrinsic_op op)530{531switch (op) {532case nir_intrinsic_emit_vertex:533return OP_EMIT;534case nir_intrinsic_end_primitive:535return OP_RESTART;536case nir_intrinsic_bindless_image_atomic_add:537case nir_intrinsic_image_atomic_add:538case nir_intrinsic_bindless_image_atomic_and:539case nir_intrinsic_image_atomic_and:540case nir_intrinsic_bindless_image_atomic_comp_swap:541case nir_intrinsic_image_atomic_comp_swap:542case nir_intrinsic_bindless_image_atomic_exchange:543case nir_intrinsic_image_atomic_exchange:544case nir_intrinsic_bindless_image_atomic_imax:545case nir_intrinsic_image_atomic_imax:546case nir_intrinsic_bindless_image_atomic_umax:547case nir_intrinsic_image_atomic_umax:548case nir_intrinsic_bindless_image_atomic_imin:549case nir_intrinsic_image_atomic_imin:550case nir_intrinsic_bindless_image_atomic_umin:551case nir_intrinsic_image_atomic_umin:552case nir_intrinsic_bindless_image_atomic_or:553case nir_intrinsic_image_atomic_or:554case nir_intrinsic_bindless_image_atomic_xor:555case nir_intrinsic_image_atomic_xor:556case nir_intrinsic_bindless_image_atomic_inc_wrap:557case nir_intrinsic_image_atomic_inc_wrap:558case nir_intrinsic_bindless_image_atomic_dec_wrap:559case nir_intrinsic_image_atomic_dec_wrap:560return OP_SUREDP;561case nir_intrinsic_bindless_image_load:562case nir_intrinsic_image_load:563return OP_SULDP;564case nir_intrinsic_bindless_image_samples:565case nir_intrinsic_image_samples:566case nir_intrinsic_bindless_image_size:567case nir_intrinsic_image_size:568return OP_SUQ;569case nir_intrinsic_bindless_image_store:570case nir_intrinsic_image_store:571return OP_SUSTP;572default:573ERROR("couldn't get operation for nir_intrinsic_op %u\n", op);574assert(false);575return OP_NOP;576}577}578579operation580Converter::preOperationNeeded(nir_op op)581{582switch (op) {583case nir_op_fcos:584case nir_op_fsin:585return OP_PRESIN;586default:587return OP_NOP;588}589}590591int592Converter::getSubOp(nir_op op)593{594switch (op) {595case nir_op_imul_high:596case nir_op_umul_high:597return NV50_IR_SUBOP_MUL_HIGH;598case nir_op_ishl:599case nir_op_ishr:600case nir_op_ushr:601return NV50_IR_SUBOP_SHIFT_WRAP;602default:603return 0;604}605}606607int608Converter::getSubOp(nir_intrinsic_op op)609{610switch (op) {611case nir_intrinsic_bindless_image_atomic_add:612case nir_intrinsic_global_atomic_add:613case nir_intrinsic_image_atomic_add:614case nir_intrinsic_shared_atomic_add:615case nir_intrinsic_ssbo_atomic_add:616return NV50_IR_SUBOP_ATOM_ADD;617case nir_intrinsic_bindless_image_atomic_and:618case nir_intrinsic_global_atomic_and:619case nir_intrinsic_image_atomic_and:620case nir_intrinsic_shared_atomic_and:621case nir_intrinsic_ssbo_atomic_and:622return NV50_IR_SUBOP_ATOM_AND;623case nir_intrinsic_bindless_image_atomic_comp_swap:624case nir_intrinsic_global_atomic_comp_swap:625case nir_intrinsic_image_atomic_comp_swap:626case nir_intrinsic_shared_atomic_comp_swap:627case nir_intrinsic_ssbo_atomic_comp_swap:628return NV50_IR_SUBOP_ATOM_CAS;629case nir_intrinsic_bindless_image_atomic_exchange:630case nir_intrinsic_global_atomic_exchange:631case nir_intrinsic_image_atomic_exchange:632case nir_intrinsic_shared_atomic_exchange:633case nir_intrinsic_ssbo_atomic_exchange:634return NV50_IR_SUBOP_ATOM_EXCH;635case nir_intrinsic_bindless_image_atomic_or:636case nir_intrinsic_global_atomic_or:637case nir_intrinsic_image_atomic_or:638case nir_intrinsic_shared_atomic_or:639case nir_intrinsic_ssbo_atomic_or:640return NV50_IR_SUBOP_ATOM_OR;641case nir_intrinsic_bindless_image_atomic_imax:642case nir_intrinsic_bindless_image_atomic_umax:643case nir_intrinsic_global_atomic_imax:644case nir_intrinsic_global_atomic_umax:645case nir_intrinsic_image_atomic_imax:646case nir_intrinsic_image_atomic_umax:647case nir_intrinsic_shared_atomic_imax:648case nir_intrinsic_shared_atomic_umax:649case nir_intrinsic_ssbo_atomic_imax:650case nir_intrinsic_ssbo_atomic_umax:651return NV50_IR_SUBOP_ATOM_MAX;652case nir_intrinsic_bindless_image_atomic_imin:653case nir_intrinsic_bindless_image_atomic_umin:654case nir_intrinsic_global_atomic_imin:655case nir_intrinsic_global_atomic_umin:656case nir_intrinsic_image_atomic_imin:657case nir_intrinsic_image_atomic_umin:658case nir_intrinsic_shared_atomic_imin:659case nir_intrinsic_shared_atomic_umin:660case nir_intrinsic_ssbo_atomic_imin:661case nir_intrinsic_ssbo_atomic_umin:662return NV50_IR_SUBOP_ATOM_MIN;663case nir_intrinsic_bindless_image_atomic_xor:664case nir_intrinsic_global_atomic_xor:665case nir_intrinsic_image_atomic_xor:666case nir_intrinsic_shared_atomic_xor:667case nir_intrinsic_ssbo_atomic_xor:668return NV50_IR_SUBOP_ATOM_XOR;669case nir_intrinsic_bindless_image_atomic_inc_wrap:670case nir_intrinsic_image_atomic_inc_wrap:671return NV50_IR_SUBOP_ATOM_INC;672case nir_intrinsic_bindless_image_atomic_dec_wrap:673case nir_intrinsic_image_atomic_dec_wrap:674return NV50_IR_SUBOP_ATOM_DEC;675676case nir_intrinsic_group_memory_barrier:677case nir_intrinsic_memory_barrier:678case nir_intrinsic_memory_barrier_buffer:679case nir_intrinsic_memory_barrier_image:680return NV50_IR_SUBOP_MEMBAR(M, GL);681case nir_intrinsic_memory_barrier_shared:682return NV50_IR_SUBOP_MEMBAR(M, CTA);683684case nir_intrinsic_vote_all:685return NV50_IR_SUBOP_VOTE_ALL;686case nir_intrinsic_vote_any:687return NV50_IR_SUBOP_VOTE_ANY;688case nir_intrinsic_vote_ieq:689return NV50_IR_SUBOP_VOTE_UNI;690default:691return 0;692}693}694695CondCode696Converter::getCondCode(nir_op op)697{698switch (op) {699case nir_op_feq32:700case nir_op_ieq32:701return CC_EQ;702case nir_op_fge32:703case nir_op_ige32:704case nir_op_uge32:705return CC_GE;706case nir_op_flt32:707case nir_op_ilt32:708case nir_op_ult32:709return CC_LT;710case nir_op_fneu32:711return CC_NEU;712case nir_op_ine32:713return CC_NE;714default:715ERROR("couldn't get CondCode for op %s\n", nir_op_infos[op].name);716assert(false);717return CC_FL;718}719}720721Converter::LValues&722Converter::convert(nir_alu_dest *dest)723{724return convert(&dest->dest);725}726727Converter::LValues&728Converter::convert(nir_dest *dest)729{730if (dest->is_ssa)731return convert(&dest->ssa);732if (dest->reg.indirect) {733ERROR("no support for indirects.");734assert(false);735}736return convert(dest->reg.reg);737}738739Converter::LValues&740Converter::convert(nir_register *reg)741{742assert(!reg->num_array_elems);743744NirDefMap::iterator it = regDefs.find(reg->index);745if (it != regDefs.end())746return it->second;747748LValues newDef(reg->num_components);749for (uint8_t i = 0; i < reg->num_components; i++)750newDef[i] = getScratch(std::max(4, reg->bit_size / 8));751return regDefs[reg->index] = newDef;752}753754Converter::LValues&755Converter::convert(nir_ssa_def *def)756{757NirDefMap::iterator it = ssaDefs.find(def->index);758if (it != ssaDefs.end())759return it->second;760761LValues newDef(def->num_components);762for (uint8_t i = 0; i < def->num_components; i++)763newDef[i] = getSSA(std::max(4, def->bit_size / 8));764return ssaDefs[def->index] = newDef;765}766767Value*768Converter::getSrc(nir_alu_src *src, uint8_t component)769{770if (src->abs || src->negate) {771ERROR("modifiers currently not supported on nir_alu_src\n");772assert(false);773}774return getSrc(&src->src, src->swizzle[component]);775}776777Value*778Converter::getSrc(nir_register *reg, uint8_t idx)779{780NirDefMap::iterator it = regDefs.find(reg->index);781if (it == regDefs.end())782return convert(reg)[idx];783return it->second[idx];784}785786Value*787Converter::getSrc(nir_src *src, uint8_t idx, bool indirect)788{789if (src->is_ssa)790return getSrc(src->ssa, idx);791792if (src->reg.indirect) {793if (indirect)794return getSrc(src->reg.indirect, idx);795ERROR("no support for indirects.");796assert(false);797return NULL;798}799800return getSrc(src->reg.reg, idx);801}802803Value*804Converter::getSrc(nir_ssa_def *src, uint8_t idx)805{806ImmediateMap::iterator iit = immediates.find(src->index);807if (iit != immediates.end())808return convert((*iit).second, idx);809810NirDefMap::iterator it = ssaDefs.find(src->index);811if (it == ssaDefs.end()) {812ERROR("SSA value %u not found\n", src->index);813assert(false);814return NULL;815}816return it->second[idx];817}818819uint32_t820Converter::getIndirect(nir_src *src, uint8_t idx, Value *&indirect)821{822nir_const_value *offset = nir_src_as_const_value(*src);823824if (offset) {825indirect = NULL;826return offset[0].u32;827}828829indirect = getSrc(src, idx, true);830return 0;831}832833uint32_t834Converter::getIndirect(nir_intrinsic_instr *insn, uint8_t s, uint8_t c, Value *&indirect, bool isScalar)835{836int32_t idx = nir_intrinsic_base(insn) + getIndirect(&insn->src[s], c, indirect);837if (indirect && !isScalar)838indirect = mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), indirect, loadImm(NULL, 4));839return idx;840}841842static void843vert_attrib_to_tgsi_semantic(gl_vert_attrib slot, unsigned *name, unsigned *index)844{845assert(name && index);846847if (slot >= VERT_ATTRIB_MAX) {848ERROR("invalid varying slot %u\n", slot);849assert(false);850return;851}852853if (slot >= VERT_ATTRIB_GENERIC0 &&854slot < VERT_ATTRIB_GENERIC0 + VERT_ATTRIB_GENERIC_MAX) {855*name = TGSI_SEMANTIC_GENERIC;856*index = slot - VERT_ATTRIB_GENERIC0;857return;858}859860if (slot >= VERT_ATTRIB_TEX0 &&861slot < VERT_ATTRIB_TEX0 + VERT_ATTRIB_TEX_MAX) {862*name = TGSI_SEMANTIC_TEXCOORD;863*index = slot - VERT_ATTRIB_TEX0;864return;865}866867switch (slot) {868case VERT_ATTRIB_COLOR0:869*name = TGSI_SEMANTIC_COLOR;870*index = 0;871break;872case VERT_ATTRIB_COLOR1:873*name = TGSI_SEMANTIC_COLOR;874*index = 1;875break;876case VERT_ATTRIB_EDGEFLAG:877*name = TGSI_SEMANTIC_EDGEFLAG;878*index = 0;879break;880case VERT_ATTRIB_FOG:881*name = TGSI_SEMANTIC_FOG;882*index = 0;883break;884case VERT_ATTRIB_NORMAL:885*name = TGSI_SEMANTIC_NORMAL;886*index = 0;887break;888case VERT_ATTRIB_POS:889*name = TGSI_SEMANTIC_POSITION;890*index = 0;891break;892case VERT_ATTRIB_POINT_SIZE:893*name = TGSI_SEMANTIC_PSIZE;894*index = 0;895break;896default:897ERROR("unknown vert attrib slot %u\n", slot);898assert(false);899break;900}901}902903void904Converter::setInterpolate(nv50_ir_varying *var,905uint8_t mode,906bool centroid,907unsigned semantic)908{909switch (mode) {910case INTERP_MODE_FLAT:911var->flat = 1;912break;913case INTERP_MODE_NONE:914if (semantic == TGSI_SEMANTIC_COLOR)915var->sc = 1;916else if (semantic == TGSI_SEMANTIC_POSITION)917var->linear = 1;918break;919case INTERP_MODE_NOPERSPECTIVE:920var->linear = 1;921break;922case INTERP_MODE_SMOOTH:923break;924}925var->centroid = centroid;926}927928static uint16_t929calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,930bool input, const nir_variable *var)931{932if (!type->is_array())933return type->count_attribute_slots(false);934935uint16_t slots;936switch (stage) {937case Program::TYPE_GEOMETRY:938slots = type->count_attribute_slots(false);939if (input)940slots /= info.gs.vertices_in;941break;942case Program::TYPE_TESSELLATION_CONTROL:943case Program::TYPE_TESSELLATION_EVAL:944// remove first dimension945if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))946slots = type->count_attribute_slots(false);947else948slots = type->fields.array->count_attribute_slots(false);949break;950default:951slots = type->count_attribute_slots(false);952break;953}954955return slots;956}957958static uint8_t959getMaskForType(const glsl_type *type, uint8_t slot) {960uint16_t comp = type->without_array()->components();961comp = comp ? comp : 4;962963if (glsl_base_type_is_64bit(type->without_array()->base_type)) {964comp *= 2;965if (comp > 4) {966if (slot % 2)967comp -= 4;968else969comp = 4;970}971}972973return (1 << comp) - 1;974}975976bool Converter::assignSlots() {977unsigned name;978unsigned index;979980info->io.viewportId = -1;981info_out->numInputs = 0;982info_out->numOutputs = 0;983info_out->numSysVals = 0;984985uint8_t i;986BITSET_FOREACH_SET(i, nir->info.system_values_read, SYSTEM_VALUE_MAX) {987info_out->sv[info_out->numSysVals].sn = tgsi_get_sysval_semantic(i);988info_out->sv[info_out->numSysVals].si = 0;989info_out->sv[info_out->numSysVals].input = 0; // TODO inferSysValDirection(sn);990991switch (i) {992case SYSTEM_VALUE_INSTANCE_ID:993info_out->io.instanceId = info_out->numSysVals;994break;995case SYSTEM_VALUE_TESS_LEVEL_INNER:996case SYSTEM_VALUE_TESS_LEVEL_OUTER:997info_out->sv[info_out->numSysVals].patch = 1;998break;999case SYSTEM_VALUE_VERTEX_ID:1000info_out->io.vertexId = info_out->numSysVals;1001break;1002default:1003break;1004}10051006info_out->numSysVals += 1;1007}10081009if (prog->getType() == Program::TYPE_COMPUTE)1010return true;10111012nir_foreach_shader_in_variable(var, nir) {1013const glsl_type *type = var->type;1014int slot = var->data.location;1015uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);1016uint32_t vary = var->data.driver_location;10171018assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);10191020switch(prog->getType()) {1021case Program::TYPE_FRAGMENT:1022tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,1023&name, &index);1024for (uint16_t i = 0; i < slots; ++i) {1025setInterpolate(&info_out->in[vary + i], var->data.interpolation,1026var->data.centroid | var->data.sample, name);1027}1028break;1029case Program::TYPE_GEOMETRY:1030tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,1031&name, &index);1032break;1033case Program::TYPE_TESSELLATION_CONTROL:1034case Program::TYPE_TESSELLATION_EVAL:1035tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,1036&name, &index);1037if (var->data.patch && name == TGSI_SEMANTIC_PATCH)1038info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots);1039break;1040case Program::TYPE_VERTEX:1041if (slot >= VERT_ATTRIB_GENERIC0)1042slot = VERT_ATTRIB_GENERIC0 + vary;1043vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);1044switch (name) {1045case TGSI_SEMANTIC_EDGEFLAG:1046info_out->io.edgeFlagIn = vary;1047break;1048default:1049break;1050}1051break;1052default:1053ERROR("unknown shader type %u in assignSlots\n", prog->getType());1054return false;1055}10561057for (uint16_t i = 0u; i < slots; ++i, ++vary) {1058nv50_ir_varying *v = &info_out->in[vary];10591060v->patch = var->data.patch;1061v->sn = name;1062v->si = index + i;1063v->mask |= getMaskForType(type, i) << var->data.location_frac;1064}1065info_out->numInputs = std::max<uint8_t>(info_out->numInputs, vary);1066}10671068nir_foreach_shader_out_variable(var, nir) {1069const glsl_type *type = var->type;1070int slot = var->data.location;1071uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);1072uint32_t vary = var->data.driver_location;10731074assert(vary < PIPE_MAX_SHADER_OUTPUTS);10751076switch(prog->getType()) {1077case Program::TYPE_FRAGMENT:1078tgsi_get_gl_frag_result_semantic((gl_frag_result)slot, &name, &index);1079switch (name) {1080case TGSI_SEMANTIC_COLOR:1081if (!var->data.fb_fetch_output)1082info_out->prop.fp.numColourResults++;1083if (var->data.location == FRAG_RESULT_COLOR &&1084nir->info.outputs_written & BITFIELD64_BIT(var->data.location))1085info_out->prop.fp.separateFragData = true;1086// sometimes we get FRAG_RESULT_DATAX with data.index 01087// sometimes we get FRAG_RESULT_DATA0 with data.index X1088index = index == 0 ? var->data.index : index;1089break;1090case TGSI_SEMANTIC_POSITION:1091info_out->io.fragDepth = vary;1092info_out->prop.fp.writesDepth = true;1093break;1094case TGSI_SEMANTIC_SAMPLEMASK:1095info_out->io.sampleMask = vary;1096break;1097default:1098break;1099}1100break;1101case Program::TYPE_GEOMETRY:1102case Program::TYPE_TESSELLATION_CONTROL:1103case Program::TYPE_TESSELLATION_EVAL:1104case Program::TYPE_VERTEX:1105tgsi_get_gl_varying_semantic((gl_varying_slot)slot, true,1106&name, &index);11071108if (var->data.patch && name != TGSI_SEMANTIC_TESSINNER &&1109name != TGSI_SEMANTIC_TESSOUTER)1110info_out->numPatchConstants = MAX2(info_out->numPatchConstants, index + slots);11111112switch (name) {1113case TGSI_SEMANTIC_CLIPDIST:1114info_out->io.genUserClip = -1;1115break;1116case TGSI_SEMANTIC_CLIPVERTEX:1117clipVertexOutput = vary;1118break;1119case TGSI_SEMANTIC_EDGEFLAG:1120info_out->io.edgeFlagOut = vary;1121break;1122case TGSI_SEMANTIC_POSITION:1123if (clipVertexOutput < 0)1124clipVertexOutput = vary;1125break;1126default:1127break;1128}1129break;1130default:1131ERROR("unknown shader type %u in assignSlots\n", prog->getType());1132return false;1133}11341135for (uint16_t i = 0u; i < slots; ++i, ++vary) {1136nv50_ir_varying *v = &info_out->out[vary];1137v->patch = var->data.patch;1138v->sn = name;1139v->si = index + i;1140v->mask |= getMaskForType(type, i) << var->data.location_frac;11411142if (nir->info.outputs_read & 1ull << slot)1143v->oread = 1;1144}1145info_out->numOutputs = std::max<uint8_t>(info_out->numOutputs, vary);1146}11471148if (info_out->io.genUserClip > 0) {1149info_out->io.clipDistances = info_out->io.genUserClip;11501151const unsigned int nOut = (info_out->io.genUserClip + 3) / 4;11521153for (unsigned int n = 0; n < nOut; ++n) {1154unsigned int i = info_out->numOutputs++;1155info_out->out[i].id = i;1156info_out->out[i].sn = TGSI_SEMANTIC_CLIPDIST;1157info_out->out[i].si = n;1158info_out->out[i].mask = ((1 << info_out->io.clipDistances) - 1) >> (n * 4);1159}1160}11611162return info->assignSlots(info_out) == 0;1163}11641165uint32_t1166Converter::getSlotAddress(nir_intrinsic_instr *insn, uint8_t idx, uint8_t slot)1167{1168DataType ty;1169int offset = nir_intrinsic_component(insn);1170bool input;11711172if (nir_intrinsic_infos[insn->intrinsic].has_dest)1173ty = getDType(insn);1174else1175ty = getSType(insn->src[0], false, false);11761177switch (insn->intrinsic) {1178case nir_intrinsic_load_input:1179case nir_intrinsic_load_interpolated_input:1180case nir_intrinsic_load_per_vertex_input:1181input = true;1182break;1183case nir_intrinsic_load_output:1184case nir_intrinsic_load_per_vertex_output:1185case nir_intrinsic_store_output:1186case nir_intrinsic_store_per_vertex_output:1187input = false;1188break;1189default:1190ERROR("unknown intrinsic in getSlotAddress %s",1191nir_intrinsic_infos[insn->intrinsic].name);1192input = false;1193assert(false);1194break;1195}11961197if (typeSizeof(ty) == 8) {1198slot *= 2;1199slot += offset;1200if (slot >= 4) {1201idx += 1;1202slot -= 4;1203}1204} else {1205slot += offset;1206}12071208assert(slot < 4);1209assert(!input || idx < PIPE_MAX_SHADER_INPUTS);1210assert(input || idx < PIPE_MAX_SHADER_OUTPUTS);12111212const nv50_ir_varying *vary = input ? info_out->in : info_out->out;1213return vary[idx].slot[slot] * 4;1214}12151216Instruction *1217Converter::loadFrom(DataFile file, uint8_t i, DataType ty, Value *def,1218uint32_t base, uint8_t c, Value *indirect0,1219Value *indirect1, bool patch)1220{1221unsigned int tySize = typeSizeof(ty);12221223if (tySize == 8 &&1224(file == FILE_MEMORY_CONST || file == FILE_MEMORY_BUFFER || indirect0)) {1225Value *lo = getSSA();1226Value *hi = getSSA();12271228Instruction *loi =1229mkLoad(TYPE_U32, lo,1230mkSymbol(file, i, TYPE_U32, base + c * tySize),1231indirect0);1232loi->setIndirect(0, 1, indirect1);1233loi->perPatch = patch;12341235Instruction *hii =1236mkLoad(TYPE_U32, hi,1237mkSymbol(file, i, TYPE_U32, base + c * tySize + 4),1238indirect0);1239hii->setIndirect(0, 1, indirect1);1240hii->perPatch = patch;12411242return mkOp2(OP_MERGE, ty, def, lo, hi);1243} else {1244Instruction *ld =1245mkLoad(ty, def, mkSymbol(file, i, ty, base + c * tySize), indirect0);1246ld->setIndirect(0, 1, indirect1);1247ld->perPatch = patch;1248return ld;1249}1250}12511252void1253Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op,1254DataType ty, Value *src, uint8_t idx, uint8_t c,1255Value *indirect0, Value *indirect1)1256{1257uint8_t size = typeSizeof(ty);1258uint32_t address = getSlotAddress(insn, idx, c);12591260if (size == 8 && indirect0) {1261Value *split[2];1262mkSplit(split, 4, src);12631264if (op == OP_EXPORT) {1265split[0] = mkMov(getSSA(), split[0], ty)->getDef(0);1266split[1] = mkMov(getSSA(), split[1], ty)->getDef(0);1267}12681269mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address), indirect0,1270split[0])->perPatch = info_out->out[idx].patch;1271mkStore(op, TYPE_U32, mkSymbol(file, 0, TYPE_U32, address + 4), indirect0,1272split[1])->perPatch = info_out->out[idx].patch;1273} else {1274if (op == OP_EXPORT)1275src = mkMov(getSSA(size), src, ty)->getDef(0);1276mkStore(op, ty, mkSymbol(file, 0, ty, address), indirect0,1277src)->perPatch = info_out->out[idx].patch;1278}1279}12801281bool1282Converter::parseNIR()1283{1284info_out->bin.tlsSpace = nir->scratch_size;1285info_out->io.clipDistances = nir->info.clip_distance_array_size;1286info_out->io.cullDistances = nir->info.cull_distance_array_size;1287info_out->io.layer_viewport_relative = nir->info.layer_viewport_relative;12881289switch(prog->getType()) {1290case Program::TYPE_COMPUTE:1291info->prop.cp.numThreads[0] = nir->info.workgroup_size[0];1292info->prop.cp.numThreads[1] = nir->info.workgroup_size[1];1293info->prop.cp.numThreads[2] = nir->info.workgroup_size[2];1294info_out->bin.smemSize = std::max(info_out->bin.smemSize, nir->info.shared_size);1295break;1296case Program::TYPE_FRAGMENT:1297info_out->prop.fp.earlyFragTests = nir->info.fs.early_fragment_tests;1298prog->persampleInvocation =1299BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||1300BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);1301info_out->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;1302info_out->prop.fp.readsSampleLocations =1303BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);1304info_out->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote;1305info_out->prop.fp.usesSampleMaskIn =1306!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);1307break;1308case Program::TYPE_GEOMETRY:1309info_out->prop.gp.instanceCount = nir->info.gs.invocations;1310info_out->prop.gp.maxVertices = nir->info.gs.vertices_out;1311info_out->prop.gp.outputPrim = nir->info.gs.output_primitive;1312break;1313case Program::TYPE_TESSELLATION_CONTROL:1314case Program::TYPE_TESSELLATION_EVAL:1315if (nir->info.tess.primitive_mode == GL_ISOLINES)1316info_out->prop.tp.domain = GL_LINES;1317else1318info_out->prop.tp.domain = nir->info.tess.primitive_mode;1319info_out->prop.tp.outputPatchSize = nir->info.tess.tcs_vertices_out;1320info_out->prop.tp.outputPrim =1321nir->info.tess.point_mode ? PIPE_PRIM_POINTS : PIPE_PRIM_TRIANGLES;1322info_out->prop.tp.partitioning = (nir->info.tess.spacing + 1) % 3;1323info_out->prop.tp.winding = !nir->info.tess.ccw;1324break;1325case Program::TYPE_VERTEX:1326info_out->prop.vp.usesDrawParameters =1327BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX) ||1328BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) ||1329BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);1330break;1331default:1332break;1333}13341335return true;1336}13371338bool1339Converter::visit(nir_function *function)1340{1341assert(function->impl);13421343// usually the blocks will set everything up, but main is special1344BasicBlock *entry = new BasicBlock(prog->main);1345exit = new BasicBlock(prog->main);1346blocks[nir_start_block(function->impl)->index] = entry;1347prog->main->setEntry(entry);1348prog->main->setExit(exit);13491350setPosition(entry, true);13511352if (info_out->io.genUserClip > 0) {1353for (int c = 0; c < 4; ++c)1354clipVtx[c] = getScratch();1355}13561357switch (prog->getType()) {1358case Program::TYPE_TESSELLATION_CONTROL:1359outBase = mkOp2v(1360OP_SUB, TYPE_U32, getSSA(),1361mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),1362mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));1363break;1364case Program::TYPE_FRAGMENT: {1365Symbol *sv = mkSysVal(SV_POSITION, 3);1366fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);1367fp.position = mkOp1v(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);1368break;1369}1370default:1371break;1372}13731374nir_index_ssa_defs(function->impl);1375foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {1376if (!visit(node))1377return false;1378}13791380bb->cfg.attach(&exit->cfg, Graph::Edge::TREE);1381setPosition(exit, true);13821383if ((prog->getType() == Program::TYPE_VERTEX ||1384prog->getType() == Program::TYPE_TESSELLATION_EVAL)1385&& info_out->io.genUserClip > 0)1386handleUserClipPlanes();13871388// TODO: for non main function this needs to be a OP_RETURN1389mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;1390return true;1391}13921393bool1394Converter::visit(nir_cf_node *node)1395{1396switch (node->type) {1397case nir_cf_node_block:1398return visit(nir_cf_node_as_block(node));1399case nir_cf_node_if:1400return visit(nir_cf_node_as_if(node));1401case nir_cf_node_loop:1402return visit(nir_cf_node_as_loop(node));1403default:1404ERROR("unknown nir_cf_node type %u\n", node->type);1405return false;1406}1407}14081409bool1410Converter::visit(nir_block *block)1411{1412if (!block->predecessors->entries && block->instr_list.is_empty())1413return true;14141415BasicBlock *bb = convert(block);14161417setPosition(bb, true);1418nir_foreach_instr(insn, block) {1419if (!visit(insn))1420return false;1421}1422return true;1423}14241425bool1426Converter::visit(nir_if *nif)1427{1428curIfDepth++;14291430DataType sType = getSType(nif->condition, false, false);1431Value *src = getSrc(&nif->condition, 0);14321433nir_block *lastThen = nir_if_last_then_block(nif);1434nir_block *lastElse = nir_if_last_else_block(nif);14351436BasicBlock *headBB = bb;1437BasicBlock *ifBB = convert(nir_if_first_then_block(nif));1438BasicBlock *elseBB = convert(nir_if_first_else_block(nif));14391440bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);1441bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);14421443bool insertJoins = lastThen->successors[0] == lastElse->successors[0];1444mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);14451446foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {1447if (!visit(node))1448return false;1449}14501451setPosition(convert(lastThen), true);1452if (!bb->isTerminated()) {1453BasicBlock *tailBB = convert(lastThen->successors[0]);1454mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);1455bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);1456} else {1457insertJoins = insertJoins && bb->getExit()->op == OP_BRA;1458}14591460foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {1461if (!visit(node))1462return false;1463}14641465setPosition(convert(lastElse), true);1466if (!bb->isTerminated()) {1467BasicBlock *tailBB = convert(lastElse->successors[0]);1468mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);1469bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);1470} else {1471insertJoins = insertJoins && bb->getExit()->op == OP_BRA;1472}14731474/* only insert joins for the most outer if */1475if (--curIfDepth)1476insertJoins = false;14771478/* we made sure that all threads would converge at the same block */1479if (insertJoins) {1480BasicBlock *conv = convert(lastThen->successors[0]);1481setPosition(headBB->getExit(), false);1482headBB->joinAt = mkFlow(OP_JOINAT, conv, CC_ALWAYS, NULL);1483setPosition(conv, false);1484mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;1485}14861487return true;1488}14891490// TODO: add convergency1491bool1492Converter::visit(nir_loop *loop)1493{1494curLoopDepth += 1;1495func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);14961497BasicBlock *loopBB = convert(nir_loop_first_block(loop));1498BasicBlock *tailBB = convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));14991500bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);15011502mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);1503setPosition(loopBB, false);1504mkFlow(OP_PRECONT, loopBB, CC_ALWAYS, NULL);15051506foreach_list_typed(nir_cf_node, node, node, &loop->body) {1507if (!visit(node))1508return false;1509}15101511if (!bb->isTerminated()) {1512mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);1513bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);1514}15151516if (tailBB->cfg.incidentCount() == 0)1517loopBB->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);15181519curLoopDepth -= 1;15201521return true;1522}15231524bool1525Converter::visit(nir_instr *insn)1526{1527// we need an insertion point for on the fly generated immediate loads1528immInsertPos = bb->getExit();1529switch (insn->type) {1530case nir_instr_type_alu:1531return visit(nir_instr_as_alu(insn));1532case nir_instr_type_intrinsic:1533return visit(nir_instr_as_intrinsic(insn));1534case nir_instr_type_jump:1535return visit(nir_instr_as_jump(insn));1536case nir_instr_type_load_const:1537return visit(nir_instr_as_load_const(insn));1538case nir_instr_type_ssa_undef:1539return visit(nir_instr_as_ssa_undef(insn));1540case nir_instr_type_tex:1541return visit(nir_instr_as_tex(insn));1542default:1543ERROR("unknown nir_instr type %u\n", insn->type);1544return false;1545}1546return true;1547}15481549SVSemantic1550Converter::convert(nir_intrinsic_op intr)1551{1552switch (intr) {1553case nir_intrinsic_load_base_vertex:1554return SV_BASEVERTEX;1555case nir_intrinsic_load_base_instance:1556return SV_BASEINSTANCE;1557case nir_intrinsic_load_draw_id:1558return SV_DRAWID;1559case nir_intrinsic_load_front_face:1560return SV_FACE;1561case nir_intrinsic_is_helper_invocation:1562case nir_intrinsic_load_helper_invocation:1563return SV_THREAD_KILL;1564case nir_intrinsic_load_instance_id:1565return SV_INSTANCE_ID;1566case nir_intrinsic_load_invocation_id:1567return SV_INVOCATION_ID;1568case nir_intrinsic_load_workgroup_size:1569return SV_NTID;1570case nir_intrinsic_load_local_invocation_id:1571return SV_TID;1572case nir_intrinsic_load_num_workgroups:1573return SV_NCTAID;1574case nir_intrinsic_load_patch_vertices_in:1575return SV_VERTEX_COUNT;1576case nir_intrinsic_load_primitive_id:1577return SV_PRIMITIVE_ID;1578case nir_intrinsic_load_sample_id:1579return SV_SAMPLE_INDEX;1580case nir_intrinsic_load_sample_mask_in:1581return SV_SAMPLE_MASK;1582case nir_intrinsic_load_sample_pos:1583return SV_SAMPLE_POS;1584case nir_intrinsic_load_subgroup_eq_mask:1585return SV_LANEMASK_EQ;1586case nir_intrinsic_load_subgroup_ge_mask:1587return SV_LANEMASK_GE;1588case nir_intrinsic_load_subgroup_gt_mask:1589return SV_LANEMASK_GT;1590case nir_intrinsic_load_subgroup_le_mask:1591return SV_LANEMASK_LE;1592case nir_intrinsic_load_subgroup_lt_mask:1593return SV_LANEMASK_LT;1594case nir_intrinsic_load_subgroup_invocation:1595return SV_LANEID;1596case nir_intrinsic_load_tess_coord:1597return SV_TESS_COORD;1598case nir_intrinsic_load_tess_level_inner:1599return SV_TESS_INNER;1600case nir_intrinsic_load_tess_level_outer:1601return SV_TESS_OUTER;1602case nir_intrinsic_load_vertex_id:1603return SV_VERTEX_ID;1604case nir_intrinsic_load_workgroup_id:1605return SV_CTAID;1606case nir_intrinsic_load_work_dim:1607return SV_WORK_DIM;1608default:1609ERROR("unknown SVSemantic for nir_intrinsic_op %s\n",1610nir_intrinsic_infos[intr].name);1611assert(false);1612return SV_LAST;1613}1614}16151616bool1617Converter::visit(nir_intrinsic_instr *insn)1618{1619nir_intrinsic_op op = insn->intrinsic;1620const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];1621unsigned dest_components = nir_intrinsic_dest_components(insn);16221623switch (op) {1624case nir_intrinsic_load_uniform: {1625LValues &newDefs = convert(&insn->dest);1626const DataType dType = getDType(insn);1627Value *indirect;1628uint32_t coffset = getIndirect(insn, 0, 0, indirect);1629for (uint8_t i = 0; i < dest_components; ++i) {1630loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);1631}1632break;1633}1634case nir_intrinsic_store_output:1635case nir_intrinsic_store_per_vertex_output: {1636Value *indirect;1637DataType dType = getSType(insn->src[0], false, false);1638uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);16391640for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {1641if (!((1u << i) & nir_intrinsic_write_mask(insn)))1642continue;16431644uint8_t offset = 0;1645Value *src = getSrc(&insn->src[0], i);1646switch (prog->getType()) {1647case Program::TYPE_FRAGMENT: {1648if (info_out->out[idx].sn == TGSI_SEMANTIC_POSITION) {1649// TGSI uses a different interface than NIR, TGSI stores that1650// value in the z component, NIR in X1651offset += 2;1652src = mkOp1v(OP_SAT, TYPE_F32, getScratch(), src);1653}1654break;1655}1656case Program::TYPE_GEOMETRY:1657case Program::TYPE_TESSELLATION_EVAL:1658case Program::TYPE_VERTEX: {1659if (info_out->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {1660mkMov(clipVtx[i], src);1661src = clipVtx[i];1662}1663break;1664}1665default:1666break;1667}16681669storeTo(insn, FILE_SHADER_OUTPUT, OP_EXPORT, dType, src, idx, i + offset, indirect);1670}1671break;1672}1673case nir_intrinsic_load_input:1674case nir_intrinsic_load_interpolated_input:1675case nir_intrinsic_load_output: {1676LValues &newDefs = convert(&insn->dest);16771678// FBFetch1679if (prog->getType() == Program::TYPE_FRAGMENT &&1680op == nir_intrinsic_load_output) {1681std::vector<Value*> defs, srcs;1682uint8_t mask = 0;16831684srcs.push_back(getSSA());1685srcs.push_back(getSSA());1686Value *x = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 0));1687Value *y = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 1));1688mkCvt(OP_CVT, TYPE_U32, srcs[0], TYPE_F32, x)->rnd = ROUND_Z;1689mkCvt(OP_CVT, TYPE_U32, srcs[1], TYPE_F32, y)->rnd = ROUND_Z;16901691srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));1692srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));16931694for (uint8_t i = 0u; i < dest_components; ++i) {1695defs.push_back(newDefs[i]);1696mask |= 1 << i;1697}16981699TexInstruction *texi = mkTex(OP_TXF, TEX_TARGET_2D_MS_ARRAY, 0, 0, defs, srcs);1700texi->tex.levelZero = 1;1701texi->tex.mask = mask;1702texi->tex.useOffsets = 0;1703texi->tex.r = 0xffff;1704texi->tex.s = 0xffff;17051706info_out->prop.fp.readsFramebuffer = true;1707break;1708}17091710const DataType dType = getDType(insn);1711Value *indirect;1712bool input = op != nir_intrinsic_load_output;1713operation nvirOp;1714uint32_t mode = 0;17151716uint32_t idx = getIndirect(insn, op == nir_intrinsic_load_interpolated_input ? 1 : 0, 0, indirect);1717nv50_ir_varying& vary = input ? info_out->in[idx] : info_out->out[idx];17181719// see load_barycentric_* handling1720if (prog->getType() == Program::TYPE_FRAGMENT) {1721if (op == nir_intrinsic_load_interpolated_input) {1722ImmediateValue immMode;1723if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))1724mode = immMode.reg.data.u32;1725}1726if (mode == NV50_IR_INTERP_DEFAULT)1727mode |= translateInterpMode(&vary, nvirOp);1728else {1729if (vary.linear) {1730nvirOp = OP_LINTERP;1731mode |= NV50_IR_INTERP_LINEAR;1732} else {1733nvirOp = OP_PINTERP;1734mode |= NV50_IR_INTERP_PERSPECTIVE;1735}1736}1737}17381739for (uint8_t i = 0u; i < dest_components; ++i) {1740uint32_t address = getSlotAddress(insn, idx, i);1741Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);1742if (prog->getType() == Program::TYPE_FRAGMENT) {1743int s = 1;1744if (typeSizeof(dType) == 8) {1745Value *lo = getSSA();1746Value *hi = getSSA();1747Instruction *interp;17481749interp = mkOp1(nvirOp, TYPE_U32, lo, sym);1750if (nvirOp == OP_PINTERP)1751interp->setSrc(s++, fp.position);1752if (mode & NV50_IR_INTERP_OFFSET)1753interp->setSrc(s++, getSrc(&insn->src[0], 0));1754interp->setInterpolate(mode);1755interp->setIndirect(0, 0, indirect);17561757Symbol *sym1 = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address + 4);1758interp = mkOp1(nvirOp, TYPE_U32, hi, sym1);1759if (nvirOp == OP_PINTERP)1760interp->setSrc(s++, fp.position);1761if (mode & NV50_IR_INTERP_OFFSET)1762interp->setSrc(s++, getSrc(&insn->src[0], 0));1763interp->setInterpolate(mode);1764interp->setIndirect(0, 0, indirect);17651766mkOp2(OP_MERGE, dType, newDefs[i], lo, hi);1767} else {1768Instruction *interp = mkOp1(nvirOp, dType, newDefs[i], sym);1769if (nvirOp == OP_PINTERP)1770interp->setSrc(s++, fp.position);1771if (mode & NV50_IR_INTERP_OFFSET)1772interp->setSrc(s++, getSrc(&insn->src[0], 0));1773interp->setInterpolate(mode);1774interp->setIndirect(0, 0, indirect);1775}1776} else {1777mkLoad(dType, newDefs[i], sym, indirect)->perPatch = vary.patch;1778}1779}1780break;1781}1782case nir_intrinsic_load_barycentric_at_offset:1783case nir_intrinsic_load_barycentric_at_sample:1784case nir_intrinsic_load_barycentric_centroid:1785case nir_intrinsic_load_barycentric_pixel:1786case nir_intrinsic_load_barycentric_sample: {1787LValues &newDefs = convert(&insn->dest);1788uint32_t mode;17891790if (op == nir_intrinsic_load_barycentric_centroid ||1791op == nir_intrinsic_load_barycentric_sample) {1792mode = NV50_IR_INTERP_CENTROID;1793} else if (op == nir_intrinsic_load_barycentric_at_offset) {1794Value *offs[2];1795for (uint8_t c = 0; c < 2; c++) {1796offs[c] = getScratch();1797mkOp2(OP_MIN, TYPE_F32, offs[c], getSrc(&insn->src[0], c), loadImm(NULL, 0.4375f));1798mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));1799mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));1800mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);1801}1802mkOp3v(OP_INSBF, TYPE_U32, newDefs[0], offs[1], mkImm(0x1010), offs[0]);18031804mode = NV50_IR_INTERP_OFFSET;1805} else if (op == nir_intrinsic_load_barycentric_pixel) {1806mode = NV50_IR_INTERP_DEFAULT;1807} else if (op == nir_intrinsic_load_barycentric_at_sample) {1808info_out->prop.fp.readsSampleLocations = true;1809Value *sample = getSSA();1810mkOp3(OP_SELP, TYPE_U32, sample, mkImm(0), getSrc(&insn->src[0], 0), mkImm(0))1811->subOp = 2;1812mkOp1(OP_PIXLD, TYPE_U32, newDefs[0], sample)->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;1813mode = NV50_IR_INTERP_OFFSET;1814} else {1815unreachable("all intrinsics already handled above");1816}18171818loadImm(newDefs[1], mode);1819break;1820}1821case nir_intrinsic_demote:1822case nir_intrinsic_discard:1823mkOp(OP_DISCARD, TYPE_NONE, NULL);1824break;1825case nir_intrinsic_demote_if:1826case nir_intrinsic_discard_if: {1827Value *pred = getSSA(1, FILE_PREDICATE);1828if (insn->num_components > 1) {1829ERROR("nir_intrinsic_discard_if only with 1 component supported!\n");1830assert(false);1831return false;1832}1833mkCmp(OP_SET, CC_NE, TYPE_U8, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);1834mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, pred);1835break;1836}1837case nir_intrinsic_load_base_vertex:1838case nir_intrinsic_load_base_instance:1839case nir_intrinsic_load_draw_id:1840case nir_intrinsic_load_front_face:1841case nir_intrinsic_is_helper_invocation:1842case nir_intrinsic_load_helper_invocation:1843case nir_intrinsic_load_instance_id:1844case nir_intrinsic_load_invocation_id:1845case nir_intrinsic_load_workgroup_size:1846case nir_intrinsic_load_local_invocation_id:1847case nir_intrinsic_load_num_workgroups:1848case nir_intrinsic_load_patch_vertices_in:1849case nir_intrinsic_load_primitive_id:1850case nir_intrinsic_load_sample_id:1851case nir_intrinsic_load_sample_mask_in:1852case nir_intrinsic_load_sample_pos:1853case nir_intrinsic_load_subgroup_eq_mask:1854case nir_intrinsic_load_subgroup_ge_mask:1855case nir_intrinsic_load_subgroup_gt_mask:1856case nir_intrinsic_load_subgroup_le_mask:1857case nir_intrinsic_load_subgroup_lt_mask:1858case nir_intrinsic_load_subgroup_invocation:1859case nir_intrinsic_load_tess_coord:1860case nir_intrinsic_load_tess_level_inner:1861case nir_intrinsic_load_tess_level_outer:1862case nir_intrinsic_load_vertex_id:1863case nir_intrinsic_load_workgroup_id:1864case nir_intrinsic_load_work_dim: {1865const DataType dType = getDType(insn);1866SVSemantic sv = convert(op);1867LValues &newDefs = convert(&insn->dest);18681869for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {1870Value *def;1871if (typeSizeof(dType) == 8)1872def = getSSA();1873else1874def = newDefs[i];18751876if (sv == SV_TID && info->prop.cp.numThreads[i] == 1) {1877loadImm(def, 0u);1878} else {1879Symbol *sym = mkSysVal(sv, i);1880Instruction *rdsv = mkOp1(OP_RDSV, TYPE_U32, def, sym);1881if (sv == SV_TESS_OUTER || sv == SV_TESS_INNER)1882rdsv->perPatch = 1;1883}18841885if (typeSizeof(dType) == 8)1886mkOp2(OP_MERGE, dType, newDefs[i], def, loadImm(getSSA(), 0u));1887}1888break;1889}1890// constants1891case nir_intrinsic_load_subgroup_size: {1892LValues &newDefs = convert(&insn->dest);1893loadImm(newDefs[0], 32u);1894break;1895}1896case nir_intrinsic_vote_all:1897case nir_intrinsic_vote_any:1898case nir_intrinsic_vote_ieq: {1899LValues &newDefs = convert(&insn->dest);1900Value *pred = getScratch(1, FILE_PREDICATE);1901mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);1902mkOp1(OP_VOTE, TYPE_U32, pred, pred)->subOp = getSubOp(op);1903mkCvt(OP_CVT, TYPE_U32, newDefs[0], TYPE_U8, pred);1904break;1905}1906case nir_intrinsic_ballot: {1907LValues &newDefs = convert(&insn->dest);1908Value *pred = getSSA(1, FILE_PREDICATE);1909mkCmp(OP_SET, CC_NE, TYPE_U32, pred, TYPE_U32, getSrc(&insn->src[0], 0), zero);1910mkOp1(OP_VOTE, TYPE_U32, newDefs[0], pred)->subOp = NV50_IR_SUBOP_VOTE_ANY;1911break;1912}1913case nir_intrinsic_read_first_invocation:1914case nir_intrinsic_read_invocation: {1915LValues &newDefs = convert(&insn->dest);1916const DataType dType = getDType(insn);1917Value *tmp = getScratch();19181919if (op == nir_intrinsic_read_first_invocation) {1920mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;1921mkOp1(OP_BREV, TYPE_U32, tmp, tmp);1922mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;1923} else1924tmp = getSrc(&insn->src[1], 0);19251926for (uint8_t i = 0; i < dest_components; ++i) {1927mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))1928->subOp = NV50_IR_SUBOP_SHFL_IDX;1929}1930break;1931}1932case nir_intrinsic_load_per_vertex_input: {1933const DataType dType = getDType(insn);1934LValues &newDefs = convert(&insn->dest);1935Value *indirectVertex;1936Value *indirectOffset;1937uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);1938uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);19391940Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),1941mkImm(baseVertex), indirectVertex);1942for (uint8_t i = 0u; i < dest_components; ++i) {1943uint32_t address = getSlotAddress(insn, idx, i);1944loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,1945indirectOffset, vtxBase, info_out->in[idx].patch);1946}1947break;1948}1949case nir_intrinsic_load_per_vertex_output: {1950const DataType dType = getDType(insn);1951LValues &newDefs = convert(&insn->dest);1952Value *indirectVertex;1953Value *indirectOffset;1954uint32_t baseVertex = getIndirect(&insn->src[0], 0, indirectVertex);1955uint32_t idx = getIndirect(insn, 1, 0, indirectOffset);1956Value *vtxBase = NULL;19571958if (indirectVertex)1959vtxBase = indirectVertex;1960else1961vtxBase = loadImm(NULL, baseVertex);19621963vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);19641965for (uint8_t i = 0u; i < dest_components; ++i) {1966uint32_t address = getSlotAddress(insn, idx, i);1967loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,1968indirectOffset, vtxBase, info_out->in[idx].patch);1969}1970break;1971}1972case nir_intrinsic_emit_vertex: {1973if (info_out->io.genUserClip > 0)1974handleUserClipPlanes();1975uint32_t idx = nir_intrinsic_stream_id(insn);1976mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;1977break;1978}1979case nir_intrinsic_end_primitive: {1980uint32_t idx = nir_intrinsic_stream_id(insn);1981if (idx)1982break;1983mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;1984break;1985}1986case nir_intrinsic_load_ubo: {1987const DataType dType = getDType(insn);1988LValues &newDefs = convert(&insn->dest);1989Value *indirectIndex;1990Value *indirectOffset;1991uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;1992uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);19931994for (uint8_t i = 0u; i < dest_components; ++i) {1995loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,1996indirectOffset, indirectIndex);1997}1998break;1999}2000case nir_intrinsic_get_ssbo_size: {2001LValues &newDefs = convert(&insn->dest);2002const DataType dType = getDType(insn);2003Value *indirectBuffer;2004uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);20052006Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, 0);2007mkOp1(OP_BUFQ, dType, newDefs[0], sym)->setIndirect(0, 0, indirectBuffer);2008break;2009}2010case nir_intrinsic_store_ssbo: {2011DataType sType = getSType(insn->src[0], false, false);2012Value *indirectBuffer;2013Value *indirectOffset;2014uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);2015uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);20162017for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {2018if (!((1u << i) & nir_intrinsic_write_mask(insn)))2019continue;2020Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,2021offset + i * typeSizeof(sType));2022mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i))2023->setIndirect(0, 1, indirectBuffer);2024}2025info_out->io.globalAccess |= 0x2;2026break;2027}2028case nir_intrinsic_load_ssbo: {2029const DataType dType = getDType(insn);2030LValues &newDefs = convert(&insn->dest);2031Value *indirectBuffer;2032Value *indirectOffset;2033uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);2034uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);20352036for (uint8_t i = 0u; i < dest_components; ++i)2037loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,2038indirectOffset, indirectBuffer);20392040info_out->io.globalAccess |= 0x1;2041break;2042}2043case nir_intrinsic_shared_atomic_add:2044case nir_intrinsic_shared_atomic_and:2045case nir_intrinsic_shared_atomic_comp_swap:2046case nir_intrinsic_shared_atomic_exchange:2047case nir_intrinsic_shared_atomic_or:2048case nir_intrinsic_shared_atomic_imax:2049case nir_intrinsic_shared_atomic_imin:2050case nir_intrinsic_shared_atomic_umax:2051case nir_intrinsic_shared_atomic_umin:2052case nir_intrinsic_shared_atomic_xor: {2053const DataType dType = getDType(insn);2054LValues &newDefs = convert(&insn->dest);2055Value *indirectOffset;2056uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);2057Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset);2058Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));2059if (op == nir_intrinsic_shared_atomic_comp_swap)2060atom->setSrc(2, getSrc(&insn->src[2], 0));2061atom->setIndirect(0, 0, indirectOffset);2062atom->subOp = getSubOp(op);2063break;2064}2065case nir_intrinsic_ssbo_atomic_add:2066case nir_intrinsic_ssbo_atomic_and:2067case nir_intrinsic_ssbo_atomic_comp_swap:2068case nir_intrinsic_ssbo_atomic_exchange:2069case nir_intrinsic_ssbo_atomic_or:2070case nir_intrinsic_ssbo_atomic_imax:2071case nir_intrinsic_ssbo_atomic_imin:2072case nir_intrinsic_ssbo_atomic_umax:2073case nir_intrinsic_ssbo_atomic_umin:2074case nir_intrinsic_ssbo_atomic_xor: {2075const DataType dType = getDType(insn);2076LValues &newDefs = convert(&insn->dest);2077Value *indirectBuffer;2078Value *indirectOffset;2079uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);2080uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);20812082Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, dType, offset);2083Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym,2084getSrc(&insn->src[2], 0));2085if (op == nir_intrinsic_ssbo_atomic_comp_swap)2086atom->setSrc(2, getSrc(&insn->src[3], 0));2087atom->setIndirect(0, 0, indirectOffset);2088atom->setIndirect(0, 1, indirectBuffer);2089atom->subOp = getSubOp(op);20902091info_out->io.globalAccess |= 0x2;2092break;2093}2094case nir_intrinsic_global_atomic_add:2095case nir_intrinsic_global_atomic_and:2096case nir_intrinsic_global_atomic_comp_swap:2097case nir_intrinsic_global_atomic_exchange:2098case nir_intrinsic_global_atomic_or:2099case nir_intrinsic_global_atomic_imax:2100case nir_intrinsic_global_atomic_imin:2101case nir_intrinsic_global_atomic_umax:2102case nir_intrinsic_global_atomic_umin:2103case nir_intrinsic_global_atomic_xor: {2104const DataType dType = getDType(insn);2105LValues &newDefs = convert(&insn->dest);2106Value *address;2107uint32_t offset = getIndirect(&insn->src[0], 0, address);21082109Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, dType, offset);2110Instruction *atom =2111mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0));2112if (op == nir_intrinsic_global_atomic_comp_swap)2113atom->setSrc(2, getSrc(&insn->src[2], 0));2114atom->setIndirect(0, 0, address);2115atom->subOp = getSubOp(op);21162117info_out->io.globalAccess |= 0x2;2118break;2119}2120case nir_intrinsic_bindless_image_atomic_add:2121case nir_intrinsic_bindless_image_atomic_and:2122case nir_intrinsic_bindless_image_atomic_comp_swap:2123case nir_intrinsic_bindless_image_atomic_exchange:2124case nir_intrinsic_bindless_image_atomic_imax:2125case nir_intrinsic_bindless_image_atomic_umax:2126case nir_intrinsic_bindless_image_atomic_imin:2127case nir_intrinsic_bindless_image_atomic_umin:2128case nir_intrinsic_bindless_image_atomic_or:2129case nir_intrinsic_bindless_image_atomic_xor:2130case nir_intrinsic_bindless_image_atomic_inc_wrap:2131case nir_intrinsic_bindless_image_atomic_dec_wrap:2132case nir_intrinsic_bindless_image_load:2133case nir_intrinsic_bindless_image_samples:2134case nir_intrinsic_bindless_image_size:2135case nir_intrinsic_bindless_image_store:2136case nir_intrinsic_image_atomic_add:2137case nir_intrinsic_image_atomic_and:2138case nir_intrinsic_image_atomic_comp_swap:2139case nir_intrinsic_image_atomic_exchange:2140case nir_intrinsic_image_atomic_imax:2141case nir_intrinsic_image_atomic_umax:2142case nir_intrinsic_image_atomic_imin:2143case nir_intrinsic_image_atomic_umin:2144case nir_intrinsic_image_atomic_or:2145case nir_intrinsic_image_atomic_xor:2146case nir_intrinsic_image_atomic_inc_wrap:2147case nir_intrinsic_image_atomic_dec_wrap:2148case nir_intrinsic_image_load:2149case nir_intrinsic_image_samples:2150case nir_intrinsic_image_size:2151case nir_intrinsic_image_store: {2152std::vector<Value*> srcs, defs;2153Value *indirect;2154DataType ty;21552156uint32_t mask = 0;2157TexInstruction::Target target =2158convert(nir_intrinsic_image_dim(insn), !!nir_intrinsic_image_array(insn), false);2159unsigned int argCount = getNIRArgCount(target);2160uint16_t location = 0;21612162if (opInfo.has_dest) {2163LValues &newDefs = convert(&insn->dest);2164for (uint8_t i = 0u; i < newDefs.size(); ++i) {2165defs.push_back(newDefs[i]);2166mask |= 1 << i;2167}2168}21692170int lod_src = -1;2171bool bindless = false;2172switch (op) {2173case nir_intrinsic_bindless_image_atomic_add:2174case nir_intrinsic_bindless_image_atomic_and:2175case nir_intrinsic_bindless_image_atomic_comp_swap:2176case nir_intrinsic_bindless_image_atomic_exchange:2177case nir_intrinsic_bindless_image_atomic_imax:2178case nir_intrinsic_bindless_image_atomic_umax:2179case nir_intrinsic_bindless_image_atomic_imin:2180case nir_intrinsic_bindless_image_atomic_umin:2181case nir_intrinsic_bindless_image_atomic_or:2182case nir_intrinsic_bindless_image_atomic_xor:2183case nir_intrinsic_bindless_image_atomic_inc_wrap:2184case nir_intrinsic_bindless_image_atomic_dec_wrap:2185ty = getDType(insn);2186bindless = true;2187info_out->io.globalAccess |= 0x2;2188mask = 0x1;2189break;2190case nir_intrinsic_image_atomic_add:2191case nir_intrinsic_image_atomic_and:2192case nir_intrinsic_image_atomic_comp_swap:2193case nir_intrinsic_image_atomic_exchange:2194case nir_intrinsic_image_atomic_imax:2195case nir_intrinsic_image_atomic_umax:2196case nir_intrinsic_image_atomic_imin:2197case nir_intrinsic_image_atomic_umin:2198case nir_intrinsic_image_atomic_or:2199case nir_intrinsic_image_atomic_xor:2200case nir_intrinsic_image_atomic_inc_wrap:2201case nir_intrinsic_image_atomic_dec_wrap:2202ty = getDType(insn);2203bindless = false;2204info_out->io.globalAccess |= 0x2;2205mask = 0x1;2206break;2207case nir_intrinsic_bindless_image_load:2208case nir_intrinsic_image_load:2209ty = TYPE_U32;2210bindless = op == nir_intrinsic_bindless_image_load;2211info_out->io.globalAccess |= 0x1;2212lod_src = 4;2213break;2214case nir_intrinsic_bindless_image_store:2215case nir_intrinsic_image_store:2216ty = TYPE_U32;2217bindless = op == nir_intrinsic_bindless_image_store;2218info_out->io.globalAccess |= 0x2;2219lod_src = 5;2220mask = 0xf;2221break;2222case nir_intrinsic_bindless_image_samples:2223mask = 0x8;2224FALLTHROUGH;2225case nir_intrinsic_image_samples:2226ty = TYPE_U32;2227bindless = op == nir_intrinsic_bindless_image_samples;2228mask = 0x8;2229break;2230case nir_intrinsic_bindless_image_size:2231case nir_intrinsic_image_size:2232assert(nir_src_as_uint(insn->src[1]) == 0);2233ty = TYPE_U32;2234bindless = op == nir_intrinsic_bindless_image_size;2235break;2236default:2237unreachable("unhandled image opcode");2238break;2239}22402241if (bindless)2242indirect = getSrc(&insn->src[0], 0);2243else2244location = getIndirect(&insn->src[0], 0, indirect);22452246// coords2247if (opInfo.num_srcs >= 2)2248for (unsigned int i = 0u; i < argCount; ++i)2249srcs.push_back(getSrc(&insn->src[1], i));22502251// the sampler is just another src added after coords2252if (opInfo.num_srcs >= 3 && target.isMS())2253srcs.push_back(getSrc(&insn->src[2], 0));22542255if (opInfo.num_srcs >= 4 && lod_src != 4) {2256unsigned components = opInfo.src_components[3] ? opInfo.src_components[3] : insn->num_components;2257for (uint8_t i = 0u; i < components; ++i)2258srcs.push_back(getSrc(&insn->src[3], i));2259}22602261if (opInfo.num_srcs >= 5 && lod_src != 5)2262// 1 for aotmic swap2263for (uint8_t i = 0u; i < opInfo.src_components[4]; ++i)2264srcs.push_back(getSrc(&insn->src[4], i));22652266TexInstruction *texi = mkTex(getOperation(op), target.getEnum(), location, 0, defs, srcs);2267texi->tex.bindless = bindless;2268texi->tex.format = nv50_ir::TexInstruction::translateImgFormat(nir_intrinsic_format(insn));2269texi->tex.mask = mask;2270texi->cache = convert(nir_intrinsic_access(insn));2271texi->setType(ty);2272texi->subOp = getSubOp(op);22732274if (indirect)2275texi->setIndirectR(indirect);22762277break;2278}2279case nir_intrinsic_store_scratch:2280case nir_intrinsic_store_shared: {2281DataType sType = getSType(insn->src[0], false, false);2282Value *indirectOffset;2283uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);22842285for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {2286if (!((1u << i) & nir_intrinsic_write_mask(insn)))2287continue;2288Symbol *sym = mkSymbol(getFile(op), 0, sType, offset + i * typeSizeof(sType));2289mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i));2290}2291break;2292}2293case nir_intrinsic_load_kernel_input:2294case nir_intrinsic_load_scratch:2295case nir_intrinsic_load_shared: {2296const DataType dType = getDType(insn);2297LValues &newDefs = convert(&insn->dest);2298Value *indirectOffset;2299uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);23002301for (uint8_t i = 0u; i < dest_components; ++i)2302loadFrom(getFile(op), 0, dType, newDefs[i], offset, i, indirectOffset);23032304break;2305}2306case nir_intrinsic_control_barrier: {2307// TODO: add flag to shader_info2308info_out->numBarriers = 1;2309Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));2310bar->fixed = 1;2311bar->subOp = NV50_IR_SUBOP_BAR_SYNC;2312break;2313}2314case nir_intrinsic_group_memory_barrier:2315case nir_intrinsic_memory_barrier:2316case nir_intrinsic_memory_barrier_buffer:2317case nir_intrinsic_memory_barrier_image:2318case nir_intrinsic_memory_barrier_shared: {2319Instruction *bar = mkOp(OP_MEMBAR, TYPE_NONE, NULL);2320bar->fixed = 1;2321bar->subOp = getSubOp(op);2322break;2323}2324case nir_intrinsic_memory_barrier_tcs_patch:2325break;2326case nir_intrinsic_shader_clock: {2327const DataType dType = getDType(insn);2328LValues &newDefs = convert(&insn->dest);23292330loadImm(newDefs[0], 0u);2331mkOp1(OP_RDSV, dType, newDefs[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;2332break;2333}2334case nir_intrinsic_load_global:2335case nir_intrinsic_load_global_constant: {2336const DataType dType = getDType(insn);2337LValues &newDefs = convert(&insn->dest);2338Value *indirectOffset;2339uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);23402341for (auto i = 0u; i < dest_components; ++i)2342loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);23432344info_out->io.globalAccess |= 0x1;2345break;2346}2347case nir_intrinsic_store_global: {2348DataType sType = getSType(insn->src[0], false, false);23492350for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {2351if (!((1u << i) & nir_intrinsic_write_mask(insn)))2352continue;2353if (typeSizeof(sType) == 8) {2354Value *split[2];2355mkSplit(split, 4, getSrc(&insn->src[0], i));23562357Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType));2358mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[0]);23592360sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, i * typeSizeof(sType) + 4);2361mkStore(OP_STORE, TYPE_U32, sym, getSrc(&insn->src[1], 0), split[1]);2362} else {2363Symbol *sym = mkSymbol(FILE_MEMORY_GLOBAL, 0, sType, i * typeSizeof(sType));2364mkStore(OP_STORE, sType, sym, getSrc(&insn->src[1], 0), getSrc(&insn->src[0], i));2365}2366}23672368info_out->io.globalAccess |= 0x2;2369break;2370}2371default:2372ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name);2373return false;2374}23752376return true;2377}23782379bool2380Converter::visit(nir_jump_instr *insn)2381{2382switch (insn->type) {2383case nir_jump_return:2384// TODO: this only works in the main function2385mkFlow(OP_BRA, exit, CC_ALWAYS, NULL);2386bb->cfg.attach(&exit->cfg, Graph::Edge::CROSS);2387break;2388case nir_jump_break:2389case nir_jump_continue: {2390bool isBreak = insn->type == nir_jump_break;2391nir_block *block = insn->instr.block;2392BasicBlock *target = convert(block->successors[0]);2393mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);2394bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);2395break;2396}2397default:2398ERROR("unknown nir_jump_type %u\n", insn->type);2399return false;2400}24012402return true;2403}24042405Value*2406Converter::convert(nir_load_const_instr *insn, uint8_t idx)2407{2408Value *val;24092410if (immInsertPos)2411setPosition(immInsertPos, true);2412else2413setPosition(bb, false);24142415switch (insn->def.bit_size) {2416case 64:2417val = loadImm(getSSA(8), insn->value[idx].u64);2418break;2419case 32:2420val = loadImm(getSSA(4), insn->value[idx].u32);2421break;2422case 16:2423val = loadImm(getSSA(2), insn->value[idx].u16);2424break;2425case 8:2426val = loadImm(getSSA(1), insn->value[idx].u8);2427break;2428default:2429unreachable("unhandled bit size!\n");2430}2431setPosition(bb, true);2432return val;2433}24342435bool2436Converter::visit(nir_load_const_instr *insn)2437{2438assert(insn->def.bit_size <= 64);2439immediates[insn->def.index] = insn;2440return true;2441}24422443#define DEFAULT_CHECKS \2444if (insn->dest.dest.ssa.num_components > 1) { \2445ERROR("nir_alu_instr only supported with 1 component!\n"); \2446return false; \2447} \2448if (insn->dest.write_mask != 1) { \2449ERROR("nir_alu_instr only with write_mask of 1 supported!\n"); \2450return false; \2451}2452bool2453Converter::visit(nir_alu_instr *insn)2454{2455const nir_op op = insn->op;2456const nir_op_info &info = nir_op_infos[op];2457DataType dType = getDType(insn);2458const std::vector<DataType> sTypes = getSTypes(insn);24592460Instruction *oldPos = this->bb->getExit();24612462switch (op) {2463case nir_op_fabs:2464case nir_op_iabs:2465case nir_op_fadd:2466case nir_op_iadd:2467case nir_op_iand:2468case nir_op_fceil:2469case nir_op_fcos:2470case nir_op_fddx:2471case nir_op_fddx_coarse:2472case nir_op_fddx_fine:2473case nir_op_fddy:2474case nir_op_fddy_coarse:2475case nir_op_fddy_fine:2476case nir_op_fdiv:2477case nir_op_idiv:2478case nir_op_udiv:2479case nir_op_fexp2:2480case nir_op_ffloor:2481case nir_op_ffma:2482case nir_op_flog2:2483case nir_op_fmax:2484case nir_op_imax:2485case nir_op_umax:2486case nir_op_fmin:2487case nir_op_imin:2488case nir_op_umin:2489case nir_op_fmod:2490case nir_op_imod:2491case nir_op_umod:2492case nir_op_fmul:2493case nir_op_imul:2494case nir_op_imul_high:2495case nir_op_umul_high:2496case nir_op_fneg:2497case nir_op_ineg:2498case nir_op_inot:2499case nir_op_ior:2500case nir_op_pack_64_2x32_split:2501case nir_op_fpow:2502case nir_op_frcp:2503case nir_op_frem:2504case nir_op_irem:2505case nir_op_frsq:2506case nir_op_fsat:2507case nir_op_ishr:2508case nir_op_ushr:2509case nir_op_fsin:2510case nir_op_fsqrt:2511case nir_op_ftrunc:2512case nir_op_ishl:2513case nir_op_ixor: {2514DEFAULT_CHECKS;2515LValues &newDefs = convert(&insn->dest);2516operation preOp = preOperationNeeded(op);2517if (preOp != OP_NOP) {2518assert(info.num_inputs < 2);2519Value *tmp = getSSA(typeSizeof(dType));2520Instruction *i0 = mkOp(preOp, dType, tmp);2521Instruction *i1 = mkOp(getOperation(op), dType, newDefs[0]);2522if (info.num_inputs) {2523i0->setSrc(0, getSrc(&insn->src[0]));2524i1->setSrc(0, tmp);2525}2526i1->subOp = getSubOp(op);2527} else {2528Instruction *i = mkOp(getOperation(op), dType, newDefs[0]);2529for (unsigned s = 0u; s < info.num_inputs; ++s) {2530i->setSrc(s, getSrc(&insn->src[s]));2531}2532i->subOp = getSubOp(op);2533}2534break;2535}2536case nir_op_ifind_msb:2537case nir_op_ufind_msb: {2538DEFAULT_CHECKS;2539LValues &newDefs = convert(&insn->dest);2540dType = sTypes[0];2541mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));2542break;2543}2544case nir_op_fround_even: {2545DEFAULT_CHECKS;2546LValues &newDefs = convert(&insn->dest);2547mkCvt(OP_CVT, dType, newDefs[0], dType, getSrc(&insn->src[0]))->rnd = ROUND_NI;2548break;2549}2550// convert instructions2551case nir_op_f2f32:2552case nir_op_f2i32:2553case nir_op_f2u32:2554case nir_op_i2f32:2555case nir_op_i2i32:2556case nir_op_u2f32:2557case nir_op_u2u32:2558case nir_op_f2f64:2559case nir_op_f2i64:2560case nir_op_f2u64:2561case nir_op_i2f64:2562case nir_op_i2i64:2563case nir_op_u2f64:2564case nir_op_u2u64: {2565DEFAULT_CHECKS;2566LValues &newDefs = convert(&insn->dest);2567Instruction *i = mkOp1(getOperation(op), dType, newDefs[0], getSrc(&insn->src[0]));2568if (op == nir_op_f2i32 || op == nir_op_f2i64 || op == nir_op_f2u32 || op == nir_op_f2u64)2569i->rnd = ROUND_Z;2570i->sType = sTypes[0];2571break;2572}2573// compare instructions2574case nir_op_feq32:2575case nir_op_ieq32:2576case nir_op_fge32:2577case nir_op_ige32:2578case nir_op_uge32:2579case nir_op_flt32:2580case nir_op_ilt32:2581case nir_op_ult32:2582case nir_op_fneu32:2583case nir_op_ine32: {2584DEFAULT_CHECKS;2585LValues &newDefs = convert(&insn->dest);2586Instruction *i = mkCmp(getOperation(op),2587getCondCode(op),2588dType,2589newDefs[0],2590dType,2591getSrc(&insn->src[0]),2592getSrc(&insn->src[1]));2593if (info.num_inputs == 3)2594i->setSrc(2, getSrc(&insn->src[2]));2595i->sType = sTypes[0];2596break;2597}2598case nir_op_mov:2599case nir_op_vec2:2600case nir_op_vec3:2601case nir_op_vec4:2602case nir_op_vec8:2603case nir_op_vec16: {2604LValues &newDefs = convert(&insn->dest);2605for (LValues::size_type c = 0u; c < newDefs.size(); ++c) {2606mkMov(newDefs[c], getSrc(&insn->src[c]), dType);2607}2608break;2609}2610// (un)pack2611case nir_op_pack_64_2x32: {2612LValues &newDefs = convert(&insn->dest);2613Instruction *merge = mkOp(OP_MERGE, dType, newDefs[0]);2614merge->setSrc(0, getSrc(&insn->src[0], 0));2615merge->setSrc(1, getSrc(&insn->src[0], 1));2616break;2617}2618case nir_op_pack_half_2x16_split: {2619LValues &newDefs = convert(&insn->dest);2620Value *tmpH = getSSA();2621Value *tmpL = getSSA();26222623mkCvt(OP_CVT, TYPE_F16, tmpL, TYPE_F32, getSrc(&insn->src[0]));2624mkCvt(OP_CVT, TYPE_F16, tmpH, TYPE_F32, getSrc(&insn->src[1]));2625mkOp3(OP_INSBF, TYPE_U32, newDefs[0], tmpH, mkImm(0x1010), tmpL);2626break;2627}2628case nir_op_unpack_half_2x16_split_x:2629case nir_op_unpack_half_2x16_split_y: {2630LValues &newDefs = convert(&insn->dest);2631Instruction *cvt = mkCvt(OP_CVT, TYPE_F32, newDefs[0], TYPE_F16, getSrc(&insn->src[0]));2632if (op == nir_op_unpack_half_2x16_split_y)2633cvt->subOp = 1;2634break;2635}2636case nir_op_unpack_64_2x32: {2637LValues &newDefs = convert(&insn->dest);2638mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, newDefs[1]);2639break;2640}2641case nir_op_unpack_64_2x32_split_x: {2642LValues &newDefs = convert(&insn->dest);2643mkOp1(OP_SPLIT, dType, newDefs[0], getSrc(&insn->src[0]))->setDef(1, getSSA());2644break;2645}2646case nir_op_unpack_64_2x32_split_y: {2647LValues &newDefs = convert(&insn->dest);2648mkOp1(OP_SPLIT, dType, getSSA(), getSrc(&insn->src[0]))->setDef(1, newDefs[0]);2649break;2650}2651// special instructions2652case nir_op_fsign:2653case nir_op_isign: {2654DEFAULT_CHECKS;2655DataType iType;2656if (::isFloatType(dType))2657iType = TYPE_F32;2658else2659iType = TYPE_S32;26602661LValues &newDefs = convert(&insn->dest);2662LValue *val0 = getScratch();2663LValue *val1 = getScratch();2664mkCmp(OP_SET, CC_GT, iType, val0, dType, getSrc(&insn->src[0]), zero);2665mkCmp(OP_SET, CC_LT, iType, val1, dType, getSrc(&insn->src[0]), zero);26662667if (dType == TYPE_F64) {2668mkOp2(OP_SUB, iType, val0, val0, val1);2669mkCvt(OP_CVT, TYPE_F64, newDefs[0], iType, val0);2670} else if (dType == TYPE_S64 || dType == TYPE_U64) {2671mkOp2(OP_SUB, iType, val0, val1, val0);2672mkOp2(OP_SHR, iType, val1, val0, loadImm(NULL, 31));2673mkOp2(OP_MERGE, dType, newDefs[0], val0, val1);2674} else if (::isFloatType(dType))2675mkOp2(OP_SUB, iType, newDefs[0], val0, val1);2676else2677mkOp2(OP_SUB, iType, newDefs[0], val1, val0);2678break;2679}2680case nir_op_fcsel:2681case nir_op_b32csel: {2682DEFAULT_CHECKS;2683LValues &newDefs = convert(&insn->dest);2684mkCmp(OP_SLCT, CC_NE, dType, newDefs[0], sTypes[0], getSrc(&insn->src[1]), getSrc(&insn->src[2]), getSrc(&insn->src[0]));2685break;2686}2687case nir_op_ibitfield_extract:2688case nir_op_ubitfield_extract: {2689DEFAULT_CHECKS;2690Value *tmp = getSSA();2691LValues &newDefs = convert(&insn->dest);2692mkOp3(OP_INSBF, dType, tmp, getSrc(&insn->src[2]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));2693mkOp2(OP_EXTBF, dType, newDefs[0], getSrc(&insn->src[0]), tmp);2694break;2695}2696case nir_op_bfm: {2697DEFAULT_CHECKS;2698LValues &newDefs = convert(&insn->dest);2699mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;2700break;2701}2702case nir_op_bitfield_insert: {2703DEFAULT_CHECKS;2704LValues &newDefs = convert(&insn->dest);2705LValue *temp = getSSA();2706mkOp3(OP_INSBF, TYPE_U32, temp, getSrc(&insn->src[3]), mkImm(0x808), getSrc(&insn->src[2]));2707mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[1]), temp, getSrc(&insn->src[0]));2708break;2709}2710case nir_op_bit_count: {2711DEFAULT_CHECKS;2712LValues &newDefs = convert(&insn->dest);2713mkOp2(OP_POPCNT, dType, newDefs[0], getSrc(&insn->src[0]), getSrc(&insn->src[0]));2714break;2715}2716case nir_op_bitfield_reverse: {2717DEFAULT_CHECKS;2718LValues &newDefs = convert(&insn->dest);2719mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));2720break;2721}2722case nir_op_find_lsb: {2723DEFAULT_CHECKS;2724LValues &newDefs = convert(&insn->dest);2725Value *tmp = getSSA();2726mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));2727mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;2728break;2729}2730case nir_op_extract_u8: {2731DEFAULT_CHECKS;2732LValues &newDefs = convert(&insn->dest);2733Value *prmt = getSSA();2734mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440));2735mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));2736break;2737}2738case nir_op_extract_i8: {2739DEFAULT_CHECKS;2740LValues &newDefs = convert(&insn->dest);2741Value *prmt = getSSA();2742mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880));2743mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));2744break;2745}2746case nir_op_extract_u16: {2747DEFAULT_CHECKS;2748LValues &newDefs = convert(&insn->dest);2749Value *prmt = getSSA();2750mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410));2751mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));2752break;2753}2754case nir_op_extract_i16: {2755DEFAULT_CHECKS;2756LValues &newDefs = convert(&insn->dest);2757Value *prmt = getSSA();2758mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910));2759mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));2760break;2761}2762case nir_op_urol: {2763DEFAULT_CHECKS;2764LValues &newDefs = convert(&insn->dest);2765mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),2766getSrc(&insn->src[1]), getSrc(&insn->src[0]))2767->subOp = NV50_IR_SUBOP_SHF_L |2768NV50_IR_SUBOP_SHF_W |2769NV50_IR_SUBOP_SHF_HI;2770break;2771}2772case nir_op_uror: {2773DEFAULT_CHECKS;2774LValues &newDefs = convert(&insn->dest);2775mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),2776getSrc(&insn->src[1]), getSrc(&insn->src[0]))2777->subOp = NV50_IR_SUBOP_SHF_R |2778NV50_IR_SUBOP_SHF_W |2779NV50_IR_SUBOP_SHF_LO;2780break;2781}2782// boolean conversions2783case nir_op_b2f32: {2784DEFAULT_CHECKS;2785LValues &newDefs = convert(&insn->dest);2786mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1.0f));2787break;2788}2789case nir_op_b2f64: {2790DEFAULT_CHECKS;2791LValues &newDefs = convert(&insn->dest);2792Value *tmp = getSSA(4);2793mkOp2(OP_AND, TYPE_U32, tmp, getSrc(&insn->src[0]), loadImm(NULL, 0x3ff00000));2794mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);2795break;2796}2797case nir_op_f2b32:2798case nir_op_i2b32: {2799DEFAULT_CHECKS;2800LValues &newDefs = convert(&insn->dest);2801Value *src1;2802if (typeSizeof(sTypes[0]) == 8) {2803src1 = loadImm(getSSA(8), 0.0);2804} else {2805src1 = zero;2806}2807CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;2808mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);2809break;2810}2811case nir_op_b2i32: {2812DEFAULT_CHECKS;2813LValues &newDefs = convert(&insn->dest);2814mkOp2(OP_AND, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 1));2815break;2816}2817case nir_op_b2i64: {2818DEFAULT_CHECKS;2819LValues &newDefs = convert(&insn->dest);2820LValue *def = getScratch();2821mkOp2(OP_AND, TYPE_U32, def, getSrc(&insn->src[0]), loadImm(NULL, 1));2822mkOp2(OP_MERGE, TYPE_S64, newDefs[0], def, loadImm(NULL, 0));2823break;2824}2825default:2826ERROR("unknown nir_op %s\n", info.name);2827assert(false);2828return false;2829}28302831if (!oldPos) {2832oldPos = this->bb->getEntry();2833oldPos->precise = insn->exact;2834}28352836if (unlikely(!oldPos))2837return true;28382839while (oldPos->next) {2840oldPos = oldPos->next;2841oldPos->precise = insn->exact;2842}2843oldPos->saturate = insn->dest.saturate;28442845return true;2846}2847#undef DEFAULT_CHECKS28482849bool2850Converter::visit(nir_ssa_undef_instr *insn)2851{2852LValues &newDefs = convert(&insn->def);2853for (uint8_t i = 0u; i < insn->def.num_components; ++i) {2854mkOp(OP_NOP, TYPE_NONE, newDefs[i]);2855}2856return true;2857}28582859#define CASE_SAMPLER(ty) \2860case GLSL_SAMPLER_DIM_ ## ty : \2861if (isArray && !isShadow) \2862return TEX_TARGET_ ## ty ## _ARRAY; \2863else if (!isArray && isShadow) \2864return TEX_TARGET_## ty ## _SHADOW; \2865else if (isArray && isShadow) \2866return TEX_TARGET_## ty ## _ARRAY_SHADOW; \2867else \2868return TEX_TARGET_ ## ty28692870TexTarget2871Converter::convert(glsl_sampler_dim dim, bool isArray, bool isShadow)2872{2873switch (dim) {2874CASE_SAMPLER(1D);2875CASE_SAMPLER(2D);2876CASE_SAMPLER(CUBE);2877case GLSL_SAMPLER_DIM_3D:2878return TEX_TARGET_3D;2879case GLSL_SAMPLER_DIM_MS:2880if (isArray)2881return TEX_TARGET_2D_MS_ARRAY;2882return TEX_TARGET_2D_MS;2883case GLSL_SAMPLER_DIM_RECT:2884if (isShadow)2885return TEX_TARGET_RECT_SHADOW;2886return TEX_TARGET_RECT;2887case GLSL_SAMPLER_DIM_BUF:2888return TEX_TARGET_BUFFER;2889case GLSL_SAMPLER_DIM_EXTERNAL:2890return TEX_TARGET_2D;2891default:2892ERROR("unknown glsl_sampler_dim %u\n", dim);2893assert(false);2894return TEX_TARGET_COUNT;2895}2896}2897#undef CASE_SAMPLER28982899Value*2900Converter::applyProjection(Value *src, Value *proj)2901{2902if (!proj)2903return src;2904return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj);2905}29062907unsigned int2908Converter::getNIRArgCount(TexInstruction::Target& target)2909{2910unsigned int result = target.getArgCount();2911if (target.isCube() && target.isArray())2912result--;2913if (target.isMS())2914result--;2915return result;2916}29172918CacheMode2919Converter::convert(enum gl_access_qualifier access)2920{2921if (access & ACCESS_VOLATILE)2922return CACHE_CV;2923if (access & ACCESS_COHERENT)2924return CACHE_CG;2925return CACHE_CA;2926}29272928bool2929Converter::visit(nir_tex_instr *insn)2930{2931switch (insn->op) {2932case nir_texop_lod:2933case nir_texop_query_levels:2934case nir_texop_tex:2935case nir_texop_texture_samples:2936case nir_texop_tg4:2937case nir_texop_txb:2938case nir_texop_txd:2939case nir_texop_txf:2940case nir_texop_txf_ms:2941case nir_texop_txl:2942case nir_texop_txs: {2943LValues &newDefs = convert(&insn->dest);2944std::vector<Value*> srcs;2945std::vector<Value*> defs;2946std::vector<nir_src*> offsets;2947uint8_t mask = 0;2948bool lz = false;2949Value *proj = NULL;2950TexInstruction::Target target = convert(insn->sampler_dim, insn->is_array, insn->is_shadow);2951operation op = getOperation(insn->op);29522953int r, s;2954int biasIdx = nir_tex_instr_src_index(insn, nir_tex_src_bias);2955int compIdx = nir_tex_instr_src_index(insn, nir_tex_src_comparator);2956int coordsIdx = nir_tex_instr_src_index(insn, nir_tex_src_coord);2957int ddxIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddx);2958int ddyIdx = nir_tex_instr_src_index(insn, nir_tex_src_ddy);2959int msIdx = nir_tex_instr_src_index(insn, nir_tex_src_ms_index);2960int lodIdx = nir_tex_instr_src_index(insn, nir_tex_src_lod);2961int offsetIdx = nir_tex_instr_src_index(insn, nir_tex_src_offset);2962int projIdx = nir_tex_instr_src_index(insn, nir_tex_src_projector);2963int sampOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_offset);2964int texOffIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_offset);2965int sampHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_sampler_handle);2966int texHandleIdx = nir_tex_instr_src_index(insn, nir_tex_src_texture_handle);29672968bool bindless = sampHandleIdx != -1 || texHandleIdx != -1;2969assert((sampHandleIdx != -1) == (texHandleIdx != -1));29702971if (projIdx != -1)2972proj = mkOp1v(OP_RCP, TYPE_F32, getScratch(), getSrc(&insn->src[projIdx].src, 0));29732974srcs.resize(insn->coord_components);2975for (uint8_t i = 0u; i < insn->coord_components; ++i)2976srcs[i] = applyProjection(getSrc(&insn->src[coordsIdx].src, i), proj);29772978// sometimes we get less args than target.getArgCount, but codegen expects the latter2979if (insn->coord_components) {2980uint32_t argCount = target.getArgCount();29812982if (target.isMS())2983argCount -= 1;29842985for (uint32_t i = 0u; i < (argCount - insn->coord_components); ++i)2986srcs.push_back(getSSA());2987}29882989if (insn->op == nir_texop_texture_samples)2990srcs.push_back(zero);2991else if (!insn->num_srcs)2992srcs.push_back(loadImm(NULL, 0));2993if (biasIdx != -1)2994srcs.push_back(getSrc(&insn->src[biasIdx].src, 0));2995if (lodIdx != -1)2996srcs.push_back(getSrc(&insn->src[lodIdx].src, 0));2997else if (op == OP_TXF)2998lz = true;2999if (msIdx != -1)3000srcs.push_back(getSrc(&insn->src[msIdx].src, 0));3001if (offsetIdx != -1)3002offsets.push_back(&insn->src[offsetIdx].src);3003if (compIdx != -1)3004srcs.push_back(applyProjection(getSrc(&insn->src[compIdx].src, 0), proj));3005if (texOffIdx != -1) {3006srcs.push_back(getSrc(&insn->src[texOffIdx].src, 0));3007texOffIdx = srcs.size() - 1;3008}3009if (sampOffIdx != -1) {3010srcs.push_back(getSrc(&insn->src[sampOffIdx].src, 0));3011sampOffIdx = srcs.size() - 1;3012}3013if (bindless) {3014// currently we use the lower bits3015Value *split[2];3016Value *handle = getSrc(&insn->src[sampHandleIdx].src, 0);30173018mkSplit(split, 4, handle);30193020srcs.push_back(split[0]);3021texOffIdx = srcs.size() - 1;3022}30233024r = bindless ? 0xff : insn->texture_index;3025s = bindless ? 0x1f : insn->sampler_index;30263027defs.resize(newDefs.size());3028for (uint8_t d = 0u; d < newDefs.size(); ++d) {3029defs[d] = newDefs[d];3030mask |= 1 << d;3031}3032if (target.isMS() || (op == OP_TEX && prog->getType() != Program::TYPE_FRAGMENT))3033lz = true;30343035TexInstruction *texi = mkTex(op, target.getEnum(), r, s, defs, srcs);3036texi->tex.levelZero = lz;3037texi->tex.mask = mask;3038texi->tex.bindless = bindless;30393040if (texOffIdx != -1)3041texi->tex.rIndirectSrc = texOffIdx;3042if (sampOffIdx != -1)3043texi->tex.sIndirectSrc = sampOffIdx;30443045switch (insn->op) {3046case nir_texop_tg4:3047if (!target.isShadow())3048texi->tex.gatherComp = insn->component;3049break;3050case nir_texop_txs:3051texi->tex.query = TXQ_DIMS;3052break;3053case nir_texop_texture_samples:3054texi->tex.mask = 0x4;3055texi->tex.query = TXQ_TYPE;3056break;3057case nir_texop_query_levels:3058texi->tex.mask = 0x8;3059texi->tex.query = TXQ_DIMS;3060break;3061default:3062break;3063}30643065texi->tex.useOffsets = offsets.size();3066if (texi->tex.useOffsets) {3067for (uint8_t s = 0; s < texi->tex.useOffsets; ++s) {3068for (uint32_t c = 0u; c < 3; ++c) {3069uint8_t s2 = std::min(c, target.getDim() - 1);3070texi->offset[s][c].set(getSrc(offsets[s], s2));3071texi->offset[s][c].setInsn(texi);3072}3073}3074}30753076if (op == OP_TXG && offsetIdx == -1) {3077if (nir_tex_instr_has_explicit_tg4_offsets(insn)) {3078texi->tex.useOffsets = 4;3079setPosition(texi, false);3080for (uint8_t i = 0; i < 4; ++i) {3081for (uint8_t j = 0; j < 2; ++j) {3082texi->offset[i][j].set(loadImm(NULL, insn->tg4_offsets[i][j]));3083texi->offset[i][j].setInsn(texi);3084}3085}3086setPosition(texi, true);3087}3088}30893090if (ddxIdx != -1 && ddyIdx != -1) {3091for (uint8_t c = 0u; c < target.getDim() + target.isCube(); ++c) {3092texi->dPdx[c].set(getSrc(&insn->src[ddxIdx].src, c));3093texi->dPdy[c].set(getSrc(&insn->src[ddyIdx].src, c));3094}3095}30963097break;3098}3099default:3100ERROR("unknown nir_texop %u\n", insn->op);3101return false;3102}3103return true;3104}31053106bool3107Converter::run()3108{3109bool progress;31103111if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)3112nir_print_shader(nir, stderr);31133114struct nir_lower_subgroups_options subgroup_options = {};3115subgroup_options.subgroup_size = 32;3116subgroup_options.ballot_bit_size = 32;3117subgroup_options.ballot_components = 1;3118subgroup_options.lower_elect = true;31193120/* prepare for IO lowering */3121NIR_PASS_V(nir, nir_opt_deref);3122NIR_PASS_V(nir, nir_lower_regs_to_ssa);3123NIR_PASS_V(nir, nir_lower_vars_to_ssa);31243125/* codegen assumes vec4 alignment for memory */3126NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, function_temp_type_info);3127NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp, nir_address_format_32bit_offset);3128NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);31293130NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,3131type_size, (nir_lower_io_options)0);31323133NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);31343135NIR_PASS_V(nir, nir_lower_load_const_to_scalar);3136NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);3137NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);31383139/*TODO: improve this lowering/optimisation loop so that we can use3140* nir_opt_idiv_const effectively before this.3141*/3142nir_lower_idiv_options idiv_options = {3143.imprecise_32bit_lowering = false,3144.allow_fp16 = true,3145};3146NIR_PASS(progress, nir, nir_lower_idiv, &idiv_options);31473148do {3149progress = false;3150NIR_PASS(progress, nir, nir_copy_prop);3151NIR_PASS(progress, nir, nir_opt_remove_phis);3152NIR_PASS(progress, nir, nir_opt_trivial_continues);3153NIR_PASS(progress, nir, nir_opt_cse);3154NIR_PASS(progress, nir, nir_opt_algebraic);3155NIR_PASS(progress, nir, nir_opt_constant_folding);3156NIR_PASS(progress, nir, nir_copy_prop);3157NIR_PASS(progress, nir, nir_opt_dce);3158NIR_PASS(progress, nir, nir_opt_dead_cf);3159} while (progress);31603161NIR_PASS_V(nir, nir_lower_bool_to_int32);3162NIR_PASS_V(nir, nir_convert_from_ssa, true);31633164// Garbage collect dead instructions3165nir_sweep(nir);31663167if (!parseNIR()) {3168ERROR("Couldn't prase NIR!\n");3169return false;3170}31713172if (!assignSlots()) {3173ERROR("Couldn't assign slots!\n");3174return false;3175}31763177if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)3178nir_print_shader(nir, stderr);31793180nir_foreach_function(function, nir) {3181if (!visit(function))3182return false;3183}31843185return true;3186}31873188} // unnamed namespace31893190namespace nv50_ir {31913192bool3193Program::makeFromNIR(struct nv50_ir_prog_info *info,3194struct nv50_ir_prog_info_out *info_out)3195{3196nir_shader *nir = (nir_shader*)info->bin.source;3197Converter converter(this, nir, info, info_out);3198bool result = converter.run();3199if (!result)3200return result;3201LoweringHelper lowering;3202lowering.run(this);3203tlsSize = info_out->bin.tlsSpace;3204return result;3205}32063207} // namespace nv50_ir32083209static nir_shader_compiler_options3210nvir_nir_shader_compiler_options(int chipset)3211{3212nir_shader_compiler_options op = {};3213op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);3214op.lower_ffma16 = false;3215op.lower_ffma32 = false;3216op.lower_ffma64 = false;3217op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */3218op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */3219op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */3220op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);3221op.lower_flrp32 = true;3222op.lower_flrp64 = true;3223op.lower_fpow = false; // TODO: nir's lowering is broken, or we could use it3224op.lower_fsat = false;3225op.lower_fsqrt = false; // TODO: only before gm2003226op.lower_sincos = false;3227op.lower_fmod = true;3228op.lower_bitfield_extract = false;3229op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET);3230op.lower_bitfield_insert = false;3231op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET);3232op.lower_bitfield_insert_to_bitfield_select = false;3233op.lower_bitfield_reverse = false;3234op.lower_bit_count = false;3235op.lower_ifind_msb = false;3236op.lower_find_lsb = false;3237op.lower_uadd_carry = true; // TODO3238op.lower_usub_borrow = true; // TODO3239op.lower_mul_high = false;3240op.lower_fneg = false;3241op.lower_ineg = false;3242op.lower_scmp = true; // TODO: not implemented yet3243op.lower_vector_cmp = false;3244op.lower_bitops = false;3245op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);3246op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);3247op.lower_fdph = false;3248op.lower_fdot = false;3249op.fdot_replicates = false; // TODO3250op.lower_ffloor = false; // TODO3251op.lower_ffract = true;3252op.lower_fceil = false; // TODO3253op.lower_ftrunc = false;3254op.lower_ldexp = true;3255op.lower_pack_half_2x16 = true;3256op.lower_pack_unorm_2x16 = true;3257op.lower_pack_snorm_2x16 = true;3258op.lower_pack_unorm_4x8 = true;3259op.lower_pack_snorm_4x8 = true;3260op.lower_unpack_half_2x16 = true;3261op.lower_unpack_unorm_2x16 = true;3262op.lower_unpack_snorm_2x16 = true;3263op.lower_unpack_unorm_4x8 = true;3264op.lower_unpack_snorm_4x8 = true;3265op.lower_pack_split = false;3266op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);3267op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);3268op.lower_insert_byte = true;3269op.lower_insert_word = true;3270op.lower_all_io_to_temps = false;3271op.lower_all_io_to_elements = false;3272op.vertex_id_zero_based = false;3273op.lower_base_vertex = false;3274op.lower_helper_invocation = false;3275op.optimize_sample_mask_in = false;3276op.lower_cs_local_index_from_id = true;3277op.lower_cs_local_id_from_index = false;3278op.lower_device_index_to_zero = false; // TODO3279op.lower_wpos_pntc = false; // TODO3280op.lower_hadd = true; // TODO3281op.lower_add_sat = true; // TODO3282op.vectorize_io = false;3283op.lower_to_scalar = false;3284op.unify_interfaces = false;3285op.use_interpolated_input_intrinsics = true;3286op.lower_mul_2x32_64 = true; // TODO3287op.lower_rotate = (chipset < NVISA_GV100_CHIPSET);3288op.has_imul24 = false;3289op.intel_vec4 = false;3290op.max_unroll_iterations = 32;3291op.lower_int64_options = (nir_lower_int64_options) (3292((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) |3293((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) |3294nir_lower_divmod64 |3295((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) |3296((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) |3297((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) |3298((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) |3299((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) |3300((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |3301((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |3302((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |3303((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) |3304((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |3305nir_lower_ufind_msb643306);3307op.lower_doubles_options = (nir_lower_doubles_options) (3308((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) |3309((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) |3310((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) |3311((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) |3312nir_lower_dmod |3313((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) |3314((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0)3315);3316return op;3317}33183319static const nir_shader_compiler_options gf100_nir_shader_compiler_options =3320nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET);3321static const nir_shader_compiler_options gm107_nir_shader_compiler_options =3322nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET);3323static const nir_shader_compiler_options gv100_nir_shader_compiler_options =3324nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET);33253326const nir_shader_compiler_options *3327nv50_ir_nir_shader_compiler_options(int chipset)3328{3329if (chipset >= NVISA_GV100_CHIPSET)3330return &gv100_nir_shader_compiler_options;3331if (chipset >= NVISA_GM107_CHIPSET)3332return &gm107_nir_shader_compiler_options;3333return &gf100_nir_shader_compiler_options;3334}333533363337