// Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir.h
// 4574 views
/*1* Copyright 2011 Christoph Bumiller2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#ifndef __NV50_IR_H__23#define __NV50_IR_H__2425#include <stdio.h>26#include <stdlib.h>27#include <stdint.h>28#include <deque>29#include <list>30#include <vector>3132#include "codegen/unordered_set.h"33#include "codegen/nv50_ir_util.h"34#include "codegen/nv50_ir_graph.h"3536#include "codegen/nv50_ir_driver.h"3738namespace nv50_ir {3940enum operation41{42OP_NOP = 0,43OP_PHI,44OP_UNION, // unify a new definition and several source values45OP_SPLIT, // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced)46OP_MERGE, // opposite of split, e.g. 
combine 2 32 bit into a 64 bit value47OP_CONSTRAINT, // copy values into consecutive registers48OP_MOV, // simple copy, no modifiers allowed49OP_LOAD,50OP_STORE,51OP_ADD, // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds52OP_SUB,53OP_MUL,54OP_DIV,55OP_MOD,56OP_MAD,57OP_FMA,58OP_SAD, // abs(src0 - src1) + src259OP_SHLADD,60// extended multiply-add (GM107+), does a lot of things.61// see envytools for detailed documentation62OP_XMAD,63OP_ABS,64OP_NEG,65OP_NOT,66OP_AND,67OP_OR,68OP_XOR,69OP_LOP3_LUT,70OP_SHL,71OP_SHR,72OP_SHF,73OP_MAX,74OP_MIN,75OP_SAT, // CLAMP(f32, 0.0, 1.0)76OP_CEIL,77OP_FLOOR,78OP_TRUNC,79OP_CVT,80OP_SET_AND, // dst = (src0 CMP src1) & src281OP_SET_OR,82OP_SET_XOR,83OP_SET,84OP_SELP, // dst = src2 ? src0 : src185OP_SLCT, // dst = (src2 CMP 0) ? src0 : src186OP_RCP,87OP_RSQ,88OP_LG2,89OP_SIN,90OP_COS,91OP_EX2,92OP_EXP, // exponential (base M_E)93OP_LOG, // natural logarithm94OP_PRESIN,95OP_PREEX2,96OP_SQRT,97OP_POW,98OP_BRA,99OP_CALL,100OP_RET,101OP_CONT,102OP_BREAK,103OP_PRERET,104OP_PRECONT,105OP_PREBREAK,106OP_BRKPT, // breakpoint (not related to loops)107OP_JOINAT, // push control flow convergence point108OP_JOIN, // converge109OP_DISCARD,110OP_EXIT,111OP_MEMBAR, // memory barrier (mfence, lfence, sfence)112OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base113OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]114OP_AFETCH, // fetch base address of shader input (a[%r1+0x10])115OP_EXPORT,116OP_LINTERP,117OP_PINTERP,118OP_EMIT, // emit vertex119OP_RESTART, // restart primitive120OP_FINAL, // finish emitting primitives121OP_TEX,122OP_TXB, // texture bias123OP_TXL, // texture lod124OP_TXF, // texel fetch125OP_TXQ, // texture size query126OP_TXD, // texture derivatives127OP_TXG, // texture gather128OP_TXLQ, // texture query lod129OP_TEXCSAA, // texture op for coverage sampling130OP_TEXPREP, // turn cube map array into 2d array coordinates131OP_SULDB, // surface load (raw)132OP_SULDP, // 
surface load (formatted)133OP_SUSTB, // surface store (raw)134OP_SUSTP, // surface store (formatted)135OP_SUREDB,136OP_SUREDP, // surface reduction (atomic op)137OP_SULEA, // surface load effective address138OP_SUBFM, // surface bitfield manipulation139OP_SUCLAMP, // clamp surface coordinates140OP_SUEAU, // surface effective address141OP_SUQ, // surface query142OP_MADSP, // special integer multiply-add143OP_TEXBAR, // texture dependency barrier144OP_DFDX,145OP_DFDY,146OP_RDSV, // read system value147OP_WRSV, // write system value148OP_PIXLD, // get info about raster object or surfaces149OP_QUADOP,150OP_QUADON,151OP_QUADPOP,152OP_POPCNT, // bitcount(src0 & src1)153OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]154OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK155OP_BFIND, // find highest/lowest set bit156OP_BREV, // bitfield reverse157OP_BMSK, // bitfield mask158OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)159OP_SGXT,160OP_ATOM,161OP_BAR, // execution barrier, sources = { id, thread count, predicate }162OP_VADD, // byte/word vector operations163OP_VAVG,164OP_VMIN,165OP_VMAX,166OP_VSAD,167OP_VSET,168OP_VSHR,169OP_VSHL,170OP_VSEL,171OP_CCTL, // cache control172OP_SHFL, // warp shuffle173OP_VOTE,174OP_BUFQ, // buffer query175OP_WARPSYNC,176OP_LAST177};178179// various instruction-specific modifier definitions Instruction::subOp180// MOV_FINAL marks a MOV originating from an EXPORT (used for placing TEXBARs)181#define NV50_IR_SUBOP_MUL_HIGH 1182#define NV50_IR_SUBOP_EMIT_RESTART 1183#define NV50_IR_SUBOP_LDC_IL 1184#define NV50_IR_SUBOP_LDC_IS 2185#define NV50_IR_SUBOP_LDC_ISL 3186#define NV50_IR_SUBOP_SHIFT_WRAP 1187#define NV50_IR_SUBOP_SHIFT_HIGH 2188#define NV50_IR_SUBOP_EMU_PRERET 1189#define NV50_IR_SUBOP_TEXBAR(n) n190#define NV50_IR_SUBOP_MOV_FINAL 1191#define NV50_IR_SUBOP_EXTBF_REV 1192#define NV50_IR_SUBOP_BFIND_SAMT 1193#define NV50_IR_SUBOP_RCPRSQ_64H 1194#define NV50_IR_SUBOP_PERMT_F4E 
1195#define NV50_IR_SUBOP_PERMT_B4E 2196#define NV50_IR_SUBOP_PERMT_RC8 3197#define NV50_IR_SUBOP_PERMT_ECL 4198#define NV50_IR_SUBOP_PERMT_ECR 5199#define NV50_IR_SUBOP_PERMT_RC16 6200#define NV50_IR_SUBOP_BAR_SYNC 0201#define NV50_IR_SUBOP_BAR_ARRIVE 1202#define NV50_IR_SUBOP_BAR_RED_AND 2203#define NV50_IR_SUBOP_BAR_RED_OR 3204#define NV50_IR_SUBOP_BAR_RED_POPC 4205#define NV50_IR_SUBOP_MEMBAR_L 1206#define NV50_IR_SUBOP_MEMBAR_S 2207#define NV50_IR_SUBOP_MEMBAR_M 3208#define NV50_IR_SUBOP_MEMBAR_CTA (0 << 2)209#define NV50_IR_SUBOP_MEMBAR_GL (1 << 2)210#define NV50_IR_SUBOP_MEMBAR_SYS (2 << 2)211#define NV50_IR_SUBOP_MEMBAR_DIR(m) ((m) & 0x3)212#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3)213#define NV50_IR_SUBOP_MEMBAR(d,s) \214(NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s)215#define NV50_IR_SUBOP_ATOM_ADD 0216#define NV50_IR_SUBOP_ATOM_MIN 1217#define NV50_IR_SUBOP_ATOM_MAX 2218#define NV50_IR_SUBOP_ATOM_INC 3219#define NV50_IR_SUBOP_ATOM_DEC 4220#define NV50_IR_SUBOP_ATOM_AND 5221#define NV50_IR_SUBOP_ATOM_OR 6222#define NV50_IR_SUBOP_ATOM_XOR 7223#define NV50_IR_SUBOP_ATOM_CAS 8224#define NV50_IR_SUBOP_ATOM_EXCH 9225#define NV50_IR_SUBOP_CCTL_IV 5226#define NV50_IR_SUBOP_CCTL_IVALL 6227#define NV50_IR_SUBOP_SUST_IGN 0228#define NV50_IR_SUBOP_SUST_TRAP 1229#define NV50_IR_SUBOP_SUST_SDCL 3230#define NV50_IR_SUBOP_SULD_ZERO 0231#define NV50_IR_SUBOP_SULD_TRAP 1232#define NV50_IR_SUBOP_SULD_SDCL 3233#define NV50_IR_SUBOP_SUBFM_3D 1234#define NV50_IR_SUBOP_SUCLAMP_2D 0x10235#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | ((d == 2) ? 0x10 : 0))236#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | ((d == 2) ? 0x10 : 0))237#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | ((d == 2) ? 
0x10 : 0))238#define NV50_IR_SUBOP_PIXLD_COUNT 0239#define NV50_IR_SUBOP_PIXLD_COVMASK 1240#define NV50_IR_SUBOP_PIXLD_COVERED 2241#define NV50_IR_SUBOP_PIXLD_OFFSET 3242#define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4243#define NV50_IR_SUBOP_PIXLD_SAMPLEID 5244#define NV50_IR_SUBOP_SHFL_IDX 0245#define NV50_IR_SUBOP_SHFL_UP 1246#define NV50_IR_SUBOP_SHFL_DOWN 2247#define NV50_IR_SUBOP_SHFL_BFLY 3248#define NV50_IR_SUBOP_LOAD_LOCKED 1249#define NV50_IR_SUBOP_STORE_UNLOCKED 2250#define NV50_IR_SUBOP_MADSP_SD 0xffff251// Yes, we could represent those with DataType.252// Or put the type into operation and have a couple 1000 values in that enum.253// This will have to do for now.254// The bitfields are supposed to correspond to nve4 ISA.255#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))256#define NV50_IR_SUBOP_V1(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x0000)257#define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000)258#define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000)259#define NV50_IR_SUBOP_Vn(n) ((n) >> 14)260#define NV50_IR_SUBOP_VOTE_ALL 0261#define NV50_IR_SUBOP_VOTE_ANY 1262#define NV50_IR_SUBOP_VOTE_UNI 2263#define NV50_IR_SUBOP_LOP3_LUT_SRC0 0xf0264#define NV50_IR_SUBOP_LOP3_LUT_SRC1 0xcc265#define NV50_IR_SUBOP_LOP3_LUT_SRC2 0xaa266#define NV50_IR_SUBOP_LOP3_LUT(exp) ({ \267uint8_t a = NV50_IR_SUBOP_LOP3_LUT_SRC0; \268uint8_t b = NV50_IR_SUBOP_LOP3_LUT_SRC1; \269uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; \270(uint8_t)(exp); \271})272#define NV50_IR_SUBOP_BMSK_C (0 << 0)273#define NV50_IR_SUBOP_BMSK_W (1 << 0)274275#define NV50_IR_SUBOP_MINMAX_LOW 1276#define NV50_IR_SUBOP_MINMAX_MED 2277#define NV50_IR_SUBOP_MINMAX_HIGH 3278279#define NV50_IR_SUBOP_SHF_L (0 << 0)280#define NV50_IR_SUBOP_SHF_R (1 << 0)281#define NV50_IR_SUBOP_SHF_LO (0 << 1)282#define NV50_IR_SUBOP_SHF_HI (1 << 1)283#define NV50_IR_SUBOP_SHF_C (0 << 2)284#define NV50_IR_SUBOP_SHF_W (1 << 2)285286// xmad(src0, src1, 0) << 16 + src2287#define 
NV50_IR_SUBOP_XMAD_PSL (1 << 0)288// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)289#define NV50_IR_SUBOP_XMAD_MRG (1 << 1)290// xmad(src0, src1, src2.lo)291#define NV50_IR_SUBOP_XMAD_CLO (1 << 2)292// xmad(src0, src1, src2.hi)293#define NV50_IR_SUBOP_XMAD_CHI (2 << 2)294// if both operands to the multiplication are non-zero, subtract 65536 for each295// negative operand296#define NV50_IR_SUBOP_XMAD_CSFU (3 << 2)297// xmad(src0, src1, src2) + src1 << 16298#define NV50_IR_SUBOP_XMAD_CBCC (4 << 2)299#define NV50_IR_SUBOP_XMAD_CMODE_SHIFT 2300#define NV50_IR_SUBOP_XMAD_CMODE_MASK (0x7 << NV50_IR_SUBOP_XMAD_CMODE_SHIFT)301302// use the high 16 bits instead of the low 16 bits for the multiplication.303// if the instruction's sType is signed, sign extend the operand from 16 bits304// to 32 before multiplication.305#define NV50_IR_SUBOP_XMAD_H1_SHIFT 5306#define NV50_IR_SUBOP_XMAD_H1(i) (1 << (NV50_IR_SUBOP_XMAD_H1_SHIFT + (i)))307#define NV50_IR_SUBOP_XMAD_H1_MASK (0x3 << NV50_IR_SUBOP_XMAD_H1_SHIFT)308309enum DataType310{311TYPE_NONE,312TYPE_U8,313TYPE_S8,314TYPE_U16,315TYPE_S16,316TYPE_U32,317TYPE_S32,318TYPE_U64, // 64 bit operations are only lowered after register allocation319TYPE_S64,320TYPE_F16,321TYPE_F32,322TYPE_F64,323TYPE_B96,324TYPE_B128325};326327enum CondCode328{329CC_FL = 0,330CC_NEVER = CC_FL, // when used with FILE_FLAGS331CC_LT = 1,332CC_EQ = 2,333CC_NOT_P = CC_EQ, // when used with FILE_PREDICATE334CC_LE = 3,335CC_GT = 4,336CC_NE = 5,337CC_P = CC_NE,338CC_GE = 6,339CC_TR = 7,340CC_ALWAYS = CC_TR,341CC_U = 8,342CC_LTU = 9,343CC_EQU = 10,344CC_LEU = 11,345CC_GTU = 12,346CC_NEU = 13,347CC_GEU = 14,348CC_NO = 0x10,349CC_NC = 0x11,350CC_NS = 0x12,351CC_NA = 0x13,352CC_A = 0x14,353CC_S = 0x15,354CC_C = 0x16,355CC_O = 0x17356};357358enum RoundMode359{360ROUND_N, // nearest361ROUND_M, // towards -inf362ROUND_Z, // towards 0363ROUND_P, // towards +inf364ROUND_NI, // nearest integer365ROUND_MI, // to integer towards -inf366ROUND_ZI, // to integer towards 
0367ROUND_PI, // to integer towards +inf368};369370enum CacheMode371{372CACHE_CA, // cache at all levels373CACHE_WB = CACHE_CA, // cache write back374CACHE_CG, // cache at global level375CACHE_CS, // cache streaming376CACHE_CV, // cache as volatile377CACHE_WT = CACHE_CV // cache write-through378};379380enum DataFile381{382FILE_NULL = 0,383FILE_GPR,384FILE_PREDICATE, // boolean predicate385FILE_FLAGS, // zero/sign/carry/overflow bits386FILE_ADDRESS,387FILE_BARRIER,388LAST_REGISTER_FILE = FILE_BARRIER,389FILE_IMMEDIATE,390FILE_MEMORY_CONST,391FILE_SHADER_INPUT,392FILE_SHADER_OUTPUT,393FILE_MEMORY_BUFFER,394FILE_MEMORY_GLOBAL,395FILE_MEMORY_SHARED,396FILE_MEMORY_LOCAL,397FILE_SYSTEM_VALUE,398FILE_THREAD_STATE, // "special" barrier registers399DATA_FILE_COUNT400};401402enum TexTarget403{404TEX_TARGET_1D,405TEX_TARGET_2D,406TEX_TARGET_2D_MS,407TEX_TARGET_3D,408TEX_TARGET_CUBE,409TEX_TARGET_1D_SHADOW,410TEX_TARGET_2D_SHADOW,411TEX_TARGET_CUBE_SHADOW,412TEX_TARGET_1D_ARRAY,413TEX_TARGET_2D_ARRAY,414TEX_TARGET_2D_MS_ARRAY,415TEX_TARGET_CUBE_ARRAY,416TEX_TARGET_1D_ARRAY_SHADOW,417TEX_TARGET_2D_ARRAY_SHADOW,418TEX_TARGET_RECT,419TEX_TARGET_RECT_SHADOW,420TEX_TARGET_CUBE_ARRAY_SHADOW,421TEX_TARGET_BUFFER,422TEX_TARGET_COUNT423};424425enum ImgFormat426{427FMT_NONE,428429FMT_RGBA32F,430FMT_RGBA16F,431FMT_RG32F,432FMT_RG16F,433FMT_R11G11B10F,434FMT_R32F,435FMT_R16F,436437FMT_RGBA32UI,438FMT_RGBA16UI,439FMT_RGB10A2UI,440FMT_RGBA8UI,441FMT_RG32UI,442FMT_RG16UI,443FMT_RG8UI,444FMT_R32UI,445FMT_R16UI,446FMT_R8UI,447448FMT_RGBA32I,449FMT_RGBA16I,450FMT_RGBA8I,451FMT_RG32I,452FMT_RG16I,453FMT_RG8I,454FMT_R32I,455FMT_R16I,456FMT_R8I,457458FMT_RGBA16,459FMT_RGB10A2,460FMT_RGBA8,461FMT_RG16,462FMT_RG8,463FMT_R16,464FMT_R8,465466FMT_RGBA16_SNORM,467FMT_RGBA8_SNORM,468FMT_RG16_SNORM,469FMT_RG8_SNORM,470FMT_R16_SNORM,471FMT_R8_SNORM,472473FMT_BGRA8,474475IMG_FORMAT_COUNT,476};477478enum ImgType {479UINT,480SINT,481UNORM,482SNORM,483FLOAT,484};485486enum SVSemantic487{488SV_POSITION, // 
WPOS489SV_VERTEX_ID,490SV_INSTANCE_ID,491SV_INVOCATION_ID,492SV_PRIMITIVE_ID,493SV_VERTEX_COUNT, // gl_PatchVerticesIn494SV_LAYER,495SV_VIEWPORT_INDEX,496SV_VIEWPORT_MASK,497SV_YDIR,498SV_FACE,499SV_POINT_SIZE,500SV_POINT_COORD,501SV_CLIP_DISTANCE,502SV_SAMPLE_INDEX,503SV_SAMPLE_POS,504SV_SAMPLE_MASK,505SV_TESS_OUTER,506SV_TESS_INNER,507SV_TESS_COORD,508SV_TID,509SV_COMBINED_TID,510SV_CTAID,511SV_NTID,512SV_GRIDID,513SV_NCTAID,514SV_LANEID,515SV_PHYSID,516SV_NPHYSID,517SV_CLOCK,518SV_LBASE,519SV_SBASE,520SV_VERTEX_STRIDE,521SV_INVOCATION_INFO,522SV_THREAD_KILL,523SV_BASEVERTEX,524SV_BASEINSTANCE,525SV_DRAWID,526SV_WORK_DIM,527SV_LANEMASK_EQ,528SV_LANEMASK_LT,529SV_LANEMASK_LE,530SV_LANEMASK_GT,531SV_LANEMASK_GE,532SV_UNDEFINED,533SV_LAST534};535536enum TSSemantic537{538// 0-15 are fixed ones on Volta/Turing539TS_THREAD_STATE_ENUM0 = 0,540TS_THREAD_STATE_ENUM1 = 1,541TS_THREAD_STATE_ENUM2 = 2,542TS_THREAD_STATE_ENUM3 = 3,543TS_THREAD_STATE_ENUM4 = 4,544TS_TRAP_RETURN_PC_LO = 5,545TS_TRAP_RETURN_PC_HI = 6,546TS_TRAP_RETURN_MASK = 7,547TS_MEXITED = 8,548TS_MKILL = 9,549TS_MACTIVE = 10,550TS_MATEXIT = 11,551TS_OPT_STACK = 12,552TS_API_CALL_DEPTH = 13,553TS_ATEXIT_PC_LO = 14,554TS_ATEXIT_PC_HI = 15,555// special ones to make our life easier556TS_PQUAD_MACTIVE,557};558559class Program;560class Function;561class BasicBlock;562563class Target;564565class Instruction;566class CmpInstruction;567class TexInstruction;568class FlowInstruction;569570class Value;571class LValue;572class Symbol;573class ImmediateValue;574575struct Storage576{577DataFile file;578int8_t fileIndex; // signed, may be indirect for CONST[]579uint8_t size; // this should match the Instruction type's size580DataType type; // mainly for pretty printing581union {582uint64_t u64; // immediate values583uint32_t u32;584uint16_t u16;585uint8_t u8;586int64_t s64;587int32_t s32;588int16_t s16;589int8_t s8;590float f32;591double f64;592int32_t offset; // offset from 0 (base of address space)593int32_t id; // 
register id (< 0 if virtual/unassigned, in units <= 4)594struct {595SVSemantic sv;596int index;597} sv;598TSSemantic ts;599} data;600};601602// precedence: NOT after SAT after NEG after ABS603#define NV50_IR_MOD_ABS (1 << 0)604#define NV50_IR_MOD_NEG (1 << 1)605#define NV50_IR_MOD_SAT (1 << 2)606#define NV50_IR_MOD_NOT (1 << 3)607#define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)608609#define NV50_IR_INTERP_MODE_MASK 0x3610#define NV50_IR_INTERP_LINEAR (0 << 0)611#define NV50_IR_INTERP_PERSPECTIVE (1 << 0)612#define NV50_IR_INTERP_FLAT (2 << 0)613#define NV50_IR_INTERP_SC (3 << 0) // what exactly is that ?614#define NV50_IR_INTERP_SAMPLE_MASK 0xc615#define NV50_IR_INTERP_DEFAULT (0 << 2)616#define NV50_IR_INTERP_CENTROID (1 << 2)617#define NV50_IR_INTERP_OFFSET (2 << 2)618#define NV50_IR_INTERP_SAMPLEID (3 << 2)619620// do we really want this to be a class ?621class Modifier622{623public:624Modifier() : bits(0) { }625Modifier(unsigned int m) : bits(m) { }626Modifier(operation op);627628// @return new Modifier applying a after b (asserts if unrepresentable)629Modifier operator*(const Modifier) const;630Modifier operator*=(const Modifier m) { *this = *this * m; return *this; }631Modifier operator==(const Modifier m) const { return m.bits == bits; }632Modifier operator!=(const Modifier m) const { return m.bits != bits; }633634inline Modifier operator&(const Modifier m) const { return bits & m.bits; }635inline Modifier operator|(const Modifier m) const { return bits | m.bits; }636inline Modifier operator^(const Modifier m) const { return bits ^ m.bits; }637638operation getOp() const;639640inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; }641inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; }642643inline operator bool() const { return bits ? 
true : false; }644645void applyTo(ImmediateValue &imm) const;646647int print(char *buf, size_t size) const;648649private:650uint8_t bits;651};652653class ValueRef654{655public:656ValueRef(Value * = NULL);657ValueRef(const ValueRef&);658~ValueRef();659660inline bool exists() const { return value != NULL; }661662void set(Value *);663void set(const ValueRef&);664inline Value *get() const { return value; }665inline Value *rep() const;666667inline Instruction *getInsn() const { return insn; }668inline void setInsn(Instruction *inst) { insn = inst; }669670inline bool isIndirect(int dim) const { return indirect[dim] >= 0; }671inline const ValueRef *getIndirect(int dim) const;672673inline DataFile getFile() const;674inline unsigned getSize() const;675676// SSA: return eventual (traverse MOVs) literal value, if it exists677bool getImmediate(ImmediateValue&) const;678679public:680Modifier mod;681int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src(indirect[i])682683bool usedAsPtr; // for printing684685private:686Value *value;687Instruction *insn;688};689690class ValueDef691{692public:693ValueDef(Value * = NULL);694ValueDef(const ValueDef&);695~ValueDef();696697inline bool exists() const { return value != NULL; }698699inline Value *get() const { return value; }700inline Value *rep() const;701void set(Value *);702bool mayReplace(const ValueRef &);703void replace(const ValueRef &, bool doSet); // replace all uses of the old value704705inline Instruction *getInsn() const { return insn; }706inline void setInsn(Instruction *inst) { insn = inst; }707708inline DataFile getFile() const;709inline unsigned getSize() const;710711inline void setSSA(LValue *);712inline const LValue *preSSA() const;713714private:715Value *value; // should make this LValue * ...716LValue *origin; // pre SSA value717Instruction *insn;718};719720class Value721{722public:723Value();724virtual ~Value() { }725726virtual Value *clone(ClonePolicy<Function>&) const = 0;727728virtual int print(char *, 
size_t, DataType ty = TYPE_NONE) const = 0;729730virtual bool equals(const Value *, bool strict = false) const;731virtual bool interfers(const Value *) const;732virtual bool isUniform() const { return true; }733734inline Value *rep() const { return join; }735736inline Instruction *getUniqueInsn() const;737inline Instruction *getInsn() const; // use when uniqueness is certain738739inline int refCount() { return uses.size(); }740741inline LValue *asLValue();742inline Symbol *asSym();743inline ImmediateValue *asImm();744inline const Symbol *asSym() const;745inline const ImmediateValue *asImm() const;746747inline bool inFile(DataFile f) const { return reg.file == f; }748749static inline Value *get(Iterator&);750751unordered_set<ValueRef *> uses;752std::list<ValueDef *> defs;753typedef unordered_set<ValueRef *>::iterator UseIterator;754typedef unordered_set<ValueRef *>::const_iterator UseCIterator;755typedef std::list<ValueDef *>::iterator DefIterator;756typedef std::list<ValueDef *>::const_iterator DefCIterator;757758int id;759Storage reg;760761// TODO: these should be in LValue:762Interval livei;763Value *join;764};765766class LValue : public Value767{768public:769LValue(Function *, DataFile file);770LValue(Function *, LValue *);771~LValue() { }772773virtual bool isUniform() const;774775virtual LValue *clone(ClonePolicy<Function>&) const;776777virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;778779public:780unsigned compMask : 8; // compound/component mask781unsigned compound : 1; // used by RA, value involved in split/merge782unsigned ssa : 1;783unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0)784unsigned noSpill : 1; // do not spill (e.g. 
if spill temporary already)785};786787class Symbol : public Value788{789public:790Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0);791~Symbol() { }792793virtual Symbol *clone(ClonePolicy<Function>&) const;794795virtual bool equals(const Value *that, bool strict) const;796797virtual bool isUniform() const;798799virtual int print(char *, size_t, DataType ty = TYPE_NONE) const;800801// print with indirect values802int print(char *, size_t, Value *, Value *, DataType ty = TYPE_NONE) const;803804inline void setFile(DataFile file, ubyte fileIndex = 0)805{806reg.file = file;807reg.fileIndex = fileIndex;808}809810inline void setOffset(int32_t offset);811inline void setAddress(Symbol *base, int32_t offset);812inline void setSV(SVSemantic sv, uint32_t idx = 0);813814inline const Symbol *getBase() const { return baseSym; }815816private:817Symbol *baseSym; // array base for Symbols representing array elements818};819820class ImmediateValue : public Value821{822public:823ImmediateValue() { }824ImmediateValue(Program *, uint32_t);825ImmediateValue(Program *, float);826ImmediateValue(Program *, double);827// NOTE: not added to program with828ImmediateValue(const ImmediateValue *, DataType ty);829~ImmediateValue() { };830831virtual ImmediateValue *clone(ClonePolicy<Function>&) const;832833virtual bool equals(const Value *that, bool strict) const;834835// these only work if 'type' is valid (we mostly use untyped literals):836bool isInteger(const int ival) const; // ival is cast to this' type837bool isNegative() const;838bool isPow2() const;839840void applyLog2();841842// for constant folding:843ImmediateValue operator+(const ImmediateValue&) const;844ImmediateValue operator-(const ImmediateValue&) const;845ImmediateValue operator*(const ImmediateValue&) const;846ImmediateValue operator/(const ImmediateValue&) const;847848ImmediateValue& operator=(const ImmediateValue&); // only sets value !849850bool compare(CondCode cc, float fval) const;851852virtual int 
print(char *, size_t, DataType ty = TYPE_NONE) const;853};854855class Instruction856{857public:858Instruction();859Instruction(Function *, operation, DataType);860virtual ~Instruction();861862virtual Instruction *clone(ClonePolicy<Function>&,863Instruction * = NULL) const;864865void setDef(int i, Value *);866void setSrc(int s, Value *);867void setSrc(int s, const ValueRef&);868void swapSources(int a, int b);869void moveSources(int s, int delta);870bool setIndirect(int s, int dim, Value *);871872inline ValueRef& src(int s) { return srcs[s]; }873inline ValueDef& def(int s) { return defs[s]; }874inline const ValueRef& src(int s) const { return srcs[s]; }875inline const ValueDef& def(int s) const { return defs[s]; }876877inline Value *getDef(int d) const { return defs[d].get(); }878inline Value *getSrc(int s) const { return srcs[s].get(); }879inline Value *getIndirect(int s, int dim) const;880881inline bool defExists(unsigned d) const882{883return d < defs.size() && defs[d].exists();884}885inline bool srcExists(unsigned s) const886{887return s < srcs.size() && srcs[s].exists();888}889890inline bool constrainedDefs() const;891892bool setPredicate(CondCode ccode, Value *);893inline Value *getPredicate() const;894bool writesPredicate() const;895inline bool isPredicated() const { return predSrc >= 0; }896897inline void setFlagsSrc(int s, Value *);898inline void setFlagsDef(int d, Value *);899inline bool usesFlags() const { return flagsSrc >= 0; }900901unsigned int defCount() const { return defs.size(); };902unsigned int defCount(unsigned int mask, bool singleFile = false) const;903unsigned int srcCount() const { return srcs.size(); };904unsigned int srcCount(unsigned int mask, bool singleFile = false) const;905906// save & remove / set indirect[0,1] and predicate source907void takeExtraSources(int s, Value *[3]);908void putExtraSources(int s, Value *[3]);909910inline void setType(DataType type) { dType = sType = type; }911912inline void setType(DataType dtype, DataType 
stype)913{914dType = dtype;915sType = stype;916}917918inline bool isPseudo() const { return op < OP_MOV; }919bool isDead() const;920bool isNop() const;921bool isCommutationLegal(const Instruction *) const; // must be adjacent !922bool isActionEqual(const Instruction *) const;923bool isResultEqual(const Instruction *) const;924925// check whether the defs interfere with srcs and defs of another instruction926bool canCommuteDefDef(const Instruction *) const;927bool canCommuteDefSrc(const Instruction *) const;928929void print() const;930931inline CmpInstruction *asCmp();932inline TexInstruction *asTex();933inline FlowInstruction *asFlow();934inline const TexInstruction *asTex() const;935inline const CmpInstruction *asCmp() const;936inline const FlowInstruction *asFlow() const;937938public:939Instruction *next;940Instruction *prev;941int id;942int serial; // CFG order943944operation op;945DataType dType; // destination or defining type946DataType sType; // source or secondary type947CondCode cc;948RoundMode rnd;949CacheMode cache;950951uint16_t subOp; // quadop, 1 for mul-high, etc.952953unsigned encSize : 5; // encoding size in bytes954unsigned saturate : 1; // to [0.0f, 1.0f]955unsigned join : 1; // converge control flow (use OP_JOIN until end)956unsigned fixed : 1; // prevent dead code elimination957unsigned terminator : 1; // end of basic block958unsigned ftz : 1; // flush denormal to zero959unsigned dnz : 1; // denormals, NaN are zero960unsigned ipa : 4; // interpolation mode961unsigned lanes : 4;962unsigned perPatch : 1;963unsigned exit : 1; // terminate program after insn964unsigned mask : 4; // for vector ops965// prevent algebraic optimisations that aren't bit-for-bit identical966unsigned precise : 1;967968int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor969970int8_t predSrc;971int8_t flagsDef;972int8_t flagsSrc;973974uint32_t sched; // scheduling data (NOTE: maybe move to separate storage)975976BasicBlock *bb;977978protected:979std::deque<ValueDef> 
defs; // no gaps !980std::deque<ValueRef> srcs; // no gaps !981982// instruction specific methods:983// (don't want to subclass, would need more constructors and memory pools)984public:985inline void setInterpolate(unsigned int mode) { ipa = mode; }986987unsigned int getInterpMode() const { return ipa & 0x3; }988unsigned int getSampleMode() const { return ipa & 0xc; }989990private:991void init();992};993994enum TexQuery995{996TXQ_DIMS, /* x, y, z, levels */997TXQ_TYPE, /* ?, ?, samples, ? */998TXQ_SAMPLE_POSITION,999TXQ_FILTER,1000TXQ_LOD,1001TXQ_WRAP,1002TXQ_BORDER_COLOUR1003};10041005class TexInstruction : public Instruction1006{1007public:1008class Target1009{1010public:1011Target(TexTarget targ = TEX_TARGET_1D) : target(targ) { }10121013const char *getName() const { return descTable[target].name; }1014unsigned int getArgCount() const { return descTable[target].argc; }1015unsigned int getDim() const { return descTable[target].dim; }1016int isArray() const { return descTable[target].array ? 1 : 0; }1017int isCube() const { return descTable[target].cube ? 1 : 0; }1018int isShadow() const { return descTable[target].shadow ? 
1 : 0; }1019int isMS() const {1020return target == TEX_TARGET_2D_MS || target == TEX_TARGET_2D_MS_ARRAY; }1021void clearMS() {1022if (isMS()) {1023if (isArray())1024target = TEX_TARGET_2D_ARRAY;1025else1026target = TEX_TARGET_2D;1027}1028}10291030Target& operator=(TexTarget targ)1031{1032assert(targ < TEX_TARGET_COUNT);1033target = targ;1034return *this;1035}10361037inline bool operator==(TexTarget targ) const { return target == targ; }1038inline bool operator!=(TexTarget targ) const { return target != targ; }10391040enum TexTarget getEnum() const { return target; }10411042private:1043struct Desc1044{1045char name[19];1046uint8_t dim;1047uint8_t argc;1048bool array;1049bool cube;1050bool shadow;1051};10521053static const struct Desc descTable[TEX_TARGET_COUNT];10541055private:1056enum TexTarget target;1057};10581059public:1060struct ImgFormatDesc1061{1062char name[19];1063uint8_t components;1064uint8_t bits[4];1065ImgType type;1066bool bgra;1067};10681069static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT];1070static const struct ImgFormatDesc *translateImgFormat(1071enum pipe_format format);10721073public:1074TexInstruction(Function *, operation);1075virtual ~TexInstruction();10761077virtual TexInstruction *clone(ClonePolicy<Function>&,1078Instruction * = NULL) const;10791080inline void setTexture(Target targ, uint8_t r, uint8_t s)1081{1082tex.r = r;1083tex.s = s;1084tex.target = targ;1085}10861087void setIndirectR(Value *);1088void setIndirectS(Value *);1089inline Value *getIndirectR() const;1090inline Value *getIndirectS() const;10911092public:1093struct {1094Target target;10951096uint16_t r;1097uint16_t s;1098int8_t rIndirectSrc;1099int8_t sIndirectSrc;11001101uint8_t mask;1102uint8_t gatherComp;11031104bool liveOnly; // only execute on live pixels of a quad (optimization)1105bool levelZero;1106bool derivAll;1107bool bindless;11081109int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets1110int8_t offset[3]; // only used on nv5011111112enum 
TexQuery query;1113const struct ImgFormatDesc *format;11141115bool scalar; // for GM107s TEXS, TLDS, TLD4S1116} tex;11171118ValueRef dPdx[3];1119ValueRef dPdy[3];1120ValueRef offset[4][3];1121};11221123class CmpInstruction : public Instruction1124{1125public:1126CmpInstruction(Function *, operation);11271128virtual CmpInstruction *clone(ClonePolicy<Function>&,1129Instruction * = NULL) const;11301131void setCondition(CondCode cond) { setCond = cond; }1132CondCode getCondition() const { return setCond; }11331134public:1135CondCode setCond;1136};11371138class FlowInstruction : public Instruction1139{1140public:1141FlowInstruction(Function *, operation, void *target);11421143virtual FlowInstruction *clone(ClonePolicy<Function>&,1144Instruction * = NULL) const;11451146public:1147unsigned allWarp : 1;1148unsigned absolute : 1;1149unsigned limit : 1;1150unsigned builtin : 1; // true for calls to emulation code1151unsigned indirect : 1; // target in src(0)11521153union {1154BasicBlock *bb;1155int builtin;1156Function *fn;1157} target;1158};11591160class BasicBlock1161{1162public:1163BasicBlock(Function *);1164~BasicBlock();11651166BasicBlock *clone(ClonePolicy<Function>&) const;11671168inline int getId() const { return id; }1169inline unsigned int getInsnCount() const { return numInsns; }1170inline bool isTerminated() const { return exit && exit->terminator; }11711172bool dominatedBy(BasicBlock *bb);1173inline bool reachableBy(const BasicBlock *by, const BasicBlock *term);11741175// returns mask of conditional out blocks1176// e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF1177unsigned int initiatesSimpleConditional() const;11781179public:1180Function *getFunction() const { return func; }1181Program *getProgram() const { return program; }11821183Instruction *getEntry() const { return entry; } // first non-phi instruction1184Instruction *getPhi() const { return phi; }1185Instruction *getFirst() const { return phi ? 
phi : entry; }1186Instruction *getExit() const { return exit; }11871188void insertHead(Instruction *);1189void insertTail(Instruction *);1190void insertBefore(Instruction *, Instruction *);1191void insertAfter(Instruction *, Instruction *);1192void remove(Instruction *);1193void permuteAdjacent(Instruction *, Instruction *);11941195BasicBlock *idom() const;11961197// NOTE: currently does not rebuild the dominator tree1198BasicBlock *splitBefore(Instruction *, bool attach = true);1199BasicBlock *splitAfter(Instruction *, bool attach = true);12001201DLList& getDF() { return df; }1202DLList::Iterator iterDF() { return df.iterator(); }12031204static inline BasicBlock *get(Iterator&);1205static inline BasicBlock *get(Graph::Node *);12061207public:1208Graph::Node cfg; // first edge is branch *taken* (the ELSE branch)1209Graph::Node dom;12101211BitSet liveSet;1212BitSet defSet;12131214uint32_t binPos;1215uint32_t binSize;12161217Instruction *joinAt; // for quick reference12181219bool explicitCont; // loop headers: true if loop contains continue stmts12201221private:1222int id;1223DLList df;12241225Instruction *phi;1226Instruction *entry;1227Instruction *exit;12281229unsigned int numInsns;12301231private:1232Function *func;1233Program *program;12341235void splitCommon(Instruction *, BasicBlock *, bool attach);1236};12371238class Function1239{1240public:1241Function(Program *, const char *name, uint32_t label);1242~Function();12431244static inline Function *get(Graph::Node *node);12451246inline Program *getProgram() const { return prog; }1247inline const char *getName() const { return name; }1248inline int getId() const { return id; }1249inline uint32_t getLabel() const { return label; }12501251void print();1252void printLiveIntervals() const;1253void printCFGraph(const char *filePath);12541255bool setEntry(BasicBlock *);1256bool setExit(BasicBlock *);12571258unsigned int orderInstructions(ArrayList&);12591260inline void add(BasicBlock *bb, int& id) { allBBlocks.insert(bb, 
id); }1261inline void add(Instruction *insn, int& id) { allInsns.insert(insn, id); }1262inline void add(LValue *lval, int& id) { allLValues.insert(lval, id); }12631264inline LValue *getLValue(int id);12651266void buildLiveSets();1267void buildDefSets();1268bool convertToSSA();12691270public:1271std::deque<ValueDef> ins;1272std::deque<ValueRef> outs;1273std::deque<Value *> clobbers;12741275Graph cfg;1276Graph::Node *cfgExit;1277Graph *domTree;1278Graph::Node call; // node in the call graph12791280BasicBlock **bbArray; // BBs in emission order1281int bbCount;12821283unsigned int loopNestingBound;1284int regClobberMax;12851286uint32_t binPos;1287uint32_t binSize;12881289Value *stackPtr;12901291uint32_t tlsBase; // base address for l[] space (if no stack pointer is used)1292uint32_t tlsSize;12931294ArrayList allBBlocks;1295ArrayList allInsns;1296ArrayList allLValues;12971298private:1299void buildLiveSetsPreSSA(BasicBlock *, const int sequence);1300void buildDefSetsPreSSA(BasicBlock *bb, const int seq);13011302private:1303uint32_t label;1304int id;1305const char *const name;1306Program *prog;1307};13081309enum CGStage1310{1311CG_STAGE_PRE_SSA,1312CG_STAGE_SSA, // expected directly before register allocation1313CG_STAGE_POST_RA1314};13151316class Program1317{1318public:1319enum Type1320{1321TYPE_VERTEX,1322TYPE_TESSELLATION_CONTROL,1323TYPE_TESSELLATION_EVAL,1324TYPE_GEOMETRY,1325TYPE_FRAGMENT,1326TYPE_COMPUTE1327};13281329Program(Type type, Target *targ);1330~Program();13311332void print();13331334Type getType() const { return progType; }13351336inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); }1337inline void del(Function *fn, int& id) { allFuncs.remove(id); }1338inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }13391340bool makeFromNIR(struct nv50_ir_prog_info *,1341struct nv50_ir_prog_info_out *);1342bool makeFromTGSI(struct nv50_ir_prog_info *,1343struct nv50_ir_prog_info_out *);1344bool convertToSSA();1345bool optimizeSSA(int 
level);1346bool optimizePostRA(int level);1347bool registerAllocation();1348bool emitBinary(struct nv50_ir_prog_info_out *);13491350const Target *getTarget() const { return target; }13511352private:1353Type progType;1354Target *target;13551356public:1357Function *main;1358Graph calls;13591360ArrayList allFuncs;1361ArrayList allRValues;13621363uint32_t *code;1364uint32_t binSize;1365uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL13661367int maxGPR;1368bool fp64;1369bool persampleInvocation;13701371MemoryPool mem_Instruction;1372MemoryPool mem_CmpInstruction;1373MemoryPool mem_TexInstruction;1374MemoryPool mem_FlowInstruction;1375MemoryPool mem_LValue;1376MemoryPool mem_Symbol;1377MemoryPool mem_ImmediateValue;13781379uint32_t dbgFlags;1380uint8_t optLevel;13811382void *targetPriv; // e.g. to carry information between passes13831384const struct nv50_ir_prog_info *driver; // for driver configuration1385const struct nv50_ir_prog_info_out *driver_out; // for driver configuration13861387void releaseInstruction(Instruction *);1388void releaseValue(Value *);1389};13901391// TODO: add const version1392class Pass1393{1394public:1395bool run(Program *, bool ordered = false, bool skipPhi = false);1396bool run(Function *, bool ordered = false, bool skipPhi = false);13971398private:1399// return false to continue with next entity on next higher level1400virtual bool visit(Function *) { return true; }1401virtual bool visit(BasicBlock *) { return true; }1402virtual bool visit(Instruction *) { return false; }14031404bool doRun(Program *, bool ordered, bool skipPhi);1405bool doRun(Function *, bool ordered, bool skipPhi);14061407protected:1408bool err;1409Function *func;1410Program *prog;1411};14121413// =============================================================================14141415#include "codegen/nv50_ir_inlines.h"14161417} // namespace nv50_ir14181419#endif // __NV50_IR_H__142014211422