/* Path: blob/21.2-virgl/src/freedreno/ir3/instr-a3xx.h
 * 4565 views
 */
/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

#define PACKED __attribute__((__packed__))

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* clang-format off */
void ir3_assert_handler(const char *expr, const char *file, int line,
                        const char *func) __attribute__((weak)) __attribute__((__noreturn__));
/* clang-format on */

/* A wrapper for assert() that allows overriding handling of a failed
 * assert.  This is needed for tools like crashdec which can want to
 * attempt to disassemble memory that might not actually be valid
 * instructions.
 *
 * ir3_assert_handler is declared weak, so it is only called when some
 * translation unit actually provides a definition; otherwise the plain
 * assert() is reached.  Note that `expr` is evaluated a second time on
 * the failure path.
 */
#define ir3_assert(expr)                                                       \
   do {                                                                        \
      if (!(expr)) {                                                           \
         if (ir3_assert_handler) {                                             \
            ir3_assert_handler(#expr, __FILE__, __LINE__, __func__);           \
         }                                                                     \
         assert(expr);                                                         \
      }                                                                        \
   } while (0)

/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Pack an instruction category and a per-category opcode into one opc_t
 * value.  The category is scaled by multiplication rather than `<<` because
 * the meta category is -1 and left-shifting a negative value is undefined
 * behaviour in C; the resulting values are the same.
 */
#define _OPC(cat, opc) (((cat) * (1 << NOPC_BITS)) | (opc))

/* clang-format off */
typedef enum {
   /* category 0: */
   OPC_NOP             = _OPC(0, 0),
   OPC_B               = _OPC(0, 1),
   OPC_JUMP            = _OPC(0, 2),
   OPC_CALL            = _OPC(0, 3),
   OPC_RET             = _OPC(0, 4),
   OPC_KILL            = _OPC(0, 5),
   OPC_END             = _OPC(0, 6),
   OPC_EMIT            = _OPC(0, 7),
   OPC_CUT             = _OPC(0, 8),
   OPC_CHMASK          = _OPC(0, 9),
   OPC_CHSH            = _OPC(0, 10),
   OPC_FLOW_REV        = _OPC(0, 11),

   OPC_BKT             = _OPC(0, 16),
   OPC_STKS            = _OPC(0, 17),
   OPC_STKR            = _OPC(0, 18),
   OPC_XSET            = _OPC(0, 19),
   OPC_XCLR            = _OPC(0, 20),
   OPC_GETONE          = _OPC(0, 21),
   OPC_DBG             = _OPC(0, 22),
   OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
   OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */

   OPC_PREDT           = _OPC(0, 29),   /* predicated true */
   OPC_PREDF           = _OPC(0, 30),   /* predicated false */
   OPC_PREDE           = _OPC(0, 31),   /* predicated end */

   /* Logical opcodes for different branch instruction variations: */
   OPC_BR              = _OPC(0, 40),
   OPC_BRAO            = _OPC(0, 41),
   OPC_BRAA            = _OPC(0, 42),
   OPC_BRAC            = _OPC(0, 43),
   OPC_BANY            = _OPC(0, 44),
   OPC_BALL            = _OPC(0, 45),
   OPC_BRAX            = _OPC(0, 46),

   /* Logical opcode to distinguish kill and demote */
   OPC_DEMOTE          = _OPC(0, 47),

   /* category 1: */
   OPC_MOV             = _OPC(1, 0),
   OPC_MOVP            = _OPC(1, 1),
   /* swz, gat, sct */
   OPC_MOVMSK          = _OPC(1, 3),

   /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
    * replaces the repeat field:
    */
   OPC_SWZ             = _OPC(1, 4),
   OPC_GAT             = _OPC(1, 5),
   OPC_SCT             = _OPC(1, 6),

   /* Logical opcodes for different variants of mov: */
   OPC_MOV_IMMED       = _OPC(1, 40),
   OPC_MOV_CONST       = _OPC(1, 41),
   OPC_MOV_GPR         = _OPC(1, 42),
   OPC_MOV_RELGPR      = _OPC(1, 43),
   OPC_MOV_RELCONST    = _OPC(1, 44),

   /* Macros that expand to an if statement + move */
   OPC_BALLOT_MACRO    = _OPC(1, 50),
   OPC_ANY_MACRO       = _OPC(1, 51),
   OPC_ALL_MACRO       = _OPC(1, 52),
   OPC_ELECT_MACRO     = _OPC(1, 53),
   OPC_READ_COND_MACRO = _OPC(1, 54),
   OPC_READ_FIRST_MACRO = _OPC(1, 55),
   OPC_SWZ_SHARED_MACRO = _OPC(1, 56),

   /* category 2: */
   OPC_ADD_F           = _OPC(2, 0),
   OPC_MIN_F           = _OPC(2, 1),
   OPC_MAX_F           = _OPC(2, 2),
   OPC_MUL_F           = _OPC(2, 3),
   OPC_SIGN_F          = _OPC(2, 4),
   OPC_CMPS_F          = _OPC(2, 5),
   OPC_ABSNEG_F        = _OPC(2, 6),
   OPC_CMPV_F          = _OPC(2, 7),
   /* 8 - invalid */
   OPC_FLOOR_F         = _OPC(2, 9),
   OPC_CEIL_F          = _OPC(2, 10),
   OPC_RNDNE_F         = _OPC(2, 11),
   OPC_RNDAZ_F         = _OPC(2, 12),
   OPC_TRUNC_F         = _OPC(2, 13),
   /* 14-15 - invalid */
   OPC_ADD_U           = _OPC(2, 16),
   OPC_ADD_S           = _OPC(2, 17),
   OPC_SUB_U           = _OPC(2, 18),
   OPC_SUB_S           = _OPC(2, 19),
   OPC_CMPS_U          = _OPC(2, 20),
   OPC_CMPS_S          = _OPC(2, 21),
   OPC_MIN_U           = _OPC(2, 22),
   OPC_MIN_S           = _OPC(2, 23),
   OPC_MAX_U           = _OPC(2, 24),
   OPC_MAX_S           = _OPC(2, 25),
   OPC_ABSNEG_S        = _OPC(2, 26),
   /* 27 - invalid */
   OPC_AND_B           = _OPC(2, 28),
   OPC_OR_B            = _OPC(2, 29),
   OPC_NOT_B           = _OPC(2, 30),
   OPC_XOR_B           = _OPC(2, 31),
   /* 32 - invalid */
   OPC_CMPV_U          = _OPC(2, 33),
   OPC_CMPV_S          = _OPC(2, 34),
   /* 35-47 - invalid */
   OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
   OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
   OPC_MULL_U          = _OPC(2, 50),
   OPC_BFREV_B         = _OPC(2, 51),
   OPC_CLZ_S           = _OPC(2, 52),
   OPC_CLZ_B           = _OPC(2, 53),
   OPC_SHL_B           = _OPC(2, 54),
   OPC_SHR_B           = _OPC(2, 55),
   OPC_ASHR_B          = _OPC(2, 56),
   OPC_BARY_F          = _OPC(2, 57),
   OPC_MGEN_B          = _OPC(2, 58),
   OPC_GETBIT_B        = _OPC(2, 59),
   OPC_SETRM           = _OPC(2, 60),
   OPC_CBITS_B         = _OPC(2, 61),
   OPC_SHB             = _OPC(2, 62),
   OPC_MSAD            = _OPC(2, 63),

   /* category 3: */
   OPC_MAD_U16         = _OPC(3, 0),
   OPC_MADSH_U16       = _OPC(3, 1),
   OPC_MAD_S16         = _OPC(3, 2),
   OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
   OPC_MAD_U24         = _OPC(3, 4),
   OPC_MAD_S24         = _OPC(3, 5),
   OPC_MAD_F16         = _OPC(3, 6),
   OPC_MAD_F32         = _OPC(3, 7),
   OPC_SEL_B16         = _OPC(3, 8),
   OPC_SEL_B32         = _OPC(3, 9),
   OPC_SEL_S16         = _OPC(3, 10),
   OPC_SEL_S32         = _OPC(3, 11),
   OPC_SEL_F16         = _OPC(3, 12),
   OPC_SEL_F32         = _OPC(3, 13),
   OPC_SAD_S16         = _OPC(3, 14),
   OPC_SAD_S32         = _OPC(3, 15),
   OPC_SHLG_B16        = _OPC(3, 16),

   /* category 4: */
   OPC_RCP             = _OPC(4, 0),
   OPC_RSQ             = _OPC(4, 1),
   OPC_LOG2            = _OPC(4, 2),
   OPC_EXP2            = _OPC(4, 3),
   OPC_SIN             = _OPC(4, 4),
   OPC_COS             = _OPC(4, 5),
   OPC_SQRT            = _OPC(4, 6),
   /* NOTE that these are 8+opc from their highp equivs, so it's possible
    * that the high order bit in the opc field has been repurposed for
    * half-precision use?  But note that other ops (rcp/lsin/cos/sqrt)
    * still use the same opc as highp
    */
   OPC_HRSQ            = _OPC(4, 9),
   OPC_HLOG2           = _OPC(4, 10),
   OPC_HEXP2           = _OPC(4, 11),

   /* category 5: */
   OPC_ISAM            = _OPC(5, 0),
   OPC_ISAML           = _OPC(5, 1),
   OPC_ISAMM           = _OPC(5, 2),
   OPC_SAM             = _OPC(5, 3),
   OPC_SAMB            = _OPC(5, 4),
   OPC_SAML            = _OPC(5, 5),
   OPC_SAMGQ           = _OPC(5, 6),
   OPC_GETLOD          = _OPC(5, 7),
   OPC_CONV            = _OPC(5, 8),
   OPC_CONVM           = _OPC(5, 9),
   OPC_GETSIZE         = _OPC(5, 10),
   OPC_GETBUF          = _OPC(5, 11),
   OPC_GETPOS          = _OPC(5, 12),
   OPC_GETINFO         = _OPC(5, 13),
   OPC_DSX             = _OPC(5, 14),
   OPC_DSY             = _OPC(5, 15),
   OPC_GATHER4R        = _OPC(5, 16),
   OPC_GATHER4G        = _OPC(5, 17),
   OPC_GATHER4B        = _OPC(5, 18),
   OPC_GATHER4A        = _OPC(5, 19),
   OPC_SAMGP0          = _OPC(5, 20),
   OPC_SAMGP1          = _OPC(5, 21),
   OPC_SAMGP2          = _OPC(5, 22),
   OPC_SAMGP3          = _OPC(5, 23),
   OPC_DSXPP_1         = _OPC(5, 24),
   OPC_DSYPP_1         = _OPC(5, 25),
   OPC_RGETPOS         = _OPC(5, 26),
   OPC_RGETINFO        = _OPC(5, 27),
   /* cat5 meta instructions, placed above the cat5 opc field's size */
   OPC_DSXPP_MACRO     = _OPC(5, 32),
   OPC_DSYPP_MACRO     = _OPC(5, 33),

   /* category 6: */
   OPC_LDG             = _OPC(6, 0),        /* load-global */
   OPC_LDL             = _OPC(6, 1),
   OPC_LDP             = _OPC(6, 2),
   OPC_STG             = _OPC(6, 3),        /* store-global */
   OPC_STL             = _OPC(6, 4),
   OPC_STP             = _OPC(6, 5),
   OPC_LDIB            = _OPC(6, 6),
   OPC_G2L             = _OPC(6, 7),
   OPC_L2G             = _OPC(6, 8),
   OPC_PREFETCH        = _OPC(6, 9),
   OPC_LDLW            = _OPC(6, 10),
   OPC_STLW            = _OPC(6, 11),
   OPC_RESFMT          = _OPC(6, 14),
   OPC_RESINFO         = _OPC(6, 15),
   OPC_ATOMIC_ADD      = _OPC(6, 16),
   OPC_ATOMIC_SUB      = _OPC(6, 17),
   OPC_ATOMIC_XCHG     = _OPC(6, 18),
   OPC_ATOMIC_INC      = _OPC(6, 19),
   OPC_ATOMIC_DEC      = _OPC(6, 20),
   OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
   OPC_ATOMIC_MIN      = _OPC(6, 22),
   OPC_ATOMIC_MAX      = _OPC(6, 23),
   OPC_ATOMIC_AND      = _OPC(6, 24),
   OPC_ATOMIC_OR       = _OPC(6, 25),
   OPC_ATOMIC_XOR      = _OPC(6, 26),
   OPC_LDGB            = _OPC(6, 27),
   OPC_STGB            = _OPC(6, 28),
   OPC_STIB            = _OPC(6, 29),
   OPC_LDC             = _OPC(6, 30),
   OPC_LDLV            = _OPC(6, 31),
   OPC_PIPR            = _OPC(6, 32), /* ??? */
   OPC_PIPC            = _OPC(6, 33), /* ??? */
   OPC_EMIT2           = _OPC(6, 34), /* ??? */
   OPC_ENDLS           = _OPC(6, 35), /* ??? */
   OPC_GETSPID         = _OPC(6, 36), /* SP ID */
   OPC_GETWID          = _OPC(6, 37), /* wavefront ID */

   /* Logical opcodes for things that differ in a6xx+ */
   OPC_STC             = _OPC(6, 40),
   OPC_RESINFO_B       = _OPC(6, 41),
   OPC_LDIB_B          = _OPC(6, 42),
   OPC_STIB_B          = _OPC(6, 43),

   /* Logical opcodes for different atomic instruction variations: */
   OPC_ATOMIC_B_ADD      = _OPC(6, 44),
   OPC_ATOMIC_B_SUB      = _OPC(6, 45),
   OPC_ATOMIC_B_XCHG     = _OPC(6, 46),
   OPC_ATOMIC_B_INC      = _OPC(6, 47),
   OPC_ATOMIC_B_DEC      = _OPC(6, 48),
   OPC_ATOMIC_B_CMPXCHG  = _OPC(6, 49),
   OPC_ATOMIC_B_MIN      = _OPC(6, 50),
   OPC_ATOMIC_B_MAX      = _OPC(6, 51),
   OPC_ATOMIC_B_AND      = _OPC(6, 52),
   OPC_ATOMIC_B_OR       = _OPC(6, 53),
   OPC_ATOMIC_B_XOR      = _OPC(6, 54),

   OPC_LDG_A           = _OPC(6, 55),
   OPC_STG_A           = _OPC(6, 56),

   /* category 7: */
   OPC_BAR             = _OPC(7, 0),
   OPC_FENCE           = _OPC(7, 1),

   /* meta instructions (category -1): */
   /* placeholder instr to mark shader inputs: */
   OPC_META_INPUT      = _OPC(-1, 0),
   /* The "collect" and "split" instructions are used for keeping
    * track of instructions that write to multiple dst registers
    * (split) like texture sample instructions, or read multiple
    * consecutive scalar registers (collect) (bary.f, texture samp)
    *
    * A "split" extracts a scalar component from a vecN, and a
    * "collect" gathers multiple scalar components into a vecN
    */
   OPC_META_SPLIT      = _OPC(-1, 2),
   OPC_META_COLLECT    = _OPC(-1, 3),

   /* placeholder for texture fetches that run before FS invocation
    * starts:
    */
   OPC_META_TEX_PREFETCH = _OPC(-1, 4),

   /* Parallel copies have multiple destinations, and copy each source
    * to its corresponding destination. This happens "in parallel," meaning
    * that it happens as-if every source is read first and then every
    * destination is stored. These are produced in RA when register
    * shuffling is required, and then lowered away immediately afterwards.
    */
   OPC_META_PARALLEL_COPY = _OPC(-1, 5),
   OPC_META_PHI = _OPC(-1, 6),
} opc_t;
/* clang-format on */

/* Inverse of _OPC(): recover the category / per-category opcode of an opc_t.
 * opc_cat() yields -1 for the meta opcodes on compilers where `>>` on a
 * negative int is an arithmetic shift.
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))

const char *disasm_a3xx_instr_name(opc_t opc);

typedef enum {
   TYPE_F16 = 0,
   TYPE_F32 = 1,
   TYPE_U16 = 2,
   TYPE_U32 = 3,
   TYPE_S16 = 4,
   TYPE_S32 = 5,
   TYPE_U8 = 6,
   TYPE_S8 = 7, // XXX I assume?
} type_t;

/* Width in bits (32, 16 or 8) of a value of the given type.  An unknown
 * type trips ir3_assert() and yields 0.
 */
static inline uint32_t
type_size(type_t type)
{
   switch (type) {
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_S32:
      return 32;
   case TYPE_F16:
   case TYPE_U16:
   case TYPE_S16:
      return 16;
   case TYPE_U8:
   case TYPE_S8:
      return 8;
   default:
      ir3_assert(0); /* invalid type */
      return 0;
   }
}

/* Non-zero if the type is one of the float types. */
static inline int
type_float(type_t type)
{
   return (type == TYPE_F32) || (type == TYPE_F16);
}

/* Non-zero if the type is one of the unsigned integer types. */
static inline int
type_uint(type_t type)
{
   return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
}

/* Non-zero if the type is one of the signed integer types. */
static inline int
type_sint(type_t type)
{
   return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
}

typedef enum {
   ROUND_ZERO = 0,
   ROUND_EVEN = 1,
   ROUND_POS_INF = 2,
   ROUND_NEG_INF = 3,
} round_t;

typedef union PACKED {
   /* normal gpr or const src register: */
   struct PACKED {
      uint32_t comp : 2;
      uint32_t num : 10;
   };
   /* for immediate val: */
   int32_t iim_val : 11;
   /* to make compiler happy: */
   uint32_t dummy32;
   uint32_t dummy10 : 10;
   int32_t idummy10 : 10;
   uint32_t dummy11 : 11;
   uint32_t dummy12 : 12;
   uint32_t dummy13 : 13;
   uint32_t dummy8 : 8;
   int32_t idummy13 : 13;
   int32_t idummy8 : 8;
} reg_t;

/* Build a register id from a register number and a component.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 */
static inline uint32_t
regid(int num, int comp)
{
   return (num << 2) | (comp & 0x3);
}

#define INVALID_REG     regid(63, 0)
#define VALIDREG(r)     ((r) != INVALID_REG)
/* NOTE: COND() is not defined in this header; the includer must provide it. */
#define CONDREG(r, val) COND(VALIDREG(r), (val))

/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */

/* Non-zero if the register number is the address or predicate register. */
static inline int
reg_special(reg_t reg)
{
   return (reg.num == REG_A0) || (reg.num == REG_P0);
}

typedef enum {
   BRANCH_PLAIN = 0, /* br */
   BRANCH_OR = 1,    /* brao */
   BRANCH_AND = 2,   /* braa */
   BRANCH_CONST = 3, /* brac */
   BRANCH_ANY = 4,   /* bany */
   BRANCH_ALL = 5,   /* ball */
   BRANCH_X = 6,     /* brax ??? */
} brtype_t;

typedef struct PACKED {
   /* dword0: */
   union PACKED {
      struct PACKED {
         int16_t immed : 16;
         uint32_t dummy1 : 16;
      } a3xx;
      struct PACKED {
         int32_t immed : 20;
         uint32_t dummy1 : 12;
      } a4xx;
      struct PACKED {
         int32_t immed : 32;
      } a5xx;
   };

   /* dword1: */
   uint32_t idx : 5;    /* brac.N index */
   uint32_t brtype : 3; /* branch type, see brtype_t */
   uint32_t repeat : 3;
   uint32_t dummy3 : 1;
   uint32_t ss : 1;
   uint32_t inv2 : 1;
   uint32_t comp2 : 2;
   uint32_t eq : 1;
   uint32_t opc_hi : 1; /* at least one bit */
   uint32_t dummy4 : 2;
   uint32_t inv1 : 1;
   uint32_t comp1 : 2; /* component for first src */
   uint32_t opc : 4;
   uint32_t jmp_tgt : 1;
   uint32_t sync : 1;
   uint32_t opc_cat : 3;
} instr_cat0_t;

typedef struct PACKED {
   /* dword0: */
   union PACKED {
      /* for normal src register: */
      struct PACKED {
         uint32_t src : 11;
         /* at least low bit of pad must be zero or it will
          * look like an address-relative src
          */
         uint32_t pad : 21;
      };
      /* for address relative: */
      struct PACKED {
         int32_t off : 10;
         uint32_t src_rel_c : 1;
         uint32_t src_rel : 1;
         uint32_t unknown : 20;
      };
      /* for immediate: */
      int32_t iim_val;
      uint32_t uim_val;
      float fim_val;
   };

   /* dword1: */
   uint32_t dst : 8;
   uint32_t repeat : 3;
   uint32_t src_r : 1;
   uint32_t ss : 1;
   uint32_t ul : 1;
   uint32_t dst_type : 3;
   uint32_t dst_rel : 1;
   uint32_t src_type : 3;
   uint32_t src_c : 1;
   uint32_t src_im : 1;
   uint32_t even : 1;
   uint32_t pos_inf : 1;
   uint32_t opc : 2;
   uint32_t jmp_tgt : 1;
   uint32_t sync : 1;
   uint32_t opc_cat : 3;
} instr_cat1_t;

typedef struct PACKED {
   /* dword0: */
   union PACKED {
      struct PACKED {
         uint32_t src1 : 11;
         uint32_t must_be_zero1 : 2;
         uint32_t src1_im : 1;  /* immediate */
         uint32_t src1_neg : 1; /* negate */
         uint32_t src1_abs : 1; /* absolute value */
      };
      struct PACKED {
         uint32_t src1 : 10;
         uint32_t src1_c : 1;   /* relative-const */
         uint32_t src1_rel : 1; /* relative address */
         uint32_t must_be_zero : 1;
         uint32_t dummy : 3;
      } rel1;
      struct PACKED {
         uint32_t src1 : 12;
         uint32_t src1_c : 1; /* const */
         int32_t dummy : 3;
      } c1;
   };

   union PACKED {
      struct PACKED {
         uint32_t src2 : 11;
         uint32_t must_be_zero2 : 2;
         uint32_t src2_im : 1;  /* immediate */
         uint32_t src2_neg : 1; /* negate */
         uint32_t src2_abs : 1; /* absolute value */
      };
      struct PACKED {
         uint32_t src2 : 10;
         uint32_t src2_c : 1;   /* relative-const */
         uint32_t src2_rel : 1; /* relative address */
         uint32_t must_be_zero : 1;
         uint32_t dummy : 3;
      } rel2;
      struct PACKED {
         uint32_t src2 : 12;
         uint32_t src2_c : 1; /* const */
         uint32_t dummy : 3;
      } c2;
   };

   /* dword1: */
   uint32_t dst : 8;
   uint32_t repeat : 2;
   uint32_t sat : 1;
   uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
   uint32_t ss : 1;
   uint32_t ul : 1;       /* dunno */
   uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
   uint32_t ei : 1;
   uint32_t cond : 3;
   uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
   uint32_t full : 1;   /* not half */
   uint32_t opc : 6;
   uint32_t jmp_tgt : 1;
   uint32_t sync : 1;
   uint32_t opc_cat : 3;
} instr_cat2_t;

typedef struct PACKED {
   /* dword0: */
   union PACKED {
      struct PACKED {
         uint32_t src1 : 11;
         uint32_t must_be_zero1 : 2;
         uint32_t src2_c : 1;
         uint32_t src1_neg : 1;
         uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
      };
      struct PACKED {
         uint32_t src1 : 10;
         uint32_t src1_c : 1;
         uint32_t src1_rel : 1;
         uint32_t must_be_zero : 1;
         uint32_t dummy : 3;
      } rel1;
      struct PACKED {
         uint32_t src1 : 12;
         uint32_t src1_c : 1;
         uint32_t dummy : 3;
      } c1;
   };

   union PACKED {
      struct PACKED {
         uint32_t src3 : 11;
         uint32_t must_be_zero2 : 2;
         uint32_t src3_r : 1;
         uint32_t src2_neg : 1;
         uint32_t src3_neg : 1;
      };
      struct PACKED {
         uint32_t src3 : 10;
         uint32_t src3_c : 1;
         uint32_t src3_rel : 1;
         uint32_t must_be_zero : 1;
         uint32_t dummy : 3;
      } rel2;
      struct PACKED {
         uint32_t src3 : 12;
         uint32_t src3_c : 1;
         uint32_t dummy : 3;
      } c2;
   };

   /* dword1: */
   uint32_t dst : 8;
   uint32_t repeat : 2;
   uint32_t sat : 1;
   uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
   uint32_t ss : 1;
   uint32_t ul : 1;
   uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
   uint32_t src2 : 8;
   uint32_t opc : 4;
   uint32_t jmp_tgt : 1;
   uint32_t sync : 1;
   uint32_t opc_cat : 3;
} instr_cat3_t;

/* Returns false for the cat3 opcodes listed in the switch below (the *16
 * variants plus sad.s32), true for every other cat3 opcode.
 */
static inline bool
instr_cat3_full(instr_cat3_t *cat3)
{
   switch (_OPC(3, cat3->opc)) {
   case OPC_MAD_F16:
   case OPC_MAD_U16:
   case OPC_MAD_S16:
   case OPC_SEL_B16:
   case OPC_SEL_S16:
   case OPC_SEL_F16:
   case OPC_SAD_S16:
   case OPC_SAD_S32: // really??
      return false;
   default:
      return true;
   }
}

typedef struct PACKED {
   /* dword0: */
   union PACKED {
      struct PACKED {
         uint32_t src : 11;
         uint32_t must_be_zero1 : 2;
         uint32_t src_im : 1;  /* immediate */
         uint32_t src_neg : 1; /* negate */
         uint32_t src_abs : 1; /* absolute value */
      };
      struct PACKED {
         uint32_t src : 10;
         uint32_t src_c : 1;   /* relative-const */
         uint32_t src_rel : 1; /* relative address */
         uint32_t must_be_zero : 1;
         uint32_t dummy : 3;
      } rel;
      struct PACKED {
         uint32_t src : 12;
         uint32_t src_c : 1; /* const */
         uint32_t dummy : 3;
      } c;
   };
   uint32_t dummy1 : 16; /* seem to be ignored */

   /* dword1: */
   uint32_t dst : 8;
   uint32_t repeat : 2;
   uint32_t sat : 1;
   uint32_t src_r : 1;
   uint32_t ss : 1;
   uint32_t ul : 1;
   uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
   uint32_t dummy2 : 5;   /* seem to be ignored */
   uint32_t full : 1;     /* not half */
   uint32_t opc : 6;
   uint32_t jmp_tgt : 1;
   uint32_t sync : 1;
   uint32_t opc_cat : 3;
} instr_cat4_t;

/* With is_s2en_bindless = 1, this determines whether bindless is enabled and
 * if so, how to get the (base, index) pair for both sampler and texture.
 * There is a single base embedded in the instruction, which is always used
 * for the texture.
 */
typedef enum {
   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is not presumed to be uniform. This is
    * backwards-compatible with earlier generations, where this field was
    * always 0 and nonuniform-indexed sampling always worked.
    */
   CAT5_NONUNIFORM = 0,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_A1_UNIFORM = 1,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_NONUNIFORM = 2,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is *not* presumed to be
    * uniform.
    */
   CAT5_BINDLESS_A1_NONUNIFORM = 3,

   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is presumed to be uniform.
    */
   CAT5_UNIFORM = 4,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_UNIFORM = 5,

   /* The texture and sampler share the same base, get sampler index from low
    * 4 bits of src3 and texture index from high 4 bits.
    */
   CAT5_BINDLESS_IMM = 6,

   /* The sampler base comes from the low 3 bits of a1.x, and the texture
    * index comes from the next 8 bits of a1.x. The sampler index is an
    * immediate in src3.
    */
   CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;

typedef struct PACKED {
   /* dword0: */
   union PACKED {
      /* normal case: */
      struct PACKED {
         uint32_t full : 1; /* not half */
         uint32_t src1 : 8;
         uint32_t src2 : 8;
         uint32_t dummy1 : 4; /* seem to be ignored */
         uint32_t samp : 4;
         uint32_t tex : 7;
      } norm;
      /* s2en case: */
      struct PACKED {
         uint32_t full : 1; /* not half */
         uint32_t src1 : 8;
         uint32_t src2 : 8;
         uint32_t dummy1 : 2;
         uint32_t base_hi : 2;
         uint32_t src3 : 8;
         uint32_t desc_mode : 3;
      } s2en_bindless;
      /* same in either case: */
      // XXX I think, confirm this
      struct PACKED {
         uint32_t full : 1; /* not half */
         uint32_t src1 : 8;
         uint32_t src2 : 8;
         uint32_t pad : 15;
      };
   };

   /* dword1: */
   uint32_t dst : 8;
   uint32_t wrmask : 4; /* write-mask */
   uint32_t type : 3;
   uint32_t base_lo : 1; /* used with bindless */
   uint32_t is_3d : 1;

   uint32_t is_a : 1;
   uint32_t is_s : 1;
   uint32_t is_s2en_bindless : 1;
   uint32_t is_o : 1;
   uint32_t is_p : 1;

   uint32_t opc : 5;
   uint32_t jmp_tgt : 1;
   uint32_t sync : 1;
   uint32_t opc_cat : 3;
} instr_cat5_t;

/* dword0 encoding for src_off: [src1 + off], src3: */
typedef struct PACKED {
   /* dword0: */
   uint32_t mustbe1 : 1;
   int32_t off : 13; /* src2 */
   uint32_t src1 : 8;
   uint32_t src1_im : 1;
   uint32_t src3_im : 1;
   uint32_t src3 : 8;

   /* dword1: */
   uint32_t dword1;
} instr_cat6a_t;

/* dword0 encoding for !src_off: [src1], src2 */
typedef struct PACKED {
   /* dword0: */
   uint32_t mustbe0 : 1;
   uint32_t src1 : 8;
   uint32_t pad : 5;
   uint32_t ignore0 : 8;
   uint32_t src1_im : 1;
   uint32_t src2_im : 1;
   uint32_t src2 : 8;

   /* dword1: */
   uint32_t dword1;
} instr_cat6b_t;

/* dword1 encoding for dst_off: */
typedef struct PACKED {
   /* dword0: */
   uint32_t dw0_pad1 : 9;
   int32_t off_high : 5;
   uint32_t dw0_pad2 : 18;

   uint32_t off : 8;
   uint32_t mustbe1 : 1;
   uint32_t dst : 8;
   uint32_t pad1 : 15;
} instr_cat6c_t;

/* dword1 encoding for !dst_off: */
typedef struct PACKED {
   /* dword0: */
   uint32_t dword0;

   uint32_t dst : 8;
   uint32_t mustbe0 : 1;
   uint32_t idx : 8;
   uint32_t pad0 : 15;
} instr_cat6d_t;

/* ldgb and atomics..
 *
 * ldgb:      pad0=0, pad3=1
 * atomic .g: pad0=1, pad3=1
 *        .l: pad0=1, pad3=0
 */
typedef struct PACKED {
   /* dword0: */
   uint32_t pad0 : 1;
   uint32_t src3 : 8;
   uint32_t d : 2;
   uint32_t typed : 1;
   uint32_t type_size : 2;
   uint32_t src1 : 8;
   uint32_t src1_im : 1;
   uint32_t src2_im : 1;
   uint32_t src2 : 8;

   /* dword1: */
   uint32_t dst : 8;
   uint32_t mustbe0 : 1;
   uint32_t src_ssbo : 8;
   uint32_t pad2 : 3; // type
   uint32_t g : 1;
   uint32_t src_ssbo_im : 1;
   uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6ldgb_t;

/* stgb, pad0=0, pad3=2
 */
typedef struct PACKED {
   /* dword0: */
   uint32_t mustbe1 : 1; // ???
   uint32_t src1 : 8;
   uint32_t d : 2;
   uint32_t typed : 1;
   uint32_t type_size : 2;
   uint32_t pad0 : 9;
   uint32_t src2_im : 1;
   uint32_t src2 : 8;

   /* dword1: */
   uint32_t src3 : 8;
   uint32_t src3_im : 1;
   uint32_t dst_ssbo : 8;
   uint32_t pad2 : 3; // type
   uint32_t pad3 : 2;
   uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6stgb_t;

typedef union PACKED {
   instr_cat6a_t a;
   instr_cat6b_t b;
   instr_cat6c_t c;
   instr_cat6d_t d;
   instr_cat6ldgb_t ldgb;
   instr_cat6stgb_t stgb;
   struct PACKED {
      /* dword0: */
      uint32_t src_off : 1;
      uint32_t pad1 : 31;

      /* dword1: */
      uint32_t pad2 : 8;
      uint32_t dst_off : 1;
      uint32_t pad3 : 8;
      uint32_t type : 3;
      uint32_t g : 1; /* or in some cases it means dst immed */
      uint32_t pad4 : 1;
      uint32_t opc : 5;
      uint32_t jmp_tgt : 1;
      uint32_t sync : 1;
      uint32_t opc_cat : 3;
   };
} instr_cat6_t;

/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
 */
typedef enum {
   /* Use old GL binding model with an immediate index. */
   CAT6_IMM = 0,

   CAT6_UNIFORM = 1,

   CAT6_NONUNIFORM = 2,

   /* Use the bindless model, with an immediate index.
    */
   CAT6_BINDLESS_IMM = 4,

   /* Use the bindless model, with a uniform register index.
    */
   CAT6_BINDLESS_UNIFORM = 5,

   /* Use the bindless model, with a register index that isn't guaranteed
    * to be uniform. This presumably checks if the indices are equal and
    * splits up the load/store, because it works the way you would
    * expect.
    */
   CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;

/**
 * For atomic ops (which return a value):
 *
 *    pad1=1, pad3=6, pad5=3
 *    src1    - vecN offset/coords
 *    src2.x  - is actually dest register
 *    src2.y  - is 'data' except for cmpxchg where src2.y is 'compare'
 *              and src2.z is 'data'
 *
 * For stib (which does not return a value):
 *    pad1=0, pad3=6, pad5=2
 *    src1    - vecN offset/coords
 *    src2    - value to store
 *
 * For ldib:
 *    pad1=1, pad3=6, pad5=2
 *    src1    - vecN offset/coords
 *
 * for ldc (load from UBO using descriptor):
 *    pad1=0, pad3=4, pad5=2
 *
 * pad2 and pad5 are only observed to be 0.
 */
typedef struct PACKED {
   /* dword0: */
   uint32_t pad1 : 1;
   uint32_t base : 3;
   uint32_t pad2 : 2;
   uint32_t desc_mode : 3;
   uint32_t d : 2;
   uint32_t typed : 1;
   uint32_t type_size : 2;
   uint32_t opc : 6;
   uint32_t pad3 : 4;
   uint32_t src1 : 8; /* coordinate/offset */

   /* dword1: */
   uint32_t src2 : 8; /* or the dst for load instructions */
   uint32_t pad4 : 1; // mustbe0 ??
   uint32_t ssbo : 8; /* ssbo/image binding point */
   uint32_t type : 3;
   uint32_t pad5 : 7;
   uint32_t jmp_tgt : 1;
   uint32_t sync : 1;
   uint32_t opc_cat : 3;
} instr_cat6_a6xx_t;

typedef struct PACKED {
   /* dword0: */
   uint32_t pad1 : 32;

   /* dword1: */
   uint32_t pad2 : 12;
   uint32_t ss : 1; /* maybe in the encoding, but blob only uses (sy) */
   uint32_t pad3 : 6;
   uint32_t w : 1;       /* write */
   uint32_t r : 1;       /* read */
   uint32_t l : 1;       /* local */
   uint32_t g : 1;       /* global */
   uint32_t opc : 4;     /* presumed, but only a couple known OPCs */
   uint32_t jmp_tgt : 1; /* (jp) */
   uint32_t sync : 1;    /* (sy) */
   uint32_t opc_cat : 3;
} instr_cat7_t;

typedef union PACKED {
   instr_cat0_t cat0;
   instr_cat1_t cat1;
   instr_cat2_t cat2;
   instr_cat3_t cat3;
   instr_cat4_t cat4;
   instr_cat5_t cat5;
   instr_cat6_t cat6;
   instr_cat6_a6xx_t cat6_a6xx;
   instr_cat7_t cat7;
   struct PACKED {
      /* dword0: */
      uint32_t pad1 : 32;

      /* dword1: */
      uint32_t pad2 : 12;
      uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */
      uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
      uint32_t pad3 : 13;
      uint32_t jmp_tgt : 1;
      uint32_t sync : 1;
      uint32_t opc_cat : 3;
   };
} instr_t;

/* Value of the repeat field for cat0-cat4 instructions; 0 for every other
 * category.
 */
static inline uint32_t
instr_repeat(instr_t *instr)
{
   switch (instr->opc_cat) {
   case 0:
      return instr->cat0.repeat;
   case 1:
      return instr->cat1.repeat;
   case 2:
      return instr->cat2.repeat;
   case 3:
      return instr->cat3.repeat;
   case 4:
      return instr->cat4.repeat;
   default:
      return 0;
   }
}

/* Value of the sat bit for cat2-cat4 instructions; false for every other
 * category.
 */
static inline bool
instr_sat(instr_t *instr)
{
   switch (instr->opc_cat) {
   case 2:
      return instr->cat2.sat;
   case 3:
      return instr->cat3.sat;
   case 4:
      return instr->cat4.sat;
   default:
      return false;
   }
}

/* True if the opcode is a cat2/cat3 opcode other than bary.f and sel.*. */
static inline bool
is_sat_compatible(opc_t opc)
{
   /* On a6xx saturation doesn't work on cat4 */
   if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
      return false;

   switch (opc) {
   /* On a3xx and a6xx saturation doesn't work on bary.f */
   case OPC_BARY_F:
   /* On a6xx saturation doesn't work on sel.* */
   case OPC_SEL_B16:
   case OPC_SEL_B32:
   case OPC_SEL_S16:
   case OPC_SEL_S32:
   case OPC_SEL_F16:
   case OPC_SEL_F32:
      return false;
   default:
      return true;
   }
}

/* Decide whether a cat6 instruction uses the legacy (pre-a6xx) encoding.
 *
 * We can probably drop the gpu_id arg, but keeping it for now so we can
 * assert if we see something we think should be new encoding on an older
 * gpu.
 */
static inline bool
is_cat6_legacy(instr_t *instr, unsigned gpu_id)
{
   instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;

   if (gpu_id < 600)
      return true;

   /* At least one of these two bits is pad in all the possible
    * "legacy" cat6 encodings, and an analysis of all the pre-a6xx
    * cmdstream traces I have indicates that the pad bit is zero
    * in all cases.  So we can use this to detect new encoding:
    */
   if ((cat6->pad3 & 0x4) && (cat6->pad5 & 0x2)) {
      ir3_assert(instr->cat6.opc == 0);
      return false;
   }

   return true;
}

/* Per-category opcode field of an instruction.  For cat0 the opc_hi bit is
 * folded in as bit 4; for cat6 the field depends on whether the legacy or
 * the a6xx encoding is in use.  Returns 0 for an unhandled category.
 */
static inline uint32_t
instr_opc(instr_t *instr, unsigned gpu_id)
{
   switch (instr->opc_cat) {
   case 0:
      return instr->cat0.opc | (instr->cat0.opc_hi << 4);
   case 1:
      return instr->cat1.opc;
   case 2:
      return instr->cat2.opc;
   case 3:
      return instr->cat3.opc;
   case 4:
      return instr->cat4.opc;
   case 5:
      return instr->cat5.opc;
   case 6:
      if (!is_cat6_legacy(instr, gpu_id))
         return instr->cat6_a6xx.opc;
      return instr->cat6.opc;
   case 7:
      return instr->cat7.opc;
   default:
      return 0;
   }
}

/* True for the cat3 mad.* opcodes. */
static inline bool
is_mad(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_U16:
   case OPC_MAD_S16:
   case OPC_MAD_U24:
   case OPC_MAD_S24:
   case OPC_MAD_F16:
   case OPC_MAD_F32:
      return true;
   default:
      return false;
   }
}

/* True for the cat3 madsh.* opcodes. */
static inline bool
is_madsh(opc_t opc)
{
   switch (opc) {
   case OPC_MADSH_U16:
   case OPC_MADSH_M16:
      return true;
   default:
      return false;
   }
}

/* True for the OPC_ATOMIC_* opcodes (the OPC_ATOMIC_B_* logical variants
 * are not included).
 */
static inline bool
is_atomic(opc_t opc)
{
   switch (opc) {
   case OPC_ATOMIC_ADD:
   case OPC_ATOMIC_SUB:
   case OPC_ATOMIC_XCHG:
   case OPC_ATOMIC_INC:
   case OPC_ATOMIC_DEC:
   case OPC_ATOMIC_CMPXCHG:
   case OPC_ATOMIC_MIN:
   case OPC_ATOMIC_MAX:
   case OPC_ATOMIC_AND:
   case OPC_ATOMIC_OR:
   case OPC_ATOMIC_XOR:
      return true;
   default:
      return false;
   }
}

/* True for resfmt, resinfo, ldgb, stgb and stib. */
static inline bool
is_ssbo(opc_t opc)
{
   switch (opc) {
   case OPC_RESFMT:
   case OPC_RESINFO:
   case OPC_LDGB:
   case OPC_STGB:
   case OPC_STIB:
      return true;
   default:
      return false;
   }
}

/* True for isam, isaml and isamm. */
static inline bool
is_isam(opc_t opc)
{
   switch (opc) {
   case OPC_ISAM:
   case OPC_ISAML:
   case OPC_ISAMM:
      return true;
   default:
      return false;
   }
}

/* True for the cat2 opcodes with a _F suffix listed below (bary.f is not
 * included).
 */
static inline bool
is_cat2_float(opc_t opc)
{
   switch (opc) {
   case OPC_ADD_F:
   case OPC_MIN_F:
   case OPC_MAX_F:
   case OPC_MUL_F:
   case OPC_SIGN_F:
   case OPC_CMPS_F:
   case OPC_ABSNEG_F:
   case OPC_CMPV_F:
   case OPC_FLOOR_F:
   case OPC_CEIL_F:
   case OPC_RNDNE_F:
   case OPC_RNDAZ_F:
   case OPC_TRUNC_F:
      return true;

   default:
      return false;
   }
}

/* True for the float mad.* and sel.* cat3 opcodes. */
static inline bool
is_cat3_float(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_F16:
   case OPC_MAD_F32:
   case OPC_SEL_F16:
   case OPC_SEL_F32:
      return true;
   default:
      return false;
   }
}

#endif /* INSTR_A3XX_H_ */