/* Path: blob/21.2-virgl/src/asahi/compiler/agx_compiler.h
 * 4564 views */
/*1* Copyright (C) 2021 Alyssa Rosenzweig <[email protected]>2* Copyright (C) 2020 Collabora Ltd.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,20* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*/2324#ifndef __AGX_COMPILER_H25#define __AGX_COMPILER_H2627#include "compiler/nir/nir.h"28#include "util/u_math.h"29#include "util/half_float.h"30#include "util/u_dynarray.h"31#include "agx_compile.h"32#include "agx_opcodes.h"33#include "agx_minifloat.h"3435enum agx_dbg {36AGX_DBG_MSGS = BITFIELD_BIT(0),37AGX_DBG_SHADERS = BITFIELD_BIT(1),38AGX_DBG_SHADERDB = BITFIELD_BIT(2),39AGX_DBG_VERBOSE = BITFIELD_BIT(3),40AGX_DBG_INTERNAL = BITFIELD_BIT(4),41};4243extern int agx_debug;4445/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */46#define AGX_NUM_REGS (256)4748enum agx_index_type {49AGX_INDEX_NULL = 0,50AGX_INDEX_NORMAL = 1,51AGX_INDEX_IMMEDIATE = 2,52AGX_INDEX_UNIFORM = 3,53AGX_INDEX_REGISTER = 4,54AGX_INDEX_NIR_REGISTER = 5,55};5657enum agx_size {58AGX_SIZE_16 = 
0,59AGX_SIZE_32 = 1,60AGX_SIZE_64 = 261};6263typedef struct {64/* Sufficient for as many SSA values as we need. Immediates and uniforms fit in 16-bits */65unsigned value : 22;6667/* Indicates that this source kills the referenced value (because it is the68* last use in a block and the source is not live after the block). Set by69* liveness analysis. */70bool kill : 1;7172/* Cache hints */73bool cache : 1;74bool discard : 1;7576/* src - float modifiers */77bool abs : 1;78bool neg : 1;7980enum agx_size size : 2;81enum agx_index_type type : 3;82} agx_index;8384static inline agx_index85agx_get_index(unsigned value, enum agx_size size)86{87return (agx_index) {88.type = AGX_INDEX_NORMAL,89.value = value,90.size = size91};92}9394static inline agx_index95agx_immediate(uint16_t imm)96{97return (agx_index) {98.type = AGX_INDEX_IMMEDIATE,99.value = imm,100.size = AGX_SIZE_32101};102}103104static inline agx_index105agx_immediate_f(float f)106{107assert(agx_minifloat_exact(f));108return agx_immediate(agx_minifloat_encode(f));109}110111/* in half-words, specify r0h as 1, r1 as 2... 
*/112static inline agx_index113agx_register(uint8_t imm, enum agx_size size)114{115return (agx_index) {116.type = AGX_INDEX_REGISTER,117.value = imm,118.size = size119};120}121122static inline agx_index123agx_nir_register(unsigned imm, enum agx_size size)124{125return (agx_index) {126.type = AGX_INDEX_NIR_REGISTER,127.value = imm,128.size = size129};130}131132/* Also in half-words */133static inline agx_index134agx_uniform(uint8_t imm, enum agx_size size)135{136return (agx_index) {137.type = AGX_INDEX_UNIFORM,138.value = imm,139.size = size140};141}142143static inline agx_index144agx_null()145{146return (agx_index) { .type = AGX_INDEX_NULL };147}148149static inline agx_index150agx_zero()151{152return agx_immediate(0);153}154155/* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa156* = exponent = 0, sign bit set */157158static inline agx_index159agx_negzero()160{161return agx_immediate(0x80);162}163164static inline agx_index165agx_abs(agx_index idx)166{167idx.abs = true;168idx.neg = false;169return idx;170}171172static inline agx_index173agx_neg(agx_index idx)174{175idx.neg ^= true;176return idx;177}178179/* Replaces an index, preserving any modifiers */180181static inline agx_index182agx_replace_index(agx_index old, agx_index replacement)183{184replacement.abs = old.abs;185replacement.neg = old.neg;186return replacement;187}188189static inline bool190agx_is_null(agx_index idx)191{192return idx.type == AGX_INDEX_NULL;193}194195/* Compares equivalence as references */196197static inline bool198agx_is_equiv(agx_index left, agx_index right)199{200return (left.type == right.type) && (left.value == right.value);201}202203#define AGX_MAX_DESTS 1204#define AGX_MAX_SRCS 5205206enum agx_icond {207AGX_ICOND_UEQ = 0,208AGX_ICOND_ULT = 1,209AGX_ICOND_UGT = 2,210/* unknown */211AGX_ICOND_SEQ = 4,212AGX_ICOND_SLT = 5,213AGX_ICOND_SGT = 6,214/* unknown */215};216217enum agx_fcond {218AGX_FCOND_EQ = 0,219AGX_FCOND_LT = 1,220AGX_FCOND_GT = 
2,221AGX_FCOND_LTN = 3,222/* unknown */223AGX_FCOND_GE = 5,224AGX_FCOND_LE = 6,225AGX_FCOND_GTN = 7,226};227228enum agx_round {229AGX_ROUND_RTZ = 0,230AGX_ROUND_RTE = 1,231};232233enum agx_convert {234AGX_CONVERT_U8_TO_F = 0,235AGX_CONVERT_S8_TO_F = 1,236AGX_CONVERT_F_TO_U16 = 4,237AGX_CONVERT_F_TO_S16 = 5,238AGX_CONVERT_U16_TO_F = 6,239AGX_CONVERT_S16_TO_F = 7,240AGX_CONVERT_F_TO_U32 = 8,241AGX_CONVERT_F_TO_S32 = 9,242AGX_CONVERT_U32_TO_F = 10,243AGX_CONVERT_S32_TO_F = 11244};245246enum agx_lod_mode {247AGX_LOD_MODE_AUTO_LOD = 0,248AGX_LOD_MODE_LOD_MIN = 6,249AGX_LOD_GRAD = 8,250AGX_LOD_GRAD_MIN = 12251};252253enum agx_dim {254AGX_DIM_TEX_1D = 0,255AGX_DIM_TEX_1D_ARRAY = 1,256AGX_DIM_TEX_2D = 2,257AGX_DIM_TEX_2D_ARRAY = 3,258AGX_DIM_TEX_2D_MS = 4,259AGX_DIM_TEX_3D = 5,260AGX_DIM_TEX_CUBE = 6,261AGX_DIM_TEX_CUBE_ARRAY = 7262};263264/* Forward declare for branch target */265struct agx_block;266267typedef struct {268/* Must be first */269struct list_head link;270271enum agx_opcode op;272273/* Data flow */274agx_index dest[AGX_MAX_DESTS];275agx_index src[AGX_MAX_SRCS];276277union {278uint32_t imm;279uint32_t writeout;280uint32_t truth_table;281uint32_t component;282uint32_t channels;283uint32_t bfi_mask;284enum agx_sr sr;285enum agx_icond icond;286enum agx_fcond fcond;287enum agx_format format;288enum agx_round round;289enum agx_lod_mode lod_mode;290struct agx_block *target;291};292293/* For load varying */294bool perspective : 1;295296/* Invert icond/fcond */297bool invert_cond : 1;298299/* TODO: Handle tex ops more efficient */300enum agx_dim dim : 3;301302/* Final st_vary op */303bool last : 1;304305/* Shift for a bitwise or memory op (conflicts with format for memory ops) */306unsigned shift : 4;307308/* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require309* scoreboarding (everything but memory load/store and texturing). 
*/310unsigned scoreboard : 1;311312/* Number of nested control flow layers to jump by */313unsigned nest : 2;314315/* Output modifiers */316bool saturate : 1;317unsigned mask : 4;318} agx_instr;319320struct agx_block;321322typedef struct agx_block {323/* Link to next block. Must be first */324struct list_head link;325326/* List of instructions emitted for the current block */327struct list_head instructions;328329/* Index of the block in source order */330unsigned name;331332/* Control flow graph */333struct agx_block *successors[2];334struct set *predecessors;335bool unconditional_jumps;336337/* Liveness analysis results */338BITSET_WORD *live_in;339BITSET_WORD *live_out;340341/* Register allocation */342BITSET_DECLARE(regs_out, AGX_NUM_REGS);343344/* Offset of the block in the emitted binary */345off_t offset;346347/** Available for passes to use for metadata */348uint8_t pass_flags;349} agx_block;350351typedef struct {352nir_shader *nir;353gl_shader_stage stage;354struct list_head blocks; /* list of agx_block */355struct agx_shader_info *out;356struct agx_shader_key *key;357358/* Remapping table for varyings indexed by driver_location */359unsigned varyings[AGX_MAX_VARYINGS];360361/* Handling phi nodes is still TODO while we bring up other parts of the362* driver. YOLO the mapping of nir_register to fixed hardware registers */363unsigned *nir_regalloc;364365/* We reserve the top (XXX: that hurts thread count) */366unsigned max_register;367368/* Place to start pushing new values */369unsigned push_base;370371/* For creating temporaries */372unsigned alloc;373374/* I don't really understand how writeout ops work yet */375bool did_writeout;376377/* Has r0l been zeroed yet due to control flow? */378bool any_cf;379380/** Computed metadata */381bool has_liveness;382383/* Number of nested control flow structures within the innermost loop. 
Since384* NIR is just loop and if-else, this is the number of nested if-else385* statements in the loop */386unsigned loop_nesting;387388/* During instruction selection, for inserting control flow */389agx_block *current_block;390agx_block *continue_block;391agx_block *break_block;392agx_block *after_block;393394/* Stats for shader-db */395unsigned loop_count;396unsigned spills;397unsigned fills;398} agx_context;399400static inline void401agx_remove_instruction(agx_instr *ins)402{403list_del(&ins->link);404}405406static inline agx_index407agx_temp(agx_context *ctx, enum agx_size size)408{409return agx_get_index(ctx->alloc++, size);410}411412static enum agx_size413agx_size_for_bits(unsigned bits)414{415switch (bits) {416case 1:417case 16: return AGX_SIZE_16;418case 32: return AGX_SIZE_32;419case 64: return AGX_SIZE_64;420default: unreachable("Invalid bitsize");421}422}423424static inline agx_index425agx_src_index(nir_src *src)426{427if (!src->is_ssa) {428return agx_nir_register(src->reg.reg->index,429agx_size_for_bits(nir_src_bit_size(*src)));430}431432return agx_get_index(src->ssa->index,433agx_size_for_bits(nir_src_bit_size(*src)));434}435436static inline agx_index437agx_dest_index(nir_dest *dst)438{439if (!dst->is_ssa) {440return agx_nir_register(dst->reg.reg->index,441agx_size_for_bits(nir_dest_bit_size(*dst)));442}443444return agx_get_index(dst->ssa.index,445agx_size_for_bits(nir_dest_bit_size(*dst)));446}447448/* Iterators for AGX IR */449450#define agx_foreach_block(ctx, v) \451list_for_each_entry(agx_block, v, &ctx->blocks, link)452453#define agx_foreach_block_rev(ctx, v) \454list_for_each_entry_rev(agx_block, v, &ctx->blocks, link)455456#define agx_foreach_block_from(ctx, from, v) \457list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link)458459#define agx_foreach_block_from_rev(ctx, from, v) \460list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link)461462#define agx_foreach_instr_in_block(block, v) \463list_for_each_entry(agx_instr, 
v, &(block)->instructions, link)464465#define agx_foreach_instr_in_block_rev(block, v) \466list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)467468#define agx_foreach_instr_in_block_safe(block, v) \469list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)470471#define agx_foreach_instr_in_block_safe_rev(block, v) \472list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)473474#define agx_foreach_instr_in_block_from(block, v, from) \475list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)476477#define agx_foreach_instr_in_block_from_rev(block, v, from) \478list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions, link)479480#define agx_foreach_instr_global(ctx, v) \481agx_foreach_block(ctx, v_block) \482agx_foreach_instr_in_block(v_block, v)483484#define agx_foreach_instr_global_rev(ctx, v) \485agx_foreach_block_rev(ctx, v_block) \486agx_foreach_instr_in_block_rev(v_block, v)487488#define agx_foreach_instr_global_safe(ctx, v) \489agx_foreach_block(ctx, v_block) \490agx_foreach_instr_in_block_safe(v_block, v)491492#define agx_foreach_instr_global_safe_rev(ctx, v) \493agx_foreach_block_rev(ctx, v_block) \494agx_foreach_instr_in_block_safe_rev(v_block, v)495496/* Based on set_foreach, expanded with automatic type casts */497498#define agx_foreach_successor(blk, v) \499agx_block *v; \500agx_block **_v; \501for (_v = (agx_block **) &blk->successors[0], \502v = *_v; \503v != NULL && _v < (agx_block **) &blk->successors[2]; \504_v++, v = *_v) \505506#define agx_foreach_predecessor(blk, v) \507struct set_entry *_entry_##v; \508agx_block *v; \509for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \510v = (agx_block *) (_entry_##v ? _entry_##v->key : NULL); \511_entry_##v != NULL; \512_entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \513v = (agx_block *) (_entry_##v ? 
_entry_##v->key : NULL))514515#define agx_foreach_src(ins, v) \516for (unsigned v = 0; v < ARRAY_SIZE(ins->src); ++v)517518#define agx_foreach_dest(ins, v) \519for (unsigned v = 0; v < ARRAY_SIZE(ins->dest); ++v)520521static inline agx_instr *522agx_prev_op(agx_instr *ins)523{524return list_last_entry(&(ins->link), agx_instr, link);525}526527static inline agx_instr *528agx_next_op(agx_instr *ins)529{530return list_first_entry(&(ins->link), agx_instr, link);531}532533static inline agx_block *534agx_next_block(agx_block *block)535{536return list_first_entry(&(block->link), agx_block, link);537}538539static inline agx_block *540agx_exit_block(agx_context *ctx)541{542agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);543assert(!last->successors[0] && !last->successors[1]);544return last;545}546547/* Like in NIR, for use with the builder */548549enum agx_cursor_option {550agx_cursor_after_block,551agx_cursor_before_instr,552agx_cursor_after_instr553};554555typedef struct {556enum agx_cursor_option option;557558union {559agx_block *block;560agx_instr *instr;561};562} agx_cursor;563564static inline agx_cursor565agx_after_block(agx_block *block)566{567return (agx_cursor) {568.option = agx_cursor_after_block,569.block = block570};571}572573static inline agx_cursor574agx_before_instr(agx_instr *instr)575{576return (agx_cursor) {577.option = agx_cursor_before_instr,578.instr = instr579};580}581582static inline agx_cursor583agx_after_instr(agx_instr *instr)584{585return (agx_cursor) {586.option = agx_cursor_after_instr,587.instr = instr588};589}590591/* IR builder in terms of cursor infrastructure */592593typedef struct {594agx_context *shader;595agx_cursor cursor;596} agx_builder;597598static inline agx_builder599agx_init_builder(agx_context *ctx, agx_cursor cursor)600{601return (agx_builder) {602.shader = ctx,603.cursor = cursor604};605}606607/* Insert an instruction at the cursor and move the cursor */608609static inline void610agx_builder_insert(agx_cursor 
*cursor, agx_instr *I)611{612switch (cursor->option) {613case agx_cursor_after_instr:614list_add(&I->link, &cursor->instr->link);615cursor->instr = I;616return;617618case agx_cursor_after_block:619list_addtail(&I->link, &cursor->block->instructions);620cursor->option = agx_cursor_after_instr;621cursor->instr = I;622return;623624case agx_cursor_before_instr:625list_addtail(&I->link, &cursor->instr->link);626cursor->option = agx_cursor_after_instr;627cursor->instr = I;628return;629}630631unreachable("Invalid cursor option");632}633634/* Uniform file management */635636agx_index637agx_indexed_sysval(agx_context *ctx, enum agx_push_type type, enum agx_size size,638unsigned index, unsigned length);639640/* Routines defined for AIR */641642void agx_print_instr(agx_instr *I, FILE *fp);643void agx_print_block(agx_block *block, FILE *fp);644void agx_print_shader(agx_context *ctx, FILE *fp);645void agx_optimizer(agx_context *ctx);646void agx_dce(agx_context *ctx);647void agx_ra(agx_context *ctx);648void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);649650void agx_compute_liveness(agx_context *ctx);651void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);652653#endif654655656