Path: blob/21.2-virgl/src/panfrost/bifrost/compiler.h
/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *      Alyssa Rosenzweig <[email protected]>
 */

#ifndef __BIFROST_COMPILER_H
#define __BIFROST_COMPILER_H

#include "bifrost.h"
#include "bi_opcodes.h"
#include "compiler/nir/nir.h"
#include "panfrost/util/pan_ir.h"
#include "util/u_math.h"
#include "util/half_float.h"

/* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly.
 * To express widen, use the corresponding replicated form, i.e. H01 = identity
 * for widen = none, H00 for widen = h0, B1111 for widen = b1. For lane, also
 * use the replicated form (interpretation is governed by the opcode). For
 * 8-bit lanes with two channels, use replicated forms for replicated forms
 * (TODO: what about others?). For 8-bit lanes with four channels, use the
 * matching form (TODO: what about others?).
 */

enum bi_swizzle {
        /* 16-bit swizzle ordering deliberate for fast compute */
        BI_SWIZZLE_H00 = 0, /* = B0101 */
        BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */
        BI_SWIZZLE_H10 = 2, /* = B2301 */
        BI_SWIZZLE_H11 = 3, /* = B2323 */

        /* replication order should be maintained for fast compute */
        BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */
        BI_SWIZZLE_B1111 = 5,
        BI_SWIZZLE_B2222 = 6,
        BI_SWIZZLE_B3333 = 7,

        /* totally special for explicit pattern matching */
        BI_SWIZZLE_B0011 = 8, /* +SWZ.v4i8 */
        BI_SWIZZLE_B2233 = 9, /* +SWZ.v4i8 */
        BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */
        BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */

        BI_SWIZZLE_B0022 = 12, /* for b02 lanes */
};

enum bi_index_type {
        BI_INDEX_NULL = 0,
        BI_INDEX_NORMAL = 1,
        BI_INDEX_REGISTER = 2,
        BI_INDEX_CONSTANT = 3,
        BI_INDEX_PASS = 4,
        BI_INDEX_FAU = 5
};

typedef struct {
        uint32_t value;

        /* modifiers, should only be set if applicable for a given instruction.
         * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where
         * applicable, neg plays the role of not */
        bool abs : 1;
        bool neg : 1;

        /* For a source, the swizzle. For a destination, acts a bit like a
         * write mask. Identity for the full 32-bit, H00 for only caring about
         * the lower half, other values unused. */
        enum bi_swizzle swizzle : 4;
        uint32_t offset : 2;
        bool reg : 1;
        enum bi_index_type type : 3;
} bi_index;
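/* Illustrative sketch (not part of the upstream header): modifiers and
 * swizzles compose on the index itself, so a negated read of the upper half
 * of a 32-bit value is expressed entirely in the operand, using the helpers
 * defined below:
 *
 *    bi_index x  = bi_temp(ctx);             // fresh 32-bit SSA value
 *    bi_index hi = bi_neg(bi_half(x, true)); // -x, upper 16-bit half
 *
 * For destinations, only H01 (full 32-bit write) and H00 (low half) are
 * meaningful, per the comment on the swizzle field above.
 */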
static inline bi_index
bi_get_index(unsigned value, bool is_reg, unsigned offset)
{
        return (bi_index) {
                .type = BI_INDEX_NORMAL,
                .value = value,
                .swizzle = BI_SWIZZLE_H01,
                .offset = offset,
                .reg = is_reg,
        };
}

static inline bi_index
bi_register(unsigned reg)
{
        assert(reg < 64);

        return (bi_index) {
                .type = BI_INDEX_REGISTER,
                .swizzle = BI_SWIZZLE_H01,
                .value = reg
        };
}

static inline bi_index
bi_imm_u32(uint32_t imm)
{
        return (bi_index) {
                .type = BI_INDEX_CONSTANT,
                .swizzle = BI_SWIZZLE_H01,
                .value = imm
        };
}

static inline bi_index
bi_imm_f32(float imm)
{
        return bi_imm_u32(fui(imm));
}

static inline bi_index
bi_null()
{
        return (bi_index) { .type = BI_INDEX_NULL };
}

static inline bi_index
bi_zero()
{
        return bi_imm_u32(0);
}

static inline bi_index
bi_passthrough(enum bifrost_packed_src value)
{
        return (bi_index) {
                .type = BI_INDEX_PASS,
                .swizzle = BI_SWIZZLE_H01,
                .value = value
        };
}

/* Read back power-efficient garbage, TODO maybe merge with null? */
static inline bi_index
bi_dontcare()
{
        return bi_passthrough(BIFROST_SRC_FAU_HI);
}

/* Extracts a word from a vectored index */
static inline bi_index
bi_word(bi_index idx, unsigned component)
{
        idx.offset += component;
        return idx;
}

/* Helps construct swizzles */
static inline bi_index
bi_swz_16(bi_index idx, bool x, bool y)
{
        assert(idx.swizzle == BI_SWIZZLE_H01);
        idx.swizzle = BI_SWIZZLE_H00 | (x << 1) | y;
        return idx;
}

static inline bi_index
bi_half(bi_index idx, bool upper)
{
        return bi_swz_16(idx, upper, upper);
}

static inline bi_index
bi_byte(bi_index idx, unsigned lane)
{
        assert(idx.swizzle == BI_SWIZZLE_H01);
        assert(lane < 4);
        idx.swizzle = BI_SWIZZLE_B0000 + lane;
        return idx;
}

static inline bi_index
bi_abs(bi_index idx)
{
        idx.abs = true;
        return idx;
}

static inline bi_index
bi_neg(bi_index idx)
{
        idx.neg ^= true;
        return idx;
}

/* Additive identity in IEEE 754 arithmetic */
static inline bi_index
bi_negzero()
{
        return bi_neg(bi_zero());
}

/* Replaces an index, preserving any modifiers */

static inline bi_index
bi_replace_index(bi_index old, bi_index replacement)
{
        replacement.abs = old.abs;
        replacement.neg = old.neg;
        replacement.swizzle = old.swizzle;
        return replacement;
}

/* For bitwise instructions */
#define bi_not(x) bi_neg(x)

static inline bi_index
bi_imm_u8(uint8_t imm)
{
        return bi_byte(bi_imm_u32(imm), 0);
}

static inline bi_index
bi_imm_u16(uint16_t imm)
{
        return bi_half(bi_imm_u32(imm), false);
}

static inline bi_index
bi_imm_uintN(uint32_t imm, unsigned sz)
{
        assert(sz == 8 || sz == 16 || sz == 32);
        return (sz == 8) ? bi_imm_u8(imm) :
                (sz == 16) ? bi_imm_u16(imm) :
                bi_imm_u32(imm);
}

static inline bi_index
bi_imm_f16(float imm)
{
        return bi_imm_u16(_mesa_float_to_half(imm));
}

static inline bool
bi_is_null(bi_index idx)
{
        return idx.type == BI_INDEX_NULL;
}

static inline bool
bi_is_ssa(bi_index idx)
{
        return idx.type == BI_INDEX_NORMAL && !idx.reg;
}

/* Compares equivalence as references. Does not compare offsets, swizzles, or
 * modifiers. In other words, this forms bi_index equivalence classes by
 * partitioning memory. E.g. -abs(foo[1].yx) == foo.xy but foo != bar */

static inline bool
bi_is_equiv(bi_index left, bi_index right)
{
        return (left.type == right.type) &&
                (left.reg == right.reg) &&
                (left.value == right.value);
}

/* A stronger equivalence relation that requires the indices access the
 * same offset, useful for RA/scheduling to see what registers will
 * correspond to */

static inline bool
bi_is_word_equiv(bi_index left, bi_index right)
{
        return bi_is_equiv(left, right) && left.offset == right.offset;
}
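/* Illustrative sketch (not part of the upstream header): for the same
 * underlying value, modifiers and swizzles do not affect equivalence, while
 * offsets only affect word equivalence:
 *
 *    bi_index a = bi_temp(ctx);
 *    bi_is_equiv(bi_neg(bi_half(a, true)), a)   -> true
 *    bi_is_word_equiv(bi_word(a, 1), a)         -> false (offsets differ)
 *    bi_is_equiv(a, bi_temp(ctx))               -> false (different value)
 */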
#define BI_MAX_DESTS 2
#define BI_MAX_SRCS 4

typedef struct {
        /* Must be first */
        struct list_head link;

        /* Link for the use chain */
        struct list_head use;

        enum bi_opcode op;

        /* Data flow */
        bi_index dest[BI_MAX_DESTS];
        bi_index src[BI_MAX_SRCS];

        /* For a branch */
        struct bi_block *branch_target;

        /* These don't fit neatly with anything else.. */
        enum bi_register_format register_format;
        enum bi_vecsize vecsize;

        /* Can we spill the value written here? Used to prevent
         * useless double fills */
        bool no_spill;

        /* Override table, inducing a DTSEL_IMM pair if nonzero */
        enum bi_table table;

        /* Everything after this MUST NOT be accessed directly, since
         * interpretation depends on opcodes */

        /* Destination modifiers */
        union {
                enum bi_clamp clamp;
                bool saturate;
                bool not_result;
                unsigned dest_mod;
        };

        /* Immediates. All seen alone in an instruction, except for varying/texture
         * which are specified jointly for VARTEX */
        union {
                uint32_t shift;
                uint32_t fill;
                uint32_t index;
                uint32_t attribute_index;

                struct {
                        uint32_t varying_index;
                        uint32_t sampler_index;
                        uint32_t texture_index;
                };

                /* TEXC, ATOM_CX: # of staging registers used */
                uint32_t sr_count;
        };

        /* Modifiers specific to particular instructions are thrown in a union */
        union {
                enum bi_adj adj; /* FEXP_TABLE.u4 */
                enum bi_atom_opc atom_opc; /* atomics */
                enum bi_func func; /* FPOW_SC_DET */
                enum bi_function function; /* LD_VAR_FLAT */
                enum bi_mux mux; /* MUX */
                enum bi_sem sem; /* FMAX, FMIN */
                enum bi_source source; /* LD_GCLK */
                bool scale; /* VN_ASST2, FSINCOS_OFFSET */
                bool offset; /* FSIN_TABLE, FOCS_TABLE */
                bool mask; /* CLZ */
                bool threads; /* IMULD, IMOV_FMA */
                bool combine; /* BRANCHC */
                bool format; /* LEA_TEX */

                struct {
                        enum bi_special special; /* FADD_RSCALE, FMA_RSCALE */
                        enum bi_round round; /* FMA, converts, FADD, _RSCALE, etc */
                };

                struct {
                        enum bi_result_type result_type; /* FCMP, ICMP */
                        enum bi_cmpf cmpf; /* CSEL, FCMP, ICMP, BRANCH */
                };

                struct {
                        enum bi_stack_mode stack_mode; /* JUMP_EX */
                        bool test_mode;
                };

                struct {
                        enum bi_seg seg; /* LOAD, STORE, SEG_ADD, SEG_SUB */
                        bool preserve_null; /* SEG_ADD, SEG_SUB */
                        enum bi_extend extend; /* LOAD, IMUL */
                };

                struct {
                        enum bi_sample sample; /* VAR_TEX, LD_VAR */
                        enum bi_update update; /* VAR_TEX, LD_VAR */
                        enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */
                        bool skip; /* VAR_TEX, TEXS, TEXC */
                        bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */
                };

                /* Maximum size, for hashing */
                unsigned flags[5];

                struct {
                        enum bi_subgroup subgroup; /* WMASK, CLPER */
                        enum bi_inactive_result inactive_result; /* CLPER */
                        enum bi_lane_op lane_op; /* CLPER */
                };

                struct {
                        bool z; /* ZS_EMIT */
                        bool stencil; /* ZS_EMIT */
                };

                struct {
                        bool h; /* VN_ASST1.f16 */
                        bool l; /* VN_ASST1.f16 */
                };

                struct {
                        bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */
                        bool result_word;
                };

                struct {
                        bool sqrt; /* FREXPM */
                        bool log; /* FREXPM */
                };

                struct {
                        enum bi_mode mode; /* FLOG_TABLE */
                        enum bi_precision precision; /* FLOG_TABLE */
                        bool divzero; /* FRSQ_APPROX, FRSQ */
                };
        };
} bi_instr;
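/* Illustrative sketch (not part of the upstream header): the trailing unions
 * are interpreted according to the opcode. A hypothetical pass matching
 * saturated multiply-adds (assuming the generated BI_OPCODE_FMA_F32 and
 * BI_CLAMP_CLAMP_0_1 names from bi_opcodes.h) might check the destination
 * modifier like so:
 *
 *    bi_foreach_instr_global(ctx, I) {
 *            if (I->op == BI_OPCODE_FMA_F32 && I->clamp == BI_CLAMP_CLAMP_0_1)
 *                    ...;
 *    }
 *
 * Reading I->cmpf or I->seg on the same instruction would be meaningless,
 * since those fields are only defined for the opcodes noted in the comments.
 */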
/* Represents the assignment of slots for a given bi_tuple */

typedef struct {
        /* Register to assign to each slot */
        unsigned slot[4];

        /* Read slots can be disabled */
        bool enabled[2];

        /* Configuration for slots 2/3 */
        struct bifrost_reg_ctrl_23 slot23;

        /* Fast-Access-Uniform RAM index */
        uint8_t fau_idx;

        /* Whether writes are actually for the last instruction */
        bool first_instruction;
} bi_registers;

/* A bi_tuple contains two paired instruction pointers. If a slot is unfilled,
 * leave it NULL; the emitter will fill in a nop. Instructions reference
 * registers via slots which are assigned per tuple.
 */

typedef struct {
        uint8_t fau_idx;
        bi_registers regs;
        bi_instr *fma;
        bi_instr *add;
} bi_tuple;

struct bi_block;

typedef struct {
        struct list_head link;

        /* Link back up for branch calculations */
        struct bi_block *block;

        /* Architectural limit of 8 tuples/clause */
        unsigned tuple_count;
        bi_tuple tuples[8];

        /* For scoreboarding -- the clause ID (this is not globally unique!)
         * and its dependencies in terms of other clauses, computed during
         * scheduling and used when emitting code. Dependencies expressed as a
         * bitfield matching the hardware, except shifted by a clause (the
         * shift back to the ISA's off-by-one encoding is worked out when
         * emitting clauses) */
        unsigned scoreboard_id;
        uint8_t dependencies;

        /* See ISA header for description */
        enum bifrost_flow flow_control;

        /* Can we prefetch the next clause? Usually it makes sense, except for
         * clauses ending in unconditional branches */
        bool next_clause_prefetch;

        /* Assigned data register */
        unsigned staging_register;

        /* Corresponds to the usual bit but shifted by a clause */
        bool staging_barrier;

        /* Constants read by this clause. ISA limit. Must satisfy:
         *
         *      constant_count + tuple_count <= 13
         *
         * Also implicitly constant_count <= tuple_count since a tuple only
         * reads a single constant.
         */
        uint64_t constants[8];
        unsigned constant_count;

        /* Index of a constant to be PC-relative */
        unsigned pcrel_idx;

        /* Branches encode a constant offset relative to the program counter
         * with some magic flags. By convention, if there is a branch, its
         * constant will be last. Set this flag to indicate this is required.
         */
        bool branch_constant;

        /* Unique in a clause */
        enum bifrost_message_type message_type;
        bi_instr *message;

        /* Discard helper threads */
        bool td;
} bi_clause;
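/* Illustrative sketch (not part of the upstream header): the constant budget
 * trades off against the tuple count when deciding whether another tuple fits
 * in a clause (see bi_can_insert_tuple below). For example, a clause with 6
 * tuples may embed at most 13 - 6 = 7 constants; if it already holds 7,
 * inserting a 7th tuple would need 7 + 7 = 14 > 13 slots and therefore forces
 * a new clause, even before counting that tuple's own constant.
 */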
typedef struct bi_block {
        pan_block base; /* must be first */

        /* If true, uses clauses; if false, uses instructions */
        bool scheduled;
        struct list_head clauses; /* list of bi_clause */

        /* Post-RA liveness */
        uint64_t reg_live_in, reg_live_out;

        /* Flags available for pass-internal use */
        uint8_t pass_flags;
} bi_block;

typedef struct {
        const struct panfrost_compile_inputs *inputs;
        nir_shader *nir;
        struct pan_shader_info *info;
        gl_shader_stage stage;
        struct list_head blocks; /* list of bi_block */
        struct hash_table_u64 *sysval_to_id;
        uint32_t quirks;
        unsigned arch;

        /* During NIR->BIR */
        bi_block *current_block;
        bi_block *after_block;
        bi_block *break_block;
        bi_block *continue_block;
        bool emitted_atest;

        /* For creating temporaries */
        unsigned ssa_alloc;
        unsigned reg_alloc;

        /* Analysis results */
        bool has_liveness;

        /* Mask of UBOs that need to be uploaded */
        uint32_t ubo_mask;

        /* Stats for shader-db */
        unsigned instruction_count;
        unsigned loop_count;
        unsigned spills;
        unsigned fills;
} bi_context;

static inline void
bi_remove_instruction(bi_instr *ins)
{
        list_del(&ins->link);
}

enum bir_fau {
        BIR_FAU_ZERO = 0,
        BIR_FAU_LANE_ID = 1,
        BIR_FAU_WRAP_ID = 2,
        BIR_FAU_CORE_ID = 3,
        BIR_FAU_FB_EXTENT = 4,
        BIR_FAU_ATEST_PARAM = 5,
        BIR_FAU_SAMPLE_POS_ARRAY = 6,
        BIR_FAU_BLEND_0 = 8,
        /* blend descs 1 - 7 */
        BIR_FAU_TYPE_MASK = 15,
        BIR_FAU_UNIFORM = (1 << 7),
        BIR_FAU_HI = (1 << 8),
};

static inline bi_index
bi_fau(enum bir_fau value, bool hi)
{
        return (bi_index) {
                .type = BI_INDEX_FAU,
                .value = value,
                .swizzle = BI_SWIZZLE_H01,
                .offset = hi ? 1 : 0
        };
}

static inline unsigned
bi_max_temp(bi_context *ctx)
{
        return (MAX2(ctx->reg_alloc, ctx->ssa_alloc) + 2) << 1;
}

static inline bi_index
bi_temp(bi_context *ctx)
{
        return bi_get_index(ctx->ssa_alloc++, false, 0);
}

static inline bi_index
bi_temp_reg(bi_context *ctx)
{
        return bi_get_index(ctx->reg_alloc++, true, 0);
}
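/* Illustrative sketch (not part of the upstream header): each FAU slot is 64
 * bits wide and the 'hi' flag selects its upper 32-bit word. Reading the lane
 * ID, or (assuming the convention used by the FAU/UBO lowering passes) the
 * upper word of the second pushed uniform slot, might look like:
 *
 *    bi_index lane = bi_fau(BIR_FAU_LANE_ID, false);
 *    bi_index u3   = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 1), true);
 */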
/* Inline constants automatically, will be lowered out by bi_lower_fau where a
 * constant is not allowed. load_const_to_scalar guarantees that this makes
 * sense */

static inline bi_index
bi_src_index(nir_src *src)
{
        if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32)
                return bi_imm_u32(nir_src_as_uint(*src));
        else if (src->is_ssa)
                return bi_get_index(src->ssa->index, false, 0);
        else {
                assert(!src->reg.indirect);
                return bi_get_index(src->reg.reg->index, true, 0);
        }
}

static inline bi_index
bi_dest_index(nir_dest *dst)
{
        if (dst->is_ssa)
                return bi_get_index(dst->ssa.index, false, 0);
        else {
                assert(!dst->reg.indirect);
                return bi_get_index(dst->reg.reg->index, true, 0);
        }
}

static inline unsigned
bi_get_node(bi_index index)
{
        if (bi_is_null(index) || index.type != BI_INDEX_NORMAL)
                return ~0;
        else
                return (index.value << 1) | index.reg;
}

static inline bi_index
bi_node_to_index(unsigned node, unsigned node_count)
{
        assert(node < node_count);
        assert(node_count < ~0);

        return bi_get_index(node >> 1, node & PAN_IS_REG, 0);
}
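/* Illustrative sketch (not part of the upstream header): node numbering
 * interleaves SSA values and registers by using the low bit as the register
 * flag, which is why bi_max_temp shifts its allocation count left by one:
 *
 *    bi_get_node(bi_get_index(7, false, 0)) == 14   // SSA value 7
 *    bi_get_node(bi_get_index(7, true, 0))  == 15   // register 7
 *    bi_node_to_index(15, 16)                        // back to register 7
 */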
/* Iterators for Bifrost IR */

#define bi_foreach_block(ctx, v) \
        list_for_each_entry(pan_block, v, &ctx->blocks, link)

#define bi_foreach_block_rev(ctx, v) \
        list_for_each_entry_rev(pan_block, v, &ctx->blocks, link)

#define bi_foreach_block_from(ctx, from, v) \
        list_for_each_entry_from(pan_block, v, from, &ctx->blocks, link)

#define bi_foreach_block_from_rev(ctx, from, v) \
        list_for_each_entry_from_rev(pan_block, v, from, &ctx->blocks, link)

#define bi_foreach_instr_in_block(block, v) \
        list_for_each_entry(bi_instr, v, &(block)->base.instructions, link)

#define bi_foreach_instr_in_block_rev(block, v) \
        list_for_each_entry_rev(bi_instr, v, &(block)->base.instructions, link)

#define bi_foreach_instr_in_block_safe(block, v) \
        list_for_each_entry_safe(bi_instr, v, &(block)->base.instructions, link)

#define bi_foreach_instr_in_block_safe_rev(block, v) \
        list_for_each_entry_safe_rev(bi_instr, v, &(block)->base.instructions, link)

#define bi_foreach_instr_in_block_from(block, v, from) \
        list_for_each_entry_from(bi_instr, v, from, &(block)->base.instructions, link)

#define bi_foreach_instr_in_block_from_rev(block, v, from) \
        list_for_each_entry_from_rev(bi_instr, v, from, &(block)->base.instructions, link)

#define bi_foreach_clause_in_block(block, v) \
        list_for_each_entry(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_rev(block, v) \
        list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_safe(block, v) \
        list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from(block, v, from) \
        list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from_rev(block, v, from) \
        list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_instr_global(ctx, v) \
        bi_foreach_block(ctx, v_block) \
                bi_foreach_instr_in_block((bi_block *) v_block, v)

#define bi_foreach_instr_global_rev(ctx, v) \
        bi_foreach_block_rev(ctx, v_block) \
                bi_foreach_instr_in_block_rev((bi_block *) v_block, v)

#define bi_foreach_instr_global_safe(ctx, v) \
        bi_foreach_block(ctx, v_block) \
                bi_foreach_instr_in_block_safe((bi_block *) v_block, v)

#define bi_foreach_instr_global_rev_safe(ctx, v) \
        bi_foreach_block_rev(ctx, v_block) \
                bi_foreach_instr_in_block_safe_rev((bi_block *) v_block, v)

#define bi_foreach_instr_in_tuple(tuple, v) \
        for (bi_instr *v = (tuple)->fma ?: (tuple)->add; \
                        v != NULL; \
                        v = (v == (tuple)->add) ? NULL : (tuple)->add)

/* Based on set_foreach, expanded with automatic type casts */

#define bi_foreach_predecessor(blk, v) \
        struct set_entry *_entry_##v; \
        bi_block *v; \
        for (_entry_##v = _mesa_set_next_entry(blk->base.predecessors, NULL), \
                v = (bi_block *) (_entry_##v ? _entry_##v->key : NULL); \
                _entry_##v != NULL; \
                _entry_##v = _mesa_set_next_entry(blk->base.predecessors, _entry_##v), \
                v = (bi_block *) (_entry_##v ? _entry_##v->key : NULL))

#define bi_foreach_src(ins, v) \
        for (unsigned v = 0; v < ARRAY_SIZE(ins->src); ++v)

#define bi_foreach_dest(ins, v) \
        for (unsigned v = 0; v < ARRAY_SIZE(ins->dest); ++v)

#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s) \
        bi_foreach_instr_in_tuple(tuple, ins) \
                bi_foreach_src(ins, s)
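/* Illustrative sketch (not part of the upstream header): a typical pass walks
 * every instruction and its sources with these iterators. A hypothetical pass
 * counting reads of a given index could look like:
 *
 *    unsigned uses = 0;
 *    bi_foreach_instr_global(ctx, I) {
 *            bi_foreach_src(I, s) {
 *                    if (bi_is_equiv(I->src[s], needle))
 *                            uses++;
 *            }
 *    }
 *
 * Use the _safe variants when removing instructions during the walk.
 */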
static inline bi_instr *
bi_prev_op(bi_instr *ins)
{
        return list_last_entry(&(ins->link), bi_instr, link);
}

static inline bi_instr *
bi_next_op(bi_instr *ins)
{
        return list_first_entry(&(ins->link), bi_instr, link);
}

static inline pan_block *
pan_next_block(pan_block *block)
{
        return list_first_entry(&(block->link), pan_block, link);
}

/* BIR manipulation */

bool bi_has_arg(bi_instr *ins, bi_index arg);
unsigned bi_count_read_registers(bi_instr *ins, unsigned src);
unsigned bi_count_write_registers(bi_instr *ins, unsigned dest);
bool bi_is_regfmt_16(enum bi_register_format fmt);
unsigned bi_writemask(bi_instr *ins, unsigned dest);
bi_clause * bi_next_clause(bi_context *ctx, pan_block *block, bi_clause *clause);
bool bi_side_effects(enum bi_opcode op);

void bi_print_instr(bi_instr *I, FILE *fp);
void bi_print_slots(bi_registers *regs, FILE *fp);
void bi_print_tuple(bi_tuple *tuple, FILE *fp);
void bi_print_clause(bi_clause *clause, FILE *fp);
void bi_print_block(bi_block *block, FILE *fp);
void bi_print_shader(bi_context *ctx, FILE *fp);

/* BIR passes */

void bi_analyze_helper_terminate(bi_context *ctx);
void bi_analyze_helper_requirements(bi_context *ctx);
void bi_opt_copy_prop(bi_context *ctx);
void bi_opt_cse(bi_context *ctx);
void bi_opt_mod_prop_forward(bi_context *ctx);
void bi_opt_mod_prop_backward(bi_context *ctx);
void bi_opt_dead_code_eliminate(bi_context *ctx);
void bi_opt_dce_post_ra(bi_context *ctx);
void bi_opt_push_ubo(bi_context *ctx);
void bi_opt_constant_fold(bi_context *ctx);
void bi_lower_swizzle(bi_context *ctx);
void bi_lower_fau(bi_context *ctx);
void bi_schedule(bi_context *ctx);
void bi_assign_scoreboard(bi_context *ctx);
void bi_register_allocate(bi_context *ctx);

/* Test suite */
int bi_test_scheduler(void);
int bi_test_packing(void);
int bi_test_packing_formats(void);

/* Liveness */

void bi_compute_liveness(bi_context *ctx);
void bi_liveness_ins_update(uint16_t *live, bi_instr *ins, unsigned max);
void bi_invalidate_liveness(bi_context *ctx);

void bi_postra_liveness(bi_context *ctx);
uint64_t bi_postra_liveness_ins(uint64_t live, bi_instr *ins);

/* Layout */

bool bi_can_insert_tuple(bi_clause *clause, bool constant);
unsigned bi_clause_quadwords(bi_clause *clause);
signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target);
bool bi_ec0_packed(unsigned tuple_count);

/* Check if there are no more instructions starting with a given block, this
 * needs to recurse in case a shader ends with multiple empty blocks */

static inline bool
bi_is_terminal_block(bi_block *block)
{
        return (block == NULL) ||
                (list_is_empty(&block->base.instructions) &&
                 bi_is_terminal_block((bi_block *) block->base.successors[0]) &&
                 bi_is_terminal_block((bi_block *) block->base.successors[1]));
}

/* Code emit */

/* Returns the size of the final clause */
unsigned bi_pack(bi_context *ctx, struct util_dynarray *emission);

struct bi_packed_tuple {
        uint64_t lo;
        uint64_t hi;
};

void
bi_pack_format(struct util_dynarray *emission,
                unsigned index,
                struct bi_packed_tuple *tuples,
                ASSERTED unsigned tuple_count,
                uint64_t header, uint64_t ec0,
                unsigned m0, bool z);

unsigned bi_pack_fma(bi_instr *I,
                enum bifrost_packed_src src0,
                enum bifrost_packed_src src1,
                enum bifrost_packed_src src2,
                enum bifrost_packed_src src3);
unsigned bi_pack_add(bi_instr *I,
                enum bifrost_packed_src src0,
                enum bifrost_packed_src src1,
                enum bifrost_packed_src src2,
                enum bifrost_packed_src src3);

/* Like in NIR, for use with the builder */

enum bi_cursor_option {
        bi_cursor_after_block,
        bi_cursor_before_instr,
        bi_cursor_after_instr
};

typedef struct {
        enum bi_cursor_option option;

        union {
                bi_block *block;
                bi_instr *instr;
        };
} bi_cursor;

static inline bi_cursor
bi_after_block(bi_block *block)
{
        return (bi_cursor) {
                .option = bi_cursor_after_block,
                .block = block
        };
}

static inline bi_cursor
bi_before_instr(bi_instr *instr)
{
        return (bi_cursor) {
                .option = bi_cursor_before_instr,
                .instr = instr
        };
}

static inline bi_cursor
bi_after_instr(bi_instr *instr)
{
        return (bi_cursor) {
                .option = bi_cursor_after_instr,
                .instr = instr
        };
}

/* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause,
 * in which case there must exist a nonempty penultimate tuple */

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_tuple(bi_tuple *tuple)
{
        bi_instr *instr = tuple->fma ?: tuple->add;
        assert(instr != NULL);
        return instr;
}

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_clause(bi_clause *clause)
{
        return bi_first_instr_in_tuple(&clause->tuples[0]);
}

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_last_instr_in_clause(bi_clause *clause)
{
        bi_tuple tuple = clause->tuples[clause->tuple_count - 1];
        bi_instr *instr = tuple.add ?: tuple.fma;

        if (!instr) {
                assert(clause->tuple_count >= 2);
                tuple = clause->tuples[clause->tuple_count - 2];
                instr = tuple.add ?: tuple.fma;
        }

        assert(instr != NULL);
        return instr;
}

/* Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start
 * (end) of the clause and adding a condition for the clause boundary */

#define bi_foreach_instr_in_clause(block, clause, pos) \
        for (bi_instr *pos = LIST_ENTRY(bi_instr, bi_first_instr_in_clause(clause), link); \
                (&pos->link != &(block)->base.instructions) \
                        && (pos != bi_next_op(bi_last_instr_in_clause(clause))); \
                pos = LIST_ENTRY(bi_instr, pos->link.next, link))

#define bi_foreach_instr_in_clause_rev(block, clause, pos) \
        for (bi_instr *pos = LIST_ENTRY(bi_instr, bi_last_instr_in_clause(clause), link); \
                (&pos->link != &(block)->base.instructions) \
                        && pos != bi_prev_op(bi_first_instr_in_clause(clause)); \
                pos = LIST_ENTRY(bi_instr, pos->link.prev, link))

static inline bi_cursor
bi_before_clause(bi_clause *clause)
{
        return bi_before_instr(bi_first_instr_in_clause(clause));
}

static inline bi_cursor
bi_before_tuple(bi_tuple *tuple)
{
        return bi_before_instr(bi_first_instr_in_tuple(tuple));
}

static inline bi_cursor
bi_after_clause(bi_clause *clause)
{
        return bi_after_instr(bi_last_instr_in_clause(clause));
}

/* IR builder in terms of cursor infrastructure */

typedef struct {
        bi_context *shader;
        bi_cursor cursor;
} bi_builder;

static inline bi_builder
bi_init_builder(bi_context *ctx, bi_cursor cursor)
{
        return (bi_builder) {
                .shader = ctx,
                .cursor = cursor
        };
}
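/* Illustrative sketch (not part of the upstream header): passes typically
 * point a builder at an existing instruction and let the generated
 * constructors (from bi_builder.h, not shown here) insert through
 * bi_builder_insert below:
 *
 *    bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
 *    ... emit replacement instructions at b.cursor ...
 *
 * Each insertion advances the cursor, so consecutively emitted instructions
 * stay in program order.
 */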
/* Insert an instruction at the cursor and move the cursor */

static inline void
bi_builder_insert(bi_cursor *cursor, bi_instr *I)
{
        switch (cursor->option) {
        case bi_cursor_after_instr:
                list_add(&I->link, &cursor->instr->link);
                cursor->instr = I;
                return;

        case bi_cursor_after_block:
                list_addtail(&I->link, &cursor->block->base.instructions);
                cursor->option = bi_cursor_after_instr;
                cursor->instr = I;
                return;

        case bi_cursor_before_instr:
                list_addtail(&I->link, &cursor->instr->link);
                cursor->option = bi_cursor_after_instr;
                cursor->instr = I;
                return;
        }

        unreachable("Invalid cursor option");
}

static inline unsigned
bi_word_node(bi_index idx)
{
        assert(idx.type == BI_INDEX_NORMAL && !idx.reg);
        return (idx.value << 2) | idx.offset;
}

/* NIR passes */

bool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes);

#endif