Path: blob/21.2-virgl/src/panfrost/util/pan_ir.h
4560 views
/*1* Copyright (C) 2020 Collabora, Ltd.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#ifndef __PAN_IR_H24#define __PAN_IR_H2526#include <stdint.h>27#include "compiler/nir/nir.h"28#include "util/u_dynarray.h"29#include "util/hash_table.h"3031/* Define the general compiler entry point */3233#define MAX_SYSVAL_COUNT 323435/* Allow 2D of sysval IDs, while allowing nonparametric sysvals to equal36* their class for equal comparison */3738#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type)39#define PAN_SYSVAL_TYPE(sysval) ((sysval) & 0xffff)40#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16)4142/* Define some common types. We start at one for easy indexing of hash43* tables internal to the compiler */4445enum {46PAN_SYSVAL_VIEWPORT_SCALE = 1,47PAN_SYSVAL_VIEWPORT_OFFSET = 2,48PAN_SYSVAL_TEXTURE_SIZE = 3,49PAN_SYSVAL_SSBO = 4,50PAN_SYSVAL_NUM_WORK_GROUPS = 5,51PAN_SYSVAL_SAMPLER = 7,52PAN_SYSVAL_LOCAL_GROUP_SIZE = 8,53PAN_SYSVAL_WORK_DIM = 9,54PAN_SYSVAL_IMAGE_SIZE = 10,55PAN_SYSVAL_SAMPLE_POSITIONS = 11,56PAN_SYSVAL_MULTISAMPLED = 12,57PAN_SYSVAL_RT_CONVERSION = 13,58PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14,59PAN_SYSVAL_DRAWID = 15,60};6162#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \63((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))6465#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id) ((id) & 0x7f)66#define PAN_SYSVAL_ID_TO_TXS_DIM(id) (((id) >> 7) & 0x3)67#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) !!((id) & (1 << 9))6869/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be70* consistent with the blob so we can compare traces easier. */7172enum {73PAN_VERTEX_ID = 16,74PAN_INSTANCE_ID = 17,75PAN_MAX_ATTRIBUTE76};7778struct panfrost_sysvals {79/* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */80unsigned sysvals[MAX_SYSVAL_COUNT];81unsigned sysval_count;82};8384/* Technically Midgard could go up to 92 in a pathological case but we don't85* take advantage of that. Likewise Bifrost's FAU encoding can address 12886* words but actual implementations (G72, G76) are capped at 64 */8788#define PAN_MAX_PUSH 648990/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so91* an offset to a word must be < 2^16. There are less than 2^8 UBOs */9293struct panfrost_ubo_word {94uint16_t ubo;95uint16_t offset;96};9798struct panfrost_ubo_push {99unsigned count;100struct panfrost_ubo_word words[PAN_MAX_PUSH];101};102103/* Helper for searching the above. Note this is O(N) to the number of pushed104* constants, do not run in the draw call hot path */105106unsigned107pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs);108109struct hash_table_u64 *110panfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx);111112unsigned113pan_lookup_sysval(struct hash_table_u64 *sysval_to_id,114struct panfrost_sysvals *sysvals,115int sysval);116117int118panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);119120struct panfrost_compile_inputs {121unsigned gpu_id;122bool is_blend, is_blit;123struct {124unsigned rt;125unsigned nr_samples;126uint64_t bifrost_blend_desc;127} blend;128unsigned sysval_ubo;129bool shaderdb;130bool no_ubo_to_push;131132enum pipe_format rt_formats[8];133unsigned nr_cbufs;134};135136struct pan_shader_varying {137gl_varying_slot location;138enum pipe_format format;139};140141struct bifrost_shader_blend_info {142nir_alu_type type;143uint32_t return_offset;144145/* mali_bifrost_register_file_format corresponding to nir_alu_type */146unsigned format;147};148149struct bifrost_shader_info {150struct bifrost_shader_blend_info blend[8];151nir_alu_type blend_src1_type;152bool wait_6, wait_7;153154/* Packed, preloaded message descriptors */155uint16_t messages[2];156};157158struct midgard_shader_info {159unsigned first_tag;160};161162struct pan_shader_info {163gl_shader_stage stage;164unsigned work_reg_count;165unsigned tls_size;166unsigned wls_size;167168union {169struct {170bool reads_frag_coord;171bool reads_point_coord;172bool reads_face;173bool helper_invocations;174bool can_discard;175bool writes_depth;176bool writes_stencil;177bool writes_coverage;178bool sidefx;179bool reads_sample_id;180bool reads_sample_pos;181bool reads_sample_mask_in;182bool reads_helper_invocation;183bool sample_shading;184bool early_fragment_tests;185bool can_early_z, can_fpk;186BITSET_WORD outputs_read;187BITSET_WORD outputs_written;188} fs;189190struct {191bool writes_point_size;192} vs;193};194195bool separable;196bool contains_barrier;197bool writes_global;198uint64_t outputs_written;199200unsigned sampler_count;201unsigned texture_count;202unsigned ubo_count;203unsigned attribute_count;204205struct {206unsigned input_count;207struct pan_shader_varying input[MAX_VARYING];208unsigned output_count;209struct pan_shader_varying output[MAX_VARYING];210} varyings;211212struct panfrost_sysvals sysvals;213214/* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access215* Uniforms (Bifrost) */216struct panfrost_ubo_push push;217218uint32_t ubo_mask;219220union {221struct bifrost_shader_info bifrost;222struct midgard_shader_info midgard;223};224};225226typedef struct pan_block {227/* Link to next block. Must be first for mir_get_block */228struct list_head link;229230/* List of instructions emitted for the current block */231struct list_head instructions;232233/* Index of the block in source order */234unsigned name;235236/* Control flow graph */237struct pan_block *successors[2];238struct set *predecessors;239bool unconditional_jumps;240241/* In liveness analysis, these are live masks (per-component) for242* indices for the block. Scalar compilers have the luxury of using243* simple bit fields, but for us, liveness is a vector idea. */244uint16_t *live_in;245uint16_t *live_out;246} pan_block;247248struct pan_instruction {249struct list_head link;250};251252#define pan_foreach_instr_in_block_rev(block, v) \253list_for_each_entry_rev(struct pan_instruction, v, &block->instructions, link)254255#define pan_foreach_successor(blk, v) \256pan_block *v; \257pan_block **_v; \258for (_v = (pan_block **) &blk->successors[0], \259v = *_v; \260v != NULL && _v < (pan_block **) &blk->successors[2]; \261_v++, v = *_v) \262263#define pan_foreach_predecessor(blk, v) \264struct set_entry *_entry_##v; \265struct pan_block *v; \266for (_entry_##v = _mesa_set_next_entry(blk->predecessors, NULL), \267v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL); \268_entry_##v != NULL; \269_entry_##v = _mesa_set_next_entry(blk->predecessors, _entry_##v), \270v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL))271272static inline pan_block *273pan_exit_block(struct list_head *blocks)274{275pan_block *last = list_last_entry(blocks, pan_block, link);276assert(!last->successors[0] && !last->successors[1]);277return last;278}279280typedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max);281282void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask);283void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask);284bool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max);285286void pan_compute_liveness(struct list_head *blocks,287unsigned temp_count,288pan_liveness_update callback);289290void pan_free_liveness(struct list_head *blocks);291292uint16_t293pan_to_bytemask(unsigned bytes, unsigned mask);294295void pan_block_add_successor(pan_block *block, pan_block *successor);296297/* IR indexing */298#define PAN_IS_REG (1)299300static inline unsigned301pan_ssa_index(nir_ssa_def *ssa)302{303/* Off-by-one ensures BIR_NO_ARG is skipped */304return ((ssa->index + 1) << 1) | 0;305}306307static inline unsigned308pan_src_index(nir_src *src)309{310if (src->is_ssa)311return pan_ssa_index(src->ssa);312else {313assert(!src->reg.indirect);314return (src->reg.reg->index << 1) | PAN_IS_REG;315}316}317318static inline unsigned319pan_dest_index(nir_dest *dst)320{321if (dst->is_ssa)322return pan_ssa_index(&dst->ssa);323else {324assert(!dst->reg.indirect);325return (dst->reg.reg->index << 1) | PAN_IS_REG;326}327}328329/* IR printing helpers */330void pan_print_alu_type(nir_alu_type t, FILE *fp);331332/* Until it can be upstreamed.. */333bool pan_has_source_mod(nir_alu_src *src, nir_op op);334bool pan_has_dest_mod(nir_dest **dest, nir_op op);335336/* NIR passes to do some backend-specific lowering */337338#define PAN_WRITEOUT_C 1339#define PAN_WRITEOUT_Z 2340#define PAN_WRITEOUT_S 4341342bool pan_nir_reorder_writeout(nir_shader *nir);343bool pan_nir_lower_zs_store(nir_shader *nir);344345bool pan_nir_lower_64bit_intrin(nir_shader *shader);346347bool pan_lower_helper_invocation(nir_shader *shader);348bool pan_lower_sample_pos(nir_shader *shader);349350#endif351352353