/* Path: blob/21.2-virgl/src/freedreno/ir3/ir3_context.h (4565 views) */
/*1* Copyright (C) 2015-2018 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24*/2526#ifndef IR3_CONTEXT_H_27#define IR3_CONTEXT_H_2829#include "ir3.h"30#include "ir3_compiler.h"31#include "ir3_nir.h"3233/* for conditionally setting boolean flag(s): */34#define COND(bool, val) ((bool) ? (val) : 0)3536#define DBG(fmt, ...) \37do { \38mesa_logd("%s:%d: " fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__); \39} while (0)4041/**42* The context for compilation of a single shader.43*/44struct ir3_context {45struct ir3_compiler *compiler;46const struct ir3_context_funcs *funcs;4748struct nir_shader *s;4950struct nir_instr *cur_instr; /* current instruction, just for debug */5152struct ir3 *ir;53struct ir3_shader_variant *so;5455/* Tables of scalar inputs/outputs. 
Because of the way varying packing56* works, we could have inputs w/ fractional location, which is a bit57* awkward to deal with unless we keep track of the split scalar in/58* out components.59*60* These *only* have inputs/outputs that are touched by load_*input and61* store_output.62*/63unsigned ninputs, noutputs;64struct ir3_instruction **inputs;65struct ir3_instruction **outputs;6667struct ir3_block *block; /* the current block */68struct ir3_block *in_block; /* block created for shader inputs */6970nir_function_impl *impl;7172/* For fragment shaders, varyings are not actual shader inputs,73* instead the hw passes a ij coord which is used with74* bary.f.75*76* But NIR doesn't know that, it still declares varyings as77* inputs. So we do all the input tracking normally and fix78* things up after compile_instructions()79*/80struct ir3_instruction *ij[IJ_COUNT];8182/* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */83struct ir3_instruction *frag_face, *frag_coord;8485/* For vertex shaders, keep track of the system values sources */86struct ir3_instruction *vertex_id, *basevertex, *instance_id, *base_instance,87*draw_id, *view_index;8889/* For fragment shaders: */90struct ir3_instruction *samp_id, *samp_mask_in;9192/* For geometry shaders: */93struct ir3_instruction *primitive_id;94struct ir3_instruction *gs_header;9596/* For tessellation shaders: */97struct ir3_instruction *patch_vertices_in;98struct ir3_instruction *tcs_header;99struct ir3_instruction *tess_coord;100101/* Compute shader inputs: */102struct ir3_instruction *local_invocation_id, *work_group_id;103104/* mapping from nir_register to defining instruction: */105struct hash_table *def_ht;106107unsigned num_arrays;108109/* Tracking for max level of flowcontrol (branchstack) needed110* by a5xx+:111*/112unsigned stack, max_stack;113114unsigned loop_id;115116/* a common pattern for indirect addressing is to request the117* same address register multiple times. 
To avoid generating118* duplicate instruction sequences (which our backend does not119* try to clean up, since that should be done as the NIR stage)120* we cache the address value generated for a given src value:121*122* Note that we have to cache these per alignment, since same123* src used for an array of vec1 cannot be also used for an124* array of vec4.125*/126struct hash_table *addr0_ht[4];127128/* The same for a1.x. We only support immediate values for a1.x, as this129* is the only use so far.130*/131struct hash_table_u64 *addr1_ht;132133struct hash_table *sel_cond_conversions;134135/* last dst array, for indirect we need to insert a var-store.136*/137struct ir3_instruction **last_dst;138unsigned last_dst_n;139140/* maps nir_block to ir3_block, mostly for the purposes of141* figuring out the blocks successors142*/143struct hash_table *block_ht;144145/* maps nir_block at the top of a loop to ir3_block collecting continue146* edges.147*/148struct hash_table *continue_block_ht;149150/* on a4xx, bitmask of samplers which need astc+srgb workaround: */151unsigned astc_srgb;152153unsigned samples; /* bitmask of x,y sample shifts */154155unsigned max_texture_index;156157unsigned prefetch_limit;158159/* set if we encounter something we can't handle yet, so we160* can bail cleanly and fallback to TGSI compiler f/e161*/162bool error;163};164165struct ir3_context_funcs {166void (*emit_intrinsic_load_ssbo)(struct ir3_context *ctx,167nir_intrinsic_instr *intr,168struct ir3_instruction **dst);169void (*emit_intrinsic_store_ssbo)(struct ir3_context *ctx,170nir_intrinsic_instr *intr);171struct ir3_instruction *(*emit_intrinsic_atomic_ssbo)(172struct ir3_context *ctx, nir_intrinsic_instr *intr);173void (*emit_intrinsic_load_image)(struct ir3_context *ctx,174nir_intrinsic_instr *intr,175struct ir3_instruction **dst);176void (*emit_intrinsic_store_image)(struct ir3_context *ctx,177nir_intrinsic_instr *intr);178struct ir3_instruction *(*emit_intrinsic_atomic_image)(179struct 
ir3_context *ctx, nir_intrinsic_instr *intr);180void (*emit_intrinsic_image_size)(struct ir3_context *ctx,181nir_intrinsic_instr *intr,182struct ir3_instruction **dst);183void (*emit_intrinsic_load_global_ir3)(struct ir3_context *ctx,184nir_intrinsic_instr *intr,185struct ir3_instruction **dst);186void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx,187nir_intrinsic_instr *intr);188};189190extern const struct ir3_context_funcs ir3_a4xx_funcs;191extern const struct ir3_context_funcs ir3_a6xx_funcs;192193struct ir3_context *ir3_context_init(struct ir3_compiler *compiler,194struct ir3_shader_variant *so);195void ir3_context_free(struct ir3_context *ctx);196197struct ir3_instruction **ir3_get_dst_ssa(struct ir3_context *ctx,198nir_ssa_def *dst, unsigned n);199struct ir3_instruction **ir3_get_dst(struct ir3_context *ctx, nir_dest *dst,200unsigned n);201struct ir3_instruction *const *ir3_get_src(struct ir3_context *ctx,202nir_src *src);203void ir3_put_dst(struct ir3_context *ctx, nir_dest *dst);204struct ir3_instruction *ir3_create_collect(struct ir3_context *ctx,205struct ir3_instruction *const *arr,206unsigned arrsz);207void ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,208struct ir3_instruction *src, unsigned base, unsigned n);209void ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc);210void ir3_handle_nonuniform(struct ir3_instruction *instr,211nir_intrinsic_instr *intrin);212void emit_intrinsic_image_size_tex(struct ir3_context *ctx,213nir_intrinsic_instr *intr,214struct ir3_instruction **dst);215216#define ir3_collect(ctx, ...) 
\217({ \218struct ir3_instruction *__arr[] = {__VA_ARGS__}; \219ir3_create_collect(ctx, __arr, ARRAY_SIZE(__arr)); \220})221222NORETURN void ir3_context_error(struct ir3_context *ctx, const char *format,223...);224225#define compile_assert(ctx, cond) \226do { \227if (!(cond)) \228ir3_context_error((ctx), "failed assert: " #cond "\n"); \229} while (0)230231struct ir3_instruction *ir3_get_addr0(struct ir3_context *ctx,232struct ir3_instruction *src, int align);233struct ir3_instruction *ir3_get_addr1(struct ir3_context *ctx,234unsigned const_val);235struct ir3_instruction *ir3_get_predicate(struct ir3_context *ctx,236struct ir3_instruction *src);237238void ir3_declare_array(struct ir3_context *ctx, nir_register *reg);239struct ir3_array *ir3_get_array(struct ir3_context *ctx, nir_register *reg);240struct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx,241struct ir3_array *arr, int n,242struct ir3_instruction *address);243void ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr,244int n, struct ir3_instruction *src,245struct ir3_instruction *address);246247static inline type_t248utype_for_size(unsigned bit_size)249{250switch (bit_size) {251case 32:252return TYPE_U32;253case 16:254return TYPE_U16;255case 8:256return TYPE_U8;257default:258unreachable("bad bitsize");259return ~0;260}261}262263static inline type_t264utype_src(nir_src src)265{266return utype_for_size(nir_src_bit_size(src));267}268269static inline type_t270utype_dst(nir_dest dst)271{272return utype_for_size(nir_dest_bit_size(dst));273}274275#endif /* IR3_CONTEXT_H_ */276277278