Path: blob/21.2-virgl/src/freedreno/ir3/ir3_ra.h
4565 views
/*1* Copyright (C) 2021 Valve Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#ifndef _IR3_RA_H24#define _IR3_RA_H2526#include "util/rb_tree.h"27#include "ir3.h"28#include "ir3_compiler.h"2930#ifdef DEBUG31#define RA_DEBUG (ir3_shader_debug & IR3_DBG_RAMSGS)32#else33#define RA_DEBUG 034#endif35#define d(fmt, ...) \36do { \37if (RA_DEBUG) { \38printf("RA: " fmt "\n", ##__VA_ARGS__); \39} \40} while (0)4142#define di(instr, fmt, ...) \43do { \44if (RA_DEBUG) { \45printf("RA: " fmt ": ", ##__VA_ARGS__); \46ir3_print_instr(instr); \47} \48} while (0)4950typedef uint16_t physreg_t;5152static inline unsigned53ra_physreg_to_num(physreg_t physreg, unsigned flags)54{55if (!(flags & IR3_REG_HALF))56physreg /= 2;57if (flags & IR3_REG_SHARED)58physreg += 48 * 4;59return physreg;60}6162static inline physreg_t63ra_num_to_physreg(unsigned num, unsigned flags)64{65if (flags & IR3_REG_SHARED)66num -= 48 * 4;67if (!(flags & IR3_REG_HALF))68num *= 2;69return num;70}7172static inline unsigned73ra_reg_get_num(const struct ir3_register *reg)74{75return (reg->flags & IR3_REG_ARRAY) ? reg->array.base : reg->num;76}7778static inline physreg_t79ra_reg_get_physreg(const struct ir3_register *reg)80{81return ra_num_to_physreg(ra_reg_get_num(reg), reg->flags);82}8384static inline bool85def_is_gpr(const struct ir3_register *reg)86{87return reg_num(reg) != REG_A0 && reg_num(reg) != REG_P0;88}8990/* Note: don't count undef as a source.91*/92static inline bool93ra_reg_is_src(const struct ir3_register *reg)94{95return (reg->flags & IR3_REG_SSA) && reg->def && def_is_gpr(reg->def);96}9798static inline bool99ra_reg_is_dst(const struct ir3_register *reg)100{101return (reg->flags & IR3_REG_SSA) && def_is_gpr(reg) &&102((reg->flags & IR3_REG_ARRAY) || reg->wrmask);103}104105/* Iterators for sources and destinations which:106* - Don't include fake sources (irrelevant for RA)107* - Don't include non-SSA sources (immediates and constants, also irrelevant)108* - Consider array destinations as both a source and a destination109*/110111#define ra_foreach_src(__srcreg, __instr) \112for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \113for (unsigned __cnt = (__instr)->srcs_count, __i = 0; __i < __cnt; \114__i++) \115if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i])))116117#define ra_foreach_src_rev(__srcreg, __instr) \118for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \119for (int __cnt = (__instr)->srcs_count, __i = __cnt - 1; __i >= 0; \120__i--) \121if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i])))122123#define ra_foreach_dst(__dstreg, __instr) \124for (struct ir3_register *__dstreg = (void *)~0; __dstreg; __dstreg = NULL) \125for (unsigned __cnt = (__instr)->dsts_count, __i = 0; __i < __cnt; \126__i++) \127if (ra_reg_is_dst((__dstreg = (__instr)->dsts[__i])))128129#define RA_HALF_SIZE (4 * 48)130#define RA_FULL_SIZE (4 * 48 * 2)131#define RA_SHARED_SIZE (2 * 4 * 8)132#define RA_MAX_FILE_SIZE RA_FULL_SIZE133134struct ir3_liveness {135unsigned block_count;136DECLARE_ARRAY(struct ir3_register *, definitions);137DECLARE_ARRAY(BITSET_WORD *, live_out);138DECLARE_ARRAY(BITSET_WORD *, live_in);139};140141struct ir3_liveness *ir3_calc_liveness(struct ir3_shader_variant *v);142143bool ir3_def_live_after(struct ir3_liveness *live, struct ir3_register *def,144struct ir3_instruction *instr);145146void ir3_create_parallel_copies(struct ir3 *ir);147148void ir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir);149150struct ir3_pressure {151unsigned full, half, shared;152};153154void ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,155struct ir3_pressure *max_pressure);156157void ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,158unsigned half_size, unsigned block_count);159160void ir3_lower_copies(struct ir3_shader_variant *v);161162/* Register interval datastructure163*164* ir3_reg_ctx is used to track which registers are live. The tricky part is165* that some registers may overlap each other, when registers with overlapping166* live ranges get coalesced. For example, splits will overlap with their167* parent vector and sometimes collect sources will also overlap with the168* collect'ed vector. ir3_merge_regs guarantees for us that none of the169* registers in a merge set that are live at any given point partially170* overlap, which means that we can organize them into a forest. While each171* register has a per-merge-set offset, ir3_merge_regs also computes a172* "global" offset which allows us to throw away the original merge sets and173* think of registers as just intervals in a forest of live intervals. When a174* register becomes live, we insert it into the forest, and when it dies we175* remove it from the forest (and then its children get moved up a level). We176* use red-black trees to keep track of each level of the forest, so insertion177* and deletion should be fast operations. ir3_reg_ctx handles all the178* internal bookkeeping for this, so that it can be shared between RA,179* spilling, and register pressure tracking.180*/181182struct ir3_reg_interval {183struct rb_node node;184185struct rb_tree children;186187struct ir3_reg_interval *parent;188189struct ir3_register *reg;190191bool inserted;192};193194struct ir3_reg_ctx {195/* The tree of top-level intervals in the forest. */196struct rb_tree intervals;197198/* Users of ir3_reg_ctx need to keep around additional state that is199* modified when top-level intervals are added or removed. For register200* pressure tracking, this is just the register pressure, but for RA we201* need to keep track of the physreg of each top-level interval. These202* callbacks provide a place to let users deriving from ir3_reg_ctx update203* their state when top-level intervals are inserted/removed.204*/205206/* Called when an interval is added and it turns out to be at the top207* level.208*/209void (*interval_add)(struct ir3_reg_ctx *ctx,210struct ir3_reg_interval *interval);211212/* Called when an interval is deleted from the top level. */213void (*interval_delete)(struct ir3_reg_ctx *ctx,214struct ir3_reg_interval *interval);215216/* Called when an interval is deleted and its child becomes top-level.217*/218void (*interval_readd)(struct ir3_reg_ctx *ctx,219struct ir3_reg_interval *parent,220struct ir3_reg_interval *child);221};222223static inline struct ir3_reg_interval *224ir3_rb_node_to_interval(struct rb_node *node)225{226return rb_node_data(struct ir3_reg_interval, node, node);227}228229static inline const struct ir3_reg_interval *230ir3_rb_node_to_interval_const(const struct rb_node *node)231{232return rb_node_data(struct ir3_reg_interval, node, node);233}234235static inline struct ir3_reg_interval *236ir3_reg_interval_next(struct ir3_reg_interval *interval)237{238struct rb_node *next = rb_node_next(&interval->node);239return next ? ir3_rb_node_to_interval(next) : NULL;240}241242static inline struct ir3_reg_interval *243ir3_reg_interval_next_or_null(struct ir3_reg_interval *interval)244{245return interval ? ir3_reg_interval_next(interval) : NULL;246}247248static inline void249ir3_reg_interval_init(struct ir3_reg_interval *interval,250struct ir3_register *reg)251{252rb_tree_init(&interval->children);253interval->reg = reg;254interval->parent = NULL;255interval->inserted = false;256}257258void ir3_reg_interval_dump(struct ir3_reg_interval *interval);259260void ir3_reg_interval_insert(struct ir3_reg_ctx *ctx,261struct ir3_reg_interval *interval);262263void ir3_reg_interval_remove(struct ir3_reg_ctx *ctx,264struct ir3_reg_interval *interval);265266void ir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx,267struct ir3_reg_interval *interval);268269#endif270271272