/* Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/gp/gpir.h
 * 4574 views
 */
/*1* Copyright (c) 2017 Lima Project2* Copyright (c) 2013 Connor Abbott3*4* Permission is hereby granted, free of charge, to any person obtaining a copy5* of this software and associated documentation files (the "Software"), to deal6* in the Software without restriction, including without limitation the rights7* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell8* copies of the Software, and to permit persons to whom the Software is9* furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice shall be included in12* all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE17* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN20* THE SOFTWARE.21*22*/2324#ifndef LIMA_IR_GP_GPIR_H25#define LIMA_IR_GP_GPIR_H2627#include "util/list.h"28#include "util/u_math.h"2930#include "ir/lima_ir.h"3132/* list of operations that a node can do. 
*/33typedef enum {34gpir_op_mov,3536/* mul ops */37gpir_op_mul,38gpir_op_select,39gpir_op_complex1,40gpir_op_complex2,4142/* add ops */43gpir_op_add,44gpir_op_floor,45gpir_op_sign,46gpir_op_ge,47gpir_op_lt,48gpir_op_min,49gpir_op_max,50gpir_op_abs,51gpir_op_not,5253/* mul/add ops */54gpir_op_neg,5556/* passthrough ops */57gpir_op_clamp_const,58gpir_op_preexp2,59gpir_op_postlog2,6061/* complex ops */62gpir_op_exp2_impl,63gpir_op_log2_impl,64gpir_op_rcp_impl,65gpir_op_rsqrt_impl,6667/* load/store ops */68gpir_op_load_uniform,69gpir_op_load_temp,70gpir_op_load_attribute,71gpir_op_load_reg,72gpir_op_store_temp,73gpir_op_store_reg,74gpir_op_store_varying,75gpir_op_store_temp_load_off0,76gpir_op_store_temp_load_off1,77gpir_op_store_temp_load_off2,7879/* branch */80gpir_op_branch_cond,8182/* const (emulated) */83gpir_op_const,8485/* emulated ops */86gpir_op_exp2,87gpir_op_log2,88gpir_op_rcp,89gpir_op_rsqrt,90gpir_op_ceil,91gpir_op_exp,92gpir_op_log,93gpir_op_sin,94gpir_op_cos,95gpir_op_tan,96gpir_op_branch_uncond,97gpir_op_eq,98gpir_op_ne,99100/* auxiliary ops */101gpir_op_dummy_f,102gpir_op_dummy_m,103104gpir_op_num,105} gpir_op;106107typedef enum {108gpir_node_type_alu,109gpir_node_type_const,110gpir_node_type_load,111gpir_node_type_store,112gpir_node_type_branch,113} gpir_node_type;114115typedef struct {116char *name;117bool dest_neg;118bool src_neg[4];119int *slots;120gpir_node_type type;121bool spillless;122bool schedule_first;123bool may_consume_two_slots;124} gpir_op_info;125126extern const gpir_op_info gpir_op_infos[];127128typedef struct {129enum {130GPIR_DEP_INPUT, /* def is the input of use */131GPIR_DEP_OFFSET, /* def is the offset of use (i.e. 
temp store) */132GPIR_DEP_READ_AFTER_WRITE,133GPIR_DEP_WRITE_AFTER_READ,134} type;135136/* node execute before succ */137struct gpir_node *pred;138/* node execute after pred */139struct gpir_node *succ;140141/* for node pred_list */142struct list_head pred_link;143/* for ndoe succ_list */144struct list_head succ_link;145} gpir_dep;146147struct gpir_instr;148struct gpir_store_node;149150typedef struct gpir_node {151struct list_head list;152gpir_op op;153gpir_node_type type;154int index;155char name[16];156bool printed;157struct gpir_block *block;158159/* for nodes relationship */160/* for node who uses this node (successor) */161struct list_head succ_list;162/* for node this node uses (predecessor) */163struct list_head pred_list;164165/* for scheduler and regalloc */166int value_reg;167union {168struct {169struct gpir_instr *instr;170struct gpir_store_node *physreg_store;171int pos;172int dist;173int index;174bool ready;175bool inserted;176bool max_node, next_max_node;177bool complex_allowed;178} sched;179struct {180int parent_index;181float reg_pressure;182int est;183bool scheduled;184} rsched;185struct {186float index;187struct gpir_node *last;188} vreg;189struct {190int index;191} preg;192};193} gpir_node;194195typedef struct {196gpir_node node;197198gpir_node *children[3];199bool children_negate[3];200int num_child;201202bool dest_negate;203} gpir_alu_node;204205typedef struct {206gpir_node node;207union fi value;208} gpir_const_node;209210typedef struct {211int index;212struct list_head list;213} gpir_reg;214215typedef struct {216gpir_node node;217218unsigned index;219unsigned component;220221gpir_reg *reg;222struct list_head reg_link;223} gpir_load_node;224225typedef struct gpir_store_node {226gpir_node node;227228unsigned index;229unsigned component;230gpir_node *child;231232gpir_reg *reg;233} gpir_store_node;234235enum gpir_instr_slot 
{236GPIR_INSTR_SLOT_MUL0,237GPIR_INSTR_SLOT_MUL1,238GPIR_INSTR_SLOT_ADD0,239GPIR_INSTR_SLOT_ADD1,240GPIR_INSTR_SLOT_PASS,241GPIR_INSTR_SLOT_COMPLEX,242GPIR_INSTR_SLOT_REG0_LOAD0,243GPIR_INSTR_SLOT_REG0_LOAD1,244GPIR_INSTR_SLOT_REG0_LOAD2,245GPIR_INSTR_SLOT_REG0_LOAD3,246GPIR_INSTR_SLOT_REG1_LOAD0,247GPIR_INSTR_SLOT_REG1_LOAD1,248GPIR_INSTR_SLOT_REG1_LOAD2,249GPIR_INSTR_SLOT_REG1_LOAD3,250GPIR_INSTR_SLOT_MEM_LOAD0,251GPIR_INSTR_SLOT_MEM_LOAD1,252GPIR_INSTR_SLOT_MEM_LOAD2,253GPIR_INSTR_SLOT_MEM_LOAD3,254GPIR_INSTR_SLOT_STORE0,255GPIR_INSTR_SLOT_STORE1,256GPIR_INSTR_SLOT_STORE2,257GPIR_INSTR_SLOT_STORE3,258GPIR_INSTR_SLOT_NUM,259GPIR_INSTR_SLOT_END,260GPIR_INSTR_SLOT_ALU_BEGIN = GPIR_INSTR_SLOT_MUL0,261GPIR_INSTR_SLOT_ALU_END = GPIR_INSTR_SLOT_COMPLEX,262GPIR_INSTR_SLOT_DIST_TWO_BEGIN = GPIR_INSTR_SLOT_MUL0,263GPIR_INSTR_SLOT_DIST_TWO_END = GPIR_INSTR_SLOT_PASS,264};265266typedef struct gpir_instr {267int index;268struct list_head list;269270gpir_node *slots[GPIR_INSTR_SLOT_NUM];271272/* The number of ALU slots free for moves. */273int alu_num_slot_free;274275/* The number of ALU slots free for moves, except for the complex slot. */276int alu_non_cplx_slot_free;277278/* We need to make sure that we can insert moves in the following cases:279* (1) There was a use of a value two cycles ago.280* (2) There were more than 5 uses of a value 1 cycle ago (or else we can't281* possibly satisfy (1) for the next cycle).282* (3) There is a store instruction scheduled, but not its child.283*284* The complex slot cannot be used for a move in case (1), since it only285* has a FIFO depth of 1, but it can be used for (2) as well as (3) as long286* as the uses aren't in certain slots. It turns out that we don't have to287* worry about nodes that can't use the complex slot for (2), since there288* are at most 4 uses 1 cycle ago that can't use the complex slot, but we289* do have to worry about (3). This means tracking stores whose children290* cannot be in the complex slot. 
In order to ensure that we have enough291* space for all three, we maintain the following invariants:292*293* (1) alu_num_slot_free >= alu_num_slot_needed_by_store +294* alu_num_slot_needed_by_max +295* max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)296* (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +297* alu_num_slot_needed_by_non_cplx_store298*299* alu_max_allowed_next_max is normally 5 (since there can be at most 5 max300* nodes for the next instruction) but when there is a complex1 node in301* this instruction it reduces to 4 to reserve a slot for complex2 in the302* next instruction.303*/304int alu_num_slot_needed_by_store;305int alu_num_slot_needed_by_non_cplx_store;306int alu_num_slot_needed_by_max;307int alu_num_unscheduled_next_max;308int alu_max_allowed_next_max;309310/* Used to communicate to the scheduler how many slots need to be cleared311* up in order to satisfy the invariants.312*/313int slot_difference;314int non_cplx_slot_difference;315316int reg0_use_count;317bool reg0_is_attr;318int reg0_index;319320int reg1_use_count;321int reg1_index;322323int mem_use_count;324bool mem_is_temp;325int mem_index;326327enum {328GPIR_INSTR_STORE_NONE,329GPIR_INSTR_STORE_VARYING,330GPIR_INSTR_STORE_REG,331GPIR_INSTR_STORE_TEMP,332} store_content[2];333int store_index[2];334} gpir_instr;335336typedef struct gpir_block {337struct list_head list;338struct list_head node_list;339struct list_head instr_list;340struct gpir_compiler *comp;341342struct gpir_block *successors[2];343struct list_head predecessors;344struct list_head predecessors_node;345346/* for regalloc */347348/* The set of live registers, i.e. registers whose value may be used349* eventually, at the beginning of the block.350*/351BITSET_WORD *live_in;352353/* Set of live registers at the end of the block. 
*/354BITSET_WORD *live_out;355356/* Set of registers that may have a value defined at the end of the357* block.358*/359BITSET_WORD *def_out;360361/* After register allocation, the set of live physical registers at the end362* of the block. Needed for scheduling.363*/364uint64_t live_out_phys;365366/* For codegen, the offset in the final program. */367unsigned instr_offset;368369/* for scheduler */370union {371struct {372int instr_index;373} sched;374struct {375int node_index;376} rsched;377};378} gpir_block;379380typedef struct {381gpir_node node;382gpir_block *dest;383gpir_node *cond;384} gpir_branch_node;385386struct lima_vs_compiled_shader;387388#define GPIR_VECTOR_SSA_VIEWPORT_SCALE 0389#define GPIR_VECTOR_SSA_VIEWPORT_OFFSET 1390#define GPIR_VECTOR_SSA_NUM 2391392typedef struct gpir_compiler {393struct list_head block_list;394int cur_index;395396/* Find the gpir node for a given NIR SSA def. */397gpir_node **node_for_ssa;398399/* Find the gpir node for a given NIR register. */400gpir_node **node_for_reg;401402/* Find the gpir register for a given NIR SSA def. */403gpir_reg **reg_for_ssa;404405/* Find the gpir register for a given NIR register. */406gpir_reg **reg_for_reg;407408/* gpir block for NIR block. 
*/409gpir_block **blocks;410411/* for physical reg */412struct list_head reg_list;413int cur_reg;414415/* lookup for vector ssa */416struct {417int ssa;418gpir_node *nodes[4];419} vector_ssa[GPIR_VECTOR_SSA_NUM];420421struct lima_vs_compiled_shader *prog;422int constant_base;423424/* shaderdb */425int num_instr;426int num_loops;427int num_spills;428int num_fills;429} gpir_compiler;430431#define GPIR_VALUE_REG_NUM 11432#define GPIR_PHYSICAL_REG_NUM 64433434void *gpir_node_create(gpir_block *block, gpir_op op);435gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type);436void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred);437void gpir_node_replace_succ(gpir_node *dst, gpir_node *src);438void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred);439void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, gpir_node *new_child);440void gpir_node_insert_child(gpir_node *parent, gpir_node *child, gpir_node *insert_child);441void gpir_node_delete(gpir_node *node);442void gpir_node_print_prog_dep(gpir_compiler *comp);443void gpir_node_print_prog_seq(gpir_compiler *comp);444445#define gpir_node_foreach_succ(node, dep) \446list_for_each_entry(gpir_dep, dep, &node->succ_list, succ_link)447#define gpir_node_foreach_succ_safe(node, dep) \448list_for_each_entry_safe(gpir_dep, dep, &node->succ_list, succ_link)449#define gpir_node_foreach_pred(node, dep) \450list_for_each_entry(gpir_dep, dep, &node->pred_list, pred_link)451#define gpir_node_foreach_pred_safe(node, dep) \452list_for_each_entry_safe(gpir_dep, dep, &node->pred_list, pred_link)453454static inline bool gpir_node_is_root(gpir_node *node)455{456return list_is_empty(&node->succ_list);457}458459static inline bool gpir_node_is_leaf(gpir_node *node)460{461return list_is_empty(&node->pred_list);462}463464#define gpir_node_to_alu(node) ((gpir_alu_node *)(node))465#define gpir_node_to_const(node) ((gpir_const_node *)(node))466#define gpir_node_to_load(node) ((gpir_load_node 
*)(node))467#define gpir_node_to_store(node) ((gpir_store_node *)(node))468#define gpir_node_to_branch(node) ((gpir_branch_node *)(node))469470gpir_instr *gpir_instr_create(gpir_block *block);471bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node);472void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node);473void gpir_instr_print_prog(gpir_compiler *comp);474475bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2);476477bool gpir_optimize(gpir_compiler *comp);478bool gpir_pre_rsched_lower_prog(gpir_compiler *comp);479bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp);480bool gpir_regalloc_prog(gpir_compiler *comp);481bool gpir_schedule_prog(gpir_compiler *comp);482bool gpir_codegen_prog(gpir_compiler *comp);483484gpir_reg *gpir_create_reg(gpir_compiler *comp);485486#endif487488489