/* Source: src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h (21.2-virgl branch) */
/*1* Copyright (c) 2020 Etnaviv Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sub license,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the11* next paragraph) shall be included in all copies or substantial portions12* of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22* Authors:23* Jonathan Marek <[email protected]>24*/2526#ifndef H_ETNAVIV_COMPILER_NIR27#define H_ETNAVIV_COMPILER_NIR2829#include "compiler/nir/nir.h"30#include "etnaviv_asm.h"31#include "etnaviv_compiler.h"32#include "util/compiler.h"3334struct etna_compile {35nir_shader *nir;36nir_function_impl *impl;37#define is_fs(c) ((c)->nir->info.stage == MESA_SHADER_FRAGMENT)38const struct etna_specs *specs;39struct etna_shader_variant *variant;4041/* block # to instr index */42unsigned *block_ptr;4344/* Code generation */45int inst_ptr; /* current instruction pointer */46struct etna_inst code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];4748/* constants */49uint64_t consts[ETNA_MAX_IMM];50unsigned const_count;5152/* ra state */53struct ra_graph *g;54unsigned *live_map;55unsigned num_nodes;5657/* There was an error during compilation */58bool 
error;59};6061#define compile_error(ctx, args...) ({ \62printf(args); \63ctx->error = true; \64assert(0); \65})6667enum {68BYPASS_DST = 1,69BYPASS_SRC = 2,70};7172static inline bool is_sysval(nir_instr *instr)73{74if (instr->type != nir_instr_type_intrinsic)75return false;7677nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);78return intr->intrinsic == nir_intrinsic_load_front_face ||79intr->intrinsic == nir_intrinsic_load_frag_coord;80}8182/* get unique ssa/reg index for nir_src */83static inline unsigned84src_index(nir_function_impl *impl, nir_src *src)85{86return src->is_ssa ? src->ssa->index : (src->reg.reg->index + impl->ssa_alloc);87}8889/* get unique ssa/reg index for nir_dest */90static inline unsigned91dest_index(nir_function_impl *impl, nir_dest *dest)92{93return dest->is_ssa ? dest->ssa.index : (dest->reg.reg->index + impl->ssa_alloc);94}9596static inline void97update_swiz_mask(nir_alu_instr *alu, nir_dest *dest, unsigned *swiz, unsigned *mask)98{99if (!swiz)100return;101102bool is_vec = dest != NULL;103unsigned swizzle = 0, write_mask = 0;104for (unsigned i = 0; i < 4; i++) {105/* channel not written */106if (!(alu->dest.write_mask & (1 << i)))107continue;108/* src is different (only check for vecN) */109if (is_vec && alu->src[i].src.ssa != &dest->ssa)110continue;111112unsigned src_swiz = is_vec ? 
alu->src[i].swizzle[0] : alu->src[0].swizzle[i];113swizzle |= (*swiz >> src_swiz * 2 & 3) << i * 2;114/* this channel isn't written through this chain */115if (*mask & (1 << src_swiz))116write_mask |= 1 << i;117}118*swiz = swizzle;119*mask = write_mask;120}121122static nir_dest *123real_dest(nir_dest *dest, unsigned *swiz, unsigned *mask)124{125if (!dest || !dest->is_ssa)126return dest;127128bool can_bypass_src = !list_length(&dest->ssa.if_uses);129nir_instr *p_instr = dest->ssa.parent_instr;130131/* if used by a vecN, the "real" destination becomes the vecN destination132* lower_alu guarantees that values used by a vecN are only used by that vecN133* we can apply the same logic to movs in a some cases too134*/135nir_foreach_use(use_src, &dest->ssa) {136nir_instr *instr = use_src->parent_instr;137138/* src bypass check: for now only deal with tex src mov case139* note: for alu don't bypass mov for multiple uniform sources140*/141switch (instr->type) {142case nir_instr_type_tex:143if (p_instr->type == nir_instr_type_alu &&144nir_instr_as_alu(p_instr)->op == nir_op_mov) {145break;146}147FALLTHROUGH;148default:149can_bypass_src = false;150break;151}152153if (instr->type != nir_instr_type_alu)154continue;155156nir_alu_instr *alu = nir_instr_as_alu(instr);157158switch (alu->op) {159case nir_op_vec2:160case nir_op_vec3:161case nir_op_vec4:162assert(list_length(&dest->ssa.if_uses) == 0);163nir_foreach_use(use_src, &dest->ssa)164assert(use_src->parent_instr == instr);165166update_swiz_mask(alu, dest, swiz, mask);167break;168case nir_op_mov: {169switch (dest->ssa.parent_instr->type) {170case nir_instr_type_alu:171case nir_instr_type_tex:172break;173default:174continue;175}176if (list_length(&dest->ssa.if_uses) || list_length(&dest->ssa.uses) > 1)177continue;178179update_swiz_mask(alu, NULL, swiz, mask);180break;181};182default:183continue;184}185186assert(!(instr->pass_flags & BYPASS_SRC));187instr->pass_flags |= BYPASS_DST;188return real_dest(&alu->dest.dest, swiz, 
mask);189}190191if (can_bypass_src && !(p_instr->pass_flags & BYPASS_DST)) {192p_instr->pass_flags |= BYPASS_SRC;193return NULL;194}195196return dest;197}198199/* if instruction dest needs a register, return nir_dest for it */200static inline nir_dest *201dest_for_instr(nir_instr *instr)202{203nir_dest *dest = NULL;204205switch (instr->type) {206case nir_instr_type_alu:207dest = &nir_instr_as_alu(instr)->dest.dest;208break;209case nir_instr_type_tex:210dest = &nir_instr_as_tex(instr)->dest;211break;212case nir_instr_type_intrinsic: {213nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);214if (intr->intrinsic == nir_intrinsic_load_uniform ||215intr->intrinsic == nir_intrinsic_load_ubo ||216intr->intrinsic == nir_intrinsic_load_input ||217intr->intrinsic == nir_intrinsic_load_instance_id ||218intr->intrinsic == nir_intrinsic_load_texture_rect_scaling)219dest = &intr->dest;220} break;221case nir_instr_type_deref:222return NULL;223default:224break;225}226return real_dest(dest, NULL, NULL);227}228229struct live_def {230nir_instr *instr;231nir_dest *dest; /* cached dest_for_instr */232unsigned live_start, live_end; /* live range */233};234235unsigned236etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map);237238/* Swizzles and write masks can be used to layer virtual non-interfering239* registers on top of the real VEC4 registers. 
For example, the virtual240* VEC3_XYZ register and the virtual SCALAR_W register that use the same241* physical VEC4 base register do not interfere.242*/243enum reg_class {244REG_CLASS_VIRT_SCALAR,245REG_CLASS_VIRT_VEC2,246REG_CLASS_VIRT_VEC3,247REG_CLASS_VEC4,248/* special vec2 class for fast transcendentals, limited to XY or ZW */249REG_CLASS_VIRT_VEC2T,250/* special classes for LOAD - contiguous components */251REG_CLASS_VIRT_VEC2C,252REG_CLASS_VIRT_VEC3C,253NUM_REG_CLASSES,254};255256enum reg_type {257REG_TYPE_VEC4,258REG_TYPE_VIRT_VEC3_XYZ,259REG_TYPE_VIRT_VEC3_XYW,260REG_TYPE_VIRT_VEC3_XZW,261REG_TYPE_VIRT_VEC3_YZW,262REG_TYPE_VIRT_VEC2_XY,263REG_TYPE_VIRT_VEC2_XZ,264REG_TYPE_VIRT_VEC2_XW,265REG_TYPE_VIRT_VEC2_YZ,266REG_TYPE_VIRT_VEC2_YW,267REG_TYPE_VIRT_VEC2_ZW,268REG_TYPE_VIRT_SCALAR_X,269REG_TYPE_VIRT_SCALAR_Y,270REG_TYPE_VIRT_SCALAR_Z,271REG_TYPE_VIRT_SCALAR_W,272REG_TYPE_VIRT_VEC2T_XY,273REG_TYPE_VIRT_VEC2T_ZW,274REG_TYPE_VIRT_VEC2C_XY,275REG_TYPE_VIRT_VEC2C_YZ,276REG_TYPE_VIRT_VEC2C_ZW,277REG_TYPE_VIRT_VEC3C_XYZ,278REG_TYPE_VIRT_VEC3C_YZW,279NUM_REG_TYPES,280};281282/* writemask when used as dest */283static const uint8_t284reg_writemask[NUM_REG_TYPES] = {285[REG_TYPE_VEC4] = 0xf,286[REG_TYPE_VIRT_SCALAR_X] = 0x1,287[REG_TYPE_VIRT_SCALAR_Y] = 0x2,288[REG_TYPE_VIRT_VEC2_XY] = 0x3,289[REG_TYPE_VIRT_VEC2T_XY] = 0x3,290[REG_TYPE_VIRT_VEC2C_XY] = 0x3,291[REG_TYPE_VIRT_SCALAR_Z] = 0x4,292[REG_TYPE_VIRT_VEC2_XZ] = 0x5,293[REG_TYPE_VIRT_VEC2_YZ] = 0x6,294[REG_TYPE_VIRT_VEC2C_YZ] = 0x6,295[REG_TYPE_VIRT_VEC3_XYZ] = 0x7,296[REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,297[REG_TYPE_VIRT_SCALAR_W] = 0x8,298[REG_TYPE_VIRT_VEC2_XW] = 0x9,299[REG_TYPE_VIRT_VEC2_YW] = 0xa,300[REG_TYPE_VIRT_VEC3_XYW] = 0xb,301[REG_TYPE_VIRT_VEC2_ZW] = 0xc,302[REG_TYPE_VIRT_VEC2T_ZW] = 0xc,303[REG_TYPE_VIRT_VEC2C_ZW] = 0xc,304[REG_TYPE_VIRT_VEC3_XZW] = 0xd,305[REG_TYPE_VIRT_VEC3_YZW] = 0xe,306[REG_TYPE_VIRT_VEC3C_YZW] = 0xe,307};308309static inline int reg_get_type(int virt_reg)310{311return virt_reg 
% NUM_REG_TYPES;312}313314static inline int reg_get_base(struct etna_compile *c, int virt_reg)315{316/* offset by 1 to avoid reserved position register */317if (c->nir->info.stage == MESA_SHADER_FRAGMENT)318return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;319return virt_reg / NUM_REG_TYPES;320}321322struct ra_regs *323etna_ra_setup(void *mem_ctx);324325void326etna_ra_assign(struct etna_compile *c, nir_shader *shader);327328unsigned329etna_ra_finish(struct etna_compile *c);330331static inline void332emit_inst(struct etna_compile *c, struct etna_inst *inst)333{334c->code[c->inst_ptr++] = *inst;335}336337void338etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst,339struct etna_inst_src src[3], bool saturate);340341void342etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz,343struct etna_inst_dst dst, struct etna_inst_src coord,344struct etna_inst_src lod_bias, struct etna_inst_src compare);345346void347etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition);348349void350etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition);351352#endif353354355