Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a2xx/ir2_private.h
4574 views
/*1* Copyright (C) 2018 Jonathan Marek <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Jonathan Marek <[email protected]>24*/2526#include <assert.h>27#include <stdint.h>28#include <stdio.h>29#include <stdlib.h>30#include <string.h>3132#include "ir2/instr-a2xx.h"33#include "fd2_program.h"34#include "ir2.h"3536enum ir2_src_type {37IR2_SRC_SSA,38IR2_SRC_REG,39IR2_SRC_INPUT,40IR2_SRC_CONST,41};4243struct ir2_src {44/* num can mean different things45* ssa: index of instruction46* reg: index in ctx->reg array47* input: index in ctx->input array48* const: constant index (C0, C1, etc)49*/50uint16_t num;51uint8_t swizzle;52enum ir2_src_type type : 2;53uint8_t abs : 1;54uint8_t negate : 1;55uint8_t : 4;56};5758struct ir2_reg_component {59uint8_t c : 3; /* assigned x/y/z/w (7=dont write, for fetch instr) */60bool alloc : 1; /* is it currently allocated */61uint8_t ref_count; /* for ra */62};6364struct ir2_reg {65uint8_t idx; /* assigned hardware register */66uint8_t ncomp;6768uint8_t loop_depth;69bool initialized;70/* block_idx to free on (-1 = free on ref_count==0) */71int block_idx_free;72struct ir2_reg_component comp[4];73};7475struct ir2_instr {76unsigned idx;7778unsigned block_idx;7980enum {81IR2_NONE,82IR2_FETCH,83IR2_ALU,84IR2_CF,85} type : 2;8687/* instruction needs to be emitted (for scheduling) */88bool need_emit : 1;8990/* predicate value - (usually) same for entire block */91uint8_t pred : 2;9293/* src */94uint8_t src_count;95struct ir2_src src[4];9697/* dst */98bool is_ssa;99union {100struct ir2_reg ssa;101struct ir2_reg *reg;102};103104/* type-specific */105union {106struct {107instr_fetch_opc_t opc : 5;108union {109struct {110uint8_t const_idx;111uint8_t const_idx_sel;112} vtx;113struct {114bool is_cube : 1;115bool is_rect : 1;116uint8_t samp_id;117} tex;118};119} fetch;120struct {121/* store possible opcs, then we can choose vector/scalar instr */122instr_scalar_opc_t scalar_opc : 6;123instr_vector_opc_t vector_opc : 5;124/* same as nir */125uint8_t write_mask : 4;126bool saturate : 1;127128/* export idx (-1 no export) */129int8_t export;130131/* for scalarized 2 src instruction */132uint8_t src1_swizzle;133} alu;134struct {135/* jmp dst block_idx */136uint8_t block_idx;137} cf;138};139};140141struct ir2_sched_instr {142uint32_t reg_state[8];143struct ir2_instr *instr, *instr_s;144};145146struct ir2_context {147struct fd2_shader_stateobj *so;148149unsigned block_idx, pred_idx;150uint8_t pred;151bool block_has_jump[64];152153unsigned loop_last_block[64];154unsigned loop_depth;155156nir_shader *nir;157158/* ssa index of position output */159struct ir2_src position;160161/* to translate SSA ids to instruction ids */162int16_t ssa_map[1024];163164struct ir2_shader_info *info;165struct ir2_frag_linkage *f;166167int prev_export;168169/* RA state */170struct ir2_reg *live_regs[64];171uint32_t reg_state[256 / 32]; /* 64*4 bits */172173/* inputs */174struct ir2_reg input[16 + 1]; /* 16 + param */175176/* non-ssa regs */177struct ir2_reg reg[64];178unsigned reg_count;179180struct ir2_instr instr[0x300];181unsigned instr_count;182183struct ir2_sched_instr instr_sched[0x180];184unsigned instr_sched_count;185};186187void assemble(struct ir2_context *ctx, bool binning);188189void ir2_nir_compile(struct ir2_context *ctx, bool binning);190bool ir2_nir_lower_scalar(nir_shader *shader);191192void ra_count_refs(struct ir2_context *ctx);193void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,194bool export, uint8_t export_writemask);195void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);196void ra_block_free(struct ir2_context *ctx, unsigned block);197198void cp_src(struct ir2_context *ctx);199void cp_export(struct ir2_context *ctx);200201/* utils */202enum {203IR2_SWIZZLE_Y = 1 << 0,204IR2_SWIZZLE_Z = 2 << 0,205IR2_SWIZZLE_W = 3 << 0,206207IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2,208209IR2_SWIZZLE_YXW = 1 << 0 | 3 << 2 | 1 << 4,210211IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6,212IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6,213IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6,214IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6,215IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6,216IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6,217IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6,218IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6,219};220221#define compile_error(ctx, args...) \222({ \223printf(args); \224assert(0); \225})226227static inline struct ir2_src228ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)229{230return (struct ir2_src){.num = num, .swizzle = swizzle, .type = type};231}232233/* ir2_assemble uses it .. */234struct ir2_src ir2_zero(struct ir2_context *ctx);235236#define ir2_foreach_instr(it, ctx) \237for (struct ir2_instr *it = (ctx)->instr; ({ \238while (it != &(ctx)->instr[(ctx)->instr_count] && \239it->type == IR2_NONE) \240it++; \241it != &(ctx)->instr[(ctx)->instr_count]; \242}); \243it++)244245#define ir2_foreach_live_reg(it, ctx) \246for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \247while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) \248__ptr++; \249__ptr != &(ctx)->live_regs[64] ? (it = *__ptr) : NULL; \250}); \251it++)252253#define ir2_foreach_avail(it) \254for (struct ir2_instr **__instrp = avail, *it; \255it = *__instrp, __instrp != &avail[avail_count]; __instrp++)256257#define ir2_foreach_src(it, instr) \258for (struct ir2_src *it = instr->src; it != &instr->src[instr->src_count]; \259it++)260261/* mask for register allocation262* 64 registers with 4 components each = 256 bits263*/264/* typedef struct {265uint64_t data[4];266} regmask_t; */267268static inline bool269mask_isset(uint32_t *mask, unsigned num)270{271return !!(mask[num / 32] & 1 << num % 32);272}273274static inline void275mask_set(uint32_t *mask, unsigned num)276{277mask[num / 32] |= 1 << num % 32;278}279280static inline void281mask_unset(uint32_t *mask, unsigned num)282{283mask[num / 32] &= ~(1 << num % 32);284}285286static inline unsigned287mask_reg(uint32_t *mask, unsigned num)288{289return mask[num / 8] >> num % 8 * 4 & 0xf;290}291292static inline bool293is_export(struct ir2_instr *instr)294{295return instr->type == IR2_ALU && instr->alu.export >= 0;296}297298static inline instr_alloc_type_t299export_buf(unsigned num)300{301return num < 32 ? SQ_PARAMETER_PIXEL : num >= 62 ? SQ_POSITION : SQ_MEMORY;302}303304/* component c for channel i */305static inline unsigned306swiz_set(unsigned c, unsigned i)307{308return ((c - i) & 3) << i * 2;309}310311/* get swizzle in channel i */312static inline unsigned313swiz_get(unsigned swiz, unsigned i)314{315return ((swiz >> i * 2) + i) & 3;316}317318static inline unsigned319swiz_merge(unsigned swiz0, unsigned swiz1)320{321unsigned swiz = 0;322for (int i = 0; i < 4; i++)323swiz |= swiz_set(swiz_get(swiz0, swiz_get(swiz1, i)), i);324return swiz;325}326327static inline void328swiz_merge_p(uint8_t *swiz0, unsigned swiz1)329{330unsigned swiz = 0;331for (int i = 0; i < 4; i++)332swiz |= swiz_set(swiz_get(*swiz0, swiz_get(swiz1, i)), i);333*swiz0 = swiz;334}335336static inline struct ir2_reg *337get_reg(struct ir2_instr *instr)338{339return instr->is_ssa ? &instr->ssa : instr->reg;340}341342static inline struct ir2_reg *343get_reg_src(struct ir2_context *ctx, struct ir2_src *src)344{345switch (src->type) {346case IR2_SRC_INPUT:347return &ctx->input[src->num];348case IR2_SRC_SSA:349return &ctx->instr[src->num].ssa;350case IR2_SRC_REG:351return &ctx->reg[src->num];352default:353return NULL;354}355}356357/* gets a ncomp value for the dst */358static inline unsigned359dst_ncomp(struct ir2_instr *instr)360{361if (instr->is_ssa)362return instr->ssa.ncomp;363364if (instr->type == IR2_FETCH)365return instr->reg->ncomp;366367assert(instr->type == IR2_ALU);368369unsigned ncomp = 0;370for (int i = 0; i < instr->reg->ncomp; i++)371ncomp += !!(instr->alu.write_mask & 1 << i);372return ncomp;373}374375/* gets a ncomp value for the src registers */376static inline unsigned377src_ncomp(struct ir2_instr *instr)378{379if (instr->type == IR2_FETCH) {380switch (instr->fetch.opc) {381case VTX_FETCH:382return 1;383case TEX_FETCH:384return instr->fetch.tex.is_cube ? 3 : 2;385case TEX_SET_TEX_LOD:386return 1;387default:388assert(0);389}390}391392switch (instr->alu.scalar_opc) {393case PRED_SETEs ... KILLONEs:394return 1;395default:396break;397}398399switch (instr->alu.vector_opc) {400case DOT2ADDv:401return 2;402case DOT3v:403return 3;404case DOT4v:405case CUBEv:406case PRED_SETE_PUSHv:407return 4;408default:409return dst_ncomp(instr);410}411}412413414