Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a2xx/fd2_program.c
4574 views
/*1* Copyright (C) 2012 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24* Jonathan Marek <[email protected]>25*/2627#include "nir/tgsi_to_nir.h"28#include "pipe/p_state.h"29#include "tgsi/tgsi_dump.h"30#include "tgsi/tgsi_parse.h"31#include "util/format/u_format.h"32#include "util/u_inlines.h"33#include "util/u_memory.h"34#include "util/u_string.h"3536#include "freedreno_program.h"3738#include "ir2/instr-a2xx.h"39#include "fd2_program.h"40#include "fd2_texture.h"41#include "fd2_util.h"42#include "ir2.h"4344static struct fd2_shader_stateobj *45create_shader(struct pipe_context *pctx, gl_shader_stage type)46{47struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);48if (!so)49return NULL;50so->type = type;51so->is_a20x = is_a20x(fd_context(pctx)->screen);52return so;53}5455static void56delete_shader(struct fd2_shader_stateobj *so)57{58if (!so)59return;60ralloc_free(so->nir);61for (int i = 0; i < ARRAY_SIZE(so->variant); i++)62free(so->variant[i].info.dwords);63free(so);64}6566static void67emit(struct fd_ringbuffer *ring, gl_shader_stage type,68struct ir2_shader_info *info, struct util_dynarray *patches)69{70unsigned i;7172assert(info->sizedwords);7374OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords);75OUT_RING(ring, type == MESA_SHADER_FRAGMENT);76OUT_RING(ring, info->sizedwords);7778if (patches)79util_dynarray_append(patches, uint32_t *,80&ring->cur[info->mem_export_ptr]);8182for (i = 0; i < info->sizedwords; i++)83OUT_RING(ring, info->dwords[i]);84}8586static int87ir2_glsl_type_size(const struct glsl_type *type, bool bindless)88{89return glsl_count_attribute_slots(type, false);90}9192static void *93fd2_fp_state_create(struct pipe_context *pctx,94const struct pipe_shader_state *cso)95{96struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT);97if (!so)98return NULL;99100so->nir = (cso->type == PIPE_SHADER_IR_NIR)101? cso->ir.nir102: tgsi_to_nir(cso->tokens, pctx->screen, false);103104NIR_PASS_V(so->nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,105ir2_glsl_type_size, (nir_lower_io_options)0);106107if (ir2_optimize_nir(so->nir, true))108goto fail;109110so->first_immediate = so->nir->num_uniforms;111112ir2_compile(so, 0, NULL);113114ralloc_free(so->nir);115so->nir = NULL;116return so;117118fail:119delete_shader(so);120return NULL;121}122123static void124fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso)125{126struct fd2_shader_stateobj *so = hwcso;127delete_shader(so);128}129130static void *131fd2_vp_state_create(struct pipe_context *pctx,132const struct pipe_shader_state *cso)133{134struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX);135if (!so)136return NULL;137138so->nir = (cso->type == PIPE_SHADER_IR_NIR)139? cso->ir.nir140: tgsi_to_nir(cso->tokens, pctx->screen, false);141142NIR_PASS_V(so->nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,143ir2_glsl_type_size, (nir_lower_io_options)0);144145if (ir2_optimize_nir(so->nir, true))146goto fail;147148so->first_immediate = so->nir->num_uniforms;149150/* compile binning variant now */151ir2_compile(so, 0, NULL);152153return so;154155fail:156delete_shader(so);157return NULL;158}159160static void161fd2_vp_state_delete(struct pipe_context *pctx, void *hwcso)162{163struct fd2_shader_stateobj *so = hwcso;164delete_shader(so);165}166167static void168patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem,169instr_fetch_vtx_t *instr, uint16_t dst_swiz) assert_dt170{171struct surface_format fmt = fd2_pipe2surface(elem->src_format);172173instr->dst_swiz = fd2_vtx_swiz(elem->src_format, dst_swiz);174instr->format_comp_all = fmt.sign == SQ_TEX_SIGN_SIGNED;175instr->num_format_all = fmt.num_format;176instr->format = fmt.format;177instr->exp_adjust_all = fmt.exp_adjust;178instr->stride = ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index].stride;179instr->offset = elem->src_offset;180}181182static void183patch_fetches(struct fd_context *ctx, struct ir2_shader_info *info,184struct fd_vertex_stateobj *vtx,185struct fd_texture_stateobj *tex) assert_dt186{187for (int i = 0; i < info->num_fetch_instrs; i++) {188struct ir2_fetch_info *fi = &info->fetch_info[i];189190instr_fetch_t *instr = (instr_fetch_t *)&info->dwords[fi->offset];191if (instr->opc == VTX_FETCH) {192unsigned idx =193(instr->vtx.const_index - 20) * 3 + instr->vtx.const_index_sel;194patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz);195continue;196}197198assert(instr->opc == TEX_FETCH);199instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id);200instr->tex.src_swiz = fi->tex.src_swiz;201}202}203204void205fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,206struct fd_program_stateobj *prog)207{208struct fd2_shader_stateobj *fp = NULL, *vp;209struct ir2_shader_info *fpi, *vpi;210struct ir2_frag_linkage *f;211uint8_t vs_gprs, fs_gprs = 0, vs_export = 0;212enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR;213bool binning = (ctx->batch && ring == ctx->batch->binning);214unsigned variant = 0;215216vp = prog->vs;217218/* find variant matching the linked fragment shader */219if (!binning) {220fp = prog->fs;221for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) {222/* if checked all variants, compile a new variant */223if (!vp->variant[variant].info.sizedwords) {224ir2_compile(vp, variant, fp);225break;226}227228/* check if fragment shader linkage matches */229if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f,230sizeof(struct ir2_frag_linkage)))231break;232}233assert(variant < ARRAY_SIZE(vp->variant));234}235236vpi = &vp->variant[variant].info;237fpi = &fp->variant[0].info;238f = &fp->variant[0].f;239240/* clear/gmem2mem/mem2gmem need to be changed to remove this condition */241if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) {242patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]);243if (fp)244patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]);245}246247emit(ring, MESA_SHADER_VERTEX, vpi,248binning ? &ctx->batch->shader_patches : NULL);249250if (fp) {251emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL);252fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg;253vs_export = MAX2(1, f->inputs_count) - 1;254}255256vs_gprs = (vpi->max_reg < 0) ? 0x80 : vpi->max_reg;257258if (vp->writes_psize && !binning)259mode = POSITION_2_VECTORS_SPRITE;260261/* set register to use for param (fragcoord/pointcoord/frontfacing) */262OUT_PKT3(ring, CP_SET_CONSTANT, 2);263OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));264OUT_RING(ring,265A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) |266COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) |267/* we need SCREEN_XY for both fragcoord and frontfacing */268A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY);269270OUT_PKT3(ring, CP_SET_CONSTANT, 2);271OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));272OUT_RING(ring,273A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) |274A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) |275A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |276A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |277A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |278A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |279A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) |280COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) |281COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX));282}283284void285fd2_prog_init(struct pipe_context *pctx)286{287struct fd_context *ctx = fd_context(pctx);288struct fd_program_stateobj *prog;289struct fd2_shader_stateobj *so;290struct ir2_shader_info *info;291instr_fetch_vtx_t *instr;292293pctx->create_fs_state = fd2_fp_state_create;294pctx->delete_fs_state = fd2_fp_state_delete;295296pctx->create_vs_state = fd2_vp_state_create;297pctx->delete_vs_state = fd2_vp_state_delete;298299fd_prog_init(pctx);300301/* XXX maybe its possible to reuse patch_vtx_fetch somehow? */302303prog = &ctx->solid_prog;304so = prog->vs;305ir2_compile(prog->vs, 1, prog->fs);306307#define IR2_FETCH_SWIZ_XY01 0xb08308#define IR2_FETCH_SWIZ_XYZ1 0xa88309310info = &so->variant[1].info;311312instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[0].offset];313instr->const_index = 26;314instr->const_index_sel = 0;315instr->format = FMT_32_32_32_FLOAT;316instr->format_comp_all = false;317instr->stride = 12;318instr->num_format_all = true;319instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;320321prog = &ctx->blit_prog[0];322so = prog->vs;323ir2_compile(prog->vs, 1, prog->fs);324325info = &so->variant[1].info;326327instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[0].offset];328instr->const_index = 26;329instr->const_index_sel = 1;330instr->format = FMT_32_32_FLOAT;331instr->format_comp_all = false;332instr->stride = 8;333instr->num_format_all = false;334instr->dst_swiz = IR2_FETCH_SWIZ_XY01;335336instr = (instr_fetch_vtx_t *)&info->dwords[info->fetch_info[1].offset];337instr->const_index = 26;338instr->const_index_sel = 0;339instr->format = FMT_32_32_32_FLOAT;340instr->format_comp_all = false;341instr->stride = 12;342instr->num_format_all = false;343instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;344}345346347