Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
4574 views
/*1* Copyright (C) 2013 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24*/2526#include "pipe/p_state.h"27#include "util/format/u_format.h"28#include "util/u_inlines.h"29#include "util/u_memory.h"30#include "util/u_string.h"3132#include "freedreno_draw.h"33#include "freedreno_resource.h"34#include "freedreno_state.h"3536#include "fd3_context.h"37#include "fd3_emit.h"38#include "fd3_format.h"39#include "fd3_gmem.h"40#include "fd3_program.h"41#include "fd3_zsa.h"4243static void44fd3_gmem_emit_set_prog(struct fd_context *ctx, struct fd3_emit *emit,45struct fd_program_stateobj *prog)46{47emit->skip_consts = true;48emit->key.vs = prog->vs;49emit->key.fs = prog->fs;50emit->prog = fd3_program_state(51ir3_cache_lookup(ctx->shader_cache, &emit->key, &ctx->debug));52/* reset the fd3_emit_get_*p cache */53emit->vs = NULL;54emit->fs = NULL;55}5657static void58emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,59struct pipe_surface **bufs, const uint32_t *bases, uint32_t bin_w,60bool decode_srgb)61{62enum a3xx_tile_mode tile_mode;63unsigned i;6465for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {66enum pipe_format pformat = 0;67enum a3xx_color_fmt format = 0;68enum a3xx_color_swap swap = WZYX;69bool srgb = false;70struct fd_resource *rsc = NULL;71uint32_t stride = 0;72uint32_t base = 0;73uint32_t offset = 0;7475if (bin_w) {76tile_mode = TILE_32X32;77} else {78tile_mode = LINEAR;79}8081if ((i < nr_bufs) && bufs[i]) {82struct pipe_surface *psurf = bufs[i];8384rsc = fd_resource(psurf->texture);85pformat = psurf->format;86/* In case we're drawing to Z32F_S8, the "color" actually goes to87* the stencil88*/89if (rsc->stencil) {90rsc = rsc->stencil;91pformat = rsc->b.b.format;92if (bases)93bases++;94}95format = fd3_pipe2color(pformat);96if (decode_srgb)97srgb = util_format_is_srgb(pformat);98else99pformat = util_format_linear(pformat);100101debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);102103offset = fd_resource_offset(rsc, psurf->u.tex.level,104psurf->u.tex.first_layer);105swap = rsc->layout.tile_mode ? WZYX : fd3_pipe2swap(pformat);106107if (bin_w) {108stride = bin_w << fdl_cpp_shift(&rsc->layout);109110if (bases) {111base = bases[i];112}113} else {114stride = fd_resource_pitch(rsc, psurf->u.tex.level);115tile_mode = rsc->layout.tile_mode;116}117} else if (i < nr_bufs && bases) {118base = bases[i];119}120121OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);122OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |123A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |124A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |125A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |126COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB));127if (bin_w || (i >= nr_bufs) || !bufs[i]) {128OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));129} else {130OUT_RELOC(ring, rsc->bo, offset, 0, -1);131}132133OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);134OUT_RING(ring, COND((i < nr_bufs) && bufs[i],135A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(136fd3_fs_output_format(pformat))));137}138}139140static bool141use_hw_binning(struct fd_batch *batch)142{143const struct fd_gmem_stateobj *gmem = batch->gmem_state;144145/* workaround: combining scissor optimization and hw binning146* seems problematic. Seems like we end up with a mismatch147* between binning pass and rendering pass, wrt. where the hw148* thinks the vertices belong. And the blob driver doesn't149* seem to implement anything like scissor optimization, so150* not entirely sure what I might be missing.151*152* But scissor optimization is mainly for window managers,153* which don't have many vertices (and therefore doesn't154* benefit much from binning pass).155*156* So for now just disable binning if scissor optimization is157* used.158*/159if (gmem->minx || gmem->miny)160return false;161162if ((gmem->maxpw * gmem->maxph) > 32)163return false;164165if ((gmem->maxpw > 15) || (gmem->maxph > 15))166return false;167168return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);169}170171/* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */172static void update_vsc_pipe(struct fd_batch *batch);173static void174emit_binning_workaround(struct fd_batch *batch) assert_dt175{176struct fd_context *ctx = batch->ctx;177const struct fd_gmem_stateobj *gmem = batch->gmem_state;178struct fd_ringbuffer *ring = batch->gmem;179struct fd3_emit emit = {180.debug = &ctx->debug,181.vtx = &ctx->solid_vbuf_state,182.key =183{184.vs = ctx->solid_prog.vs,185.fs = ctx->solid_prog.fs,186},187};188189fd3_gmem_emit_set_prog(ctx, &emit, &ctx->solid_prog);190191OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);192OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |193A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |194A3XX_RB_MODE_CONTROL_MRT(0));195OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |196A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |197A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));198199OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);200OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |201A3XX_RB_COPY_CONTROL_MODE(0) |202A3XX_RB_COPY_CONTROL_GMEM_BASE(0));203OUT_RELOC(ring, fd_resource(ctx->solid_vbuf)->bo, 0x20, 0,204-1); /* RB_COPY_DEST_BASE */205OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));206OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |207A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |208A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |209A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |210A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));211212OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);213OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |214A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |215A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));216217fd3_program_emit(ring, &emit, 0, NULL);218fd3_emit_vertex_bufs(ring, &emit);219220OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);221OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |222A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |223A3XX_HLSQ_CONTROL_0_REG_RESERVED2 |224A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);225OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |226A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);227OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));228OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */229230OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);231OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) |232A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20));233234OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);235OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |236A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |237A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));238239OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);240OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));241242OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);243OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |244A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |245A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |246A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |247A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |248A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |249A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |250A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));251252OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);253OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0));254255OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);256OUT_RING(ring, 0); /* VFD_INDEX_MIN */257OUT_RING(ring, 2); /* VFD_INDEX_MAX */258OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */259OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */260261OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);262OUT_RING(ring,263A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |264A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |265A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |266A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);267268OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);269OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |270A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1));271OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) |272A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1));273274OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);275OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |276A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));277OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) |278A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0));279280fd_wfi(batch, ring);281OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);282OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0));283OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0));284OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0));285OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0));286OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));287OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));288289OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);290OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE |291A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE |292A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE |293A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE |294A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE);295296OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);297OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |298A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));299300OUT_PKT3(ring, CP_DRAW_INDX_2, 5);301OUT_RING(ring, 0x00000000); /* viz query info. */302OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE, INDEX_SIZE_32_BIT,303IGNORE_VISIBILITY, 0));304OUT_RING(ring, 2); /* NumIndices */305OUT_RING(ring, 2);306OUT_RING(ring, 1);307fd_reset_wfi(batch);308309OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);310OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));311312OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);313OUT_RING(ring, 0x00000000);314315fd_wfi(batch, ring);316OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);317OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |318A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));319320OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);321OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |322A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |323A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));324325OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);326OUT_RING(ring, 0x00000000);327}328329/* transfer from gmem to system memory (ie. normal RAM) */330331static void332emit_gmem2mem_surf(struct fd_batch *batch,333enum adreno_rb_copy_control_mode mode, bool stencil,334uint32_t base, struct pipe_surface *psurf)335{336struct fd_ringbuffer *ring = batch->gmem;337struct fd_resource *rsc = fd_resource(psurf->texture);338enum pipe_format format = psurf->format;339340if (!rsc->valid)341return;342343if (stencil) {344rsc = rsc->stencil;345format = rsc->b.b.format;346}347348uint32_t offset =349fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);350uint32_t pitch = fd_resource_pitch(rsc, psurf->u.tex.level);351352debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);353354OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);355OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |356A3XX_RB_COPY_CONTROL_MODE(mode) |357A3XX_RB_COPY_CONTROL_GMEM_BASE(base) |358COND(format == PIPE_FORMAT_Z32_FLOAT ||359format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,360A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE));361362OUT_RELOC(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */363OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(pitch));364OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(rsc->layout.tile_mode) |365A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) |366A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |367A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |368A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format)));369370fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,371DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);372}373374static void375fd3_emit_tile_gmem2mem(struct fd_batch *batch,376const struct fd_tile *tile) assert_dt377{378struct fd_context *ctx = batch->ctx;379struct fd_ringbuffer *ring = batch->gmem;380const struct fd_gmem_stateobj *gmem = batch->gmem_state;381struct pipe_framebuffer_state *pfb = &batch->framebuffer;382struct fd3_emit emit = {.debug = &ctx->debug,383.vtx = &ctx->solid_vbuf_state,384.key = {385.vs = ctx->solid_prog.vs,386.fs = ctx->solid_prog.fs,387}};388int i;389390emit.prog = fd3_program_state(391ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));392393OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);394OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));395396OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);397OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |398A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |399A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |400A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |401A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |402A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |403A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |404A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));405406OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);407OUT_RING(ring, 0xff000000 | A3XX_RB_STENCILREFMASK_STENCILREF(0) |408A3XX_RB_STENCILREFMASK_STENCILMASK(0) |409A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));410OUT_RING(ring, 0xff000000 | A3XX_RB_STENCILREFMASK_STENCILREF(0) |411A3XX_RB_STENCILREFMASK_STENCILMASK(0) |412A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));413414OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);415OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));416417OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);418OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */419420fd_wfi(batch, ring);421OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);422OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width / 2.0 - 0.5));423OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width / 2.0));424OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height / 2.0 - 0.5));425OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height / 2.0));426OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));427OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));428429OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);430OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |431A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |432A3XX_RB_MODE_CONTROL_MRT(0));433434OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);435OUT_RING(ring,436A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |437A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |438A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |439A3XX_RB_RENDER_CONTROL_BIN_WIDTH(batch->gmem_state->bin_w));440441OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);442OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |443A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |444A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));445446OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);447OUT_RING(ring,448A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |449A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |450A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |451A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);452453OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);454OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |455A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));456OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |457A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));458459OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);460OUT_RING(ring, 0); /* VFD_INDEX_MIN */461OUT_RING(ring, 2); /* VFD_INDEX_MAX */462OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */463OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */464465fd3_program_emit(ring, &emit, 0, NULL);466fd3_emit_vertex_bufs(ring, &emit);467468if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {469struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);470if (!rsc->stencil || batch->resolve & FD_BUFFER_DEPTH)471emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, false,472gmem->zsbuf_base[0], pfb->zsbuf);473if (rsc->stencil && batch->resolve & FD_BUFFER_STENCIL)474emit_gmem2mem_surf(batch, RB_COPY_DEPTH_STENCIL, true,475gmem->zsbuf_base[1], pfb->zsbuf);476}477478if (batch->resolve & FD_BUFFER_COLOR) {479for (i = 0; i < pfb->nr_cbufs; i++) {480if (!pfb->cbufs[i])481continue;482if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))483continue;484emit_gmem2mem_surf(batch, RB_COPY_RESOLVE, false, gmem->cbuf_base[i],485pfb->cbufs[i]);486}487}488489OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);490OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |491A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |492A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));493494OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);495OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |496A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |497A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));498}499500/* transfer from system memory to gmem */501502static void503emit_mem2gmem_surf(struct fd_batch *batch, const uint32_t bases[],504struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w)505{506struct fd_ringbuffer *ring = batch->gmem;507struct pipe_surface *zsbufs[2];508509assert(bufs > 0);510511OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);512OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |513A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |514A3XX_RB_MODE_CONTROL_MRT(bufs - 1));515516emit_mrt(ring, bufs, psurf, bases, bin_w, false);517518if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT ||519psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {520/* Depth is stored as unorm in gmem, so we have to write it in using a521* special blit shader which writes depth.522*/523OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);524OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z |525A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |526A3XX_RB_DEPTH_CONTROL_Z_ENABLE |527A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE |528A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)));529530OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);531OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) |532A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32));533OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * batch->gmem_state->bin_w));534535if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) {536OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1);537OUT_RING(ring, 0);538} else {539/* The gmem_restore_tex logic will put the first buffer's stencil540* as color. Supply it with the proper information to make that541* happen.542*/543zsbufs[0] = zsbufs[1] = psurf[0];544psurf = zsbufs;545bufs = 2;546}547} else {548OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);549OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));550}551552fd3_emit_gmem_restore_tex(ring, psurf, bufs);553554fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,555DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);556}557558static void559fd3_emit_tile_mem2gmem(struct fd_batch *batch,560const struct fd_tile *tile) assert_dt561{562struct fd_context *ctx = batch->ctx;563const struct fd_gmem_stateobj *gmem = batch->gmem_state;564struct fd_ringbuffer *ring = batch->gmem;565struct pipe_framebuffer_state *pfb = &batch->framebuffer;566struct fd3_emit emit = {567.debug = &ctx->debug,568.vtx = &ctx->blit_vbuf_state,569.sprite_coord_enable = 1,570};571/* NOTE: They all use the same VP, this is for vtx bufs. */572fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);573574float x0, y0, x1, y1;575unsigned bin_w = tile->bin_w;576unsigned bin_h = tile->bin_h;577unsigned i;578579/* write texture coordinates to vertexbuf: */580x0 = ((float)tile->xoff) / ((float)pfb->width);581x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);582y0 = ((float)tile->yoff) / ((float)pfb->height);583y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);584585OUT_PKT3(ring, CP_MEM_WRITE, 5);586OUT_RELOC(ring, fd_resource(ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);587OUT_RING(ring, fui(x0));588OUT_RING(ring, fui(y0));589OUT_RING(ring, fui(x1));590OUT_RING(ring, fui(y1));591592fd3_emit_cache_flush(batch, ring);593594for (i = 0; i < 4; i++) {595OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);596OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |597A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |598A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));599600OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);601OUT_RING(602ring,603A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |604A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |605A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |606A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |607A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |608A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));609}610611OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);612OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) |613A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));614615fd_wfi(batch, ring);616OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);617OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));618619OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);620OUT_RING(ring, 0);621OUT_RING(ring, 0);622623OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);624OUT_RING(ring,625A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */626627fd_wfi(batch, ring);628OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);629OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w / 2.0 - 0.5));630OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w / 2.0));631OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h / 2.0 - 0.5));632OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h / 2.0));633OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));634OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));635636OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);637OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |638A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));639OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |640A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));641642OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);643OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |644A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));645OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |646A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));647648OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);649OUT_RING(ring, 0x2 | A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |650A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |651A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |652A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |653A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |654A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |655A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |656A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));657658OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);659OUT_RING(ring, 0); /* RB_STENCIL_INFO */660OUT_RING(ring, 0); /* RB_STENCIL_PITCH */661662OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);663OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |664A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |665A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));666667OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);668OUT_RING(ring,669A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) |670A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |671A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |672A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);673674OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);675OUT_RING(ring, 0); /* VFD_INDEX_MIN */676OUT_RING(ring, 2); /* VFD_INDEX_MAX */677OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */678OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */679680fd3_emit_vertex_bufs(ring, &emit);681682/* for gmem pitch/base calculations, we need to use the non-683* truncated tile sizes:684*/685bin_w = gmem->bin_w;686bin_h = gmem->bin_h;687688if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {689fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[pfb->nr_cbufs - 1]);690fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);691emit_mem2gmem_surf(batch, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs,692bin_w);693}694695if (fd_gmem_needs_restore(batch, tile,696FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {697if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&698pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) {699/* Non-float can use a regular color write. It's split over 8-bit700* components, so half precision is always sufficient.701*/702fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_prog[0]);703} else {704/* Float depth needs special blit shader that writes depth */705if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT)706fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_z);707else708fd3_gmem_emit_set_prog(ctx, &emit, &ctx->blit_zs);709}710fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);711emit_mem2gmem_surf(batch, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);712}713714OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);715OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |716A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |717A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));718719OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);720OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |721A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |722A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));723}724725static void726patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)727{728unsigned i;729for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {730struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);731*patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);732}733util_dynarray_clear(&batch->draw_patches);734}735736static void737patch_rbrc(struct fd_batch *batch, uint32_t val)738{739unsigned i;740for (i = 0; i < fd_patch_num_elements(&batch->rbrc_patches); i++) {741struct fd_cs_patch *patch = fd_patch_element(&batch->rbrc_patches, i);742*patch->cs = patch->val | val;743}744util_dynarray_clear(&batch->rbrc_patches);745}746747/* for rendering directly to system memory: */748static void749fd3_emit_sysmem_prep(struct fd_batch *batch) assert_dt750{751struct pipe_framebuffer_state *pfb = &batch->framebuffer;752struct fd_ringbuffer *ring = batch->gmem;753uint32_t i, pitch = 0;754755for (i = 0; i < pfb->nr_cbufs; i++) {756struct pipe_surface *psurf = pfb->cbufs[i];757if (!psurf)758continue;759struct fd_resource *rsc = fd_resource(psurf->texture);760pitch = fd_resource_pitch(rsc, psurf->u.tex.level) / rsc->layout.cpp;761}762763fd3_emit_restore(batch, ring);764765OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);766OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |767A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));768769emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);770771/* setup scissor/offset for current tile: */772OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);773OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | A3XX_RB_WINDOW_OFFSET_Y(0));774775OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);776OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |777A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));778OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |779A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));780781OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);782OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |783A3XX_RB_MODE_CONTROL_GMEM_BYPASS |784A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |785A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));786787patch_draws(batch, IGNORE_VISIBILITY);788patch_rbrc(batch, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));789}790791static void792update_vsc_pipe(struct fd_batch *batch) assert_dt793{794struct fd_context *ctx = batch->ctx;795const struct fd_gmem_stateobj *gmem = batch->gmem_state;796struct fd3_context *fd3_ctx = fd3_context(ctx);797struct fd_ringbuffer *ring = batch->gmem;798int i;799800OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);801OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */802803for (i = 0; i < 8; i++) {804const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];805806if (!ctx->vsc_pipe_bo[i]) {807ctx->vsc_pipe_bo[i] = fd_bo_new(808ctx->dev, 0x40000, 0, "vsc_pipe[%u]", i);809}810811OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3);812OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) |813A3XX_VSC_PIPE_CONFIG_Y(pipe->y) |814A3XX_VSC_PIPE_CONFIG_W(pipe->w) |815A3XX_VSC_PIPE_CONFIG_H(pipe->h));816OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,8170); /* VSC_PIPE[i].DATA_ADDRESS */818OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -81932); /* VSC_PIPE[i].DATA_LENGTH */820}821}822823static void824emit_binning_pass(struct fd_batch *batch) assert_dt825{826struct fd_context *ctx = batch->ctx;827const struct fd_gmem_stateobj *gmem = batch->gmem_state;828struct pipe_framebuffer_state *pfb = &batch->framebuffer;829struct fd_ringbuffer *ring = batch->gmem;830int i;831832uint32_t x1 = gmem->minx;833uint32_t y1 = gmem->miny;834uint32_t x2 = gmem->minx + gmem->width - 1;835uint32_t y2 = gmem->miny + gmem->height - 1;836837if (ctx->screen->gpu_id == 320) {838emit_binning_workaround(batch);839fd_wfi(batch, ring);840OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);841OUT_RING(ring, 0x00007fff);842}843844OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);845OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE);846847OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);848OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |849A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |850A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));851852OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);853OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |854A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));855856OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);857OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |858A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |859A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));860861/* setup scissor/offset for whole screen: */862OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);863OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(x1) | A3XX_RB_WINDOW_OFFSET_Y(y1));864865OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);866OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE);867868OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);869OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |870A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));871OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |872A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));873874OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);875OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |876A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |877A3XX_RB_MODE_CONTROL_MRT(0));878879for (i = 0; i < 4; i++) {880OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);881OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) |882A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |883A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0));884}885886OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);887OUT_RING(ring,888A3XX_PC_VSTREAM_CONTROL_SIZE(1) | A3XX_PC_VSTREAM_CONTROL_N(0));889890/* emit IB to binning drawcmds: */891fd3_emit_ib(ring, batch->binning);892fd_reset_wfi(batch);893894fd_wfi(batch, ring);895896/* and then put stuff back the way it was: */897898OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);899OUT_RING(ring, 0x00000000);900901OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);902OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE |903A3XX_SP_SP_CTRL_REG_CONSTMODE(1) |904A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |905A3XX_SP_SP_CTRL_REG_L0MODE(0));906907OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);908OUT_RING(ring, 0x00000000);909910OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);911OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |912A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |913A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));914915OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);916OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |917A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |918A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1));919OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |920A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |921A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));922923fd_event_write(batch, ring, CACHE_FLUSH);924fd_wfi(batch, ring);925926if (ctx->screen->gpu_id == 320) {927/* dummy-draw workaround: */928OUT_PKT3(ring, CP_DRAW_INDX, 3);929OUT_RING(ring, 0x00000000);930OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, INDEX_SIZE_IGN,931IGNORE_VISIBILITY, 0));932OUT_RING(ring, 0); /* NumIndices */933fd_reset_wfi(batch);934}935936OUT_PKT3(ring, CP_NOP, 4);937OUT_RING(ring, 0x00000000);938OUT_RING(ring, 0x00000000);939OUT_RING(ring, 0x00000000);940OUT_RING(ring, 0x00000000);941942fd_wfi(batch, ring);943944if (ctx->screen->gpu_id == 320) {945emit_binning_workaround(batch);946}947}948949/* before first tile */950static void951fd3_emit_tile_init(struct fd_batch *batch) assert_dt952{953struct fd_ringbuffer *ring = batch->gmem;954struct pipe_framebuffer_state *pfb = &batch->framebuffer;955const struct fd_gmem_stateobj *gmem = batch->gmem_state;956uint32_t rb_render_control;957958fd3_emit_restore(batch, ring);959960/* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated961* at the right and bottom edge tiles962*/963OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);964OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |965A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));966967update_vsc_pipe(batch);968969fd_wfi(batch, ring);970OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);971OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |972A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));973974if (use_hw_binning(batch)) {975/* emit hw binning pass: */976emit_binning_pass(batch);977978patch_draws(batch, USE_VISIBILITY);979} else {980patch_draws(batch, IGNORE_VISIBILITY);981}982983rb_render_control = A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |984A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w);985986patch_rbrc(batch, rb_render_control);987}988989/* before mem2gmem */990static void991fd3_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)992{993struct fd_ringbuffer *ring = batch->gmem;994struct pipe_framebuffer_state *pfb = &batch->framebuffer;995996OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);997OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |998A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |999A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));1000}10011002/* before IB to rendering cmds: */1003static void1004fd3_emit_tile_renderprep(struct fd_batch *batch,1005const struct fd_tile *tile) assert_dt1006{1007struct fd_context *ctx = batch->ctx;1008struct fd3_context *fd3_ctx = fd3_context(ctx);1009struct fd_ringbuffer *ring = batch->gmem;1010const struct fd_gmem_stateobj *gmem = batch->gmem_state;1011struct pipe_framebuffer_state *pfb = &batch->framebuffer;10121013uint32_t x1 = tile->xoff;1014uint32_t y1 = tile->yoff;1015uint32_t x2 = tile->xoff + tile->bin_w - 1;1016uint32_t y2 = tile->yoff + tile->bin_h - 1;10171018uint32_t reg;10191020OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);1021reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);1022if (pfb->zsbuf) {1023reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));1024}1025OUT_RING(ring, reg);1026if (pfb->zsbuf) {1027struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);1028OUT_RING(ring,1029A3XX_RB_DEPTH_PITCH(gmem->bin_w << fdl_cpp_shift(&rsc->layout)));1030if (rsc->stencil) {1031OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);1032OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));1033OUT_RING(ring, A3XX_RB_STENCIL_PITCH(gmem->bin_w << fdl_cpp_shift(1034&rsc->stencil->layout)));1035}1036} else {1037OUT_RING(ring, 0x00000000);1038}10391040if (use_hw_binning(batch)) {1041const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];1042struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];10431044assert(pipe->w && pipe->h);10451046fd_event_write(batch, ring, HLSQ_FLUSH);1047fd_wfi(batch, ring);10481049OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);1050OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |1051A3XX_PC_VSTREAM_CONTROL_N(tile->n));10521053OUT_PKT3(ring, CP_SET_BIN_DATA, 2);1054OUT_RELOC(ring, pipe_bo, 0, 0,10550); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */1056OUT_RELOC(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <-1057VSC_SIZE_ADDRESS + (p * 4) */1058(tile->p * 4), 0, 0);1059} else {1060OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);1061OUT_RING(ring, 0x00000000);1062}10631064OUT_PKT3(ring, CP_SET_BIN, 3);1065OUT_RING(ring, 0x00000000);1066OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));1067OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));10681069emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w,1070true);10711072/* setup scissor/offset for current tile: */1073OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);1074OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) |1075A3XX_RB_WINDOW_OFFSET_Y(tile->yoff));10761077OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);1078OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |1079A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));1080OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |1081A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));1082}10831084void1085fd3_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis1086{1087struct fd_context *ctx = fd_context(pctx);10881089ctx->emit_sysmem_prep = fd3_emit_sysmem_prep;1090ctx->emit_tile_init = fd3_emit_tile_init;1091ctx->emit_tile_prep = fd3_emit_tile_prep;1092ctx->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem;1093ctx->emit_tile_renderprep = fd3_emit_tile_renderprep;1094ctx->emit_tile_gmem2mem = fd3_emit_tile_gmem2mem;1095}109610971098