Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c
4574 views
/*1* Copyright (C) 2016 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24*/2526#include "pipe/p_state.h"27#include "util/format/u_format.h"28#include "util/u_inlines.h"29#include "util/u_memory.h"30#include "util/u_string.h"3132#include "freedreno_draw.h"33#include "freedreno_resource.h"34#include "freedreno_state.h"3536#include "fd5_context.h"37#include "fd5_draw.h"38#include "fd5_emit.h"39#include "fd5_format.h"40#include "fd5_gmem.h"41#include "fd5_program.h"42#include "fd5_zsa.h"4344static void45emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,46struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)47{48enum a5xx_tile_mode tile_mode;49unsigned i;5051for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {52enum a5xx_color_fmt format = 0;53enum a3xx_color_swap swap = WZYX;54bool srgb = false, sint = false, uint = false;55struct fd_resource *rsc = NULL;56struct fdl_slice *slice = NULL;57uint32_t stride = 0;58uint32_t size = 0;59uint32_t base = 0;60uint32_t offset = 0;6162if (gmem) {63tile_mode = TILE5_2;64} else {65tile_mode = TILE5_LINEAR;66}6768if ((i < nr_bufs) && bufs[i]) {69struct pipe_surface *psurf = bufs[i];70enum pipe_format pformat = psurf->format;7172rsc = fd_resource(psurf->texture);7374slice = fd_resource_slice(rsc, psurf->u.tex.level);75format = fd5_pipe2color(pformat);76swap = fd5_pipe2swap(pformat);77srgb = util_format_is_srgb(pformat);78sint = util_format_is_pure_sint(pformat);79uint = util_format_is_pure_uint(pformat);8081debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);8283offset = fd_resource_offset(rsc, psurf->u.tex.level,84psurf->u.tex.first_layer);8586if (gmem) {87stride = gmem->bin_w * gmem->cbuf_cpp[i];88size = stride * gmem->bin_h;89base = gmem->cbuf_base[i];90} else {91stride = fd_resource_pitch(rsc, psurf->u.tex.level);92size = slice->size0;9394tile_mode =95fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);96}97}9899OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);100OUT_RING(101ring,102A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |103A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |104A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |105COND(gmem,1060x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */107COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));108OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));109OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));110if (gmem || (i >= nr_bufs) || !bufs[i]) {111OUT_RING(ring, base); /* RB_MRT[i].BASE_LO */112OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */113} else {114debug_assert((offset + size) <= fd_bo_size(rsc->bo));115OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */116}117118OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);119OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |120COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |121COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |122COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));123124/* when we support UBWC, these would be the system memory125* addr/pitch/etc:126*/127OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);128OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */129OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */130OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));131OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));132}133}134135static void136emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,137const struct fd_gmem_stateobj *gmem)138{139if (zsbuf) {140struct fd_resource *rsc = fd_resource(zsbuf->texture);141enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);142uint32_t cpp = rsc->layout.cpp;143uint32_t stride = 0;144uint32_t size = 0;145146if (gmem) {147stride = cpp * gmem->bin_w;148size = stride * gmem->bin_h;149} else {150stride = fd_resource_pitch(rsc, 0);151size = fd_resource_slice(rsc, 0)->size0;152}153154OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);155OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));156if (gmem) {157OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */158OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */159} else {160OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */161}162OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));163OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));164165OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);166OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));167168OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);169OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */170OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */171OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */172173if (rsc->lrz) {174OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);175OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);176OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));177178OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);179OUT_RELOC(ring, rsc->lrz, 0, 0, 0);180} else {181OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);182OUT_RING(ring, 0x00000000);183OUT_RING(ring, 0x00000000);184OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */185186OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);187OUT_RING(ring, 0x00000000);188OUT_RING(ring, 0x00000000);189}190191if (rsc->stencil) {192if (gmem) {193stride = 1 * gmem->bin_w;194size = stride * gmem->bin_h;195} else {196stride = fd_resource_pitch(rsc->stencil, 0);197size = fd_resource_slice(rsc->stencil, 0)->size0;198}199200OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);201OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);202if (gmem) {203OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */204OUT_RING(ring, 0x00000000); /* RB_STENCIL_BASE_HI */205} else {206OUT_RELOC(ring, rsc->stencil->bo, 0, 0,2070); /* RB_STENCIL_BASE_LO/HI */208}209OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));210OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));211} else {212OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);213OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */214}215} else {216OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);217OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));218OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */219OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */220OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */221OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */222223OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);224OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));225226OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);227OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */228OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */229OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */230231OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);232OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */233}234}235236static bool237use_hw_binning(struct fd_batch *batch)238{239const struct fd_gmem_stateobj *gmem = batch->gmem_state;240241if ((gmem->maxpw * gmem->maxph) > 32)242return false;243244if ((gmem->maxpw > 15) || (gmem->maxph > 15))245return false;246247return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&248(batch->num_draws > 0);249}250251static void252patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)253{254unsigned i;255for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {256struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);257*patch->cs = patch->val | DRAW4(0, 0, 0, vismode);258}259util_dynarray_clear(&batch->draw_patches);260}261262static void263update_vsc_pipe(struct fd_batch *batch) assert_dt264{265struct fd_context *ctx = batch->ctx;266struct fd5_context *fd5_ctx = fd5_context(ctx);267const struct fd_gmem_stateobj *gmem = batch->gmem_state;268struct fd_ringbuffer *ring = batch->gmem;269int i;270271OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);272OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |273A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));274OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */275276OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);277OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */278OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */279280OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);281for (i = 0; i < 16; i++) {282const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];283OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |284A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |285A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |286A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));287}288289OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);290for (i = 0; i < 16; i++) {291if (!ctx->vsc_pipe_bo[i]) {292ctx->vsc_pipe_bo[i] = fd_bo_new(293ctx->dev, 0x20000, 0, "vsc_pipe[%u]", i);294}295OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,2960); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */297}298299OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);300for (i = 0; i < 16; i++) {301OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -30232); /* VSC_PIPE_DATA_LENGTH[i] */303}304}305306static void307emit_binning_pass(struct fd_batch *batch) assert_dt308{309struct fd_ringbuffer *ring = batch->gmem;310const struct fd_gmem_stateobj *gmem = batch->gmem_state;311312uint32_t x1 = gmem->minx;313uint32_t y1 = gmem->miny;314uint32_t x2 = gmem->minx + gmem->width - 1;315uint32_t y2 = gmem->miny + gmem->height - 1;316317fd5_set_render_mode(batch->ctx, ring, BINNING);318319OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);320OUT_RING(ring,321A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));322323OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);324OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |325A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));326OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |327A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));328329OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);330OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));331OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));332333update_vsc_pipe(batch);334335OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);336OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);337338fd5_event_write(batch, ring, UNK_2C, false);339340OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);341OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));342343/* emit IB to binning drawcmds: */344fd5_emit_ib(ring, batch->binning);345346fd_reset_wfi(batch);347348fd5_event_write(batch, ring, UNK_2D, false);349350fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);351352// TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)353354fd_wfi(batch, ring);355356OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);357OUT_RING(ring, 0x0);358}359360/* before first tile */361static void362fd5_emit_tile_init(struct fd_batch *batch) assert_dt363{364struct fd_ringbuffer *ring = batch->gmem;365struct pipe_framebuffer_state *pfb = &batch->framebuffer;366367fd5_emit_restore(batch, ring);368369if (batch->prologue)370fd5_emit_ib(ring, batch->prologue);371372fd5_emit_lrz_flush(batch, ring);373374OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);375OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */376377OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);378OUT_RING(ring, 0x0);379380OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);381OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */382383OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);384OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */385386/* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */387fd_wfi(batch, ring);388OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);389OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */390391emit_zs(ring, pfb->zsbuf, batch->gmem_state);392emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);393394/* Enable stream output for the first pass (likely the binning). */395OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);396OUT_RING(ring, 0);397398if (use_hw_binning(batch)) {399emit_binning_pass(batch);400401/* Disable stream output after binning, since each VS output should get402* streamed out once.403*/404OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);405OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);406407fd5_emit_lrz_flush(batch, ring);408patch_draws(batch, USE_VISIBILITY);409} else {410patch_draws(batch, IGNORE_VISIBILITY);411}412413fd5_set_render_mode(batch->ctx, ring, GMEM);414415/* XXX If we're in gmem mode but not doing HW binning, then after the first416* tile we should disable stream output (fd6_gmem.c doesn't do that either).417*/418}419420/* before mem2gmem */421static void422fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt423{424struct fd_context *ctx = batch->ctx;425const struct fd_gmem_stateobj *gmem = batch->gmem_state;426struct fd5_context *fd5_ctx = fd5_context(ctx);427struct fd_ringbuffer *ring = batch->gmem;428429uint32_t x1 = tile->xoff;430uint32_t y1 = tile->yoff;431uint32_t x2 = tile->xoff + tile->bin_w - 1;432uint32_t y2 = tile->yoff + tile->bin_h - 1;433434OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);435OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |436A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));437OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |438A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));439440OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);441OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));442OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));443444if (use_hw_binning(batch)) {445const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];446struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];447448OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);449450OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);451OUT_RING(ring, 0x0);452453OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);454OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |455CP_SET_BIN_DATA5_0_VSC_N(tile->n));456OUT_RELOC(ring, pipe_bo, 0, 0, 0); /* VSC_PIPE[p].DATA_ADDRESS */457OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */458(tile->p * 4), 0, 0);459} else {460OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);461OUT_RING(ring, 0x1);462}463464OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);465OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));466}467468/*469* transfer from system memory to gmem470*/471472static void473emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,474struct pipe_surface *psurf, enum a5xx_blit_buf buf)475{476struct fd_ringbuffer *ring = batch->gmem;477const struct fd_gmem_stateobj *gmem = batch->gmem_state;478struct fd_resource *rsc = fd_resource(psurf->texture);479uint32_t stride, size;480481debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);482483if (buf == BLIT_S)484rsc = rsc->stencil;485486if ((buf == BLIT_ZS) || (buf == BLIT_S)) {487// XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't488// know otherwise how to go from linear in sysmem to tiled in gmem.489// possibly we want to flip this around gmem2mem and keep depth490// tiled in sysmem (and fixup sampler state to assume tiled).. this491// might be required for doing depth/stencil in bypass mode?492struct fdl_slice *slice = fd_resource_slice(rsc, 0);493enum a5xx_color_fmt format =494fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));495496OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);497OUT_RING(ring,498A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |499A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |500A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));501OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, 0)));502OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0));503OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* BASE_LO/HI */504505buf = BLIT_MRT0;506}507508stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);509size = stride * gmem->bin_h;510511OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);512OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */513OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */514OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */515OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */516517OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);518OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */519OUT_RING(ring, base); /* RB_BLIT_DST_LO */520OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */521OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));522OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));523524OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);525OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));526527fd5_emit_blit(batch, ring);528}529530static void531fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)532{533struct fd_ringbuffer *ring = batch->gmem;534const struct fd_gmem_stateobj *gmem = batch->gmem_state;535struct pipe_framebuffer_state *pfb = &batch->framebuffer;536537/*538* setup mrt and zs with system memory base addresses:539*/540541emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);542// emit_zs(ring, pfb->zsbuf, NULL);543544OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);545OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |546A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);547548if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {549unsigned i;550for (i = 0; i < pfb->nr_cbufs; i++) {551if (!pfb->cbufs[i])552continue;553if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))554continue;555emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],556BLIT_MRT0 + i);557}558}559560if (fd_gmem_needs_restore(batch, tile,561FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {562struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);563564if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))565emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);566if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))567emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);568}569}570571/* before IB to rendering cmds: */572static void573fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)574{575struct fd_ringbuffer *ring = batch->gmem;576const struct fd_gmem_stateobj *gmem = batch->gmem_state;577struct pipe_framebuffer_state *pfb = &batch->framebuffer;578579OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);580OUT_RING(ring,581A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));582583emit_zs(ring, pfb->zsbuf, gmem);584emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);585586enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);587588OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);589OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));590OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |591COND(samples == MSAA_ONE,592A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));593594OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);595OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));596OUT_RING(ring,597A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |598COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));599600OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);601OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));602OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |603COND(samples == MSAA_ONE,604A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));605}606607/*608* transfer from gmem to system memory (ie. normal RAM)609*/610611static void612emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,613struct pipe_surface *psurf, enum a5xx_blit_buf buf)614{615struct fd_ringbuffer *ring = batch->gmem;616struct fd_resource *rsc = fd_resource(psurf->texture);617struct fdl_slice *slice;618bool tiled;619uint32_t offset, pitch;620621if (!rsc->valid)622return;623624if (buf == BLIT_S)625rsc = rsc->stencil;626627slice = fd_resource_slice(rsc, psurf->u.tex.level);628offset =629fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);630pitch = fd_resource_pitch(rsc, psurf->u.tex.level);631632debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);633634OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);635OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */636OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */637OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */638OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */639640tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);641642OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);643OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */644COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));645OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */646OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));647OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));648649OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);650OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));651652// bool msaa_resolve = pfb->samples > 1;653bool msaa_resolve = false;654OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);655OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));656657fd5_emit_blit(batch, ring);658}659660static void661fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)662{663const struct fd_gmem_stateobj *gmem = batch->gmem_state;664struct pipe_framebuffer_state *pfb = &batch->framebuffer;665666if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {667struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);668669if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))670emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);671if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))672emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);673}674675if (batch->resolve & FD_BUFFER_COLOR) {676unsigned i;677for (i = 0; i < pfb->nr_cbufs; i++) {678if (!pfb->cbufs[i])679continue;680if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))681continue;682emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],683BLIT_MRT0 + i);684}685}686}687688static void689fd5_emit_tile_fini(struct fd_batch *batch) assert_dt690{691struct fd_ringbuffer *ring = batch->gmem;692693OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);694OUT_RING(ring, 0x0);695696fd5_emit_lrz_flush(batch, ring);697698fd5_cache_flush(batch, ring);699fd5_set_render_mode(batch->ctx, ring, BYPASS);700}701702static void703fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt704{705struct fd_ringbuffer *ring = batch->gmem;706707fd5_emit_restore(batch, ring);708709fd5_emit_lrz_flush(batch, ring);710711if (batch->prologue)712fd5_emit_ib(ring, batch->prologue);713714OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);715OUT_RING(ring, 0x0);716717fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);718719OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);720OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */721722OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);723OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */724725/* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */726fd_wfi(batch, ring);727OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);728OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */729730OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);731OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |732A5XX_RB_CNTL_BYPASS);733734/* remaining setup below here does not apply to blit/compute: */735if (batch->nondraw)736return;737738struct pipe_framebuffer_state *pfb = &batch->framebuffer;739740OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);741OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |742A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));743OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |744A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));745746OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);747OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));748OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |749A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));750751OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);752OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));753754/* Enable stream output, since there's no binning pass to put it in. */755OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);756OUT_RING(ring, 0);757758OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);759OUT_RING(ring, 0x1);760761patch_draws(batch, IGNORE_VISIBILITY);762763emit_zs(ring, pfb->zsbuf, NULL);764emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);765766OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);767OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));768OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |769A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);770771OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);772OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));773OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |774A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);775776OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);777OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));778OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |779A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);780}781782static void783fd5_emit_sysmem_fini(struct fd_batch *batch)784{785struct fd_ringbuffer *ring = batch->gmem;786787OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);788OUT_RING(ring, 0x0);789790fd5_emit_lrz_flush(batch, ring);791792fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);793fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);794}795796void797fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis798{799struct fd_context *ctx = fd_context(pctx);800801ctx->emit_tile_init = fd5_emit_tile_init;802ctx->emit_tile_prep = fd5_emit_tile_prep;803ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;804ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;805ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;806ctx->emit_tile_fini = fd5_emit_tile_fini;807ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;808ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;809}810811812