Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
4574 views
/*1* Copyright (C) 2016 Rob Clark <[email protected]>2* Copyright © 2018 Google, Inc.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,20* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*23* Authors:24* Rob Clark <[email protected]>25*/2627#include <stdio.h>2829#include "pipe/p_state.h"30#include "util/format/u_format.h"31#include "util/u_inlines.h"32#include "util/u_memory.h"33#include "util/u_string.h"3435#include "freedreno_draw.h"36#include "freedreno_resource.h"37#include "freedreno_state.h"38#include "freedreno_tracepoints.h"3940#include "fd6_blitter.h"41#include "fd6_context.h"42#include "fd6_draw.h"43#include "fd6_emit.h"44#include "fd6_format.h"45#include "fd6_gmem.h"46#include "fd6_pack.h"47#include "fd6_program.h"48#include "fd6_resource.h"49#include "fd6_zsa.h"5051/**52* Emits the flags registers, suitable for RB_MRT_FLAG_BUFFER,53* RB_DEPTH_FLAG_BUFFER, SP_PS_2D_SRC_FLAGS, and RB_BLIT_FLAG_DST.54*/55void56fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,57int level, int layer)58{59if (fd_resource_ubwc_enabled(rsc, level)) {60OUT_RELOC(ring, rsc->bo, fd_resource_ubwc_offset(rsc, level, layer), 0,610);62OUT_RING(ring, A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(63fdl_ubwc_pitch(&rsc->layout, level)) |64A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(65rsc->layout.ubwc_layer_size >> 2));66} else {67OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */68OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */69OUT_RING(ring, 0x00000000);70}71}7273static void74emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,75const struct fd_gmem_stateobj *gmem)76{77unsigned srgb_cntl = 0;78unsigned i;7980unsigned max_layer_index = 0;8182for (i = 0; i < pfb->nr_cbufs; i++) {83enum a6xx_format format = 0;84enum a3xx_color_swap swap = WZYX;85bool sint = false, uint = false;86struct fd_resource *rsc = NULL;87struct fdl_slice *slice = NULL;88uint32_t stride = 0;89uint32_t array_stride = 0;90uint32_t offset;91uint32_t tile_mode;9293if (!pfb->cbufs[i])94continue;9596struct pipe_surface *psurf = pfb->cbufs[i];97enum pipe_format pformat = psurf->format;98rsc = fd_resource(psurf->texture);99if (!rsc->bo)100continue;101102uint32_t base = gmem ? gmem->cbuf_base[i] : 0;103slice = fd_resource_slice(rsc, psurf->u.tex.level);104format = fd6_pipe2color(pformat);105sint = util_format_is_pure_sint(pformat);106uint = util_format_is_pure_uint(pformat);107108if (util_format_is_srgb(pformat))109srgb_cntl |= (1 << i);110111offset =112fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);113114stride = fd_resource_pitch(rsc, psurf->u.tex.level);115array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);116swap = fd6_resource_swap(rsc, pformat);117118tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);119max_layer_index = psurf->u.tex.last_layer - psurf->u.tex.first_layer;120121debug_assert((offset + slice->size0) <= fd_bo_size(rsc->bo));122123OUT_REG(124ring,125A6XX_RB_MRT_BUF_INFO(i, .color_format = format,126.color_tile_mode = tile_mode, .color_swap = swap),127A6XX_RB_MRT_PITCH(i, .a6xx_rb_mrt_pitch = stride),128A6XX_RB_MRT_ARRAY_PITCH(i, .a6xx_rb_mrt_array_pitch = array_stride),129A6XX_RB_MRT_BASE(i, .bo = rsc->bo, .bo_offset = offset),130A6XX_RB_MRT_BASE_GMEM(i, .unknown = base));131132OUT_REG(ring, A6XX_SP_FS_MRT_REG(i, .color_format = format,133.color_sint = sint, .color_uint = uint));134135OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);136fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,137psurf->u.tex.first_layer);138}139140OUT_REG(ring, A6XX_RB_SRGB_CNTL(.dword = srgb_cntl));141OUT_REG(ring, A6XX_SP_SRGB_CNTL(.dword = srgb_cntl));142143OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index));144}145146static void147emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,148const struct fd_gmem_stateobj *gmem)149{150if (zsbuf) {151struct fd_resource *rsc = fd_resource(zsbuf->texture);152enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format);153uint32_t stride = fd_resource_pitch(rsc, 0);154uint32_t array_stride = fd_resource_layer_stride(rsc, 0);155uint32_t base = gmem ? gmem->zsbuf_base[0] : 0;156uint32_t offset =157fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);158159OUT_REG(160ring, A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt),161A6XX_RB_DEPTH_BUFFER_PITCH(.a6xx_rb_depth_buffer_pitch = stride),162A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(.a6xx_rb_depth_buffer_array_pitch =163array_stride),164A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),165A6XX_RB_DEPTH_BUFFER_BASE_GMEM(.dword = base));166167OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));168169OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, 3);170fd6_emit_flag_reference(ring, rsc, zsbuf->u.tex.level,171zsbuf->u.tex.first_layer);172173if (rsc->lrz) {174OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(.bo = rsc->lrz),175A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = rsc->lrz_pitch),176// XXX a6xx seems to use a different buffer here.. not sure177// what for..178A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());179} else {180OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);181OUT_RING(ring, 0x00000000);182OUT_RING(ring, 0x00000000);183OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */184OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */185OUT_RING(ring, 0x00000000);186}187188/* NOTE: blob emits GRAS_LRZ_CNTL plus GRAZ_LRZ_BUFFER_BASE189* plus this CP_EVENT_WRITE at the end in it's own IB..190*/191OUT_PKT7(ring, CP_EVENT_WRITE, 1);192OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25));193194if (rsc->stencil) {195stride = fd_resource_pitch(rsc->stencil, 0);196array_stride = fd_resource_layer_stride(rsc->stencil, 0);197uint32_t base = gmem ? gmem->zsbuf_base[1] : 0;198199OUT_REG(ring, A6XX_RB_STENCIL_INFO(.separate_stencil = true),200A6XX_RB_STENCIL_BUFFER_PITCH(.a6xx_rb_stencil_buffer_pitch =201stride),202A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(203.a6xx_rb_stencil_buffer_array_pitch = array_stride),204A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo),205A6XX_RB_STENCIL_BUFFER_BASE_GMEM(.dword = base));206} else {207OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));208}209} else {210OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);211OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));212OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */213OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */214OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */215OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */216OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */217218OUT_REG(ring,219A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));220221OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);222OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */223OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */224OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */225OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */226OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */227228OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));229}230}231232static bool233use_hw_binning(struct fd_batch *batch)234{235const struct fd_gmem_stateobj *gmem = batch->gmem_state;236237if ((gmem->maxpw * gmem->maxph) > 32)238return false;239240return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) >= 2) &&241(batch->num_draws > 0);242}243244static void245patch_fb_read_gmem(struct fd_batch *batch)246{247unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);248if (!num_patches)249return;250251struct fd_screen *screen = batch->ctx->screen;252const struct fd_gmem_stateobj *gmem = batch->gmem_state;253struct pipe_framebuffer_state *pfb = &batch->framebuffer;254struct pipe_surface *psurf = pfb->cbufs[0];255uint32_t texconst0 = fd6_tex_const_0(256psurf->texture, psurf->u.tex.level, psurf->format, PIPE_SWIZZLE_X,257PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);258259/* always TILE6_2 mode in GMEM.. which also means no swap: */260texconst0 &=261~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);262texconst0 |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);263264for (unsigned i = 0; i < num_patches; i++) {265struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);266patch->cs[0] = texconst0;267patch->cs[2] = A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]) |268A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);269patch->cs[4] = A6XX_TEX_CONST_4_BASE_LO(screen->gmem_base);270patch->cs[5] = A6XX_TEX_CONST_5_BASE_HI(screen->gmem_base >> 32) |271A6XX_TEX_CONST_5_DEPTH(1);272}273util_dynarray_clear(&batch->fb_read_patches);274}275276static void277patch_fb_read_sysmem(struct fd_batch *batch)278{279unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);280if (!num_patches)281return;282283struct pipe_framebuffer_state *pfb = &batch->framebuffer;284struct pipe_surface *psurf = pfb->cbufs[0];285if (!psurf)286return;287288struct fd_resource *rsc = fd_resource(psurf->texture);289unsigned lvl = psurf->u.tex.level;290unsigned layer = psurf->u.tex.first_layer;291bool ubwc_enabled = fd_resource_ubwc_enabled(rsc, lvl);292uint64_t iova = fd_bo_get_iova(rsc->bo) + fd_resource_offset(rsc, lvl, layer);293uint64_t ubwc_iova = fd_bo_get_iova(rsc->bo) + fd_resource_ubwc_offset(rsc, lvl, layer);294uint32_t texconst0 = fd6_tex_const_0(295psurf->texture, psurf->u.tex.level, psurf->format, PIPE_SWIZZLE_X,296PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);297uint32_t block_width, block_height;298fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);299300for (unsigned i = 0; i < num_patches; i++) {301struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);302patch->cs[0] = texconst0;303patch->cs[2] = A6XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)) |304A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);305/* This is cheating a bit, since we can't use OUT_RELOC() here.. but306* the render target will already have a reloc emitted for RB_MRT state,307* so we can get away with manually patching in the address here:308*/309patch->cs[4] = A6XX_TEX_CONST_4_BASE_LO(iova);310patch->cs[5] = A6XX_TEX_CONST_5_BASE_HI(iova >> 32) |311A6XX_TEX_CONST_5_DEPTH(1);312313if (!ubwc_enabled)314continue;315316patch->cs[3] |= A6XX_TEX_CONST_3_FLAG;317patch->cs[7] = A6XX_TEX_CONST_7_FLAG_LO(ubwc_iova);318patch->cs[8] = A6XX_TEX_CONST_8_FLAG_HI(ubwc_iova >> 32);319patch->cs[9] = A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(320rsc->layout.ubwc_layer_size >> 2);321patch->cs[10] =322A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(323fdl_ubwc_pitch(&rsc->layout, lvl)) |324A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(325DIV_ROUND_UP(u_minify(psurf->texture->width0, lvl), block_width))) |326A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(327DIV_ROUND_UP(u_minify(psurf->texture->height0, lvl), block_height)));328}329util_dynarray_clear(&batch->fb_read_patches);330}331332static void333update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,334bool binning)335{336struct fd_ringbuffer *ring = batch->gmem;337struct fd_screen *screen = batch->ctx->screen;338uint32_t cntl = 0;339bool depth_ubwc_enable = false;340uint32_t mrts_ubwc_enable = 0;341int i;342343if (pfb->zsbuf) {344struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);345depth_ubwc_enable =346fd_resource_ubwc_enabled(rsc, pfb->zsbuf->u.tex.level);347}348349for (i = 0; i < pfb->nr_cbufs; i++) {350if (!pfb->cbufs[i])351continue;352353struct pipe_surface *psurf = pfb->cbufs[i];354struct fd_resource *rsc = fd_resource(psurf->texture);355if (!rsc->bo)356continue;357358if (fd_resource_ubwc_enabled(rsc, psurf->u.tex.level))359mrts_ubwc_enable |= 1 << i;360}361362cntl |= A6XX_RB_RENDER_CNTL_UNK4;363if (binning)364cntl |= A6XX_RB_RENDER_CNTL_BINNING;365366if (screen->info->a6xx.has_cp_reg_write) {367OUT_PKT7(ring, CP_REG_WRITE, 3);368OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));369OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL);370} else {371OUT_PKT4(ring, REG_A6XX_RB_RENDER_CNTL, 1);372}373OUT_RING(ring, cntl |374COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) |375A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable));376}377378/* extra size to store VSC_DRAW_STRM_SIZE: */379#define VSC_DRAW_STRM_SIZE(pitch) ((pitch)*32 + 0x100)380#define VSC_PRIM_STRM_SIZE(pitch) ((pitch)*32)381382static void383update_vsc_pipe(struct fd_batch *batch)384{385struct fd_context *ctx = batch->ctx;386struct fd6_context *fd6_ctx = fd6_context(ctx);387const struct fd_gmem_stateobj *gmem = batch->gmem_state;388struct fd_ringbuffer *ring = batch->gmem;389int i;390391if (batch->draw_strm_bits / 8 > fd6_ctx->vsc_draw_strm_pitch) {392if (fd6_ctx->vsc_draw_strm)393fd_bo_del(fd6_ctx->vsc_draw_strm);394fd6_ctx->vsc_draw_strm = NULL;395/* Note: probably only need to align to 0x40, but aligning stronger396* reduces the odds that we will have to realloc again on the next397* frame:398*/399fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits / 8, 0x4000);400mesa_logd("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x",401fd6_ctx->vsc_draw_strm_pitch);402}403404if (batch->prim_strm_bits / 8 > fd6_ctx->vsc_prim_strm_pitch) {405if (fd6_ctx->vsc_prim_strm)406fd_bo_del(fd6_ctx->vsc_prim_strm);407fd6_ctx->vsc_prim_strm = NULL;408fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits / 8, 0x4000);409mesa_logd("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x",410fd6_ctx->vsc_prim_strm_pitch);411}412413if (!fd6_ctx->vsc_draw_strm) {414fd6_ctx->vsc_draw_strm = fd_bo_new(415ctx->screen->dev, VSC_DRAW_STRM_SIZE(fd6_ctx->vsc_draw_strm_pitch),4160, "vsc_draw_strm");417}418419if (!fd6_ctx->vsc_prim_strm) {420fd6_ctx->vsc_prim_strm = fd_bo_new(421ctx->screen->dev, VSC_PRIM_STRM_SIZE(fd6_ctx->vsc_prim_strm_pitch),4220, "vsc_prim_strm");423}424425OUT_REG(426ring, A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),427A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = fd6_ctx->vsc_draw_strm,428.bo_offset =42932 * fd6_ctx->vsc_draw_strm_pitch));430431OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x, .ny = gmem->nbins_y));432433OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);434for (i = 0; i < 32; i++) {435const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];436OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |437A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |438A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |439A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h));440}441442OUT_REG(443ring, A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),444A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),445A6XX_VSC_PRIM_STRM_LIMIT(.dword = fd6_ctx->vsc_prim_strm_pitch - 64));446447OUT_REG(448ring, A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),449A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),450A6XX_VSC_DRAW_STRM_LIMIT(.dword = fd6_ctx->vsc_draw_strm_pitch - 64));451}452453/*454* If overflow is detected, either 0x1 (VSC_DRAW_STRM overflow) or 0x3455* (VSC_PRIM_STRM overflow) plus the size of the overflowed buffer is456* written to control->vsc_overflow. This allows the CPU to457* detect which buffer overflowed (and, since the current size is458* encoded as well, this protects against already-submitted but459* not executed batches from fooling the CPU into increasing the460* size again unnecessarily).461*/462static void463emit_vsc_overflow_test(struct fd_batch *batch)464{465struct fd_ringbuffer *ring = batch->gmem;466const struct fd_gmem_stateobj *gmem = batch->gmem_state;467struct fd6_context *fd6_ctx = fd6_context(batch->ctx);468469debug_assert((fd6_ctx->vsc_draw_strm_pitch & 0x3) == 0);470debug_assert((fd6_ctx->vsc_prim_strm_pitch & 0x3) == 0);471472/* Check for overflow, write vsc_scratch if detected: */473for (int i = 0; i < gmem->num_vsc_pipes; i++) {474OUT_PKT7(ring, CP_COND_WRITE5, 8);475OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |476CP_COND_WRITE5_0_WRITE_MEMORY);477OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(478REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));479OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));480OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch - 64));481OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));482OUT_RELOC(ring,483control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */484OUT_RING(ring,485CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));486487OUT_PKT7(ring, CP_COND_WRITE5, 8);488OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |489CP_COND_WRITE5_0_WRITE_MEMORY);490OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(491REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));492OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));493OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch - 64));494OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));495OUT_RELOC(ring,496control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */497OUT_RING(ring,498CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));499}500501OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);502}503504static void505check_vsc_overflow(struct fd_context *ctx)506{507struct fd6_context *fd6_ctx = fd6_context(ctx);508struct fd6_control *control = fd_bo_map(fd6_ctx->control_mem);509uint32_t vsc_overflow = control->vsc_overflow;510511if (!vsc_overflow)512return;513514/* clear overflow flag: */515control->vsc_overflow = 0;516517unsigned buffer = vsc_overflow & 0x3;518unsigned size = vsc_overflow & ~0x3;519520if (buffer == 0x1) {521/* VSC_DRAW_STRM overflow: */522523if (size < fd6_ctx->vsc_draw_strm_pitch) {524/* we've already increased the size, this overflow is525* from a batch submitted before resize, but executed526* after527*/528return;529}530531fd_bo_del(fd6_ctx->vsc_draw_strm);532fd6_ctx->vsc_draw_strm = NULL;533fd6_ctx->vsc_draw_strm_pitch *= 2;534535mesa_logd("resized VSC_DRAW_STRM_PITCH to: 0x%x",536fd6_ctx->vsc_draw_strm_pitch);537538} else if (buffer == 0x3) {539/* VSC_PRIM_STRM overflow: */540541if (size < fd6_ctx->vsc_prim_strm_pitch) {542/* we've already increased the size */543return;544}545546fd_bo_del(fd6_ctx->vsc_prim_strm);547fd6_ctx->vsc_prim_strm = NULL;548fd6_ctx->vsc_prim_strm_pitch *= 2;549550mesa_logd("resized VSC_PRIM_STRM_PITCH to: 0x%x",551fd6_ctx->vsc_prim_strm_pitch);552553} else {554/* NOTE: it's possible, for example, for overflow to corrupt the555* control page. I mostly just see this hit if I set initial VSC556* buffer size extremely small. Things still seem to recover,557* but maybe we should pre-emptively realloc vsc_data/vsc_data2558* and hope for different memory placement?559*/560mesa_loge("invalid vsc_overflow value: 0x%08x", vsc_overflow);561}562}563564static void565emit_common_init(struct fd_batch *batch)566{567struct fd_ringbuffer *ring = batch->gmem;568struct fd_autotune *at = &batch->ctx->autotune;569struct fd_batch_result *result = batch->autotune_result;570571if (!result)572return;573574OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);575OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);576577OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);578OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_start));579580fd6_event_write(batch, ring, ZPASS_DONE, false);581}582583static void584emit_common_fini(struct fd_batch *batch)585{586struct fd_ringbuffer *ring = batch->gmem;587struct fd_autotune *at = &batch->ctx->autotune;588struct fd_batch_result *result = batch->autotune_result;589590if (!result)591return;592593OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);594OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);595596OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);597OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_end));598599fd6_event_write(batch, ring, ZPASS_DONE, false);600601// TODO is there a better event to use.. a single ZPASS_DONE_TS would be nice602OUT_PKT7(ring, CP_EVENT_WRITE, 4);603OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));604OUT_RELOC(ring, results_ptr(at, fence));605OUT_RING(ring, result->fence);606}607608/*609* Emit conditional CP_INDIRECT_BRANCH based on VSC_STATE[p], ie. the IB610* is skipped for tiles that have no visible geometry.611*/612static void613emit_conditional_ib(struct fd_batch *batch, const struct fd_tile *tile,614struct fd_ringbuffer *target)615{616struct fd_ringbuffer *ring = batch->gmem;617618if (target->cur == target->start)619return;620621emit_marker6(ring, 6);622623unsigned count = fd_ringbuffer_cmd_count(target);624625BEGIN_RING(ring, 5 + 4 * count); /* ensure conditional doesn't get split */626627OUT_PKT7(ring, CP_REG_TEST, 1);628OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |629A6XX_CP_REG_TEST_0_BIT(tile->n) |630A6XX_CP_REG_TEST_0_WAIT_FOR_ME);631632OUT_PKT7(ring, CP_COND_REG_EXEC, 2);633OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));634OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(4 * count));635636for (unsigned i = 0; i < count; i++) {637uint32_t dwords;638OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);639dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;640assert(dwords > 0);641OUT_RING(ring, dwords);642}643644emit_marker6(ring, 6);645}646647static void648set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2,649uint32_t y2)650{651OUT_REG(ring, A6XX_GRAS_SC_WINDOW_SCISSOR_TL(.x = x1, .y = y1),652A6XX_GRAS_SC_WINDOW_SCISSOR_BR(.x = x2, .y = y2));653654OUT_REG(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = x1, .y = y1),655A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = x2, .y = y2));656}657658static void659set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag)660{661OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));662OUT_REG(ring, A6XX_RB_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));663/* no flag for RB_BIN_CONTROL2... */664OUT_REG(ring, A6XX_RB_BIN_CONTROL2(.binw = w, .binh = h));665}666667static void668emit_binning_pass(struct fd_batch *batch) assert_dt669{670struct fd_ringbuffer *ring = batch->gmem;671const struct fd_gmem_stateobj *gmem = batch->gmem_state;672struct fd_screen *screen = batch->ctx->screen;673674debug_assert(!batch->tessellation);675676set_scissor(ring, 0, 0, gmem->width - 1, gmem->height - 1);677678emit_marker6(ring, 7);679OUT_PKT7(ring, CP_SET_MARKER, 1);680OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));681emit_marker6(ring, 7);682683OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);684OUT_RING(ring, 0x1);685686OUT_PKT7(ring, CP_SET_MODE, 1);687OUT_RING(ring, 0x1);688689OUT_WFI5(ring);690691OUT_REG(ring, A6XX_VFD_MODE_CNTL(.binning_pass = true));692693update_vsc_pipe(batch);694695OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);696OUT_RING(ring, screen->info->a6xx.magic.PC_UNKNOWN_9805);697698OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);699OUT_RING(ring, screen->info->a6xx.magic.SP_UNKNOWN_A0F8);700701OUT_PKT7(ring, CP_EVENT_WRITE, 1);702OUT_RING(ring, UNK_2C);703704OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);705OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) | A6XX_RB_WINDOW_OFFSET_Y(0));706707OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);708OUT_RING(ring,709A6XX_SP_TP_WINDOW_OFFSET_X(0) | A6XX_SP_TP_WINDOW_OFFSET_Y(0));710711/* emit IB to binning drawcmds: */712trace_start_binning_ib(&batch->trace);713fd6_emit_ib(ring, batch->draw);714trace_end_binning_ib(&batch->trace);715716fd_reset_wfi(batch);717718OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);719OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |720CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |721CP_SET_DRAW_STATE__0_GROUP_ID(0));722OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));723OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));724725OUT_PKT7(ring, CP_EVENT_WRITE, 1);726OUT_RING(ring, UNK_2D);727728fd6_cache_inv(batch, ring);729fd6_cache_flush(batch, ring);730fd_wfi(batch, ring);731732OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);733734trace_start_vsc_overflow_test(&batch->trace);735emit_vsc_overflow_test(batch);736trace_end_vsc_overflow_test(&batch->trace);737738OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);739OUT_RING(ring, 0x0);740741OUT_PKT7(ring, CP_SET_MODE, 1);742OUT_RING(ring, 0x0);743744OUT_WFI5(ring);745746OUT_REG(ring,747A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,748.gmem = true,749.unk2 = screen->info->a6xx.ccu_cntl_gmem_unk2));750}751752static void753emit_msaa(struct fd_ringbuffer *ring, unsigned nr)754{755enum a3xx_msaa_samples samples = fd_msaa_samples(nr);756757OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);758OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));759OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |760COND(samples == MSAA_ONE,761A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));762763OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);764OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));765OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |766COND(samples == MSAA_ONE,767A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));768769OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2);770OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));771OUT_RING(ring,772A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |773COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));774775OUT_PKT4(ring, REG_A6XX_RB_MSAA_CNTL, 1);776OUT_RING(ring, A6XX_RB_MSAA_CNTL_SAMPLES(samples));777}778779static void prepare_tile_setup_ib(struct fd_batch *batch);780static void prepare_tile_fini_ib(struct fd_batch *batch);781782/* before first tile */783static void784fd6_emit_tile_init(struct fd_batch *batch) assert_dt785{786struct fd_ringbuffer *ring = batch->gmem;787struct pipe_framebuffer_state *pfb = &batch->framebuffer;788const struct fd_gmem_stateobj *gmem = batch->gmem_state;789struct fd_screen *screen = batch->ctx->screen;790791fd6_emit_restore(batch, ring);792793fd6_emit_lrz_flush(ring);794795if (batch->prologue) {796trace_start_prologue(&batch->trace);797fd6_emit_ib(ring, batch->prologue);798trace_end_prologue(&batch->trace);799}800801fd6_cache_inv(batch, ring);802803prepare_tile_setup_ib(batch);804prepare_tile_fini_ib(batch);805806OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);807OUT_RING(ring, 0x0);808809/* blob controls "local" in IB2, but I think that is not required */810OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);811OUT_RING(ring, 0x1);812813fd_wfi(batch, ring);814OUT_REG(ring,815A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,816.gmem = true,817.unk2 = screen->info->a6xx.ccu_cntl_gmem_unk2));818819emit_zs(ring, pfb->zsbuf, batch->gmem_state);820emit_mrt(ring, pfb, batch->gmem_state);821emit_msaa(ring, pfb->samples);822patch_fb_read_gmem(batch);823824if (use_hw_binning(batch)) {825/* enable stream-out during binning pass: */826OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));827828set_bin_size(ring, gmem->bin_w, gmem->bin_h,829A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000);830update_render_cntl(batch, pfb, true);831emit_binning_pass(batch);832833/* and disable stream-out for draw pass: */834OUT_REG(ring, A6XX_VPC_SO_DISABLE(true));835836/*837* NOTE: even if we detect VSC overflow and disable use of838* visibility stream in draw pass, it is still safe to execute839* the reset of these cmds:840*/841842// NOTE a618 not setting .USE_VIZ .. from a quick check on a630, it843// does not appear that this bit changes much (ie. it isn't actually844// .USE_VIZ like previous gens)845set_bin_size(ring, gmem->bin_w, gmem->bin_h,846A6XX_RB_BIN_CONTROL_USE_VIZ | 0x6000000);847848OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);849OUT_RING(ring, 0x0);850851OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);852OUT_RING(ring, screen->info->a6xx.magic.PC_UNKNOWN_9805);853854OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);855OUT_RING(ring, screen->info->a6xx.magic.SP_UNKNOWN_A0F8);856857OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);858OUT_RING(ring, 0x1);859} else {860/* no binning pass, so enable stream-out for draw pass:: */861OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));862863set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);864}865866update_render_cntl(batch, pfb, false);867868emit_common_init(batch);869}870871static void872set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)873{874OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);875OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));876877OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);878OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));879880OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1);881OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));882883OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);884OUT_RING(ring,885A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));886}887888/* before mem2gmem */889static void890fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)891{892struct fd_context *ctx = batch->ctx;893const struct fd_gmem_stateobj *gmem = batch->gmem_state;894struct fd6_context *fd6_ctx = fd6_context(ctx);895struct fd_ringbuffer *ring = batch->gmem;896897emit_marker6(ring, 7);898OUT_PKT7(ring, CP_SET_MARKER, 1);899OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));900emit_marker6(ring, 7);901902uint32_t x1 = tile->xoff;903uint32_t y1 = tile->yoff;904uint32_t x2 = tile->xoff + tile->bin_w - 1;905uint32_t y2 = tile->yoff + tile->bin_h - 1;906907set_scissor(ring, x1, y1, x2, y2);908909if (use_hw_binning(batch)) {910const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];911912OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);913914OUT_PKT7(ring, CP_SET_MODE, 1);915OUT_RING(ring, 0x0);916917OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);918OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |919CP_SET_BIN_DATA5_0_VSC_N(tile->n));920OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */921(tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);922OUT_RELOC(ring,923fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */924(tile->p * 4) + (32 * fd6_ctx->vsc_draw_strm_pitch), 0, 0);925OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,926(tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);927928OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);929OUT_RING(ring, 0x0);930931set_window_offset(ring, x1, y1);932933const struct fd_gmem_stateobj *gmem = batch->gmem_state;934set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);935936OUT_PKT7(ring, CP_SET_MODE, 1);937OUT_RING(ring, 0x0);938} else {939set_window_offset(ring, x1, y1);940941OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);942OUT_RING(ring, 0x1);943944OUT_PKT7(ring, CP_SET_MODE, 1);945OUT_RING(ring, 0x0);946}947}948949static void950set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring)951{952struct pipe_scissor_state blit_scissor = batch->max_scissor;953954blit_scissor.minx = ROUND_DOWN_TO(blit_scissor.minx, 16);955blit_scissor.miny = ROUND_DOWN_TO(blit_scissor.miny, 4);956blit_scissor.maxx = ALIGN(blit_scissor.maxx, 16);957blit_scissor.maxy = ALIGN(blit_scissor.maxy, 4);958959OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);960OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |961A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny));962OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx - 1) |963A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy - 1));964}965966static void967emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base,968struct pipe_surface *psurf, bool stencil)969{970struct fd_resource *rsc = fd_resource(psurf->texture);971enum pipe_format pfmt = psurf->format;972uint32_t offset;973bool ubwc_enabled;974975debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);976977/* separate stencil case: */978if (stencil) {979rsc = rsc->stencil;980pfmt = rsc->b.b.format;981}982983offset =984fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);985ubwc_enabled = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level);986987debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);988989enum a6xx_format format = fd6_pipe2color(pfmt);990uint32_t stride = fd_resource_pitch(rsc, psurf->u.tex.level);991uint32_t size = fd_resource_slice(rsc, psurf->u.tex.level)->size0;992enum a3xx_color_swap swap = fd6_resource_swap(rsc, pfmt);993enum a3xx_msaa_samples samples = fd_msaa_samples(rsc->b.b.nr_samples);994uint32_t tile_mode = fd_resource_tile_mode(&rsc->b.b, psurf->u.tex.level);995996OUT_REG(ring,997A6XX_RB_BLIT_DST_INFO(.tile_mode = tile_mode, .samples = samples,998.color_format = format, .color_swap = swap,999.flags = ubwc_enabled),1000A6XX_RB_BLIT_DST(.bo = rsc->bo, .bo_offset = offset),1001A6XX_RB_BLIT_DST_PITCH(.a6xx_rb_blit_dst_pitch = stride),1002A6XX_RB_BLIT_DST_ARRAY_PITCH(.a6xx_rb_blit_dst_array_pitch = size));10031004OUT_REG(ring, A6XX_RB_BLIT_BASE_GMEM(.dword = base));10051006if (ubwc_enabled) {1007OUT_PKT4(ring, REG_A6XX_RB_BLIT_FLAG_DST, 3);1008fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,1009psurf->u.tex.first_layer);1010}10111012fd6_emit_blit(batch, ring);1013}10141015static void1016emit_restore_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,1017uint32_t base, struct pipe_surface *psurf, unsigned buffer)1018{1019bool stencil = (buffer == FD_BUFFER_STENCIL);10201021OUT_REG(ring, A6XX_RB_BLIT_INFO(.gmem = true, .unk0 = true,1022.depth = (buffer == FD_BUFFER_DEPTH),1023.sample_0 = util_format_is_pure_integer(1024psurf->format)));10251026emit_blit(batch, ring, base, psurf, stencil);1027}10281029static void1030emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring)1031{1032struct pipe_framebuffer_state *pfb = &batch->framebuffer;1033const struct fd_gmem_stateobj *gmem = batch->gmem_state;1034enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);10351036uint32_t buffers = batch->fast_cleared;10371038if (buffers & PIPE_CLEAR_COLOR) {10391040for (int i = 0; i < pfb->nr_cbufs; i++) {1041union pipe_color_union *color = &batch->clear_color[i];1042union util_color uc = {0};10431044if (!pfb->cbufs[i])1045continue;10461047if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))1048continue;10491050enum pipe_format pfmt = pfb->cbufs[i]->format;10511052// XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??1053union pipe_color_union swapped;1054switch (fd6_pipe2swap(pfmt)) {1055case WZYX:1056swapped.ui[0] = color->ui[0];1057swapped.ui[1] = color->ui[1];1058swapped.ui[2] = color->ui[2];1059swapped.ui[3] = color->ui[3];1060break;1061case WXYZ:1062swapped.ui[2] = color->ui[0];1063swapped.ui[1] = color->ui[1];1064swapped.ui[0] = color->ui[2];1065swapped.ui[3] = color->ui[3];1066break;1067case ZYXW:1068swapped.ui[3] = color->ui[0];1069swapped.ui[0] = color->ui[1];1070swapped.ui[1] = color->ui[2];1071swapped.ui[2] = color->ui[3];1072break;1073case XYZW:1074swapped.ui[3] = color->ui[0];1075swapped.ui[2] = color->ui[1];1076swapped.ui[1] = color->ui[2];1077swapped.ui[0] = color->ui[3];1078break;1079}10801081util_pack_color_union(pfmt, &uc, &swapped);10821083OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);1084OUT_RING(ring,1085A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |1086A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |1087A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));10881089OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);1090OUT_RING(ring,1091A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));10921093OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);1094OUT_RING(ring, gmem->cbuf_base[i]);10951096OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);1097OUT_RING(ring, 0);10981099OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);1100OUT_RING(ring, uc.ui[0]);1101OUT_RING(ring, uc.ui[1]);1102OUT_RING(ring, uc.ui[2]);1103OUT_RING(ring, uc.ui[3]);11041105fd6_emit_blit(batch, ring);1106}1107}11081109const bool has_depth = pfb->zsbuf;1110const bool has_separate_stencil =1111has_depth && fd_resource(pfb->zsbuf->texture)->stencil;11121113/* First clear depth or combined depth/stencil. */1114if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||1115(!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {1116enum pipe_format pfmt = pfb->zsbuf->format;1117uint32_t clear_value;1118uint32_t mask = 0;11191120if (has_separate_stencil) {1121pfmt = util_format_get_depth_only(pfb->zsbuf->format);1122clear_value = util_pack_z(pfmt, batch->clear_depth);1123} else {1124pfmt = pfb->zsbuf->format;1125clear_value =1126util_pack_z_stencil(pfmt, batch->clear_depth, batch->clear_stencil);1127}11281129if (buffers & PIPE_CLEAR_DEPTH)1130mask |= 0x1;11311132if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))1133mask |= 0x2;11341135OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);1136OUT_RING(ring,1137A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |1138A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |1139A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));11401141OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);1142OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |1143// XXX UNK0 for separate stencil ??1144A6XX_RB_BLIT_INFO_DEPTH |1145A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));11461147OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);1148OUT_RING(ring, gmem->zsbuf_base[0]);11491150OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);1151OUT_RING(ring, 0);11521153OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);1154OUT_RING(ring, clear_value);11551156fd6_emit_blit(batch, ring);1157}11581159/* Then clear the separate stencil buffer in case of 32 bit depth1160* formats with separate stencil. */1161if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {1162OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);1163OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |1164A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |1165A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(FMT6_8_UINT));11661167OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);1168OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |1169// A6XX_RB_BLIT_INFO_UNK0 |1170A6XX_RB_BLIT_INFO_DEPTH |1171A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));11721173OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);1174OUT_RING(ring, gmem->zsbuf_base[1]);11751176OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);1177OUT_RING(ring, 0);11781179OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);1180OUT_RING(ring, batch->clear_stencil & 0xff);11811182fd6_emit_blit(batch, ring);1183}1184}11851186/*1187* transfer from system memory to gmem1188*/1189static void1190emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring)1191{1192const struct fd_gmem_stateobj *gmem = batch->gmem_state;1193struct pipe_framebuffer_state *pfb = &batch->framebuffer;11941195if (batch->restore & FD_BUFFER_COLOR) {1196unsigned i;1197for (i = 0; i < pfb->nr_cbufs; i++) {1198if (!pfb->cbufs[i])1199continue;1200if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))1201continue;1202emit_restore_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],1203FD_BUFFER_COLOR);1204}1205}12061207if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {1208struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);12091210if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) {1211emit_restore_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,1212FD_BUFFER_DEPTH);1213}1214if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) {1215emit_restore_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,1216FD_BUFFER_STENCIL);1217}1218}1219}12201221static void1222prepare_tile_setup_ib(struct fd_batch *batch)1223{1224if (!(batch->restore || batch->fast_cleared))1225return;12261227batch->tile_setup =1228fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);12291230set_blit_scissor(batch, batch->tile_setup);12311232emit_restore_blits(batch, batch->tile_setup);1233emit_clears(batch, batch->tile_setup);1234}12351236/*1237* transfer from system memory to gmem1238*/1239static void1240fd6_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)1241{1242}12431244/* before IB to rendering cmds: */1245static void1246fd6_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)1247{1248if (!batch->tile_setup)1249return;12501251trace_start_clear_restore(&batch->trace, batch->fast_cleared);1252if (batch->fast_cleared || !use_hw_binning(batch)) {1253fd6_emit_ib(batch->gmem, batch->tile_setup);1254} else {1255emit_conditional_ib(batch, tile, batch->tile_setup);1256}1257trace_end_clear_restore(&batch->trace);1258}12591260static bool1261blit_can_resolve(enum pipe_format format)1262{1263const struct util_format_description *desc = util_format_description(format);12641265/* blit event can only do resolve for simple cases:1266* averaging samples as unsigned integers or choosing only one sample1267*/1268if (util_format_is_snorm(format) || util_format_is_srgb(format))1269return false;12701271/* can't do formats with larger channel sizes1272* note: this includes all float formats1273* note2: single channel integer formats seem OK1274*/1275if (desc->channel[0].size > 10)1276return false;12771278switch (format) {1279/* for unknown reasons blit event can't msaa resolve these formats when tiled1280* likely related to these formats having different layout from other cpp=21281* formats1282*/1283case PIPE_FORMAT_R8G8_UNORM:1284case PIPE_FORMAT_R8G8_UINT:1285case PIPE_FORMAT_R8G8_SINT:1286/* TODO: this one should be able to work? */1287case PIPE_FORMAT_Z24_UNORM_S8_UINT:1288return false;1289default:1290break;1291}12921293return true;1294}12951296static bool1297needs_resolve(struct pipe_surface *psurf)1298{1299return psurf->nr_samples &&1300(psurf->nr_samples != psurf->texture->nr_samples);1301}13021303static void1304emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,1305uint32_t base, struct pipe_surface *psurf,1306unsigned buffer) assert_dt1307{1308uint32_t info = 0;1309bool stencil = false;13101311if (!fd_resource(psurf->texture)->valid)1312return;13131314/* if we need to resolve, but cannot with BLIT event, we instead need1315* to generate per-tile CP_BLIT (r2d) commands:1316*1317* The separate-stencil is a special case, we might need to use CP_BLIT1318* for depth, but we can still resolve stencil with a BLIT event1319*/1320if (needs_resolve(psurf) && !blit_can_resolve(psurf->format) &&1321(buffer != FD_BUFFER_STENCIL)) {1322fd6_resolve_tile(batch, ring, base, psurf);1323return;1324}13251326switch (buffer) {1327case FD_BUFFER_COLOR:1328break;1329case FD_BUFFER_STENCIL:1330info |= A6XX_RB_BLIT_INFO_UNK0;1331stencil = true;1332break;1333case FD_BUFFER_DEPTH:1334info |= A6XX_RB_BLIT_INFO_DEPTH;1335break;1336}13371338if (util_format_is_pure_integer(psurf->format) ||1339util_format_is_depth_or_stencil(psurf->format))1340info |= A6XX_RB_BLIT_INFO_SAMPLE_0;13411342OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);1343OUT_RING(ring, info);13441345emit_blit(batch, ring, base, psurf, stencil);1346}13471348/*1349* transfer from gmem to system memory (ie. normal RAM)1350*/13511352static void1353prepare_tile_fini_ib(struct fd_batch *batch) assert_dt1354{1355const struct fd_gmem_stateobj *gmem = batch->gmem_state;1356struct pipe_framebuffer_state *pfb = &batch->framebuffer;1357struct fd_ringbuffer *ring;13581359batch->tile_fini =1360fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);1361ring = batch->tile_fini;13621363set_blit_scissor(batch, ring);13641365if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {1366struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);13671368if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {1369emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,1370FD_BUFFER_DEPTH);1371}1372if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {1373emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,1374FD_BUFFER_STENCIL);1375}1376}13771378if (batch->resolve & FD_BUFFER_COLOR) {1379unsigned i;1380for (i = 0; i < pfb->nr_cbufs; i++) {1381if (!pfb->cbufs[i])1382continue;1383if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))1384continue;1385emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],1386FD_BUFFER_COLOR);1387}1388}1389}13901391static void1392fd6_emit_tile(struct fd_batch *batch, const struct fd_tile *tile)1393{1394if (!use_hw_binning(batch)) {1395fd6_emit_ib(batch->gmem, batch->draw);1396} else {1397emit_conditional_ib(batch, tile, batch->draw);1398}13991400if (batch->epilogue)1401fd6_emit_ib(batch->gmem, batch->epilogue);1402}14031404static void1405fd6_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)1406{1407struct fd_ringbuffer *ring = batch->gmem;14081409if (use_hw_binning(batch)) {1410OUT_PKT7(ring, CP_SET_MARKER, 1);1411OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));1412}14131414OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);1415OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |1416CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |1417CP_SET_DRAW_STATE__0_GROUP_ID(0));1418OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));1419OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));14201421OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);1422OUT_RING(ring, 0x0);14231424emit_marker6(ring, 7);1425OUT_PKT7(ring, CP_SET_MARKER, 1);1426OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));1427emit_marker6(ring, 7);14281429trace_start_resolve(&batch->trace);1430if (batch->fast_cleared || !use_hw_binning(batch)) {1431fd6_emit_ib(batch->gmem, batch->tile_fini);1432} else {1433emit_conditional_ib(batch, tile, batch->tile_fini);1434}1435trace_end_resolve(&batch->trace);1436}14371438static void1439fd6_emit_tile_fini(struct fd_batch *batch)1440{1441struct fd_ringbuffer *ring = batch->gmem;14421443emit_common_fini(batch);14441445OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);1446OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);14471448fd6_emit_lrz_flush(ring);14491450fd6_event_write(batch, ring, PC_CCU_RESOLVE_TS, true);14511452if (use_hw_binning(batch)) {1453check_vsc_overflow(batch->ctx);1454}1455}14561457static void1458emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt1459{1460struct fd_context *ctx = batch->ctx;1461struct pipe_framebuffer_state *pfb = &batch->framebuffer;14621463uint32_t buffers = batch->fast_cleared;14641465if (!buffers)1466return;14671468trace_start_clear_restore(&batch->trace, buffers);14691470if (buffers & PIPE_CLEAR_COLOR) {1471for (int i = 0; i < pfb->nr_cbufs; i++) {1472union pipe_color_union color = batch->clear_color[i];14731474if (!pfb->cbufs[i])1475continue;14761477if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))1478continue;14791480fd6_clear_surface(ctx, ring, pfb->cbufs[i], pfb->width, pfb->height,1481&color);1482}1483}1484if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {1485union pipe_color_union value = {};14861487const bool has_depth = pfb->zsbuf;1488struct pipe_resource *separate_stencil =1489has_depth && fd_resource(pfb->zsbuf->texture)->stencil1490? &fd_resource(pfb->zsbuf->texture)->stencil->b.b1491: NULL;14921493if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||1494(!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {1495value.f[0] = batch->clear_depth;1496value.ui[1] = batch->clear_stencil;1497fd6_clear_surface(ctx, ring, pfb->zsbuf, pfb->width, pfb->height,1498&value);1499}15001501if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {1502value.ui[0] = batch->clear_stencil;15031504struct pipe_surface stencil_surf = *pfb->zsbuf;1505stencil_surf.format = PIPE_FORMAT_S8_UINT;1506stencil_surf.texture = separate_stencil;15071508fd6_clear_surface(ctx, ring, &stencil_surf, pfb->width, pfb->height,1509&value);1510}1511}15121513fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);15141515trace_end_clear_restore(&batch->trace);1516}15171518static void1519setup_tess_buffers(struct fd_batch *batch, struct fd_ringbuffer *ring)1520{1521struct fd_context *ctx = batch->ctx;15221523batch->tessfactor_bo = fd_bo_new(ctx->screen->dev, batch->tessfactor_size,15240, "tessfactor");15251526batch->tessparam_bo = fd_bo_new(ctx->screen->dev, batch->tessparam_size,15270, "tessparam");15281529OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2);1530OUT_RELOC(ring, batch->tessfactor_bo, 0, 0, 0);15311532batch->tess_addrs_constobj->cur = batch->tess_addrs_constobj->start;1533OUT_RELOC(batch->tess_addrs_constobj, batch->tessparam_bo, 0, 0, 0);1534OUT_RELOC(batch->tess_addrs_constobj, batch->tessfactor_bo, 0, 0, 0);1535}15361537static void1538fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt1539{1540struct fd_ringbuffer *ring = batch->gmem;1541struct fd_screen *screen = batch->ctx->screen;15421543fd6_emit_restore(batch, ring);1544fd6_emit_lrz_flush(ring);15451546if (batch->prologue) {1547if (!batch->nondraw) {1548trace_start_prologue(&batch->trace);1549}1550fd6_emit_ib(ring, batch->prologue);1551if (!batch->nondraw) {1552trace_end_prologue(&batch->trace);1553}1554}15551556/* remaining setup below here does not apply to blit/compute: */1557if (batch->nondraw)1558return;15591560struct pipe_framebuffer_state *pfb = &batch->framebuffer;15611562if (pfb->width > 0 && pfb->height > 0)1563set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1);1564else1565set_scissor(ring, 0, 0, 0, 0);15661567set_window_offset(ring, 0, 0);15681569set_bin_size(ring, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */15701571emit_sysmem_clears(batch, ring);15721573emit_marker6(ring, 7);1574OUT_PKT7(ring, CP_SET_MARKER, 1);1575OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));1576emit_marker6(ring, 7);15771578if (batch->tessellation)1579setup_tess_buffers(batch, ring);15801581OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);1582OUT_RING(ring, 0x0);15831584/* blob controls "local" in IB2, but I think that is not required */1585OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);1586OUT_RING(ring, 0x1);15871588fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);1589fd6_cache_inv(batch, ring);15901591fd_wfi(batch, ring);1592OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));15931594/* enable stream-out, with sysmem there is only one pass: */1595OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));15961597OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);1598OUT_RING(ring, 0x1);15991600emit_zs(ring, pfb->zsbuf, NULL);1601emit_mrt(ring, pfb, NULL);1602emit_msaa(ring, pfb->samples);1603patch_fb_read_sysmem(batch);16041605update_render_cntl(batch, pfb, false);16061607emit_common_init(batch);1608}16091610static void1611fd6_emit_sysmem_fini(struct fd_batch *batch)1612{1613struct fd_ringbuffer *ring = batch->gmem;16141615emit_common_fini(batch);16161617if (batch->epilogue)1618fd6_emit_ib(batch->gmem, batch->epilogue);16191620OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);1621OUT_RING(ring, 0x0);16221623fd6_emit_lrz_flush(ring);16241625fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);1626fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);1627}16281629void1630fd6_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis1631{1632struct fd_context *ctx = fd_context(pctx);16331634ctx->emit_tile_init = fd6_emit_tile_init;1635ctx->emit_tile_prep = fd6_emit_tile_prep;1636ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;1637ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;1638ctx->emit_tile = fd6_emit_tile;1639ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;1640ctx->emit_tile_fini = fd6_emit_tile_fini;1641ctx->emit_sysmem_prep = fd6_emit_sysmem_prep;1642ctx->emit_sysmem_fini = fd6_emit_sysmem_fini;1643}164416451646