/* Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
 * 4574 views
 */
/*1* Copyright (C) 2012-2013 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24*/2526#include "pipe/p_state.h"27#include "util/u_memory.h"28#include "util/u_prim.h"29#include "util/u_string.h"3031#include "freedreno_resource.h"32#include "freedreno_state.h"3334#include "fd2_context.h"35#include "fd2_draw.h"36#include "fd2_emit.h"37#include "fd2_program.h"38#include "fd2_util.h"39#include "fd2_zsa.h"4041static void42emit_cacheflush(struct fd_ringbuffer *ring)43{44unsigned i;4546for (i = 0; i < 12; i++) {47OUT_PKT3(ring, CP_EVENT_WRITE, 1);48OUT_RING(ring, CACHE_FLUSH);49}50}5152static void53emit_vertexbufs(struct fd_context *ctx) assert_dt54{55struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;56struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;57struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];58unsigned i;5960if (!vtx->num_elements)61return;6263for (i = 
0; i < vtx->num_elements; i++) {64struct pipe_vertex_element *elem = &vtx->pipe[i];65struct pipe_vertex_buffer *vb = &vertexbuf->vb[elem->vertex_buffer_index];66bufs[i].offset = vb->buffer_offset;67bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo);68bufs[i].prsc = vb->buffer.resource;69}7071// NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the72// CONST(20,0) (or CONST(26,0) in soliv_vp)7374fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);75fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements);76}7778static void79draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,80const struct pipe_draw_start_count_bias *draw, struct fd_ringbuffer *ring,81unsigned index_offset, bool binning) assert_dt82{83OUT_PKT3(ring, CP_SET_CONSTANT, 2);84OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));85OUT_RING(ring, info->index_size ? 0 : draw->start);8687OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);88OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);8990if (is_a20x(ctx->screen)) {91/* wait for DMA to finish and92* dummy draw one triangle with indexes 0,0,0.93* with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.94*95* this workaround is for a HW bug related to DMA alignment:96* it is necessary for indexed draws and possibly also97* draws that read binning data98*/99OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);100OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */101OUT_RING(ring, 0x00000000);102OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */103OUT_RING(ring, 0x00000001);104105OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);106OUT_RING(ring, 0x00000000);107OUT_RING(ring, 0x0003c004);108OUT_RING(ring, 0x00000000);109OUT_RING(ring, 0x00000003);110OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0,1110);112OUT_RING(ring, 0x00000006);113} else {114OUT_WFI(ring);115116OUT_PKT3(ring, CP_SET_CONSTANT, 3);117OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));118OUT_RING(ring, info->index_bounds_valid ? 
info->max_index119: ~0); /* VGT_MAX_VTX_INDX */120OUT_RING(ring, info->index_bounds_valid ? info->min_index121: 0); /* VGT_MIN_VTX_INDX */122}123124/* binning shader will take offset from C64 */125if (binning && is_a20x(ctx->screen)) {126OUT_PKT3(ring, CP_SET_CONSTANT, 5);127OUT_RING(ring, 0x00000180);128OUT_RING(ring, fui(ctx->batch->num_vertices));129OUT_RING(ring, fui(0.0f));130OUT_RING(ring, fui(0.0f));131OUT_RING(ring, fui(0.0f));132}133134enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;135if (binning || info->mode == PIPE_PRIM_POINTS)136vismode = IGNORE_VISIBILITY;137138fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode], vismode, info,139draw, index_offset);140141if (is_a20x(ctx->screen)) {142/* not sure why this is required, but it fixes some hangs */143OUT_WFI(ring);144} else {145OUT_PKT3(ring, CP_SET_CONSTANT, 2);146OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));147OUT_RING(ring, 0x00000000);148}149150emit_cacheflush(ring);151}152153static bool154fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,155unsigned drawid_offset,156const struct pipe_draw_indirect_info *indirect,157const struct pipe_draw_start_count_bias *pdraw,158unsigned index_offset) assert_dt159{160if (!ctx->prog.fs || !ctx->prog.vs)161return false;162163if (pinfo->mode != PIPE_PRIM_MAX && !indirect && !pinfo->primitive_restart &&164!u_trim_pipe_prim(pinfo->mode, (unsigned *)&pdraw->count))165return false;166167if (ctx->dirty & FD_DIRTY_VTXBUF)168emit_vertexbufs(ctx);169170if (fd_binning_enabled)171fd2_emit_state_binning(ctx, ctx->dirty);172173fd2_emit_state(ctx, ctx->dirty);174175/* a2xx can draw only 65535 vertices at once176* on a22x the field in the draw command is 32bits but seems limited too177* using a limit of 32k because it fixes an unexplained hang178* 32766 works for all primitives (multiple of 2 and 3)179*/180if (pdraw->count > 32766) {181/* clang-format off */182static const uint16_t step_tbl[PIPE_PRIM_MAX] = {183[0 ... 
PIPE_PRIM_MAX - 1] = 32766,184[PIPE_PRIM_LINE_STRIP] = 32765,185[PIPE_PRIM_TRIANGLE_STRIP] = 32764,186187/* needs more work */188[PIPE_PRIM_TRIANGLE_FAN] = 0,189[PIPE_PRIM_LINE_LOOP] = 0,190};191/* clang-format on */192193struct pipe_draw_start_count_bias draw = *pdraw;194unsigned count = draw.count;195unsigned step = step_tbl[pinfo->mode];196unsigned num_vertices = ctx->batch->num_vertices;197198if (!step)199return false;200201for (; count + step > 32766; count -= step) {202draw.count = MIN2(count, 32766);203draw_impl(ctx, pinfo, &draw, ctx->batch->draw, index_offset, false);204draw_impl(ctx, pinfo, &draw, ctx->batch->binning, index_offset, true);205draw.start += step;206ctx->batch->num_vertices += step;207}208/* changing this value is a hack, restore it */209ctx->batch->num_vertices = num_vertices;210} else {211draw_impl(ctx, pinfo, pdraw, ctx->batch->draw, index_offset, false);212draw_impl(ctx, pinfo, pdraw, ctx->batch->binning, index_offset, true);213}214215fd_context_all_clean(ctx);216217return true;218}219220static void221clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring,222unsigned buffers, bool fast_clear) assert_dt223{224struct fd_context *ctx = batch->ctx;225struct fd2_context *fd2_ctx = fd2_context(ctx);226uint32_t reg;227228fd2_emit_vertex_bufs(ring, 0x9c,229(struct fd2_vertex_buf[]){230{.prsc = fd2_ctx->solid_vertexbuf, .size = 36},231},2321);233234OUT_PKT3(ring, CP_SET_CONSTANT, 2);235OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));236OUT_RING(ring, 0);237238fd2_program_emit(ctx, ring, &ctx->solid_prog);239240OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);241OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);242243if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {244OUT_PKT3(ring, CP_SET_CONSTANT, 2);245OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));246reg = 0;247if (buffers & PIPE_CLEAR_DEPTH) {248reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |249A2XX_RB_DEPTHCONTROL_Z_ENABLE |250A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE 
|251A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;252}253if (buffers & PIPE_CLEAR_STENCIL) {254reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |255A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |256A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);257}258OUT_RING(ring, reg);259}260261OUT_PKT3(ring, CP_SET_CONSTANT, 2);262OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));263OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |264A2XX_RB_COLORCONTROL_BLEND_DISABLE |265A2XX_RB_COLORCONTROL_ROP_CODE(12) |266A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |267A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));268269OUT_PKT3(ring, CP_SET_CONSTANT, 3);270OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));271OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */272OUT_RING(273ring,274A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */275A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |276A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) |277(fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0));278279if (fast_clear) {280OUT_PKT3(ring, CP_SET_CONSTANT, 2);281OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));282OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3));283}284285OUT_PKT3(ring, CP_SET_CONSTANT, 2);286OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));287OUT_RING(ring, 0x0000ffff);288289OUT_PKT3(ring, CP_SET_CONSTANT, 2);290OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));291if (buffers & PIPE_CLEAR_COLOR) {292OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |293A2XX_RB_COLOR_MASK_WRITE_GREEN |294A2XX_RB_COLOR_MASK_WRITE_BLUE |295A2XX_RB_COLOR_MASK_WRITE_ALPHA);296} else {297OUT_RING(ring, 0x0);298}299300OUT_PKT3(ring, CP_SET_CONSTANT, 2);301OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));302OUT_RING(ring, 0);303304if (is_a20x(batch->ctx->screen))305return;306307OUT_PKT3(ring, CP_SET_CONSTANT, 3);308OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));309OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */310OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */311312OUT_PKT3(ring, 
CP_SET_CONSTANT, 3);313OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));314OUT_RING(ring,3150xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));316OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));317318OUT_PKT3(ring, CP_SET_CONSTANT, 2);319OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));320OUT_RING(ring, 0x00000084);321322OUT_PKT3(ring, CP_SET_CONSTANT, 2);323OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));324OUT_RING(ring, 0x0000028f);325}326327static void328clear_state_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)329{330if (is_a20x(ctx->screen))331return;332333OUT_PKT3(ring, CP_SET_CONSTANT, 2);334OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));335OUT_RING(ring, 0x00000000);336337OUT_PKT3(ring, CP_SET_CONSTANT, 2);338OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));339OUT_RING(ring, 0x00000000);340341OUT_PKT3(ring, CP_SET_CONSTANT, 2);342OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));343OUT_RING(ring, 0x0000003b);344}345346static void347clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring,348uint32_t color_clear, uint32_t depth_clear, unsigned patch_type)349{350BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */351352/* zero values are patched in */353OUT_PKT3(ring, CP_SET_CONSTANT, 2);354OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));355OUT_RINGP(ring, patch_type, &batch->gmem_patches);356357OUT_PKT3(ring, CP_SET_CONSTANT, 4);358OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));359OUT_RING(ring, 0x8000 | 32);360OUT_RING(ring, 0);361OUT_RING(ring, 0);362363/* set fill values */364if (!is_a20x(batch->ctx->screen)) {365OUT_PKT3(ring, CP_SET_CONSTANT, 2);366OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));367OUT_RING(ring, color_clear);368369OUT_PKT3(ring, CP_SET_CONSTANT, 2);370OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));371OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE 
|372A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf));373374OUT_PKT3(ring, CP_SET_CONSTANT, 2);375OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));376OUT_RING(ring, depth_clear);377} else {378const float sc = 1.0f / 255.0f;379380OUT_PKT3(ring, CP_SET_CONSTANT, 5);381OUT_RING(ring, 0x00000480);382OUT_RING(ring, fui((float)(color_clear >> 0 & 0xff) * sc));383OUT_RING(ring, fui((float)(color_clear >> 8 & 0xff) * sc));384OUT_RING(ring, fui((float)(color_clear >> 16 & 0xff) * sc));385OUT_RING(ring, fui((float)(color_clear >> 24 & 0xff) * sc));386387// XXX if using float the rounding error breaks it..388float depth = ((double)(depth_clear >> 8)) * (1.0 / (double)0xffffff);389assert((unsigned)(((double)depth * (double)0xffffff)) ==390(depth_clear >> 8));391392OUT_PKT3(ring, CP_SET_CONSTANT, 3);393OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));394OUT_RING(ring, fui(0.0f));395OUT_RING(ring, fui(depth));396397OUT_PKT3(ring, CP_SET_CONSTANT, 3);398OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));399OUT_RING(ring,4000xff000000 |401A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) |402A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));403OUT_RING(ring, 0xff000000 |404A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) |405A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));406}407408fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,409DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);410}411412static bool413fd2_clear_fast(struct fd_context *ctx, unsigned buffers,414const union pipe_color_union *color, double depth,415unsigned stencil) assert_dt416{417/* using 4x MSAA allows clearing ~2x faster418* then we can use higher bpp clearing to clear lower bpp419* 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8)420* note: its possible to clear with 32_32_32_32 format but its not faster421* note: fast clear doesn't work with sysmem rendering422* (sysmem rendering is disabled when clear is used)423*424* we only have 16-bit / 32-bit color formats425* and 16-bit / 32-bit 
depth formats426* so there are only a few possible combinations427*428* if the bpp of the color/depth doesn't match429* we clear with depth/color individually430*/431struct fd2_context *fd2_ctx = fd2_context(ctx);432struct fd_batch *batch = ctx->batch;433struct fd_ringbuffer *ring = batch->draw;434struct pipe_framebuffer_state *pfb = &batch->framebuffer;435uint32_t color_clear = 0, depth_clear = 0;436enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);437int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */438int color_size = -1;439440/* TODO: need to test performance on a22x */441if (!is_a20x(ctx->screen))442return false;443444if (buffers & PIPE_CLEAR_COLOR)445color_size = util_format_get_blocksizebits(format) == 32;446447if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {448/* no fast clear when clearing only one component of depth+stencil buffer */449if (!(buffers & PIPE_CLEAR_DEPTH))450return false;451452if ((pfb->zsbuf->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||453pfb->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&454!(buffers & PIPE_CLEAR_STENCIL))455return false;456457depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8;458}459460assert(color_size >= 0 || depth_size >= 0);461462if (color_size == 0) {463color_clear = pack_rgba(format, color->f);464color_clear = (color_clear << 16) | (color_clear & 0xffff);465} else if (color_size == 1) {466color_clear = pack_rgba(format, color->f);467}468469if (depth_size == 0) {470depth_clear = (uint32_t)(0xffff * depth);471depth_clear |= depth_clear << 16;472} else if (depth_size == 1) {473depth_clear = (((uint32_t)(0xffffff * depth)) << 8);474depth_clear |= (stencil & 0xff);475}476477/* disable "window" scissor.. 
*/478OUT_PKT3(ring, CP_SET_CONSTANT, 3);479OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));480OUT_RING(ring, xy2d(0, 0));481OUT_RING(ring, xy2d(0x7fff, 0x7fff));482483/* make sure we fill all "pixels" (in SCREEN_SCISSOR) */484OUT_PKT3(ring, CP_SET_CONSTANT, 5);485OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));486OUT_RING(ring, fui(4096.0));487OUT_RING(ring, fui(4096.0));488OUT_RING(ring, fui(4096.0));489OUT_RING(ring, fui(4096.0));490491clear_state(batch, ring, ~0u, true);492493if (color_size >= 0 && depth_size != color_size)494clear_fast(batch, ring, color_clear, color_clear,495GMEM_PATCH_FASTCLEAR_COLOR);496497if (depth_size >= 0 && depth_size != color_size)498clear_fast(batch, ring, depth_clear, depth_clear,499GMEM_PATCH_FASTCLEAR_DEPTH);500501if (depth_size == color_size)502clear_fast(batch, ring, color_clear, depth_clear,503GMEM_PATCH_FASTCLEAR_COLOR_DEPTH);504505clear_state_restore(ctx, ring);506507OUT_PKT3(ring, CP_SET_CONSTANT, 2);508OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));509OUT_RING(ring, 0);510511/* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile.512* MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT513* the value is read from byte offset 60 in the given bo514*/515OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3);516OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0);517OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));518OUT_RING(ring, 1);519520OUT_PKT3(ring, CP_SET_CONSTANT, 4);521OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));522OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches);523OUT_RING(ring, 0);524OUT_RING(ring, 0);525return true;526}527528static bool529fd2_clear(struct fd_context *ctx, unsigned buffers,530const union pipe_color_union *color, double depth,531unsigned stencil) assert_dt532{533struct fd_ringbuffer *ring = ctx->batch->draw;534struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;535536if (fd2_clear_fast(ctx, buffers, color, 
depth, stencil))537goto dirty;538539/* set clear value */540if (is_a20x(ctx->screen)) {541if (buffers & PIPE_CLEAR_COLOR) {542/* C0 used by fragment shader */543OUT_PKT3(ring, CP_SET_CONSTANT, 5);544OUT_RING(ring, 0x00000480);545OUT_RING(ring, color->ui[0]);546OUT_RING(ring, color->ui[1]);547OUT_RING(ring, color->ui[2]);548OUT_RING(ring, color->ui[3]);549}550551if (buffers & PIPE_CLEAR_DEPTH) {552/* use viewport to set depth value */553OUT_PKT3(ring, CP_SET_CONSTANT, 3);554OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));555OUT_RING(ring, fui(0.0f));556OUT_RING(ring, fui(depth));557}558559if (buffers & PIPE_CLEAR_STENCIL) {560OUT_PKT3(ring, CP_SET_CONSTANT, 3);561OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));562OUT_RING(ring, 0xff000000 |563A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) |564A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));565OUT_RING(ring, 0xff000000 |566A2XX_RB_STENCILREFMASK_STENCILREF(stencil) |567A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));568}569} else {570if (buffers & PIPE_CLEAR_COLOR) {571OUT_PKT3(ring, CP_SET_CONSTANT, 2);572OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));573OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f));574}575576if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {577uint32_t clear_mask, depth_clear;578switch (fd_pipe2depth(fb->zsbuf->format)) {579case DEPTHX_24_8:580clear_mask = ((buffers & PIPE_CLEAR_DEPTH) ? 0xe : 0) |581((buffers & PIPE_CLEAR_STENCIL) ? 
0x1 : 0);582depth_clear =583(((uint32_t)(0xffffff * depth)) << 8) | (stencil & 0xff);584break;585case DEPTHX_16:586clear_mask = 0xf;587depth_clear = (uint32_t)(0xffffffff * depth);588break;589default:590unreachable("invalid depth");591break;592}593594OUT_PKT3(ring, CP_SET_CONSTANT, 2);595OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));596OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |597A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask));598599OUT_PKT3(ring, CP_SET_CONSTANT, 2);600OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));601OUT_RING(ring, depth_clear);602}603}604605/* scissor state */606OUT_PKT3(ring, CP_SET_CONSTANT, 3);607OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));608OUT_RING(ring, xy2d(0, 0));609OUT_RING(ring, xy2d(fb->width, fb->height));610611/* viewport state */612OUT_PKT3(ring, CP_SET_CONSTANT, 5);613OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));614OUT_RING(ring, fui((float)fb->width / 2.0));615OUT_RING(ring, fui((float)fb->width / 2.0));616OUT_RING(ring, fui((float)fb->height / 2.0));617OUT_RING(ring, fui((float)fb->height / 2.0));618619/* common state */620clear_state(ctx->batch, ring, buffers, false);621622fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,623DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);624625clear_state_restore(ctx, ring);626627dirty:628ctx->dirty |= FD_DIRTY_ZSA | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER |629FD_DIRTY_SAMPLE_MASK | FD_DIRTY_PROG | FD_DIRTY_CONST |630FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;631632ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG;633ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |=634FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;635636return true;637}638639void640fd2_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis641{642struct fd_context *ctx = fd_context(pctx);643ctx->draw_vbo = fd2_draw_vbo;644ctx->clear = fd2_clear;645}646647648