/* blob/21.2-virgl/src/gallium/drivers/freedreno/a5xx/fd5_emit.h */
/*
 * Copyright (C) 2016 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#ifndef FD5_EMIT_H
#define FD5_EMIT_H

#include "pipe/p_context.h"

#include "fd5_context.h"
#include "fd5_format.h"
#include "fd5_program.h"
#include "fd5_screen.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "ir3_gallium.h"

struct fd_ringbuffer;

/* grouped together emit-state for prog/vertex/state emit: */
struct fd5_emit {
   struct pipe_debug_callback *debug;
   const struct fd_vertex_state *vtx;
   const struct fd5_program_state *prog;
   const struct pipe_draw_info *info;
   unsigned drawid_offset;
   const struct pipe_draw_indirect_info *indirect;
   const struct pipe_draw_start_count_bias *draw;
   bool binning_pass;
   struct ir3_cache_key key;
   enum fd_dirty_3d_state dirty;

   uint32_t sprite_coord_enable; /* bitmask */
   bool sprite_coord_mode;
   bool rasterflat;

   /* in binning pass, we don't have real frag shader, so we
    * don't know if real draw disqualifies lrz write.  So just
    * figure that out up-front and stash it in the emit.
    */
   bool no_lrz_write;

   /* cached to avoid repeated lookups of same variants: */
   const struct ir3_shader_variant *vs, *fs;
   /* TODO: other shader stages.. */

   unsigned streamout_mask;
};

/* Translate a gallium surface format to the a5xx color format enum;
 * returns 0 for a NULL surface.
 */
static inline enum a5xx_color_fmt
fd5_emit_format(struct pipe_surface *surf)
{
   if (!surf)
      return 0;
   return fd5_pipe2color(surf->format);
}

/* Resolve (and cache in emit->vs) the vertex shader variant to use for
 * this draw: the binning-pass variant (prog->bs) during binning, except
 * when stream-output is active.
 */
static inline const struct ir3_shader_variant *
fd5_emit_get_vp(struct fd5_emit *emit)
{
   if (!emit->vs) {
      /* We use nonbinning VS during binning when TFB is enabled because that
       * is what has all the outputs that might be involved in TFB.
       */
      if (emit->binning_pass &&
          !emit->prog->vs->shader->stream_output.num_outputs)
         emit->vs = emit->prog->bs;
      else
         emit->vs = emit->prog->vs;
   }
   return emit->vs;
}

/* Resolve (and cache in emit->fs) the fragment shader variant to use for
 * this draw; the binning pass gets an empty placeholder variant.
 */
static inline const struct ir3_shader_variant *
fd5_emit_get_fp(struct fd5_emit *emit)
{
   if (!emit->fs) {
      if (emit->binning_pass) {
         /* use dummy stateobj to simplify binning vs non-binning: */
         static const struct ir3_shader_variant binning_fs = {};
         emit->fs = &binning_fs;
      } else {
         emit->fs = emit->prog->fs;
      }
   }
   return emit->fs;
}

/* Emit a UCHE cache invalidate, bracketed by WFI handling. */
static inline void
fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
   fd_reset_wfi(batch);
   OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5);
   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */
   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */
   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */
   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */
   OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */
   fd_wfi(batch, ring);
}

/* Emit CP_SET_RENDER_MODE to switch between binning/gmem/bypass modes,
 * with GMEM/VSC enable bits set according to `mode`.
 */
static inline void
fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring,
                    enum render_mode_cmd mode)
{
   /* TODO add preemption support, gmem bypass, etc */
   emit_marker5(ring, 7);
   OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
   OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode));
   OUT_RING(ring, 0x00000000); /* ADDR_LO */
   OUT_RING(ring, 0x00000000); /* ADDR_HI */
   OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) |
                     COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE));
   OUT_RING(ring, 0x00000000);
   emit_marker5(ring, 7);
}

/* Emit a CP_EVENT_WRITE for `evt`; when `timestamp` is set, the event also
 * writes to the context's blit_mem bo (address + a zero payload word).
 */
static inline void
fd5_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
                enum vgt_event_type evt, bool timestamp)
{
   OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
   if (timestamp) {
      OUT_RELOC(ring, fd5_context(batch->ctx)->blit_mem, 0, 0,
                0); /* ADDR_LO/HI */
      OUT_RING(ring, 0x00000000);
   }
}

/* Emit a timestamped BLIT event, bracketed by debug markers. */
static inline void
fd5_emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   emit_marker5(ring, 7);
   fd5_event_write(batch, ring, BLIT, true);
   emit_marker5(ring, 7);
}

/* Program RB_RENDER_CNTL / GRAS_SC_CNTL for the current pass (binning vs
 * draw ring chosen by `binning`), factoring in active samples-passed
 * queries.  NOTE(review): the 0x8 bit set when !blit is undocumented here.
 */
static inline void
fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) assert_dt
{
   struct fd_ringbuffer *ring =
      binning ? ctx->batch->binning : ctx->batch->draw;

   /* TODO eventually this partially depends on the pfb state, ie.
    * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part
    * we could probably cache and just regenerate if framebuffer
    * state is dirty (or something like that)..
    *
    * Other bits seem to depend on query state, like if samples-passed
    * query is active.
    */
   bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0);
   OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
   OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */
                     COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |
                     COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) |
                     COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) |
                     COND(!blit, 0x8));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
   OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */
                     COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) |
                     COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED));
}

/* Emit an LRZ_FLUSH event, with GRAS_LRZ_CNTL toggled on around it and
 * cleared afterwards.
 */
static inline void
fd5_emit_lrz_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   /* TODO I think the extra writes to GRAS_LRZ_CNTL are probably
    * a workaround and not needed on all a5xx.
    */
   OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE);

   fd5_event_write(batch, ring, LRZ_FLUSH, false);

   OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, 0x0);
}

void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring,
                          struct fd5_emit *emit) assert_dt;

void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                    struct fd5_emit *emit) assert_dt;

void fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                       struct ir3_shader_variant *cp) assert_dt;
void fd5_emit_cs_consts(const struct ir3_shader_variant *v,
                        struct fd_ringbuffer *ring, struct fd_context *ctx,
                        const struct pipe_grid_info *info) assert_dt;

void fd5_emit_restore(struct fd_batch *batch,
                      struct fd_ringbuffer *ring) assert_dt;

void fd5_emit_init_screen(struct pipe_screen *pscreen);
void fd5_emit_init(struct pipe_context *pctx);

/* Emit an indirect-branch (IB) into `target`, bracketed by scratch6
 * debug markers.
 */
static inline void
fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   /* for debug after a lock up, write a unique counter value
    * to scratch6 for each IB, to make it easier to match up
    * register dumps to cmdstream.  The combination of IB and
    * DRAW (scratch7) is enough to "triangulate" the particular
    * draw that caused lockup.
    */
   emit_marker5(ring, 6);
   __OUT_IB5(ring, target);
   emit_marker5(ring, 6);
}

#endif /* FD5_EMIT_H */