Path: blob/21.2-virgl/src/gallium/drivers/freedreno/freedreno_batch.h
/*
 * Copyright (C) 2016 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#ifndef FREEDRENO_BATCH_H_
#define FREEDRENO_BATCH_H_

#include "util/list.h"
#include "util/simple_mtx.h"
#include "util/u_inlines.h"
#include "util/u_queue.h"
#include "util/u_trace.h"

#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

struct fd_resource;
struct fd_batch_key;
struct fd_batch_result;

/* A batch tracks everything about a cmdstream batch/submit, including the
 * ringbuffers used for binning, draw, and gmem cmds, list of associated
 * fd_resource-s, etc.
 */
struct fd_batch {
   struct pipe_reference reference;
   unsigned seqno;
   unsigned idx; /* index into cache->batches[] */

   struct u_trace trace;

   /* To detect cases where we can skip cmdstream to record timestamp: */
   uint32_t *last_timestamp_cmd;

   int in_fence_fd;
   struct pipe_fence_handle *fence;

   struct fd_context *ctx;

   /* submit_lock serializes cmdstream emission and flush.  Acquire before
    * screen->lock.
    */
   simple_mtx_t submit_lock;

   /* Do we need to mem2gmem before rendering?  We don't if, for example,
    * there was a glClear() that invalidated the entire previous buffer
    * contents.  Keep track of which buffer(s) are cleared, or need
    * restore.  Masks of PIPE_CLEAR_*
    *
    * The 'cleared' bits will be set for buffers which are *entirely*
    * cleared, and 'partial_cleared' bits will be set if you must
    * check cleared_scissor.
    *
    * The 'invalidated' bits are set for cleared buffers, and buffers
    * where the contents are undefined, ie. what we don't need to restore
    * to gmem.
    */
   enum {
      /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
      FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
      FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
      FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
      FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
   } invalidated, cleared, fast_cleared, restore, resolve;
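
   /* Worked example (illustrative sketch, not from the original source):
    * per the comment above, a glClear() covering the entire color buffer
    * would set FD_BUFFER_COLOR in both 'cleared' and 'invalidated', so the
    * mem2gmem step knows it can skip restoring the previous color contents
    * into gmem, roughly:
    *
    *    batch->cleared |= PIPE_CLEAR_COLOR;
    *    batch->invalidated |= PIPE_CLEAR_COLOR;
    */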
   /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
   bool nondraw : 1;
   bool needs_flush : 1;
   bool flushed : 1;
   bool tessellation : 1; /* tessellation used in batch */

   /* Keep track if WAIT_FOR_IDLE is needed for registers we need
    * to update via RMW:
    */
   bool needs_wfi : 1;

   /* To decide whether to render to system memory, keep track of the
    * number of draws, and whether any of them require multisample,
    * depth_test (or depth write), stencil_test, blending, and
    * color_logic_op (since those functions are disabled when
    * bypassing GMEM).
    */
   enum fd_gmem_reason gmem_reason;

   /* At submit time, once we've decided that this batch will use GMEM
    * rendering, the appropriate gmem state is looked up:
    */
   const struct fd_gmem_stateobj *gmem_state;

   /* A calculated "draw cost" value for the batch, which tries to
    * estimate the bandwidth-per-sample of all the draws according
    * to:
    *
    *    foreach_draw (...) {
    *      cost += num_mrt;
    *      if (blend_enabled)
    *        cost += num_mrt;
    *      if (depth_test_enabled)
    *        cost++;
    *      if (depth_write_enabled)
    *        cost++;
    *    }
    *
    * The idea is that each sample-passed minimally does one write
    * per MRT.  If blend is enabled, the hw will additionally do
    * a framebuffer read per sample-passed (for each MRT with blend
    * enabled).  If depth-test is enabled, the hw will additionally
    * do a depth buffer read.  If depth-write is enabled, the hw will
    * additionally do a depth buffer write.
    *
    * This does ignore depth buffer traffic for samples which do not
    * pass due to depth-test fail, and some other details.  But it is
    * just intended to be a rough estimate that is easy to calculate.
    */
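   /* Worked example (added for illustration): a draw with two MRTs,
    * blending enabled on both, and depth test + depth write enabled
    * would add 2 (MRT writes) + 2 (blend reads) + 1 (depth read) +
    * 1 (depth write) = 6 to the cost.
    */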
   unsigned cost;

   /* Tells the gen specific backend where to write stats used for
    * the autotune module.
    *
    * Pointer only valid during gmem emit code.
    */
   struct fd_batch_result *autotune_result;

   unsigned num_draws;    /* number of draws in current batch */
   unsigned num_vertices; /* number of vertices in current batch */

   /* Currently only used on a6xx, to calculate vsc prim/draw stream
    * sizes:
    */
   unsigned num_bins_per_pipe;
   unsigned prim_strm_bits;
   unsigned draw_strm_bits;

   /* Track the maximal bounds of the scissor of all the draws within a
    * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
    * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
    */
   struct pipe_scissor_state max_scissor;

   /* Keep track of DRAW initiators that need to be patched up depending
    * on whether we are using binning or not:
    */
   struct util_dynarray draw_patches;

   /* texture state that needs patching for fb_read: */
   struct util_dynarray fb_read_patches;

   /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
    * once we know whether or not to use GMEM, and GMEM tile pitch.
    *
    * (only for a3xx.. but having gen specific subclasses of fd_batch
    * seemed overkill for now)
    */
   struct util_dynarray rbrc_patches;

   /* Keep track of GMEM related values that need to be patched up once we
    * know the gmem layout:
    */
   struct util_dynarray gmem_patches;

   /* Keep track of the pointer to the start of MEM exports for a20x
    * binning shaders.
    *
    * This is so the end of the shader can be cut off at the right point,
    * depending on the GMEM configuration.
    */
   struct util_dynarray shader_patches;

   struct pipe_framebuffer_state framebuffer;

   struct fd_submit *submit;

   /** draw pass cmdstream: */
   struct fd_ringbuffer *draw;
   /** binning pass cmdstream: */
   struct fd_ringbuffer *binning;
   /** tiling/gmem (IB0) cmdstream: */
   struct fd_ringbuffer *gmem;

   /** preamble cmdstream (executed once before first tile): */
   struct fd_ringbuffer *prologue;

   /** epilogue cmdstream (executed after each tile): */
   struct fd_ringbuffer *epilogue;

   struct fd_ringbuffer *tile_setup;
   struct fd_ringbuffer *tile_fini;

   union pipe_color_union clear_color[MAX_RENDER_TARGETS];
   double clear_depth;
   unsigned clear_stencil;

   /**
    * hw query related state:
    */
   /*@{*/
   /* next sample offset.. incremented for each sample in the batch/
    * submit, reset to zero on next submit.
    */
   uint32_t next_sample_offset;

   /* cached samples (in case multiple queries need to reference
    * the same sample snapshot)
    */
   struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];

   /* which sample providers were used in the current batch: */
   uint32_t query_providers_used;

   /* which sample providers are currently enabled in the batch: */
   uint32_t query_providers_active;

   /* list of samples in current batch: */
   struct util_dynarray samples;

   /* current query result bo and tile stride: */
   struct pipe_resource *query_buf;
   uint32_t query_tile_stride;
   /*@}*/

   /* Set of resources used by currently-unsubmitted batch (read or
    * write).. does not hold a reference to the resource.
    */
   struct set *resources;

   /** key in batch-cache (if not null): */
   struct fd_batch_key *key;
   uint32_t hash;

   /** set of dependent batches.. holds refs to dependent batches: */
   uint32_t dependents_mask;

   /* Buffer for tessellation engine input
    */
   struct fd_bo *tessfactor_bo;
   uint32_t tessfactor_size;

   /* Buffer for passing parameters between TCS and TES
    */
   struct fd_bo *tessparam_bo;
   uint32_t tessparam_size;

   struct fd_ringbuffer *tess_addrs_constobj;
};

struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);

void fd_batch_reset(struct fd_batch *batch) assert_dt;
void fd_batch_flush(struct fd_batch *batch) assert_dt;
void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
void fd_batch_resource_write(struct fd_batch *batch,
                             struct fd_resource *rsc) assert_dt;
void fd_batch_resource_read_slowpath(struct fd_batch *batch,
                                     struct fd_resource *rsc) assert_dt;
void fd_batch_check_size(struct fd_batch *batch) assert_dt;

uint32_t fd_batch_key_hash(const void *_key);
bool fd_batch_key_equals(const void *_a, const void *_b);
struct fd_batch_key *fd_batch_key_clone(void *mem_ctx,
                                        const struct fd_batch_key *key);

/* not called directly: */
void __fd_batch_describe(char *buf, const struct fd_batch *batch) assert_dt;
void __fd_batch_destroy(struct fd_batch *batch);

/*
 * NOTE the rule is, you need to hold the screen->lock when destroying
 * a batch.. so either use fd_batch_reference() (which grabs the lock
 * for you) if you don't hold the lock, or fd_batch_reference_locked()
 * if you do hold the lock.
 *
 * WARNING the _locked() version can briefly drop the lock.  Without
 * recursive mutexes, I'm not sure there is much else we can do (since
 * __fd_batch_destroy() needs to unref resources)
 *
 * WARNING you must acquire the screen->lock and use the _locked()
 * version in the case that the batch being ref'd can disappear out
 * from under you.
 */

static inline void
fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;

   /* only need lock if a reference is dropped: */
   if (old_batch)
      fd_screen_assert_locked(old_batch->ctx->screen);

   if (pipe_reference_described(
          &(*ptr)->reference, &batch->reference,
          (debug_reference_descriptor)__fd_batch_describe))
      __fd_batch_destroy(old_batch);

   *ptr = batch;
}

static inline void
fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;
   struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;

   if (ctx)
      fd_screen_lock(ctx->screen);

   fd_batch_reference_locked(ptr, batch);

   if (ctx)
      fd_screen_unlock(ctx->screen);
}
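
/* A minimal usage sketch (illustrative, not part of the original header),
 * assuming the caller does not hold screen->lock and that ctx->batch is
 * the context's current batch.  Passing NULL as the new batch drops the
 * reference:
 *
 *    struct fd_batch *batch = NULL;
 *    fd_batch_reference(&batch, ctx->batch);   // takes a new reference
 *    ...
 *    fd_batch_reference(&batch, NULL);         // drops it, possibly
 *                                              // destroying the batch
 */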

static inline void
fd_batch_unlock_submit(struct fd_batch *batch)
{
   simple_mtx_unlock(&batch->submit_lock);
}

/**
 * Returns true if the submit-lock was acquired, false if we failed to
 * acquire the lock, ie. the batch was already flushed.
 */
static inline bool MUST_CHECK
fd_batch_lock_submit(struct fd_batch *batch)
{
   simple_mtx_lock(&batch->submit_lock);
   bool ret = !batch->flushed;
   if (!ret)
      fd_batch_unlock_submit(batch);
   return ret;
}

/**
 * Mark the batch as having something worth flushing (rendering, blit, query,
 * etc)
 */
static inline void
fd_batch_needs_flush(struct fd_batch *batch)
{
   batch->needs_flush = true;
   fd_fence_ref(&batch->ctx->last_fence, NULL);
}

/* Since we reorder batches and can pause/resume queries (notably for disabling
 * queries during some meta operations), we update the current query state for
 * the batch before each draw.
 */
static inline void
fd_batch_update_queries(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;

   if (ctx->query_update_batch)
      ctx->query_update_batch(batch, false);
}

static inline void
fd_batch_finish_queries(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;

   if (ctx->query_update_batch)
      ctx->query_update_batch(batch, true);
}

static inline void
fd_reset_wfi(struct fd_batch *batch)
{
   batch->needs_wfi = true;
}

void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;

/* emit a CP_EVENT_WRITE:
 */
static inline void
fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
               enum vgt_event_type evt)
{
   OUT_PKT3(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, evt);
   fd_reset_wfi(batch);
}

/* Get per-tile epilogue */
static inline struct fd_ringbuffer *
fd_batch_get_epilogue(struct fd_batch *batch)
{
   if (batch->epilogue == NULL) {
      batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000,
                                                 (enum fd_ringbuffer_flags)0);
   }

   return batch->epilogue;
}

struct fd_ringbuffer *fd_batch_get_prologue(struct fd_batch *batch);

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_BATCH_H_ */