Path: blob/21.2-virgl/src/gallium/drivers/freedreno/freedreno_batch.c
/*
 * Copyright (C) 2016 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "util/hash_table.h"
#include "util/list.h"
#include "util/set.h"
#include "util/u_string.h"

#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_query_hw.h"
#include "freedreno_resource.h"

static struct fd_ringbuffer *
alloc_ring(struct fd_batch *batch, unsigned sz, enum fd_ringbuffer_flags flags)
{
   struct fd_context *ctx = batch->ctx;

   /* if kernel is too old to support unlimited # of cmd buffers, we
    * have no option but to allocate large worst-case sizes so that
    * we don't need to grow the ringbuffer.  Performance is likely to
    * suffer, but there is no good alternative.
    *
    * Otherwise if supported, allocate a growable ring with initial
    * size of zero.
    */
   if ((fd_device_version(ctx->screen->dev) >= FD_VERSION_UNLIMITED_CMDS) &&
       !FD_DBG(NOGROW)) {
      flags |= FD_RINGBUFFER_GROWABLE;
      sz = 0;
   }

   return fd_submit_new_ringbuffer(batch->submit, sz, flags);
}
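
/* Illustrative note (editor's sketch, not driver code): with a kernel
 * reporting FD_VERSION_UNLIMITED_CMDS, the size passed to alloc_ring()
 * is discarded in favor of a growable ring, so the two calls below are
 * equivalent; on an older kernel each gets its fixed worst-case size:
 *
 *    alloc_ring(batch, 0x1000, 0);     // old kernel: fixed 4 KiB ring
 *    alloc_ring(batch, 0x100000, 0);   // old kernel: fixed 1 MiB ring
 */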

static void
batch_init(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;

   batch->submit = fd_submit_new(ctx->pipe);
   if (batch->nondraw) {
      batch->gmem = alloc_ring(batch, 0x1000, FD_RINGBUFFER_PRIMARY);
      batch->draw = alloc_ring(batch, 0x100000, 0);
   } else {
      batch->gmem = alloc_ring(batch, 0x100000, FD_RINGBUFFER_PRIMARY);
      batch->draw = alloc_ring(batch, 0x100000, 0);

      /* a6xx+ re-uses draw rb for both draw and binning pass: */
      if (ctx->screen->gpu_id < 600) {
         batch->binning = alloc_ring(batch, 0x100000, 0);
      }
   }

   batch->in_fence_fd = -1;
   batch->fence = NULL;

   /* Work around problems on earlier gens with submit merging, etc,
    * by always creating a fence to request that the submit is flushed
    * immediately:
    */
   if (ctx->screen->gpu_id < 600)
      batch->fence = fd_fence_create(batch);

   batch->cleared = 0;
   batch->fast_cleared = 0;
   batch->invalidated = 0;
   batch->restore = batch->resolve = 0;
   batch->needs_flush = false;
   batch->flushed = false;
   batch->gmem_reason = 0;
   batch->num_draws = 0;
   batch->num_vertices = 0;
   batch->num_bins_per_pipe = 0;
   batch->prim_strm_bits = 0;
   batch->draw_strm_bits = 0;

   fd_reset_wfi(batch);

   util_dynarray_init(&batch->draw_patches, NULL);
   util_dynarray_init(&batch->fb_read_patches, NULL);

   if (is_a2xx(ctx->screen)) {
      util_dynarray_init(&batch->shader_patches, NULL);
      util_dynarray_init(&batch->gmem_patches, NULL);
   }

   if (is_a3xx(ctx->screen))
      util_dynarray_init(&batch->rbrc_patches, NULL);

   assert(batch->resources->entries == 0);

   util_dynarray_init(&batch->samples, NULL);

   u_trace_init(&batch->trace, &ctx->trace_context);
   batch->last_timestamp_cmd = NULL;
}

struct fd_batch *
fd_batch_create(struct fd_context *ctx, bool nondraw)
{
   struct fd_batch *batch = CALLOC_STRUCT(fd_batch);

   if (!batch)
      return NULL;

   DBG("%p", batch);

   pipe_reference_init(&batch->reference, 1);
   batch->ctx = ctx;
   batch->nondraw = nondraw;

   simple_mtx_init(&batch->submit_lock, mtx_plain);

   batch->resources =
      _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);

   batch_init(batch);

   return batch;
}

static void
cleanup_submit(struct fd_batch *batch)
{
   if (!batch->submit)
      return;

   fd_ringbuffer_del(batch->draw);
   fd_ringbuffer_del(batch->gmem);

   if (batch->binning) {
      fd_ringbuffer_del(batch->binning);
      batch->binning = NULL;
   }

   if (batch->prologue) {
      fd_ringbuffer_del(batch->prologue);
      batch->prologue = NULL;
   }

   if (batch->epilogue) {
      fd_ringbuffer_del(batch->epilogue);
      batch->epilogue = NULL;
   }

   if (batch->tile_setup) {
      fd_ringbuffer_del(batch->tile_setup);
      batch->tile_setup = NULL;
   }

   if (batch->tile_fini) {
      fd_ringbuffer_del(batch->tile_fini);
      batch->tile_fini = NULL;
   }

   if (batch->tessellation) {
      fd_bo_del(batch->tessfactor_bo);
      fd_bo_del(batch->tessparam_bo);
      fd_ringbuffer_del(batch->tess_addrs_constobj);
   }

   fd_submit_del(batch->submit);
   batch->submit = NULL;
}
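
/* Usage sketch (hypothetical caller, for orientation only): the batch
 * lifecycle implemented by this file, where dropping the last reference
 * ends up in __fd_batch_destroy():
 *
 *    struct fd_batch *batch = fd_batch_create(ctx, false);
 *    ...record draws; resources/dependencies accumulate...
 *    fd_batch_flush(batch);               // submit via fd_gmem_render_tiles()
 *    fd_batch_reference(&batch, NULL);    // drop the creation reference
 */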

static void
batch_fini(struct fd_batch *batch)
{
   DBG("%p", batch);

   pipe_resource_reference(&batch->query_buf, NULL);

   if (batch->in_fence_fd != -1)
      close(batch->in_fence_fd);

   /* in case batch wasn't flushed but fence was created: */
   if (batch->fence)
      fd_fence_set_batch(batch->fence, NULL);

   fd_fence_ref(&batch->fence, NULL);

   cleanup_submit(batch);

   util_dynarray_fini(&batch->draw_patches);
   util_dynarray_fini(&batch->fb_read_patches);

   if (is_a2xx(batch->ctx->screen)) {
      util_dynarray_fini(&batch->shader_patches);
      util_dynarray_fini(&batch->gmem_patches);
   }

   if (is_a3xx(batch->ctx->screen))
      util_dynarray_fini(&batch->rbrc_patches);

   while (batch->samples.size > 0) {
      struct fd_hw_sample *samp =
         util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
      fd_hw_sample_reference(batch->ctx, &samp, NULL);
   }
   util_dynarray_fini(&batch->samples);

   u_trace_fini(&batch->trace);
}

static void
batch_flush_dependencies(struct fd_batch *batch) assert_dt
{
   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
   struct fd_batch *dep;

   foreach_batch (dep, cache, batch->dependents_mask) {
      fd_batch_flush(dep);
      fd_batch_reference(&dep, NULL);
   }

   batch->dependents_mask = 0;
}

static void
batch_reset_dependencies(struct fd_batch *batch)
{
   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
   struct fd_batch *dep;

   foreach_batch (dep, cache, batch->dependents_mask) {
      fd_batch_reference(&dep, NULL);
   }

   batch->dependents_mask = 0;
}

static void
batch_reset_resources(struct fd_batch *batch)
{
   fd_screen_assert_locked(batch->ctx->screen);

   set_foreach (batch->resources, entry) {
      struct fd_resource *rsc = (struct fd_resource *)entry->key;
      _mesa_set_remove(batch->resources, entry);
      debug_assert(rsc->track->batch_mask & (1 << batch->idx));
      rsc->track->batch_mask &= ~(1 << batch->idx);
      if (rsc->track->write_batch == batch)
         fd_batch_reference_locked(&rsc->track->write_batch, NULL);
   }
}

static void
batch_reset(struct fd_batch *batch) assert_dt
{
   DBG("%p", batch);

   batch_reset_dependencies(batch);

   fd_screen_lock(batch->ctx->screen);
   batch_reset_resources(batch);
   fd_screen_unlock(batch->ctx->screen);

   batch_fini(batch);
   batch_init(batch);
}

void
fd_batch_reset(struct fd_batch *batch)
{
   if (batch->needs_flush)
      batch_reset(batch);
}

void
__fd_batch_destroy(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;

   DBG("%p", batch);

   fd_screen_assert_locked(batch->ctx->screen);

   fd_bc_invalidate_batch(batch, true);

   batch_reset_resources(batch);
   debug_assert(batch->resources->entries == 0);
   _mesa_set_destroy(batch->resources, NULL);

   fd_screen_unlock(ctx->screen);
   batch_reset_dependencies(batch);
   debug_assert(batch->dependents_mask == 0);

   util_copy_framebuffer_state(&batch->framebuffer, NULL);
   batch_fini(batch);

   simple_mtx_destroy(&batch->submit_lock);

   free(batch->key);
   free(batch);
   fd_screen_lock(ctx->screen);
}

void
__fd_batch_describe(char *buf, const struct fd_batch *batch)
{
   sprintf(buf, "fd_batch<%u>", batch->seqno);
}

/* Get per-batch prologue */
struct fd_ringbuffer *
fd_batch_get_prologue(struct fd_batch *batch)
{
   if (!batch->prologue)
      batch->prologue = alloc_ring(batch, 0x1000, 0);
   return batch->prologue;
}
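
/* Flow sketch for the flush path below (descriptive summary, assuming
 * the locking behavior shown in the code): fd_batch_flush() holds a
 * temporary reference while batch_flush() pauses active queries, flushes
 * dependent batches, detaches tracked resources under the screen lock,
 * invalidates the batch-cache entry, and finally submits the rendering
 * via fd_gmem_render_tiles().
 */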

/* Only called from fd_batch_flush() */
static void
batch_flush(struct fd_batch *batch) assert_dt
{
   DBG("%p: needs_flush=%d", batch, batch->needs_flush);

   if (!fd_batch_lock_submit(batch))
      return;

   batch->needs_flush = false;

   /* close out the draw cmds by making sure any active queries are
    * paused:
    */
   fd_batch_finish_queries(batch);

   batch_flush_dependencies(batch);

   fd_screen_lock(batch->ctx->screen);
   batch_reset_resources(batch);
   /* NOTE: remove=false removes the batch from the hashtable, so future
    * lookups won't cache-hit a flushed batch, but leaves the weak reference
    * to the batch to avoid having multiple batches with same batch->idx, as
    * that causes all sorts of hilarity.
    */
   fd_bc_invalidate_batch(batch, false);
   batch->flushed = true;

   if (batch == batch->ctx->batch)
      fd_batch_reference_locked(&batch->ctx->batch, NULL);

   fd_screen_unlock(batch->ctx->screen);

   if (batch->fence)
      fd_fence_ref(&batch->ctx->last_fence, batch->fence);

   fd_gmem_render_tiles(batch);

   debug_assert(batch->reference.count > 0);

   cleanup_submit(batch);
   fd_batch_unlock_submit(batch);
}

/* NOTE: could drop the last ref to batch */
void
fd_batch_flush(struct fd_batch *batch)
{
   struct fd_batch *tmp = NULL;

   /* NOTE: we need to hold an extra ref across the body of flush,
    * since the last ref to this batch could be dropped when cleaning
    * up used_resources
    */
   fd_batch_reference(&tmp, batch);
   batch_flush(tmp);
   fd_batch_reference(&tmp, NULL);
}

/* find a batch's dependents mask, including recursive dependencies: */
static uint32_t
recursive_dependents_mask(struct fd_batch *batch)
{
   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
   struct fd_batch *dep;
   uint32_t dependents_mask = batch->dependents_mask;

   foreach_batch (dep, cache, batch->dependents_mask)
      dependents_mask |= recursive_dependents_mask(dep);

   return dependents_mask;
}

void
fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
{
   fd_screen_assert_locked(batch->ctx->screen);

   if (batch->dependents_mask & (1 << dep->idx))
      return;

   /* a loop should not be possible */
   debug_assert(!((1 << batch->idx) & recursive_dependents_mask(dep)));

   struct fd_batch *other = NULL;
   fd_batch_reference_locked(&other, dep);
   batch->dependents_mask |= (1 << dep->idx);
   DBG("%p: added dependency on %p", batch, dep);
}

static void
flush_write_batch(struct fd_resource *rsc) assert_dt
{
   struct fd_batch *b = NULL;
   fd_batch_reference_locked(&b, rsc->track->write_batch);

   fd_screen_unlock(b->ctx->screen);
   fd_batch_flush(b);
   fd_screen_lock(b->ctx->screen);

   fd_batch_reference_locked(&b, NULL);
}

static void
fd_batch_add_resource(struct fd_batch *batch, struct fd_resource *rsc)
{
   if (likely(fd_batch_references_resource(batch, rsc))) {
      debug_assert(_mesa_set_search(batch->resources, rsc));
      return;
   }

   debug_assert(!_mesa_set_search(batch->resources, rsc));

   _mesa_set_add(batch->resources, rsc);
   rsc->track->batch_mask |= (1 << batch->idx);
}
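
/* Illustrative note on the tracking scheme used below (editor's sketch):
 * each batch owns one bit (batch->idx) in rsc->track->batch_mask, so
 * "is any *other* batch touching this resource?" is a single mask test:
 *
 *    rsc->track->batch_mask & ~(1 << batch->idx)
 *
 * while the sole pending writer, if any, is rsc->track->write_batch.
 */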
void
fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
{
   fd_screen_assert_locked(batch->ctx->screen);

   DBG("%p: write %p", batch, rsc);

   /* Must do this before the early out, so we unset a previous resource
    * invalidate (which may have left the write_batch state in place).
    */
   rsc->valid = true;

   if (rsc->track->write_batch == batch)
      return;

   fd_batch_write_prep(batch, rsc);

   if (rsc->stencil)
      fd_batch_resource_write(batch, rsc->stencil);

   /* note, invalidate write batch, to avoid further writes to rsc
    * resulting in a write-after-read hazard.
    */
   /* if we are pending read or write by any other batch: */
   if (unlikely(rsc->track->batch_mask & ~(1 << batch->idx))) {
      struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
      struct fd_batch *dep;

      if (rsc->track->write_batch)
         flush_write_batch(rsc);

      foreach_batch (dep, cache, rsc->track->batch_mask) {
         struct fd_batch *b = NULL;
         if (dep == batch)
            continue;
         /* note that batch_add_dep could flush and unref dep, so
          * we need to hold a reference to keep it live for the
          * fd_bc_invalidate_batch()
          */
         fd_batch_reference(&b, dep);
         fd_batch_add_dep(batch, b);
         fd_bc_invalidate_batch(b, false);
         fd_batch_reference_locked(&b, NULL);
      }
   }
   fd_batch_reference_locked(&rsc->track->write_batch, batch);

   fd_batch_add_resource(batch, rsc);
}

void
fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc)
{
   fd_screen_assert_locked(batch->ctx->screen);

   if (rsc->stencil)
      fd_batch_resource_read(batch, rsc->stencil);

   DBG("%p: read %p", batch, rsc);

   /* If reading a resource pending a write, go ahead and flush the
    * writer.  This avoids situations where we end up having to
    * flush the current batch in _resource_used()
    */
   if (unlikely(rsc->track->write_batch && rsc->track->write_batch != batch))
      flush_write_batch(rsc);

   fd_batch_add_resource(batch, rsc);
}

void
fd_batch_check_size(struct fd_batch *batch)
{
   if (FD_DBG(FLUSH)) {
      fd_batch_flush(batch);
      return;
   }

   /* Place a reasonable upper bound on prim/draw stream buffer size: */
   const unsigned limit_bits = 8 * 8 * 1024 * 1024;
   if ((batch->prim_strm_bits > limit_bits) ||
       (batch->draw_strm_bits > limit_bits)) {
      fd_batch_flush(batch);
      return;
   }

   if (!fd_ringbuffer_check_size(batch->draw))
      fd_batch_flush(batch);
}

/* emit a WAIT_FOR_IDLE only if needed, i.e. if there has not already
 * been one since last draw:
 */
void
fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   if (batch->needs_wfi) {
      if (batch->ctx->screen->gpu_id >= 500)
         OUT_WFI5(ring);
      else
         OUT_WFI(ring);
      batch->needs_wfi = false;
   }
}
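
/* Usage sketch for fd_wfi() (hypothetical emit sequence): callers invoke
 * it before programming state that requires an idle GPU; since needs_wfi
 * is cleared after the first emit, at most one WAIT_FOR_IDLE is inserted
 * between draws:
 *
 *    fd_wfi(batch, batch->draw);       // emits WFI/WFI5 only if needed
 *    ...write idle-sensitive registers...
 */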