Path: blob/21.2-virgl/src/gallium/drivers/freedreno/freedreno_batch_cache.c
/*
 * Copyright (C) 2016 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "util/hash_table.h"
#include "util/list.h"
#include "util/set.h"
#include "util/u_string.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"

#include "freedreno_batch.h"
#include "freedreno_batch_cache.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"

/* Overview:
 *
 * The batch cache provides lookup for mapping a pipe_framebuffer_state
 * to a batch.
 *
 * It does this via a hashtable, with a key that roughly matches the
 * pipe_framebuffer_state, as described below.
 *
 * Batch Cache hashtable key:
 *
 *    To serialize the key, and to avoid dealing with holding a reference to
 *    pipe_surface's (which hold a reference to pipe_resource and complicate
 *    the whole refcnting thing), the key is variable length and inlines the
 *    pertinent details of the pipe_surface.
 *
 * Batch:
 *
 *    Each batch needs to hold a reference to each resource it depends on (ie.
 *    anything that needs a mem2gmem).  And a weak reference to resources it
 *    renders to.  (If both src[n] and dst[n] are not NULL then they are the
 *    same.)
 *
 *    When a resource is destroyed, we need to remove entries in the batch
 *    cache that reference the resource, to avoid dangling pointer issues.
 *    So each resource holds a hashset of batches which reference it in
 *    their hashtable key.
 *
 *    When a batch has weak reference to no more resources (ie. all the
 *    surfaces it rendered to are destroyed) the batch can be destroyed.
 *    Could happen in an app that renders and never uses the result.  More
 *    common scenario, I think, will be that some, but not all, of the
 *    surfaces are destroyed before the batch is submitted.
 *
 *    If (for example), batch writes to zsbuf but that surface is destroyed
 *    before batch is submitted, we can skip gmem2mem (but still need to
 *    alloc gmem space as before).  If the batch depended on previous contents
 *    of that surface, it would be holding a reference so the surface would
 *    not have been destroyed.
 */
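
/* Illustrative example (not part of the original comment): for a framebuffer
 * with a zsbuf and two cbufs, fd_batch_from_fb() below builds a key with
 * num_surfs == 3, where surf[0] holds the zsbuf (pos == 0) and surf[1..2]
 * hold cbufs[0..1] (pos == i + 1).
 */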

struct fd_batch_key {
   uint32_t width;
   uint32_t height;
   uint16_t layers;
   uint16_t samples;
   uint16_t num_surfs;
   uint16_t ctx_seqno;
   struct {
      struct pipe_resource *texture;
      union pipe_surface_desc u;
      uint8_t pos, samples;
      uint16_t format;
   } surf[0];
};

static struct fd_batch_key *
key_alloc(unsigned num_surfs)
{
   struct fd_batch_key *key = CALLOC_VARIANT_LENGTH_STRUCT(
      fd_batch_key, sizeof(key->surf[0]) * num_surfs);
   return key;
}

uint32_t
fd_batch_key_hash(const void *_key)
{
   const struct fd_batch_key *key = _key;
   uint32_t hash = 0;
   hash = XXH32(key, offsetof(struct fd_batch_key, surf[0]), hash);
   hash = XXH32(key->surf, sizeof(key->surf[0]) * key->num_surfs, hash);
   return hash;
}

bool
fd_batch_key_equals(const void *_a, const void *_b)
{
   const struct fd_batch_key *a = _a;
   const struct fd_batch_key *b = _b;
   return (memcmp(a, b, offsetof(struct fd_batch_key, surf[0])) == 0) &&
          (memcmp(a->surf, b->surf, sizeof(a->surf[0]) * a->num_surfs) == 0);
}

struct fd_batch_key *
fd_batch_key_clone(void *mem_ctx, const struct fd_batch_key *key)
{
   unsigned sz =
      sizeof(struct fd_batch_key) + (sizeof(key->surf[0]) * key->num_surfs);
   struct fd_batch_key *new_key = rzalloc_size(mem_ctx, sz);
   memcpy(new_key, key, sz);
   return new_key;
}

void
fd_bc_init(struct fd_batch_cache *cache)
{
   cache->ht =
      _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
}

void
fd_bc_fini(struct fd_batch_cache *cache)
{
   _mesa_hash_table_destroy(cache->ht, NULL);
}

/* Flushes all batches in the batch cache.  Used at glFlush() and similar times. */
void
fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;

   /* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly)
    * can cause batches to be unref'd and freed under our feet, so grab
    * a reference to all the batches we need up-front.
    */
   struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
   struct fd_batch *batch;
   unsigned n = 0;

   fd_screen_lock(ctx->screen);

   foreach_batch (batch, cache, cache->batch_mask) {
      if (batch->ctx == ctx) {
         fd_batch_reference_locked(&batches[n++], batch);
      }
   }

   /* deferred flush doesn't actually flush, but it marks every other
    * batch associated with the context as dependent on the current
    * batch.  So when the current batch gets flushed, all other batches
    * that came before also get flushed.
    */
   if (deferred) {
      struct fd_batch *current_batch = fd_context_batch(ctx);

      for (unsigned i = 0; i < n; i++) {
         if (batches[i] && (batches[i]->ctx == ctx) &&
             (batches[i] != current_batch)) {
            fd_batch_add_dep(current_batch, batches[i]);
         }
      }

      fd_batch_reference_locked(&current_batch, NULL);

      fd_screen_unlock(ctx->screen);
   } else {
      fd_screen_unlock(ctx->screen);

      for (unsigned i = 0; i < n; i++) {
         fd_batch_flush(batches[i]);
      }
   }

   for (unsigned i = 0; i < n; i++) {
      fd_batch_reference(&batches[i], NULL);
   }
}

/**
 * Flushes the batch (if any) writing this resource.  Must not hold the screen
 * lock.
 */
void
fd_bc_flush_writer(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
{
   fd_screen_lock(ctx->screen);
   struct fd_batch *write_batch = NULL;
   fd_batch_reference_locked(&write_batch, rsc->track->write_batch);
   fd_screen_unlock(ctx->screen);

   if (write_batch) {
      fd_batch_flush(write_batch);
      fd_batch_reference(&write_batch, NULL);
   }
}

/**
 * Flushes any batches reading this resource.  Must not hold the screen lock.
 */
void
fd_bc_flush_readers(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
{
   struct fd_batch *batch, *batches[32] = {};
   uint32_t batch_count = 0;

   /* This is a bit awkward, probably a fd_batch_flush_locked()
    * would make things simpler.. but we need to hold the lock
    * to iterate the batches which reference this resource.  So
    * we must first grab references under a lock, then flush.
    */
   fd_screen_lock(ctx->screen);
   foreach_batch (batch, &ctx->screen->batch_cache, rsc->track->batch_mask)
      fd_batch_reference_locked(&batches[batch_count++], batch);
   fd_screen_unlock(ctx->screen);

   for (int i = 0; i < batch_count; i++) {
      fd_batch_flush(batches[i]);
      fd_batch_reference(&batches[i], NULL);
   }
}

void
fd_bc_dump(struct fd_context *ctx, const char *fmt, ...)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;

   if (!FD_DBG(MSGS))
      return;

   fd_screen_lock(ctx->screen);

   va_list ap;
   va_start(ap, fmt);
   vprintf(fmt, ap);
   va_end(ap);

   for (int i = 0; i < ARRAY_SIZE(cache->batches); i++) {
      struct fd_batch *batch = cache->batches[i];
      if (batch) {
         printf(" %p<%u>%s\n", batch, batch->seqno,
                batch->needs_flush ? ", NEEDS FLUSH" : "");
      }
   }

   printf("----\n");

   fd_screen_unlock(ctx->screen);
}

/**
 * Note that when batch is flushed, it needs to remain in the cache so
 * that fd_bc_invalidate_resource() can work.. otherwise we can have
 * the case where a rsc is destroyed while a batch still has a dangling
 * reference to it.
 *
 * Note that the cmdstream (or, after the SUBMIT ioctl, the kernel)
 * would have a reference to the underlying bo, so it is ok for the
 * rsc to be destroyed before the batch.
 */
void
fd_bc_invalidate_batch(struct fd_batch *batch, bool remove)
{
   if (!batch)
      return;

   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
   struct fd_batch_key *key = batch->key;

   fd_screen_assert_locked(batch->ctx->screen);

   if (remove) {
      cache->batches[batch->idx] = NULL;
      cache->batch_mask &= ~(1 << batch->idx);
   }

   if (!key)
      return;

   DBG("%p: key=%p", batch, batch->key);
   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
      rsc->track->bc_batch_mask &= ~(1 << batch->idx);
   }

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key);
   _mesa_hash_table_remove(cache->ht, entry);
}

void
fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
{
   struct fd_screen *screen = fd_screen(rsc->b.b.screen);
   struct fd_batch *batch;

   fd_screen_lock(screen);

   if (destroy) {
      foreach_batch (batch, &screen->batch_cache, rsc->track->batch_mask) {
         struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
         _mesa_set_remove(batch->resources, entry);
      }
      rsc->track->batch_mask = 0;

      fd_batch_reference_locked(&rsc->track->write_batch, NULL);
   }

   foreach_batch (batch, &screen->batch_cache, rsc->track->bc_batch_mask)
      fd_bc_invalidate_batch(batch, false);

   rsc->track->bc_batch_mask = 0;

   fd_screen_unlock(screen);
}
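
/* Grab a free slot in the fixed-size cache->batches[] array for a new batch.
 * If every slot is taken, the oldest batch (lowest seqno) is flushed to free
 * one up.  Called with the screen lock held.
 */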
static struct fd_batch *
alloc_batch_locked(struct fd_batch_cache *cache, struct fd_context *ctx,
                   bool nondraw) assert_dt
{
   struct fd_batch *batch;
   uint32_t idx;

   fd_screen_assert_locked(ctx->screen);

   while ((idx = ffs(~cache->batch_mask)) == 0) {
#if 0
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         batch = cache->batches[i];
         debug_printf("%d: needs_flush=%d, depends:", batch->idx, batch->needs_flush);
         set_foreach (batch->dependencies, entry) {
            struct fd_batch *dep = (struct fd_batch *)entry->key;
            debug_printf(" %d", dep->idx);
         }
         debug_printf("\n");
      }
#endif
      /* TODO: is LRU the better policy?  Or perhaps the batch that
       * depends on the fewest other batches?
       */
      struct fd_batch *flush_batch = NULL;
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
            fd_batch_reference_locked(&flush_batch, cache->batches[i]);
      }

      /* we can drop lock temporarily here, since we hold a ref,
       * flush_batch won't disappear under us.
       */
      fd_screen_unlock(ctx->screen);
      DBG("%p: too many batches!  flush forced!", flush_batch);
      fd_batch_flush(flush_batch);
      fd_screen_lock(ctx->screen);

      /* While the resources get cleaned up automatically, the flush_batch
       * doesn't get removed from the dependencies of other batches, so
       * it won't be unref'd and will remain in the table.
       *
       * TODO maybe keep a bitmask of batches that depend on me, to make
       * this easier:
       */
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         struct fd_batch *other = cache->batches[i];
         if (!other)
            continue;
         if (other->dependents_mask & (1 << flush_batch->idx)) {
            other->dependents_mask &= ~(1 << flush_batch->idx);
            struct fd_batch *ref = flush_batch;
            fd_batch_reference_locked(&ref, NULL);
         }
      }

      fd_batch_reference_locked(&flush_batch, NULL);
   }

   idx--; /* bit zero returns 1 for ffs() */

   batch = fd_batch_create(ctx, nondraw);
   if (!batch)
      return NULL;

   batch->seqno = cache->cnt++;
   batch->idx = idx;
   cache->batch_mask |= (1 << idx);

   debug_assert(cache->batches[idx] == NULL);
   cache->batches[idx] = batch;

   return batch;
}

struct fd_batch *
fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch;

   /* For normal draw batches, pctx->set_framebuffer_state() handles
    * this, but for nondraw batches, this is a nice central location
    * to handle them all.
    */
   if (nondraw)
      fd_context_switch_from(ctx);

   fd_screen_lock(ctx->screen);
   batch = alloc_batch_locked(cache, ctx, nondraw);
   fd_screen_unlock(ctx->screen);

   if (batch && nondraw)
      fd_context_switch_to(ctx, batch);

   return batch;
}
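
/* Look up the batch for the given key, creating one if needed.  Takes
 * ownership of the key: on a cache hit the key is freed, otherwise it is
 * stored in batch->key and in the hashtable.  Called with the screen lock
 * held.
 */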
static struct fd_batch *
batch_from_key(struct fd_context *ctx, struct fd_batch_key *key) assert_dt
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch = NULL;
   uint32_t hash = fd_batch_key_hash(key);
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);

   if (entry) {
      free(key);
      fd_batch_reference_locked(&batch, (struct fd_batch *)entry->data);
      assert(!batch->flushed);
      return batch;
   }

   batch = alloc_batch_locked(cache, ctx, false);
#ifdef DEBUG
   DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash, key->width,
       key->height, key->layers, key->samples);
   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      DBG("%p: surf[%u]: %p (%s) (%u,%u / %u,%u,%u)", batch,
          key->surf[idx].pos, key->surf[idx].texture,
          util_format_name(key->surf[idx].format),
          key->surf[idx].u.buf.first_element, key->surf[idx].u.buf.last_element,
          key->surf[idx].u.tex.first_layer, key->surf[idx].u.tex.last_layer,
          key->surf[idx].u.tex.level);
   }
#endif
   if (!batch)
      return NULL;

   /* reset max_scissor, which will be adjusted on draws
    * according to the actual scissor.
    */
   batch->max_scissor.minx = ~0;
   batch->max_scissor.miny = ~0;
   batch->max_scissor.maxx = 0;
   batch->max_scissor.maxy = 0;

   _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch);
   batch->key = key;
   batch->hash = hash;

   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
      rsc->track->bc_batch_mask = (1 << batch->idx);
   }

   return batch;
}

static void
key_surf(struct fd_batch_key *key, unsigned idx, unsigned pos,
         struct pipe_surface *psurf)
{
   key->surf[idx].texture = psurf->texture;
   key->surf[idx].u = psurf->u;
   key->surf[idx].pos = pos;
   key->surf[idx].samples = MAX2(1, psurf->nr_samples);
   key->surf[idx].format = psurf->format;
}

struct fd_batch *
fd_batch_from_fb(struct fd_context *ctx,
                 const struct pipe_framebuffer_state *pfb)
{
   unsigned idx = 0, n = pfb->nr_cbufs + (pfb->zsbuf ? 1 : 0);
   struct fd_batch_key *key = key_alloc(n);

   key->width = pfb->width;
   key->height = pfb->height;
   key->layers = pfb->layers;
   key->samples = util_framebuffer_get_num_samples(pfb);
   key->ctx_seqno = ctx->seqno;

   if (pfb->zsbuf)
      key_surf(key, idx++, 0, pfb->zsbuf);

   for (unsigned i = 0; i < pfb->nr_cbufs; i++)
      if (pfb->cbufs[i])
         key_surf(key, idx++, i + 1, pfb->cbufs[i]);

   key->num_surfs = idx;

   fd_screen_lock(ctx->screen);
   struct fd_batch *batch = batch_from_key(ctx, key);
   fd_screen_unlock(ctx->screen);

   return batch;
}
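
/* Usage sketch (illustrative, not part of the upstream file): the typical
 * caller is the context's pctx->set_framebuffer_state() hook, which looks up
 * (or creates) the batch for the new framebuffer state, makes it current on
 * the context, and then drops its local reference, roughly:
 *
 *    struct fd_batch *batch = fd_batch_from_fb(ctx, framebuffer);
 *    ... make batch current on the context ...
 *    fd_batch_reference(&batch, NULL);
 */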