Path: blob/21.2-virgl/src/gallium/drivers/freedreno/freedreno_context.c
/*
 * Copyright (C) 2012 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "freedreno_context.h"
#include "ir3/ir3_cache.h"
#include "util/u_upload_mgr.h"
#include "freedreno_blitter.h"
#include "freedreno_draw.h"
#include "freedreno_fence.h"
#include "freedreno_gmem.h"
#include "freedreno_program.h"
#include "freedreno_query.h"
#include "freedreno_query_hw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "freedreno_texture.h"
#include "freedreno_util.h"

static void
fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fencep,
                 unsigned flags) in_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct pipe_fence_handle *fence = NULL;
   struct fd_batch *batch = NULL;

   /* We want to look up the current batch if it exists, but not create a
    * new one if not (unless we need a fence)
    */
   fd_batch_reference(&batch, ctx->batch);

   DBG("%p: flush: flags=%x, fencep=%p", batch, flags, fencep);

   if (fencep && !batch) {
      batch = fd_context_batch(ctx);
   } else if (!batch) {
      if (ctx->screen->reorder)
         fd_bc_flush(ctx, flags & PIPE_FLUSH_DEFERRED);
      fd_bc_dump(ctx, "%p: NULL batch, remaining:\n", ctx);
      return;
   }

   /* With TC_FLUSH_ASYNC, the fence will have been pre-created from
    * the front-end thread, but not yet associated with a batch,
    * because we cannot safely access ctx->batch outside of the driver
    * thread.  So instead, replace the existing batch->fence with the
    * one created earlier
    */
   if ((flags & TC_FLUSH_ASYNC) && fencep) {
      /* We don't currently expect async+flush in the fence-fd
       * case.. for that to work properly we'd need TC to tell
       * us in the create_fence callback that it needs an fd.
       */
      assert(!(flags & PIPE_FLUSH_FENCE_FD));

      fd_fence_set_batch(*fencep, batch);
      fd_fence_ref(&batch->fence, *fencep);

      /* If we have nothing to flush, update the pre-created unflushed
       * fence with the current state of the last-fence:
       */
      if (ctx->last_fence) {
         fd_fence_repopulate(*fencep, ctx->last_fence);
         fd_fence_ref(&fence, *fencep);
         fd_bc_dump(ctx, "%p: (deferred) reuse last_fence, remaining:\n", ctx);
         goto out;
      }

      /* async flush is not compatible with deferred flush, since
       * nothing triggers the batch flush which fence_flush() would
       * be waiting for
       */
      flags &= ~PIPE_FLUSH_DEFERRED;
   } else if (!batch->fence) {
      batch->fence = fd_fence_create(batch);
   }

   /* In some sequence of events, we can end up with a last_fence that is
    * not an "fd" fence, which results in eglDupNativeFenceFDANDROID()
    * errors.
    */
   if ((flags & PIPE_FLUSH_FENCE_FD) && ctx->last_fence &&
       !fd_fence_is_fd(ctx->last_fence))
      fd_fence_ref(&ctx->last_fence, NULL);

   /* if no rendering since last flush, i.e. app just decided it needed
    * a fence, re-use the last one:
    */
   if (ctx->last_fence) {
      fd_fence_ref(&fence, ctx->last_fence);
      fd_bc_dump(ctx, "%p: reuse last_fence, remaining:\n", ctx);
      goto out;
   }

   /* Take a ref to the batch's fence (batch can be unref'd when flushed): */
   fd_fence_ref(&fence, batch->fence);

   if (flags & PIPE_FLUSH_FENCE_FD)
      fence->submit_fence.use_fence_fd = true;

   fd_bc_dump(ctx, "%p: flushing %p<%u>, flags=0x%x, pending:\n", ctx,
              batch, batch->seqno, flags);

   /* If we get here, we need to flush for a fence, even if there is
    * no rendering yet:
    */
   batch->needs_flush = true;

   if (!ctx->screen->reorder) {
      fd_batch_flush(batch);
   } else {
      fd_bc_flush(ctx, flags & PIPE_FLUSH_DEFERRED);
   }

   fd_bc_dump(ctx, "%p: remaining:\n", ctx);

out:
   if (fencep)
      fd_fence_ref(fencep, fence);

   fd_fence_ref(&ctx->last_fence, fence);

   fd_fence_ref(&fence, NULL);

   fd_batch_reference(&batch, NULL);

   u_trace_context_process(&ctx->trace_context,
                           !!(flags & PIPE_FLUSH_END_OF_FRAME));
}
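/* In short: if nothing has been rendered since the last flush, the app
 * just wanted a fence and we hand back a ref to last_fence; otherwise
 * the batch is marked needs_flush and flushed either directly or via
 * the batch-cache (when reordering).  Either way, back-to-back fence
 * requests without intervening draws don't cost extra kernel submits.
 */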
static void
fd_texture_barrier(struct pipe_context *pctx, unsigned flags) in_dt
{
   if (flags == PIPE_TEXTURE_BARRIER_FRAMEBUFFER) {
      struct fd_context *ctx = fd_context(pctx);

      if (ctx->framebuffer_barrier) {
         ctx->framebuffer_barrier(ctx);
         return;
      }
   }

   /* On devices that could sample from GMEM we could possibly do better.
    * Or if we knew that we were doing GMEM bypass we could just emit a
    * cache flush, perhaps?  But we don't know if future draws would cause
    * us to use GMEM, and a flush in bypass isn't the end of the world.
    */
   fd_context_flush(pctx, NULL, 0);
}

static void
fd_memory_barrier(struct pipe_context *pctx, unsigned flags)
{
   if (!(flags & ~PIPE_BARRIER_UPDATE))
      return;

   fd_context_flush(pctx, NULL, 0);

   /* TODO do we need to check for persistently mapped buffers and
    * fd_bo_cpu_prep()??
    */
}
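/* Helper for the two fd_emit_string variants below: packs the string
 * into the ring a dword at a time, zero-padding the final partial word
 * so we never read past the end of the input.  E.g. "hello" (len=5)
 * is emitted as two dwords: 'h','e','l','l' followed by 'o',0,0,0.
 */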
static void
emit_string_tail(struct fd_ringbuffer *ring, const char *string, int len)
{
   const uint32_t *buf = (const void *)string;

   while (len >= 4) {
      OUT_RING(ring, *buf);
      buf++;
      len -= 4;
   }

   /* copy remainder bytes without reading past end of input string: */
   if (len > 0) {
      uint32_t w = 0;
      memcpy(&w, buf, len);
      OUT_RING(ring, w);
   }
}

/* for prior to a5xx: */
void
fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len)
{
   /* max packet size is 0x3fff+1 dwords: */
   len = MIN2(len, 0x4000 * 4);

   OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
   emit_string_tail(ring, string, len);
}

/* for a5xx+ */
void
fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len)
{
   /* max packet size is 0x3fff dwords: */
   len = MIN2(len, 0x3fff * 4);

   OUT_PKT7(ring, CP_NOP, align(len, 4) / 4);
   emit_string_tail(ring, string, len);
}

/**
 * emit marker string as payload of a no-op packet, which can be
 * decoded by cffdump.
 */
static void
fd_emit_string_marker(struct pipe_context *pctx, const char *string,
                      int len) in_dt
{
   struct fd_context *ctx = fd_context(pctx);

   DBG("%.*s", len, string);

   if (!ctx->batch)
      return;

   struct fd_batch *batch = fd_context_batch_locked(ctx);

   fd_batch_needs_flush(batch);

   if (ctx->screen->gpu_id >= 500) {
      fd_emit_string5(batch->draw, string, len);
   } else {
      fd_emit_string(batch->draw, string, len);
   }

   fd_batch_unlock_submit(batch);
   fd_batch_reference(&batch, NULL);
}

/**
 * If we have a pending fence_server_sync() (GPU side sync), flush now.
 * The alternative, trying to track this with batch dependencies, gets
 * hairy quickly.
 *
 * Call this before switching to a different batch, to handle this case.
 */
void
fd_context_switch_from(struct fd_context *ctx)
{
   if (ctx->batch && (ctx->batch->in_fence_fd != -1))
      fd_batch_flush(ctx->batch);
}

/**
 * If there is a pending fence-fd that we need to sync on, this will
 * transfer the reference to the next batch we are going to render
 * to.
 */
void
fd_context_switch_to(struct fd_context *ctx, struct fd_batch *batch)
{
   if (ctx->in_fence_fd != -1) {
      sync_accumulate("freedreno", &batch->in_fence_fd, ctx->in_fence_fd);
      close(ctx->in_fence_fd);
      ctx->in_fence_fd = -1;
   }
}

/**
 * Return a reference to the current batch, caller must unref.
 */
struct fd_batch *
fd_context_batch(struct fd_context *ctx)
{
   struct fd_batch *batch = NULL;

   tc_assert_driver_thread(ctx->tc);

   fd_batch_reference(&batch, ctx->batch);

   if (unlikely(!batch)) {
      batch = fd_batch_from_fb(ctx, &ctx->framebuffer);
      util_copy_framebuffer_state(&batch->framebuffer, &ctx->framebuffer);
      fd_batch_reference(&ctx->batch, batch);
      fd_context_all_dirty(ctx);
   }
   fd_context_switch_to(ctx, batch);

   return batch;
}
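/* A typical caller of the locked variant below follows the pattern
 * already used in fd_emit_string_marker() above, roughly:
 *
 *    struct fd_batch *batch = fd_context_batch_locked(ctx);
 *    fd_batch_needs_flush(batch);
 *    ... emit cmdstream to batch->draw ...
 *    fd_batch_unlock_submit(batch);
 *    fd_batch_reference(&batch, NULL);
 */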
/**
 * Return a locked reference to the current batch.  A batch is
 * protected against flushing while its emit-lock is held.  The
 * emit-lock should be acquired before the screen-lock, and should be
 * held while emitting cmdstream.
 */
struct fd_batch *
fd_context_batch_locked(struct fd_context *ctx)
{
   struct fd_batch *batch = NULL;

   while (!batch) {
      batch = fd_context_batch(ctx);
      if (!fd_batch_lock_submit(batch)) {
         fd_batch_reference(&batch, NULL);
      }
   }

   return batch;
}

void
fd_context_destroy(struct pipe_context *pctx)
{
   struct fd_context *ctx = fd_context(pctx);
   unsigned i;

   DBG("");

   fd_screen_lock(ctx->screen);
   list_del(&ctx->node);
   fd_screen_unlock(ctx->screen);

   fd_fence_ref(&ctx->last_fence, NULL);

   if (ctx->in_fence_fd != -1)
      close(ctx->in_fence_fd);

   for (i = 0; i < ARRAY_SIZE(ctx->pvtmem); i++) {
      if (ctx->pvtmem[i].bo)
         fd_bo_del(ctx->pvtmem[i].bo);
   }

   util_copy_framebuffer_state(&ctx->framebuffer, NULL);
   fd_batch_reference(&ctx->batch, NULL); /* unref current batch */

   /* Make sure nothing in the batch cache references our context any more. */
   fd_bc_flush(ctx, false);

   fd_prog_fini(pctx);

   if (ctx->blitter)
      util_blitter_destroy(ctx->blitter);

   if (pctx->stream_uploader)
      u_upload_destroy(pctx->stream_uploader);

   for (i = 0; i < ARRAY_SIZE(ctx->clear_rs_state); i++)
      if (ctx->clear_rs_state[i])
         pctx->delete_rasterizer_state(pctx, ctx->clear_rs_state[i]);

   if (ctx->primconvert)
      util_primconvert_destroy(ctx->primconvert);

   slab_destroy_child(&ctx->transfer_pool);
   slab_destroy_child(&ctx->transfer_pool_unsync);

   for (i = 0; i < ARRAY_SIZE(ctx->vsc_pipe_bo); i++) {
      if (!ctx->vsc_pipe_bo[i])
         break;
      fd_bo_del(ctx->vsc_pipe_bo[i]);
   }

   fd_device_del(ctx->dev);
   fd_pipe_purge(ctx->pipe);
   fd_pipe_del(ctx->pipe);

   simple_mtx_destroy(&ctx->gmem_lock);

   u_trace_context_fini(&ctx->trace_context);

   fd_autotune_fini(&ctx->autotune);

   ir3_cache_destroy(ctx->shader_cache);

   if (FD_DBG(BSTAT) || FD_DBG(MSGS)) {
      mesa_logi(
         "batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_nondraw=%u, "
         "batch_restore=%u\n",
         (uint32_t)ctx->stats.batch_total, (uint32_t)ctx->stats.batch_sysmem,
         (uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_nondraw,
         (uint32_t)ctx->stats.batch_restore);
   }
}

static void
fd_set_debug_callback(struct pipe_context *pctx,
                      const struct pipe_debug_callback *cb)
{
   struct fd_context *ctx = fd_context(pctx);

   if (cb)
      ctx->debug = *cb;
   else
      memset(&ctx->debug, 0, sizeof(ctx->debug));
}

static uint32_t
fd_get_reset_count(struct fd_context *ctx, bool per_context)
{
   uint64_t val;
   enum fd_param_id param = per_context ? FD_CTX_FAULTS : FD_GLOBAL_FAULTS;
   int ret = fd_pipe_get_param(ctx->pipe, param, &val);
   debug_assert(!ret);
   return val;
}
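/* Classify a device reset for the query below: a bumped per-context
 * fault count means this context was the guilty party, while a bumped
 * global count with an unchanged per-context count means some other
 * context caused the reset.
 */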
static enum pipe_reset_status
fd_get_device_reset_status(struct pipe_context *pctx)
{
   struct fd_context *ctx = fd_context(pctx);
   int context_faults = fd_get_reset_count(ctx, true);
   int global_faults = fd_get_reset_count(ctx, false);
   enum pipe_reset_status status;

   /* Not called in driver thread, but threaded_context syncs
    * before calling this:
    */
   fd_context_access_begin(ctx);

   if (context_faults != ctx->context_reset_count) {
      status = PIPE_GUILTY_CONTEXT_RESET;
   } else if (global_faults != ctx->global_reset_count) {
      status = PIPE_INNOCENT_CONTEXT_RESET;
   } else {
      status = PIPE_NO_RESET;
   }

   ctx->context_reset_count = context_faults;
   ctx->global_reset_count = global_faults;

   fd_context_access_end(ctx);

   return status;
}

static void
fd_trace_record_ts(struct u_trace *ut, struct pipe_resource *timestamps,
                   unsigned idx)
{
   struct fd_batch *batch = container_of(ut, struct fd_batch, trace);
   struct fd_ringbuffer *ring = batch->nondraw ? batch->draw : batch->gmem;

   if (ring->cur == batch->last_timestamp_cmd) {
      uint64_t *ts = fd_bo_map(fd_resource(timestamps)->bo);
      ts[idx] = U_TRACE_NO_TIMESTAMP;
      return;
   }

   unsigned ts_offset = idx * sizeof(uint64_t);
   batch->ctx->record_timestamp(ring, fd_resource(timestamps)->bo, ts_offset);
   batch->last_timestamp_cmd = ring->cur;
}

static uint64_t
fd_trace_read_ts(struct u_trace_context *utctx,
                 struct pipe_resource *timestamps, unsigned idx)
{
   struct fd_context *ctx =
      container_of(utctx, struct fd_context, trace_context);
   struct fd_bo *ts_bo = fd_resource(timestamps)->bo;

   /* Only need to stall on results for the first entry: */
   if (idx == 0) {
      /* Avoid triggering deferred submits from flushing, since that
       * changes the behavior of what we are trying to measure:
       */
      while (fd_bo_cpu_prep(ts_bo, ctx->pipe, FD_BO_PREP_NOSYNC))
         usleep(10000);
      int ret = fd_bo_cpu_prep(ts_bo, ctx->pipe, FD_BO_PREP_READ);
      if (ret)
         return U_TRACE_NO_TIMESTAMP;
   }

   uint64_t *ts = fd_bo_map(ts_bo);

   /* Don't translate the no-timestamp marker: */
   if (ts[idx] == U_TRACE_NO_TIMESTAMP)
      return U_TRACE_NO_TIMESTAMP;

   return ctx->ts_to_ns(ts[idx]);
}

/* TODO we could combine a few of these small buffers (solid_vbuf,
 * blit_texcoord_vbuf, and vsc_size_mem) into a single buffer and
 * save a tiny bit of memory
 */

static struct pipe_resource *
create_solid_vertexbuf(struct pipe_context *pctx)
{
   static const float init_shader_const[] = {
      -1.000000, +1.000000, +1.000000, +1.000000, -1.000000, +1.000000,
   };
   struct pipe_resource *prsc =
      pipe_buffer_create(pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
                         sizeof(init_shader_const));
   pipe_buffer_write(pctx, prsc, 0, sizeof(init_shader_const),
                     init_shader_const);
   return prsc;
}

static struct pipe_resource *
create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
{
   struct pipe_resource *prsc = pipe_buffer_create(
      pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
   return prsc;
}
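/* Wire the two buffers above into reusable vertex state: solid_vbuf is
 * read as vec3 positions at stride 12, while the blit state pairs the
 * 16-byte dynamic texcoord buffer (stream 0, vec2, stride 8) with
 * solid_vbuf positions on stream 1.
 */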
void
fd_context_setup_common_vbos(struct fd_context *ctx)
{
   struct pipe_context *pctx = &ctx->base;

   ctx->solid_vbuf = create_solid_vertexbuf(pctx);
   ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);

   /* setup solid_vbuf_state: */
   ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
      pctx, 1,
      (struct pipe_vertex_element[]){{
         .vertex_buffer_index = 0,
         .src_offset = 0,
         .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
      }});
   ctx->solid_vbuf_state.vertexbuf.count = 1;
   ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
   ctx->solid_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->solid_vbuf;

   /* setup blit_vbuf_state: */
   ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
      pctx, 2,
      (struct pipe_vertex_element[]){
         {
            .vertex_buffer_index = 0,
            .src_offset = 0,
            .src_format = PIPE_FORMAT_R32G32_FLOAT,
         },
         {
            .vertex_buffer_index = 1,
            .src_offset = 0,
            .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
         }});
   ctx->blit_vbuf_state.vertexbuf.count = 2;
   ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
   ctx->blit_vbuf_state.vertexbuf.vb[0].buffer.resource =
      ctx->blit_texcoord_vbuf;
   ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
   ctx->blit_vbuf_state.vertexbuf.vb[1].buffer.resource = ctx->solid_vbuf;
}

void
fd_context_cleanup_common_vbos(struct fd_context *ctx)
{
   struct pipe_context *pctx = &ctx->base;

   pctx->delete_vertex_elements_state(pctx, ctx->solid_vbuf_state.vtx);
   pctx->delete_vertex_elements_state(pctx, ctx->blit_vbuf_state.vtx);

   pipe_resource_reference(&ctx->solid_vbuf, NULL);
   pipe_resource_reference(&ctx->blit_texcoord_vbuf, NULL);
}

struct pipe_context *
fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
                const uint8_t *primtypes, void *priv,
                unsigned flags) disable_thread_safety_analysis
{
   struct fd_screen *screen = fd_screen(pscreen);
   struct pipe_context *pctx;
   unsigned prio = 1;
   int i;

   /* lower numerical value == higher priority: */
   if (FD_DBG(HIPRIO))
      prio = 0;
   else if (flags & PIPE_CONTEXT_HIGH_PRIORITY)
      prio = 0;
   else if (flags & PIPE_CONTEXT_LOW_PRIORITY)
      prio = 2;

   /* Some of the stats will get printed out at context destroy, so
    * make sure they are collected:
    */
   if (FD_DBG(BSTAT) || FD_DBG(MSGS))
      ctx->stats_users++;

   ctx->screen = screen;
   ctx->pipe = fd_pipe_new2(screen->dev, FD_PIPE_3D, prio);

   ctx->in_fence_fd = -1;

   if (fd_device_version(screen->dev) >= FD_VERSION_ROBUSTNESS) {
      ctx->context_reset_count = fd_get_reset_count(ctx, true);
      ctx->global_reset_count = fd_get_reset_count(ctx, false);
   }

   ctx->primtypes = primtypes;
   ctx->primtype_mask = 0;
   for (i = 0; i <= PIPE_PRIM_MAX; i++)
      if (primtypes[i])
         ctx->primtype_mask |= (1 << i);

   simple_mtx_init(&ctx->gmem_lock, mtx_plain);

   /* need some sane default in case gallium frontends don't
    * set some state:
    */
   ctx->sample_mask = 0xffff;
   ctx->active_queries = true;

   pctx = &ctx->base;
   pctx->screen = pscreen;
   pctx->priv = priv;
   pctx->flush = fd_context_flush;
   pctx->emit_string_marker = fd_emit_string_marker;
   pctx->set_debug_callback = fd_set_debug_callback;
   pctx->get_device_reset_status = fd_get_device_reset_status;
   pctx->create_fence_fd = fd_create_fence_fd;
   pctx->fence_server_sync = fd_fence_server_sync;
   pctx->fence_server_signal = fd_fence_server_signal;
   pctx->texture_barrier = fd_texture_barrier;
   pctx->memory_barrier = fd_memory_barrier;

   pctx->stream_uploader = u_upload_create_default(pctx);
   if (!pctx->stream_uploader)
      goto fail;
   pctx->const_uploader = pctx->stream_uploader;

   slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
   slab_create_child(&ctx->transfer_pool_unsync, &screen->transfer_pool);

   fd_draw_init(pctx);
   fd_resource_context_init(pctx);
   fd_query_context_init(pctx);
   fd_texture_init(pctx);
   fd_state_init(pctx);

   ctx->blitter = util_blitter_create(pctx);
   if (!ctx->blitter)
      goto fail;

   ctx->primconvert = util_primconvert_create(pctx, ctx->primtype_mask);
   if (!ctx->primconvert)
      goto fail;

   list_inithead(&ctx->hw_active_queries);
   list_inithead(&ctx->acc_active_queries);

   fd_screen_lock(ctx->screen);
   ctx->seqno = ++screen->ctx_seqno;
   list_add(&ctx->node, &ctx->screen->context_list);
   fd_screen_unlock(ctx->screen);

   ctx->current_scissor = &ctx->disabled_scissor;

   u_trace_context_init(&ctx->trace_context, pctx, fd_trace_record_ts,
                        fd_trace_read_ts);

   fd_autotune_init(&ctx->autotune, screen->dev);

   return pctx;

fail:
   pctx->destroy(pctx);
   return NULL;
}
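/* The threaded_context wrapper created below is what lets most gallium
 * entrypoints run on a separate frontend thread; bytes_mapped_limit
 * caps how much mapped buffer memory the TC may accumulate (a
 * 1/16-of-RAM heuristic here) before it syncs with the driver thread.
 */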
struct pipe_context *
fd_context_init_tc(struct pipe_context *pctx, unsigned flags)
{
   struct fd_context *ctx = fd_context(pctx);

   if (!(flags & PIPE_CONTEXT_PREFER_THREADED))
      return pctx;

   /* Clover (compute-only) is unsupported. */
   if (flags & PIPE_CONTEXT_COMPUTE_ONLY)
      return pctx;

   struct pipe_context *tc = threaded_context_create(
      pctx, &ctx->screen->transfer_pool,
      fd_replace_buffer_storage,
      fd_fence_create_unflushed,
      fd_resource_busy,
      false,
      &ctx->tc);

   uint64_t total_ram;
   if (tc && tc != pctx && os_get_total_physical_memory(&total_ram)) {
      ((struct threaded_context *)tc)->bytes_mapped_limit = total_ram / 16;
   }

   return tc;
}