Path: blob/21.2-virgl/src/gallium/drivers/panfrost/pan_job.c
/*
 * Copyright (C) 2019-2020 Collabora, Ltd.
 * Copyright (C) 2019 Alyssa Rosenzweig
 * Copyright (C) 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <assert.h>

#include "drm-uapi/panfrost_drm.h"

#include "pan_bo.h"
#include "pan_context.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/format/u_format.h"
#include "util/u_pack_color.h"
#include "util/rounding.h"
#include "util/u_framebuffer.h"
#include "pan_util.h"
#include "decode.h"
#include "panfrost-quirks.h"

static unsigned
panfrost_batch_idx(struct panfrost_batch *batch)
{
        return batch - batch->ctx->batches.slots;
}

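/* Initialise a freshly-claimed batch slot: reset the BO/resource tracking,
 * set up the transient memory pools, and reserve the framebuffer and local
 * storage descriptors so they are ready by submit time. */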
static void
panfrost_batch_init(struct panfrost_context *ctx,
                    const struct pipe_framebuffer_state *key,
                    struct panfrost_batch *batch)
{
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        batch->ctx = ctx;

        batch->seqnum = ++ctx->batches.seqnum;

        batch->first_bo = INT32_MAX;
        batch->last_bo = INT32_MIN;
        util_sparse_array_init(&batch->bos, sizeof(uint32_t), 64);

        batch->minx = batch->miny = ~0;
        batch->maxx = batch->maxy = 0;

        util_copy_framebuffer_state(&batch->key, key);
        util_dynarray_init(&batch->resources, NULL);

        /* Preallocate the main pool, since every batch has at least one job
         * structure so it will be used */
        panfrost_pool_init(&batch->pool, NULL, dev, 0, 65536, "Batch pool", true, true);

        /* Don't preallocate the invisible pool, since not every batch will use
         * the pre-allocation, particularly if the varyings are larger than the
         * preallocation and a reallocation is needed after anyway. */
        panfrost_pool_init(&batch->invisible_pool, NULL, dev,
                           PAN_BO_INVISIBLE, 65536, "Varyings", false, true);

        panfrost_batch_add_fbo_bos(batch);

        /* Reserve the framebuffer and local storage descriptors */
        batch->framebuffer =
                (dev->quirks & MIDGARD_SFBD) ?
                pan_pool_alloc_desc(&batch->pool.base, SINGLE_TARGET_FRAMEBUFFER) :
                pan_pool_alloc_desc_aggregate(&batch->pool.base,
                                              PAN_DESC(MULTI_TARGET_FRAMEBUFFER),
                                              PAN_DESC(ZS_CRC_EXTENSION),
                                              PAN_DESC_ARRAY(MAX2(key->nr_cbufs, 1), RENDER_TARGET));

        /* Add the MFBD tag now, other tags will be added at submit-time */
        if (!(dev->quirks & MIDGARD_SFBD))
                batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;

        /* On Midgard, the TLS is embedded in the FB descriptor */
        if (pan_is_bifrost(dev))
                batch->tls = pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);
        else
                batch->tls = batch->framebuffer;
}

static void
panfrost_batch_cleanup(struct panfrost_batch *batch)
{
        if (!batch)
                return;

        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        assert(batch->seqnum);

        if (ctx->batch == batch)
                ctx->batch = NULL;

        unsigned batch_idx = panfrost_batch_idx(batch);

        for (int i = batch->first_bo; i <= batch->last_bo; i++) {
                uint32_t *flags = util_sparse_array_get(&batch->bos, i);

                if (!*flags)
                        continue;

                struct panfrost_bo *bo = pan_lookup_bo(dev, i);
                panfrost_bo_unreference(bo);
        }

        util_dynarray_foreach(&batch->resources, struct panfrost_resource *, rsrc) {
                BITSET_CLEAR((*rsrc)->track.users, batch_idx);

                if ((*rsrc)->track.writer == batch)
                        (*rsrc)->track.writer = NULL;

                pipe_resource_reference((struct pipe_resource **) rsrc, NULL);
        }

        util_dynarray_fini(&batch->resources);
        panfrost_pool_cleanup(&batch->pool);
        panfrost_pool_cleanup(&batch->invisible_pool);

        util_unreference_framebuffer_state(&batch->key);

        util_sparse_array_finish(&batch->bos);

        memset(batch, 0, sizeof(*batch));
}

static void
panfrost_batch_submit(struct panfrost_batch *batch,
                      uint32_t in_sync, uint32_t out_sync);

static struct panfrost_batch *
panfrost_get_batch(struct panfrost_context *ctx,
                   const struct pipe_framebuffer_state *key)
{
        struct panfrost_batch *batch = NULL;

        for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
                if (ctx->batches.slots[i].seqnum &&
                    util_framebuffer_state_equal(&ctx->batches.slots[i].key, key)) {
                        /* We found a match, increase the seqnum for the LRU
                         * eviction logic.
                         */
                        ctx->batches.slots[i].seqnum = ++ctx->batches.seqnum;
                        return &ctx->batches.slots[i];
                }

                if (!batch || batch->seqnum > ctx->batches.slots[i].seqnum)
                        batch = &ctx->batches.slots[i];
        }

        assert(batch);

        /* The selected slot is used, we need to flush the batch */
        if (batch->seqnum)
                panfrost_batch_submit(batch, 0, 0);

        panfrost_batch_init(ctx, key, batch);

        return batch;
}

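/* A worked example of the slot selection in panfrost_get_batch() above
 * (illustrative only): suppose PAN_MAX_BATCHES were 3 and the slot seqnums
 * were {7, 0, 4}. A lookup that matches no key picks slot 1, since a seqnum
 * of 0 means the slot is unused and nothing has to be flushed. With seqnums
 * {7, 9, 4}, slot 2 is the least recently used, so it is submitted and then
 * re-initialised for the new framebuffer key. */
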
struct panfrost_batch *
panfrost_get_fresh_batch(struct panfrost_context *ctx,
                         const struct pipe_framebuffer_state *key)
{
        struct panfrost_batch *batch = panfrost_get_batch(ctx, key);

        panfrost_dirty_state_all(ctx);

        /* The batch has no draw/clear queued, let's return it directly.
         * Note that it's perfectly fine to re-use a batch with an
         * existing clear, we'll just update it with the new clear request.
         */
        if (!batch->scoreboard.first_job) {
                ctx->batch = batch;
                return batch;
        }

        /* Otherwise, we need to flush the existing one and instantiate a new
         * one.
         */
        panfrost_batch_submit(batch, 0, 0);
        batch = panfrost_get_batch(ctx, key);
        return batch;
}

/* Get the job corresponding to the FBO we're currently rendering into */

struct panfrost_batch *
panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
{
        /* If we already began rendering, use that */

        if (ctx->batch) {
                assert(util_framebuffer_state_equal(&ctx->batch->key,
                                                    &ctx->pipe_framebuffer));
                return ctx->batch;
        }

        /* If not, look up the job */
        struct panfrost_batch *batch = panfrost_get_batch(ctx,
                                                          &ctx->pipe_framebuffer);

        /* Set this job as the current FBO job. Will be reset when updating the
         * FB state and when submitting or releasing a job.
         */
        ctx->batch = batch;
        panfrost_dirty_state_all(ctx);
        return batch;
}

struct panfrost_batch *
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx)
{
        struct panfrost_batch *batch;

        batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
        panfrost_dirty_state_all(ctx);

        /* The batch has no draw/clear queued, let's return it directly.
         * Note that it's perfectly fine to re-use a batch with an
         * existing clear, we'll just update it with the new clear request.
         */
        if (!batch->scoreboard.first_job) {
                ctx->batch = batch;
                return batch;
        }

        /* Otherwise, we need to flush the existing one and instantiate a new
         * one.
         */
        panfrost_batch_submit(batch, 0, 0);
        batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
        ctx->batch = batch;
        return batch;
}

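/* Track which batches use a resource and flush the ones whose work would
 * conflict. Reads from several batches can coexist, but a write, or a read
 * of something another batch is writing, forces the other users to be
 * submitted first. For example (illustrative): if batch A rendered to a
 * texture and batch B now samples from it, registering B's read flushes A
 * so B sees the finished rendering. */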
static void
panfrost_batch_update_access(struct panfrost_batch *batch,
                             struct panfrost_resource *rsrc, bool writes)
{
        struct panfrost_context *ctx = batch->ctx;
        uint32_t batch_idx = panfrost_batch_idx(batch);
        struct panfrost_batch *writer = rsrc->track.writer;

        if (unlikely(!BITSET_TEST(rsrc->track.users, batch_idx))) {
                BITSET_SET(rsrc->track.users, batch_idx);

                /* Reference the resource on the batch */
                struct pipe_resource **dst = util_dynarray_grow(&batch->resources,
                                                                struct pipe_resource *, 1);

                *dst = NULL;
                pipe_resource_reference(dst, &rsrc->base);
        }

        /* Flush users if required */
        if (writes || ((writer != NULL) && (writer != batch))) {
                unsigned i;
                BITSET_FOREACH_SET(i, rsrc->track.users, PAN_MAX_BATCHES) {
                        /* Skip the entry if this is our batch. */
                        if (i == batch_idx)
                                continue;

                        panfrost_batch_submit(&ctx->batches.slots[i], 0, 0);
                }
        }

        if (writes)
                rsrc->track.writer = batch;
}

static void
panfrost_batch_add_bo_old(struct panfrost_batch *batch,
                          struct panfrost_bo *bo, uint32_t flags)
{
        if (!bo)
                return;

        uint32_t *entry = util_sparse_array_get(&batch->bos, bo->gem_handle);
        uint32_t old_flags = *entry;

        if (!old_flags) {
                batch->num_bos++;
                batch->first_bo = MIN2(batch->first_bo, bo->gem_handle);
                batch->last_bo = MAX2(batch->last_bo, bo->gem_handle);
                panfrost_bo_reference(bo);
        }

        if (old_flags == flags)
                return;

        flags |= old_flags;
        *entry = flags;
}

static uint32_t
panfrost_access_for_stage(enum pipe_shader_type stage)
{
        return (stage == PIPE_SHADER_FRAGMENT) ?
                PAN_BO_ACCESS_FRAGMENT : PAN_BO_ACCESS_VERTEX_TILER;
}

void
panfrost_batch_add_bo(struct panfrost_batch *batch,
                      struct panfrost_bo *bo, enum pipe_shader_type stage)
{
        panfrost_batch_add_bo_old(batch, bo, PAN_BO_ACCESS_READ |
                                             panfrost_access_for_stage(stage));
}

void
panfrost_batch_read_rsrc(struct panfrost_batch *batch,
                         struct panfrost_resource *rsrc,
                         enum pipe_shader_type stage)
{
        uint32_t access = PAN_BO_ACCESS_READ |
                          panfrost_access_for_stage(stage);

        panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access);

        if (rsrc->image.crc.bo)
                panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access);

        if (rsrc->separate_stencil)
                panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access);

        panfrost_batch_update_access(batch, rsrc, false);
}

void
panfrost_batch_write_rsrc(struct panfrost_batch *batch,
                          struct panfrost_resource *rsrc,
                          enum pipe_shader_type stage)
{
        uint32_t access = PAN_BO_ACCESS_WRITE |
                          panfrost_access_for_stage(stage);

        panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access);

        if (rsrc->image.crc.bo)
                panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access);

        if (rsrc->separate_stencil)
                panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access);

        panfrost_batch_update_access(batch, rsrc, true);
}

/* Adds the BO backing surface to a batch if the surface is non-null */

static void
panfrost_batch_add_surface(struct panfrost_batch *batch, struct pipe_surface *surf)
{
        if (surf) {
                struct panfrost_resource *rsrc = pan_resource(surf->texture);
                panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT);
        }
}

void
panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
{
        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i)
                panfrost_batch_add_surface(batch, batch->key.cbufs[i]);

        panfrost_batch_add_surface(batch, batch->key.zsbuf);
}

struct panfrost_bo *
panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
                         uint32_t create_flags, enum pipe_shader_type stage,
                         const char *label)
{
        struct panfrost_bo *bo;

        bo = panfrost_bo_create(pan_device(batch->ctx->base.screen), size,
                                create_flags, label);
        panfrost_batch_add_bo(batch, bo, stage);

        /* panfrost_batch_add_bo() has retained a reference and
         * panfrost_bo_create() initializes the refcnt to 1, so let's
         * unreference the BO here so it gets released when the batch is
         * destroyed (unless it's retained by someone else in the meantime).
         */
        panfrost_bo_unreference(bo);
        return bo;
}

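/* Ownership note for panfrost_batch_create_bo() above (editorial): after it
 * returns, the only reference to the BO is the one held by the batch, so the
 * BO is released in panfrost_batch_cleanup() when the batch retires. A caller
 * that needs the BO to outlive the batch would take its own reference, e.g.
 * (sketch, the "Example" label is made up):
 *
 *    struct panfrost_bo *bo =
 *            panfrost_batch_create_bo(batch, 4096, 0,
 *                                     PIPE_SHADER_VERTEX, "Example");
 *    panfrost_bo_reference(bo); // keep it alive past batch cleanup
 */
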
/* Returns the polygon list's GPU address if available, or otherwise allocates
 * the polygon list. It's perfectly fast to use allocate/free BO directly,
 * since we'll hit the BO cache and this is one-per-batch anyway. */

static mali_ptr
panfrost_batch_get_polygon_list(struct panfrost_batch *batch)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);

        assert(!pan_is_bifrost(dev));

        if (!batch->tiler_ctx.midgard.polygon_list) {
                bool has_draws = batch->scoreboard.first_tiler != NULL;
                unsigned size =
                        panfrost_tiler_get_polygon_list_size(dev,
                                                             batch->key.width,
                                                             batch->key.height,
                                                             has_draws);
                size = util_next_power_of_two(size);

                /* Create the BO as invisible if we can. In the non-hierarchical
                 * tiler case, we need to write the polygon list manually because
                 * there's no WRITE_VALUE job in the chain (maybe we should add
                 * one...). */
                bool init_polygon_list = !has_draws && (dev->quirks & MIDGARD_NO_HIER_TILING);
                batch->tiler_ctx.midgard.polygon_list =
                        panfrost_batch_create_bo(batch, size,
                                                 init_polygon_list ? 0 : PAN_BO_INVISIBLE,
                                                 PIPE_SHADER_VERTEX,
                                                 "Polygon list");
                panfrost_batch_add_bo(batch, batch->tiler_ctx.midgard.polygon_list,
                                      PIPE_SHADER_FRAGMENT);

                if (init_polygon_list) {
                        assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu);
                        uint32_t *polygon_list_body =
                                batch->tiler_ctx.midgard.polygon_list->ptr.cpu +
                                MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
                        polygon_list_body[0] = 0xa0000000; /* TODO: Just that? */
                }

                batch->tiler_ctx.midgard.disable = !has_draws;
        }

        return batch->tiler_ctx.midgard.polygon_list->ptr.gpu;
}

struct panfrost_bo *
panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
                              unsigned size_per_thread,
                              unsigned thread_tls_alloc,
                              unsigned core_count)
{
        unsigned size = panfrost_get_total_stack_size(size_per_thread,
                                                      thread_tls_alloc,
                                                      core_count);

        if (batch->scratchpad) {
                assert(batch->scratchpad->size >= size);
        } else {
                batch->scratchpad = panfrost_batch_create_bo(batch, size,
                                                             PAN_BO_INVISIBLE,
                                                             PIPE_SHADER_VERTEX,
                                                             "Thread local storage");

                panfrost_batch_add_bo(batch, batch->scratchpad,
                                      PIPE_SHADER_FRAGMENT);
        }

        return batch->scratchpad;
}

struct panfrost_bo *
panfrost_batch_get_shared_memory(struct panfrost_batch *batch,
                                 unsigned size,
                                 unsigned workgroup_count)
{
        if (batch->shared_memory) {
                assert(batch->shared_memory->size >= size);
        } else {
                batch->shared_memory = panfrost_batch_create_bo(batch, size,
                                                                PAN_BO_INVISIBLE,
                                                                PIPE_SHADER_VERTEX,
                                                                "Workgroup shared memory");
        }

        return batch->shared_memory;
}

mali_ptr
panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
        assert(pan_is_bifrost(dev));

        if (!vertex_count)
                return 0;

        if (batch->tiler_ctx.bifrost)
                return batch->tiler_ctx.bifrost;

        struct panfrost_ptr t =
                pan_pool_alloc_desc(&batch->pool.base, BIFROST_TILER_HEAP);

        pan_emit_bifrost_tiler_heap(dev, t.cpu);

        mali_ptr heap = t.gpu;

        t = pan_pool_alloc_desc(&batch->pool.base, BIFROST_TILER);
        pan_emit_bifrost_tiler(dev, batch->key.width, batch->key.height,
                               util_framebuffer_get_num_samples(&batch->key),
                               heap, t.cpu);

        batch->tiler_ctx.bifrost = t.gpu;
        return batch->tiler_ctx.bifrost;
}

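/* Gather the batch's accumulated state (clears, draws, resolves, damage
 * extent) into a pan_fb_info plus per-attachment image views, which the
 * preload and framebuffer-descriptor emission paths consume. `reserve`
 * forces the contents to be treated as preserved rather than discardable. */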
static void
panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
                          struct pan_fb_info *fb,
                          struct pan_image_view *rts,
                          struct pan_image_view *zs,
                          struct pan_image_view *s,
                          bool reserve)
{
        memset(fb, 0, sizeof(*fb));
        memset(rts, 0, sizeof(*rts) * 8);
        memset(zs, 0, sizeof(*zs));
        memset(s, 0, sizeof(*s));

        fb->width = batch->key.width;
        fb->height = batch->key.height;
        fb->extent.minx = batch->minx;
        fb->extent.miny = batch->miny;
        fb->extent.maxx = batch->maxx - 1;
        fb->extent.maxy = batch->maxy - 1;
        fb->nr_samples = util_framebuffer_get_num_samples(&batch->key);
        fb->rt_count = batch->key.nr_cbufs;

        static const unsigned char id_swz[] = {
                PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
        };

        for (unsigned i = 0; i < fb->rt_count; i++) {
                struct pipe_surface *surf = batch->key.cbufs[i];

                if (!surf)
                        continue;

                struct panfrost_resource *prsrc = pan_resource(surf->texture);
                unsigned mask = PIPE_CLEAR_COLOR0 << i;

                if (batch->clear & mask) {
                        fb->rts[i].clear = true;
                        memcpy(fb->rts[i].clear_value, batch->clear_color[i],
                               sizeof((fb->rts[i].clear_value)));
                }

                fb->rts[i].discard = !reserve && !(batch->resolve & mask);

                rts[i].format = surf->format;
                rts[i].dim = MALI_TEXTURE_DIMENSION_2D;
                rts[i].last_level = rts[i].first_level = surf->u.tex.level;
                rts[i].first_layer = surf->u.tex.first_layer;
                rts[i].last_layer = surf->u.tex.last_layer;
                rts[i].image = &prsrc->image;
                rts[i].nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                memcpy(rts[i].swizzle, id_swz, sizeof(rts[i].swizzle));
                fb->rts[i].crc_valid = &prsrc->valid.crc;
                fb->rts[i].view = &rts[i];

                /* Preload if the RT is read or updated */
                if (!(batch->clear & mask) &&
                    ((batch->read & mask) ||
                     ((batch->draws & mask) &&
                      BITSET_TEST(prsrc->valid.data, fb->rts[i].view->first_level))))
                        fb->rts[i].preload = true;
        }

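        /* Now the depth/stencil attachment: a combined ZS format shares a
         * single view for both planes, while a separate-stencil resource
         * gets its own S8 view below. */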
        const struct pan_image_view *s_view = NULL, *z_view = NULL;
        struct panfrost_resource *z_rsrc = NULL, *s_rsrc = NULL;

        if (batch->key.zsbuf) {
                struct pipe_surface *surf = batch->key.zsbuf;
                z_rsrc = pan_resource(surf->texture);

                zs->format = surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ?
                             PIPE_FORMAT_Z32_FLOAT : surf->format;
                zs->dim = MALI_TEXTURE_DIMENSION_2D;
                zs->last_level = zs->first_level = surf->u.tex.level;
                zs->first_layer = surf->u.tex.first_layer;
                zs->last_layer = surf->u.tex.last_layer;
                zs->image = &z_rsrc->image;
                zs->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                memcpy(zs->swizzle, id_swz, sizeof(zs->swizzle));
                fb->zs.view.zs = zs;
                z_view = zs;
                if (util_format_is_depth_and_stencil(zs->format)) {
                        s_view = zs;
                        s_rsrc = z_rsrc;
                }

                if (z_rsrc->separate_stencil) {
                        s_rsrc = z_rsrc->separate_stencil;
                        s->format = PIPE_FORMAT_S8_UINT;
                        s->dim = MALI_TEXTURE_DIMENSION_2D;
                        s->last_level = s->first_level = surf->u.tex.level;
                        s->first_layer = surf->u.tex.first_layer;
                        s->last_layer = surf->u.tex.last_layer;
                        s->image = &s_rsrc->image;
                        s->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                        memcpy(s->swizzle, id_swz, sizeof(s->swizzle));
                        fb->zs.view.s = s;
                        s_view = s;
                }
        }

        if (batch->clear & PIPE_CLEAR_DEPTH) {
                fb->zs.clear.z = true;
                fb->zs.clear_value.depth = batch->clear_depth;
        }

        if (batch->clear & PIPE_CLEAR_STENCIL) {
                fb->zs.clear.s = true;
                fb->zs.clear_value.stencil = batch->clear_stencil;
        }

        fb->zs.discard.z = !reserve && !(batch->resolve & PIPE_CLEAR_DEPTH);
        fb->zs.discard.s = !reserve && !(batch->resolve & PIPE_CLEAR_STENCIL);

        if (!fb->zs.clear.z &&
            ((batch->read & PIPE_CLEAR_DEPTH) ||
             ((batch->draws & PIPE_CLEAR_DEPTH) &&
              z_rsrc && BITSET_TEST(z_rsrc->valid.data, z_view->first_level))))
                fb->zs.preload.z = true;

        if (!fb->zs.clear.s &&
            ((batch->read & PIPE_CLEAR_STENCIL) ||
             ((batch->draws & PIPE_CLEAR_STENCIL) &&
              s_rsrc && BITSET_TEST(s_rsrc->valid.data, s_view->first_level))))
                fb->zs.preload.s = true;

        /* Preserve both components if we have a combined ZS view and
         * one component needs to be preserved.
         */
        if (s_view == z_view && fb->zs.discard.z != fb->zs.discard.s) {
                bool valid = BITSET_TEST(z_rsrc->valid.data, z_view->first_level);

                fb->zs.discard.z = false;
                fb->zs.discard.s = false;
                fb->zs.preload.z = !fb->zs.clear.z && valid;
                fb->zs.preload.s = !fb->zs.clear.s && valid;
        }
}

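/* Build and issue a single DRM_IOCTL_PANFROST_SUBMIT for one job chain. The
 * BO handle table is sized for the worst case: every BO tracked on the batch,
 * every BO backing the two transient pools, plus two extra slots for the
 * device-wide tiler heap and sample positions BOs appended below. */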
static int
panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                            mali_ptr first_job_desc,
                            uint32_t reqs,
                            uint32_t in_sync,
                            uint32_t out_sync)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_device *dev = pan_device(gallium->screen);
        struct drm_panfrost_submit submit = {0,};
        uint32_t *bo_handles;
        int ret;

        /* If we trace, we always need a syncobj to wait on, so fall back to
         * the context's syncobj when the caller didn't provide one. */

        if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
                out_sync = ctx->syncobj;

        submit.out_sync = out_sync;
        submit.jc = first_job_desc;
        submit.requirements = reqs;
        if (in_sync) {
                submit.in_syncs = (u64)(uintptr_t)(&in_sync);
                submit.in_sync_count = 1;
        }

        bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) +
                            panfrost_pool_num_bos(&batch->invisible_pool) +
                            batch->num_bos + 2,
                            sizeof(*bo_handles));
        assert(bo_handles);

        for (int i = batch->first_bo; i <= batch->last_bo; i++) {
                uint32_t *flags = util_sparse_array_get(&batch->bos, i);

                if (!*flags)
                        continue;

                assert(submit.bo_handle_count < batch->num_bos);
                bo_handles[submit.bo_handle_count++] = i;

                /* Update the BO access flags so that panfrost_bo_wait() knows
                 * about all pending accesses.
                 * We only keep the READ/WRITE info since this is all the BO
                 * wait logic cares about.
                 * We also preserve existing flags as this batch might not
                 * be the first one to access the BO.
                 */
                struct panfrost_bo *bo = pan_lookup_bo(dev, i);

                bo->gpu_access |= *flags & (PAN_BO_ACCESS_RW);
        }

        panfrost_pool_get_bo_handles(&batch->pool, bo_handles + submit.bo_handle_count);
        submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool);
        panfrost_pool_get_bo_handles(&batch->invisible_pool, bo_handles + submit.bo_handle_count);
        submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool);

        /* Add the tiler heap to the list of accessed BOs if the batch has at
         * least one tiler job. The tiler heap is written by tiler jobs and read
         * by fragment jobs (the polygon list is coming from this heap).
         */
        if (batch->scoreboard.first_tiler)
                bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;

        /* Always used on Bifrost, occasionally used on Midgard */
        bo_handles[submit.bo_handle_count++] = dev->sample_positions->gem_handle;

        submit.bo_handles = (u64) (uintptr_t) bo_handles;
        if (ctx->is_noop)
                ret = 0;
        else
                ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
        free(bo_handles);

        if (ret)
                return errno;

        /* Trace the job if we're doing that */
        if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
                /* Wait so we can get errors reported back */
                drmSyncobjWait(dev->fd, &out_sync, 1,
                               INT64_MAX, 0, NULL);

                if (dev->debug & PAN_DBG_TRACE)
                        pandecode_jc(submit.jc, pan_is_bifrost(dev), dev->gpu_id);

                if (dev->debug & PAN_DBG_SYNC)
                        pandecode_abort_on_fault(submit.jc);
        }

        return 0;
}

/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
 * out_sync corresponding to the later of the two (since there will be an
 * implicit dep between them) */

static int
panfrost_batch_submit_jobs(struct panfrost_batch *batch,
                           const struct pan_fb_info *fb,
                           uint32_t in_sync, uint32_t out_sync)
{
        struct pipe_screen *pscreen = batch->ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        struct panfrost_device *dev = pan_device(pscreen);
        bool has_draws = batch->scoreboard.first_job;
        bool has_tiler = batch->scoreboard.first_tiler;
        bool has_frag = has_tiler || batch->clear;
        int ret = 0;

        /* Take the submit lock to make sure no tiler jobs from other contexts
         * are inserted between our tiler and fragment jobs; failing to do that
         * might result in tiler heap corruption.
         */
        if (has_tiler)
                pthread_mutex_lock(&dev->submit_lock);

        if (has_draws) {
                ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job,
                                                  0, in_sync, has_frag ? 0 : out_sync);

                if (ret)
                        goto done;
        }

        if (has_frag) {
                /* Whether we program the fragment job for draws or not depends
                 * on whether there is any *tiler* activity (so fragment
                 * shaders). If there are draws but entirely RASTERIZER_DISCARD
                 * (say, for transform feedback), we want a fragment job that
                 * *only* clears, since otherwise the tiler structures will be
                 * uninitialized leading to faults (or state leaks) */

                mali_ptr fragjob = screen->vtbl.emit_fragment_job(batch, fb);
                ret = panfrost_batch_submit_ioctl(batch, fragjob,
                                                  PANFROST_JD_REQ_FS, 0,
                                                  out_sync);
                if (ret)
                        goto done;
        }

done:
        if (has_tiler)
                pthread_mutex_unlock(&dev->submit_lock);

        return ret;
}

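/* Upload the first colour target's damage tile map, if it tracks one, and
 * record its stride so the framebuffer descriptor can point at it (used to
 * limit work to the damaged area). */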
static void
panfrost_emit_tile_map(struct panfrost_batch *batch, struct pan_fb_info *fb)
{
        if (batch->key.nr_cbufs < 1 || !batch->key.cbufs[0])
                return;

        struct pipe_surface *surf = batch->key.cbufs[0];
        struct panfrost_resource *pres = surf ? pan_resource(surf->texture) : NULL;

        if (pres && pres->damage.tile_map.enable) {
                fb->tile_map.base =
                        pan_pool_upload_aligned(&batch->pool.base,
                                                pres->damage.tile_map.data,
                                                pres->damage.tile_map.size,
                                                64);
                fb->tile_map.stride = pres->damage.tile_map.stride;
        }
}

static void
panfrost_batch_submit(struct panfrost_batch *batch,
                      uint32_t in_sync, uint32_t out_sync)
{
        struct pipe_screen *pscreen = batch->ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        struct panfrost_device *dev = pan_device(pscreen);
        int ret;

        /* Nothing to do! */
        if (!batch->scoreboard.first_job && !batch->clear)
                goto out;

        struct pan_fb_info fb;
        struct pan_image_view rts[8], zs, s;

        panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false);

        screen->vtbl.preload(batch, &fb);

        if (!pan_is_bifrost(dev)) {
                mali_ptr polygon_list = panfrost_batch_get_polygon_list(batch);

                panfrost_scoreboard_initialize_tiler(&batch->pool.base,
                                                     &batch->scoreboard,
                                                     polygon_list);
        }

        /* Now that all draws are in, we can finally prepare the
         * FBD for the batch (if there is one). */

        screen->vtbl.emit_tls(batch);
        panfrost_emit_tile_map(batch, &fb);

        if (batch->scoreboard.first_tiler || batch->clear)
                screen->vtbl.emit_fbd(batch, &fb);

        ret = panfrost_batch_submit_jobs(batch, &fb, in_sync, out_sync);

        if (ret)
                fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);

        /* We must reset the damage info of our render targets here even
         * though a damage reset normally happens when the DRI layer swaps
         * buffers. That's because there can be implicit flushes the GL
         * app is not aware of, and those might impact the damage region: if
         * part of the damaged portion is drawn during those implicit flushes,
         * you have to reload those areas before next draws are pushed, and
         * since the driver can't easily know what's been modified by the draws
         * it flushed, the easiest solution is to reload everything.
         */
        for (unsigned i = 0; i < batch->key.nr_cbufs; i++) {
                if (!batch->key.cbufs[i])
                        continue;

                panfrost_resource_set_damage_region(batch->ctx->base.screen,
                                                    batch->key.cbufs[i]->texture,
                                                    0, NULL);
        }

out:
        panfrost_batch_cleanup(batch);
}

/* Submit all batches, applying the out_sync to the currently bound batch */

void
panfrost_flush_all_batches(struct panfrost_context *ctx)
{
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
        panfrost_batch_submit(batch, ctx->syncobj, ctx->syncobj);

        for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
                if (ctx->batches.slots[i].seqnum) {
                        panfrost_batch_submit(&ctx->batches.slots[i],
                                              ctx->syncobj, ctx->syncobj);
                }
        }
}

void
panfrost_flush_writer(struct panfrost_context *ctx,
                      struct panfrost_resource *rsrc)
{
        if (rsrc->track.writer) {
                panfrost_batch_submit(rsrc->track.writer, ctx->syncobj, ctx->syncobj);
                rsrc->track.writer = NULL;
        }
}

void
panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
                                      struct panfrost_resource *rsrc)
{
        unsigned i;
        BITSET_FOREACH_SET(i, rsrc->track.users, PAN_MAX_BATCHES) {
                panfrost_batch_submit(&ctx->batches.slots[i],
                                      ctx->syncobj, ctx->syncobj);
        }

        assert(!BITSET_COUNT(rsrc->track.users));
        rsrc->track.writer = NULL;
}

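/* Typical use of the two flush helpers above (an illustrative sketch; the
 * policy lives with callers outside this file): a CPU read of a resource only
 * needs the pending writer flushed, while a CPU write must also wait for
 * every batch still reading it:
 *
 *    if (mapping_for_write)
 *            panfrost_flush_batches_accessing_rsrc(ctx, rsrc);
 *    else
 *            panfrost_flush_writer(ctx, rsrc);
 *
 * where `mapping_for_write` is a hypothetical flag from the caller. */
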
void
panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;

        for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
                struct panfrost_shader_state *ss;

                ss = panfrost_get_shader_state(ctx, i);
                if (!ss)
                        continue;

                batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size);
        }
}

void
panfrost_batch_clear(struct panfrost_batch *batch,
                     unsigned buffers,
                     const union pipe_color_union *color,
                     double depth, unsigned stencil)
{
        struct panfrost_context *ctx = batch->ctx;

        if (buffers & PIPE_CLEAR_COLOR) {
                for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) {
                        if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
                                continue;

                        enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
                        pan_pack_color(batch->clear_color[i], color, format);
                }
        }

        if (buffers & PIPE_CLEAR_DEPTH) {
                batch->clear_depth = depth;
        }

        if (buffers & PIPE_CLEAR_STENCIL) {
                batch->clear_stencil = stencil;
        }

        batch->clear |= buffers;
        batch->resolve |= buffers;

        /* Clearing affects the entire framebuffer (by definition -- this is
         * the Gallium clear callback, which clears the whole framebuffer. If
         * the scissor test were enabled from the GL side, the gallium frontend
         * would emit a quad instead and we wouldn't go down this code path) */

        panfrost_batch_union_scissor(batch, 0, 0,
                                     ctx->pipe_framebuffer.width,
                                     ctx->pipe_framebuffer.height);
}

/* Given a new bounding rectangle (scissor), let the job cover the union of the
 * new and old bounding rectangles */

void
panfrost_batch_union_scissor(struct panfrost_batch *batch,
                             unsigned minx, unsigned miny,
                             unsigned maxx, unsigned maxy)
{
        batch->minx = MIN2(batch->minx, minx);
        batch->miny = MIN2(batch->miny, miny);
        batch->maxx = MAX2(batch->maxx, maxx);
        batch->maxy = MAX2(batch->maxy, maxy);
}

void
panfrost_batch_intersection_scissor(struct panfrost_batch *batch,
                                    unsigned minx, unsigned miny,
                                    unsigned maxx, unsigned maxy)
{
        batch->minx = MAX2(batch->minx, minx);
        batch->miny = MAX2(batch->miny, miny);
        batch->maxx = MIN2(batch->maxx, maxx);
        batch->maxy = MIN2(batch->maxy, maxy);
}