Path: blob/21.2-virgl/src/gallium/drivers/v3d/v3d_job.c
4570 views
/*1* Copyright © 2014-2017 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223/** @file v3d_job.c24*25* Functions for submitting V3D render jobs to the kernel.26*/2728#include <xf86drm.h>29#include "v3d_context.h"30/* The OQ/semaphore packets are the same across V3D versions. */31#define V3D_VERSION 3332#include "broadcom/cle/v3dx_pack.h"33#include "broadcom/common/v3d_macros.h"34#include "util/hash_table.h"35#include "util/ralloc.h"36#include "util/set.h"37#include "broadcom/clif/clif_dump.h"3839void40v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)41{42set_foreach(job->bos, entry) {43struct v3d_bo *bo = (struct v3d_bo *)entry->key;44v3d_bo_unreference(&bo);45}4647_mesa_hash_table_remove_key(v3d->jobs, &job->key);4849if (job->write_prscs) {50set_foreach(job->write_prscs, entry) {51const struct pipe_resource *prsc = entry->key;5253_mesa_hash_table_remove_key(v3d->write_jobs, prsc);54}55}5657for (int i = 0; i < job->nr_cbufs; i++) {58if (job->cbufs[i]) {59_mesa_hash_table_remove_key(v3d->write_jobs,60job->cbufs[i]->texture);61pipe_surface_reference(&job->cbufs[i], NULL);62}63}64if (job->zsbuf) {65struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);66if (rsc->separate_stencil)67_mesa_hash_table_remove_key(v3d->write_jobs,68&rsc->separate_stencil->base);6970_mesa_hash_table_remove_key(v3d->write_jobs,71job->zsbuf->texture);72pipe_surface_reference(&job->zsbuf, NULL);73}74if (job->bbuf)75pipe_surface_reference(&job->bbuf, NULL);7677if (v3d->job == job)78v3d->job = NULL;7980v3d_destroy_cl(&job->bcl);81v3d_destroy_cl(&job->rcl);82v3d_destroy_cl(&job->indirect);83v3d_bo_unreference(&job->tile_alloc);84v3d_bo_unreference(&job->tile_state);8586ralloc_free(job);87}8889struct v3d_job *90v3d_job_create(struct v3d_context *v3d)91{92struct v3d_job *job = rzalloc(v3d, struct v3d_job);9394job->v3d = v3d;9596v3d_init_cl(job, &job->bcl);97v3d_init_cl(job, &job->rcl);98v3d_init_cl(job, &job->indirect);99100job->draw_min_x = ~0;101job->draw_min_y = ~0;102job->draw_max_x = 0;103job->draw_max_y = 0;104105job->bos = _mesa_set_create(job,106_mesa_hash_pointer,107_mesa_key_pointer_equal);108return job;109}110111void112v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo)113{114if (!bo)115return;116117if (_mesa_set_search(job->bos, bo))118return;119120v3d_bo_reference(bo);121_mesa_set_add(job->bos, bo);122job->referenced_size += bo->size;123124uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles;125126if (job->submit.bo_handle_count >= job->bo_handles_size) {127job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);128bo_handles = reralloc(job, bo_handles,129uint32_t, job->bo_handles_size);130job->submit.bo_handles = (uintptr_t)(void *)bo_handles;131}132bo_handles[job->submit.bo_handle_count++] = bo->handle;133}134135void136v3d_job_add_write_resource(struct v3d_job *job, struct pipe_resource *prsc)137{138struct v3d_context *v3d = job->v3d;139140if (!job->write_prscs) {141job->write_prscs = _mesa_set_create(job,142_mesa_hash_pointer,143_mesa_key_pointer_equal);144}145146_mesa_set_add(job->write_prscs, prsc);147_mesa_hash_table_insert(v3d->write_jobs, prsc, job);148}149150void151v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo)152{153hash_table_foreach(v3d->jobs, entry) {154struct v3d_job *job = entry->data;155156if (_mesa_set_search(job->bos, bo))157v3d_job_submit(v3d, job);158}159}160161void162v3d_job_add_tf_write_resource(struct v3d_job *job, struct pipe_resource *prsc)163{164v3d_job_add_write_resource(job, prsc);165166if (!job->tf_write_prscs)167job->tf_write_prscs = _mesa_pointer_set_create(job);168169_mesa_set_add(job->tf_write_prscs, prsc);170}171172static bool173v3d_job_writes_resource_from_tf(struct v3d_job *job,174struct pipe_resource *prsc)175{176if (!job->tf_enabled)177return false;178179if (!job->tf_write_prscs)180return false;181182return _mesa_set_search(job->tf_write_prscs, prsc) != NULL;183}184185void186v3d_flush_jobs_writing_resource(struct v3d_context *v3d,187struct pipe_resource *prsc,188enum v3d_flush_cond flush_cond,189bool is_compute_pipeline)190{191struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,192prsc);193struct v3d_resource *rsc = v3d_resource(prsc);194195/* We need to sync if graphics pipeline reads a resource written196* by the compute pipeline. The same would be needed for the case of197* graphics-compute dependency but nowadays all compute jobs198* are serialized with the previous submitted job.199*/200if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {201v3d->sync_on_last_compute_job = true;202rsc->compute_written = false;203}204205if (!entry)206return;207208struct v3d_job *job = entry->data;209210bool needs_flush;211switch (flush_cond) {212case V3D_FLUSH_ALWAYS:213needs_flush = true;214break;215case V3D_FLUSH_NOT_CURRENT_JOB:216needs_flush = !v3d->job || v3d->job != job;217break;218case V3D_FLUSH_DEFAULT:219default:220/* For writes from TF in the same job we use the "Wait for TF"221* feature provided by the hardware so we don't want to flush.222* The exception to this is when the caller is about to map the223* resource since in that case we don't have a 'Wait for TF'224* command the in command stream. In this scenario the caller225* is expected to set 'always_flush' to True.226*/227needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);228}229230if (needs_flush)231v3d_job_submit(v3d, job);232}233234void235v3d_flush_jobs_reading_resource(struct v3d_context *v3d,236struct pipe_resource *prsc,237enum v3d_flush_cond flush_cond,238bool is_compute_pipeline)239{240struct v3d_resource *rsc = v3d_resource(prsc);241242/* We only need to force the flush on TF writes, which is the only243* case where we might skip the flush to use the 'Wait for TF'244* command. Here we are flushing for a read, which means that the245* caller intends to write to the resource, so we don't care if246* there was a previous TF write to it.247*/248v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,249is_compute_pipeline);250251hash_table_foreach(v3d->jobs, entry) {252struct v3d_job *job = entry->data;253254if (!_mesa_set_search(job->bos, rsc->bo))255continue;256257bool needs_flush;258switch (flush_cond) {259case V3D_FLUSH_NOT_CURRENT_JOB:260needs_flush = !v3d->job || v3d->job != job;261break;262case V3D_FLUSH_ALWAYS:263case V3D_FLUSH_DEFAULT:264default:265needs_flush = true;266}267268if (needs_flush)269v3d_job_submit(v3d, job);270271/* Reminder: v3d->jobs is safe to keep iterating even272* after deletion of an entry.273*/274continue;275}276}277278/**279* Returns a v3d_job struture for tracking V3D rendering to a particular FBO.280*281* If we've already started rendering to this FBO, then return the same job,282* otherwise make a new one. If we're beginning rendering to an FBO, make283* sure that any previous reads of the FBO (or writes to its color/Z surfaces)284* have been flushed.285*/286struct v3d_job *287v3d_get_job(struct v3d_context *v3d,288uint32_t nr_cbufs,289struct pipe_surface **cbufs,290struct pipe_surface *zsbuf,291struct pipe_surface *bbuf)292{293/* Return the existing job for this FBO if we have one */294struct v3d_job_key local_key = {295.cbufs = {296cbufs[0],297cbufs[1],298cbufs[2],299cbufs[3],300},301.zsbuf = zsbuf,302.bbuf = bbuf,303};304struct hash_entry *entry = _mesa_hash_table_search(v3d->jobs,305&local_key);306if (entry)307return entry->data;308309/* Creating a new job. Make sure that any previous jobs reading or310* writing these buffers are flushed.311*/312struct v3d_job *job = v3d_job_create(v3d);313job->nr_cbufs = nr_cbufs;314315for (int i = 0; i < job->nr_cbufs; i++) {316if (cbufs[i]) {317v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,318V3D_FLUSH_DEFAULT,319false);320pipe_surface_reference(&job->cbufs[i], cbufs[i]);321322if (cbufs[i]->texture->nr_samples > 1)323job->msaa = true;324}325}326if (zsbuf) {327v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,328V3D_FLUSH_DEFAULT,329false);330pipe_surface_reference(&job->zsbuf, zsbuf);331if (zsbuf->texture->nr_samples > 1)332job->msaa = true;333}334if (bbuf) {335pipe_surface_reference(&job->bbuf, bbuf);336if (bbuf->texture->nr_samples > 1)337job->msaa = true;338}339340for (int i = 0; i < job->nr_cbufs; i++) {341if (cbufs[i])342_mesa_hash_table_insert(v3d->write_jobs,343cbufs[i]->texture, job);344}345if (zsbuf) {346_mesa_hash_table_insert(v3d->write_jobs, zsbuf->texture, job);347348struct v3d_resource *rsc = v3d_resource(zsbuf->texture);349if (rsc->separate_stencil) {350v3d_flush_jobs_reading_resource(v3d,351&rsc->separate_stencil->base,352V3D_FLUSH_DEFAULT,353false);354_mesa_hash_table_insert(v3d->write_jobs,355&rsc->separate_stencil->base,356job);357}358}359360memcpy(&job->key, &local_key, sizeof(local_key));361_mesa_hash_table_insert(v3d->jobs, &job->key, job);362363return job;364}365366struct v3d_job *367v3d_get_job_for_fbo(struct v3d_context *v3d)368{369if (v3d->job)370return v3d->job;371372uint32_t nr_cbufs = v3d->framebuffer.nr_cbufs;373struct pipe_surface **cbufs = v3d->framebuffer.cbufs;374struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf;375struct v3d_job *job = v3d_get_job(v3d, nr_cbufs, cbufs, zsbuf, NULL);376377if (v3d->framebuffer.samples >= 1)378job->msaa = true;379380v3d_get_tile_buffer_size(job->msaa, job->nr_cbufs,381job->cbufs, job->bbuf,382&job->tile_width,383&job->tile_height,384&job->internal_bpp);385386/* The dirty flags are tracking what's been updated while v3d->job has387* been bound, so set them all to ~0 when switching between jobs. We388* also need to reset all state at the start of rendering.389*/390v3d->dirty = ~0;391392/* If we're binding to uninitialized buffers, no need to load their393* contents before drawing.394*/395for (int i = 0; i < nr_cbufs; i++) {396if (cbufs[i]) {397struct v3d_resource *rsc = v3d_resource(cbufs[i]->texture);398if (!rsc->writes)399job->clear |= PIPE_CLEAR_COLOR0 << i;400}401}402403if (zsbuf) {404struct v3d_resource *rsc = v3d_resource(zsbuf->texture);405if (!rsc->writes)406job->clear |= PIPE_CLEAR_DEPTH;407408if (rsc->separate_stencil)409rsc = rsc->separate_stencil;410411if (!rsc->writes)412job->clear |= PIPE_CLEAR_STENCIL;413}414415job->draw_tiles_x = DIV_ROUND_UP(v3d->framebuffer.width,416job->tile_width);417job->draw_tiles_y = DIV_ROUND_UP(v3d->framebuffer.height,418job->tile_height);419420v3d->job = job;421422return job;423}424425static void426v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job)427{428if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF)))429return;430431struct clif_dump *clif = clif_dump_init(&v3d->screen->devinfo,432stderr,433V3D_DEBUG & V3D_DEBUG_CL);434435set_foreach(job->bos, entry) {436struct v3d_bo *bo = (void *)entry->key;437char *name = ralloc_asprintf(NULL, "%s_0x%x",438bo->name, bo->offset);439440v3d_bo_map(bo);441clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);442443ralloc_free(name);444}445446clif_dump(clif, &job->submit);447448clif_dump_destroy(clif);449}450451static void452v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)453{454assert(v3d->prim_counts);455456perf_debug("stalling on TF counts readback\n");457struct v3d_resource *rsc = v3d_resource(v3d->prim_counts);458if (v3d_bo_wait(rsc->bo, PIPE_TIMEOUT_INFINITE, "prim-counts")) {459uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;460v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];461/* When we only have a vertex shader we determine the primitive462* count in the CPU so don't update it here again.463*/464if (v3d->prog.gs)465v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];466}467}468469/**470* Submits the job to the kernel and then reinitializes it.471*/472void473v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)474{475struct v3d_screen *screen = v3d->screen;476477if (!job->needs_flush)478goto done;479480if (screen->devinfo.ver >= 41)481v3d41_emit_rcl(job);482else483v3d33_emit_rcl(job);484485if (cl_offset(&job->bcl) > 0) {486if (screen->devinfo.ver >= 41)487v3d41_bcl_epilogue(v3d, job);488else489v3d33_bcl_epilogue(v3d, job);490}491492/* While the RCL will implicitly depend on the last RCL to have493* finished, we also need to block on any previous TFU job we may have494* dispatched.495*/496job->submit.in_sync_rcl = v3d->out_sync;497498/* Update the sync object for the last rendering by our context. */499job->submit.out_sync = v3d->out_sync;500501job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);502job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);503504job->submit.flags = 0;505if (job->tmu_dirty_rcl && screen->has_cache_flush)506job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;507508/* On V3D 4.1, the tile alloc/state setup moved to register writes509* instead of binner packets.510*/511if (screen->devinfo.ver >= 41) {512v3d_job_add_bo(job, job->tile_alloc);513job->submit.qma = job->tile_alloc->offset;514job->submit.qms = job->tile_alloc->size;515516v3d_job_add_bo(job, job->tile_state);517job->submit.qts = job->tile_state->offset;518}519520v3d_clif_dump(v3d, job);521522if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) {523int ret;524525ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);526static bool warned = false;527if (ret && !warned) {528fprintf(stderr, "Draw call returned %s. "529"Expect corruption.\n", strerror(errno));530warned = true;531}532533/* If we are submitting a job in the middle of transform534* feedback we need to read the primitive counts and accumulate535* them, otherwise they will be reset at the start of the next536* draw when we emit the Tile Binning Mode Configuration packet.537*538* If the job doesn't have any TF draw calls, then we know539* the primitive count must be zero and we can skip stalling540* for this. This also fixes a problem because it seems that541* in this scenario the counters are not reset with the Tile542* Binning Mode Configuration packet, which would translate543* to us reading an obsolete (possibly non-zero) value from544* the GPU counters.545*/546if (v3d->streamout.num_targets && job->tf_draw_calls_queued > 0)547v3d_read_and_accumulate_primitive_counters(v3d);548}549550done:551v3d_job_free(v3d, job);552}553554static bool555v3d_job_compare(const void *a, const void *b)556{557return memcmp(a, b, sizeof(struct v3d_job_key)) == 0;558}559560static uint32_t561v3d_job_hash(const void *key)562{563return _mesa_hash_data(key, sizeof(struct v3d_job_key));564}565566void567v3d_job_init(struct v3d_context *v3d)568{569v3d->jobs = _mesa_hash_table_create(v3d,570v3d_job_hash,571v3d_job_compare);572v3d->write_jobs = _mesa_hash_table_create(v3d,573_mesa_hash_pointer,574_mesa_key_pointer_equal);575}576577578579