Path: blob/21.2-virgl/src/gallium/drivers/vc4/vc4_job.c
4570 views
/*1* Copyright © 2014-2015 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223/** @file vc4_job.c24*25* Functions for submitting VC4 render jobs to the kernel.26*/2728#include <xf86drm.h>29#include "vc4_cl_dump.h"30#include "vc4_context.h"31#include "util/hash_table.h"3233static void34vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)35{36struct vc4_bo **referenced_bos = job->bo_pointers.base;37for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {38vc4_bo_unreference(&referenced_bos[i]);39}4041_mesa_hash_table_remove_key(vc4->jobs, &job->key);4243if (job->color_write) {44_mesa_hash_table_remove_key(vc4->write_jobs,45job->color_write->texture);46pipe_surface_reference(&job->color_write, NULL);47}48if (job->msaa_color_write) {49_mesa_hash_table_remove_key(vc4->write_jobs,50job->msaa_color_write->texture);51pipe_surface_reference(&job->msaa_color_write, NULL);52}53if (job->zs_write) {54_mesa_hash_table_remove_key(vc4->write_jobs,55job->zs_write->texture);56pipe_surface_reference(&job->zs_write, NULL);57}58if (job->msaa_zs_write) {59_mesa_hash_table_remove_key(vc4->write_jobs,60job->msaa_zs_write->texture);61pipe_surface_reference(&job->msaa_zs_write, NULL);62}6364pipe_surface_reference(&job->color_read, NULL);65pipe_surface_reference(&job->zs_read, NULL);6667if (vc4->job == job)68vc4->job = NULL;6970ralloc_free(job);71}7273static struct vc4_job *74vc4_job_create(struct vc4_context *vc4)75{76struct vc4_job *job = rzalloc(vc4, struct vc4_job);7778vc4_init_cl(job, &job->bcl);79vc4_init_cl(job, &job->shader_rec);80vc4_init_cl(job, &job->uniforms);81vc4_init_cl(job, &job->bo_handles);82vc4_init_cl(job, &job->bo_pointers);8384job->draw_min_x = ~0;85job->draw_min_y = ~0;86job->draw_max_x = 0;87job->draw_max_y = 0;8889job->last_gem_handle_hindex = ~0;9091if (vc4->perfmon)92job->perfmon = vc4->perfmon;9394return job;95}9697void98vc4_flush_jobs_writing_resource(struct vc4_context *vc4,99struct pipe_resource *prsc)100{101struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,102prsc);103if (entry) {104struct vc4_job *job = entry->data;105vc4_job_submit(vc4, job);106}107}108109void110vc4_flush_jobs_reading_resource(struct vc4_context *vc4,111struct pipe_resource *prsc)112{113struct vc4_resource *rsc = vc4_resource(prsc);114115vc4_flush_jobs_writing_resource(vc4, prsc);116117hash_table_foreach(vc4->jobs, entry) {118struct vc4_job *job = entry->data;119120struct vc4_bo **referenced_bos = job->bo_pointers.base;121bool found = false;122for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {123if (referenced_bos[i] == rsc->bo) {124found = true;125break;126}127}128if (found) {129vc4_job_submit(vc4, job);130continue;131}132133/* Also check for the Z/color buffers, since the references to134* those are only added immediately before submit.135*/136if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) {137struct vc4_resource *ctex =138vc4_resource(job->color_read->texture);139if (ctex->bo == rsc->bo) {140vc4_job_submit(vc4, job);141continue;142}143}144145if (job->zs_read && !(job->cleared &146(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {147struct vc4_resource *ztex =148vc4_resource(job->zs_read->texture);149if (ztex->bo == rsc->bo) {150vc4_job_submit(vc4, job);151continue;152}153}154}155}156157/**158* Returns a vc4_job struture for tracking V3D rendering to a particular FBO.159*160* If we've already started rendering to this FBO, then return old same job,161* otherwise make a new one. If we're beginning rendering to an FBO, make162* sure that any previous reads of the FBO (or writes to its color/Z surfaces)163* have been flushed.164*/165struct vc4_job *166vc4_get_job(struct vc4_context *vc4,167struct pipe_surface *cbuf, struct pipe_surface *zsbuf)168{169/* Return the existing job for this FBO if we have one */170struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf};171struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs,172&local_key);173if (entry)174return entry->data;175176/* Creating a new job. Make sure that any previous jobs reading or177* writing these buffers are flushed.178*/179if (cbuf)180vc4_flush_jobs_reading_resource(vc4, cbuf->texture);181if (zsbuf)182vc4_flush_jobs_reading_resource(vc4, zsbuf->texture);183184struct vc4_job *job = vc4_job_create(vc4);185186if (cbuf) {187if (cbuf->texture->nr_samples > 1) {188job->msaa = true;189pipe_surface_reference(&job->msaa_color_write, cbuf);190} else {191pipe_surface_reference(&job->color_write, cbuf);192}193}194195if (zsbuf) {196if (zsbuf->texture->nr_samples > 1) {197job->msaa = true;198pipe_surface_reference(&job->msaa_zs_write, zsbuf);199} else {200pipe_surface_reference(&job->zs_write, zsbuf);201}202}203204if (job->msaa) {205job->tile_width = 32;206job->tile_height = 32;207} else {208job->tile_width = 64;209job->tile_height = 64;210}211212if (cbuf)213_mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job);214if (zsbuf)215_mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job);216217job->key.cbuf = cbuf;218job->key.zsbuf = zsbuf;219_mesa_hash_table_insert(vc4->jobs, &job->key, job);220221return job;222}223224struct vc4_job *225vc4_get_job_for_fbo(struct vc4_context *vc4)226{227if (vc4->job)228return vc4->job;229230struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];231struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;232struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf);233234/* The dirty flags are tracking what's been updated while vc4->job has235* been bound, so set them all to ~0 when switching between jobs. We236* also need to reset all state at the start of rendering.237*/238vc4->dirty = ~0;239240/* Set up the read surfaces in the job. If they aren't actually241* getting read (due to a clear starting the frame), job->cleared will242* mask out the read.243*/244pipe_surface_reference(&job->color_read, cbuf);245pipe_surface_reference(&job->zs_read, zsbuf);246247/* If we're binding to uninitialized buffers, no need to load their248* contents before drawing.249*/250if (cbuf) {251struct vc4_resource *rsc = vc4_resource(cbuf->texture);252if (!rsc->writes)253job->cleared |= PIPE_CLEAR_COLOR0;254}255256if (zsbuf) {257struct vc4_resource *rsc = vc4_resource(zsbuf->texture);258if (!rsc->writes)259job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;260}261262job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width,263job->tile_width);264job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height,265job->tile_height);266267/* Initialize the job with the raster order flags -- each draw will268* check that we haven't changed the flags, since that requires a269* flush.270*/271if (vc4->rasterizer)272job->flags = vc4->rasterizer->tile_raster_order_flags;273274vc4->job = job;275276return job;277}278279static void280vc4_submit_setup_rcl_surface(struct vc4_job *job,281struct drm_vc4_submit_rcl_surface *submit_surf,282struct pipe_surface *psurf,283bool is_depth, bool is_write)284{285struct vc4_surface *surf = vc4_surface(psurf);286287if (!surf)288return;289290struct vc4_resource *rsc = vc4_resource(psurf->texture);291submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);292submit_surf->offset = surf->offset;293294if (psurf->texture->nr_samples <= 1) {295if (is_depth) {296submit_surf->bits =297VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,298VC4_LOADSTORE_TILE_BUFFER_BUFFER);299300} else {301submit_surf->bits =302VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,303VC4_LOADSTORE_TILE_BUFFER_BUFFER) |304VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?305VC4_LOADSTORE_TILE_BUFFER_BGR565 :306VC4_LOADSTORE_TILE_BUFFER_RGBA8888,307VC4_LOADSTORE_TILE_BUFFER_FORMAT);308}309submit_surf->bits |=310VC4_SET_FIELD(surf->tiling,311VC4_LOADSTORE_TILE_BUFFER_TILING);312} else {313assert(!is_write);314submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;315}316317if (is_write)318rsc->writes++;319}320321static void322vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job,323struct drm_vc4_submit_rcl_surface *submit_surf,324struct pipe_surface *psurf)325{326struct vc4_surface *surf = vc4_surface(psurf);327328if (!surf)329return;330331struct vc4_resource *rsc = vc4_resource(psurf->texture);332submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);333submit_surf->offset = surf->offset;334335if (psurf->texture->nr_samples <= 1) {336submit_surf->bits =337VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?338VC4_RENDER_CONFIG_FORMAT_BGR565 :339VC4_RENDER_CONFIG_FORMAT_RGBA8888,340VC4_RENDER_CONFIG_FORMAT) |341VC4_SET_FIELD(surf->tiling,342VC4_RENDER_CONFIG_MEMORY_FORMAT);343}344345rsc->writes++;346}347348static void349vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job,350struct drm_vc4_submit_rcl_surface *submit_surf,351struct pipe_surface *psurf)352{353struct vc4_surface *surf = vc4_surface(psurf);354355if (!surf)356return;357358struct vc4_resource *rsc = vc4_resource(psurf->texture);359submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);360submit_surf->offset = surf->offset;361submit_surf->bits = 0;362rsc->writes++;363}364365/**366* Submits the job to the kernel and then reinitializes it.367*/368void369vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)370{371if (!job->needs_flush)372goto done;373374/* The RCL setup would choke if the draw bounds cause no drawing, so375* just drop the drawing if that's the case.376*/377if (job->draw_max_x <= job->draw_min_x ||378job->draw_max_y <= job->draw_min_y) {379goto done;380}381382if (vc4_debug & VC4_DEBUG_CL) {383fprintf(stderr, "BCL:\n");384vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false);385}386387if (cl_offset(&job->bcl) > 0) {388/* Increment the semaphore indicating that binning is done and389* unblocking the render thread. Note that this doesn't act390* until the FLUSH completes.391*/392cl_ensure_space(&job->bcl, 8);393cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);394/* The FLUSH caps all of our bin lists with a395* VC4_PACKET_RETURN.396*/397cl_emit(&job->bcl, FLUSH, flush);398}399struct drm_vc4_submit_cl submit = {400.color_read.hindex = ~0,401.zs_read.hindex = ~0,402.color_write.hindex = ~0,403.msaa_color_write.hindex = ~0,404.zs_write.hindex = ~0,405.msaa_zs_write.hindex = ~0,406};407408cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t));409cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *));410411if (job->resolve & PIPE_CLEAR_COLOR) {412if (!(job->cleared & PIPE_CLEAR_COLOR)) {413vc4_submit_setup_rcl_surface(job, &submit.color_read,414job->color_read,415false, false);416}417vc4_submit_setup_rcl_render_config_surface(job,418&submit.color_write,419job->color_write);420vc4_submit_setup_rcl_msaa_surface(job,421&submit.msaa_color_write,422job->msaa_color_write);423}424if (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {425if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {426vc4_submit_setup_rcl_surface(job, &submit.zs_read,427job->zs_read, true, false);428}429vc4_submit_setup_rcl_surface(job, &submit.zs_write,430job->zs_write, true, true);431vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write,432job->msaa_zs_write);433}434435if (job->msaa) {436/* This bit controls how many pixels the general437* (i.e. subsampled) loads/stores are iterating over438* (multisample loads replicate out to the other samples).439*/440submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;441/* Controls whether color_write's442* VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation443*/444submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;445}446447submit.bo_handles = (uintptr_t)job->bo_handles.base;448submit.bo_handle_count = cl_offset(&job->bo_handles) / 4;449submit.bin_cl = (uintptr_t)job->bcl.base;450submit.bin_cl_size = cl_offset(&job->bcl);451submit.shader_rec = (uintptr_t)job->shader_rec.base;452submit.shader_rec_size = cl_offset(&job->shader_rec);453submit.shader_rec_count = job->shader_rec_count;454submit.uniforms = (uintptr_t)job->uniforms.base;455submit.uniforms_size = cl_offset(&job->uniforms);456if (job->perfmon)457submit.perfmonid = job->perfmon->id;458459assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);460submit.min_x_tile = job->draw_min_x / job->tile_width;461submit.min_y_tile = job->draw_min_y / job->tile_height;462submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width;463submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height;464submit.width = job->draw_width;465submit.height = job->draw_height;466if (job->cleared) {467submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;468submit.clear_color[0] = job->clear_color[0];469submit.clear_color[1] = job->clear_color[1];470submit.clear_z = job->clear_depth;471submit.clear_s = job->clear_stencil;472}473submit.flags |= job->flags;474475if (vc4->screen->has_syncobj) {476submit.out_sync = vc4->job_syncobj;477478if (vc4->in_fence_fd >= 0) {479/* This replaces the fence in the syncobj. */480drmSyncobjImportSyncFile(vc4->fd, vc4->in_syncobj,481vc4->in_fence_fd);482submit.in_sync = vc4->in_syncobj;483close(vc4->in_fence_fd);484vc4->in_fence_fd = -1;485}486}487488if (!(vc4_debug & VC4_DEBUG_NORAST)) {489int ret;490491ret = vc4_ioctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);492static bool warned = false;493if (ret && !warned) {494fprintf(stderr, "Draw call returned %s. "495"Expect corruption.\n", strerror(errno));496warned = true;497} else if (!ret) {498vc4->last_emit_seqno = submit.seqno;499if (job->perfmon)500job->perfmon->last_seqno = submit.seqno;501}502}503504if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {505if (!vc4_wait_seqno(vc4->screen,506vc4->last_emit_seqno - 5,507PIPE_TIMEOUT_INFINITE,508"job throttling")) {509fprintf(stderr, "Job throttling failed\n");510}511}512513if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {514if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,515PIPE_TIMEOUT_INFINITE, "sync")) {516fprintf(stderr, "Wait failed.\n");517abort();518}519}520521done:522vc4_job_free(vc4, job);523}524525static bool526vc4_job_compare(const void *a, const void *b)527{528return memcmp(a, b, sizeof(struct vc4_job_key)) == 0;529}530531static uint32_t532vc4_job_hash(const void *key)533{534return _mesa_hash_data(key, sizeof(struct vc4_job_key));535}536537int538vc4_job_init(struct vc4_context *vc4)539{540vc4->jobs = _mesa_hash_table_create(vc4,541vc4_job_hash,542vc4_job_compare);543vc4->write_jobs = _mesa_hash_table_create(vc4,544_mesa_hash_pointer,545_mesa_key_pointer_equal);546547if (vc4->screen->has_syncobj) {548/* Create the syncobj as signaled since with no job executed549* there is nothing to wait on.550*/551int ret = drmSyncobjCreate(vc4->fd,552DRM_SYNCOBJ_CREATE_SIGNALED,553&vc4->job_syncobj);554if (ret) {555/* If the screen indicated syncobj support, we should556* be able to create a signaled syncobj.557* At this point it is too late to pretend the screen558* has no syncobj support.559*/560return ret;561}562}563564return 0;565}566567568569