Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_batch.c
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file crocus_batch.c
 *
 * Batchbuffer and command submission module.
 *
 * Every API draw call results in a number of GPU commands, which we
 * collect into a "batch buffer".  Typically, many draw calls are grouped
 * into a single batch to amortize command submission overhead.
 *
 * We submit batches to the kernel using the I915_GEM_EXECBUFFER2 ioctl.
 * One critical piece of data is the "validation list", which contains a
 * list of the buffer objects (BOs) which the commands in the GPU need.
 * The kernel will make sure these are resident and pinned at the correct
 * virtual memory address before executing our batch.  If a BO is not in
 * the validation list, it effectively does not exist, so take care.
 */

#include "crocus_batch.h"
#include "crocus_bufmgr.h"
#include "crocus_context.h"
#include "crocus_fence.h"

#include "drm-uapi/i915_drm.h"

#include "intel/common/intel_gem.h"
#include "main/macros.h"
#include "util/hash_table.h"
#include "util/set.h"
#include "util/u_upload_mgr.h"

#include <errno.h>
#include <xf86drm.h>

#if HAVE_VALGRIND
#include <memcheck.h>
#include <valgrind.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END
 * or 12 bytes for MI_BATCH_BUFFER_START (when chaining).  Plus, we may
 * need an extra 4 bytes to pad out to the nearest QWord.  So reserve 16
 * (32 on Haswell, per the macro below).
 */
#define BATCH_RESERVED(devinfo) ((devinfo)->is_haswell ? 32 : 16)

static void crocus_batch_reset(struct crocus_batch *batch);

static unsigned
num_fences(struct crocus_batch *batch)
{
   return util_dynarray_num_elements(&batch->exec_fences,
                                     struct drm_i915_gem_exec_fence);
}

/**
 * Debugging code to dump the fence list, used by INTEL_DEBUG=submit.
 */
static void
dump_fence_list(struct crocus_batch *batch)
{
   fprintf(stderr, "Fence list (length %u): ", num_fences(batch));

   util_dynarray_foreach(&batch->exec_fences,
                         struct drm_i915_gem_exec_fence, f) {
      fprintf(stderr, "%s%u%s ",
              (f->flags & I915_EXEC_FENCE_WAIT) ? "..." : "",
              f->handle,
              (f->flags & I915_EXEC_FENCE_SIGNAL) ? "!" : "");
   }

   fprintf(stderr, "\n");
}
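/* For reference, a rough sketch of how entries land in exec_fences (the
 * real helper is crocus_batch_add_syncobj(); this is an illustration, not
 * driver code).  I915_EXEC_FENCE_WAIT makes the kernel wait for the
 * syncobj before running the batch; I915_EXEC_FENCE_SIGNAL makes it
 * signal the syncobj when the batch completes:
 *
 *    struct drm_i915_gem_exec_fence *fence =
 *       util_dynarray_grow(&batch->exec_fences,
 *                          struct drm_i915_gem_exec_fence, 1);
 *    *fence = (struct drm_i915_gem_exec_fence) {
 *       .handle = syncobj->handle,
 *       .flags = I915_EXEC_FENCE_SIGNAL,
 *    };
 */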
: "");94}9596fprintf(stderr, "\n");97}9899/**100* Debugging code to dump the validation list, used by INTEL_DEBUG=submit.101*/102static void103dump_validation_list(struct crocus_batch *batch)104{105fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);106107for (int i = 0; i < batch->exec_count; i++) {108uint64_t flags = batch->validation_list[i].flags;109assert(batch->validation_list[i].handle ==110batch->exec_bos[i]->gem_handle);111fprintf(stderr,112"[%2d]: %2d %-14s @ 0x%"PRIx64" (%" PRIu64 "B)\t %2d refs %s\n", i,113batch->validation_list[i].handle, batch->exec_bos[i]->name,114(uint64_t)batch->validation_list[i].offset, batch->exec_bos[i]->size,115batch->exec_bos[i]->refcount,116(flags & EXEC_OBJECT_WRITE) ? " (write)" : "");117}118}119120/**121* Return BO information to the batch decoder (for debugging).122*/123static struct intel_batch_decode_bo124decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)125{126struct crocus_batch *batch = v_batch;127128for (int i = 0; i < batch->exec_count; i++) {129struct crocus_bo *bo = batch->exec_bos[i];130/* The decoder zeroes out the top 16 bits, so we need to as well */131uint64_t bo_address = bo->gtt_offset & (~0ull >> 16);132133if (address >= bo_address && address < bo_address + bo->size) {134return (struct intel_batch_decode_bo){135.addr = address,136.size = bo->size,137.map = crocus_bo_map(batch->dbg, bo, MAP_READ) +138(address - bo_address),139};140}141}142143return (struct intel_batch_decode_bo) { };144}145146static unsigned147decode_get_state_size(void *v_batch, uint64_t address,148uint64_t base_address)149{150struct crocus_batch *batch = v_batch;151152/* The decoder gives us offsets from a base address, which is not great.153* Binding tables are relative to surface state base address, and other154* state is relative to dynamic state base address. These could alias,155* but in practice it's unlikely because surface offsets are always in156* the [0, 64K) range, and we assign dynamic state addresses starting at157* the top of the 4GB range. 
static unsigned
decode_get_state_size(void *v_batch, uint64_t address,
                      uint64_t base_address)
{
   struct crocus_batch *batch = v_batch;

   /* The decoder gives us offsets from a base address, which is not great.
    * Binding tables are relative to surface state base address, and other
    * state is relative to dynamic state base address.  These could alias,
    * but in practice it's unlikely because surface offsets are always in
    * the [0, 64K) range, and we assign dynamic state addresses starting at
    * the top of the 4GB range.  We should fix this but it's likely good
    * enough for now.
    */
   unsigned size = (uintptr_t)
      _mesa_hash_table_u64_search(batch->state_sizes, address - base_address);

   return size;
}

/**
 * Decode the current batch.
 */
static void
decode_batch(struct crocus_batch *batch)
{
   void *map = crocus_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ);
   intel_print_batch(&batch->decoder, map, batch->primary_batch_size,
                     batch->exec_bos[0]->gtt_offset, false);
}

static void
init_reloc_list(struct crocus_reloc_list *rlist, int count)
{
   rlist->reloc_count = 0;
   rlist->reloc_array_size = count;
   rlist->relocs = malloc(rlist->reloc_array_size *
                          sizeof(struct drm_i915_gem_relocation_entry));
}

void
crocus_init_batch(struct crocus_context *ice,
                  enum crocus_batch_name name,
                  int priority)
{
   struct crocus_batch *batch = &ice->batches[name];
   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
   struct intel_device_info *devinfo = &screen->devinfo;

   batch->ice = ice;
   batch->screen = screen;
   batch->dbg = &ice->dbg;
   batch->reset = &ice->reset;
   batch->name = name;
   batch->contains_fence_signal = false;

   if (devinfo->ver >= 7) {
      batch->fine_fences.uploader =
         u_upload_create(&ice->ctx, 4096, PIPE_BIND_CUSTOM,
                         PIPE_USAGE_STAGING, 0);
   }
   crocus_fine_fence_init(batch);

   batch->hw_ctx_id = crocus_create_hw_context(screen->bufmgr);
   assert(batch->hw_ctx_id);

   crocus_hw_context_set_priority(screen->bufmgr, batch->hw_ctx_id, priority);

   batch->valid_reloc_flags = EXEC_OBJECT_WRITE;
   if (devinfo->ver == 6)
      batch->valid_reloc_flags |= EXEC_OBJECT_NEEDS_GTT;

   if (INTEL_DEBUG & DEBUG_BATCH) {
      /* The shadow doesn't get relocs written so state decode fails. */
      batch->use_shadow_copy = false;
   } else
      batch->use_shadow_copy = !devinfo->has_llc;

   util_dynarray_init(&batch->exec_fences, ralloc_context(NULL));
   util_dynarray_init(&batch->syncobjs, ralloc_context(NULL));

   init_reloc_list(&batch->command.relocs, 250);
   init_reloc_list(&batch->state.relocs, 250);

   batch->exec_count = 0;
   batch->exec_array_size = 100;
   batch->exec_bos =
      malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
   batch->validation_list =
      malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));

   batch->cache.render = _mesa_hash_table_create(NULL, NULL,
                                                 _mesa_key_pointer_equal);
   batch->cache.depth = _mesa_set_create(NULL, NULL,
                                         _mesa_key_pointer_equal);

   memset(batch->other_batches, 0, sizeof(batch->other_batches));

   for (int i = 0, j = 0; i < ice->batch_count; i++) {
      if (i != name)
         batch->other_batches[j++] = &ice->batches[i];
   }

   if (INTEL_DEBUG & DEBUG_BATCH) {
      batch->state_sizes = _mesa_hash_table_u64_create(NULL);
      const unsigned decode_flags =
         INTEL_BATCH_DECODE_FULL |
         ((INTEL_DEBUG & DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
         INTEL_BATCH_DECODE_OFFSETS | INTEL_BATCH_DECODE_FLOATS;

      intel_batch_decode_ctx_init(&batch->decoder, &screen->devinfo, stderr,
                                  decode_flags, NULL, decode_get_bo,
                                  decode_get_state_size, batch);
      batch->decoder.max_vbo_decoded_lines = 32;
   }

   crocus_batch_reset(batch);
}
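/* Usage note (assuming the usual INTEL_DEBUG flag spellings): the decoder
 * setup above only happens when batch decoding is requested, e.g.
 *
 *    INTEL_DEBUG=bat glxgears
 *
 * which makes every flushed batch get disassembled to stderr via
 * decode_batch() below.
 */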
static struct drm_i915_gem_exec_object2 *
find_validation_entry(struct crocus_batch *batch, struct crocus_bo *bo)
{
   unsigned index = READ_ONCE(bo->index);

   if (index < batch->exec_count && batch->exec_bos[index] == bo)
      return &batch->validation_list[index];

   /* May have been shared between multiple active batches */
   for (index = 0; index < batch->exec_count; index++) {
      if (batch->exec_bos[index] == bo)
         return &batch->validation_list[index];
   }

   return NULL;
}

static void
ensure_exec_obj_space(struct crocus_batch *batch, uint32_t count)
{
   while (batch->exec_count + count > batch->exec_array_size) {
      batch->exec_array_size *= 2;
      batch->exec_bos = realloc(
         batch->exec_bos, batch->exec_array_size * sizeof(batch->exec_bos[0]));
      batch->validation_list =
         realloc(batch->validation_list,
                 batch->exec_array_size * sizeof(batch->validation_list[0]));
   }
}

static struct drm_i915_gem_exec_object2 *
crocus_use_bo(struct crocus_batch *batch, struct crocus_bo *bo, bool writable)
{
   assert(bo->bufmgr == batch->command.bo->bufmgr);

   struct drm_i915_gem_exec_object2 *existing_entry =
      find_validation_entry(batch, bo);

   if (existing_entry) {
      /* The BO is already in the validation list; mark it writable */
      if (writable)
         existing_entry->flags |= EXEC_OBJECT_WRITE;
      return existing_entry;
   }

   if (bo != batch->command.bo && bo != batch->state.bo) {
      /* This is the first time our batch has seen this BO.  Before we use
       * it, we may need to flush and synchronize with other batches.
       */
      for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) {
         if (!batch->other_batches[b])
            continue;

         struct drm_i915_gem_exec_object2 *other_entry =
            find_validation_entry(batch->other_batches[b], bo);

         /* If the buffer is referenced by another batch, and either batch
          * intends to write it, then flush the other batch and synchronize.
          *
          * Consider these cases:
          *
          * 1. They read, we read   =>  No synchronization required.
          * 2. They read, we write  =>  Synchronize (they need the old value)
          * 3. They write, we read  =>  Synchronize (we need their new value)
          * 4. They write, we write =>  Synchronize (order writes)
          *
          * The read/read case is very common, as multiple batches usually
          * share a streaming state buffer or shader assembly buffer, and
          * we want to avoid synchronizing in this case.
          */
         if (other_entry &&
             ((other_entry->flags & EXEC_OBJECT_WRITE) || writable)) {
            crocus_batch_flush(batch->other_batches[b]);
            crocus_batch_add_syncobj(batch,
                                     batch->other_batches[b]->last_fence->syncobj,
                                     I915_EXEC_FENCE_WAIT);
         }
      }
   }

   /* Bump the ref count since the batch is now using this bo. */
   crocus_bo_reference(bo);

   ensure_exec_obj_space(batch, 1);

   batch->validation_list[batch->exec_count] =
      (struct drm_i915_gem_exec_object2) {
         .handle = bo->gem_handle,
         .offset = bo->gtt_offset,
         .flags = bo->kflags | (writable ? EXEC_OBJECT_WRITE : 0),
      };

   bo->index = batch->exec_count;
   batch->exec_bos[batch->exec_count] = bo;
   batch->aperture_space += bo->size;

   batch->exec_count++;

   return &batch->validation_list[batch->exec_count - 1];
}
static uint64_t
emit_reloc(struct crocus_batch *batch,
           struct crocus_reloc_list *rlist, uint32_t offset,
           struct crocus_bo *target, int32_t target_offset,
           unsigned int reloc_flags)
{
   assert(target != NULL);

   if (target == batch->ice->workaround_bo)
      reloc_flags &= ~RELOC_WRITE;

   bool writable = reloc_flags & RELOC_WRITE;

   struct drm_i915_gem_exec_object2 *entry =
      crocus_use_bo(batch, target, writable);

   if (rlist->reloc_count == rlist->reloc_array_size) {
      rlist->reloc_array_size *= 2;
      rlist->relocs = realloc(rlist->relocs,
                              rlist->reloc_array_size *
                              sizeof(struct drm_i915_gem_relocation_entry));
   }

   if (reloc_flags & RELOC_32BIT) {
      /* Restrict this buffer to the low 32 bits of the address space.
       *
       * Altering the validation list flags restricts it for this batch,
       * but we also alter the BO's kflags to restrict it permanently
       * (until the BO is destroyed and put back in the cache).  Buffers
       * may stay bound across batches, and we want to keep it constrained.
       */
      target->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
      entry->flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;

      /* RELOC_32BIT is not an EXEC_OBJECT_* flag, so get rid of it. */
      reloc_flags &= ~RELOC_32BIT;
   }

   if (reloc_flags)
      entry->flags |= reloc_flags & batch->valid_reloc_flags;

   rlist->relocs[rlist->reloc_count++] =
      (struct drm_i915_gem_relocation_entry) {
         .offset = offset,
         .delta = target_offset,
         .target_handle = target->index,
         .presumed_offset = entry->offset,
      };

   /* Using the old buffer offset, write in what the right data would be,
    * in case the buffer doesn't move and we can short-circuit the
    * relocation processing in the kernel.
    */
   return entry->offset + target_offset;
}

uint64_t
crocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset,
                     struct crocus_bo *target, uint32_t target_offset,
                     unsigned int reloc_flags)
{
   assert(batch_offset <= batch->command.bo->size - sizeof(uint32_t));

   return emit_reloc(batch, &batch->command.relocs, batch_offset,
                     target, target_offset, reloc_flags);
}

uint64_t
crocus_state_reloc(struct crocus_batch *batch, uint32_t state_offset,
                   struct crocus_bo *target, uint32_t target_offset,
                   unsigned int reloc_flags)
{
   assert(state_offset <= batch->state.bo->size - sizeof(uint32_t));

   return emit_reloc(batch, &batch->state.relocs, state_offset,
                     target, target_offset, reloc_flags);
}
static void
recreate_growing_buffer(struct crocus_batch *batch,
                        struct crocus_growing_bo *grow,
                        const char *name, unsigned size)
{
   struct crocus_screen *screen = batch->screen;
   struct crocus_bufmgr *bufmgr = screen->bufmgr;
   grow->bo = crocus_bo_alloc(bufmgr, name, size);
   grow->bo->kflags |= EXEC_OBJECT_CAPTURE;
   grow->partial_bo = NULL;
   grow->partial_bo_map = NULL;
   grow->partial_bytes = 0;
   if (batch->use_shadow_copy)
      grow->map = realloc(grow->map, grow->bo->size);
   else
      grow->map = crocus_bo_map(NULL, grow->bo, MAP_READ | MAP_WRITE);
   grow->map_next = grow->map;
}

static void
create_batch(struct crocus_batch *batch)
{
   struct crocus_screen *screen = batch->screen;

   recreate_growing_buffer(batch, &batch->command,
                           "command buffer",
                           BATCH_SZ + BATCH_RESERVED(&screen->devinfo));

   crocus_use_bo(batch, batch->command.bo, false);

   /* Always add workaround_bo which contains a driver identifier to be
    * recorded in error states.
    */
   crocus_use_bo(batch, batch->ice->workaround_bo, false);

   recreate_growing_buffer(batch, &batch->state,
                           "state buffer",
                           STATE_SZ);

   batch->state.used = 1;
   crocus_use_bo(batch, batch->state.bo, false);
}

static void
crocus_batch_maybe_noop(struct crocus_batch *batch)
{
   /* We only insert the NOOP at the beginning of the batch. */
   assert(crocus_batch_bytes_used(batch) == 0);

   if (batch->noop_enabled) {
      /* Emit MI_BATCH_BUFFER_END to prevent any further commands from
       * being executed.
       */
      uint32_t *map = batch->command.map_next;

      map[0] = (0xA << 23); /* MI_BATCH_BUFFER_END */

      batch->command.map_next += 4;
   }
}
static void
crocus_batch_reset(struct crocus_batch *batch)
{
   struct crocus_screen *screen = batch->screen;

   crocus_bo_unreference(batch->command.bo);
   crocus_bo_unreference(batch->state.bo);
   batch->primary_batch_size = 0;
   batch->contains_draw = false;
   batch->contains_fence_signal = false;
   batch->state_base_address_emitted = false;
   batch->screen->vtbl.batch_reset_dirty(batch);

   create_batch(batch);
   assert(batch->command.bo->index == 0);

   if (batch->state_sizes)
      _mesa_hash_table_u64_clear(batch->state_sizes);

   struct crocus_syncobj *syncobj = crocus_create_syncobj(screen);
   crocus_batch_add_syncobj(batch, syncobj, I915_EXEC_FENCE_SIGNAL);
   crocus_syncobj_reference(screen, &syncobj, NULL);

   crocus_cache_sets_clear(batch);
}

void
crocus_batch_free(struct crocus_batch *batch)
{
   struct crocus_screen *screen = batch->screen;
   struct crocus_bufmgr *bufmgr = screen->bufmgr;

   if (batch->use_shadow_copy) {
      free(batch->command.map);
      free(batch->state.map);
   }

   for (int i = 0; i < batch->exec_count; i++) {
      crocus_bo_unreference(batch->exec_bos[i]);
   }

   pipe_resource_reference(&batch->fine_fences.ref.res, NULL);

   free(batch->command.relocs.relocs);
   free(batch->state.relocs.relocs);
   free(batch->exec_bos);
   free(batch->validation_list);

   ralloc_free(batch->exec_fences.mem_ctx);

   util_dynarray_foreach(&batch->syncobjs, struct crocus_syncobj *, s)
      crocus_syncobj_reference(screen, s, NULL);
   ralloc_free(batch->syncobjs.mem_ctx);

   crocus_fine_fence_reference(batch->screen, &batch->last_fence, NULL);
   if (batch_has_fine_fence(batch))
      u_upload_destroy(batch->fine_fences.uploader);

   crocus_bo_unreference(batch->command.bo);
   crocus_bo_unreference(batch->state.bo);
   batch->command.bo = NULL;
   batch->command.map = NULL;
   batch->command.map_next = NULL;

   crocus_destroy_hw_context(bufmgr, batch->hw_ctx_id);

   _mesa_hash_table_destroy(batch->cache.render, NULL);
   _mesa_set_destroy(batch->cache.depth, NULL);

   if (batch->state_sizes) {
      _mesa_hash_table_u64_destroy(batch->state_sizes);
      intel_batch_decode_ctx_finish(&batch->decoder);
   }
}

/**
 * If we've chained to a secondary batch, or are getting near to the end,
 * then flush.  This should only be called between draws.
 */
void
crocus_batch_maybe_flush(struct crocus_batch *batch, unsigned estimate)
{
   if (batch->command.bo != batch->exec_bos[0] ||
       crocus_batch_bytes_used(batch) + estimate >= BATCH_SZ) {
      crocus_batch_flush(batch);
   }
}
/**
 * Finish copying the old batch/state buffer's contents to the new one
 * after we tried to "grow" the buffer in an earlier operation.
 */
static void
finish_growing_bos(struct crocus_growing_bo *grow)
{
   struct crocus_bo *old_bo = grow->partial_bo;
   if (!old_bo)
      return;

   memcpy(grow->map, grow->partial_bo_map, grow->partial_bytes);

   grow->partial_bo = NULL;
   grow->partial_bo_map = NULL;
   grow->partial_bytes = 0;

   crocus_bo_unreference(old_bo);
}

void
crocus_grow_buffer(struct crocus_batch *batch, bool grow_state,
                   unsigned used, unsigned new_size)
{
   struct crocus_screen *screen = batch->screen;
   struct crocus_bufmgr *bufmgr = screen->bufmgr;
   struct crocus_growing_bo *grow = grow_state ? &batch->state : &batch->command;
   struct crocus_bo *bo = grow->bo;

   if (grow->partial_bo) {
      /* We've already grown once, and now we need to do it again.
       * Finish our last grow operation so we can start a new one.
       * This should basically never happen.
       */
      finish_growing_bos(grow);
   }

   struct crocus_bo *new_bo = crocus_bo_alloc(bufmgr, bo->name, new_size);

   /* Copy existing data to the new larger buffer */
   grow->partial_bo_map = grow->map;

   if (batch->use_shadow_copy) {
      /* We can't safely use realloc, as it may move the existing buffer,
       * breaking existing pointers the caller may still be using.  Just
       * malloc a new copy and memcpy it like the normal BO path.
       *
       * Use new_bo->size rather than new_size because the bufmgr may have
       * rounded up the size, and we want the shadow size to match.
       */
      grow->map = malloc(new_bo->size);
   } else {
      grow->map = crocus_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE);
   }

   /* Try to put the new BO at the same GTT offset as the old BO (which
    * we're throwing away, so it doesn't need to be there).
    *
    * This guarantees that our relocations continue to work: values we've
    * already written into the buffer, values we're going to write into the
    * buffer, and the validation/relocation lists all will match.
    *
    * Also preserve kflags for EXEC_OBJECT_CAPTURE.
    */
   new_bo->gtt_offset = bo->gtt_offset;
   new_bo->index = bo->index;
   new_bo->kflags = bo->kflags;

   /* Batch/state buffers are per-context, and if we've run out of space,
    * we must have actually used them before, so...they will be in the list.
    */
   assert(bo->index < batch->exec_count);
   assert(batch->exec_bos[bo->index] == bo);

   /* Update the validation list to use the new BO. */
   batch->validation_list[bo->index].handle = new_bo->gem_handle;

   /* Exchange the two BOs...without breaking pointers to the old BO.
    *
    * Consider this scenario:
    *
    * 1. Somebody calls brw_state_batch() to get a region of memory, and
    *    then creates a brw_address pointing to brw->batch.state.bo.
    * 2. They then call brw_state_batch() a second time, which happens to
    *    grow and replace the state buffer.  They then try to emit a
    *    relocation to their first section of memory.
    *
    * If we replace the brw->batch.state.bo pointer at step 2, we would
    * break the address created in step 1.  They'd have a pointer to the
    * old destroyed BO.  Emitting a relocation would add this dead BO to
    * the validation list...causing /both/ statebuffers to be in the list,
    * and all kinds of disasters.
    *
    * This is not a contrived case - BLORP vertex data upload hits this.
    *
    * There are worse scenarios too.  Fences for GL sync objects reference
    * brw->batch.batch.bo.  If we replaced the batch pointer when growing,
    * we'd need to chase down every fence and update it to point to the
    * new BO.  Otherwise, it would refer to a "batch" that never actually
    * gets submitted, and would fail to trigger.
    *
    * To work around both of these issues, we transmute the buffers in
    * place, making the existing struct crocus_bo represent the new buffer,
    * and "new_bo" represent the old BO.  This is highly unusual, but it
    * seems like a necessary evil.
    *
    * We also defer the memcpy of the existing batch's contents.  Callers
    * may make multiple brw_state_batch calls, and retain pointers to the
    * old BO's map.  We'll perform the memcpy in finish_growing_bos() when
    * we finally submit the batch, at which point we've finished uploading
    * state, and nobody should have any old references anymore.
    *
    * To do that, we keep a reference to the old BO in grow->partial_bo,
    * and store the number of bytes to copy in grow->partial_bytes.  We
    * can monkey with the refcounts directly without atomics because these
    * are per-context BOs and they can only be touched by this thread.
    */
   assert(new_bo->refcount == 1);
   new_bo->refcount = bo->refcount;
   bo->refcount = 1;

   struct crocus_bo tmp;
   memcpy(&tmp, bo, sizeof(struct crocus_bo));
   memcpy(bo, new_bo, sizeof(struct crocus_bo));
   memcpy(new_bo, &tmp, sizeof(struct crocus_bo));

   grow->partial_bo = new_bo; /* the one reference of the OLD bo */
   grow->partial_bytes = used;
}
static void
finish_seqno(struct crocus_batch *batch)
{
   struct crocus_fine_fence *sq = crocus_fine_fence_new(batch, CROCUS_FENCE_END);
   if (!sq)
      return;

   crocus_fine_fence_reference(batch->screen, &batch->last_fence, sq);
   crocus_fine_fence_reference(batch->screen, &sq, NULL);
}

/**
 * Terminate a batch with MI_BATCH_BUFFER_END.
 */
static void
crocus_finish_batch(struct crocus_batch *batch)
{
   batch->no_wrap = true;
   if (batch->screen->vtbl.finish_batch)
      batch->screen->vtbl.finish_batch(batch);

   finish_seqno(batch);

   /* Emit MI_BATCH_BUFFER_END to finish our batch. */
   uint32_t *map = batch->command.map_next;

   map[0] = (0xA << 23);

   batch->command.map_next += 4;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->command.map,
                                    crocus_batch_bytes_used(batch)));

   if (batch->command.bo == batch->exec_bos[0])
      batch->primary_batch_size = crocus_batch_bytes_used(batch);
   batch->no_wrap = false;
}

/**
 * Replace our current GEM context with a new one (in case it got banned).
 */
static bool
replace_hw_ctx(struct crocus_batch *batch)
{
   struct crocus_screen *screen = batch->screen;
   struct crocus_bufmgr *bufmgr = screen->bufmgr;

   uint32_t new_ctx = crocus_clone_hw_context(bufmgr, batch->hw_ctx_id);
   if (!new_ctx)
      return false;

   crocus_destroy_hw_context(bufmgr, batch->hw_ctx_id);
   batch->hw_ctx_id = new_ctx;

   /* Notify the context that state must be re-initialized. */
   crocus_lost_context_state(batch);

   return true;
}

enum pipe_reset_status
crocus_batch_check_for_reset(struct crocus_batch *batch)
{
   struct crocus_screen *screen = batch->screen;
   enum pipe_reset_status status = PIPE_NO_RESET;
   struct drm_i915_reset_stats stats = { .ctx_id = batch->hw_ctx_id };

   if (drmIoctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
      DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno));

   if (stats.batch_active != 0) {
      /* A reset was observed while a batch from this hardware context was
       * executing.  Assume that this context was at fault.
       */
      status = PIPE_GUILTY_CONTEXT_RESET;
   } else if (stats.batch_pending != 0) {
      /* A reset was observed while a batch from this context was in
       * progress, but the batch was not executing.  In this case, assume
       * that the context was not at fault.
       */
      status = PIPE_INNOCENT_CONTEXT_RESET;
   }

   if (status != PIPE_NO_RESET) {
      /* Our context is likely banned, or at least in an unknown state.
       * Throw it away and start with a fresh context.  Ideally this may
       * catch the problem before our next execbuf fails with -EIO.
       */
      replace_hw_ctx(batch);
   }

   return status;
}
/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
submit_batch(struct crocus_batch *batch)
{
   if (batch->use_shadow_copy) {
      void *bo_map = crocus_bo_map(batch->dbg, batch->command.bo, MAP_WRITE);
      memcpy(bo_map, batch->command.map, crocus_batch_bytes_used(batch));

      bo_map = crocus_bo_map(batch->dbg, batch->state.bo, MAP_WRITE);
      memcpy(bo_map, batch->state.map, batch->state.used);
   }

   crocus_bo_unmap(batch->command.bo);
   crocus_bo_unmap(batch->state.bo);

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.gtt_offset which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */

   /* Set statebuffer relocations */
   const unsigned state_index = batch->state.bo->index;
   if (state_index < batch->exec_count &&
       batch->exec_bos[state_index] == batch->state.bo) {
      struct drm_i915_gem_exec_object2 *entry =
         &batch->validation_list[state_index];
      assert(entry->handle == batch->state.bo->gem_handle);
      entry->relocation_count = batch->state.relocs.reloc_count;
      entry->relocs_ptr = (uintptr_t)batch->state.relocs.relocs;
   }

   /* Set batchbuffer relocations */
   struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0];
   assert(entry->handle == batch->command.bo->gem_handle);
   entry->relocation_count = batch->command.relocs.reloc_count;
   entry->relocs_ptr = (uintptr_t)batch->command.relocs.relocs;

   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t)batch->validation_list,
      .buffer_count = batch->exec_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = I915_EXEC_RENDER |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */
   };

   if (num_fences(batch)) {
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = num_fences(batch);
      /* With I915_EXEC_FENCE_ARRAY, the kernel repurposes the cliprects
       * fields for the fence array.
       */
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   int ret = 0;
   if (!batch->screen->no_hw &&
       intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf))
      ret = -errno;

   for (int i = 0; i < batch->exec_count; i++) {
      struct crocus_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      /* Update crocus_bo::gtt_offset */
      if (batch->validation_list[i].offset != bo->gtt_offset) {
         DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
             bo->gem_handle, bo->gtt_offset,
             (uint64_t)batch->validation_list[i].offset);
         assert(!(bo->kflags & EXEC_OBJECT_PINNED));
         bo->gtt_offset = batch->validation_list[i].offset;
      }
   }

   return ret;
}
static const char *
batch_name_to_string(enum crocus_batch_name name)
{
   const char *names[CROCUS_BATCH_COUNT] = {
      [CROCUS_BATCH_RENDER]  = "render",
      [CROCUS_BATCH_COMPUTE] = "compute",
   };
   return names[name];
}

/**
 * Flush the batch buffer, submitting it to the GPU and resetting it so
 * we're ready to emit the next batch.
 */
void
_crocus_batch_flush(struct crocus_batch *batch, const char *file, int line)
{
   struct crocus_screen *screen = batch->screen;

   /* An empty batch normally needs no submission - unless it contains a
    * fence signal that someone is waiting on.
    */
   if (crocus_batch_bytes_used(batch) == 0 && !batch->contains_fence_signal)
      return;

   assert(!batch->no_wrap);
   crocus_finish_batch(batch);

   finish_growing_bos(&batch->command);
   finish_growing_bos(&batch->state);

   int ret = submit_batch(batch);

   if (unlikely(INTEL_DEBUG &
                (DEBUG_BATCH | DEBUG_SUBMIT | DEBUG_PIPE_CONTROL))) {
      int bytes_for_commands = crocus_batch_bytes_used(batch);
      int second_bytes = 0;
      if (batch->command.bo != batch->exec_bos[0]) {
         second_bytes = bytes_for_commands;
         bytes_for_commands += batch->primary_batch_size;
      }
      fprintf(stderr, "%19s:%-3d: %s batch [%u] flush with %5d+%5db (%0.1f%%) "
              "(cmds), %4d BOs (%0.1fMB aperture),"
              " %4d command relocs, %4d state relocs\n",
              file, line, batch_name_to_string(batch->name), batch->hw_ctx_id,
              batch->primary_batch_size, second_bytes,
              100.0f * bytes_for_commands / BATCH_SZ,
              batch->exec_count,
              (float) batch->aperture_space / (1024 * 1024),
              batch->command.relocs.reloc_count,
              batch->state.relocs.reloc_count);

      if (INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT)) {
         dump_fence_list(batch);
         dump_validation_list(batch);
      }

      if (INTEL_DEBUG & DEBUG_BATCH) {
         decode_batch(batch);
      }
   }

   for (int i = 0; i < batch->exec_count; i++) {
      struct crocus_bo *bo = batch->exec_bos[i];
      crocus_bo_unreference(bo);
   }

   batch->command.relocs.reloc_count = 0;
   batch->state.relocs.reloc_count = 0;
   batch->exec_count = 0;
   batch->aperture_space = 0;

   util_dynarray_foreach(&batch->syncobjs, struct crocus_syncobj *, s)
      crocus_syncobj_reference(screen, s, NULL);
   util_dynarray_clear(&batch->syncobjs);

   util_dynarray_clear(&batch->exec_fences);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      dbg_printf("waiting for idle\n");
      /* If execbuf failed, this is a no-op. */
      crocus_bo_wait_rendering(batch->command.bo);
   }

   /* Start a new batch buffer. */
   crocus_batch_reset(batch);

   /* EIO means our context is banned.  In this case, try and replace it
    * with a new logical context, and inform crocus_context that all state
    * has been lost and needs to be re-initialized.  If this succeeds,
    * dubiously claim success...
    */
   if (ret == -EIO && replace_hw_ctx(batch)) {
      if (batch->reset->reset) {
         /* Tell the state tracker the device is lost and it was our fault. */
         batch->reset->reset(batch->reset->data, PIPE_GUILTY_CONTEXT_RESET);
      }

      ret = 0;
   }

   if (ret < 0) {
#ifdef DEBUG
      const bool color = INTEL_DEBUG & DEBUG_COLOR;
      fprintf(stderr, "%scrocus: Failed to submit batchbuffer: %-80s%s\n",
              color ? "\e[1;41m" : "", strerror(-ret), color ? "\e[0m" : "");
#endif
      abort();
   }
}

/**
 * Does the current batch refer to the given BO?
 *
 * (In other words, is the BO in the current batch's validation list?)
 */
bool
crocus_batch_references(struct crocus_batch *batch, struct crocus_bo *bo)
{
   return find_validation_entry(batch, bo) != NULL;
}
/**
 * Updates the state of the noop feature.  Returns true if there was a noop
 * transition that led to state invalidation.
 */
bool
crocus_batch_prepare_noop(struct crocus_batch *batch, bool noop_enable)
{
   if (batch->noop_enabled == noop_enable)
      return false;

   batch->noop_enabled = noop_enable;

   crocus_batch_flush(batch);

   /* If the batch was empty, flush had no effect, so insert our noop. */
   if (crocus_batch_bytes_used(batch) == 0)
      crocus_batch_maybe_noop(batch);

   /* We only need to update the entire state if we transition from noop ->
    * not-noop.
    */
   return !batch->noop_enabled;
}