Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_queue.c
/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "drm-uapi/v3d_drm.h"

#include "broadcom/clif/clif_dump.h"

#include <errno.h>
#include <time.h>

static void
v3dv_clif_dump(struct v3dv_device *device,
               struct v3dv_job *job,
               struct drm_v3d_submit_cl *submit)
{
   if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF)))
      return;

   struct clif_dump *clif = clif_dump_init(&device->devinfo,
                                           stderr,
                                           V3D_DEBUG & V3D_DEBUG_CL);

   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (void *)entry->key;
      char *name = ralloc_asprintf(NULL, "%s_0x%x",
                                   bo->name, bo->offset);

      bool ok = v3dv_bo_map(device, bo, bo->size);
      if (!ok) {
         fprintf(stderr, "failed to map BO for clif_dump.\n");
         ralloc_free(name);
         goto free_clif;
      }
      clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);

      ralloc_free(name);
   }

   clif_dump(clif, submit);

free_clif:
   clif_dump_destroy(clif);
}

static uint64_t
gettime_ns()
{
   struct timespec current;
   clock_gettime(CLOCK_MONOTONIC, &current);
   return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
}

static uint64_t
get_absolute_timeout(uint64_t timeout)
{
   uint64_t current_time = gettime_ns();
   uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;

   timeout = MIN2(max_timeout, timeout);

   return (current_time + timeout);
}

static VkResult
queue_submit_job(struct v3dv_queue *queue,
                 struct v3dv_job *job,
                 bool do_sem_wait,
                 pthread_t *wait_thread);

/* Waits for active CPU wait threads spawned before the current thread to
 * complete and submit all their GPU jobs.
 */
static void
cpu_queue_wait_idle(struct v3dv_queue *queue)
{
   const pthread_t this_thread = pthread_self();

retry:
   mtx_lock(&queue->mutex);
   list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
                       &queue->submit_wait_list, list_link) {
      for (uint32_t i = 0; i < info->wait_thread_count; i++) {
         if (info->wait_threads[i].finished)
            continue;

         /* Because we are testing this against the list of spawned threads
          * it will never match for the main thread, so when we call this from
          * the main thread we are effectively waiting for all active threads
          * to complete, and otherwise we are only waiting for work submitted
          * before the wait thread that called this (a wait thread should never
          * be waiting for work submitted after it).
          */
         if (info->wait_threads[i].thread == this_thread)
            goto done;

         /* Wait and try again */
         mtx_unlock(&queue->mutex);
         usleep(500); /* 0.5 ms */
         goto retry;
      }
   }

done:
   mtx_unlock(&queue->mutex);
}

static VkResult
gpu_queue_wait_idle(struct v3dv_queue *queue)
{
   struct v3dv_device *device = queue->device;

   mtx_lock(&device->mutex);
   uint32_t last_job_sync = device->last_job_sync;
   mtx_unlock(&device->mutex);

   int ret = drmSyncobjWait(device->pdevice->render_fd,
                            &last_job_sync, 1, INT64_MAX, 0, NULL);
   if (ret)
      return VK_ERROR_DEVICE_LOST;

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_QueueWaitIdle(VkQueue _queue)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);

   /* Check that we don't have any wait threads running in the CPU first,
    * as these can spawn new GPU jobs.
    */
   cpu_queue_wait_idle(queue);

   /* Check we don't have any GPU jobs running */
   return gpu_queue_wait_idle(queue);
}

static VkResult
handle_reset_query_cpu_job(struct v3dv_job *job)
{
   struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
   assert(info->pool);

   /* We are about to reset query counters so we need to make sure that
    * the GPU is not using them. The exception is timestamp queries, since
    * we handle those on the CPU.
    *
    * FIXME: we could avoid blocking the main thread for this if we use a
    * submission thread.
    */
   if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION)
      v3dv_bo_wait(job->device, info->pool->bo, PIPE_TIMEOUT_INFINITE);

   for (uint32_t i = info->first; i < info->first + info->count; i++) {
      assert(i < info->pool->query_count);
      struct v3dv_query *q = &info->pool->queries[i];
      q->maybe_available = false;
      switch (info->pool->query_type) {
      case VK_QUERY_TYPE_OCCLUSION: {
         const uint8_t *q_addr = ((uint8_t *) q->bo->map) + q->offset;
         uint32_t *counter = (uint32_t *) q_addr;
         *counter = 0;
         break;
      }
      case VK_QUERY_TYPE_TIMESTAMP:
         q->value = 0;
         break;
      default:
         unreachable("Unsupported query type");
      }
   }

   return VK_SUCCESS;
}

static VkResult
handle_end_query_cpu_job(struct v3dv_job *job)
{
   struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end;
   assert(info->query < info->pool->query_count);
   struct v3dv_query *query = &info->pool->queries[info->query];
   query->maybe_available = true;

   return VK_SUCCESS;
}

static VkResult
handle_copy_query_results_cpu_job(struct v3dv_job *job)
{
   struct v3dv_copy_query_results_cpu_job_info *info =
      &job->cpu.query_copy_results;

   assert(info->dst && info->dst->mem && info->dst->mem->bo);
   struct v3dv_bo *bo = info->dst->mem->bo;

   /* Map the entire dst buffer for the CPU copy if needed */
   assert(!bo->map || bo->map_size == bo->size);
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: if flags includes VK_QUERY_RESULT_WAIT_BIT this could trigger a
    * sync wait on the CPU for the corresponding GPU jobs to finish. We might
    * want to use a submission thread to avoid blocking on the main thread.
    */
   uint8_t *offset = ((uint8_t *) bo->map) +
                     info->offset + info->dst->mem_offset;
   v3dv_get_query_pool_results_cpu(job->device,
                                   info->pool,
                                   info->first,
                                   info->count,
                                   offset,
                                   info->stride,
                                   info->flags);

   return VK_SUCCESS;
}

static VkResult
handle_set_event_cpu_job(struct v3dv_job *job, bool is_wait_thread)
{
   /* From the Vulkan 1.0 spec:
    *
    *    "When vkCmdSetEvent is submitted to a queue, it defines an execution
    *     dependency on commands that were submitted before it, and defines an
    *     event signal operation which sets the event to the signaled state.
    *     The first synchronization scope includes every command previously
    *     submitted to the same queue, including those in the same command
    *     buffer and batch".
    *
    * So we should wait for all prior work to be completed before signaling
    * the event. This includes all active CPU wait threads spawned for any
    * command buffer submitted *before* this.
    *
    * FIXME: we could avoid blocking the main thread for this if we use a
    * submission thread.
    */

   /* If we are calling this from a wait thread it will only wait for
    * wait threads spawned before it, otherwise it will wait for
    * all active threads to complete.
    */
   cpu_queue_wait_idle(&job->device->queue);

   VkResult result = gpu_queue_wait_idle(&job->device->queue);
   if (result != VK_SUCCESS)
      return result;

   struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
   p_atomic_set(&info->event->state, info->state);

   return VK_SUCCESS;
}

static bool
check_wait_events_complete(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);

   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
   for (uint32_t i = 0; i < info->event_count; i++) {
      if (!p_atomic_read(&info->events[i]->state))
         return false;
   }
   return true;
}

static void
wait_thread_finish(struct v3dv_queue *queue, pthread_t thread)
{
   mtx_lock(&queue->mutex);
   list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
                       &queue->submit_wait_list, list_link) {
      for (uint32_t i = 0; i < info->wait_thread_count; i++) {
         if (info->wait_threads[i].thread == thread) {
            info->wait_threads[i].finished = true;
            goto done;
         }
      }
   }

   unreachable(!"Failed to finish wait thread: not found");

done:
   mtx_unlock(&queue->mutex);
}

static void *
event_wait_thread_func(void *_job)
{
   struct v3dv_job *job = (struct v3dv_job *) _job;
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;

   /* Wait for events to be signaled */
   const useconds_t wait_interval_ms = 1;
   while (!check_wait_events_complete(job))
      usleep(wait_interval_ms * 1000);

   /* Now continue submitting pending jobs for the same command buffer after
    * the wait job.
    */
   struct v3dv_queue *queue = &job->device->queue;
   list_for_each_entry_from(struct v3dv_job, pjob, job->list_link.next,
                            &job->cmd_buffer->jobs, list_link) {
      /* We don't want to spawn more than one wait thread per command buffer.
       * If this job also requires a wait for events, we will do the wait here.
       */
      VkResult result = queue_submit_job(queue, pjob, info->sem_wait, NULL);
      if (result == VK_NOT_READY) {
         while (!check_wait_events_complete(pjob)) {
            usleep(wait_interval_ms * 1000);
         }
         result = VK_SUCCESS;
      }

      if (result != VK_SUCCESS) {
         fprintf(stderr, "Wait thread job execution failed.\n");
failed.\n");338goto done;339}340}341342done:343wait_thread_finish(queue, pthread_self());344return NULL;345}346347static VkResult348spawn_event_wait_thread(struct v3dv_job *job, pthread_t *wait_thread)349350{351assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);352assert(job->cmd_buffer);353assert(wait_thread != NULL);354355if (pthread_create(wait_thread, NULL, event_wait_thread_func, job))356return vk_error(job->device->instance, VK_ERROR_DEVICE_LOST);357358return VK_NOT_READY;359}360361static VkResult362handle_wait_events_cpu_job(struct v3dv_job *job,363bool sem_wait,364pthread_t *wait_thread)365{366assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);367struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;368369/* If all events are signaled then we are done and can continue submitting370* the rest of the command buffer normally.371*/372if (check_wait_events_complete(job))373return VK_SUCCESS;374375/* Otherwise, we put the rest of the command buffer on a wait thread until376* all events are signaled. We only spawn a new thread on the first377* wait job we see for a command buffer, any additional wait jobs in the378* same command buffer will run in that same wait thread and will get here379* with a NULL wait_thread pointer.380*381* Also, whether we spawn a wait thread or not, we always return382* VK_NOT_READY (unless an error happened), so we stop trying to submit383* any jobs in the same command buffer after the wait job. The wait thread384* will attempt to submit them after the wait completes.385*/386info->sem_wait = sem_wait;387if (wait_thread)388return spawn_event_wait_thread(job, wait_thread);389else390return VK_NOT_READY;391}392393static VkResult394handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)395{396assert(job->type == V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE);397struct v3dv_copy_buffer_to_image_cpu_job_info *info =398&job->cpu.copy_buffer_to_image;399400/* Wait for all GPU work to finish first, since we may be accessing401* the BOs involved in the operation.402*/403v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));404405/* Map BOs */406struct v3dv_bo *dst_bo = info->image->mem->bo;407assert(!dst_bo->map || dst_bo->map_size == dst_bo->size);408if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size))409return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);410void *dst_ptr = dst_bo->map;411412struct v3dv_bo *src_bo = info->buffer->mem->bo;413assert(!src_bo->map || src_bo->map_size == src_bo->size);414if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size))415return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);416void *src_ptr = src_bo->map;417418const struct v3d_resource_slice *slice =419&info->image->slices[info->mip_level];420421const struct pipe_box box = {422info->image_offset.x, info->image_offset.y, info->base_layer,423info->image_extent.width, info->image_extent.height, info->layer_count,424};425426/* Copy each layer */427for (uint32_t i = 0; i < info->layer_count; i++) {428const uint32_t dst_offset =429v3dv_layer_offset(info->image, info->mip_level, info->base_layer + i);430const uint32_t src_offset =431info->buffer->mem_offset + info->buffer_offset +432info->buffer_layer_stride * i;433v3d_store_tiled_image(434dst_ptr + dst_offset, slice->stride,435src_ptr + src_offset, info->buffer_stride,436slice->tiling, info->image->cpp, slice->padded_height, &box);437}438439return VK_SUCCESS;440}441442static VkResult443handle_timestamp_query_cpu_job(struct v3dv_job *job)444{445assert(job->type == 
   struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;

   /* Wait for completion of all work queued before the timestamp query */
   v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));

   /* Compute timestamp */
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);
   assert(info->query < info->pool->query_count);
   struct v3dv_query *query = &info->pool->queries[info->query];
   query->maybe_available = true;
   query->value = t.tv_sec * 1000000000ull + t.tv_nsec;

   return VK_SUCCESS;
}

static VkResult
handle_csd_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait);

static VkResult
handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
                            struct v3dv_job *job,
                            bool do_sem_wait)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT);
   struct v3dv_csd_indirect_cpu_job_info *info = &job->cpu.csd_indirect;
   assert(info->csd_job);

   /* Make sure the GPU is no longer using the indirect buffer */
   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
   v3dv_bo_wait(queue->device, info->buffer->mem->bo, PIPE_TIMEOUT_INFINITE);

   /* Map the indirect buffer and read the dispatch parameters */
   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
   struct v3dv_bo *bo = info->buffer->mem->bo;
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   assert(bo->map);

   const uint32_t offset = info->buffer->mem_offset + info->offset;
   const uint32_t *group_counts = (uint32_t *) (bo->map + offset);
   if (group_counts[0] == 0 || group_counts[1] == 0 || group_counts[2] == 0)
      return VK_SUCCESS;

   if (memcmp(group_counts, info->csd_job->csd.wg_count,
              sizeof(info->csd_job->csd.wg_count)) != 0) {
      v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts);
   }

   handle_csd_job(queue, info->csd_job, do_sem_wait);

   return VK_SUCCESS;
}

static VkResult
process_semaphores_to_signal(struct v3dv_device *device,
                             uint32_t count, const VkSemaphore *sems)
{
   if (count == 0)
      return VK_SUCCESS;

   int render_fd = device->pdevice->render_fd;

   int fd;
   mtx_lock(&device->mutex);
   drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd);
   mtx_unlock(&device->mutex);
   if (fd == -1)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = VK_SUCCESS;
   for (uint32_t i = 0; i < count; i++) {
      struct v3dv_semaphore *sem = v3dv_semaphore_from_handle(sems[i]);

      int ret;
      if (!sem->temp_sync)
         ret = drmSyncobjImportSyncFile(render_fd, sem->sync, fd);
      else
         ret = drmSyncobjImportSyncFile(render_fd, sem->temp_sync, fd);

      if (ret) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         break;
      }
   }

   assert(fd >= 0);
   close(fd);

   return result;
}

static VkResult
process_fence_to_signal(struct v3dv_device *device, VkFence _fence)
{
   if (_fence == VK_NULL_HANDLE)
      return VK_SUCCESS;

   struct v3dv_fence *fence = v3dv_fence_from_handle(_fence);

   int render_fd = device->pdevice->render_fd;

   int fd;
   mtx_lock(&device->mutex);
   drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd);
   mtx_unlock(&device->mutex);
   if (fd == -1)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   int ret;
   if (!fence->temp_sync)
      ret = drmSyncobjImportSyncFile(render_fd, fence->sync, fd);
   else
      ret = drmSyncobjImportSyncFile(render_fd, fence->temp_sync, fd);

   assert(fd >= 0);
   close(fd);

   return ret ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS;
}

static VkResult
handle_cl_job(struct v3dv_queue *queue,
              struct v3dv_job *job,
              bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   struct drm_v3d_submit_cl submit;

   /* Sanity check: we should only flag a bcl sync on a job that needs to be
    * serialized.
    */
   assert(job->serialize || !job->needs_bcl_sync);

   /* We expect to have just one RCL per job which should fit in just one BO.
    * Our BCL could chain multiple BOs together though.
    */
   assert(list_length(&job->rcl.bo_list) == 1);
   assert(list_length(&job->bcl.bo_list) >= 1);
   struct v3dv_bo *bcl_first_bo =
      list_first_entry(&job->bcl.bo_list, struct v3dv_bo, list_link);
   submit.bcl_start = bcl_first_bo->offset;
   submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl);
   submit.rcl_start = job->rcl.bo->offset;
   submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl);

   submit.qma = job->tile_alloc->offset;
   submit.qms = job->tile_alloc->size;
   submit.qts = job->tile_state->offset;

   submit.flags = 0;
   if (job->tmu_dirty_rcl)
      submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;

   submit.bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
      (uint32_t *) malloc(sizeof(uint32_t) * submit.bo_handle_count);
   uint32_t bo_idx = 0;
   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
      bo_handles[bo_idx++] = bo->handle;
   }
   assert(bo_idx == submit.bo_handle_count);
   submit.bo_handles = (uintptr_t)(void *)bo_handles;

   /* We need a binning sync if we are waiting on a semaphore (do_sem_wait) or
    * if the job comes after a pipeline barrier that involves geometry stages
    * (needs_bcl_sync).
    *
    * We need a render sync if the job doesn't need a binning sync but has
    * still been flagged for serialization. It should be noted that RCL jobs
    * don't start until the previous RCL job has finished so we don't really
    * need to add a fence for those, however, we might need to wait on a CSD or
    * TFU job, which are not automatically serialized with CL jobs.
    *
    * FIXME: for now, if we are asked to wait on any semaphores, we just wait
    * on the last job we submitted. In the future we might want to pass the
    * actual syncobj of the wait semaphores so we don't block on the last RCL
    * if we only need to wait for a previous CSD or TFU, for example, but
    * we would have to extend our kernel interface to support the case where
    * we have more than one semaphore to wait on.
    */
   const bool needs_bcl_sync = do_sem_wait || job->needs_bcl_sync;
   const bool needs_rcl_sync = job->serialize && !needs_bcl_sync;

   mtx_lock(&queue->device->mutex);
   submit.in_sync_bcl = needs_bcl_sync ? device->last_job_sync : 0;
   submit.in_sync_rcl = needs_rcl_sync ? device->last_job_sync : 0;
   submit.out_sync = device->last_job_sync;
   v3dv_clif_dump(device, job, &submit);
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_SUBMIT_CL, &submit);
   mtx_unlock(&queue->device->mutex);

   static bool warned = false;
   if (ret && !warned) {
      fprintf(stderr, "Draw call returned %s. Expect corruption.\n",
              strerror(errno));
      warned = true;
   }

   free(bo_handles);

   if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);

   return VK_SUCCESS;
}

static VkResult
handle_tfu_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   const bool needs_sync = do_sem_wait || job->serialize;

   mtx_lock(&device->mutex);
   job->tfu.in_sync = needs_sync ? device->last_job_sync : 0;
   job->tfu.out_sync = device->last_job_sync;
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu);
   mtx_unlock(&device->mutex);

   if (ret != 0) {
      fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   }

   return VK_SUCCESS;
}

static VkResult
handle_csd_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   struct drm_v3d_submit_csd *submit = &job->csd.submit;

   submit->bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
      (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2));
   uint32_t bo_idx = 0;
   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
      bo_handles[bo_idx++] = bo->handle;
   }
   assert(bo_idx == submit->bo_handle_count);
   submit->bo_handles = (uintptr_t)(void *)bo_handles;

   const bool needs_sync = do_sem_wait || job->serialize;

   mtx_lock(&queue->device->mutex);
   submit->in_sync = needs_sync ? device->last_job_sync : 0;
   submit->out_sync = device->last_job_sync;
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_SUBMIT_CSD, submit);
   mtx_unlock(&queue->device->mutex);

   static bool warned = false;
   if (ret && !warned) {
      fprintf(stderr, "Compute dispatch returned %s. Expect corruption.\n",
              strerror(errno));
      warned = true;
   }

   free(bo_handles);

   if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);

   return VK_SUCCESS;
}

static VkResult
queue_submit_job(struct v3dv_queue *queue,
                 struct v3dv_job *job,
                 bool do_sem_wait,
                 pthread_t *wait_thread)
{
   assert(job);

   switch (job->type) {
   case V3DV_JOB_TYPE_GPU_CL:
      return handle_cl_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_GPU_TFU:
      return handle_tfu_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_GPU_CSD:
      return handle_csd_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
      return handle_reset_query_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_END_QUERY:
      return handle_end_query_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
      return handle_copy_query_results_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_SET_EVENT:
      return handle_set_event_cpu_job(job, wait_thread != NULL);
   case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
      return handle_wait_events_cpu_job(job, do_sem_wait, wait_thread);
   case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
      return handle_copy_buffer_to_image_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
      return handle_csd_indirect_cpu_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
      return handle_timestamp_query_cpu_job(job);
   default:
      unreachable("Unhandled job type");
   }
}

static VkResult
queue_create_noop_job(struct v3dv_queue *queue)
{
   struct v3dv_device *device = queue->device;
   queue->noop_job = vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_job), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!queue->noop_job)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1);

   v3dv_X(device, job_emit_noop)(queue->noop_job);

   return VK_SUCCESS;
}

static VkResult
queue_submit_noop_job(struct v3dv_queue *queue, const VkSubmitInfo *pSubmit)
{
   /* VkQueue host access is externally synchronized so we don't need to lock
    * here for the static variable.
    */
   if (!queue->noop_job) {
      VkResult result = queue_create_noop_job(queue);
      if (result != VK_SUCCESS)
         return result;
   }

   return queue_submit_job(queue, queue->noop_job,
                           pSubmit->waitSemaphoreCount > 0, NULL);
}

static VkResult
queue_submit_cmd_buffer(struct v3dv_queue *queue,
                        struct v3dv_cmd_buffer *cmd_buffer,
                        const VkSubmitInfo *pSubmit,
                        pthread_t *wait_thread)
{
   assert(cmd_buffer);
   assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_EXECUTABLE);

   if (list_is_empty(&cmd_buffer->jobs))
      return queue_submit_noop_job(queue, pSubmit);

   list_for_each_entry_safe(struct v3dv_job, job,
                            &cmd_buffer->jobs, list_link) {
      VkResult result = queue_submit_job(queue, job,
                                         pSubmit->waitSemaphoreCount > 0,
                                         wait_thread);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static void
add_wait_thread_to_list(struct v3dv_device *device,
                        pthread_t thread,
                        struct v3dv_queue_submit_wait_info **wait_info)
{
   /* If this is the first time we spawn a wait thread for this queue
    * submission create a v3dv_queue_submit_wait_info to track this and
    * any other threads in the same submission and add it to the global list
    * in the queue.
    */
   if (*wait_info == NULL) {
      *wait_info =
         vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_queue_submit_wait_info), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      (*wait_info)->device = device;
   }

   /* And add the thread to the list of wait threads for this submission */
   const uint32_t thread_idx = (*wait_info)->wait_thread_count;
   assert(thread_idx < 16);
   (*wait_info)->wait_threads[thread_idx].thread = thread;
   (*wait_info)->wait_threads[thread_idx].finished = false;
   (*wait_info)->wait_thread_count++;
}

static void
add_signal_semaphores_to_wait_list(struct v3dv_device *device,
                                   const VkSubmitInfo *pSubmit,
                                   struct v3dv_queue_submit_wait_info *wait_info)
{
   assert(wait_info);

   if (pSubmit->signalSemaphoreCount == 0)
      return;

   /* FIXME: We put all the semaphores in a list and we signal all of them
    * together from the submit master thread when the last wait thread in the
    * submit completes. We could do better though: group the semaphores per
    * submit and signal them as soon as all wait threads for a particular
    * submit complete. Not sure if the extra work would be worth it though,
    * since we only spawn wait threads for event waits and only when the
    * event is set from the host after the queue submission.
    */

   /* Check the size of the current semaphore list */
   const uint32_t prev_count = wait_info->signal_semaphore_count;
   const uint32_t prev_alloc_size = prev_count * sizeof(VkSemaphore);
   VkSemaphore *prev_list = wait_info->signal_semaphores;

   /* Resize the list to hold the additional semaphores */
   const uint32_t extra_alloc_size =
      pSubmit->signalSemaphoreCount * sizeof(VkSemaphore);
   wait_info->signal_semaphore_count += pSubmit->signalSemaphoreCount;
   wait_info->signal_semaphores =
      vk_alloc(&device->vk.alloc, prev_alloc_size + extra_alloc_size, 8,
               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);

   /* Copy the old list to the new allocation and free the old list */
   if (prev_count > 0) {
      memcpy(wait_info->signal_semaphores, prev_list, prev_alloc_size);
      vk_free(&device->vk.alloc, prev_list);
   }

   /* Add the new semaphores to the list */
   memcpy(wait_info->signal_semaphores + prev_count,
          pSubmit->pSignalSemaphores, extra_alloc_size);
}

static VkResult
queue_submit_cmd_buffer_batch(struct v3dv_queue *queue,
                              const VkSubmitInfo *pSubmit,
                              struct v3dv_queue_submit_wait_info **wait_info)
{
   VkResult result = VK_SUCCESS;
   bool has_wait_threads = false;

   /* Even if we don't have any actual work to submit we still need to wait
    * on the wait semaphores and signal the signal semaphores and fence, so
    * in this scenario we just submit a trivial no-op job so we don't have
    * to do anything special, it should not be a common case anyway.
    */
   if (pSubmit->commandBufferCount == 0) {
      result = queue_submit_noop_job(queue, pSubmit);
   } else {
      for (uint32_t i = 0; i < pSubmit->commandBufferCount; i++) {
         pthread_t wait_thread;
         struct v3dv_cmd_buffer *cmd_buffer =
            v3dv_cmd_buffer_from_handle(pSubmit->pCommandBuffers[i]);
         result = queue_submit_cmd_buffer(queue, cmd_buffer, pSubmit,
                                          &wait_thread);

         /* We get VK_NOT_READY if we had to spawn a wait thread for the
          * command buffer. In that scenario, we want to continue submitting
          * any pending command buffers in the batch, but we don't want to
          * process any signal semaphores for the batch until we know we have
          * submitted every job for every command buffer in the batch.
          */
         if (result == VK_NOT_READY) {
            result = VK_SUCCESS;
            add_wait_thread_to_list(queue->device, wait_thread, wait_info);
            has_wait_threads = true;
         }

         if (result != VK_SUCCESS)
            break;
      }
   }

   if (result != VK_SUCCESS)
      return result;

   /* If we had to emit any wait threads in this submit we need to wait for
    * all of them to complete before we can signal any semaphores.
    */
   if (!has_wait_threads) {
      return process_semaphores_to_signal(queue->device,
                                          pSubmit->signalSemaphoreCount,
                                          pSubmit->pSignalSemaphores);
   } else {
      assert(*wait_info);
      add_signal_semaphores_to_wait_list(queue->device, pSubmit, *wait_info);
      return VK_NOT_READY;
   }
}

static void *
master_wait_thread_func(void *_wait_info)
{
   struct v3dv_queue_submit_wait_info *wait_info =
      (struct v3dv_queue_submit_wait_info *) _wait_info;

   struct v3dv_queue *queue = &wait_info->device->queue;

   /* Wait for all command buffer wait threads to complete */
   for (uint32_t i = 0; i < wait_info->wait_thread_count; i++) {
      int res = pthread_join(wait_info->wait_threads[i].thread, NULL);
      if (res != 0)
         fprintf(stderr, "Wait thread failed to join.\n");
   }

   /* Signal semaphores and fences */
   VkResult result;
   result = process_semaphores_to_signal(wait_info->device,
                                         wait_info->signal_semaphore_count,
                                         wait_info->signal_semaphores);
   if (result != VK_SUCCESS)
      fprintf(stderr, "Wait thread semaphore signaling failed.");

   result = process_fence_to_signal(wait_info->device, wait_info->fence);
   if (result != VK_SUCCESS)
      fprintf(stderr, "Wait thread fence signaling failed.");

   /* Release wait_info */
   mtx_lock(&queue->mutex);
   list_del(&wait_info->list_link);
   mtx_unlock(&queue->mutex);

   vk_free(&wait_info->device->vk.alloc, wait_info->signal_semaphores);
   vk_free(&wait_info->device->vk.alloc, wait_info);

   return NULL;
}

static VkResult
spawn_master_wait_thread(struct v3dv_queue *queue,
                         struct v3dv_queue_submit_wait_info *wait_info)
{
   VkResult result = VK_SUCCESS;

   mtx_lock(&queue->mutex);
   if (pthread_create(&wait_info->master_wait_thread, NULL,
                      master_wait_thread_func, wait_info)) {
      result = vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
      goto done;
   }

   list_addtail(&wait_info->list_link, &queue->submit_wait_list);

done:
   mtx_unlock(&queue->mutex);
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_QueueSubmit(VkQueue _queue,
                 uint32_t submitCount,
                 const VkSubmitInfo* pSubmits,
                 VkFence fence)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);

   struct v3dv_queue_submit_wait_info *wait_info = NULL;

   VkResult result = VK_SUCCESS;
   for (uint32_t i = 0; i < submitCount; i++) {
      result = queue_submit_cmd_buffer_batch(queue, &pSubmits[i], &wait_info);
      if (result != VK_SUCCESS && result != VK_NOT_READY)
         goto done;
   }

   if (!wait_info) {
      assert(result != VK_NOT_READY);
      result = process_fence_to_signal(queue->device, fence);
      goto done;
   }

   /* We emitted wait threads, so we have to spawn a master thread for this
    * queue submission that waits for all other threads to complete and then
    * will signal any semaphores and fences.
    */
   assert(wait_info);
   wait_info->fence = fence;
   result = spawn_master_wait_thread(queue, wait_info);

done:
   return result;
}

static void
destroy_syncobj(uint32_t device_fd, uint32_t *sync)
{
   assert(sync);
   drmSyncobjDestroy(device_fd, *sync);
   *sync = 0;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateSemaphore(VkDevice _device,
                     const VkSemaphoreCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkSemaphore *pSemaphore)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);

   struct v3dv_semaphore *sem =
      vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_semaphore),
                       VK_OBJECT_TYPE_SEMAPHORE);
   if (sem == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   int ret = drmSyncobjCreate(device->pdevice->render_fd, 0, &sem->sync);
   if (ret) {
      vk_object_free(&device->vk, pAllocator, sem);
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *pSemaphore = v3dv_semaphore_to_handle(sem);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceExternalSemaphoreProperties(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
   VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
{
   switch (pExternalSemaphoreInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->compatibleHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;

      /* FIXME: we can't import external semaphores until we improve the kernel
       * submit interface to handle multiple in syncobjs, because once we have
       * an imported semaphore in our list of semaphores to wait on, we can no
       * longer use the workaround of waiting on the last syncobj fence produced
       * from the device, since the imported semaphore may not (and in fact, it
       * would typically not) have been produced from the same device.
       *
       * This behavior is exercised via dEQP-VK.synchronization.cross_instance.*.
       * Particularly, this test:
       * dEQP-VK.synchronization.cross_instance.dedicated.
       * write_ssbo_compute_read_vertex_input.buffer_16384_binary_semaphore_fd
       * fails consistently because of this, so it'll be a good reference to
       * verify the implementation when the kernel bits are in place.
       */
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;

      /* FIXME: See comment in GetPhysicalDeviceExternalFenceProperties
       * for details on why we can't export to SYNC_FD.
       */
      if (pExternalSemaphoreInfo->handleType !=
          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
         pExternalSemaphoreProperties->externalSemaphoreFeatures |=
            VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT;
      }
      break;
   default:
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
      break;
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ImportSemaphoreFdKHR(
   VkDevice _device,
   const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);

   assert(pImportSemaphoreFdInfo->sType ==
          VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR);

   int fd = pImportSemaphoreFdInfo->fd;
   int render_fd = device->pdevice->render_fd;

   bool is_temporary =
      pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT ||
      (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT);

   uint32_t new_sync;
   switch (pImportSemaphoreFdInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: {
      /* "If handleType is VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, the
       * special value -1 for fd is treated like a valid sync file descriptor
       * referring to an object that has already signaled. The import
       * operation will succeed and the VkSemaphore will have a temporarily
       * imported payload as if a valid file descriptor had been provided."
       */
      unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0;
      if (drmSyncobjCreate(render_fd, flags, &new_sync))
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

      if (fd != -1) {
         if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) {
            drmSyncobjDestroy(render_fd, new_sync);
            return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
         }
      }
      break;
   }
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: {
      if (drmSyncobjFDToHandle(render_fd, fd, &new_sync))
         return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
      break;
   }
   default:
      return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }

   destroy_syncobj(render_fd, &sem->temp_sync);
   if (is_temporary) {
      sem->temp_sync = new_sync;
   } else {
      destroy_syncobj(render_fd, &sem->sync);
      sem->sync = new_sync;
   }

   /* From the Vulkan 1.0.53 spec:
    *
    * "Importing a semaphore payload from a file descriptor transfers
    * ownership of the file descriptor from the application to the
    * Vulkan implementation. The application must not perform any
    * operations on the file descriptor after a successful import."
    *
    * If the import fails, we leave the file descriptor open.
    */
   if (fd != -1)
      close(fd);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetSemaphoreFdKHR(VkDevice _device,
                       const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
                       int *pFd)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_semaphore, sem, pGetFdInfo->semaphore);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR);

   *pFd = -1;
   int render_fd = device->pdevice->render_fd;
   switch (pGetFdInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: {
      drmSyncobjExportSyncFile(render_fd, sem->sync, pFd);
      if (*pFd == -1)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      break;
   }
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
      drmSyncobjHandleToFD(render_fd, sem->sync, pFd);
      if (*pFd == -1)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      break;
   default:
      unreachable("Unsupported external semaphore handle type");
   }

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroySemaphore(VkDevice _device,
                      VkSemaphore semaphore,
                      const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_semaphore, sem, semaphore);

   if (sem == NULL)
      return;

   destroy_syncobj(device->pdevice->render_fd, &sem->sync);
   destroy_syncobj(device->pdevice->render_fd, &sem->temp_sync);

   vk_object_free(&device->vk, pAllocator, sem);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateFence(VkDevice _device,
                 const VkFenceCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkFence *pFence)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);

   struct v3dv_fence *fence =
      vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_fence),
                       VK_OBJECT_TYPE_FENCE);
   if (fence == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   unsigned flags = 0;
   if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
      flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
   int ret = drmSyncobjCreate(device->pdevice->render_fd, flags, &fence->sync);
   if (ret) {
      vk_object_free(&device->vk, pAllocator, fence);
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *pFence = v3dv_fence_to_handle(fence);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceExternalFenceProperties(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
   VkExternalFenceProperties *pExternalFenceProperties)
{
   switch (pExternalFenceInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
      pExternalFenceProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalFenceProperties->compatibleHandleTypes =
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalFenceProperties->externalFenceFeatures =
         VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;

      /* FIXME: SYNC_FD exports the actual fence referenced by the syncobj, not
       * the syncobj itself, and that fence is only created after we have
       * submitted to the kernel and updated the syncobj for the fence to import
       * the actual DRM fence created with the submission. Unfortunately, if the
       * queue submission has a 'wait for events' we may hold any jobs after the
       * wait in a user-space thread until the events are signaled, and in that
       * case we don't update the out fence of the submit until the events are
       * signaled and we can submit all the jobs involved with the vkQueueSubmit
       * call. This means that if the application submits with an out fence and
       * a wait for events, trying to export the out fence to a SYNC_FD right
       * after the submission and before the events are signaled will fail,
       * because the actual DRM fence won't exist yet. This is not a problem
       * with OPAQUE_FD because in this case we export the entire syncobj, not
       * the underlying DRM fence. To fix this we need to rework our kernel
       * interface to be more flexible and accept multiple in/out syncobjs so
       * we can implement event waits as regular fence waits on the kernel side;
       * until then, we can only reliably export OPAQUE_FD.
       */
      if (pExternalFenceInfo->handleType !=
          VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
         pExternalFenceProperties->externalFenceFeatures |=
            VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT;
      }
      break;
   default:
      pExternalFenceProperties->exportFromImportedHandleTypes = 0;
      pExternalFenceProperties->compatibleHandleTypes = 0;
      pExternalFenceProperties->externalFenceFeatures = 0;
      break;
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ImportFenceFdKHR(VkDevice _device,
                      const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, pImportFenceFdInfo->fence);

   assert(pImportFenceFdInfo->sType ==
          VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR);

   int fd = pImportFenceFdInfo->fd;
   int render_fd = device->pdevice->render_fd;

   bool is_temporary =
      pImportFenceFdInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT ||
      (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT);

   uint32_t new_sync;
   switch (pImportFenceFdInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
      /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the
       * special value -1 for fd is treated like a valid sync file descriptor
       * referring to an object that has already signaled. The import
       * operation will succeed and the VkFence will have a temporarily
       * imported payload as if a valid file descriptor had been provided."
       */
      unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0;
      if (drmSyncobjCreate(render_fd, flags, &new_sync))
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

      if (fd != -1) {
         if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) {
            drmSyncobjDestroy(render_fd, new_sync);
            return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
         }
      }
      break;
   }
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: {
      if (drmSyncobjFDToHandle(render_fd, fd, &new_sync))
         return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
      break;
   }
   default:
      return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }

   destroy_syncobj(render_fd, &fence->temp_sync);
   if (is_temporary) {
      fence->temp_sync = new_sync;
   } else {
      destroy_syncobj(render_fd, &fence->sync);
      fence->sync = new_sync;
   }

   /* From the Vulkan 1.0.53 spec:
    *
    * "Importing a fence payload from a file descriptor transfers
    * ownership of the file descriptor from the application to the
    * Vulkan implementation. The application must not perform any
    * operations on the file descriptor after a successful import."
    *
    * If the import fails, we leave the file descriptor open.
    */
   if (fd != -1)
      close(fd);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyFence(VkDevice _device,
                  VkFence _fence,
                  const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);

   if (fence == NULL)
      return;

   destroy_syncobj(device->pdevice->render_fd, &fence->sync);
   destroy_syncobj(device->pdevice->render_fd, &fence->temp_sync);

   vk_object_free(&device->vk, pAllocator, fence);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);

   int ret = drmSyncobjWait(device->pdevice->render_fd, &fence->sync, 1,
                            0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
   if (ret == -ETIME)
      return VK_NOT_READY;
   else if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetFenceFdKHR(VkDevice _device,
                   const VkFenceGetFdInfoKHR *pGetFdInfo,
                   int *pFd)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, pGetFdInfo->fence);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR);

   *pFd = -1;
   int render_fd = device->pdevice->render_fd;
   switch (pGetFdInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
      drmSyncobjExportSyncFile(render_fd, fence->sync, pFd);
      if (*pFd == -1)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      break;
   }
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
      drmSyncobjHandleToFD(render_fd, fence->sync, pFd);
      if (*pFd == -1)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      break;
   default:
      unreachable("Unsupported external fence handle type");
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   uint32_t *syncobjs = vk_alloc(&device->vk.alloc,
                                 sizeof(*syncobjs) * fenceCount, 8,
                                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!syncobjs)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   int render_fd = device->pdevice->render_fd;
   uint32_t reset_count = 0;
   for (uint32_t i = 0; i < fenceCount; i++) {
      struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
      /* From the Vulkan spec, section 'Importing Fence Payloads':
       *
       * "If the import is temporary, the fence will be restored to its
       * permanent state the next time that fence is passed to
       * vkResetFences.
       *
       * Note: Restoring a fence to its prior permanent payload is a
       * distinct operation from resetting a fence payload."
       *
       * To restore the previous state, we just need to destroy the temporary.
       */
      if (fence->temp_sync)
         destroy_syncobj(render_fd, &fence->temp_sync);
      else
         syncobjs[reset_count++] = fence->sync;
   }

   int ret = 0;
   if (reset_count > 0)
      ret = drmSyncobjReset(render_fd, syncobjs, reset_count);

   vk_free(&device->vk.alloc, syncobjs);

   if (ret)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_WaitForFences(VkDevice _device,
                   uint32_t fenceCount,
                   const VkFence *pFences,
                   VkBool32 waitAll,
                   uint64_t timeout)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   const uint64_t abs_timeout = get_absolute_timeout(timeout);

   uint32_t *syncobjs = vk_alloc(&device->vk.alloc,
                                 sizeof(*syncobjs) * fenceCount, 8,
                                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!syncobjs)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < fenceCount; i++) {
      struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
      syncobjs[i] = fence->temp_sync ? fence->temp_sync : fence->sync;
   }

   unsigned flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (waitAll)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

   int ret;
   do {
      ret = drmSyncobjWait(device->pdevice->render_fd, syncobjs, fenceCount,
                           timeout, flags, NULL);
   } while (ret == -ETIME && gettime_ns() < abs_timeout);

   vk_free(&device->vk.alloc, syncobjs);

   if (ret == -ETIME)
      return VK_TIMEOUT;
   else if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_QueueBindSparse(VkQueue _queue,
                     uint32_t bindInfoCount,
                     const VkBindSparseInfo *pBindInfo,
                     VkFence fence)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
   return vk_error(queue->device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
}