GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_queue.c
/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "drm-uapi/v3d_drm.h"

#include "broadcom/clif/clif_dump.h"

#include <errno.h>
#include <time.h>

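/* Dumps the CL/CLIF contents of a job to stderr for debugging. Only active
 * when the V3D_DEBUG_CL or V3D_DEBUG_CLIF debug flags are set.
 */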
static void
v3dv_clif_dump(struct v3dv_device *device,
               struct v3dv_job *job,
               struct drm_v3d_submit_cl *submit)
{
   if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF)))
      return;

   struct clif_dump *clif = clif_dump_init(&device->devinfo,
                                           stderr,
                                           V3D_DEBUG & V3D_DEBUG_CL);

   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (void *)entry->key;
      char *name = ralloc_asprintf(NULL, "%s_0x%x",
                                   bo->name, bo->offset);

      bool ok = v3dv_bo_map(device, bo, bo->size);
      if (!ok) {
         fprintf(stderr, "failed to map BO for clif_dump.\n");
         ralloc_free(name);
         goto free_clif;
      }
      clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);

      ralloc_free(name);
   }

   clif_dump(clif, submit);

free_clif:
   clif_dump_destroy(clif);
}

static uint64_t
gettime_ns()
{
   struct timespec current;
   clock_gettime(CLOCK_MONOTONIC, &current);
   return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
}

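/* Converts a relative timeout in nanoseconds into an absolute deadline on
 * CLOCK_MONOTONIC, clamping the result so the addition cannot overflow
 * INT64_MAX.
 */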
static uint64_t
get_absolute_timeout(uint64_t timeout)
{
   uint64_t current_time = gettime_ns();
   uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;

   timeout = MIN2(max_timeout, timeout);

   return (current_time + timeout);
}

static VkResult
queue_submit_job(struct v3dv_queue *queue,
                 struct v3dv_job *job,
                 bool do_sem_wait,
                 pthread_t *wait_thread);

/* Waits for active CPU wait threads spawned before the current thread to
 * complete and submit all their GPU jobs.
 */
static void
cpu_queue_wait_idle(struct v3dv_queue *queue)
{
   const pthread_t this_thread = pthread_self();

retry:
   mtx_lock(&queue->mutex);
   list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
                       &queue->submit_wait_list, list_link) {
      for (uint32_t i = 0; i < info->wait_thread_count; i++) {
         if (info->wait_threads[i].finished)
            continue;

         /* Because we are testing this against the list of spawned threads
          * it will never match for the main thread, so when we call this from
          * the main thread we are effectively waiting for all active threads
          * to complete, and otherwise we are only waiting for work submitted
          * before the wait thread that called this (a wait thread should never
          * be waiting for work submitted after it).
          */
         if (info->wait_threads[i].thread == this_thread)
            goto done;

         /* Wait and try again */
         mtx_unlock(&queue->mutex);
         usleep(500); /* 0.5 ms */
         goto retry;
      }
   }

done:
   mtx_unlock(&queue->mutex);
}

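/* Waits for the last GPU job submitted from this device to complete. Every
 * GPU submission uses device->last_job_sync as its out-sync, so waiting on
 * that syncobj is equivalent to waiting for all previously submitted GPU
 * work.
 */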
static VkResult
gpu_queue_wait_idle(struct v3dv_queue *queue)
{
   struct v3dv_device *device = queue->device;

   mtx_lock(&device->mutex);
   uint32_t last_job_sync = device->last_job_sync;
   mtx_unlock(&device->mutex);

   int ret = drmSyncobjWait(device->pdevice->render_fd,
                            &last_job_sync, 1, INT64_MAX, 0, NULL);
   if (ret)
      return VK_ERROR_DEVICE_LOST;

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_QueueWaitIdle(VkQueue _queue)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);

   /* Check that we don't have any wait threads running in the CPU first,
    * as these can spawn new GPU jobs.
    */
   cpu_queue_wait_idle(queue);

   /* Check we don't have any GPU jobs running */
   return gpu_queue_wait_idle(queue);
}

static VkResult
handle_reset_query_cpu_job(struct v3dv_job *job)
{
   struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
   assert(info->pool);

   /* We are about to reset query counters so we need to make sure that
    * the GPU is not using them. The exception is timestamp queries, since
    * we handle those on the CPU.
    *
    * FIXME: we could avoid blocking the main thread for this if we used a
    * submission thread.
    */
   if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION)
      v3dv_bo_wait(job->device, info->pool->bo, PIPE_TIMEOUT_INFINITE);

   for (uint32_t i = info->first; i < info->first + info->count; i++) {
      assert(i < info->pool->query_count);
      struct v3dv_query *q = &info->pool->queries[i];
      q->maybe_available = false;
      switch (info->pool->query_type) {
      case VK_QUERY_TYPE_OCCLUSION: {
         const uint8_t *q_addr = ((uint8_t *) q->bo->map) + q->offset;
         uint32_t *counter = (uint32_t *) q_addr;
         *counter = 0;
         break;
      }
      case VK_QUERY_TYPE_TIMESTAMP:
         q->value = 0;
         break;
      default:
         unreachable("Unsupported query type");
      }
   }

   return VK_SUCCESS;
}

static VkResult
handle_end_query_cpu_job(struct v3dv_job *job)
{
   struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end;
   assert(info->query < info->pool->query_count);
   struct v3dv_query *query = &info->pool->queries[info->query];
   query->maybe_available = true;

   return VK_SUCCESS;
}

static VkResult
handle_copy_query_results_cpu_job(struct v3dv_job *job)
{
   struct v3dv_copy_query_results_cpu_job_info *info =
      &job->cpu.query_copy_results;

   assert(info->dst && info->dst->mem && info->dst->mem->bo);
   struct v3dv_bo *bo = info->dst->mem->bo;

   /* Map the entire dst buffer for the CPU copy if needed */
   assert(!bo->map || bo->map_size == bo->size);
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: if flags includes VK_QUERY_RESULT_WAIT_BIT this could trigger a
    * sync wait on the CPU for the corresponding GPU jobs to finish. We might
    * want to use a submission thread to avoid blocking the main thread.
    */
   uint8_t *offset = ((uint8_t *) bo->map) +
                     info->offset + info->dst->mem_offset;
   v3dv_get_query_pool_results_cpu(job->device,
                                   info->pool,
                                   info->first,
                                   info->count,
                                   offset,
                                   info->stride,
                                   info->flags);

   return VK_SUCCESS;
}

static VkResult
handle_set_event_cpu_job(struct v3dv_job *job, bool is_wait_thread)
{
   /* From the Vulkan 1.0 spec:
    *
    *    "When vkCmdSetEvent is submitted to a queue, it defines an execution
    *     dependency on commands that were submitted before it, and defines an
    *     event signal operation which sets the event to the signaled state.
    *     The first synchronization scope includes every command previously
    *     submitted to the same queue, including those in the same command
    *     buffer and batch".
    *
    * So we should wait for all prior work to be completed before signaling
    * the event; this includes all active CPU wait threads spawned for any
    * command buffer submitted *before* this.
    *
    * FIXME: we could avoid blocking the main thread for this if we use a
    * submission thread.
    */

   /* If we are calling this from a wait thread it will only wait for wait
    * threads spawned before it, otherwise it will wait for all active
    * threads to complete.
    */
   cpu_queue_wait_idle(&job->device->queue);

   VkResult result = gpu_queue_wait_idle(&job->device->queue);
   if (result != VK_SUCCESS)
      return result;

   struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
   p_atomic_set(&info->event->state, info->state);

   return VK_SUCCESS;
}

static bool
check_wait_events_complete(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);

   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
   for (uint32_t i = 0; i < info->event_count; i++) {
      if (!p_atomic_read(&info->events[i]->state))
         return false;
   }
   return true;
}

static void
wait_thread_finish(struct v3dv_queue *queue, pthread_t thread)
{
   mtx_lock(&queue->mutex);
   list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
                       &queue->submit_wait_list, list_link) {
      for (uint32_t i = 0; i < info->wait_thread_count; i++) {
         if (info->wait_threads[i].thread == thread) {
            info->wait_threads[i].finished = true;
            goto done;
         }
      }
   }

   unreachable("Failed to finish wait thread: not found");

done:
   mtx_unlock(&queue->mutex);
}

static void *
event_wait_thread_func(void *_job)
{
   struct v3dv_job *job = (struct v3dv_job *) _job;
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;

   /* Wait for events to be signaled */
   const useconds_t wait_interval_ms = 1;
   while (!check_wait_events_complete(job))
      usleep(wait_interval_ms * 1000);

   /* Now continue submitting pending jobs for the same command buffer after
    * the wait job.
    */
   struct v3dv_queue *queue = &job->device->queue;
   list_for_each_entry_from(struct v3dv_job, pjob, job->list_link.next,
                            &job->cmd_buffer->jobs, list_link) {
      /* We don't want to spawn more than one wait thread per command buffer.
       * If this job also requires a wait for events, we will do the wait here.
       */
      VkResult result = queue_submit_job(queue, pjob, info->sem_wait, NULL);
      if (result == VK_NOT_READY) {
         while (!check_wait_events_complete(pjob)) {
            usleep(wait_interval_ms * 1000);
         }
         result = VK_SUCCESS;
      }

      if (result != VK_SUCCESS) {
         fprintf(stderr, "Wait thread job execution failed.\n");
         goto done;
      }
   }

done:
   wait_thread_finish(queue, pthread_self());
   return NULL;
}

static VkResult
spawn_event_wait_thread(struct v3dv_job *job, pthread_t *wait_thread)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   assert(job->cmd_buffer);
   assert(wait_thread != NULL);

   if (pthread_create(wait_thread, NULL, event_wait_thread_func, job))
      return vk_error(job->device->instance, VK_ERROR_DEVICE_LOST);

   return VK_NOT_READY;
}

static VkResult
handle_wait_events_cpu_job(struct v3dv_job *job,
                           bool sem_wait,
                           pthread_t *wait_thread)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;

   /* If all events are signaled then we are done and can continue submitting
    * the rest of the command buffer normally.
    */
   if (check_wait_events_complete(job))
      return VK_SUCCESS;

   /* Otherwise, we put the rest of the command buffer on a wait thread until
    * all events are signaled. We only spawn a new thread on the first
    * wait job we see for a command buffer; any additional wait jobs in the
    * same command buffer will run in that same wait thread and will get here
    * with a NULL wait_thread pointer.
    *
    * Also, whether we spawn a wait thread or not, we always return
    * VK_NOT_READY (unless an error happened), so we stop trying to submit
    * any jobs in the same command buffer after the wait job. The wait thread
    * will attempt to submit them after the wait completes.
    */
   info->sem_wait = sem_wait;
   if (wait_thread)
      return spawn_event_wait_thread(job, wait_thread);
   else
      return VK_NOT_READY;
}

static VkResult
handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE);
   struct v3dv_copy_buffer_to_image_cpu_job_info *info =
      &job->cpu.copy_buffer_to_image;

   /* Wait for all GPU work to finish first, since we may be accessing
    * the BOs involved in the operation.
    */
   v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));

   /* Map BOs */
   struct v3dv_bo *dst_bo = info->image->mem->bo;
   assert(!dst_bo->map || dst_bo->map_size == dst_bo->size);
   if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   void *dst_ptr = dst_bo->map;

   struct v3dv_bo *src_bo = info->buffer->mem->bo;
   assert(!src_bo->map || src_bo->map_size == src_bo->size);
   if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   void *src_ptr = src_bo->map;

   const struct v3d_resource_slice *slice =
      &info->image->slices[info->mip_level];

   const struct pipe_box box = {
      info->image_offset.x, info->image_offset.y, info->base_layer,
      info->image_extent.width, info->image_extent.height, info->layer_count,
   };

   /* Copy each layer */
   for (uint32_t i = 0; i < info->layer_count; i++) {
      const uint32_t dst_offset =
         v3dv_layer_offset(info->image, info->mip_level, info->base_layer + i);
      const uint32_t src_offset =
         info->buffer->mem_offset + info->buffer_offset +
         info->buffer_layer_stride * i;
      v3d_store_tiled_image(
         dst_ptr + dst_offset, slice->stride,
         src_ptr + src_offset, info->buffer_stride,
         slice->tiling, info->image->cpp, slice->padded_height, &box);
   }

   return VK_SUCCESS;
}

static VkResult
handle_timestamp_query_cpu_job(struct v3dv_job *job)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
   struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;

   /* Wait for completion of all work queued before the timestamp query */
   v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));

   /* Compute timestamp */
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);
   assert(info->query < info->pool->query_count);
   struct v3dv_query *query = &info->pool->queries[info->query];
   query->maybe_available = true;
   query->value = t.tv_sec * 1000000000ull + t.tv_nsec;

   return VK_SUCCESS;
}

static VkResult
handle_csd_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait);

static VkResult
handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
                            struct v3dv_job *job,
                            bool do_sem_wait)
{
   assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT);
   struct v3dv_csd_indirect_cpu_job_info *info = &job->cpu.csd_indirect;
   assert(info->csd_job);

   /* Make sure the GPU is no longer using the indirect buffer */
   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
   v3dv_bo_wait(queue->device, info->buffer->mem->bo, PIPE_TIMEOUT_INFINITE);

   /* Map the indirect buffer and read the dispatch parameters */
   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
   struct v3dv_bo *bo = info->buffer->mem->bo;
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   assert(bo->map);

   const uint32_t offset = info->buffer->mem_offset + info->offset;
   const uint32_t *group_counts = (uint32_t *) (bo->map + offset);
   if (group_counts[0] == 0 || group_counts[1] == 0 || group_counts[2] == 0)
      return VK_SUCCESS;

   if (memcmp(group_counts, info->csd_job->csd.wg_count,
              sizeof(info->csd_job->csd.wg_count)) != 0) {
      v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts);
   }

   handle_csd_job(queue, info->csd_job, do_sem_wait);

   return VK_SUCCESS;
}

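/* Semaphore/fence signaling strategy: we export a sync file from
 * device->last_job_sync (the out-sync of the last GPU job submitted) and
 * import it into each syncobj to be signaled, so that waiting on any of
 * them waits for all GPU work submitted so far.
 */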
static VkResult
process_semaphores_to_signal(struct v3dv_device *device,
                             uint32_t count, const VkSemaphore *sems)
{
   if (count == 0)
      return VK_SUCCESS;

   int render_fd = device->pdevice->render_fd;

   int fd;
   mtx_lock(&device->mutex);
   drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd);
   mtx_unlock(&device->mutex);
   if (fd == -1)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = VK_SUCCESS;
   for (uint32_t i = 0; i < count; i++) {
      struct v3dv_semaphore *sem = v3dv_semaphore_from_handle(sems[i]);

      int ret;
      if (!sem->temp_sync)
         ret = drmSyncobjImportSyncFile(render_fd, sem->sync, fd);
      else
         ret = drmSyncobjImportSyncFile(render_fd, sem->temp_sync, fd);

      if (ret) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         break;
      }
   }

   assert(fd >= 0);
   close(fd);

   return result;
}

static VkResult
process_fence_to_signal(struct v3dv_device *device, VkFence _fence)
{
   if (_fence == VK_NULL_HANDLE)
      return VK_SUCCESS;

   struct v3dv_fence *fence = v3dv_fence_from_handle(_fence);

   int render_fd = device->pdevice->render_fd;

   int fd;
   mtx_lock(&device->mutex);
   drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd);
   mtx_unlock(&device->mutex);
   if (fd == -1)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   int ret;
   if (!fence->temp_sync)
      ret = drmSyncobjImportSyncFile(render_fd, fence->sync, fd);
   else
      ret = drmSyncobjImportSyncFile(render_fd, fence->temp_sync, fd);

   assert(fd >= 0);
   close(fd);

   return ret ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS;
}

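/* Submits a command-list (binning + render) job to the kernel with
 * DRM_IOCTL_V3D_SUBMIT_CL. Note that device->last_job_sync is used both as
 * the in-sync (when the job needs to be serialized) and as the out-sync, so
 * successive submissions are chained on a single syncobj.
 */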
static VkResult
handle_cl_job(struct v3dv_queue *queue,
              struct v3dv_job *job,
              bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   struct drm_v3d_submit_cl submit;

   /* Sanity check: we should only flag a bcl sync on a job that needs to be
    * serialized.
    */
   assert(job->serialize || !job->needs_bcl_sync);

   /* We expect to have just one RCL per job which should fit in just one BO.
    * Our BCL, however, could chain multiple BOs together.
    */
   assert(list_length(&job->rcl.bo_list) == 1);
   assert(list_length(&job->bcl.bo_list) >= 1);
   struct v3dv_bo *bcl_first_bo =
      list_first_entry(&job->bcl.bo_list, struct v3dv_bo, list_link);
   submit.bcl_start = bcl_first_bo->offset;
   submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl);
   submit.rcl_start = job->rcl.bo->offset;
   submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl);

   submit.qma = job->tile_alloc->offset;
   submit.qms = job->tile_alloc->size;
   submit.qts = job->tile_state->offset;

   submit.flags = 0;
   if (job->tmu_dirty_rcl)
      submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;

   submit.bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
      (uint32_t *) malloc(sizeof(uint32_t) * submit.bo_handle_count);
   uint32_t bo_idx = 0;
   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
      bo_handles[bo_idx++] = bo->handle;
   }
   assert(bo_idx == submit.bo_handle_count);
   submit.bo_handles = (uintptr_t)(void *)bo_handles;

   /* We need a binning sync if we are waiting on a semaphore (do_sem_wait) or
    * if the job comes after a pipeline barrier that involves geometry stages
    * (needs_bcl_sync).
    *
    * We need a render sync if the job doesn't need a binning sync but has
    * still been flagged for serialization. It should be noted that RCL jobs
    * don't start until the previous RCL job has finished, so we don't really
    * need to add a fence for those; however, we might need to wait on a CSD
    * or TFU job, which are not automatically serialized with CL jobs.
    *
    * FIXME: for now, if we are asked to wait on any semaphores, we just wait
    * on the last job we submitted. In the future we might want to pass the
    * actual syncobj of the wait semaphores so we don't block on the last RCL
    * if we only need to wait for a previous CSD or TFU, for example, but
    * we would have to extend our kernel interface to support the case where
    * we have more than one semaphore to wait on.
    */
   const bool needs_bcl_sync = do_sem_wait || job->needs_bcl_sync;
   const bool needs_rcl_sync = job->serialize && !needs_bcl_sync;

   mtx_lock(&queue->device->mutex);
   submit.in_sync_bcl = needs_bcl_sync ? device->last_job_sync : 0;
   submit.in_sync_rcl = needs_rcl_sync ? device->last_job_sync : 0;
   submit.out_sync = device->last_job_sync;
   v3dv_clif_dump(device, job, &submit);
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_SUBMIT_CL, &submit);
   mtx_unlock(&queue->device->mutex);

   static bool warned = false;
   if (ret && !warned) {
      fprintf(stderr, "Draw call returned %s. Expect corruption.\n",
              strerror(errno));
      warned = true;
   }

   free(bo_handles);

   if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);

   return VK_SUCCESS;
}

static VkResult
handle_tfu_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   const bool needs_sync = do_sem_wait || job->serialize;

   mtx_lock(&device->mutex);
   job->tfu.in_sync = needs_sync ? device->last_job_sync : 0;
   job->tfu.out_sync = device->last_job_sync;
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu);
   mtx_unlock(&device->mutex);

   if (ret != 0) {
      fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   }

   return VK_SUCCESS;
}

static VkResult
handle_csd_job(struct v3dv_queue *queue,
               struct v3dv_job *job,
               bool do_sem_wait)
{
   struct v3dv_device *device = queue->device;

   struct drm_v3d_submit_csd *submit = &job->csd.submit;

   submit->bo_handle_count = job->bo_count;
   uint32_t *bo_handles =
      (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2));
   uint32_t bo_idx = 0;
   set_foreach(job->bos, entry) {
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
      bo_handles[bo_idx++] = bo->handle;
   }
   assert(bo_idx == submit->bo_handle_count);
   submit->bo_handles = (uintptr_t)(void *)bo_handles;

   const bool needs_sync = do_sem_wait || job->serialize;

   mtx_lock(&queue->device->mutex);
   submit->in_sync = needs_sync ? device->last_job_sync : 0;
   submit->out_sync = device->last_job_sync;
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_SUBMIT_CSD, submit);
   mtx_unlock(&queue->device->mutex);

   static bool warned = false;
   if (ret && !warned) {
      fprintf(stderr, "Compute dispatch returned %s. Expect corruption.\n",
              strerror(errno));
      warned = true;
   }

   free(bo_handles);

   if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);

   return VK_SUCCESS;
}

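/* Dispatches a single job to the right handler: GPU jobs (CL, TFU, CSD) are
 * submitted to the kernel, while CPU jobs are executed directly on the host.
 */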
static VkResult
queue_submit_job(struct v3dv_queue *queue,
                 struct v3dv_job *job,
                 bool do_sem_wait,
                 pthread_t *wait_thread)
{
   assert(job);

   switch (job->type) {
   case V3DV_JOB_TYPE_GPU_CL:
      return handle_cl_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_GPU_TFU:
      return handle_tfu_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_GPU_CSD:
      return handle_csd_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
      return handle_reset_query_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_END_QUERY:
      return handle_end_query_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
      return handle_copy_query_results_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_SET_EVENT:
      return handle_set_event_cpu_job(job, wait_thread != NULL);
   case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
      return handle_wait_events_cpu_job(job, do_sem_wait, wait_thread);
   case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
      return handle_copy_buffer_to_image_cpu_job(job);
   case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
      return handle_csd_indirect_cpu_job(queue, job, do_sem_wait);
   case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
      return handle_timestamp_query_cpu_job(job);
   default:
      unreachable("Unhandled job type");
   }
}

static VkResult
queue_create_noop_job(struct v3dv_queue *queue)
{
   struct v3dv_device *device = queue->device;
   queue->noop_job = vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_job), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!queue->noop_job)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1);

   v3dv_X(device, job_emit_noop)(queue->noop_job);

   return VK_SUCCESS;
}

static VkResult
queue_submit_noop_job(struct v3dv_queue *queue, const VkSubmitInfo *pSubmit)
{
   /* VkQueue host access is externally synchronized so we don't need to lock
    * here to access queue->noop_job.
    */
   if (!queue->noop_job) {
      VkResult result = queue_create_noop_job(queue);
      if (result != VK_SUCCESS)
         return result;
   }

   return queue_submit_job(queue, queue->noop_job,
                           pSubmit->waitSemaphoreCount > 0, NULL);
}

static VkResult
queue_submit_cmd_buffer(struct v3dv_queue *queue,
                        struct v3dv_cmd_buffer *cmd_buffer,
                        const VkSubmitInfo *pSubmit,
                        pthread_t *wait_thread)
{
   assert(cmd_buffer);
   assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_EXECUTABLE);

   if (list_is_empty(&cmd_buffer->jobs))
      return queue_submit_noop_job(queue, pSubmit);

   list_for_each_entry_safe(struct v3dv_job, job,
                            &cmd_buffer->jobs, list_link) {
      VkResult result = queue_submit_job(queue, job,
                                         pSubmit->waitSemaphoreCount > 0,
                                         wait_thread);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static void
add_wait_thread_to_list(struct v3dv_device *device,
                        pthread_t thread,
                        struct v3dv_queue_submit_wait_info **wait_info)
{
   /* If this is the first time we spawn a wait thread for this queue
    * submission, create a v3dv_queue_submit_wait_info to track this and
    * any other threads in the same submission, and add it to the global
    * list in the queue.
    */
   if (*wait_info == NULL) {
      *wait_info =
         vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_queue_submit_wait_info), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      (*wait_info)->device = device;
   }

   /* And add the thread to the list of wait threads for this submission */
   const uint32_t thread_idx = (*wait_info)->wait_thread_count;
   assert(thread_idx < 16);
   (*wait_info)->wait_threads[thread_idx].thread = thread;
   (*wait_info)->wait_threads[thread_idx].finished = false;
   (*wait_info)->wait_thread_count++;
}

static void
add_signal_semaphores_to_wait_list(struct v3dv_device *device,
                                   const VkSubmitInfo *pSubmit,
                                   struct v3dv_queue_submit_wait_info *wait_info)
{
   assert(wait_info);

   if (pSubmit->signalSemaphoreCount == 0)
      return;

   /* FIXME: We put all the semaphores in a list and we signal all of them
    * together from the submit master thread when the last wait thread in the
    * submit completes. We could do better though: group the semaphores per
    * submit and signal them as soon as all wait threads for a particular
    * submit complete. Not sure if the extra work would be worth it though,
    * since we only spawn wait threads for event waits and only when the
    * event is set from the host after the queue submission.
    */

   /* Check the size of the current semaphore list */
   const uint32_t prev_count = wait_info->signal_semaphore_count;
   const uint32_t prev_alloc_size = prev_count * sizeof(VkSemaphore);
   VkSemaphore *prev_list = wait_info->signal_semaphores;

   /* Resize the list to hold the additional semaphores */
   const uint32_t extra_alloc_size =
      pSubmit->signalSemaphoreCount * sizeof(VkSemaphore);
   wait_info->signal_semaphore_count += pSubmit->signalSemaphoreCount;
   wait_info->signal_semaphores =
      vk_alloc(&device->vk.alloc, prev_alloc_size + extra_alloc_size, 8,
               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);

   /* Copy the old list to the new allocation and free the old list */
   if (prev_count > 0) {
      memcpy(wait_info->signal_semaphores, prev_list, prev_alloc_size);
      vk_free(&device->vk.alloc, prev_list);
   }

   /* Add the new semaphores to the list */
   memcpy(wait_info->signal_semaphores + prev_count,
          pSubmit->pSignalSemaphores, extra_alloc_size);
}

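/* Submits all the command buffers in a single VkSubmitInfo. Returns
 * VK_NOT_READY if any command buffer spawned an event wait thread, in which
 * case the signal semaphores of the batch are deferred to the master wait
 * thread.
 */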
static VkResult
queue_submit_cmd_buffer_batch(struct v3dv_queue *queue,
                              const VkSubmitInfo *pSubmit,
                              struct v3dv_queue_submit_wait_info **wait_info)
{
   VkResult result = VK_SUCCESS;
   bool has_wait_threads = false;

   /* Even if we don't have any actual work to submit we still need to wait
    * on the wait semaphores and signal the signal semaphores and fence, so
    * in this scenario we just submit a trivial no-op job so we don't have
    * to do anything special; it should not be a common case anyway.
    */
   if (pSubmit->commandBufferCount == 0) {
      result = queue_submit_noop_job(queue, pSubmit);
   } else {
      for (uint32_t i = 0; i < pSubmit->commandBufferCount; i++) {
         pthread_t wait_thread;
         struct v3dv_cmd_buffer *cmd_buffer =
            v3dv_cmd_buffer_from_handle(pSubmit->pCommandBuffers[i]);
         result = queue_submit_cmd_buffer(queue, cmd_buffer, pSubmit,
                                          &wait_thread);

         /* We get VK_NOT_READY if we had to spawn a wait thread for the
          * command buffer. In that scenario, we want to continue submitting
          * any pending command buffers in the batch, but we don't want to
          * process any signal semaphores for the batch until we know we have
          * submitted every job for every command buffer in the batch.
          */
         if (result == VK_NOT_READY) {
            result = VK_SUCCESS;
            add_wait_thread_to_list(queue->device, wait_thread, wait_info);
            has_wait_threads = true;
         }

         if (result != VK_SUCCESS)
            break;
      }
   }

   if (result != VK_SUCCESS)
      return result;

   /* If we had to spawn any wait threads in this submit we need to wait for
    * all of them to complete before we can signal any semaphores.
    */
   if (!has_wait_threads) {
      return process_semaphores_to_signal(queue->device,
                                          pSubmit->signalSemaphoreCount,
                                          pSubmit->pSignalSemaphores);
   } else {
      assert(*wait_info);
      add_signal_semaphores_to_wait_list(queue->device, pSubmit, *wait_info);
      return VK_NOT_READY;
   }
}

944
master_wait_thread_func(void *_wait_info)
945
{
946
struct v3dv_queue_submit_wait_info *wait_info =
947
(struct v3dv_queue_submit_wait_info *) _wait_info;
948
949
struct v3dv_queue *queue = &wait_info->device->queue;
950
951
/* Wait for all command buffer wait threads to complete */
952
for (uint32_t i = 0; i < wait_info->wait_thread_count; i++) {
953
int res = pthread_join(wait_info->wait_threads[i].thread, NULL);
954
if (res != 0)
955
fprintf(stderr, "Wait thread failed to join.\n");
956
}
957
958
/* Signal semaphores and fences */
959
VkResult result;
960
result = process_semaphores_to_signal(wait_info->device,
961
wait_info->signal_semaphore_count,
962
wait_info->signal_semaphores);
963
if (result != VK_SUCCESS)
964
fprintf(stderr, "Wait thread semaphore signaling failed.");
965
966
result = process_fence_to_signal(wait_info->device, wait_info->fence);
967
if (result != VK_SUCCESS)
968
fprintf(stderr, "Wait thread fence signaling failed.");
969
970
/* Release wait_info */
971
mtx_lock(&queue->mutex);
972
list_del(&wait_info->list_link);
973
mtx_unlock(&queue->mutex);
974
975
vk_free(&wait_info->device->vk.alloc, wait_info->signal_semaphores);
976
vk_free(&wait_info->device->vk.alloc, wait_info);
977
978
return NULL;
979
}
980
981
982
static VkResult
983
spawn_master_wait_thread(struct v3dv_queue *queue,
984
struct v3dv_queue_submit_wait_info *wait_info)
985
986
{
987
VkResult result = VK_SUCCESS;
988
989
mtx_lock(&queue->mutex);
990
if (pthread_create(&wait_info->master_wait_thread, NULL,
991
master_wait_thread_func, wait_info)) {
992
result = vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
993
goto done;
994
}
995
996
list_addtail(&wait_info->list_link, &queue->submit_wait_list);
997
998
done:
999
mtx_unlock(&queue->mutex);
1000
return result;
1001
}
1002
1003
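/* vkQueueSubmit: submits each batch in order. If any event wait threads were
 * spawned, fence and semaphore signaling is handed off to a master wait
 * thread; otherwise the fence is signaled here once all batches have been
 * submitted.
 */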
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_QueueSubmit(VkQueue _queue,
                 uint32_t submitCount,
                 const VkSubmitInfo* pSubmits,
                 VkFence fence)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);

   struct v3dv_queue_submit_wait_info *wait_info = NULL;

   VkResult result = VK_SUCCESS;
   for (uint32_t i = 0; i < submitCount; i++) {
      result = queue_submit_cmd_buffer_batch(queue, &pSubmits[i], &wait_info);
      if (result != VK_SUCCESS && result != VK_NOT_READY)
         goto done;
   }

   if (!wait_info) {
      assert(result != VK_NOT_READY);
      result = process_fence_to_signal(queue->device, fence);
      goto done;
   }

   /* We spawned wait threads, so we have to spawn a master thread for this
    * queue submission that waits for all other threads to complete and then
    * signals any semaphores and fences.
    */
   assert(wait_info);
   wait_info->fence = fence;
   result = spawn_master_wait_thread(queue, wait_info);

done:
   return result;
}

static void
destroy_syncobj(uint32_t device_fd, uint32_t *sync)
{
   assert(sync);
   drmSyncobjDestroy(device_fd, *sync);
   *sync = 0;
}

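/* Semaphores and fences are both backed by DRM syncobjs: 'sync' holds the
 * permanent payload and 'temp_sync' an optional temporary payload installed
 * by an import operation.
 */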
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateSemaphore(VkDevice _device,
                     const VkSemaphoreCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkSemaphore *pSemaphore)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);

   struct v3dv_semaphore *sem =
      vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_semaphore),
                       VK_OBJECT_TYPE_SEMAPHORE);
   if (sem == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   int ret = drmSyncobjCreate(device->pdevice->render_fd, 0, &sem->sync);
   if (ret) {
      vk_object_free(&device->vk, pAllocator, sem);
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *pSemaphore = v3dv_semaphore_to_handle(sem);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceExternalSemaphoreProperties(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
   VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
{
   switch (pExternalSemaphoreInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->compatibleHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;

      /* FIXME: we can't import external semaphores until we improve the
       * kernel submit interface to handle multiple in syncobjs, because once
       * we have an imported semaphore in our list of semaphores to wait on,
       * we can no longer use the workaround of waiting on the last syncobj
       * fence produced from the device, since the imported semaphore may not
       * (and in fact, it would typically not) have been produced from the
       * same device.
       *
       * This behavior is exercised via dEQP-VK.synchronization.cross_instance.*.
       * Particularly, this test:
       * dEQP-VK.synchronization.cross_instance.dedicated.
       * write_ssbo_compute_read_vertex_input.buffer_16384_binary_semaphore_fd
       * fails consistently because of this, so it'll be a good reference to
       * verify the implementation when the kernel bits are in place.
       */
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;

      /* FIXME: See the comment in GetPhysicalDeviceExternalFenceProperties
       * for details on why we can't export to SYNC_FD.
       */
      if (pExternalSemaphoreInfo->handleType !=
          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
         pExternalSemaphoreProperties->externalSemaphoreFeatures |=
            VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT;
      }
      break;
   default:
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
      break;
   }
}

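/* Imports either a syncobj (OPAQUE_FD) or a sync file (SYNC_FD) payload.
 * SYNC_FD imports are always treated as temporary, as required by the spec.
 */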
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ImportSemaphoreFdKHR(
   VkDevice _device,
   const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);

   assert(pImportSemaphoreFdInfo->sType ==
          VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR);

   int fd = pImportSemaphoreFdInfo->fd;
   int render_fd = device->pdevice->render_fd;

   bool is_temporary =
      pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT ||
      (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT);

   uint32_t new_sync;
   switch (pImportSemaphoreFdInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: {
      /* "If handleType is VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, the
       *  special value -1 for fd is treated like a valid sync file descriptor
       *  referring to an object that has already signaled. The import
       *  operation will succeed and the VkSemaphore will have a temporarily
       *  imported payload as if a valid file descriptor had been provided."
       */
      unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0;
      if (drmSyncobjCreate(render_fd, flags, &new_sync))
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

      if (fd != -1) {
         if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) {
            drmSyncobjDestroy(render_fd, new_sync);
            return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
         }
      }
      break;
   }
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: {
      if (drmSyncobjFDToHandle(render_fd, fd, &new_sync))
         return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
      break;
   }
   default:
      return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }

   destroy_syncobj(render_fd, &sem->temp_sync);
   if (is_temporary) {
      sem->temp_sync = new_sync;
   } else {
      destroy_syncobj(render_fd, &sem->sync);
      sem->sync = new_sync;
   }

   /* From the Vulkan 1.0.53 spec:
    *
    *    "Importing a semaphore payload from a file descriptor transfers
    *     ownership of the file descriptor from the application to the
    *     Vulkan implementation. The application must not perform any
    *     operations on the file descriptor after a successful import."
    *
    * If the import fails, we leave the file descriptor open.
    */
   if (fd != -1)
      close(fd);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetSemaphoreFdKHR(VkDevice _device,
                       const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
                       int *pFd)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_semaphore, sem, pGetFdInfo->semaphore);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR);

   *pFd = -1;
   int render_fd = device->pdevice->render_fd;
   switch (pGetFdInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
      drmSyncobjExportSyncFile(render_fd, sem->sync, pFd);
      if (*pFd == -1)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      break;
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
      drmSyncobjHandleToFD(render_fd, sem->sync, pFd);
      if (*pFd == -1)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      break;
   default:
      unreachable("Unsupported external semaphore handle type");
   }

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroySemaphore(VkDevice _device,
                      VkSemaphore semaphore,
                      const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_semaphore, sem, semaphore);

   if (sem == NULL)
      return;

   destroy_syncobj(device->pdevice->render_fd, &sem->sync);
   destroy_syncobj(device->pdevice->render_fd, &sem->temp_sync);

   vk_object_free(&device->vk, pAllocator, sem);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateFence(VkDevice _device,
                 const VkFenceCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkFence *pFence)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);

   struct v3dv_fence *fence =
      vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_fence),
                       VK_OBJECT_TYPE_FENCE);
   if (fence == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   unsigned flags = 0;
   if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
      flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
   int ret = drmSyncobjCreate(device->pdevice->render_fd, flags, &fence->sync);
   if (ret) {
      vk_object_free(&device->vk, pAllocator, fence);
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *pFence = v3dv_fence_to_handle(fence);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceExternalFenceProperties(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
   VkExternalFenceProperties *pExternalFenceProperties)
{
   switch (pExternalFenceInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
      pExternalFenceProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalFenceProperties->compatibleHandleTypes =
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalFenceProperties->externalFenceFeatures =
         VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;

      /* FIXME: SYNC_FD exports the actual fence referenced by the syncobj,
       * not the syncobj itself, and that fence is only created after we have
       * submitted to the kernel and updated the syncobj for the fence to
       * import the actual DRM fence created with the submission.
       * Unfortunately, if the queue submission has a 'wait for events' we may
       * hold any jobs after the wait in a user-space thread until the events
       * are signaled, and in that case we don't update the out fence of the
       * submit until the events are signaled and we can submit all the jobs
       * involved with the vkQueueSubmit call. This means that if the
       * application submits with an out fence and a wait for events, trying
       * to export the out fence to a SYNC_FD right after the submission and
       * before the events are signaled will fail, because the actual DRM
       * fence won't exist yet. This is not a problem with OPAQUE_FD because
       * in this case we export the entire syncobj, not the underlying DRM
       * fence. To fix this we need to rework our kernel interface to be more
       * flexible and accept multiple in/out syncobjs so we can implement
       * event waits as regular fence waits on the kernel side; until then,
       * we can only reliably export OPAQUE_FD.
       */
      if (pExternalFenceInfo->handleType !=
          VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
         pExternalFenceProperties->externalFenceFeatures |=
            VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT;
      }
      break;
   default:
      pExternalFenceProperties->exportFromImportedHandleTypes = 0;
      pExternalFenceProperties->compatibleHandleTypes = 0;
      pExternalFenceProperties->externalFenceFeatures = 0;
      break;
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ImportFenceFdKHR(VkDevice _device,
                      const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, pImportFenceFdInfo->fence);

   assert(pImportFenceFdInfo->sType ==
          VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR);

   int fd = pImportFenceFdInfo->fd;
   int render_fd = device->pdevice->render_fd;

   bool is_temporary =
      pImportFenceFdInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT ||
      (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT);

   uint32_t new_sync;
   switch (pImportFenceFdInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
      /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the
       *  special value -1 for fd is treated like a valid sync file descriptor
       *  referring to an object that has already signaled. The import
       *  operation will succeed and the VkFence will have a temporarily
       *  imported payload as if a valid file descriptor had been provided."
       */
      unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0;
      if (drmSyncobjCreate(render_fd, flags, &new_sync))
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

      if (fd != -1) {
         if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) {
            drmSyncobjDestroy(render_fd, new_sync);
            return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
         }
      }
      break;
   }
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: {
      if (drmSyncobjFDToHandle(render_fd, fd, &new_sync))
         return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
      break;
   }
   default:
      return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }

   destroy_syncobj(render_fd, &fence->temp_sync);
   if (is_temporary) {
      fence->temp_sync = new_sync;
   } else {
      destroy_syncobj(render_fd, &fence->sync);
      fence->sync = new_sync;
   }

   /* From the Vulkan 1.0.53 spec:
    *
    *    "Importing a fence payload from a file descriptor transfers
    *     ownership of the file descriptor from the application to the
    *     Vulkan implementation. The application must not perform any
    *     operations on the file descriptor after a successful import."
    *
    * If the import fails, we leave the file descriptor open.
    */
   if (fd != -1)
      close(fd);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyFence(VkDevice _device,
                  VkFence _fence,
                  const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);

   if (fence == NULL)
      return;

   destroy_syncobj(device->pdevice->render_fd, &fence->sync);
   destroy_syncobj(device->pdevice->render_fd, &fence->temp_sync);

   vk_object_free(&device->vk, pAllocator, fence);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);

   int ret = drmSyncobjWait(device->pdevice->render_fd, &fence->sync, 1,
                            0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
   if (ret == -ETIME)
      return VK_NOT_READY;
   else if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetFenceFdKHR(VkDevice _device,
                   const VkFenceGetFdInfoKHR *pGetFdInfo,
                   int *pFd)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_fence, fence, pGetFdInfo->fence);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR);

   *pFd = -1;
   int render_fd = device->pdevice->render_fd;
   switch (pGetFdInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
      drmSyncobjExportSyncFile(render_fd, fence->sync, pFd);
      if (*pFd == -1)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      break;
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
      drmSyncobjHandleToFD(render_fd, fence->sync, pFd);
      if (*pFd == -1)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      break;
   default:
      unreachable("Unsupported external fence handle type");
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   uint32_t *syncobjs = vk_alloc(&device->vk.alloc,
                                 sizeof(*syncobjs) * fenceCount, 8,
                                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!syncobjs)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   int render_fd = device->pdevice->render_fd;
   uint32_t reset_count = 0;
   for (uint32_t i = 0; i < fenceCount; i++) {
      struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
      /* From the Vulkan spec, section 'Importing Fence Payloads':
       *
       *    "If the import is temporary, the fence will be restored to its
       *     permanent state the next time that fence is passed to
       *     vkResetFences.
       *
       *     Note: Restoring a fence to its prior permanent payload is a
       *     distinct operation from resetting a fence payload."
       *
       * To restore the previous state, we just need to destroy the temporary.
       */
      if (fence->temp_sync)
         destroy_syncobj(render_fd, &fence->temp_sync);
      else
         syncobjs[reset_count++] = fence->sync;
   }

   int ret = 0;
   if (reset_count > 0)
      ret = drmSyncobjReset(render_fd, syncobjs, reset_count);

   vk_free(&device->vk.alloc, syncobjs);

   if (ret)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_WaitForFences(VkDevice _device,
                   uint32_t fenceCount,
                   const VkFence *pFences,
                   VkBool32 waitAll,
                   uint64_t timeout)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   const uint64_t abs_timeout = get_absolute_timeout(timeout);

   uint32_t *syncobjs = vk_alloc(&device->vk.alloc,
                                 sizeof(*syncobjs) * fenceCount, 8,
                                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!syncobjs)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < fenceCount; i++) {
      struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
      syncobjs[i] = fence->temp_sync ? fence->temp_sync : fence->sync;
   }

   unsigned flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (waitAll)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

   int ret;
   do {
      ret = drmSyncobjWait(device->pdevice->render_fd, syncobjs, fenceCount,
                           abs_timeout, flags, NULL);
   } while (ret == -ETIME && gettime_ns() < abs_timeout);

   vk_free(&device->vk.alloc, syncobjs);

   if (ret == -ETIME)
      return VK_TIMEOUT;
   else if (ret)
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_QueueBindSparse(VkQueue _queue,
                     uint32_t bindInfoCount,
                     const VkBindSparseInfo *pBindInfo,
                     VkFence fence)
{
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
   return vk_error(queue->device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
}