CoCalc -- v3dv

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_queue.c
⁴⁵⁶⁰ views
1
/*
2
 * Copyright © 2019 Raspberry Pi
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 */
23

24
#include "v3dv_private.h"
25
#include "drm-uapi/v3d_drm.h"
26

27
#include "broadcom/clif/clif_dump.h"
28

29
#include <errno.h>
30
#include <time.h>
31

32
static void
33
v3dv_clif_dump(struct v3dv_device *device,
34
               struct v3dv_job *job,
35
               struct drm_v3d_submit_cl *submit)
36
{
37
   if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF)))
38
      return;
39

40
   struct clif_dump *clif = clif_dump_init(&device->devinfo,
41
                                           stderr,
42
                                           V3D_DEBUG & V3D_DEBUG_CL);
43

44
   set_foreach(job->bos, entry) {
45
      struct v3dv_bo *bo = (void *)entry->key;
46
      char *name = ralloc_asprintf(NULL, "%s_0x%x",
47
                                   bo->name, bo->offset);
48

49
      bool ok = v3dv_bo_map(device, bo, bo->size);
50
      if (!ok) {
51
         fprintf(stderr, "failed to map BO for clif_dump.\n");
52
         ralloc_free(name);
53
         goto free_clif;
54
      }
55
      clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
56

57
      ralloc_free(name);
58
   }
59

60
   clif_dump(clif, submit);
61

62
 free_clif:
63
   clif_dump_destroy(clif);
64
}
65

66
static uint64_t
67
gettime_ns()
68
{
69
   struct timespec current;
70
   clock_gettime(CLOCK_MONOTONIC, &current);
71
   return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
72
}
73

74
static uint64_t
75
get_absolute_timeout(uint64_t timeout)
76
{
77
   uint64_t current_time = gettime_ns();
78
   uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;
79

80
   timeout = MIN2(max_timeout, timeout);
81

82
   return (current_time + timeout);
83
}
84

85
static VkResult
86
queue_submit_job(struct v3dv_queue *queue,
87
                 struct v3dv_job *job,
88
                 bool do_sem_wait,
89
                 pthread_t *wait_thread);
90

91
/* Waits for active CPU wait threads spawned before the current thread to
92
 * complete and submit all their GPU jobs.
93
 */
94
static void
95
cpu_queue_wait_idle(struct v3dv_queue *queue)
96
{
97
   const pthread_t this_thread = pthread_self();
98

99
retry:
100
   mtx_lock(&queue->mutex);
101
   list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
102
                       &queue->submit_wait_list, list_link) {
103
      for (uint32_t  i = 0; i < info->wait_thread_count; i++) {
104
         if (info->wait_threads[i].finished)
105
            continue;
106

107
         /* Because we are testing this against the list of spawned threads
108
          * it will never match for the main thread, so when we call this from
109
          * the main thread we are effectively waiting for all active threads
110
          * to complete, and otherwise we are only waiting for work submitted
111
          * before the wait thread that called this (a wait thread should never
112
          * be waiting for work submitted after it).
113
          */
114
         if (info->wait_threads[i].thread == this_thread)
115
            goto done;
116

117
         /* Wait and try again */
118
         mtx_unlock(&queue->mutex);
119
         usleep(500); /* 0.5 ms */
120
         goto retry;
121
      }
122
   }
123

124
done:
125
   mtx_unlock(&queue->mutex);
126
}
127

128
static VkResult
129
gpu_queue_wait_idle(struct v3dv_queue *queue)
130
{
131
   struct v3dv_device *device = queue->device;
132

133
   mtx_lock(&device->mutex);
134
   uint32_t last_job_sync = device->last_job_sync;
135
   mtx_unlock(&device->mutex);
136

137
   int ret = drmSyncobjWait(device->pdevice->render_fd,
138
                            &last_job_sync, 1, INT64_MAX, 0, NULL);
139
   if (ret)
140
      return VK_ERROR_DEVICE_LOST;
141

142
   return VK_SUCCESS;
143
}
144

145
VKAPI_ATTR VkResult VKAPI_CALL
146
v3dv_QueueWaitIdle(VkQueue _queue)
147
{
148
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
149

150
   /* Check that we don't have any wait threads running in the CPU first,
151
    * as these can spawn new GPU jobs.
152
    */
153
   cpu_queue_wait_idle(queue);
154

155
   /* Check we don't have any GPU jobs running */
156
   return gpu_queue_wait_idle(queue);
157
}
158

159
static VkResult
160
handle_reset_query_cpu_job(struct v3dv_job *job)
161
{
162
   struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
163
   assert(info->pool);
164

165
   /* We are about to reset query counters so we need to make sure that
166
    * The GPU is not using them. The exception is timestamp queries, since
167
    * we handle those in the CPU.
168
    *
169
    * FIXME: we could avoid blocking the main thread for this if we use
170
    *        submission thread.
171
    */
172
   if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION)
173
         v3dv_bo_wait(job->device, info->pool->bo, PIPE_TIMEOUT_INFINITE);
174

175
   for (uint32_t i = info->first; i < info->first + info->count; i++) {
176
      assert(i < info->pool->query_count);
177
      struct v3dv_query *q = &info->pool->queries[i];
178
      q->maybe_available = false;
179
      switch (info->pool->query_type) {
180
      case VK_QUERY_TYPE_OCCLUSION: {
181
         const uint8_t *q_addr = ((uint8_t *) q->bo->map) + q->offset;
182
         uint32_t *counter = (uint32_t *) q_addr;
183
         *counter = 0;
184
         break;
185
      }
186
      case VK_QUERY_TYPE_TIMESTAMP:
187
         q->value = 0;
188
         break;
189
      default:
190
         unreachable("Unsupported query type");
191
      }
192
   }
193

194
   return VK_SUCCESS;
195
}
196

197
static VkResult
198
handle_end_query_cpu_job(struct v3dv_job *job)
199
{
200
   struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end;
201
   assert(info->query < info->pool->query_count);
202
   struct v3dv_query *query = &info->pool->queries[info->query];
203
   query->maybe_available = true;
204

205
   return VK_SUCCESS;
206
}
207

208
static VkResult
209
handle_copy_query_results_cpu_job(struct v3dv_job *job)
210
{
211
   struct v3dv_copy_query_results_cpu_job_info *info =
212
      &job->cpu.query_copy_results;
213

214
   assert(info->dst && info->dst->mem && info->dst->mem->bo);
215
   struct v3dv_bo *bo = info->dst->mem->bo;
216

217
   /* Map the entire dst buffer for the CPU copy if needed */
218
   assert(!bo->map || bo->map_size == bo->size);
219
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
220
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
221

222
   /* FIXME: if flags includes VK_QUERY_RESULT_WAIT_BIT this could trigger a
223
    * sync wait on the CPU for the corresponding GPU jobs to finish. We might
224
    * want to use a submission thread to avoid blocking on the main thread.
225
    */
226
   uint8_t *offset = ((uint8_t *) bo->map) +
227
                     info->offset + info->dst->mem_offset;
228
   v3dv_get_query_pool_results_cpu(job->device,
229
                                   info->pool,
230
                                   info->first,
231
                                   info->count,
232
                                   offset,
233
                                   info->stride,
234
                                   info->flags);
235

236
   return VK_SUCCESS;
237
}
238

239
static VkResult
240
handle_set_event_cpu_job(struct v3dv_job *job, bool is_wait_thread)
241
{
242
   /* From the Vulkan 1.0 spec:
243
    *
244
    *    "When vkCmdSetEvent is submitted to a queue, it defines an execution
245
    *     dependency on commands that were submitted before it, and defines an
246
    *     event signal operation which sets the event to the signaled state.
247
    *     The first synchronization scope includes every command previously
248
    *     submitted to the same queue, including those in the same command
249
    *     buffer and batch".
250
    *
251
    * So we should wait for all prior work to be completed before signaling
252
    * the event, this includes all active CPU wait threads spawned for any
253
    * command buffer submitted *before* this.
254
    *
255
    * FIXME: we could avoid blocking the main thread for this if we use a
256
    *        submission thread.
257
    */
258

259
   /* If we are calling this from a wait thread it will only wait
260
    * wait threads sspawned before it, otherwise it will wait for
261
    * all active threads to complete.
262
    */
263
   cpu_queue_wait_idle(&job->device->queue);
264

265
   VkResult result = gpu_queue_wait_idle(&job->device->queue);
266
   if (result != VK_SUCCESS)
267
      return result;
268

269
   struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
270
   p_atomic_set(&info->event->state, info->state);
271

272
   return VK_SUCCESS;
273
}
274

275
static bool
276
check_wait_events_complete(struct v3dv_job *job)
277
{
278
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
279

280
   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
281
   for (uint32_t i = 0; i < info->event_count; i++) {
282
      if (!p_atomic_read(&info->events[i]->state))
283
         return false;
284
   }
285
   return true;
286
}
287

288
static void
289
wait_thread_finish(struct v3dv_queue *queue, pthread_t thread)
290
{
291
   mtx_lock(&queue->mutex);
292
   list_for_each_entry(struct v3dv_queue_submit_wait_info, info,
293
                       &queue->submit_wait_list, list_link) {
294
      for (uint32_t  i = 0; i < info->wait_thread_count; i++) {
295
         if (info->wait_threads[i].thread == thread) {
296
            info->wait_threads[i].finished = true;
297
            goto done;
298
         }
299
      }
300
   }
301

302
   unreachable(!"Failed to finish wait thread: not found");
303

304
done:
305
   mtx_unlock(&queue->mutex);
306
}
307

308
static void *
309
event_wait_thread_func(void *_job)
310
{
311
   struct v3dv_job *job = (struct v3dv_job *) _job;
312
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
313
   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
314

315
   /* Wait for events to be signaled */
316
   const useconds_t wait_interval_ms = 1;
317
   while (!check_wait_events_complete(job))
318
      usleep(wait_interval_ms * 1000);
319

320
   /* Now continue submitting pending jobs for the same command buffer after
321
    * the wait job.
322
    */
323
   struct v3dv_queue *queue = &job->device->queue;
324
   list_for_each_entry_from(struct v3dv_job, pjob, job->list_link.next,
325
                            &job->cmd_buffer->jobs, list_link) {
326
      /* We don't want to spawn more than one wait thread per command buffer.
327
       * If this job also requires a wait for events, we will do the wait here.
328
       */
329
      VkResult result = queue_submit_job(queue, pjob, info->sem_wait, NULL);
330
      if (result == VK_NOT_READY) {
331
         while (!check_wait_events_complete(pjob)) {
332
            usleep(wait_interval_ms * 1000);
333
         }
334
         result = VK_SUCCESS;
335
      }
336

337
      if (result != VK_SUCCESS) {
338
         fprintf(stderr, "Wait thread job execution failed.\n");
339
         goto done;
340
      }
341
   }
342

343
done:
344
   wait_thread_finish(queue, pthread_self());
345
   return NULL;
346
}
347

348
static VkResult
349
spawn_event_wait_thread(struct v3dv_job *job, pthread_t *wait_thread)
350

351
{
352
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
353
   assert(job->cmd_buffer);
354
   assert(wait_thread != NULL);
355

356
   if (pthread_create(wait_thread, NULL, event_wait_thread_func, job))
357
      return vk_error(job->device->instance, VK_ERROR_DEVICE_LOST);
358

359
   return VK_NOT_READY;
360
}
361

362
static VkResult
363
handle_wait_events_cpu_job(struct v3dv_job *job,
364
                           bool sem_wait,
365
                           pthread_t *wait_thread)
366
{
367
   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
368
   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
369

370
   /* If all events are signaled then we are done and can continue submitting
371
    * the rest of the command buffer normally.
372
    */
373
   if (check_wait_events_complete(job))
374
      return VK_SUCCESS;
375

376
   /* Otherwise, we put the rest of the command buffer on a wait thread until
377
    * all events are signaled. We only spawn a new thread on the first
378
    * wait job we see for a command buffer, any additional wait jobs in the
379
    * same command buffer will run in that same wait thread and will get here
380
    * with a NULL wait_thread pointer.
381
    *
382
    * Also, whether we spawn a wait thread or not, we always return
383
    * VK_NOT_READY (unless an error happened), so we stop trying to submit
384
    * any jobs in the same command buffer after the wait job. The wait thread
385
    * will attempt to submit them after the wait completes.
386
    */
387
   info->sem_wait = sem_wait;
388
   if (wait_thread)
389
      return spawn_event_wait_thread(job, wait_thread);
390
   else
391
      return VK_NOT_READY;
392
}
393

394
static VkResult
395
handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)
396
{
397
   assert(job->type == V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE);
398
   struct v3dv_copy_buffer_to_image_cpu_job_info *info =
399
      &job->cpu.copy_buffer_to_image;
400

401
   /* Wait for all GPU work to finish first, since we may be accessing
402
    * the BOs involved in the operation.
403
    */
404
   v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));
405

406
   /* Map BOs */
407
   struct v3dv_bo *dst_bo = info->image->mem->bo;
408
   assert(!dst_bo->map || dst_bo->map_size == dst_bo->size);
409
   if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size))
410
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
411
   void *dst_ptr = dst_bo->map;
412

413
   struct v3dv_bo *src_bo = info->buffer->mem->bo;
414
   assert(!src_bo->map || src_bo->map_size == src_bo->size);
415
   if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size))
416
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
417
   void *src_ptr = src_bo->map;
418

419
   const struct v3d_resource_slice *slice =
420
      &info->image->slices[info->mip_level];
421

422
   const struct pipe_box box = {
423
      info->image_offset.x, info->image_offset.y, info->base_layer,
424
      info->image_extent.width, info->image_extent.height, info->layer_count,
425
   };
426

427
   /* Copy each layer */
428
   for (uint32_t i = 0; i < info->layer_count; i++) {
429
      const uint32_t dst_offset =
430
         v3dv_layer_offset(info->image, info->mip_level, info->base_layer + i);
431
      const uint32_t src_offset =
432
         info->buffer->mem_offset + info->buffer_offset +
433
         info->buffer_layer_stride * i;
434
      v3d_store_tiled_image(
435
         dst_ptr + dst_offset, slice->stride,
436
         src_ptr + src_offset, info->buffer_stride,
437
         slice->tiling, info->image->cpp, slice->padded_height, &box);
438
   }
439

440
   return VK_SUCCESS;
441
}
442

443
static VkResult
444
handle_timestamp_query_cpu_job(struct v3dv_job *job)
445
{
446
   assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
447
   struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;
448

449
   /* Wait for completion of all work queued before the timestamp query */
450
   v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));
451

452
   /* Compute timestamp */
453
   struct timespec t;
454
   clock_gettime(CLOCK_MONOTONIC, &t);
455
   assert(info->query < info->pool->query_count);
456
   struct v3dv_query *query = &info->pool->queries[info->query];
457
   query->maybe_available = true;
458
   query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
459

460
   return VK_SUCCESS;
461
}
462

463
static VkResult
464
handle_csd_job(struct v3dv_queue *queue,
465
               struct v3dv_job *job,
466
               bool do_sem_wait);
467

468
static VkResult
469
handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
470
                            struct v3dv_job *job,
471
                            bool do_sem_wait)
472
{
473
   assert(job->type == V3DV_JOB_TYPE_CPU_CSD_INDIRECT);
474
   struct v3dv_csd_indirect_cpu_job_info *info = &job->cpu.csd_indirect;
475
   assert(info->csd_job);
476

477
   /* Make sure the GPU is no longer using the indirect buffer*/
478
   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
479
   v3dv_bo_wait(queue->device, info->buffer->mem->bo, PIPE_TIMEOUT_INFINITE);
480

481
   /* Map the indirect buffer and read the dispatch parameters */
482
   assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
483
   struct v3dv_bo *bo = info->buffer->mem->bo;
484
   if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
485
      return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
486
   assert(bo->map);
487

488
   const uint32_t offset = info->buffer->mem_offset + info->offset;
489
   const uint32_t *group_counts = (uint32_t *) (bo->map + offset);
490
   if (group_counts[0] == 0 || group_counts[1] == 0|| group_counts[2] == 0)
491
      return VK_SUCCESS;
492

493
   if (memcmp(group_counts, info->csd_job->csd.wg_count,
494
              sizeof(info->csd_job->csd.wg_count)) != 0) {
495
      v3dv_cmd_buffer_rewrite_indirect_csd_job(info, group_counts);
496
   }
497

498
   handle_csd_job(queue, info->csd_job, do_sem_wait);
499

500
   return VK_SUCCESS;
501
}
502

503
static VkResult
504
process_semaphores_to_signal(struct v3dv_device *device,
505
                             uint32_t count, const VkSemaphore *sems)
506
{
507
   if (count == 0)
508
      return VK_SUCCESS;
509

510
   int render_fd = device->pdevice->render_fd;
511

512
   int fd;
513
   mtx_lock(&device->mutex);
514
   drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd);
515
   mtx_unlock(&device->mutex);
516
   if (fd == -1)
517
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
518

519
   VkResult result = VK_SUCCESS;
520
   for (uint32_t i = 0; i < count; i++) {
521
      struct v3dv_semaphore *sem = v3dv_semaphore_from_handle(sems[i]);
522

523
      int ret;
524
      if (!sem->temp_sync)
525
         ret = drmSyncobjImportSyncFile(render_fd, sem->sync, fd);
526
      else
527
         ret = drmSyncobjImportSyncFile(render_fd, sem->temp_sync, fd);
528

529
      if (ret) {
530
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
531
         break;
532
      }
533
   }
534

535
   assert(fd >= 0);
536
   close(fd);
537

538
   return result;
539
}
540

541
static VkResult
542
process_fence_to_signal(struct v3dv_device *device, VkFence _fence)
543
{
544
   if (_fence == VK_NULL_HANDLE)
545
      return VK_SUCCESS;
546

547
   struct v3dv_fence *fence = v3dv_fence_from_handle(_fence);
548

549
   int render_fd = device->pdevice->render_fd;
550

551
   int fd;
552
   mtx_lock(&device->mutex);
553
   drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd);
554
   mtx_unlock(&device->mutex);
555
   if (fd == -1)
556
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
557

558
   int ret;
559
   if (!fence->temp_sync)
560
      ret = drmSyncobjImportSyncFile(render_fd, fence->sync, fd);
561
   else
562
      ret = drmSyncobjImportSyncFile(render_fd, fence->temp_sync, fd);
563

564
   assert(fd >= 0);
565
   close(fd);
566

567
   return ret ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS;
568
}
569

570
static VkResult
571
handle_cl_job(struct v3dv_queue *queue,
572
              struct v3dv_job *job,
573
              bool do_sem_wait)
574
{
575
   struct v3dv_device *device = queue->device;
576

577
   struct drm_v3d_submit_cl submit;
578

579
   /* Sanity check: we should only flag a bcl sync on a job that needs to be
580
    * serialized.
581
    */
582
   assert(job->serialize || !job->needs_bcl_sync);
583

584
   /* We expect to have just one RCL per job which should fit in just one BO.
585
    * Our BCL, could chain multiple BOS together though.
586
    */
587
   assert(list_length(&job->rcl.bo_list) == 1);
588
   assert(list_length(&job->bcl.bo_list) >= 1);
589
   struct v3dv_bo *bcl_fist_bo =
590
      list_first_entry(&job->bcl.bo_list, struct v3dv_bo, list_link);
591
   submit.bcl_start = bcl_fist_bo->offset;
592
   submit.bcl_end = job->bcl.bo->offset + v3dv_cl_offset(&job->bcl);
593
   submit.rcl_start = job->rcl.bo->offset;
594
   submit.rcl_end = job->rcl.bo->offset + v3dv_cl_offset(&job->rcl);
595

596
   submit.qma = job->tile_alloc->offset;
597
   submit.qms = job->tile_alloc->size;
598
   submit.qts = job->tile_state->offset;
599

600
   submit.flags = 0;
601
   if (job->tmu_dirty_rcl)
602
      submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;
603

604
   submit.bo_handle_count = job->bo_count;
605
   uint32_t *bo_handles =
606
      (uint32_t *) malloc(sizeof(uint32_t) * submit.bo_handle_count);
607
   uint32_t bo_idx = 0;
608
   set_foreach(job->bos, entry) {
609
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
610
      bo_handles[bo_idx++] = bo->handle;
611
   }
612
   assert(bo_idx == submit.bo_handle_count);
613
   submit.bo_handles = (uintptr_t)(void *)bo_handles;
614

615
   /* We need a binning sync if we are waiting on a sempahore (do_sem_wait) or
616
    * if the job comes after a pipeline barrier than involves geometry stages
617
    * (needs_bcl_sync).
618
    *
619
    * We need a render sync if the job doesn't need a binning sync but has
620
    * still been flagged for serialization. It should be noted that RCL jobs
621
    * don't start until the previous RCL job has finished so we don't really
622
    * need to add a fence for those, however, we might need to wait on a CSD or
623
    * TFU job, which are not automatically serialized with CL jobs.
624
    *
625
    * FIXME: for now, if we are asked to wait on any semaphores, we just wait
626
    * on the last job we submitted. In the future we might want to pass the
627
    * actual syncobj of the wait semaphores so we don't block on the last RCL
628
    * if we only need to wait for a previous CSD or TFU, for example, but
629
    * we would have to extend our kernel interface to support the case where
630
    * we have more than one semaphore to wait on.
631
    */
632
   const bool needs_bcl_sync = do_sem_wait || job->needs_bcl_sync;
633
   const bool needs_rcl_sync = job->serialize && !needs_bcl_sync;
634

635
   mtx_lock(&queue->device->mutex);
636
   submit.in_sync_bcl = needs_bcl_sync ? device->last_job_sync : 0;
637
   submit.in_sync_rcl = needs_rcl_sync ? device->last_job_sync : 0;
638
   submit.out_sync = device->last_job_sync;
639
   v3dv_clif_dump(device, job, &submit);
640
   int ret = v3dv_ioctl(device->pdevice->render_fd,
641
                        DRM_IOCTL_V3D_SUBMIT_CL, &submit);
642
   mtx_unlock(&queue->device->mutex);
643

644
   static bool warned = false;
645
   if (ret && !warned) {
646
      fprintf(stderr, "Draw call returned %s. Expect corruption.\n",
647
              strerror(errno));
648
      warned = true;
649
   }
650

651
   free(bo_handles);
652

653
   if (ret)
654
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
655

656
   return VK_SUCCESS;
657
}
658

659
static VkResult
660
handle_tfu_job(struct v3dv_queue *queue,
661
               struct v3dv_job *job,
662
               bool do_sem_wait)
663
{
664
   struct v3dv_device *device = queue->device;
665

666
   const bool needs_sync = do_sem_wait || job->serialize;
667

668
   mtx_lock(&device->mutex);
669
   job->tfu.in_sync = needs_sync ? device->last_job_sync : 0;
670
   job->tfu.out_sync = device->last_job_sync;
671
   int ret = v3dv_ioctl(device->pdevice->render_fd,
672
                        DRM_IOCTL_V3D_SUBMIT_TFU, &job->tfu);
673
   mtx_unlock(&device->mutex);
674

675
   if (ret != 0) {
676
      fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
677
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
678
   }
679

680
   return VK_SUCCESS;
681
}
682

683
static VkResult
684
handle_csd_job(struct v3dv_queue *queue,
685
               struct v3dv_job *job,
686
               bool do_sem_wait)
687
{
688
   struct v3dv_device *device = queue->device;
689

690
   struct drm_v3d_submit_csd *submit = &job->csd.submit;
691

692
   submit->bo_handle_count = job->bo_count;
693
   uint32_t *bo_handles =
694
      (uint32_t *) malloc(sizeof(uint32_t) * MAX2(4, submit->bo_handle_count * 2));
695
   uint32_t bo_idx = 0;
696
   set_foreach(job->bos, entry) {
697
      struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
698
      bo_handles[bo_idx++] = bo->handle;
699
   }
700
   assert(bo_idx == submit->bo_handle_count);
701
   submit->bo_handles = (uintptr_t)(void *)bo_handles;
702

703
   const bool needs_sync = do_sem_wait || job->serialize;
704

705
   mtx_lock(&queue->device->mutex);
706
   submit->in_sync = needs_sync ? device->last_job_sync : 0;
707
   submit->out_sync = device->last_job_sync;
708
   int ret = v3dv_ioctl(device->pdevice->render_fd,
709
                        DRM_IOCTL_V3D_SUBMIT_CSD, submit);
710
   mtx_unlock(&queue->device->mutex);
711

712
   static bool warned = false;
713
   if (ret && !warned) {
714
      fprintf(stderr, "Compute dispatch returned %s. Expect corruption.\n",
715
              strerror(errno));
716
      warned = true;
717
   }
718

719
   free(bo_handles);
720

721
   if (ret)
722
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
723

724
   return VK_SUCCESS;
725
}
726

727
static VkResult
728
queue_submit_job(struct v3dv_queue *queue,
729
                 struct v3dv_job *job,
730
                 bool do_sem_wait,
731
                 pthread_t *wait_thread)
732
{
733
   assert(job);
734

735
   switch (job->type) {
736
   case V3DV_JOB_TYPE_GPU_CL:
737
      return handle_cl_job(queue, job, do_sem_wait);
738
   case V3DV_JOB_TYPE_GPU_TFU:
739
      return handle_tfu_job(queue, job, do_sem_wait);
740
   case V3DV_JOB_TYPE_GPU_CSD:
741
      return handle_csd_job(queue, job, do_sem_wait);
742
   case V3DV_JOB_TYPE_CPU_RESET_QUERIES:
743
      return handle_reset_query_cpu_job(job);
744
   case V3DV_JOB_TYPE_CPU_END_QUERY:
745
      return handle_end_query_cpu_job(job);
746
   case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
747
      return handle_copy_query_results_cpu_job(job);
748
   case V3DV_JOB_TYPE_CPU_SET_EVENT:
749
      return handle_set_event_cpu_job(job, wait_thread != NULL);
750
   case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
751
      return handle_wait_events_cpu_job(job, do_sem_wait, wait_thread);
752
   case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
753
      return handle_copy_buffer_to_image_cpu_job(job);
754
   case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
755
      return handle_csd_indirect_cpu_job(queue, job, do_sem_wait);
756
   case V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY:
757
      return handle_timestamp_query_cpu_job(job);
758
   default:
759
      unreachable("Unhandled job type");
760
   }
761
}
762

763
static VkResult
764
queue_create_noop_job(struct v3dv_queue *queue)
765
{
766
   struct v3dv_device *device = queue->device;
767
   queue->noop_job = vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_job), 8,
768
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
769
   if (!queue->noop_job)
770
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
771
   v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1);
772

773
   v3dv_X(device, job_emit_noop)(queue->noop_job);
774

775
   return VK_SUCCESS;
776
}
777

778
static VkResult
779
queue_submit_noop_job(struct v3dv_queue *queue, const VkSubmitInfo *pSubmit)
780
{
781
   /* VkQueue host access is externally synchronized so we don't need to lock
782
    * here for the static variable.
783
    */
784
   if (!queue->noop_job) {
785
      VkResult result = queue_create_noop_job(queue);
786
      if (result != VK_SUCCESS)
787
         return result;
788
   }
789

790
   return queue_submit_job(queue, queue->noop_job,
791
                           pSubmit->waitSemaphoreCount > 0, NULL);
792
}
793

794
static VkResult
795
queue_submit_cmd_buffer(struct v3dv_queue *queue,
796
                        struct v3dv_cmd_buffer *cmd_buffer,
797
                        const VkSubmitInfo *pSubmit,
798
                        pthread_t *wait_thread)
799
{
800
   assert(cmd_buffer);
801
   assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_EXECUTABLE);
802

803
   if (list_is_empty(&cmd_buffer->jobs))
804
      return queue_submit_noop_job(queue, pSubmit);
805

806
   list_for_each_entry_safe(struct v3dv_job, job,
807
                            &cmd_buffer->jobs, list_link) {
808
      VkResult result = queue_submit_job(queue, job,
809
                                         pSubmit->waitSemaphoreCount > 0,
810
                                         wait_thread);
811
      if (result != VK_SUCCESS)
812
         return result;
813
   }
814

815
   return VK_SUCCESS;
816
}
817

818
static void
819
add_wait_thread_to_list(struct v3dv_device *device,
820
                        pthread_t thread,
821
                        struct v3dv_queue_submit_wait_info **wait_info)
822
{
823
   /* If this is the first time we spawn a wait thread for this queue
824
    * submission create a v3dv_queue_submit_wait_info to track this and
825
    * any other threads in the same submission and add it to the global list
826
    * in the queue.
827
    */
828
   if (*wait_info == NULL) {
829
      *wait_info =
830
         vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_queue_submit_wait_info), 8,
831
                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
832
      (*wait_info)->device = device;
833
   }
834

835
   /* And add the thread to the list of wait threads for this submission */
836
   const uint32_t thread_idx = (*wait_info)->wait_thread_count;
837
   assert(thread_idx < 16);
838
   (*wait_info)->wait_threads[thread_idx].thread = thread;
839
   (*wait_info)->wait_threads[thread_idx].finished = false;
840
   (*wait_info)->wait_thread_count++;
841
}
842

843
static void
844
add_signal_semaphores_to_wait_list(struct v3dv_device *device,
845
                                   const VkSubmitInfo *pSubmit,
846
                                   struct v3dv_queue_submit_wait_info *wait_info)
847
{
848
   assert(wait_info);
849

850
   if (pSubmit->signalSemaphoreCount == 0)
851
      return;
852

853
   /* FIXME: We put all the semaphores in a list and we signal all of them
854
    * together from the submit master thread when the last wait thread in the
855
    * submit completes. We could do better though: group the semaphores per
856
    * submit and signal them as soon as all wait threads for a particular
857
    * submit completes. Not sure if the extra work would be worth it though,
858
    * since we only spawn waith threads for event waits and only when the
859
    * event if set from the host after the queue submission.
860
    */
861

862
   /* Check the size of the current semaphore list */
863
   const uint32_t prev_count = wait_info->signal_semaphore_count;
864
   const uint32_t prev_alloc_size = prev_count * sizeof(VkSemaphore);
865
   VkSemaphore *prev_list = wait_info->signal_semaphores;
866

867
   /* Resize the list to hold the additional semaphores */
868
   const uint32_t extra_alloc_size =
869
      pSubmit->signalSemaphoreCount * sizeof(VkSemaphore);
870
   wait_info->signal_semaphore_count += pSubmit->signalSemaphoreCount;
871
   wait_info->signal_semaphores =
872
      vk_alloc(&device->vk.alloc, prev_alloc_size + extra_alloc_size, 8,
873
               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
874

875
   /* Copy the old list to the new allocation and free the old list */
876
   if (prev_count > 0) {
877
      memcpy(wait_info->signal_semaphores, prev_list, prev_alloc_size);
878
      vk_free(&device->vk.alloc, prev_list);
879
   }
880

881
   /* Add the new semaphores to the list */
882
   memcpy(wait_info->signal_semaphores + prev_count,
883
          pSubmit->pSignalSemaphores, extra_alloc_size);
884
}
885

886
static VkResult
887
queue_submit_cmd_buffer_batch(struct v3dv_queue *queue,
888
                              const VkSubmitInfo *pSubmit,
889
                              struct v3dv_queue_submit_wait_info **wait_info)
890
{
891
   VkResult result = VK_SUCCESS;
892
   bool has_wait_threads = false;
893

894
   /* Even if we don't have any actual work to submit we still need to wait
895
    * on the wait semaphores and signal the signal semaphores and fence, so
896
    * in this scenario we just submit a trivial no-op job so we don't have
897
    * to do anything special, it should not be a common case anyway.
898
    */
899
   if (pSubmit->commandBufferCount == 0) {
900
      result = queue_submit_noop_job(queue, pSubmit);
901
   } else {
902
      for (uint32_t i = 0; i < pSubmit->commandBufferCount; i++) {
903
         pthread_t wait_thread;
904
         struct v3dv_cmd_buffer *cmd_buffer =
905
            v3dv_cmd_buffer_from_handle(pSubmit->pCommandBuffers[i]);
906
         result = queue_submit_cmd_buffer(queue, cmd_buffer, pSubmit,
907
                                          &wait_thread);
908

909
         /* We get VK_NOT_READY if we had to spawn a wait thread for the
910
          * command buffer. In that scenario, we want to continue submitting
911
          * any pending command buffers in the batch, but we don't want to
912
          * process any signal semaphores for the batch until we know we have
913
          * submitted every job for every command buffer in the batch.
914
          */
915
         if (result == VK_NOT_READY) {
916
            result = VK_SUCCESS;
917
            add_wait_thread_to_list(queue->device, wait_thread, wait_info);
918
            has_wait_threads = true;
919
         }
920

921
         if (result != VK_SUCCESS)
922
            break;
923
      }
924
   }
925

926
   if (result != VK_SUCCESS)
927
      return result;
928

929
   /* If had to emit any wait threads in this submit we need to wait for all
930
    * of them to complete before we can signal any semaphores.
931
    */
932
   if (!has_wait_threads) {
933
      return process_semaphores_to_signal(queue->device,
934
                                          pSubmit->signalSemaphoreCount,
935
                                          pSubmit->pSignalSemaphores);
936
   } else {
937
      assert(*wait_info);
938
      add_signal_semaphores_to_wait_list(queue->device, pSubmit, *wait_info);
939
      return VK_NOT_READY;
940
   }
941
}
942

943
static void *
944
master_wait_thread_func(void *_wait_info)
945
{
946
   struct v3dv_queue_submit_wait_info *wait_info =
947
      (struct v3dv_queue_submit_wait_info *) _wait_info;
948

949
   struct v3dv_queue *queue = &wait_info->device->queue;
950

951
   /* Wait for all command buffer wait threads to complete */
952
   for (uint32_t i = 0; i < wait_info->wait_thread_count; i++) {
953
      int res = pthread_join(wait_info->wait_threads[i].thread, NULL);
954
      if (res != 0)
955
         fprintf(stderr, "Wait thread failed to join.\n");
956
   }
957

958
   /* Signal semaphores and fences */
959
   VkResult result;
960
   result = process_semaphores_to_signal(wait_info->device,
961
                                         wait_info->signal_semaphore_count,
962
                                         wait_info->signal_semaphores);
963
   if (result != VK_SUCCESS)
964
      fprintf(stderr, "Wait thread semaphore signaling failed.");
965

966
   result = process_fence_to_signal(wait_info->device, wait_info->fence);
967
   if (result != VK_SUCCESS)
968
      fprintf(stderr, "Wait thread fence signaling failed.");
969

970
   /* Release wait_info */
971
   mtx_lock(&queue->mutex);
972
   list_del(&wait_info->list_link);
973
   mtx_unlock(&queue->mutex);
974

975
   vk_free(&wait_info->device->vk.alloc, wait_info->signal_semaphores);
976
   vk_free(&wait_info->device->vk.alloc, wait_info);
977

978
   return NULL;
979
}
980

981

982
static VkResult
983
spawn_master_wait_thread(struct v3dv_queue *queue,
984
                         struct v3dv_queue_submit_wait_info *wait_info)
985

986
{
987
   VkResult result = VK_SUCCESS;
988

989
   mtx_lock(&queue->mutex);
990
   if (pthread_create(&wait_info->master_wait_thread, NULL,
991
                      master_wait_thread_func, wait_info)) {
992
      result = vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
993
      goto done;
994
   }
995

996
   list_addtail(&wait_info->list_link, &queue->submit_wait_list);
997

998
done:
999
   mtx_unlock(&queue->mutex);
1000
   return result;
1001
}
1002

1003
VKAPI_ATTR VkResult VKAPI_CALL
1004
v3dv_QueueSubmit(VkQueue _queue,
1005
                 uint32_t submitCount,
1006
                 const VkSubmitInfo* pSubmits,
1007
                 VkFence fence)
1008
{
1009
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
1010

1011
   struct v3dv_queue_submit_wait_info *wait_info = NULL;
1012

1013
   VkResult result = VK_SUCCESS;
1014
   for (uint32_t i = 0; i < submitCount; i++) {
1015
      result = queue_submit_cmd_buffer_batch(queue, &pSubmits[i], &wait_info);
1016
      if (result != VK_SUCCESS && result != VK_NOT_READY)
1017
         goto done;
1018
   }
1019

1020
   if (!wait_info) {
1021
      assert(result != VK_NOT_READY);
1022
      result = process_fence_to_signal(queue->device, fence);
1023
      goto done;
1024
   }
1025

1026
   /* We emitted wait threads, so we have to spwan a master thread for this
1027
    * queue submission that waits for all other threads to complete and then
1028
    * will signal any semaphores and fences.
1029
    */
1030
   assert(wait_info);
1031
   wait_info->fence = fence;
1032
   result = spawn_master_wait_thread(queue, wait_info);
1033

1034
done:
1035
   return result;
1036
}
1037

1038
static void
1039
destroy_syncobj(uint32_t device_fd, uint32_t *sync)
1040
{
1041
   assert(sync);
1042
   drmSyncobjDestroy(device_fd, *sync);
1043
   *sync = 0;
1044
}
1045

1046
VKAPI_ATTR VkResult VKAPI_CALL
1047
v3dv_CreateSemaphore(VkDevice _device,
1048
                     const VkSemaphoreCreateInfo *pCreateInfo,
1049
                     const VkAllocationCallbacks *pAllocator,
1050
                     VkSemaphore *pSemaphore)
1051
{
1052
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1053

1054
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);
1055

1056
   struct v3dv_semaphore *sem =
1057
      vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_semaphore),
1058
                       VK_OBJECT_TYPE_SEMAPHORE);
1059
   if (sem == NULL)
1060
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1061

1062
   int ret = drmSyncobjCreate(device->pdevice->render_fd, 0, &sem->sync);
1063
   if (ret) {
1064
      vk_object_free(&device->vk, pAllocator, sem);
1065
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1066
   }
1067

1068
   *pSemaphore = v3dv_semaphore_to_handle(sem);
1069

1070
   return VK_SUCCESS;
1071
}
1072

1073
VKAPI_ATTR void VKAPI_CALL
1074
v3dv_GetPhysicalDeviceExternalSemaphoreProperties(
1075
    VkPhysicalDevice physicalDevice,
1076
    const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
1077
    VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
1078
{
1079
   switch (pExternalSemaphoreInfo->handleType) {
1080
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
1081
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
1082
      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
1083
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
1084
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
1085
      pExternalSemaphoreProperties->compatibleHandleTypes =
1086
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
1087
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
1088

1089
      /* FIXME: we can't import external semaphores until we improve the kernel
1090
       * submit interface to handle multiple in syncobjs, because once we have
1091
       * an imported semaphore in our list of semaphores to wait on, we can no
1092
       * longer use the workaround of waiting on the last syncobj fence produced
1093
       * from the device, since the imported semaphore may not (and in fact, it
1094
       * would typically not) have been produced from same device.
1095
       *
1096
       * This behavior is exercised via dEQP-VK.synchronization.cross_instance.*.
1097
       * Particularly, this test:
1098
       * dEQP-VK.synchronization.cross_instance.dedicated.
1099
       * write_ssbo_compute_read_vertex_input.buffer_16384_binary_semaphore_fd
1100
       * fails consistently because of this, so it'll be a good reference to
1101
       * verify the implementation when the kernel bits are in place.
1102
       */
1103
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
1104

1105
      /* FIXME: See comment in GetPhysicalDeviceExternalFenceProperties
1106
       * for details on why we can't export to SYNC_FD.
1107
       */
1108
      if (pExternalSemaphoreInfo->handleType !=
1109
          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
1110
         pExternalSemaphoreProperties->externalSemaphoreFeatures |=
1111
            VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT;
1112
      }
1113
      break;
1114
   default:
1115
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
1116
      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
1117
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
1118
      break;
1119
   }
1120
}
1121

1122
VKAPI_ATTR VkResult VKAPI_CALL
1123
v3dv_ImportSemaphoreFdKHR(
1124
   VkDevice _device,
1125
   const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
1126
{
1127
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1128
   V3DV_FROM_HANDLE(v3dv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
1129

1130
   assert(pImportSemaphoreFdInfo->sType ==
1131
          VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR);
1132

1133
   int fd = pImportSemaphoreFdInfo->fd;
1134
   int render_fd = device->pdevice->render_fd;
1135

1136
   bool is_temporary =
1137
      pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT ||
1138
      (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT);
1139

1140
   uint32_t new_sync;
1141
   switch (pImportSemaphoreFdInfo->handleType) {
1142
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: {
1143
      /* "If handleType is VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, the
1144
       *  special value -1 for fd is treated like a valid sync file descriptor
1145
       *  referring to an object that has already signaled. The import
1146
       *  operation will succeed and the VkSemaphore will have a temporarily
1147
       *  imported payload as if a valid file descriptor had been provided."
1148
       */
1149
      unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0;
1150
      if (drmSyncobjCreate(render_fd, flags, &new_sync))
1151
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1152

1153
      if (fd != -1) {
1154
         if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) {
1155
            drmSyncobjDestroy(render_fd, new_sync);
1156
            return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1157
         }
1158
      }
1159
      break;
1160
   }
1161
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: {
1162
      if (drmSyncobjFDToHandle(render_fd, fd, &new_sync))
1163
         return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1164
      break;
1165
   }
1166
   default:
1167
      return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1168
   }
1169

1170
   destroy_syncobj(render_fd, &sem->temp_sync);
1171
   if (is_temporary) {
1172
      sem->temp_sync = new_sync;
1173
   } else {
1174
      destroy_syncobj(render_fd, &sem->sync);
1175
      sem->sync = new_sync;
1176
   }
1177

1178
   /* From the Vulkan 1.0.53 spec:
1179
    *
1180
    *    "Importing a semaphore payload from a file descriptor transfers
1181
    *     ownership of the file descriptor from the application to the
1182
    *     Vulkan implementation. The application must not perform any
1183
    *     operations on the file descriptor after a successful import."
1184
    *
1185
    * If the import fails, we leave the file descriptor open.
1186
    */
1187
   if (fd != -1)
1188
      close(fd);
1189

1190
   return VK_SUCCESS;
1191
}
1192

1193
VKAPI_ATTR VkResult VKAPI_CALL
1194
v3dv_GetSemaphoreFdKHR(VkDevice _device,
1195
                       const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
1196
                       int *pFd)
1197
{
1198
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1199
   V3DV_FROM_HANDLE(v3dv_semaphore, sem, pGetFdInfo->semaphore);
1200

1201
   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR);
1202

1203
   *pFd = -1;
1204
   int render_fd = device->pdevice->render_fd;
1205
   switch (pGetFdInfo->handleType) {
1206
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: {
1207
      drmSyncobjExportSyncFile(render_fd, sem->sync, pFd);
1208
      if (*pFd == -1)
1209
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1210
      break;
1211
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
1212
      drmSyncobjHandleToFD(render_fd, sem->sync, pFd);
1213
      if (*pFd == -1)
1214
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1215
      break;
1216
   }
1217
   default:
1218
      unreachable("Unsupported external semaphore handle type");
1219
   }
1220

1221
   return VK_SUCCESS;
1222
}
1223

1224
VKAPI_ATTR void VKAPI_CALL
1225
v3dv_DestroySemaphore(VkDevice _device,
1226
                      VkSemaphore semaphore,
1227
                      const VkAllocationCallbacks *pAllocator)
1228
{
1229
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1230
   V3DV_FROM_HANDLE(v3dv_semaphore, sem, semaphore);
1231

1232
   if (sem == NULL)
1233
      return;
1234

1235
   destroy_syncobj(device->pdevice->render_fd, &sem->sync);
1236
   destroy_syncobj(device->pdevice->render_fd, &sem->temp_sync);
1237

1238
   vk_object_free(&device->vk, pAllocator, sem);
1239
}
1240

1241
VKAPI_ATTR VkResult VKAPI_CALL
1242
v3dv_CreateFence(VkDevice _device,
1243
                 const VkFenceCreateInfo *pCreateInfo,
1244
                 const VkAllocationCallbacks *pAllocator,
1245
                 VkFence *pFence)
1246
{
1247
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1248

1249
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1250

1251
   struct v3dv_fence *fence =
1252
      vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_fence),
1253
                       VK_OBJECT_TYPE_FENCE);
1254
   if (fence == NULL)
1255
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1256

1257
   unsigned flags = 0;
1258
   if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
1259
      flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
1260
   int ret = drmSyncobjCreate(device->pdevice->render_fd, flags, &fence->sync);
1261
   if (ret) {
1262
      vk_object_free(&device->vk, pAllocator, fence);
1263
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1264
   }
1265

1266
   *pFence = v3dv_fence_to_handle(fence);
1267

1268
   return VK_SUCCESS;
1269
}
1270

1271
VKAPI_ATTR void VKAPI_CALL
1272
v3dv_GetPhysicalDeviceExternalFenceProperties(
1273
    VkPhysicalDevice physicalDevice,
1274
    const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
1275
    VkExternalFenceProperties *pExternalFenceProperties)
1276

1277
{
1278
   switch (pExternalFenceInfo->handleType) {
1279
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
1280
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
1281
      pExternalFenceProperties->exportFromImportedHandleTypes =
1282
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
1283
         VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
1284
      pExternalFenceProperties->compatibleHandleTypes =
1285
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
1286
         VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
1287
      pExternalFenceProperties->externalFenceFeatures =
1288
         VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
1289

1290
      /* FIXME: SYNC_FD exports the actual fence referenced by the syncobj, not
1291
       * the syncobj itself, and that fence is only created after we have
1292
       * submitted to the kernel and updated the syncobj for the fence to import
1293
       * the actual DRM fence created with the submission. Unfortunately, if the
1294
       * queue submission has a 'wait for events' we may hold any jobs after the
1295
       * wait in a user-space thread until the events are signaled, and in that
1296
       * case we don't update the out fence of the submit until the events are
1297
       * signaled and we can submit all the jobs involved with the vkQueueSubmit
1298
       * call. This means that if the applications submits with an out fence and
1299
       * a wait for events, trying to export the out fence to a SYNC_FD rigth
1300
       * after the submission and before the events are signaled will fail,
1301
       * because the actual DRM fence won't exist yet. This is not a problem
1302
       * with OPAQUE_FD because in this case we export the entire syncobj, not
1303
       * the underlying DRM fence. To fix this we need to rework our kernel
1304
       * interface to be more flexible and accept multiple in/out syncobjs so
1305
       * we can implement event waits as regular fence waits on the kernel side,
1306
       * until then, we can only reliably export OPAQUE_FD.
1307
       */
1308
      if (pExternalFenceInfo->handleType !=
1309
          VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
1310
         pExternalFenceProperties->externalFenceFeatures |=
1311
            VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT;
1312
      }
1313
      break;
1314
   default:
1315
      pExternalFenceProperties->exportFromImportedHandleTypes = 0;
1316
      pExternalFenceProperties->compatibleHandleTypes = 0;
1317
      pExternalFenceProperties->externalFenceFeatures = 0;
1318
      break;
1319
   }
1320
}
1321

1322
VKAPI_ATTR VkResult VKAPI_CALL
1323
v3dv_ImportFenceFdKHR(VkDevice _device,
1324
                      const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
1325
{
1326
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1327
   V3DV_FROM_HANDLE(v3dv_fence, fence, pImportFenceFdInfo->fence);
1328

1329
   assert(pImportFenceFdInfo->sType ==
1330
          VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR);
1331

1332
   int fd = pImportFenceFdInfo->fd;
1333
   int render_fd = device->pdevice->render_fd;
1334

1335
   bool is_temporary =
1336
      pImportFenceFdInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT ||
1337
      (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT);
1338

1339
   uint32_t new_sync;
1340
   switch (pImportFenceFdInfo->handleType) {
1341
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
1342
      /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the
1343
       *  special value -1 for fd is treated like a valid sync file descriptor
1344
       *  referring to an object that has already signaled. The import
1345
       *  operation will succeed and the VkFence will have a temporarily
1346
       *  imported payload as if a valid file descriptor had been provided."
1347
       */
1348
      unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0;
1349
      if (drmSyncobjCreate(render_fd, flags, &new_sync))
1350
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1351

1352
      if (fd != -1) {
1353
         if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) {
1354
            drmSyncobjDestroy(render_fd, new_sync);
1355
            return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1356
         }
1357
      }
1358
      break;
1359
   }
1360
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: {
1361
      if (drmSyncobjFDToHandle(render_fd, fd, &new_sync))
1362
         return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1363
      break;
1364
   }
1365
   default:
1366
      return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1367
   }
1368

1369
   destroy_syncobj(render_fd, &fence->temp_sync);
1370
   if (is_temporary) {
1371
      fence->temp_sync = new_sync;
1372
   } else {
1373
      destroy_syncobj(render_fd, &fence->sync);
1374
      fence->sync = new_sync;
1375
   }
1376

1377
   /* From the Vulkan 1.0.53 spec:
1378
    *
1379
    *    "Importing a fence payload from a file descriptor transfers
1380
    *     ownership of the file descriptor from the application to the
1381
    *     Vulkan implementation. The application must not perform any
1382
    *     operations on the file descriptor after a successful import."
1383
    *
1384
    * If the import fails, we leave the file descriptor open.
1385
    */
1386
   if (fd != -1)
1387
      close(fd);
1388

1389
   return VK_SUCCESS;
1390
}
1391

1392
VKAPI_ATTR void VKAPI_CALL
1393
v3dv_DestroyFence(VkDevice _device,
1394
                  VkFence _fence,
1395
                  const VkAllocationCallbacks *pAllocator)
1396
{
1397
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1398
   V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);
1399

1400
   if (fence == NULL)
1401
      return;
1402

1403
   destroy_syncobj(device->pdevice->render_fd, &fence->sync);
1404
   destroy_syncobj(device->pdevice->render_fd, &fence->temp_sync);
1405

1406
   vk_object_free(&device->vk, pAllocator, fence);
1407
}
1408

1409
VKAPI_ATTR VkResult VKAPI_CALL
1410
v3dv_GetFenceStatus(VkDevice _device, VkFence _fence)
1411
{
1412
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1413
   V3DV_FROM_HANDLE(v3dv_fence, fence, _fence);
1414

1415
   int ret = drmSyncobjWait(device->pdevice->render_fd, &fence->sync, 1,
1416
                            0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
1417
   if (ret == -ETIME)
1418
      return VK_NOT_READY;
1419
   else if (ret)
1420
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
1421
   return VK_SUCCESS;
1422
}
1423

1424
VKAPI_ATTR VkResult VKAPI_CALL
1425
v3dv_GetFenceFdKHR(VkDevice _device,
1426
                   const VkFenceGetFdInfoKHR *pGetFdInfo,
1427
                   int *pFd)
1428
{
1429
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1430
   V3DV_FROM_HANDLE(v3dv_fence, fence, pGetFdInfo->fence);
1431

1432
   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR);
1433

1434
   *pFd = -1;
1435
   int render_fd = device->pdevice->render_fd;
1436
   switch (pGetFdInfo->handleType) {
1437
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
1438
      drmSyncobjExportSyncFile(render_fd, fence->sync, pFd);
1439
      if (*pFd == -1)
1440
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1441
      break;
1442
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
1443
      drmSyncobjHandleToFD(render_fd, fence->sync, pFd);
1444
      if (*pFd == -1)
1445
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1446
      break;
1447
   }
1448
   default:
1449
      unreachable("Unsupported external fence handle type");
1450
   }
1451

1452
   return VK_SUCCESS;
1453
}
1454

1455
VKAPI_ATTR VkResult VKAPI_CALL
1456
v3dv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
1457
{
1458
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1459

1460
   uint32_t *syncobjs = vk_alloc(&device->vk.alloc,
1461
                                 sizeof(*syncobjs) * fenceCount, 8,
1462
                                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1463
   if (!syncobjs)
1464
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1465

1466
   int render_fd = device->pdevice->render_fd;
1467
   uint32_t reset_count = 0;
1468
   for (uint32_t i = 0; i < fenceCount; i++) {
1469
      struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
1470
      /* From the Vulkan spec, section 'Importing Fence Payloads':
1471
       *
1472
       *    "If the import is temporary, the fence will be restored to its
1473
       *     permanent state the next time that fence is passed to
1474
       *     vkResetFences.
1475
       *
1476
       *     Note: Restoring a fence to its prior permanent payload is a
1477
       *     distinct operation from resetting a fence payload."
1478
       *
1479
       * To restore the previous state, we just need to destroy the temporary.
1480
       */
1481
      if (fence->temp_sync)
1482
         destroy_syncobj(render_fd, &fence->temp_sync);
1483
      else
1484
         syncobjs[reset_count++] = fence->sync;
1485
   }
1486

1487
   int ret = 0;
1488
   if (reset_count > 0)
1489
      ret = drmSyncobjReset(render_fd, syncobjs, reset_count);
1490

1491
   vk_free(&device->vk.alloc, syncobjs);
1492

1493
   if (ret)
1494
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1495
   return VK_SUCCESS;
1496
}
1497

1498
VKAPI_ATTR VkResult VKAPI_CALL
1499
v3dv_WaitForFences(VkDevice _device,
1500
                   uint32_t fenceCount,
1501
                   const VkFence *pFences,
1502
                   VkBool32 waitAll,
1503
                   uint64_t timeout)
1504
{
1505
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
1506

1507
   const uint64_t abs_timeout = get_absolute_timeout(timeout);
1508

1509
   uint32_t *syncobjs = vk_alloc(&device->vk.alloc,
1510
                                 sizeof(*syncobjs) * fenceCount, 8,
1511
                                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1512
   if (!syncobjs)
1513
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1514

1515
   for (uint32_t i = 0; i < fenceCount; i++) {
1516
      struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
1517
      syncobjs[i] = fence->temp_sync ? fence->temp_sync : fence->sync;
1518
   }
1519

1520
   unsigned flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
1521
   if (waitAll)
1522
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
1523

1524
   int ret;
1525
   do {
1526
      ret = drmSyncobjWait(device->pdevice->render_fd, syncobjs, fenceCount,
1527
                           timeout, flags, NULL);
1528
   } while (ret == -ETIME && gettime_ns() < abs_timeout);
1529

1530
   vk_free(&device->vk.alloc, syncobjs);
1531

1532
   if (ret == -ETIME)
1533
      return VK_TIMEOUT;
1534
   else if (ret)
1535
      return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
1536
   return VK_SUCCESS;
1537
}
1538

1539
VKAPI_ATTR VkResult VKAPI_CALL
1540
v3dv_QueueBindSparse(VkQueue _queue,
1541
                     uint32_t bindInfoCount,
1542
                     const VkBindSparseInfo *pBindInfo,
1543
                     VkFence fence)
1544
{
1545
   V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
1546
   return vk_error(queue->device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
1547
}
1548

1549
Product

Resources

Company