GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/drm/msm_ringbuffer_sp.c
/*
 * Copyright (C) 2018 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "drm/freedreno_ringbuffer.h"
#include "msm_priv.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */

#define INIT_SIZE 0x1000

#define SUBALLOC_SIZE (32 * 1024)

/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
 * instead use a condition-variable.  Note that pipe->flush() is not expected
 * to be a common/hot path.
 */
static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;
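
/* Illustrative sketch, not driver code: the flush_cnd/flush_mtx pair above is
 * used as a "wait until a monotonically increasing fence value reaches N"
 * handshake between the frontend and the submit-queue thread.  The minimal
 * pattern looks like the hypothetical example_* functions below (our names,
 * not freedreno API); the real code in flush_submit_list() and
 * msm_pipe_sp_flush() additionally uses fd_fence_before() so that fence
 * wraparound is handled.
 */
static uint32_t example_last_fence;  /* protected by example_mtx */
static pthread_mutex_t example_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t example_cnd = PTHREAD_COND_INITIALIZER;

/* producer side: called once a submit with `fence` has reached the kernel */
static void
example_signal_fence(uint32_t fence)
{
   pthread_mutex_lock(&example_mtx);
   example_last_fence = fence;
   pthread_cond_broadcast(&example_cnd);
   pthread_mutex_unlock(&example_mtx);
}

/* consumer side: block until a submit with at least `fence` has been flushed */
static void
example_wait_fence(uint32_t fence)
{
   pthread_mutex_lock(&example_mtx);
   while (example_last_fence < fence)
      pthread_cond_wait(&example_cnd, &example_mtx);
   pthread_mutex_unlock(&example_mtx);
}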

struct msm_submit_sp {
   struct fd_submit base;

   DECLARE_ARRAY(struct fd_bo *, bos);

   /* maps fd_bo to idx in bos table: */
   struct hash_table *bo_table;

   struct slab_child_pool ring_pool;

   /* Allow for sub-allocation of stateobj ring buffers (ie. sharing
    * the same underlying bo)..
    *
    * We also rely on previous stateobj having been fully constructed
    * so we can reclaim extra space at its end.
    */
   struct fd_ringbuffer *suballoc_ring;

   /* Flush args, potentially attached to the last submit in the list
    * of submits to merge:
    */
   int in_fence_fd;
   struct fd_submit_fence *out_fence;

   /* State for enqueued submits:
    */
   struct list_head submit_list;   /* includes this submit as last element */

   /* Used in case out_fence==NULL: */
   struct util_queue_fence fence;
};
FD_DEFINE_CAST(fd_submit, msm_submit_sp);

/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
 * and sizes.  Ie. a finalized buffer can have no more commands appended to
 * it.
 */
struct msm_cmd_sp {
   struct fd_bo *ring_bo;
   unsigned size;
};

struct msm_ringbuffer_sp {
   struct fd_ringbuffer base;

   /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
   unsigned offset;

   union {
      /* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from
       * this one
       */
      struct {
         struct fd_pipe *pipe;
         DECLARE_ARRAY(struct fd_bo *, reloc_bos);
      };
      /* for other cases: */
      struct {
         struct fd_submit *submit;
         DECLARE_ARRAY(struct msm_cmd_sp, cmds);
      };
   } u;

   struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
                       enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
static uint32_t
msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo)
{
   struct msm_bo *msm_bo = to_msm_bo(bo);
   uint32_t idx;

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   idx = READ_ONCE(msm_bo->idx);

   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      msm_bo->idx = idx;
   }

   return idx;
}
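
/* Illustrative sketch, not driver code: msm_submit_append_bo() above keeps a
 * per-bo cached index (msm_bo->idx) so the common "bo already in this
 * submit" case is a bounds check plus a pointer compare, and only falls back
 * to the hash table when the cache is stale (e.g. the bo was last used by a
 * different submit).  A stripped-down generic version of the same idea, with
 * made-up names and a linear array standing in for the hash table (assumes
 * <stdlib.h> for realloc; error handling omitted):
 */
struct example_item {
   uint32_t cached_idx;   /* index assigned by the last table this item joined */
};

struct example_table {
   struct example_item **items;
   uint32_t nr_items;
};

static uint32_t
example_table_add(struct example_table *tbl, struct example_item *item)
{
   uint32_t idx = item->cached_idx;

   /* fast path: cached index still points at this item in this table */
   if (idx < tbl->nr_items && tbl->items[idx] == item)
      return idx;

   /* slow path: search, append if missing, refresh the cache */
   for (idx = 0; idx < tbl->nr_items; idx++) {
      if (tbl->items[idx] == item)
         break;
   }
   if (idx == tbl->nr_items) {
      tbl->items = realloc(tbl->items, (tbl->nr_items + 1) * sizeof(*tbl->items));
      tbl->items[tbl->nr_items++] = item;
   }
   item->cached_idx = idx;
   return idx;
}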

static void
msm_submit_suballoc_ring_bo(struct fd_submit *submit,
                            struct msm_ringbuffer_sp *msm_ring, uint32_t size)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (msm_submit->suballoc_ring) {
      struct msm_ringbuffer_sp *suballoc_ring =
         to_msm_ringbuffer_sp(msm_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(msm_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      msm_ring->offset = 0;
   } else {
      msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
      msm_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;

   msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}
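
/* Illustrative sketch, not driver code: the sub-allocation above is plain
 * bump allocation inside a shared SUBALLOC_SIZE (32 KiB) buffer.  The next
 * streaming ring starts at the 16-byte-aligned end of the previous one, and
 * a fresh bo is allocated when the request would not fit.  Worked example:
 * a previous ring at offset 0x4000 with size 0x808 ends at 0x4808, which
 * aligns up to 0x4810, so a 0x1000-byte request lands at offset 0x4810
 * (0x5810 <= 0x8000).  The hypothetical helper below (our name, not driver
 * API) just captures that arithmetic:
 */
static int32_t
example_suballoc_offset(uint32_t prev_offset, uint32_t prev_size,
                        uint32_t size, uint32_t bo_size)
{
   uint32_t offset = align(prev_offset + prev_size, 0x10);

   if (offset + size > bo_size)
      return -1;   /* caller starts a fresh bo at offset 0 instead */

   return (int32_t)offset;
}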

static struct fd_ringbuffer *
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                             enum fd_ringbuffer_flags flags)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_ringbuffer_sp *msm_ring;

   msm_ring = slab_alloc(&msm_submit->ring_pool);

   msm_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   msm_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      msm_submit_suballoc_ring_bo(submit, msm_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      msm_ring->offset = 0;
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
      return NULL;

   return &msm_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize primary ringbuffer, at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper level driver
 *    the submit is flushed, even if deferred
 * 2) Add cmdstream bos to bos table
 * 3) Update bo fences
 */
static bool
msm_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                         struct fd_submit_fence *out_fence)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct msm_ringbuffer_sp *primary =
      to_msm_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      msm_submit_append_bo(msm_submit, primary->u.cmds[i].ring_bo);

   simple_mtx_lock(&table_lock);
   for (unsigned i = 0; i < msm_submit->nr_bos; i++) {
      fd_bo_add_fence(msm_submit->bos[i], submit->pipe, submit->fence);
      has_shared |= msm_submit->bos[i]->shared;
   }
   simple_mtx_unlock(&table_lock);

   msm_submit->out_fence = out_fence;
   msm_submit->in_fence_fd = (in_fence_fd == -1) ?
      -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}

static int
flush_submit_list(struct list_head *submit_list)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(last_submit(submit_list));
   struct msm_pipe *msm_pipe = to_msm_pipe(msm_submit->base.pipe);
   struct drm_msm_gem_submit req = {
      .flags = msm_pipe->pipe,
      .queueid = msm_pipe->queue_id,
   };
   int ret;

   unsigned nr_cmds = 0;

   /* Determine the number of extra cmds from deferred submits that
    * we will be merging in:
    */
   foreach_submit (submit, submit_list) {
      assert(submit->pipe == &msm_pipe->base);
      nr_cmds += to_msm_ringbuffer_sp(submit->primary)->u.nr_cmds;
   }

   struct drm_msm_gem_submit_cmd cmds[nr_cmds];

   unsigned cmd_idx = 0;

   /* Build up the table of cmds, and for all but the last submit in the
    * list, merge their bo tables into the last submit.
    */
   foreach_submit_safe (submit, submit_list) {
      struct msm_ringbuffer_sp *deferred_primary =
         to_msm_ringbuffer_sp(submit->primary);

      for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
         cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
         cmds[cmd_idx].submit_idx =
            msm_submit_append_bo(msm_submit, deferred_primary->u.cmds[i].ring_bo);
         cmds[cmd_idx].submit_offset = deferred_primary->offset;
         cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
         cmds[cmd_idx].pad = 0;
         cmds[cmd_idx].nr_relocs = 0;

         cmd_idx++;
      }

      /* We are merging all the submits in the list into the last submit,
       * so the remainder of the loop body doesn't apply to the last submit
       */
      if (submit == last_submit(submit_list)) {
         DEBUG_MSG("merged %u submits", cmd_idx);
         break;
      }

      struct msm_submit_sp *msm_deferred_submit = to_msm_submit_sp(submit);
      for (unsigned i = 0; i < msm_deferred_submit->nr_bos; i++) {
         /* Note: if bo is used in both the current submit and the deferred
          * submit being merged, we expect to hit the fast-path as we add it
          * to the current submit:
          */
         msm_submit_append_bo(msm_submit, msm_deferred_submit->bos[i]);
      }

      /* Now that the cmds/bos have been transferred over to the current submit,
       * we can remove the deferred submit from the list and drop its reference
       */
      list_del(&submit->node);
      fd_submit_del(submit);
   }

   if (msm_submit->in_fence_fd != -1) {
      req.flags |= MSM_SUBMIT_FENCE_FD_IN;
      req.fence_fd = msm_submit->in_fence_fd;
      msm_pipe->no_implicit_sync = true;
   }

   if (msm_pipe->no_implicit_sync) {
      req.flags |= MSM_SUBMIT_NO_IMPLICIT;
   }

   if (msm_submit->out_fence && msm_submit->out_fence->use_fence_fd) {
      req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
   }

   /* Needs to be after get_cmd() as that could create bos/cmds table:
    *
    * NOTE allocate on-stack in the common case, but with an upper-
    * bound to limit on-stack allocation to 4k:
    */
   const unsigned bo_limit = 4096 / sizeof(struct drm_msm_gem_submit_bo);
   bool bos_on_stack = msm_submit->nr_bos < bo_limit;
   struct drm_msm_gem_submit_bo
      _submit_bos[bos_on_stack ? msm_submit->nr_bos : 0];
   struct drm_msm_gem_submit_bo *submit_bos;
   if (bos_on_stack) {
      submit_bos = _submit_bos;
   } else {
      submit_bos = malloc(msm_submit->nr_bos * sizeof(submit_bos[0]));
   }

   for (unsigned i = 0; i < msm_submit->nr_bos; i++) {
      submit_bos[i].flags = msm_submit->bos[i]->flags;
      submit_bos[i].handle = msm_submit->bos[i]->handle;
      submit_bos[i].presumed = 0;
   }

   req.bos = VOID2U64(submit_bos);
   req.nr_bos = msm_submit->nr_bos;
   req.cmds = VOID2U64(cmds);
   req.nr_cmds = nr_cmds;

   DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);

   ret = drmCommandWriteRead(msm_pipe->base.dev->fd, DRM_MSM_GEM_SUBMIT, &req,
                             sizeof(req));
   if (ret) {
      ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
      msm_dump_submit(&req);
   } else if (!ret && msm_submit->out_fence) {
      msm_submit->out_fence->fence.kfence = req.fence;
      msm_submit->out_fence->fence.ufence = msm_submit->base.fence;
      msm_submit->out_fence->fence_fd = req.fence_fd;
   }

   if (!bos_on_stack)
      free(submit_bos);

   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(msm_pipe->last_submit_fence, msm_submit->base.fence));
   msm_pipe->last_submit_fence = msm_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   if (msm_submit->in_fence_fd != -1)
      close(msm_submit->in_fence_fd);

   return ret;
}
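
/* Illustrative sketch, not driver code: the bos table above uses the common
 * "bounded VLA, else heap" pattern so the typical submit never calls
 * malloc().  drm_msm_gem_submit_bo is 16 bytes on a typical build (two u32s
 * plus a u64), so bo_limit works out to 4096 / 16 = 256 entries, i.e. at
 * most 4 KiB of stack.  A generic version with made-up names (assumes
 * <stdlib.h>/<stdbool.h>, and the zero-size-VLA extension that GCC/clang
 * provide, which the driver code above relies on as well):
 */
static void
example_process(const uint32_t *in, unsigned count)
{
   const unsigned stack_limit = 4096 / sizeof(uint32_t);
   bool on_stack = count < stack_limit;
   uint32_t _stack_buf[on_stack ? count : 0];
   uint32_t *buf = on_stack ? _stack_buf : malloc(count * sizeof(buf[0]));

   for (unsigned i = 0; i < count; i++)
      buf[i] = in[i];   /* stand-in for building the real table */

   /* ... hand buf off to whatever consumes it ... */

   if (!on_stack)
      free(buf);
}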

static void
msm_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   flush_submit_list(&msm_submit->submit_list);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
msm_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

static int
enqueue_submit_list(struct list_head *submit_list)
{
   struct fd_submit *submit = last_submit(submit_list);
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_device *msm_dev = to_msm_device(submit->pipe->dev);

   list_replace(submit_list, &msm_submit->submit_list);
   list_inithead(submit_list);

   struct util_queue_fence *fence;
   if (msm_submit->out_fence) {
      fence = &msm_submit->out_fence->ready;
   } else {
      util_queue_fence_init(&msm_submit->fence);
      fence = &msm_submit->fence;
   }

   DEBUG_MSG("enqueue: %u", submit->fence);

   util_queue_add_job(&msm_dev->submit_queue,
                      submit, fence,
                      msm_submit_sp_flush_execute,
                      msm_submit_sp_flush_cleanup,
                      0);

   return 0;
}

static bool
should_defer(struct fd_submit *submit)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (msm_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer, which results in
    * deadlock writing into the RB (ie. kernel doesn't finish writing into
    * the RB so it doesn't kick the GPU to start consuming from the RB)
    */
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}
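
/* Rough numbers behind the two cutoffs above (derived from the comments here,
 * not from kernel headers): 32 KiB of kernel ringbuffer divided by the ~2k
 * cmd ceiling is about 16 bytes of RB space per merged cmd, so capping the
 * deferred cmd count at 128 stays far below the point where the kernel could
 * wedge, and the 30-bo cutoff similarly bounds how much bo-table work a
 * merge can cost.
 */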

static int
msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
                    struct fd_submit_fence *out_fence)
{
   struct fd_device *dev = submit->pipe->dev;
   struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueues (ie. they
    * could have different priority, etc)
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      struct list_head submit_list;

      list_replace(&dev->deferred_submits, &submit_list);
      list_inithead(&dev->deferred_submits);
      dev->deferred_cmds = 0;

      enqueue_submit_list(&submit_list);
   }

   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   bool has_shared = msm_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   assert(fd_fence_before(msm_pipe->last_enqueue_fence, submit->fence));
   msm_pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return 0;
   }

   struct list_head submit_list;

   list_replace(&dev->deferred_submits, &submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   simple_mtx_unlock(&dev->submit_lock);

   return enqueue_submit_list(&submit_list);
}

void
msm_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
   struct fd_device *dev = pipe->dev;
   struct list_head submit_list;

   DEBUG_MSG("flush: %u", fence);

   list_inithead(&submit_list);

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, msm_pipe->last_enqueue_fence));

   foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
      /* We should never have submits from multiple pipes in the deferred
       * list.  If we did, we couldn't compare their fence to our fence,
       * since each fd_pipe is an independent timeline.
       */
      if (deferred_submit->pipe != pipe)
         break;

      if (fd_fence_after(deferred_submit->fence, fence))
         break;

      list_del(&deferred_submit->node);
      list_addtail(&deferred_submit->node, &submit_list);
      dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary);
   }

   assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));

   simple_mtx_unlock(&dev->submit_lock);

   if (list_is_empty(&submit_list))
      goto flush_sync;

   enqueue_submit_list(&submit_list);

flush_sync:
   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that the submit queue has caught up and
    * flushed them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(msm_pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

static void
msm_submit_sp_destroy(struct fd_submit *submit)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   if (msm_submit->suballoc_ring)
      fd_ringbuffer_del(msm_submit->suballoc_ring);

   _mesa_hash_table_destroy(msm_submit->bo_table, NULL);

   // TODO it would be nice to have a way to debug_assert() if all
   // rb's haven't been freed back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&msm_submit->ring_pool);

   for (unsigned i = 0; i < msm_submit->nr_bos; i++)
      fd_bo_del(msm_submit->bos[i]);

   free(msm_submit->bos);
   free(msm_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = msm_submit_sp_new_ringbuffer,
   .flush = msm_submit_sp_flush,
   .destroy = msm_submit_sp_destroy,
};

struct fd_submit *
msm_submit_sp_new(struct fd_pipe *pipe)
{
   struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
   struct fd_submit *submit;

   msm_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                  _mesa_key_pointer_equal);

   slab_create_child(&msm_submit->ring_pool, &to_msm_pipe(pipe)->ring_pool);

   submit = &msm_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
msm_pipe_sp_ringpool_init(struct msm_pipe *msm_pipe)
{
   // TODO tune size:
   slab_create_parent(&msm_pipe->ring_pool, sizeof(struct msm_ringbuffer_sp),
                      16);
}

void
msm_pipe_sp_ringpool_fini(struct msm_pipe *msm_pipe)
{
   if (msm_pipe->ring_pool.num_elements)
      slab_destroy_parent(&msm_pipe->ring_pool);
}

static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   APPEND(&msm_ring->u, cmds,
          (struct msm_cmd_sp){
             .ring_bo = fd_bo_ref(msm_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe = msm_ring->u.submit->pipe;

   debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(msm_ring->ring_bo);
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(msm_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

static inline bool
msm_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
      if (msm_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

#define PTRSZ 64
#include "msm_ringbuffer_sp.h"
#undef PTRSZ
#define PTRSZ 32
#include "msm_ringbuffer_sp.h"
#undef PTRSZ
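
/* The two includes above expand msm_ringbuffer_sp.h twice, once with
 * PTRSZ=64 and once with PTRSZ=32, stamping out the _64 and _32 flavours of
 * the emit_reloc helpers referenced by the function tables below (GPUs before
 * a5xx use 32-bit GPU addresses, a5xx and later use 64-bit).  A
 * self-contained sketch of the same "one template, two widths" idea, using
 * token pasting instead of a twice-included header (made-up names, not the
 * driver's actual mechanism; assumes the destination is suitably aligned, as
 * GPU cmdstream buffers are):
 */
#define EXAMPLE_DEFINE_EMIT_ADDR(sz, type)                                   \
   static inline void                                                        \
   example_emit_addr_##sz(uint32_t **cur, uint64_t iova)                     \
   {                                                                         \
      *(type *)(*cur) = (type)iova;                                          \
      *cur += sizeof(type) / sizeof(uint32_t);                               \
   }

EXAMPLE_DEFINE_EMIT_ADDR(32, uint32_t)
EXAMPLE_DEFINE_EMIT_ADDR(64, uint64_t)
#undef EXAMPLE_DEFINE_EMIT_ADDR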

static uint32_t
msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static bool
msm_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_submit *submit = msm_ring->u.submit;

   if (to_msm_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   fd_bo_del(msm_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(msm_ring->u.reloc_bos[i]);
      }
      free(msm_ring->u.reloc_bos);

      free(msm_ring);
   } else {
      struct fd_submit *submit = msm_ring->u.submit;

      for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
         fd_bo_del(msm_ring->u.cmds[i].ring_bo);
      }
      free(msm_ring->u.cmds);

      slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .check_size = msm_ringbuffer_sp_check_size,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .check_size = msm_ringbuffer_sp_check_size,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .destroy = msm_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
                       enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &msm_ring->base;

   /* We don't do any translation from internal FD_RELOC flags to MSM flags. */
   STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ);
   STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE);
   STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP);

   debug_assert(msm_ring->ring_bo);

   uint8_t *base = fd_bo_map(msm_ring->ring_bo);
   ring->start = (void *)(base + msm_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (msm_ring->u.pipe->gpu_id >= 500) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (msm_ring->u.submit->pipe->gpu_id >= 500) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since unneeded for FD_RINGBUFFER_STAGING case..
   msm_ring->u.cmds = NULL;
   msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;

   msm_ring->u.reloc_bos = NULL;
   msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

   return ring;
}

struct fd_ringbuffer *
msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
   struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));

   /* Lock access to the msm_pipe->suballoc_* since ringbuffer object allocation
    * can happen both on the frontend (most CSOs) and the driver thread (a6xx
    * cached tex state, for example)
    */
   static simple_mtx_t suballoc_lock = _SIMPLE_MTX_INITIALIZER_NP;
   simple_mtx_lock(&suballoc_lock);

   /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */
   msm_ring->offset = align(msm_pipe->suballoc_offset, 64);
   if (!msm_pipe->suballoc_bo ||
       msm_ring->offset + size > fd_bo_size(msm_pipe->suballoc_bo)) {
      if (msm_pipe->suballoc_bo)
         fd_bo_del(msm_pipe->suballoc_bo);
      msm_pipe->suballoc_bo =
         fd_bo_new_ring(pipe->dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
      msm_ring->offset = 0;
   }

   msm_ring->u.pipe = pipe;
   msm_ring->ring_bo = fd_bo_ref(msm_pipe->suballoc_bo);
   msm_ring->base.refcnt = 1;

   msm_pipe->suballoc_offset = msm_ring->offset + size;

   simple_mtx_unlock(&suballoc_lock);

   return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}