CoCalc -- pan

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/panfrost/lib/pan_bo.c
⁴⁵⁶⁰ views
1
/*
2
 * Copyright 2019 Collabora, Ltd.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
 * SOFTWARE.
22
 *
23
 * Authors (Collabora):
24
 *   Alyssa Rosenzweig <[email protected]>
25
 */
26
#include <errno.h>
27
#include <stdio.h>
28
#include <fcntl.h>
29
#include <xf86drm.h>
30
#include <pthread.h>
31
#include "drm-uapi/panfrost_drm.h"
32

33
#include "pan_bo.h"
34
#include "pan_device.h"
35
#include "pan_util.h"
36
#include "wrap.h"
37

38
#include "os/os_mman.h"
39

40
#include "util/u_inlines.h"
41
#include "util/u_math.h"
42

43
/* This file implements a userspace BO cache. Allocating and freeing
 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
 * add more work than we would like at this point. So caching BOs in userspace
 * solves both of these problems and does not require kernel updates.
 *
 * Cached BOs are sorted into a bucket based on rounding their size down to the
 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
 * objects. Putting a BO into the cache is accomplished by adding it to the
 * corresponding bucket. Getting a BO from the cache consists of finding the
 * appropriate bucket and searching it for a suitable entry. A cache eviction
 * is a kernel-level free of a BO and removing it from the bucket. We special
 * case evicting all BOs from the cache, since that's what is helpful in
 * practice and avoids extra logic around the linked list.
 */
57

58
static struct panfrost_bo *
59
panfrost_bo_alloc(struct panfrost_device *dev, size_t size,
60
                  uint32_t flags, const char *label)
61
{
62
        struct drm_panfrost_create_bo create_bo = { .size = size };
63
        struct panfrost_bo *bo;
64
        int ret;
65

66
        if (dev->kernel_version->version_major > 1 ||
67
            dev->kernel_version->version_minor >= 1) {
68
                if (flags & PAN_BO_GROWABLE)
69
                        create_bo.flags |= PANFROST_BO_HEAP;
70
                if (!(flags & PAN_BO_EXECUTE))
71
                        create_bo.flags |= PANFROST_BO_NOEXEC;
72
        }
73

74
        ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
75
        if (ret) {
76
                fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
77
                return NULL;
78
        }
79

80
        bo = pan_lookup_bo(dev, create_bo.handle);
81
        assert(!memcmp(bo, &((struct panfrost_bo){}), sizeof(*bo)));
82

83
        bo->size = create_bo.size;
84
        bo->ptr.gpu = create_bo.offset;
85
        bo->gem_handle = create_bo.handle;
86
        bo->flags = flags;
87
        bo->dev = dev;
88
        bo->label = label;
89
        return bo;
90
}
91

92
static void
93
panfrost_bo_free(struct panfrost_bo *bo)
94
{
95
        struct drm_gem_close gem_close = { .handle = bo->gem_handle };
96
        int ret;
97

98
        ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
99
        if (ret) {
100
                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
101
                assert(0);
102
        }
103

104
        /* BO will be freed with the sparse array, but zero to indicate free */
105
        memset(bo, 0, sizeof(*bo));
106
}
107

108
/* Returns true if the BO is ready, false otherwise.
109
 * access_type is encoding the type of access one wants to ensure is done.
110
 * Waiting is always done for writers, but if wait_readers is set then readers
111
 * are also waited for.
112
 */
113
bool
114
panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers)
115
{
116
        struct drm_panfrost_wait_bo req = {
117
                .handle = bo->gem_handle,
118
		.timeout_ns = timeout_ns,
119
        };
120
        int ret;
121

122
        /* If the BO has been exported or imported we can't rely on the cached
123
         * state, we need to call the WAIT_BO ioctl.
124
         */
125
        if (!(bo->flags & PAN_BO_SHARED)) {
126
                /* If ->gpu_access is 0, the BO is idle, no need to wait. */
127
                if (!bo->gpu_access)
128
                        return true;
129

130
                /* If the caller only wants to wait for writers and no
131
                 * writes are pending, we don't have to wait.
132
                 */
133
                if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
134
                        return true;
135
        }
136

137
        /* The ioctl returns >= 0 value when the BO we are waiting for is ready
138
         * -1 otherwise.
139
         */
140
        ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
141
        if (ret != -1) {
142
                /* Set gpu_access to 0 so that the next call to bo_wait()
143
                 * doesn't have to call the WAIT_BO ioctl.
144
                 */
145
                bo->gpu_access = 0;
146
                return true;
147
        }
148

149
        /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
150
         * is invalid, which shouldn't happen here.
151
         */
152
        assert(errno == ETIMEDOUT || errno == EBUSY);
153
        return false;
154
}
155

156
/* Helper to calculate the bucket index of a BO */
157

158
static unsigned
159
pan_bucket_index(unsigned size)
160
{
161
        /* Round down to POT to compute a bucket index */
162

163
        unsigned bucket_index = util_logbase2(size);
164

165
        /* Clamp the bucket index; all huge allocations will be
166
         * sorted into the largest bucket */
167

168
        bucket_index = MIN2(bucket_index, MAX_BO_CACHE_BUCKET);
169

170
        /* The minimum bucket size must equal the minimum allocation
171
         * size; the maximum we clamped */
172

173
        assert(bucket_index >= MIN_BO_CACHE_BUCKET);
174
        assert(bucket_index <= MAX_BO_CACHE_BUCKET);
175

176
        /* Reindex from 0 */
177
        return (bucket_index - MIN_BO_CACHE_BUCKET);
178
}
179

180
static struct list_head *
181
pan_bucket(struct panfrost_device *dev, unsigned size)
182
{
183
        return &dev->bo_cache.buckets[pan_bucket_index(size)];
184
}
185

186
/* Tries to fetch a BO of sufficient size with the appropriate flags from the
187
 * BO cache. If it succeeds, it returns that BO and removes the BO from the
188
 * cache. If it fails, it returns NULL signaling the caller to allocate a new
189
 * BO. */
190

191
static struct panfrost_bo *
192
panfrost_bo_cache_fetch(struct panfrost_device *dev,
193
                        size_t size, uint32_t flags, const char *label,
194
                        bool dontwait)
195
{
196
        pthread_mutex_lock(&dev->bo_cache.lock);
197
        struct list_head *bucket = pan_bucket(dev, size);
198
        struct panfrost_bo *bo = NULL;
199

200
        /* Iterate the bucket looking for something suitable */
201
        list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
202
                                 bucket_link) {
203
                if (entry->size < size || entry->flags != flags)
204
                        continue;
205

206
                /* If the oldest BO in the cache is busy, likely so is
207
                 * everything newer, so bail. */
208
                if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
209
                                      PAN_BO_ACCESS_RW))
210
                        break;
211

212
                struct drm_panfrost_madvise madv = {
213
                        .handle = entry->gem_handle,
214
                        .madv = PANFROST_MADV_WILLNEED,
215
                };
216
                int ret;
217

218
                /* This one works, splice it out of the cache */
219
                list_del(&entry->bucket_link);
220
                list_del(&entry->lru_link);
221

222
                ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
223
                if (!ret && !madv.retained) {
224
                        panfrost_bo_free(entry);
225
                        continue;
226
                }
227
                /* Let's go! */
228
                bo = entry;
229
                bo->label = label;
230
                break;
231
        }
232
        pthread_mutex_unlock(&dev->bo_cache.lock);
233

234
        return bo;
235
}
236

237
static void
238
panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
239
{
240
        struct timespec time;
241

242
        clock_gettime(CLOCK_MONOTONIC, &time);
243
        list_for_each_entry_safe(struct panfrost_bo, entry,
244
                                 &dev->bo_cache.lru, lru_link) {
245
                /* We want all entries that have been used more than 1 sec
246
                 * ago to be dropped, others can be kept.
247
                 * Note the <= 2 check and not <= 1. It's here to account for
248
                 * the fact that we're only testing ->tv_sec, not ->tv_nsec.
249
                 * That means we might keep entries that are between 1 and 2
250
                 * seconds old, but we don't really care, as long as unused BOs
251
                 * are dropped at some point.
252
                 */
253
                if (time.tv_sec - entry->last_used <= 2)
254
                        break;
255

256
                list_del(&entry->bucket_link);
257
                list_del(&entry->lru_link);
258
                panfrost_bo_free(entry);
259
        }
260
}
261

262
/* Tries to add a BO to the cache. Returns if it was
263
 * successful */
264

265
static bool
266
panfrost_bo_cache_put(struct panfrost_bo *bo)
267
{
268
        struct panfrost_device *dev = bo->dev;
269

270
        if (bo->flags & PAN_BO_SHARED)
271
                return false;
272

273
        pthread_mutex_lock(&dev->bo_cache.lock);
274
        struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096));
275
        struct drm_panfrost_madvise madv;
276
        struct timespec time;
277

278
        madv.handle = bo->gem_handle;
279
        madv.madv = PANFROST_MADV_DONTNEED;
280
	madv.retained = 0;
281

282
        drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
283

284
        /* Add us to the bucket */
285
        list_addtail(&bo->bucket_link, bucket);
286

287
        /* Add us to the LRU list and update the last_used field. */
288
        list_addtail(&bo->lru_link, &dev->bo_cache.lru);
289
        clock_gettime(CLOCK_MONOTONIC, &time);
290
        bo->last_used = time.tv_sec;
291

292
        /* Let's do some cleanup in the BO cache while we hold the
293
         * lock.
294
         */
295
        panfrost_bo_cache_evict_stale_bos(dev);
296
        pthread_mutex_unlock(&dev->bo_cache.lock);
297

298
        /* Update the label to help debug BO cache memory usage issues */
299
        bo->label = "Unused (BO cache)";
300

301
        return true;
302
}
303

304
/* Evicts all BOs from the cache. Called during context
305
 * destroy or during low-memory situations (to free up
306
 * memory that may be unused by us just sitting in our
307
 * cache, but still reserved from the perspective of the
308
 * OS) */
309

310
void
311
panfrost_bo_cache_evict_all(
312
                struct panfrost_device *dev)
313
{
314
        pthread_mutex_lock(&dev->bo_cache.lock);
315
        for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
316
                struct list_head *bucket = &dev->bo_cache.buckets[i];
317

318
                list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
319
                                         bucket_link) {
320
                        list_del(&entry->bucket_link);
321
                        list_del(&entry->lru_link);
322
                        panfrost_bo_free(entry);
323
                }
324
        }
325
        pthread_mutex_unlock(&dev->bo_cache.lock);
326
}
327

328
void
329
panfrost_bo_mmap(struct panfrost_bo *bo)
330
{
331
        struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
332
        int ret;
333

334
        if (bo->ptr.cpu)
335
                return;
336

337
        ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
338
        if (ret) {
339
                fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
340
                assert(0);
341
        }
342

343
        bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
344
                              bo->dev->fd, mmap_bo.offset);
345
        if (bo->ptr.cpu == MAP_FAILED) {
346
                bo->ptr.cpu = NULL;
347
                fprintf(stderr,
348
                        "mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n",
349
                        bo->ptr.cpu, (long long)bo->size, bo->dev->fd,
350
                        (long long)mmap_bo.offset);
351
        }
352
}
353

354
static void
355
panfrost_bo_munmap(struct panfrost_bo *bo)
356
{
357
        if (!bo->ptr.cpu)
358
                return;
359

360
        if (os_munmap((void *) (uintptr_t)bo->ptr.cpu, bo->size)) {
361
                perror("munmap");
362
                abort();
363
        }
364

365
        bo->ptr.cpu = NULL;
366
}
367

368
struct panfrost_bo *
369
panfrost_bo_create(struct panfrost_device *dev, size_t size,
370
                   uint32_t flags, const char *label)
371
{
372
        struct panfrost_bo *bo;
373

374
        /* Kernel will fail (confusingly) with EPERM otherwise */
375
        assert(size > 0);
376

377
        /* To maximize BO cache usage, don't allocate tiny BOs */
378
        size = ALIGN_POT(size, 4096);
379

380
        /* GROWABLE BOs cannot be mmapped */
381
        if (flags & PAN_BO_GROWABLE)
382
                assert(flags & PAN_BO_INVISIBLE);
383

384
        /* Before creating a BO, we first want to check the cache but without
385
         * waiting for BO readiness (BOs in the cache can still be referenced
386
         * by jobs that are not finished yet).
387
         * If the cached allocation fails we fall back on fresh BO allocation,
388
         * and if that fails too, we try one more time to allocate from the
389
         * cache, but this time we accept to wait.
390
         */
391
        bo = panfrost_bo_cache_fetch(dev, size, flags, label, true);
392
        if (!bo)
393
                bo = panfrost_bo_alloc(dev, size, flags, label);
394
        if (!bo)
395
                bo = panfrost_bo_cache_fetch(dev, size, flags, label, false);
396

397
        if (!bo)
398
                fprintf(stderr, "BO creation failed\n");
399

400
        assert(bo);
401

402
        /* Only mmap now if we know we need to. For CPU-invisible buffers, we
403
         * never map since we don't care about their contents; they're purely
404
         * for GPU-internal use. But we do trace them anyway. */
405

406
        if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
407
                panfrost_bo_mmap(bo);
408

409
        p_atomic_set(&bo->refcnt, 1);
410

411
        if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
412
                if (flags & PAN_BO_INVISIBLE)
413
                        pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL);
414
                else if (!(flags & PAN_BO_DELAY_MMAP))
415
                        pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL);
416
        }
417

418
        return bo;
419
}
420

421
void
422
panfrost_bo_reference(struct panfrost_bo *bo)
423
{
424
        if (bo) {
425
                ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
426
                assert(count != 1);
427
        }
428
}
429

430
void
431
panfrost_bo_unreference(struct panfrost_bo *bo)
432
{
433
        if (!bo)
434
                return;
435

436
        /* Don't return to cache if there are still references */
437
        if (p_atomic_dec_return(&bo->refcnt))
438
                return;
439

440
        struct panfrost_device *dev = bo->dev;
441

442
        pthread_mutex_lock(&dev->bo_map_lock);
443

444
        /* Someone might have imported this BO while we were waiting for the
445
         * lock, let's make sure it's still not referenced before freeing it.
446
         */
447
        if (p_atomic_read(&bo->refcnt) == 0) {
448
                /* When the reference count goes to zero, we need to cleanup */
449
                panfrost_bo_munmap(bo);
450

451
                if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
452
                        pandecode_inject_free(bo->ptr.gpu, bo->size);
453

454
                /* Rather than freeing the BO now, we'll cache the BO for later
455
                 * allocations if we're allowed to.
456
                 */
457
                if (!panfrost_bo_cache_put(bo))
458
                        panfrost_bo_free(bo);
459

460
        }
461
        pthread_mutex_unlock(&dev->bo_map_lock);
462
}
463

464
struct panfrost_bo *
465
panfrost_bo_import(struct panfrost_device *dev, int fd)
466
{
467
        struct panfrost_bo *bo;
468
        struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
469
        ASSERTED int ret;
470
        unsigned gem_handle;
471

472
        ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
473
        assert(!ret);
474

475
        pthread_mutex_lock(&dev->bo_map_lock);
476
        bo = pan_lookup_bo(dev, gem_handle);
477

478
        if (!bo->dev) {
479
                get_bo_offset.handle = gem_handle;
480
                ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
481
                assert(!ret);
482

483
                bo->dev = dev;
484
                bo->ptr.gpu = (mali_ptr) get_bo_offset.offset;
485
                bo->size = lseek(fd, 0, SEEK_END);
486
                /* Sometimes this can fail and return -1. size of -1 is not
487
                 * a nice thing for mmap to try mmap. Be more robust also
488
                 * for zero sized maps and fail nicely too
489
                 */
490
                if ((bo->size == 0) || (bo->size == (size_t)-1)) {
491
                        pthread_mutex_unlock(&dev->bo_map_lock);
492
                        return NULL;
493
                }
494
                bo->flags = PAN_BO_SHARED;
495
                bo->gem_handle = gem_handle;
496
                p_atomic_set(&bo->refcnt, 1);
497
                // TODO map and unmap on demand?
498
                panfrost_bo_mmap(bo);
499
        } else {
500
                /* bo->refcnt == 0 can happen if the BO
501
                 * was being released but panfrost_bo_import() acquired the
502
                 * lock before panfrost_bo_unreference(). In that case, refcnt
503
                 * is 0 and we can't use panfrost_bo_reference() directly, we
504
                 * have to re-initialize the refcnt().
505
                 * Note that panfrost_bo_unreference() checks
506
                 * refcnt value just after acquiring the lock to
507
                 * make sure the object is not freed if panfrost_bo_import()
508
                 * acquired it in the meantime.
509
                 */
510
                if (p_atomic_read(&bo->refcnt) == 0)
511
                        p_atomic_set(&bo->refcnt, 1);
512
                else
513
                        panfrost_bo_reference(bo);
514
                assert(bo->ptr.cpu);
515
        }
516
        pthread_mutex_unlock(&dev->bo_map_lock);
517

518
        return bo;
519
}
520

521
int
522
panfrost_bo_export(struct panfrost_bo *bo)
523
{
524
        struct drm_prime_handle args = {
525
                .handle = bo->gem_handle,
526
                .flags = DRM_CLOEXEC,
527
        };
528

529
        int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
530
        if (ret == -1)
531
                return -1;
532

533
        bo->flags |= PAN_BO_SHARED;
534
        return args.fd;
535
}
536

537

538
Product

Resources

Company