GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
/*
 * Copyright © 2011 Marek Olšák <[email protected]>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
#include "radeon_drm_cs.h"

#include "util/u_hash_table.h"
#include "util/u_memory.h"
#include "util/simple_list.h"
#include "os/os_thread.h"
#include "os/os_mman.h"
#include "util/os_time.h"

#include "frontend/drm_driver.h"

#include <sys/ioctl.h>
#include <xf86drm.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <inttypes.h>

static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags);

static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
{
   return (struct radeon_bo *)bo;
}

struct radeon_bo_va_hole {
   struct list_head list;
   uint64_t offset;
   uint64_t size;
};

static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
{
   struct drm_radeon_gem_busy args = {0};

   args.handle = bo->handle;
   return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
                              &args, sizeof(args)) != 0;
}

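/* Busy check that also works for slab entries: a slab entry is busy as long
 * as any fence BO it still holds is busy; fences that have signalled are
 * dropped along the way. */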
static bool radeon_bo_is_busy(struct radeon_bo *bo)
{
   unsigned num_idle;
   bool busy = false;

   if (bo->handle)
      return radeon_real_bo_is_busy(bo);

   mtx_lock(&bo->rws->bo_fence_lock);
   for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
      if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
         busy = true;
         break;
      }
      radeon_ws_bo_reference(&bo->u.slab.fences[num_idle], NULL);
   }
   memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
           (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
   bo->u.slab.num_fences -= num_idle;
   mtx_unlock(&bo->rws->bo_fence_lock);

   return busy;
}

static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
{
   struct drm_radeon_gem_wait_idle args = {0};

   args.handle = bo->handle;
   while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
                          &args, sizeof(args)) == -EBUSY);
}

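/* Wait for the buffer to become idle. For slab entries this means waiting
 * for every fence in the list, dropping each one once it has signalled. */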
static void radeon_bo_wait_idle(struct radeon_bo *bo)
{
   if (bo->handle) {
      radeon_real_bo_wait_idle(bo);
   } else {
      mtx_lock(&bo->rws->bo_fence_lock);
      while (bo->u.slab.num_fences) {
         struct radeon_bo *fence = NULL;
         radeon_ws_bo_reference(&fence, bo->u.slab.fences[0]);
         mtx_unlock(&bo->rws->bo_fence_lock);

         /* Wait without holding the fence lock. */
         radeon_real_bo_wait_idle(fence);

         mtx_lock(&bo->rws->bo_fence_lock);
         if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
            radeon_ws_bo_reference(&bo->u.slab.fences[0], NULL);
            memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
                    (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
            bo->u.slab.num_fences--;
         }
         radeon_ws_bo_reference(&fence, NULL);
      }
      mtx_unlock(&bo->rws->bo_fence_lock);
   }
}

static bool radeon_bo_wait(struct radeon_winsys *rws,
                           struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   int64_t abs_timeout;

   /* No timeout. Just query. */
   if (timeout == 0)
      return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);

   abs_timeout = os_time_get_absolute_timeout(timeout);

   /* Wait if any ioctl is being submitted with this buffer. */
   if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
      return false;

   /* Infinite timeout. */
   if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
      radeon_bo_wait_idle(bo);
      return true;
   }

   /* Other timeouts need to be emulated with a loop. */
   while (radeon_bo_is_busy(bo)) {
      if (os_time_get_nano() >= abs_timeout)
         return false;
      os_time_sleep(10);
   }

   return true;
}

static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
{
   /* Zero domains the driver doesn't understand. */
   domain &= RADEON_DOMAIN_VRAM_GTT;

   /* If no domain is set, we must set something... */
   if (!domain)
      domain = RADEON_DOMAIN_VRAM_GTT;

   return domain;
}

static enum radeon_bo_domain radeon_bo_get_initial_domain(
      struct pb_buffer *buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct drm_radeon_gem_op args;

   if (bo->rws->info.drm_minor < 38)
      return RADEON_DOMAIN_VRAM_GTT;

   memset(&args, 0, sizeof(args));
   args.handle = bo->handle;
   args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;

   if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
              bo, bo->handle);
      /* Default domain as returned by get_valid_domain. */
      return RADEON_DOMAIN_VRAM_GTT;
   }

   /* GEM domains and winsys domains are defined the same. */
   return get_valid_domain(args.value);
}

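/* Simple first-fit allocator for a virtual address heap: reuse a hole from
 * the free list if one is large enough, otherwise bump heap->start to carve
 * out new space. Returns 0 if the heap is exhausted. */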
static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
                                     struct radeon_vm_heap *heap,
                                     uint64_t size, uint64_t alignment)
{
   struct radeon_bo_va_hole *hole, *n;
   uint64_t offset = 0, waste = 0;

   /* All VM address space holes will implicitly start aligned to the
    * size alignment, so we don't need to sanitize the alignment here
    */
   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   /* first look for a hole */
   LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
      offset = hole->offset;
      waste = offset % alignment;
      waste = waste ? alignment - waste : 0;
      offset += waste;
      if (offset >= (hole->offset + hole->size)) {
         continue;
      }
      if (!waste && hole->size == size) {
         offset = hole->offset;
         list_del(&hole->list);
         FREE(hole);
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) > size) {
         if (waste) {
            n = CALLOC_STRUCT(radeon_bo_va_hole);
            n->size = waste;
            n->offset = hole->offset;
            list_add(&n->list, &hole->list);
         }
         hole->size -= (size + waste);
         hole->offset += size + waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) == size) {
         hole->size = waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
   }

   offset = heap->start;
   waste = offset % alignment;
   waste = waste ? alignment - waste : 0;

   if (offset + waste + size > heap->end) {
      mtx_unlock(&heap->mutex);
      return 0;
   }

   if (waste) {
      n = CALLOC_STRUCT(radeon_bo_va_hole);
      n->size = waste;
      n->offset = offset;
      list_add(&n->list, &heap->holes);
   }
   offset += waste;
   heap->start += size + waste;
   mtx_unlock(&heap->mutex);
   return offset;
}

static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
                                       uint64_t size, uint64_t alignment)
{
   uint64_t va = 0;

   /* Try to allocate from the 64-bit address space first.
    * If it doesn't exist (start = 0) or if it doesn't have enough space,
    * fall back to the 32-bit address space.
    */
   if (ws->vm64.start)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
   if (!va)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
   return va;
}

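/* Return a virtual address range to the heap, either by lowering heap->start
 * or by inserting/merging a hole in the sorted hole list. */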
static void radeon_bomgr_free_va(const struct radeon_info *info,
                                 struct radeon_vm_heap *heap,
                                 uint64_t va, uint64_t size)
{
   struct radeon_bo_va_hole *hole = NULL;

   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   if ((va + size) == heap->start) {
      heap->start = va;
      /* Delete uppermost hole if it reaches the new top */
      if (!list_is_empty(&heap->holes)) {
         hole = container_of(heap->holes.next, struct radeon_bo_va_hole, list);
         if ((hole->offset + hole->size) == va) {
            heap->start = hole->offset;
            list_del(&hole->list);
            FREE(hole);
         }
      }
   } else {
      struct radeon_bo_va_hole *next;

      hole = container_of(&heap->holes, struct radeon_bo_va_hole, list);
      LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
         if (next->offset < va)
            break;
         hole = next;
      }

      if (&hole->list != &heap->holes) {
         /* Grow upper hole if it's adjacent */
         if (hole->offset == (va + size)) {
            hole->offset = va;
            hole->size += size;
            /* Merge lower hole if it's adjacent */
            if (next != hole && &next->list != &heap->holes &&
                (next->offset + next->size) == va) {
               next->size += hole->size;
               list_del(&hole->list);
               FREE(hole);
            }
            goto out;
         }
      }

      /* Grow lower hole if it's adjacent */
      if (next != hole && &next->list != &heap->holes &&
          (next->offset + next->size) == va) {
         next->size += size;
         goto out;
      }

      /* FIXME on allocation failure we just lose virtual address space
       * maybe print a warning
       */
      next = CALLOC_STRUCT(radeon_bo_va_hole);
      if (next) {
         next->size = size;
         next->offset = va;
         list_add(&next->list, &hole->list);
      }
   }
out:
   mtx_unlock(&heap->mutex);
}

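/* Destroy a real (non-slab) buffer: drop it from the handle tables, unmap it,
 * release its virtual address range and close the GEM handle. */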
void radeon_bo_destroy(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct radeon_drm_winsys *rws = bo->rws;
   struct drm_gem_close args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   mtx_lock(&rws->bo_handles_mutex);
   _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle);
   if (bo->flink_name) {
      _mesa_hash_table_remove_key(rws->bo_names,
                                  (void*)(uintptr_t)bo->flink_name);
   }
   mtx_unlock(&rws->bo_handles_mutex);

   if (bo->u.real.ptr)
      os_munmap(bo->u.real.ptr, bo->base.size);

   if (rws->info.r600_has_virtual_memory) {
      if (rws->va_unmap_working) {
         struct drm_radeon_gem_va va;

         va.handle = bo->handle;
         va.vm_id = 0;
         va.operation = RADEON_VA_UNMAP;
         va.flags = RADEON_VM_PAGE_READABLE |
                    RADEON_VM_PAGE_WRITEABLE |
                    RADEON_VM_PAGE_SNOOPED;
         va.offset = bo->va;

         if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
                                 sizeof(va)) != 0 &&
             va.operation == RADEON_VA_RESULT_ERROR) {
            fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
            fprintf(stderr, "radeon: size : %"PRIu64" bytes\n", bo->base.size);
            fprintf(stderr, "radeon: va : 0x%"PRIx64"\n", bo->va);
         }
      }

      radeon_bomgr_free_va(&rws->info,
                           bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
                           bo->va, bo->base.size);
   }

   /* Close object. */
   args.handle = bo->handle;
   drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);

   mtx_destroy(&bo->u.real.map_mutex);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);

   if (bo->u.real.map_count >= 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->rws->mapped_vram -= bo->base.size;
      else
         bo->rws->mapped_gtt -= bo->base.size;
      bo->rws->num_mapped_buffers--;
   }

   FREE(bo);
}

static void radeon_bo_destroy_or_cache(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(bo->handle && "must not be called for slab entries");

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(&bo->u.real.cache_entry);
   else
      radeon_bo_destroy(NULL, _buf);
}

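/* CPU-map the underlying real buffer (slab entries are redirected to their
 * backing buffer) and keep a reference-counted mapping so repeated maps are
 * cheap. */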
void *radeon_bo_do_map(struct radeon_bo *bo)
{
   struct drm_radeon_gem_mmap args = {0};
   void *ptr;
   unsigned offset;

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   if (bo->handle) {
      offset = 0;
   } else {
      offset = bo->va - bo->u.slab.real->va;
      bo = bo->u.slab.real;
   }

   /* Map the buffer. */
   mtx_lock(&bo->u.real.map_mutex);
   /* Return the pointer if it's already mapped. */
   if (bo->u.real.ptr) {
      bo->u.real.map_count++;
      mtx_unlock(&bo->u.real.map_mutex);
      return (uint8_t*)bo->u.real.ptr + offset;
   }
   args.handle = bo->handle;
   args.offset = 0;
   args.size = (uint64_t)bo->base.size;
   if (drmCommandWriteRead(bo->rws->fd,
                           DRM_RADEON_GEM_MMAP,
                           &args,
                           sizeof(args))) {
      mtx_unlock(&bo->u.real.map_mutex);
      fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
              bo, bo->handle);
      return NULL;
   }

   ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                 bo->rws->fd, args.addr_ptr);
   if (ptr == MAP_FAILED) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&bo->rws->bo_cache);

      ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                    bo->rws->fd, args.addr_ptr);
      if (ptr == MAP_FAILED) {
         mtx_unlock(&bo->u.real.map_mutex);
         fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
         return NULL;
      }
   }
   bo->u.real.ptr = ptr;
   bo->u.real.map_count = 1;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram += bo->base.size;
   else
      bo->rws->mapped_gtt += bo->base.size;
   bo->rws->num_mapped_buffers++;

   mtx_unlock(&bo->u.real.map_mutex);
   return (uint8_t*)bo->u.real.ptr + offset;
}

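/* Winsys entry point for buffer_map. Unless the caller asked for an
 * unsynchronized mapping, flush or sync the command stream that references
 * the buffer and wait according to the DONTBLOCK/WRITE flags before mapping. */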
static void *radeon_bo_map(struct radeon_winsys *rws,
                           struct pb_buffer *buf,
                           struct radeon_cmdbuf *rcs,
                           enum pipe_map_flags usage)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct radeon_drm_cs *cs = rcs ? radeon_drm_cs(rcs) : NULL;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_MAP_DONTBLOCK) {
         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
            }
            radeon_bo_wait(rws, (struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data,
                               RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
               } else {
                  /* Try to avoid busy-waiting in radeon_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     radeon_drm_cs_sync_flush(rcs);
               }
            }

            radeon_bo_wait(rws, (struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->rws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   return radeon_bo_do_map(bo);
}

static void radeon_bo_unmap(struct radeon_winsys *rws, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)_buf;

   if (bo->user_ptr)
      return;

   if (!bo->handle)
      bo = bo->u.slab.real;

   mtx_lock(&bo->u.real.map_mutex);
   if (!bo->u.real.ptr) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's not been mapped */
   }

   assert(bo->u.real.map_count);
   if (--bo->u.real.map_count) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's been mapped multiple times */
   }

   os_munmap(bo->u.real.ptr, bo->base.size);
   bo->u.real.ptr = NULL;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram -= bo->base.size;
   else
      bo->rws->mapped_gtt -= bo->base.size;
   bo->rws->num_mapped_buffers--;

   mtx_unlock(&bo->u.real.map_mutex);
}

static const struct pb_vtbl radeon_bo_vtbl = {
   radeon_bo_destroy_or_cache
   /* other functions are never called */
};

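/* Allocate a new real buffer with DRM_RADEON_GEM_CREATE and, when the kernel
 * supports virtual memory, assign and map a virtual address for it. */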
static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
                                          unsigned size, unsigned alignment,
                                          unsigned initial_domains,
                                          unsigned flags,
                                          int heap)
{
   struct radeon_bo *bo;
   struct drm_radeon_gem_create args;
   int r;

   memset(&args, 0, sizeof(args));

   assert(initial_domains);
   assert((initial_domains &
           ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);

   args.size = size;
   args.alignment = alignment;
   args.initial_domain = initial_domains;
   args.flags = 0;

   /* If VRAM is just stolen system memory, allow both VRAM and
    * GTT, whichever has free space. If a buffer is evicted from
    * VRAM to GTT, it will stay there.
    */
   if (!rws->info.has_dedicated_vram)
      args.initial_domain |= RADEON_DOMAIN_GTT;

   if (flags & RADEON_FLAG_GTT_WC)
      args.flags |= RADEON_GEM_GTT_WC;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      args.flags |= RADEON_GEM_NO_CPU_ACCESS;

   if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
      fprintf(stderr, "radeon: size : %u bytes\n", size);
      fprintf(stderr, "radeon: alignment : %u bytes\n", alignment);
      fprintf(stderr, "radeon: domains : %u\n", args.initial_domain);
      fprintf(stderr, "radeon: flags : %u\n", args.flags);
      return NULL;
   }

   assert(args.handle != 0);

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(alignment);
   bo->base.usage = 0;
   bo->base.size = size;
   bo->base.vtbl = &radeon_bo_vtbl;
   bo->rws = rws;
   bo->handle = args.handle;
   bo->va = 0;
   bo->initial_domain = initial_domains;
   bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (heap >= 0) {
      pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                          heap);
   }

   if (rws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;
      unsigned va_gap_size;

      va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;

      if (flags & RADEON_FLAG_32BIT) {
         bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
                                       size + va_gap_size, alignment);
         assert(bo->va + size < rws->vm32.end);
      } else {
         bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
      }

      va.handle = bo->handle;
      va.vm_id = 0;
      va.operation = RADEON_VA_MAP;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
         fprintf(stderr, "radeon: size : %d bytes\n", size);
         fprintf(stderr, "radeon: alignment : %d bytes\n", alignment);
         fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
         fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&rws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer *b = &bo->base;
         struct radeon_bo *old_bo =
            _mesa_hash_table_u64_search(rws->bo_vas, va.offset);

         mtx_unlock(&rws->bo_handles_mutex);
         pb_reference(&b, &old_bo->base);
         return radeon_bo(b);
      }

      _mesa_hash_table_u64_insert(rws->bo_vas, bo->va, bo);
      mtx_unlock(&rws->bo_handles_mutex);
   }

   if (initial_domains & RADEON_DOMAIN_VRAM)
      rws->allocated_vram += align(size, rws->info.gart_page_size);
   else if (initial_domains & RADEON_DOMAIN_GTT)
      rws->allocated_gtt += align(size, rws->info.gart_page_size);

   return bo;
}

bool radeon_bo_can_reclaim(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   if (radeon_bo_is_referenced_by_any_cs(bo))
      return false;

   return radeon_bo_wait(winsys, _buf, 0, RADEON_USAGE_READWRITE);
}

bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct radeon_bo *bo = container_of(entry, struct radeon_bo, u.slab.entry);

   return radeon_bo_can_reclaim(NULL, &bo->base);
}

static void radeon_bo_slab_destroy(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(!bo->handle);

   pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
}

static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
   radeon_bo_slab_destroy
   /* other functions are never called */
};

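/* pb_slabs callback: back a new slab with a 64 KB real buffer and carve it
 * into equally sized suballocated entries. */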
struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
   struct radeon_drm_winsys *ws = priv;
   struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
   enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
   enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
   unsigned base_hash;

   if (!slab)
      return NULL;

   slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
                                                    64 * 1024, 64 * 1024,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   assert(slab->buffer->handle);

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   list_inithead(&slab->base.free);

   base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];

      bo->base.alignment_log2 = util_logbase2(entry_size);
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
      bo->rws = ws;
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->hash = base_hash + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;
      bo->u.slab.entry.entry_size = entry_size;
      bo->u.slab.real = slab->buffer;

      list_addtail(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   radeon_ws_bo_reference(&slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}

void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
{
   struct radeon_slab *slab = (struct radeon_slab *)pslab;

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];
      for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
         radeon_ws_bo_reference(&bo->u.slab.fences[j], NULL);
      FREE(bo->u.slab.fences);
   }

   FREE(slab->entries);
   radeon_ws_bo_reference(&slab->buffer, NULL);
   FREE(slab);
}

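/* Translate between the kernel's encoded tile-split field and the tile split
 * size in bytes (and back, below). */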
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:  tile_split = 64;   break;
   case 1:  tile_split = 128;  break;
   case 2:  tile_split = 256;  break;
   case 3:  tile_split = 512;  break;
   default:
   case 4:  tile_split = 1024; break;
   case 5:  tile_split = 2048; break;
   case 6:  tile_split = 4096; break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:   return 0;
   case 128:  return 1;
   case 256:  return 2;
   case 512:  return 3;
   default:
   case 1024: return 4;
   case 2048: return 5;
   case 4096: return 6;
   }
}

static void radeon_bo_get_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_GET_TILING,
                       &args,
                       sizeof(args));

   if (surf) {
      if (args.tiling_flags & RADEON_TILING_MACRO)
         md->mode = RADEON_SURF_MODE_2D;
      else if (args.tiling_flags & RADEON_TILING_MICRO)
         md->mode = RADEON_SURF_MODE_1D;
      else
         md->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;

      surf->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
      surf->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
      surf->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
      surf->u.legacy.tile_split = eg_tile_split(surf->u.legacy.tile_split);
      surf->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;

      if (bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT))
         surf->flags |= RADEON_SURF_SCANOUT;
      else
         surf->flags &= ~RADEON_SURF_SCANOUT;
      return;
   }

   md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
   md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
   if (args.tiling_flags & RADEON_TILING_MICRO)
      md->u.legacy.microtile = RADEON_LAYOUT_TILED;
   else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
      md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;

   if (args.tiling_flags & RADEON_TILING_MACRO)
      md->u.legacy.macrotile = RADEON_LAYOUT_TILED;

   md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
   md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
   md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
   md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
   md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
   md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
}

static void radeon_bo_set_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);

   if (surf) {
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
         args.tiling_flags |= RADEON_TILING_MICRO;
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (surf->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
         RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (surf->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
         RADEON_TILING_EG_BANKH_SHIFT;
      if (surf->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(surf->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
            RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (surf->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
         RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !(surf->flags & RADEON_SURF_SCANOUT))
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = surf->u.legacy.level[0].nblk_x * surf->bpe;
   } else {
      if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MICRO;
      else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
         RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
         RADEON_TILING_EG_BANKH_SHIFT;
      if (md->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
            RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
         RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = md->u.legacy.stride;
   }

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_SET_TILING,
                       &args,
                       sizeof(args));
}

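/* Main buffer allocation path: suballocate small allocations from slabs when
 * possible, otherwise try the reusable cache and finally allocate a fresh BO,
 * reclaiming cached buffers on failure. */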
static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;
   int heap = -1;

   assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */

   /* Only 32-bit sizes are supported. */
   if (size > UINT_MAX)
      return NULL;

   /* VRAM implies WC. This is not optional. */
   if (domain & RADEON_DOMAIN_VRAM)
      flags |= RADEON_FLAG_GTT_WC;
   /* NO_CPU_ACCESS is valid with VRAM only. */
   if (domain != RADEON_DOMAIN_VRAM)
      flags &= ~RADEON_FLAG_NO_CPU_ACCESS;

   /* Sub-allocate small buffers from slabs. */
   if (!(flags & RADEON_FLAG_NO_SUBALLOC) &&
       size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
       ws->info.r600_has_virtual_memory &&
       alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
      struct pb_slab_entry *entry;
      int heap = radeon_get_heap_index(domain, flags);

      if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
         goto no_slab;

      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      if (!entry) {
         /* Clear the cache and try again. */
         pb_cache_release_all_buffers(&ws->bo_cache);

         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      }
      if (!entry)
         return NULL;

      bo = container_of(entry, struct radeon_bo, u.slab.entry);

      pipe_reference_init(&bo->base.reference, 1);

      return &bo->base;
   }
no_slab:

   /* This flag is irrelevant for the cache. */
   flags &= ~RADEON_FLAG_NO_SUBALLOC;

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;

   /* Shared resources don't use cached heaps. */
   if (use_reusable_pool) {
      heap = radeon_get_heap_index(domain, flags);
      assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);

      bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
                                             0, heap));
      if (bo)
         return &bo->base;
   }

   bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
   if (!bo) {
      /* Clear the cache and try again. */
      if (ws->info.r600_has_virtual_memory)
         pb_slabs_reclaim(&ws->bo_slabs);
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
      if (!bo)
         return NULL;
   }

   bo->u.real.use_reusable_pool = use_reusable_pool;

   mtx_lock(&ws->bo_handles_mutex);
   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
   mtx_unlock(&ws->bo_handles_mutex);

   return &bo->base;
}

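/* Wrap an anonymous user-memory allocation in a GEM buffer via
 * DRM_RADEON_GEM_USERPTR so it can be used like any other GTT buffer. */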
static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
                                                   void *pointer, uint64_t size)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct drm_radeon_gem_userptr args;
   struct radeon_bo *bo;
   int r;

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   memset(&args, 0, sizeof(args));
   args.addr = (uintptr_t)pointer;
   args.size = align(size, ws->info.gart_page_size);
   args.flags = RADEON_GEM_USERPTR_ANONONLY |
                RADEON_GEM_USERPTR_VALIDATE |
                RADEON_GEM_USERPTR_REGISTER;
   if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
                           &args, sizeof(args))) {
      FREE(bo);
      return NULL;
   }

   assert(args.handle != 0);

   mtx_lock(&ws->bo_handles_mutex);

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->handle = args.handle;
   bo->base.alignment_log2 = 0;
   bo->base.size = size;
   bo->base.vtbl = &radeon_bo_vtbl;
   bo->rws = ws;
   bo->user_ptr = pointer;
   bo->va = 0;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer *b = &bo->base;
         struct radeon_bo *old_bo =
            _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         pb_reference(&b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer*)bo;
}

static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
                                                      struct winsys_handle *whandle,
                                                      unsigned vm_alignment)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;
   int r;
   unsigned handle;
   uint64_t size = 0;

   /* We must maintain a list of pairs <handle, bo>, so that we always return
    * the same BO for one particular handle. If we didn't do that and created
    * more than one BO for the same handle and then relocated them in a CS,
    * we would hit a deadlock in the kernel.
    *
    * The list of pairs is guarded by a mutex, of course. */
   mtx_lock(&ws->bo_handles_mutex);

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      /* First check if there already is an existing bo for the handle. */
      bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      /* We must first get the GEM handle, as fds are unreliable keys */
      r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
      if (r)
         goto fail;
      bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
   } else {
      /* Unknown handle type */
      goto fail;
   }

   if (bo) {
      /* Increase the refcount. */
      struct pb_buffer *b = NULL;
      pb_reference(&b, &bo->base);
      goto done;
   }

   /* There isn't, create a new one. */
   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo) {
      goto fail;
   }

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      struct drm_gem_open open_arg = {};
      memset(&open_arg, 0, sizeof(open_arg));
      /* Open the BO. */
      open_arg.name = whandle->handle;
      if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
         FREE(bo);
         goto fail;
      }
      handle = open_arg.handle;
      size = open_arg.size;
      bo->flink_name = whandle->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      size = lseek(whandle->handle, 0, SEEK_END);
      /*
       * Could check errno to determine whether the kernel is new enough, but
       * it doesn't really matter why this failed, just that it failed.
       */
      if (size == (off_t)-1) {
         FREE(bo);
         goto fail;
      }
      lseek(whandle->handle, 0, SEEK_SET);
   }

   assert(handle != 0);

   bo->handle = handle;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = 0;
   bo->base.size = (unsigned) size;
   bo->base.vtbl = &radeon_bo_vtbl;
   bo->rws = ws;
   bo->va = 0;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (bo->flink_name)
      _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

done:
   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory && !bo->va) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer *b = &bo->base;
         struct radeon_bo *old_bo =
            _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         pb_reference(&b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer*)bo;

fail:
   mtx_unlock(&ws->bo_handles_mutex);
   return NULL;
}

static bool radeon_winsys_bo_get_handle(struct radeon_winsys *rws,
                                        struct pb_buffer *buffer,
                                        struct winsys_handle *whandle)
{
   struct drm_gem_flink flink;
   struct radeon_bo *bo = radeon_bo(buffer);
   struct radeon_drm_winsys *ws = bo->rws;

   /* Don't allow exports of slab entries. */
   if (!bo->handle)
      return false;

   memset(&flink, 0, sizeof(flink));

   bo->u.real.use_reusable_pool = false;

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      if (!bo->flink_name) {
         flink.handle = bo->handle;

         if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
            return false;
         }

         bo->flink_name = flink.name;

         mtx_lock(&ws->bo_handles_mutex);
         _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
         mtx_unlock(&ws->bo_handles_mutex);
      }
      whandle->handle = bo->flink_name;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
      whandle->handle = bo->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
         return false;
   }

   return true;
}

static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
{
   return ((struct radeon_bo*)buf)->user_ptr != NULL;
}

static bool radeon_winsys_bo_is_suballocated(struct pb_buffer *buf)
{
   return !((struct radeon_bo*)buf)->handle;
}

static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
{
   return ((struct radeon_bo*)buf)->va;
}

static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
{
   struct radeon_bo *bo = radeon_bo(buf);

   if (bo->handle)
      return 0;

   return bo->va - bo->u.slab.real->va;
}

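/* Hook the buffer-object implementation above into the winsys vtable. */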
void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
{
   ws->base.buffer_set_metadata = radeon_bo_set_metadata;
   ws->base.buffer_get_metadata = radeon_bo_get_metadata;
   ws->base.buffer_map = radeon_bo_map;
   ws->base.buffer_unmap = radeon_bo_unmap;
   ws->base.buffer_wait = radeon_bo_wait;
   ws->base.buffer_create = radeon_winsys_bo_create;
   ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
   ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
   ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
   ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
   ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
   ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
   ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
   ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
}