Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nouveau_buffer.c
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_surface.h"

#include "nouveau_screen.h"
#include "nouveau_context.h"
#include "nouveau_winsys.h"
#include "nouveau_fence.h"
#include "nouveau_buffer.h"
#include "nouveau_mm.h"

struct nouveau_transfer {
   struct pipe_transfer base;

   uint8_t *map;
   struct nouveau_bo *bo;
   struct nouveau_mm_allocation *mm;
   uint32_t offset;
};

static void *
nouveau_user_ptr_transfer_map(struct pipe_context *pipe,
                              struct pipe_resource *resource,
                              unsigned level, unsigned usage,
                              const struct pipe_box *box,
                              struct pipe_transfer **ptransfer);

static void
nouveau_user_ptr_transfer_unmap(struct pipe_context *pipe,
                                struct pipe_transfer *transfer);

static inline struct nouveau_transfer *
nouveau_transfer(struct pipe_transfer *transfer)
{
   return (struct nouveau_transfer *)transfer;
}

static inline bool
nouveau_buffer_malloc(struct nv04_resource *buf)
{
   if (!buf->data)
      buf->data = align_malloc(buf->base.width0, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
   return !!buf->data;
}

static inline bool
nouveau_buffer_allocate(struct nouveau_screen *screen,
                        struct nv04_resource *buf, unsigned domain)
{
   uint32_t size = align(buf->base.width0, 0x100);

   if (domain == NOUVEAU_BO_VRAM) {
      buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_GART);
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_vid, buf->base.width0);
   } else
   if (domain == NOUVEAU_BO_GART) {
      buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
                                    &buf->bo, &buf->offset);
      if (!buf->bo)
         return false;
      NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
   } else {
      assert(domain == 0);
      if (!nouveau_buffer_malloc(buf))
         return false;
   }
   buf->domain = domain;
   if (buf->bo)
      buf->address = buf->bo->offset + buf->offset;

   util_range_set_empty(&buf->valid_buffer_range);

   return true;
}

static inline void
release_allocation(struct nouveau_mm_allocation **mm,
                   struct nouveau_fence *fence)
{
   nouveau_fence_work(fence, nouveau_mm_free_work, *mm);
   (*mm) = NULL;
}

inline void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
      nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
      buf->bo = NULL;
   } else {
      nouveau_bo_ref(NULL, &buf->bo);
   }

   if (buf->mm)
      release_allocation(&buf->mm, buf->fence);

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_vid, -(uint64_t)buf->base.width0);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT_RES(buf, buf_obj_current_bytes_sys, -(uint64_t)buf->base.width0);

   buf->domain = 0;
}

static inline bool
nouveau_buffer_reallocate(struct nouveau_screen *screen,
                          struct nv04_resource *buf, unsigned domain)
{
   nouveau_buffer_release_gpu_storage(buf);

   nouveau_fence_ref(NULL, &buf->fence);
   nouveau_fence_ref(NULL, &buf->fence_wr);

   buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;

   return nouveau_buffer_allocate(screen, buf, domain);
}

void
nouveau_buffer_destroy(struct pipe_screen *pscreen,
                       struct pipe_resource *presource)
{
   struct nv04_resource *res = nv04_resource(presource);

   if (res->status & NOUVEAU_BUFFER_STATUS_USER_PTR) {
      FREE(res);
      return;
   }

   nouveau_buffer_release_gpu_storage(res);

   if (res->data && !(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY))
      align_free(res->data);

   nouveau_fence_ref(NULL, &res->fence);
   nouveau_fence_ref(NULL, &res->fence_wr);

   util_range_destroy(&res->valid_buffer_range);

   FREE(res);

   NOUVEAU_DRV_STAT(nouveau_screen(pscreen), buf_obj_current_count, -1);
}
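
/* Illustrative sketch (not part of the original file, kept under #if 0 so it
 * does not affect the build): requesting VRAM through the helpers above may
 * transparently fall back to GART when the VRAM sub-allocator has no space,
 * so callers should check buf->domain instead of assuming the requested
 * placement.  The helper name is hypothetical.
 */
#if 0
static bool
example_allocate_prefer_vram(struct nouveau_screen *screen,
                             struct nv04_resource *buf)
{
   if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
      return false;
   /* buf->domain is now either NOUVEAU_BO_VRAM or NOUVEAU_BO_GART */
   return true;
}
#endif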

/* Set up a staging area for the transfer. This is either done in "regular"
 * system memory if the driver supports push_data (nv50+) and the data is
 * small enough (and permit_pb == true), or in GART memory.
 */
static uint8_t *
nouveau_transfer_staging(struct nouveau_context *nv,
                         struct nouveau_transfer *tx, bool permit_pb)
{
   const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
   const unsigned size = align(tx->base.box.width, 4) + adj;

   if (!nv->push_data)
      permit_pb = false;

   if ((size <= nv->screen->transfer_pushbuf_threshold) && permit_pb) {
      tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
      if (tx->map)
         tx->map += adj;
   } else {
      tx->mm =
         nouveau_mm_allocate(nv->screen->mm_GART, size, &tx->bo, &tx->offset);
      if (tx->bo) {
         tx->offset += adj;
         if (!nouveau_bo_map(tx->bo, 0, NULL))
            tx->map = (uint8_t *)tx->bo->map + tx->offset;
      }
   }
   return tx->map;
}

/* Copies data from the resource into the transfer's temporary GART
 * buffer. Also updates buf->data if present.
 *
 * Maybe just migrate to GART right away if we actually need to do this. */
static bool
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   const unsigned base = tx->base.box.x;
   const unsigned size = tx->base.box.width;

   NOUVEAU_DRV_STAT(nv->screen, buf_read_bytes_staging_vid, size);

   nv->copy_data(nv, tx->bo, tx->offset, NOUVEAU_BO_GART,
                 buf->bo, buf->offset + base, buf->domain, size);

   if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
      return false;

   if (buf->data)
      memcpy(buf->data + base, tx->map, size);

   return true;
}

static void
nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
                       unsigned offset, unsigned size)
{
   struct nv04_resource *buf = nv04_resource(tx->base.resource);
   uint8_t *data = tx->map + offset;
   const unsigned base = tx->base.box.x + offset;
   const bool can_cb = !((base | size) & 3);

   if (buf->data)
      memcpy(data, buf->data + base, size);
   else
      buf->status |= NOUVEAU_BUFFER_STATUS_DIRTY;

   if (buf->domain == NOUVEAU_BO_VRAM)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_vid, size);
   if (buf->domain == NOUVEAU_BO_GART)
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_staging_sys, size);

   if (tx->bo)
      nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                    tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
   else
   if (nv->push_cb && can_cb)
      nv->push_cb(nv, buf,
                  base, size / 4, (const uint32_t *)data);
   else
      nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);

   nouveau_fence_ref(nv->screen->fence.current, &buf->fence);
   nouveau_fence_ref(nv->screen->fence.current, &buf->fence_wr);
}

/* Does a CPU wait for the buffer's backing data to become reliably accessible
 * for write/read by waiting on the buffer's relevant fences.
 */
static inline bool
nouveau_buffer_sync(struct nouveau_context *nv,
                    struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_MAP_READ) {
      if (!buf->fence_wr)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence_wr));
      if (!nouveau_fence_wait(buf->fence_wr, &nv->debug))
         return false;
   } else {
      if (!buf->fence)
         return true;
      NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                           !nouveau_fence_signalled(buf->fence));
      if (!nouveau_fence_wait(buf->fence, &nv->debug))
         return false;

      nouveau_fence_ref(NULL, &buf->fence);
   }
   nouveau_fence_ref(NULL, &buf->fence_wr);

   return true;
}
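
/* Fence bookkeeping used above: buf->fence tracks the most recent GPU access
 * of any kind, while buf->fence_wr tracks the most recent GPU write.  A CPU
 * read therefore only needs to wait on fence_wr, whereas a CPU write must
 * wait on fence (readers included), which is why nouveau_buffer_sync() above
 * and nouveau_buffer_busy() below distinguish the two cases by the rw
 * argument.
 */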

static inline bool
nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
{
   if (rw == PIPE_MAP_READ)
      return (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr));
   else
      return (buf->fence && !nouveau_fence_signalled(buf->fence));
}

static inline void
nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
                             struct pipe_resource *resource,
                             const struct pipe_box *box,
                             unsigned usage)
{
   tx->base.resource = resource;
   tx->base.level = 0;
   tx->base.usage = usage;
   tx->base.box.x = box->x;
   tx->base.box.y = 0;
   tx->base.box.z = 0;
   tx->base.box.width = box->width;
   tx->base.box.height = 1;
   tx->base.box.depth = 1;
   tx->base.stride = 0;
   tx->base.layer_stride = 0;

   tx->bo = NULL;
   tx->map = NULL;
}

static inline void
nouveau_buffer_transfer_del(struct nouveau_context *nv,
                            struct nouveau_transfer *tx)
{
   if (tx->map) {
      if (likely(tx->bo)) {
         nouveau_fence_work(nv->screen->fence.current,
                            nouveau_fence_unref_bo, tx->bo);
         if (tx->mm)
            release_allocation(&tx->mm, nv->screen->fence.current);
      } else {
         align_free(tx->map -
                    (tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK));
      }
   }
}

/* Creates a cache in system memory of the buffer data. */
static bool
nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
{
   struct nouveau_transfer tx;
   bool ret;
   tx.base.resource = &buf->base;
   tx.base.box.x = 0;
   tx.base.box.width = buf->base.width0;
   tx.bo = NULL;
   tx.map = NULL;

   if (!buf->data)
      if (!nouveau_buffer_malloc(buf))
         return false;
   if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
      return true;
   nv->stats.buf_cache_count++;

   if (!nouveau_transfer_staging(nv, &tx, false))
      return false;

   ret = nouveau_transfer_read(nv, &tx);
   if (ret) {
      buf->status &= ~NOUVEAU_BUFFER_STATUS_DIRTY;
      memcpy(buf->data, tx.map, buf->base.width0);
   }
   nouveau_buffer_transfer_del(nv, &tx);
   return ret;
}


#define NOUVEAU_TRANSFER_DISCARD \
   (PIPE_MAP_DISCARD_RANGE | PIPE_MAP_DISCARD_WHOLE_RESOURCE)

/* Checks whether it is possible to completely discard the memory backing this
 * resource. This can be useful if we would otherwise have to wait for a read
 * operation to complete on this data.
 */
static inline bool
nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
{
   if (!(usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE))
      return false;
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return false;
   if (unlikely(usage & PIPE_MAP_PERSISTENT))
      return false;
   return buf->mm && nouveau_buffer_busy(buf, PIPE_MAP_WRITE);
}
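
/* In short, the whole backing store is discarded only when the caller asked
 * for PIPE_MAP_DISCARD_WHOLE_RESOURCE, the buffer is neither shared nor
 * persistently mapped, it is sub-allocated (buf->mm), and the GPU still has
 * unsignalled work referencing it; otherwise the existing storage is kept
 * and synchronized as needed.
 */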

/* Returns a pointer to a memory area representing a window into the
 * resource's data.
 *
 * This may or may not be the _actual_ memory area of the resource. However,
 * when calling nouveau_buffer_transfer_unmap, if it wasn't the actual memory
 * area, the contents of the returned map are copied over to the resource.
 *
 * The usage indicates what the caller plans to do with the map:
 *
 *   WRITE means that the user plans to write to it
 *
 *   READ means that the user plans on reading from it
 *
 *   DISCARD_WHOLE_RESOURCE means that the whole resource is going to be
 *   potentially overwritten, and even the parts that aren't overwritten
 *   don't need to be preserved.
 *
 *   DISCARD_RANGE means that all the data in the specified range is going to
 *   be overwritten.
 *
 * The strategy for determining what kind of memory area to return is complex,
 * see comments inside of the function.
 */
void *
nouveau_buffer_transfer_map(struct pipe_context *pipe,
                            struct pipe_resource *resource,
                            unsigned level, unsigned usage,
                            const struct pipe_box *box,
                            struct pipe_transfer **ptransfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);

   if (buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR)
      return nouveau_user_ptr_transfer_map(pipe, resource, level, usage, box, ptransfer);

   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   uint8_t *map;
   int ret;

   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;

   if (usage & PIPE_MAP_READ)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_rd, 1);
   if (usage & PIPE_MAP_WRITE)
      NOUVEAU_DRV_STAT(nv->screen, buf_transfers_wr, 1);

   /* If we are trying to write to an uninitialized range, the user shouldn't
    * care what was there before. So we can treat the write as if the target
    * range were being discarded. Furthermore, even if this buffer is busy
    * due to GPU activity, the GPU cannot depend on uninitialized contents,
    * so we can also treat the write as being unsynchronized.
    */
   if ((usage & PIPE_MAP_WRITE) &&
       !util_ranges_intersect(&buf->valid_buffer_range, box->x, box->x + box->width))
      usage |= PIPE_MAP_DISCARD_RANGE | PIPE_MAP_UNSYNCHRONIZED;

   if (buf->domain == NOUVEAU_BO_VRAM) {
      if (usage & NOUVEAU_TRANSFER_DISCARD) {
         /* Set up a staging area for the user to write to. It will be copied
          * back into VRAM on unmap. */
         if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
            buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
         nouveau_transfer_staging(nv, tx, true);
      } else {
         if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            /* The GPU is currently writing to this buffer. Copy its current
             * contents to a staging area in the GART. This is necessary since
             * not the whole area being mapped is being discarded.
             */
            if (buf->data) {
               align_free(buf->data);
               buf->data = NULL;
            }
            nouveau_transfer_staging(nv, tx, false);
            nouveau_transfer_read(nv, tx);
         } else {
            /* The buffer is currently idle. Create a staging area for writes,
             * and make sure that the cached data is up-to-date. */
            if (usage & PIPE_MAP_WRITE)
               nouveau_transfer_staging(nv, tx, true);
            if (!buf->data)
               nouveau_buffer_cache(nv, buf);
         }
      }
      return buf->data ? (buf->data + box->x) : tx->map;
   } else
   if (unlikely(buf->domain == 0)) {
      return buf->data + box->x;
   }

   /* At this point, buf->domain == GART */

   if (nouveau_buffer_should_discard(buf, usage)) {
      int ref = buf->base.reference.count - 1;
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }

   /* Note that nouveau_bo_map ends up doing a nouveau_bo_wait with the
    * relevant flags. If buf->mm is set, that means this resource is part of a
    * larger slab bo that holds multiple resources. So in that case, don't
    * wait on the whole slab and instead use the logic below to return a
    * reasonable buffer for that case.
    */
   ret = nouveau_bo_map(buf->bo,
                        buf->mm ? 0 : nouveau_screen_transfer_flags(usage),
                        nv->client);
   if (ret) {
      FREE(tx);
      return NULL;
   }
   map = (uint8_t *)buf->bo->map + buf->offset + box->x;

   /* using kernel fences only if !buf->mm */
   if ((usage & PIPE_MAP_UNSYNCHRONIZED) || !buf->mm)
      return map;

   /* If the GPU is currently reading/writing this buffer, we shouldn't
    * interfere with its progress. So instead we either wait for the GPU to
    * complete its operation, or set up a staging area to perform our work in.
    */
   if (nouveau_buffer_busy(buf, usage & PIPE_MAP_READ_WRITE)) {
      if (unlikely(usage & (PIPE_MAP_DISCARD_WHOLE_RESOURCE |
                            PIPE_MAP_PERSISTENT))) {
         /* Discarding was not possible, must sync because
          * subsequent transfers might use UNSYNCHRONIZED. */
         nouveau_buffer_sync(nv, buf, usage & PIPE_MAP_READ_WRITE);
      } else
      if (usage & PIPE_MAP_DISCARD_RANGE) {
         /* The whole range is being discarded, so it doesn't matter what was
          * there before. No need to copy anything over. */
         nouveau_transfer_staging(nv, tx, true);
         map = tx->map;
      } else
      if (nouveau_buffer_busy(buf, PIPE_MAP_READ)) {
         if (usage & PIPE_MAP_DONTBLOCK)
            map = NULL;
         else
            nouveau_buffer_sync(nv, buf, usage & PIPE_MAP_READ_WRITE);
      } else {
         /* It is expected that the returned buffer be a representation of the
          * data in question, so we must copy it over from the buffer. */
         nouveau_transfer_staging(nv, tx, true);
         if (tx->map)
            memcpy(tx->map, map, box->width);
         map = tx->map;
      }
   }
   if (!map)
      FREE(tx);
   return map;
}
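
/* Illustrative sketch (not part of the original file, kept under #if 0 so it
 * does not affect the build): how a state-tracker style caller typically
 * exercises the map logic above through the u_inlines.h wrappers.  Writing
 * to a range that is not yet valid, or passing PIPE_MAP_DISCARD_RANGE, lets
 * the driver pick an unsynchronized or staged path.  The helper name is
 * hypothetical.
 */
#if 0
static void
example_write_range(struct pipe_context *pipe, struct pipe_resource *res,
                    unsigned offset, const void *data, unsigned size)
{
   struct pipe_transfer *transfer;
   void *map = pipe_buffer_map_range(pipe, res, offset, size,
                                     PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
                                     &transfer);
   if (!map)
      return;
   memcpy(map, data, size);
   pipe_buffer_unmap(pipe, transfer);
}
#endif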

void
nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
                                     struct pipe_transfer *transfer,
                                     const struct pipe_box *box)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->map)
      nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);

   util_range_add(&buf->base, &buf->valid_buffer_range,
                  tx->base.box.x + box->x,
                  tx->base.box.x + box->x + box->width);
}

/* Unmap stage of the transfer. If it was a WRITE transfer and the map that
 * was returned was not the real resource's data, this needs to transfer the
 * data back to the resource.
 *
 * Also marks vbo dirty based on the buffer's binding
 */
void
nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
                              struct pipe_transfer *transfer)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR)
      return nouveau_user_ptr_transfer_unmap(pipe, transfer);

   struct nouveau_transfer *tx = nouveau_transfer(transfer);

   if (tx->base.usage & PIPE_MAP_WRITE) {
      if (!(tx->base.usage & PIPE_MAP_FLUSH_EXPLICIT)) {
         if (tx->map)
            nouveau_transfer_write(nv, tx, 0, tx->base.box.width);

         util_range_add(&buf->base, &buf->valid_buffer_range,
                        tx->base.box.x, tx->base.box.x + tx->base.box.width);
      }

      if (likely(buf->domain)) {
         const uint8_t bind = buf->base.bind;
         /* make sure we invalidate dedicated caches */
         if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
            nv->vbo_dirty = true;
      }
   }

   if (!tx->bo && (tx->base.usage & PIPE_MAP_WRITE))
      NOUVEAU_DRV_STAT(nv->screen, buf_write_bytes_direct, tx->base.box.width);

   nouveau_buffer_transfer_del(nv, tx);
   FREE(tx);
}
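
/* Illustrative sketch (not part of the original file, kept under #if 0 so it
 * does not affect the build): with PIPE_MAP_FLUSH_EXPLICIT, only the ranges
 * passed to pipe->transfer_flush_region() are written back by
 * nouveau_buffer_transfer_flush_region(); the unmap path above then skips
 * the implicit full-range writeback.  The helper name is hypothetical.
 */
#if 0
static void
example_partial_flush(struct pipe_context *pipe, struct pipe_resource *res,
                      unsigned offset, unsigned size, const void *data)
{
   struct pipe_transfer *transfer;
   struct pipe_box box;
   uint8_t *map = pipe_buffer_map_range(pipe, res, offset, size,
                                        PIPE_MAP_WRITE |
                                        PIPE_MAP_FLUSH_EXPLICIT, &transfer);
   if (!map)
      return;
   memcpy(map, data, size);

   /* flush coordinates are relative to the mapped box */
   box.x = 0;
   box.y = 0;
   box.z = 0;
   box.width = size;
   box.height = 1;
   box.depth = 1;
   pipe->transfer_flush_region(pipe, transfer, &box);
   pipe_buffer_unmap(pipe, transfer);
}
#endif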

void
nouveau_copy_buffer(struct nouveau_context *nv,
                    struct nv04_resource *dst, unsigned dstx,
                    struct nv04_resource *src, unsigned srcx, unsigned size)
{
   assert(dst->base.target == PIPE_BUFFER && src->base.target == PIPE_BUFFER);

   assert(!(dst->status & NOUVEAU_BUFFER_STATUS_USER_PTR));
   assert(!(src->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   if (likely(dst->domain) && likely(src->domain)) {
      nv->copy_data(nv,
                    dst->bo, dst->offset + dstx, dst->domain,
                    src->bo, src->offset + srcx, src->domain, size);

      dst->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence);
      nouveau_fence_ref(nv->screen->fence.current, &dst->fence_wr);

      src->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
      nouveau_fence_ref(nv->screen->fence.current, &src->fence);
   } else {
      struct pipe_box src_box;
      src_box.x = srcx;
      src_box.y = 0;
      src_box.z = 0;
      src_box.width = size;
      src_box.height = 1;
      src_box.depth = 1;
      util_resource_copy_region(&nv->pipe,
                                &dst->base, 0, dstx, 0, 0,
                                &src->base, 0, &src_box);
   }

   util_range_add(&dst->base, &dst->valid_buffer_range, dstx, dstx + size);
}

void *
nouveau_resource_map_offset(struct nouveau_context *nv,
                            struct nv04_resource *res, uint32_t offset,
                            uint32_t flags)
{
   if (unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) ||
       unlikely(res->status & NOUVEAU_BUFFER_STATUS_USER_PTR))
      return res->data + offset;

   if (res->domain == NOUVEAU_BO_VRAM) {
      if (!res->data || (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING))
         nouveau_buffer_cache(nv, res);
   }
   if (res->domain != NOUVEAU_BO_GART)
      return res->data + offset;

   if (res->mm) {
      unsigned rw;
      rw = (flags & NOUVEAU_BO_WR) ? PIPE_MAP_WRITE : PIPE_MAP_READ;
      nouveau_buffer_sync(nv, res, rw);
      if (nouveau_bo_map(res->bo, 0, NULL))
         return NULL;
   } else {
      if (nouveau_bo_map(res->bo, flags, nv->client))
         return NULL;
   }
   return (uint8_t *)res->bo->map + res->offset + offset;
}

static void *
nouveau_user_ptr_transfer_map(struct pipe_context *pipe,
                              struct pipe_resource *resource,
                              unsigned level, unsigned usage,
                              const struct pipe_box *box,
                              struct pipe_transfer **ptransfer)
{
   struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
   if (!tx)
      return NULL;
   nouveau_buffer_transfer_init(tx, resource, box, usage);
   *ptransfer = &tx->base;
   return nv04_resource(resource)->data;
}

static void
nouveau_user_ptr_transfer_unmap(struct pipe_context *pipe,
                                struct pipe_transfer *transfer)
{
   struct nouveau_transfer *tx = nouveau_transfer(transfer);
   FREE(tx);
}
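
/* Placement policy used by nouveau_buffer_create() below: buffers that will
 * be mapped persistently/coherently always go to GART; otherwise, when the
 * bind flags are zero or allowed in both pools, the usage hint decides
 * (DEFAULT/IMMUTABLE/DYNAMIC use the screen's VRAM domain, NV_VRAM_DOMAIN,
 * while STAGING/STREAM use GART), and for the remaining cases the screen's
 * vidmem_bindings/sysmem_bindings masks pick the domain.
 */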

struct pipe_resource *
nouveau_buffer_create(struct pipe_screen *pscreen,
                      const struct pipe_resource *templ)
{
   struct nouveau_screen *screen = nouveau_screen(pscreen);
   struct nv04_resource *buffer;
   bool ret;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;

   if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                             PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
      buffer->domain = NOUVEAU_BO_GART;
   } else if (buffer->base.bind == 0 || (buffer->base.bind &
              (screen->vidmem_bindings & screen->sysmem_bindings))) {
      switch (buffer->base.usage) {
      case PIPE_USAGE_DEFAULT:
      case PIPE_USAGE_IMMUTABLE:
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_DYNAMIC:
         /* For most apps, we'd have to do staging transfers to avoid sync
          * with this usage, and GART -> GART copies would be suboptimal.
          */
         buffer->domain = NV_VRAM_DOMAIN(screen);
         break;
      case PIPE_USAGE_STAGING:
      case PIPE_USAGE_STREAM:
         buffer->domain = NOUVEAU_BO_GART;
         break;
      default:
         assert(0);
         break;
      }
   } else {
      if (buffer->base.bind & screen->vidmem_bindings)
         buffer->domain = NV_VRAM_DOMAIN(screen);
      else
      if (buffer->base.bind & screen->sysmem_bindings)
         buffer->domain = NOUVEAU_BO_GART;
   }

   ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);

   if (ret == false)
      goto fail;

   if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
      nouveau_buffer_cache(NULL, buffer);

   NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1);

   util_range_init(&buffer->valid_buffer_range);

   return &buffer->base;

fail:
   FREE(buffer);
   return NULL;
}
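
/* Illustrative sketch (not part of the original file, kept under #if 0 so it
 * does not affect the build): a template that the usage switch above would
 * place in GART on screens whose vidmem/sysmem binding masks both allow
 * vertex buffers, e.g. for a streaming vertex buffer.  The helper name and
 * field values are examples only.
 */
#if 0
static struct pipe_resource *
example_create_stream_buffer(struct pipe_screen *pscreen, unsigned bytes)
{
   struct pipe_resource templ;

   memset(&templ, 0, sizeof(templ));
   templ.target = PIPE_BUFFER;
   templ.format = PIPE_FORMAT_R8_UNORM;
   templ.bind = PIPE_BIND_VERTEX_BUFFER;
   templ.usage = PIPE_USAGE_STREAM;
   templ.width0 = bytes;
   templ.height0 = 1;
   templ.depth0 = 1;
   templ.array_size = 1;

   return nouveau_buffer_create(pscreen, &templ);
}
#endif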

struct pipe_resource *
nouveau_buffer_create_from_user(struct pipe_screen *pscreen,
                                const struct pipe_resource *templ,
                                void *user_ptr)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   buffer->base = *templ;
   /* Set address and data to the same pointer for better compatibility with
    * existing code. This is correct since the same pointer is equally
    * valid on the CPU and the GPU.
    */
   buffer->address = (uintptr_t)user_ptr;
   buffer->data = user_ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_PTR;
   buffer->base.screen = pscreen;

   pipe_reference_init(&buffer->base.reference, 1);

   return &buffer->base;
}

struct pipe_resource *
nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
                           unsigned bytes, unsigned bind)
{
   struct nv04_resource *buffer;

   buffer = CALLOC_STRUCT(nv04_resource);
   if (!buffer)
      return NULL;

   pipe_reference_init(&buffer->base.reference, 1);
   buffer->base.screen = pscreen;
   buffer->base.format = PIPE_FORMAT_R8_UNORM;
   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
   buffer->base.bind = bind;
   buffer->base.width0 = bytes;
   buffer->base.height0 = 1;
   buffer->base.depth0 = 1;

   buffer->data = ptr;
   buffer->status = NOUVEAU_BUFFER_STATUS_USER_MEMORY;

   util_range_init(&buffer->valid_buffer_range);
   util_range_add(&buffer->base, &buffer->valid_buffer_range, 0, bytes);

   return &buffer->base;
}

static inline bool
nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
                          struct nouveau_bo *bo, unsigned offset, unsigned size)
{
   if (!nouveau_buffer_malloc(buf))
      return false;
   if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->client))
      return false;
   memcpy(buf->data, (uint8_t *)bo->map + offset, size);
   return true;
}
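
/* Note the two flavours of user-pointer buffers used in this file:
 * NOUVEAU_BUFFER_STATUS_USER_PTR resources (created just above) keep
 * address == data and are mapped directly in the transfer paths, while
 * NOUVEAU_BUFFER_STATUS_USER_MEMORY resources merely wrap a CPU pointer and
 * get copied into GPU-accessible storage on demand via
 * nouveau_user_buffer_upload() or nouveau_buffer_migrate() below.
 */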

/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
bool
nouveau_buffer_migrate(struct nouveau_context *nv,
                       struct nv04_resource *buf, const unsigned new_domain)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   struct nouveau_screen *screen = nv->screen;
   struct nouveau_bo *bo;
   const unsigned old_domain = buf->domain;
   unsigned size = buf->base.width0;
   unsigned offset;
   int ret;

   assert(new_domain != old_domain);

   if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
      if (!nouveau_buffer_allocate(screen, buf, new_domain))
         return false;
      ret = nouveau_bo_map(buf->bo, 0, nv->client);
      if (ret)
         return false;
      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
      align_free(buf->data);
   } else
   if (old_domain != 0 && new_domain != 0) {
      struct nouveau_mm_allocation *mm = buf->mm;

      if (new_domain == NOUVEAU_BO_VRAM) {
         /* keep a system memory copy of our data in case we hit a fallback */
         if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
            return false;
         if (nouveau_mesa_debug)
            debug_printf("migrating %u KiB to VRAM\n", size / 1024);
      }

      offset = buf->offset;
      bo = buf->bo;
      buf->bo = NULL;
      buf->mm = NULL;
      nouveau_buffer_allocate(screen, buf, new_domain);

      nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                    bo, offset, old_domain, buf->base.width0);

      nouveau_fence_work(screen->fence.current, nouveau_fence_unref_bo, bo);
      if (mm)
         release_allocation(&mm, screen->fence.current);
   } else
   if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
      struct nouveau_transfer tx;
      if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
         return false;
      tx.base.resource = &buf->base;
      tx.base.box.x = 0;
      tx.base.box.width = buf->base.width0;
      tx.bo = NULL;
      tx.map = NULL;
      if (!nouveau_transfer_staging(nv, &tx, false))
         return false;
      nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
      nouveau_buffer_transfer_del(nv, &tx);
   } else
      return false;

   assert(buf->domain == new_domain);
   return true;
}

/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
 * We'd like to only allocate @size bytes here, but then we'd have to rebase
 * the vertex indices ...
 */
bool
nouveau_user_buffer_upload(struct nouveau_context *nv,
                           struct nv04_resource *buf,
                           unsigned base, unsigned size)
{
   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   struct nouveau_screen *screen = nouveau_screen(buf->base.screen);
   int ret;

   assert(buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY);

   buf->base.width0 = base + size;
   if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
      return false;

   ret = nouveau_bo_map(buf->bo, 0, nv->client);
   if (ret)
      return false;
   memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);

   return true;
}
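
/* Illustrative sketch (not part of the original file, kept under #if 0 so it
 * does not affect the build): wrapping a user pointer and pushing a range of
 * it into GART, the way a vertex-array fallback path might.  The helper name
 * is hypothetical, and real callers would upload only the referenced span
 * rather than the whole array.
 */
#if 0
static bool
example_upload_user_array(struct nouveau_context *nv, void *user_ptr,
                          unsigned bytes, struct nv04_resource **out)
{
   struct pipe_resource *pres =
      nouveau_user_buffer_create(nv->pipe.screen, user_ptr, bytes,
                                 PIPE_BIND_VERTEX_BUFFER);
   if (!pres)
      return false;
   *out = nv04_resource(pres);
   return nouveau_user_buffer_upload(nv, *out, 0, bytes);
}
#endif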

/* Invalidate underlying buffer storage, reset fences, reallocate to non-busy
 * buffer.
 */
void
nouveau_buffer_invalidate(struct pipe_context *pipe,
                          struct pipe_resource *resource)
{
   struct nouveau_context *nv = nouveau_context(pipe);
   struct nv04_resource *buf = nv04_resource(resource);
   int ref = buf->base.reference.count - 1;

   assert(!(buf->status & NOUVEAU_BUFFER_STATUS_USER_PTR));

   /* Shared buffers shouldn't get reallocated */
   if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
      return;

   /* If the buffer is sub-allocated and not currently being written, just
    * wipe the valid buffer range. Otherwise we have to create fresh
    * storage. (We don't keep track of fences for non-sub-allocated BO's.)
    */
   if (buf->mm && !nouveau_buffer_busy(buf, PIPE_MAP_WRITE)) {
      util_range_set_empty(&buf->valid_buffer_range);
   } else {
      nouveau_buffer_reallocate(nv->screen, buf, buf->domain);
      if (ref > 0) /* any references inside context possible ? */
         nv->invalidate_resource_storage(nv, &buf->base, ref);
   }
}


/* Scratch data allocation. */

static inline int
nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
                         unsigned size)
{
   return nouveau_bo_new(nv->screen->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
                         4096, size, NULL, pbo);
}

static void
nouveau_scratch_unref_bos(void *d)
{
   struct runout *b = d;
   int i;

   for (i = 0; i < b->nr; ++i)
      nouveau_bo_ref(NULL, &b->bo[i]);

   FREE(b);
}

void
nouveau_scratch_runout_release(struct nouveau_context *nv)
{
   if (!nv->scratch.runout)
      return;

   if (!nouveau_fence_work(nv->screen->fence.current, nouveau_scratch_unref_bos,
                           nv->scratch.runout))
      return;

   nv->scratch.end = 0;
   nv->scratch.runout = NULL;
}

/* Allocate an extra bo if we can't fit everything we need simultaneously.
 * (Could happen for very large user arrays.)
 */
static inline bool
nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
{
   int ret;
   unsigned n;

   if (nv->scratch.runout)
      n = nv->scratch.runout->nr;
   else
      n = 0;
   nv->scratch.runout = REALLOC(nv->scratch.runout, n == 0 ? 0 :
                                (sizeof(*nv->scratch.runout) + (n + 0) * sizeof(void *)),
                                sizeof(*nv->scratch.runout) + (n + 1) * sizeof(void *));
   nv->scratch.runout->nr = n + 1;
   nv->scratch.runout->bo[n] = NULL;

   ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout->bo[n], size);
   if (!ret) {
      ret = nouveau_bo_map(nv->scratch.runout->bo[n], 0, NULL);
      if (ret)
         nouveau_bo_ref(NULL, &nv->scratch.runout->bo[--nv->scratch.runout->nr]);
   }
   if (!ret) {
      nv->scratch.current = nv->scratch.runout->bo[n];
      nv->scratch.offset = 0;
      nv->scratch.end = size;
      nv->scratch.map = nv->scratch.current->map;
   }
   return !ret;
}

/* Continue to next scratch buffer, if available (no wrapping, large enough).
 * Allocate it if it has not yet been created.
 */
static inline bool
nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
{
   struct nouveau_bo *bo;
   int ret;
   const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;

   if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
      return false;
   nv->scratch.id = i;

   bo = nv->scratch.bo[i];
   if (!bo) {
      ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
      if (ret)
         return false;
      nv->scratch.bo[i] = bo;
   }
   nv->scratch.current = bo;
   nv->scratch.offset = 0;
   nv->scratch.end = nv->scratch.bo_size;

   ret = nouveau_bo_map(bo, NOUVEAU_BO_WR, nv->client);
   if (!ret)
      nv->scratch.map = bo->map;
   return !ret;
}

static bool
nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
{
   bool ret;

   ret = nouveau_scratch_next(nv, min_size);
   if (!ret)
      ret = nouveau_scratch_runout(nv, min_size);
   return ret;
}


/* Copy data to a scratch buffer and return address & bo the data resides in. */
uint64_t
nouveau_scratch_data(struct nouveau_context *nv,
                     const void *data, unsigned base, unsigned size,
                     struct nouveau_bo **bo)
{
   unsigned bgn = MAX2(base, nv->scratch.offset);
   unsigned end = bgn + size;

   if (end >= nv->scratch.end) {
      end = base + size;
      if (!nouveau_scratch_more(nv, end))
         return 0;
      bgn = base;
   }
   nv->scratch.offset = align(end, 4);

   memcpy(nv->scratch.map + bgn, (const uint8_t *)data + base, size);

   *bo = nv->scratch.current;
   return (*bo)->offset + (bgn - base);
}

void *
nouveau_scratch_get(struct nouveau_context *nv,
                    unsigned size, uint64_t *gpu_addr, struct nouveau_bo **pbo)
{
   unsigned bgn = nv->scratch.offset;
   unsigned end = nv->scratch.offset + size;

   if (end >= nv->scratch.end) {
      end = size;
      if (!nouveau_scratch_more(nv, end))
         return NULL;
      bgn = 0;
   }
   nv->scratch.offset = align(end, 4);

   *pbo = nv->scratch.current;
   *gpu_addr = nv->scratch.current->offset + bgn;
   return nv->scratch.map + bgn;
}
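
/* Illustrative sketch (not part of the original file, kept under #if 0 so it
 * does not affect the build): pushing a temporary chunk of user data through
 * the scratch allocator and retrieving the GPU address to reference from the
 * pushbuffer.  The helper name is hypothetical.
 */
#if 0
static bool
example_scratch_upload(struct nouveau_context *nv, const void *data,
                       unsigned size, uint64_t *gpu_addr,
                       struct nouveau_bo **bo)
{
   /* base == 0: copy the whole range starting at data[0] */
   *gpu_addr = nouveau_scratch_data(nv, data, 0, size, bo);
   return *gpu_addr != 0;
}
#endif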