GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/gpu/drm/i915/i915_gem_execbuffer.c
/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <[email protected]>
 *    Chris Wilson <[email protected]>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

struct change_domains {
	uint32_t invalidate_domains;
	uint32_t flush_domains;
	uint32_t flush_rings;
	uint32_t flips;
};

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations.
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped to GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read/written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
				  struct intel_ring_buffer *ring,
				  struct change_domains *cd)
{
	uint32_t invalidate_domains = 0, flush_domains = 0;

	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
	if (obj->base.pending_write_domain == 0)
		obj->base.pending_read_domains |= obj->base.read_domains;

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
	if (obj->base.write_domain &&
	    (((obj->base.write_domain != obj->base.pending_read_domains ||
	       obj->ring != ring)) ||
	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
		flush_domains |= obj->base.write_domain;
		invalidate_domains |=
			obj->base.pending_read_domains & ~obj->base.write_domain;
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
		i915_gem_clflush_object(obj);

	if (obj->base.pending_write_domain)
		cd->flips |= atomic_read(&obj->pending_flip);

	/* The actual obj->write_domain will be updated with
	 * pending_write_domain after we emit the accumulated flush for all
	 * of our domain changes in execbuffers (which clears objects'
	 * write_domains). So if we have a current write domain that we
	 * aren't changing, set pending_write_domain to that.
	 */
	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
		obj->base.pending_write_domain = obj->base.write_domain;

	cd->invalidate_domains |= invalidate_domains;
	cd->flush_domains |= flush_domains;
	if (flush_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= obj->ring->id;
	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= ring->id;
}

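/*
 * Handle->object lookup table for a single execbuffer: a small power-of-two
 * hash of hlist buckets, sized from PAGE_SIZE in eb_create() and indexed by
 * (exec_handle & eb->and).
 */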
struct eb_objects {
	int and;
	struct hlist_head buckets[0];
};

static struct eb_objects *
eb_create(int size)
{
	struct eb_objects *eb;
	int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
	while (count > size)
		count >>= 1;
	eb = kzalloc(count*sizeof(struct hlist_head) +
		     sizeof(struct eb_objects),
		     GFP_KERNEL);
	if (eb == NULL)
		return eb;

	eb->and = count - 1;
	return eb;
}

static void
eb_reset(struct eb_objects *eb)
{
	memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}

static void
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
{
	hlist_add_head(&obj->exec_node,
		       &eb->buckets[obj->exec_handle & eb->and]);
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct drm_i915_gem_object *obj;

	head = &eb->buckets[handle & eb->and];
	hlist_for_each(node, head) {
		obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
		if (obj->exec_handle == handle)
			return obj;
	}

	return NULL;
}

static void
eb_destroy(struct eb_objects *eb)
{
	kfree(eb);
}

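/*
 * Apply a single relocation: look up the target object, sanity check the
 * requested domains and offset, then write the target's GTT offset plus
 * delta into the object, either through a CPU kmap or through the GTT.
 */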
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_objects *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_offset = to_intel_bo(target_obj)->gtt_offset;

	/* The target buffer should have appeared before us in the
	 * exec_object list, so it should have a GTT space bound by now.
	 */
	if (unlikely(target_offset == 0)) {
		DRM_ERROR("No GTT space found for object %d\n",
			  reloc->target_handle);
		return ret;
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_ERROR("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains) & I915_GEM_DOMAIN_CPU)) {
		DRM_ERROR("reloc with read/write CPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
		     reloc->write_domain != target_obj->pending_write_domain)) {
		DRM_ERROR("Write domain conflict: "
			  "obj %p target %d offset %d "
			  "new %08x old %08x\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->write_domain,
			  target_obj->pending_write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_ERROR("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_ERROR("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return ret;
	}

	reloc->delta += target_offset;
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
		char *vaddr;

		vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		kunmap_atomic(vaddr);
	} else {
		struct drm_i915_private *dev_priv = dev->dev_private;
		uint32_t __iomem *reloc_entry;
		void __iomem *reloc_page;

		/* We can't wait for rendering with pagefaults disabled */
		if (obj->active && in_atomic())
			return -EFAULT;

		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
		if (ret)
			return ret;

		/* Map the page containing the relocation we're going to perform. */
		reloc->offset += obj->gtt_offset;
		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
						      reloc->offset & PAGE_MASK);
		reloc_entry = (uint32_t __iomem *)
			(reloc_page + (reloc->offset & ~PAGE_MASK));
		iowrite32(reloc->delta, reloc_entry);
		io_mapping_unmap_atomic(reloc_page);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

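/*
 * Fast-path relocation processing: copy each relocation entry from
 * userspace with the non-faulting accessors, apply it, and write the
 * updated presumed_offset back to the user's array.
 */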
static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
				    struct eb_objects *eb)
{
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
	for (i = 0; i < entry->relocation_count; i++) {
		struct drm_i915_gem_relocation_entry reloc;

		if (__copy_from_user_inatomic(&reloc,
					      user_relocs+i,
					      sizeof(reloc)))
			return -EFAULT;

		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
		if (ret)
			return ret;

		if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
					    &reloc.presumed_offset,
					    sizeof(reloc.presumed_offset)))
			return -EFAULT;
	}

	return 0;
}

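/* Slow-path variant operating on relocations already copied into kernel memory. */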
static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
					 struct eb_objects *eb,
					 struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
			     struct eb_objects *eb,
			     struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	int ret = 0;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. In such a case the page fault
	 * handler would call i915_gem_fault() and we would try to
	 * acquire the struct mutex again. Obviously this is bad and so
	 * lockdep complains vehemently.
	 */
	pagefault_disable();
	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret)
			break;
	}
	pagefault_enable();

	return ret;
}

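/*
 * Reserve GTT space for every object in the execbuffer: order the list so
 * that objects that must be mappable come first, then repeatedly unbind,
 * pin and fence until everything fits, evicting from the GTT whenever we
 * run out of space.
 */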
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct drm_file *file,
			    struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	int ret, retry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	struct list_head ordered_objects;

	INIT_LIST_HEAD(&ordered_objects);
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		entry = obj->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable =
			entry->relocation_count ? true : need_fence;

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;
			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable =
				entry->relocation_count ? true : need_fence;

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = i915_gem_object_pin(obj,
							  entry->alignment,
							  need_mappable);
			if (ret)
				goto err;

			entry++;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;

			if (!obj->gtt_space) {
				bool need_mappable =
					entry->relocation_count ? true : need_fence;

				ret = i915_gem_object_pin(obj,
							  entry->alignment,
							  need_mappable);
				if (ret)
					break;
			}

			if (has_fenced_gpu_access) {
				if (need_fence) {
					ret = i915_gem_object_get_fence(obj, ring);
					if (ret)
						break;
				} else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
					   obj->tiling_mode == I915_TILING_NONE) {
					/* XXX pipelined! */
					ret = i915_gem_object_put_fence(obj);
					if (ret)
						break;
				}
				obj->pending_fenced_gpu_access = need_fence;
			}

			entry->offset = obj->gtt_offset;
		}

		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				i915_gem_object_unpin(obj);
		}

		if (ret != -ENOSPC || retry > 1)
			return ret;

		/* First attempt, just clear anything that is purgeable.
		 * Second attempt, clear the entire GTT.
		 */
		ret = i915_gem_evict_everything(ring->dev, retry == 0);
		if (ret)
			return ret;

		retry++;
	} while (1);

err:
	obj = list_entry(obj->exec_list.prev,
			 struct drm_i915_gem_object,
			 exec_list);
	while (objects != &obj->exec_list) {
		if (obj->gtt_space)
			i915_gem_object_unpin(obj);

		obj = list_entry(obj->exec_list.prev,
				 struct drm_i915_gem_object,
				 exec_list);
	}

	return ret;
}

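/*
 * Slow path taken when the atomic relocation fast path faults: drop the
 * struct mutex, copy all relocation entries into kernel memory, then
 * reacquire the lock and the objects and apply the relocations from the
 * kernel copy.
 */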
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_file *file,
				  struct intel_ring_buffer *ring,
				  struct list_head *objects,
				  struct eb_objects *eb,
				  struct drm_i915_gem_exec_object2 *exec,
				  int count)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	int *reloc_offset;
	int i, total, ret;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(objects)) {
		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;

		user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	for (i = 0; i < count; i++) {
		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
							exec[i].handle));
		if (&obj->base == NULL) {
			DRM_ERROR("Invalid object handle %d at index %d\n",
				  exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		list_add_tail(&obj->exec_list, objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	ret = i915_gem_execbuffer_reserve(ring, file, objects);
	if (ret)
		goto err;

	list_for_each_entry(obj, objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
							       reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as they are; this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	drm_free_large(reloc_offset);
	return ret;
}

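/* Emit the cache flushes and invalidations accumulated for this execbuffer. */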
static int
i915_gem_execbuffer_flush(struct drm_device *dev,
			  uint32_t invalidate_domains,
			  uint32_t flush_domains,
			  uint32_t flush_rings)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i, ret;

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		intel_gtt_chipset_flush();

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
		for (i = 0; i < I915_NUM_RINGS; i++)
			if (flush_rings & (1 << i)) {
				ret = i915_gem_flush_ring(&dev_priv->ring[i],
							  invalidate_domains,
							  flush_domains);
				if (ret)
					return ret;
			}
	}

	return 0;
}

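/*
 * Make sure the object's previous rendering on another ring is complete
 * before it is used on the target ring, either by waiting for it or by
 * queueing a semaphore sync between the two rings.
 */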
static int
i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *to)
{
	struct intel_ring_buffer *from = obj->ring;
	u32 seqno;
	int ret, idx;

	if (from == NULL || to == from)
		return 0;

	/* XXX gpu semaphores are implicated in various hard hangs on SNB */
	if (INTEL_INFO(obj->base.dev)->gen < 6 || !i915_semaphores)
		return i915_gem_object_wait_rendering(obj);

	idx = intel_ring_sync_index(from, to);

	seqno = obj->last_rendering_seqno;
	if (seqno <= from->sync_seqno[idx])
		return 0;

	if (seqno == from->outstanding_lazy_request) {
		struct drm_i915_gem_request *request;

		request = kzalloc(sizeof(*request), GFP_KERNEL);
		if (request == NULL)
			return -ENOMEM;

		ret = i915_add_request(from, NULL, request);
		if (ret) {
			kfree(request);
			return ret;
		}

		seqno = request->seqno;
	}

	from->sync_seqno[idx] = seqno;
	return intel_ring_sync(to, from, seqno - 1);
}

static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
	u32 plane, flip_mask;
	int ret;

	/* Check for any pending flips. As we only maintain a flip queue depth
	 * of 1, we can simply insert a WAIT for the next display flip prior
	 * to executing the batch and avoid stalling the CPU.
	 */

	for (plane = 0; flips >> plane; plane++) {
		if (((flips >> plane) & 1) == 0)
			continue;

		if (plane)
			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
		else
			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	}

	return 0;
}

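/*
 * Move every object into its GPU domains for this batch: accumulate the
 * required flushes and invalidations, wait for any pending page flips,
 * and synchronise with other rings still rendering to the objects.
 */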
static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	struct change_domains cd;
	int ret;

	memset(&cd, 0, sizeof(cd));
	list_for_each_entry(obj, objects, exec_list)
		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);

	if (cd.invalidate_domains | cd.flush_domains) {
		ret = i915_gem_execbuffer_flush(ring->dev,
						cd.invalidate_domains,
						cd.flush_domains,
						cd.flush_rings);
		if (ret)
			return ret;
	}

	if (cd.flips) {
		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
		if (ret)
			return ret;
	}

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_execbuffer_sync_rings(obj, ring);
		if (ret)
			return ret;
	}

	return 0;
}

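/* The batch start offset and length must both be 8-byte aligned. */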
static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

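/*
 * Sanity check the user-supplied exec list: guard against
 * relocation_count overflow and verify that each relocation array is
 * readable (and writable, for the presumed_offset write-back).
 */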
static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	int i;

	for (i = 0; i < count; i++) {
		char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
		int length; /* limited by fault_in_pages_readable() */

		/* First check for malicious input causing overflow */
		if (exec[i].relocation_count >
		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
			return -EINVAL;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
		if (!access_ok(VERIFY_READ, ptr, length))
			return -EFAULT;

		/* we may also need to update the presumed offsets */
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;

		if (fault_in_pages_readable(ptr, length))
			return -EFAULT;
	}

	return 0;
}

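/*
 * Commit the pending domain changes and mark every object as active on
 * the ring, placing any GPU writers on the ring's gpu_write_list.
 */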
static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring,
				   u32 seqno)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, objects, exec_list) {
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;

		obj->base.read_domains = obj->base.pending_read_domains;
		obj->base.write_domain = obj->base.pending_write_domain;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring, seqno);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->pending_gpu_write = true;
			list_move_tail(&obj->gpu_write_list,
				       &ring->gpu_write_list);
			intel_mark_busy(ring->dev, obj);
		}

		trace_i915_gem_object_change_domain(obj, old_read, old_write);
	}
}

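/*
 * Flush the batch's writes and add a request so that completion of this
 * execbuffer can be tracked and the objects eventually retired.
 */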
static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_request *request;
	u32 invalidate;

	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires.
	 *
	 * The sampler always gets flushed on i965 (sigh).
	 */
	invalidate = I915_GEM_DOMAIN_COMMAND;
	if (INTEL_INFO(dev)->gen >= 4)
		invalidate |= I915_GEM_DOMAIN_SAMPLER;
	if (ring->flush(ring, invalidate, 0)) {
		i915_gem_next_request_seqno(ring);
		return;
	}

	/* Add a breadcrumb for the completion of the batch buffer */
	request = kzalloc(sizeof(*request), GFP_KERNEL);
	if (request == NULL || i915_add_request(ring, file, request)) {
		i915_gem_next_request_seqno(ring);
		kfree(request);
	}
}

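/*
 * Core of the execbuffer ioctls: validate the arguments, look up and
 * reserve all objects, apply relocations, flush caches and dispatch the
 * batch buffer to the selected ring.
 */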
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct list_head objects;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	u32 exec_start, exec_len;
	u32 seqno;
	int ret, mode, i;

	if (!i915_gem_check_execbuffer(args)) {
		DRM_ERROR("execbuf with invalid offset/length\n");
		return -EINVAL;
	}

	ret = validate_exec_list(exec, args->buffer_count);
	if (ret)
		return ret;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->ring[RCS];
		break;
	case I915_EXEC_BSD:
		if (!HAS_BSD(dev)) {
			DRM_ERROR("execbuf with invalid ring (BSD)\n");
			return -EINVAL;
		}
		ring = &dev_priv->ring[VCS];
		break;
	case I915_EXEC_BLT:
		if (!HAS_BLT(dev)) {
			DRM_ERROR("execbuf with invalid ring (BLT)\n");
			return -EINVAL;
		}
		ring = &dev_priv->ring[BCS];
		break;
	default:
		DRM_ERROR("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->ring[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4)
				return -EINVAL;

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
				return -EINVAL;

			ret = intel_ring_begin(ring, 4);
			if (ret)
				return ret;

			intel_ring_emit(ring, MI_NOOP);
			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
			intel_ring_emit(ring, INSTPM);
			intel_ring_emit(ring,
					I915_EXEC_CONSTANTS_MASK << 16 | mode);
			intel_ring_advance(ring);

			dev_priv->relative_constants_mode = mode;
		}
		break;
	default:
		DRM_ERROR("execbuf with unknown constants: %d\n", mode);
		return -EINVAL;
	}

	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->ring[RCS]) {
			DRM_ERROR("clip rectangles are only valid with the render ring\n");
			return -EINVAL;
		}

		cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
				    GFP_KERNEL);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto pre_mutex_err;
		}

		if (copy_from_user(cliprects,
				   (struct drm_clip_rect __user *)(uintptr_t)
				   args->cliprects_ptr,
				   sizeof(*cliprects)*args->num_cliprects)) {
			ret = -EFAULT;
			goto pre_mutex_err;
		}
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	if (dev_priv->mm.suspended) {
		mutex_unlock(&dev->struct_mutex);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

	eb = eb_create(args->buffer_count);
	if (eb == NULL) {
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	INIT_LIST_HEAD(&objects);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
							exec[i].handle));
		if (&obj->base == NULL) {
			DRM_ERROR("Invalid object handle %d at index %d\n",
				  exec[i].handle, i);
			/* prevent error path from reading uninitialized data */
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->exec_list)) {
			DRM_ERROR("Object %p [handle %d, index %d] appears more than once in object list\n",
				  obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		list_add_tail(&obj->exec_list, &objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
								&objects, eb,
								exec,
								args->buffer_count);
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_ERROR("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
	if (ret)
		goto err;

	seqno = i915_gem_next_request_seqno(ring);
	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
		if (seqno < ring->sync_seqno[i]) {
			/* The GPU can not handle its semaphore value wrapping,
			 * so every billion or so execbuffers, we need to stall
			 * the GPU in order to reset the counters.
			 */
			ret = i915_gpu_idle(dev);
			if (ret)
				goto err;

			BUG_ON(ring->sync_seqno[i]);
		}
	}

	trace_i915_gem_ring_dispatch(ring, seqno);

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;
	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(dev, &cliprects[i],
					    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring,
							exec_start, exec_len);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
		if (ret)
			goto err;
	}

	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);
	while (!list_empty(&objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	kfree(cliprects);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
			     (struct drm_i915_relocation_entry __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_ERROR("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = copy_to_user((struct drm_i915_relocation_entry __user *)
				   (uintptr_t) args->buffers_ptr,
				   exec_list,
				   sizeof(*exec_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_ERROR("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

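/* The execbuffer2 ioctl: copy in the exec2 list and hand it to the real function. */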
int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
			     GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
	if (exec2_list == NULL)
		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
					   args->buffer_count);
	if (exec2_list == NULL) {
		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     (struct drm_i915_relocation_entry __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_ERROR("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = copy_to_user((struct drm_i915_relocation_entry __user *)
				   (uintptr_t) args->buffers_ptr,
				   exec2_list,
				   sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_ERROR("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec2_list);
	return ret;
}