GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/gpu/drm/i915/i915_gem.c
/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <[email protected]>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
                                                           bool write);
static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
                                                                   uint64_t offset,
                                                                   uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
                                                     unsigned alignment,
                                                     bool map_and_fenceable);
static void i915_gem_clear_fence_reg(struct drm_device *dev,
                                     struct drm_i915_fence_reg *reg);
static int i915_gem_phys_pwrite(struct drm_device *dev,
                                struct drm_i915_gem_object *obj,
                                struct drm_i915_gem_pwrite *args,
                                struct drm_file *file);
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
                                    struct shrink_control *sc);

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
                                  size_t size)
{
        dev_priv->mm.object_count++;
        dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
                                     size_t size)
{
        dev_priv->mm.object_count--;
        dev_priv->mm.object_memory -= size;
}

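/* If a GPU hang is being handled, wait for the error-recovery completion
 * before proceeding; re-increment the completion count if the GPU is still
 * wedged so that later waiters are not left waiting forever.
 */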
static int
i915_gem_wait_for_error(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct completion *x = &dev_priv->error_completion;
        unsigned long flags;
        int ret;

        if (!atomic_read(&dev_priv->mm.wedged))
                return 0;

        ret = wait_for_completion_interruptible(x);
        if (ret)
                return ret;

        if (atomic_read(&dev_priv->mm.wedged)) {
                /* GPU is hung, bump the completion count to account for
                 * the token we just consumed so that we never hit zero and
                 * end up waiting upon a subsequent completion event that
                 * will never happen.
                 */
                spin_lock_irqsave(&x->wait.lock, flags);
                x->done++;
                spin_unlock_irqrestore(&x->wait.lock, flags);
        }
        return 0;
}

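/* Interruptibly take struct_mutex, first giving any pending GPU error
 * handling a chance to complete.
 */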
int i915_mutex_lock_interruptible(struct drm_device *dev)
{
        int ret;

        ret = i915_gem_wait_for_error(dev);
        if (ret)
                return ret;

        ret = mutex_lock_interruptible(&dev->struct_mutex);
        if (ret)
                return ret;

        WARN_ON(i915_verify_lists(dev));
        return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
        return obj->gtt_space && !obj->active && obj->pin_count == 0;
}

void i915_gem_do_init(struct drm_device *dev,
                      unsigned long start,
                      unsigned long mappable_end,
                      unsigned long end)
{
        drm_i915_private_t *dev_priv = dev->dev_private;

        drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);

        dev_priv->mm.gtt_start = start;
        dev_priv->mm.gtt_mappable_end = mappable_end;
        dev_priv->mm.gtt_end = end;
        dev_priv->mm.gtt_total = end - start;
        dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;

        /* Take over this portion of the GTT */
        intel_gtt_clear_range(start / PAGE_SIZE, (end - start) / PAGE_SIZE);
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
                    struct drm_file *file)
{
        struct drm_i915_gem_init *args = data;

        if (args->gtt_start >= args->gtt_end ||
            (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
                return -EINVAL;

        mutex_lock(&dev->struct_mutex);
        i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
        mutex_unlock(&dev->struct_mutex);

        return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
                            struct drm_file *file)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_get_aperture *args = data;
        struct drm_i915_gem_object *obj;
        size_t pinned;

        if (!(dev->driver->driver_features & DRIVER_GEM))
                return -ENODEV;

        pinned = 0;
        mutex_lock(&dev->struct_mutex);
        list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
                pinned += obj->gtt_space->size;
        mutex_unlock(&dev->struct_mutex);

        args->aper_size = dev_priv->mm.gtt_total;
        args->aper_available_size = args->aper_size - pinned;

        return 0;
}

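/* Allocate a new GEM object of @size bytes (rounded up to a whole page)
 * and return a handle to it; the handle holds the only reference.
 */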
static int
i915_gem_create(struct drm_file *file,
                struct drm_device *dev,
                uint64_t size,
                uint32_t *handle_p)
{
        struct drm_i915_gem_object *obj;
        int ret;
        u32 handle;

        size = roundup(size, PAGE_SIZE);

        /* Allocate the new object */
        obj = i915_gem_alloc_object(dev, size);
        if (obj == NULL)
                return -ENOMEM;

        ret = drm_gem_handle_create(file, &obj->base, &handle);
        if (ret) {
                drm_gem_object_release(&obj->base);
                i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
                kfree(obj);
                return ret;
        }

        /* drop reference from allocate - handle holds it now */
        drm_gem_object_unreference(&obj->base);
        trace_i915_gem_object_create(obj);

        *handle_p = handle;
        return 0;
}

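/* Create a "dumb" buffer suitable for scanout: derive a 64-byte aligned
 * pitch from the requested width and bpp, then allocate it as an ordinary
 * GEM object.
 */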
int
i915_gem_dumb_create(struct drm_file *file,
                     struct drm_device *dev,
                     struct drm_mode_create_dumb *args)
{
        /* have to work out size/pitch and return them */
        args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
        args->size = args->pitch * args->height;
        return i915_gem_create(file, dev,
                               args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
                          struct drm_device *dev,
                          uint32_t handle)
{
        return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file)
{
        struct drm_i915_gem_create *args = data;
        return i915_gem_create(file, dev,
                               args->size, &args->handle);
}

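/* Returns true when CPU access to the object must compensate for bit-17
 * swizzling, i.e. the object is tiled and the platform swizzles based on
 * physical address bit 17.
 */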
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
252
{
253
drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
254
255
return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
256
obj->tiling_mode != I915_TILING_NONE;
257
}
258
259
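/* Copy @length bytes between two pages at the given offsets using
 * temporary kernel mappings.
 */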
static inline void
260
slow_shmem_copy(struct page *dst_page,
261
int dst_offset,
262
struct page *src_page,
263
int src_offset,
264
int length)
265
{
266
char *dst_vaddr, *src_vaddr;
267
268
dst_vaddr = kmap(dst_page);
269
src_vaddr = kmap(src_page);
270
271
memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
272
273
kunmap(src_page);
274
kunmap(dst_page);
275
}
276
277
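/* As slow_shmem_copy(), but toggles bit 6 of the GPU-side offset on pages
 * whose physical address has bit 17 set, undoing the bit-17 swizzle.
 */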
static inline void
278
slow_shmem_bit17_copy(struct page *gpu_page,
279
int gpu_offset,
280
struct page *cpu_page,
281
int cpu_offset,
282
int length,
283
int is_read)
284
{
285
char *gpu_vaddr, *cpu_vaddr;
286
287
/* Use the unswizzled path if this page isn't affected. */
288
if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
289
if (is_read)
290
return slow_shmem_copy(cpu_page, cpu_offset,
291
gpu_page, gpu_offset, length);
292
else
293
return slow_shmem_copy(gpu_page, gpu_offset,
294
cpu_page, cpu_offset, length);
295
}
296
297
gpu_vaddr = kmap(gpu_page);
298
cpu_vaddr = kmap(cpu_page);
299
300
/* Copy the data, XORing A6 with A17 (1). The user already knows he's
301
* XORing with the other bits (A9 for Y, A9 and A10 for X)
302
*/
303
while (length > 0) {
304
int cacheline_end = ALIGN(gpu_offset + 1, 64);
305
int this_length = min(cacheline_end - gpu_offset, length);
306
int swizzled_gpu_offset = gpu_offset ^ 64;
307
308
if (is_read) {
309
memcpy(cpu_vaddr + cpu_offset,
310
gpu_vaddr + swizzled_gpu_offset,
311
this_length);
312
} else {
313
memcpy(gpu_vaddr + swizzled_gpu_offset,
314
cpu_vaddr + cpu_offset,
315
this_length);
316
}
317
cpu_offset += this_length;
318
gpu_offset += this_length;
319
length -= this_length;
320
}
321
322
kunmap(cpu_page);
323
kunmap(gpu_page);
324
}
325
326
/**
327
* This is the fast shmem pread path, which attempts to copy_to_user directly
328
* from the backing pages of the object to the user's address space. On a
329
* fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
330
*/
331
static int
332
i915_gem_shmem_pread_fast(struct drm_device *dev,
333
struct drm_i915_gem_object *obj,
334
struct drm_i915_gem_pread *args,
335
struct drm_file *file)
336
{
337
struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
338
ssize_t remain;
339
loff_t offset;
340
char __user *user_data;
341
int page_offset, page_length;
342
343
user_data = (char __user *) (uintptr_t) args->data_ptr;
344
remain = args->size;
345
346
offset = args->offset;
347
348
while (remain > 0) {
349
struct page *page;
350
char *vaddr;
351
int ret;
352
353
/* Operation in this page
354
*
355
* page_offset = offset within page
356
* page_length = bytes to copy for this page
357
*/
358
page_offset = offset_in_page(offset);
359
page_length = remain;
360
if ((page_offset + remain) > PAGE_SIZE)
361
page_length = PAGE_SIZE - page_offset;
362
363
page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
364
if (IS_ERR(page))
365
return PTR_ERR(page);
366
367
vaddr = kmap_atomic(page);
368
ret = __copy_to_user_inatomic(user_data,
369
vaddr + page_offset,
370
page_length);
371
kunmap_atomic(vaddr);
372
373
mark_page_accessed(page);
374
page_cache_release(page);
375
if (ret)
376
return -EFAULT;
377
378
remain -= page_length;
379
user_data += page_length;
380
offset += page_length;
381
}
382
383
return 0;
384
}
385
386
/**
387
* This is the fallback shmem pread path, which allocates temporary storage
388
* in kernel space to copy_to_user into outside of the struct_mutex, so we
389
* can copy out of the object's backing pages while holding the struct mutex
390
* and not take page faults.
391
*/
392
static int
393
i915_gem_shmem_pread_slow(struct drm_device *dev,
394
struct drm_i915_gem_object *obj,
395
struct drm_i915_gem_pread *args,
396
struct drm_file *file)
397
{
398
struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
399
struct mm_struct *mm = current->mm;
400
struct page **user_pages;
401
ssize_t remain;
402
loff_t offset, pinned_pages, i;
403
loff_t first_data_page, last_data_page, num_pages;
404
int shmem_page_offset;
405
int data_page_index, data_page_offset;
406
int page_length;
407
int ret;
408
uint64_t data_ptr = args->data_ptr;
409
int do_bit17_swizzling;
410
411
remain = args->size;
412
413
/* Pin the user pages containing the data. We can't fault while
414
* holding the struct mutex, yet we want to hold it while
415
* dereferencing the user data.
416
*/
417
first_data_page = data_ptr / PAGE_SIZE;
418
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
419
num_pages = last_data_page - first_data_page + 1;
420
421
user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
422
if (user_pages == NULL)
423
return -ENOMEM;
424
425
mutex_unlock(&dev->struct_mutex);
426
down_read(&mm->mmap_sem);
427
pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
428
num_pages, 1, 0, user_pages, NULL);
429
up_read(&mm->mmap_sem);
430
mutex_lock(&dev->struct_mutex);
431
if (pinned_pages < num_pages) {
432
ret = -EFAULT;
433
goto out;
434
}
435
436
ret = i915_gem_object_set_cpu_read_domain_range(obj,
437
args->offset,
438
args->size);
439
if (ret)
440
goto out;
441
442
do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
443
444
offset = args->offset;
445
446
while (remain > 0) {
447
struct page *page;
448
449
/* Operation in this page
450
*
451
* shmem_page_offset = offset within page in shmem file
452
* data_page_index = page number in get_user_pages return
453
* data_page_offset = offset within data_page_index page.
454
* page_length = bytes to copy for this page
455
*/
456
shmem_page_offset = offset_in_page(offset);
457
data_page_index = data_ptr / PAGE_SIZE - first_data_page;
458
data_page_offset = offset_in_page(data_ptr);
459
460
page_length = remain;
461
if ((shmem_page_offset + page_length) > PAGE_SIZE)
462
page_length = PAGE_SIZE - shmem_page_offset;
463
if ((data_page_offset + page_length) > PAGE_SIZE)
464
page_length = PAGE_SIZE - data_page_offset;
465
466
page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
467
if (IS_ERR(page)) {
468
ret = PTR_ERR(page);
469
goto out;
470
}
471
472
if (do_bit17_swizzling) {
473
slow_shmem_bit17_copy(page,
474
shmem_page_offset,
475
user_pages[data_page_index],
476
data_page_offset,
477
page_length,
478
1);
479
} else {
480
slow_shmem_copy(user_pages[data_page_index],
481
data_page_offset,
482
page,
483
shmem_page_offset,
484
page_length);
485
}
486
487
mark_page_accessed(page);
488
page_cache_release(page);
489
490
remain -= page_length;
491
data_ptr += page_length;
492
offset += page_length;
493
}
494
495
out:
496
for (i = 0; i < pinned_pages; i++) {
497
SetPageDirty(user_pages[i]);
498
mark_page_accessed(user_pages[i]);
499
page_cache_release(user_pages[i]);
500
}
501
drm_free_large(user_pages);
502
503
return ret;
504
}
505
506
/**
507
* Reads data from the object referenced by handle.
508
*
509
* On error, the contents of *data are undefined.
510
*/
511
int
512
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
513
struct drm_file *file)
514
{
515
struct drm_i915_gem_pread *args = data;
516
struct drm_i915_gem_object *obj;
517
int ret = 0;
518
519
if (args->size == 0)
520
return 0;
521
522
if (!access_ok(VERIFY_WRITE,
523
(char __user *)(uintptr_t)args->data_ptr,
524
args->size))
525
return -EFAULT;
526
527
ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
528
args->size);
529
if (ret)
530
return -EFAULT;
531
532
ret = i915_mutex_lock_interruptible(dev);
533
if (ret)
534
return ret;
535
536
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
537
if (&obj->base == NULL) {
538
ret = -ENOENT;
539
goto unlock;
540
}
541
542
/* Bounds check source. */
543
if (args->offset > obj->base.size ||
544
args->size > obj->base.size - args->offset) {
545
ret = -EINVAL;
546
goto out;
547
}
548
549
trace_i915_gem_object_pread(obj, args->offset, args->size);
550
551
ret = i915_gem_object_set_cpu_read_domain_range(obj,
552
args->offset,
553
args->size);
554
if (ret)
555
goto out;
556
557
ret = -EFAULT;
558
if (!i915_gem_object_needs_bit17_swizzle(obj))
559
ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
560
if (ret == -EFAULT)
561
ret = i915_gem_shmem_pread_slow(dev, obj, args, file);
562
563
out:
564
drm_gem_object_unreference(&obj->base);
565
unlock:
566
mutex_unlock(&dev->struct_mutex);
567
return ret;
568
}
569
570
/* This is the fast write path which cannot handle
571
* page faults in the source data
572
*/
573
574
static inline int
575
fast_user_write(struct io_mapping *mapping,
576
loff_t page_base, int page_offset,
577
char __user *user_data,
578
int length)
579
{
580
char *vaddr_atomic;
581
unsigned long unwritten;
582
583
vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
584
unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
585
user_data, length);
586
io_mapping_unmap_atomic(vaddr_atomic);
587
return unwritten;
588
}
589
590
/* Here's the write path which can sleep for
591
* page faults
592
*/
593
594
static inline void
595
slow_kernel_write(struct io_mapping *mapping,
596
loff_t gtt_base, int gtt_offset,
597
struct page *user_page, int user_offset,
598
int length)
599
{
600
char __iomem *dst_vaddr;
601
char *src_vaddr;
602
603
dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
604
src_vaddr = kmap(user_page);
605
606
memcpy_toio(dst_vaddr + gtt_offset,
607
src_vaddr + user_offset,
608
length);
609
610
kunmap(user_page);
611
io_mapping_unmap(dst_vaddr);
612
}
613
614
/**
615
* This is the fast pwrite path, where we copy the data directly from the
616
* user into the GTT, uncached.
617
*/
618
static int
619
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
620
struct drm_i915_gem_object *obj,
621
struct drm_i915_gem_pwrite *args,
622
struct drm_file *file)
623
{
624
drm_i915_private_t *dev_priv = dev->dev_private;
625
ssize_t remain;
626
loff_t offset, page_base;
627
char __user *user_data;
628
int page_offset, page_length;
629
630
user_data = (char __user *) (uintptr_t) args->data_ptr;
631
remain = args->size;
632
633
offset = obj->gtt_offset + args->offset;
634
635
while (remain > 0) {
636
/* Operation in this page
637
*
638
* page_base = page offset within aperture
639
* page_offset = offset within page
640
* page_length = bytes to copy for this page
641
*/
642
page_base = offset & PAGE_MASK;
643
page_offset = offset_in_page(offset);
644
page_length = remain;
645
if ((page_offset + remain) > PAGE_SIZE)
646
page_length = PAGE_SIZE - page_offset;
647
648
/* If we get a fault while copying data, then (presumably) our
649
* source page isn't available. Return the error and we'll
650
* retry in the slow path.
651
*/
652
if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
653
page_offset, user_data, page_length))
654
return -EFAULT;
655
656
remain -= page_length;
657
user_data += page_length;
658
offset += page_length;
659
}
660
661
return 0;
662
}
663
664
/**
665
* This is the fallback GTT pwrite path, which uses get_user_pages to pin
666
* the memory and maps it using kmap_atomic for copying.
667
*
668
* This code resulted in x11perf -rgb10text consuming about 10% more CPU
669
* than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
670
*/
671
static int
672
i915_gem_gtt_pwrite_slow(struct drm_device *dev,
673
struct drm_i915_gem_object *obj,
674
struct drm_i915_gem_pwrite *args,
675
struct drm_file *file)
676
{
677
drm_i915_private_t *dev_priv = dev->dev_private;
678
ssize_t remain;
679
loff_t gtt_page_base, offset;
680
loff_t first_data_page, last_data_page, num_pages;
681
loff_t pinned_pages, i;
682
struct page **user_pages;
683
struct mm_struct *mm = current->mm;
684
int gtt_page_offset, data_page_offset, data_page_index, page_length;
685
int ret;
686
uint64_t data_ptr = args->data_ptr;
687
688
remain = args->size;
689
690
/* Pin the user pages containing the data. We can't fault while
691
* holding the struct mutex, and all of the pwrite implementations
692
* want to hold it while dereferencing the user data.
693
*/
694
first_data_page = data_ptr / PAGE_SIZE;
695
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
696
num_pages = last_data_page - first_data_page + 1;
697
698
user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
699
if (user_pages == NULL)
700
return -ENOMEM;
701
702
mutex_unlock(&dev->struct_mutex);
703
down_read(&mm->mmap_sem);
704
pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
705
num_pages, 0, 0, user_pages, NULL);
706
up_read(&mm->mmap_sem);
707
mutex_lock(&dev->struct_mutex);
708
if (pinned_pages < num_pages) {
709
ret = -EFAULT;
710
goto out_unpin_pages;
711
}
712
713
ret = i915_gem_object_set_to_gtt_domain(obj, true);
714
if (ret)
715
goto out_unpin_pages;
716
717
ret = i915_gem_object_put_fence(obj);
718
if (ret)
719
goto out_unpin_pages;
720
721
offset = obj->gtt_offset + args->offset;
722
723
while (remain > 0) {
724
/* Operation in this page
725
*
726
* gtt_page_base = page offset within aperture
727
* gtt_page_offset = offset within page in aperture
728
* data_page_index = page number in get_user_pages return
729
* data_page_offset = offset within data_page_index page.
730
* page_length = bytes to copy for this page
731
*/
732
gtt_page_base = offset & PAGE_MASK;
733
gtt_page_offset = offset_in_page(offset);
734
data_page_index = data_ptr / PAGE_SIZE - first_data_page;
735
data_page_offset = offset_in_page(data_ptr);
736
737
page_length = remain;
738
if ((gtt_page_offset + page_length) > PAGE_SIZE)
739
page_length = PAGE_SIZE - gtt_page_offset;
740
if ((data_page_offset + page_length) > PAGE_SIZE)
741
page_length = PAGE_SIZE - data_page_offset;
742
743
slow_kernel_write(dev_priv->mm.gtt_mapping,
744
gtt_page_base, gtt_page_offset,
745
user_pages[data_page_index],
746
data_page_offset,
747
page_length);
748
749
remain -= page_length;
750
offset += page_length;
751
data_ptr += page_length;
752
}
753
754
out_unpin_pages:
755
for (i = 0; i < pinned_pages; i++)
756
page_cache_release(user_pages[i]);
757
drm_free_large(user_pages);
758
759
return ret;
760
}
761
762
/**
763
* This is the fast shmem pwrite path, which attempts to directly
764
* copy_from_user into the kmapped pages backing the object.
765
*/
766
static int
767
i915_gem_shmem_pwrite_fast(struct drm_device *dev,
768
struct drm_i915_gem_object *obj,
769
struct drm_i915_gem_pwrite *args,
770
struct drm_file *file)
771
{
772
struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
773
ssize_t remain;
774
loff_t offset;
775
char __user *user_data;
776
int page_offset, page_length;
777
778
user_data = (char __user *) (uintptr_t) args->data_ptr;
779
remain = args->size;
780
781
offset = args->offset;
782
obj->dirty = 1;
783
784
while (remain > 0) {
785
struct page *page;
786
char *vaddr;
787
int ret;
788
789
/* Operation in this page
790
*
791
* page_offset = offset within page
792
* page_length = bytes to copy for this page
793
*/
794
page_offset = offset_in_page(offset);
795
page_length = remain;
796
if ((page_offset + remain) > PAGE_SIZE)
797
page_length = PAGE_SIZE - page_offset;
798
799
page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
800
if (IS_ERR(page))
801
return PTR_ERR(page);
802
803
vaddr = kmap_atomic(page, KM_USER0);
804
ret = __copy_from_user_inatomic(vaddr + page_offset,
805
user_data,
806
page_length);
807
kunmap_atomic(vaddr, KM_USER0);
808
809
set_page_dirty(page);
810
mark_page_accessed(page);
811
page_cache_release(page);
812
813
/* If we get a fault while copying data, then (presumably) our
814
* source page isn't available. Return the error and we'll
815
* retry in the slow path.
816
*/
817
if (ret)
818
return -EFAULT;
819
820
remain -= page_length;
821
user_data += page_length;
822
offset += page_length;
823
}
824
825
return 0;
826
}
827
828
/**
829
* This is the fallback shmem pwrite path, which uses get_user_pages to pin
830
* the memory and maps it using kmap_atomic for copying.
831
*
832
* This avoids taking mmap_sem for faulting on the user's address while the
833
* struct_mutex is held.
834
*/
835
static int
836
i915_gem_shmem_pwrite_slow(struct drm_device *dev,
837
struct drm_i915_gem_object *obj,
838
struct drm_i915_gem_pwrite *args,
839
struct drm_file *file)
840
{
841
struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
842
struct mm_struct *mm = current->mm;
843
struct page **user_pages;
844
ssize_t remain;
845
loff_t offset, pinned_pages, i;
846
loff_t first_data_page, last_data_page, num_pages;
847
int shmem_page_offset;
848
int data_page_index, data_page_offset;
849
int page_length;
850
int ret;
851
uint64_t data_ptr = args->data_ptr;
852
int do_bit17_swizzling;
853
854
remain = args->size;
855
856
/* Pin the user pages containing the data. We can't fault while
857
* holding the struct mutex, and all of the pwrite implementations
858
* want to hold it while dereferencing the user data.
859
*/
860
first_data_page = data_ptr / PAGE_SIZE;
861
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
862
num_pages = last_data_page - first_data_page + 1;
863
864
user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
865
if (user_pages == NULL)
866
return -ENOMEM;
867
868
mutex_unlock(&dev->struct_mutex);
869
down_read(&mm->mmap_sem);
870
pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
871
num_pages, 0, 0, user_pages, NULL);
872
up_read(&mm->mmap_sem);
873
mutex_lock(&dev->struct_mutex);
874
if (pinned_pages < num_pages) {
875
ret = -EFAULT;
876
goto out;
877
}
878
879
ret = i915_gem_object_set_to_cpu_domain(obj, 1);
880
if (ret)
881
goto out;
882
883
do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
884
885
offset = args->offset;
886
obj->dirty = 1;
887
888
while (remain > 0) {
889
struct page *page;
890
891
/* Operation in this page
892
*
893
* shmem_page_offset = offset within page in shmem file
894
* data_page_index = page number in get_user_pages return
895
* data_page_offset = offset within data_page_index page.
896
* page_length = bytes to copy for this page
897
*/
898
shmem_page_offset = offset_in_page(offset);
899
data_page_index = data_ptr / PAGE_SIZE - first_data_page;
900
data_page_offset = offset_in_page(data_ptr);
901
902
page_length = remain;
903
if ((shmem_page_offset + page_length) > PAGE_SIZE)
904
page_length = PAGE_SIZE - shmem_page_offset;
905
if ((data_page_offset + page_length) > PAGE_SIZE)
906
page_length = PAGE_SIZE - data_page_offset;
907
908
page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
909
if (IS_ERR(page)) {
910
ret = PTR_ERR(page);
911
goto out;
912
}
913
914
if (do_bit17_swizzling) {
915
slow_shmem_bit17_copy(page,
916
shmem_page_offset,
917
user_pages[data_page_index],
918
data_page_offset,
919
page_length,
920
0);
921
} else {
922
slow_shmem_copy(page,
923
shmem_page_offset,
924
user_pages[data_page_index],
925
data_page_offset,
926
page_length);
927
}
928
929
set_page_dirty(page);
930
mark_page_accessed(page);
931
page_cache_release(page);
932
933
remain -= page_length;
934
data_ptr += page_length;
935
offset += page_length;
936
}
937
938
out:
939
for (i = 0; i < pinned_pages; i++)
940
page_cache_release(user_pages[i]);
941
drm_free_large(user_pages);
942
943
return ret;
944
}
945
946
/**
947
* Writes data to the object referenced by handle.
948
*
949
* On error, the contents of the buffer that were to be modified are undefined.
950
*/
951
int
952
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
953
struct drm_file *file)
954
{
955
struct drm_i915_gem_pwrite *args = data;
956
struct drm_i915_gem_object *obj;
957
int ret;
958
959
if (args->size == 0)
960
return 0;
961
962
if (!access_ok(VERIFY_READ,
963
(char __user *)(uintptr_t)args->data_ptr,
964
args->size))
965
return -EFAULT;
966
967
ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
968
args->size);
969
if (ret)
970
return -EFAULT;
971
972
ret = i915_mutex_lock_interruptible(dev);
973
if (ret)
974
return ret;
975
976
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
977
if (&obj->base == NULL) {
978
ret = -ENOENT;
979
goto unlock;
980
}
981
982
/* Bounds check destination. */
983
if (args->offset > obj->base.size ||
984
args->size > obj->base.size - args->offset) {
985
ret = -EINVAL;
986
goto out;
987
}
988
989
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
990
991
/* We can only do the GTT pwrite on untiled buffers, as otherwise
992
* it would end up going through the fenced access, and we'll get
993
* different detiling behavior between reading and writing.
994
* pread/pwrite currently are reading and writing from the CPU
995
* perspective, requiring manual detiling by the client.
996
*/
997
if (obj->phys_obj)
998
ret = i915_gem_phys_pwrite(dev, obj, args, file);
999
else if (obj->gtt_space &&
1000
obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1001
ret = i915_gem_object_pin(obj, 0, true);
1002
if (ret)
1003
goto out;
1004
1005
ret = i915_gem_object_set_to_gtt_domain(obj, true);
1006
if (ret)
1007
goto out_unpin;
1008
1009
ret = i915_gem_object_put_fence(obj);
1010
if (ret)
1011
goto out_unpin;
1012
1013
ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1014
if (ret == -EFAULT)
1015
ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);
1016
1017
out_unpin:
1018
i915_gem_object_unpin(obj);
1019
} else {
1020
ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1021
if (ret)
1022
goto out;
1023
1024
ret = -EFAULT;
1025
if (!i915_gem_object_needs_bit17_swizzle(obj))
1026
ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
1027
if (ret == -EFAULT)
1028
ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
1029
}
1030
1031
out:
1032
drm_gem_object_unreference(&obj->base);
1033
unlock:
1034
mutex_unlock(&dev->struct_mutex);
1035
return ret;
1036
}
1037
1038
/**
1039
* Called when user space prepares to use an object with the CPU, either
1040
* through the mmap ioctl's mapping or a GTT mapping.
1041
*/
1042
int
1043
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1044
struct drm_file *file)
1045
{
1046
struct drm_i915_gem_set_domain *args = data;
1047
struct drm_i915_gem_object *obj;
1048
uint32_t read_domains = args->read_domains;
1049
uint32_t write_domain = args->write_domain;
1050
int ret;
1051
1052
if (!(dev->driver->driver_features & DRIVER_GEM))
1053
return -ENODEV;
1054
1055
/* Only handle setting domains to types used by the CPU. */
1056
if (write_domain & I915_GEM_GPU_DOMAINS)
1057
return -EINVAL;
1058
1059
if (read_domains & I915_GEM_GPU_DOMAINS)
1060
return -EINVAL;
1061
1062
/* Having something in the write domain implies it's in the read
1063
* domain, and only that read domain. Enforce that in the request.
1064
*/
1065
if (write_domain != 0 && read_domains != write_domain)
1066
return -EINVAL;
1067
1068
ret = i915_mutex_lock_interruptible(dev);
1069
if (ret)
1070
return ret;
1071
1072
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1073
if (&obj->base == NULL) {
1074
ret = -ENOENT;
1075
goto unlock;
1076
}
1077
1078
if (read_domains & I915_GEM_DOMAIN_GTT) {
1079
ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1080
1081
/* Silently promote "you're not bound, there was nothing to do"
1082
* to success, since the client was just asking us to
1083
* make sure everything was done.
1084
*/
1085
if (ret == -EINVAL)
1086
ret = 0;
1087
} else {
1088
ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1089
}
1090
1091
drm_gem_object_unreference(&obj->base);
1092
unlock:
1093
mutex_unlock(&dev->struct_mutex);
1094
return ret;
1095
}
1096
1097
/**
1098
* Called when user space has done writes to this buffer
1099
*/
1100
int
1101
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1102
struct drm_file *file)
1103
{
1104
struct drm_i915_gem_sw_finish *args = data;
1105
struct drm_i915_gem_object *obj;
1106
int ret = 0;
1107
1108
if (!(dev->driver->driver_features & DRIVER_GEM))
1109
return -ENODEV;
1110
1111
ret = i915_mutex_lock_interruptible(dev);
1112
if (ret)
1113
return ret;
1114
1115
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1116
if (&obj->base == NULL) {
1117
ret = -ENOENT;
1118
goto unlock;
1119
}
1120
1121
/* Pinned buffers may be scanout, so flush the cache */
1122
if (obj->pin_count)
1123
i915_gem_object_flush_cpu_write_domain(obj);
1124
1125
drm_gem_object_unreference(&obj->base);
1126
unlock:
1127
mutex_unlock(&dev->struct_mutex);
1128
return ret;
1129
}
1130
1131
/**
1132
* Maps the contents of an object, returning the address it is mapped
1133
* into.
1134
*
1135
* While the mapping holds a reference on the contents of the object, it doesn't
1136
* imply a ref on the object itself.
1137
*/
1138
int
1139
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1140
struct drm_file *file)
1141
{
1142
struct drm_i915_private *dev_priv = dev->dev_private;
1143
struct drm_i915_gem_mmap *args = data;
1144
struct drm_gem_object *obj;
1145
unsigned long addr;
1146
1147
if (!(dev->driver->driver_features & DRIVER_GEM))
1148
return -ENODEV;
1149
1150
obj = drm_gem_object_lookup(dev, file, args->handle);
1151
if (obj == NULL)
1152
return -ENOENT;
1153
1154
if (obj->size > dev_priv->mm.gtt_mappable_end) {
1155
drm_gem_object_unreference_unlocked(obj);
1156
return -E2BIG;
1157
}
1158
1159
down_write(&current->mm->mmap_sem);
1160
addr = do_mmap(obj->filp, 0, args->size,
1161
PROT_READ | PROT_WRITE, MAP_SHARED,
1162
args->offset);
1163
up_write(&current->mm->mmap_sem);
1164
drm_gem_object_unreference_unlocked(obj);
1165
if (IS_ERR((void *)addr))
1166
return addr;
1167
1168
args->addr_ptr = (uint64_t) addr;
1169
1170
return 0;
1171
}
1172
1173
/**
1174
* i915_gem_fault - fault a page into the GTT
1175
* @vma: VMA in question
1176
* @vmf: fault info
1177
*
1178
* The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1179
* from userspace. The fault handler takes care of binding the object to
1180
* the GTT (if needed), allocating and programming a fence register (again,
1181
* only if needed based on whether the old reg is still valid or the object
1182
* is tiled) and inserting a new PTE into the faulting process.
1183
*
1184
* Note that the faulting process may involve evicting existing objects
1185
* from the GTT and/or fence registers to make room. So performance may
1186
* suffer if the GTT working set is large or there are few fence registers
1187
* left.
1188
*/
1189
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1190
{
1191
struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1192
struct drm_device *dev = obj->base.dev;
1193
drm_i915_private_t *dev_priv = dev->dev_private;
1194
pgoff_t page_offset;
1195
unsigned long pfn;
1196
int ret = 0;
1197
bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1198
1199
/* We don't use vmf->pgoff since that has the fake offset */
1200
page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1201
PAGE_SHIFT;
1202
1203
ret = i915_mutex_lock_interruptible(dev);
1204
if (ret)
1205
goto out;
1206
1207
trace_i915_gem_object_fault(obj, page_offset, true, write);
1208
1209
/* Now bind it into the GTT if needed */
1210
if (!obj->map_and_fenceable) {
1211
ret = i915_gem_object_unbind(obj);
1212
if (ret)
1213
goto unlock;
1214
}
1215
if (!obj->gtt_space) {
1216
ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1217
if (ret)
1218
goto unlock;
1219
1220
ret = i915_gem_object_set_to_gtt_domain(obj, write);
1221
if (ret)
1222
goto unlock;
1223
}
1224
1225
if (obj->tiling_mode == I915_TILING_NONE)
1226
ret = i915_gem_object_put_fence(obj);
1227
else
1228
ret = i915_gem_object_get_fence(obj, NULL);
1229
if (ret)
1230
goto unlock;
1231
1232
if (i915_gem_object_is_inactive(obj))
1233
list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1234
1235
obj->fault_mappable = true;
1236
1237
pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
1238
page_offset;
1239
1240
/* Finally, remap it using the new GTT offset */
1241
ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1242
unlock:
1243
mutex_unlock(&dev->struct_mutex);
1244
out:
1245
switch (ret) {
1246
case -EIO:
1247
case -EAGAIN:
1248
/* Give the error handler a chance to run and move the
1249
* objects off the GPU active list. Next time we service the
1250
* fault, we should be able to transition the page into the
1251
* GTT without touching the GPU (and so avoid further
1252
* EIO/EAGAIN). If the GPU is wedged, then there is no issue
1253
* with coherency, just lost writes.
1254
*/
1255
set_need_resched();
1256
case 0:
1257
case -ERESTARTSYS:
1258
case -EINTR:
1259
return VM_FAULT_NOPAGE;
1260
case -ENOMEM:
1261
return VM_FAULT_OOM;
1262
default:
1263
return VM_FAULT_SIGBUS;
1264
}
1265
}
1266
1267
/**
1268
* i915_gem_create_mmap_offset - create a fake mmap offset for an object
1269
* @obj: obj in question
1270
*
1271
* GEM memory mapping works by handing back to userspace a fake mmap offset
1272
* it can use in a subsequent mmap(2) call. The DRM core code then looks
1273
* up the object based on the offset and sets up the various memory mapping
1274
* structures.
1275
*
1276
* This routine allocates and attaches a fake offset for @obj.
1277
*/
1278
static int
1279
i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
1280
{
1281
struct drm_device *dev = obj->base.dev;
1282
struct drm_gem_mm *mm = dev->mm_private;
1283
struct drm_map_list *list;
1284
struct drm_local_map *map;
1285
int ret = 0;
1286
1287
/* Set the object up for mmap'ing */
1288
list = &obj->base.map_list;
1289
list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1290
if (!list->map)
1291
return -ENOMEM;
1292
1293
map = list->map;
1294
map->type = _DRM_GEM;
1295
map->size = obj->base.size;
1296
map->handle = obj;
1297
1298
/* Get a DRM GEM mmap offset allocated... */
1299
list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1300
obj->base.size / PAGE_SIZE,
1301
0, 0);
1302
if (!list->file_offset_node) {
1303
DRM_ERROR("failed to allocate offset for bo %d\n",
1304
obj->base.name);
1305
ret = -ENOSPC;
1306
goto out_free_list;
1307
}
1308
1309
list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1310
obj->base.size / PAGE_SIZE,
1311
0);
1312
if (!list->file_offset_node) {
1313
ret = -ENOMEM;
1314
goto out_free_list;
1315
}
1316
1317
list->hash.key = list->file_offset_node->start;
1318
ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
1319
if (ret) {
1320
DRM_ERROR("failed to add to map hash\n");
1321
goto out_free_mm;
1322
}
1323
1324
return 0;
1325
1326
out_free_mm:
1327
drm_mm_put_block(list->file_offset_node);
1328
out_free_list:
1329
kfree(list->map);
1330
list->map = NULL;
1331
1332
return ret;
1333
}
1334
1335
/**
1336
* i915_gem_release_mmap - remove physical page mappings
1337
* @obj: obj in question
1338
*
1339
* Preserve the reservation of the mmapping with the DRM core code, but
1340
* relinquish ownership of the pages back to the system.
1341
*
1342
* It is vital that we remove the page mapping if we have mapped a tiled
1343
* object through the GTT and then lose the fence register due to
1344
* resource pressure. Similarly if the object has been moved out of the
1345
* aperture, then pages mapped into userspace must be revoked. Removing the
1346
* mapping will then trigger a page fault on the next user access, allowing
1347
* fixup by i915_gem_fault().
1348
*/
1349
void
1350
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1351
{
1352
if (!obj->fault_mappable)
1353
return;
1354
1355
if (obj->base.dev->dev_mapping)
1356
unmap_mapping_range(obj->base.dev->dev_mapping,
1357
(loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1358
obj->base.size, 1);
1359
1360
obj->fault_mappable = false;
1361
}
1362
1363
static void
1364
i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
1365
{
1366
struct drm_device *dev = obj->base.dev;
1367
struct drm_gem_mm *mm = dev->mm_private;
1368
struct drm_map_list *list = &obj->base.map_list;
1369
1370
drm_ht_remove_item(&mm->offset_hash, &list->hash);
1371
drm_mm_put_block(list->file_offset_node);
1372
kfree(list->map);
1373
list->map = NULL;
1374
}
1375
1376
static uint32_t
1377
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1378
{
1379
uint32_t gtt_size;
1380
1381
if (INTEL_INFO(dev)->gen >= 4 ||
1382
tiling_mode == I915_TILING_NONE)
1383
return size;
1384
1385
/* Previous chips need a power-of-two fence region when tiling */
1386
if (INTEL_INFO(dev)->gen == 3)
1387
gtt_size = 1024*1024;
1388
else
1389
gtt_size = 512*1024;
1390
1391
while (gtt_size < size)
1392
gtt_size <<= 1;
1393
1394
return gtt_size;
1395
}
1396
1397
/**
1398
* i915_gem_get_gtt_alignment - return required GTT alignment for an object
1399
* @obj: object to check
1400
*
1401
* Return the required GTT alignment for an object, taking into account
1402
* potential fence register mapping.
1403
*/
1404
static uint32_t
1405
i915_gem_get_gtt_alignment(struct drm_device *dev,
1406
uint32_t size,
1407
int tiling_mode)
1408
{
1409
/*
1410
* Minimum alignment is 4k (GTT page size), but might be greater
1411
* if a fence register is needed for the object.
1412
*/
1413
if (INTEL_INFO(dev)->gen >= 4 ||
1414
tiling_mode == I915_TILING_NONE)
1415
return 4096;
1416
1417
/*
1418
* Previous chips need to be aligned to the size of the smallest
1419
* fence register that can contain the object.
1420
*/
1421
return i915_gem_get_gtt_size(dev, size, tiling_mode);
1422
}
1423
1424
/**
1425
* i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1426
* unfenced object
1427
* @dev: the device
1428
* @size: size of the object
1429
* @tiling_mode: tiling mode of the object
1430
*
1431
* Return the required GTT alignment for an object, only taking into account
1432
* unfenced tiled surface requirements.
1433
*/
1434
uint32_t
1435
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
1436
uint32_t size,
1437
int tiling_mode)
1438
{
1439
/*
1440
* Minimum alignment is 4k (GTT page size) for sane hw.
1441
*/
1442
if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1443
tiling_mode == I915_TILING_NONE)
1444
return 4096;
1445
1446
/* Previous hardware however needs to be aligned to a power-of-two
1447
* tile height. The simplest method for determining this is to reuse
1448
* the power-of-two tile object size.
1449
*/
1450
return i915_gem_get_gtt_size(dev, size, tiling_mode);
1451
}
1452
1453
int
1454
i915_gem_mmap_gtt(struct drm_file *file,
1455
struct drm_device *dev,
1456
uint32_t handle,
1457
uint64_t *offset)
1458
{
1459
struct drm_i915_private *dev_priv = dev->dev_private;
1460
struct drm_i915_gem_object *obj;
1461
int ret;
1462
1463
if (!(dev->driver->driver_features & DRIVER_GEM))
1464
return -ENODEV;
1465
1466
ret = i915_mutex_lock_interruptible(dev);
1467
if (ret)
1468
return ret;
1469
1470
obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1471
if (&obj->base == NULL) {
1472
ret = -ENOENT;
1473
goto unlock;
1474
}
1475
1476
if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1477
ret = -E2BIG;
1478
goto unlock;
1479
}
1480
1481
if (obj->madv != I915_MADV_WILLNEED) {
1482
DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1483
ret = -EINVAL;
1484
goto out;
1485
}
1486
1487
if (!obj->base.map_list.map) {
1488
ret = i915_gem_create_mmap_offset(obj);
1489
if (ret)
1490
goto out;
1491
}
1492
1493
*offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
1494
1495
out:
1496
drm_gem_object_unreference(&obj->base);
1497
unlock:
1498
mutex_unlock(&dev->struct_mutex);
1499
return ret;
1500
}
1501
1502
/**
1503
* i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1504
* @dev: DRM device
1505
* @data: GTT mapping ioctl data
1506
* @file: GEM object info
1507
*
1508
* Simply returns the fake offset to userspace so it can mmap it.
1509
* The mmap call will end up in drm_gem_mmap(), which will set things
1510
* up so we can get faults in the handler above.
1511
*
1512
* The fault handler will take care of binding the object into the GTT
1513
* (since it may have been evicted to make room for something), allocating
1514
* a fence register, and mapping the appropriate aperture address into
1515
* userspace.
1516
*/
1517
int
1518
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1519
struct drm_file *file)
1520
{
1521
struct drm_i915_gem_mmap_gtt *args = data;
1522
1523
if (!(dev->driver->driver_features & DRIVER_GEM))
1524
return -ENODEV;
1525
1526
return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
1527
}
1528
1529
1530
static int
1531
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1532
gfp_t gfpmask)
1533
{
1534
int page_count, i;
1535
struct address_space *mapping;
1536
struct inode *inode;
1537
struct page *page;
1538
1539
/* Get the list of pages out of our struct file. They'll be pinned
1540
* at this point until we release them.
1541
*/
1542
page_count = obj->base.size / PAGE_SIZE;
1543
BUG_ON(obj->pages != NULL);
1544
obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
1545
if (obj->pages == NULL)
1546
return -ENOMEM;
1547
1548
inode = obj->base.filp->f_path.dentry->d_inode;
1549
mapping = inode->i_mapping;
1550
gfpmask |= mapping_gfp_mask(mapping);
1551
1552
for (i = 0; i < page_count; i++) {
1553
page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
1554
if (IS_ERR(page))
1555
goto err_pages;
1556
1557
obj->pages[i] = page;
1558
}
1559
1560
if (obj->tiling_mode != I915_TILING_NONE)
1561
i915_gem_object_do_bit_17_swizzle(obj);
1562
1563
return 0;
1564
1565
err_pages:
1566
while (i--)
1567
page_cache_release(obj->pages[i]);
1568
1569
drm_free_large(obj->pages);
1570
obj->pages = NULL;
1571
return PTR_ERR(page);
1572
}
1573
1574
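/* Drop the page references taken by i915_gem_object_get_pages_gtt(),
 * marking pages dirty and accessed as appropriate before releasing them.
 */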
static void
1575
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1576
{
1577
int page_count = obj->base.size / PAGE_SIZE;
1578
int i;
1579
1580
BUG_ON(obj->madv == __I915_MADV_PURGED);
1581
1582
if (obj->tiling_mode != I915_TILING_NONE)
1583
i915_gem_object_save_bit_17_swizzle(obj);
1584
1585
if (obj->madv == I915_MADV_DONTNEED)
1586
obj->dirty = 0;
1587
1588
for (i = 0; i < page_count; i++) {
1589
if (obj->dirty)
1590
set_page_dirty(obj->pages[i]);
1591
1592
if (obj->madv == I915_MADV_WILLNEED)
1593
mark_page_accessed(obj->pages[i]);
1594
1595
page_cache_release(obj->pages[i]);
1596
}
1597
obj->dirty = 0;
1598
1599
drm_free_large(obj->pages);
1600
obj->pages = NULL;
1601
}
1602
1603
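/* Mark the object as busy on @ring for the request with @seqno: take a
 * reference on first activation, move it to the tails of the active lists
 * and record the fence state used by this request.
 */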
void
1604
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1605
struct intel_ring_buffer *ring,
1606
u32 seqno)
1607
{
1608
struct drm_device *dev = obj->base.dev;
1609
struct drm_i915_private *dev_priv = dev->dev_private;
1610
1611
BUG_ON(ring == NULL);
1612
obj->ring = ring;
1613
1614
/* Add a reference if we're newly entering the active list. */
1615
if (!obj->active) {
1616
drm_gem_object_reference(&obj->base);
1617
obj->active = 1;
1618
}
1619
1620
/* Move from whatever list we were on to the tail of execution. */
1621
list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1622
list_move_tail(&obj->ring_list, &ring->active_list);
1623
1624
obj->last_rendering_seqno = seqno;
1625
if (obj->fenced_gpu_access) {
1626
struct drm_i915_fence_reg *reg;
1627
1628
BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);
1629
1630
obj->last_fenced_seqno = seqno;
1631
obj->last_fenced_ring = ring;
1632
1633
reg = &dev_priv->fence_regs[obj->fence_reg];
1634
list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
1635
}
1636
}
1637
1638
static void
1639
i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
1640
{
1641
list_del_init(&obj->ring_list);
1642
obj->last_rendering_seqno = 0;
1643
}
1644
1645
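/* Retire an active object that still has an unflushed GPU write domain by
 * moving it onto the flushing list.
 */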
static void
1646
i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
1647
{
1648
struct drm_device *dev = obj->base.dev;
1649
drm_i915_private_t *dev_priv = dev->dev_private;
1650
1651
BUG_ON(!obj->active);
1652
list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
1653
1654
i915_gem_object_move_off_active(obj);
1655
}
1656
1657
static void
1658
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1659
{
1660
struct drm_device *dev = obj->base.dev;
1661
struct drm_i915_private *dev_priv = dev->dev_private;
1662
1663
if (obj->pin_count != 0)
1664
list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
1665
else
1666
list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1667
1668
BUG_ON(!list_empty(&obj->gpu_write_list));
1669
BUG_ON(!obj->active);
1670
obj->ring = NULL;
1671
1672
i915_gem_object_move_off_active(obj);
1673
obj->fenced_gpu_access = false;
1674
1675
obj->active = 0;
1676
obj->pending_gpu_write = false;
1677
drm_gem_object_unreference(&obj->base);
1678
1679
WARN_ON(i915_verify_lists(dev));
1680
}
1681
1682
/* Immediately discard the backing storage */
1683
static void
1684
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1685
{
1686
struct inode *inode;
1687
1688
/* Our goal here is to return as much of the memory as
1689
* is possible back to the system as we are called from OOM.
1690
* To do this we must instruct the shmfs to drop all of its
1691
* backing pages, *now*.
1692
*/
1693
inode = obj->base.filp->f_path.dentry->d_inode;
1694
shmem_truncate_range(inode, 0, (loff_t)-1);
1695
1696
obj->madv = __I915_MADV_PURGED;
1697
}
1698
1699
static inline int
1700
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1701
{
1702
return obj->madv == I915_MADV_DONTNEED;
1703
}
1704
1705
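/* After a flush covering @flush_domains, clear the write domains of the
 * objects queued on the ring's gpu_write_list and move them to the active
 * list under the next request's seqno.
 */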
static void
1706
i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
1707
uint32_t flush_domains)
1708
{
1709
struct drm_i915_gem_object *obj, *next;
1710
1711
list_for_each_entry_safe(obj, next,
1712
&ring->gpu_write_list,
1713
gpu_write_list) {
1714
if (obj->base.write_domain & flush_domains) {
1715
uint32_t old_write_domain = obj->base.write_domain;
1716
1717
obj->base.write_domain = 0;
1718
list_del_init(&obj->gpu_write_list);
1719
i915_gem_object_move_to_active(obj, ring,
1720
i915_gem_next_request_seqno(ring));
1721
1722
trace_i915_gem_object_change_domain(obj,
1723
obj->base.read_domains,
1724
old_write_domain);
1725
}
1726
}
1727
}
1728
1729
int
1730
i915_add_request(struct intel_ring_buffer *ring,
1731
struct drm_file *file,
1732
struct drm_i915_gem_request *request)
1733
{
1734
drm_i915_private_t *dev_priv = ring->dev->dev_private;
1735
uint32_t seqno;
1736
int was_empty;
1737
int ret;
1738
1739
BUG_ON(request == NULL);
1740
1741
ret = ring->add_request(ring, &seqno);
1742
if (ret)
1743
return ret;
1744
1745
trace_i915_gem_request_add(ring, seqno);
1746
1747
request->seqno = seqno;
1748
request->ring = ring;
1749
request->emitted_jiffies = jiffies;
1750
was_empty = list_empty(&ring->request_list);
1751
list_add_tail(&request->list, &ring->request_list);
1752
1753
if (file) {
1754
struct drm_i915_file_private *file_priv = file->driver_priv;
1755
1756
spin_lock(&file_priv->mm.lock);
1757
request->file_priv = file_priv;
1758
list_add_tail(&request->client_list,
1759
&file_priv->mm.request_list);
1760
spin_unlock(&file_priv->mm.lock);
1761
}
1762
1763
ring->outstanding_lazy_request = false;
1764
1765
if (!dev_priv->mm.suspended) {
1766
mod_timer(&dev_priv->hangcheck_timer,
1767
jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
1768
if (was_empty)
1769
queue_delayed_work(dev_priv->wq,
1770
&dev_priv->mm.retire_work, HZ);
1771
}
1772
return 0;
1773
}
1774
1775
static inline void
1776
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1777
{
1778
struct drm_i915_file_private *file_priv = request->file_priv;
1779
1780
if (!file_priv)
1781
return;
1782
1783
spin_lock(&file_priv->mm.lock);
1784
if (request->file_priv) {
1785
list_del(&request->client_list);
1786
request->file_priv = NULL;
1787
}
1788
spin_unlock(&file_priv->mm.lock);
1789
}
1790
1791
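/* Throw away all outstanding requests on @ring after a GPU reset and move
 * its active objects to the inactive list with their write domains cleared.
 */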
static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1792
struct intel_ring_buffer *ring)
1793
{
1794
while (!list_empty(&ring->request_list)) {
1795
struct drm_i915_gem_request *request;
1796
1797
request = list_first_entry(&ring->request_list,
1798
struct drm_i915_gem_request,
1799
list);
1800
1801
list_del(&request->list);
1802
i915_gem_request_remove_from_client(request);
1803
kfree(request);
1804
}
1805
1806
while (!list_empty(&ring->active_list)) {
1807
struct drm_i915_gem_object *obj;
1808
1809
obj = list_first_entry(&ring->active_list,
1810
struct drm_i915_gem_object,
1811
ring_list);
1812
1813
obj->base.write_domain = 0;
1814
list_del_init(&obj->gpu_write_list);
1815
i915_gem_object_move_to_inactive(obj);
1816
}
1817
}
1818
1819
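/* Clear every fence register after a reset, revoking CPU mmaps of tiled
 * objects and resetting the per-object fence bookkeeping.
 */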
static void i915_gem_reset_fences(struct drm_device *dev)
1820
{
1821
struct drm_i915_private *dev_priv = dev->dev_private;
1822
int i;
1823
1824
for (i = 0; i < 16; i++) {
1825
struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1826
struct drm_i915_gem_object *obj = reg->obj;
1827
1828
if (!obj)
1829
continue;
1830
1831
if (obj->tiling_mode)
1832
i915_gem_release_mmap(obj);
1833
1834
reg->obj->fence_reg = I915_FENCE_REG_NONE;
1835
reg->obj->fenced_gpu_access = false;
1836
reg->obj->last_fenced_seqno = 0;
1837
reg->obj->last_fenced_ring = NULL;
1838
i915_gem_clear_fence_reg(dev, reg);
1839
}
1840
}
1841
1842
void i915_gem_reset(struct drm_device *dev)
1843
{
1844
struct drm_i915_private *dev_priv = dev->dev_private;
1845
struct drm_i915_gem_object *obj;
1846
int i;
1847
1848
for (i = 0; i < I915_NUM_RINGS; i++)
1849
i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]);
1850
1851
/* Remove anything from the flushing lists. The GPU cache is likely
1852
* to be lost on reset along with the data, so simply move the
1853
* lost bo to the inactive list.
1854
*/
1855
while (!list_empty(&dev_priv->mm.flushing_list)) {
1856
obj = list_first_entry(&dev_priv->mm.flushing_list,
1857
struct drm_i915_gem_object,
1858
mm_list);
1859
1860
obj->base.write_domain = 0;
1861
list_del_init(&obj->gpu_write_list);
1862
i915_gem_object_move_to_inactive(obj);
1863
}
1864
1865
/* Move everything out of the GPU domains to ensure we do any
1866
* necessary invalidation upon reuse.
1867
*/
1868
list_for_each_entry(obj,
1869
&dev_priv->mm.inactive_list,
1870
mm_list)
1871
{
1872
obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1873
}
1874
1875
/* The fence registers are invalidated so clear them out */
1876
i915_gem_reset_fences(dev);
1877
}
1878
1879
/**
1880
* This function clears the request list as sequence numbers are passed.
1881
*/
1882
static void
1883
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
1884
{
1885
uint32_t seqno;
1886
int i;
1887
1888
if (list_empty(&ring->request_list))
1889
return;
1890
1891
WARN_ON(i915_verify_lists(ring->dev));
1892
1893
seqno = ring->get_seqno(ring);
1894
1895
for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
1896
if (seqno >= ring->sync_seqno[i])
1897
ring->sync_seqno[i] = 0;
1898
1899
while (!list_empty(&ring->request_list)) {
1900
struct drm_i915_gem_request *request;
1901
1902
request = list_first_entry(&ring->request_list,
1903
struct drm_i915_gem_request,
1904
list);
1905
1906
if (!i915_seqno_passed(seqno, request->seqno))
1907
break;
1908
1909
trace_i915_gem_request_retire(ring, request->seqno);
1910
1911
list_del(&request->list);
1912
i915_gem_request_remove_from_client(request);
1913
kfree(request);
1914
}
1915
1916
/* Move any buffers on the active list that are no longer referenced
1917
* by the ringbuffer to the flushing/inactive lists as appropriate.
1918
*/
1919
while (!list_empty(&ring->active_list)) {
1920
struct drm_i915_gem_object *obj;
1921
1922
obj = list_first_entry(&ring->active_list,
1923
struct drm_i915_gem_object,
1924
ring_list);
1925
1926
if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
1927
break;
1928
1929
if (obj->base.write_domain != 0)
1930
i915_gem_object_move_to_flushing(obj);
1931
else
1932
i915_gem_object_move_to_inactive(obj);
1933
}
1934
1935
if (unlikely(ring->trace_irq_seqno &&
1936
i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
1937
ring->irq_put(ring);
1938
ring->trace_irq_seqno = 0;
1939
}
1940
1941
WARN_ON(i915_verify_lists(ring->dev));
1942
}
1943
1944
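/* Retire completed requests on all rings, first freeing any objects whose
 * release was deferred to avoid recursing during unbind.
 */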
void
1945
i915_gem_retire_requests(struct drm_device *dev)
1946
{
1947
drm_i915_private_t *dev_priv = dev->dev_private;
1948
int i;
1949
1950
if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1951
struct drm_i915_gem_object *obj, *next;
1952
1953
/* We must be careful that during unbind() we do not
1954
* accidentally infinitely recurse into retire requests.
1955
* Currently:
1956
* retire -> free -> unbind -> wait -> retire_ring
1957
*/
1958
list_for_each_entry_safe(obj, next,
1959
&dev_priv->mm.deferred_free_list,
1960
mm_list)
1961
i915_gem_free_object_tail(obj);
1962
}
1963
1964
for (i = 0; i < I915_NUM_RINGS; i++)
1965
i915_gem_retire_requests_ring(&dev_priv->ring[i]);
1966
}
1967
1968
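/* Periodic housekeeping: retire finished requests, send a flush and an
 * empty request down any ring with pending writes, and re-arm the work if
 * the GPU is still busy.
 */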
static void
1969
i915_gem_retire_work_handler(struct work_struct *work)
1970
{
1971
drm_i915_private_t *dev_priv;
1972
struct drm_device *dev;
1973
bool idle;
1974
int i;
1975
1976
dev_priv = container_of(work, drm_i915_private_t,
1977
mm.retire_work.work);
1978
dev = dev_priv->dev;
1979
1980
/* Come back later if the device is busy... */
1981
if (!mutex_trylock(&dev->struct_mutex)) {
1982
queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1983
return;
1984
}
1985
1986
i915_gem_retire_requests(dev);
1987
1988
/* Send a periodic flush down the ring so we don't hold onto GEM
1989
* objects indefinitely.
1990
*/
1991
idle = true;
1992
for (i = 0; i < I915_NUM_RINGS; i++) {
1993
struct intel_ring_buffer *ring = &dev_priv->ring[i];
1994
1995
if (!list_empty(&ring->gpu_write_list)) {
1996
struct drm_i915_gem_request *request;
1997
int ret;
1998
1999
ret = i915_gem_flush_ring(ring,
2000
0, I915_GEM_GPU_DOMAINS);
2001
request = kzalloc(sizeof(*request), GFP_KERNEL);
2002
if (ret || request == NULL ||
2003
i915_add_request(ring, NULL, request))
2004
kfree(request);
2005
}
2006
2007
idle &= list_empty(&ring->request_list);
2008
}
2009
2010
if (!dev_priv->mm.suspended && !idle)
2011
queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
2012
2013
mutex_unlock(&dev->struct_mutex);
2014
}
2015
2016
/**
2017
* Waits for a sequence number to be signaled, and cleans up the
2018
* request and object lists appropriately for that event.
2019
*/
2020
int
2021
i915_wait_request(struct intel_ring_buffer *ring,
2022
uint32_t seqno)
2023
{
2024
drm_i915_private_t *dev_priv = ring->dev->dev_private;
2025
u32 ier;
2026
int ret = 0;
2027
2028
BUG_ON(seqno == 0);
2029
2030
if (atomic_read(&dev_priv->mm.wedged)) {
2031
struct completion *x = &dev_priv->error_completion;
2032
bool recovery_complete;
2033
unsigned long flags;
2034
2035
/* Give the error handler a chance to run. */
2036
spin_lock_irqsave(&x->wait.lock, flags);
2037
recovery_complete = x->done > 0;
2038
spin_unlock_irqrestore(&x->wait.lock, flags);
2039
2040
return recovery_complete ? -EIO : -EAGAIN;
2041
}
2042
2043
if (seqno == ring->outstanding_lazy_request) {
2044
struct drm_i915_gem_request *request;
2045
2046
request = kzalloc(sizeof(*request), GFP_KERNEL);
2047
if (request == NULL)
2048
return -ENOMEM;
2049
2050
ret = i915_add_request(ring, NULL, request);
2051
if (ret) {
2052
kfree(request);
2053
return ret;
2054
}
2055
2056
seqno = request->seqno;
2057
}
2058
2059
if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
2060
if (HAS_PCH_SPLIT(ring->dev))
2061
ier = I915_READ(DEIER) | I915_READ(GTIER);
2062
else
2063
ier = I915_READ(IER);
2064
if (!ier) {
2065
DRM_ERROR("something (likely vbetool) disabled "
2066
"interrupts, re-enabling\n");
2067
ring->dev->driver->irq_preinstall(ring->dev);
2068
ring->dev->driver->irq_postinstall(ring->dev);
2069
}
2070
2071
trace_i915_gem_request_wait_begin(ring, seqno);
2072
2073
ring->waiting_seqno = seqno;
2074
if (ring->irq_get(ring)) {
2075
if (dev_priv->mm.interruptible)
2076
ret = wait_event_interruptible(ring->irq_queue,
2077
i915_seqno_passed(ring->get_seqno(ring), seqno)
2078
|| atomic_read(&dev_priv->mm.wedged));
2079
else
2080
wait_event(ring->irq_queue,
2081
i915_seqno_passed(ring->get_seqno(ring), seqno)
2082
|| atomic_read(&dev_priv->mm.wedged));
2083
2084
ring->irq_put(ring);
2085
} else if (wait_for(i915_seqno_passed(ring->get_seqno(ring),
2086
seqno) ||
2087
atomic_read(&dev_priv->mm.wedged), 3000))
2088
ret = -EBUSY;
2089
ring->waiting_seqno = 0;
2090
2091
trace_i915_gem_request_wait_end(ring, seqno);
2092
}
2093
if (atomic_read(&dev_priv->mm.wedged))
2094
ret = -EAGAIN;
2095
2096
if (ret && ret != -ERESTARTSYS)
2097
DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
2098
__func__, ret, seqno, ring->get_seqno(ring),
2099
dev_priv->next_seqno);
2100
2101
/* Directly dispatch request retiring. While we have the work queue
2102
* to handle this, the waiter on a request often wants an associated
2103
* buffer to have made it to the inactive list, and we would need
2104
* a separate wait queue to handle that.
2105
*/
2106
if (ret == 0)
2107
i915_gem_retire_requests_ring(ring);
2108
2109
return ret;
2110
}
2111
2112
/**
2113
* Ensures that all rendering to the object has completed and the object is
2114
* safe to unbind from the GTT or access from the CPU.
2115
*/
2116
int
2117
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
2118
{
2119
int ret;
2120
2121
/* This function only exists to support waiting for existing rendering,
2122
* not for emitting required flushes.
2123
*/
2124
BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
2125
2126
/* If there is rendering queued on the buffer being evicted, wait for
2127
* it.
2128
*/
2129
if (obj->active) {
2130
ret = i915_wait_request(obj->ring, obj->last_rendering_seqno);
2131
if (ret)
2132
return ret;
2133
}
2134
2135
return 0;
2136
}
2137
2138
/**
2139
* Unbinds an object from the GTT aperture.
2140
*/
2141
int
2142
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2143
{
2144
int ret = 0;
2145
2146
if (obj->gtt_space == NULL)
2147
return 0;
2148
2149
if (obj->pin_count != 0) {
2150
DRM_ERROR("Attempting to unbind pinned buffer\n");
2151
return -EINVAL;
2152
}
2153
2154
/* blow away mappings if mapped through GTT */
2155
i915_gem_release_mmap(obj);
2156
2157
/* Move the object to the CPU domain to ensure that
2158
* any possible CPU writes while it's not in the GTT
2159
* are flushed when we go to remap it. This will
2160
* also ensure that all pending GPU writes are finished
2161
* before we unbind.
2162
*/
2163
ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2164
if (ret == -ERESTARTSYS)
2165
return ret;
2166
/* Continue on if we fail due to EIO, the GPU is hung so we
2167
* should be safe and we need to cleanup or else we might
2168
* cause memory corruption through use-after-free.
2169
*/
2170
if (ret) {
2171
i915_gem_clflush_object(obj);
2172
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2173
}
2174
2175
/* release the fence reg _after_ flushing */
2176
ret = i915_gem_object_put_fence(obj);
2177
if (ret == -ERESTARTSYS)
2178
return ret;
2179
2180
trace_i915_gem_object_unbind(obj);
2181
2182
i915_gem_gtt_unbind_object(obj);
2183
i915_gem_object_put_pages_gtt(obj);
2184
2185
list_del_init(&obj->gtt_list);
2186
list_del_init(&obj->mm_list);
2187
/* Avoid an unnecessary call to unbind on rebind. */
2188
obj->map_and_fenceable = true;
2189
2190
drm_mm_put_block(obj->gtt_space);
2191
obj->gtt_space = NULL;
2192
obj->gtt_offset = 0;
2193
2194
if (i915_gem_object_is_purgeable(obj))
2195
i915_gem_object_truncate(obj);
2196
2197
return ret;
2198
}
2199
2200
int
2201
i915_gem_flush_ring(struct intel_ring_buffer *ring,
2202
uint32_t invalidate_domains,
2203
uint32_t flush_domains)
2204
{
2205
int ret;
2206
2207
if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
2208
return 0;
2209
2210
trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);
2211
2212
ret = ring->flush(ring, invalidate_domains, flush_domains);
2213
if (ret)
2214
return ret;
2215
2216
if (flush_domains & I915_GEM_GPU_DOMAINS)
2217
i915_gem_process_flushing_list(ring, flush_domains);
2218
2219
return 0;
2220
}
2221
2222
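/* Flush any outstanding GPU writes on @ring and then wait for the ring
 * to drain completely.
 */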
static int i915_ring_idle(struct intel_ring_buffer *ring)
2223
{
2224
int ret;
2225
2226
if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
2227
return 0;
2228
2229
if (!list_empty(&ring->gpu_write_list)) {
2230
ret = i915_gem_flush_ring(ring,
2231
I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2232
if (ret)
2233
return ret;
2234
}
2235
2236
return i915_wait_request(ring, i915_gem_next_request_seqno(ring));
2237
}
2238
2239
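/* Wait for every ring to go idle, flushing everything onto the
 * inactive list.
 */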
int
2240
i915_gpu_idle(struct drm_device *dev)
2241
{
2242
drm_i915_private_t *dev_priv = dev->dev_private;
2243
bool lists_empty;
2244
int ret, i;
2245
2246
lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2247
list_empty(&dev_priv->mm.active_list));
2248
if (lists_empty)
2249
return 0;
2250
2251
/* Flush everything onto the inactive list. */
2252
for (i = 0; i < I915_NUM_RINGS; i++) {
2253
ret = i915_ring_idle(&dev_priv->ring[i]);
2254
if (ret)
2255
return ret;
2256
}
2257
2258
return 0;
2259
}
2260
2261
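/* Program a gen6/gen7 fence register for @obj, either via LRI commands
 * on the @pipelined ring or directly through MMIO.
 */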
static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
2262
struct intel_ring_buffer *pipelined)
2263
{
2264
struct drm_device *dev = obj->base.dev;
2265
drm_i915_private_t *dev_priv = dev->dev_private;
2266
u32 size = obj->gtt_space->size;
2267
int regnum = obj->fence_reg;
2268
uint64_t val;
2269
2270
val = (uint64_t)((obj->gtt_offset + size - 4096) &
2271
0xfffff000) << 32;
2272
val |= obj->gtt_offset & 0xfffff000;
2273
val |= (uint64_t)((obj->stride / 128) - 1) <<
2274
SANDYBRIDGE_FENCE_PITCH_SHIFT;
2275
2276
if (obj->tiling_mode == I915_TILING_Y)
2277
val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2278
val |= I965_FENCE_REG_VALID;
2279
2280
if (pipelined) {
2281
int ret = intel_ring_begin(pipelined, 6);
2282
if (ret)
2283
return ret;
2284
2285
intel_ring_emit(pipelined, MI_NOOP);
2286
intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2287
intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
2288
intel_ring_emit(pipelined, (u32)val);
2289
intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
2290
intel_ring_emit(pipelined, (u32)(val >> 32));
2291
intel_ring_advance(pipelined);
2292
} else
2293
I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
2294
2295
return 0;
2296
}
2297
2298
static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
2299
struct intel_ring_buffer *pipelined)
2300
{
2301
struct drm_device *dev = obj->base.dev;
2302
drm_i915_private_t *dev_priv = dev->dev_private;
2303
u32 size = obj->gtt_space->size;
2304
int regnum = obj->fence_reg;
2305
uint64_t val;
2306
2307
val = (uint64_t)((obj->gtt_offset + size - 4096) &
2308
0xfffff000) << 32;
2309
val |= obj->gtt_offset & 0xfffff000;
2310
val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2311
if (obj->tiling_mode == I915_TILING_Y)
2312
val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2313
val |= I965_FENCE_REG_VALID;
2314
2315
if (pipelined) {
2316
int ret = intel_ring_begin(pipelined, 6);
2317
if (ret)
2318
return ret;
2319
2320
intel_ring_emit(pipelined, MI_NOOP);
2321
intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2322
intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
2323
intel_ring_emit(pipelined, (u32)val);
2324
intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
2325
intel_ring_emit(pipelined, (u32)(val >> 32));
2326
intel_ring_advance(pipelined);
2327
} else
2328
I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
2329
2330
return 0;
2331
}
2332
2333
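/* Program a gen3 fence register for @obj; the tile width (and hence the
 * encoded pitch) depends on whether the device uses 128-byte Y tiling.
 */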
static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
2334
struct intel_ring_buffer *pipelined)
2335
{
2336
struct drm_device *dev = obj->base.dev;
2337
drm_i915_private_t *dev_priv = dev->dev_private;
2338
u32 size = obj->gtt_space->size;
2339
u32 fence_reg, val, pitch_val;
2340
int tile_width;
2341
2342
if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2343
(size & -size) != size ||
2344
(obj->gtt_offset & (size - 1)),
2345
"object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2346
obj->gtt_offset, obj->map_and_fenceable, size))
2347
return -EINVAL;
2348
2349
if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2350
tile_width = 128;
2351
else
2352
tile_width = 512;
2353
2354
/* Note: pitch better be a power of two tile widths */
2355
pitch_val = obj->stride / tile_width;
2356
pitch_val = ffs(pitch_val) - 1;
2357
2358
val = obj->gtt_offset;
2359
if (obj->tiling_mode == I915_TILING_Y)
2360
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2361
val |= I915_FENCE_SIZE_BITS(size);
2362
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2363
val |= I830_FENCE_REG_VALID;
2364
2365
fence_reg = obj->fence_reg;
2366
if (fence_reg < 8)
2367
fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2368
else
2369
fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2370
2371
if (pipelined) {
2372
int ret = intel_ring_begin(pipelined, 4);
2373
if (ret)
2374
return ret;
2375
2376
intel_ring_emit(pipelined, MI_NOOP);
2377
intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2378
intel_ring_emit(pipelined, fence_reg);
2379
intel_ring_emit(pipelined, val);
2380
intel_ring_advance(pipelined);
2381
} else
2382
I915_WRITE(fence_reg, val);
2383
2384
return 0;
2385
}
2386
2387
static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
2388
struct intel_ring_buffer *pipelined)
2389
{
2390
struct drm_device *dev = obj->base.dev;
2391
drm_i915_private_t *dev_priv = dev->dev_private;
2392
u32 size = obj->gtt_space->size;
2393
int regnum = obj->fence_reg;
2394
uint32_t val;
2395
uint32_t pitch_val;
2396
2397
if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2398
(size & -size) != size ||
2399
(obj->gtt_offset & (size - 1)),
2400
"object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2401
obj->gtt_offset, size))
2402
return -EINVAL;
2403
2404
pitch_val = obj->stride / 128;
2405
pitch_val = ffs(pitch_val) - 1;
2406
2407
val = obj->gtt_offset;
2408
if (obj->tiling_mode == I915_TILING_Y)
2409
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2410
val |= I830_FENCE_SIZE_BITS(size);
2411
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2412
val |= I830_FENCE_REG_VALID;
2413
2414
if (pipelined) {
2415
int ret = intel_ring_begin(pipelined, 4);
2416
if (ret)
2417
return ret;
2418
2419
intel_ring_emit(pipelined, MI_NOOP);
2420
intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2421
intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4);
2422
intel_ring_emit(pipelined, val);
2423
intel_ring_advance(pipelined);
2424
} else
2425
I915_WRITE(FENCE_REG_830_0 + regnum * 4, val);
2426
2427
return 0;
2428
}
2429
2430
static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno)
2431
{
2432
return i915_seqno_passed(ring->get_seqno(ring), seqno);
2433
}
2434
2435
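/* Flush any pending fenced GPU access to @obj and, unless the change is
 * being pipelined on the same ring, wait for the last fenced rendering
 * to complete so that the fence register can safely be reassigned.
 */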
static int
2436
i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
2437
struct intel_ring_buffer *pipelined)
2438
{
2439
int ret;
2440
2441
if (obj->fenced_gpu_access) {
2442
if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
2443
ret = i915_gem_flush_ring(obj->last_fenced_ring,
2444
0, obj->base.write_domain);
2445
if (ret)
2446
return ret;
2447
}
2448
2449
obj->fenced_gpu_access = false;
2450
}
2451
2452
if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) {
2453
if (!ring_passed_seqno(obj->last_fenced_ring,
2454
obj->last_fenced_seqno)) {
2455
ret = i915_wait_request(obj->last_fenced_ring,
2456
obj->last_fenced_seqno);
2457
if (ret)
2458
return ret;
2459
}
2460
2461
obj->last_fenced_seqno = 0;
2462
obj->last_fenced_ring = NULL;
2463
}
2464
2465
/* Ensure that all CPU reads are completed before installing a fence
2466
* and all writes before removing the fence.
2467
*/
2468
if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2469
mb();
2470
2471
return 0;
2472
}
2473
2474
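/* Release the fence register currently held by @obj, flushing any
 * fenced GPU access first and zapping GTT mmaps of tiled objects.
 */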
int
2475
i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2476
{
2477
int ret;
2478
2479
if (obj->tiling_mode)
2480
i915_gem_release_mmap(obj);
2481
2482
ret = i915_gem_object_flush_fence(obj, NULL);
2483
if (ret)
2484
return ret;
2485
2486
if (obj->fence_reg != I915_FENCE_REG_NONE) {
2487
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2488
i915_gem_clear_fence_reg(obj->base.dev,
2489
&dev_priv->fence_regs[obj->fence_reg]);
2490
2491
obj->fence_reg = I915_FENCE_REG_NONE;
2492
}
2493
2494
return 0;
2495
}
2496
2497
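/* Find a fence register for reuse: prefer a completely free register,
 * otherwise pick the least-recently-used unpinned one, favouring a
 * register last used on @pipelined to avoid a stall.
 */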
static struct drm_i915_fence_reg *
2498
i915_find_fence_reg(struct drm_device *dev,
2499
struct intel_ring_buffer *pipelined)
2500
{
2501
struct drm_i915_private *dev_priv = dev->dev_private;
2502
struct drm_i915_fence_reg *reg, *first, *avail;
2503
int i;
2504
2505
/* First try to find a free reg */
2506
avail = NULL;
2507
for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2508
reg = &dev_priv->fence_regs[i];
2509
if (!reg->obj)
2510
return reg;
2511
2512
if (!reg->obj->pin_count)
2513
avail = reg;
2514
}
2515
2516
if (avail == NULL)
2517
return NULL;
2518
2519
/* None available, try to steal one or wait for a user to finish */
2520
avail = first = NULL;
2521
list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2522
if (reg->obj->pin_count)
2523
continue;
2524
2525
if (first == NULL)
2526
first = reg;
2527
2528
if (!pipelined ||
2529
!reg->obj->last_fenced_ring ||
2530
reg->obj->last_fenced_ring == pipelined) {
2531
avail = reg;
2532
break;
2533
}
2534
}
2535
2536
if (avail == NULL)
2537
avail = first;
2538
2539
return avail;
2540
}
2541
2542
/**
2543
* i915_gem_object_get_fence - set up a fence reg for an object
2544
* @obj: object to map through a fence reg
2545
* @pipelined: ring on which to queue the change, or NULL for CPU access
2547
*
2548
* When mapping objects through the GTT, userspace wants to be able to write
2549
* to them without having to worry about swizzling if the object is tiled.
2550
*
2551
* This function walks the fence regs looking for a free one for @obj,
2552
* stealing one if it can't find any.
2553
*
2554
* It then sets up the reg based on the object's properties: address, pitch
2555
* and tiling format.
2556
*/
2557
int
2558
i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
2559
struct intel_ring_buffer *pipelined)
2560
{
2561
struct drm_device *dev = obj->base.dev;
2562
struct drm_i915_private *dev_priv = dev->dev_private;
2563
struct drm_i915_fence_reg *reg;
2564
int ret;
2565
2566
/* XXX disable pipelining. There are bugs. Shocking. */
2567
pipelined = NULL;
2568
2569
/* Just update our place in the LRU if our fence is getting reused. */
2570
if (obj->fence_reg != I915_FENCE_REG_NONE) {
2571
reg = &dev_priv->fence_regs[obj->fence_reg];
2572
list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2573
2574
if (obj->tiling_changed) {
2575
ret = i915_gem_object_flush_fence(obj, pipelined);
2576
if (ret)
2577
return ret;
2578
2579
if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
2580
pipelined = NULL;
2581
2582
if (pipelined) {
2583
reg->setup_seqno =
2584
i915_gem_next_request_seqno(pipelined);
2585
obj->last_fenced_seqno = reg->setup_seqno;
2586
obj->last_fenced_ring = pipelined;
2587
}
2588
2589
goto update;
2590
}
2591
2592
if (!pipelined) {
2593
if (reg->setup_seqno) {
2594
if (!ring_passed_seqno(obj->last_fenced_ring,
2595
reg->setup_seqno)) {
2596
ret = i915_wait_request(obj->last_fenced_ring,
2597
reg->setup_seqno);
2598
if (ret)
2599
return ret;
2600
}
2601
2602
reg->setup_seqno = 0;
2603
}
2604
} else if (obj->last_fenced_ring &&
2605
obj->last_fenced_ring != pipelined) {
2606
ret = i915_gem_object_flush_fence(obj, pipelined);
2607
if (ret)
2608
return ret;
2609
}
2610
2611
return 0;
2612
}
2613
2614
reg = i915_find_fence_reg(dev, pipelined);
2615
if (reg == NULL)
2616
return -ENOSPC;
2617
2618
ret = i915_gem_object_flush_fence(obj, pipelined);
2619
if (ret)
2620
return ret;
2621
2622
if (reg->obj) {
2623
struct drm_i915_gem_object *old = reg->obj;
2624
2625
drm_gem_object_reference(&old->base);
2626
2627
if (old->tiling_mode)
2628
i915_gem_release_mmap(old);
2629
2630
ret = i915_gem_object_flush_fence(old, pipelined);
2631
if (ret) {
2632
drm_gem_object_unreference(&old->base);
2633
return ret;
2634
}
2635
2636
if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0)
2637
pipelined = NULL;
2638
2639
old->fence_reg = I915_FENCE_REG_NONE;
2640
old->last_fenced_ring = pipelined;
2641
old->last_fenced_seqno =
2642
pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
2643
2644
drm_gem_object_unreference(&old->base);
2645
} else if (obj->last_fenced_seqno == 0)
2646
pipelined = NULL;
2647
2648
reg->obj = obj;
2649
list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2650
obj->fence_reg = reg - dev_priv->fence_regs;
2651
obj->last_fenced_ring = pipelined;
2652
2653
reg->setup_seqno =
2654
pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
2655
obj->last_fenced_seqno = reg->setup_seqno;
2656
2657
update:
2658
obj->tiling_changed = false;
2659
switch (INTEL_INFO(dev)->gen) {
2660
case 7:
2661
case 6:
2662
ret = sandybridge_write_fence_reg(obj, pipelined);
2663
break;
2664
case 5:
2665
case 4:
2666
ret = i965_write_fence_reg(obj, pipelined);
2667
break;
2668
case 3:
2669
ret = i915_write_fence_reg(obj, pipelined);
2670
break;
2671
case 2:
2672
ret = i830_write_fence_reg(obj, pipelined);
2673
break;
2674
}
2675
2676
return ret;
2677
}
2678
2679
/**
2680
* i915_gem_clear_fence_reg - clear out fence register info
2681
* @obj: object to clear
2682
*
2683
* Zeroes out the fence register itself and clears out the associated
2684
* data structures in dev_priv and obj.
2685
*/
2686
static void
2687
i915_gem_clear_fence_reg(struct drm_device *dev,
2688
struct drm_i915_fence_reg *reg)
2689
{
2690
drm_i915_private_t *dev_priv = dev->dev_private;
2691
uint32_t fence_reg = reg - dev_priv->fence_regs;
2692
2693
switch (INTEL_INFO(dev)->gen) {
2694
case 7:
2695
case 6:
2696
I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
2697
break;
2698
case 5:
2699
case 4:
2700
I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0);
2701
break;
2702
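/* Gen3 places fence registers 8-15 at FENCE_REG_945_8; fences 0-7 (and
 * all of gen2) live in the FENCE_REG_830_0 block. Note that case 2
 * deliberately jumps into the else branch below.
 */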
case 3:
2703
if (fence_reg >= 8)
2704
fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2705
else
2706
case 2:
2707
fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2708
2709
I915_WRITE(fence_reg, 0);
2710
break;
2711
}
2712
2713
list_del_init(&reg->lru_list);
2714
reg->obj = NULL;
2715
reg->setup_seqno = 0;
2716
}
2717
2718
/**
2719
* Finds free space in the GTT aperture and binds the object there.
2720
*/
2721
static int
2722
i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2723
unsigned alignment,
2724
bool map_and_fenceable)
2725
{
2726
struct drm_device *dev = obj->base.dev;
2727
drm_i915_private_t *dev_priv = dev->dev_private;
2728
struct drm_mm_node *free_space;
2729
gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2730
u32 size, fence_size, fence_alignment, unfenced_alignment;
2731
bool mappable, fenceable;
2732
int ret;
2733
2734
if (obj->madv != I915_MADV_WILLNEED) {
2735
DRM_ERROR("Attempting to bind a purgeable object\n");
2736
return -EINVAL;
2737
}
2738
2739
fence_size = i915_gem_get_gtt_size(dev,
2740
obj->base.size,
2741
obj->tiling_mode);
2742
fence_alignment = i915_gem_get_gtt_alignment(dev,
2743
obj->base.size,
2744
obj->tiling_mode);
2745
unfenced_alignment =
2746
i915_gem_get_unfenced_gtt_alignment(dev,
2747
obj->base.size,
2748
obj->tiling_mode);
2749
2750
if (alignment == 0)
2751
alignment = map_and_fenceable ? fence_alignment :
2752
unfenced_alignment;
2753
if (map_and_fenceable && alignment & (fence_alignment - 1)) {
2754
DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2755
return -EINVAL;
2756
}
2757
2758
size = map_and_fenceable ? fence_size : obj->base.size;
2759
2760
/* If the object is bigger than the entire aperture, reject it early
2761
* before evicting everything in a vain attempt to find space.
2762
*/
2763
if (obj->base.size >
2764
(map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2765
DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2766
return -E2BIG;
2767
}
2768
2769
search_free:
2770
if (map_and_fenceable)
2771
free_space =
2772
drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
2773
size, alignment, 0,
2774
dev_priv->mm.gtt_mappable_end,
2775
0);
2776
else
2777
free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2778
size, alignment, 0);
2779
2780
if (free_space != NULL) {
2781
if (map_and_fenceable)
2782
obj->gtt_space =
2783
drm_mm_get_block_range_generic(free_space,
2784
size, alignment, 0,
2785
dev_priv->mm.gtt_mappable_end,
2786
0);
2787
else
2788
obj->gtt_space =
2789
drm_mm_get_block(free_space, size, alignment);
2790
}
2791
if (obj->gtt_space == NULL) {
2792
/* If the gtt is empty and we're still having trouble
2793
* fitting our object in, we're out of memory.
2794
*/
2795
ret = i915_gem_evict_something(dev, size, alignment,
2796
map_and_fenceable);
2797
if (ret)
2798
return ret;
2799
2800
goto search_free;
2801
}
2802
2803
ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
2804
if (ret) {
2805
drm_mm_put_block(obj->gtt_space);
2806
obj->gtt_space = NULL;
2807
2808
if (ret == -ENOMEM) {
2809
/* first try to reclaim some memory by clearing the GTT */
2810
ret = i915_gem_evict_everything(dev, false);
2811
if (ret) {
2812
/* now try to shrink everyone else */
2813
if (gfpmask) {
2814
gfpmask = 0;
2815
goto search_free;
2816
}
2817
2818
return -ENOMEM;
2819
}
2820
2821
goto search_free;
2822
}
2823
2824
return ret;
2825
}
2826
2827
ret = i915_gem_gtt_bind_object(obj);
2828
if (ret) {
2829
i915_gem_object_put_pages_gtt(obj);
2830
drm_mm_put_block(obj->gtt_space);
2831
obj->gtt_space = NULL;
2832
2833
if (i915_gem_evict_everything(dev, false))
2834
return ret;
2835
2836
goto search_free;
2837
}
2838
2839
list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
2840
list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2841
2842
/* Assert that the object is not currently in any GPU domain. As it
2843
* wasn't in the GTT, there shouldn't be any way it could have been in
2844
* a GPU cache.
2845
*/
2846
BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2847
BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2848
2849
obj->gtt_offset = obj->gtt_space->start;
2850
2851
fenceable =
2852
obj->gtt_space->size == fence_size &&
2853
(obj->gtt_space->start & (fence_alignment - 1)) == 0;
2854
2855
mappable =
2856
obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
2857
2858
obj->map_and_fenceable = mappable && fenceable;
2859
2860
trace_i915_gem_object_bind(obj, map_and_fenceable);
2861
return 0;
2862
}
2863
2864
void
2865
i915_gem_clflush_object(struct drm_i915_gem_object *obj)
2866
{
2867
/* If we don't have a page list set up, then we're not pinned
2868
* to GPU, and we can ignore the cache flush because it'll happen
2869
* again at bind time.
2870
*/
2871
if (obj->pages == NULL)
2872
return;
2873
2874
/* If the GPU is snooping the contents of the CPU cache,
2875
* we do not need to manually clear the CPU cache lines. However,
2876
* the caches are only snooped when the render cache is
2877
* flushed/invalidated. As we always have to emit invalidations
2878
* and flushes when moving into and out of the RENDER domain, correct
2879
* snooping behaviour occurs naturally as the result of our domain
2880
* tracking.
2881
*/
2882
if (obj->cache_level != I915_CACHE_NONE)
2883
return;
2884
2885
trace_i915_gem_object_clflush(obj);
2886
2887
drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
2888
}
2889
2890
/** Flushes any GPU write domain for the object if it's dirty. */
2891
static int
2892
i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
2893
{
2894
if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
2895
return 0;
2896
2897
/* Queue the GPU write cache flushing we need. */
2898
return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
2899
}
2900
2901
/** Flushes the GTT write domain for the object if it's dirty. */
2902
static void
2903
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
2904
{
2905
uint32_t old_write_domain;
2906
2907
if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
2908
return;
2909
2910
/* No actual flushing is required for the GTT write domain. Writes
2911
* to it immediately go to main memory as far as we know, so there's
2912
* no chipset flush. It also doesn't land in render cache.
2913
*
2914
* However, we do have to enforce the order so that all writes through
2915
* the GTT land before any writes to the device, such as updates to
2916
* the GATT itself.
2917
*/
2918
wmb();
2919
2920
old_write_domain = obj->base.write_domain;
2921
obj->base.write_domain = 0;
2922
2923
trace_i915_gem_object_change_domain(obj,
2924
obj->base.read_domains,
2925
old_write_domain);
2926
}
2927
2928
/** Flushes the CPU write domain for the object if it's dirty. */
2929
static void
2930
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2931
{
2932
uint32_t old_write_domain;
2933
2934
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
2935
return;
2936
2937
i915_gem_clflush_object(obj);
2938
intel_gtt_chipset_flush();
2939
old_write_domain = obj->base.write_domain;
2940
obj->base.write_domain = 0;
2941
2942
trace_i915_gem_object_change_domain(obj,
2943
obj->base.read_domains,
2944
old_write_domain);
2945
}
2946
2947
/**
2948
* Moves a single object to the GTT read, and possibly write domain.
2949
*
2950
* This function returns when the move is complete, including waiting on
2951
* flushes to occur.
2952
*/
2953
int
2954
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2955
{
2956
uint32_t old_write_domain, old_read_domains;
2957
int ret;
2958
2959
/* Not valid to be called on unbound objects. */
2960
if (obj->gtt_space == NULL)
2961
return -EINVAL;
2962
2963
if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
2964
return 0;
2965
2966
ret = i915_gem_object_flush_gpu_write_domain(obj);
2967
if (ret)
2968
return ret;
2969
2970
if (obj->pending_gpu_write || write) {
2971
ret = i915_gem_object_wait_rendering(obj);
2972
if (ret)
2973
return ret;
2974
}
2975
2976
i915_gem_object_flush_cpu_write_domain(obj);
2977
2978
old_write_domain = obj->base.write_domain;
2979
old_read_domains = obj->base.read_domains;
2980
2981
/* It should now be out of any other write domains, and we can update
2982
* the domain values for our changes.
2983
*/
2984
BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2985
obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2986
if (write) {
2987
obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2988
obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2989
obj->dirty = 1;
2990
}
2991
2992
trace_i915_gem_object_change_domain(obj,
2993
old_read_domains,
2994
old_write_domain);
2995
2996
return 0;
2997
}
2998
2999
/*
3000
* Prepare buffer for display plane. Use uninterruptible for possible flush
3001
* wait, as during modesetting we are not supposed to be interrupted.
3002
*/
3003
int
3004
i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
3005
struct intel_ring_buffer *pipelined)
3006
{
3007
uint32_t old_read_domains;
3008
int ret;
3009
3010
/* Not valid to be called on unbound objects. */
3011
if (obj->gtt_space == NULL)
3012
return -EINVAL;
3013
3014
ret = i915_gem_object_flush_gpu_write_domain(obj);
3015
if (ret)
3016
return ret;
3017
3018
3019
/* Currently, we are always called from a non-interruptible context. */
3020
if (pipelined != obj->ring) {
3021
ret = i915_gem_object_wait_rendering(obj);
3022
if (ret)
3023
return ret;
3024
}
3025
3026
i915_gem_object_flush_cpu_write_domain(obj);
3027
3028
old_read_domains = obj->base.read_domains;
3029
obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3030
3031
trace_i915_gem_object_change_domain(obj,
3032
old_read_domains,
3033
obj->base.write_domain);
3034
3035
return 0;
3036
}
3037
3038
int
3039
i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
3040
{
3041
int ret;
3042
3043
if (!obj->active)
3044
return 0;
3045
3046
if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
3047
ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
3048
if (ret)
3049
return ret;
3050
}
3051
3052
return i915_gem_object_wait_rendering(obj);
3053
}
3054
3055
/**
3056
* Moves a single object to the CPU read, and possibly write domain.
3057
*
3058
* This function returns when the move is complete, including waiting on
3059
* flushes to occur.
3060
*/
3061
static int
3062
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3063
{
3064
uint32_t old_write_domain, old_read_domains;
3065
int ret;
3066
3067
if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3068
return 0;
3069
3070
ret = i915_gem_object_flush_gpu_write_domain(obj);
3071
if (ret)
3072
return ret;
3073
3074
ret = i915_gem_object_wait_rendering(obj);
3075
if (ret)
3076
return ret;
3077
3078
i915_gem_object_flush_gtt_write_domain(obj);
3079
3080
/* If we have a partially-valid cache of the object in the CPU,
3081
* finish invalidating it and free the per-page flags.
3082
*/
3083
i915_gem_object_set_to_full_cpu_read_domain(obj);
3084
3085
old_write_domain = obj->base.write_domain;
3086
old_read_domains = obj->base.read_domains;
3087
3088
/* Flush the CPU cache if it's still invalid. */
3089
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3090
i915_gem_clflush_object(obj);
3091
3092
obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3093
}
3094
3095
/* It should now be out of any other write domains, and we can update
3096
* the domain values for our changes.
3097
*/
3098
BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3099
3100
/* If we're writing through the CPU, then the GPU read domains will
3101
* need to be invalidated at next use.
3102
*/
3103
if (write) {
3104
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3105
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3106
}
3107
3108
trace_i915_gem_object_change_domain(obj,
3109
old_read_domains,
3110
old_write_domain);
3111
3112
return 0;
3113
}
3114
3115
/**
3116
* Moves the object from a partially CPU read to a full one.
3117
*
3118
* Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3119
* and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3120
*/
3121
static void
3122
i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj)
3123
{
3124
if (!obj->page_cpu_valid)
3125
return;
3126
3127
/* If we're partially in the CPU read domain, finish moving it in.
3128
*/
3129
if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) {
3130
int i;
3131
3132
for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) {
3133
if (obj->page_cpu_valid[i])
3134
continue;
3135
drm_clflush_pages(obj->pages + i, 1);
3136
}
3137
}
3138
3139
/* Free the page_cpu_valid mappings which are now stale, whether
3140
* or not we've got I915_GEM_DOMAIN_CPU.
3141
*/
3142
kfree(obj->page_cpu_valid);
3143
obj->page_cpu_valid = NULL;
3144
}
3145
3146
/**
3147
* Set the CPU read domain on a range of the object.
3148
*
3149
* The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3150
* not entirely valid. The page_cpu_valid member of the object flags which
3151
* pages have been flushed, and will be respected by
3152
* i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3153
* of the whole object.
3154
*
3155
* This function returns when the move is complete, including waiting on
3156
* flushes to occur.
3157
*/
3158
static int
3159
i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
3160
uint64_t offset, uint64_t size)
3161
{
3162
uint32_t old_read_domains;
3163
int i, ret;
3164
3165
if (offset == 0 && size == obj->base.size)
3166
return i915_gem_object_set_to_cpu_domain(obj, 0);
3167
3168
ret = i915_gem_object_flush_gpu_write_domain(obj);
3169
if (ret)
3170
return ret;
3171
3172
ret = i915_gem_object_wait_rendering(obj);
3173
if (ret)
3174
return ret;
3175
3176
i915_gem_object_flush_gtt_write_domain(obj);
3177
3178
/* If we're already fully in the CPU read domain, we're done. */
3179
if (obj->page_cpu_valid == NULL &&
3180
(obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0)
3181
return 0;
3182
3183
/* Otherwise, create/clear the per-page CPU read domain flag if we're
3184
* newly adding I915_GEM_DOMAIN_CPU
3185
*/
3186
if (obj->page_cpu_valid == NULL) {
3187
obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE,
3188
GFP_KERNEL);
3189
if (obj->page_cpu_valid == NULL)
3190
return -ENOMEM;
3191
} else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
3192
memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE);
3193
3194
/* Flush the cache on any pages that are still invalid from the CPU's
3195
* perspective.
3196
*/
3197
for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3198
i++) {
3199
if (obj->page_cpu_valid[i])
3200
continue;
3201
3202
drm_clflush_pages(obj->pages + i, 1);
3203
3204
obj->page_cpu_valid[i] = 1;
3205
}
3206
3207
/* It should now be out of any other write domains, and we can update
3208
* the domain values for our changes.
3209
*/
3210
BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3211
3212
old_read_domains = obj->base.read_domains;
3213
obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3214
3215
trace_i915_gem_object_change_domain(obj,
3216
old_read_domains,
3217
obj->base.write_domain);
3218
3219
return 0;
3220
}
3221
3222
/* Throttle our rendering by waiting until the ring has completed our requests
3223
* emitted over 20 msec ago.
3224
*
3225
* Note that if we were to use the current jiffies each time around the loop,
3226
* we wouldn't escape the function with any frames outstanding if the time to
3227
* render a frame was over 20ms.
3228
*
3229
* This should get us reasonable parallelism between CPU and GPU but also
3230
* relatively low latency when blocking on a particular request to finish.
3231
*/
3232
static int
3233
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3234
{
3235
struct drm_i915_private *dev_priv = dev->dev_private;
3236
struct drm_i915_file_private *file_priv = file->driver_priv;
3237
unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3238
struct drm_i915_gem_request *request;
3239
struct intel_ring_buffer *ring = NULL;
3240
u32 seqno = 0;
3241
int ret;
3242
3243
if (atomic_read(&dev_priv->mm.wedged))
3244
return -EIO;
3245
3246
spin_lock(&file_priv->mm.lock);
3247
list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3248
if (time_after_eq(request->emitted_jiffies, recent_enough))
3249
break;
3250
3251
ring = request->ring;
3252
seqno = request->seqno;
3253
}
3254
spin_unlock(&file_priv->mm.lock);
3255
3256
if (seqno == 0)
3257
return 0;
3258
3259
ret = 0;
3260
if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
3261
/* And wait for the seqno passing without holding any locks and
3262
* causing extra latency for others. This is safe as the irq
3263
* generation is designed to be run atomically and so is
3264
* lockless.
3265
*/
3266
if (ring->irq_get(ring)) {
3267
ret = wait_event_interruptible(ring->irq_queue,
3268
i915_seqno_passed(ring->get_seqno(ring), seqno)
3269
|| atomic_read(&dev_priv->mm.wedged));
3270
ring->irq_put(ring);
3271
3272
if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3273
ret = -EIO;
3274
}
3275
}
3276
3277
if (ret == 0)
3278
queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3279
3280
return ret;
3281
}
3282
3283
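/* Pin @obj into the GTT, binding it first if necessary. If the object
 * is already bound but violates the requested alignment or mappability,
 * it is unbound and rebound at a suitable location.
 */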
int
3284
i915_gem_object_pin(struct drm_i915_gem_object *obj,
3285
uint32_t alignment,
3286
bool map_and_fenceable)
3287
{
3288
struct drm_device *dev = obj->base.dev;
3289
struct drm_i915_private *dev_priv = dev->dev_private;
3290
int ret;
3291
3292
BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
3293
WARN_ON(i915_verify_lists(dev));
3294
3295
if (obj->gtt_space != NULL) {
3296
if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3297
(map_and_fenceable && !obj->map_and_fenceable)) {
3298
WARN(obj->pin_count,
3299
"bo is already pinned with incorrect alignment:"
3300
" offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3301
" obj->map_and_fenceable=%d\n",
3302
obj->gtt_offset, alignment,
3303
map_and_fenceable,
3304
obj->map_and_fenceable);
3305
ret = i915_gem_object_unbind(obj);
3306
if (ret)
3307
return ret;
3308
}
3309
}
3310
3311
if (obj->gtt_space == NULL) {
3312
ret = i915_gem_object_bind_to_gtt(obj, alignment,
3313
map_and_fenceable);
3314
if (ret)
3315
return ret;
3316
}
3317
3318
if (obj->pin_count++ == 0) {
3319
if (!obj->active)
3320
list_move_tail(&obj->mm_list,
3321
&dev_priv->mm.pinned_list);
3322
}
3323
obj->pin_mappable |= map_and_fenceable;
3324
3325
WARN_ON(i915_verify_lists(dev));
3326
return 0;
3327
}
3328
3329
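/* Drop one pin reference; when the last pin is released, an inactive
 * object is moved back onto the inactive list.
 */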
void
3330
i915_gem_object_unpin(struct drm_i915_gem_object *obj)
3331
{
3332
struct drm_device *dev = obj->base.dev;
3333
drm_i915_private_t *dev_priv = dev->dev_private;
3334
3335
WARN_ON(i915_verify_lists(dev));
3336
BUG_ON(obj->pin_count == 0);
3337
BUG_ON(obj->gtt_space == NULL);
3338
3339
if (--obj->pin_count == 0) {
3340
if (!obj->active)
3341
list_move_tail(&obj->mm_list,
3342
&dev_priv->mm.inactive_list);
3343
obj->pin_mappable = false;
3344
}
3345
WARN_ON(i915_verify_lists(dev));
3346
}
3347
3348
int
3349
i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3350
struct drm_file *file)
3351
{
3352
struct drm_i915_gem_pin *args = data;
3353
struct drm_i915_gem_object *obj;
3354
int ret;
3355
3356
ret = i915_mutex_lock_interruptible(dev);
3357
if (ret)
3358
return ret;
3359
3360
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3361
if (&obj->base == NULL) {
3362
ret = -ENOENT;
3363
goto unlock;
3364
}
3365
3366
if (obj->madv != I915_MADV_WILLNEED) {
3367
DRM_ERROR("Attempting to pin a purgeable buffer\n");
3368
ret = -EINVAL;
3369
goto out;
3370
}
3371
3372
if (obj->pin_filp != NULL && obj->pin_filp != file) {
3373
DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3374
args->handle);
3375
ret = -EINVAL;
3376
goto out;
3377
}
3378
3379
obj->user_pin_count++;
3380
obj->pin_filp = file;
3381
if (obj->user_pin_count == 1) {
3382
ret = i915_gem_object_pin(obj, args->alignment, true);
3383
if (ret)
3384
goto out;
3385
}
3386
3387
/* XXX - flush the CPU caches for pinned objects
3388
* as the X server doesn't manage domains yet
3389
*/
3390
i915_gem_object_flush_cpu_write_domain(obj);
3391
args->offset = obj->gtt_offset;
3392
out:
3393
drm_gem_object_unreference(&obj->base);
3394
unlock:
3395
mutex_unlock(&dev->struct_mutex);
3396
return ret;
3397
}
3398
3399
int
3400
i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3401
struct drm_file *file)
3402
{
3403
struct drm_i915_gem_pin *args = data;
3404
struct drm_i915_gem_object *obj;
3405
int ret;
3406
3407
ret = i915_mutex_lock_interruptible(dev);
3408
if (ret)
3409
return ret;
3410
3411
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3412
if (&obj->base == NULL) {
3413
ret = -ENOENT;
3414
goto unlock;
3415
}
3416
3417
if (obj->pin_filp != file) {
3418
DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3419
args->handle);
3420
ret = -EINVAL;
3421
goto out;
3422
}
3423
obj->user_pin_count--;
3424
if (obj->user_pin_count == 0) {
3425
obj->pin_filp = NULL;
3426
i915_gem_object_unpin(obj);
3427
}
3428
3429
out:
3430
drm_gem_object_unreference(&obj->base);
3431
unlock:
3432
mutex_unlock(&dev->struct_mutex);
3433
return ret;
3434
}
3435
3436
int
3437
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3438
struct drm_file *file)
3439
{
3440
struct drm_i915_gem_busy *args = data;
3441
struct drm_i915_gem_object *obj;
3442
int ret;
3443
3444
ret = i915_mutex_lock_interruptible(dev);
3445
if (ret)
3446
return ret;
3447
3448
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3449
if (&obj->base == NULL) {
3450
ret = -ENOENT;
3451
goto unlock;
3452
}
3453
3454
/* Count all active objects as busy, even if they are currently not used
3455
* by the gpu. Users of this interface expect objects to eventually
3456
* become non-busy without any further actions, therefore emit any
3457
* necessary flushes here.
3458
*/
3459
args->busy = obj->active;
3460
if (args->busy) {
3461
/* Unconditionally flush objects, even when the gpu still uses this
3462
* object. Userspace calling this function indicates that it wants to
3463
* use this buffer sooner rather than later, so issuing the required
3464
* flush earlier is beneficial.
3465
*/
3466
if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
3467
ret = i915_gem_flush_ring(obj->ring,
3468
0, obj->base.write_domain);
3469
} else if (obj->ring->outstanding_lazy_request ==
3470
obj->last_rendering_seqno) {
3471
struct drm_i915_gem_request *request;
3472
3473
/* This ring is not being cleared by active usage,
3474
* so emit a request to do so.
3475
*/
3476
request = kzalloc(sizeof(*request), GFP_KERNEL);
3477
if (request)
3478
ret = i915_add_request(obj->ring, NULL, request);
3479
else
3480
ret = -ENOMEM;
3481
}
3482
3483
/* Update the active list for the hardware's current position.
3484
* Otherwise this only updates on a delayed timer or when irqs
3485
* are actually unmasked, and our working set ends up being
3486
* larger than required.
3487
*/
3488
i915_gem_retire_requests_ring(obj->ring);
3489
3490
args->busy = obj->active;
3491
}
3492
3493
drm_gem_object_unreference(&obj->base);
3494
unlock:
3495
mutex_unlock(&dev->struct_mutex);
3496
return ret;
3497
}
3498
3499
int
3500
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3501
struct drm_file *file_priv)
3502
{
3503
return i915_gem_ring_throttle(dev, file_priv);
3504
}
3505
3506
int
3507
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3508
struct drm_file *file_priv)
3509
{
3510
struct drm_i915_gem_madvise *args = data;
3511
struct drm_i915_gem_object *obj;
3512
int ret;
3513
3514
switch (args->madv) {
3515
case I915_MADV_DONTNEED:
3516
case I915_MADV_WILLNEED:
3517
break;
3518
default:
3519
return -EINVAL;
3520
}
3521
3522
ret = i915_mutex_lock_interruptible(dev);
3523
if (ret)
3524
return ret;
3525
3526
obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3527
if (&obj->base == NULL) {
3528
ret = -ENOENT;
3529
goto unlock;
3530
}
3531
3532
if (obj->pin_count) {
3533
ret = -EINVAL;
3534
goto out;
3535
}
3536
3537
if (obj->madv != __I915_MADV_PURGED)
3538
obj->madv = args->madv;
3539
3540
/* if the object is no longer bound, discard its backing storage */
3541
if (i915_gem_object_is_purgeable(obj) &&
3542
obj->gtt_space == NULL)
3543
i915_gem_object_truncate(obj);
3544
3545
args->retained = obj->madv != __I915_MADV_PURGED;
3546
3547
out:
3548
drm_gem_object_unreference(&obj->base);
3549
unlock:
3550
mutex_unlock(&dev->struct_mutex);
3551
return ret;
3552
}
3553
3554
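/* Allocate a new GEM object of @size bytes, backed by shmemfs and
 * starting life in the CPU read/write domain.
 */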
struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3555
size_t size)
3556
{
3557
struct drm_i915_private *dev_priv = dev->dev_private;
3558
struct drm_i915_gem_object *obj;
3559
struct address_space *mapping;
3560
3561
obj = kzalloc(sizeof(*obj), GFP_KERNEL);
3562
if (obj == NULL)
3563
return NULL;
3564
3565
if (drm_gem_object_init(dev, &obj->base, size) != 0) {
3566
kfree(obj);
3567
return NULL;
3568
}
3569
3570
mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3571
mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE);
3572
3573
i915_gem_info_add_obj(dev_priv, size);
3574
3575
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3576
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3577
3578
obj->cache_level = I915_CACHE_NONE;
3579
obj->base.driver_private = NULL;
3580
obj->fence_reg = I915_FENCE_REG_NONE;
3581
INIT_LIST_HEAD(&obj->mm_list);
3582
INIT_LIST_HEAD(&obj->gtt_list);
3583
INIT_LIST_HEAD(&obj->ring_list);
3584
INIT_LIST_HEAD(&obj->exec_list);
3585
INIT_LIST_HEAD(&obj->gpu_write_list);
3586
obj->madv = I915_MADV_WILLNEED;
3587
/* Avoid an unnecessary call to unbind on the first bind. */
3588
obj->map_and_fenceable = true;
3589
3590
return obj;
3591
}
3592
3593
int i915_gem_init_object(struct drm_gem_object *obj)
3594
{
3595
BUG();
3596
3597
return 0;
3598
}
3599
3600
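/* Final stage of object destruction. If the unbind is interrupted by a
 * signal, the object is parked on the deferred_free_list and freed
 * later from i915_gem_retire_requests().
 */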
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
3601
{
3602
struct drm_device *dev = obj->base.dev;
3603
drm_i915_private_t *dev_priv = dev->dev_private;
3604
int ret;
3605
3606
ret = i915_gem_object_unbind(obj);
3607
if (ret == -ERESTARTSYS) {
3608
list_move(&obj->mm_list,
3609
&dev_priv->mm.deferred_free_list);
3610
return;
3611
}
3612
3613
trace_i915_gem_object_destroy(obj);
3614
3615
if (obj->base.map_list.map)
3616
i915_gem_free_mmap_offset(obj);
3617
3618
drm_gem_object_release(&obj->base);
3619
i915_gem_info_remove_obj(dev_priv, obj->base.size);
3620
3621
kfree(obj->page_cpu_valid);
3622
kfree(obj->bit_17);
3623
kfree(obj);
3624
}
3625
3626
void i915_gem_free_object(struct drm_gem_object *gem_obj)
3627
{
3628
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3629
struct drm_device *dev = obj->base.dev;
3630
3631
while (obj->pin_count > 0)
3632
i915_gem_object_unpin(obj);
3633
3634
if (obj->phys_obj)
3635
i915_gem_detach_phys_object(dev, obj);
3636
3637
i915_gem_free_object_tail(obj);
3638
}
3639
3640
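/* Quiesce the GPU: wait for outstanding rendering, reset the fence
 * registers, stop the hangcheck timer and tear down the rings.
 */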
int
3641
i915_gem_idle(struct drm_device *dev)
3642
{
3643
drm_i915_private_t *dev_priv = dev->dev_private;
3644
int ret;
3645
3646
mutex_lock(&dev->struct_mutex);
3647
3648
if (dev_priv->mm.suspended) {
3649
mutex_unlock(&dev->struct_mutex);
3650
return 0;
3651
}
3652
3653
ret = i915_gpu_idle(dev);
3654
if (ret) {
3655
mutex_unlock(&dev->struct_mutex);
3656
return ret;
3657
}
3658
3659
/* Under UMS, be paranoid and evict. */
3660
if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
3661
ret = i915_gem_evict_inactive(dev, false);
3662
if (ret) {
3663
mutex_unlock(&dev->struct_mutex);
3664
return ret;
3665
}
3666
}
3667
3668
i915_gem_reset_fences(dev);
3669
3670
/* Hack! Don't let anybody do execbuf while we don't control the chip.
3671
* We need to replace this with a semaphore, or something.
3672
* And not confound mm.suspended!
3673
*/
3674
dev_priv->mm.suspended = 1;
3675
del_timer_sync(&dev_priv->hangcheck_timer);
3676
3677
i915_kernel_lost_context(dev);
3678
i915_gem_cleanup_ringbuffer(dev);
3679
3680
mutex_unlock(&dev->struct_mutex);
3681
3682
/* Cancel the retire work handler, which should be idle now. */
3683
cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3684
3685
return 0;
3686
}
3687
3688
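/* Initialise the render ring, plus the BSD and BLT rings when the
 * hardware provides them.
 */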
int
3689
i915_gem_init_ringbuffer(struct drm_device *dev)
3690
{
3691
drm_i915_private_t *dev_priv = dev->dev_private;
3692
int ret;
3693
3694
ret = intel_init_render_ring_buffer(dev);
3695
if (ret)
3696
return ret;
3697
3698
if (HAS_BSD(dev)) {
3699
ret = intel_init_bsd_ring_buffer(dev);
3700
if (ret)
3701
goto cleanup_render_ring;
3702
}
3703
3704
if (HAS_BLT(dev)) {
3705
ret = intel_init_blt_ring_buffer(dev);
3706
if (ret)
3707
goto cleanup_bsd_ring;
3708
}
3709
3710
dev_priv->next_seqno = 1;
3711
3712
return 0;
3713
3714
cleanup_bsd_ring:
3715
intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
3716
cleanup_render_ring:
3717
intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3718
return ret;
3719
}
3720
3721
void
3722
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3723
{
3724
drm_i915_private_t *dev_priv = dev->dev_private;
3725
int i;
3726
3727
for (i = 0; i < I915_NUM_RINGS; i++)
3728
intel_cleanup_ring_buffer(&dev_priv->ring[i]);
3729
}
3730
3731
int
3732
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3733
struct drm_file *file_priv)
3734
{
3735
drm_i915_private_t *dev_priv = dev->dev_private;
3736
int ret, i;
3737
3738
if (drm_core_check_feature(dev, DRIVER_MODESET))
3739
return 0;
3740
3741
if (atomic_read(&dev_priv->mm.wedged)) {
3742
DRM_ERROR("Reenabling wedged hardware, good luck\n");
3743
atomic_set(&dev_priv->mm.wedged, 0);
3744
}
3745
3746
mutex_lock(&dev->struct_mutex);
3747
dev_priv->mm.suspended = 0;
3748
3749
ret = i915_gem_init_ringbuffer(dev);
3750
if (ret != 0) {
3751
mutex_unlock(&dev->struct_mutex);
3752
return ret;
3753
}
3754
3755
BUG_ON(!list_empty(&dev_priv->mm.active_list));
3756
BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
3757
BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
3758
for (i = 0; i < I915_NUM_RINGS; i++) {
3759
BUG_ON(!list_empty(&dev_priv->ring[i].active_list));
3760
BUG_ON(!list_empty(&dev_priv->ring[i].request_list));
3761
}
3762
mutex_unlock(&dev->struct_mutex);
3763
3764
ret = drm_irq_install(dev);
3765
if (ret)
3766
goto cleanup_ringbuffer;
3767
3768
return 0;
3769
3770
cleanup_ringbuffer:
3771
mutex_lock(&dev->struct_mutex);
3772
i915_gem_cleanup_ringbuffer(dev);
3773
dev_priv->mm.suspended = 1;
3774
mutex_unlock(&dev->struct_mutex);
3775
3776
return ret;
3777
}
3778
3779
int
3780
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
3781
struct drm_file *file_priv)
3782
{
3783
if (drm_core_check_feature(dev, DRIVER_MODESET))
3784
return 0;
3785
3786
drm_irq_uninstall(dev);
3787
return i915_gem_idle(dev);
3788
}
3789
3790
void
3791
i915_gem_lastclose(struct drm_device *dev)
3792
{
3793
int ret;
3794
3795
if (drm_core_check_feature(dev, DRIVER_MODESET))
3796
return;
3797
3798
ret = i915_gem_idle(dev);
3799
if (ret)
3800
DRM_ERROR("failed to idle hardware: %d\n", ret);
3801
}
3802
3803
static void
3804
init_ring_lists(struct intel_ring_buffer *ring)
3805
{
3806
INIT_LIST_HEAD(&ring->active_list);
3807
INIT_LIST_HEAD(&ring->request_list);
3808
INIT_LIST_HEAD(&ring->gpu_write_list);
3809
}
3810
3811
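/* One-time GEM initialisation at driver load: set up the memory
 * management lists, the delayed retire work, the fence registers and
 * the inactive shrinker.
 */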
void
3812
i915_gem_load(struct drm_device *dev)
3813
{
3814
int i;
3815
drm_i915_private_t *dev_priv = dev->dev_private;
3816
3817
INIT_LIST_HEAD(&dev_priv->mm.active_list);
3818
INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
3819
INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
3820
INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
3821
INIT_LIST_HEAD(&dev_priv->mm.fence_list);
3822
INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
3823
INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
3824
for (i = 0; i < I915_NUM_RINGS; i++)
3825
init_ring_lists(&dev_priv->ring[i]);
3826
for (i = 0; i < 16; i++)
3827
INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
3828
INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
3829
i915_gem_retire_work_handler);
3830
init_completion(&dev_priv->error_completion);
3831
3832
/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3833
if (IS_GEN3(dev)) {
3834
u32 tmp = I915_READ(MI_ARB_STATE);
3835
if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
3836
/* arb state is a masked write, so set bit + bit in mask */
3837
tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
3838
I915_WRITE(MI_ARB_STATE, tmp);
3839
}
3840
}
3841
3842
dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
3843
3844
/* Old X drivers will take 0-2 for front, back, depth buffers */
3845
if (!drm_core_check_feature(dev, DRIVER_MODESET))
3846
dev_priv->fence_reg_start = 3;
3847
3848
if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
3849
dev_priv->num_fence_regs = 16;
3850
else
3851
dev_priv->num_fence_regs = 8;
3852
3853
/* Initialize fence registers to zero */
3854
for (i = 0; i < dev_priv->num_fence_regs; i++) {
3855
i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]);
3856
}
3857
3858
i915_gem_detect_bit_6_swizzle(dev);
3859
init_waitqueue_head(&dev_priv->pending_flip_queue);
3860
3861
dev_priv->mm.interruptible = true;
3862
3863
dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
3864
dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
3865
register_shrinker(&dev_priv->mm.inactive_shrinker);
3866
}
3867
3868
/*
3869
* Create a physically contiguous memory object for this object
3870
* e.g. for cursor + overlay regs
3871
*/
3872
static int i915_gem_init_phys_object(struct drm_device *dev,
3873
int id, int size, int align)
3874
{
3875
drm_i915_private_t *dev_priv = dev->dev_private;
3876
struct drm_i915_gem_phys_object *phys_obj;
3877
int ret;
3878
3879
if (dev_priv->mm.phys_objs[id - 1] || !size)
3880
return 0;
3881
3882
phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
3883
if (!phys_obj)
3884
return -ENOMEM;
3885
3886
phys_obj->id = id;
3887
3888
phys_obj->handle = drm_pci_alloc(dev, size, align);
3889
if (!phys_obj->handle) {
3890
ret = -ENOMEM;
3891
goto kfree_obj;
3892
}
3893
#ifdef CONFIG_X86
3894
set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3895
#endif
3896
3897
dev_priv->mm.phys_objs[id - 1] = phys_obj;
3898
3899
return 0;
3900
kfree_obj:
3901
kfree(phys_obj);
3902
return ret;
3903
}
3904
3905
static void i915_gem_free_phys_object(struct drm_device *dev, int id)
3906
{
3907
drm_i915_private_t *dev_priv = dev->dev_private;
3908
struct drm_i915_gem_phys_object *phys_obj;
3909
3910
if (!dev_priv->mm.phys_objs[id - 1])
3911
return;
3912
3913
phys_obj = dev_priv->mm.phys_objs[id - 1];
3914
if (phys_obj->cur_obj) {
3915
i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
3916
}
3917
3918
#ifdef CONFIG_X86
3919
set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3920
#endif
3921
drm_pci_free(dev, phys_obj->handle);
3922
kfree(phys_obj);
3923
dev_priv->mm.phys_objs[id - 1] = NULL;
3924
}
3925
3926
void i915_gem_free_all_phys_object(struct drm_device *dev)
3927
{
3928
int i;
3929
3930
for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
3931
i915_gem_free_phys_object(dev, i);
3932
}
3933
3934
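/* Copy the contents of the physically contiguous backing store back
 * into the object's shmemfs pages and detach the phys object.
 */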
void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	char *vaddr;
	int i;
	int page_count;

	if (!obj->phys_obj)
		return;
	vaddr = obj->phys_obj->handle->vaddr;

	page_count = obj->base.size / PAGE_SIZE;
	for (i = 0; i < page_count; i++) {
		struct page *page = shmem_read_mapping_page(mapping, i);
		if (!IS_ERR(page)) {
			char *dst = kmap_atomic(page);
			memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
			kunmap_atomic(dst);

			drm_clflush_pages(&page, 1);

			set_page_dirty(page);
			mark_page_accessed(page);
			page_cache_release(page);
		}
	}
	intel_gtt_chipset_flush();

	obj->phys_obj->cur_obj = NULL;
	obj->phys_obj = NULL;
}

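/*
 * Bind @obj to physical object slot @id, creating the slot on first use,
 * and copy the object's shmem pages into the contiguous backing store.
 * Returns 0 on success or a negative error code.
 */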
int
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_i915_gem_object *obj,
			    int id,
			    int align)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

	if (obj->phys_obj) {
		if (obj->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}

	/* create a new object */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id,
						obj->base.size, align);
		if (ret) {
			DRM_ERROR("failed to init phys object %d size: %zu\n",
				  id, obj->base.size);
			return ret;
		}
	}

	/* bind to the object */
	obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj->phys_obj->cur_obj = obj;

	page_count = obj->base.size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
		struct page *page;
		char *dst, *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src);

		mark_page_accessed(page);
		page_cache_release(page);
	}

	return 0;
}

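/*
 * pwrite path for objects backed by a physical object: copy user data
 * straight into the contiguous backing store. If the non-blocking copy
 * faults, drop struct_mutex and retry with a plain copy_from_user().
 */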
static int
i915_gem_phys_pwrite(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
	char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;

	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten)
			return -EFAULT;
	}

	intel_gtt_chipset_flush();
	return 0;
}

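/*
 * Clean up per-file GEM state when a client is going away: unlink its
 * outstanding requests so later request retirement never dereferences
 * the stale file_priv.
 */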
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

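/* The GPU is considered active while either the flushing list or the
 * active list is non-empty.
 */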
static int
i915_gpu_is_active(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int lists_empty;

	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
		      list_empty(&dev_priv->mm.active_list);

	return !lists_empty;
}

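/*
 * Shrinker callback for the inactive list: with nr_to_scan == 0 just report
 * how many inactive objects are available; otherwise unbind purgeable
 * objects first, then anything else on the inactive list, idling the GPU
 * and rescanning as a last resort.
 */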
static int
i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *dev_priv =
		container_of(shrinker,
			     struct drm_i915_private,
			     mm.inactive_shrinker);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj, *next;
	int nr_to_scan = sc->nr_to_scan;
	int cnt;

	if (!mutex_trylock(&dev->struct_mutex))
		return 0;

	/* "fast-path" to count number of available objects */
	if (nr_to_scan == 0) {
		cnt = 0;
		list_for_each_entry(obj,
				    &dev_priv->mm.inactive_list,
				    mm_list)
			cnt++;
		mutex_unlock(&dev->struct_mutex);
		return cnt / 100 * sysctl_vfs_cache_pressure;
	}

rescan:
	/* first scan for clean buffers */
	i915_gem_retire_requests(dev);

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if (i915_gem_object_is_purgeable(obj)) {
			if (i915_gem_object_unbind(obj) == 0 &&
			    --nr_to_scan == 0)
				break;
		}
	}

	/* second pass, evict/count anything still on the inactive list */
	cnt = 0;
	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if (nr_to_scan &&
		    i915_gem_object_unbind(obj) == 0)
			nr_to_scan--;
		else
			cnt++;
	}

	if (nr_to_scan && i915_gpu_is_active(dev)) {
		/*
		 * We are desperate for pages, so as a last resort, wait
		 * for the GPU to finish and discard whatever we can.
		 * This dramatically reduces the number of OOM-killer
		 * events whilst running the GPU aggressively.
		 */
		if (i915_gpu_idle(dev) == 0)
			goto rescan;
	}
	mutex_unlock(&dev->struct_mutex);
	return cnt / 100 * sysctl_vfs_cache_pressure;
}