GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/powerpc/mm/gup.c
/*
 * Lockless get_user_pages_fast for powerpc
 *
 * Copyright (C) 2008 Nick Piggin
 * Copyright (C) 2008 Novell Inc.
 */
#undef DEBUG

#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
#include <asm/pgtable.h>

#ifdef __HAVE_ARCH_PTE_SPECIAL

static inline void get_huge_page_tail(struct page *page)
{
        /*
         * __split_huge_page_refcount() cannot run
         * from under us.
         */
        VM_BUG_ON(atomic_read(&page->_count) < 0);
        atomic_inc(&page->_count);
}

/*
 * The performance critical leaf functions are made noinline otherwise gcc
 * inlines everything into a single function which results in too much
 * register pressure.
 */
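/*
 * gup_pte_range() below walks the leaf level locklessly: it snapshots each
 * pte, takes a speculative reference on the backing page with
 * page_cache_get_speculative() (which fails if the page is already on its
 * way to being freed), then re-reads the pte and drops the reference if it
 * changed underneath us.  Any failure returns 0 so that
 * get_user_pages_fast() can fall back to the mmap_sem-protected slow path.
 * This is only safe because the caller runs with interrupts disabled,
 * which on powerpc holds off the freeing of the page tables being walked.
 */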
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
{
        unsigned long mask, result;
        pte_t *ptep;

        result = _PAGE_PRESENT|_PAGE_USER;
        if (write)
                result |= _PAGE_RW;
        mask = result | _PAGE_SPECIAL;

        ptep = pte_offset_kernel(&pmd, addr);
        do {
                pte_t pte = *ptep;
                struct page *page;

                if ((pte_val(pte) & mask) != result)
                        return 0;
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
                page = pte_page(pte);
                if (!page_cache_get_speculative(page))
                        return 0;
                if (unlikely(pte_val(pte) != pte_val(*ptep))) {
                        put_page(page);
                        return 0;
                }
                if (PageTail(page))
                        get_huge_page_tail(page);
                pages[*nr] = page;
                (*nr)++;

        } while (ptep++, addr += PAGE_SIZE, addr != end);

        return 1;
}

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                int write, struct page **pages, int *nr)
{
        unsigned long next;
        pmd_t *pmdp;

        pmdp = pmd_offset(&pud, addr);
        do {
                pmd_t pmd = *pmdp;

                next = pmd_addr_end(addr, end);
                if (pmd_none(pmd))
                        return 0;
                if (is_hugepd(pmdp)) {
                        if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
                                        addr, next, write, pages, nr))
                                return 0;
                } else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
                        return 0;
        } while (pmdp++, addr = next, addr != end);

        return 1;
}

static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
                int write, struct page **pages, int *nr)
{
        unsigned long next;
        pud_t *pudp;

        pudp = pud_offset(&pgd, addr);
        do {
                pud_t pud = *pudp;

                next = pud_addr_end(addr, end);
                if (pud_none(pud))
                        return 0;
                if (is_hugepd(pudp)) {
                        if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
                                        addr, next, write, pages, nr))
                                return 0;
                } else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
                        return 0;
        } while (pudp++, addr = next, addr != end);

        return 1;
}

int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                        struct page **pages)
{
        struct mm_struct *mm = current->mm;
        unsigned long addr, len, end;
        unsigned long next;
        pgd_t *pgdp;
        int nr = 0;

        pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");

        start &= PAGE_MASK;
        addr = start;
        len = (unsigned long) nr_pages << PAGE_SHIFT;
        end = start + len;

        if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
                                        start, len)))
                goto slow_irqon;

        pr_devel("  aligned: %lx .. %lx\n", start, end);

        /*
         * XXX: batch / limit 'nr', to avoid large irq off latency
         * needs some instrumenting to determine the common sizes used by
         * important workloads (eg. DB2), and whether limiting the batch size
         * will decrease performance.
         *
         * It seems like we're in the clear for the moment. Direct-IO is
         * the main guy that batches up lots of get_user_pages, and even
         * they are limited to 64-at-a-time which is not so many.
         */
        /*
         * This doesn't prevent pagetable teardown, but does prevent
         * the pagetables from being freed on powerpc.
         *
         * So long as we atomically load page table pointers versus teardown,
         * we can follow the address down to the page and take a ref on it.
         */
        local_irq_disable();

        pgdp = pgd_offset(mm, addr);
        do {
                pgd_t pgd = *pgdp;

                pr_devel("  %016lx: normal pgd %p\n", addr,
                         (void *)pgd_val(pgd));
                next = pgd_addr_end(addr, end);
                if (pgd_none(pgd))
                        goto slow;
                if (is_hugepd(pgdp)) {
                        if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
                                        addr, next, write, pages, &nr))
                                goto slow;
                } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
                        goto slow;
        } while (pgdp++, addr = next, addr != end);

        local_irq_enable();

        VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
        return nr;

        {
                int ret;

slow:
                local_irq_enable();
slow_irqon:
                pr_devel("  slow path ! nr = %d\n", nr);

                /* Try to get the remaining pages with get_user_pages */
                start += nr << PAGE_SHIFT;
                pages += nr;

                down_read(&mm->mmap_sem);
                ret = get_user_pages(current, mm, start,
                        (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
                up_read(&mm->mmap_sem);

                /* Have to be a bit careful with return values */
                if (nr > 0) {
                        if (ret < 0)
                                ret = nr;
                        else
                                ret += nr;
                }

                return ret;
        }
}
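
/*
 * Return-value convention, for reference: on full fast-path success the
 * function returns nr_pages.  If the fast path bails out part-way, the
 * pages already pinned are kept and get_user_pages() handles the rest;
 * the result is then the total number of pinned pages, and a slow-path
 * error is reported only when nothing was pinned at all.  For example, if
 * three pages were pinned on the fast path and the slow path then fails
 * with -EFAULT, the caller sees 3 and must still release those three pages.
 *
 * A typical caller pattern might look like the sketch below; the names
 * (uaddr, NPAGES) are illustrative only and not defined in this file:
 *
 *	struct page *pages[NPAGES];
 *	int i, got;
 *
 *	got = get_user_pages_fast(uaddr, NPAGES, 1, pages);
 *	if (got > 0) {
 *		... access the pinned user pages here ...
 *		for (i = 0; i < got; i++)
 *			put_page(pages[i]);
 *	}
 */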

#endif /* __HAVE_ARCH_PTE_SPECIAL */