GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/s390/mm/pgtable.c
/*
 * Copyright IBM Corp. 2007,2009
 * Author(s): Martin Schwidefsky <[email protected]>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif

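/*
 * ALLOC_ORDER sizes the allocations for the 2048-entry region/segment
 * (crst) tables: 8KB (order 1) with 4-byte entries on 31-bit, 16KB
 * (order 2) with 8-byte entries on 64-bit. FRAG_MASK is a bitmap of the
 * page table fragments that fit into one 4KB page: four 1KB tables on
 * 31-bit, two 2KB tables on 64-bit (256 entries each).
 */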
unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
EXPORT_SYMBOL(VMALLOC_START);

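/*
 * The "vmalloc=" early parameter resizes the vmalloc area by moving
 * VMALLOC_START down from the fixed VMALLOC_END. Booting with e.g.
 * "vmalloc=512M" reserves roughly 512MB for vmalloc mappings, rounded
 * down to a page boundary.
 */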
static int __init parse_vmalloc(char *arg)
{
        if (!arg)
                return -EINVAL;
        VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
        return 0;
}
early_param("vmalloc", parse_vmalloc);

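/*
 * crst_table_alloc()/crst_table_free() manage the multi-page region and
 * segment tables. The table is returned as a physical address; kernel
 * addresses on s390 map 1:1 to real memory, so the same value is used
 * as a virtual address again when crst_table_free() hands the pages
 * back to free_pages().
 */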
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
        struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

        if (!page)
                return NULL;
        return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
        free_pages((unsigned long) table, ALLOC_ORDER);
}

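/*
 * crst_table_upgrade() grows a process address space on demand by
 * putting a new, higher region table level on top of the current pgd:
 * from 2GB (segment table) to 4TB (region-third table) and on to 8PB
 * (region-second table). The new top-level table is allocated outside
 * of page_table_lock and simply discarded if another thread performed
 * the upgrade in the meantime.
 */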
#ifdef CONFIG_64BIT
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
        unsigned long *table, *pgd;
        unsigned long entry;

        BUG_ON(limit > (1UL << 53));
repeat:
        table = crst_table_alloc(mm);
        if (!table)
                return -ENOMEM;
        spin_lock_bh(&mm->page_table_lock);
        if (mm->context.asce_limit < limit) {
                pgd = (unsigned long *) mm->pgd;
                if (mm->context.asce_limit <= (1UL << 31)) {
                        entry = _REGION3_ENTRY_EMPTY;
                        mm->context.asce_limit = 1UL << 42;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION3;
                } else {
                        entry = _REGION2_ENTRY_EMPTY;
                        mm->context.asce_limit = 1UL << 53;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION2;
                }
                crst_table_init(table, entry);
                pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
                mm->pgd = (pgd_t *) table;
                mm->task_size = mm->context.asce_limit;
                table = NULL;
        }
        spin_unlock_bh(&mm->page_table_lock);
        if (table)
                crst_table_free(mm, table);
        if (mm->context.asce_limit < limit)
                goto repeat;
        update_mm(mm, current);
        return 0;
}

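/*
 * crst_table_downgrade() removes region table levels again until the
 * address space fits the requested limit, freeing each discarded
 * top-level table along the way (e.g. to shrink a compat task back to
 * the 2GB segment-table-only layout).
 */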
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
        pgd_t *pgd;

        if (mm->context.asce_limit <= limit)
                return;
        __tlb_flush_mm(mm);
        while (mm->context.asce_limit > limit) {
                pgd = mm->pgd;
                switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
                case _REGION_ENTRY_TYPE_R2:
                        mm->context.asce_limit = 1UL << 42;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION3;
                        break;
                case _REGION_ENTRY_TYPE_R3:
                        mm->context.asce_limit = 1UL << 31;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_SEGMENT;
                        break;
                default:
                        BUG();
                }
                mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
                mm->task_size = mm->context.asce_limit;
                crst_table_free(mm, (unsigned long *) pgd);
        }
        update_mm(mm, current);
}
#endif

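/*
 * atomic_xor_bits() flips the given bits in *v without taking a lock,
 * retrying the cmpxchg until no other CPU has modified the value in
 * between, and returns the new value. It is used below to update the
 * fragment bitmap that lives in page->_mapcount.
 */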
static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
        unsigned int old, new;

        do {
                old = atomic_read(v);
                new = old ^ bits;
        } while (atomic_cmpxchg(v, old, new) != old);
        return new;
}

/*
 * page table entry allocation/free routines.
 */
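/*
 * With CONFIG_PGSTE each page table carries a parallel array of page
 * table extension (pgste) entries, needed to run KVM guests under SIE.
 * Such a page table occupies a full 4KB page: the lower half holds the
 * ptes, the upper half the pgstes, and page->_mapcount is set to 3 so
 * that both fragment bits appear to be in use.
 */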
#ifdef CONFIG_PGSTE
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
        struct page *page;
        unsigned long *table;

        page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
        if (!page)
                return NULL;
        pgtable_page_ctor(page);
        atomic_set(&page->_mapcount, 3);
        table = (unsigned long *) page_to_phys(page);
        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
        clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
        return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
        struct page *page;

        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        pgtable_page_dtor(page);
        atomic_set(&page->_mapcount, -1);
        __free_page(page);
}
#endif

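/*
 * Regular page tables are only 1KB/2KB in size, so several of them are
 * carved out of a single 4KB page. page->_mapcount doubles as a small
 * bitmap: the low nibble marks fragments in use, the high nibble (see
 * page_table_free_rcu()) marks fragments waiting for an RCU grace
 * period. Pages that still have free fragments are kept on
 * mm->context.pgtable_list; on 64-bit, for example, a value of 0x01
 * means the first 2KB half is in use and the second half is free.
 */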
unsigned long *page_table_alloc(struct mm_struct *mm)
{
        struct page *page;
        unsigned long *table;
        unsigned int mask, bit;

#ifdef CONFIG_PGSTE
        if (mm_has_pgste(mm))
                return page_table_alloc_pgste(mm);
#endif
        /* Allocate fragments of a 4K page as 1K/2K page table */
        spin_lock_bh(&mm->context.list_lock);
        mask = FRAG_MASK;
        if (!list_empty(&mm->context.pgtable_list)) {
                page = list_first_entry(&mm->context.pgtable_list,
                                        struct page, lru);
                table = (unsigned long *) page_to_phys(page);
                mask = atomic_read(&page->_mapcount);
                mask = mask | (mask >> 4);
        }
        if ((mask & FRAG_MASK) == FRAG_MASK) {
                spin_unlock_bh(&mm->context.list_lock);
                page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
                if (!page)
                        return NULL;
                pgtable_page_ctor(page);
                atomic_set(&page->_mapcount, 1);
                table = (unsigned long *) page_to_phys(page);
                clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
                spin_lock_bh(&mm->context.list_lock);
                list_add(&page->lru, &mm->context.pgtable_list);
        } else {
                for (bit = 1; mask & bit; bit <<= 1)
                        table += PTRS_PER_PTE;
                mask = atomic_xor_bits(&page->_mapcount, bit);
                if ((mask & FRAG_MASK) == FRAG_MASK)
                        list_del(&page->lru);
        }
        spin_unlock_bh(&mm->context.list_lock);
        return table;
}

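/*
 * page_table_free() clears the fragment's bit in page->_mapcount, puts
 * the page (back) on the list of pages with free fragments while other
 * fragments remain in use, and releases the whole 4KB page once the
 * mask drops to zero.
 */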
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
        struct page *page;
        unsigned int bit, mask;

#ifdef CONFIG_PGSTE
        if (mm_has_pgste(mm))
                return page_table_free_pgste(table);
#endif
        /* Free 1K/2K page table fragment of a 4K page */
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
        spin_lock_bh(&mm->context.list_lock);
        if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
                list_del(&page->lru);
        mask = atomic_xor_bits(&page->_mapcount, bit);
        if (mask & FRAG_MASK)
                list_add(&page->lru, &mm->context.pgtable_list);
        spin_unlock_bh(&mm->context.list_lock);
        if (mask == 0) {
                pgtable_page_dtor(page);
                atomic_set(&page->_mapcount, -1);
                __free_page(page);
        }
}

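/*
 * With CONFIG_HAVE_RCU_TABLE_FREE page tables are not freed right away
 * but queued on the mmu_gather and released only after it is safe with
 * respect to concurrent lockless page table walkers (fast GUP). The
 * type of the deferred free is encoded in the otherwise unused low bits
 * of the table address passed to tlb_remove_table().
 */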
#ifdef CONFIG_HAVE_RCU_TABLE_FREE

static void __page_table_free_rcu(void *table, unsigned bit)
{
        struct page *page;

#ifdef CONFIG_PGSTE
        if (bit == FRAG_MASK)
                return page_table_free_pgste(table);
#endif
        /* Free 1K/2K page table fragment of a 4K page */
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
                pgtable_page_dtor(page);
                atomic_set(&page->_mapcount, -1);
                __free_page(page);
        }
}

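/*
 * page_table_free_rcu() marks the fragment as pending by moving its bit
 * into the high nibble of page->_mapcount (xor with bit | bit << 4
 * clears the "in use" bit and sets the "pending" bit) and defers the
 * actual free to __tlb_remove_table() via tlb_remove_table().
 */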
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
        struct mm_struct *mm;
        struct page *page;
        unsigned int bit, mask;

        mm = tlb->mm;
#ifdef CONFIG_PGSTE
        if (mm_has_pgste(mm)) {
                table = (unsigned long *) (__pa(table) | FRAG_MASK);
                tlb_remove_table(tlb, table);
                return;
        }
#endif
        bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        spin_lock_bh(&mm->context.list_lock);
        if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
                list_del(&page->lru);
        mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
        if (mask & FRAG_MASK)
                list_add_tail(&page->lru, &mm->context.pgtable_list);
        spin_unlock_bh(&mm->context.list_lock);
        table = (unsigned long *) (__pa(table) | (bit << 4));
        tlb_remove_table(tlb, table);
}

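/*
 * __tlb_remove_table() runs once the deferred free is safe. The low
 * bits of the tagged pointer say what to free: 0 for a full crst table,
 * FRAG_MASK for a pgste page, otherwise the pending bit of a single
 * page table fragment.
 */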
void __tlb_remove_table(void *_table)
{
        void *table = (void *)((unsigned long) _table & PAGE_MASK);
        unsigned type = (unsigned long) _table & ~PAGE_MASK;

        if (type)
                __page_table_free_rcu(table, type);
        else
                free_pages((unsigned long) table, ALLOC_ORDER);
}

#endif

/*
 * Switch on pgstes for the current userspace process (needed for KVM).
 */
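/*
 * Existing page tables cannot be converted in place, so the mm is
 * replaced wholesale: with alloc_pgste temporarily set, dup_mm() builds
 * a copy whose page tables carry pgstes, and the task is then switched
 * over to that copy. This only works while the process is still single
 * threaded and nothing else uses the mm, which is what the checks under
 * task_lock() verify (twice, since dup_mm() may sleep).
 */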
int s390_enable_sie(void)
{
        struct task_struct *tsk = current;
        struct mm_struct *mm, *old_mm;

        /* Do we have a switched amode? If not, we cannot do SIE. */
        if (user_mode == HOME_SPACE_MODE)
                return -EINVAL;

        /* Do we have pgstes? If yes, we are done. */
        if (mm_has_pgste(tsk->mm))
                return 0;

        /* Let's check if we are allowed to replace the mm. */
        task_lock(tsk);
        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
            !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
            tsk->mm != tsk->active_mm) {
                task_unlock(tsk);
                return -EINVAL;
        }
        task_unlock(tsk);

        /* We copy the mm and let dup_mm() create the page tables with pgstes. */
        tsk->mm->context.alloc_pgste = 1;
        mm = dup_mm(tsk);
        tsk->mm->context.alloc_pgste = 0;
        if (!mm)
                return -ENOMEM;

        /* Now let's check again if something happened. */
        task_lock(tsk);
        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
            !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
            tsk->mm != tsk->active_mm) {
                mmput(mm);
                task_unlock(tsk);
                return -EINVAL;
        }

        /* OK, we are alone. No ptrace, no threads, etc. */
        old_mm = tsk->mm;
        tsk->mm = tsk->active_mm = mm;
        preempt_disable();
        update_mm(mm, tsk);
        atomic_inc(&mm->context.attach_count);
        atomic_dec(&old_mm->context.attach_count);
        cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
        preempt_enable();
        task_unlock(tsk);
        mmput(old_mm);
        return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

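/*
 * kernel_page_present() is needed by hibernation when DEBUG_PAGEALLOC
 * may have unmapped kernel pages. It probes the page with the lra
 * (load real address) instruction; condition code 0 means the address
 * translated successfully, i.e. the page is currently mapped.
 */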
#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
bool kernel_page_present(struct page *page)
{
        unsigned long addr;
        int cc;

        addr = page_to_phys(page);
        asm volatile(
                "       lra     %1,0(%1)\n"
                "       ipm     %0\n"
                "       srl     %0,28"
                : "=d" (cc), "+a" (addr) : : "cc");
        return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */