GitHub Repository: torvalds/linux
Path: blob/master/arch/s390/mm/pgalloc.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Page table allocation functions
 *
 * Copyright IBM Corp. 2016
 * Author(s): Martin Schwidefsky <[email protected]>
 */

#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

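/*
 * Overview (descriptive note): s390 dynamic address translation uses up to
 * five levels of tables: region-first, region-second and region-third
 * tables, followed by segment and page tables.  The crst_table_*() helpers
 * below manage the large region/segment (CRST) tables, allocated as
 * higher-order pages of order CRST_ALLOC_ORDER, while the page_table_*()
 * helpers manage the 4KB pages holding the actual page table entries.
 */
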
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
        struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
        unsigned long *table;

        if (!ptdesc)
                return NULL;
        table = ptdesc_to_virt(ptdesc);
        __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
        return table;
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
        if (!table)
                return;
        pagetable_free(virt_to_ptdesc(table));
}

static void __crst_table_upgrade(void *arg)
{
        struct mm_struct *mm = arg;
        struct ctlreg asce;

        /* change all active ASCEs to avoid the creation of new TLBs */
        if (current->active_mm == mm) {
                asce.val = mm->context.asce;
                get_lowcore()->user_asce = asce;
                local_ctl_load(7, &asce);
                if (!test_thread_flag(TIF_ASCE_PRIMARY))
                        local_ctl_load(1, &asce);
        }
        __tlb_flush_local();
}

int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
{
        unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
        unsigned long asce_limit = mm->context.asce_limit;

        mmap_assert_write_locked(mm);

        /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
        VM_BUG_ON(asce_limit < _REGION2_SIZE);

        if (end <= asce_limit)
                return 0;

        if (asce_limit == _REGION2_SIZE) {
                p4d = crst_table_alloc(mm);
                if (unlikely(!p4d))
                        goto err_p4d;
                crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
                pagetable_p4d_ctor(virt_to_ptdesc(p4d));
        }
        if (end > _REGION1_SIZE) {
                pgd = crst_table_alloc(mm);
                if (unlikely(!pgd))
                        goto err_pgd;
                crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
                pagetable_pgd_ctor(virt_to_ptdesc(pgd));
        }

        spin_lock_bh(&mm->page_table_lock);

        if (p4d) {
                __pgd = (unsigned long *) mm->pgd;
                p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
                mm->pgd = (pgd_t *) p4d;
                mm->context.asce_limit = _REGION1_SIZE;
                mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
                                   _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
                mm_inc_nr_puds(mm);
        }
        if (pgd) {
                __pgd = (unsigned long *) mm->pgd;
                pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd);
                mm->pgd = (pgd_t *) pgd;
                mm->context.asce_limit = TASK_SIZE_MAX;
                mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
                                   _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
        }

        spin_unlock_bh(&mm->page_table_lock);

        on_each_cpu(__crst_table_upgrade, mm, 0);

        return 0;

err_pgd:
        pagetable_dtor(virt_to_ptdesc(p4d));
        crst_table_free(mm, p4d);
err_p4d:
        return -ENOMEM;
}

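/*
 * Illustrative use (hypothetical caller, for clarity only): a caller that
 * wants to map above the current ASCE limit upgrades the hierarchy first,
 * while holding the mmap lock for writing:
 *
 *      mmap_assert_write_locked(mm);
 *      if (addr + len > mm->context.asce_limit) {
 *              rc = crst_table_upgrade(mm, addr + len);
 *              if (rc)
 *                      return rc;
 *      }
 */
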
#ifdef CONFIG_PGSTE

struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm)
{
        struct ptdesc *ptdesc;
        u64 *table;

        ptdesc = pagetable_alloc(GFP_KERNEL, 0);
        if (ptdesc) {
                table = (u64 *)ptdesc_to_virt(ptdesc);
                __arch_set_page_dat(table, 1);
                memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
                memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
        }
        return ptdesc;
}

void page_table_free_pgste(struct ptdesc *ptdesc)
{
        pagetable_free(ptdesc);
}

#endif /* CONFIG_PGSTE */

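/*
 * Layout note: with 4KB pages and 8-byte entries, the PTRS_PER_PTE (256)
 * page table entries occupy the lower 2KB of the page.  Both
 * page_table_alloc_pgste() above and page_table_alloc() below therefore
 * initialize the lower half with _PAGE_INVALID and clear the upper 2KB,
 * which holds the per-PTE PGSTE entries (used by KVM) when CONFIG_PGSTE
 * is enabled.
 */
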
unsigned long *page_table_alloc(struct mm_struct *mm)
{
        struct ptdesc *ptdesc;
        unsigned long *table;

        ptdesc = pagetable_alloc(GFP_KERNEL, 0);
        if (!ptdesc)
                return NULL;
        if (!pagetable_pte_ctor(mm, ptdesc)) {
                pagetable_free(ptdesc);
                return NULL;
        }
        table = ptdesc_to_virt(ptdesc);
        __arch_set_page_dat(table, 1);
        memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
        memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
        return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
        struct ptdesc *ptdesc = virt_to_ptdesc(table);

        pagetable_dtor_free(ptdesc);
}

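/*
 * Illustrative pairing (hypothetical caller, for clarity only): a table
 * obtained from page_table_alloc() is released with page_table_free(), or,
 * where transparent hugepage collapse needs it, deferred past an RCU grace
 * period via pte_free_defer() below:
 *
 *      unsigned long *table = page_table_alloc(mm);
 *
 *      if (!table)
 *              return -ENOMEM;
 *      ...
 *      page_table_free(mm, table);
 */
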
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void pte_free_now(struct rcu_head *head)
{
        struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head);

        pagetable_dtor_free(ptdesc);
}

void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
{
        struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);

        call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Base infrastructure required to generate basic asces, region, segment,
 * and page tables that do not make use of enhanced features like EDAT1.
 */

static struct kmem_cache *base_pgt_cache;

static unsigned long *base_pgt_alloc(void)
{
        unsigned long *table;

        table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
        if (table)
                memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
        return table;
}

static void base_pgt_free(unsigned long *table)
{
        kmem_cache_free(base_pgt_cache, table);
}

static unsigned long *base_crst_alloc(unsigned long val)
{
        unsigned long *table;
        struct ptdesc *ptdesc;

        ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
        if (!ptdesc)
                return NULL;
        table = ptdesc_address(ptdesc);
        crst_table_init(table, val);
        return table;
}

static void base_crst_free(unsigned long *table)
{
        if (!table)
                return;
        pagetable_free(virt_to_ptdesc(table));
}

#define BASE_ADDR_END_FUNC(NAME, SIZE)                                  \
static inline unsigned long base_##NAME##_addr_end(unsigned long addr, \
                                                   unsigned long end)  \
{                                                                       \
        unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1);           \
                                                                        \
        return (next - 1) < (end - 1) ? next : end;                     \
}

BASE_ADDR_END_FUNC(page, PAGE_SIZE)
BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)

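/*
 * For illustration, BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE) expands to:
 *
 *      static inline unsigned long base_segment_addr_end(unsigned long addr,
 *                                                        unsigned long end)
 *      {
 *              unsigned long next = (addr + (_SEGMENT_SIZE)) & ~((_SEGMENT_SIZE) - 1);
 *
 *              return (next - 1) < (end - 1) ? next : end;
 *      }
 *
 * i.e. addr is rounded up to the next segment boundary but never past end;
 * comparing (next - 1) with (end - 1) keeps an end value of 0, which stands
 * for the wrap-around at the top of the address space, working correctly.
 */
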
static inline unsigned long base_lra(unsigned long address)
{
        unsigned long real;

        asm volatile(
                "       lra     %0,0(%1)\n"
                : "=d" (real) : "a" (address) : "cc");
        return real;
}

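/*
 * base_page_walk() below fills each page table entry with the result of
 * base_lra().  LRA (LOAD REAL ADDRESS) translates a virtual address through
 * the DAT tables currently attached to the CPU, so the generated base tables
 * simply mirror the existing kernel mapping one 4KB page at a time, without
 * relying on large pages or other enhanced DAT features.
 */
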
static int base_page_walk(unsigned long *origin, unsigned long addr,
                          unsigned long end, int alloc)
{
        unsigned long *pte, next;

        if (!alloc)
                return 0;
        pte = origin;
        pte += (addr & _PAGE_INDEX) >> PAGE_SHIFT;
        do {
                next = base_page_addr_end(addr, end);
                *pte = base_lra(addr);
        } while (pte++, addr = next, addr < end);
        return 0;
}

static int base_segment_walk(unsigned long *origin, unsigned long addr,
                             unsigned long end, int alloc)
{
        unsigned long *ste, next, *table;
        int rc;

        ste = origin;
        ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
        do {
                next = base_segment_addr_end(addr, end);
                if (*ste & _SEGMENT_ENTRY_INVALID) {
                        if (!alloc)
                                continue;
                        table = base_pgt_alloc();
                        if (!table)
                                return -ENOMEM;
                        *ste = __pa(table) | _SEGMENT_ENTRY;
                }
                table = __va(*ste & _SEGMENT_ENTRY_ORIGIN);
                rc = base_page_walk(table, addr, next, alloc);
                if (rc)
                        return rc;
                if (!alloc)
                        base_pgt_free(table);
                cond_resched();
        } while (ste++, addr = next, addr < end);
        return 0;
}

static int base_region3_walk(unsigned long *origin, unsigned long addr,
                             unsigned long end, int alloc)
{
        unsigned long *rtte, next, *table;
        int rc;

        rtte = origin;
        rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
        do {
                next = base_region3_addr_end(addr, end);
                if (*rtte & _REGION_ENTRY_INVALID) {
                        if (!alloc)
                                continue;
                        table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
                        if (!table)
                                return -ENOMEM;
                        *rtte = __pa(table) | _REGION3_ENTRY;
                }
                table = __va(*rtte & _REGION_ENTRY_ORIGIN);
                rc = base_segment_walk(table, addr, next, alloc);
                if (rc)
                        return rc;
                if (!alloc)
                        base_crst_free(table);
        } while (rtte++, addr = next, addr < end);
        return 0;
}

static int base_region2_walk(unsigned long *origin, unsigned long addr,
                             unsigned long end, int alloc)
{
        unsigned long *rste, next, *table;
        int rc;

        rste = origin;
        rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
        do {
                next = base_region2_addr_end(addr, end);
                if (*rste & _REGION_ENTRY_INVALID) {
                        if (!alloc)
                                continue;
                        table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
                        if (!table)
                                return -ENOMEM;
                        *rste = __pa(table) | _REGION2_ENTRY;
                }
                table = __va(*rste & _REGION_ENTRY_ORIGIN);
                rc = base_region3_walk(table, addr, next, alloc);
                if (rc)
                        return rc;
                if (!alloc)
                        base_crst_free(table);
        } while (rste++, addr = next, addr < end);
        return 0;
}

static int base_region1_walk(unsigned long *origin, unsigned long addr,
                             unsigned long end, int alloc)
{
        unsigned long *rfte, next, *table;
        int rc;

        rfte = origin;
        rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
        do {
                next = base_region1_addr_end(addr, end);
                if (*rfte & _REGION_ENTRY_INVALID) {
                        if (!alloc)
                                continue;
                        table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
                        if (!table)
                                return -ENOMEM;
                        *rfte = __pa(table) | _REGION1_ENTRY;
                }
                table = __va(*rfte & _REGION_ENTRY_ORIGIN);
                rc = base_region2_walk(table, addr, next, alloc);
                if (rc)
                        return rc;
                if (!alloc)
                        base_crst_free(table);
        } while (rfte++, addr = next, addr < end);
        return 0;
}

/**
 * base_asce_free - free asce and tables returned from base_asce_alloc()
 * @asce: asce to be freed
 *
 * Frees all region, segment, and page tables that were allocated with a
 * corresponding base_asce_alloc() call.
 */
void base_asce_free(unsigned long asce)
{
        unsigned long *table = __va(asce & _ASCE_ORIGIN);

        if (!asce)
                return;
        switch (asce & _ASCE_TYPE_MASK) {
        case _ASCE_TYPE_SEGMENT:
                base_segment_walk(table, 0, _REGION3_SIZE, 0);
                break;
        case _ASCE_TYPE_REGION3:
                base_region3_walk(table, 0, _REGION2_SIZE, 0);
                break;
        case _ASCE_TYPE_REGION2:
                base_region2_walk(table, 0, _REGION1_SIZE, 0);
                break;
        case _ASCE_TYPE_REGION1:
                base_region1_walk(table, 0, TASK_SIZE_MAX, 0);
                break;
        }
        base_crst_free(table);
}

static int base_pgt_cache_init(void)
{
        static DEFINE_MUTEX(base_pgt_cache_mutex);
        unsigned long sz = _PAGE_TABLE_SIZE;

        if (base_pgt_cache)
                return 0;
        mutex_lock(&base_pgt_cache_mutex);
        if (!base_pgt_cache)
                base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
        mutex_unlock(&base_pgt_cache_mutex);
        return base_pgt_cache ? 0 : -ENOMEM;
}

/**
 * base_asce_alloc - create kernel mapping without enhanced DAT features
 * @addr: virtual start address of kernel mapping
 * @num_pages: number of consecutive pages
 *
 * Generate an asce, including all required region, segment and page tables,
 * that can be used to access the virtual kernel mapping. The difference is
 * that the returned asce does not make use of any enhanced DAT features like
 * e.g. large pages. This is required for some I/O functions that pass an
 * asce, like e.g. some service call requests.
 *
 * Note: the returned asce may NEVER be attached to any cpu. It may only be
 *       used for I/O requests. tlb entries that might result because the
 *       asce was attached to a cpu won't be cleared.
 */
unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
{
        unsigned long asce, *table, end;
        int rc;

        if (base_pgt_cache_init())
                return 0;
        end = addr + num_pages * PAGE_SIZE;
        if (end <= _REGION3_SIZE) {
                table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
                if (!table)
                        return 0;
                rc = base_segment_walk(table, addr, end, 1);
                asce = __pa(table) | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
        } else if (end <= _REGION2_SIZE) {
                table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
                if (!table)
                        return 0;
                rc = base_region3_walk(table, addr, end, 1);
                asce = __pa(table) | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
        } else if (end <= _REGION1_SIZE) {
                table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
                if (!table)
                        return 0;
                rc = base_region2_walk(table, addr, end, 1);
                asce = __pa(table) | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
        } else {
                table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
                if (!table)
                        return 0;
                rc = base_region1_walk(table, addr, end, 1);
                asce = __pa(table) | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
        }
        if (rc) {
                base_asce_free(asce);
                asce = 0;
        }
        return asce;
}
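/*
 * Illustrative pairing (hypothetical caller, for clarity only): an asce
 * returned by base_asce_alloc() describes the given kernel address range
 * without enhanced DAT features and must be released again with
 * base_asce_free():
 *
 *      unsigned long asce;
 *
 *      asce = base_asce_alloc((unsigned long) buffer, num_pages);
 *      if (!asce)
 *              return -ENOMEM;
 *      ... pass the asce to the I/O or service call request ...
 *      base_asce_free(asce);
 */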