GitHub Repository: torvalds/linux
Path: blob/master/arch/s390/mm/vmem.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2006
 */

#include <linux/memory_hotplug.h>
#include <linux/cpufeature.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include <asm/page-states.h>
#include <asm/abs_lowcore.h>
#include <asm/cacheflush.h>
#include <asm/maccess.h>
#include <asm/nospec-branch.h>
#include <asm/ctlreg.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>
#include <asm/physmem_info.h>

static DEFINE_MUTEX(vmem_mutex);

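/*
 * Allocate pages for page tables: use the page allocator once it is up,
 * fall back to memblock during early boot.
 */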
static void __ref *vmem_alloc_pages(unsigned int order)
{
	unsigned long size = PAGE_SIZE << order;

	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return memblock_alloc(size, size);
}

static void vmem_free_pages(unsigned long addr, int order, struct vmem_altmap *altmap)
{
	if (altmap) {
		vmem_altmap_free(altmap, 1 << order);
		return;
	}
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr))))
		return;
	free_pages(addr, order);
}

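/* Allocate a region or segment (CRST) table and initialize all entries with @val. */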
void *vmem_crst_alloc(unsigned long val)
{
	unsigned long *table;

	table = vmem_alloc_pages(CRST_ALLOC_ORDER);
	if (!table)
		return NULL;
	crst_table_init(table, val);
	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
	return table;
}

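/* Allocate a page table with all entries initialized to _PAGE_INVALID. */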
pte_t __ref *vmem_pte_alloc(void)
{
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *)page_table_alloc(&init_mm);
	else
		pte = (pte_t *)memblock_alloc(PAGE_SIZE, PAGE_SIZE);
	if (!pte)
		return NULL;
	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	__arch_set_page_dat(pte, 1);
	return pte;
}

static void vmem_pte_free(unsigned long *table)
{
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
		return;
	page_table_free(&init_mm, table);
}

#define PAGE_UNUSED 0xFD

/*
 * The unused vmemmap range, which was not yet memset(PAGE_UNUSED), ranges
 * from unused_sub_pmd_start to the next PMD_SIZE boundary.
 */
static unsigned long unused_sub_pmd_start;

static void vmemmap_flush_unused_sub_pmd(void)
{
	if (!unused_sub_pmd_start)
		return;
	memset((void *)unused_sub_pmd_start, PAGE_UNUSED,
	       ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start);
	unused_sub_pmd_start = 0;
}

static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end)
{
	/*
	 * As we expect to add in the same granularity as we remove, it's
	 * sufficient to mark only some piece used to block the memmap page from
	 * getting removed (just in case the memmap never gets initialized,
	 * e.g., because the memory block never gets onlined).
	 */
	memset((void *)start, 0, sizeof(struct page));
}

static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
	/*
	 * We only optimize if the new used range directly follows the
	 * previously unused range (esp., when populating consecutive sections).
	 */
	if (unused_sub_pmd_start == start) {
		unused_sub_pmd_start = end;
		if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE)))
			unused_sub_pmd_start = 0;
		return;
	}
	vmemmap_flush_unused_sub_pmd();
	vmemmap_mark_sub_pmd_used(start, end);
}

static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

	vmemmap_flush_unused_sub_pmd();

	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
	vmemmap_mark_sub_pmd_used(start, end);

	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
	if (!IS_ALIGNED(start, PMD_SIZE))
		memset((void *)page, PAGE_UNUSED, start - page);
	/*
	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
	 * consecutive sections. Remember for the last added PMD the last
	 * unused range in the populated PMD.
	 */
	if (!IS_ALIGNED(end, PMD_SIZE))
		unused_sub_pmd_start = end;
}

/* Returns true if the PMD is completely unused and can be freed. */
static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
{
	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

	vmemmap_flush_unused_sub_pmd();
	memset((void *)start, PAGE_UNUSED, end - start);
	return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE);
}

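/*
 * The modify_*_table() helpers below walk the kernel page tables from
 * @addr to @end and either create (@add) or remove mappings; @direct
 * selects the identity (1:1) mapping, otherwise the vmemmap is modified.
 */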
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
				  unsigned long end, bool add, bool direct,
				  struct vmem_altmap *altmap)
{
	unsigned long prot, pages = 0;
	int ret = -ENOMEM;
	pte_t *pte;

	prot = pgprot_val(PAGE_KERNEL);
	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (!add) {
			if (pte_none(*pte))
				continue;
			if (!direct)
				vmem_free_pages((unsigned long)pfn_to_virt(pte_pfn(*pte)), get_order(PAGE_SIZE), altmap);
			pte_clear(&init_mm, addr, pte);
		} else if (pte_none(*pte)) {
			if (!direct) {
				void *new_page = vmemmap_alloc_block_buf(PAGE_SIZE, NUMA_NO_NODE, altmap);

				if (!new_page)
					goto out;
				set_pte(pte, __pte(__pa(new_page) | prot));
			} else {
				set_pte(pte, __pte(__pa(addr) | prot));
			}
		} else {
			continue;
		}
		pages++;
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
	return ret;
}

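/* Free the page table behind @pmd if all of its entries are empty. */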
static void try_free_pte_table(pmd_t *pmd, unsigned long start)
{
	pte_t *pte;
	int i;

	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
	pte = pte_offset_kernel(pmd, start);
	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
		if (!pte_none(*pte))
			return;
	}
	vmem_pte_free((unsigned long *) pmd_deref(*pmd));
	pmd_clear(pmd);
}

/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
				  unsigned long end, bool add, bool direct,
				  struct vmem_altmap *altmap)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pmd_t *pmd;
	pte_t *pte;

	prot = pgprot_val(SEGMENT_KERNEL);
	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (!add) {
			if (pmd_none(*pmd))
				continue;
			if (pmd_leaf(*pmd)) {
				if (IS_ALIGNED(addr, PMD_SIZE) &&
				    IS_ALIGNED(next, PMD_SIZE)) {
					if (!direct)
						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE), altmap);
					pmd_clear(pmd);
					pages++;
				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE), altmap);
					pmd_clear(pmd);
				}
				continue;
			}
		} else if (pmd_none(*pmd)) {
			if (IS_ALIGNED(addr, PMD_SIZE) &&
			    IS_ALIGNED(next, PMD_SIZE) &&
			    cpu_has_edat1() && direct &&
			    !debug_pagealloc_enabled()) {
				set_pmd(pmd, __pmd(__pa(addr) | prot));
				pages++;
				continue;
			} else if (!direct && cpu_has_edat1()) {
				void *new_page;

				/*
				 * Use 1MB frames for vmemmap if available. We
				 * always use large frames even if they are only
				 * partially used. Otherwise we would have also
				 * page tables since vmemmap_populate gets
				 * called for each section separately.
				 */
				new_page = vmemmap_alloc_block_buf(PMD_SIZE, NUMA_NO_NODE, altmap);
				if (new_page) {
					set_pmd(pmd, __pmd(__pa(new_page) | prot));
					if (!IS_ALIGNED(addr, PMD_SIZE) ||
					    !IS_ALIGNED(next, PMD_SIZE)) {
						vmemmap_use_new_sub_pmd(addr, next);
					}
					continue;
				}
			}
			pte = vmem_pte_alloc();
			if (!pte)
				goto out;
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_leaf(*pmd)) {
			if (!direct)
				vmemmap_use_sub_pmd(addr, next);
			continue;
		}
		ret = modify_pte_table(pmd, addr, next, add, direct, altmap);
		if (ret)
			goto out;
		if (!add)
			try_free_pte_table(pmd, addr & PMD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
	return ret;
}

static void try_free_pmd_table(pud_t *pud, unsigned long start)
{
	pmd_t *pmd;
	int i;

	pmd = pmd_offset(pud, start);
	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
		if (!pmd_none(*pmd))
			return;
	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER, NULL);
	pud_clear(pud);
}

static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
			    bool add, bool direct, struct vmem_altmap *altmap)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pud_t *pud;
	pmd_t *pmd;

	prot = pgprot_val(REGION3_KERNEL);
	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (!add) {
			if (pud_none(*pud))
				continue;
			if (pud_leaf(*pud)) {
				if (IS_ALIGNED(addr, PUD_SIZE) &&
				    IS_ALIGNED(next, PUD_SIZE)) {
					pud_clear(pud);
					pages++;
				}
				continue;
			}
		} else if (pud_none(*pud)) {
			if (IS_ALIGNED(addr, PUD_SIZE) &&
			    IS_ALIGNED(next, PUD_SIZE) &&
			    cpu_has_edat2() && direct &&
			    !debug_pagealloc_enabled()) {
				set_pud(pud, __pud(__pa(addr) | prot));
				pages++;
				continue;
			}
			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			if (!pmd)
				goto out;
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_leaf(*pud)) {
			continue;
		}
		ret = modify_pmd_table(pud, addr, next, add, direct, altmap);
		if (ret)
			goto out;
		if (!add)
			try_free_pmd_table(pud, addr & PUD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
	return ret;
}

static void try_free_pud_table(p4d_t *p4d, unsigned long start)
{
	pud_t *pud;
	int i;

	pud = pud_offset(p4d, start);
	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
		if (!pud_none(*pud))
			return;
	}
	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER, NULL);
	p4d_clear(p4d);
}

static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
			    bool add, bool direct, struct vmem_altmap *altmap)
{
	unsigned long next;
	int ret = -ENOMEM;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (!add) {
			if (p4d_none(*p4d))
				continue;
		} else if (p4d_none(*p4d)) {
			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
			if (!pud)
				goto out;
			p4d_populate(&init_mm, p4d, pud);
		}
		ret = modify_pud_table(p4d, addr, next, add, direct, altmap);
		if (ret)
			goto out;
		if (!add)
			try_free_pud_table(p4d, addr & P4D_MASK);
	}
	ret = 0;
out:
	return ret;
}

static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
{
	p4d_t *p4d;
	int i;

	p4d = p4d_offset(pgd, start);
	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
		if (!p4d_none(*p4d))
			return;
	}
	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER, NULL);
	pgd_clear(pgd);
}

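/*
 * Common entry point for adding or removing kernel mappings. Expects
 * page-aligned start and end addresses within the 1:1 mapping and
 * vmemmap area, and flushes the TLB after removals.
 */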
static int modify_pagetable(unsigned long start, unsigned long end, bool add,
			    bool direct, struct vmem_altmap *altmap)
{
	unsigned long addr, next;
	int ret = -ENOMEM;
	pgd_t *pgd;
	p4d_t *p4d;

	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
		return -EINVAL;
	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (WARN_ON_ONCE(end > __abs_lowcore))
		return -EINVAL;
	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset_k(addr);

		if (!add) {
			if (pgd_none(*pgd))
				continue;
		} else if (pgd_none(*pgd)) {
			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
			if (!p4d)
				goto out;
			pgd_populate(&init_mm, pgd, p4d);
		}
		ret = modify_p4d_table(pgd, addr, next, add, direct, altmap);
		if (ret)
			goto out;
		if (!add)
			try_free_p4d_table(pgd, addr & PGDIR_MASK);
	}
	ret = 0;
out:
	if (!add)
		flush_tlb_kernel_range(start, end);
	return ret;
}

static int add_pagetable(unsigned long start, unsigned long end, bool direct,
			 struct vmem_altmap *altmap)
{
	return modify_pagetable(start, end, true, direct, altmap);
}

static int remove_pagetable(unsigned long start, unsigned long end, bool direct,
			    struct vmem_altmap *altmap)
{
	return modify_pagetable(start, end, false, direct, altmap);
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_range(unsigned long start, unsigned long size)
{
	start = (unsigned long)__va(start);
	return add_pagetable(start, start + size, true, NULL);
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	start = (unsigned long)__va(start);
	remove_pagetable(start, start + size, true, NULL);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	int ret;

	mutex_lock(&vmem_mutex);
	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
	ret = add_pagetable(start, end, false, altmap);
	if (ret)
		remove_pagetable(start, end, false, altmap);
	mutex_unlock(&vmem_mutex);
	return ret;
}

#ifdef CONFIG_MEMORY_HOTPLUG

void vmemmap_free(unsigned long start, unsigned long end,
		  struct vmem_altmap *altmap)
{
	mutex_lock(&vmem_mutex);
	remove_pagetable(start, end, false, altmap);
	mutex_unlock(&vmem_mutex);
}

#endif

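/* Locked wrapper around vmem_remove_range(): drop a range from the 1:1 mapping. */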
void vmem_remove_mapping(unsigned long start, unsigned long size)
{
	mutex_lock(&vmem_mutex);
	vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
}

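/* Physical address range that memory hotplug may add to the 1:1 mapping. */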
struct range arch_get_mappable_range(void)
{
	struct range mhp_range;

	mhp_range.start = 0;
	mhp_range.end = max_mappable - 1;
	return mhp_range;
}

int vmem_add_mapping(unsigned long start, unsigned long size)
{
	struct range range = arch_get_mappable_range();
	int ret;

	if (start < range.start ||
	    start + size > range.end + 1 ||
	    start + size < start)
		return -ERANGE;

	mutex_lock(&vmem_mutex);
	ret = vmem_add_range(start, size);
	if (ret)
		vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
	return ret;
}

/*
 * Allocate new or return existing page-table entry, but do not map it
 * to any physical address. If missing, allocate segment- and region-
 * table entries along the way. Meeting a large segment- or region-table
 * entry while traversing is an error, since the function is expected to
 * be called against virtual regions reserved for 4KB mappings only.
 */
pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
{
	pte_t *ptep = NULL;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		if (!alloc)
			goto out;
		p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
		if (!p4d)
			goto out;
		pgd_populate(&init_mm, pgd, p4d);
	}
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d)) {
		if (!alloc)
			goto out;
		pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
		if (!pud)
			goto out;
		p4d_populate(&init_mm, p4d, pud);
	}
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		if (!alloc)
			goto out;
		pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
		if (!pmd)
			goto out;
		pud_populate(&init_mm, pud, pmd);
	} else if (WARN_ON_ONCE(pud_leaf(*pud))) {
		goto out;
	}
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		if (!alloc)
			goto out;
		pte = vmem_pte_alloc();
		if (!pte)
			goto out;
		pmd_populate(&init_mm, pmd, pte);
	} else if (WARN_ON_ONCE(pmd_leaf(*pmd))) {
		goto out;
	}
	ptep = pte_offset_kernel(pmd, addr);
out:
	return ptep;
}

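/*
 * Map a single 4K page at @addr to physical address @phys with protection
 * @prot, invalidating any previous translation for @addr.
 */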
int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc)
{
	pte_t *ptep, pte;

	if (!IS_ALIGNED(addr, PAGE_SIZE))
		return -EINVAL;
	ptep = vmem_get_alloc_pte(addr, alloc);
	if (!ptep)
		return -ENOMEM;
	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	pte = mk_pte_phys(phys, prot);
	set_pte(ptep, pte);
	return 0;
}

int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot)
{
	int rc;

	mutex_lock(&vmem_mutex);
	rc = __vmem_map_4k_page(addr, phys, prot, true);
	mutex_unlock(&vmem_mutex);
	return rc;
}

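/* Undo a mapping established with vmem_map_4k_page(). */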
void vmem_unmap_4k_page(unsigned long addr)
{
	pte_t *ptep;

	mutex_lock(&vmem_mutex);
	ptep = virt_to_kpte(addr);
	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	pte_clear(&init_mm, addr, ptep);
	mutex_unlock(&vmem_mutex);
}

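/*
 * Apply the final protection attributes to the kernel image: text becomes
 * read-only and executable, read-only data becomes read-only.
 */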
void __init vmem_map_init(void)
{
	__set_memory_rox(_stext, _etext);
	__set_memory_ro(_etext, __end_rodata);
	__set_memory_rox(__stext_amode31, __etext_amode31);
	/*
	 * If the BEAR-enhancement facility is not installed the first
	 * prefix page is used to return to the previous context with
	 * an LPSWE instruction and therefore must be executable.
	 */
	if (!cpu_has_bear())
		set_memory_x(0, 1);
	if (debug_pagealloc_enabled())
		__set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size);
	pr_info("Write protected kernel read-only data: %luk\n",
		(unsigned long)(__end_rodata - _stext) >> 10);
}