GitHub Repository: torvalds/linux
Path: blob/master/arch/riscv/mm/init.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* Copyright (C) 2012 Regents of the University of California
4
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
5
* Copyright (C) 2020 FORTH-ICS/CARV
6
* Nick Kossifidis <[email protected]>
7
*/
8
9
#include <linux/init.h>
10
#include <linux/mm.h>
11
#include <linux/memblock.h>
12
#include <linux/initrd.h>
13
#include <linux/swap.h>
14
#include <linux/swiotlb.h>
15
#include <linux/sizes.h>
16
#include <linux/of_fdt.h>
17
#include <linux/of_reserved_mem.h>
18
#include <linux/libfdt.h>
19
#include <linux/set_memory.h>
20
#include <linux/dma-map-ops.h>
21
#include <linux/crash_dump.h>
22
#include <linux/hugetlb.h>
23
#include <linux/kfence.h>
24
#include <linux/execmem.h>
25
26
#include <asm/fixmap.h>
27
#include <asm/io.h>
28
#include <asm/kasan.h>
29
#include <asm/module.h>
30
#include <asm/numa.h>
31
#include <asm/pgtable.h>
32
#include <asm/sections.h>
33
#include <asm/soc.h>
34
#include <asm/sparsemem.h>
35
#include <asm/tlbflush.h>
36
37
#include "../kernel/head.h"
38
39
u64 new_vmalloc[NR_CPUS / sizeof(u64) + 1];
40
41
struct kernel_mapping kernel_map __ro_after_init;
42
EXPORT_SYMBOL(kernel_map);
43
#ifdef CONFIG_XIP_KERNEL
44
#define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
45
#endif
46
47
#ifdef CONFIG_64BIT
48
u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
49
#else
50
u64 satp_mode __ro_after_init = SATP_MODE_32;
51
#endif
52
EXPORT_SYMBOL(satp_mode);
53
54
#ifdef CONFIG_64BIT
55
bool pgtable_l4_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL);
56
bool pgtable_l5_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL);
57
EXPORT_SYMBOL(pgtable_l4_enabled);
58
EXPORT_SYMBOL(pgtable_l5_enabled);
59
#endif
60
61
phys_addr_t phys_ram_base __ro_after_init;
62
EXPORT_SYMBOL(phys_ram_base);
63
64
#ifdef CONFIG_SPARSEMEM_VMEMMAP
65
#define VMEMMAP_ADDR_ALIGN (1ULL << SECTION_SIZE_BITS)
66
67
unsigned long vmemmap_start_pfn __ro_after_init;
68
EXPORT_SYMBOL(vmemmap_start_pfn);
69
#endif
70
71
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
72
__page_aligned_bss;
73
EXPORT_SYMBOL(empty_zero_page);
74
75
extern char _start[];
76
void *_dtb_early_va __initdata;
77
uintptr_t _dtb_early_pa __initdata;
78
79
phys_addr_t dma32_phys_limit __initdata;
80
81
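/*
 * Report the highest PFN of each zone (ZONE_DMA32 capped at dma32_phys_limit
 * when enabled, ZONE_NORMAL at max_low_pfn) to the core mm via
 * free_area_init().
 */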
static void __init zone_sizes_init(void)
82
{
83
unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
84
85
#ifdef CONFIG_ZONE_DMA32
86
max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
87
#endif
88
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
89
90
free_area_init(max_zone_pfns);
91
}
92
93
#if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM)
94
95
#define LOG2_SZ_1K ilog2(SZ_1K)
96
#define LOG2_SZ_1M ilog2(SZ_1M)
97
#define LOG2_SZ_1G ilog2(SZ_1G)
98
#define LOG2_SZ_1T ilog2(SZ_1T)
99
100
static inline void print_mlk(char *name, unsigned long b, unsigned long t)
101
{
102
pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld kB)\n", name, b, t,
103
(((t) - (b)) >> LOG2_SZ_1K));
104
}
105
106
static inline void print_mlm(char *name, unsigned long b, unsigned long t)
107
{
108
pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld MB)\n", name, b, t,
109
(((t) - (b)) >> LOG2_SZ_1M));
110
}
111
112
static inline void print_mlg(char *name, unsigned long b, unsigned long t)
113
{
114
pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld GB)\n", name, b, t,
115
(((t) - (b)) >> LOG2_SZ_1G));
116
}
117
118
#ifdef CONFIG_64BIT
119
static inline void print_mlt(char *name, unsigned long b, unsigned long t)
120
{
121
pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld TB)\n", name, b, t,
122
(((t) - (b)) >> LOG2_SZ_1T));
123
}
124
#else
125
#define print_mlt(n, b, t) do {} while (0)
126
#endif
127
128
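/*
 * Print a virtual range using the largest unit (kB/MB/GB/TB) that still
 * yields a size of at least 10 in that unit.
 */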
static inline void print_ml(char *name, unsigned long b, unsigned long t)
129
{
130
unsigned long diff = t - b;
131
132
if (IS_ENABLED(CONFIG_64BIT) && (diff >> LOG2_SZ_1T) >= 10)
133
print_mlt(name, b, t);
134
else if ((diff >> LOG2_SZ_1G) >= 10)
135
print_mlg(name, b, t);
136
else if ((diff >> LOG2_SZ_1M) >= 10)
137
print_mlm(name, b, t);
138
else
139
print_mlk(name, b, t);
140
}
141
142
static void __init print_vm_layout(void)
143
{
144
pr_notice("Virtual kernel memory layout:\n");
145
print_ml("fixmap", (unsigned long)FIXADDR_START,
146
(unsigned long)FIXADDR_TOP);
147
print_ml("pci io", (unsigned long)PCI_IO_START,
148
(unsigned long)PCI_IO_END);
149
print_ml("vmemmap", (unsigned long)VMEMMAP_START,
150
(unsigned long)VMEMMAP_END);
151
print_ml("vmalloc", (unsigned long)VMALLOC_START,
152
(unsigned long)VMALLOC_END);
153
#ifdef CONFIG_64BIT
154
print_ml("modules", (unsigned long)MODULES_VADDR,
155
(unsigned long)MODULES_END);
156
#endif
157
print_ml("lowmem", (unsigned long)PAGE_OFFSET,
158
(unsigned long)high_memory);
159
if (IS_ENABLED(CONFIG_64BIT)) {
160
#ifdef CONFIG_KASAN
161
print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
162
#endif
163
164
print_ml("kernel", (unsigned long)kernel_map.virt_addr,
165
(unsigned long)ADDRESS_SPACE_END);
166
}
167
}
168
#else
169
static void print_vm_layout(void) { }
170
#endif /* CONFIG_DEBUG_VM */
171
172
void __init arch_mm_preinit(void)
173
{
174
bool swiotlb = max_pfn > PFN_DOWN(dma32_phys_limit);
175
#ifdef CONFIG_FLATMEM
176
BUG_ON(!mem_map);
177
#endif /* CONFIG_FLATMEM */
178
179
if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb &&
180
dma_cache_alignment != 1) {
181
/*
182
* If no bouncing needed for ZONE_DMA, allocate 1MB swiotlb
183
* buffer per 1GB of RAM for kmalloc() bouncing on
184
* non-coherent platforms.
185
*/
186
unsigned long size =
187
DIV_ROUND_UP(memblock_phys_mem_size(), 1024);
188
swiotlb_adjust_size(min(swiotlb_size_or_default(), size));
189
swiotlb = true;
190
}
191
192
swiotlb_init(swiotlb, SWIOTLB_VERBOSE);
193
194
print_vm_layout();
195
}
196
197
/* Limit the memory size via mem. */
198
static phys_addr_t memory_limit;
199
#ifdef CONFIG_XIP_KERNEL
200
#define memory_limit (*(phys_addr_t *)XIP_FIXUP(&memory_limit))
201
#endif /* CONFIG_XIP_KERNEL */
202
203
static int __init early_mem(char *p)
204
{
205
u64 size;
206
207
if (!p)
208
return 1;
209
210
size = memparse(p, &p) & PAGE_MASK;
211
memory_limit = min_t(u64, size, memory_limit);
212
213
pr_notice("Memory limited to %lldMB\n", (u64)memory_limit >> 20);
214
215
return 0;
216
}
217
early_param("mem", early_mem);
218
219
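/*
 * Finalize the memblock layout: reserve the kernel image, round phys_ram_base
 * down to a PMD boundary, cap RAM to what the linear mapping can cover, then
 * reserve the initrd, the DTB and the regions described in the device tree
 * before CMA and hugetlb reservations are made.
 */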
static void __init setup_bootmem(void)
220
{
221
phys_addr_t vmlinux_end = __pa_symbol(&_end);
222
phys_addr_t max_mapped_addr;
223
phys_addr_t phys_ram_end, vmlinux_start;
224
225
if (IS_ENABLED(CONFIG_XIP_KERNEL))
226
vmlinux_start = __pa_symbol(&_sdata);
227
else
228
vmlinux_start = __pa_symbol(&_start);
229
230
memblock_enforce_memory_limit(memory_limit);
231
232
/*
233
* Make sure we align the reservation on PMD_SIZE since we will
234
* map the kernel in the linear mapping as read-only: we do not want
235
* any allocation to happen between _end and the next pmd aligned page.
236
*/
237
if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
238
vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
239
/*
240
* Reserve from the start of the kernel to the end of the kernel
241
*/
242
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
243
244
/*
245
* Make sure we align the start of the memory on a PMD boundary so that
246
* at worst, we map the linear mapping with PMD mappings.
247
*/
248
if (!IS_ENABLED(CONFIG_XIP_KERNEL)) {
249
phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
250
#ifdef CONFIG_SPARSEMEM_VMEMMAP
251
vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
252
#endif
253
}
254
255
/*
256
* In 64-bit, any use of __va/__pa before this point is wrong as we
257
* did not know the start of DRAM before.
258
*/
259
if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU))
260
kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
261
262
/*
263
* The size of the linear page mapping may restrict the amount of
264
* usable RAM.
265
*/
266
if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) {
267
max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE;
268
if (memblock_end_of_DRAM() > max_mapped_addr) {
269
memblock_cap_memory_range(phys_ram_base,
270
max_mapped_addr - phys_ram_base);
271
pr_warn("Physical memory overflows the linear mapping size: region above %pa removed",
272
&max_mapped_addr);
273
}
274
}
275
276
/*
277
* Reserve physical address space that would be mapped to virtual
278
* addresses greater than (void *)(-PAGE_SIZE) because:
279
* - This memory would overlap with ERR_PTR
280
* - This memory belongs to high memory, which is not supported
281
*
282
* This is not applicable to 64-bit kernel, because virtual addresses
283
* after (void *)(-PAGE_SIZE) are not linearly mapped: they are
284
* occupied by kernel mapping. Also it is unrealistic for high memory
285
* to exist on 64-bit platforms.
286
*/
287
if (!IS_ENABLED(CONFIG_64BIT)) {
288
max_mapped_addr = __va_to_pa_nodebug(-PAGE_SIZE);
289
memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr);
290
}
291
292
phys_ram_end = memblock_end_of_DRAM();
293
min_low_pfn = PFN_UP(phys_ram_base);
294
max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
295
296
dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
297
298
reserve_initrd_mem();
299
300
/*
301
* No allocation should be done before reserving the memory as defined
302
* in the device tree, otherwise the allocation could end up in a
303
* reserved region.
304
*/
305
early_init_fdt_scan_reserved_mem();
306
307
/*
308
* If DTB is built in, no need to reserve its memblock.
309
* Otherwise, do reserve it but avoid using
310
* early_init_fdt_reserve_self() since __pa() does
311
* not work for DTB pointers that are fixmap addresses
312
*/
313
if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
314
memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
315
316
dma_contiguous_reserve(dma32_phys_limit);
317
if (IS_ENABLED(CONFIG_64BIT))
318
hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
319
}
320
321
#ifdef CONFIG_RELOCATABLE
322
extern unsigned long __rela_dyn_start, __rela_dyn_end;
323
324
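/*
 * Walk .rela.dyn and apply R_RISCV_RELATIVE relocations so that the kernel
 * can run from a virtual address different from KERNEL_LINK_ADDR.
 */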
static void __init relocate_kernel(void)
325
{
326
Elf_Rela *rela = (Elf_Rela *)&__rela_dyn_start;
327
/*
328
* This holds the offset between the linked virtual address and the
329
* relocated virtual address.
330
*/
331
uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
332
/*
333
* This holds the offset between kernel linked virtual address and
334
* physical address.
335
*/
336
uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;
337
338
for ( ; rela < (Elf_Rela *)&__rela_dyn_end; rela++) {
339
Elf_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
340
Elf_Addr relocated_addr = rela->r_addend;
341
342
if (rela->r_info != R_RISCV_RELATIVE)
343
continue;
344
345
/*
346
* Make sure to not relocate vdso symbols like rt_sigreturn
347
* which are linked from the address 0 in vmlinux since
348
* vdso symbol addresses are actually used as an offset from
349
* mm->context.vdso in VDSO_OFFSET macro.
350
*/
351
if (relocated_addr >= KERNEL_LINK_ADDR)
352
relocated_addr += reloc_offset;
353
354
*(Elf_Addr *)addr = relocated_addr;
355
}
356
}
357
#endif /* CONFIG_RELOCATABLE */
358
359
#ifdef CONFIG_MMU
360
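/*
 * Page-table allocation hooks, switched between the early, fixmap and late
 * boot phases by pt_ops_set_early/fixmap/late() below.
 */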
struct pt_alloc_ops pt_ops __meminitdata;
361
362
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
363
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
364
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
365
366
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
367
368
#ifdef CONFIG_XIP_KERNEL
369
#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
370
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
371
#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
372
#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
373
#endif /* CONFIG_XIP_KERNEL */
374
375
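/*
 * Translate VM_READ/VM_WRITE/VM_EXEC/VM_SHARED combinations into page
 * protections: private writable mappings get copy-on-write (PAGE_COPY*)
 * protections, shared writable mappings get PAGE_SHARED*.
 */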
static const pgprot_t protection_map[16] = {
376
[VM_NONE] = PAGE_NONE,
377
[VM_READ] = PAGE_READ,
378
[VM_WRITE] = PAGE_COPY,
379
[VM_WRITE | VM_READ] = PAGE_COPY,
380
[VM_EXEC] = PAGE_EXEC,
381
[VM_EXEC | VM_READ] = PAGE_READ_EXEC,
382
[VM_EXEC | VM_WRITE] = PAGE_COPY_EXEC,
383
[VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_EXEC,
384
[VM_SHARED] = PAGE_NONE,
385
[VM_SHARED | VM_READ] = PAGE_READ,
386
[VM_SHARED | VM_WRITE] = PAGE_SHARED,
387
[VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED,
388
[VM_SHARED | VM_EXEC] = PAGE_EXEC,
389
[VM_SHARED | VM_EXEC | VM_READ] = PAGE_READ_EXEC,
390
[VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_EXEC,
391
[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC
392
};
393
DECLARE_VM_GET_PAGE_PROT
394
395
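/*
 * Install (non-zero prot) or clear the fixmap PTE for the given slot and
 * flush the corresponding TLB entry on the local CPU.
 */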
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
396
{
397
unsigned long addr = __fix_to_virt(idx);
398
pte_t *ptep;
399
400
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
401
402
ptep = &fixmap_pte[pte_index(addr)];
403
404
if (pgprot_val(prot))
405
set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
406
else
407
pte_clear(&init_mm, addr, ptep);
408
local_flush_tlb_page(addr);
409
}
410
411
static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
412
{
413
return (pte_t *)((uintptr_t)pa);
414
}
415
416
static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
417
{
418
clear_fixmap(FIX_PTE);
419
return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
420
}
421
422
static inline pte_t *__meminit get_pte_virt_late(phys_addr_t pa)
423
{
424
return (pte_t *) __va(pa);
425
}
426
427
static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
428
{
429
/*
430
* We only create PMD or PGD early mappings so we
431
* should never reach here with MMU disabled.
432
*/
433
BUG();
434
}
435
436
static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
437
{
438
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
439
}
440
441
static phys_addr_t __meminit alloc_pte_late(uintptr_t va)
442
{
443
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
444
445
/*
446
* We do not know which mm the PTE page is associated to at this point.
447
* Passing NULL to the ctor is the safe option, though it may result
448
* in unnecessary work (e.g. initialising the ptlock for init_mm).
449
*/
450
BUG_ON(!ptdesc || !pagetable_pte_ctor(NULL, ptdesc));
451
return __pa((pte_t *)ptdesc_address(ptdesc));
452
}
453
454
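/* Install a single 4K PTE for va -> pa, but only if the slot is still empty. */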
static void __meminit create_pte_mapping(pte_t *ptep, uintptr_t va, phys_addr_t pa, phys_addr_t sz,
455
pgprot_t prot)
456
{
457
uintptr_t pte_idx = pte_index(va);
458
459
BUG_ON(sz != PAGE_SIZE);
460
461
if (pte_none(ptep[pte_idx]))
462
ptep[pte_idx] = pfn_pte(PFN_DOWN(pa), prot);
463
}
464
465
#ifndef __PAGETABLE_PMD_FOLDED
466
467
static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
468
static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
469
static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
470
471
#ifdef CONFIG_XIP_KERNEL
472
#define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
473
#define fixmap_pmd ((pmd_t *)XIP_FIXUP(fixmap_pmd))
474
#define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd))
475
#endif /* CONFIG_XIP_KERNEL */
476
477
static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
478
static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
479
static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
480
481
#ifdef CONFIG_XIP_KERNEL
482
#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
483
#define fixmap_p4d ((p4d_t *)XIP_FIXUP(fixmap_p4d))
484
#define early_p4d ((p4d_t *)XIP_FIXUP(early_p4d))
485
#endif /* CONFIG_XIP_KERNEL */
486
487
static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
488
static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
489
static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
490
491
#ifdef CONFIG_XIP_KERNEL
492
#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud))
493
#define fixmap_pud ((pud_t *)XIP_FIXUP(fixmap_pud))
494
#define early_pud ((pud_t *)XIP_FIXUP(early_pud))
495
#endif /* CONFIG_XIP_KERNEL */
496
497
static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
498
{
499
/* Before MMU is enabled */
500
return (pmd_t *)((uintptr_t)pa);
501
}
502
503
static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
504
{
505
clear_fixmap(FIX_PMD);
506
return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
507
}
508
509
static pmd_t *__meminit get_pmd_virt_late(phys_addr_t pa)
510
{
511
return (pmd_t *) __va(pa);
512
}
513
514
static phys_addr_t __init alloc_pmd_early(uintptr_t va)
515
{
516
BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
517
518
return (uintptr_t)early_pmd;
519
}
520
521
static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
522
{
523
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
524
}
525
526
static phys_addr_t __meminit alloc_pmd_late(uintptr_t va)
527
{
528
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
529
530
/* See comment in alloc_pte_late() regarding NULL passed the ctor */
531
BUG_ON(!ptdesc || !pagetable_pmd_ctor(NULL, ptdesc));
532
return __pa((pmd_t *)ptdesc_address(ptdesc));
533
}
534
535
static void __meminit create_pmd_mapping(pmd_t *pmdp,
536
uintptr_t va, phys_addr_t pa,
537
phys_addr_t sz, pgprot_t prot)
538
{
539
pte_t *ptep;
540
phys_addr_t pte_phys;
541
uintptr_t pmd_idx = pmd_index(va);
542
543
if (sz == PMD_SIZE) {
544
if (pmd_none(pmdp[pmd_idx]))
545
pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pa), prot);
546
return;
547
}
548
549
if (pmd_none(pmdp[pmd_idx])) {
550
pte_phys = pt_ops.alloc_pte(va);
551
pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
552
ptep = pt_ops.get_pte_virt(pte_phys);
553
memset(ptep, 0, PAGE_SIZE);
554
} else {
555
pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
556
ptep = pt_ops.get_pte_virt(pte_phys);
557
}
558
559
create_pte_mapping(ptep, va, pa, sz, prot);
560
}
561
562
static pud_t *__init get_pud_virt_early(phys_addr_t pa)
563
{
564
return (pud_t *)((uintptr_t)pa);
565
}
566
567
static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
568
{
569
clear_fixmap(FIX_PUD);
570
return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
571
}
572
573
static pud_t *__meminit get_pud_virt_late(phys_addr_t pa)
574
{
575
return (pud_t *)__va(pa);
576
}
577
578
static phys_addr_t __init alloc_pud_early(uintptr_t va)
579
{
580
/* Only one PUD is available for early mapping */
581
BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
582
583
return (uintptr_t)early_pud;
584
}
585
586
static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
587
{
588
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
589
}
590
591
static phys_addr_t __meminit alloc_pud_late(uintptr_t va)
592
{
593
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0);
594
595
BUG_ON(!ptdesc);
596
pagetable_pud_ctor(ptdesc);
597
return __pa((pud_t *)ptdesc_address(ptdesc));
598
}
599
600
static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
601
{
602
return (p4d_t *)((uintptr_t)pa);
603
}
604
605
static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
606
{
607
clear_fixmap(FIX_P4D);
608
return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
609
}
610
611
static p4d_t *__meminit get_p4d_virt_late(phys_addr_t pa)
612
{
613
return (p4d_t *)__va(pa);
614
}
615
616
static phys_addr_t __init alloc_p4d_early(uintptr_t va)
617
{
618
/* Only one P4D is available for early mapping */
619
BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
620
621
return (uintptr_t)early_p4d;
622
}
623
624
static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
625
{
626
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
627
}
628
629
static phys_addr_t __meminit alloc_p4d_late(uintptr_t va)
630
{
631
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0);
632
633
BUG_ON(!ptdesc);
634
pagetable_p4d_ctor(ptdesc);
635
return __pa((p4d_t *)ptdesc_address(ptdesc));
636
}
637
638
static void __meminit create_pud_mapping(pud_t *pudp, uintptr_t va, phys_addr_t pa, phys_addr_t sz,
639
pgprot_t prot)
640
{
641
pmd_t *nextp;
642
phys_addr_t next_phys;
643
uintptr_t pud_index = pud_index(va);
644
645
if (sz == PUD_SIZE) {
646
if (pud_val(pudp[pud_index]) == 0)
647
pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
648
return;
649
}
650
651
if (pud_val(pudp[pud_index]) == 0) {
652
next_phys = pt_ops.alloc_pmd(va);
653
pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
654
nextp = pt_ops.get_pmd_virt(next_phys);
655
memset(nextp, 0, PAGE_SIZE);
656
} else {
657
next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
658
nextp = pt_ops.get_pmd_virt(next_phys);
659
}
660
661
create_pmd_mapping(nextp, va, pa, sz, prot);
662
}
663
664
static void __meminit create_p4d_mapping(p4d_t *p4dp, uintptr_t va, phys_addr_t pa, phys_addr_t sz,
665
pgprot_t prot)
666
{
667
pud_t *nextp;
668
phys_addr_t next_phys;
669
uintptr_t p4d_index = p4d_index(va);
670
671
if (sz == P4D_SIZE) {
672
if (p4d_val(p4dp[p4d_index]) == 0)
673
p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
674
return;
675
}
676
677
if (p4d_val(p4dp[p4d_index]) == 0) {
678
next_phys = pt_ops.alloc_pud(va);
679
p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
680
nextp = pt_ops.get_pud_virt(next_phys);
681
memset(nextp, 0, PAGE_SIZE);
682
} else {
683
next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
684
nextp = pt_ops.get_pud_virt(next_phys);
685
}
686
687
create_pud_mapping(nextp, va, pa, sz, prot);
688
}
689
690
#define pgd_next_t p4d_t
691
#define alloc_pgd_next(__va) (pgtable_l5_enabled ? \
692
pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ? \
693
pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
694
#define get_pgd_next_virt(__pa) (pgtable_l5_enabled ? \
695
pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ? \
696
pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
697
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
698
(pgtable_l5_enabled ? \
699
create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
700
(pgtable_l4_enabled ? \
701
create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) : \
702
create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
703
#define fixmap_pgd_next (pgtable_l5_enabled ? \
704
(uintptr_t)fixmap_p4d : (pgtable_l4_enabled ? \
705
(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
706
#define trampoline_pgd_next (pgtable_l5_enabled ? \
707
(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \
708
(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
709
#else
710
#define pgd_next_t pte_t
711
#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
712
#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa)
713
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
714
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
715
#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
716
#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
717
#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
718
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
719
#endif /* __PAGETABLE_PMD_FOLDED */
720
721
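/*
 * Top-level mapping helper: install a leaf PGD entry for PGDIR_SIZE mappings,
 * otherwise allocate the next-level table on demand (via pt_ops) and descend
 * until the requested mapping size is reached.
 */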
void __meminit create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz,
722
pgprot_t prot)
723
{
724
pgd_next_t *nextp;
725
phys_addr_t next_phys;
726
uintptr_t pgd_idx = pgd_index(va);
727
728
if (sz == PGDIR_SIZE) {
729
if (pgd_val(pgdp[pgd_idx]) == 0)
730
pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(pa), prot);
731
return;
732
}
733
734
if (pgd_val(pgdp[pgd_idx]) == 0) {
735
next_phys = alloc_pgd_next(va);
736
pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
737
nextp = get_pgd_next_virt(next_phys);
738
memset(nextp, 0, PAGE_SIZE);
739
} else {
740
next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_idx]));
741
nextp = get_pgd_next_virt(next_phys);
742
}
743
744
create_pgd_next_mapping(nextp, va, pa, sz, prot);
745
}
746
747
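/*
 * Pick the largest mapping size allowed by the alignment of both pa and va
 * and by the remaining size (e.g. a P4D_SIZE-aligned region of at least
 * P4D_SIZE gets a P4D leaf). Fall back to 4K pages when debug_pagealloc is
 * enabled.
 */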
static uintptr_t __meminit best_map_size(phys_addr_t pa, uintptr_t va, phys_addr_t size)
748
{
749
if (debug_pagealloc_enabled())
750
return PAGE_SIZE;
751
752
if (pgtable_l5_enabled &&
753
!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
754
return P4D_SIZE;
755
756
if (pgtable_l4_enabled &&
757
!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
758
return PUD_SIZE;
759
760
if (IS_ENABLED(CONFIG_64BIT) &&
761
!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
762
return PMD_SIZE;
763
764
return PAGE_SIZE;
765
}
766
767
#ifdef CONFIG_XIP_KERNEL
768
#define phys_ram_base (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base))
769
extern char _xiprom[], _exiprom[], __data_loc;
770
771
/* called from head.S with MMU off */
772
asmlinkage void __init __copy_data(void)
773
{
774
void *from = (void *)(&__data_loc);
775
void *to = (void *)CONFIG_PHYS_RAM_BASE;
776
size_t sz = (size_t)((uintptr_t)(&_end) - (uintptr_t)(&_sdata));
777
778
memcpy(to, from, sz);
779
}
780
#endif
781
782
#ifdef CONFIG_STRICT_KERNEL_RWX
783
static __meminit pgprot_t pgprot_from_va(uintptr_t va)
784
{
785
if (is_va_kernel_text(va))
786
return PAGE_KERNEL_READ_EXEC;
787
788
/*
789
* In 64-bit kernel, the kernel mapping is outside the linear mapping so
790
* we must protect its linear mapping alias from being executed and
791
* written.
792
* And rodata section is marked readonly in mark_rodata_ro.
793
*/
794
if (IS_ENABLED(CONFIG_64BIT) && is_va_kernel_lm_alias_text(va))
795
return PAGE_KERNEL_READ;
796
797
return PAGE_KERNEL;
798
}
799
800
void mark_rodata_ro(void)
801
{
802
set_kernel_memory(__start_rodata, _data, set_memory_ro);
803
if (IS_ENABLED(CONFIG_64BIT))
804
set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data),
805
set_memory_ro);
806
}
807
#else
808
static __meminit pgprot_t pgprot_from_va(uintptr_t va)
809
{
810
if (IS_ENABLED(CONFIG_64BIT) && !is_kernel_mapping(va))
811
return PAGE_KERNEL;
812
813
return PAGE_KERNEL_EXEC;
814
}
815
#endif /* CONFIG_STRICT_KERNEL_RWX */
816
817
#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
818
u64 __pi_set_satp_mode_from_cmdline(uintptr_t dtb_pa);
819
820
static void __init disable_pgtable_l5(void)
821
{
822
pgtable_l5_enabled = false;
823
kernel_map.page_offset = PAGE_OFFSET_L4;
824
satp_mode = SATP_MODE_48;
825
}
826
827
static void __init disable_pgtable_l4(void)
828
{
829
pgtable_l4_enabled = false;
830
kernel_map.page_offset = PAGE_OFFSET_L3;
831
satp_mode = SATP_MODE_39;
832
}
833
834
static int __init print_no4lvl(char *p)
835
{
836
pr_info("Disabled 4-level and 5-level paging");
837
return 0;
838
}
839
early_param("no4lvl", print_no4lvl);
840
841
static int __init print_no5lvl(char *p)
842
{
843
pr_info("Disabled 5-level paging");
844
return 0;
845
}
846
early_param("no5lvl", print_no5lvl);
847
848
static void __init set_mmap_rnd_bits_max(void)
849
{
850
mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3;
851
}
852
853
/*
854
* There is a simple way to determine if 4-level is supported by the
855
* underlying hardware: establish 1:1 mapping in 4-level page table mode
856
* then read SATP to see if the configuration was taken into account
857
* meaning sv48 is supported.
858
*/
859
static __init void set_satp_mode(uintptr_t dtb_pa)
860
{
861
u64 identity_satp, hw_satp;
862
uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
863
u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa);
864
865
kernel_map.page_offset = PAGE_OFFSET_L5;
866
867
if (satp_mode_cmdline == SATP_MODE_57) {
868
disable_pgtable_l5();
869
} else if (satp_mode_cmdline == SATP_MODE_48) {
870
disable_pgtable_l5();
871
disable_pgtable_l4();
872
return;
873
}
874
875
create_p4d_mapping(early_p4d,
876
set_satp_mode_pmd, (uintptr_t)early_pud,
877
P4D_SIZE, PAGE_TABLE);
878
create_pud_mapping(early_pud,
879
set_satp_mode_pmd, (uintptr_t)early_pmd,
880
PUD_SIZE, PAGE_TABLE);
881
/* Handle the case where set_satp_mode straddles 2 PMDs */
882
create_pmd_mapping(early_pmd,
883
set_satp_mode_pmd, set_satp_mode_pmd,
884
PMD_SIZE, PAGE_KERNEL_EXEC);
885
create_pmd_mapping(early_pmd,
886
set_satp_mode_pmd + PMD_SIZE,
887
set_satp_mode_pmd + PMD_SIZE,
888
PMD_SIZE, PAGE_KERNEL_EXEC);
889
retry:
890
create_pgd_mapping(early_pg_dir,
891
set_satp_mode_pmd,
892
pgtable_l5_enabled ?
893
(uintptr_t)early_p4d : (uintptr_t)early_pud,
894
PGDIR_SIZE, PAGE_TABLE);
895
896
identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
897
898
local_flush_tlb_all();
899
csr_write(CSR_SATP, identity_satp);
900
hw_satp = csr_swap(CSR_SATP, 0ULL);
901
local_flush_tlb_all();
902
903
if (hw_satp != identity_satp) {
904
if (pgtable_l5_enabled) {
905
disable_pgtable_l5();
906
memset(early_pg_dir, 0, PAGE_SIZE);
907
goto retry;
908
}
909
disable_pgtable_l4();
910
}
911
912
memset(early_pg_dir, 0, PAGE_SIZE);
913
memset(early_p4d, 0, PAGE_SIZE);
914
memset(early_pud, 0, PAGE_SIZE);
915
memset(early_pmd, 0, PAGE_SIZE);
916
}
917
#endif
918
919
/*
920
* setup_vm() is called from head.S with MMU-off.
921
*
922
* Following requirements should be honoured for setup_vm() to work
923
* correctly:
924
* 1) It should use PC-relative addressing for accessing kernel symbols.
925
* To achieve this we always use GCC cmodel=medany.
926
* 2) The compiler instrumentation for FTRACE will not work for setup_vm()
927
* so disable compiler instrumentation when FTRACE is enabled.
928
*
929
* Currently, the above requirements are honoured by using custom CFLAGS
930
* for init.o in mm/Makefile.
931
*/
932
933
#ifndef __riscv_cmodel_medany
934
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
935
#endif
936
937
#ifdef CONFIG_XIP_KERNEL
938
static void __init create_kernel_page_table(pgd_t *pgdir,
939
__always_unused bool early)
940
{
941
uintptr_t va, start_va, end_va;
942
943
/* Map the flash resident part */
944
end_va = kernel_map.virt_addr + kernel_map.xiprom_sz;
945
for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
946
create_pgd_mapping(pgdir, va,
947
kernel_map.xiprom + (va - kernel_map.virt_addr),
948
PMD_SIZE, PAGE_KERNEL_EXEC);
949
950
/* Map the data in RAM */
951
start_va = kernel_map.virt_addr + (uintptr_t)&_sdata - (uintptr_t)&_start;
952
end_va = kernel_map.virt_addr + kernel_map.size;
953
for (va = start_va; va < end_va; va += PMD_SIZE)
954
create_pgd_mapping(pgdir, va,
955
kernel_map.phys_addr + (va - start_va),
956
PMD_SIZE, PAGE_KERNEL);
957
}
958
#else
959
static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
960
{
961
uintptr_t va, end_va;
962
963
end_va = kernel_map.virt_addr + kernel_map.size;
964
for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE)
965
create_pgd_mapping(pgdir, va,
966
kernel_map.phys_addr + (va - kernel_map.virt_addr),
967
PMD_SIZE,
968
early ?
969
PAGE_KERNEL_EXEC : pgprot_from_va(va));
970
}
971
#endif
972
973
/*
974
* Setup a 4MB mapping that encompasses the device tree: for 64-bit kernel,
975
* this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
976
* entry.
977
*/
978
static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va,
979
uintptr_t dtb_pa)
980
{
981
#ifndef CONFIG_BUILTIN_DTB
982
uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
983
984
/* Make sure the fdt fixmap address is always aligned on PMD size */
985
BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE));
986
987
/* In 32-bit only, the fdt lies in its own PGD */
988
if (!IS_ENABLED(CONFIG_64BIT)) {
989
create_pgd_mapping(early_pg_dir, fix_fdt_va,
990
pa, MAX_FDT_SIZE, PAGE_KERNEL);
991
} else {
992
create_pmd_mapping(fixmap_pmd, fix_fdt_va,
993
pa, PMD_SIZE, PAGE_KERNEL);
994
create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE,
995
pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
996
}
997
998
dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1));
999
#else
1000
/*
1001
* For 64-bit kernel, __va can't be used since it would return a linear
1002
* mapping address whereas dtb_early_va will be used before
1003
* setup_vm_final installs the linear mapping. For 32-bit kernel, as the
1004
* kernel is mapped in the linear mapping, that makes no difference.
1005
*/
1006
dtb_early_va = kernel_mapping_pa_to_va(dtb_pa);
1007
#endif
1008
1009
dtb_early_pa = dtb_pa;
1010
}
1011
1012
/*
1013
* MMU is not enabled, the page tables are allocated directly using
1014
* early_pmd/pud/p4d and the address returned is the physical one.
1015
*/
1016
static void __init pt_ops_set_early(void)
1017
{
1018
pt_ops.alloc_pte = alloc_pte_early;
1019
pt_ops.get_pte_virt = get_pte_virt_early;
1020
#ifndef __PAGETABLE_PMD_FOLDED
1021
pt_ops.alloc_pmd = alloc_pmd_early;
1022
pt_ops.get_pmd_virt = get_pmd_virt_early;
1023
pt_ops.alloc_pud = alloc_pud_early;
1024
pt_ops.get_pud_virt = get_pud_virt_early;
1025
pt_ops.alloc_p4d = alloc_p4d_early;
1026
pt_ops.get_p4d_virt = get_p4d_virt_early;
1027
#endif
1028
}
1029
1030
/*
1031
* MMU is enabled but page table setup is not complete yet.
1032
* fixmap page table alloc functions must be used as a means to temporarily
1033
* map the allocated physical pages since the linear mapping does not exist yet.
1034
*
1035
* Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va,
1036
* but it will be used as described above.
1037
*/
1038
static void __init pt_ops_set_fixmap(void)
1039
{
1040
pt_ops.alloc_pte = kernel_mapping_pa_to_va(alloc_pte_fixmap);
1041
pt_ops.get_pte_virt = kernel_mapping_pa_to_va(get_pte_virt_fixmap);
1042
#ifndef __PAGETABLE_PMD_FOLDED
1043
pt_ops.alloc_pmd = kernel_mapping_pa_to_va(alloc_pmd_fixmap);
1044
pt_ops.get_pmd_virt = kernel_mapping_pa_to_va(get_pmd_virt_fixmap);
1045
pt_ops.alloc_pud = kernel_mapping_pa_to_va(alloc_pud_fixmap);
1046
pt_ops.get_pud_virt = kernel_mapping_pa_to_va(get_pud_virt_fixmap);
1047
pt_ops.alloc_p4d = kernel_mapping_pa_to_va(alloc_p4d_fixmap);
1048
pt_ops.get_p4d_virt = kernel_mapping_pa_to_va(get_p4d_virt_fixmap);
1049
#endif
1050
}
1051
1052
/*
1053
* MMU is enabled and page table setup is complete, so from now, we can use
1054
* generic page allocation functions to setup page table.
1055
*/
1056
static void __init pt_ops_set_late(void)
1057
{
1058
pt_ops.alloc_pte = alloc_pte_late;
1059
pt_ops.get_pte_virt = get_pte_virt_late;
1060
#ifndef __PAGETABLE_PMD_FOLDED
1061
pt_ops.alloc_pmd = alloc_pmd_late;
1062
pt_ops.get_pmd_virt = get_pmd_virt_late;
1063
pt_ops.alloc_pud = alloc_pud_late;
1064
pt_ops.get_pud_virt = get_pud_virt_late;
1065
pt_ops.alloc_p4d = alloc_p4d_late;
1066
pt_ops.get_p4d_virt = get_p4d_virt_late;
1067
#endif
1068
}
1069
1070
#ifdef CONFIG_RANDOMIZE_BASE
1071
extern bool __init __pi_set_nokaslr_from_cmdline(uintptr_t dtb_pa);
1072
extern u64 __init __pi_get_kaslr_seed(uintptr_t dtb_pa);
1073
extern u64 __init __pi_get_kaslr_seed_zkr(const uintptr_t dtb_pa);
1074
1075
static int __init print_nokaslr(char *p)
1076
{
1077
pr_info("Disabled KASLR");
1078
return 0;
1079
}
1080
early_param("nokaslr", print_nokaslr);
1081
1082
unsigned long kaslr_offset(void)
1083
{
1084
return kernel_map.virt_offset;
1085
}
1086
#endif
1087
1088
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
1089
{
1090
pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
1091
1092
#ifdef CONFIG_RANDOMIZE_BASE
1093
if (!__pi_set_nokaslr_from_cmdline(dtb_pa)) {
1094
u64 kaslr_seed = __pi_get_kaslr_seed_zkr(dtb_pa);
1095
u32 kernel_size = (uintptr_t)(&_end) - (uintptr_t)(&_start);
1096
u32 nr_pos;
1097
1098
if (kaslr_seed == 0)
1099
kaslr_seed = __pi_get_kaslr_seed(dtb_pa);
1100
/*
1101
* Compute the number of positions available: we are limited
1102
* by the early page table that only has one PUD and we must
1103
* be aligned on PMD_SIZE.
1104
*/
1105
nr_pos = (PUD_SIZE - kernel_size) / PMD_SIZE;
1106
1107
kernel_map.virt_offset = (kaslr_seed % nr_pos) * PMD_SIZE;
1108
}
1109
#endif
1110
1111
kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset;
1112
1113
#ifdef CONFIG_XIP_KERNEL
1114
kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
1115
kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
1116
1117
phys_ram_base = CONFIG_PHYS_RAM_BASE;
1118
#ifdef CONFIG_SPARSEMEM_VMEMMAP
1119
vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
1120
#endif
1121
kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
1122
kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start);
1123
1124
kernel_map.va_kernel_xip_text_pa_offset = kernel_map.virt_addr - kernel_map.xiprom;
1125
kernel_map.va_kernel_xip_data_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr
1126
+ (uintptr_t)&_sdata - (uintptr_t)&_start;
1127
#else
1128
kernel_map.phys_addr = (uintptr_t)(&_start);
1129
kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
1130
kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
1131
#endif
1132
1133
#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
1134
set_satp_mode(dtb_pa);
1135
set_mmap_rnd_bits_max();
1136
#endif
1137
1138
/*
1139
* In 64-bit, we defer the setup of va_pa_offset to setup_bootmem,
1140
* where we have the system memory layout: this allows us to align
1141
* the physical and virtual mappings and then make use of PUD/P4D/PGD
1142
* for the linear mapping. This is only possible because the kernel
1143
* mapping lies outside the linear mapping.
1144
* In 32-bit however, as the kernel resides in the linear mapping,
1145
* setup_vm_final cannot change the mapping established here,
1146
* otherwise the same kernel addresses would get mapped to different
1147
* physical addresses (if the start of dram is different from the
1148
* kernel physical address start).
1149
*/
1150
kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ?
1151
0UL : PAGE_OFFSET - kernel_map.phys_addr;
1152
1153
memory_limit = KERN_VIRT_SIZE;
1154
1155
/* Sanity check alignment and size */
1156
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
1157
BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);
1158
1159
#ifdef CONFIG_64BIT
1160
/*
1161
* The last 4K bytes of the addressable memory cannot be mapped because
1162
* of the IS_ERR_VALUE macro.
1163
*/
1164
BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
1165
#endif
1166
1167
#ifdef CONFIG_RELOCATABLE
1168
/*
1169
* Early page table uses only one PUD, which makes it possible
1170
* to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset
1171
* makes the kernel cross over a PUD_SIZE boundary, raise a bug
1172
* since a part of the kernel would not get mapped.
1173
*/
1174
if (IS_ENABLED(CONFIG_64BIT))
1175
BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
1176
relocate_kernel();
1177
#endif
1178
1179
apply_early_boot_alternatives();
1180
pt_ops_set_early();
1181
1182
/* Setup early PGD for fixmap */
1183
create_pgd_mapping(early_pg_dir, FIXADDR_START,
1184
fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
1185
1186
#ifndef __PAGETABLE_PMD_FOLDED
1187
/* Setup fixmap P4D and PUD */
1188
if (pgtable_l5_enabled)
1189
create_p4d_mapping(fixmap_p4d, FIXADDR_START,
1190
(uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
1191
/* Setup fixmap PUD and PMD */
1192
if (pgtable_l4_enabled)
1193
create_pud_mapping(fixmap_pud, FIXADDR_START,
1194
(uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
1195
create_pmd_mapping(fixmap_pmd, FIXADDR_START,
1196
(uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
1197
/* Setup trampoline PGD and PMD */
1198
create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
1199
trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
1200
if (pgtable_l5_enabled)
1201
create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
1202
(uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
1203
if (pgtable_l4_enabled)
1204
create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
1205
(uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
1206
#ifdef CONFIG_XIP_KERNEL
1207
create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
1208
kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
1209
#else
1210
create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
1211
kernel_map.phys_addr, PMD_SIZE, PAGE_KERNEL_EXEC);
1212
#endif
1213
#else
1214
/* Setup trampoline PGD */
1215
create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
1216
kernel_map.phys_addr, PGDIR_SIZE, PAGE_KERNEL_EXEC);
1217
#endif
1218
1219
/*
1220
* Setup early PGD covering entire kernel which will allow
1221
* us to reach paging_init(). We map all memory banks later
1222
* in setup_vm_final() below.
1223
*/
1224
create_kernel_page_table(early_pg_dir, true);
1225
1226
/* Setup early mapping for FDT early scan */
1227
create_fdt_early_page_table(__fix_to_virt(FIX_FDT), dtb_pa);
1228
1229
/*
1230
* Boot-time fixmap can only handle PMD_SIZE mappings. Thus, the boot-ioremap
1231
* range cannot span multiple PMDs.
1232
*/
1233
BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
1234
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
1235
1236
#ifndef __PAGETABLE_PMD_FOLDED
1237
/*
1238
* Early ioremap fixmap is already created as it lies within first 2MB
1239
* of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END
1240
* and FIX_BTMAP_BEGIN should lie in the same PMD. Verify that and warn
1241
* the user if not.
1242
*/
1243
fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
1244
fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
1245
if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
1246
WARN_ON(1);
1247
pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
1248
pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
1249
pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
1250
fix_to_virt(FIX_BTMAP_BEGIN));
1251
pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
1252
fix_to_virt(FIX_BTMAP_END));
1253
1254
pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
1255
pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
1256
}
1257
#endif
1258
1259
pt_ops_set_fixmap();
1260
}
1261
1262
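/*
 * Map [start, end) into the linear mapping in swapper_pg_dir, using either
 * the caller-provided fixed mapping size and protection or the best mapping
 * size and the protection derived from the virtual address.
 */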
static void __meminit create_linear_mapping_range(phys_addr_t start, phys_addr_t end,
1263
uintptr_t fixed_map_size, const pgprot_t *pgprot)
1264
{
1265
phys_addr_t pa;
1266
uintptr_t va, map_size;
1267
1268
for (pa = start; pa < end; pa += map_size) {
1269
va = (uintptr_t)__va(pa);
1270
map_size = fixed_map_size ? fixed_map_size :
1271
best_map_size(pa, va, end - pa);
1272
1273
create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
1274
pgprot ? *pgprot : pgprot_from_va(va));
1275
}
1276
}
1277
1278
static void __init create_linear_mapping_page_table(void)
1279
{
1280
phys_addr_t start, end;
1281
phys_addr_t kfence_pool __maybe_unused;
1282
u64 i;
1283
1284
#ifdef CONFIG_STRICT_KERNEL_RWX
1285
phys_addr_t ktext_start = __pa_symbol(_start);
1286
phys_addr_t ktext_size = __init_data_begin - _start;
1287
phys_addr_t krodata_start = __pa_symbol(__start_rodata);
1288
phys_addr_t krodata_size = _data - __start_rodata;
1289
1290
/* Isolate kernel text and rodata so they don't get mapped with a PUD */
1291
memblock_mark_nomap(ktext_start, ktext_size);
1292
memblock_mark_nomap(krodata_start, krodata_size);
1293
#endif
1294
1295
#ifdef CONFIG_KFENCE
1296
/*
1297
* kfence pool must be backed by PAGE_SIZE mappings, so allocate it
1298
* before we setup the linear mapping so that we avoid using hugepages
1299
* for this region.
1300
*/
1301
kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
1302
BUG_ON(!kfence_pool);
1303
1304
memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
1305
__kfence_pool = __va(kfence_pool);
1306
#endif
1307
1308
/* Map all memory banks in the linear mapping */
1309
for_each_mem_range(i, &start, &end) {
1310
if (start >= end)
1311
break;
1312
if (start <= __pa(PAGE_OFFSET) &&
1313
__pa(PAGE_OFFSET) < end)
1314
start = __pa(PAGE_OFFSET);
1315
1316
create_linear_mapping_range(start, end, 0, NULL);
1317
}
1318
1319
#ifdef CONFIG_STRICT_KERNEL_RWX
1320
create_linear_mapping_range(ktext_start, ktext_start + ktext_size, 0, NULL);
1321
create_linear_mapping_range(krodata_start, krodata_start + krodata_size, 0, NULL);
1322
1323
memblock_clear_nomap(ktext_start, ktext_size);
1324
memblock_clear_nomap(krodata_start, krodata_size);
1325
#endif
1326
1327
#ifdef CONFIG_KFENCE
1328
create_linear_mapping_range(kfence_pool, kfence_pool + KFENCE_POOL_SIZE, PAGE_SIZE, NULL);
1329
1330
memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
1331
#endif
1332
}
1333
1334
static void __init setup_vm_final(void)
1335
{
1336
/* Setup swapper PGD for fixmap */
1337
#if !defined(CONFIG_64BIT)
1338
/*
1339
* In 32-bit, the device tree lies in a pgd entry, so it must be copied
1340
* directly in swapper_pg_dir in addition to the pgd entry that points
1341
* to fixmap_pte.
1342
*/
1343
unsigned long idx = pgd_index(__fix_to_virt(FIX_FDT));
1344
1345
set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]);
1346
#endif
1347
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
1348
__pa_symbol(fixmap_pgd_next),
1349
PGDIR_SIZE, PAGE_TABLE);
1350
1351
/* Map the linear mapping */
1352
create_linear_mapping_page_table();
1353
1354
/* Map the kernel */
1355
if (IS_ENABLED(CONFIG_64BIT))
1356
create_kernel_page_table(swapper_pg_dir, false);
1357
1358
#ifdef CONFIG_KASAN
1359
kasan_swapper_init();
1360
#endif
1361
1362
/* Clear fixmap PTE and PMD mappings */
1363
clear_fixmap(FIX_PTE);
1364
clear_fixmap(FIX_PMD);
1365
clear_fixmap(FIX_PUD);
1366
clear_fixmap(FIX_P4D);
1367
1368
/* Move to swapper page table */
1369
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
1370
local_flush_tlb_all();
1371
1372
pt_ops_set_late();
1373
}
1374
#else
1375
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
1376
{
1377
dtb_early_va = (void *)dtb_pa;
1378
dtb_early_pa = dtb_pa;
1379
1380
#ifdef CONFIG_RELOCATABLE
1381
kernel_map.virt_addr = (uintptr_t)_start;
1382
kernel_map.phys_addr = (uintptr_t)_start;
1383
relocate_kernel();
1384
#endif
1385
}
1386
1387
static inline void setup_vm_final(void)
1388
{
1389
}
1390
#endif /* CONFIG_MMU */
1391
1392
/*
1393
* reserve_crashkernel() - reserves memory for crash kernel
1394
*
1395
* This function reserves memory area given in "crashkernel=" kernel command
1396
* line parameter. The memory reserved is used by dump capture kernel when
1397
* primary kernel is crashing.
1398
*/
1399
static void __init arch_reserve_crashkernel(void)
1400
{
1401
unsigned long long low_size = 0;
1402
unsigned long long crash_base, crash_size;
1403
bool high = false;
1404
int ret;
1405
1406
if (!IS_ENABLED(CONFIG_CRASH_RESERVE))
1407
return;
1408
1409
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
1410
&crash_size, &crash_base,
1411
&low_size, NULL, &high);
1412
if (ret)
1413
return;
1414
1415
reserve_crashkernel_generic(crash_size, crash_base, low_size, high);
1416
}
1417
1418
void __init paging_init(void)
1419
{
1420
setup_bootmem();
1421
setup_vm_final();
1422
1423
/* Depends on the linear mapping being ready */
1424
memblock_allow_resize();
1425
}
1426
1427
void __init misc_mem_init(void)
1428
{
1429
early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
1430
arch_numa_init();
1431
sparse_init();
1432
#ifdef CONFIG_SPARSEMEM_VMEMMAP
1433
/* The entire VMEMMAP region has been populated. Flush TLB for this region */
1434
local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END);
1435
#endif
1436
zone_sizes_init();
1437
arch_reserve_crashkernel();
1438
memblock_dump_all();
1439
}
1440
1441
#ifdef CONFIG_SPARSEMEM_VMEMMAP
1442
void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
1443
unsigned long addr, unsigned long next)
1444
{
1445
pmd_set_huge(pmd, virt_to_phys(p), PAGE_KERNEL);
1446
}
1447
1448
int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
1449
unsigned long addr, unsigned long next)
1450
{
1451
vmemmap_verify((pte_t *)pmdp, node, addr, next);
1452
return 1;
1453
}
1454
1455
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
1456
struct vmem_altmap *altmap)
1457
{
1458
/*
1459
* Note that SPARSEMEM_VMEMMAP is only selected for rv64 and that we
1460
* can't use hugepage mappings for 2-level page table because in case of
1461
* memory hotplug, we are not able to update all the page tables with
1462
* the new PMDs.
1463
*/
1464
return vmemmap_populate_hugepages(start, end, node, altmap);
1465
}
1466
#endif
1467
1468
#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
1469
/*
1470
* Pre-allocates page-table pages for a specific area in the kernel
1471
* page-table. Only the level which needs to be synchronized between
1472
* all page-tables is allocated because the synchronization can be
1473
* expensive.
1474
*/
1475
static void __init preallocate_pgd_pages_range(unsigned long start, unsigned long end,
1476
const char *area)
1477
{
1478
unsigned long addr;
1479
const char *lvl;
1480
1481
for (addr = start; addr < end && addr >= start; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
1482
pgd_t *pgd = pgd_offset_k(addr);
1483
p4d_t *p4d;
1484
pud_t *pud;
1485
pmd_t *pmd;
1486
1487
lvl = "p4d";
1488
p4d = p4d_alloc(&init_mm, pgd, addr);
1489
if (!p4d)
1490
goto failed;
1491
1492
if (pgtable_l5_enabled)
1493
continue;
1494
1495
lvl = "pud";
1496
pud = pud_alloc(&init_mm, p4d, addr);
1497
if (!pud)
1498
goto failed;
1499
1500
if (pgtable_l4_enabled)
1501
continue;
1502
1503
lvl = "pmd";
1504
pmd = pmd_alloc(&init_mm, pud, addr);
1505
if (!pmd)
1506
goto failed;
1507
}
1508
return;
1509
1510
failed:
1511
/*
1512
* The pages have to be there now or they will be missing in
1513
* process page-tables later.
1514
*/
1515
panic("Failed to pre-allocate %s pages for %s area\n", lvl, area);
1516
}
1517
1518
#define PAGE_END KASAN_SHADOW_START
1519
1520
void __init pgtable_cache_init(void)
1521
{
1522
preallocate_pgd_pages_range(VMALLOC_START, VMALLOC_END, "vmalloc");
1523
if (IS_ENABLED(CONFIG_MODULES))
1524
preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "bpf/modules");
1525
if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) {
1526
preallocate_pgd_pages_range(VMEMMAP_START, VMEMMAP_END, "vmemmap");
1527
preallocate_pgd_pages_range(PAGE_OFFSET, PAGE_END, "direct map");
1528
if (IS_ENABLED(CONFIG_KASAN))
1529
preallocate_pgd_pages_range(KASAN_SHADOW_START, KASAN_SHADOW_END, "kasan");
1530
}
1531
}
1532
#endif
1533
1534
#ifdef CONFIG_EXECMEM
1535
#ifdef CONFIG_MMU
1536
static struct execmem_info execmem_info __ro_after_init;
1537
1538
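/*
 * Describe where executable allocations live: modules in the module area,
 * kprobe instruction pages in vmalloc space (read + exec), and BPF JIT
 * images in the dedicated BPF region.
 */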
struct execmem_info __init *execmem_arch_setup(void)
1539
{
1540
execmem_info = (struct execmem_info){
1541
.ranges = {
1542
[EXECMEM_DEFAULT] = {
1543
.start = MODULES_VADDR,
1544
.end = MODULES_END,
1545
.pgprot = PAGE_KERNEL,
1546
.alignment = 1,
1547
},
1548
[EXECMEM_KPROBES] = {
1549
.start = VMALLOC_START,
1550
.end = VMALLOC_END,
1551
.pgprot = PAGE_KERNEL_READ_EXEC,
1552
.alignment = 1,
1553
},
1554
[EXECMEM_BPF] = {
1555
.start = BPF_JIT_REGION_START,
1556
.end = BPF_JIT_REGION_END,
1557
.pgprot = PAGE_KERNEL,
1558
.alignment = PAGE_SIZE,
1559
},
1560
},
1561
};
1562
1563
return &execmem_info;
1564
}
1565
#endif /* CONFIG_MMU */
1566
#endif /* CONFIG_EXECMEM */
1567
1568
#ifdef CONFIG_MEMORY_HOTPLUG
1569
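/*
 * Free a PTE table page once every entry in it is none, then clear the PMD
 * entry that pointed to it.
 */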
static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
1570
{
1571
struct page *page = pmd_page(*pmd);
1572
struct ptdesc *ptdesc = page_ptdesc(page);
1573
pte_t *pte;
1574
int i;
1575
1576
for (i = 0; i < PTRS_PER_PTE; i++) {
1577
pte = pte_start + i;
1578
if (!pte_none(*pte))
1579
return;
1580
}
1581
1582
pagetable_dtor(ptdesc);
1583
if (PageReserved(page))
1584
free_reserved_page(page);
1585
else
1586
pagetable_free(ptdesc);
1587
pmd_clear(pmd);
1588
}
1589
1590
static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, bool is_vmemmap)
1591
{
1592
struct page *page = pud_page(*pud);
1593
struct ptdesc *ptdesc = page_ptdesc(page);
1594
pmd_t *pmd;
1595
int i;
1596
1597
for (i = 0; i < PTRS_PER_PMD; i++) {
1598
pmd = pmd_start + i;
1599
if (!pmd_none(*pmd))
1600
return;
1601
}
1602
1603
if (!is_vmemmap)
1604
pagetable_dtor(ptdesc);
1605
if (PageReserved(page))
1606
free_reserved_page(page);
1607
else
1608
pagetable_free(ptdesc);
1609
pud_clear(pud);
1610
}
1611
1612
static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
1613
{
1614
struct page *page = p4d_page(*p4d);
1615
pud_t *pud;
1616
int i;
1617
1618
for (i = 0; i < PTRS_PER_PUD; i++) {
1619
pud = pud_start + i;
1620
if (!pud_none(*pud))
1621
return;
1622
}
1623
1624
if (PageReserved(page))
1625
free_reserved_page(page);
1626
else
1627
free_pages((unsigned long)page_address(page), 0);
1628
p4d_clear(p4d);
1629
}
1630
1631
static void __meminit free_vmemmap_storage(struct page *page, size_t size,
1632
struct vmem_altmap *altmap)
1633
{
1634
int order = get_order(size);
1635
1636
if (altmap) {
1637
vmem_altmap_free(altmap, size >> PAGE_SHIFT);
1638
return;
1639
}
1640
1641
if (PageReserved(page)) {
1642
unsigned int nr_pages = 1 << order;
1643
1644
while (nr_pages--)
1645
free_reserved_page(page++);
1646
return;
1647
}
1648
1649
free_pages((unsigned long)page_address(page), order);
1650
}
1651
1652
static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end,
1653
bool is_vmemmap, struct vmem_altmap *altmap)
1654
{
1655
unsigned long next;
1656
pte_t *ptep, pte;
1657
1658
for (; addr < end; addr = next) {
1659
next = (addr + PAGE_SIZE) & PAGE_MASK;
1660
if (next > end)
1661
next = end;
1662
1663
ptep = pte_base + pte_index(addr);
1664
pte = ptep_get(ptep);
1665
if (!pte_present(*ptep))
1666
continue;
1667
1668
pte_clear(&init_mm, addr, ptep);
1669
if (is_vmemmap)
1670
free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap);
1671
}
1672
}
1673
1674
static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end,
1675
bool is_vmemmap, struct vmem_altmap *altmap)
1676
{
1677
unsigned long next;
1678
pte_t *pte_base;
1679
pmd_t *pmdp, pmd;
1680
1681
for (; addr < end; addr = next) {
1682
next = pmd_addr_end(addr, end);
1683
pmdp = pmd_base + pmd_index(addr);
1684
pmd = pmdp_get(pmdp);
1685
if (!pmd_present(pmd))
1686
continue;
1687
1688
if (pmd_leaf(pmd)) {
1689
pmd_clear(pmdp);
1690
if (is_vmemmap)
1691
free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap);
1692
continue;
1693
}
1694
1695
pte_base = (pte_t *)pmd_page_vaddr(*pmdp);
1696
remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap);
1697
free_pte_table(pte_base, pmdp);
1698
}
1699
}
1700
1701
static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end,
1702
bool is_vmemmap, struct vmem_altmap *altmap)
1703
{
1704
unsigned long next;
1705
pud_t *pudp, pud;
1706
pmd_t *pmd_base;
1707
1708
for (; addr < end; addr = next) {
1709
next = pud_addr_end(addr, end);
1710
pudp = pud_base + pud_index(addr);
1711
pud = pudp_get(pudp);
1712
if (!pud_present(pud))
1713
continue;
1714
1715
if (pud_leaf(pud)) {
1716
if (pgtable_l4_enabled) {
1717
pud_clear(pudp);
1718
if (is_vmemmap)
1719
free_vmemmap_storage(pud_page(pud), PUD_SIZE, altmap);
1720
}
1721
continue;
1722
}
1723
1724
pmd_base = pmd_offset(pudp, 0);
1725
remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap);
1726
1727
if (pgtable_l4_enabled)
1728
free_pmd_table(pmd_base, pudp, is_vmemmap);
1729
}
1730
}
1731
1732
static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end,
1733
bool is_vmemmap, struct vmem_altmap *altmap)
1734
{
1735
unsigned long next;
1736
p4d_t *p4dp, p4d;
1737
pud_t *pud_base;
1738
1739
for (; addr < end; addr = next) {
1740
next = p4d_addr_end(addr, end);
1741
p4dp = p4d_base + p4d_index(addr);
1742
p4d = p4dp_get(p4dp);
1743
if (!p4d_present(p4d))
1744
continue;
1745
1746
if (p4d_leaf(p4d)) {
1747
if (pgtable_l5_enabled) {
1748
p4d_clear(p4dp);
1749
if (is_vmemmap)
1750
free_vmemmap_storage(p4d_page(p4d), P4D_SIZE, altmap);
1751
}
1752
continue;
1753
}
1754
1755
pud_base = pud_offset(p4dp, 0);
1756
remove_pud_mapping(pud_base, addr, next, is_vmemmap, altmap);
1757
1758
if (pgtable_l5_enabled)
1759
free_pud_table(pud_base, p4dp);
1760
}
1761
}
1762
1763
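/*
 * Tear down a kernel virtual range: clear leaf entries level by level, free
 * page-table pages that become empty and, for vmemmap ranges, release the
 * backing struct page storage as well.
 */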
static void __meminit remove_pgd_mapping(unsigned long va, unsigned long end, bool is_vmemmap,
1764
struct vmem_altmap *altmap)
1765
{
1766
unsigned long addr, next;
1767
p4d_t *p4d_base;
1768
pgd_t *pgd;
1769
1770
for (addr = va; addr < end; addr = next) {
1771
next = pgd_addr_end(addr, end);
1772
pgd = pgd_offset_k(addr);
1773
1774
if (!pgd_present(*pgd))
1775
continue;
1776
1777
if (pgd_leaf(*pgd))
1778
continue;
1779
1780
p4d_base = p4d_offset(pgd, 0);
1781
remove_p4d_mapping(p4d_base, addr, next, is_vmemmap, altmap);
1782
}
1783
1784
flush_tlb_all();
1785
}
1786
1787
static void __meminit remove_linear_mapping(phys_addr_t start, u64 size)
1788
{
1789
unsigned long va = (unsigned long)__va(start);
1790
unsigned long end = (unsigned long)__va(start + size);
1791
1792
remove_pgd_mapping(va, end, false, NULL);
1793
}
1794
1795
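/* Hotplugged memory must fall within the range the linear mapping can cover. */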
struct range arch_get_mappable_range(void)
1796
{
1797
struct range mhp_range;
1798
1799
mhp_range.start = __pa(PAGE_OFFSET);
1800
mhp_range.end = __pa(PAGE_END - 1);
1801
return mhp_range;
1802
}
1803
1804
int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
1805
{
1806
int ret = 0;
1807
1808
create_linear_mapping_range(start, start + size, 0, &params->pgprot);
1809
ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params);
1810
if (ret) {
1811
remove_linear_mapping(start, size);
1812
goto out;
1813
}
1814
1815
max_pfn = PFN_UP(start + size);
1816
max_low_pfn = max_pfn;
1817
1818
out:
1819
flush_tlb_all();
1820
return ret;
1821
}
1822
1823
void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
1824
{
1825
__remove_pages(start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap);
1826
remove_linear_mapping(start, size);
1827
flush_tlb_all();
1828
}
1829
1830
void __ref vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap)
1831
{
1832
remove_pgd_mapping(start, end, true, altmap);
1833
}
1834
#endif /* CONFIG_MEMORY_HOTPLUG */
1835
1836