// SPDX-License-Identifier: GPL-2.0

#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/pgtable.h>

#include <asm/init.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sev.h>

extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
extern unsigned int next_early_pgt;

static inline bool check_la57_support(void)
{
	/*
	 * 5-level paging is detected and enabled at kernel decompression
	 * stage. Only check if it has been enabled there.
	 */
	if (!(native_read_cr4() & X86_CR4_LA57))
		return false;

	__pgtable_l5_enabled = 1;
	pgdir_shift = 48;
	ptrs_per_p4d = 512;
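	/*
	 * Worked example: with LA57 the PGD indexes VA bits 56:48
	 * (pgdir_shift == 48) and the no-longer-folded P4D level indexes
	 * bits 47:39 with its 512 entries, giving 57-bit virtual addresses
	 * instead of the 48-bit, 4-level layout.
	 */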

	return true;
}

static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
						    pmdval_t *pmd,
						    unsigned long p2v_offset)
{
	unsigned long paddr, paddr_end;
	int i;

	/* Encrypt the kernel and related (if SME is active) */
	sme_encrypt_kernel(bp);

	/*
	 * Clear the memory encryption mask from the .bss..decrypted section.
	 * The bss section will be memset to zero later in the initialization so
	 * there is no need to zero it after changing the memory encryption
	 * attribute.
	 */
	if (sme_get_me_mask()) {
		paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
		paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

		for (; paddr < paddr_end; paddr += PMD_SIZE) {
			/*
			 * On SNP, transition the page to shared in the RMP table so that
			 * it is consistent with the page table attribute change.
			 *
			 * __start_bss_decrypted has a virtual address in the high range
			 * mapping (kernel .text). PVALIDATE, by way of
			 * early_snp_set_memory_shared(), requires a valid virtual
			 * address but the kernel is currently running off of the identity
			 * mapping so use the PA to get a *currently* valid virtual address.
			 */
			early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);

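			/*
			 * Drop the encryption bit from the identity-map PMD
			 * entry covering this 2M page. For illustration,
			 * assuming a C-bit at position 47 (the real position
			 * comes from CPUID 0x8000001F), an entry of the form
			 * "paddr | flags | (1ULL << 47)" becomes
			 * "paddr | flags": the same mapping, now decrypted.
			 */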
			i = pmd_index(paddr - p2v_offset);
			pmd[i] -= sme_get_me_mask();
		}
	}

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

/*
 * This code is compiled using PIC codegen because it will execute from the
 * early 1:1 mapping of memory, which deviates from the mapping expected by the
 * linker. Due to this deviation, taking the address of a global variable will
 * produce an ambiguous result when using the plain & operator. Instead,
 * rip_rel_ptr() must be used, which will return the RIP-relative address in
 * the 1:1 mapping of memory. Kernel virtual addresses can be determined by
 * subtracting p2v_offset from the RIP-relative address.
 */
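/*
 * For example (illustrative only), while executing from the 1:1 mapping:
 *
 *	pa = (unsigned long)rip_rel_ptr(_text);	physical address of _text
 *	va = pa - p2v_offset;			its kernel virtual address
 */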
unsigned long __head __startup_64(unsigned long p2v_offset,
				  struct boot_params *bp)
{
	pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
	unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
	unsigned long va_text, va_end;
	unsigned long pgtable_flags;
	unsigned long load_delta;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	bool la57;
	int i;

	la57 = check_la57_support();

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
	phys_base = load_delta = __START_KERNEL_map + p2v_offset;
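	/*
	 * Worked example (ignoring KASLR): with the default
	 * CONFIG_PHYSICAL_START of 0x1000000 the kernel is linked to run at
	 * physical 16M. If the decompressor actually placed it at
	 * 0x34000000, load_delta (and phys_base) is
	 * 0x34000000 - 0x1000000 == 0x33000000, the amount by which every
	 * physical address baked into the static page tables must shift.
	 */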

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_MASK)
		for (;;);

	va_text = physaddr - p2v_offset;
	va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

	pgd = rip_rel_ptr(early_top_pgt);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (la57) {
		p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
	}

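	/*
	 * The last two PUD entries of level3_kernel_pgt point at
	 * level2_kernel_pgt and level2_fixmap_pgt (set up in head_64.S);
	 * both hold physical addresses and need the same relocation.
	 */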
	level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
	level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;

	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
		level2_fixmap_pgt[i].pmd += load_delta;

	/*
	 * Set up the identity mapping for the switchover. These
	 * entries should *NOT* have the global bit set! This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */

	pud = &early_pgts[0]->pmd;
	pmd = &early_pgts[1]->pmd;
	next_early_pgt = 2;

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

	if (la57) {
		p4d = &early_pgts[next_early_pgt++]->pmd;

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = physaddr >> P4D_SHIFT;
		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = physaddr >> PUD_SHIFT;
	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;
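	/*
	 * pmd_entry now describes a 2M, executable, non-global (and, if SME
	 * is active, encrypted) mapping of the first 2M of the kernel image;
	 * the loop below steps it across the whole image.
	 */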

	for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
		int idx = i + (physaddr >> PMD_SHIFT);

		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
	}
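	/*
	 * Worked example: assuming 4-level paging and physaddr == 0x34000000,
	 * physaddr >> PUD_SHIFT == 0, so pud[0]/pud[1] reference this pmd
	 * table and the first 2M mapping lands in
	 * pmd[(0x34000000 >> 21) % 512] == pmd[416]. Two consecutive entries
	 * are written at each upper level so the mapping still works if the
	 * image straddles a 1G (or higher-level) boundary.
	 */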

	/*
	 * Fixup the kernel text+data virtual addresses. Note that
	 * we might write invalid pmds when the kernel is relocated;
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 *
	 * Only the region occupied by the kernel image has so far
	 * been checked against the table of usable memory regions
	 * provided by the firmware, so invalidate pages outside that
	 * region. A page table entry that maps to a reserved area of
	 * memory would allow processor speculation into that area,
	 * and on some hardware (particularly the UV platform) even
	 * speculative access to some reserved areas is caught as an
	 * error, causing the BIOS to halt the system.
	 */

	pmd = rip_rel_ptr(level2_kernel_pgt);

	/* invalidate pages before the kernel image */
	for (i = 0; i < pmd_index(va_text); i++)
		pmd[i] &= ~_PAGE_PRESENT;

	/* fixup pages that are part of the kernel image */
	for (; i <= pmd_index(va_end); i++)
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;

	/* invalidate pages after the kernel image */
	for (; i < PTRS_PER_PMD; i++)
		pmd[i] &= ~_PAGE_PRESENT;

	return sme_postprocess_startup(bp, pmd, p2v_offset);
}