Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/mm/dump_pagetables.c
26442 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* Debug helper to dump the current kernel pagetables of the system
4
* so that we can see what the various memory ranges are set to.
5
*
6
* (C) Copyright 2008 Intel Corporation
7
*
8
 * Author: Arjan van de Ven <arjan@linux.intel.com>
9
*/
10
11
#include <linux/debugfs.h>
12
#include <linux/kasan.h>
13
#include <linux/mm.h>
14
#include <linux/init.h>
15
#include <linux/sched.h>
16
#include <linux/seq_file.h>
17
#include <linux/highmem.h>
18
#include <linux/pci.h>
19
#include <linux/ptdump.h>
20
21
#include <asm/e820/types.h>
22
23
/*
24
* The dumper groups pagetable entries of the same type into one, and for
25
* that it needs to keep some state when walking, and flush this state
26
* when a "break" in the continuity is found.
27
*/
28
struct pg_state {
29
struct ptdump_state ptdump;
30
int level;
31
pgprotval_t current_prot;
32
pgprotval_t effective_prot;
33
pgprotval_t prot_levels[5];
34
unsigned long start_address;
35
const struct addr_marker *marker;
36
unsigned long lines;
37
bool to_dmesg;
38
bool check_wx;
39
unsigned long wx_pages;
40
struct seq_file *seq;
41
};
42
43
/*
 * Named boundary in the virtual address space; note_page() prints
 * "---[ name ]---" when the walk reaches @start_address.
 */
struct addr_marker {
	/* First address covered by this marker */
	unsigned long start_address;
	/* Label printed in the dump */
	const char *name;
	/* Maximum number of lines printed for this region; 0 means no limit */
	unsigned long max_lines;
};
48
49
/* Address space markers hints */
50
51
#ifdef CONFIG_X86_64
52
53
/*
 * Indices into address_markers[] (64-bit layout).
 *
 * note_page() steps through address_markers[] one element at a time, so
 * the order of the enumerators is the order in which markers are matched.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	LDT_NR,
#endif
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
	KASAN_SHADOW_START_NR,
	KASAN_SHADOW_END_NR,
#endif
	CPU_ENTRY_AREA_NR,
#ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
#endif
#ifdef CONFIG_EFI
	EFI_END_NR,
#endif
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
	FIXADDR_START_NR,
	END_OF_SPACE_NR,
};
79
80
static struct addr_marker address_markers[] = {
81
[USER_SPACE_NR] = { 0, "User Space" },
82
[KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" },
83
[LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" },
84
[VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
85
[VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
86
#ifdef CONFIG_KASAN
87
/*
88
* These fields get initialized with the (dynamic)
89
* KASAN_SHADOW_{START,END} values in pt_dump_init().
90
*/
91
[KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" },
92
[KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" },
93
#endif
94
#ifdef CONFIG_MODIFY_LDT_SYSCALL
95
[LDT_NR] = { 0UL, "LDT remap" },
96
#endif
97
[CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
98
#ifdef CONFIG_X86_ESPFIX64
99
[ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
100
#endif
101
#ifdef CONFIG_EFI
102
[EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
103
#endif
104
[HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
105
[MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
106
[MODULES_END_NR] = { MODULES_END, "End Modules" },
107
[FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
108
[END_OF_SPACE_NR] = { -1, NULL }
109
};
110
111
/* Top-level page table handed to the W+X checks on 64-bit */
#define INIT_PGD	((pgd_t *)&init_top_pgt)
112
113
#else /* CONFIG_X86_64 */
114
115
/*
 * Indices into address_markers[] (32-bit layout).
 *
 * note_page() steps through address_markers[] one element at a time, so
 * the order of the enumerators is the order in which markers are matched.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,
	VMALLOC_START_NR,
	VMALLOC_END_NR,
#ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	LDT_NR,
#endif
	CPU_ENTRY_AREA_NR,
	FIXADDR_START_NR,
	END_OF_SPACE_NR,
};
130
131
static struct addr_marker address_markers[] = {
132
[USER_SPACE_NR] = { 0, "User Space" },
133
[KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" },
134
[VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
135
[VMALLOC_END_NR] = { 0UL, "vmalloc() End" },
136
#ifdef CONFIG_HIGHMEM
137
[PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
138
#endif
139
#ifdef CONFIG_MODIFY_LDT_SYSCALL
140
[LDT_NR] = { 0UL, "LDT remap" },
141
#endif
142
[CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
143
[FIXADDR_START_NR] = { 0UL, "Fixmap area" },
144
[END_OF_SPACE_NR] = { -1, NULL }
145
};
146
147
/* Top-level page table handed to the W+X checks on 32-bit */
#define INIT_PGD	(swapper_pg_dir)
148
149
#endif /* !CONFIG_X86_64 */
150
151
/*
 * Multipliers for offsets within the PTEs: each level's multiplier is the
 * number of entries in the table below it times that table's multiplier,
 * starting from one page at the PTE level.
 */
#define PTE_LEVEL_MULT	(PAGE_SIZE)
#define PMD_LEVEL_MULT	(PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT	(PTRS_PER_PMD * PMD_LEVEL_MULT)
#define P4D_LEVEL_MULT	(PTRS_PER_PUD * PUD_LEVEL_MULT)
#define PGD_LEVEL_MULT	(PTRS_PER_P4D * P4D_LEVEL_MULT)
157
158
/*
 * Start a new line of output: printed with printk() at KERN_INFO when
 * @to_dmesg is set, otherwise written to seq_file @m.  Nothing is emitted
 * when @to_dmesg is false and @m is NULL.
 */
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_INFO fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})
166
167
/*
 * Continue the current line of output: printed with printk() at KERN_CONT
 * when @to_dmesg is set, otherwise written to seq_file @m.  Nothing is
 * emitted when @to_dmesg is false and @m is NULL.
 */
#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_CONT fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})
175
176
/*
177
* Print a readable form of a pgprot_t to the seq_file
178
*/
179
static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg)
180
{
181
static const char * const level_name[] =
182
{ "pgd", "p4d", "pud", "pmd", "pte" };
183
184
if (!(pr & _PAGE_PRESENT)) {
185
/* Not present */
186
pt_dump_cont_printf(m, dmsg, " ");
187
} else {
188
if (pr & _PAGE_USER)
189
pt_dump_cont_printf(m, dmsg, "USR ");
190
else
191
pt_dump_cont_printf(m, dmsg, " ");
192
if (pr & _PAGE_RW)
193
pt_dump_cont_printf(m, dmsg, "RW ");
194
else
195
pt_dump_cont_printf(m, dmsg, "ro ");
196
if (pr & _PAGE_PWT)
197
pt_dump_cont_printf(m, dmsg, "PWT ");
198
else
199
pt_dump_cont_printf(m, dmsg, " ");
200
if (pr & _PAGE_PCD)
201
pt_dump_cont_printf(m, dmsg, "PCD ");
202
else
203
pt_dump_cont_printf(m, dmsg, " ");
204
205
/* Bit 7 has a different meaning on level 3 vs 4 */
206
if (level <= 3 && pr & _PAGE_PSE)
207
pt_dump_cont_printf(m, dmsg, "PSE ");
208
else
209
pt_dump_cont_printf(m, dmsg, " ");
210
if ((level == 4 && pr & _PAGE_PAT) ||
211
((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
212
pt_dump_cont_printf(m, dmsg, "PAT ");
213
else
214
pt_dump_cont_printf(m, dmsg, " ");
215
if (pr & _PAGE_GLOBAL)
216
pt_dump_cont_printf(m, dmsg, "GLB ");
217
else
218
pt_dump_cont_printf(m, dmsg, " ");
219
if (pr & _PAGE_NX)
220
pt_dump_cont_printf(m, dmsg, "NX ");
221
else
222
pt_dump_cont_printf(m, dmsg, "x ");
223
}
224
pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
225
}
226
227
static void note_wx(struct pg_state *st, unsigned long addr)
228
{
229
unsigned long npages;
230
231
npages = (addr - st->start_address) / PAGE_SIZE;
232
233
#ifdef CONFIG_PCI_BIOS
234
/*
235
* If PCI BIOS is enabled, the PCI BIOS area is forced to WX.
236
* Inform about it, but avoid the warning.
237
*/
238
if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN &&
239
addr <= PAGE_OFFSET + BIOS_END) {
240
pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages);
241
return;
242
}
243
#endif
244
/* Account the WX pages */
245
st->wx_pages += npages;
246
WARN_ONCE(__supported_pte_mask & _PAGE_NX,
247
"x86/mm: Found insecure W+X mapping at address %pS\n",
248
(void *)st->start_address);
249
}
250
251
/*
 * Record the effective protection of an entry at @level in
 * st->prot_levels[@level].
 *
 * @pt_st: generic walker state embedded in struct pg_state
 * @level: page table level of the entry, 0 (pgd) .. 4 (pte)
 * @val:   raw entry value
 *
 * At level 0 the entry's own flags are stored unchanged.  Below that,
 * USER and RW are kept only if both this entry and the level above have
 * them, while NX is kept if either of the two has it.
 */
static void effective_prot(struct ptdump_state *pt_st, int level, u64 val)
{
	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
	pgprotval_t own = val & PTE_FLAGS_MASK;
	pgprotval_t parent;

	if (level <= 0) {
		st->prot_levels[level] = own;
		return;
	}

	parent = st->prot_levels[level - 1];
	st->prot_levels[level] =
		(parent & own & (_PAGE_USER | _PAGE_RW)) |
		((parent | own) & _PAGE_NX);
}
268
269
/* ptdump callback: record the effective protection of a PTE (level 4) */
static void effective_prot_pte(struct ptdump_state *st, pte_t pte)
{
	u64 raw = pte_val(pte);

	effective_prot(st, 4, raw);
}
273
274
/* ptdump callback: record the effective protection of a PMD (level 3) */
static void effective_prot_pmd(struct ptdump_state *st, pmd_t pmd)
{
	u64 raw = pmd_val(pmd);

	effective_prot(st, 3, raw);
}
278
279
/* ptdump callback: record the effective protection of a PUD (level 2) */
static void effective_prot_pud(struct ptdump_state *st, pud_t pud)
{
	u64 raw = pud_val(pud);

	effective_prot(st, 2, raw);
}
283
284
/* ptdump callback: record the effective protection of a P4D (level 1) */
static void effective_prot_p4d(struct ptdump_state *st, p4d_t p4d)
{
	u64 raw = p4d_val(p4d);

	effective_prot(st, 1, raw);
}
288
289
/* ptdump callback: record the effective protection of a PGD (level 0) */
static void effective_prot_pgd(struct ptdump_state *st, pgd_t pgd)
{
	u64 raw = pgd_val(pgd);

	effective_prot(st, 0, raw);
}
293
294
295
/*
296
* This function gets called on a break in a continuous series
297
* of PTE entries; the next one is different so we need to
298
* print what we collected so far.
299
*/
300
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
301
u64 val)
302
{
303
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
304
pgprotval_t new_prot, new_eff;
305
pgprotval_t cur, eff;
306
static const char units[] = "BKMGTPE";
307
struct seq_file *m = st->seq;
308
309
new_prot = val & PTE_FLAGS_MASK;
310
if (!val)
311
new_eff = 0;
312
else
313
new_eff = st->prot_levels[level];
314
315
/*
316
* If we have a "break" in the series, we need to flush the state that
317
* we have now. "break" is either changing perms, levels or
318
* address space marker.
319
*/
320
cur = st->current_prot;
321
eff = st->effective_prot;
322
323
if (st->level == -1) {
324
/* First entry */
325
st->current_prot = new_prot;
326
st->effective_prot = new_eff;
327
st->level = level;
328
st->marker = address_markers;
329
st->lines = 0;
330
pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
331
st->marker->name);
332
} else if (new_prot != cur || new_eff != eff || level != st->level ||
333
addr >= st->marker[1].start_address) {
334
const char *unit = units;
335
unsigned long delta;
336
int width = sizeof(unsigned long) * 2;
337
338
if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX))
339
note_wx(st, addr);
340
341
/*
342
* Now print the actual finished series
343
*/
344
if (!st->marker->max_lines ||
345
st->lines < st->marker->max_lines) {
346
pt_dump_seq_printf(m, st->to_dmesg,
347
"0x%0*lx-0x%0*lx ",
348
width, st->start_address,
349
width, addr);
350
351
delta = addr - st->start_address;
352
while (!(delta & 1023) && unit[1]) {
353
delta >>= 10;
354
unit++;
355
}
356
pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ",
357
delta, *unit);
358
printk_prot(m, st->current_prot, st->level,
359
st->to_dmesg);
360
}
361
st->lines++;
362
363
/*
364
* We print markers for special areas of address space,
365
* such as the start of vmalloc space etc.
366
* This helps in the interpretation.
367
*/
368
if (addr >= st->marker[1].start_address) {
369
if (st->marker->max_lines &&
370
st->lines > st->marker->max_lines) {
371
unsigned long nskip =
372
st->lines - st->marker->max_lines;
373
pt_dump_seq_printf(m, st->to_dmesg,
374
"... %lu entr%s skipped ... \n",
375
nskip,
376
nskip == 1 ? "y" : "ies");
377
}
378
st->marker++;
379
st->lines = 0;
380
pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
381
st->marker->name);
382
}
383
384
st->start_address = addr;
385
st->current_prot = new_prot;
386
st->effective_prot = new_eff;
387
st->level = level;
388
}
389
}
390
391
/* ptdump callback: account a PTE (level 4) found at @addr */
static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
{
	u64 raw = pte_val(pte);

	note_page(pt_st, addr, 4, raw);
}
395
396
/* ptdump callback: account a PMD (level 3) found at @addr */
static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
{
	u64 raw = pmd_val(pmd);

	note_page(pt_st, addr, 3, raw);
}
400
401
/* ptdump callback: account a PUD (level 2) found at @addr */
static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
{
	u64 raw = pud_val(pud);

	note_page(pt_st, addr, 2, raw);
}
405
406
/* ptdump callback: account a P4D (level 1) found at @addr */
static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
{
	u64 raw = p4d_val(p4d);

	note_page(pt_st, addr, 1, raw);
}
410
411
/* ptdump callback: account a PGD (level 0) found at @addr */
static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
{
	u64 raw = pgd_val(pgd);

	note_page(pt_st, addr, 0, raw);
}
415
416
static void note_page_flush(struct ptdump_state *pt_st)
417
{
418
pte_t pte_zero = {0};
419
420
note_page(pt_st, 0, -1, pte_val(pte_zero));
421
}
422
423
/*
 * Walk the page tables rooted at @pgd and dump and/or check them.
 *
 * @m:       seq_file to print to, or NULL for no seq_file output
 * @mm:      mm passed on to ptdump_walk_pgd()
 * @pgd:     top-level page table to walk
 * @checkwx: account writable+executable mappings and report the result
 * @dmesg:   print the dump with printk() instead of writing to @m
 *
 * Returns true when @checkwx is false or no W+X pages were found, false
 * when at least one W+X page was accounted.
 */
bool ptdump_walk_pgd_level_core(struct seq_file *m,
				struct mm_struct *mm, pgd_t *pgd,
				bool checkwx, bool dmesg)
{
	/* Ranges to walk; the list is terminated by an all-zero entry */
	const struct ptdump_range walk_ranges[] = {
#ifdef CONFIG_X86_64
		{0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
		{GUARD_HOLE_END_ADDR, ~0UL},
#else
		{0, ~0UL},
#endif
		{0, 0}
	};

	struct pg_state st = {
		.ptdump = {
			.note_page_pte		= note_page_pte,
			.note_page_pmd		= note_page_pmd,
			.note_page_pud		= note_page_pud,
			.note_page_p4d		= note_page_p4d,
			.note_page_pgd		= note_page_pgd,
			.note_page_flush	= note_page_flush,
			.effective_prot_pte	= effective_prot_pte,
			.effective_prot_pmd	= effective_prot_pmd,
			.effective_prot_pud	= effective_prot_pud,
			.effective_prot_p4d	= effective_prot_p4d,
			.effective_prot_pgd	= effective_prot_pgd,
			.range			= walk_ranges
		},
		.level		= -1,	/* no entry seen yet */
		.to_dmesg	= dmesg,
		.check_wx	= checkwx,
		.seq		= m
	};

	ptdump_walk_pgd(&st.ptdump, mm, pgd);

	/* A plain dump has nothing to report */
	if (!checkwx)
		return true;

	if (!st.wx_pages) {
		pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");

		return true;
	}

	pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
		st.wx_pages);

	return false;
}
473
474
void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm)
475
{
476
ptdump_walk_pgd_level_core(m, mm, mm->pgd, false, true);
477
}
478
479
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
480
bool user)
481
{
482
pgd_t *pgd = mm->pgd;
483
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
484
if (user && boot_cpu_has(X86_FEATURE_PTI))
485
pgd = kernel_to_user_pgdp(pgd);
486
#endif
487
ptdump_walk_pgd_level_core(m, mm, pgd, false, false);
488
}
489
EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
490
491
/*
 * Check the page tables reached through kernel_to_user_pgdp(INIT_PGD) for
 * W+X mappings.  Does nothing unless CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
 * is enabled, NX is in the supported PTE mask and the CPU has
 * X86_FEATURE_PTI set.
 */
void ptdump_walk_user_pgd_level_checkwx(void)
{
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
	pgd_t *user_pgd;

	if (!(__supported_pte_mask & _PAGE_NX))
		return;
	if (!boot_cpu_has(X86_FEATURE_PTI))
		return;

	pr_info("x86/mm: Checking user space page tables\n");
	user_pgd = kernel_to_user_pgdp(INIT_PGD);
	ptdump_walk_pgd_level_core(NULL, &init_mm, user_pgd, true, false);
#endif
}
505
506
bool ptdump_walk_pgd_level_checkwx(void)
507
{
508
if (!(__supported_pte_mask & _PAGE_NX))
509
return true;
510
511
return ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false);
512
}
513
514
/*
 * Fill in the start addresses of those address_markers[] entries that
 * are not compile-time constants.  Always returns 0.
 */
static int __init pt_dump_init(void)
{
#ifdef CONFIG_X86_64
	address_markers[LOW_KERNEL_NR].start_address	= PAGE_OFFSET;
	address_markers[VMALLOC_START_NR].start_address	= VMALLOC_START;
	address_markers[VMEMMAP_START_NR].start_address	= VMEMMAP_START;
# ifdef CONFIG_MODIFY_LDT_SYSCALL
	address_markers[LDT_NR].start_address		= LDT_BASE_ADDR;
# endif
# ifdef CONFIG_KASAN
	address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
	address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
# endif
#endif

#ifdef CONFIG_X86_32
	address_markers[VMALLOC_START_NR].start_address	= VMALLOC_START;
	address_markers[VMALLOC_END_NR].start_address	= VMALLOC_END;
# ifdef CONFIG_HIGHMEM
	address_markers[PKMAP_BASE_NR].start_address	= PKMAP_BASE;
# endif
# ifdef CONFIG_MODIFY_LDT_SYSCALL
	address_markers[LDT_NR].start_address		= LDT_BASE_ADDR;
# endif
	address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
	address_markers[FIXADDR_START_NR].start_address	= FIXADDR_START;
#endif

	return 0;
}
__initcall(pt_dump_init);
547
548