Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/mm/ptdump/ptdump.c
50686 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* Copyright 2016, Rashmica Gupta, IBM Corp.
4
*
5
* This traverses the kernel pagetables and dumps the
6
* information about the used sections of memory to
7
* /sys/kernel/debug/kernel_page_tables.
8
*
9
* Derived from the arm64 implementation:
10
* Copyright (c) 2014, The Linux Foundation, Laura Abbott.
11
* (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
12
*/
13
#include <linux/debugfs.h>
14
#include <linux/fs.h>
15
#include <linux/hugetlb.h>
16
#include <linux/io.h>
17
#include <linux/mm.h>
18
#include <linux/highmem.h>
19
#include <linux/ptdump.h>
20
#include <linux/sched.h>
21
#include <linux/seq_file.h>
22
#include <asm/fixmap.h>
23
#include <linux/const.h>
24
#include <linux/kasan.h>
25
#include <asm/page.h>
26
#include <asm/hugetlb.h>
27
28
#include <mm/mmu_decl.h>
29
30
#include "ptdump.h"
31
32
/*
33
* To visualise what is happening,
34
*
35
* - PTRS_PER_P** = how many entries there are in the corresponding P**
36
* - P**_SHIFT = how many bits of the address we use to index into the
37
* corresponding P**
38
* - P**_SIZE is how much memory we can access through the table - not the
39
* size of the table itself.
40
* P**={PGD, PUD, PMD, PTE}
41
*
42
*
43
* Each entry of the PGD points to a PUD. Each entry of a PUD points to a
44
* PMD. Each entry of a PMD points to a PTE. And every PTE entry points to
45
* a page.
46
*
47
* In the case where there are only 3 levels, the PUD is folded into the
48
* PGD: every PUD has only one entry which points to the PMD.
49
*
50
* The page dumper groups page table entries of the same type into a single
51
* description. It uses pg_state to track the range information while
52
* iterating over the PTE entries. When the continuity is broken it then
53
* dumps out a description of the range - i.e. PTEs that are virtually contiguous
54
* with the same PTE flags are chunked together. This is to make it clear how
55
* different areas of the kernel virtual memory are used.
56
*
57
*/
58
struct pg_state {
59
struct ptdump_state ptdump;
60
struct seq_file *seq;
61
const struct addr_marker *marker;
62
unsigned long start_address;
63
unsigned long start_pa;
64
int level;
65
u64 current_flags;
66
bool check_wx;
67
unsigned long wx_pages;
68
};
69
70
/* Labels the start of a named region of the kernel virtual address space. */
struct addr_marker {
	/* Lowest virtual address belonging to the region */
	unsigned long start_address;
	/* Text printed between "---[ " and " ]---" when the walk enters the region */
	const char *name;
};
74
75
static struct addr_marker address_markers[] = {
76
{ 0, "Start of kernel VM" },
77
#ifdef MODULES_VADDR
78
{ 0, "modules start" },
79
{ 0, "modules end" },
80
#endif
81
{ 0, "vmalloc() Area" },
82
{ 0, "vmalloc() End" },
83
#ifdef CONFIG_PPC64
84
{ 0, "isa I/O start" },
85
{ 0, "isa I/O end" },
86
{ 0, "phb I/O start" },
87
{ 0, "phb I/O end" },
88
{ 0, "I/O remap start" },
89
{ 0, "I/O remap end" },
90
{ 0, "vmemmap start" },
91
#else
92
{ 0, "Early I/O remap start" },
93
{ 0, "Early I/O remap end" },
94
#ifdef CONFIG_HIGHMEM
95
{ 0, "Highmem PTEs start" },
96
{ 0, "Highmem PTEs end" },
97
#endif
98
{ 0, "Fixmap start" },
99
{ 0, "Fixmap end" },
100
#endif
101
#ifdef CONFIG_KASAN
102
{ 0, "kasan shadow mem start" },
103
{ 0, "kasan shadow mem end" },
104
#endif
105
{ -1, NULL },
106
};
107
108
/*
 * Address ranges handed to the generic page table walker, terminated by an
 * all-zero entry. On CONFIG_PPC64 the first range is overwritten by
 * ptdump_init().
 */
static struct ptdump_range ptdump_range[] __ro_after_init = {
	{ .start = TASK_SIZE_MAX, .end = ~0UL },
	{ .start = 0, .end = 0 }
};
112
113
/*
 * seq_printf() that silently does nothing when there is no seq_file, i.e.
 * when @m is NULL. Note that @m is evaluated twice.
 */
#define pt_dump_seq_printf(m, fmt, args...)	\
do {						\
	if (m)					\
		seq_printf(m, fmt, ##args);	\
} while (0)
118
119
/*
 * seq_putc() that silently does nothing when there is no seq_file, i.e.
 * when @m is NULL. Note that @m is evaluated twice.
 */
#define pt_dump_seq_putc(m, c)		\
do {					\
	if (m)				\
		seq_putc(m, c);		\
} while (0)
124
125
/*
 * Print @size to @m, scaled down to the largest unit (K, M, G, T, P, E) that
 * divides it exactly. The number is right-aligned in a field of at least
 * nine characters and is followed by the unit character (a space when the
 * value is not a multiple of 1024) and a trailing space.
 *
 * Nothing is printed when @m is NULL.
 */
void pt_dump_size(struct seq_file *m, unsigned long size)
{
	static const char units[] = " KMGTPE";
	const char *unit = units;

	/*
	 * Work out what appropriate unit to use. Zero is a multiple of every
	 * unit, so it has to stop the scan explicitly; without the check the
	 * loop would run to the end of the table and report "0E".
	 */
	while (size && !(size & 1023) && unit[1]) {
		size >>= 10;
		unit++;
	}
	pt_dump_seq_printf(m, "%9lu%c ", size, *unit);
}
137
138
static void dump_flag_info(struct pg_state *st, const struct flag_info
139
*flag, u64 pte, int num)
140
{
141
unsigned int i;
142
143
for (i = 0; i < num; i++, flag++) {
144
const char *s = NULL;
145
u64 val;
146
147
/* flag not defined so don't check it */
148
if (flag->mask == 0)
149
continue;
150
/* Some 'flags' are actually values */
151
if (flag->is_val) {
152
val = pte & flag->val;
153
if (flag->shift)
154
val = val >> flag->shift;
155
pt_dump_seq_printf(st->seq, " %s:%llx", flag->set, val);
156
} else {
157
if ((pte & flag->mask) == flag->val)
158
s = flag->set;
159
else
160
s = flag->clear;
161
if (s)
162
pt_dump_seq_printf(st->seq, " %s", s);
163
}
164
st->current_flags &= ~flag->mask;
165
}
166
if (st->current_flags != 0)
167
pt_dump_seq_printf(st->seq, " unknown flags:%llx", st->current_flags);
168
}
169
170
static void dump_addr(struct pg_state *st, unsigned long addr)
171
{
172
#ifdef CONFIG_PPC64
173
#define REG "0x%016lx"
174
#else
175
#define REG "0x%08lx"
176
#endif
177
178
pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
179
pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
180
pt_dump_size(st->seq, addr - st->start_address);
181
pt_dump_seq_printf(st->seq, "%s ", pg_level[st->level].name);
182
}
183
184
static void note_prot_wx(struct pg_state *st, unsigned long addr)
185
{
186
pte_t pte = __pte(st->current_flags);
187
188
if (!st->check_wx)
189
return;
190
191
if (!pte_write(pte) || !pte_exec(pte))
192
return;
193
194
WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX),
195
"powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
196
(void *)st->start_address, (void *)st->start_address);
197
198
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
199
}
200
201
/*
 * Open a new run at @addr described by @level and the raw entry @val, then
 * advance st->marker past every region that starts at or below @addr,
 * printing a "---[ name ]---" header for each region entered.
 */
static void note_page_update_state(struct pg_state *st, unsigned long addr, int level, u64 val)
{
	st->level = level;
	/* No flag mask exists for a negative level */
	st->current_flags = level >= 0 ? val & pg_level[level].mask : 0;
	st->start_address = addr;
	st->start_pa = val & PTE_RPN_MASK;

	while (addr >= st->marker[1].start_address) {
		st->marker++;
		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
	}
}
216
217
/*
 * Feed one page table entry into the dump.
 *
 * @pt_st: generic walker state embedded in a struct pg_state
 * @addr:  virtual address the entry maps
 * @level: pg_level[] index of the entry, or -1 for "no level"
 * @val:   raw value of the entry
 *
 * Entries are merged into runs; a finished run is printed (and checked for
 * W+X when requested) as soon as an entry arrives that does not belong to it.
 */
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
{
	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;

	/* At first no level is set */
	if (st->level == -1) {
		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
		note_page_update_state(st, addr, level, val);
		return;
	}

	/*
	 * The current run only ends when:
	 *   - the PTE flags from one entry to the next differs.
	 *   - we change levels in the tree.
	 *   - the address is in a different section of memory and is thus
	 *   used for a different purpose, regardless of the flags.
	 * Otherwise this entry simply extends it.
	 */
	if (flag == st->current_flags && level == st->level &&
	    addr < st->marker[1].start_address)
		return;

	/* Runs without any flag bits set are not reported */
	if (st->current_flags) {
		note_prot_wx(st, addr);
		dump_addr(st, addr);

		/* Dump all the flags */
		if (pg_level[st->level].flag)
			dump_flag_info(st, pg_level[st->level].flag,
				       st->current_flags,
				       pg_level[st->level].num);

		pt_dump_seq_putc(st->seq, '\n');
	}

	/* Start the next run, moving on to the next region if we passed its start */
	note_page_update_state(st, addr, level, val);
}
257
258
static void populate_markers(void)
259
{
260
int i = 0;
261
262
#ifdef CONFIG_PPC64
263
address_markers[i++].start_address = PAGE_OFFSET;
264
#else
265
address_markers[i++].start_address = TASK_SIZE;
266
#endif
267
#ifdef MODULES_VADDR
268
address_markers[i++].start_address = MODULES_VADDR;
269
address_markers[i++].start_address = MODULES_END;
270
#endif
271
address_markers[i++].start_address = VMALLOC_START;
272
address_markers[i++].start_address = VMALLOC_END;
273
#ifdef CONFIG_PPC64
274
address_markers[i++].start_address = ISA_IO_BASE;
275
address_markers[i++].start_address = ISA_IO_END;
276
address_markers[i++].start_address = PHB_IO_BASE;
277
address_markers[i++].start_address = PHB_IO_END;
278
address_markers[i++].start_address = IOREMAP_BASE;
279
address_markers[i++].start_address = IOREMAP_END;
280
/* What is the ifdef about? */
281
#ifdef CONFIG_PPC_BOOK3S_64
282
address_markers[i++].start_address = H_VMEMMAP_START;
283
#else
284
address_markers[i++].start_address = VMEMMAP_BASE;
285
#endif
286
#else /* !CONFIG_PPC64 */
287
address_markers[i++].start_address = ioremap_bot;
288
address_markers[i++].start_address = IOREMAP_TOP;
289
#ifdef CONFIG_HIGHMEM
290
address_markers[i++].start_address = PKMAP_BASE;
291
address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP);
292
#endif
293
address_markers[i++].start_address = FIXADDR_START;
294
address_markers[i++].start_address = FIXADDR_TOP;
295
#endif /* CONFIG_PPC64 */
296
#ifdef CONFIG_KASAN
297
address_markers[i++].start_address = KASAN_SHADOW_START;
298
address_markers[i++].start_address = KASAN_SHADOW_END;
299
#endif
300
}
301
302
/* Walker callback for a PTE entry: forwards its raw value as level 4. */
static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
{
	u64 raw = pte_val(pte);

	note_page(pt_st, addr, 4, raw);
}
306
307
/* Walker callback for a PMD entry: forwards its raw value as level 3. */
static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
{
	u64 raw = pmd_val(pmd);

	note_page(pt_st, addr, 3, raw);
}
311
312
/* Walker callback for a PUD entry: forwards its raw value as level 2. */
static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
{
	u64 raw = pud_val(pud);

	note_page(pt_st, addr, 2, raw);
}
316
317
/* Walker callback for a P4D entry: forwards its raw value as level 1. */
static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
{
	u64 raw = p4d_val(p4d);

	note_page(pt_st, addr, 1, raw);
}
321
322
/* Walker callback for a PGD entry: forwards its raw value as level 0. */
static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
{
	u64 raw = pgd_val(pgd);

	note_page(pt_st, addr, 0, raw);
}
326
327
static void note_page_flush(struct ptdump_state *pt_st)
328
{
329
pte_t pte_zero = {0};
330
331
note_page(pt_st, 0, -1, pte_val(pte_zero));
332
}
333
334
static int ptdump_show(struct seq_file *m, void *v)
335
{
336
struct pg_state st = {
337
.seq = m,
338
.marker = address_markers,
339
.level = -1,
340
.ptdump = {
341
.note_page_pte = note_page_pte,
342
.note_page_pmd = note_page_pmd,
343
.note_page_pud = note_page_pud,
344
.note_page_p4d = note_page_p4d,
345
.note_page_pgd = note_page_pgd,
346
.note_page_flush = note_page_flush,
347
.range = ptdump_range,
348
}
349
};
350
351
/* Traverse kernel page tables */
352
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
353
return 0;
354
}
355
356
DEFINE_SHOW_ATTRIBUTE(ptdump);
357
358
/*
 * For every page table level that has a flag table, OR the masks of all its
 * flag descriptors into pg_level[].mask.
 */
static void __init build_pgtable_complete_mask(void)
{
	unsigned int lvl, n;

	for (lvl = 0; lvl < ARRAY_SIZE(pg_level); lvl++) {
		/* Levels without a flag table contribute nothing */
		if (!pg_level[lvl].flag)
			continue;

		for (n = 0; n < pg_level[lvl].num; n++)
			pg_level[lvl].mask |= pg_level[lvl].flag[n].mask;
	}
}
367
368
bool ptdump_check_wx(void)
369
{
370
struct pg_state st = {
371
.seq = NULL,
372
.marker = (struct addr_marker[]) {
373
{ 0, NULL},
374
{ -1, NULL},
375
},
376
.level = -1,
377
.check_wx = true,
378
.ptdump = {
379
.note_page_pte = note_page_pte,
380
.note_page_pmd = note_page_pmd,
381
.note_page_pud = note_page_pud,
382
.note_page_p4d = note_page_p4d,
383
.note_page_pgd = note_page_pgd,
384
.note_page_flush = note_page_flush,
385
.range = ptdump_range,
386
}
387
};
388
389
if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !mmu_has_feature(MMU_FTR_KERNEL_RO))
390
return true;
391
392
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
393
394
if (st.wx_pages) {
395
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
396
st.wx_pages);
397
398
return false;
399
} else {
400
pr_info("Checked W+X mappings: passed, no W+X pages found\n");
401
402
return true;
403
}
404
}
405
406
/*
 * Initcall: fixes up the walk range on 64-bit, fills in the region markers
 * and the per-level flag masks, and creates the "kernel_page_tables" debugfs
 * file when CONFIG_PTDUMP_DEBUGFS is enabled. Always returns 0.
 */
static int __init ptdump_init(void)
{
#ifdef CONFIG_PPC64
	/* The walk starts at PAGE_OFFSET with radix and at KERN_VIRT_START without it */
	if (radix_enabled())
		ptdump_range[0].start = PAGE_OFFSET;
	else
		ptdump_range[0].start = KERN_VIRT_START;

	ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD);
#endif

	populate_markers();
	build_pgtable_complete_mask();

	if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS))
		debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);

	return 0;
}
device_initcall(ptdump_init);
426
427