GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/mm/ptdump/ptdump.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2016, Rashmica Gupta, IBM Corp.
 *
 * This traverses the kernel pagetables and dumps the
 * information about the used sections of memory to
 * /sys/kernel/debug/kernel_page_tables.
 *
 * Derived from the arm64 implementation:
 * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
 * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
 */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/hugetlb.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/ptdump.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/fixmap.h>
#include <linux/const.h>
#include <linux/kasan.h>
#include <asm/page.h>
#include <asm/hugetlb.h>

#include <mm/mmu_decl.h>

#include "ptdump.h"

/*
 * To visualise what is happening,
 *
 * - PTRS_PER_P** = how many entries there are in the corresponding P**
 * - P**_SHIFT = how many bits of the address we use to index into the
 *   corresponding P**
 * - P**_SIZE is how much memory we can access through the table - not the
 *   size of the table itself.
 *   P**={PGD, PUD, PMD, PTE}
 *
 * Each entry of the PGD points to a PUD. Each entry of a PUD points to a
 * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to
 * a page.
 *
 * In the case where there are only 3 levels, the PUD is folded into the
 * PGD: every PUD has only one entry which points to the PMD.
 *
 * The page dumper groups page table entries of the same type into a single
 * description. It uses pg_state to track the range information while
 * iterating over the PTE entries. When the continuity is broken it then
 * dumps out a description of the range - i.e. PTEs that are virtually
 * contiguous with the same PTE flags are chunked together. This is to make
 * it clear how different areas of the kernel virtual memory are used.
 */
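
/*
 * pg_state carries the dumper's walk state: @seq is the output file
 * (NULL when only checking W+X), @marker points at the current section
 * marker, @start_address/@start_pa record where the current contiguous
 * range began, and @level/@current_flags identify the range being
 * accumulated. @wx_pages counts pages found both writable and executable.
 */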
struct pg_state {
	struct ptdump_state ptdump;
	struct seq_file *seq;
	const struct addr_marker *marker;
	unsigned long start_address;
	unsigned long start_pa;
	int level;
	u64 current_flags;
	bool check_wx;
	unsigned long wx_pages;
};

struct addr_marker {
	unsigned long start_address;
	const char *name;
};
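
/*
 * The start addresses below are filled in by populate_markers() at init
 * time, since some of them (e.g. ioremap_bot) are only known at runtime.
 */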
static struct addr_marker address_markers[] = {
	{ 0,	"Start of kernel VM" },
#ifdef MODULES_VADDR
	{ 0,	"modules start" },
	{ 0,	"modules end" },
#endif
	{ 0,	"vmalloc() Area" },
	{ 0,	"vmalloc() End" },
#ifdef CONFIG_PPC64
	{ 0,	"isa I/O start" },
	{ 0,	"isa I/O end" },
	{ 0,	"phb I/O start" },
	{ 0,	"phb I/O end" },
	{ 0,	"I/O remap start" },
	{ 0,	"I/O remap end" },
	{ 0,	"vmemmap start" },
#else
	{ 0,	"Early I/O remap start" },
	{ 0,	"Early I/O remap end" },
#ifdef CONFIG_HIGHMEM
	{ 0,	"Highmem PTEs start" },
	{ 0,	"Highmem PTEs end" },
#endif
	{ 0,	"Fixmap start" },
	{ 0,	"Fixmap end" },
#endif
#ifdef CONFIG_KASAN
	{ 0,	"kasan shadow mem start" },
	{ 0,	"kasan shadow mem end" },
#endif
	{ -1,	NULL },
};
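
/*
 * Range handed to the generic walker. ptdump_init() adjusts the start
 * and end for 64-bit configurations before the range is first used.
 */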
static struct ptdump_range ptdump_range[] __ro_after_init = {
	{TASK_SIZE_MAX, ~0UL},
	{0, 0}
};
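
/*
 * NULL-safe output wrappers: ptdump_check_wx() walks the tables with a
 * NULL seq_file, so all printing funnels through these checks.
 */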
#define pt_dump_seq_printf(m, fmt, args...)	\
({						\
	if (m)					\
		seq_printf(m, fmt, ##args);	\
})

#define pt_dump_seq_putc(m, c)		\
({					\
	if (m)				\
		seq_putc(m, c);		\
})
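
/* Print @size using the largest power-of-1024 unit that divides it exactly. */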
void pt_dump_size(struct seq_file *m, unsigned long size)
{
	static const char units[] = " KMGTPE";
	const char *unit = units;

	/* Work out what appropriate unit to use */
	while (!(size & 1023) && unit[1]) {
		size >>= 10;
		unit++;
	}
	pt_dump_seq_printf(m, "%9lu%c ", size, *unit);
}
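
/*
 * Print the state of each flag in the per-level flag table, clearing
 * handled bits from current_flags so anything left over is reported
 * as unknown.
 */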
static void dump_flag_info(struct pg_state *st, const struct flag_info
		*flag, u64 pte, int num)
{
	unsigned int i;

	for (i = 0; i < num; i++, flag++) {
		const char *s = NULL;
		u64 val;

		/* flag not defined so don't check it */
		if (flag->mask == 0)
			continue;
		/* Some 'flags' are actually values */
		if (flag->is_val) {
			val = pte & flag->val;
			if (flag->shift)
				val = val >> flag->shift;
			pt_dump_seq_printf(st->seq, " %s:%llx", flag->set, val);
		} else {
			if ((pte & flag->mask) == flag->val)
				s = flag->set;
			else
				s = flag->clear;
			if (s)
				pt_dump_seq_printf(st->seq, " %s", s);
		}
		st->current_flags &= ~flag->mask;
	}
	if (st->current_flags != 0)
		pt_dump_seq_printf(st->seq, " unknown flags:%llx", st->current_flags);
}
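
/*
 * Print the virtual range, the starting physical address and the size
 * of the range just closed. An illustrative line of output (exact
 * columns and flags depend on the platform):
 *
 *   0xc000000000000000-0xc000000003ffffff  0x0000000000000000       64M ...
 */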
static void dump_addr(struct pg_state *st, unsigned long addr)
{
#ifdef CONFIG_PPC64
#define REG	"0x%016lx"
#else
#define REG	"0x%08lx"
#endif

	pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
	pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
	pt_dump_size(st->seq, addr - st->start_address);
}
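
/*
 * Account (and, with CONFIG_DEBUG_WX, warn about) ranges that are both
 * writable and executable.
 */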
static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
	pte_t pte = __pte(st->current_flags);

	if (!st->check_wx)
		return;

	if (!pte_write(pte) || !pte_exec(pte))
		return;

	WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX),
		  "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
		  (void *)st->start_address, (void *)st->start_address);

	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
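
/*
 * Start tracking a new range at @addr and emit a marker banner for each
 * section boundary the walk has crossed.
 */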
static void note_page_update_state(struct pg_state *st, unsigned long addr, int level, u64 val)
{
	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
	u64 pa = val & PTE_RPN_MASK;

	st->level = level;
	st->current_flags = flag;
	st->start_address = addr;
	st->start_pa = pa;

	while (addr >= st->marker[1].start_address) {
		st->marker++;
		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
	}
}
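
/*
 * Called back for each entry the walker visits; closes and prints the
 * accumulated range whenever the flags, the level or the section change.
 */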
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
{
	u64 flag = level >= 0 ? val & pg_level[level].mask : 0;
	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);

	/* At first no level is set */
	if (st->level == -1) {
		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
		note_page_update_state(st, addr, level, val);
	/*
	 * Dump the section of virtual memory when:
	 *   - the PTE flags from one entry to the next differ.
	 *   - we change levels in the tree.
	 *   - the address is in a different section of memory and is thus
	 *     used for a different purpose, regardless of the flags.
	 */
	} else if (flag != st->current_flags || level != st->level ||
		   addr >= st->marker[1].start_address) {

		/* Check the PTE flags */
		if (st->current_flags) {
			note_prot_wx(st, addr);
			dump_addr(st, addr);

			/* Dump all the flags */
			if (pg_level[st->level].flag)
				dump_flag_info(st, pg_level[st->level].flag,
					       st->current_flags,
					       pg_level[st->level].num);

			pt_dump_seq_putc(st->seq, '\n');
		}

		/*
		 * Address indicates we have passed the end of the
		 * current section of virtual memory
		 */
		note_page_update_state(st, addr, level, val);
	}
}
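
/* Fill in the runtime start addresses of address_markers[]. */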
static void populate_markers(void)
{
	int i = 0;

#ifdef CONFIG_PPC64
	address_markers[i++].start_address = PAGE_OFFSET;
#else
	address_markers[i++].start_address = TASK_SIZE;
#endif
#ifdef MODULES_VADDR
	address_markers[i++].start_address = MODULES_VADDR;
	address_markers[i++].start_address = MODULES_END;
#endif
	address_markers[i++].start_address = VMALLOC_START;
	address_markers[i++].start_address = VMALLOC_END;
#ifdef CONFIG_PPC64
	address_markers[i++].start_address = ISA_IO_BASE;
	address_markers[i++].start_address = ISA_IO_END;
	address_markers[i++].start_address = PHB_IO_BASE;
	address_markers[i++].start_address = PHB_IO_END;
	address_markers[i++].start_address = IOREMAP_BASE;
	address_markers[i++].start_address = IOREMAP_END;
	/*
	 * Book3S 64-bit defines the vmemmap start in terms of the hash
	 * MMU layout (H_VMEMMAP_START); other 64-bit platforms provide
	 * VMEMMAP_BASE.
	 */
#ifdef CONFIG_PPC_BOOK3S_64
	address_markers[i++].start_address = H_VMEMMAP_START;
#else
	address_markers[i++].start_address = VMEMMAP_BASE;
#endif
#else /* !CONFIG_PPC64 */
	address_markers[i++].start_address = ioremap_bot;
	address_markers[i++].start_address = IOREMAP_TOP;
#ifdef CONFIG_HIGHMEM
	address_markers[i++].start_address = PKMAP_BASE;
	address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP);
#endif
	address_markers[i++].start_address = FIXADDR_START;
	address_markers[i++].start_address = FIXADDR_TOP;
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_KASAN
	address_markers[i++].start_address = KASAN_SHADOW_START;
	address_markers[i++].start_address = KASAN_SHADOW_END;
#endif
}
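
/*
 * Per-level callbacks for the generic ptdump walker. Levels are numbered
 * from PGD = 0 down to PTE = 4; the flush callback closes the final range
 * with level -1.
 */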
static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
{
	note_page(pt_st, addr, 4, pte_val(pte));
}

static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
{
	note_page(pt_st, addr, 3, pmd_val(pmd));
}

static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
{
	note_page(pt_st, addr, 2, pud_val(pud));
}

static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
{
	note_page(pt_st, addr, 1, p4d_val(p4d));
}

static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
{
	note_page(pt_st, addr, 0, pgd_val(pgd));
}

static void note_page_flush(struct ptdump_state *pt_st)
{
	pte_t pte_zero = {0};

	note_page(pt_st, 0, -1, pte_val(pte_zero));
}
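
/* seq_file show routine behind /sys/kernel/debug/kernel_page_tables. */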
static int ptdump_show(struct seq_file *m, void *v)
{
	struct pg_state st = {
		.seq = m,
		.marker = address_markers,
		.level = -1,
		.ptdump = {
			.note_page_pte = note_page_pte,
			.note_page_pmd = note_page_pmd,
			.note_page_pud = note_page_pud,
			.note_page_p4d = note_page_p4d,
			.note_page_pgd = note_page_pgd,
			.note_page_flush = note_page_flush,
			.range = ptdump_range,
		}
	};

	/* Traverse kernel page tables */
	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
	return 0;
}

DEFINE_SHOW_ATTRIBUTE(ptdump);
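
/*
 * Pre-compute, for each level, the union of all flag masks so the dumper
 * can spot bits it does not know how to decode.
 */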
static void __init build_pgtable_complete_mask(void)
{
	unsigned int i, j;

	for (i = 0; i < ARRAY_SIZE(pg_level); i++)
		if (pg_level[i].flag)
			for (j = 0; j < pg_level[i].num; j++)
				pg_level[i].mask |= pg_level[i].flag[j].mask;
}
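
/*
 * Re-walk the kernel page tables with output suppressed and report
 * whether any W+X mappings were found. Returns true on a pass, and also
 * when the check cannot be meaningful (Book3S-64 without
 * MMU_FTR_KERNEL_RO).
 */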
bool ptdump_check_wx(void)
{
	struct pg_state st = {
		.seq = NULL,
		.marker = (struct addr_marker[]) {
			{ 0, NULL},
			{ -1, NULL},
		},
		.level = -1,
		.check_wx = true,
		.ptdump = {
			.note_page_pte = note_page_pte,
			.note_page_pmd = note_page_pmd,
			.note_page_pud = note_page_pud,
			.note_page_p4d = note_page_p4d,
			.note_page_pgd = note_page_pgd,
			.note_page_flush = note_page_flush,
			.range = ptdump_range,
		}
	};

	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !mmu_has_feature(MMU_FTR_KERNEL_RO))
		return true;

	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);

	if (st.wx_pages) {
		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
			st.wx_pages);

		return false;
	} else {
		pr_info("Checked W+X mappings: passed, no W+X pages found\n");

		return true;
	}
}
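
/*
 * Pick the dump range (hash vs radix start on 64-bit), populate the
 * markers and level masks, and register the debugfs file when
 * CONFIG_PTDUMP_DEBUGFS is enabled.
 */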
static int __init ptdump_init(void)
{
#ifdef CONFIG_PPC64
	if (!radix_enabled())
		ptdump_range[0].start = KERN_VIRT_START;
	else
		ptdump_range[0].start = PAGE_OFFSET;

	ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD);
#endif

	populate_markers();
	build_pgtable_complete_mask();

	if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS))
		debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);

	return 0;
}
device_initcall(ptdump_init);