GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/mm/kmemcheck/kmemcheck.c
/**
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008 Vegard Nossum <[email protected]>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "selftest.h"
#include "shadow.h"
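
/*
 * The checker can be in one of three modes, selected at build time
 * (below) or with the kmemcheck= boot parameter: 0 = disabled,
 * 1 = enabled, 2 = one-shot (disable itself after the first report;
 * see kmemcheck_read_strict() and kmemcheck_copy()).
 */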
#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
# define KMEMCHECK_ENABLED 2
#endif

int kmemcheck_enabled = KMEMCHECK_ENABLED;

int __init kmemcheck_init(void)
{
#ifdef CONFIG_SMP
        /*
         * Limit SMP to use a single CPU. We rely on the fact that this code
         * runs before SMP is set up.
         */
        if (setup_max_cpus > 1) {
                printk(KERN_INFO
                        "kmemcheck: Limiting number of CPUs to 1.\n");
                setup_max_cpus = 1;
        }
#endif

        if (!kmemcheck_selftest()) {
                printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
                kmemcheck_enabled = 0;
                return -EINVAL;
        }

        printk(KERN_INFO "kmemcheck: Initialized\n");
        return 0;
}

early_initcall(kmemcheck_init);

/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
        if (!str)
                return -EINVAL;

        sscanf(str, "%d", &kmemcheck_enabled);
        return 0;
}

early_param("kmemcheck", param_kmemcheck);
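
/*
 * Show/hide a single tracked page by toggling _PAGE_PRESENT in its PTE.
 * A "hidden" page is non-present, so any access to it raises a page
 * fault that kmemcheck can intercept. Both helpers return 1 if the
 * address belongs to a tracked page, 0 otherwise.
 */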
int kmemcheck_show_addr(unsigned long address)
{
        pte_t *pte;

        pte = kmemcheck_pte_lookup(address);
        if (!pte)
                return 0;

        set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
        __flush_tlb_one(address);
        return 1;
}

int kmemcheck_hide_addr(unsigned long address)
{
        pte_t *pte;

        pte = kmemcheck_pte_lookup(address);
        if (!pte)
                return 0;

        set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
        __flush_tlb_one(address);
        return 1;
}
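
/*
 * Per-CPU bookkeeping for the fault currently being handled. 'balance'
 * is 0 when no pages are shown and 1 while pages are temporarily
 * revealed for single-stepping; 'busy' guards against recursive faults
 * in kmemcheck_access().
 */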
struct kmemcheck_context {
        bool busy;
        int balance;

        /*
         * There can be at most two memory operands to an instruction, but
         * each address can cross a page boundary -- so we may need up to
         * four addresses that must be hidden/revealed for each fault.
         */
        unsigned long addr[4];
        unsigned long n_addrs;
        unsigned long flags;

        /* Data size of the instruction that caused a fault. */
        unsigned int size;
};

static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);
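
/* True while pages are shown and we are waiting for the #DB trap. */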
bool kmemcheck_active(struct pt_regs *regs)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
        data->addr[data->n_addrs++] = addr;
}

static unsigned int kmemcheck_show_all(void)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
        unsigned int i;
        unsigned int n;

        n = 0;
        for (i = 0; i < data->n_addrs; ++i)
                n += kmemcheck_show_addr(data->addr[i]);

        return n;
}

static unsigned int kmemcheck_hide_all(void)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
        unsigned int i;
        unsigned int n;

        n = 0;
        for (i = 0; i < data->n_addrs; ++i)
                n += kmemcheck_hide_addr(data->addr[i]);

        return n;
}

/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        BUG_ON(!irqs_disabled());

        if (unlikely(data->balance != 0)) {
                kmemcheck_show_all();
                kmemcheck_error_save_bug(regs);
                data->balance = 0;
                return;
        }

        /*
         * None of the addresses actually belonged to kmemcheck. Note that
         * this is not an error.
         */
        if (kmemcheck_show_all() == 0)
                return;

        ++data->balance;

        /*
         * The IF needs to be cleared as well, so that the faulting
         * instruction can run "uninterrupted". Otherwise, we might take
         * an interrupt and start executing that before we've had a chance
         * to hide the page again.
         *
         * NOTE: In the rare case of multiple faults, we must not override
         * the original flags:
         */
        if (!(regs->flags & X86_EFLAGS_TF))
                data->flags = regs->flags;

        regs->flags |= X86_EFLAGS_TF;
        regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
        int n;

        BUG_ON(!irqs_disabled());

        if (unlikely(data->balance != 1)) {
                kmemcheck_show_all();
                kmemcheck_error_save_bug(regs);
                data->n_addrs = 0;
                data->balance = 0;

                if (!(data->flags & X86_EFLAGS_TF))
                        regs->flags &= ~X86_EFLAGS_TF;
                if (data->flags & X86_EFLAGS_IF)
                        regs->flags |= X86_EFLAGS_IF;
                return;
        }

        if (kmemcheck_enabled)
                n = kmemcheck_hide_all();
        else
                n = kmemcheck_show_all();

        if (n == 0)
                return;

        --data->balance;

        data->n_addrs = 0;

        if (!(data->flags & X86_EFLAGS_TF))
                regs->flags &= ~X86_EFLAGS_TF;
        if (data->flags & X86_EFLAGS_IF)
                regs->flags |= X86_EFLAGS_IF;
}
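
/*
 * Make @n pages starting at @p present again and clear _PAGE_HIDDEN,
 * so that ordinary accesses no longer fault into kmemcheck.
 */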
void kmemcheck_show_pages(struct page *p, unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; ++i) {
                unsigned long address;
                pte_t *pte;
                unsigned int level;

                address = (unsigned long) page_address(&p[i]);
                pte = lookup_address(address, &level);
                BUG_ON(!pte);
                BUG_ON(level != PG_LEVEL_4K);

                set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
                set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
                __flush_tlb_one(address);
        }
}

bool kmemcheck_page_is_tracked(struct page *p)
{
        /* This will also check the "hidden" flag of the PTE. */
        return kmemcheck_pte_lookup((unsigned long) page_address(p));
}
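
/*
 * Mark @n pages starting at @p non-present and set _PAGE_HIDDEN, so
 * that every access to them faults into kmemcheck_fault().
 */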
void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; ++i) {
                unsigned long address;
                pte_t *pte;
                unsigned int level;

                address = (unsigned long) page_address(&p[i]);
                pte = lookup_address(address, &level);
                BUG_ON(!pte);
                BUG_ON(level != PG_LEVEL_4K);

                set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
                set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
                __flush_tlb_one(address);
        }
}

/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        void *shadow;
        enum kmemcheck_shadow status;

        shadow = kmemcheck_shadow_lookup(addr);
        if (!shadow)
                return;

        kmemcheck_save_addr(addr);
        status = kmemcheck_shadow_test(shadow, size);
        if (status == KMEMCHECK_SHADOW_INITIALIZED)
                return;

        if (kmemcheck_enabled)
                kmemcheck_error_save(status, addr, size, regs);

        if (kmemcheck_enabled == 2)
                kmemcheck_enabled = 0;

        /* Don't warn about it again. */
        kmemcheck_shadow_set(shadow, size);
}
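
/*
 * Returns true if the object's entire shadow is marked initialized,
 * or if the address is not tracked by kmemcheck at all.
 */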
bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
{
        enum kmemcheck_shadow status;
        void *shadow;

        shadow = kmemcheck_shadow_lookup(addr);
        if (!shadow)
                return true;

        status = kmemcheck_shadow_test_all(shadow, size);

        return status == KMEMCHECK_SHADOW_INITIALIZED;
}

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        unsigned long page = addr & PAGE_MASK;
        unsigned long next_addr = addr + size - 1;
        unsigned long next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                kmemcheck_read_strict(regs, addr, size);
                return;
        }

        /*
         * What we do is basically to split the access across the
         * two pages and handle each part separately. Yes, this means
         * that we may now see reads that are 3 + 5 bytes, for
         * example (and if both are uninitialized, there will be two
         * reports), but it makes the code a lot simpler.
         */
        kmemcheck_read_strict(regs, addr, next_page - addr);
        kmemcheck_read_strict(regs, next_page, next_addr - next_page);
}
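
/*
 * Access may NOT cross page boundary. Unlike a read, a write simply
 * marks the destination bytes as initialized in the shadow memory;
 * there is nothing to report.
 */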
static void kmemcheck_write_strict(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        void *shadow;

        shadow = kmemcheck_shadow_lookup(addr);
        if (!shadow)
                return;

        kmemcheck_save_addr(addr);
        kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        unsigned long page = addr & PAGE_MASK;
        unsigned long next_addr = addr + size - 1;
        unsigned long next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                kmemcheck_write_strict(regs, addr, size);
                return;
        }

        /* See comment in kmemcheck_read(). */
        kmemcheck_write_strict(regs, addr, next_page - addr);
        kmemcheck_write_strict(regs, next_page, next_addr - next_page);
}

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
static void kmemcheck_copy(struct pt_regs *regs,
        unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
        uint8_t shadow[8];
        enum kmemcheck_shadow status;

        unsigned long page;
        unsigned long next_addr;
        unsigned long next_page;

        uint8_t *x;
        unsigned int i;
        unsigned int n;

        BUG_ON(size > sizeof(shadow));

        page = src_addr & PAGE_MASK;
        next_addr = src_addr + size - 1;
        next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                /* Same page */
                x = kmemcheck_shadow_lookup(src_addr);
                if (x) {
                        kmemcheck_save_addr(src_addr);
                        for (i = 0; i < size; ++i)
                                shadow[i] = x[i];
                } else {
                        for (i = 0; i < size; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }
        } else {
                n = next_page - src_addr;
                BUG_ON(n > sizeof(shadow));

                /* First page */
                x = kmemcheck_shadow_lookup(src_addr);
                if (x) {
                        kmemcheck_save_addr(src_addr);
                        for (i = 0; i < n; ++i)
                                shadow[i] = x[i];
                } else {
                        /* Not tracked */
                        for (i = 0; i < n; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }

                /* Second page */
                x = kmemcheck_shadow_lookup(next_page);
                if (x) {
                        kmemcheck_save_addr(next_page);
                        for (i = n; i < size; ++i)
                                shadow[i] = x[i - n];
                } else {
                        /* Not tracked */
                        for (i = n; i < size; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }
        }

        page = dst_addr & PAGE_MASK;
        next_addr = dst_addr + size - 1;
        next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                /* Same page */
                x = kmemcheck_shadow_lookup(dst_addr);
                if (x) {
                        kmemcheck_save_addr(dst_addr);
                        for (i = 0; i < size; ++i) {
                                x[i] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }
        } else {
                n = next_page - dst_addr;
                BUG_ON(n > sizeof(shadow));

                /* First page */
                x = kmemcheck_shadow_lookup(dst_addr);
                if (x) {
                        kmemcheck_save_addr(dst_addr);
                        for (i = 0; i < n; ++i) {
                                x[i] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }

                /* Second page */
                x = kmemcheck_shadow_lookup(next_page);
                if (x) {
                        kmemcheck_save_addr(next_page);
                        for (i = n; i < size; ++i) {
                                x[i - n] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }
        }

        status = kmemcheck_shadow_test(shadow, size);
        if (status == KMEMCHECK_SHADOW_INITIALIZED)
                return;

        if (kmemcheck_enabled)
                kmemcheck_error_save(status, src_addr, size, regs);

        if (kmemcheck_enabled == 2)
                kmemcheck_enabled = 0;
}

enum kmemcheck_method {
        KMEMCHECK_READ,
        KMEMCHECK_WRITE,
};
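
/*
 * Decode the instruction at regs->ip to get the access size and to
 * special-case opcodes that touch two memory operands (MOVS/CMPS) or,
 * with CONFIG_KMEMCHECK_BITOPS_OK, the AND/OR/XOR forms of the
 * 0x80-0x83 group. Everything else falls back to the address and
 * access type reported by the page fault handler.
 */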
static void kmemcheck_access(struct pt_regs *regs,
        unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
        const uint8_t *insn;
        const uint8_t *insn_primary;
        unsigned int size;

        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        /* Recursive fault -- ouch. */
        if (data->busy) {
                kmemcheck_show_addr(fallback_address);
                kmemcheck_error_save_bug(regs);
                return;
        }

        data->busy = true;

        insn = (const uint8_t *) regs->ip;
        insn_primary = kmemcheck_opcode_get_primary(insn);

        kmemcheck_opcode_decode(insn, &size);

        switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
                /* AND, OR, XOR */
                /*
                 * Unfortunately, these instructions have to be excluded from
                 * our regular checking since they access only some (and not
                 * all) bits. This clears out "bogus" bitfield-access warnings.
                 */
        case 0x80:
        case 0x81:
        case 0x82:
        case 0x83:
                switch ((insn_primary[1] >> 3) & 7) {
                        /* OR */
                case 1:
                        /* AND */
                case 4:
                        /* XOR */
                case 6:
                        kmemcheck_write(regs, fallback_address, size);
                        goto out;

                        /* ADD */
                case 0:
                        /* ADC */
                case 2:
                        /* SBB */
                case 3:
                        /* SUB */
                case 5:
                        /* CMP */
                case 7:
                        break;
                }
                break;
#endif

                /* MOVS, MOVSB, MOVSW, MOVSD */
        case 0xa4:
        case 0xa5:
                /*
                 * These instructions are special because they take two
                 * addresses, but we only get one page fault.
                 */
                kmemcheck_copy(regs, regs->si, regs->di, size);
                goto out;

                /* CMPS, CMPSB, CMPSW, CMPSD */
        case 0xa6:
        case 0xa7:
                kmemcheck_read(regs, regs->si, size);
                kmemcheck_read(regs, regs->di, size);
                goto out;
        }

        /*
         * If the opcode isn't special in any way, we use the data from the
         * page fault handler to determine the address and type of memory
         * access.
         */
        switch (fallback_method) {
        case KMEMCHECK_READ:
                kmemcheck_read(regs, fallback_address, size);
                goto out;
        case KMEMCHECK_WRITE:
                kmemcheck_write(regs, fallback_address, size);
                goto out;
        }

out:
        data->busy = false;
}
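
/*
 * Entry point from the page fault handler. Returns true if the fault
 * hit a kmemcheck-tracked page and was handled here: the page has been
 * shown and the CPU set up to single-step the faulting instruction.
 */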
bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
        unsigned long error_code)
{
        pte_t *pte;

        /*
         * XXX: Is it safe to assume that memory accesses from virtual 86
         * mode or non-kernel code segments will _never_ access kernel
         * memory (e.g. tracked pages)? For now, we need this to avoid
         * invoking kmemcheck for PnP BIOS calls.
         */
        if (regs->flags & X86_VM_MASK)
                return false;
        if (regs->cs != __KERNEL_CS)
                return false;

        pte = kmemcheck_pte_lookup(address);
        if (!pte)
                return false;

        WARN_ON_ONCE(in_nmi());

        if (error_code & 2)
                kmemcheck_access(regs, address, KMEMCHECK_WRITE);
        else
                kmemcheck_access(regs, address, KMEMCHECK_READ);

        kmemcheck_show(regs);
        return true;
}
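
/*
 * Entry point from the #DB (single-step) handler: the faulting
 * instruction has executed, so hide the page(s) again.
 */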
bool kmemcheck_trap(struct pt_regs *regs)
{
        if (!kmemcheck_active(regs))
                return false;

        /* We're done. */
        kmemcheck_hide(regs);
        return true;
}