GitHub Repository: torvalds/linux
Path: blob/master/mm/kfence/core.c
// SPDX-License-Identifier: GPL-2.0
/*
 * KFENCE guarded object allocator and fault handling.
 *
 * Copyright (C) 2020, Google LLC.
 */

#define pr_fmt(fmt) "kfence: " fmt

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/debugfs.h>
#include <linux/hash.h>
#include <linux/irq_work.h>
#include <linux/jhash.h>
#include <linux/kcsan-checks.h>
#include <linux/kfence.h>
#include <linux/kmemleak.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/log2.h>
#include <linux/memblock.h>
#include <linux/moduleparam.h>
#include <linux/nodemask.h>
#include <linux/notifier.h>
#include <linux/panic_notifier.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/reboot.h>
#include <linux/sched/clock.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>

#include <asm/kfence.h>

#include "kfence.h"

/* Disables KFENCE on the first warning assuming an irrecoverable error. */
#define KFENCE_WARN_ON(cond)                                           \
        ({                                                             \
                const bool __cond = WARN_ON(cond);                     \
                if (unlikely(__cond)) {                                \
                        WRITE_ONCE(kfence_enabled, false);             \
                        disabled_by_warn = true;                       \
                }                                                      \
                __cond;                                                \
        })

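/*
 * Illustrative usage sketch (not taken verbatim from this file's callers):
 * because KFENCE_WARN_ON() is a statement expression that evaluates to the
 * warned-on condition, callers can disable KFENCE and branch in one step,
 * e.g. kfence_protect() below does "return !KFENCE_WARN_ON(...)".
 */
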
/* === Data ================================================================= */

static bool kfence_enabled __read_mostly;
static bool disabled_by_warn __read_mostly;

unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
EXPORT_SYMBOL_GPL(kfence_sample_interval); /* Export for test modules. */

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "kfence."

static int kfence_enable_late(void);
static int param_set_sample_interval(const char *val, const struct kernel_param *kp)
{
        unsigned long num;
        int ret = kstrtoul(val, 0, &num);

        if (ret < 0)
                return ret;

        /* Using 0 to indicate KFENCE is disabled. */
        if (!num && READ_ONCE(kfence_enabled)) {
                pr_info("disabled\n");
                WRITE_ONCE(kfence_enabled, false);
        }

        *((unsigned long *)kp->arg) = num;

        if (num && !READ_ONCE(kfence_enabled) && system_state != SYSTEM_BOOTING)
                return disabled_by_warn ? -EINVAL : kfence_enable_late();
        return 0;
}

static int param_get_sample_interval(char *buffer, const struct kernel_param *kp)
{
        if (!READ_ONCE(kfence_enabled))
                return sprintf(buffer, "0\n");

        return param_get_ulong(buffer, kp);
}

static const struct kernel_param_ops sample_interval_param_ops = {
        .set = param_set_sample_interval,
        .get = param_get_sample_interval,
};
module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600);

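/*
 * Tuning examples (illustrative values): booting with
 * "kfence.sample_interval=100" samples roughly every 100 milliseconds, while
 * "kfence.sample_interval=0" disables KFENCE; because of MODULE_PARAM_PREFIX
 * and module_param_cb() above, the same knob is also writable at runtime via
 * /sys/module/kfence/parameters/sample_interval.
 */
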
/* Pool usage% threshold when currently covered allocations are skipped. */
static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);

/* Allocation burst count: number of excess KFENCE allocations per sample. */
static unsigned int kfence_burst __read_mostly;
module_param_named(burst, kfence_burst, uint, 0644);

/* If true, use a deferrable timer. */
static bool kfence_deferrable __read_mostly = IS_ENABLED(CONFIG_KFENCE_DEFERRABLE);
module_param_named(deferrable, kfence_deferrable, bool, 0444);

/* If true, check all canary bytes on panic. */
static bool kfence_check_on_panic __read_mostly;
module_param_named(check_on_panic, kfence_check_on_panic, bool, 0444);

/* The pool of pages used for guard pages and objects. */
char *__kfence_pool __read_mostly;
EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */

/*
 * Per-object metadata, with one-to-one mapping of object metadata to
 * backing pages (in __kfence_pool).
 */
static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
struct kfence_metadata *kfence_metadata __read_mostly;

/*
 * If kfence_metadata is not NULL, it may be accessed by kfence_shutdown_cache().
 * So introduce kfence_metadata_init to initialize metadata, and then make
 * kfence_metadata visible after initialization is successful. This prevents
 * potential UAF or access to uninitialized metadata.
 */
static struct kfence_metadata *kfence_metadata_init __read_mostly;

/* Freelist with available objects. */
static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */

/*
 * The static key to set up a KFENCE allocation; or if static keys are not used
 * to gate allocations, to avoid a load and compare if KFENCE is disabled.
 */
DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);

/* Gates the allocation, ensuring only one succeeds in a given period. */
atomic_t kfence_allocation_gate = ATOMIC_INIT(1);

/*
 * A Counting Bloom filter of allocation coverage: limits currently covered
 * allocations of the same source filling up the pool.
 *
 * Assuming a range of 15%-85% unique allocations in the pool at any point in
 * time, the below parameters provide a probability of 0.02-0.33 for false
 * positive hits respectively:
 *
 *	P(alloc_traces) = (1 - e^(-HNUM * (alloc_traces / SIZE)))^HNUM
 */
#define ALLOC_COVERED_HNUM	2
#define ALLOC_COVERED_ORDER	(const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2)
#define ALLOC_COVERED_SIZE	(1 << ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_HNEXT(h)	hash_32(h, ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_MASK	(ALLOC_COVERED_SIZE - 1)
static atomic_t alloc_covered[ALLOC_COVERED_SIZE];

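/*
 * Worked example of the sizing above (assuming the default
 * CONFIG_KFENCE_NUM_OBJECTS=255): const_ilog2(255) == 7, so
 * ALLOC_COVERED_ORDER == 9 and ALLOC_COVERED_SIZE == 512 counters. With
 * HNUM == 2 hash functions, 85% unique allocations (~217 traces) gives
 * P = (1 - e^(-2*217/512))^2 ~= 0.33, and 15% (~38 traces) gives
 * P ~= 0.02 -- the false-positive range quoted in the comment above.
 */
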
/* Stack depth used to determine uniqueness of an allocation. */
#define UNIQUE_ALLOC_STACK_DEPTH ((size_t)8)

/*
 * Randomness for stack hashes, making the same collisions across reboots and
 * different machines less likely.
 */
static u32 stack_hash_seed __ro_after_init;

/* Statistics counters for debugfs. */
enum kfence_counter_id {
        KFENCE_COUNTER_ALLOCATED,
        KFENCE_COUNTER_ALLOCS,
        KFENCE_COUNTER_FREES,
        KFENCE_COUNTER_ZOMBIES,
        KFENCE_COUNTER_BUGS,
        KFENCE_COUNTER_SKIP_INCOMPAT,
        KFENCE_COUNTER_SKIP_CAPACITY,
        KFENCE_COUNTER_SKIP_COVERED,
        KFENCE_COUNTER_COUNT,
};
static atomic_long_t counters[KFENCE_COUNTER_COUNT];
static const char *const counter_names[] = {
        [KFENCE_COUNTER_ALLOCATED]	= "currently allocated",
        [KFENCE_COUNTER_ALLOCS]		= "total allocations",
        [KFENCE_COUNTER_FREES]		= "total frees",
        [KFENCE_COUNTER_ZOMBIES]	= "zombie allocations",
        [KFENCE_COUNTER_BUGS]		= "total bugs",
        [KFENCE_COUNTER_SKIP_INCOMPAT]	= "skipped allocations (incompatible)",
        [KFENCE_COUNTER_SKIP_CAPACITY]	= "skipped allocations (capacity)",
        [KFENCE_COUNTER_SKIP_COVERED]	= "skipped allocations (covered)",
};
static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT);

/* === Internals ============================================================ */

static inline bool should_skip_covered(void)
{
        unsigned long thresh = (CONFIG_KFENCE_NUM_OBJECTS * kfence_skip_covered_thresh) / 100;

        return atomic_long_read(&counters[KFENCE_COUNTER_ALLOCATED]) > thresh;
}

static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries)
{
        num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH);
        num_entries = filter_irq_stacks(stack_entries, num_entries);
        return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), stack_hash_seed);
}

/*
 * Adds (or subtracts) count @val for allocation stack trace hash
 * @alloc_stack_hash from Counting Bloom filter.
 */
static void alloc_covered_add(u32 alloc_stack_hash, int val)
{
        int i;

        for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
                atomic_add(val, &alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]);
                alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
        }
}

/*
 * Returns true if the allocation stack trace hash @alloc_stack_hash is
 * currently contained (non-zero count) in Counting Bloom filter.
 */
static bool alloc_covered_contains(u32 alloc_stack_hash)
{
        int i;

        for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
                if (!atomic_read(&alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]))
                        return false;
                alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
        }

        return true;
}

static bool kfence_protect(unsigned long addr)
{
        return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true));
}

static bool kfence_unprotect(unsigned long addr)
{
        return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false));
}

static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
{
        unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2;
        unsigned long pageaddr = (unsigned long)&__kfence_pool[offset];

        /* The checks do not affect performance; only called from slow-paths. */

        /* Only call with a pointer into kfence_metadata. */
        if (KFENCE_WARN_ON(meta < kfence_metadata ||
                           meta >= kfence_metadata + CONFIG_KFENCE_NUM_OBJECTS))
                return 0;

        /*
         * This metadata object only ever maps to 1 page; verify that the stored
         * address is in the expected range.
         */
        if (KFENCE_WARN_ON(ALIGN_DOWN(meta->addr, PAGE_SIZE) != pageaddr))
                return 0;

        return pageaddr;
}

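/*
 * Illustration of the pool layout implied by the offset math above (object i
 * lives at __kfence_pool + (2*i + 2) * PAGE_SIZE, with every other page a
 * protected guard page):
 *
 *	+-------+-------+-------+-------+-------+-------+----
 *	| guard | guard | obj 0 | guard | obj 1 | guard | ...
 *	+-------+-------+-------+-------+-------+-------+----
 *
 * Assuming 4 KiB pages and the default CONFIG_KFENCE_NUM_OBJECTS=255, the
 * whole pool (KFENCE_POOL_SIZE, see kfence.h) is (255 + 1) * 2 pages = 2 MiB.
 */
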
static inline bool kfence_obj_allocated(const struct kfence_metadata *meta)
{
        enum kfence_object_state state = READ_ONCE(meta->state);

        return state == KFENCE_OBJECT_ALLOCATED || state == KFENCE_OBJECT_RCU_FREEING;
}

/*
 * Update the object's metadata state, including updating the alloc/free stacks
 * depending on the state transition.
 */
static noinline void
metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state next,
                      unsigned long *stack_entries, size_t num_stack_entries)
{
        struct kfence_track *track =
                next == KFENCE_OBJECT_ALLOCATED ? &meta->alloc_track : &meta->free_track;

        lockdep_assert_held(&meta->lock);

        /* Stack has been saved when calling rcu, skip. */
        if (READ_ONCE(meta->state) == KFENCE_OBJECT_RCU_FREEING)
                goto out;

        if (stack_entries) {
                memcpy(track->stack_entries, stack_entries,
                       num_stack_entries * sizeof(stack_entries[0]));
        } else {
                /*
                 * Skip over 1 (this) function; noinline ensures we do not
                 * accidentally skip over the caller by never inlining.
                 */
                num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1);
        }
        track->num_stack_entries = num_stack_entries;
        track->pid = task_pid_nr(current);
        track->cpu = raw_smp_processor_id();
        track->ts_nsec = local_clock(); /* Same source as printk timestamps. */

out:
        /*
         * Pairs with READ_ONCE() in
         * kfence_shutdown_cache(),
         * kfence_handle_page_fault().
         */
        WRITE_ONCE(meta->state, next);
}

#ifdef CONFIG_KMSAN
#define check_canary_attributes noinline __no_kmsan_checks
#else
#define check_canary_attributes inline
#endif

/* Check canary byte at @addr. */
static check_canary_attributes bool check_canary_byte(u8 *addr)
{
        struct kfence_metadata *meta;
        unsigned long flags;

        if (likely(*addr == KFENCE_CANARY_PATTERN_U8(addr)))
                return true;

        atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);

        meta = addr_to_metadata((unsigned long)addr);
        raw_spin_lock_irqsave(&meta->lock, flags);
        kfence_report_error((unsigned long)addr, false, NULL, meta, KFENCE_ERROR_CORRUPTION);
        raw_spin_unlock_irqrestore(&meta->lock, flags);

        return false;
}

static inline void set_canary(const struct kfence_metadata *meta)
{
        const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
        unsigned long addr = pageaddr;

        /*
         * The canary may also be written over part of the object's memory;
         * this is harmless, as the user is expected to initialize the object
         * before using it.
         */
        for (; addr < meta->addr; addr += sizeof(u64))
                *((u64 *)addr) = KFENCE_CANARY_PATTERN_U64;

        addr = ALIGN_DOWN(meta->addr + meta->size, sizeof(u64));
        for (; addr - pageaddr < PAGE_SIZE; addr += sizeof(u64))
                *((u64 *)addr) = KFENCE_CANARY_PATTERN_U64;
}

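/*
 * Sketch of the canary placement done by set_canary() within an object's data
 * page (the object may sit at either end of the page, see
 * kfence_guarded_alloc()):
 *
 *	| canary bytes ... | object (meta->size bytes) | ... canary bytes |
 *	^ page start         ^ meta->addr                      page end ^
 *
 * check_canary() below verifies both regions and reports the first corrupted
 * byte via kfence_report_error().
 */
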
static check_canary_attributes void
check_canary(const struct kfence_metadata *meta)
{
        const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
        unsigned long addr = pageaddr;

        /*
         * We'll iterate over each canary byte per-side until a corrupted byte
         * is found. However, we'll still iterate over the canary bytes to the
         * right of the object even if there was an error in the canary bytes to
         * the left of the object. Specifically, if check_canary_byte()
         * generates an error, showing both sides might give more clues as to
         * what the error is about when displaying which bytes were corrupted.
         */

        /* Apply to left of object. */
        for (; meta->addr - addr >= sizeof(u64); addr += sizeof(u64)) {
                if (unlikely(*((u64 *)addr) != KFENCE_CANARY_PATTERN_U64))
                        break;
        }

        /*
         * If the canary is corrupted within a 64-bit word, or the canary
         * memory is not fully covered by whole consecutive 64-bit words, it
         * needs to be checked byte by byte.
         */
        for (; addr < meta->addr; addr++) {
                if (unlikely(!check_canary_byte((u8 *)addr)))
                        break;
        }

        /* Apply to right of object. */
        for (addr = meta->addr + meta->size; addr % sizeof(u64) != 0; addr++) {
                if (unlikely(!check_canary_byte((u8 *)addr)))
                        return;
        }
        for (; addr - pageaddr < PAGE_SIZE; addr += sizeof(u64)) {
                if (unlikely(*((u64 *)addr) != KFENCE_CANARY_PATTERN_U64)) {

                        for (; addr - pageaddr < PAGE_SIZE; addr++) {
                                if (!check_canary_byte((u8 *)addr))
                                        return;
                        }
                }
        }
}

static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp,
                                  unsigned long *stack_entries, size_t num_stack_entries,
                                  u32 alloc_stack_hash)
{
        struct kfence_metadata *meta = NULL;
        unsigned long flags;
        struct slab *slab;
        void *addr;
        const bool random_right_allocate = get_random_u32_below(2);
        const bool random_fault = CONFIG_KFENCE_STRESS_TEST_FAULTS &&
                                  !get_random_u32_below(CONFIG_KFENCE_STRESS_TEST_FAULTS);

        /* Try to obtain a free object. */
        raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
        if (!list_empty(&kfence_freelist)) {
                meta = list_entry(kfence_freelist.next, struct kfence_metadata, list);
                list_del_init(&meta->list);
        }
        raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
        if (!meta) {
                atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_CAPACITY]);
                return NULL;
        }

        if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) {
                /*
                 * This is extremely unlikely -- we are reporting on a
                 * use-after-free, which locked meta->lock, and the reporting
                 * code via printk calls kmalloc() which ends up in
                 * kfence_alloc() and tries to grab the same object that we're
                 * reporting on. While it has never been observed, lockdep does
                 * report that there is a possibility of deadlock. Fix it by
                 * using trylock and bailing out gracefully.
                 */
                raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
                /* Put the object back on the freelist. */
                list_add_tail(&meta->list, &kfence_freelist);
                raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);

                return NULL;
        }

        meta->addr = metadata_to_pageaddr(meta);
        /* Unprotect if we're reusing this page. */
        if (meta->state == KFENCE_OBJECT_FREED)
                kfence_unprotect(meta->addr);

        /*
         * Note: for allocations made before RNG initialization,
         * get_random_u32_below() will always return zero. We still benefit
         * from enabling KFENCE as early as possible, even when the RNG is not
         * yet available, as this will allow KFENCE to detect bugs due to
         * earlier allocations. The only downside is that the out-of-bounds
         * accesses detected are deterministic for such allocations.
         */
        if (random_right_allocate) {
                /* Allocate on the "right" side, re-calculate address. */
                meta->addr += PAGE_SIZE - size;
                meta->addr = ALIGN_DOWN(meta->addr, cache->align);
        }

        addr = (void *)meta->addr;

        /* Update remaining metadata. */
        metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED, stack_entries, num_stack_entries);
        /* Pairs with READ_ONCE() in kfence_shutdown_cache(). */
        WRITE_ONCE(meta->cache, cache);
        meta->size = size;
        meta->alloc_stack_hash = alloc_stack_hash;
        raw_spin_unlock_irqrestore(&meta->lock, flags);

        alloc_covered_add(alloc_stack_hash, 1);

        /* Set required slab fields. */
        slab = virt_to_slab((void *)meta->addr);
        slab->slab_cache = cache;
        slab->objects = 1;

        /* Memory initialization. */
        set_canary(meta);

        /*
         * We check slab_want_init_on_alloc() ourselves, rather than letting
         * SL*B do the initialization, as otherwise we might overwrite KFENCE's
         * redzone.
         */
        if (unlikely(slab_want_init_on_alloc(gfp, cache)))
                memzero_explicit(addr, size);
        if (cache->ctor)
                cache->ctor(addr);

        if (random_fault)
                kfence_protect(meta->addr); /* Random "faults" by protecting the object. */

        atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]);
        atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCS]);

        return addr;
}

static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool zombie)
{
        struct kcsan_scoped_access assert_page_exclusive;
        unsigned long flags;
        bool init;

        raw_spin_lock_irqsave(&meta->lock, flags);

        if (!kfence_obj_allocated(meta) || meta->addr != (unsigned long)addr) {
                /* Invalid or double-free, bail out. */
                atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
                kfence_report_error((unsigned long)addr, false, NULL, meta,
                                    KFENCE_ERROR_INVALID_FREE);
                raw_spin_unlock_irqrestore(&meta->lock, flags);
                return;
        }

        /* Detect racy use-after-free, or incorrect reallocation of this page by KFENCE. */
        kcsan_begin_scoped_access((void *)ALIGN_DOWN((unsigned long)addr, PAGE_SIZE), PAGE_SIZE,
                                  KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT,
                                  &assert_page_exclusive);

        if (CONFIG_KFENCE_STRESS_TEST_FAULTS)
                kfence_unprotect((unsigned long)addr); /* To check canary bytes. */

        /* Restore page protection if there was an OOB access. */
        if (meta->unprotected_page) {
                memzero_explicit((void *)ALIGN_DOWN(meta->unprotected_page, PAGE_SIZE), PAGE_SIZE);
                kfence_protect(meta->unprotected_page);
                meta->unprotected_page = 0;
        }

        /* Mark the object as freed. */
        metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0);
        init = slab_want_init_on_free(meta->cache);
        raw_spin_unlock_irqrestore(&meta->lock, flags);

        alloc_covered_add(meta->alloc_stack_hash, -1);

        /* Check canary bytes for memory corruption. */
        check_canary(meta);

        /*
         * Clear memory if init-on-free is set. While we protect the page, the
         * data is still there, and after a use-after-free is detected, we
         * unprotect the page, so the data is still accessible.
         */
        if (!zombie && unlikely(init))
                memzero_explicit(addr, meta->size);

        /* Protect to detect use-after-frees. */
        kfence_protect((unsigned long)addr);

        kcsan_end_scoped_access(&assert_page_exclusive);
        if (!zombie) {
                /* Add it to the tail of the freelist for reuse. */
                raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
                KFENCE_WARN_ON(!list_empty(&meta->list));
                list_add_tail(&meta->list, &kfence_freelist);
                raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);

                atomic_long_dec(&counters[KFENCE_COUNTER_ALLOCATED]);
                atomic_long_inc(&counters[KFENCE_COUNTER_FREES]);
        } else {
                /* See kfence_shutdown_cache(). */
                atomic_long_inc(&counters[KFENCE_COUNTER_ZOMBIES]);
        }
}

static void rcu_guarded_free(struct rcu_head *h)
{
        struct kfence_metadata *meta = container_of(h, struct kfence_metadata, rcu_head);

        kfence_guarded_free((void *)meta->addr, meta, false);
}

/*
 * Initialization of the KFENCE pool after its allocation.
 * Returns 0 on success; otherwise returns the address up to
 * which partial initialization succeeded.
 */
static unsigned long kfence_init_pool(void)
{
        unsigned long addr, start_pfn;
        int i, rand;

        if (!arch_kfence_init_pool())
                return (unsigned long)__kfence_pool;

        addr = (unsigned long)__kfence_pool;
        start_pfn = PHYS_PFN(virt_to_phys(__kfence_pool));

        /*
         * Set up object pages: they must have PGTY_slab set to avoid freeing
         * them as real pages.
         *
         * We also want to avoid inserting kfence_free() in the kfree()
         * fast-path in SLUB, and therefore need to ensure kfree() correctly
         * enters __slab_free() slow-path.
         */
        for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
                struct page *page;

                if (!i || (i % 2))
                        continue;

                page = pfn_to_page(start_pfn + i);
                __SetPageSlab(page);
#ifdef CONFIG_MEMCG
                struct slab *slab = page_slab(page);
                slab->obj_exts = (unsigned long)&kfence_metadata_init[i / 2 - 1].obj_exts |
                                 MEMCG_DATA_OBJEXTS;
#endif
        }

        /*
         * Protect the first 2 pages. The first page is mostly unnecessary, and
         * merely serves as an extended guard page. However, adding one
         * additional page in the beginning gives us an even number of pages,
         * which simplifies the mapping of address to metadata index.
         */
        for (i = 0; i < 2; i++) {
                if (unlikely(!kfence_protect(addr)))
                        return addr;

                addr += PAGE_SIZE;
        }

        for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
                struct kfence_metadata *meta = &kfence_metadata_init[i];

                /* Initialize metadata. */
                INIT_LIST_HEAD(&meta->list);
                raw_spin_lock_init(&meta->lock);
                meta->state = KFENCE_OBJECT_UNUSED;
                /* Use addr to randomize the freelist. */
                meta->addr = i;

                /* Protect the right redzone. */
                if (unlikely(!kfence_protect(addr + 2 * i * PAGE_SIZE + PAGE_SIZE)))
                        goto reset_slab;
        }

        for (i = CONFIG_KFENCE_NUM_OBJECTS; i > 0; i--) {
                rand = get_random_u32_below(i);
                swap(kfence_metadata_init[i - 1].addr, kfence_metadata_init[rand].addr);
        }

        for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
                struct kfence_metadata *meta_1 = &kfence_metadata_init[i];
                struct kfence_metadata *meta_2 = &kfence_metadata_init[meta_1->addr];

                list_add_tail(&meta_2->list, &kfence_freelist);
        }
        for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
                kfence_metadata_init[i].addr = addr;
                addr += 2 * PAGE_SIZE;
        }

        /*
         * Make kfence_metadata visible only when initialization is successful.
         * Otherwise, if the initialization fails and kfence_metadata is freed,
         * it may cause UAF in kfence_shutdown_cache().
         */
        smp_store_release(&kfence_metadata, kfence_metadata_init);
        return 0;

reset_slab:
        addr += 2 * i * PAGE_SIZE;
        for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
                struct page *page;

                if (!i || (i % 2))
                        continue;

                page = pfn_to_page(start_pfn + i);
#ifdef CONFIG_MEMCG
                struct slab *slab = page_slab(page);
                slab->obj_exts = 0;
#endif
                __ClearPageSlab(page);
        }

        return addr;
}

static bool __init kfence_init_pool_early(void)
{
        unsigned long addr;

        if (!__kfence_pool)
                return false;

        addr = kfence_init_pool();

        if (!addr) {
                /*
                 * The pool is live and will never be deallocated from this point on.
                 * Ignore the pool object from the kmemleak phys object tree, as it would
                 * otherwise overlap with allocations returned by kfence_alloc(), which
                 * are registered with kmemleak through the slab post-alloc hook.
                 */
                kmemleak_ignore_phys(__pa(__kfence_pool));
                return true;
        }

        /*
         * Only release unprotected pages, and do not try to go back and change
         * page attributes due to risk of failing to do so as well. If changing
         * page attributes for some pages fails, it is very likely that it also
         * fails for the first page, and therefore expect addr==__kfence_pool in
         * most failure cases.
         */
        memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
        __kfence_pool = NULL;

        memblock_free_late(__pa(kfence_metadata_init), KFENCE_METADATA_SIZE);
        kfence_metadata_init = NULL;

        return false;
}

/* === DebugFS Interface ==================================================== */

static int stats_show(struct seq_file *seq, void *v)
{
        int i;

        seq_printf(seq, "enabled: %i\n", READ_ONCE(kfence_enabled));
        for (i = 0; i < KFENCE_COUNTER_COUNT; i++)
                seq_printf(seq, "%s: %ld\n", counter_names[i], atomic_long_read(&counters[i]));

        return 0;
}
DEFINE_SHOW_ATTRIBUTE(stats);

/*
 * debugfs seq_file operations for /sys/kernel/debug/kfence/objects.
 * start_object() and next_object() return the object index + 1, because NULL is used
 * to stop iteration.
 */
static void *start_object(struct seq_file *seq, loff_t *pos)
{
        if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
                return (void *)((long)*pos + 1);
        return NULL;
}

static void stop_object(struct seq_file *seq, void *v)
{
}

static void *next_object(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
                return (void *)((long)*pos + 1);
        return NULL;
}

static int show_object(struct seq_file *seq, void *v)
{
        struct kfence_metadata *meta = &kfence_metadata[(long)v - 1];
        unsigned long flags;

        raw_spin_lock_irqsave(&meta->lock, flags);
        kfence_print_object(seq, meta);
        raw_spin_unlock_irqrestore(&meta->lock, flags);
        seq_puts(seq, "---------------------------------\n");

        return 0;
}

static const struct seq_operations objects_sops = {
        .start = start_object,
        .next = next_object,
        .stop = stop_object,
        .show = show_object,
};
DEFINE_SEQ_ATTRIBUTE(objects);

static int kfence_debugfs_init(void)
{
        struct dentry *kfence_dir;

        if (!READ_ONCE(kfence_enabled))
                return 0;

        kfence_dir = debugfs_create_dir("kfence", NULL);
        debugfs_create_file("stats", 0444, kfence_dir, NULL, &stats_fops);
        debugfs_create_file("objects", 0400, kfence_dir, NULL, &objects_fops);
        return 0;
}

late_initcall(kfence_debugfs_init);

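/*
 * Example (illustrative; counts will differ): "cat /sys/kernel/debug/kfence/stats"
 * prints the "enabled" flag followed by one "<name>: <count>" line per
 * counter_names[] entry, e.g.
 *
 *	enabled: 1
 *	currently allocated: 42
 *	total allocations: 3130
 *	...
 *
 * while /sys/kernel/debug/kfence/objects dumps per-object state via
 * kfence_print_object().
 */
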
/* === Panic Notifier ====================================================== */

static void kfence_check_all_canary(void)
{
        int i;

        for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
                struct kfence_metadata *meta = &kfence_metadata[i];

                if (kfence_obj_allocated(meta))
                        check_canary(meta);
        }
}

static int kfence_check_canary_callback(struct notifier_block *nb,
                                        unsigned long reason, void *arg)
{
        kfence_check_all_canary();
        return NOTIFY_OK;
}

static struct notifier_block kfence_check_canary_notifier = {
        .notifier_call = kfence_check_canary_callback,
};

/* === Allocation Gate Timer ================================================ */

static struct delayed_work kfence_timer;

#ifdef CONFIG_KFENCE_STATIC_KEYS
/* Wait queue to wake up allocation-gate timer task. */
static DECLARE_WAIT_QUEUE_HEAD(allocation_wait);

static int kfence_reboot_callback(struct notifier_block *nb,
                                  unsigned long action, void *data)
{
        /*
         * Disable kfence to avoid static keys IPI synchronization during
         * late shutdown/kexec
         */
        WRITE_ONCE(kfence_enabled, false);
        /* Cancel any pending timer work */
        cancel_delayed_work(&kfence_timer);
        /*
         * Wake up any blocked toggle_allocation_gate() so it can complete
         * early while the system is still able to handle IPIs.
         */
        wake_up(&allocation_wait);

        return NOTIFY_OK;
}

static struct notifier_block kfence_reboot_notifier = {
        .notifier_call = kfence_reboot_callback,
        .priority = INT_MAX, /* Run early to stop timers ASAP */
};

static void wake_up_kfence_timer(struct irq_work *work)
{
        wake_up(&allocation_wait);
}
static DEFINE_IRQ_WORK(wake_up_kfence_timer_work, wake_up_kfence_timer);
#endif

/*
 * Set up delayed work, which will enable and disable the static key. We need to
 * use a work queue (rather than a simple timer), since enabling and disabling a
 * static key cannot be done from an interrupt.
 *
 * Note: Toggling a static branch currently causes IPIs, and here we'll end up
 * with a total of 2 IPIs to all CPUs. If this ends up a problem in future (with
 * more aggressive sampling intervals), we could get away with a variant that
 * avoids IPIs, at the cost of not immediately capturing allocations if the
 * instructions remain cached.
 */
static void toggle_allocation_gate(struct work_struct *work)
{
        if (!READ_ONCE(kfence_enabled))
                return;

        atomic_set(&kfence_allocation_gate, -kfence_burst);
#ifdef CONFIG_KFENCE_STATIC_KEYS
        /* Enable static key, and await allocation to happen. */
        static_branch_enable(&kfence_allocation_key);

        wait_event_idle(allocation_wait,
                        atomic_read(&kfence_allocation_gate) > 0 ||
                        !READ_ONCE(kfence_enabled));

        /* Disable static key and reset timer. */
        static_branch_disable(&kfence_allocation_key);
#endif
        queue_delayed_work(system_unbound_wq, &kfence_timer,
                           msecs_to_jiffies(kfence_sample_interval));
}

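/*
 * Sampling-cycle sketch (illustrative): each run of the delayed work opens the
 * gate by setting kfence_allocation_gate to -kfence_burst and enables the
 * static key; it then sleeps until the gate count turns positive, i.e. until
 * 1 + kfence_burst allocation attempts have passed the gate in
 * __kfence_alloc(), closes the gate again, and re-arms itself
 * kfence_sample_interval milliseconds later.
 */
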
/* === Public interface ===================================================== */

void __init kfence_alloc_pool_and_metadata(void)
{
        if (!kfence_sample_interval)
                return;

        /*
         * If the pool has already been initialized by arch, there is no need to
         * re-allocate the memory pool.
         */
        if (!__kfence_pool)
                __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);

        if (!__kfence_pool) {
                pr_err("failed to allocate pool\n");
                return;
        }

        /* The memory allocated by memblock has been zeroed out. */
        kfence_metadata_init = memblock_alloc(KFENCE_METADATA_SIZE, PAGE_SIZE);
        if (!kfence_metadata_init) {
                pr_err("failed to allocate metadata\n");
                memblock_free(__kfence_pool, KFENCE_POOL_SIZE);
                __kfence_pool = NULL;
        }
}

static void kfence_init_enable(void)
{
        if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
                static_branch_enable(&kfence_allocation_key);

        if (kfence_deferrable)
                INIT_DEFERRABLE_WORK(&kfence_timer, toggle_allocation_gate);
        else
                INIT_DELAYED_WORK(&kfence_timer, toggle_allocation_gate);

        if (kfence_check_on_panic)
                atomic_notifier_chain_register(&panic_notifier_list, &kfence_check_canary_notifier);

#ifdef CONFIG_KFENCE_STATIC_KEYS
        register_reboot_notifier(&kfence_reboot_notifier);
#endif

        WRITE_ONCE(kfence_enabled, true);
        queue_delayed_work(system_unbound_wq, &kfence_timer, 0);

        pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
                CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
                (void *)(__kfence_pool + KFENCE_POOL_SIZE));
}

void __init kfence_init(void)
{
        stack_hash_seed = get_random_u32();

        /* Setting kfence_sample_interval to 0 on boot disables KFENCE. */
        if (!kfence_sample_interval)
                return;

        if (!kfence_init_pool_early()) {
                pr_err("%s failed\n", __func__);
                return;
        }

        kfence_init_enable();
}

static int kfence_init_late(void)
{
        const unsigned long nr_pages_pool = KFENCE_POOL_SIZE / PAGE_SIZE;
        const unsigned long nr_pages_meta = KFENCE_METADATA_SIZE / PAGE_SIZE;
        unsigned long addr = (unsigned long)__kfence_pool;
        unsigned long free_size = KFENCE_POOL_SIZE;
        int err = -ENOMEM;

#ifdef CONFIG_CONTIG_ALLOC
        struct page *pages;

        pages = alloc_contig_pages(nr_pages_pool, GFP_KERNEL, first_online_node,
                                   NULL);
        if (!pages)
                return -ENOMEM;

        __kfence_pool = page_to_virt(pages);
        pages = alloc_contig_pages(nr_pages_meta, GFP_KERNEL, first_online_node,
                                   NULL);
        if (pages)
                kfence_metadata_init = page_to_virt(pages);
#else
        if (nr_pages_pool > MAX_ORDER_NR_PAGES ||
            nr_pages_meta > MAX_ORDER_NR_PAGES) {
                pr_warn("KFENCE_NUM_OBJECTS too large for buddy allocator\n");
                return -EINVAL;
        }

        __kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL);
        if (!__kfence_pool)
                return -ENOMEM;

        kfence_metadata_init = alloc_pages_exact(KFENCE_METADATA_SIZE, GFP_KERNEL);
#endif

        if (!kfence_metadata_init)
                goto free_pool;

        memzero_explicit(kfence_metadata_init, KFENCE_METADATA_SIZE);
        addr = kfence_init_pool();
        if (!addr) {
                kfence_init_enable();
                kfence_debugfs_init();
                return 0;
        }

        pr_err("%s failed\n", __func__);
        free_size = KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool);
        err = -EBUSY;

#ifdef CONFIG_CONTIG_ALLOC
        free_contig_range(page_to_pfn(virt_to_page((void *)kfence_metadata_init)),
                          nr_pages_meta);
free_pool:
        free_contig_range(page_to_pfn(virt_to_page((void *)addr)),
                          free_size / PAGE_SIZE);
#else
        free_pages_exact((void *)kfence_metadata_init, KFENCE_METADATA_SIZE);
free_pool:
        free_pages_exact((void *)addr, free_size);
#endif

        kfence_metadata_init = NULL;
        __kfence_pool = NULL;
        return err;
}

static int kfence_enable_late(void)
{
        if (!__kfence_pool)
                return kfence_init_late();

        WRITE_ONCE(kfence_enabled, true);
        queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
        pr_info("re-enabled\n");
        return 0;
}

void kfence_shutdown_cache(struct kmem_cache *s)
{
        unsigned long flags;
        struct kfence_metadata *meta;
        int i;

        /* Pairs with release in kfence_init_pool(). */
        if (!smp_load_acquire(&kfence_metadata))
                return;

        for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
                bool in_use;

                meta = &kfence_metadata[i];

                /*
                 * If we observe some inconsistent cache and state pair where we
                 * should have returned false here, cache destruction is racing
                 * with either kmem_cache_alloc() or kmem_cache_free(). Taking
                 * the lock will not help, as different critical section
                 * serialization will have the same outcome.
                 */
                if (READ_ONCE(meta->cache) != s || !kfence_obj_allocated(meta))
                        continue;

                raw_spin_lock_irqsave(&meta->lock, flags);
                in_use = meta->cache == s && kfence_obj_allocated(meta);
                raw_spin_unlock_irqrestore(&meta->lock, flags);

                if (in_use) {
                        /*
                         * This cache still has allocations, and we should not
                         * release them back into the freelist so they can still
                         * safely be used and retain the kernel's default
                         * behaviour of keeping the allocations alive (leak the
                         * cache); however, they effectively become "zombie
                         * allocations" as the KFENCE objects are the only ones
                         * still in use and the owning cache is being destroyed.
                         *
                         * We mark them freed, so that any subsequent use shows
                         * more useful error messages that will include stack
                         * traces of the user of the object, the original
                         * allocation, and caller to shutdown_cache().
                         */
                        kfence_guarded_free((void *)meta->addr, meta, /*zombie=*/true);
                }
        }

        for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
                meta = &kfence_metadata[i];

                /* See above. */
                if (READ_ONCE(meta->cache) != s || READ_ONCE(meta->state) != KFENCE_OBJECT_FREED)
                        continue;

                raw_spin_lock_irqsave(&meta->lock, flags);
                if (meta->cache == s && meta->state == KFENCE_OBJECT_FREED)
                        meta->cache = NULL;
                raw_spin_unlock_irqrestore(&meta->lock, flags);
        }
}

void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
{
        unsigned long stack_entries[KFENCE_STACK_DEPTH];
        size_t num_stack_entries;
        u32 alloc_stack_hash;
        int allocation_gate;

        /*
         * Perform size check before switching kfence_allocation_gate, so that
         * we don't disable KFENCE without making an allocation.
         */
        if (size > PAGE_SIZE) {
                atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
                return NULL;
        }

        /*
         * Skip allocations from non-default zones, including DMA. We cannot
         * guarantee that pages in the KFENCE pool will have the requested
         * properties (e.g. reside in DMAable memory).
         */
        if ((flags & GFP_ZONEMASK) ||
            ((flags & __GFP_THISNODE) && num_online_nodes() > 1) ||
            (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) {
                atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
                return NULL;
        }

        /*
         * Skip allocations for this slab, if KFENCE has been disabled for
         * this slab.
         */
        if (s->flags & SLAB_SKIP_KFENCE)
                return NULL;

        allocation_gate = atomic_inc_return(&kfence_allocation_gate);
        if (allocation_gate > 1)
                return NULL;
#ifdef CONFIG_KFENCE_STATIC_KEYS
        /*
         * waitqueue_active() is fully ordered after the update of
         * kfence_allocation_gate per atomic_inc_return().
         */
        if (allocation_gate == 1 && waitqueue_active(&allocation_wait)) {
                /*
                 * Calling wake_up() here may deadlock when allocations happen
                 * from within timer code. Use an irq_work to defer it.
                 */
                irq_work_queue(&wake_up_kfence_timer_work);
        }
#endif

        if (!READ_ONCE(kfence_enabled))
                return NULL;

        num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 0);

        /*
         * Do expensive check for coverage of allocation in slow-path after
         * allocation_gate has already become non-zero, even though it might
         * mean not making any allocation within a given sample interval.
         *
         * This ensures reasonable allocation coverage when the pool is almost
         * full, including avoiding long-lived allocations of the same source
         * filling up the pool (e.g. pagecache allocations).
         */
        alloc_stack_hash = get_alloc_stack_hash(stack_entries, num_stack_entries);
        if (should_skip_covered() && alloc_covered_contains(alloc_stack_hash)) {
                atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_COVERED]);
                return NULL;
        }

        return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries,
                                    alloc_stack_hash);
}

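/*
 * Worked example of the gate arithmetic above (assuming kfence.burst=2):
 * toggle_allocation_gate() sets kfence_allocation_gate to -2, so the first
 * three callers of __kfence_alloc() see atomic_inc_return() values of -1, 0
 * and 1 (all <= 1) and may be served from the pool (subject to the other
 * checks above); the fourth sees 2 and bails out until the next sample
 * interval. With the default burst of 0, exactly one allocation passes the
 * gate per sample.
 */
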
size_t kfence_ksize(const void *addr)
{
        const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);

        /*
         * Read locklessly -- if there is a race with __kfence_alloc(), this is
         * either a use-after-free or invalid access.
         */
        return meta ? meta->size : 0;
}

void *kfence_object_start(const void *addr)
{
        const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);

        /*
         * Read locklessly -- if there is a race with __kfence_alloc(), this is
         * either a use-after-free or invalid access.
         */
        return meta ? (void *)meta->addr : NULL;
}

void __kfence_free(void *addr)
{
        struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);

#ifdef CONFIG_MEMCG
        KFENCE_WARN_ON(meta->obj_exts.objcg);
#endif
        /*
         * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing
         * the object, as the object page may be recycled for other-typed
         * objects once it has been freed. meta->cache may be NULL if the cache
         * was destroyed.
         * Save the stack trace here so that reports show where the user freed
         * the object.
         */
        if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU))) {
                unsigned long flags;

                raw_spin_lock_irqsave(&meta->lock, flags);
                metadata_update_state(meta, KFENCE_OBJECT_RCU_FREEING, NULL, 0);
                raw_spin_unlock_irqrestore(&meta->lock, flags);
                call_rcu(&meta->rcu_head, rcu_guarded_free);
        } else {
                kfence_guarded_free(addr, meta, false);
        }
}

bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs)
{
        const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE;
        struct kfence_metadata *to_report = NULL;
        enum kfence_error_type error_type;
        unsigned long flags;

        if (!is_kfence_address((void *)addr))
                return false;

        if (!READ_ONCE(kfence_enabled)) /* If disabled at runtime ... */
                return kfence_unprotect(addr); /* ... unprotect and proceed. */

        atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);

        if (page_index % 2) {
                /* This is a redzone, report a buffer overflow. */
                struct kfence_metadata *meta;
                int distance = 0;

                meta = addr_to_metadata(addr - PAGE_SIZE);
                if (meta && kfence_obj_allocated(meta)) {
                        to_report = meta;
                        /* Data race ok; distance calculation approximate. */
                        distance = addr - data_race(meta->addr + meta->size);
                }

                meta = addr_to_metadata(addr + PAGE_SIZE);
                if (meta && kfence_obj_allocated(meta)) {
                        /* Data race ok; distance calculation approximate. */
                        if (!to_report || distance > data_race(meta->addr) - addr)
                                to_report = meta;
                }

                if (!to_report)
                        goto out;

                raw_spin_lock_irqsave(&to_report->lock, flags);
                to_report->unprotected_page = addr;
                error_type = KFENCE_ERROR_OOB;

                /*
                 * If the object was freed before we took the lock we can still
                 * report this as an OOB -- the report will simply show the
                 * stacktrace of the free as well.
                 */
        } else {
                to_report = addr_to_metadata(addr);
                if (!to_report)
                        goto out;

                raw_spin_lock_irqsave(&to_report->lock, flags);
                error_type = KFENCE_ERROR_UAF;
                /*
                 * We may race with __kfence_alloc(), and it is possible that a
                 * freed object may be reallocated. We simply report this as a
                 * use-after-free, with the stack trace showing the place where
                 * the object was re-allocated.
                 */
        }

out:
        if (to_report) {
                kfence_report_error(addr, is_write, regs, to_report, error_type);
                raw_spin_unlock_irqrestore(&to_report->lock, flags);
        } else {
                /* This may be a UAF or OOB access, but we can't be sure. */
                kfence_report_error(addr, is_write, regs, NULL, KFENCE_ERROR_INVALID);
        }

        return kfence_unprotect(addr); /* Unprotect and let access proceed. */
}