GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/hw_breakpoint.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) 2009 IBM Corporation
 * Copyright (C) 2009 Frederic Weisbecker <[email protected]>
 *
 * Authors: Alan Stern <[email protected]>
 *          K.Prasad <[email protected]>
 *          Frederic Weisbecker <[email protected]>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 */

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/irqflags.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/smp.h>

#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <asm/user.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>

/* Per cpu debug control register value */
DEFINE_PER_CPU(unsigned long, cpu_dr7);
EXPORT_PER_CPU_SYMBOL(cpu_dr7);

/* Per cpu debug address registers values */
static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);

/*
 * Stores the breakpoints currently in use on each breakpoint address
 * register for each CPU
 */
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);


static inline unsigned long
__encode_dr7(int drnum, unsigned int len, unsigned int type)
{
        unsigned long bp_info;

        bp_info = (len | type) & 0xf;
        bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
        bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

        return bp_info;
}

/*
 * Encode the length, type, Exact, and Enable bits for a particular breakpoint
 * as stored in debug register 7.
 */
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
{
        return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN;
}

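/*
 * A minimal worked example of the encoding above, assuming the usual
 * header values (DR_CONTROL_SHIFT == 16, DR_CONTROL_SIZE == 4,
 * DR_ENABLE_SIZE == 2, DR_GLOBAL_ENABLE == 0x2, DR_GLOBAL_SLOWDOWN == 0x200,
 * X86_BREAKPOINT_LEN_4 == 0x4c, X86_BREAKPOINT_WRITE == 0x81):
 *
 *   encode_dr7(0, X86_BREAKPOINT_LEN_4, X86_BREAKPOINT_WRITE)
 *     = (((0x4c | 0x81) & 0xf) << 16)  // 0xd0000: LEN0 = 0b11, R/W0 = 0b01
 *     | (0x2 << 0)                     // G0: globally enable slot 0
 *     | 0x200                          // GE (DR_GLOBAL_SLOWDOWN)
 *     = 0xd0202
 */
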
/*
 * Decode the length and type bits for a particular breakpoint as
 * stored in debug register 7.  Return the "enabled" status.
 */
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
{
        int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);

        *len = (bp_info & 0xc) | 0x40;
        *type = (bp_info & 0x3) | 0x80;

        return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}

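/*
 * Note that, assuming the current asm/hw_breakpoint.h encodings, the 0x40
 * and 0x80 ORed in above are the bases of the X86_BREAKPOINT_LEN_* and
 * X86_BREAKPOINT_* type values, so decode_dr7() hands back the same
 * symbolic encodings that encode_dr7() accepts, rebuilt from the raw
 * 2-bit length and type fields of DR7.
 */
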
/*
 * Install a perf counter breakpoint.
 *
 * We seek a free debug address register and use it for this
 * breakpoint.  Finally, we enable it in the debug control register.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
int arch_install_hw_breakpoint(struct perf_event *bp)
{
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        unsigned long *dr7;
        int i;

        lockdep_assert_irqs_disabled();

        for (i = 0; i < HBP_NUM; i++) {
                struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

                if (!*slot) {
                        *slot = bp;
                        break;
                }
        }

        if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
                return -EBUSY;

        set_debugreg(info->address, i);
        __this_cpu_write(cpu_debugreg[i], info->address);

        dr7 = this_cpu_ptr(&cpu_dr7);
        *dr7 |= encode_dr7(i, info->len, info->type);

        /*
         * Ensure we first write cpu_dr7 before we set the DR7 register.
         * This ensures an NMI never sees cpu_dr7 as 0 when DR7 is not.
         */
        barrier();

        set_debugreg(*dr7, 7);
        if (info->mask)
                amd_set_dr_addr_mask(info->mask, i);

        return 0;
}

/*
 * Uninstall the breakpoint contained in the given counter.
 *
 * First we search the debug address register it uses and then we disable
 * it.
 *
 * Atomic: we hold the counter->ctx->lock and we only handle variables
 * and registers local to this cpu.
 */
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        unsigned long dr7;
        int i;

        lockdep_assert_irqs_disabled();

        for (i = 0; i < HBP_NUM; i++) {
                struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);

                if (*slot == bp) {
                        *slot = NULL;
                        break;
                }
        }

        if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
                return;

        dr7 = this_cpu_read(cpu_dr7);
        dr7 &= ~__encode_dr7(i, info->len, info->type);

        set_debugreg(dr7, 7);
        if (info->mask)
                amd_set_dr_addr_mask(0, i);

        /*
         * Ensure the write to cpu_dr7 is after we've set the DR7 register.
         * This ensures an NMI never sees cpu_dr7 as 0 when DR7 is not.
         */
        barrier();

        this_cpu_write(cpu_dr7, dr7);
}

static int arch_bp_generic_len(int x86_len)
{
        switch (x86_len) {
        case X86_BREAKPOINT_LEN_1:
                return HW_BREAKPOINT_LEN_1;
        case X86_BREAKPOINT_LEN_2:
                return HW_BREAKPOINT_LEN_2;
        case X86_BREAKPOINT_LEN_4:
                return HW_BREAKPOINT_LEN_4;
#ifdef CONFIG_X86_64
        case X86_BREAKPOINT_LEN_8:
                return HW_BREAKPOINT_LEN_8;
#endif
        default:
                return -EINVAL;
        }
}

int arch_bp_generic_fields(int x86_len, int x86_type,
                           int *gen_len, int *gen_type)
{
        int len;

        /* Type */
        switch (x86_type) {
        case X86_BREAKPOINT_EXECUTE:
                if (x86_len != X86_BREAKPOINT_LEN_X)
                        return -EINVAL;

                *gen_type = HW_BREAKPOINT_X;
                *gen_len = sizeof(long);
                return 0;
        case X86_BREAKPOINT_WRITE:
                *gen_type = HW_BREAKPOINT_W;
                break;
        case X86_BREAKPOINT_RW:
                *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
                break;
        default:
                return -EINVAL;
        }

        /* Len */
        len = arch_bp_generic_len(x86_len);
        if (len < 0)
                return -EINVAL;
        *gen_len = len;

        return 0;
}

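/*
 * A sketch of the expected mapping (not an additional code path): with
 *
 *   int gen_len, gen_type;
 *   arch_bp_generic_fields(X86_BREAKPOINT_LEN_4, X86_BREAKPOINT_WRITE,
 *                          &gen_len, &gen_type);
 *
 * the call returns 0 with gen_len == HW_BREAKPOINT_LEN_4 and
 * gen_type == HW_BREAKPOINT_W.
 */
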
/*
 * Check for virtual address in kernel space.
 */
int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
        unsigned long va;
        int len;

        va = hw->address;
        len = arch_bp_generic_len(hw->len);
        WARN_ON_ONCE(len < 0);

        /*
         * We don't need to worry about va + len - 1 overflowing:
         * we already require that va is aligned to a multiple of len.
         */
        return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
}

/*
 * Checks whether the range [addr, end] overlaps the area [base, base + size).
 */
static inline bool within_area(unsigned long addr, unsigned long end,
                               unsigned long base, unsigned long size)
{
        return end >= base && addr < (base + size);
}

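/*
 * For example, within_area(0x1000, 0x1003, 0x1004, 0x10) is false (the
 * inclusive range 0x1000-0x1003 ends just below the area starting at
 * 0x1004), while within_area(0x1000, 0x1004, 0x1004, 0x10) is true:
 * "end" is inclusive, the area's upper bound base + size is exclusive.
 */
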
/*
 * Checks whether the range from addr to end, inclusive, overlaps the fixed
 * mapped CPU entry area range or other ranges used for CPU entry.
 */
static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
{
        int cpu;

        /* CPU entry area is always used for CPU entry */
        if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
                        CPU_ENTRY_AREA_MAP_SIZE))
                return true;

        /*
         * When FSGSBASE is enabled, paranoid_entry() fetches the per-CPU
         * GSBASE value via __per_cpu_offset or pcpu_unit_offsets.
         */
#ifdef CONFIG_SMP
        if (within_area(addr, end, (unsigned long)__per_cpu_offset,
                        sizeof(unsigned long) * nr_cpu_ids))
                return true;
#else
        if (within_area(addr, end, (unsigned long)&pcpu_unit_offsets,
                        sizeof(pcpu_unit_offsets)))
                return true;
#endif

        for_each_possible_cpu(cpu) {
                /* The original rw GDT is being used after load_direct_gdt() */
                if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
                                GDT_SIZE))
                        return true;

                /*
                 * cpu_tss_rw is not directly referenced by hardware, but
                 * cpu_tss_rw is also used in CPU entry code.
                 */
                if (within_area(addr, end,
                                (unsigned long)&per_cpu(cpu_tss_rw, cpu),
                                sizeof(struct tss_struct)))
                        return true;

                /*
                 * cpu_tlbstate.user_pcid_flush_mask is used for CPU entry.
                 * If a data breakpoint is placed on it, it will cause an
                 * unwanted #DB.  Protect the full cpu_tlbstate structure to
                 * be sure.
                 */
                if (within_area(addr, end,
                                (unsigned long)&per_cpu(cpu_tlbstate, cpu),
                                sizeof(struct tlb_state)))
                        return true;

                /*
                 * When in a guest (X86_FEATURE_HYPERVISOR), local_db_save()
                 * will read the per-cpu cpu_dr7 before clearing the DR7
                 * register.
                 */
                if (within_area(addr, end, (unsigned long)&per_cpu(cpu_dr7, cpu),
                                sizeof(cpu_dr7)))
                        return true;
        }

        return false;
}

static int arch_build_bp_info(struct perf_event *bp,
                              const struct perf_event_attr *attr,
                              struct arch_hw_breakpoint *hw)
{
        unsigned long bp_end;

        bp_end = attr->bp_addr + attr->bp_len - 1;
        if (bp_end < attr->bp_addr)
                return -EINVAL;

        /*
         * Prevent any breakpoint of any type that overlaps the CPU
         * entry area and data.  This protects the IST stacks and also
         * reduces the chance that we ever find out what happens if
         * there's a data breakpoint on the GDT, IDT, or TSS.
         */
        if (within_cpu_entry(attr->bp_addr, bp_end))
                return -EINVAL;

        hw->address = attr->bp_addr;
        hw->mask = 0;

        /* Type */
        switch (attr->bp_type) {
        case HW_BREAKPOINT_W:
                hw->type = X86_BREAKPOINT_WRITE;
                break;
        case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
                hw->type = X86_BREAKPOINT_RW;
                break;
        case HW_BREAKPOINT_X:
                /*
                 * We don't allow kernel breakpoints in places that are not
                 * acceptable for kprobes.  On non-kprobes kernels, we don't
                 * allow kernel breakpoints at all.
                 */
                if (attr->bp_addr >= TASK_SIZE_MAX) {
                        if (within_kprobe_blacklist(attr->bp_addr))
                                return -EINVAL;
                }

                hw->type = X86_BREAKPOINT_EXECUTE;
                /*
                 * x86 instruction breakpoints need to have a specific,
                 * undefined length.  But we still need to check that
                 * userspace is not trying to set up an unsupported length,
                 * for example to get a range breakpoint.
                 */
                if (attr->bp_len == sizeof(long)) {
                        hw->len = X86_BREAKPOINT_LEN_X;
                        return 0;
                }
                fallthrough;
        default:
                return -EINVAL;
        }

        /* Len */
        switch (attr->bp_len) {
        case HW_BREAKPOINT_LEN_1:
                hw->len = X86_BREAKPOINT_LEN_1;
                break;
        case HW_BREAKPOINT_LEN_2:
                hw->len = X86_BREAKPOINT_LEN_2;
                break;
        case HW_BREAKPOINT_LEN_4:
                hw->len = X86_BREAKPOINT_LEN_4;
                break;
#ifdef CONFIG_X86_64
        case HW_BREAKPOINT_LEN_8:
                hw->len = X86_BREAKPOINT_LEN_8;
                break;
#endif
        default:
                /* AMD range breakpoint */
                if (!is_power_of_2(attr->bp_len))
                        return -EINVAL;
                if (attr->bp_addr & (attr->bp_len - 1))
                        return -EINVAL;

                if (!boot_cpu_has(X86_FEATURE_BPEXT))
                        return -EOPNOTSUPP;

                /*
                 * It's impossible to use a range breakpoint to fake out
                 * user vs kernel detection because bp_len - 1 can't
                 * have the high bit set.  If we ever allow range instruction
                 * breakpoints, then we'll have to check for kprobe-blacklisted
                 * addresses anywhere in the range.
                 */
                hw->mask = attr->bp_len - 1;
                hw->len = X86_BREAKPOINT_LEN_1;
        }

        return 0;
}
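
/*
 * As a sketch of the AMD range-breakpoint path above: a request with
 * bp_len == 16 on a 16-byte-aligned bp_addr (and X86_FEATURE_BPEXT
 * available) ends up with hw->mask == 0xf and hw->len ==
 * X86_BREAKPOINT_LEN_1, i.e. one ordinary DR7 slot plus an address mask
 * covering the low four address bits.
 */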

/*
 * Validate the arch-specific HW Breakpoint register settings
 */
int hw_breakpoint_arch_parse(struct perf_event *bp,
                             const struct perf_event_attr *attr,
                             struct arch_hw_breakpoint *hw)
{
        unsigned int align;
        int ret;


        ret = arch_build_bp_info(bp, attr, hw);
        if (ret)
                return ret;

        switch (hw->len) {
        case X86_BREAKPOINT_LEN_1:
                align = 0;
                if (hw->mask)
                        align = hw->mask;
                break;
        case X86_BREAKPOINT_LEN_2:
                align = 1;
                break;
        case X86_BREAKPOINT_LEN_4:
                align = 3;
                break;
#ifdef CONFIG_X86_64
        case X86_BREAKPOINT_LEN_8:
                align = 7;
                break;
#endif
        default:
                WARN_ON_ONCE(1);
                return -EINVAL;
        }

        /*
         * Check that the low-order bits of the address are appropriate
         * for the alignment implied by len.
         */
        if (hw->address & align)
                return -EINVAL;

        return 0;
}
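
/*
 * Example of the alignment check above: a 4-byte breakpoint
 * (X86_BREAKPOINT_LEN_4) uses align == 3, so an address such as 0x1002
 * (0x1002 & 3 != 0) is rejected with -EINVAL, while 0x1004 passes.
 */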

/*
 * Release the user breakpoints used by ptrace
 */
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
        int i;
        struct thread_struct *t = &tsk->thread;

        for (i = 0; i < HBP_NUM; i++) {
                unregister_hw_breakpoint(t->ptrace_bps[i]);
                t->ptrace_bps[i] = NULL;
        }

        t->virtual_dr6 = 0;
        t->ptrace_dr7 = 0;
}

void hw_breakpoint_restore(void)
{
        set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0);
        set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
        set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
        set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
        set_debugreg(DR6_RESERVED, 6);
        set_debugreg(__this_cpu_read(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);

/*
 * Handle debug exception notifications.
 *
 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
 *
 * NOTIFY_DONE is returned if one of the following conditions is true:
 * i) the causative address is from user-space and the exception
 * is a valid one, i.e. not triggered as a result of lazy debug register
 * switching;
 * ii) there are more bits than trap<n> set in the DR6 register (such
 * as BD, BS or BT), indicating that more than one debug condition is
 * met and requires some more action in do_debug().
 *
 * NOTIFY_STOP is returned for all other cases.
 */
static int hw_breakpoint_handler(struct die_args *args)
{
        int i, rc = NOTIFY_STOP;
        struct perf_event *bp;
        unsigned long *dr6_p;
        unsigned long dr6;
        bool bpx;

        /* The DR6 value is pointed to by args->err */
        dr6_p = (unsigned long *)ERR_PTR(args->err);
        dr6 = *dr6_p;

        /* Do an early return if no trap bits are set in DR6 */
        if ((dr6 & DR_TRAP_BITS) == 0)
                return NOTIFY_DONE;

        /* Handle all the breakpoints that were triggered */
        for (i = 0; i < HBP_NUM; ++i) {
                if (likely(!(dr6 & (DR_TRAP0 << i))))
                        continue;

                bp = this_cpu_read(bp_per_reg[i]);
                if (!bp)
                        continue;

                bpx = bp->hw.info.type == X86_BREAKPOINT_EXECUTE;

                /*
                 * TF and data breakpoints are traps and can be merged, however
                 * instruction breakpoints are faults and will be raised
                 * separately.
                 *
                 * However DR6 can indicate both TF and instruction
                 * breakpoints.  In that case take TF as that has precedence
                 * and delay the instruction breakpoint for the next exception.
                 */
                if (bpx && (dr6 & DR_STEP))
                        continue;

                /*
                 * Reset the 'i'th TRAP bit in dr6 to denote completion of
                 * exception handling.
                 */
                (*dr6_p) &= ~(DR_TRAP0 << i);

                perf_bp_event(bp, args->regs);

                /*
                 * Set up the resume flag to avoid breakpoint recursion when
                 * returning to the origin.
                 */
                if (bpx)
                        args->regs->flags |= X86_EFLAGS_RF;
        }

        /*
         * Further processing in do_debug() is needed for a) user-space
         * breakpoints (to generate signals) and b) when the system has
         * taken an exception due to multiple causes.
         */
        if ((current->thread.virtual_dr6 & DR_TRAP_BITS) ||
            (dr6 & (~DR_TRAP_BITS)))
                rc = NOTIFY_DONE;

        return rc;
}

/*
 * Handle debug exception notifications.
 */
int hw_breakpoint_exceptions_notify(
                struct notifier_block *unused, unsigned long val, void *data)
{
        if (val != DIE_DEBUG)
                return NOTIFY_DONE;

        return hw_breakpoint_handler(data);
}

void hw_breakpoint_pmu_read(struct perf_event *bp)
{
        /* TODO */
}