GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/events/intel/bts.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * BTS PMU driver for perf
 * Copyright (c) 2013-2014, Intel Corporation.
 */

#undef DEBUG

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/coredump.h>

#include <linux/sizes.h>
#include <asm/perf_event.h>
#include <asm/msr.h>

#include "../perf_event.h"

struct bts_ctx {
	struct perf_output_handle	handle;
	struct debug_store		ds_back;
	int				state;
};

/* BTS context states: */
enum {
	/* no ongoing AUX transactions */
	BTS_STATE_STOPPED = 0,
	/* AUX transaction is on, BTS tracing is disabled */
	BTS_STATE_INACTIVE,
	/* AUX transaction is on, BTS tracing is running */
	BTS_STATE_ACTIVE,
};

static struct bts_ctx __percpu *bts_ctx;

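/*
 * A 64-bit BTS record is three 8-byte fields (branch-from, branch-to,
 * flags), hence the 24-byte record size below. The 4080-byte safety margin
 * (170 records) is the headroom kept between the interrupt threshold and
 * the absolute maximum of the DS area, so the threshold PMI can fire well
 * before the hardware reaches the end of the current region.
 */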
#define BTS_RECORD_SIZE		24
#define BTS_SAFETY_MARGIN	4080

struct bts_phys {
	struct page	*page;
	unsigned long	size;
	unsigned long	offset;
	unsigned long	displacement;
};

struct bts_buffer {
	size_t		real_size;	/* multiple of BTS_RECORD_SIZE */
	unsigned int	nr_pages;
	unsigned int	nr_bufs;
	unsigned int	cur_buf;
	bool		snapshot;
	local_t		data_size;
	local_t		head;
	unsigned long	end;
	void		**data_pages;
	struct bts_phys	buf[] __counted_by(nr_bufs);
};

static struct pmu bts_pmu;

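/*
 * The core AUX allocator (rb_alloc_aux_page()) marks the first page of a
 * high-order allocation with PagePrivate and stores the allocation order in
 * page_private(), so the helpers below can recover how many pages (and
 * bytes) each physically contiguous chunk spans.
 */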
static int buf_nr_pages(struct page *page)
{
	if (!PagePrivate(page))
		return 1;

	return 1 << page_private(page);
}

static size_t buf_size(struct page *page)
{
	return buf_nr_pages(page) * PAGE_SIZE;
}

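/*
 * ->setup_aux() callback: describe the AUX area handed in by perf as an
 * array of physically contiguous chunks (struct bts_phys). Each chunk's
 * displacement is chosen so that record offsets within the AUX buffer stay
 * aligned to BTS_RECORD_SIZE, and its usable size is trimmed to a whole
 * number of records so no record straddles a chunk boundary.
 */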
static void *
bts_buffer_setup_aux(struct perf_event *event, void **pages,
		     int nr_pages, bool overwrite)
{
	struct bts_buffer *bb;
	struct page *page;
	int cpu = event->cpu;
	int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
	unsigned long offset;
	size_t size = nr_pages << PAGE_SHIFT;
	int pg, nr_buf, pad;

	/* count all the high order buffers */
	for (pg = 0, nr_buf = 0; pg < nr_pages;) {
		page = virt_to_page(pages[pg]);
		pg += buf_nr_pages(page);
		nr_buf++;
	}

	/*
	 * to avoid interrupts in overwrite mode, only allow one physical buffer
	 */
	if (overwrite && nr_buf > 1)
		return NULL;

	bb = kzalloc_node(struct_size(bb, buf, nr_buf), GFP_KERNEL, node);
	if (!bb)
		return NULL;

	bb->nr_pages = nr_pages;
	bb->nr_bufs = nr_buf;
	bb->snapshot = overwrite;
	bb->data_pages = pages;
	bb->real_size = size - size % BTS_RECORD_SIZE;

	for (pg = 0, nr_buf = 0, offset = 0, pad = 0; nr_buf < bb->nr_bufs; nr_buf++) {
		unsigned int __nr_pages;

		page = virt_to_page(pages[pg]);
		__nr_pages = buf_nr_pages(page);
		bb->buf[nr_buf].page = page;
		bb->buf[nr_buf].offset = offset;
		bb->buf[nr_buf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
		bb->buf[nr_buf].size = buf_size(page) - bb->buf[nr_buf].displacement;
		pad = bb->buf[nr_buf].size % BTS_RECORD_SIZE;
		bb->buf[nr_buf].size -= pad;

		pg += __nr_pages;
		offset += __nr_pages << PAGE_SHIFT;
	}

	return bb;
}

static void bts_buffer_free_aux(void *data)
{
	kfree(data);
}

static unsigned long bts_buffer_offset(struct bts_buffer *bb, unsigned int idx)
{
	return bb->buf[idx].offset + bb->buf[idx].displacement;
}

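/*
 * Program the per-CPU debug store (DS) area for the current chunk: the
 * hardware writes records starting at bts_index and raises a PMI once the
 * index crosses bts_interrupt_threshold. In snapshot mode the threshold is
 * parked past bts_absolute_maximum so no interrupt is ever generated.
 */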
static void
bts_config_buffer(struct bts_buffer *bb)
{
	int cpu = raw_smp_processor_id();
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	struct bts_phys *phys = &bb->buf[bb->cur_buf];
	unsigned long index, thresh = 0, end = phys->size;
	struct page *page = phys->page;

	index = local_read(&bb->head);

	if (!bb->snapshot) {
		if (bb->end < phys->offset + buf_size(page))
			end = bb->end - phys->offset - phys->displacement;

		index -= phys->offset + phys->displacement;

		if (end - index > BTS_SAFETY_MARGIN)
			thresh = end - BTS_SAFETY_MARGIN;
		else if (end - index > BTS_RECORD_SIZE)
			thresh = end - BTS_RECORD_SIZE;
		else
			thresh = end;
	}

	ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
	ds->bts_index = ds->bts_buffer_base + index;
	ds->bts_absolute_maximum = ds->bts_buffer_base + end;
	ds->bts_interrupt_threshold = !bb->snapshot
		? ds->bts_buffer_base + thresh
		: ds->bts_absolute_maximum + BTS_RECORD_SIZE;
}

static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
{
	unsigned long index = head - phys->offset;

	memset(page_address(phys->page) + index, 0, phys->size - index);
}

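/*
 * Fold the hardware write pointer (ds->bts_index) back into the software
 * head and data_size accounting of the AUX buffer.
 */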
static void bts_update(struct bts_ctx *bts)
{
	int cpu = raw_smp_processor_id();
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	struct bts_buffer *bb = perf_get_aux(&bts->handle);
	unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;

	if (!bb)
		return;

	head = index + bts_buffer_offset(bb, bb->cur_buf);
	old = local_xchg(&bb->head, head);

	if (!bb->snapshot) {
		if (old == head)
			return;

		if (ds->bts_index >= ds->bts_absolute_maximum)
			perf_aux_output_flag(&bts->handle,
					     PERF_AUX_FLAG_TRUNCATED);

		/*
		 * old and head are always in the same physical buffer, so we
		 * can subtract them to get the data size.
		 */
		local_add(head - old, &bb->data_size);
	} else {
		local_set(&bb->data_size, head);
	}

	/*
	 * Since BTS is coherent, just add compiler barrier to ensure
	 * BTS updating is ordered against bts::handle::event.
	 */
	barrier();
}

static int
bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle);

/*
 * Ordering PMU callbacks wrt themselves and the PMI is done by means
 * of bts::state, which:
 *  - is set when bts::handle::event is valid, that is, between
 *    perf_aux_output_begin() and perf_aux_output_end();
 *  - is zero otherwise;
 *  - is ordered against bts::handle::event with a compiler barrier.
 */

static void __bts_event_start(struct perf_event *event)
{
	struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
	struct bts_buffer *bb = perf_get_aux(&bts->handle);
	u64 config = 0;

	if (!bb->snapshot)
		config |= ARCH_PERFMON_EVENTSEL_INT;
	if (!event->attr.exclude_kernel)
		config |= ARCH_PERFMON_EVENTSEL_OS;
	if (!event->attr.exclude_user)
		config |= ARCH_PERFMON_EVENTSEL_USR;

	bts_config_buffer(bb);

	/*
	 * local barrier to make sure that ds configuration made it
	 * before we enable BTS and bts::state goes ACTIVE
	 */
	wmb();

	/* INACTIVE/STOPPED -> ACTIVE */
	WRITE_ONCE(bts->state, BTS_STATE_ACTIVE);

	intel_pmu_enable_bts(config);
}

static void bts_event_start(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
	struct bts_buffer *bb;

	bb = perf_aux_output_begin(&bts->handle, event);
	if (!bb)
		goto fail_stop;

	if (bts_buffer_reset(bb, &bts->handle))
		goto fail_end_stop;

	bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
	bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
	bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;

	perf_event_itrace_started(event);
	event->hw.state = 0;

	__bts_event_start(event);

	return;

fail_end_stop:
	perf_aux_output_end(&bts->handle, 0);

fail_stop:
	event->hw.state = PERF_HES_STOPPED;
}

static void __bts_event_stop(struct perf_event *event, int state)
{
	struct bts_ctx *bts = this_cpu_ptr(bts_ctx);

	/* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */
	WRITE_ONCE(bts->state, state);

	/*
	 * No extra synchronization is mandated by the documentation to have
	 * BTS data stores globally visible.
	 */
	intel_pmu_disable_bts();
}

static void bts_event_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
	struct bts_buffer *bb = NULL;
	int state = READ_ONCE(bts->state);

	if (state == BTS_STATE_ACTIVE)
		__bts_event_stop(event, BTS_STATE_STOPPED);

	if (state != BTS_STATE_STOPPED)
		bb = perf_get_aux(&bts->handle);

	event->hw.state |= PERF_HES_STOPPED;

	if (flags & PERF_EF_UPDATE) {
		bts_update(bts);

		if (bb) {
			if (bb->snapshot)
				bts->handle.head =
					local_xchg(&bb->data_size,
						   bb->nr_pages << PAGE_SHIFT);
			perf_aux_output_end(&bts->handle,
					    local_xchg(&bb->data_size, 0));
		}

		cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
		cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
		cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
		cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
	}
}

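/*
 * intel_bts_enable_local()/intel_bts_disable_local() are called by the core
 * Intel PMU code around PMI handling, pausing and re-arming BTS tracing on
 * this CPU without tearing down the current AUX transaction.
 */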
void intel_bts_enable_local(void)
{
	struct bts_ctx *bts;
	int state;

	if (!bts_ctx)
		return;

	bts = this_cpu_ptr(bts_ctx);
	state = READ_ONCE(bts->state);
	/*
	 * Here we transition from INACTIVE to ACTIVE;
	 * if we instead are STOPPED from the interrupt handler,
	 * stay that way. Can't be ACTIVE here though.
	 */
	if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE))
		return;

	if (state == BTS_STATE_STOPPED)
		return;

	if (bts->handle.event)
		__bts_event_start(bts->handle.event);
}

void intel_bts_disable_local(void)
{
	struct bts_ctx *bts;

	if (!bts_ctx)
		return;

	bts = this_cpu_ptr(bts_ctx);

	/*
	 * Here we transition from ACTIVE to INACTIVE;
	 * do nothing for STOPPED or INACTIVE.
	 */
	if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE)
		return;

	if (bts->handle.event)
		__bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE);
}

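/*
 * Pick the region of the AUX buffer that the hardware may write into next:
 * start at the current handle head, move to the next physical chunk if the
 * current one has (almost) no room left, and cap the region so it does not
 * run far past the consumer's wakeup watermark.
 */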
static int
bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle)
{
	unsigned long head, space, next_space, pad, gap, skip, wakeup;
	unsigned int next_buf;
	struct bts_phys *phys, *next_phys;
	int ret;

	if (bb->snapshot)
		return 0;

	head = handle->head & ((bb->nr_pages << PAGE_SHIFT) - 1);

	phys = &bb->buf[bb->cur_buf];
	space = phys->offset + phys->displacement + phys->size - head;
	pad = space;
	if (space > handle->size) {
		space = handle->size;
		space -= space % BTS_RECORD_SIZE;
	}
	if (space <= BTS_SAFETY_MARGIN) {
		/* See if next phys buffer has more space */
		next_buf = bb->cur_buf + 1;
		if (next_buf >= bb->nr_bufs)
			next_buf = 0;
		next_phys = &bb->buf[next_buf];
		gap = buf_size(phys->page) - phys->displacement - phys->size +
			next_phys->displacement;
		skip = pad + gap;
		if (handle->size >= skip) {
			next_space = next_phys->size;
			if (next_space + skip > handle->size) {
				next_space = handle->size - skip;
				next_space -= next_space % BTS_RECORD_SIZE;
			}
			if (next_space > space || !space) {
				if (pad)
					bts_buffer_pad_out(phys, head);
				ret = perf_aux_output_skip(handle, skip);
				if (ret)
					return ret;
				/* Advance to next phys buffer */
				phys = next_phys;
				space = next_space;
				head = phys->offset + phys->displacement;
				/*
				 * After this, cur_buf and head won't match ds
				 * anymore, so we must not be racing with
				 * bts_update().
				 */
				bb->cur_buf = next_buf;
				local_set(&bb->head, head);
			}
		}
	}

	/* Don't go far beyond wakeup watermark */
	wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
		 handle->head;
	if (space > wakeup) {
		space = wakeup;
		space -= space % BTS_RECORD_SIZE;
	}

	bb->end = head + space;

	/*
	 * If we have no space, the lost notification would have been sent when
	 * we hit absolute_maximum - see bts_update()
	 */
	if (!space)
		return -ENOSPC;

	return 0;
}

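/*
 * PMI handler, called from the Intel PMU NMI path: claim the NMI if the DS
 * write pointer has crossed the interrupt threshold, publish the data
 * gathered so far and start a new AUX transaction over the remaining space.
 */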
int intel_bts_interrupt(void)
{
	struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
	struct bts_ctx *bts;
	struct perf_event *event;
	struct bts_buffer *bb;
	s64 old_head;
	int err = -ENOSPC, handled = 0;

	if (!bts_ctx)
		return 0;

	bts = this_cpu_ptr(bts_ctx);
	event = bts->handle.event;
	/*
	 * The only surefire way of knowing if this NMI is ours is by checking
	 * the write ptr against the PMI threshold.
	 */
	if (ds && (ds->bts_index >= ds->bts_interrupt_threshold))
		handled = 1;

	/*
	 * this is wrapped in intel_bts_enable_local/intel_bts_disable_local,
	 * so we can only be INACTIVE or STOPPED
	 */
	if (READ_ONCE(bts->state) == BTS_STATE_STOPPED)
		return handled;

	bb = perf_get_aux(&bts->handle);
	if (!bb)
		return handled;

	/*
	 * Skip snapshot counters: they don't use the interrupt, but
	 * there's no other way of telling, because the pointer will
	 * keep moving
	 */
	if (bb->snapshot)
		return 0;

	old_head = local_read(&bb->head);
	bts_update(bts);

	/* no new data */
	if (old_head == local_read(&bb->head))
		return handled;

	perf_aux_output_end(&bts->handle, local_xchg(&bb->data_size, 0));

	bb = perf_aux_output_begin(&bts->handle, event);
	if (bb)
		err = bts_buffer_reset(bb, &bts->handle);

	if (err) {
		WRITE_ONCE(bts->state, BTS_STATE_STOPPED);

		if (bb) {
			/*
			 * BTS_STATE_STOPPED should be visible before
			 * cleared handle::event
			 */
			barrier();
			perf_aux_output_end(&bts->handle, 0);
		}
	}

	return 1;
}

static void bts_event_del(struct perf_event *event, int mode)
{
	bts_event_stop(event, PERF_EF_UPDATE);
}

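/*
 * pmu::add: BTS occupies the fixed INTEL_PMC_IDX_FIXED_BTS counter slot and
 * only one BTS event can be active per CPU, so refuse the add if either the
 * counter slot or the AUX handle is already taken.
 */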
static int bts_event_add(struct perf_event *event, int mode)
{
	struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	event->hw.state = PERF_HES_STOPPED;

	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		return -EBUSY;

	if (bts->handle.event)
		return -EBUSY;

	if (mode & PERF_EF_START) {
		bts_event_start(event, 0);
		if (hwc->state & PERF_HES_STOPPED)
			return -EINVAL;
	}

	return 0;
}

static void bts_event_destroy(struct perf_event *event)
{
	x86_release_hardware();
	x86_del_exclusive(x86_lbr_exclusive_bts);
}

static int bts_event_init(struct perf_event *event)
{
	int ret;

	if (event->attr.type != bts_pmu.type)
		return -ENOENT;

	/*
	 * BTS leaks kernel addresses even when CPL0 tracing is
	 * disabled, so disallow intel_bts driver for unprivileged
	 * users on paranoid systems since it provides trace data
	 * to the user in a zero-copy fashion.
	 */
	if (event->attr.exclude_kernel) {
		ret = perf_allow_kernel();
		if (ret)
			return ret;
	}

	if (x86_add_exclusive(x86_lbr_exclusive_bts))
		return -EBUSY;

	ret = x86_reserve_hardware();
	if (ret) {
		x86_del_exclusive(x86_lbr_exclusive_bts);
		return ret;
	}

	event->destroy = bts_event_destroy;

	return 0;
}

static void bts_event_read(struct perf_event *event)
{
}

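/*
 * Register the "intel_bts" PMU. BTS needs the 64-bit DS save area
 * (X86_FEATURE_DTES64) and is mutually exclusive with other users of the
 * LBR/BTS hardware; that exclusivity is enforced per event in
 * bts_event_init() via x86_add_exclusive().
 */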
static __init int bts_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
		return -ENODEV;

	if (boot_cpu_has(X86_FEATURE_PTI)) {
		/*
		 * BTS hardware writes through a virtual memory map; we must
		 * either use the kernel physical map, or the user mapping of
		 * the AUX buffer.
		 *
		 * However, since this driver supports per-CPU and per-task inherit
		 * we cannot use the user mapping since it will not be available
		 * if we're not running the owning process.
		 *
		 * With PTI we can't use the kernel map either, because it's not
		 * there when we run userspace.
		 *
		 * For now, disable this driver when using PTI.
		 */
		return -ENODEV;
	}

	bts_ctx = alloc_percpu(struct bts_ctx);
	if (!bts_ctx)
		return -ENOMEM;

	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
				  PERF_PMU_CAP_EXCLUSIVE;
	bts_pmu.task_ctx_nr	= perf_sw_context;
	bts_pmu.event_init	= bts_event_init;
	bts_pmu.add		= bts_event_add;
	bts_pmu.del		= bts_event_del;
	bts_pmu.start		= bts_event_start;
	bts_pmu.stop		= bts_event_stop;
	bts_pmu.read		= bts_event_read;
	bts_pmu.setup_aux	= bts_buffer_setup_aux;
	bts_pmu.free_aux	= bts_buffer_free_aux;

	return perf_pmu_register(&bts_pmu, "intel_bts", -1);
}
arch_initcall(bts_init);