GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/arm/kernel/perf_event.c
#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 * Copyright (C) 2010 ARM Ltd., Will Deacon <[email protected]>
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code. Callchain code is based on the ARM OProfile backtrace
 * code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>

#include <asm/cputype.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/pmu.h>
#include <asm/stacktrace.h>

static struct platform_device *pmu_device;

/*
 * Hardware lock to serialize accesses to PMU registers. Needed for the
 * read/modify/write sequences.
 */
static DEFINE_RAW_SPINLOCK(pmu_lock);

/*
 * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
 * another platform that supports more, we need to increase this to be the
 * largest of all platforms.
 *
 * ARMv7 supports up to 32 events:
 *  cycle counter CCNT + 31 event counters CNT0..30.
 *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
 */
#define ARMPMU_MAX_HWEVENTS 33

/* The events for a given CPU. */
struct cpu_hw_events {
        /*
         * The events that are active on the CPU for the given index. Index 0
         * is reserved.
         */
        struct perf_event *events[ARMPMU_MAX_HWEVENTS];

        /*
         * A 1 bit for an index indicates that the counter is being used for
         * an event. A 0 means that the counter can be used.
         */
        unsigned long used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];

        /*
         * A 1 bit for an index indicates that the counter is actively being
         * used.
         */
        unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

struct arm_pmu {
        enum arm_perf_pmu_ids id;
        const char      *name;
        irqreturn_t     (*handle_irq)(int irq_num, void *dev);
        void            (*enable)(struct hw_perf_event *evt, int idx);
        void            (*disable)(struct hw_perf_event *evt, int idx);
        int             (*get_event_idx)(struct cpu_hw_events *cpuc,
                                         struct hw_perf_event *hwc);
        u32             (*read_counter)(int idx);
        void            (*write_counter)(int idx, u32 val);
        void            (*start)(void);
        void            (*stop)(void);
        void            (*reset)(void *);
        const unsigned  (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
                                    [PERF_COUNT_HW_CACHE_OP_MAX]
                                    [PERF_COUNT_HW_CACHE_RESULT_MAX];
        const unsigned  (*event_map)[PERF_COUNT_HW_MAX];
        u32             raw_event_mask;
        int             num_events;
        u64             max_period;
};
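/*
 * Each CPU-specific backend (see the perf_event_xscale.c, perf_event_v6.c
 * and perf_event_v7.c includes at the bottom of this file) fills in one of
 * these descriptors and returns it from its *_pmu_init() routine; the core
 * code below only ever drives the hardware through these callbacks.
 */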

/* Set at runtime when we know what CPU type we are. */
static const struct arm_pmu *armpmu;

enum arm_perf_pmu_ids
armpmu_get_pmu_id(void)
{
        int id = -ENODEV;

        if (armpmu != NULL)
                id = armpmu->id;

        return id;
}
EXPORT_SYMBOL_GPL(armpmu_get_pmu_id);

int
armpmu_get_max_events(void)
{
        int max_events = 0;

        if (armpmu != NULL)
                max_events = armpmu->num_events;

        return max_events;
}
EXPORT_SYMBOL_GPL(armpmu_get_max_events);

int perf_num_counters(void)
{
        return armpmu_get_max_events();
}
EXPORT_SYMBOL_GPL(perf_num_counters);

#define HW_OP_UNSUPPORTED 0xFFFF

#define C(_x) \
        PERF_COUNT_HW_CACHE_##_x

#define CACHE_OP_UNSUPPORTED 0xFFFF

static int
armpmu_map_cache_event(u64 config)
{
        unsigned int cache_type, cache_op, cache_result, ret;

        cache_type = (config >> 0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return -EINVAL;

        cache_op = (config >> 8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return -EINVAL;

        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return -EINVAL;

        ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];

        if (ret == CACHE_OP_UNSUPPORTED)
                return -ENOENT;

        return ret;
}
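/*
 * For example, a request for L1D read misses arrives with the standard perf
 * cache encoding packed into attr.config:
 *
 *      config = PERF_COUNT_HW_CACHE_L1D |
 *               (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *               (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
 *
 * which the lookup above translates via armpmu->cache_map into the
 * CPU-specific event number, or -ENOENT if that combination is marked
 * CACHE_OP_UNSUPPORTED for this PMU.
 */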

static int
armpmu_map_event(u64 config)
{
        int mapping = (*armpmu->event_map)[config];
        return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
}

static int
armpmu_map_raw_event(u64 config)
{
        return (int)(config & armpmu->raw_event_mask);
}

static int
armpmu_event_set_period(struct perf_event *event,
                        struct hw_perf_event *hwc,
                        int idx)
{
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;

        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (left > (s64)armpmu->max_period)
                left = armpmu->max_period;

        local64_set(&hwc->prev_count, (u64)-left);

        armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);

        perf_event_update_userpage(event);

        return ret;
}
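/*
 * Note that the counter is programmed with -left (truncated to 32 bits), so
 * it overflows after exactly 'left' increments; prev_count is seeded with
 * the same value so that the next armpmu_event_update() measures from a
 * consistent starting point.
 */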

static u64
armpmu_event_update(struct perf_event *event,
                    struct hw_perf_event *hwc,
                    int idx, int overflow)
{
        u64 delta, prev_raw_count, new_raw_count;

again:
        prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = armpmu->read_counter(idx);

        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                            new_raw_count) != prev_raw_count)
                goto again;

        new_raw_count &= armpmu->max_period;
        prev_raw_count &= armpmu->max_period;

        if (overflow)
                delta = armpmu->max_period - prev_raw_count + new_raw_count + 1;
        else
                delta = new_raw_count - prev_raw_count;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);

        return new_raw_count;
}
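/*
 * The delta arithmetic above handles counter wrap: when an overflow is
 * reported the hardware count has passed through zero, so the number of
 * events seen is (max_period - prev) + new + 1 rather than simply
 * new - prev.
 */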

static void
armpmu_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        /* Don't read disabled counters! */
        if (hwc->idx < 0)
                return;

        armpmu_event_update(event, hwc, hwc->idx, 0);
}

static void
armpmu_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (!armpmu)
                return;

        /*
         * ARM pmu always has to update the counter, so ignore
         * PERF_EF_UPDATE, see comments in armpmu_start().
         */
        if (!(hwc->state & PERF_HES_STOPPED)) {
                armpmu->disable(hwc, hwc->idx);
                barrier(); /* why? */
                armpmu_event_update(event, hwc, hwc->idx, 0);
                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
}

static void
armpmu_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (!armpmu)
                return;

        /*
         * ARM pmu always has to reprogram the period, so ignore
         * PERF_EF_RELOAD, see the comment below.
         */
        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

        hwc->state = 0;
        /*
         * Set the period again. Some counters can't be stopped, so when we
         * were stopped we simply disabled the IRQ source and the counter
         * may have been left counting. If we don't do this step then we may
         * get an interrupt too soon or *way* too late if the overflow has
         * happened since disabling.
         */
        armpmu_event_set_period(event, hwc, hwc->idx);
        armpmu->enable(hwc, hwc->idx);
}

static void
armpmu_del(struct perf_event *event, int flags)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        WARN_ON(idx < 0);

        clear_bit(idx, cpuc->active_mask);
        armpmu_stop(event, PERF_EF_UPDATE);
        cpuc->events[idx] = NULL;
        clear_bit(idx, cpuc->used_mask);

        perf_event_update_userpage(event);
}

static int
armpmu_add(struct perf_event *event, int flags)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        int err = 0;

        perf_pmu_disable(event->pmu);

        /* If we don't have space for the counter then finish early. */
        idx = armpmu->get_event_idx(cpuc, hwc);
        if (idx < 0) {
                err = idx;
                goto out;
        }

        /*
         * If there is an event in the counter we are going to use then make
         * sure it is disabled.
         */
        event->hw.idx = idx;
        armpmu->disable(hwc, idx);
        cpuc->events[idx] = event;
        set_bit(idx, cpuc->active_mask);

        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        if (flags & PERF_EF_START)
                armpmu_start(event, PERF_EF_RELOAD);

        /* Propagate our changes to the userspace mapping. */
        perf_event_update_userpage(event);

out:
        perf_pmu_enable(event->pmu);
        return err;
}

static struct pmu pmu;

static int
validate_event(struct cpu_hw_events *cpuc,
               struct perf_event *event)
{
        struct hw_perf_event fake_event = event->hw;

        if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
                return 1;

        return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
}

static int
validate_group(struct perf_event *event)
{
        struct perf_event *sibling, *leader = event->group_leader;
        struct cpu_hw_events fake_pmu;

        memset(&fake_pmu, 0, sizeof(fake_pmu));

        if (!validate_event(&fake_pmu, leader))
                return -ENOSPC;

        list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
                if (!validate_event(&fake_pmu, sibling))
                        return -ENOSPC;
        }

        if (!validate_event(&fake_pmu, event))
                return -ENOSPC;

        return 0;
}
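/*
 * Group validation is a dry run of counter allocation: every event in the
 * group is "placed" into a zeroed, throwaway cpu_hw_events via
 * get_event_idx(), and the group is rejected with -ENOSPC if its events
 * could never all be scheduled onto the PMU at the same time.
 */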

static irqreturn_t armpmu_platform_irq(int irq, void *dev)
{
        struct arm_pmu_platdata *plat = dev_get_platdata(&pmu_device->dev);

        return plat->handle_irq(irq, dev, armpmu->handle_irq);
}

static int
armpmu_reserve_hardware(void)
{
        struct arm_pmu_platdata *plat;
        irq_handler_t handle_irq;
        int i, err = -ENODEV, irq;

        pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
        if (IS_ERR(pmu_device)) {
                pr_warning("unable to reserve pmu\n");
                return PTR_ERR(pmu_device);
        }

        init_pmu(ARM_PMU_DEVICE_CPU);

        plat = dev_get_platdata(&pmu_device->dev);
        if (plat && plat->handle_irq)
                handle_irq = armpmu_platform_irq;
        else
                handle_irq = armpmu->handle_irq;

        if (pmu_device->num_resources < 1) {
                pr_err("no irqs for PMUs defined\n");
                return -ENODEV;
        }

        for (i = 0; i < pmu_device->num_resources; ++i) {
                irq = platform_get_irq(pmu_device, i);
                if (irq < 0)
                        continue;

                err = request_irq(irq, handle_irq,
                                  IRQF_DISABLED | IRQF_NOBALANCING,
                                  "armpmu", NULL);
                if (err) {
                        pr_warning("unable to request IRQ%d for ARM perf "
                                   "counters\n", irq);
                        break;
                }
        }

        if (err) {
                for (i = i - 1; i >= 0; --i) {
                        irq = platform_get_irq(pmu_device, i);
                        if (irq >= 0)
                                free_irq(irq, NULL);
                }
                release_pmu(pmu_device);
                pmu_device = NULL;
        }

        return err;
}
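/*
 * If any request_irq() above fails, the cleanup loop at the end walks back
 * over the interrupts that were already claimed, frees them and releases
 * the PMU device, so a partial reservation never leaks.
 */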

static void
armpmu_release_hardware(void)
{
        int i, irq;

        for (i = pmu_device->num_resources - 1; i >= 0; --i) {
                irq = platform_get_irq(pmu_device, i);
                if (irq >= 0)
                        free_irq(irq, NULL);
        }
        armpmu->stop();

        release_pmu(pmu_device);
        pmu_device = NULL;
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmu_reserve_mutex);

static void
hw_perf_event_destroy(struct perf_event *event)
{
        if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
                armpmu_release_hardware();
                mutex_unlock(&pmu_reserve_mutex);
        }
}
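/*
 * active_events reference-counts users of the PMU hardware: the first event
 * to be initialised reserves the interrupts via armpmu_reserve_hardware()
 * and destroying the last event releases them here, with pmu_reserve_mutex
 * serialising the transitions through zero.
 */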

static int
__hw_perf_event_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        int mapping, err;

        /* Decode the generic type into an ARM event identifier. */
        if (PERF_TYPE_HARDWARE == event->attr.type) {
                mapping = armpmu_map_event(event->attr.config);
        } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
                mapping = armpmu_map_cache_event(event->attr.config);
        } else if (PERF_TYPE_RAW == event->attr.type) {
                mapping = armpmu_map_raw_event(event->attr.config);
        } else {
                pr_debug("event type %x not supported\n", event->attr.type);
                return -EOPNOTSUPP;
        }

        if (mapping < 0) {
                pr_debug("event %x:%llx not supported\n", event->attr.type,
                         event->attr.config);
                return mapping;
        }

        /*
         * Check whether we need to exclude the counter from certain modes.
         * The ARM performance counters are on all of the time so if someone
         * has asked us for some excludes then we have to fail.
         */
        if (event->attr.exclude_kernel || event->attr.exclude_user ||
            event->attr.exclude_hv || event->attr.exclude_idle) {
                pr_debug("ARM performance counters do not support "
                         "mode exclusion\n");
                return -EPERM;
        }

        /*
         * We don't assign an index until we actually place the event onto
         * hardware. Use -1 to signify that we haven't decided where to put it
         * yet. For SMP systems, each core has its own PMU so we can't do any
         * clever allocation or constraints checking at this point.
         */
        hwc->idx = -1;

        /*
         * Store the event encoding into the config_base field. config and
         * event_base are unused as the only 2 things we need to know are
         * the event mapping and the counter to use. The counter to use is
         * also the index and the config_base is the event type.
         */
        hwc->config_base = (unsigned long)mapping;
        hwc->config = 0;
        hwc->event_base = 0;

        if (!hwc->sample_period) {
                hwc->sample_period = armpmu->max_period;
                hwc->last_period = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
        }

        err = 0;
        if (event->group_leader != event) {
                err = validate_group(event);
                if (err)
                        return -EINVAL;
        }

        return err;
}

static int armpmu_event_init(struct perf_event *event)
{
        int err = 0;

        switch (event->attr.type) {
        case PERF_TYPE_RAW:
        case PERF_TYPE_HARDWARE:
        case PERF_TYPE_HW_CACHE:
                break;

        default:
                return -ENOENT;
        }

        if (!armpmu)
                return -ENODEV;

        event->destroy = hw_perf_event_destroy;

        if (!atomic_inc_not_zero(&active_events)) {
                mutex_lock(&pmu_reserve_mutex);
                if (atomic_read(&active_events) == 0) {
                        err = armpmu_reserve_hardware();
                }

                if (!err)
                        atomic_inc(&active_events);
                mutex_unlock(&pmu_reserve_mutex);
        }

        if (err)
                return err;

        err = __hw_perf_event_init(event);
        if (err)
                hw_perf_event_destroy(event);

        return err;
}

static void armpmu_enable(struct pmu *pmu)
{
        /* Enable all of the perf events on hardware. */
        int idx, enabled = 0;
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!armpmu)
                return;

        for (idx = 0; idx <= armpmu->num_events; ++idx) {
                struct perf_event *event = cpuc->events[idx];

                if (!event)
                        continue;

                armpmu->enable(&event->hw, idx);
                enabled = 1;
        }

        if (enabled)
                armpmu->start();
}

static void armpmu_disable(struct pmu *pmu)
{
        if (armpmu)
                armpmu->stop();
}

static struct pmu pmu = {
        .pmu_enable     = armpmu_enable,
        .pmu_disable    = armpmu_disable,
        .event_init     = armpmu_event_init,
        .add            = armpmu_add,
        .del            = armpmu_del,
        .start          = armpmu_start,
        .stop           = armpmu_stop,
        .read           = armpmu_read,
};
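/*
 * This struct pmu is what the generic perf core calls into; it is registered
 * with perf_pmu_register() from init_hw_perf_events() below.
 */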

/* Include the PMU-specific implementations. */
#include "perf_event_xscale.c"
#include "perf_event_v6.c"
#include "perf_event_v7.c"

/*
 * Ensure the PMU has sane values out of reset.
 * This requires SMP to be available, so exists as a separate initcall.
 */
static int __init
armpmu_reset(void)
{
        if (armpmu && armpmu->reset)
                return on_each_cpu(armpmu->reset, NULL, 1);
        return 0;
}
arch_initcall(armpmu_reset);

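/*
 * The CPU detection below decodes the MIDR returned by read_cpuid_id():
 * bits [31:24] hold the implementer (0x41 is ARM Ltd, 0x69 is Intel/XScale)
 * and bits [15:4] the primary part number, which is left unshifted by the
 * 0xFFF0 mask and is why the case values below end in 0.
 */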
static int __init
init_hw_perf_events(void)
{
        unsigned long cpuid = read_cpuid_id();
        unsigned long implementor = (cpuid & 0xFF000000) >> 24;
        unsigned long part_number = (cpuid & 0xFFF0);

        /* ARM Ltd CPUs. */
        if (0x41 == implementor) {
                switch (part_number) {
                case 0xB360:    /* ARM1136 */
                case 0xB560:    /* ARM1156 */
                case 0xB760:    /* ARM1176 */
                        armpmu = armv6pmu_init();
                        break;
                case 0xB020:    /* ARM11mpcore */
                        armpmu = armv6mpcore_pmu_init();
                        break;
                case 0xC080:    /* Cortex-A8 */
                        armpmu = armv7_a8_pmu_init();
                        break;
                case 0xC090:    /* Cortex-A9 */
                        armpmu = armv7_a9_pmu_init();
                        break;
                }
        /* Intel CPUs [xscale]. */
        } else if (0x69 == implementor) {
                part_number = (cpuid >> 13) & 0x7;
                switch (part_number) {
                case 1:
                        armpmu = xscale1pmu_init();
                        break;
                case 2:
                        armpmu = xscale2pmu_init();
                        break;
                }
        }

        if (armpmu) {
                pr_info("enabled with %s PMU driver, %d counters available\n",
                        armpmu->name, armpmu->num_events);
        } else {
                pr_info("no hardware support available\n");
        }

        perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);

        return 0;
}
early_initcall(init_hw_perf_events);

/*
 * Callchain handling code.
 */

/*
 * The registers we're interested in are at the end of the variable
 * length saved register structure. The fp points at the end of this
 * structure so the address of this struct is:
 * (struct frame_tail *)(xxx->fp)-1
 *
 * This code has been adapted from the ARM OProfile support.
 */
struct frame_tail {
        struct frame_tail __user *fp;
        unsigned long sp;
        unsigned long lr;
} __attribute__((packed));
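/*
 * The walk below relies on user binaries keeping this frame-pointer chain
 * intact; if a saved fp is absent or bogus, the checks in user_backtrace()
 * fail and the user-space callchain simply stops at that point.
 */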

/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static struct frame_tail __user *
user_backtrace(struct frame_tail __user *tail,
               struct perf_callchain_entry *entry)
{
        struct frame_tail buftail;

        /* Also check accessibility of one struct frame_tail beyond */
        if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
                return NULL;
        if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
                return NULL;

        perf_callchain_store(entry, buftail.lr);

        /*
         * Frame pointers should strictly progress back up the stack
         * (towards higher addresses).
         */
        if (tail + 1 >= buftail.fp)
                return NULL;

        return buftail.fp - 1;
}

void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
        struct frame_tail __user *tail;

        tail = (struct frame_tail __user *)regs->ARM_fp - 1;

        while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
               tail && !((unsigned long)tail & 0x3))
                tail = user_backtrace(tail, entry);
}

/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whilst unwinding the stackframe and is like a subroutine return so we use
 * the PC.
 */
static int
callchain_trace(struct stackframe *fr,
                void *data)
{
        struct perf_callchain_entry *entry = data;
        perf_callchain_store(entry, fr->pc);
        return 0;
}

void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
        struct stackframe fr;

        fr.fp = regs->ARM_fp;
        fr.sp = regs->ARM_sp;
        fr.lr = regs->ARM_lr;
        fr.pc = regs->ARM_pc;
        walk_stackframe(&fr, callchain_trace, entry);
}