GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/events/amd/uncore.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <[email protected]>
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufeature.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
#define NUM_COUNTERS_L3 6
#define NUM_COUNTERS_MAX 64

#define RDPMC_BASE_NB 6
#define RDPMC_BASE_LLC 10

#define COUNTER_SHIFT 16
#define UNCORE_NAME_LEN 16
#define UNCORE_GROUP_MAX 256

#undef pr_fmt
#define pr_fmt(fmt) "amd_uncore: " fmt

static int pmu_version;

struct amd_uncore_ctx {
        int refcnt;
        int cpu;
        struct perf_event **events;
        unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)];
        int nr_active;
        struct hrtimer hrtimer;
        u64 hrtimer_duration;
};

struct amd_uncore_pmu {
        char name[UNCORE_NAME_LEN];
        int num_counters;
        int rdpmc_base;
        u32 msr_base;
        int group;
        cpumask_t active_mask;
        struct pmu pmu;
        struct amd_uncore_ctx * __percpu *ctx;
};

enum {
        UNCORE_TYPE_DF,
        UNCORE_TYPE_L3,
        UNCORE_TYPE_UMC,

        UNCORE_TYPE_MAX
};

union amd_uncore_info {
        struct {
                u64 aux_data:32; /* auxiliary data */
                u64 num_pmcs:8;  /* number of counters */
                u64 gid:8;       /* group id */
                u64 cid:8;       /* context id */
        } split;
        u64 full;
};

struct amd_uncore {
        union amd_uncore_info __percpu *info;
        struct amd_uncore_pmu *pmus;
        unsigned int num_pmus;
        bool init_done;
        void (*scan)(struct amd_uncore *uncore, unsigned int cpu);
        int (*init)(struct amd_uncore *uncore, unsigned int cpu);
        void (*move)(struct amd_uncore *uncore, unsigned int cpu);
        void (*free)(struct amd_uncore *uncore, unsigned int cpu);
};

static struct amd_uncore uncores[UNCORE_TYPE_MAX];

/* Interval for hrtimer, defaults to 60000 milliseconds */
static unsigned int update_interval = 60 * MSEC_PER_SEC;
module_param(update_interval, uint, 0444);

static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
{
        return container_of(event->pmu, struct amd_uncore_pmu, pmu);
}

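/*
 * Editorial note: these PMUs advertise PERF_PMU_CAP_NO_INTERRUPT, so counter
 * overflow is never signalled. The hrtimer callback below periodically reads
 * every active counter, folding the accumulated delta into event->count
 * before the hardware counters (48 bits wide as handled by amd_uncore_read())
 * can wrap unnoticed.
 */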
static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
{
        struct amd_uncore_ctx *ctx;
        struct perf_event *event;
        int bit;

        ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);

        if (!ctx->nr_active || ctx->cpu != smp_processor_id())
                return HRTIMER_NORESTART;

        for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
                event = ctx->events[bit];
                event->pmu->read(event);
        }

        hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
        return HRTIMER_RESTART;
}

static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
{
        hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
                      HRTIMER_MODE_REL_PINNED_HARD);
}

static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx)
{
        hrtimer_cancel(&ctx->hrtimer);
}

static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx)
{
        hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
}

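/*
 * Editorial note on the COUNTER_SHIFT arithmetic used below: shifting prev
 * and new left by COUNTER_SHIFT (16) and the difference back right by 16
 * sign-extends a 48-bit counter delta. For example, prev = 0xffffffffffff
 * and new = 5 yields a delta of 6, so a single wrap of the hardware counter
 * still produces the correct positive increment.
 */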
static void amd_uncore_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new;
        s64 delta;

        /*
         * since we do not enable counter overflow interrupts,
         * we do not have to worry about prev_count changing on us
         */

        prev = local64_read(&hwc->prev_count);

        /*
         * Some uncore PMUs do not have RDPMC assignments. In such cases,
         * read counts directly from the corresponding PERF_CTR.
         */
        if (hwc->event_base_rdpmc < 0)
                rdmsrq(hwc->event_base, new);
        else
                new = rdpmc(hwc->event_base_rdpmc);

        local64_set(&hwc->prev_count, new);
        delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
        delta >>= COUNTER_SHIFT;
        local64_add(delta, &event->count);
}

static void amd_uncore_start(struct perf_event *event, int flags)
{
        struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
        struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
        struct hw_perf_event *hwc = &event->hw;

        if (!ctx->nr_active++)
                amd_uncore_start_hrtimer(ctx);

        if (flags & PERF_EF_RELOAD)
                wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count));

        hwc->state = 0;
        __set_bit(hwc->idx, ctx->active_mask);
        wrmsrq(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
        perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
        struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
        struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
        struct hw_perf_event *hwc = &event->hw;

        wrmsrq(hwc->config_base, hwc->config);
        hwc->state |= PERF_HES_STOPPED;

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                event->pmu->read(event);
                hwc->state |= PERF_HES_UPTODATE;
        }

        if (!--ctx->nr_active)
                amd_uncore_cancel_hrtimer(ctx);

        __clear_bit(hwc->idx, ctx->active_mask);
}

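/*
 * Editorial note: counter slots are claimed with try_cmpxchg() on
 * ctx->events[]. The first entry that is still NULL is atomically swapped to
 * point at the incoming event, which doubles as the free/busy bookkeeping
 * for the shared context.
 */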
static int amd_uncore_add(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
        struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
        struct hw_perf_event *hwc = &event->hw;

        /* are we already assigned? */
        if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
                goto out;

        for (i = 0; i < pmu->num_counters; i++) {
                if (ctx->events[i] == event) {
                        hwc->idx = i;
                        goto out;
                }
        }

        /* if not, take the first available counter */
        hwc->idx = -1;
        for (i = 0; i < pmu->num_counters; i++) {
                struct perf_event *tmp = NULL;

                if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
                        hwc->idx = i;
                        break;
                }
        }

out:
        if (hwc->idx == -1)
                return -EBUSY;

        hwc->config_base = pmu->msr_base + (2 * hwc->idx);
        hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
        hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (pmu->rdpmc_base < 0)
                hwc->event_base_rdpmc = -1;

        if (flags & PERF_EF_START)
                event->pmu->start(event, PERF_EF_RELOAD);

        return 0;
}

static void amd_uncore_del(struct perf_event *event, int flags)
{
        int i;
        struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
        struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
        struct hw_perf_event *hwc = &event->hw;

        event->pmu->stop(event, PERF_EF_UPDATE);

        for (i = 0; i < pmu->num_counters; i++) {
                struct perf_event *tmp = event;

                if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
                        break;
        }

        hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
        struct amd_uncore_pmu *pmu;
        struct amd_uncore_ctx *ctx;
        struct hw_perf_event *hwc = &event->hw;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        if (event->cpu < 0)
                return -EINVAL;

        pmu = event_to_amd_uncore_pmu(event);
        ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
        if (!ctx)
                return -ENODEV;

        /*
         * NB and Last level cache counters (MSRs) are shared across all cores
         * that share the same NB / Last level cache. On family 16h and below,
         * Interrupts can be directed to a single target core, however, event
         * counts generated by processes running on other cores cannot be masked
         * out. So we do not support sampling and per-thread events via
         * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
         */
        hwc->config = event->attr.config;
        hwc->idx = -1;

        /*
         * since request can come in to any of the shared cores, we will remap
         * to a single common cpu.
         */
        event->cpu = ctx->cpu;

        return 0;
}

static umode_t
amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
        return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
               attr->mode : 0;
}

static umode_t
amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
        return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
{
        struct pmu *ptr = dev_get_drvdata(dev);
        struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu);

        return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group amd_uncore_attr_group = {
        .attrs = amd_uncore_attrs,
};

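/*
 * Editorial note: each DEFINE_UNCORE_FORMAT_ATTR() instance below becomes a
 * read-only file under /sys/bus/event_source/devices/<pmu>/format/, e.g.
 * reading .../amd_df/format/event is expected to return a bit-range string
 * such as "config:0-7,32-35" that tools like perf use to encode raw events.
 */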
#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
static ssize_t __uncore_##_var##_show(struct device *dev, \
                                      struct device_attribute *attr, \
                                      char *page) \
{ \
        BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
        return sprintf(page, _format "\n"); \
} \
static struct device_attribute format_attr_##_var = \
        __ATTR(_name, 0444, __uncore_##_var##_show, NULL)

DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3, PerfMonV2 UMC */
DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask12, umask, "config:8-15,24-27"); /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(rdwrmask, rdwrmask, "config:8-9"); /* PerfMonV2 UMC */

/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
        &format_attr_event12.attr, /* event */
        &format_attr_umask8.attr,  /* umask */
        NULL,
};

/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
        &format_attr_event12.attr, /* event */
        &format_attr_umask8.attr,  /* umask */
        NULL,                      /* threadmask */
        NULL,
};

/* Common UMC attributes */
static struct attribute *amd_uncore_umc_format_attr[] = {
        &format_attr_event8.attr,   /* event */
        &format_attr_rdwrmask.attr, /* rdwrmask */
        NULL,
};

/* F17h unique L3 attributes */
static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
        &format_attr_slicemask.attr, /* slicemask */
        NULL,
};

/* F19h unique L3 attributes */
static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
        &format_attr_coreid.attr,      /* coreid */
        &format_attr_enallslices.attr, /* enallslices */
        &format_attr_enallcores.attr,  /* enallcores */
        &format_attr_sliceid.attr,     /* sliceid */
        NULL,
};

static struct attribute_group amd_uncore_df_format_group = {
        .name = "format",
        .attrs = amd_uncore_df_format_attr,
};

static struct attribute_group amd_uncore_l3_format_group = {
        .name = "format",
        .attrs = amd_uncore_l3_format_attr,
};

static struct attribute_group amd_f17h_uncore_l3_format_group = {
        .name = "format",
        .attrs = amd_f17h_uncore_l3_format_attr,
        .is_visible = amd_f17h_uncore_is_visible,
};

static struct attribute_group amd_f19h_uncore_l3_format_group = {
        .name = "format",
        .attrs = amd_f19h_uncore_l3_format_attr,
        .is_visible = amd_f19h_uncore_is_visible,
};

static struct attribute_group amd_uncore_umc_format_group = {
        .name = "format",
        .attrs = amd_uncore_umc_format_attr,
};

static const struct attribute_group *amd_uncore_df_attr_groups[] = {
        &amd_uncore_attr_group,
        &amd_uncore_df_format_group,
        NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
        &amd_uncore_attr_group,
        &amd_uncore_l3_format_group,
        NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_update[] = {
        &amd_f17h_uncore_l3_format_group,
        &amd_f19h_uncore_l3_format_group,
        NULL,
};

static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
        &amd_uncore_attr_group,
        &amd_uncore_umc_format_group,
        NULL,
};

static __always_inline
int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
{
        union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
        return info->split.cid;
}

static __always_inline
int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu)
{
        union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
        return info->split.gid;
}

static __always_inline
int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu)
{
        union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
        return info->split.num_pmcs;
}

static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu)
{
        struct amd_uncore_pmu *pmu;
        struct amd_uncore_ctx *ctx;
        int i;

        if (!uncore->init_done)
                return;

        for (i = 0; i < uncore->num_pmus; i++) {
                pmu = &uncore->pmus[i];
                ctx = *per_cpu_ptr(pmu->ctx, cpu);
                if (!ctx)
                        continue;

                if (cpu == ctx->cpu)
                        cpumask_clear_cpu(cpu, &pmu->active_mask);

                if (!--ctx->refcnt) {
                        kfree(ctx->events);
                        kfree(ctx);
                }

                *per_cpu_ptr(pmu->ctx, cpu) = NULL;
        }
}

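/*
 * Editorial note: all CPUs that report the same context id (cid) share a
 * single amd_uncore_ctx, refcounted per CPU. Only the first CPU to arrive
 * allocates the context and is recorded in pmu->active_mask as the CPU that
 * services events for that uncore instance.
 */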
static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
        struct amd_uncore_ctx *curr, *prev;
        struct amd_uncore_pmu *pmu;
        int node, cid, gid, i, j;

        if (!uncore->init_done || !uncore->num_pmus)
                return 0;

        cid = amd_uncore_ctx_cid(uncore, cpu);
        gid = amd_uncore_ctx_gid(uncore, cpu);

        for (i = 0; i < uncore->num_pmus; i++) {
                pmu = &uncore->pmus[i];
                *per_cpu_ptr(pmu->ctx, cpu) = NULL;
                curr = NULL;

                /* Check for group exclusivity */
                if (gid != pmu->group)
                        continue;

                /* Find a sibling context */
                for_each_online_cpu(j) {
                        if (cpu == j)
                                continue;

                        prev = *per_cpu_ptr(pmu->ctx, j);
                        if (!prev)
                                continue;

                        if (cid == amd_uncore_ctx_cid(uncore, j)) {
                                curr = prev;
                                break;
                        }
                }

                /* Allocate context if sibling does not exist */
                if (!curr) {
                        node = cpu_to_node(cpu);
                        curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node);
                        if (!curr)
                                goto fail;

                        curr->cpu = cpu;
                        curr->events = kzalloc_node(sizeof(*curr->events) *
                                                    pmu->num_counters,
                                                    GFP_KERNEL, node);
                        if (!curr->events) {
                                kfree(curr);
                                goto fail;
                        }

                        amd_uncore_init_hrtimer(curr);
                        curr->hrtimer_duration = (u64)update_interval * NSEC_PER_MSEC;

                        cpumask_set_cpu(cpu, &pmu->active_mask);
                }

                curr->refcnt++;
                *per_cpu_ptr(pmu->ctx, cpu) = curr;
        }

        return 0;

fail:
        amd_uncore_ctx_free(uncore, cpu);

        return -ENOMEM;
}

static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu)
{
        struct amd_uncore_ctx *curr, *next;
        struct amd_uncore_pmu *pmu;
        int i, j;

        if (!uncore->init_done)
                return;

        for (i = 0; i < uncore->num_pmus; i++) {
                pmu = &uncore->pmus[i];
                curr = *per_cpu_ptr(pmu->ctx, cpu);
                if (!curr)
                        continue;

                /* Migrate to a shared sibling if possible */
                for_each_online_cpu(j) {
                        next = *per_cpu_ptr(pmu->ctx, j);
                        if (!next || cpu == j)
                                continue;

                        if (curr == next) {
                                perf_pmu_migrate_context(&pmu->pmu, cpu, j);
                                cpumask_clear_cpu(cpu, &pmu->active_mask);
                                cpumask_set_cpu(j, &pmu->active_mask);
                                next->cpu = j;
                                break;
                        }
                }
        }
}

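/*
 * Editorial note on the CPU hotplug flow (see amd_uncore_init()): the
 * "starting" callback scans topology info for the incoming CPU, "online"
 * creates or joins a shared context, "down_prepare" migrates events to a
 * sibling CPU, and "dead" drops the context reference.
 */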
static int amd_uncore_cpu_starting(unsigned int cpu)
{
        struct amd_uncore *uncore;
        int i;

        for (i = 0; i < UNCORE_TYPE_MAX; i++) {
                uncore = &uncores[i];
                uncore->scan(uncore, cpu);
        }

        return 0;
}

static int amd_uncore_cpu_online(unsigned int cpu)
{
        struct amd_uncore *uncore;
        int i;

        for (i = 0; i < UNCORE_TYPE_MAX; i++) {
                uncore = &uncores[i];
                if (uncore->init(uncore, cpu))
                        break;
        }

        return 0;
}

static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
        struct amd_uncore *uncore;
        int i;

        for (i = 0; i < UNCORE_TYPE_MAX; i++) {
                uncore = &uncores[i];
                uncore->move(uncore, cpu);
        }

        return 0;
}

static int amd_uncore_cpu_dead(unsigned int cpu)
{
        struct amd_uncore *uncore;
        int i;

        for (i = 0; i < UNCORE_TYPE_MAX; i++) {
                uncore = &uncores[i];
                uncore->free(uncore, cpu);
        }

        return 0;
}

static int amd_uncore_df_event_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        int ret = amd_uncore_event_init(event);

        if (ret || pmu_version < 2)
                return ret;

        hwc->config = event->attr.config &
                      (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB :
                                          AMD64_RAW_EVENT_MASK_NB);

        return 0;
}

static int amd_uncore_df_add(struct perf_event *event, int flags)
{
        int ret = amd_uncore_add(event, flags & ~PERF_EF_START);
        struct hw_perf_event *hwc = &event->hw;

        if (ret)
                return ret;

        /*
         * The first four DF counters are accessible via RDPMC index 6 to 9
         * followed by the L3 counters from index 10 to 15. For processors
         * with more than four DF counters, the DF RDPMC assignments become
         * discontiguous as the additional counters are accessible starting
         * from index 16.
         */
        if (hwc->idx >= NUM_COUNTERS_NB)
                hwc->event_base_rdpmc += NUM_COUNTERS_L3;

        /* Delayed start after rdpmc base update */
        if (flags & PERF_EF_START)
                amd_uncore_start(event, PERF_EF_RELOAD);

        return 0;
}

static
void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
        union cpuid_0x80000022_ebx ebx;
        union amd_uncore_info info;

        if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
                return;

        info.split.aux_data = 0;
        info.split.num_pmcs = NUM_COUNTERS_NB;
        info.split.gid = 0;
        info.split.cid = topology_logical_package_id(cpu);

        if (pmu_version >= 2) {
                ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
                info.split.num_pmcs = ebx.split.num_df_pmc;
        }

        *per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
        struct attribute **df_attr = amd_uncore_df_format_attr;
        struct amd_uncore_pmu *pmu;
        int num_counters;

        /* Run just once */
        if (uncore->init_done)
                return amd_uncore_ctx_init(uncore, cpu);

        num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
        if (!num_counters)
                goto done;

        /* No grouping, single instance for a system */
        uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
        if (!uncore->pmus)
                goto done;

        /*
         * For Family 17h and above, the Northbridge counters are repurposed
         * as Data Fabric counters. The PMUs are exported based on family as
         * either NB or DF.
         */
        pmu = &uncore->pmus[0];
        strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
                sizeof(pmu->name));
        pmu->num_counters = num_counters;
        pmu->msr_base = MSR_F15H_NB_PERF_CTL;
        pmu->rdpmc_base = RDPMC_BASE_NB;
        pmu->group = amd_uncore_ctx_gid(uncore, cpu);

        if (pmu_version >= 2) {
                *df_attr++ = &format_attr_event14v2.attr;
                *df_attr++ = &format_attr_umask12.attr;
        } else if (boot_cpu_data.x86 >= 0x17) {
                *df_attr = &format_attr_event14.attr;
        }

        pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
        if (!pmu->ctx)
                goto done;

        pmu->pmu = (struct pmu) {
                .task_ctx_nr = perf_invalid_context,
                .attr_groups = amd_uncore_df_attr_groups,
                .name = pmu->name,
                .event_init = amd_uncore_df_event_init,
                .add = amd_uncore_df_add,
                .del = amd_uncore_del,
                .start = amd_uncore_start,
                .stop = amd_uncore_stop,
                .read = amd_uncore_read,
                .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
                .module = THIS_MODULE,
        };

        if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
                free_percpu(pmu->ctx);
                pmu->ctx = NULL;
                goto done;
        }

        pr_info("%d %s%s counters detected\n", pmu->num_counters,
                boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
                pmu->pmu.name);

        uncore->num_pmus = 1;

done:
        uncore->init_done = true;

        return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_l3_event_init(struct perf_event *event)
{
        int ret = amd_uncore_event_init(event);
        struct hw_perf_event *hwc = &event->hw;
        u64 config = event->attr.config;
        u64 mask;

        hwc->config = config & AMD64_RAW_EVENT_MASK_NB;

        /*
         * SliceMask and ThreadMask need to be set for certain L3 events.
         * For other events, the two fields do not affect the count.
         */
        if (ret || boot_cpu_data.x86 < 0x17)
                return ret;

        mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
                         AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
                         AMD64_L3_COREID_MASK);

        if (boot_cpu_data.x86 <= 0x18)
                mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
                       ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);

        /*
         * If the user doesn't specify a ThreadMask, they're not trying to
         * count core 0, so we enable all cores & threads.
         * We'll also assume that they want to count slice 0 if they specify
         * a ThreadMask and leave SliceId and EnAllSlices unpopulated.
         */
        else if (!(config & AMD64_L3_F19H_THREAD_MASK))
                mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
                       AMD64_L3_EN_ALL_CORES;

        hwc->config |= mask;

        return 0;
}

static
void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
        union amd_uncore_info info;

        if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
                return;

        info.split.aux_data = 0;
        info.split.num_pmcs = NUM_COUNTERS_L2;
        info.split.gid = 0;
        info.split.cid = per_cpu_llc_id(cpu);

        if (boot_cpu_data.x86 >= 0x17)
                info.split.num_pmcs = NUM_COUNTERS_L3;

        *per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
        struct attribute **l3_attr = amd_uncore_l3_format_attr;
        struct amd_uncore_pmu *pmu;
        int num_counters;

        /* Run just once */
        if (uncore->init_done)
                return amd_uncore_ctx_init(uncore, cpu);

        num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
        if (!num_counters)
                goto done;

        /* No grouping, single instance for a system */
        uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
        if (!uncore->pmus)
                goto done;

        /*
         * For Family 17h and above, L3 cache counters are available instead
         * of L2 cache counters. The PMUs are exported based on family as
         * either L2 or L3.
         */
        pmu = &uncore->pmus[0];
        strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
                sizeof(pmu->name));
        pmu->num_counters = num_counters;
        pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
        pmu->rdpmc_base = RDPMC_BASE_LLC;
        pmu->group = amd_uncore_ctx_gid(uncore, cpu);

        if (boot_cpu_data.x86 >= 0x17) {
                *l3_attr++ = &format_attr_event8.attr;
                *l3_attr++ = &format_attr_umask8.attr;
                *l3_attr++ = boot_cpu_data.x86 >= 0x19 ?
                             &format_attr_threadmask2.attr :
                             &format_attr_threadmask8.attr;
        }

        pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
        if (!pmu->ctx)
                goto done;

        pmu->pmu = (struct pmu) {
                .task_ctx_nr = perf_invalid_context,
                .attr_groups = amd_uncore_l3_attr_groups,
                .attr_update = amd_uncore_l3_attr_update,
                .name = pmu->name,
                .event_init = amd_uncore_l3_event_init,
                .add = amd_uncore_add,
                .del = amd_uncore_del,
                .start = amd_uncore_start,
                .stop = amd_uncore_stop,
                .read = amd_uncore_read,
                .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
                .module = THIS_MODULE,
        };

        if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
                free_percpu(pmu->ctx);
                pmu->ctx = NULL;
                goto done;
        }

        pr_info("%d %s%s counters detected\n", pmu->num_counters,
                boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
                pmu->pmu.name);

        uncore->num_pmus = 1;

done:
        uncore->init_done = true;

        return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_umc_event_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        int ret = amd_uncore_event_init(event);

        if (ret)
                return ret;

        hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;

        return 0;
}

static void amd_uncore_umc_start(struct perf_event *event, int flags)
{
        struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
        struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
        struct hw_perf_event *hwc = &event->hw;

        if (!ctx->nr_active++)
                amd_uncore_start_hrtimer(ctx);

        if (flags & PERF_EF_RELOAD)
                wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count));

        hwc->state = 0;
        __set_bit(hwc->idx, ctx->active_mask);
        wrmsrq(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
        perf_event_update_userpage(event);
}

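/*
 * Editorial note: UMC deltas use COUNTER_SHIFT + 1 (17), i.e. only the low
 * 47 bits contribute to the count; bit 47 doubles as the trigger for the
 * proactive reset described in the comment inside the function below.
 */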
static void amd_uncore_umc_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new, shift;
        s64 delta;

        shift = COUNTER_SHIFT + 1;
        prev = local64_read(&hwc->prev_count);

        /*
         * UMC counters do not have RDPMC assignments. Read counts directly
         * from the corresponding PERF_CTR.
         */
        rdmsrq(hwc->event_base, new);

        /*
         * Unlike the other uncore counters, UMC counters saturate and set the
         * Overflow bit (bit 48) on overflow. Since they do not roll over,
         * proactively reset the corresponding PERF_CTR when bit 47 is set so
         * that the counter never gets a chance to saturate.
         */
        if (new & BIT_ULL(63 - COUNTER_SHIFT)) {
                wrmsrq(hwc->event_base, 0);
                local64_set(&hwc->prev_count, 0);
        } else {
                local64_set(&hwc->prev_count, new);
        }

        delta = (new << shift) - (prev << shift);
        delta >>= shift;
        local64_add(delta, &event->count);
}

static
void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
        union cpuid_0x80000022_ebx ebx;
        union amd_uncore_info info;
        unsigned int eax, ecx, edx;

        if (pmu_version < 2)
                return;

        cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
        info.split.aux_data = ecx; /* stash active mask */
        info.split.num_pmcs = ebx.split.num_umc_pmc;
        info.split.gid = topology_logical_package_id(cpu);
        info.split.cid = topology_logical_package_id(cpu);
        *per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
        DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
        u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
        u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
        union amd_uncore_info info;
        struct amd_uncore_pmu *pmu;
        int gid, i;
        u16 index = 0;

        if (pmu_version < 2)
                return 0;

        /* Run just once */
        if (uncore->init_done)
                return amd_uncore_ctx_init(uncore, cpu);

        /* Find unique groups */
        for_each_online_cpu(i) {
                info = *per_cpu_ptr(uncore->info, i);
                gid = info.split.gid;
                if (test_bit(gid, gmask))
                        continue;

                __set_bit(gid, gmask);
                group_num_pmus[gid] = hweight32(info.split.aux_data);
                group_num_pmcs[gid] = info.split.num_pmcs;
                uncore->num_pmus += group_num_pmus[gid];
        }

        uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
                               GFP_KERNEL);
        if (!uncore->pmus) {
                uncore->num_pmus = 0;
                goto done;
        }

        for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
                for (i = 0; i < group_num_pmus[gid]; i++) {
                        pmu = &uncore->pmus[index];
                        snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%hu", index);
                        pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
                        pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
                        pmu->rdpmc_base = -1;
                        pmu->group = gid;

                        pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
                        if (!pmu->ctx)
                                goto done;

                        pmu->pmu = (struct pmu) {
                                .task_ctx_nr = perf_invalid_context,
                                .attr_groups = amd_uncore_umc_attr_groups,
                                .name = pmu->name,
                                .event_init = amd_uncore_umc_event_init,
                                .add = amd_uncore_add,
                                .del = amd_uncore_del,
                                .start = amd_uncore_umc_start,
                                .stop = amd_uncore_stop,
                                .read = amd_uncore_umc_read,
                                .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
                                .module = THIS_MODULE,
                        };

                        if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
                                free_percpu(pmu->ctx);
                                pmu->ctx = NULL;
                                goto done;
                        }

                        pr_info("%d %s counters detected\n", pmu->num_counters,
                                pmu->pmu.name);

                        index++;
                }
        }

done:
        uncore->num_pmus = index;
        uncore->init_done = true;

        return amd_uncore_ctx_init(uncore, cpu);
}

static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
        /* UNCORE_TYPE_DF */
        {
                .scan = amd_uncore_df_ctx_scan,
                .init = amd_uncore_df_ctx_init,
                .move = amd_uncore_ctx_move,
                .free = amd_uncore_ctx_free,
        },
        /* UNCORE_TYPE_L3 */
        {
                .scan = amd_uncore_l3_ctx_scan,
                .init = amd_uncore_l3_ctx_init,
                .move = amd_uncore_ctx_move,
                .free = amd_uncore_ctx_free,
        },
        /* UNCORE_TYPE_UMC */
        {
                .scan = amd_uncore_umc_ctx_scan,
                .init = amd_uncore_umc_ctx_init,
                .move = amd_uncore_ctx_move,
                .free = amd_uncore_ctx_free,
        },
};

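/*
 * Editorial note: once registered, these PMUs can be targeted from user
 * space with perf. For example (the event/umask values below are
 * placeholders, not taken from this file):
 *
 *   perf stat -e amd_df/event=0x0,umask=0x0/ -a sleep 1
 *   perf stat -e amd_l3/event=0x0,umask=0x0,threadmask=0x3/ -a sleep 1
 */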
static int __init amd_uncore_init(void)
{
        struct amd_uncore *uncore;
        int ret = -ENODEV;
        int i;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
            boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
                return -ENODEV;

        if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
                return -ENODEV;

        if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
                pmu_version = 2;

        for (i = 0; i < UNCORE_TYPE_MAX; i++) {
                uncore = &uncores[i];

                BUG_ON(!uncore->scan);
                BUG_ON(!uncore->init);
                BUG_ON(!uncore->move);
                BUG_ON(!uncore->free);

                uncore->info = alloc_percpu(union amd_uncore_info);
                if (!uncore->info) {
                        ret = -ENOMEM;
                        goto fail;
                }
        };

        /*
         * Install callbacks. Core will call them for each online cpu.
         */
        ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
                                "perf/x86/amd/uncore:prepare",
                                NULL, amd_uncore_cpu_dead);
        if (ret)
                goto fail;

        ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
                                "perf/x86/amd/uncore:starting",
                                amd_uncore_cpu_starting, NULL);
        if (ret)
                goto fail_prep;

        ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
                                "perf/x86/amd/uncore:online",
                                amd_uncore_cpu_online,
                                amd_uncore_cpu_down_prepare);
        if (ret)
                goto fail_start;

        return 0;

fail_start:
        cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
        cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail:
        for (i = 0; i < UNCORE_TYPE_MAX; i++) {
                uncore = &uncores[i];
                if (uncore->info) {
                        free_percpu(uncore->info);
                        uncore->info = NULL;
                }
        }

        return ret;
}

static void __exit amd_uncore_exit(void)
{
        struct amd_uncore *uncore;
        struct amd_uncore_pmu *pmu;
        int i, j;

        cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
        cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
        cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);

        for (i = 0; i < UNCORE_TYPE_MAX; i++) {
                uncore = &uncores[i];
                if (!uncore->info)
                        continue;

                free_percpu(uncore->info);
                uncore->info = NULL;

                for (j = 0; j < uncore->num_pmus; j++) {
                        pmu = &uncore->pmus[j];
                        if (!pmu->ctx)
                                continue;

                        perf_pmu_unregister(&pmu->pmu);
                        free_percpu(pmu->ctx);
                        pmu->ctx = NULL;
                }

                kfree(uncore->pmus);
                uncore->pmus = NULL;
        }
}

module_init(amd_uncore_init);
module_exit(amd_uncore_exit);

MODULE_DESCRIPTION("AMD Uncore Driver");
MODULE_LICENSE("GPL v2");