GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/events/amd/uncore.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <[email protected]>
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufeature.h>
#include <linux/smp.h>

#include <asm/perf_event.h>
#include <asm/msr.h>

#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define NUM_COUNTERS_L3		6
#define NUM_COUNTERS_MAX	64

#define RDPMC_BASE_NB		6
#define RDPMC_BASE_LLC		10

#define COUNTER_SHIFT		16
#define UNCORE_NAME_LEN		16
#define UNCORE_GROUP_MAX	256

#undef pr_fmt
#define pr_fmt(fmt)	"amd_uncore: " fmt

static int pmu_version;

struct amd_uncore_ctx {
	int refcnt;
	int cpu;
	struct perf_event **events;
	unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)];
	int nr_active;
	struct hrtimer hrtimer;
	u64 hrtimer_duration;
};

struct amd_uncore_pmu {
	char name[UNCORE_NAME_LEN];
	int num_counters;
	int rdpmc_base;
	u32 msr_base;
	int group;
	cpumask_t active_mask;
	struct pmu pmu;
	struct amd_uncore_ctx * __percpu *ctx;
};

enum {
	UNCORE_TYPE_DF,
	UNCORE_TYPE_L3,
	UNCORE_TYPE_UMC,

	UNCORE_TYPE_MAX
};

union amd_uncore_info {
	struct {
		u64 aux_data:32;	/* auxiliary data */
		u64 num_pmcs:8;		/* number of counters */
		u64 gid:8;		/* group id */
		u64 cid:8;		/* context id */
	} split;
	u64 full;
};

struct amd_uncore {
	union amd_uncore_info __percpu *info;
	struct amd_uncore_pmu *pmus;
	unsigned int num_pmus;
	bool init_done;
	void (*scan)(struct amd_uncore *uncore, unsigned int cpu);
	int (*init)(struct amd_uncore *uncore, unsigned int cpu);
	void (*move)(struct amd_uncore *uncore, unsigned int cpu);
	void (*free)(struct amd_uncore *uncore, unsigned int cpu);
};

static struct amd_uncore uncores[UNCORE_TYPE_MAX];

/* Interval for hrtimer, defaults to 60000 milliseconds */
static unsigned int update_interval = 60 * MSEC_PER_SEC;
module_param(update_interval, uint, 0444);

static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct amd_uncore_pmu, pmu);
}

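/*
 * None of these uncore PMUs use counter overflow interrupts
 * (PERF_PMU_CAP_NO_INTERRUPT). Counts are 48 bits wide (see COUNTER_SHIFT),
 * so a pinned hrtimer periodically reads every active event on the owning
 * CPU and folds the deltas into event->count well before a counter is
 * likely to wrap. The period comes from the update_interval module
 * parameter above.
 */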
static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
{
	struct amd_uncore_ctx *ctx;
	struct perf_event *event;
	int bit;

	ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);

	if (!ctx->nr_active || ctx->cpu != smp_processor_id())
		return HRTIMER_NORESTART;

	for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
		event = ctx->events[bit];
		event->pmu->read(event);
	}

	hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
	return HRTIMER_RESTART;
}

static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
{
	hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
		      HRTIMER_MODE_REL_PINNED_HARD);
}

static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx)
{
	hrtimer_cancel(&ctx->hrtimer);
}

static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx)
{
	hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
}

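/*
 * The shift-up/shift-down dance below keeps only the low 48 bits of both
 * samples, so the subtraction yields the correct delta even if the counter
 * wrapped between two reads (the arithmetic is effectively modulo 2^48).
 */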
static void amd_uncore_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;
	s64 delta;

	/*
	 * since we do not enable counter overflow interrupts,
	 * we do not have to worry about prev_count changing on us
	 */

	prev = local64_read(&hwc->prev_count);

	/*
	 * Some uncore PMUs do not have RDPMC assignments. In such cases,
	 * read counts directly from the corresponding PERF_CTR.
	 */
	if (hwc->event_base_rdpmc < 0)
		rdmsrq(hwc->event_base, new);
	else
		new = rdpmc(hwc->event_base_rdpmc);

	local64_set(&hwc->prev_count, new);
	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
	delta >>= COUNTER_SHIFT;
	local64_add(delta, &event->count);
}

static void amd_uncore_start(struct perf_event *event, int flags)
{
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	if (!ctx->nr_active++)
		amd_uncore_start_hrtimer(ctx);

	if (flags & PERF_EF_RELOAD)
		wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	__set_bit(hwc->idx, ctx->active_mask);
	wrmsrq(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
	perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	wrmsrq(hwc->config_base, hwc->config);
	hwc->state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		event->pmu->read(event);
		hwc->state |= PERF_HES_UPTODATE;
	}

	if (!--ctx->nr_active)
		amd_uncore_cancel_hrtimer(ctx);

	__clear_bit(hwc->idx, ctx->active_mask);
}

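/*
 * Counter assignment: reuse the slot if this event already owns one,
 * otherwise claim the first free entry in ctx->events[] with try_cmpxchg()
 * (contexts are shared by all CPUs behind the same NB/LLC/UMC, so the claim
 * must be atomic). Each counter is a CTL/CTR MSR pair, hence the 2 * idx
 * stride below.
 */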
static int amd_uncore_add(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	/* are we already assigned? */
	if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
		goto out;

	for (i = 0; i < pmu->num_counters; i++) {
		if (ctx->events[i] == event) {
			hwc->idx = i;
			goto out;
		}
	}

	/* if not, take the first available counter */
	hwc->idx = -1;
	for (i = 0; i < pmu->num_counters; i++) {
		struct perf_event *tmp = NULL;

		if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
			hwc->idx = i;
			break;
		}
	}

out:
	if (hwc->idx == -1)
		return -EBUSY;

	hwc->config_base = pmu->msr_base + (2 * hwc->idx);
	hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
	hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (pmu->rdpmc_base < 0)
		hwc->event_base_rdpmc = -1;

	if (flags & PERF_EF_START)
		event->pmu->start(event, PERF_EF_RELOAD);

	return 0;
}

static void amd_uncore_del(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	event->pmu->stop(event, PERF_EF_UPDATE);

	for (i = 0; i < pmu->num_counters; i++) {
		struct perf_event *tmp = event;

		if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
			break;
	}

	hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
	struct amd_uncore_pmu *pmu;
	struct amd_uncore_ctx *ctx;
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (event->cpu < 0)
		return -EINVAL;

	pmu = event_to_amd_uncore_pmu(event);
	ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	if (!ctx)
		return -ENODEV;

	/*
	 * NB and Last level cache counters (MSRs) are shared across all cores
	 * that share the same NB / Last level cache. On family 16h and below,
	 * interrupts can be directed to a single target core, however, event
	 * counts generated by processes running on other cores cannot be
	 * masked out. So we do not support sampling and per-thread events via
	 * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
	 */
	hwc->config = event->attr.config;
	hwc->idx = -1;

	/*
	 * since request can come in to any of the shared cores, we will remap
	 * to a single common cpu.
	 */
	event->cpu = ctx->cpu;

	return 0;
}

static umode_t
amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
	       attr->mode : 0;
}

static umode_t
amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
}

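/*
 * The "cpumask" sysfs attribute exposes the designated reader CPU of each
 * shared context. amd_uncore_event_init() silently remaps event->cpu to that
 * CPU, so tools are expected to open uncore events on one of the CPUs listed
 * here rather than once per core.
 */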
static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
{
	struct pmu *ptr = dev_get_drvdata(dev);
	struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu);

	return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group amd_uncore_attr_group = {
	.attrs = amd_uncore_attrs,
};

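/*
 * The format attributes below describe how raw perf_event_attr::config bits
 * map onto the hardware control registers, e.g. "event" and "umask" for the
 * DF/L3 PMUs. With the perf tool this typically looks something like
 * "perf stat -a -e amd_l3/event=0xNN,umask=0xMM/"; the event and umask
 * values shown here are placeholders, not real event encodings.
 */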
#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)		\
static ssize_t __uncore_##_var##_show(struct device *dev,		\
				      struct device_attribute *attr,	\
				      char *page)			\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
static struct device_attribute format_attr_##_var =			\
	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)

DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3, PerfMonV2 UMC */
DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(umask12, umask, "config:8-15,24-27"); /* PerfMonV2 DF */
DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */
DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
DEFINE_UNCORE_FORMAT_ATTR(rdwrmask, rdwrmask, "config:8-9"); /* PerfMonV2 UMC */

/* Common DF and NB attributes */
static struct attribute *amd_uncore_df_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,
};

/* Common L2 and L3 attributes */
static struct attribute *amd_uncore_l3_format_attr[] = {
	&format_attr_event12.attr,	/* event */
	&format_attr_umask8.attr,	/* umask */
	NULL,				/* threadmask */
	NULL,
};

/* Common UMC attributes */
static struct attribute *amd_uncore_umc_format_attr[] = {
	&format_attr_event8.attr,	/* event */
	&format_attr_rdwrmask.attr,	/* rdwrmask */
	NULL,
};

/* F17h unique L3 attributes */
static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
	&format_attr_slicemask.attr,	/* slicemask */
	NULL,
};

/* F19h unique L3 attributes */
static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
	&format_attr_coreid.attr,	/* coreid */
	&format_attr_enallslices.attr,	/* enallslices */
	&format_attr_enallcores.attr,	/* enallcores */
	&format_attr_sliceid.attr,	/* sliceid */
	NULL,
};

static struct attribute_group amd_uncore_df_format_group = {
	.name = "format",
	.attrs = amd_uncore_df_format_attr,
};

static struct attribute_group amd_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_uncore_l3_format_attr,
};

static struct attribute_group amd_f17h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f17h_uncore_l3_format_attr,
	.is_visible = amd_f17h_uncore_is_visible,
};

static struct attribute_group amd_f19h_uncore_l3_format_group = {
	.name = "format",
	.attrs = amd_f19h_uncore_l3_format_attr,
	.is_visible = amd_f19h_uncore_is_visible,
};

static struct attribute_group amd_uncore_umc_format_group = {
	.name = "format",
	.attrs = amd_uncore_umc_format_attr,
};

static const struct attribute_group *amd_uncore_df_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_df_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_l3_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_l3_attr_update[] = {
	&amd_f17h_uncore_l3_format_group,
	&amd_f19h_uncore_l3_format_group,
	NULL,
};

static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_umc_format_group,
	NULL,
};

static __always_inline
int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.cid;
}

static __always_inline
int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.gid;
}

static __always_inline
int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
	return info->split.num_pmcs;
}

static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_pmu *pmu;
	struct amd_uncore_ctx *ctx;
	int i;

	if (!uncore->init_done)
		return;

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		ctx = *per_cpu_ptr(pmu->ctx, cpu);
		if (!ctx)
			continue;

		if (cpu == ctx->cpu)
			cpumask_clear_cpu(cpu, &pmu->active_mask);

		if (!--ctx->refcnt) {
			kfree(ctx->events);
			kfree(ctx);
		}

		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
	}
}

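/*
 * Set up (or reuse) the shared context for @cpu on every PMU of this uncore.
 * CPUs reporting the same context id (cid) within the same group (gid) share
 * a single refcounted amd_uncore_ctx; the first CPU to arrive allocates it,
 * becomes the designated reader (ctx->cpu) and is added to the PMU's
 * "cpumask" attribute via active_mask.
 */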
static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_ctx *curr, *prev;
	struct amd_uncore_pmu *pmu;
	int node, cid, gid, i, j;

	if (!uncore->init_done || !uncore->num_pmus)
		return 0;

	cid = amd_uncore_ctx_cid(uncore, cpu);
	gid = amd_uncore_ctx_gid(uncore, cpu);

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		*per_cpu_ptr(pmu->ctx, cpu) = NULL;
		curr = NULL;

		/* Check for group exclusivity */
		if (gid != pmu->group)
			continue;

		/* Find a sibling context */
		for_each_online_cpu(j) {
			if (cpu == j)
				continue;

			prev = *per_cpu_ptr(pmu->ctx, j);
			if (!prev)
				continue;

			if (cid == amd_uncore_ctx_cid(uncore, j)) {
				curr = prev;
				break;
			}
		}

		/* Allocate context if sibling does not exist */
		if (!curr) {
			node = cpu_to_node(cpu);
			curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node);
			if (!curr)
				goto fail;

			curr->cpu = cpu;
			curr->events = kzalloc_node(sizeof(*curr->events) *
						    pmu->num_counters,
						    GFP_KERNEL, node);
			if (!curr->events) {
				kfree(curr);
				goto fail;
			}

			amd_uncore_init_hrtimer(curr);
			curr->hrtimer_duration = (u64)update_interval * NSEC_PER_MSEC;

			cpumask_set_cpu(cpu, &pmu->active_mask);
		}

		curr->refcnt++;
		*per_cpu_ptr(pmu->ctx, cpu) = curr;
	}

	return 0;

fail:
	amd_uncore_ctx_free(uncore, cpu);

	return -ENOMEM;
}

static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu)
{
	struct amd_uncore_ctx *curr, *next;
	struct amd_uncore_pmu *pmu;
	int i, j;

	if (!uncore->init_done)
		return;

	for (i = 0; i < uncore->num_pmus; i++) {
		pmu = &uncore->pmus[i];
		curr = *per_cpu_ptr(pmu->ctx, cpu);
		if (!curr)
			continue;

		/* Migrate to a shared sibling if possible */
		for_each_online_cpu(j) {
			next = *per_cpu_ptr(pmu->ctx, j);
			if (!next || cpu == j)
				continue;

			if (curr == next) {
				perf_pmu_migrate_context(&pmu->pmu, cpu, j);
				cpumask_clear_cpu(cpu, &pmu->active_mask);
				cpumask_set_cpu(j, &pmu->active_mask);
				next->cpu = j;
				break;
			}
		}
	}
}

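/*
 * CPU hotplug glue (registered in amd_uncore_init()):
 *   STARTING -> ->scan()  records per-CPU topology info (runs on the CPU)
 *   ONLINE   -> ->init()  registers PMUs on first use and wires up contexts
 *   (down)   -> ->move()  hands the designated-reader role to a sibling
 *   DEAD     -> ->free()  drops the context reference for the departed CPU
 */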
static int amd_uncore_cpu_starting(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->scan(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_cpu_online(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (uncore->init(uncore, cpu))
			break;
	}

	return 0;
}

static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->move(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_cpu_dead(unsigned int cpu)
{
	struct amd_uncore *uncore;
	int i;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		uncore->free(uncore, cpu);
	}

	return 0;
}

static int amd_uncore_df_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret = amd_uncore_event_init(event);

	hwc->config = event->attr.config &
		      (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB :
					  AMD64_RAW_EVENT_MASK_NB);

	return ret;
}

static int amd_uncore_df_add(struct perf_event *event, int flags)
{
	int ret = amd_uncore_add(event, flags & ~PERF_EF_START);
	struct hw_perf_event *hwc = &event->hw;

	if (ret)
		return ret;

	/*
	 * The first four DF counters are accessible via RDPMC index 6 to 9
	 * followed by the L3 counters from index 10 to 15. For processors
	 * with more than four DF counters, the DF RDPMC assignments become
	 * discontiguous as the additional counters are accessible starting
	 * from index 16.
	 */
	if (hwc->idx >= NUM_COUNTERS_NB)
		hwc->event_base_rdpmc += NUM_COUNTERS_L3;

	/* Delayed start after rdpmc base update */
	if (flags & PERF_EF_START)
		amd_uncore_start(event, PERF_EF_RELOAD);

	return 0;
}

static
void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union cpuid_0x80000022_ebx ebx;
	union amd_uncore_info info;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
		return;

	info.split.aux_data = 0;
	info.split.num_pmcs = NUM_COUNTERS_NB;
	info.split.gid = 0;
	info.split.cid = topology_logical_package_id(cpu);

	if (pmu_version >= 2) {
		ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
		info.split.num_pmcs = ebx.split.num_df_pmc;
	}

	*per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **df_attr = amd_uncore_df_format_attr;
	struct amd_uncore_pmu *pmu;
	int num_counters;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	if (!num_counters)
		goto done;

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
	if (!uncore->pmus)
		goto done;

	/*
	 * For Family 17h and above, the Northbridge counters are repurposed
	 * as Data Fabric counters. The PMUs are exported based on family as
	 * either NB or DF.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
		sizeof(pmu->name));
	pmu->num_counters = num_counters;
	pmu->msr_base = MSR_F15H_NB_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_NB;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	if (pmu_version >= 2) {
		*df_attr++ = &format_attr_event14v2.attr;
		*df_attr++ = &format_attr_umask12.attr;
	} else if (boot_cpu_data.x86 >= 0x17) {
		*df_attr = &format_attr_event14.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_df_attr_groups,
		.name		= pmu->name,
		.event_init	= amd_uncore_df_event_init,
		.add		= amd_uncore_df_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_l3_event_init(struct perf_event *event)
{
	int ret = amd_uncore_event_init(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 config = event->attr.config;
	u64 mask;

	hwc->config = config & AMD64_RAW_EVENT_MASK_NB;

	/*
	 * SliceMask and ThreadMask need to be set for certain L3 events.
	 * For other events, the two fields do not affect the count.
	 */
	if (ret || boot_cpu_data.x86 < 0x17)
		return ret;

	mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
			 AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
			 AMD64_L3_COREID_MASK);

	if (boot_cpu_data.x86 <= 0x18)
		mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
		       ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);

	/*
	 * If the user doesn't specify a ThreadMask, they're not trying to
	 * count core 0, so we enable all cores & threads.
	 * We'll also assume that they want to count slice 0 if they specify
	 * a ThreadMask and leave SliceId and EnAllSlices unpopulated.
	 */
	else if (!(config & AMD64_L3_F19H_THREAD_MASK))
		mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
		       AMD64_L3_EN_ALL_CORES;

	hwc->config |= mask;

	return 0;
}

static
void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union amd_uncore_info info;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
		return;

	info.split.aux_data = 0;
	info.split.num_pmcs = NUM_COUNTERS_L2;
	info.split.gid = 0;
	info.split.cid = per_cpu_llc_id(cpu);

	if (boot_cpu_data.x86 >= 0x17)
		info.split.num_pmcs = NUM_COUNTERS_L3;

	*per_cpu_ptr(uncore->info, cpu) = info;
}

static
int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	struct attribute **l3_attr = amd_uncore_l3_format_attr;
	struct amd_uncore_pmu *pmu;
	int num_counters;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
	if (!num_counters)
		goto done;

	/* No grouping, single instance for a system */
	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
	if (!uncore->pmus)
		goto done;

	/*
	 * For Family 17h and above, L3 cache counters are available instead
	 * of L2 cache counters. The PMUs are exported based on family as
	 * either L2 or L3.
	 */
	pmu = &uncore->pmus[0];
	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
		sizeof(pmu->name));
	pmu->num_counters = num_counters;
	pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
	pmu->rdpmc_base = RDPMC_BASE_LLC;
	pmu->group = amd_uncore_ctx_gid(uncore, cpu);

	if (boot_cpu_data.x86 >= 0x17) {
		*l3_attr++ = &format_attr_event8.attr;
		*l3_attr++ = &format_attr_umask8.attr;
		*l3_attr++ = boot_cpu_data.x86 >= 0x19 ?
			     &format_attr_threadmask2.attr :
			     &format_attr_threadmask8.attr;
	}

	pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
	if (!pmu->ctx)
		goto done;

	pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.attr_groups	= amd_uncore_l3_attr_groups,
		.attr_update	= amd_uncore_l3_attr_update,
		.name		= pmu->name,
		.event_init	= amd_uncore_l3_event_init,
		.add		= amd_uncore_add,
		.del		= amd_uncore_del,
		.start		= amd_uncore_start,
		.stop		= amd_uncore_stop,
		.read		= amd_uncore_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
		.module		= THIS_MODULE,
	};

	if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
		free_percpu(pmu->ctx);
		pmu->ctx = NULL;
		goto done;
	}

	pr_info("%d %s%s counters detected\n", pmu->num_counters,
		boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "",
		pmu->pmu.name);

	uncore->num_pmus = 1;

done:
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static int amd_uncore_umc_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret = amd_uncore_event_init(event);

	if (ret)
		return ret;

	hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;

	return 0;
}

static void amd_uncore_umc_start(struct perf_event *event, int flags)
{
	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
	struct hw_perf_event *hwc = &event->hw;

	if (!ctx->nr_active++)
		amd_uncore_start_hrtimer(ctx);

	if (flags & PERF_EF_RELOAD)
		wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	__set_bit(hwc->idx, ctx->active_mask);
	wrmsrq(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
	perf_event_update_userpage(event);
}

static void amd_uncore_umc_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new, shift;
	s64 delta;

	shift = COUNTER_SHIFT + 1;
	prev = local64_read(&hwc->prev_count);

	/*
	 * UMC counters do not have RDPMC assignments. Read counts directly
	 * from the corresponding PERF_CTR.
	 */
	rdmsrq(hwc->event_base, new);

	/*
	 * Unlike the other uncore counters, UMC counters saturate and set the
	 * Overflow bit (bit 48) on overflow. Since they do not roll over,
	 * proactively reset the corresponding PERF_CTR when bit 47 is set so
	 * that the counter never gets a chance to saturate.
	 */
	if (new & BIT_ULL(63 - COUNTER_SHIFT)) {
		wrmsrq(hwc->event_base, 0);
		local64_set(&hwc->prev_count, 0);
	} else {
		local64_set(&hwc->prev_count, new);
	}

	delta = (new << shift) - (prev << shift);
	delta >>= shift;
	local64_add(delta, &event->count);
}

static
void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
{
	union cpuid_0x80000022_ebx ebx;
	union amd_uncore_info info;
	unsigned int eax, ecx, edx;

	if (pmu_version < 2)
		return;

	cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
	info.split.aux_data = ecx;	/* stash active mask */
	info.split.num_pmcs = ebx.split.num_umc_pmc;
	info.split.gid = topology_logical_package_id(cpu);
	info.split.cid = topology_logical_package_id(cpu);
	*per_cpu_ptr(uncore->info, cpu) = info;
}

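/*
 * UMC PMUs are grouped per package: ->scan() stores the package id as both
 * gid and cid and stashes the active UMC mask (CPUID leaf 0x80000022 ECX) in
 * aux_data. One "amd_umc_N" PMU is registered per active UMC in each group,
 * with the available counters split evenly between them.
 */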
static
int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
{
	DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
	u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
	u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
	union amd_uncore_info info;
	struct amd_uncore_pmu *pmu;
	int gid, i;
	u16 index = 0;

	if (pmu_version < 2)
		return 0;

	/* Run just once */
	if (uncore->init_done)
		return amd_uncore_ctx_init(uncore, cpu);

	/* Find unique groups */
	for_each_online_cpu(i) {
		info = *per_cpu_ptr(uncore->info, i);
		gid = info.split.gid;
		if (test_bit(gid, gmask))
			continue;

		__set_bit(gid, gmask);
		group_num_pmus[gid] = hweight32(info.split.aux_data);
		group_num_pmcs[gid] = info.split.num_pmcs;
		uncore->num_pmus += group_num_pmus[gid];
	}

	uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
			       GFP_KERNEL);
	if (!uncore->pmus) {
		uncore->num_pmus = 0;
		goto done;
	}

	for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
		for (i = 0; i < group_num_pmus[gid]; i++) {
			pmu = &uncore->pmus[index];
			snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%hu", index);
			pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
			pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
			pmu->rdpmc_base = -1;
			pmu->group = gid;

			pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
			if (!pmu->ctx)
				goto done;

			pmu->pmu = (struct pmu) {
				.task_ctx_nr	= perf_invalid_context,
				.attr_groups	= amd_uncore_umc_attr_groups,
				.name		= pmu->name,
				.event_init	= amd_uncore_umc_event_init,
				.add		= amd_uncore_add,
				.del		= amd_uncore_del,
				.start		= amd_uncore_umc_start,
				.stop		= amd_uncore_stop,
				.read		= amd_uncore_umc_read,
				.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
				.module		= THIS_MODULE,
			};

			if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
				free_percpu(pmu->ctx);
				pmu->ctx = NULL;
				goto done;
			}

			pr_info("%d %s counters detected\n", pmu->num_counters,
				pmu->pmu.name);

			index++;
		}
	}

done:
	uncore->num_pmus = index;
	uncore->init_done = true;

	return amd_uncore_ctx_init(uncore, cpu);
}

static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
	/* UNCORE_TYPE_DF */
	{
		.scan = amd_uncore_df_ctx_scan,
		.init = amd_uncore_df_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_L3 */
	{
		.scan = amd_uncore_l3_ctx_scan,
		.init = amd_uncore_l3_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
	/* UNCORE_TYPE_UMC */
	{
		.scan = amd_uncore_umc_ctx_scan,
		.init = amd_uncore_umc_ctx_init,
		.move = amd_uncore_ctx_move,
		.free = amd_uncore_ctx_free,
	},
};

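/*
 * Module init: bail out unless running on AMD/Hygon with topology extensions,
 * note PerfMonV2 support (pmu_version = 2), allocate the per-CPU info slot
 * for each uncore type, then register the three hotplug states above. The
 * actual perf_pmu_register() calls happen lazily from the ONLINE callback on
 * the first CPU that reaches ->init().
 */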
static int __init amd_uncore_init(void)
{
	struct amd_uncore *uncore;
	int ret = -ENODEV;
	int i;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
		return -ENODEV;

	if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
		pmu_version = 2;

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];

		BUG_ON(!uncore->scan);
		BUG_ON(!uncore->init);
		BUG_ON(!uncore->move);
		BUG_ON(!uncore->free);

		uncore->info = alloc_percpu(union amd_uncore_info);
		if (!uncore->info) {
			ret = -ENOMEM;
			goto fail;
		}
	};

	/*
	 * Install callbacks. Core will call them for each online cpu.
	 */
	ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
				"perf/x86/amd/uncore:prepare",
				NULL, amd_uncore_cpu_dead);
	if (ret)
		goto fail;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
				"perf/x86/amd/uncore:starting",
				amd_uncore_cpu_starting, NULL);
	if (ret)
		goto fail_prep;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
				"perf/x86/amd/uncore:online",
				amd_uncore_cpu_online,
				amd_uncore_cpu_down_prepare);
	if (ret)
		goto fail_start;

	return 0;

fail_start:
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail:
	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (uncore->info) {
			free_percpu(uncore->info);
			uncore->info = NULL;
		}
	}

	return ret;
}

static void __exit amd_uncore_exit(void)
{
	struct amd_uncore *uncore;
	struct amd_uncore_pmu *pmu;
	int i, j;

	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);

	for (i = 0; i < UNCORE_TYPE_MAX; i++) {
		uncore = &uncores[i];
		if (!uncore->info)
			continue;

		free_percpu(uncore->info);
		uncore->info = NULL;

		for (j = 0; j < uncore->num_pmus; j++) {
			pmu = &uncore->pmus[j];
			if (!pmu->ctx)
				continue;

			perf_pmu_unregister(&pmu->pmu);
			free_percpu(pmu->ctx);
			pmu->ctx = NULL;
		}

		kfree(uncore->pmus);
		uncore->pmus = NULL;
	}
}

module_init(amd_uncore_init);
module_exit(amd_uncore_exit);

MODULE_DESCRIPTION("AMD Uncore Driver");
MODULE_LICENSE("GPL v2");