GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/events/zhaoxin/core.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Zhaoxin PMU; like Intel Architectural PerfMon-v2
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/nmi.h>

#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>
#include <asm/msr.h>

#include "../perf_event.h"

/*
 * Zhaoxin PerfMon, used on zxc and later.
 */
static u64 zx_pmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = {

        [PERF_COUNT_HW_CPU_CYCLES]        = 0x0082,
        [PERF_COUNT_HW_INSTRUCTIONS]      = 0x00c0,
        [PERF_COUNT_HW_CACHE_REFERENCES]  = 0x0515,
        [PERF_COUNT_HW_CACHE_MISSES]      = 0x051a,
        [PERF_COUNT_HW_BUS_CYCLES]        = 0x0083,
};

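/*
 * The raw values above follow the event-select layout that the
 * PMU_FORMAT_ATTR definitions later in this file describe: bits 0-7 hold
 * the event code and bits 8-15 the unit mask. So, for instance, 0x0515
 * appears to select event 0x15 with umask 0x05, while 0x0082 is event
 * 0x82 with umask 0x00.
 */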
static struct event_constraint zxc_event_constraints[] __read_mostly = {

        FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
        EVENT_CONSTRAINT_END
};

static struct event_constraint zxd_event_constraints[] __read_mostly = {

        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* retired instructions */
        FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
        FIXED_EVENT_CONSTRAINT(0x0083, 2), /* unhalted bus clock cycles */
        EVENT_CONSTRAINT_END
};

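/*
 * FIXED_EVENT_CONSTRAINT(code, n) ties the given raw event code to fixed
 * counter n (counted from INTEL_PMC_IDX_FIXED). zhaoxin_pmu_init() below
 * additionally ORs the general-purpose counter mask into each
 * constraint's idxmsk64, so these events are presumably allowed to fall
 * back to a general counter when the fixed one is busy.
 */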
static __initconst const u64 zxd_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0042,
                [C(RESULT_MISS)] = 0x0538,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0x0043,
                [C(RESULT_MISS)] = 0x0562,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0300,
                [C(RESULT_MISS)] = 0x0301,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0x030a,
                [C(RESULT_MISS)] = 0x030b,
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0042,
                [C(RESULT_MISS)] = 0x052c,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0x0043,
                [C(RESULT_MISS)] = 0x0530,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0x0564,
                [C(RESULT_MISS)] = 0x0565,
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x00c0,
                [C(RESULT_MISS)] = 0x0534,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0700,
                [C(RESULT_MISS)] = 0x0709,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(NODE)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
};

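/*
 * The tables above and below are indexed as [cache][operation][result];
 * C() is the usual shorthand for the PERF_COUNT_HW_CACHE_* enum
 * constants (it appears to expand to PERF_COUNT_HW_CACHE_##x in the
 * shared x86 perf code), so e.g. [C(L1D)][C(OP_READ)][C(RESULT_MISS)]
 * is the L1 data-cache read-miss event.
 */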
static __initconst const u64 zxe_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0568,
                [C(RESULT_MISS)] = 0x054b,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0x0669,
                [C(RESULT_MISS)] = 0x0562,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0300,
                [C(RESULT_MISS)] = 0x0301,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0x030a,
                [C(RESULT_MISS)] = 0x030b,
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0,
                [C(RESULT_MISS)] = 0x0,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0x0,
                [C(RESULT_MISS)] = 0x0,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0x0,
                [C(RESULT_MISS)] = 0x0,
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0568,
                [C(RESULT_MISS)] = 0x052c,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = 0x0669,
                [C(RESULT_MISS)] = 0x0530,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = 0x0564,
                [C(RESULT_MISS)] = 0x0565,
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x00c0,
                [C(RESULT_MISS)] = 0x0534,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(BPU)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = 0x0028,
                [C(RESULT_MISS)] = 0x0029,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
[C(NODE)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)] = -1,
                [C(RESULT_MISS)] = -1,
        },
},
};

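/*
 * In these hw_cache_event_ids tables the x86 perf core treats -1 as
 * "this op/result combination is not supported" (the event is rejected
 * outright), while 0 means "no event id is defined" (the generic cache
 * event simply cannot be resolved). That is why the zxe LL entries above
 * are left at 0x0 rather than mapped to a real counter event.
 */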
static void zhaoxin_pmu_disable_all(void)
{
        wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}

static void zhaoxin_pmu_enable_all(int added)
{
        wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
}

static inline u64 zhaoxin_pmu_get_status(void)
{
        u64 status;

        rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status);

        return status;
}

static inline void zhaoxin_pmu_ack_status(u64 ack)
{
        wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

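/*
 * The helpers above follow the Intel architectural perfmon v2 model that
 * this PMU mirrors: GLOBAL_CTRL holds one enable bit per counter (the
 * intel_ctrl mask built in zhaoxin_pmu_init()), GLOBAL_STATUS reports
 * per-counter overflow bits, and writing those bits back to
 * GLOBAL_OVF_CTRL acknowledges and clears them.
 */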
static inline void zxc_pmu_ack_status(u64 ack)
{
        /*
         * ZXC needs global control enabled in order to clear status bits.
         */
        zhaoxin_pmu_enable_all(0);
        zhaoxin_pmu_ack_status(ack);
        zhaoxin_pmu_disable_all();
}

static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc)
{
        int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
        u64 ctrl_val, mask;

        mask = 0xfULL << (idx * 4);

        rdmsrq(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        wrmsrq(hwc->config_base, ctrl_val);
}

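/*
 * For fixed counters, config_base points at MSR_ARCH_PERFMON_FIXED_CTR_CTRL,
 * which allocates a 4-bit control field per fixed counter. Clearing the
 * 0xf nibble at (idx * 4) therefore disables just that one counter while
 * leaving the others untouched.
 */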
static void zhaoxin_pmu_disable_event(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                zhaoxin_pmu_disable_fixed(hwc);
                return;
        }

        x86_pmu_disable_event(event);
}

static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc)
{
        int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
        u64 ctrl_val, bits, mask;

        /*
         * Enable IRQ generation (0x8),
         * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
         * if requested:
         */
        bits = 0x8ULL;
        if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
                bits |= 0x2;
        if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
                bits |= 0x1;

        bits <<= (idx * 4);
        mask = 0xfULL << (idx * 4);

        rdmsrq(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        ctrl_val |= bits;
        wrmsrq(hwc->config_base, ctrl_val);
}

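/*
 * Worked example for the fixed-counter enable above: for idx == 1 with
 * both USR and OS counting requested, bits == 0xb and mask == 0xf, so
 * after the shifts bits == 0xb0 and mask == 0xf0. The read-modify-write
 * then replaces only bits 7:4 of FIXED_CTR_CTRL, i.e. the control nibble
 * of fixed counter 1, with PMI enable plus ring-3 and ring-0 counting.
 */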
static void zhaoxin_pmu_enable_event(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                zhaoxin_pmu_enable_fixed(hwc);
                return;
        }

        __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}

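/*
 * Overflow handling below follows the common x86 perf NMI pattern:
 * re-arm the LVT entry, disable all counters, read and acknowledge the
 * global status word, then walk the set overflow bits and hand each
 * overflowed event to perf_event_overflow() before re-enabling the PMU.
 */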
/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static int zhaoxin_pmu_handle_irq(struct pt_regs *regs)
{
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc;
        int handled = 0;
        u64 status;
        int bit;

        cpuc = this_cpu_ptr(&cpu_hw_events);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        zhaoxin_pmu_disable_all();
        status = zhaoxin_pmu_get_status();
        if (!status)
                goto done;

again:
        if (x86_pmu.enabled_ack)
                zxc_pmu_ack_status(status);
        else
                zhaoxin_pmu_ack_status(status);

        inc_irq_stat(apic_perf_irqs);

        /*
         * CondChgd bit 63 doesn't mean any overflow status. Ignore
         * and clear the bit.
         */
        if (__test_and_clear_bit(63, (unsigned long *)&status)) {
                if (!status)
                        goto done;
        }

        for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
                struct perf_event *event = cpuc->events[bit];

                handled++;

                if (!test_bit(bit, cpuc->active_mask))
                        continue;

                x86_perf_event_update(event);
                perf_sample_data_init(&data, 0, event->hw.last_period);

                if (!x86_perf_event_set_period(event))
                        continue;

                perf_event_overflow(event, &data, regs);
        }

        /*
         * Repeat if there is more work to be done:
         */
        status = zhaoxin_pmu_get_status();
        if (status)
                goto again;

done:
        zhaoxin_pmu_enable_all(0);
        return handled;
}

static u64 zhaoxin_pmu_event_map(int hw_event)
{
        return zx_pmon_event_map[hw_event];
}

static struct event_constraint *
zhaoxin_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                              struct perf_event *event)
{
        struct event_constraint *c;

        if (x86_pmu.event_constraints) {
                for_each_event_constraint(c, x86_pmu.event_constraints) {
                        if ((event->hw.config & c->cmask) == c->code)
                                return c;
                }
        }

        return &unconstrained;
}

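/*
 * An event matches a constraint when its config, masked by the
 * constraint's cmask, equals the constraint's event code; matching
 * events are then restricted to the counters in that constraint's
 * idxmsk64 (the fixed counters above, plus the general counters that
 * zhaoxin_pmu_init() ORs in). Everything else is left unconstrained.
 */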
PMU_FORMAT_ATTR(event, "config:0-7");
PMU_FORMAT_ATTR(umask, "config:8-15");
PMU_FORMAT_ATTR(edge, "config:18");
PMU_FORMAT_ATTR(inv, "config:23");
PMU_FORMAT_ATTR(cmask, "config:24-31");

static struct attribute *zx_arch_formats_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
        &format_attr_edge.attr,
        &format_attr_inv.attr,
        &format_attr_cmask.attr,
        NULL,
};

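/*
 * These format attributes are what the perf tool reads (typically from
 * /sys/bus/event_source/devices/cpu/format/) to parse raw event strings.
 * As an illustrative, untested example on a Zhaoxin system, the
 * unhalted-core-cycles event from zx_pmon_event_map could be requested
 * directly as:
 *
 *      perf stat -e cpu/event=0x82,umask=0x00/ -- sleep 1
 */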
static ssize_t zhaoxin_event_sysfs_show(char *page, u64 config)
{
        u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);

        return x86_event_sysfs_show(page, config, event);
}

static const struct x86_pmu zhaoxin_pmu __initconst = {
        .name                   = "zhaoxin",
        .handle_irq             = zhaoxin_pmu_handle_irq,
        .disable_all            = zhaoxin_pmu_disable_all,
        .enable_all             = zhaoxin_pmu_enable_all,
        .enable                 = zhaoxin_pmu_enable_event,
        .disable                = zhaoxin_pmu_disable_event,
        .hw_config              = x86_pmu_hw_config,
        .schedule_events        = x86_schedule_events,
        .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
        .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
        .event_map              = zhaoxin_pmu_event_map,
        .max_events             = ARRAY_SIZE(zx_pmon_event_map),
        .apic                   = 1,
        /*
         * For zxd/zxe, read/write operation for PMCx MSR is 48 bits.
         */
        .max_period             = (1ULL << 47) - 1,
        .get_event_constraints  = zhaoxin_get_event_constraints,

        .format_attrs           = zx_arch_formats_attr,
        .events_sysfs_show      = zhaoxin_event_sysfs_show,
};

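/*
 * max_period is (1ULL << 47) - 1 rather than the full 48-bit counter
 * range, presumably so a programmed sampling period always stays in the
 * lower half of the counter and the overflow bit fires before the value
 * wraps. On zxc, zhaoxin_pmu_init() instead derives it from the probed
 * counter width (cntval_mask >> 1).
 */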
static const struct { int id; char *name; } zx_arch_events_map[] __initconst = {
        { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
        { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
        { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
        { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
        { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
        { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
        { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};

static __init void zhaoxin_arch_events_quirk(void)
{
        int bit;

        /* Disable events that CPUID reports as not present. */
        for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(zx_arch_events_map)) {
                zx_pmon_event_map[zx_arch_events_map[bit].id] = 0;
                pr_warn("CPUID marked event: '%s' unavailable\n",
                        zx_arch_events_map[bit].name);
        }
}

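/*
 * x86_pmu.events_mask is filled below from CPUID leaf 0xA EBX, where a
 * set bit follows the Intel architectural-perfmon convention of marking
 * the corresponding predefined event as *not* available; that is why
 * the quirk above zeroes those entries out of zx_pmon_event_map.
 */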
__init int zhaoxin_pmu_init(void)
{
        union cpuid10_edx edx;
        union cpuid10_eax eax;
        union cpuid10_ebx ebx;
        struct event_constraint *c;
        unsigned int unused;
        int version;

        pr_info("Welcome to zhaoxin pmu!\n");

        /*
         * Check whether the Architectural PerfMon supports
         * hw_event or not.
         */
        cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);

        if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT - 1)
                return -ENODEV;

        version = eax.split.version_id;
        if (version != 2)
                return -ENODEV;

        x86_pmu = zhaoxin_pmu;
        pr_info("Version check pass!\n");

        x86_pmu.version = version;
        x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0);
        x86_pmu.cntval_bits = eax.split.bit_width;
        x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
        x86_pmu.events_maskl = ebx.full;
        x86_pmu.events_mask_len = eax.split.mask_length;

        x86_pmu.fixed_cntr_mask64 = GENMASK_ULL(edx.split.num_counters_fixed - 1, 0);
        x86_add_quirk(zhaoxin_arch_events_quirk);

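        /*
         * Everything probed so far comes from CPUID leaf 0xA: counter
         * count and width from EAX, the unavailable-event bitmap from
         * EBX, and the fixed-counter count from EDX. The switch below
         * then keys model-specific setup off family/model: family 6 is
         * the ZXC line (with Nano parts excluded by stepping), while
         * family 7 covers ZXD (model 0x1b) and ZXE (model 0x3b).
         */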
        switch (boot_cpu_data.x86) {
        case 0x06:
                /*
                 * Support Zhaoxin CPUs from the ZXC series, excluding the Nano series via FMS.
                 * Nano FMS: Family=6, Model=F, Stepping=[0-A][C-D]
                 * ZXC FMS:  Family=6, Model=F, Stepping=E-F OR Family=6, Model=0x19, Stepping=0-3
                 */
                if ((boot_cpu_data.x86_model == 0x0f && boot_cpu_data.x86_stepping >= 0x0e) ||
                     boot_cpu_data.x86_model == 0x19) {

                        x86_pmu.max_period = x86_pmu.cntval_mask >> 1;

                        /* Clearing status works only if global control is enabled on zxc. */
                        x86_pmu.enabled_ack = 1;

                        x86_pmu.event_constraints = zxc_event_constraints;
                        zx_pmon_event_map[PERF_COUNT_HW_INSTRUCTIONS] = 0;
                        zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0;
                        zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0;
                        zx_pmon_event_map[PERF_COUNT_HW_BUS_CYCLES] = 0;

                        pr_cont("ZXC events, ");
                        break;
                }
                return -ENODEV;

        case 0x07:
                zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
                        X86_CONFIG(.event = 0x01, .umask = 0x01, .inv = 0x01, .cmask = 0x01);

                zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
                        X86_CONFIG(.event = 0x0f, .umask = 0x04, .inv = 0, .cmask = 0);

                switch (boot_cpu_data.x86_model) {
                case 0x1b:
                        memcpy(hw_cache_event_ids, zxd_hw_cache_event_ids,
                               sizeof(hw_cache_event_ids));

                        x86_pmu.event_constraints = zxd_event_constraints;

                        zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0700;
                        zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0709;

                        pr_cont("ZXD events, ");
                        break;
                case 0x3b:
                        memcpy(hw_cache_event_ids, zxe_hw_cache_event_ids,
                               sizeof(hw_cache_event_ids));

                        x86_pmu.event_constraints = zxd_event_constraints;

                        zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0028;
                        zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0029;

                        pr_cont("ZXE events, ");
                        break;
                default:
                        return -ENODEV;
                }
                break;

        default:
                return -ENODEV;
        }

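        /*
         * intel_ctrl is the GLOBAL_CTRL image written by
         * zhaoxin_pmu_enable_all(): general-purpose counter enable bits
         * in the low word, with the fixed-counter enables starting at
         * bit INTEL_PMC_IDX_FIXED (32), matching the architectural
         * perfmon layout this PMU follows.
         */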
        x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;
        x86_pmu.intel_ctrl |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED;

        if (x86_pmu.event_constraints) {
                for_each_event_constraint(c, x86_pmu.event_constraints) {
                        c->idxmsk64 |= x86_pmu.cntr_mask64;
                        c->weight += x86_pmu_num_counters(NULL);
                }
        }

        return 0;
}