Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/perf/power9-pmu.c
26470 views
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
* Performance counter support for POWER9 processors.
4
*
5
* Copyright 2009 Paul Mackerras, IBM Corporation.
6
* Copyright 2013 Michael Ellerman, IBM Corporation.
7
* Copyright 2016 Madhavan Srinivasan, IBM Corporation.
8
*/
9
10
#define pr_fmt(fmt) "power9-pmu: " fmt
11
12
#include "isa207-common.h"
13
14
/*
 * Raw event encoding for Power9:
 *
 *        60        56        52        48        44        40        36        32
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *   | | [ ]                       [ ] [      thresh_cmp     ]   [  thresh_ctl   ]
 *   | |  |                         |                                    |
 *   | |  *- IFM (Linux)            |                 thresh start/stop -*
 *   | *- BHRB (Linux)              *sm
 *   *- EBB (Linux)
 *
 *        28        24        20        16        12         8         4         0
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *   [   ] [  sample ]   [cache]   [ pmc ]   [unit ]   []    m   [    pmcxsel    ]
 *     |        |           |                          |     |
 *     |        |           |                          |     *- mark
 *     |        |           *- L1/L2/L3 cache_sel      |
 *     |        |                                      |
 *     |        *- sampling mode for marked events     *- combine
 *     |
 *     *- thresh_sel
 *
 * Below uses IBM bit numbering.
 *
 * MMCR1[x:y] = unit    (PMCxUNIT)
 * MMCR1[24]  = pmc1combine[0]
 * MMCR1[25]  = pmc1combine[1]
 * MMCR1[26]  = pmc2combine[0]
 * MMCR1[27]  = pmc2combine[1]
 * MMCR1[28]  = pmc3combine[0]
 * MMCR1[29]  = pmc3combine[1]
 * MMCR1[30]  = pmc4combine[0]
 * MMCR1[31]  = pmc4combine[1]
 *
 * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
 *	MMCR1[20:27] = thresh_ctl
 * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
 *	MMCR1[20:27] = thresh_ctl
 * else
 *	MMCRA[48:55] = thresh_ctl   (THRESH START/END)
 *
 * if thresh_sel:
 *	MMCRA[45:47] = thresh_sel
 *
 * if thresh_cmp:
 *	MMCRA[9:11]  = thresh_cmp[0:2]
 *	MMCRA[12:18] = thresh_cmp[3:9]
 *
 * MMCR1[16] = cache_sel[2]
 * MMCR1[17] = cache_sel[3]
 *
 * if mark:
 *	MMCRA[63]    = 1		(SAMPLE_ENABLE)
 *	MMCRA[57:59] = sample[0:2]	(RAND_SAMP_ELIG)
 *	MMCRA[61:62] = sample[3:4]	(RAND_SAMP_MODE)
 *
 * if EBB and BHRB:
 *	MMCRA[32:33] = IFM
 *
 * MMCRA[SDAR_MODE] = sm
 */
75
76
/*
 * Power9 event codes.
 *
 * While power9-events-list.h is being included, EVENT(name, code) expands
 * to the enumerator "name = code,", so every EVENT() entry in that header
 * (presumably one per supported event -- the header is not shown here)
 * becomes a constant of the anonymous enum below. The helper macro is
 * dropped again right after the enum so it cannot leak into later code.
 */
#define EVENT(_name, _code)	_name = _code,

enum {
#include "power9-events-list.h"
};

#undef EVENT
86
87
/* MMCRA IFM bits - POWER9 */
88
#define POWER9_MMCRA_IFM1 0x0000000040000000UL
89
#define POWER9_MMCRA_IFM2 0x0000000080000000UL
90
#define POWER9_MMCRA_IFM3 0x00000000C0000000UL
91
#define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL
92
93
extern u64 PERF_REG_EXTENDED_MASK;
94
95
/* Nasty Power9 specific hack */
96
#define PVR_POWER9_CUMULUS 0x00002000
97
98
/* PowerISA v2.07 format attribute structure*/
99
extern const struct attribute_group isa207_pmu_format_group;
100
101
static int p9_dd21_bl_ev[] = {
102
PM_MRK_ST_DONE_L2,
103
PM_RADIX_PWC_L1_HIT,
104
PM_FLOP_CMPL,
105
PM_MRK_NTF_FIN,
106
PM_RADIX_PWC_L2_HIT,
107
PM_IFETCH_THROTTLE,
108
PM_MRK_L2_TM_ST_ABORT_SISTER,
109
PM_RADIX_PWC_L3_HIT,
110
PM_RUN_CYC_SMT2_MODE,
111
PM_TM_TX_PASS_RUN_INST,
112
PM_DISP_HELD_SYNC_HOLD,
113
};
114
115
static int p9_dd22_bl_ev[] = {
116
PM_DTLB_MISS_16G,
117
PM_DERAT_MISS_2M,
118
PM_DTLB_MISS_2M,
119
PM_MRK_DTLB_MISS_1G,
120
PM_DTLB_MISS_4K,
121
PM_DERAT_MISS_1G,
122
PM_MRK_DERAT_MISS_2M,
123
PM_MRK_DTLB_MISS_4K,
124
PM_MRK_DTLB_MISS_16G,
125
PM_DTLB_MISS_64K,
126
PM_MRK_DERAT_MISS_1G,
127
PM_MRK_DTLB_MISS_64K,
128
PM_DISP_HELD_SYNC_HOLD,
129
PM_DTLB_MISS_16M,
130
PM_DTLB_MISS_1G,
131
PM_MRK_DTLB_MISS_16M,
132
};
133
134
/* Table of alternatives, sorted by column 0 */
135
static const unsigned int power9_event_alternatives[][MAX_ALT] = {
136
{ PM_BR_2PATH, PM_BR_2PATH_ALT },
137
{ PM_INST_DISP, PM_INST_DISP_ALT },
138
{ PM_RUN_CYC_ALT, PM_RUN_CYC },
139
{ PM_LD_MISS_L1, PM_LD_MISS_L1_ALT },
140
{ PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
141
};
142
143
/*
 * Look up the alternative encodings of @event.
 *
 * Thin wrapper that passes the POWER9 alternatives table and its row count
 * to the common ISA 2.07 helper. @alt is filled in by that helper and its
 * return value is passed straight back to the caller.
 */
static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
	return isa207_get_alternatives(event, alt,
				       ARRAY_SIZE(power9_event_alternatives),
				       flags, power9_event_alternatives);
}
153
154
static int power9_check_attr_config(struct perf_event *ev)
155
{
156
u64 val;
157
u64 event = ev->attr.config;
158
159
val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
160
if (val == 0xC || isa3XX_check_attr_config(ev))
161
return -EINVAL;
162
163
return 0;
164
}
165
166
/*
 * Named events: each line ties a user-visible event name to a POWER9 event
 * code. The objects created here are presumably the ones referenced through
 * GENERIC_EVENT_PTR()/CACHE_EVENT_PTR() in power9_events_attr[] -- the
 * macros themselves are defined outside this file.
 */

/* Generic hardware events */
GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
GENERIC_EVENT_ATTR(stalled-cycles-frontend,	PM_ICT_NOSLOT_CYC);
GENERIC_EVENT_ATTR(stalled-cycles-backend,	PM_CMPLU_STALL);
GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_CMPL);
GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1_FIN);
GENERIC_EVENT_ATTR(mem-loads,			MEM_LOADS);
GENERIC_EVENT_ATTR(mem-stores,			MEM_STORES);

/* Cache, branch-unit and TLB events */
CACHE_EVENT_ATTR(L1-dcache-load-misses,		PM_LD_MISS_L1_FIN);
CACHE_EVENT_ATTR(L1-dcache-loads,		PM_LD_REF_L1);
CACHE_EVENT_ATTR(L1-dcache-prefetches,		PM_L1_PREF);
CACHE_EVENT_ATTR(L1-dcache-store-misses,	PM_ST_MISS_L1);
CACHE_EVENT_ATTR(L1-icache-load-misses,		PM_L1_ICACHE_MISS);
CACHE_EVENT_ATTR(L1-icache-loads,		PM_INST_FROM_L1);
CACHE_EVENT_ATTR(L1-icache-prefetches,		PM_IC_PREF_WRITE);
CACHE_EVENT_ATTR(LLC-load-misses,		PM_DATA_FROM_L3MISS);
CACHE_EVENT_ATTR(LLC-loads,			PM_DATA_FROM_L3);
CACHE_EVENT_ATTR(LLC-prefetches,		PM_L3_PREF_ALL);
CACHE_EVENT_ATTR(branch-load-misses,		PM_BR_MPRED_CMPL);
CACHE_EVENT_ATTR(branch-loads,			PM_BR_CMPL);
CACHE_EVENT_ATTR(dTLB-load-misses,		PM_DTLB_MISS);
CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
191
192
static struct attribute *power9_events_attr[] = {
193
GENERIC_EVENT_PTR(PM_CYC),
194
GENERIC_EVENT_PTR(PM_ICT_NOSLOT_CYC),
195
GENERIC_EVENT_PTR(PM_CMPLU_STALL),
196
GENERIC_EVENT_PTR(PM_INST_CMPL),
197
GENERIC_EVENT_PTR(PM_BR_CMPL),
198
GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
199
GENERIC_EVENT_PTR(PM_LD_REF_L1),
200
GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN),
201
GENERIC_EVENT_PTR(MEM_LOADS),
202
GENERIC_EVENT_PTR(MEM_STORES),
203
CACHE_EVENT_PTR(PM_LD_MISS_L1_FIN),
204
CACHE_EVENT_PTR(PM_LD_REF_L1),
205
CACHE_EVENT_PTR(PM_L1_PREF),
206
CACHE_EVENT_PTR(PM_ST_MISS_L1),
207
CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
208
CACHE_EVENT_PTR(PM_INST_FROM_L1),
209
CACHE_EVENT_PTR(PM_IC_PREF_WRITE),
210
CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
211
CACHE_EVENT_PTR(PM_DATA_FROM_L3),
212
CACHE_EVENT_PTR(PM_L3_PREF_ALL),
213
CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
214
CACHE_EVENT_PTR(PM_BR_CMPL),
215
CACHE_EVENT_PTR(PM_DTLB_MISS),
216
CACHE_EVENT_PTR(PM_ITLB_MISS),
217
NULL
218
};
219
220
static const struct attribute_group power9_pmu_events_group = {
221
.name = "events",
222
.attrs = power9_events_attr,
223
};
224
225
/*
 * Format attributes: each one names a field of the raw event code and the
 * range of attr.config bits it occupies. "event" spans the whole range
 * covered by the individual fields below it.
 */
PMU_FORMAT_ATTR(event,		"config:0-51");
PMU_FORMAT_ATTR(pmcxsel,	"config:0-7");
PMU_FORMAT_ATTR(mark,		"config:8");
PMU_FORMAT_ATTR(combine,	"config:10-11");
PMU_FORMAT_ATTR(unit,		"config:12-15");
PMU_FORMAT_ATTR(pmc,		"config:16-19");
PMU_FORMAT_ATTR(cache_sel,	"config:20-23");
PMU_FORMAT_ATTR(sample_mode,	"config:24-28");
PMU_FORMAT_ATTR(thresh_sel,	"config:29-31");
PMU_FORMAT_ATTR(thresh_stop,	"config:32-35");
PMU_FORMAT_ATTR(thresh_start,	"config:36-39");
PMU_FORMAT_ATTR(thresh_cmp,	"config:40-49");
PMU_FORMAT_ATTR(sdar_mode,	"config:50-51");
238
239
static struct attribute *power9_pmu_format_attr[] = {
240
&format_attr_event.attr,
241
&format_attr_pmcxsel.attr,
242
&format_attr_mark.attr,
243
&format_attr_combine.attr,
244
&format_attr_unit.attr,
245
&format_attr_pmc.attr,
246
&format_attr_cache_sel.attr,
247
&format_attr_sample_mode.attr,
248
&format_attr_thresh_sel.attr,
249
&format_attr_thresh_stop.attr,
250
&format_attr_thresh_start.attr,
251
&format_attr_thresh_cmp.attr,
252
&format_attr_sdar_mode.attr,
253
NULL,
254
};
255
256
static const struct attribute_group power9_pmu_format_group = {
257
.name = "format",
258
.attrs = power9_pmu_format_attr,
259
};
260
261
static struct attribute *power9_pmu_caps_attrs[] = {
262
NULL
263
};
264
265
static struct attribute_group power9_pmu_caps_group = {
266
.name = "caps",
267
.attrs = power9_pmu_caps_attrs,
268
};
269
270
static const struct attribute_group *power9_pmu_attr_groups[] = {
271
&power9_pmu_format_group,
272
&power9_pmu_events_group,
273
&power9_pmu_caps_group,
274
NULL,
275
};
276
277
static int power9_generic_events[] = {
278
[PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
279
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC,
280
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL,
281
[PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
282
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL,
283
[PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
284
[PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
285
[PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN,
286
};
287
288
/*
 * Translate a perf branch_sample_type request into MMCRA IFM filter bits.
 *
 * BHRB and regular PMU events share the same privilege state filter
 * configuration, and BHRB is always recorded along with a regular PMU
 * event. The privilege state filter is therefore handled in the basic PMC
 * configuration of the accompanying event, and any separate BHRB specific
 * privilege request is ignored here.
 *
 * Returns 0 when PERF_SAMPLE_BRANCH_ANY is set (no filter needed),
 * POWER9_MMCRA_IFM1 for PERF_SAMPLE_BRANCH_ANY_CALL, and -1 (converted to
 * u64) for every combination the hardware cannot filter.
 */
static u64 power9_bhrb_filter_map(u64 branch_sample_type)
{
	/* Branch classes the hardware has no filter for. */
	const u64 unsupported = PERF_SAMPLE_BRANCH_ANY_RETURN |
				PERF_SAMPLE_BRANCH_IND_CALL |
				PERF_SAMPLE_BRANCH_CALL;

	/* Any branch wanted: nothing to filter. */
	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
		return 0;

	/* Invalid branch filter options - HW does not support them. */
	if (branch_sample_type & unsupported)
		return -1;

	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		return POWER9_MMCRA_IFM1;

	/* Everything else is unsupported. */
	return -1;
}
321
322
/*
 * Enable the requested BHRB filter in the PMU.
 *
 * Only the bits of @pmu_bhrb_filter that fall inside
 * POWER9_MMCRA_BHRB_MASK are used; they are OR-ed into the current MMCRA
 * value, so filter bits that are already set stay set.
 */
static void power9_config_bhrb(u64 pmu_bhrb_filter)
{
	u64 ifm_bits = pmu_bhrb_filter & POWER9_MMCRA_BHRB_MASK;

	mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | ifm_bits));
}
329
330
#define C(x) PERF_COUNT_HW_CACHE_##x
331
332
/*
333
* Table of generalized cache-related events.
334
* 0 means not supported, -1 means nonsensical, other values
335
* are event codes.
336
*/
337
static u64 power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
338
[ C(L1D) ] = {
339
[ C(OP_READ) ] = {
340
[ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
341
[ C(RESULT_MISS) ] = PM_LD_MISS_L1_FIN,
342
},
343
[ C(OP_WRITE) ] = {
344
[ C(RESULT_ACCESS) ] = 0,
345
[ C(RESULT_MISS) ] = PM_ST_MISS_L1,
346
},
347
[ C(OP_PREFETCH) ] = {
348
[ C(RESULT_ACCESS) ] = PM_L1_PREF,
349
[ C(RESULT_MISS) ] = 0,
350
},
351
},
352
[ C(L1I) ] = {
353
[ C(OP_READ) ] = {
354
[ C(RESULT_ACCESS) ] = PM_INST_FROM_L1,
355
[ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS,
356
},
357
[ C(OP_WRITE) ] = {
358
[ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE,
359
[ C(RESULT_MISS) ] = -1,
360
},
361
[ C(OP_PREFETCH) ] = {
362
[ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE,
363
[ C(RESULT_MISS) ] = 0,
364
},
365
},
366
[ C(LL) ] = {
367
[ C(OP_READ) ] = {
368
[ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3,
369
[ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS,
370
},
371
[ C(OP_WRITE) ] = {
372
[ C(RESULT_ACCESS) ] = 0,
373
[ C(RESULT_MISS) ] = 0,
374
},
375
[ C(OP_PREFETCH) ] = {
376
[ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
377
[ C(RESULT_MISS) ] = 0,
378
},
379
},
380
[ C(DTLB) ] = {
381
[ C(OP_READ) ] = {
382
[ C(RESULT_ACCESS) ] = 0,
383
[ C(RESULT_MISS) ] = PM_DTLB_MISS,
384
},
385
[ C(OP_WRITE) ] = {
386
[ C(RESULT_ACCESS) ] = -1,
387
[ C(RESULT_MISS) ] = -1,
388
},
389
[ C(OP_PREFETCH) ] = {
390
[ C(RESULT_ACCESS) ] = -1,
391
[ C(RESULT_MISS) ] = -1,
392
},
393
},
394
[ C(ITLB) ] = {
395
[ C(OP_READ) ] = {
396
[ C(RESULT_ACCESS) ] = 0,
397
[ C(RESULT_MISS) ] = PM_ITLB_MISS,
398
},
399
[ C(OP_WRITE) ] = {
400
[ C(RESULT_ACCESS) ] = -1,
401
[ C(RESULT_MISS) ] = -1,
402
},
403
[ C(OP_PREFETCH) ] = {
404
[ C(RESULT_ACCESS) ] = -1,
405
[ C(RESULT_MISS) ] = -1,
406
},
407
},
408
[ C(BPU) ] = {
409
[ C(OP_READ) ] = {
410
[ C(RESULT_ACCESS) ] = PM_BR_CMPL,
411
[ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL,
412
},
413
[ C(OP_WRITE) ] = {
414
[ C(RESULT_ACCESS) ] = -1,
415
[ C(RESULT_MISS) ] = -1,
416
},
417
[ C(OP_PREFETCH) ] = {
418
[ C(RESULT_ACCESS) ] = -1,
419
[ C(RESULT_MISS) ] = -1,
420
},
421
},
422
[ C(NODE) ] = {
423
[ C(OP_READ) ] = {
424
[ C(RESULT_ACCESS) ] = -1,
425
[ C(RESULT_MISS) ] = -1,
426
},
427
[ C(OP_WRITE) ] = {
428
[ C(RESULT_ACCESS) ] = -1,
429
[ C(RESULT_MISS) ] = -1,
430
},
431
[ C(OP_PREFETCH) ] = {
432
[ C(RESULT_ACCESS) ] = -1,
433
[ C(RESULT_MISS) ] = -1,
434
},
435
},
436
};
437
438
#undef C
439
440
static struct power_pmu power9_pmu = {
441
.name = "POWER9",
442
.n_counter = MAX_PMU_COUNTERS,
443
.add_fields = ISA207_ADD_FIELDS,
444
.test_adder = ISA207_TEST_ADDER,
445
.group_constraint_mask = CNST_CACHE_PMC4_MASK,
446
.group_constraint_val = CNST_CACHE_PMC4_VAL,
447
.compute_mmcr = isa207_compute_mmcr,
448
.config_bhrb = power9_config_bhrb,
449
.bhrb_filter_map = power9_bhrb_filter_map,
450
.get_constraint = isa207_get_constraint,
451
.get_alternatives = power9_get_alternatives,
452
.get_mem_data_src = isa207_get_mem_data_src,
453
.get_mem_weight = isa207_get_mem_weight,
454
.disable_pmc = isa207_disable_pmc,
455
.flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
456
.n_generic = ARRAY_SIZE(power9_generic_events),
457
.generic_events = power9_generic_events,
458
.cache_events = &power9_cache_events,
459
.attr_groups = power9_pmu_attr_groups,
460
.bhrb_nr = 32,
461
.capabilities = PERF_PMU_CAP_EXTENDED_REGS,
462
.check_attr_config = power9_check_attr_config,
463
};
464
465
/*
 * Register the POWER9 PMU.
 *
 * Returns -ENODEV when the PVR does not identify a POWER9, the error from
 * register_power_pmu() when registration fails, and 0 on success. On
 * success the EBB feature bit is also set in cpu_user_features2.
 */
int __init init_power9_pmu(void)
{
	unsigned int pvr = mfspr(SPRN_PVR);
	int rc;

	if (PVR_VER(pvr) != PVR_POWER9)
		return -ENODEV;

	/*
	 * Blacklist events: only parts without the Cumulus PVR bit and with
	 * PVR_CFG == 2 get a blacklist, chosen by PVR_MIN.
	 */
	if (!(pvr & PVR_POWER9_CUMULUS) && PVR_CFG(pvr) == 2) {
		switch (PVR_MIN(pvr)) {
		case 1:
			power9_pmu.blacklist_ev = p9_dd21_bl_ev;
			power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd21_bl_ev);
			break;
		case 2:
			power9_pmu.blacklist_ev = p9_dd22_bl_ev;
			power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd22_bl_ev);
			break;
		default:
			/* No blacklist for any other minor revision. */
			break;
		}
	}

	/* Set the PERF_REG_EXTENDED_MASK here, before registering the PMU. */
	PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300;

	rc = register_power_pmu(&power9_pmu);
	if (rc)
		return rc;

	/* Tell userspace that EBB is supported */
	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;

	return 0;
}
496
497