// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Performance counter support for PPC970-family processors.
 *
 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
 */
#include <linux/string.h>
#include <linux/perf_event.h>
#include <asm/reg.h>
#include <asm/cputable.h>

#include "internal.h"

/*
 * Bits in event code for PPC970
 *
 * Each field is extracted as (event >> PM_xxx_SH) & PM_xxx_MSK.  The
 * PMCSEL field occupies the low four bits, so it has a mask but no shift.
 */
#define PM_PMC_SH	12	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0xf
#define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK	0xf
#define PM_SPCSEL_SH	6
#define PM_SPCSEL_MSK	3
#define PM_BYTE_SH	4	/* Byte number of event bus to use */
#define PM_BYTE_MSK	3
#define PM_PMCSEL_MSK	0xf

/*
 * Values in PM_UNIT field
 *
 * PM_NONE means the event does not use the event bus.  PM_LASTUNIT is
 * the highest valid unit number and is used for range checks and to
 * size the per-unit constraint table.
 */
#define PM_NONE		0
#define PM_FPU		1
#define PM_VPU		2
#define PM_ISU		3
#define PM_IFU		4
#define PM_IDU		5
#define PM_STS		6
#define PM_LSU0		7
#define PM_LSU1U	8
#define PM_LSU1L	9
#define PM_LASTUNIT	9

/*
 * Bits in MMCR0 for PPC970
 *
 * The PMC2SEL field sits 7 bits below PMC1SEL; code in this file
 * computes its position as MMCR0_PMC1SEL_SH - 7.  Each PMCxSEL field
 * is 5 bits wide (MMCR_PMCSEL_MSK).
 */
#define MMCR0_PMC1SEL_SH	8
#define MMCR0_PMC2SEL_SH	1
#define MMCR_PMCSEL_MSK		0x1f

/*
 * Bits in MMCR1 for PPC970
 *
 * The TD_CP_DBGnSEL (byte lane select) fields step down by 2 bits per
 * byte lane, and the PMC3SEL..PMC8SEL fields step down by 5 bits per
 * counter; code in this file relies on that regular spacing.
 */
#define MMCR1_TTM0SEL_SH	62
#define MMCR1_TTM1SEL_SH	59
#define MMCR1_TTM3SEL_SH	53
#define MMCR1_TTMSEL_MSK	3
#define MMCR1_TD_CP_DBG0SEL_SH	50
#define MMCR1_TD_CP_DBG1SEL_SH	48
#define MMCR1_TD_CP_DBG2SEL_SH	46
#define MMCR1_TD_CP_DBG3SEL_SH	44
#define MMCR1_PMC1_ADDER_SEL_SH	39
#define MMCR1_PMC2_ADDER_SEL_SH	38
#define MMCR1_PMC6_ADDER_SEL_SH	37
#define MMCR1_PMC5_ADDER_SEL_SH	36
#define MMCR1_PMC8_ADDER_SEL_SH	35
#define MMCR1_PMC7_ADDER_SEL_SH	34
#define MMCR1_PMC3_ADDER_SEL_SH	33
#define MMCR1_PMC4_ADDER_SEL_SH	32
#define MMCR1_PMC3SEL_SH	27
#define MMCR1_PMC4SEL_SH	22
#define MMCR1_PMC5SEL_SH	17
#define MMCR1_PMC6SEL_SH	12
#define MMCR1_PMC7SEL_SH	7
#define MMCR1_PMC8SEL_SH	2

static short mmcr1_adder_bits[8] = {
74
MMCR1_PMC1_ADDER_SEL_SH,
75
MMCR1_PMC2_ADDER_SEL_SH,
76
MMCR1_PMC3_ADDER_SEL_SH,
77
MMCR1_PMC4_ADDER_SEL_SH,
78
MMCR1_PMC5_ADDER_SEL_SH,
79
MMCR1_PMC6_ADDER_SEL_SH,
80
MMCR1_PMC7_ADDER_SEL_SH,
81
MMCR1_PMC8_ADDER_SEL_SH
82
};
83
84
/*
 * Layout of constraint bits:
 * 6666555555555544444444443333333333222222222211111111110000000000
 * 3210987654321098765432109876543210987654321098765432109876543210
 *               <><><>[  >[  >[  ><  ><  ><  ><  ><><><><><><><><>
 *               SPT0T1 UC  PS1 PS2 B0  B1  B2  B3 P1P2P3P4P5P6P7P8
 *
 * SP - SPCSEL constraint
 *     48-49: SPCSEL value 0x3_0000_0000_0000
 *
 * T0 - TTM0 constraint
 *     46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000
 *
 * T1 - TTM1 constraint
 *     44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000
 *
 * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS
 *     43: UC3 error 0x0800_0000_0000
 *     42: FPU|IFU|VPU events needed 0x0400_0000_0000
 *     41: ISU events needed 0x0200_0000_0000
 *     40: IDU|STS events needed 0x0100_0000_0000
 *
 * PS1
 *     39: PS1 error 0x0080_0000_0000
 *     36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
 *
 * PS2
 *     35: PS2 error 0x0008_0000_0000
 *     32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
 *
 * B0
 *     28-31: Byte 0 event source 0xf000_0000
 *	      Encoding as for the event code
 *
 * B1, B2, B3
 *     24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
 *
 * P1
 *     15: P1 error 0x8000
 *     14: Count of events needing PMC1
 *
 * P2..P8
 *     0-13: Count of events needing PMC2..PMC8
 *
 * NOTE: p970_get_constraint() computes the position of a PMC's 2-bit
 * field as 2 * (pmc - 1), so in the code PMC1 occupies bits 0-1 and
 * PMC8 bits 14-15, the mirror image of the P1..P8 order drawn above.
 * Within each 2-bit field the low bit is the count and the high bit
 * is the error bit.
 */

/*
 * Direct events that count marked instructions, one bitmap per PMC
 * (indexed by 0-based PMC number).  Bit n set means that PMCSEL value n
 * on that PMC is a marked-instruction event.  Read-only.
 */
static const unsigned char direct_marked_event[8] = {
	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
	(1<<3) | (1<<5),	/* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
	(1<<4) | (1<<5),	/* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
	(1<<3) | (1<<4) | (1<<5),
		/* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
	(1<<4)			/* PMC8: PM_MRK_LSU_FIN */
};

/*
142
* Returns 1 if event counts things relating to marked instructions
143
* and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
144
*/
145
static int p970_marked_instr_event(u64 event)
146
{
147
int pmc, psel, unit, byte, bit;
148
unsigned int mask;
149
150
pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
151
psel = event & PM_PMCSEL_MSK;
152
if (pmc) {
153
if (direct_marked_event[pmc - 1] & (1 << psel))
154
return 1;
155
if (psel == 0) /* add events */
156
bit = (pmc <= 4)? pmc - 1: 8 - pmc;
157
else if (psel == 7 || psel == 13) /* decode events */
158
bit = 4;
159
else
160
return 0;
161
} else
162
bit = psel;
163
164
byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
165
unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
166
mask = 0;
167
switch (unit) {
168
case PM_VPU:
169
mask = 0x4c; /* byte 0 bits 2,3,6 */
170
break;
171
case PM_LSU0:
172
/* byte 2 bits 0,2,3,4,6; all of byte 1 */
173
mask = 0x085dff00;
174
break;
175
case PM_LSU1L:
176
mask = 0x50 << 24; /* byte 3 bits 4,6 */
177
break;
178
}
179
return (mask >> (byte * 8 + bit)) & 1;
180
}
181
182
/* Masks and values for using events from the various units */
183
static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
184
[PM_FPU] = { 0xc80000000000ull, 0x040000000000ull },
185
[PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull },
186
[PM_ISU] = { 0x080000000000ull, 0x020000000000ull },
187
[PM_IFU] = { 0xc80000000000ull, 0x840000000000ull },
188
[PM_IDU] = { 0x380000000000ull, 0x010000000000ull },
189
[PM_STS] = { 0x380000000000ull, 0x310000000000ull },
190
};
191
192
/*
 * Compute the scheduling constraint contributed by one event.
 *
 * @event:         PPC970 event code
 * @maskp:         receives the mask of constraint bits the event touches
 * @valp:          receives the value the event contributes in those bits
 * @event_config1: unused
 *
 * Returns 0 on success, or -1 if the event names a PMC above 8 or a
 * unit above PM_LASTUNIT (in which case *maskp and *valp are not
 * written).
 */
static int p970_get_constraint(u64 event, unsigned long *maskp,
			       unsigned long *valp, u64 event_config1 __maybe_unused)
{
	int pmc, byte, unit, sh, spcsel;
	unsigned long mask = 0, value = 0;
	int grp = -1;	/* PMC group: 0 = PMC1/2/5/6, 1 = PMC3/4/7/8, -1 = none */

	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
	if (pmc) {
		if (pmc > 8)
			return -1;
		/* 2-bit per-PMC field: low bit is the count, high bit the error */
		sh = (pmc - 1) * 2;
		mask |= 2 << sh;
		value |= 1 << sh;
		grp = ((pmc - 1) >> 1) & 1;
	}
	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	if (unit) {
		if (unit > PM_LASTUNIT)
			return -1;
		mask |= unit_cons[unit][0];
		value |= unit_cons[unit][1];
		byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
		/*
		 * Bus events on bytes 0 and 2 can be counted
		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
		 */
		if (!pmc)
			grp = byte & 1;
		/* Set byte lane select field */
		mask  |= 0xfULL << (28 - 4 * byte);
		value |= (unsigned long)unit << (28 - 4 * byte);
	}
	if (grp == 0) {
		/* increment PMC1/2/5/6 field */
		mask  |= 0x8000000000ull;
		value |= 0x1000000000ull;
	} else if (grp == 1) {
		/* increment PMC3/4/7/8 field */
		mask  |= 0x800000000ull;
		value |= 0x100000000ull;
	}
	spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
	if (spcsel) {
		mask  |= 3ull << 48;
		value |= (unsigned long)spcsel << 48;
	}
	*maskp = mask;
	*valp = value;
	return 0;
}

/*
 * List the alternative encodings of an event.
 *
 * @event: PPC970 event code
 * @flags: not used by this implementation
 * @alt:   output array; alt[0] is always set to @event itself and
 *         alt[1] is set when a second encoding exists
 *
 * Returns the number of entries written to @alt (1 or 2).
 */
static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
	alt[0] = event;

	/*
	 * 2 alternatives for LSU empty: codes 0x2002 and 0x3002 are
	 * each other's alternative (they differ only in bit 12).
	 */
	if (event == 0x2002 || event == 0x3002) {
		alt[1] = event ^ 0x1000;
		return 2;
	}

	return 1;
}

static int p970_compute_mmcr(u64 event[], int n_ev,
258
unsigned int hwc[], struct mmcr_regs *mmcr,
259
struct perf_event *pevents[],
260
u32 flags __maybe_unused)
261
{
262
unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
263
unsigned int pmc, unit, byte, psel;
264
unsigned int ttm, grp;
265
unsigned int pmc_inuse = 0;
266
unsigned int pmc_grp_use[2];
267
unsigned char busbyte[4];
268
unsigned char unituse[16];
269
unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 };
270
unsigned char ttmuse[2];
271
unsigned char pmcsel[8];
272
int i;
273
int spcsel;
274
275
if (n_ev > 8)
276
return -1;
277
278
/* First pass to count resource use */
279
pmc_grp_use[0] = pmc_grp_use[1] = 0;
280
memset(busbyte, 0, sizeof(busbyte));
281
memset(unituse, 0, sizeof(unituse));
282
for (i = 0; i < n_ev; ++i) {
283
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
284
if (pmc) {
285
if (pmc_inuse & (1 << (pmc - 1)))
286
return -1;
287
pmc_inuse |= 1 << (pmc - 1);
288
/* count 1/2/5/6 vs 3/4/7/8 use */
289
++pmc_grp_use[((pmc - 1) >> 1) & 1];
290
}
291
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
292
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
293
if (unit) {
294
if (unit > PM_LASTUNIT)
295
return -1;
296
if (!pmc)
297
++pmc_grp_use[byte & 1];
298
if (busbyte[byte] && busbyte[byte] != unit)
299
return -1;
300
busbyte[byte] = unit;
301
unituse[unit] = 1;
302
}
303
}
304
if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
305
return -1;
306
307
/*
308
* Assign resources and set multiplexer selects.
309
*
310
* PM_ISU can go either on TTM0 or TTM1, but that's the only
311
* choice we have to deal with.
312
*/
313
if (unituse[PM_ISU] &
314
(unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU]))
315
unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */
316
/* Set TTM[01]SEL fields. */
317
ttmuse[0] = ttmuse[1] = 0;
318
for (i = PM_FPU; i <= PM_STS; ++i) {
319
if (!unituse[i])
320
continue;
321
ttm = unitmap[i];
322
++ttmuse[(ttm >> 2) & 1];
323
mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH;
324
}
325
/* Check only one unit per TTMx */
326
if (ttmuse[0] > 1 || ttmuse[1] > 1)
327
return -1;
328
329
/* Set byte lane select fields and TTM3SEL. */
330
for (byte = 0; byte < 4; ++byte) {
331
unit = busbyte[byte];
332
if (!unit)
333
continue;
334
if (unit <= PM_STS)
335
ttm = (unitmap[unit] >> 2) & 1;
336
else if (unit == PM_LSU0)
337
ttm = 2;
338
else {
339
ttm = 3;
340
if (unit == PM_LSU1L && byte >= 2)
341
mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
342
}
343
mmcr1 |= (unsigned long)ttm
344
<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
345
}
346
347
/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
348
memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */
349
for (i = 0; i < n_ev; ++i) {
350
pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
351
unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
352
byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
353
psel = event[i] & PM_PMCSEL_MSK;
354
if (!pmc) {
355
/* Bus event or any-PMC direct event */
356
if (unit)
357
psel |= 0x10 | ((byte & 2) << 2);
358
else
359
psel |= 8;
360
for (pmc = 0; pmc < 8; ++pmc) {
361
if (pmc_inuse & (1 << pmc))
362
continue;
363
grp = (pmc >> 1) & 1;
364
if (unit) {
365
if (grp == (byte & 1))
366
break;
367
} else if (pmc_grp_use[grp] < 4) {
368
++pmc_grp_use[grp];
369
break;
370
}
371
}
372
pmc_inuse |= 1 << pmc;
373
} else {
374
/* Direct event */
375
--pmc;
376
if (psel == 0 && (byte & 2))
377
/* add events on higher-numbered bus */
378
mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
379
}
380
pmcsel[pmc] = psel;
381
hwc[i] = pmc;
382
spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK;
383
mmcr1 |= spcsel;
384
if (p970_marked_instr_event(event[i]))
385
mmcra |= MMCRA_SAMPLE_ENABLE;
386
}
387
for (pmc = 0; pmc < 2; ++pmc)
388
mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc);
389
for (; pmc < 8; ++pmc)
390
mmcr1 |= (unsigned long)pmcsel[pmc]
391
<< (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
392
if (pmc_inuse & 1)
393
mmcr0 |= MMCR0_PMC1CE;
394
if (pmc_inuse & 0xfe)
395
mmcr0 |= MMCR0_PMCjCE;
396
397
mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
398
399
/* Return MMCRx values */
400
mmcr->mmcr0 = mmcr0;
401
mmcr->mmcr1 = mmcr1;
402
mmcr->mmcra = mmcra;
403
return 0;
404
}
405
406
static void p970_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
407
{
408
int shift;
409
410
/*
411
* Setting the PMCxSEL field to 0x08 disables PMC x.
412
*/
413
if (pmc <= 1) {
414
shift = MMCR0_PMC1SEL_SH - 7 * pmc;
415
mmcr->mmcr0 = (mmcr->mmcr0 & ~(0x1fUL << shift)) | (0x08UL << shift);
416
} else {
417
shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
418
mmcr->mmcr1 = (mmcr->mmcr1 & ~(0x1fUL << shift)) | (0x08UL << shift);
419
}
420
}
421
422
static int ppc970_generic_events[] = {
423
[PERF_COUNT_HW_CPU_CYCLES] = 7,
424
[PERF_COUNT_HW_INSTRUCTIONS] = 1,
425
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */
426
[PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */
427
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */
428
[PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */
429
};
430
431
#define C(x) PERF_COUNT_HW_CACHE_##x
432
433
/*
434
* Table of generalized cache-related events.
435
* 0 means not supported, -1 means nonsensical, other values
436
* are event codes.
437
*/
438
static u64 ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
439
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
440
[C(OP_READ)] = { 0x8810, 0x3810 },
441
[C(OP_WRITE)] = { 0x7810, 0x813 },
442
[C(OP_PREFETCH)] = { 0x731, 0 },
443
},
444
[C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
445
[C(OP_READ)] = { 0, 0 },
446
[C(OP_WRITE)] = { -1, -1 },
447
[C(OP_PREFETCH)] = { 0, 0 },
448
},
449
[C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
450
[C(OP_READ)] = { 0, 0 },
451
[C(OP_WRITE)] = { 0, 0 },
452
[C(OP_PREFETCH)] = { 0x733, 0 },
453
},
454
[C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
455
[C(OP_READ)] = { 0, 0x704 },
456
[C(OP_WRITE)] = { -1, -1 },
457
[C(OP_PREFETCH)] = { -1, -1 },
458
},
459
[C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
460
[C(OP_READ)] = { 0, 0x700 },
461
[C(OP_WRITE)] = { -1, -1 },
462
[C(OP_PREFETCH)] = { -1, -1 },
463
},
464
[C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
465
[C(OP_READ)] = { 0x431, 0x327 },
466
[C(OP_WRITE)] = { -1, -1 },
467
[C(OP_PREFETCH)] = { -1, -1 },
468
},
469
[C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
470
[C(OP_READ)] = { -1, -1 },
471
[C(OP_WRITE)] = { -1, -1 },
472
[C(OP_PREFETCH)] = { -1, -1 },
473
},
474
};
475
476
static struct power_pmu ppc970_pmu = {
477
.name = "PPC970/FX/MP",
478
.n_counter = 8,
479
.max_alternatives = 2,
480
.add_fields = 0x001100005555ull,
481
.test_adder = 0x013300000000ull,
482
.compute_mmcr = p970_compute_mmcr,
483
.get_constraint = p970_get_constraint,
484
.get_alternatives = p970_get_alternatives,
485
.disable_pmc = p970_disable_pmc,
486
.n_generic = ARRAY_SIZE(ppc970_generic_events),
487
.generic_events = ppc970_generic_events,
488
.cache_events = &ppc970_cache_events,
489
.flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
490
};
491
492
/*
 * Register the PPC970 PMU if the running CPU is a 970-family part.
 *
 * Reads the processor version register and returns -ENODEV unless its
 * version field is PVR_970, PVR_970MP, PVR_970FX or PVR_970GX;
 * otherwise returns whatever register_power_pmu() returns.
 */
int __init init_ppc970_pmu(void)
{
	unsigned int pvr = mfspr(SPRN_PVR);

	if (PVR_VER(pvr) != PVR_970 && PVR_VER(pvr) != PVR_970MP &&
	    PVR_VER(pvr) != PVR_970FX && PVR_VER(pvr) != PVR_970GX)
		return -ENODEV;

	return register_power_pmu(&ppc970_pmu);
}