GitHub Repository: torvalds/linux
Path: blob/master/arch/s390/kernel/perf_cpum_cf.c
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Performance event support for s390x - CPU-measurement Counter Facility
4
*
5
* Copyright IBM Corp. 2012, 2023
6
* Author(s): Hendrik Brueckner <[email protected]>
7
* Thomas Richter <[email protected]>
8
*/
9
#define pr_fmt(fmt) "cpum_cf: " fmt
10
11
#include <linux/kernel.h>
12
#include <linux/kernel_stat.h>
13
#include <linux/percpu.h>
14
#include <linux/notifier.h>
15
#include <linux/init.h>
16
#include <linux/miscdevice.h>
17
#include <linux/perf_event.h>
18
19
#include <asm/cpu_mf.h>
20
#include <asm/hwctrset.h>
21
#include <asm/debug.h>
22
23
/* Perf PMU definitions for the counter facility */
24
#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */
25
#define PERF_EVENT_CPUM_CF_DIAG 0xBC000UL /* Event: Counter sets */
26
27
enum cpumf_ctr_set {
28
CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
29
CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
30
CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */
31
CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */
32
CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */
33
34
/* Maximum number of counter sets */
35
CPUMF_CTR_SET_MAX,
36
};
37
38
#define CPUMF_LCCTL_ENABLE_SHIFT 16
39
#define CPUMF_LCCTL_ACTCTL_SHIFT 0
40
41
static inline void ctr_set_enable(u64 *state, u64 ctrsets)
42
{
43
*state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
44
}
45
46
static inline void ctr_set_disable(u64 *state, u64 ctrsets)
47
{
48
*state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
49
}
50
51
static inline void ctr_set_start(u64 *state, u64 ctrsets)
52
{
53
*state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
54
}
55
56
static inline void ctr_set_stop(u64 *state, u64 ctrsets)
57
{
58
*state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
59
}
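/*
 * Example (for illustration): enabling and starting only the basic counter
 * set, whose control bit is 0x02 (see cpumf_ctr_ctl[] below), yields
 *   state = (0x02 << CPUMF_LCCTL_ENABLE_SHIFT) | (0x02 << CPUMF_LCCTL_ACTCTL_SHIFT)
 *         = 0x20002,
 * which lcctl() then loads as the counter-set controls.
 */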
60
61
static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
62
{
63
switch (set) {
64
case CPUMF_CTR_SET_BASIC:
65
return stcctm(BASIC, range, dest);
66
case CPUMF_CTR_SET_USER:
67
return stcctm(PROBLEM_STATE, range, dest);
68
case CPUMF_CTR_SET_CRYPTO:
69
return stcctm(CRYPTO_ACTIVITY, range, dest);
70
case CPUMF_CTR_SET_EXT:
71
return stcctm(EXTENDED, range, dest);
72
case CPUMF_CTR_SET_MT_DIAG:
73
return stcctm(MT_DIAG_CLEARING, range, dest);
74
case CPUMF_CTR_SET_MAX:
75
return 3;
76
}
77
return 3;
78
}
79
80
struct cpu_cf_events {
81
refcount_t refcnt; /* Reference count */
82
atomic_t ctr_set[CPUMF_CTR_SET_MAX];
83
u64 state; /* For perf_event_open SVC */
84
u64 dev_state; /* For /dev/hwctr */
85
unsigned int flags;
86
size_t used; /* Bytes used in data */
87
size_t usedss; /* Bytes used in start/stop */
88
unsigned char start[PAGE_SIZE]; /* Counter set at event add */
89
unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */
90
unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */
91
unsigned int sets; /* # Counter set saved in memory */
92
};
93
94
static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
95
static debug_info_t *cf_dbg;
96
97
/*
98
* The CPU Measurement query counter information instruction contains
99
* information which varies per machine generation, but is constant and
100
* does not change when running on a particular machine, such as counter
101
* first and second version number. This is needed to determine the size
102
* of counter sets. Extract this information at device driver initialization.
103
*/
104
static struct cpumf_ctr_info cpumf_ctr_info;
105
106
struct cpu_cf_ptr {
107
struct cpu_cf_events *cpucf;
108
};
109
110
static struct cpu_cf_root { /* Anchor to per CPU data */
111
refcount_t refcnt; /* Overall active events */
112
struct cpu_cf_ptr __percpu *cfptr;
113
} cpu_cf_root;
114
115
/*
116
* Serialize event initialization and event removal. Both are called from
117
* user space in task context with perf_event_open() and close()
118
* system calls.
119
*
120
* This mutex serializes functions cpum_cf_alloc_cpu() called at event
121
* initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
122
* called at event removal via call back function hw_perf_event_destroy()
123
* when the event is deleted. They are serialized to enforce correct
124
* bookkeeping of pointer and reference counts anchored by
125
* struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
126
* per CPU pointers stored in cpu_cf_root::cfptr.
127
*/
128
static DEFINE_MUTEX(pmc_reserve_mutex);
129
130
/*
131
* Get pointer to per-cpu structure.
132
*
133
* Function get_cpu_cfhw() is called from
134
* - cfset_copy_all(): This function is protected by cpus_read_lock(), so
135
* CPU hot plug remove can not happen. Event removal requires a close()
136
* first.
137
*
138
* Function this_cpu_cfhw() is called from perf common code functions:
139
* - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}():
140
* All functions execute with interrupts disabled on that particular CPU.
141
* - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all().
142
*
143
* Therefore it is safe to access the CPU specific pointer to the event.
144
*/
145
static struct cpu_cf_events *get_cpu_cfhw(int cpu)
146
{
147
struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr;
148
149
if (p) {
150
struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu);
151
152
return q->cpucf;
153
}
154
return NULL;
155
}
156
157
static struct cpu_cf_events *this_cpu_cfhw(void)
158
{
159
return get_cpu_cfhw(smp_processor_id());
160
}
161
162
/* Disable counter sets on dedicated CPU */
163
static void cpum_cf_reset_cpu(void *flags)
164
{
165
lcctl(0);
166
}
167
168
/* Free per CPU data when the last event is removed. */
169
static void cpum_cf_free_root(void)
170
{
171
if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
172
return;
173
free_percpu(cpu_cf_root.cfptr);
174
cpu_cf_root.cfptr = NULL;
175
irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
176
on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
177
debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n",
178
__func__, refcount_read(&cpu_cf_root.refcnt),
179
!cpu_cf_root.cfptr);
180
}
181
182
/*
183
* On initialization of first event also allocate per CPU data dynamically.
184
* Start with an array of pointers, the array size is the maximum number of
185
* CPUs possible, which might be larger than the number of CPUs currently
186
* online.
187
*/
188
static int cpum_cf_alloc_root(void)
189
{
190
int rc = 0;
191
192
if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
193
return rc;
194
195
/* The memory is already zeroed. */
196
cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
197
if (cpu_cf_root.cfptr) {
198
refcount_set(&cpu_cf_root.refcnt, 1);
199
on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
200
irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
201
} else {
202
rc = -ENOMEM;
203
}
204
205
return rc;
206
}
207
208
/* Free CPU counter data structure for a PMU */
209
static void cpum_cf_free_cpu(int cpu)
210
{
211
struct cpu_cf_events *cpuhw;
212
struct cpu_cf_ptr *p;
213
214
mutex_lock(&pmc_reserve_mutex);
215
/*
216
* When invoked via CPU hotplug handler, there might be no events
217
* installed or that particular CPU might not have an
218
* event installed. This anchor pointer can be NULL!
219
*/
220
if (!cpu_cf_root.cfptr)
221
goto out;
222
p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
223
cpuhw = p->cpucf;
224
/*
225
* Might be NULL when called from the CPU hotplug handler and no event
* is installed on that CPU, only on other CPUs.
227
*/
228
if (!cpuhw)
229
goto out;
230
231
if (refcount_dec_and_test(&cpuhw->refcnt)) {
232
kfree(cpuhw);
233
p->cpucf = NULL;
234
}
235
cpum_cf_free_root();
236
out:
237
mutex_unlock(&pmc_reserve_mutex);
238
}
239
240
/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */
241
static int cpum_cf_alloc_cpu(int cpu)
242
{
243
struct cpu_cf_events *cpuhw;
244
struct cpu_cf_ptr *p;
245
int rc;
246
247
mutex_lock(&pmc_reserve_mutex);
248
rc = cpum_cf_alloc_root();
249
if (rc)
250
goto unlock;
251
p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
252
cpuhw = p->cpucf;
253
254
if (!cpuhw) {
255
cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
256
if (cpuhw) {
257
p->cpucf = cpuhw;
258
refcount_set(&cpuhw->refcnt, 1);
259
} else {
260
rc = -ENOMEM;
261
}
262
} else {
263
refcount_inc(&cpuhw->refcnt);
264
}
265
if (rc) {
266
/*
267
* Error in allocation of event, decrement anchor. Since
268
* cpu_cf_event is not created, its destroy() function is not
269
* invoked. Adjust the reference counter for the anchor.
270
*/
271
cpum_cf_free_root();
272
}
273
unlock:
274
mutex_unlock(&pmc_reserve_mutex);
275
return rc;
276
}
277
278
/*
279
* Create/delete per CPU data structures for /dev/hwctr interface and events
280
* created by perf_event_open().
281
* If cpu is -1, track task on all available CPUs. This requires
282
* allocation of hardware data structures for all CPUs. This setup handles
283
* perf_event_open() with task context and /dev/hwctr interface.
284
* If cpu is not -1, install the event on this CPU only. This setup handles
285
* perf_event_open() with CPU context.
286
*/
287
static int cpum_cf_alloc(int cpu)
288
{
289
cpumask_var_t mask;
290
int rc;
291
292
if (cpu == -1) {
293
if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
294
return -ENOMEM;
295
for_each_online_cpu(cpu) {
296
rc = cpum_cf_alloc_cpu(cpu);
297
if (rc) {
298
for_each_cpu(cpu, mask)
299
cpum_cf_free_cpu(cpu);
300
break;
301
}
302
cpumask_set_cpu(cpu, mask);
303
}
304
free_cpumask_var(mask);
305
} else {
306
rc = cpum_cf_alloc_cpu(cpu);
307
}
308
return rc;
309
}
310
311
static void cpum_cf_free(int cpu)
312
{
313
if (cpu == -1) {
314
for_each_online_cpu(cpu)
315
cpum_cf_free_cpu(cpu);
316
} else {
317
cpum_cf_free_cpu(cpu);
318
}
319
}
320
321
#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
322
/* interval in seconds */
323
324
/* Counter sets are stored as data stream in a page sized memory buffer and
325
* exported to user space via raw data attached to the event sample data.
326
* Each counter set starts with an eight byte header consisting of:
327
* - a two byte eye catcher (0xfeef)
328
* - a one byte counter set number
329
* - a two byte counter set size (indicates the number of counters in this set)
330
* - a three byte reserved value (must be zero) to make the header the same
331
* size as a counter value.
332
* All counter values are eight bytes in size.
333
*
334
* All counter sets are followed by a 64 byte trailer.
335
* The trailer consists of a:
336
* - flag field indicating valid fields when corresponding bit set
337
* - the counter facility first and second version number
338
* - the CPU speed if nonzero
339
* - the time stamp the counter sets have been collected
340
* - the time of day (TOD) base value
341
* - the machine type.
342
*
343
* The counter sets are saved when the process is prepared to be executed on a
344
* CPU and saved again when the process is going to be removed from a CPU.
345
* The difference of both counter sets is calculated and stored in the event
346
* sample data area.
347
*/
348
struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
349
unsigned int def:16; /* 0-15 Data Entry Format */
350
unsigned int set:16; /* 16-31 Counter set identifier */
351
unsigned int ctr:16; /* 32-47 Number of stored counters */
352
unsigned int res1:16; /* 48-63 Reserved */
353
};
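/*
 * Example (for illustration): a basic counter set with six counters is
 * stored as the 8-byte header
 *   def = 0xfeef (CF_DIAG_CTRSET_DEF), set = CPUMF_CTR_SET_BASIC, ctr = 6,
 *   res1 = 0
 * followed by six 8-byte counter values.
 */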
354
355
struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */
356
/* 0 - 7 */
357
union {
358
struct {
359
unsigned int clock_base:1; /* TOD clock base set */
360
unsigned int speed:1; /* CPU speed set */
361
/* Measurement alerts */
362
unsigned int mtda:1; /* Loss of MT ctr. data alert */
363
unsigned int caca:1; /* Counter auth. change alert */
364
unsigned int lcda:1; /* Loss of counter data alert */
365
};
366
unsigned long flags; /* 0-63 All indicators */
367
};
368
/* 8 - 15 */
369
unsigned int cfvn:16; /* 64-79 Ctr First Version */
370
unsigned int csvn:16; /* 80-95 Ctr Second Version */
371
unsigned int cpu_speed:32; /* 96-127 CPU speed */
372
/* 16 - 23 */
373
unsigned long timestamp; /* 128-191 Timestamp (TOD) */
374
/* 24 - 55 */
375
union {
376
struct {
377
unsigned long progusage1;
378
unsigned long progusage2;
379
unsigned long progusage3;
380
unsigned long tod_base;
381
};
382
unsigned long progusage[4];
383
};
384
/* 56 - 63 */
385
unsigned int mach_type:16; /* Machine type */
386
unsigned int res1:16; /* Reserved */
387
unsigned int res2:32; /* Reserved */
388
};
389
390
/* Create the trailer data at the end of a page. */
391
static void cfdiag_trailer(struct cf_trailer_entry *te)
392
{
393
struct cpuid cpuid;
394
395
te->cfvn = cpumf_ctr_info.cfvn; /* Counter version numbers */
396
te->csvn = cpumf_ctr_info.csvn;
397
398
get_cpu_id(&cpuid); /* Machine type */
399
te->mach_type = cpuid.machine;
400
te->cpu_speed = cfdiag_cpu_speed;
401
if (te->cpu_speed)
402
te->speed = 1;
403
te->clock_base = 1; /* Save clock base */
404
te->tod_base = tod_clock_base.tod;
405
te->timestamp = get_tod_clock_fast();
406
}
407
408
/*
409
* The number of counters per counter set varies between machine generations,
410
* but is constant when running on a particular machine generation.
411
* Determine each counter set size at device driver initialization and
412
* retrieve it later.
413
*/
414
static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX];
415
static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
416
{
417
size_t ctrset_size = 0;
418
419
switch (ctrset) {
420
case CPUMF_CTR_SET_BASIC:
421
if (cpumf_ctr_info.cfvn >= 1)
422
ctrset_size = 6;
423
break;
424
case CPUMF_CTR_SET_USER:
425
if (cpumf_ctr_info.cfvn == 1)
426
ctrset_size = 6;
427
else if (cpumf_ctr_info.cfvn >= 3)
428
ctrset_size = 2;
429
break;
430
case CPUMF_CTR_SET_CRYPTO:
431
if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5)
432
ctrset_size = 16;
433
else if (cpumf_ctr_info.csvn >= 6)
434
ctrset_size = 20;
435
break;
436
case CPUMF_CTR_SET_EXT:
437
if (cpumf_ctr_info.csvn == 1)
438
ctrset_size = 32;
439
else if (cpumf_ctr_info.csvn == 2)
440
ctrset_size = 48;
441
else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5)
442
ctrset_size = 128;
443
else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8)
444
ctrset_size = 160;
445
break;
446
case CPUMF_CTR_SET_MT_DIAG:
447
if (cpumf_ctr_info.csvn > 3)
448
ctrset_size = 48;
449
break;
450
case CPUMF_CTR_SET_MAX:
451
break;
452
}
453
cpumf_ctr_setsizes[ctrset] = ctrset_size;
454
}
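/*
 * Example (for illustration): on a machine reporting csvn == 6, the switch
 * above yields 20 counters for the crypto set and 160 counters for the
 * extended set.
 */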
455
456
/*
457
* Return the maximum possible counter set size (in number of 8 byte counters)
458
* depending on type and model number.
459
*/
460
static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset)
461
{
462
return cpumf_ctr_setsizes[ctrset];
463
}
464
465
/* Read a counter set. The counter set number determines the counter set and
466
* the CPUM-CF first and second version number determine the number of
467
* available counters in each counter set.
468
* Each counter set starts with a header containing the counter set number and
469
* the number of eight byte counters.
470
*
471
* The function returns the number of bytes occupied by this counter set
472
* including the header.
473
* If there is no counter in the counter set, this counter set is useless and
474
* zero is returned in this case.
475
*
476
* Note that the counter sets may not be enabled or active and the stcctm
477
* instruction might return error 3. Depending on error_ok value this is ok,
478
* for example when called from cpumf_pmu_start() call back function.
479
*/
480
static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
481
size_t room, bool error_ok)
482
{
483
size_t ctrset_size, need = 0;
484
int rc = 3; /* Assume write failure */
485
486
ctrdata->def = CF_DIAG_CTRSET_DEF;
487
ctrdata->set = ctrset;
488
ctrdata->res1 = 0;
489
ctrset_size = cpum_cf_read_setsize(ctrset);
490
491
if (ctrset_size) { /* Save data */
492
need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
493
if (need <= room) {
494
rc = ctr_stcctm(ctrset, ctrset_size,
495
(u64 *)(ctrdata + 1));
496
}
497
if (rc != 3 || error_ok)
498
ctrdata->ctr = ctrset_size;
499
else
500
need = 0;
501
}
502
503
return need;
504
}
505
506
static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = {
507
[CPUMF_CTR_SET_BASIC] = 0x02,
508
[CPUMF_CTR_SET_USER] = 0x04,
509
[CPUMF_CTR_SET_CRYPTO] = 0x08,
510
[CPUMF_CTR_SET_EXT] = 0x01,
511
[CPUMF_CTR_SET_MT_DIAG] = 0x20,
512
};
513
514
/* Read out all counter sets and save them in the provided data buffer.
515
* The last 64 bytes hold an artificial trailer entry.
516
*/
517
static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth,
518
bool error_ok)
519
{
520
struct cf_trailer_entry *trailer;
521
size_t offset = 0, done;
522
int i;
523
524
memset(data, 0, sz);
525
sz -= sizeof(*trailer); /* Always room for trailer */
526
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
527
struct cf_ctrset_entry *ctrdata = data + offset;
528
529
if (!(auth & cpumf_ctr_ctl[i]))
530
continue; /* Counter set not authorized */
531
532
done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok);
533
offset += done;
534
}
535
trailer = data + offset;
536
cfdiag_trailer(trailer);
537
return offset + sizeof(*trailer);
538
}
539
540
/* Calculate the difference for each counter in a counter set. */
541
static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters)
542
{
543
for (; --counters >= 0; ++pstart, ++pstop)
544
if (*pstop >= *pstart)
545
*pstop -= *pstart;
546
else
547
*pstop = *pstart - *pstop + 1;
548
}
549
550
/* Scan the counter sets and calculate the difference of each counter
551
* in each set. The result is the increment of each counter during the
552
* period the counter set has been activated.
553
*
554
* Return true on success.
555
*/
556
static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
557
{
558
struct cf_trailer_entry *trailer_start, *trailer_stop;
559
struct cf_ctrset_entry *ctrstart, *ctrstop;
560
size_t offset = 0;
561
int i;
562
563
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
564
ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
565
ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
566
567
/* Counter set not authorized */
568
if (!(auth & cpumf_ctr_ctl[i]))
569
continue;
570
/* Counter set size zero was not saved */
571
if (!cpum_cf_read_setsize(i))
572
continue;
573
574
if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
575
pr_err_once("cpum_cf_diag counter set compare error "
576
"in set %i\n", ctrstart->set);
577
return 0;
578
}
579
if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
580
cfdiag_diffctrset((u64 *)(ctrstart + 1),
581
(u64 *)(ctrstop + 1), ctrstart->ctr);
582
offset += ctrstart->ctr * sizeof(u64) +
583
sizeof(*ctrstart);
584
}
585
}
586
587
/* Save time_stamp from start of event in stop's trailer */
588
trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
589
trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset);
590
trailer_stop->progusage[0] = trailer_start->timestamp;
591
592
return 1;
593
}
594
595
static enum cpumf_ctr_set get_counter_set(u64 event)
596
{
597
int set = CPUMF_CTR_SET_MAX;
598
599
if (event < 32)
600
set = CPUMF_CTR_SET_BASIC;
601
else if (event < 64)
602
set = CPUMF_CTR_SET_USER;
603
else if (event < 128)
604
set = CPUMF_CTR_SET_CRYPTO;
605
else if (event < 288)
606
set = CPUMF_CTR_SET_EXT;
607
else if (event >= 448 && event < 496)
608
set = CPUMF_CTR_SET_MT_DIAG;
609
610
return set;
611
}
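/*
 * Example (for illustration): counter number 1 (instructions) maps to
 * CPUMF_CTR_SET_BASIC, counter number 33 (problem-state instructions) maps
 * to CPUMF_CTR_SET_USER, and numbers 288..447 match no range and remain
 * CPUMF_CTR_SET_MAX, i.e. they belong to no counter set.
 */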
612
613
static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
614
{
615
u16 mtdiag_ctl;
616
int err = 0;
617
618
/* check required version for counter sets */
619
switch (set) {
620
case CPUMF_CTR_SET_BASIC:
621
case CPUMF_CTR_SET_USER:
622
if (cpumf_ctr_info.cfvn < 1)
623
err = -EOPNOTSUPP;
624
break;
625
case CPUMF_CTR_SET_CRYPTO:
626
if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 &&
627
config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83))
628
err = -EOPNOTSUPP;
629
break;
630
case CPUMF_CTR_SET_EXT:
631
if (cpumf_ctr_info.csvn < 1)
632
err = -EOPNOTSUPP;
633
if ((cpumf_ctr_info.csvn == 1 && config > 159) ||
634
(cpumf_ctr_info.csvn == 2 && config > 175) ||
635
(cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 &&
636
config > 255) ||
637
(cpumf_ctr_info.csvn >= 6 && config > 287))
638
err = -EOPNOTSUPP;
639
break;
640
case CPUMF_CTR_SET_MT_DIAG:
641
if (cpumf_ctr_info.csvn <= 3)
642
err = -EOPNOTSUPP;
643
/*
644
* MT-diagnostic counters are read-only. The counter set
645
* is automatically enabled and activated on all CPUs with
646
* multithreading (SMT). Deactivation of multithreading
647
* also disables the counter set. State changes are ignored
648
* by lcctl(). Because Linux controls SMT enablement through
649
* a kernel parameter only, the counter set is either disabled
650
* or enabled and active.
651
*
652
* Thus, the counters can only be used if SMT is on and the
653
* counter set is enabled and active.
654
*/
655
mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG];
656
if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) &&
657
(cpumf_ctr_info.enable_ctl & mtdiag_ctl) &&
658
(cpumf_ctr_info.act_ctl & mtdiag_ctl)))
659
err = -EOPNOTSUPP;
660
break;
661
case CPUMF_CTR_SET_MAX:
662
err = -EOPNOTSUPP;
663
}
664
665
return err;
666
}
667
668
/*
669
* Change the CPUMF state to active.
670
* Enable and activate the CPU-counter sets according
671
* to the per-cpu control state.
672
*/
673
static void cpumf_pmu_enable(struct pmu *pmu)
674
{
675
struct cpu_cf_events *cpuhw = this_cpu_cfhw();
676
int err;
677
678
if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED))
679
return;
680
681
err = lcctl(cpuhw->state | cpuhw->dev_state);
682
if (err)
683
pr_err("Enabling the performance measuring unit failed with rc=%x\n", err);
684
else
685
cpuhw->flags |= PMU_F_ENABLED;
686
}
687
688
/*
689
* Change the CPUMF state to inactive.
690
* Disable and enable (inactive) the CPU-counter sets according
691
* to the per-cpu control state.
692
*/
693
static void cpumf_pmu_disable(struct pmu *pmu)
694
{
695
struct cpu_cf_events *cpuhw = this_cpu_cfhw();
696
u64 inactive;
697
int err;
698
699
if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED))
700
return;
701
702
inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
703
inactive |= cpuhw->dev_state;
704
err = lcctl(inactive);
705
if (err)
706
pr_err("Disabling the performance measuring unit failed with rc=%x\n", err);
707
else
708
cpuhw->flags &= ~PMU_F_ENABLED;
709
}
710
711
/* Release the PMU if event is the last perf event */
712
static void hw_perf_event_destroy(struct perf_event *event)
713
{
714
cpum_cf_free(event->cpu);
715
}
716
717
/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
718
static const int cpumf_generic_events_basic[] = {
719
[PERF_COUNT_HW_CPU_CYCLES] = 0,
720
[PERF_COUNT_HW_INSTRUCTIONS] = 1,
721
[PERF_COUNT_HW_CACHE_REFERENCES] = -1,
722
[PERF_COUNT_HW_CACHE_MISSES] = -1,
723
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
724
[PERF_COUNT_HW_BRANCH_MISSES] = -1,
725
[PERF_COUNT_HW_BUS_CYCLES] = -1,
726
};
727
/* CPUMF <-> perf event mappings for userspace (problem-state set) */
728
static const int cpumf_generic_events_user[] = {
729
[PERF_COUNT_HW_CPU_CYCLES] = 32,
730
[PERF_COUNT_HW_INSTRUCTIONS] = 33,
731
[PERF_COUNT_HW_CACHE_REFERENCES] = -1,
732
[PERF_COUNT_HW_CACHE_MISSES] = -1,
733
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
734
[PERF_COUNT_HW_BRANCH_MISSES] = -1,
735
[PERF_COUNT_HW_BUS_CYCLES] = -1,
736
};
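/* A value of -1 in the two tables above marks a generic hardware event that
 * has no CPUM-CF counter; __hw_perf_event_init() returns -ENOENT for it so
 * another PMU may handle the event.
 */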
737
738
static int is_userspace_event(u64 ev)
739
{
740
return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
741
cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev;
742
}
743
744
static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
745
{
746
struct perf_event_attr *attr = &event->attr;
747
struct hw_perf_event *hwc = &event->hw;
748
enum cpumf_ctr_set set;
749
u64 ev;
750
751
switch (type) {
752
case PERF_TYPE_RAW:
753
/* Raw events are used to access counters directly,
754
* hence do not permit excludes */
755
if (attr->exclude_kernel || attr->exclude_user ||
756
attr->exclude_hv)
757
return -EOPNOTSUPP;
758
ev = attr->config;
759
break;
760
761
case PERF_TYPE_HARDWARE:
762
ev = attr->config;
763
if (!attr->exclude_user && attr->exclude_kernel) {
764
/*
765
* Count user space (problem-state) only
766
* Handle events 32 and 33 as 0:u and 1:u
767
*/
768
if (!is_userspace_event(ev)) {
769
if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
770
return -EOPNOTSUPP;
771
ev = cpumf_generic_events_user[ev];
772
}
773
} else if (!attr->exclude_kernel && attr->exclude_user) {
774
/* No support for kernel space counters only */
775
return -EOPNOTSUPP;
776
} else {
777
/* Count user and kernel space, incl. events 32 + 33 */
778
if (!is_userspace_event(ev)) {
779
if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
780
return -EOPNOTSUPP;
781
ev = cpumf_generic_events_basic[ev];
782
}
783
}
784
break;
785
786
default:
787
return -ENOENT;
788
}
789
790
if (ev == -1)
791
return -ENOENT;
792
793
if (ev > PERF_CPUM_CF_MAX_CTR)
794
return -ENOENT;
795
796
/* Obtain the counter set to which the specified counter belongs */
797
set = get_counter_set(ev);
798
switch (set) {
799
case CPUMF_CTR_SET_BASIC:
800
case CPUMF_CTR_SET_USER:
801
case CPUMF_CTR_SET_CRYPTO:
802
case CPUMF_CTR_SET_EXT:
803
case CPUMF_CTR_SET_MT_DIAG:
804
/*
805
* Use the hardware perf event structure to store the
806
* counter number in the 'config' member and the counter
807
* set number in the 'config_base' as bit mask.
808
* It is later used to enable/disable the counter(s).
809
*/
810
hwc->config = ev;
811
hwc->config_base = cpumf_ctr_ctl[set];
812
break;
813
case CPUMF_CTR_SET_MAX:
814
/* The counter could not be associated to a counter set */
815
return -EINVAL;
816
}
817
818
/* Initialize for using the CPU-measurement counter facility */
819
if (cpum_cf_alloc(event->cpu))
820
return -ENOMEM;
821
event->destroy = hw_perf_event_destroy;
822
823
/*
824
* Finally, validate version and authorization of the counter set.
825
* If the particular CPU counter set is not authorized,
826
* return with -ENOENT in order to fall back to other
827
* PMUs that might suffice the event request.
828
*/
829
if (!(hwc->config_base & cpumf_ctr_info.auth_ctl))
830
return -ENOENT;
831
return validate_ctr_version(hwc->config, set);
832
}
833
834
/* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different
835
* attribute::type values:
836
* - PERF_TYPE_HARDWARE:
837
* - pmu->type:
838
* Handle both types of invocation identically. They address the same hardware.
839
* The result is different when event modifiers exclude_kernel and/or
840
* exclude_user are also set.
841
*/
842
static int cpumf_pmu_event_type(struct perf_event *event)
843
{
844
u64 ev = event->attr.config;
845
846
if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev ||
847
cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev ||
848
cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
849
cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev)
850
return PERF_TYPE_HARDWARE;
851
return PERF_TYPE_RAW;
852
}
853
854
static int cpumf_pmu_event_init(struct perf_event *event)
855
{
856
unsigned int type = event->attr.type;
857
int err = -ENOENT;
858
859
if (is_sampling_event(event)) /* No sampling support */
860
return err;
861
if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
862
err = __hw_perf_event_init(event, type);
863
else if (event->pmu->type == type)
864
/* Registered as unknown PMU */
865
err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
866
867
return err;
868
}
869
870
static int hw_perf_event_reset(struct perf_event *event)
871
{
872
u64 prev, new;
873
int err;
874
875
prev = local64_read(&event->hw.prev_count);
876
do {
877
err = ecctr(event->hw.config, &new);
878
if (err) {
879
if (err != 3)
880
break;
881
/* The counter is not (yet) available. This
882
* might happen if the counter set to which
883
* this counter belongs is in the disabled
884
* state.
885
*/
886
new = 0;
887
}
888
} while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
889
890
return err;
891
}
892
893
static void hw_perf_event_update(struct perf_event *event)
894
{
895
u64 prev, new, delta;
896
int err;
897
898
prev = local64_read(&event->hw.prev_count);
899
do {
900
err = ecctr(event->hw.config, &new);
901
if (err)
902
return;
903
} while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
904
905
delta = (prev <= new) ? new - prev
906
: (-1ULL - prev) + new + 1; /* overflow */
907
local64_add(delta, &event->count);
908
}
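/*
 * Worked example for the wraparound case above (for illustration): with
 * prev = 0xfffffffffffffffe and new = 5, delta = (-1ULL - prev) + new + 1
 * = 1 + 5 + 1 = 7, i.e. the counter advanced seven times across the 64-bit
 * wrap.
 */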
909
910
static void cpumf_pmu_read(struct perf_event *event)
911
{
912
if (event->hw.state & PERF_HES_STOPPED)
913
return;
914
915
hw_perf_event_update(event);
916
}
917
918
static void cpumf_pmu_start(struct perf_event *event, int flags)
919
{
920
struct cpu_cf_events *cpuhw = this_cpu_cfhw();
921
struct hw_perf_event *hwc = &event->hw;
922
int i;
923
924
if (!(hwc->state & PERF_HES_STOPPED))
925
return;
926
927
hwc->state = 0;
928
929
/* (Re-)enable and activate the counter set */
930
ctr_set_enable(&cpuhw->state, hwc->config_base);
931
ctr_set_start(&cpuhw->state, hwc->config_base);
932
933
/* The counter set to which this counter belongs can be already active.
934
* Because all counters in a set are active, the event->hw.prev_count
935
* needs to be synchronized. At this point, the counter set can be in
936
* the inactive or disabled state.
937
*/
938
if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
939
cpuhw->usedss = cfdiag_getctr(cpuhw->start,
940
sizeof(cpuhw->start),
941
hwc->config_base, true);
942
} else {
943
hw_perf_event_reset(event);
944
}
945
946
/* Increment refcount for counter sets */
947
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
948
if ((hwc->config_base & cpumf_ctr_ctl[i]))
949
atomic_inc(&cpuhw->ctr_set[i]);
950
}
951
952
/* Create perf event sample with the counter sets as raw data. The sample
953
* is then pushed to the event subsystem and the function checks for
954
* possible event overflows. If an event overflow occurs, the PMU is
955
* stopped.
956
*
957
* Return non-zero if an event overflow occurred.
958
*/
959
static int cfdiag_push_sample(struct perf_event *event,
960
struct cpu_cf_events *cpuhw)
961
{
962
struct perf_sample_data data;
963
struct perf_raw_record raw;
964
struct pt_regs regs;
965
int overflow;
966
967
/* Setup perf sample */
968
perf_sample_data_init(&data, 0, event->hw.last_period);
969
memset(&regs, 0, sizeof(regs));
970
memset(&raw, 0, sizeof(raw));
971
972
if (event->attr.sample_type & PERF_SAMPLE_CPU)
973
data.cpu_entry.cpu = event->cpu;
974
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
975
raw.frag.size = cpuhw->usedss;
976
raw.frag.data = cpuhw->stop;
977
perf_sample_save_raw_data(&data, event, &raw);
978
}
979
980
overflow = perf_event_overflow(event, &data, &regs);
981
982
perf_event_update_userpage(event);
983
return overflow;
984
}
985
986
static void cpumf_pmu_stop(struct perf_event *event, int flags)
987
{
988
struct cpu_cf_events *cpuhw = this_cpu_cfhw();
989
struct hw_perf_event *hwc = &event->hw;
990
int i;
991
992
if (!(hwc->state & PERF_HES_STOPPED)) {
993
/* Decrement reference count for this counter set and if this
994
* is the last used counter in the set, clear activation
995
* control and set the counter set state to inactive.
996
*/
997
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
998
if (!(hwc->config_base & cpumf_ctr_ctl[i]))
999
continue;
1000
if (!atomic_dec_return(&cpuhw->ctr_set[i]))
1001
ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]);
1002
}
1003
hwc->state |= PERF_HES_STOPPED;
1004
}
1005
1006
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1007
if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
1008
local64_inc(&event->count);
1009
cpuhw->usedss = cfdiag_getctr(cpuhw->stop,
1010
sizeof(cpuhw->stop),
1011
event->hw.config_base,
1012
false);
1013
if (cfdiag_diffctr(cpuhw, event->hw.config_base))
1014
cfdiag_push_sample(event, cpuhw);
1015
} else {
1016
hw_perf_event_update(event);
1017
}
1018
hwc->state |= PERF_HES_UPTODATE;
1019
}
1020
}
1021
1022
static int cpumf_pmu_add(struct perf_event *event, int flags)
1023
{
1024
struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1025
1026
ctr_set_enable(&cpuhw->state, event->hw.config_base);
1027
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
1028
1029
if (flags & PERF_EF_START)
1030
cpumf_pmu_start(event, PERF_EF_RELOAD);
1031
1032
return 0;
1033
}
1034
1035
static void cpumf_pmu_del(struct perf_event *event, int flags)
1036
{
1037
struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1038
int i;
1039
1040
cpumf_pmu_stop(event, PERF_EF_UPDATE);
1041
1042
/* Check if any counter in the counter set is still used. If not used,
1043
* change the counter set to the disabled state. This also clears the
1044
* content of all counters in the set.
1045
*
1046
* When a new perf event has been added but not yet started, this can
1047
* clear enable control and reset all counters in a set. Therefore,
1048
* cpumf_pmu_start() always has to re-enable a counter set.
1049
*/
1050
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
1051
if (!atomic_read(&cpuhw->ctr_set[i]))
1052
ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]);
1053
}
1054
1055
/* Performance monitoring unit for s390x */
1056
static struct pmu cpumf_pmu = {
1057
.task_ctx_nr = perf_sw_context,
1058
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
1059
.pmu_enable = cpumf_pmu_enable,
1060
.pmu_disable = cpumf_pmu_disable,
1061
.event_init = cpumf_pmu_event_init,
1062
.add = cpumf_pmu_add,
1063
.del = cpumf_pmu_del,
1064
.start = cpumf_pmu_start,
1065
.stop = cpumf_pmu_stop,
1066
.read = cpumf_pmu_read,
1067
};
1068
1069
static struct cfset_session { /* CPUs and counter set bit mask */
1070
struct list_head head; /* Head of list of active processes */
1071
} cfset_session = {
1072
.head = LIST_HEAD_INIT(cfset_session.head)
1073
};
1074
1075
static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */
1076
/*
1077
* Synchronize access to device /dev/hwc. This mutex protects against
1078
* concurrent access to functions cfset_open() and cfset_release().
1079
* Same for CPU hotplug add and remove events triggering
1080
* cpum_cf_online_cpu() and cpum_cf_offline_cpu().
1081
* It also serializes concurrent device ioctl access from multiple
1082
* processes accessing /dev/hwc.
1083
*
1084
* The mutex protects concurrent access to the /dev/hwctr session management
1085
* struct cfset_session and reference counting variable cfset_opencnt.
1086
*/
1087
static DEFINE_MUTEX(cfset_ctrset_mutex);
1088
1089
/*
1090
* CPU hotplug handles only /dev/hwctr device.
1091
* For perf_event_open() the CPU hotplug handling is done on kernel common
1092
* code:
1093
* - CPU add: Nothing is done since a file descriptor can not be created
1094
* and returned to the user.
1095
* - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and
1096
* pmu_delete(). The event itself is removed when the file descriptor is
1097
* closed.
1098
*/
1099
static int cfset_online_cpu(unsigned int cpu);
1100
1101
static int cpum_cf_online_cpu(unsigned int cpu)
1102
{
1103
int rc = 0;
1104
1105
/*
1106
* Ignore notification for perf_event_open().
1107
* Handle only /dev/hwctr device sessions.
1108
*/
1109
mutex_lock(&cfset_ctrset_mutex);
1110
if (refcount_read(&cfset_opencnt)) {
1111
rc = cpum_cf_alloc_cpu(cpu);
1112
if (!rc)
1113
cfset_online_cpu(cpu);
1114
}
1115
mutex_unlock(&cfset_ctrset_mutex);
1116
return rc;
1117
}
1118
1119
static int cfset_offline_cpu(unsigned int cpu);
1120
1121
static int cpum_cf_offline_cpu(unsigned int cpu)
1122
{
1123
/*
1124
* During task exit processing of grouped perf events triggered by CPU
1125
* hotplug processing, pmu_disable() is called as part of perf context
1126
* removal process. Therefore do not trigger event removal now for
1127
* perf_event_open() created events. Perf common code triggers event
1128
* destruction when the event file descriptor is closed.
1129
*
1130
* Handle only /dev/hwctr device sessions.
1131
*/
1132
mutex_lock(&cfset_ctrset_mutex);
1133
if (refcount_read(&cfset_opencnt)) {
1134
cfset_offline_cpu(cpu);
1135
cpum_cf_free_cpu(cpu);
1136
}
1137
mutex_unlock(&cfset_ctrset_mutex);
1138
return 0;
1139
}
1140
1141
/* Return true if store counter set multiple instruction is available */
1142
static inline int stccm_avail(void)
1143
{
1144
return test_facility(142);
1145
}
1146
1147
/* CPU-measurement alerts for the counter facility */
1148
static void cpumf_measurement_alert(struct ext_code ext_code,
1149
unsigned int alert, unsigned long unused)
1150
{
1151
struct cpu_cf_events *cpuhw;
1152
1153
if (!(alert & CPU_MF_INT_CF_MASK))
1154
return;
1155
1156
inc_irq_stat(IRQEXT_CMC);
1157
1158
/*
1159
* Measurement alerts are shared and might happen when the PMU
1160
* is not reserved. Ignore these alerts in this case.
1161
*/
1162
cpuhw = this_cpu_cfhw();
1163
if (!cpuhw)
1164
return;
1165
1166
/* counter authorization change alert */
1167
if (alert & CPU_MF_INT_CF_CACA)
1168
qctri(&cpumf_ctr_info);
1169
1170
/* loss of counter data alert */
1171
if (alert & CPU_MF_INT_CF_LCDA)
1172
pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
1173
1174
/* loss of MT counter data alert */
1175
if (alert & CPU_MF_INT_CF_MTDA)
1176
pr_warn("CPU[%i] MT counter data was lost\n",
1177
smp_processor_id());
1178
}
1179
1180
static int cfset_init(void);
1181
static int __init cpumf_pmu_init(void)
1182
{
1183
int rc;
1184
1185
/* Extract counter measurement facility information */
1186
if (!cpum_cf_avail() || qctri(&cpumf_ctr_info))
1187
return -ENODEV;
1188
1189
/* Determine and store counter set sizes for later reference */
1190
for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
1191
cpum_cf_make_setsize(rc);
1192
1193
/*
1194
* Clear bit 15 of cr0 to unauthorize problem-state to
1195
* extract measurement counters
1196
*/
1197
system_ctl_clear_bit(0, CR0_CPUMF_EXTRACTION_AUTH_BIT);
1198
1199
/* register handler for measurement-alert interruptions */
1200
rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
1201
cpumf_measurement_alert);
1202
if (rc) {
1203
pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc);
1204
return rc;
1205
}
1206
1207
/* Setup s390dbf facility */
1208
cf_dbg = debug_register("cpum_cf", 2, 1, 128);
1209
if (!cf_dbg) {
1210
pr_err("Registration of s390dbf(cpum_cf) failed\n");
1211
rc = -ENOMEM;
1212
goto out1;
1213
}
1214
debug_register_view(cf_dbg, &debug_sprintf_view);
1215
1216
cpumf_pmu.attr_groups = cpumf_cf_event_group();
1217
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
1218
if (rc) {
1219
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
1220
goto out2;
1221
} else if (stccm_avail()) { /* Setup counter set device */
1222
cfset_init();
1223
}
1224
1225
rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
1226
"perf/s390/cf:online",
1227
cpum_cf_online_cpu, cpum_cf_offline_cpu);
1228
return rc;
1229
1230
out2:
1231
debug_unregister_view(cf_dbg, &debug_sprintf_view);
1232
debug_unregister(cf_dbg);
1233
out1:
1234
unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert);
1235
return rc;
1236
}
1237
1238
/* Support for the CPU Measurement Facility counter set extraction using
1239
* device /dev/hwctr. This allows user space programs to extract complete
1240
* counter sets via normal file operations.
1241
*/
1242
1243
struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */
1244
unsigned int sets; /* Counter set bit mask */
1245
atomic_t cpus_ack; /* # CPUs successfully executed func */
1246
};
1247
1248
struct cfset_request { /* CPUs and counter set bit mask */
1249
unsigned long ctrset; /* Bit mask of counter set to read */
1250
cpumask_t mask; /* CPU mask to read from */
1251
struct list_head node; /* Chain to cfset_session.head */
1252
};
1253
1254
static void cfset_session_init(void)
1255
{
1256
INIT_LIST_HEAD(&cfset_session.head);
1257
}
1258
1259
/* Remove current request from global bookkeeping. Maintain a counter set bit
1260
* mask on a per CPU basis.
1261
* Done in process context under mutex protection.
1262
*/
1263
static void cfset_session_del(struct cfset_request *p)
1264
{
1265
list_del(&p->node);
1266
}
1267
1268
/* Add current request to global bookkeeping. Maintain a counter set bit mask
1269
* on a per CPU basis.
1270
* Done in process context under mutex protection.
1271
*/
1272
static void cfset_session_add(struct cfset_request *p)
1273
{
1274
list_add(&p->node, &cfset_session.head);
1275
}
1276
1277
/* The /dev/hwctr device access uses PMU_F_IN_USE to mark that the device
* access path is currently in use.
1279
* The cpu_cf_events::dev_state is used to denote counter sets in use by this
1280
* interface. It is always or'ed in. If this interface is not active, its
1281
* value is zero and no additional counter sets will be included.
1282
*
1283
* The cpu_cf_events::state is used by the perf_event_open SVC and remains
1284
* unchanged.
1285
*
1286
* perf_pmu_enable() and perf_pmu_disable() and their callbacks
1287
* cpumf_pmu_enable() and cpumf_pmu_disable() are called by the
1288
* performance measurement subsystem to enable per process
1289
* CPU Measurement counter facility.
1290
* The XXX_enable() and XXX_disable functions are used to turn off
1291
* x86 performance monitoring interrupt (PMI) during scheduling.
1292
* s390 uses these calls to temporarily stop and resume the active CPU
1293
* counter sets during scheduling.
1294
*
1295
* We do allow concurrent access of perf_event_open() SVC and /dev/hwctr
1296
* device access. The perf_event_open() SVC interface makes a lot of effort
1297
* to only run the counters while the calling process is actively scheduled
1298
* to run.
1299
* When /dev/hwctr interface is also used at the same time, the counter sets
1300
* will keep running, even when the process is scheduled off a CPU.
1301
* However this is not a problem and does not lead to wrong counter values
1302
* for the perf_event_open() SVC. The current counter value will be recorded
1303
* during schedule-in. At schedule-out time the current counter value is
1304
* extracted again and the delta is calculated and added to the event.
1305
*/
1306
/* Stop all counter sets via ioctl interface */
1307
static void cfset_ioctl_off(void *parm)
1308
{
1309
struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1310
struct cfset_call_on_cpu_parm *p = parm;
1311
int rc;
1312
1313
/* Check if any counter set used by /dev/hwctr */
1314
for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
1315
if ((p->sets & cpumf_ctr_ctl[rc])) {
1316
if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
1317
ctr_set_disable(&cpuhw->dev_state,
1318
cpumf_ctr_ctl[rc]);
1319
ctr_set_stop(&cpuhw->dev_state,
1320
cpumf_ctr_ctl[rc]);
1321
}
1322
}
1323
/* Keep perf_event_open counter sets */
1324
rc = lcctl(cpuhw->dev_state | cpuhw->state);
1325
if (rc)
1326
pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n",
1327
cpuhw->state, S390_HWCTR_DEVICE, rc);
1328
if (!cpuhw->dev_state)
1329
cpuhw->flags &= ~PMU_F_IN_USE;
1330
}
1331
1332
/* Start counter sets on particular CPU */
1333
static void cfset_ioctl_on(void *parm)
1334
{
1335
struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1336
struct cfset_call_on_cpu_parm *p = parm;
1337
int rc;
1338
1339
cpuhw->flags |= PMU_F_IN_USE;
1340
ctr_set_enable(&cpuhw->dev_state, p->sets);
1341
ctr_set_start(&cpuhw->dev_state, p->sets);
1342
for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
1343
if ((p->sets & cpumf_ctr_ctl[rc]))
1344
atomic_inc(&cpuhw->ctr_set[rc]);
1345
rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */
1346
if (!rc)
1347
atomic_inc(&p->cpus_ack);
1348
else
1349
pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n",
1350
cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc);
1351
}
1352
1353
static void cfset_release_cpu(void *p)
1354
{
1355
struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1356
int rc;
1357
1358
cpuhw->dev_state = 0;
1359
rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */
1360
if (rc)
1361
pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n",
1362
cpuhw->state, S390_HWCTR_DEVICE, rc);
1363
}
1364
1365
/* This modifies the process CPU mask to adapt it to the currently online
* CPUs. Offline CPUs can not be addressed. This call terminates the access
* and is usually followed by close() or a new ioctl(..., START, ...) which
1368
* creates a new request structure.
1369
*/
1370
static void cfset_all_stop(struct cfset_request *req)
1371
{
1372
struct cfset_call_on_cpu_parm p = {
1373
.sets = req->ctrset,
1374
};
1375
1376
cpumask_and(&req->mask, &req->mask, cpu_online_mask);
1377
on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1);
1378
}
1379
1380
/* Release function is also called when application gets terminated without
1381
* doing a proper ioctl(..., S390_HWCTR_STOP, ...) command.
1382
*/
1383
static int cfset_release(struct inode *inode, struct file *file)
1384
{
1385
mutex_lock(&cfset_ctrset_mutex);
1386
/* Open followed by close/exit has no private_data */
1387
if (file->private_data) {
1388
cfset_all_stop(file->private_data);
1389
cfset_session_del(file->private_data);
1390
kfree(file->private_data);
1391
file->private_data = NULL;
1392
}
1393
if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */
1394
on_each_cpu(cfset_release_cpu, NULL, 1);
1395
cpum_cf_free(-1);
1396
}
1397
mutex_unlock(&cfset_ctrset_mutex);
1398
return 0;
1399
}
1400
1401
/*
1402
* Open via /dev/hwctr device. Allocate all per CPU resources on the first
1403
* open of the device. The last close releases all per CPU resources.
1404
* Parallel perf_event_open system calls also use per CPU resources.
1405
* These invocations are handled via reference counting on the per CPU data
1406
* structures.
1407
*/
1408
static int cfset_open(struct inode *inode, struct file *file)
1409
{
1410
int rc = 0;
1411
1412
if (!perfmon_capable())
1413
return -EPERM;
1414
file->private_data = NULL;
1415
1416
mutex_lock(&cfset_ctrset_mutex);
1417
if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */
1418
rc = cpum_cf_alloc(-1);
1419
if (!rc) {
1420
cfset_session_init();
1421
refcount_set(&cfset_opencnt, 1);
1422
}
1423
}
1424
mutex_unlock(&cfset_ctrset_mutex);
1425
1426
/* nonseekable_open() never fails */
1427
return rc ?: nonseekable_open(inode, file);
1428
}
1429
1430
static int cfset_all_start(struct cfset_request *req)
1431
{
1432
struct cfset_call_on_cpu_parm p = {
1433
.sets = req->ctrset,
1434
.cpus_ack = ATOMIC_INIT(0),
1435
};
1436
cpumask_var_t mask;
1437
int rc = 0;
1438
1439
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
1440
return -ENOMEM;
1441
cpumask_and(mask, &req->mask, cpu_online_mask);
1442
on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1);
1443
if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
1444
on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1);
1445
rc = -EIO;
1446
}
1447
free_cpumask_var(mask);
1448
return rc;
1449
}
1450
1451
/* Return the maximum required space for all possible CPUs in case one
1452
* CPU will be onlined during the START, READ, STOP cycles.
1453
* To find out the size of the counter sets, any one CPU will do. They
1454
* all have the same counter sets.
1455
*/
1456
static size_t cfset_needspace(unsigned int sets)
1457
{
1458
size_t bytes = 0;
1459
int i;
1460
1461
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
1462
if (!(sets & cpumf_ctr_ctl[i]))
1463
continue;
1464
bytes += cpum_cf_read_setsize(i) * sizeof(u64) +
1465
sizeof(((struct s390_ctrset_setdata *)0)->set) +
1466
sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
1467
}
1468
bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
1469
(bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
1470
sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
1471
return bytes;
1472
}
1473
1474
static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
1475
{
1476
struct s390_ctrset_read __user *ctrset_read;
1477
unsigned int cpu, cpus, rc = 0;
1478
void __user *uptr;
1479
1480
ctrset_read = (struct s390_ctrset_read __user *)arg;
1481
uptr = ctrset_read->data;
1482
for_each_cpu(cpu, mask) {
1483
struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
1484
struct s390_ctrset_cpudata __user *ctrset_cpudata;
1485
1486
ctrset_cpudata = uptr;
1487
rc = put_user(cpu, &ctrset_cpudata->cpu_nr);
1488
rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets);
1489
rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data,
1490
cpuhw->used);
1491
if (rc) {
1492
rc = -EFAULT;
1493
goto out;
1494
}
1495
uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used;
1496
cond_resched();
1497
}
1498
cpus = cpumask_weight(mask);
1499
if (put_user(cpus, &ctrset_read->no_cpus))
1500
rc = -EFAULT;
1501
out:
1502
return rc;
1503
}
1504
1505
static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
1506
int ctrset_size, size_t room)
1507
{
1508
size_t need = 0;
1509
int rc = -1;
1510
1511
need = sizeof(*p) + sizeof(u64) * ctrset_size;
1512
if (need <= room) {
1513
p->set = cpumf_ctr_ctl[ctrset];
1514
p->no_cnts = ctrset_size;
1515
rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
1516
if (rc == 3) /* Nothing stored */
1517
need = 0;
1518
}
1519
return need;
1520
}
1521
1522
/* Read all counter sets. */
1523
static void cfset_cpu_read(void *parm)
1524
{
1525
struct cpu_cf_events *cpuhw = this_cpu_cfhw();
1526
struct cfset_call_on_cpu_parm *p = parm;
1527
int set, set_size;
1528
size_t space;
1529
1530
/* No data saved yet */
1531
cpuhw->used = 0;
1532
cpuhw->sets = 0;
1533
memset(cpuhw->data, 0, sizeof(cpuhw->data));
1534
1535
/* Scan the counter sets */
1536
for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
1537
struct s390_ctrset_setdata *sp = (void *)cpuhw->data +
1538
cpuhw->used;
1539
1540
if (!(p->sets & cpumf_ctr_ctl[set]))
1541
continue; /* Counter set not in list */
1542
set_size = cpum_cf_read_setsize(set);
1543
space = sizeof(cpuhw->data) - cpuhw->used;
1544
space = cfset_cpuset_read(sp, set, set_size, space);
1545
if (space) {
1546
cpuhw->used += space;
1547
cpuhw->sets += 1;
1548
}
1549
}
1550
}
1551
1552
static int cfset_all_read(unsigned long arg, struct cfset_request *req)
1553
{
1554
struct cfset_call_on_cpu_parm p;
1555
cpumask_var_t mask;
1556
int rc;
1557
1558
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
1559
return -ENOMEM;
1560
1561
p.sets = req->ctrset;
1562
cpumask_and(mask, &req->mask, cpu_online_mask);
1563
on_each_cpu_mask(mask, cfset_cpu_read, &p, 1);
1564
rc = cfset_all_copy(arg, mask);
1565
free_cpumask_var(mask);
1566
return rc;
1567
}
1568
1569
static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req)
1570
{
1571
int ret = -ENODATA;
1572
1573
if (req && req->ctrset)
1574
ret = cfset_all_read(arg, req);
1575
return ret;
1576
}
1577
1578
static long cfset_ioctl_stop(struct file *file)
1579
{
1580
struct cfset_request *req = file->private_data;
1581
int ret = -ENXIO;
1582
1583
if (req) {
1584
cfset_all_stop(req);
1585
cfset_session_del(req);
1586
kfree(req);
1587
file->private_data = NULL;
1588
ret = 0;
1589
}
1590
return ret;
1591
}
1592
1593
static long cfset_ioctl_start(unsigned long arg, struct file *file)
1594
{
1595
struct s390_ctrset_start __user *ustart;
1596
struct s390_ctrset_start start;
1597
struct cfset_request *preq;
1598
void __user *umask;
1599
unsigned int len;
1600
int ret = 0;
1601
size_t need;
1602
1603
if (file->private_data)
1604
return -EBUSY;
1605
ustart = (struct s390_ctrset_start __user *)arg;
1606
if (copy_from_user(&start, ustart, sizeof(start)))
1607
return -EFAULT;
1608
if (start.version != S390_HWCTR_START_VERSION)
1609
return -EINVAL;
1610
if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
1611
cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
1612
cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
1613
cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
1614
cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
1615
return -EINVAL; /* Invalid counter set */
1616
if (!start.counter_sets)
1617
return -EINVAL; /* No counter set at all? */
1618
1619
preq = kzalloc(sizeof(*preq), GFP_KERNEL);
1620
if (!preq)
1621
return -ENOMEM;
1622
cpumask_clear(&preq->mask);
1623
len = min_t(u64, start.cpumask_len, cpumask_size());
1624
umask = (void __user *)start.cpumask;
1625
if (copy_from_user(&preq->mask, umask, len)) {
1626
kfree(preq);
1627
return -EFAULT;
1628
}
1629
if (cpumask_empty(&preq->mask)) {
1630
kfree(preq);
1631
return -EINVAL;
1632
}
1633
need = cfset_needspace(start.counter_sets);
1634
if (put_user(need, &ustart->data_bytes)) {
1635
kfree(preq);
1636
return -EFAULT;
1637
}
1638
preq->ctrset = start.counter_sets;
1639
ret = cfset_all_start(preq);
1640
if (!ret) {
1641
cfset_session_add(preq);
1642
file->private_data = preq;
1643
} else {
1644
kfree(preq);
1645
}
1646
return ret;
1647
}
1648
1649
/* Entry point to the /dev/hwctr device interface.
1650
* The ioctl system call supports three subcommands:
1651
* S390_HWCTR_START: Start the specified counter sets on a CPU list. The
1652
* counter set keeps running until explicitly stopped. Returns the number
1653
* of bytes needed to store the counter values. If another S390_HWCTR_START
1654
* ioctl subcommand is called without a previous S390_HWCTR_STOP stop
1655
* command on the same file descriptor, -EBUSY is returned.
1656
* S390_HWCTR_READ: Read the counter set values from specified CPU list given
1657
* with the S390_HWCTR_START command.
1658
* S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the
1659
* previous S390_HWCTR_START subcommand.
1660
*/
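/*
 * Sketch of the resulting user-space call sequence (for illustration only;
 * error handling omitted, field setup abbreviated):
 *
 *   int fd = open("/dev/hwctr", O_RDWR);
 *   struct s390_ctrset_start start = {
 *           .version = S390_HWCTR_START_VERSION,
 *           .counter_sets = ...,    // bit mask of requested counter sets
 *           .cpumask_len = ...,     // size of the CPU mask in bytes
 *           .cpumask = ...,         // pointer to the CPU mask
 *   };
 *   ioctl(fd, S390_HWCTR_START, &start); // needed buffer size is returned
 *                                        // in start.data_bytes
 *   ioctl(fd, S390_HWCTR_READ, buf);     // buf: struct s390_ctrset_read area
 *   ioctl(fd, S390_HWCTR_STOP);
 *   close(fd);
 */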
1661
static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1662
{
1663
int ret;
1664
1665
cpus_read_lock();
1666
mutex_lock(&cfset_ctrset_mutex);
1667
switch (cmd) {
1668
case S390_HWCTR_START:
1669
ret = cfset_ioctl_start(arg, file);
1670
break;
1671
case S390_HWCTR_STOP:
1672
ret = cfset_ioctl_stop(file);
1673
break;
1674
case S390_HWCTR_READ:
1675
ret = cfset_ioctl_read(arg, file->private_data);
1676
break;
1677
default:
1678
ret = -ENOTTY;
1679
break;
1680
}
1681
mutex_unlock(&cfset_ctrset_mutex);
1682
cpus_read_unlock();
1683
return ret;
1684
}
1685
1686
static const struct file_operations cfset_fops = {
1687
.owner = THIS_MODULE,
1688
.open = cfset_open,
1689
.release = cfset_release,
1690
.unlocked_ioctl = cfset_ioctl,
1691
};
1692
1693
static struct miscdevice cfset_dev = {
1694
.name = S390_HWCTR_DEVICE,
1695
.minor = MISC_DYNAMIC_MINOR,
1696
.fops = &cfset_fops,
1697
.mode = 0666,
1698
};
1699
1700
/* Hotplug add of a CPU. Scan through all active processes and add
1701
* that CPU to the list of CPUs supplied with ioctl(..., START, ...).
1702
*/
1703
static int cfset_online_cpu(unsigned int cpu)
1704
{
1705
struct cfset_call_on_cpu_parm p;
1706
struct cfset_request *rp;
1707
1708
if (!list_empty(&cfset_session.head)) {
1709
list_for_each_entry(rp, &cfset_session.head, node) {
1710
p.sets = rp->ctrset;
1711
cfset_ioctl_on(&p);
1712
cpumask_set_cpu(cpu, &rp->mask);
1713
}
1714
}
1715
return 0;
1716
}
1717
1718
/* Hotplug remove of a CPU. Scan through all active processes and clear
1719
* that CPU from the list of CPUs supplied with ioctl(..., START, ...).
1720
* Adjust reference counts.
1721
*/
1722
static int cfset_offline_cpu(unsigned int cpu)
1723
{
1724
struct cfset_call_on_cpu_parm p;
1725
struct cfset_request *rp;
1726
1727
if (!list_empty(&cfset_session.head)) {
1728
list_for_each_entry(rp, &cfset_session.head, node) {
1729
p.sets = rp->ctrset;
1730
cfset_ioctl_off(&p);
1731
cpumask_clear_cpu(cpu, &rp->mask);
1732
}
1733
}
1734
return 0;
1735
}
1736
1737
static void cfdiag_read(struct perf_event *event)
1738
{
1739
}
1740
1741
static int get_authctrsets(void)
1742
{
1743
unsigned long auth = 0;
1744
enum cpumf_ctr_set i;
1745
1746
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
1747
if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i])
1748
auth |= cpumf_ctr_ctl[i];
1749
}
1750
return auth;
1751
}
1752
1753
/* Setup the event. Test for authorized counter sets and only include counter
1754
* sets which are authorized at the time of the setup. Including unauthorized
1755
* counter sets results in a specification exception (and panic).
1756
*/
1757
static int cfdiag_event_init2(struct perf_event *event)
1758
{
1759
struct perf_event_attr *attr = &event->attr;
1760
int err = 0;
1761
1762
/* Set sample_period to indicate sampling */
1763
event->hw.config = attr->config;
1764
event->hw.sample_period = attr->sample_period;
1765
local64_set(&event->hw.period_left, event->hw.sample_period);
1766
local64_set(&event->count, 0);
1767
event->hw.last_period = event->hw.sample_period;
1768
1769
/* Add all authorized counter sets to config_base. The
* hardware init function is either called per-cpu or just once
* for all CPUs (event->cpu == -1). This depends on whether
* counting is started for all CPUs or on a per-workload basis where
1773
* the perf event moves from one CPU to another CPU.
1774
* Checking the authorization on any CPU is fine as the hardware
1775
* applies the same authorization settings to all CPUs.
1776
*/
1777
event->hw.config_base = get_authctrsets();
1778
1779
/* No authorized counter sets, nothing to count/sample */
1780
if (!event->hw.config_base)
1781
err = -EINVAL;
1782
1783
return err;
1784
}
1785
1786
static int cfdiag_event_init(struct perf_event *event)
1787
{
1788
struct perf_event_attr *attr = &event->attr;
1789
int err = -ENOENT;
1790
1791
if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG ||
1792
event->attr.type != event->pmu->type)
1793
goto out;
1794
1795
/* Raw events are used to access counters directly,
1796
* hence do not permit excludes.
1797
* This event is useless without PERF_SAMPLE_RAW to return counter set
1798
* values as raw data.
1799
*/
1800
if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv ||
1801
!(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) {
1802
err = -EOPNOTSUPP;
1803
goto out;
1804
}
1805
1806
/* Initialize for using the CPU-measurement counter facility */
1807
if (cpum_cf_alloc(event->cpu))
1808
return -ENOMEM;
1809
event->destroy = hw_perf_event_destroy;
1810
1811
err = cfdiag_event_init2(event);
1812
out:
1813
return err;
1814
}
1815
1816
/* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used
1817
* to collect the complete counter sets for a scheduled process. The target
* is the complete counter sets attached as raw data to the artificial event.
1819
* This results in complete counter sets available when a process is
1820
* scheduled. Contains the delta of every counter while the process was
1821
* running.
1822
*/
1823
CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG);
1824
1825
static struct attribute *cfdiag_events_attr[] = {
1826
CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG),
1827
NULL,
1828
};
1829
1830
PMU_FORMAT_ATTR(event, "config:0-63");
1831
1832
static struct attribute *cfdiag_format_attr[] = {
1833
&format_attr_event.attr,
1834
NULL,
1835
};
1836
1837
static struct attribute_group cfdiag_events_group = {
1838
.name = "events",
1839
.attrs = cfdiag_events_attr,
1840
};
1841
static struct attribute_group cfdiag_format_group = {
1842
.name = "format",
1843
.attrs = cfdiag_format_attr,
1844
};
1845
static const struct attribute_group *cfdiag_attr_groups[] = {
1846
&cfdiag_events_group,
1847
&cfdiag_format_group,
1848
NULL,
1849
};
1850
1851
/* Performance monitoring unit for event CF_DIAG. Since this event
1852
* is also started and stopped via the perf_event_open() system call, use
1853
* the same event enable/disable call back functions. They do not
1854
* have a pointer to the perf_event structure as first parameter.
1855
*
1856
* The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common.
1857
* Reuse them and distinguish the event (always first parameter) via
1858
* 'config' member.
1859
*/
1860
static struct pmu cf_diag = {
1861
.task_ctx_nr = perf_sw_context,
1862
.event_init = cfdiag_event_init,
1863
.pmu_enable = cpumf_pmu_enable,
1864
.pmu_disable = cpumf_pmu_disable,
1865
.add = cpumf_pmu_add,
1866
.del = cpumf_pmu_del,
1867
.start = cpumf_pmu_start,
1868
.stop = cpumf_pmu_stop,
1869
.read = cfdiag_read,
1870
1871
.attr_groups = cfdiag_attr_groups
1872
};
1873
1874
/* Calculate memory needed to store all counter sets together with header and
1875
* trailer data. This is independent of the counter set authorization which
1876
* can vary depending on the configuration.
1877
*/
1878
static size_t cfdiag_maxsize(struct cpumf_ctr_info *info)
1879
{
1880
size_t max_size = sizeof(struct cf_trailer_entry);
1881
enum cpumf_ctr_set i;
1882
1883
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
1884
size_t size = cpum_cf_read_setsize(i);
1885
1886
if (size)
1887
max_size += size * sizeof(u64) +
1888
sizeof(struct cf_ctrset_entry);
1889
}
1890
return max_size;
1891
}
1892
1893
/* Get the CPU speed, try sampling facility first and CPU attributes second. */
1894
static void cfdiag_get_cpu_speed(void)
1895
{
1896
unsigned long mhz;
1897
1898
if (cpum_sf_avail()) { /* Sampling facility first */
1899
struct hws_qsi_info_block si;
1900
1901
memset(&si, 0, sizeof(si));
1902
if (!qsi(&si)) {
1903
cfdiag_cpu_speed = si.cpu_speed;
1904
return;
1905
}
1906
}
1907
1908
/* Fallback: extract the static CPU speed attribute. Used in case the
* CPU Measurement Sampling Facility is turned off.
1910
*/
1911
mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
1912
if (mhz != -1UL)
1913
cfdiag_cpu_speed = mhz & 0xffffffff;
1914
}
1915
1916
static int cfset_init(void)
1917
{
1918
size_t need;
1919
int rc;
1920
1921
cfdiag_get_cpu_speed();
1922
/* Make sure the counter set data fits into predefined buffer. */
1923
need = cfdiag_maxsize(&cpumf_ctr_info);
1924
if (need > sizeof(((struct cpu_cf_events *)0)->start)) {
1925
pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n",
1926
need);
1927
return -ENOMEM;
1928
}
1929
1930
rc = misc_register(&cfset_dev);
1931
if (rc) {
1932
pr_err("Registration of /dev/%s failed rc=%i\n",
1933
cfset_dev.name, rc);
1934
goto out;
1935
}
1936
1937
rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
1938
if (rc) {
1939
misc_deregister(&cfset_dev);
1940
pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n",
1941
rc);
1942
}
1943
out:
1944
return rc;
1945
}
1946
1947
device_initcall(cpumf_pmu_init);