GitHub Repository: torvalds/linux
Path: blob/master/tools/perf/builtin-stat.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed
 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <[email protected]>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <[email protected]>
 *   Yanmin Zhang <[email protected]>
 *   Wu Fengguang <[email protected]>
 *   Mike Galbraith <[email protected]>
 *   Paul Mackerras <[email protected]>
 *   Jaswinder Singh Rajput <[email protected]>
 */

#include "builtin.h"
#include "util/cgroup.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/pmus.h"
#include "util/pmu.h"
#include "util/tool_pmu.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/counts.h"
#include "util/topdown.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/string2.h"
#include "util/metricgroup.h"
#include "util/synthetic-events.h"
#include "util/target.h"
#include "util/time-utils.h"
#include "util/top.h"
#include "util/affinity.h"
#include "util/pfm.h"
#include "util/bpf_counter.h"
#include "util/iostat.h"
#include "util/util.h"
#include "util/intel-tpebs.h"
#include "asm/bug.h"

#include <linux/list_sort.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <api/fs/fs.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <inttypes.h>
#include <locale.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <linux/err.h>

#include <linux/ctype.h>
#include <perf/evlist.h>
#include <internal/threadmap.h>

#ifdef HAVE_BPF_SKEL
#include "util/bpf_skel/bperf_cgroup.h"
#endif

#define DEFAULT_SEPARATOR	" "
#define FREEZE_ON_SMI_PATH	"bus/event_source/devices/cpu/freeze_on_smi"

struct rusage_stats {
108
struct stats ru_utime_usec_stat;
109
struct stats ru_stime_usec_stat;
110
};
111
112
static void print_counters(struct timespec *ts, int argc, const char **argv);
113
114
static struct evlist *evsel_list;
115
static struct parse_events_option_args parse_events_option_args = {
116
.evlistp = &evsel_list,
117
};
118
119
static bool all_counters_use_bpf = true;
120
121
static struct target target;
122
123
static volatile sig_atomic_t child_pid = -1;
124
static int detailed_run = 0;
125
static bool transaction_run;
126
static bool topdown_run = false;
127
static bool smi_cost = false;
128
static bool smi_reset = false;
129
static int big_num_opt = -1;
130
static const char *pre_cmd = NULL;
131
static const char *post_cmd = NULL;
132
static bool sync_run = false;
133
static bool forever = false;
134
static bool force_metric_only = false;
135
static struct timespec ref_time;
136
static bool append_file;
137
static bool interval_count;
138
static const char *output_name;
139
static int output_fd;
140
static char *metrics;
141
static struct rusage_stats ru_stats;
142
143
struct perf_stat {
144
bool record;
145
struct perf_data data;
146
struct perf_session *session;
147
u64 bytes_written;
148
struct perf_tool tool;
149
bool maps_allocated;
150
struct perf_cpu_map *cpus;
151
struct perf_thread_map *threads;
152
enum aggr_mode aggr_mode;
153
u32 aggr_level;
154
};
155
156
static struct perf_stat perf_stat;
157
#define STAT_RECORD perf_stat.record
158
159
static volatile sig_atomic_t done = 0;
160
161
/* Options set from the command line. */
162
struct opt_aggr_mode {
163
bool node, socket, die, cluster, cache, core, thread, no_aggr;
164
};
165
166
/* Turn command line option into most generic aggregation mode setting. */
167
static enum aggr_mode opt_aggr_mode_to_aggr_mode(struct opt_aggr_mode *opt_mode)
168
{
169
enum aggr_mode mode = AGGR_GLOBAL;
170
171
if (opt_mode->node)
172
mode = AGGR_NODE;
173
if (opt_mode->socket)
174
mode = AGGR_SOCKET;
175
if (opt_mode->die)
176
mode = AGGR_DIE;
177
if (opt_mode->cluster)
178
mode = AGGR_CLUSTER;
179
if (opt_mode->cache)
180
mode = AGGR_CACHE;
181
if (opt_mode->core)
182
mode = AGGR_CORE;
183
if (opt_mode->thread)
184
mode = AGGR_THREAD;
185
if (opt_mode->no_aggr)
186
mode = AGGR_NONE;
187
return mode;
188
}
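/*
 * Illustrative note (added in this write-up, not part of the original file):
 * because the checks above are plain "if"s rather than "else if"s, the last
 * matching option wins. For example:
 *
 *	struct opt_aggr_mode o = { .socket = true, .no_aggr = true };
 *
 *	opt_aggr_mode_to_aggr_mode(&o);	// returns AGGR_NONE, not AGGR_SOCKET
 */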
189
190
static void evlist__check_cpu_maps(struct evlist *evlist)
191
{
192
struct evsel *evsel, *warned_leader = NULL;
193
194
evlist__for_each_entry(evlist, evsel) {
195
struct evsel *leader = evsel__leader(evsel);
196
197
/* Check that leader matches cpus with each member. */
198
if (leader == evsel)
199
continue;
200
if (perf_cpu_map__equal(leader->core.cpus, evsel->core.cpus))
201
continue;
202
203
/* If there's mismatch disable the group and warn user. */
204
if (warned_leader != leader) {
205
char buf[200];
206
207
pr_warning("WARNING: grouped events cpus do not match.\n"
208
"Events with CPUs not matching the leader will "
209
"be removed from the group.\n");
210
evsel__group_desc(leader, buf, sizeof(buf));
211
pr_warning(" %s\n", buf);
212
warned_leader = leader;
213
}
214
if (verbose > 0) {
215
char buf[200];
216
217
cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
218
pr_warning(" %s: %s\n", leader->name, buf);
219
cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
220
pr_warning(" %s: %s\n", evsel->name, buf);
221
}
222
223
evsel__remove_from_group(evsel, leader);
224
}
225
}
226
227
static inline void diff_timespec(struct timespec *r, struct timespec *a,
228
struct timespec *b)
229
{
230
r->tv_sec = a->tv_sec - b->tv_sec;
231
if (a->tv_nsec < b->tv_nsec) {
232
r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
233
r->tv_sec--;
234
} else {
235
r->tv_nsec = a->tv_nsec - b->tv_nsec ;
236
}
237
}
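/*
 * Worked example for diff_timespec() (added here for illustration): with
 * a = {5, 100} and b = {3, 200}, a->tv_nsec < b->tv_nsec triggers the borrow,
 * so r->tv_nsec = 100 + NSEC_PER_SEC - 200 = 999999900 and r->tv_sec = 1,
 * i.e. 1.999999900s, which matches 5.000000100s - 3.000000200s.
 */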
238
239
static void perf_stat__reset_stats(void)
240
{
241
evlist__reset_stats(evsel_list);
242
memset(stat_config.walltime_nsecs_stats, 0, sizeof(*stat_config.walltime_nsecs_stats));
243
}
244
245
static int process_synthesized_event(const struct perf_tool *tool __maybe_unused,
246
union perf_event *event,
247
struct perf_sample *sample __maybe_unused,
248
struct machine *machine __maybe_unused)
249
{
250
if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
251
pr_err("failed to write perf data, error: %m\n");
252
return -1;
253
}
254
255
perf_stat.bytes_written += event->header.size;
256
return 0;
257
}
258
259
static int write_stat_round_event(u64 tm, u64 type)
260
{
261
return perf_event__synthesize_stat_round(NULL, tm, type,
262
process_synthesized_event,
263
NULL);
264
}
265
266
#define WRITE_STAT_ROUND_EVENT(time, interval) \
267
write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
268
269
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
270
271
static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 thread,
272
struct perf_counts_values *count)
273
{
274
struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread);
275
struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx);
276
277
return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
278
process_synthesized_event, NULL);
279
}
280
281
static int read_single_counter(struct evsel *counter, int cpu_map_idx, int thread)
282
{
283
int err = evsel__read_counter(counter, cpu_map_idx, thread);
284
285
/*
286
* Reading user and system time will fail when the process
287
* terminates. Use the wait4 values in that case.
288
*/
289
if (err && cpu_map_idx == 0 &&
290
(evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME ||
291
evsel__tool_event(counter) == TOOL_PMU__EVENT_SYSTEM_TIME)) {
292
struct perf_counts_values *count =
293
perf_counts(counter->counts, cpu_map_idx, thread);
294
struct perf_counts_values *old_count = NULL;
295
u64 val;
296
297
if (counter->prev_raw_counts)
298
old_count = perf_counts(counter->prev_raw_counts, cpu_map_idx, thread);
299
300
if (evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME)
301
val = ru_stats.ru_utime_usec_stat.mean;
302
else
303
val = ru_stats.ru_stime_usec_stat.mean;
304
305
count->val = val;
306
if (old_count) {
307
count->run = old_count->run + 1;
308
count->ena = old_count->ena + 1;
309
} else {
310
count->run++;
311
count->ena++;
312
}
313
return 0;
314
}
315
return err;
316
}
317
318
/*
319
* Read out the results of a single counter:
320
* do not aggregate counts across CPUs in system-wide mode
321
*/
322
static int read_counter_cpu(struct evsel *counter, int cpu_map_idx)
323
{
324
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
325
int thread;
326
327
if (!counter->supported)
328
return -ENOENT;
329
330
for (thread = 0; thread < nthreads; thread++) {
331
struct perf_counts_values *count;
332
333
count = perf_counts(counter->counts, cpu_map_idx, thread);
334
335
/*
336
* The leader's group read loads data into its group members
337
* (via evsel__read_counter()) and sets their count->loaded.
338
*/
339
if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) &&
340
read_single_counter(counter, cpu_map_idx, thread)) {
341
counter->counts->scaled = -1;
342
perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0;
343
perf_counts(counter->counts, cpu_map_idx, thread)->run = 0;
344
return -1;
345
}
346
347
perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false);
348
349
if (STAT_RECORD) {
350
if (evsel__write_stat_event(counter, cpu_map_idx, thread, count)) {
351
pr_err("failed to write stat event\n");
352
return -1;
353
}
354
}
355
356
if (verbose > 1) {
357
fprintf(stat_config.output,
358
"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
359
evsel__name(counter),
360
perf_cpu_map__cpu(evsel__cpus(counter),
361
cpu_map_idx).cpu,
362
count->val, count->ena, count->run);
363
}
364
}
365
366
return 0;
367
}
368
369
static int read_counters_with_affinity(void)
370
{
371
struct evlist_cpu_iterator evlist_cpu_itr;
372
struct affinity saved_affinity, *affinity;
373
374
if (all_counters_use_bpf)
375
return 0;
376
377
if (!target__has_cpu(&target) || target__has_per_thread(&target))
378
affinity = NULL;
379
else if (affinity__setup(&saved_affinity) < 0)
380
return -1;
381
else
382
affinity = &saved_affinity;
383
384
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
385
struct evsel *counter = evlist_cpu_itr.evsel;
386
387
if (evsel__is_bpf(counter))
388
continue;
389
390
if (evsel__is_tool(counter))
391
continue;
392
393
if (!counter->err)
394
counter->err = read_counter_cpu(counter, evlist_cpu_itr.cpu_map_idx);
395
}
396
if (affinity)
397
affinity__cleanup(&saved_affinity);
398
399
return 0;
400
}
401
402
static int read_bpf_map_counters(void)
403
{
404
struct evsel *counter;
405
int err;
406
407
evlist__for_each_entry(evsel_list, counter) {
408
if (!evsel__is_bpf(counter))
409
continue;
410
411
err = bpf_counter__read(counter);
412
if (err)
413
return err;
414
}
415
return 0;
416
}
417
418
static int read_tool_counters(void)
419
{
420
struct evsel *counter;
421
422
evlist__for_each_entry(evsel_list, counter) {
423
int idx;
424
425
if (!evsel__is_tool(counter))
426
continue;
427
428
perf_cpu_map__for_each_idx(idx, counter->core.cpus) {
429
if (!counter->err)
430
counter->err = read_counter_cpu(counter, idx);
431
}
432
}
433
return 0;
434
}
435
436
static int read_counters(void)
437
{
438
int ret;
439
440
if (stat_config.stop_read_counter)
441
return 0;
442
443
// Read all BPF counters first.
444
ret = read_bpf_map_counters();
445
if (ret)
446
return ret;
447
448
// Read non-BPF and non-tool counters next.
449
ret = read_counters_with_affinity();
450
if (ret)
451
return ret;
452
453
// Read the tool counters last. This way the duration_time counter
454
// should always be greater than any other counter's enabled time.
455
return read_tool_counters();
456
}
457
458
static void process_counters(void)
459
{
460
struct evsel *counter;
461
462
evlist__for_each_entry(evsel_list, counter) {
463
if (counter->err)
464
pr_debug("failed to read counter %s\n", counter->name);
465
if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
466
pr_warning("failed to process counter %s\n", counter->name);
467
counter->err = 0;
468
}
469
470
perf_stat_merge_counters(&stat_config, evsel_list);
471
perf_stat_process_percore(&stat_config, evsel_list);
472
}
473
474
static void process_interval(void)
475
{
476
struct timespec ts, rs;
477
478
clock_gettime(CLOCK_MONOTONIC, &ts);
479
diff_timespec(&rs, &ts, &ref_time);
480
481
evlist__reset_aggr_stats(evsel_list);
482
483
if (read_counters() == 0)
484
process_counters();
485
486
if (STAT_RECORD) {
487
if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
488
pr_err("failed to write stat round event\n");
489
}
490
491
init_stats(stat_config.walltime_nsecs_stats);
492
update_stats(stat_config.walltime_nsecs_stats, stat_config.interval * 1000000ULL);
493
print_counters(&rs, 0, NULL);
494
}
495
496
static bool handle_interval(unsigned int interval, int *times)
497
{
498
if (interval) {
499
process_interval();
500
if (interval_count && !(--(*times)))
501
return true;
502
}
503
return false;
504
}
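/*
 * Usage sketch (added for illustration; the flag spellings come from the
 * perf-stat documentation, not from this excerpt): with interval printing plus
 * a count, e.g. something like "perf stat -I 1000 --interval-count 5 ...",
 * *times starts at 5 and handle_interval() returns true on the fifth tick,
 * which makes dispatch_events() stop the run.
 */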
505
506
static int enable_counters(void)
507
{
508
struct evsel *evsel;
509
int err;
510
511
evlist__for_each_entry(evsel_list, evsel) {
512
if (!evsel__is_bpf(evsel))
513
continue;
514
515
err = bpf_counter__enable(evsel);
516
if (err)
517
return err;
518
}
519
520
if (!target__enable_on_exec(&target)) {
521
if (!all_counters_use_bpf)
522
evlist__enable(evsel_list);
523
}
524
return 0;
525
}
526
527
static void disable_counters(void)
528
{
529
struct evsel *counter;
530
531
/*
532
* If we don't have tracee (attaching to task or cpu), counters may
533
* still be running. To get accurate group ratios, we must stop groups
534
* from counting before reading their constituent counters.
535
*/
536
if (!target__none(&target)) {
537
evlist__for_each_entry(evsel_list, counter)
538
bpf_counter__disable(counter);
539
if (!all_counters_use_bpf)
540
evlist__disable(evsel_list);
541
}
542
}
543
544
static volatile sig_atomic_t workload_exec_errno;
545
546
/*
547
* evlist__prepare_workload will send a SIGUSR1
548
* if the fork fails, since we asked by setting its
549
* want_signal to true.
550
*/
551
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
552
void *ucontext __maybe_unused)
553
{
554
workload_exec_errno = info->si_value.sival_int;
555
}
556
557
static bool evsel__should_store_id(struct evsel *counter)
558
{
559
return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
560
}
561
562
static bool is_target_alive(struct target *_target,
563
struct perf_thread_map *threads)
564
{
565
struct stat st;
566
int i;
567
568
if (!target__has_task(_target))
569
return true;
570
571
for (i = 0; i < threads->nr; i++) {
572
char path[PATH_MAX];
573
574
scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
575
threads->map[i].pid);
576
577
if (!stat(path, &st))
578
return true;
579
}
580
581
return false;
582
}
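/*
 * Illustrative example (added here): for "perf stat -p 1234,5678" this stats
 * "/proc/1234" and "/proc/5678" (under the detected procfs mountpoint); the
 * target counts as alive while at least one monitored task's directory still
 * exists.
 */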
583
584
static void process_evlist(struct evlist *evlist, unsigned int interval)
585
{
586
enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
587
588
if (evlist__ctlfd_process(evlist, &cmd) > 0) {
589
switch (cmd) {
590
case EVLIST_CTL_CMD_ENABLE:
591
fallthrough;
592
case EVLIST_CTL_CMD_DISABLE:
593
if (interval)
594
process_interval();
595
break;
596
case EVLIST_CTL_CMD_SNAPSHOT:
597
case EVLIST_CTL_CMD_ACK:
598
case EVLIST_CTL_CMD_UNSUPPORTED:
599
case EVLIST_CTL_CMD_EVLIST:
600
case EVLIST_CTL_CMD_STOP:
601
case EVLIST_CTL_CMD_PING:
602
default:
603
break;
604
}
605
}
606
}
607
608
static void compute_tts(struct timespec *time_start, struct timespec *time_stop,
609
int *time_to_sleep)
610
{
611
int tts = *time_to_sleep;
612
struct timespec time_diff;
613
614
diff_timespec(&time_diff, time_stop, time_start);
615
616
tts -= time_diff.tv_sec * MSEC_PER_SEC +
617
time_diff.tv_nsec / NSEC_PER_MSEC;
618
619
if (tts < 0)
620
tts = 0;
621
622
*time_to_sleep = tts;
623
}
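/*
 * Worked example for compute_tts() (added here): with sleep_time = 1000ms and
 * a control-fd event that took 238ms to handle (time_stop - time_start), the
 * remaining budget becomes *time_to_sleep = 762ms; if handling overran the
 * budget, the result is clamped to 0 so the next poll returns immediately.
 */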
624
625
static int dispatch_events(bool forks, int timeout, int interval, int *times)
626
{
627
int child_exited = 0, status = 0;
628
int time_to_sleep, sleep_time;
629
struct timespec time_start, time_stop;
630
631
if (interval)
632
sleep_time = interval;
633
else if (timeout)
634
sleep_time = timeout;
635
else
636
sleep_time = 1000;
637
638
time_to_sleep = sleep_time;
639
640
while (!done) {
641
if (forks)
642
child_exited = waitpid(child_pid, &status, WNOHANG);
643
else
644
child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0;
645
646
if (child_exited)
647
break;
648
649
clock_gettime(CLOCK_MONOTONIC, &time_start);
650
if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */
651
if (timeout || handle_interval(interval, times))
652
break;
653
time_to_sleep = sleep_time;
654
} else { /* fd revent */
655
process_evlist(evsel_list, interval);
656
clock_gettime(CLOCK_MONOTONIC, &time_stop);
657
compute_tts(&time_start, &time_stop, &time_to_sleep);
658
}
659
}
660
661
return status;
662
}
663
664
enum counter_recovery {
665
COUNTER_SKIP,
666
COUNTER_RETRY,
667
};
668
669
static enum counter_recovery stat_handle_error(struct evsel *counter, int err)
670
{
671
char msg[BUFSIZ];
672
673
assert(!counter->supported);
674
675
/*
676
* PPC returns ENXIO for HW counters until 2.6.37
677
* (behavior changed with commit b0a873e).
678
*/
679
if (err == EINVAL || err == ENOSYS || err == ENOENT || err == ENXIO) {
680
if (verbose > 0) {
681
evsel__open_strerror(counter, &target, err, msg, sizeof(msg));
682
ui__warning("%s event is not supported by the kernel.\n%s\n",
683
evsel__name(counter), msg);
684
}
685
return COUNTER_SKIP;
686
}
687
if (evsel__fallback(counter, &target, err, msg, sizeof(msg))) {
688
if (verbose > 0)
689
ui__warning("%s\n", msg);
690
counter->supported = true;
691
return COUNTER_RETRY;
692
}
693
if (target__has_per_thread(&target) && err != EOPNOTSUPP &&
694
evsel_list->core.threads && evsel_list->core.threads->err_thread != -1) {
695
/*
696
* For global --per-thread case, skip current
697
* error thread.
698
*/
699
if (!thread_map__remove(evsel_list->core.threads,
700
evsel_list->core.threads->err_thread)) {
701
evsel_list->core.threads->err_thread = -1;
702
counter->supported = true;
703
return COUNTER_RETRY;
704
}
705
}
706
if (verbose > 0) {
707
evsel__open_strerror(counter, &target, err, msg, sizeof(msg));
708
ui__warning(err == EOPNOTSUPP
709
? "%s event is not supported by the kernel.\n%s\n"
710
: "skipping event %s that kernel failed to open.\n%s\n",
711
evsel__name(counter), msg);
712
}
713
return COUNTER_SKIP;
714
}
715
716
static int create_perf_stat_counter(struct evsel *evsel,
717
struct perf_stat_config *config,
718
int cpu_map_idx)
719
{
720
struct perf_event_attr *attr = &evsel->core.attr;
721
struct evsel *leader = evsel__leader(evsel);
722
723
/* Reset supported flag as creating a stat counter is retried. */
724
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
725
PERF_FORMAT_TOTAL_TIME_RUNNING;
726
727
/*
728
* The event is part of non trivial group, let's enable
729
* the group read (for leader) and ID retrieval for all
730
* members.
731
*/
732
if (leader->core.nr_members > 1)
733
attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
734
735
attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list);
736
737
/*
738
* Some events get initialized with sample_(period/type) set,
739
* like tracepoints. Clear it up for counting.
740
*/
741
attr->sample_period = 0;
742
743
if (config->identifier)
744
attr->sample_type = PERF_SAMPLE_IDENTIFIER;
745
746
if (config->all_user) {
747
attr->exclude_kernel = 1;
748
attr->exclude_user = 0;
749
}
750
751
if (config->all_kernel) {
752
attr->exclude_kernel = 0;
753
attr->exclude_user = 1;
754
}
755
756
/*
757
* Disabling all counters initially, they will be enabled
758
* either manually by us or by kernel via enable_on_exec
759
* set later.
760
*/
761
if (evsel__is_group_leader(evsel)) {
762
attr->disabled = 1;
763
764
if (target__enable_on_exec(&target))
765
attr->enable_on_exec = 1;
766
}
767
768
return evsel__open_per_cpu_and_thread(evsel, evsel__cpus(evsel), cpu_map_idx,
769
evsel->core.threads);
770
}
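/*
 * Layout sketch (added here; taken from the perf_event_open(2) read_format
 * documentation, not from this file): with the flags set above, a read() of a
 * group leader returns
 *
 *	struct {
 *		u64 nr;			// PERF_FORMAT_GROUP
 *		u64 time_enabled;	// PERF_FORMAT_TOTAL_TIME_ENABLED
 *		u64 time_running;	// PERF_FORMAT_TOTAL_TIME_RUNNING
 *		struct { u64 value, id; } cnt[nr];	// id from PERF_FORMAT_ID
 *	};
 *
 * while a lone (single-member) event returns just the value plus the two time
 * fields, since GROUP/ID are only added when nr_members > 1.
 */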
771
772
static void update_rusage_stats(const struct rusage *rusage)
773
{
774
const u64 us_to_ns = 1000;
775
const u64 s_to_ns = 1000000000;
776
777
update_stats(&ru_stats.ru_utime_usec_stat,
778
(rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns));
779
update_stats(&ru_stats.ru_stime_usec_stat,
780
(rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns));
781
}
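/*
 * Worked example (added here): ru_utime = { .tv_sec = 1, .tv_usec = 500000 }
 * contributes 500000 * 1000 + 1 * 1000000000 = 1.5e9 to ru_utime_usec_stat,
 * i.e. despite the "_usec_" name the stored unit is nanoseconds, which is the
 * value read_single_counter() later reports for the user_time/system_time
 * tool events.
 */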
782
783
static int __run_perf_stat(int argc, const char **argv, int run_idx)
784
{
785
int interval = stat_config.interval;
786
int times = stat_config.times;
787
int timeout = stat_config.timeout;
788
char msg[BUFSIZ];
789
unsigned long long t0, t1;
790
struct evsel *counter;
791
size_t l;
792
int status = 0;
793
const bool forks = (argc > 0);
794
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
795
struct evlist_cpu_iterator evlist_cpu_itr;
796
struct affinity saved_affinity, *affinity = NULL;
797
int err, open_err = 0;
798
bool second_pass = false, has_supported_counters;
799
800
if (forks) {
801
if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) {
802
perror("failed to prepare workload");
803
return -1;
804
}
805
child_pid = evsel_list->workload.pid;
806
}
807
808
if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) {
809
if (affinity__setup(&saved_affinity) < 0) {
810
err = -1;
811
goto err_out;
812
}
813
affinity = &saved_affinity;
814
}
815
816
evlist__for_each_entry(evsel_list, counter) {
817
counter->reset_group = false;
818
if (bpf_counter__load(counter, &target)) {
819
err = -1;
820
goto err_out;
821
}
822
if (!(evsel__is_bperf(counter)))
823
all_counters_use_bpf = false;
824
}
825
826
evlist__reset_aggr_stats(evsel_list);
827
828
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
829
counter = evlist_cpu_itr.evsel;
830
831
/*
832
* bperf calls evsel__open_per_cpu() in bperf__load(), so
833
* no need to call it again here.
834
*/
835
if (target.use_bpf)
836
break;
837
838
if (counter->reset_group || !counter->supported)
839
continue;
840
if (evsel__is_bperf(counter))
841
continue;
842
843
while (true) {
844
if (create_perf_stat_counter(counter, &stat_config,
845
evlist_cpu_itr.cpu_map_idx) == 0)
846
break;
847
848
open_err = errno;
849
/*
850
* Weak group failed. We cannot just undo this here
851
* because earlier CPUs might be in group mode, and the kernel
852
* doesn't support mixing group and non group reads. Defer
853
* it to later.
854
* Don't close here because we're in the wrong affinity.
855
*/
856
if ((open_err == EINVAL || open_err == EBADF) &&
857
evsel__leader(counter) != counter &&
858
counter->weak_group) {
859
evlist__reset_weak_group(evsel_list, counter, false);
860
assert(counter->reset_group);
861
counter->supported = true;
862
second_pass = true;
863
break;
864
}
865
866
if (stat_handle_error(counter, open_err) != COUNTER_RETRY)
867
break;
868
}
869
}
870
871
if (second_pass) {
872
/*
873
* Now redo all the weak group after closing them,
874
* and also close errored counters.
875
*/
876
877
/* First close errored or weak retry */
878
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
879
counter = evlist_cpu_itr.evsel;
880
881
if (!counter->reset_group && counter->supported)
882
continue;
883
884
perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
885
}
886
/* Now reopen weak */
887
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
888
counter = evlist_cpu_itr.evsel;
889
890
if (!counter->reset_group)
891
continue;
892
893
while (true) {
894
pr_debug2("reopening weak %s\n", evsel__name(counter));
895
if (create_perf_stat_counter(counter, &stat_config,
896
evlist_cpu_itr.cpu_map_idx) == 0)
897
break;
898
899
open_err = errno;
900
if (stat_handle_error(counter, open_err) != COUNTER_RETRY)
901
break;
902
}
903
}
904
}
905
affinity__cleanup(affinity);
906
affinity = NULL;
907
908
has_supported_counters = false;
909
evlist__for_each_entry(evsel_list, counter) {
910
if (!counter->supported) {
911
perf_evsel__free_fd(&counter->core);
912
continue;
913
}
914
has_supported_counters = true;
915
916
l = strlen(counter->unit);
917
if (l > stat_config.unit_width)
918
stat_config.unit_width = l;
919
920
if (evsel__should_store_id(counter) &&
921
evsel__store_ids(counter, evsel_list)) {
922
err = -1;
923
goto err_out;
924
}
925
}
926
if (!has_supported_counters && !stat_config.null_run) {
927
if (open_err) {
928
evsel__open_strerror(evlist__first(evsel_list), &target, open_err,
929
msg, sizeof(msg));
930
}
931
ui__error("No supported events found.\n%s\n", msg);
932
933
if (child_pid != -1)
934
kill(child_pid, SIGTERM);
935
err = -1;
936
goto err_out;
937
}
938
939
if (evlist__apply_filters(evsel_list, &counter, &target)) {
940
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
941
counter->filter, evsel__name(counter), errno,
942
str_error_r(errno, msg, sizeof(msg)));
943
return -1;
944
}
945
946
if (STAT_RECORD) {
947
int fd = perf_data__fd(&perf_stat.data);
948
949
if (is_pipe) {
950
err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
951
} else {
952
err = perf_session__write_header(perf_stat.session, evsel_list,
953
fd, false);
954
}
955
956
if (err < 0)
957
goto err_out;
958
959
err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list,
960
process_synthesized_event, is_pipe);
961
if (err < 0)
962
goto err_out;
963
964
}
965
966
if (target.initial_delay) {
967
pr_info(EVLIST_DISABLED_MSG);
968
} else {
969
err = enable_counters();
970
if (err) {
971
err = -1;
972
goto err_out;
973
}
974
}
975
976
/* Exec the command, if any */
977
if (forks)
978
evlist__start_workload(evsel_list);
979
980
if (target.initial_delay > 0) {
981
usleep(target.initial_delay * USEC_PER_MSEC);
982
err = enable_counters();
983
if (err) {
984
err = -1;
985
goto err_out;
986
}
987
988
pr_info(EVLIST_ENABLED_MSG);
989
}
990
991
t0 = rdclock();
992
clock_gettime(CLOCK_MONOTONIC, &ref_time);
993
994
if (forks) {
995
if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
996
status = dispatch_events(forks, timeout, interval, &times);
997
if (child_pid != -1) {
998
if (timeout)
999
kill(child_pid, SIGTERM);
1000
wait4(child_pid, &status, 0, &stat_config.ru_data);
1001
}
1002
1003
if (workload_exec_errno) {
1004
const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1005
pr_err("Workload failed: %s\n", emsg);
1006
err = -1;
1007
goto err_out;
1008
}
1009
1010
if (WIFSIGNALED(status)) {
1011
/*
1012
* We want to indicate failure to stop a repeat run,
1013
* hence negative. We want the value to be the exit code
1014
* of perf, which for termination by a signal is 128
1015
* plus the signal number.
1016
*/
1017
err = 0 - (128 + WTERMSIG(status));
1018
psignal(WTERMSIG(status), argv[0]);
1019
} else {
1020
err = WEXITSTATUS(status);
1021
}
1022
} else {
1023
err = dispatch_events(forks, timeout, interval, &times);
1024
}
1025
1026
disable_counters();
1027
1028
t1 = rdclock();
1029
1030
if (stat_config.walltime_run_table)
1031
stat_config.walltime_run[run_idx] = t1 - t0;
1032
1033
if (interval && stat_config.summary) {
1034
stat_config.interval = 0;
1035
stat_config.stop_read_counter = true;
1036
init_stats(stat_config.walltime_nsecs_stats);
1037
update_stats(stat_config.walltime_nsecs_stats, t1 - t0);
1038
1039
evlist__copy_prev_raw_counts(evsel_list);
1040
evlist__reset_prev_raw_counts(evsel_list);
1041
evlist__reset_aggr_stats(evsel_list);
1042
} else {
1043
update_stats(stat_config.walltime_nsecs_stats, t1 - t0);
1044
update_rusage_stats(&stat_config.ru_data);
1045
}
1046
1047
/*
1048
* Closing a group leader splits the group, and as we only disable
1049
* group leaders, results in remaining events becoming enabled. To
1050
* avoid arbitrary skew, we must read all counters before closing any
1051
* group leaders.
1052
*/
1053
if (read_counters() == 0)
1054
process_counters();
1055
1056
/*
1057
* We need to keep evsel_list alive, because it's processed
* later; the evsel_list will be closed afterwards.
1059
*/
1060
if (!STAT_RECORD)
1061
evlist__close(evsel_list);
1062
1063
return err;
1064
1065
err_out:
1066
if (forks)
1067
evlist__cancel_workload(evsel_list);
1068
1069
affinity__cleanup(affinity);
1070
return err;
1071
}
1072
1073
/*
1074
* Returns -1 for fatal errors which signifies to not continue
1075
* when in repeat mode.
1076
*
1077
* Returns < -1 error codes when stat record is used. These
1078
* result in the stat information being displayed, but writing
1079
* to the file fails and is non fatal.
1080
*/
1081
static int run_perf_stat(int argc, const char **argv, int run_idx)
1082
{
1083
int ret;
1084
1085
if (pre_cmd) {
1086
ret = system(pre_cmd);
1087
if (ret)
1088
return ret;
1089
}
1090
1091
if (sync_run)
1092
sync();
1093
1094
ret = __run_perf_stat(argc, argv, run_idx);
1095
if (ret)
1096
return ret;
1097
1098
if (post_cmd) {
1099
ret = system(post_cmd);
1100
if (ret)
1101
return ret;
1102
}
1103
1104
return ret;
1105
}
1106
1107
static void print_counters(struct timespec *ts, int argc, const char **argv)
1108
{
1109
/* Do not print anything if we record to the pipe. */
1110
if (STAT_RECORD && perf_stat.data.is_pipe)
1111
return;
1112
if (quiet)
1113
return;
1114
1115
evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv);
1116
}
1117
1118
static volatile sig_atomic_t signr = -1;
1119
1120
static void skip_signal(int signo)
1121
{
1122
if ((child_pid == -1) || stat_config.interval)
1123
done = 1;
1124
1125
signr = signo;
1126
/*
1127
* render child_pid harmless
1128
* won't send SIGTERM to a random
1129
* process in case of race condition
1130
* and fast PID recycling
1131
*/
1132
child_pid = -1;
1133
}
1134
1135
static void sig_atexit(void)
1136
{
1137
sigset_t set, oset;
1138
1139
/*
1140
* avoid race condition with SIGCHLD handler
1141
* in skip_signal() which is modifying child_pid
1142
* goal is to avoid send SIGTERM to a random
1143
* process
1144
*/
1145
sigemptyset(&set);
1146
sigaddset(&set, SIGCHLD);
1147
sigprocmask(SIG_BLOCK, &set, &oset);
1148
1149
if (child_pid != -1)
1150
kill(child_pid, SIGTERM);
1151
1152
sigprocmask(SIG_SETMASK, &oset, NULL);
1153
1154
if (signr == -1)
1155
return;
1156
1157
signal(signr, SIG_DFL);
1158
kill(getpid(), signr);
1159
}
1160
1161
static int stat__set_big_num(const struct option *opt __maybe_unused,
1162
const char *s __maybe_unused, int unset)
1163
{
1164
big_num_opt = unset ? 0 : 1;
1165
perf_stat__set_big_num(!unset);
1166
return 0;
1167
}
1168
1169
static int enable_metric_only(const struct option *opt __maybe_unused,
1170
const char *s __maybe_unused, int unset)
1171
{
1172
force_metric_only = true;
1173
stat_config.metric_only = !unset;
1174
return 0;
1175
}
1176
1177
static int append_metric_groups(const struct option *opt __maybe_unused,
1178
const char *str,
1179
int unset __maybe_unused)
1180
{
1181
if (metrics) {
1182
char *tmp;
1183
1184
if (asprintf(&tmp, "%s,%s", metrics, str) < 0)
1185
return -ENOMEM;
1186
free(metrics);
1187
metrics = tmp;
1188
} else {
1189
metrics = strdup(str);
1190
if (!metrics)
1191
return -ENOMEM;
1192
}
1193
return 0;
1194
}
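/*
 * Usage sketch (added here; the metric names are only placeholders): repeated
 * -M/--metrics options accumulate, so "-M TopdownL1 -M MyGroup" leaves
 * metrics == "TopdownL1,MyGroup" via the asprintf("%s,%s", ...) path above.
 */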
1195
1196
static int parse_control_option(const struct option *opt,
1197
const char *str,
1198
int unset __maybe_unused)
1199
{
1200
struct perf_stat_config *config = opt->value;
1201
1202
return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close);
1203
}
1204
1205
static int parse_stat_cgroups(const struct option *opt,
1206
const char *str, int unset)
1207
{
1208
if (stat_config.cgroup_list) {
1209
pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
1210
return -1;
1211
}
1212
1213
return parse_cgroups(opt, str, unset);
1214
}
1215
1216
static int parse_cputype(const struct option *opt,
1217
const char *str,
1218
int unset __maybe_unused)
1219
{
1220
const struct perf_pmu *pmu;
1221
struct evlist *evlist = *(struct evlist **)opt->value;
1222
1223
if (!list_empty(&evlist->core.entries)) {
1224
fprintf(stderr, "Must define cputype before events/metrics\n");
1225
return -1;
1226
}
1227
1228
pmu = perf_pmus__pmu_for_pmu_filter(str);
1229
if (!pmu) {
1230
fprintf(stderr, "--cputype %s is not supported!\n", str);
1231
return -1;
1232
}
1233
parse_events_option_args.pmu_filter = pmu->name;
1234
1235
return 0;
1236
}
1237
1238
static int parse_cache_level(const struct option *opt,
1239
const char *str,
1240
int unset __maybe_unused)
1241
{
1242
int level;
1243
struct opt_aggr_mode *opt_aggr_mode = (struct opt_aggr_mode *)opt->value;
1244
u32 *aggr_level = (u32 *)opt->data;
1245
1246
/*
1247
* If no string is specified, aggregate based on the topology of
1248
* Last Level Cache (LLC). Since the LLC level can change from
1249
* architecture to architecture, set level greater than
1250
* MAX_CACHE_LVL which will be interpreted as LLC.
1251
*/
1252
if (str == NULL) {
1253
level = MAX_CACHE_LVL + 1;
1254
goto out;
1255
}
1256
1257
/*
1258
* The format to specify cache level is LX or lX where X is the
1259
* cache level.
1260
*/
1261
if (strlen(str) != 2 || (str[0] != 'l' && str[0] != 'L')) {
1262
pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
1263
MAX_CACHE_LVL,
1264
MAX_CACHE_LVL);
1265
return -EINVAL;
1266
}
1267
1268
level = atoi(&str[1]);
1269
if (level < 1) {
1270
pr_err("Cache level must be of form L[1-%d], or l[1-%d]\n",
1271
MAX_CACHE_LVL,
1272
MAX_CACHE_LVL);
1273
return -EINVAL;
1274
}
1275
1276
if (level > MAX_CACHE_LVL) {
1277
pr_err("perf only supports max cache level of %d.\n"
1278
"Consider increasing MAX_CACHE_LVL\n", MAX_CACHE_LVL);
1279
return -EINVAL;
1280
}
1281
out:
1282
opt_aggr_mode->cache = true;
1283
*aggr_level = level;
1284
return 0;
1285
}
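/*
 * Examples (added here; the option name is my reading of the perf-stat docs,
 * not visible in this excerpt): "--per-cache" with no argument selects level
 * MAX_CACHE_LVL + 1, later treated as the last level cache; "--per-cache=L2"
 * or "=l2" selects level 2; strings such as "L0", "2" or "L10" are rejected
 * by the checks above.
 */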
1286
1287
/**
1288
* Calculate the cache instance ID from the map in
1289
* /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
1290
* Cache instance ID is the first CPU reported in the shared_cpu_list file.
1291
*/
1292
static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
1293
{
1294
int id;
1295
struct perf_cpu_map *cpu_map = perf_cpu_map__new(map);
1296
1297
/*
1298
* If the map contains no CPU, consider the current CPU to
1299
* be the first online CPU in the cache domain else use the
1300
* first online CPU of the cache domain as the ID.
1301
*/
1302
id = perf_cpu_map__min(cpu_map).cpu;
1303
if (id == -1)
1304
id = cpu.cpu;
1305
1306
/* Free the perf_cpu_map used to find the cache ID */
1307
perf_cpu_map__put(cpu_map);
1308
1309
return id;
1310
}
1311
1312
/**
1313
* cpu__get_cache_details - Returns 0 if successful in populating the
* cache level and cache id. Cache level is read from
* /sys/devices/system/cpu/cpuX/cache/indexY/level whereas cache instance ID
1316
* is the first CPU reported by
1317
* /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
1318
*/
1319
static int cpu__get_cache_details(struct perf_cpu cpu, struct perf_cache *cache)
1320
{
1321
int ret = 0;
1322
u32 cache_level = stat_config.aggr_level;
1323
struct cpu_cache_level caches[MAX_CACHE_LVL];
1324
u32 i = 0, caches_cnt = 0;
1325
1326
cache->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
1327
cache->cache = -1;
1328
1329
ret = build_caches_for_cpu(cpu.cpu, caches, &caches_cnt);
1330
if (ret) {
1331
/*
1332
* If caches_cnt is not 0, cpu_cache_level data
1333
* was allocated when building the topology.
1334
* Free the allocated data before returning.
1335
*/
1336
if (caches_cnt)
1337
goto free_caches;
1338
1339
return ret;
1340
}
1341
1342
if (!caches_cnt)
1343
return -1;
1344
1345
/*
1346
* Save the data for the highest level if no
1347
* level was specified by the user.
1348
*/
1349
if (cache_level > MAX_CACHE_LVL) {
1350
int max_level_index = 0;
1351
1352
for (i = 1; i < caches_cnt; ++i) {
1353
if (caches[i].level > caches[max_level_index].level)
1354
max_level_index = i;
1355
}
1356
1357
cache->cache_lvl = caches[max_level_index].level;
1358
cache->cache = cpu__get_cache_id_from_map(cpu, caches[max_level_index].map);
1359
1360
/* Reset i to 0 to free entire caches[] */
1361
i = 0;
1362
goto free_caches;
1363
}
1364
1365
for (i = 0; i < caches_cnt; ++i) {
1366
if (caches[i].level == cache_level) {
1367
cache->cache_lvl = cache_level;
1368
cache->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
1369
}
1370
1371
cpu_cache_level__free(&caches[i]);
1372
}
1373
1374
free_caches:
1375
/*
1376
* Free all the allocated cpu_cache_level data.
1377
*/
1378
while (i < caches_cnt)
1379
cpu_cache_level__free(&caches[i++]);
1380
1381
return ret;
1382
}
1383
1384
/**
1385
* aggr_cpu_id__cache - Create an aggr_cpu_id with cache instance ID, cache
* level, die and socket populated with the cache instance ID, cache level,
1387
* die and socket for cpu. The function signature is compatible with
1388
* aggr_cpu_id_get_t.
1389
*/
1390
static struct aggr_cpu_id aggr_cpu_id__cache(struct perf_cpu cpu, void *data)
1391
{
1392
int ret;
1393
struct aggr_cpu_id id;
1394
struct perf_cache cache;
1395
1396
id = aggr_cpu_id__die(cpu, data);
1397
if (aggr_cpu_id__is_empty(&id))
1398
return id;
1399
1400
ret = cpu__get_cache_details(cpu, &cache);
1401
if (ret)
1402
return id;
1403
1404
id.cache_lvl = cache.cache_lvl;
1405
id.cache = cache.cache;
1406
return id;
1407
}
1408
1409
static const char *const aggr_mode__string[] = {
1410
[AGGR_CORE] = "core",
1411
[AGGR_CACHE] = "cache",
1412
[AGGR_CLUSTER] = "cluster",
1413
[AGGR_DIE] = "die",
1414
[AGGR_GLOBAL] = "global",
1415
[AGGR_NODE] = "node",
1416
[AGGR_NONE] = "none",
1417
[AGGR_SOCKET] = "socket",
1418
[AGGR_THREAD] = "thread",
1419
[AGGR_UNSET] = "unset",
1420
};
1421
1422
static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
1423
struct perf_cpu cpu)
1424
{
1425
return aggr_cpu_id__socket(cpu, /*data=*/NULL);
1426
}
1427
1428
static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
1429
struct perf_cpu cpu)
1430
{
1431
return aggr_cpu_id__die(cpu, /*data=*/NULL);
1432
}
1433
1434
static struct aggr_cpu_id perf_stat__get_cache_id(struct perf_stat_config *config __maybe_unused,
1435
struct perf_cpu cpu)
1436
{
1437
return aggr_cpu_id__cache(cpu, /*data=*/NULL);
1438
}
1439
1440
static struct aggr_cpu_id perf_stat__get_cluster(struct perf_stat_config *config __maybe_unused,
1441
struct perf_cpu cpu)
1442
{
1443
return aggr_cpu_id__cluster(cpu, /*data=*/NULL);
1444
}
1445
1446
static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
1447
struct perf_cpu cpu)
1448
{
1449
return aggr_cpu_id__core(cpu, /*data=*/NULL);
1450
}
1451
1452
static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
1453
struct perf_cpu cpu)
1454
{
1455
return aggr_cpu_id__node(cpu, /*data=*/NULL);
1456
}
1457
1458
static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config __maybe_unused,
1459
struct perf_cpu cpu)
1460
{
1461
return aggr_cpu_id__global(cpu, /*data=*/NULL);
1462
}
1463
1464
static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __maybe_unused,
1465
struct perf_cpu cpu)
1466
{
1467
return aggr_cpu_id__cpu(cpu, /*data=*/NULL);
1468
}
1469
1470
static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
1471
aggr_get_id_t get_id, struct perf_cpu cpu)
1472
{
1473
struct aggr_cpu_id id;
1474
1475
/* per-process mode - should use global aggr mode */
1476
if (cpu.cpu == -1 || cpu.cpu >= config->cpus_aggr_map->nr)
1477
return get_id(config, cpu);
1478
1479
if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
1480
config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
1481
1482
id = config->cpus_aggr_map->map[cpu.cpu];
1483
return id;
1484
}
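/*
 * Note added for clarity: cpus_aggr_map acts as a per-CPU memoization table,
 * so the potentially expensive topology lookup in get_id() (sysfs reads,
 * cache-map parsing) runs at most once per CPU; later lookups for the same
 * CPU are served from config->cpus_aggr_map->map[cpu.cpu].
 */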
1485
1486
static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config,
1487
struct perf_cpu cpu)
1488
{
1489
return perf_stat__get_aggr(config, perf_stat__get_socket, cpu);
1490
}
1491
1492
static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config,
1493
struct perf_cpu cpu)
1494
{
1495
return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
1496
}
1497
1498
static struct aggr_cpu_id perf_stat__get_cluster_cached(struct perf_stat_config *config,
1499
struct perf_cpu cpu)
1500
{
1501
return perf_stat__get_aggr(config, perf_stat__get_cluster, cpu);
1502
}
1503
1504
static struct aggr_cpu_id perf_stat__get_cache_id_cached(struct perf_stat_config *config,
1505
struct perf_cpu cpu)
1506
{
1507
return perf_stat__get_aggr(config, perf_stat__get_cache_id, cpu);
1508
}
1509
1510
static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config,
1511
struct perf_cpu cpu)
1512
{
1513
return perf_stat__get_aggr(config, perf_stat__get_core, cpu);
1514
}
1515
1516
static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config,
1517
struct perf_cpu cpu)
1518
{
1519
return perf_stat__get_aggr(config, perf_stat__get_node, cpu);
1520
}
1521
1522
static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config *config,
1523
struct perf_cpu cpu)
1524
{
1525
return perf_stat__get_aggr(config, perf_stat__get_global, cpu);
1526
}
1527
1528
static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *config,
1529
struct perf_cpu cpu)
1530
{
1531
return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu);
1532
}
1533
1534
static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
1535
{
1536
switch (aggr_mode) {
1537
case AGGR_SOCKET:
1538
return aggr_cpu_id__socket;
1539
case AGGR_DIE:
1540
return aggr_cpu_id__die;
1541
case AGGR_CLUSTER:
1542
return aggr_cpu_id__cluster;
1543
case AGGR_CACHE:
1544
return aggr_cpu_id__cache;
1545
case AGGR_CORE:
1546
return aggr_cpu_id__core;
1547
case AGGR_NODE:
1548
return aggr_cpu_id__node;
1549
case AGGR_NONE:
1550
return aggr_cpu_id__cpu;
1551
case AGGR_GLOBAL:
1552
return aggr_cpu_id__global;
1553
case AGGR_THREAD:
1554
case AGGR_UNSET:
1555
case AGGR_MAX:
1556
default:
1557
return NULL;
1558
}
1559
}
1560
1561
static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
1562
{
1563
switch (aggr_mode) {
1564
case AGGR_SOCKET:
1565
return perf_stat__get_socket_cached;
1566
case AGGR_DIE:
1567
return perf_stat__get_die_cached;
1568
case AGGR_CLUSTER:
1569
return perf_stat__get_cluster_cached;
1570
case AGGR_CACHE:
1571
return perf_stat__get_cache_id_cached;
1572
case AGGR_CORE:
1573
return perf_stat__get_core_cached;
1574
case AGGR_NODE:
1575
return perf_stat__get_node_cached;
1576
case AGGR_NONE:
1577
return perf_stat__get_cpu_cached;
1578
case AGGR_GLOBAL:
1579
return perf_stat__get_global_cached;
1580
case AGGR_THREAD:
1581
case AGGR_UNSET:
1582
case AGGR_MAX:
1583
default:
1584
return NULL;
1585
}
1586
}
1587
1588
static int perf_stat_init_aggr_mode(void)
1589
{
1590
int nr;
1591
aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode);
1592
1593
if (get_id) {
1594
bool needs_sort = stat_config.aggr_mode != AGGR_NONE;
1595
stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
1596
get_id, /*data=*/NULL, needs_sort);
1597
if (!stat_config.aggr_map) {
1598
pr_err("cannot build %s map\n", aggr_mode__string[stat_config.aggr_mode]);
1599
return -1;
1600
}
1601
stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode);
1602
}
1603
1604
if (stat_config.aggr_mode == AGGR_THREAD) {
1605
nr = perf_thread_map__nr(evsel_list->core.threads);
1606
stat_config.aggr_map = cpu_aggr_map__empty_new(nr);
1607
if (stat_config.aggr_map == NULL)
1608
return -ENOMEM;
1609
1610
for (int s = 0; s < nr; s++) {
1611
struct aggr_cpu_id id = aggr_cpu_id__empty();
1612
1613
id.thread_idx = s;
1614
stat_config.aggr_map->map[s] = id;
1615
}
1616
return 0;
1617
}
1618
1619
/*
1620
* The evsel_list->cpus is the base we operate on,
1621
* taking the highest cpu number to be the size of
1622
* the aggregation translate cpumap.
1623
*/
1624
nr = perf_cpu_map__max(evsel_list->core.all_cpus).cpu + 1;
1625
stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr);
1626
return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
1627
}
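/*
 * Sizing example (added here): if the online CPUs are {0, 1, 2, 3, 8, 9},
 * perf_cpu_map__max() is 9 and cpus_aggr_map gets 10 slots, so it can be
 * indexed directly by CPU number in perf_stat__get_aggr() even when the CPU
 * space is sparse.
 */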
1628
1629
static void cpu_aggr_map__delete(struct cpu_aggr_map *map)
1630
{
1631
free(map);
1632
}
1633
1634
static void perf_stat__exit_aggr_mode(void)
1635
{
1636
cpu_aggr_map__delete(stat_config.aggr_map);
1637
cpu_aggr_map__delete(stat_config.cpus_aggr_map);
1638
stat_config.aggr_map = NULL;
1639
stat_config.cpus_aggr_map = NULL;
1640
}
1641
1642
static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data)
1643
{
1644
struct perf_env *env = data;
1645
struct aggr_cpu_id id = aggr_cpu_id__empty();
1646
1647
if (cpu.cpu != -1)
1648
id.socket = env->cpu[cpu.cpu].socket_id;
1649
1650
return id;
1651
}
1652
1653
static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data)
1654
{
1655
struct perf_env *env = data;
1656
struct aggr_cpu_id id = aggr_cpu_id__empty();
1657
1658
if (cpu.cpu != -1) {
1659
/*
1660
* die_id is relative to socket, so start
1661
* with the socket ID and then add die to
1662
* make a unique ID.
1663
*/
1664
id.socket = env->cpu[cpu.cpu].socket_id;
1665
id.die = env->cpu[cpu.cpu].die_id;
1666
}
1667
1668
return id;
1669
}
1670
1671
static void perf_env__get_cache_id_for_cpu(struct perf_cpu cpu, struct perf_env *env,
1672
u32 cache_level, struct aggr_cpu_id *id)
1673
{
1674
int i;
1675
int caches_cnt = env->caches_cnt;
1676
struct cpu_cache_level *caches = env->caches;
1677
1678
id->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
1679
id->cache = -1;
1680
1681
if (!caches_cnt)
1682
return;
1683
1684
for (i = caches_cnt - 1; i > -1; --i) {
1685
struct perf_cpu_map *cpu_map;
1686
int map_contains_cpu;
1687
1688
/*
1689
* If the user has not specified a level, find the first level with
1690
* the cpu in the map. Since building the map is expensive, do
1691
* this only if levels match.
1692
*/
1693
if (cache_level <= MAX_CACHE_LVL && caches[i].level != cache_level)
1694
continue;
1695
1696
cpu_map = perf_cpu_map__new(caches[i].map);
1697
map_contains_cpu = perf_cpu_map__idx(cpu_map, cpu);
1698
perf_cpu_map__put(cpu_map);
1699
1700
if (map_contains_cpu != -1) {
1701
id->cache_lvl = caches[i].level;
1702
id->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
1703
return;
1704
}
1705
}
1706
}
1707
1708
static struct aggr_cpu_id perf_env__get_cache_aggr_by_cpu(struct perf_cpu cpu,
1709
void *data)
1710
{
1711
struct perf_env *env = data;
1712
struct aggr_cpu_id id = aggr_cpu_id__empty();
1713
1714
if (cpu.cpu != -1) {
1715
u32 cache_level = (perf_stat.aggr_level) ?: stat_config.aggr_level;
1716
1717
id.socket = env->cpu[cpu.cpu].socket_id;
1718
id.die = env->cpu[cpu.cpu].die_id;
1719
perf_env__get_cache_id_for_cpu(cpu, env, cache_level, &id);
1720
}
1721
1722
return id;
1723
}
1724
1725
static struct aggr_cpu_id perf_env__get_cluster_aggr_by_cpu(struct perf_cpu cpu,
1726
void *data)
1727
{
1728
struct perf_env *env = data;
1729
struct aggr_cpu_id id = aggr_cpu_id__empty();
1730
1731
if (cpu.cpu != -1) {
1732
id.socket = env->cpu[cpu.cpu].socket_id;
1733
id.die = env->cpu[cpu.cpu].die_id;
1734
id.cluster = env->cpu[cpu.cpu].cluster_id;
1735
}
1736
1737
return id;
1738
}
1739
1740
static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
1741
{
1742
struct perf_env *env = data;
1743
struct aggr_cpu_id id = aggr_cpu_id__empty();
1744
1745
if (cpu.cpu != -1) {
1746
/*
1747
* core_id is relative to socket, die and cluster, we need a
1748
* global id. So we set socket, die id, cluster id and core id.
1749
*/
1750
id.socket = env->cpu[cpu.cpu].socket_id;
1751
id.die = env->cpu[cpu.cpu].die_id;
1752
id.cluster = env->cpu[cpu.cpu].cluster_id;
1753
id.core = env->cpu[cpu.cpu].core_id;
1754
}
1755
1756
return id;
1757
}
1758
1759
static struct aggr_cpu_id perf_env__get_cpu_aggr_by_cpu(struct perf_cpu cpu, void *data)
1760
{
1761
struct perf_env *env = data;
1762
struct aggr_cpu_id id = aggr_cpu_id__empty();
1763
1764
if (cpu.cpu != -1) {
1765
/*
1766
* core_id is relative to socket and die,
1767
* we need a global id. So we set
1768
* socket, die id and core id
1769
*/
1770
id.socket = env->cpu[cpu.cpu].socket_id;
1771
id.die = env->cpu[cpu.cpu].die_id;
1772
id.core = env->cpu[cpu.cpu].core_id;
1773
id.cpu = cpu;
1774
}
1775
1776
return id;
1777
}
1778
1779
static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data)
1780
{
1781
struct aggr_cpu_id id = aggr_cpu_id__empty();
1782
1783
id.node = perf_env__numa_node(data, cpu);
1784
return id;
1785
}
1786
1787
static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu __maybe_unused,
1788
void *data __maybe_unused)
1789
{
1790
struct aggr_cpu_id id = aggr_cpu_id__empty();
1791
1792
/* it always aggregates to the cpu 0 */
1793
id.cpu = (struct perf_cpu){ .cpu = 0 };
1794
return id;
1795
}
1796
1797
static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
1798
struct perf_cpu cpu)
1799
{
1800
return perf_env__get_socket_aggr_by_cpu(cpu, perf_session__env(perf_stat.session));
1801
}
1802
static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
1803
struct perf_cpu cpu)
1804
{
1805
return perf_env__get_die_aggr_by_cpu(cpu, perf_session__env(perf_stat.session));
1806
}
1807
1808
static struct aggr_cpu_id perf_stat__get_cluster_file(struct perf_stat_config *config __maybe_unused,
1809
struct perf_cpu cpu)
1810
{
1811
return perf_env__get_cluster_aggr_by_cpu(cpu, perf_session__env(perf_stat.session));
1812
}
1813
1814
static struct aggr_cpu_id perf_stat__get_cache_file(struct perf_stat_config *config __maybe_unused,
1815
struct perf_cpu cpu)
1816
{
1817
return perf_env__get_cache_aggr_by_cpu(cpu, perf_session__env(perf_stat.session));
1818
}
1819
1820
static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
1821
struct perf_cpu cpu)
1822
{
1823
return perf_env__get_core_aggr_by_cpu(cpu, perf_session__env(perf_stat.session));
1824
}
1825
1826
static struct aggr_cpu_id perf_stat__get_cpu_file(struct perf_stat_config *config __maybe_unused,
1827
struct perf_cpu cpu)
1828
{
1829
return perf_env__get_cpu_aggr_by_cpu(cpu, perf_session__env(perf_stat.session));
1830
}
1831
1832
static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
1833
struct perf_cpu cpu)
1834
{
1835
return perf_env__get_node_aggr_by_cpu(cpu, perf_session__env(perf_stat.session));
1836
}
1837
1838
static struct aggr_cpu_id perf_stat__get_global_file(struct perf_stat_config *config __maybe_unused,
1839
struct perf_cpu cpu)
1840
{
1841
return perf_env__get_global_aggr_by_cpu(cpu, perf_session__env(perf_stat.session));
1842
}
1843
1844
static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
1845
{
1846
switch (aggr_mode) {
1847
case AGGR_SOCKET:
1848
return perf_env__get_socket_aggr_by_cpu;
1849
case AGGR_DIE:
1850
return perf_env__get_die_aggr_by_cpu;
1851
case AGGR_CLUSTER:
1852
return perf_env__get_cluster_aggr_by_cpu;
1853
case AGGR_CACHE:
1854
return perf_env__get_cache_aggr_by_cpu;
1855
case AGGR_CORE:
1856
return perf_env__get_core_aggr_by_cpu;
1857
case AGGR_NODE:
1858
return perf_env__get_node_aggr_by_cpu;
1859
case AGGR_GLOBAL:
1860
return perf_env__get_global_aggr_by_cpu;
1861
case AGGR_NONE:
1862
return perf_env__get_cpu_aggr_by_cpu;
1863
case AGGR_THREAD:
1864
case AGGR_UNSET:
1865
case AGGR_MAX:
1866
default:
1867
return NULL;
1868
}
1869
}
1870
1871
static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
1872
{
1873
switch (aggr_mode) {
1874
case AGGR_SOCKET:
1875
return perf_stat__get_socket_file;
1876
case AGGR_DIE:
1877
return perf_stat__get_die_file;
1878
case AGGR_CLUSTER:
1879
return perf_stat__get_cluster_file;
1880
case AGGR_CACHE:
1881
return perf_stat__get_cache_file;
1882
case AGGR_CORE:
1883
return perf_stat__get_core_file;
1884
case AGGR_NODE:
1885
return perf_stat__get_node_file;
1886
case AGGR_GLOBAL:
1887
return perf_stat__get_global_file;
1888
case AGGR_NONE:
1889
return perf_stat__get_cpu_file;
1890
case AGGR_THREAD:
1891
case AGGR_UNSET:
1892
case AGGR_MAX:
1893
default:
1894
return NULL;
1895
}
1896
}
1897
1898
static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
1899
{
1900
struct perf_env *env = perf_session__env(st->session);
1901
aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode);
1902
bool needs_sort = stat_config.aggr_mode != AGGR_NONE;
1903
1904
if (stat_config.aggr_mode == AGGR_THREAD) {
1905
int nr = perf_thread_map__nr(evsel_list->core.threads);
1906
1907
stat_config.aggr_map = cpu_aggr_map__empty_new(nr);
1908
if (stat_config.aggr_map == NULL)
1909
return -ENOMEM;
1910
1911
for (int s = 0; s < nr; s++) {
1912
struct aggr_cpu_id id = aggr_cpu_id__empty();
1913
1914
id.thread_idx = s;
1915
stat_config.aggr_map->map[s] = id;
1916
}
1917
return 0;
1918
}
1919
1920
if (!get_id)
1921
return 0;
1922
1923
stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
1924
get_id, env, needs_sort);
1925
if (!stat_config.aggr_map) {
1926
pr_err("cannot build %s map\n", aggr_mode__string[stat_config.aggr_mode]);
1927
return -1;
1928
}
1929
stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode);
1930
return 0;
1931
}
1932
1933
static int default_evlist_evsel_cmp(void *priv __maybe_unused,
1934
const struct list_head *l,
1935
const struct list_head *r)
1936
{
1937
const struct perf_evsel *lhs_core = container_of(l, struct perf_evsel, node);
1938
const struct evsel *lhs = container_of(lhs_core, struct evsel, core);
1939
const struct perf_evsel *rhs_core = container_of(r, struct perf_evsel, node);
1940
const struct evsel *rhs = container_of(rhs_core, struct evsel, core);
1941
1942
if (evsel__leader(lhs) == evsel__leader(rhs)) {
1943
/* Within the same group, respect the original order. */
1944
return lhs_core->idx - rhs_core->idx;
1945
}
1946
1947
/* Sort default metrics evsels first, and default show events before those. */
1948
if (lhs->default_metricgroup != rhs->default_metricgroup)
1949
return lhs->default_metricgroup ? -1 : 1;
1950
1951
if (lhs->default_show_events != rhs->default_show_events)
1952
return lhs->default_show_events ? -1 : 1;
1953
1954
/* Sort by PMU type (prefers legacy types first). */
1955
if (lhs->pmu != rhs->pmu)
1956
return lhs->pmu->type - rhs->pmu->type;
1957
1958
/* Sort by name. */
1959
return strcmp(evsel__name((struct evsel *)lhs), evsel__name((struct evsel *)rhs));
1960
}
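/*
 * Note added for clarity: this comparator has the (priv, list_head, list_head)
 * shape expected by list_sort() from <linux/list_sort.h> (included above).
 * Members of the same group keep their original index order; across groups
 * the tie-break chain is default_metricgroup, then default_show_events, then
 * PMU type, then event name.
 */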
1961
1962
/*
1963
* Add default events, if there were no attributes specified or
1964
* if -d/--detailed, -d -d or -d -d -d is used:
1965
*/
1966
static int add_default_events(void)
1967
{
1968
const char *pmu = parse_events_option_args.pmu_filter ?: "all";
1969
struct parse_events_error err;
1970
struct evlist *evlist = evlist__new();
1971
struct evsel *evsel;
1972
int ret = 0;
1973
1974
if (!evlist)
1975
return -ENOMEM;
1976
1977
parse_events_error__init(&err);
1978
1979
/* Set attrs if no event is selected and !null_run: */
1980
if (stat_config.null_run)
1981
goto out;
1982
1983
if (transaction_run) {
1984
/* Handle -T as -M transaction. Once platform specific metrics
1985
* support has been added to the json files, all architectures
1986
* will use this approach. To determine transaction support
1987
* on an architecture, test for such a metric name.
1988
*/
1989
if (!metricgroup__has_metric_or_groups(pmu, "transaction")) {
1990
pr_err("Missing transaction metrics\n");
1991
ret = -1;
1992
goto out;
1993
}
1994
ret = metricgroup__parse_groups(evlist, pmu, "transaction",
1995
stat_config.metric_no_group,
1996
stat_config.metric_no_merge,
1997
stat_config.metric_no_threshold,
1998
stat_config.user_requested_cpu_list,
1999
stat_config.system_wide,
2000
stat_config.hardware_aware_grouping);
2001
goto out;
2002
}
2003
2004
if (smi_cost) {
2005
int smi;
2006
2007
if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
2008
pr_err("freeze_on_smi is not supported.\n");
2009
ret = -1;
2010
goto out;
2011
}
2012
2013
if (!smi) {
2014
if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
2015
pr_err("Failed to set freeze_on_smi.\n");
2016
ret = -1;
2017
goto out;
2018
}
2019
smi_reset = true;
2020
}
2021
2022
if (!metricgroup__has_metric_or_groups(pmu, "smi")) {
2023
pr_err("Missing smi metrics\n");
2024
ret = -1;
2025
goto out;
2026
}
2027
2028
if (!force_metric_only)
2029
stat_config.metric_only = true;
2030
2031
ret = metricgroup__parse_groups(evlist, pmu, "smi",
2032
stat_config.metric_no_group,
2033
stat_config.metric_no_merge,
2034
stat_config.metric_no_threshold,
2035
stat_config.user_requested_cpu_list,
2036
stat_config.system_wide,
2037
stat_config.hardware_aware_grouping);
2038
goto out;
2039
}
2040
2041
if (topdown_run) {
2042
unsigned int max_level = metricgroups__topdown_max_level();
2043
char str[] = "TopdownL1";
2044
2045
if (!force_metric_only)
2046
stat_config.metric_only = true;
2047
2048
if (!max_level) {
2049
pr_err("Topdown requested but the topdown metric groups aren't present.\n"
2050
"(See perf list the metric groups have names like TopdownL1)\n");
2051
ret = -1;
2052
goto out;
2053
}
2054
if (stat_config.topdown_level > max_level) {
2055
pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level);
2056
ret = -1;
2057
goto out;
2058
} else if (!stat_config.topdown_level) {
2059
stat_config.topdown_level = 1;
2060
}
2061
if (!stat_config.interval && !stat_config.metric_only) {
2062
fprintf(stat_config.output,
2063
"Topdown accuracy may decrease when measuring long periods.\n"
2064
"Please print the result regularly, e.g. -I1000\n");
2065
}
2066
str[8] = stat_config.topdown_level + '0';
2067
if (metricgroup__parse_groups(evlist,
2068
pmu, str,
2069
/*metric_no_group=*/false,
2070
/*metric_no_merge=*/false,
2071
/*metric_no_threshold=*/true,
2072
stat_config.user_requested_cpu_list,
2073
stat_config.system_wide,
2074
stat_config.hardware_aware_grouping) < 0) {
2075
ret = -1;
2076
goto out;
2077
}
2078
}
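/*
 * Illustrative example (command line is an assumption): the level digit is
 * patched into "TopdownL1" above, so
 *
 *   perf stat --topdown --td-level 2 -- ./workload
 *
 * asks for the "TopdownL2" metric group, provided the platform's json
 * metrics define groups up to metricgroups__topdown_max_level().
 */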
2079
2080
if (!stat_config.topdown_level)
2081
stat_config.topdown_level = 1;
2082
2083
if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) {
2084
/*
2085
* Add Default metrics. To minimize multiplexing, don't request
2086
* threshold computation, but it will be computed if the events
2087
* are present.
2088
*/
2089
const char *default_metricgroup_names[] = {
2090
"Default", "Default2", "Default3", "Default4",
2091
};
2092
2093
for (size_t i = 0; i < ARRAY_SIZE(default_metricgroup_names); i++) {
2094
struct evlist *metric_evlist;
2095
2096
if (!metricgroup__has_metric_or_groups(pmu, default_metricgroup_names[i]))
2097
continue;
2098
2099
if ((int)i > detailed_run)
2100
break;
2101
2102
metric_evlist = evlist__new();
2103
if (!metric_evlist) {
2104
ret = -ENOMEM;
2105
break;
2106
}
2107
if (metricgroup__parse_groups(metric_evlist, pmu, default_metricgroup_names[i],
2108
/*metric_no_group=*/false,
2109
/*metric_no_merge=*/false,
2110
/*metric_no_threshold=*/true,
2111
stat_config.user_requested_cpu_list,
2112
stat_config.system_wide,
2113
stat_config.hardware_aware_grouping) < 0) {
2114
evlist__delete(metric_evlist);
2115
ret = -1;
2116
break;
2117
}
2118
2119
evlist__for_each_entry(metric_evlist, evsel)
2120
evsel->default_metricgroup = true;
2121
2122
evlist__splice_list_tail(evlist, &metric_evlist->core.entries);
2123
metricgroup__copy_metric_events(evlist, /*cgrp=*/NULL,
2124
&evlist->metric_events,
2125
&metric_evlist->metric_events);
2126
evlist__delete(metric_evlist);
2127
}
2128
list_sort(/*priv=*/NULL, &evlist->core.entries, default_evlist_evsel_cmp);
2129
2130
}
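/*
 * Illustrative example (assuming the platform's json metrics provide the
 * "Default".."Default4" groups): a plain 'perf stat' run adds only
 * "Default", while 'perf stat -d -d' also adds "Default2" and "Default3",
 * because each -d raises detailed_run by one and the loop above stops once
 * the group index exceeds it.
 */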
2131
out:
2132
if (!ret) {
2133
evlist__for_each_entry(evlist, evsel) {
2134
/*
2135
* Make at least one event non-skippable so fatal errors are visible.
2136
* 'cycles' always used to be default and non-skippable, so use that.
2137
*/
2138
if (!evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
2139
evsel->skippable = true;
2140
}
2141
}
2142
parse_events_error__exit(&err);
2143
evlist__splice_list_tail(evsel_list, &evlist->core.entries);
2144
metricgroup__copy_metric_events(evsel_list, /*cgrp=*/NULL,
2145
&evsel_list->metric_events,
2146
&evlist->metric_events);
2147
evlist__delete(evlist);
2148
return ret;
2149
}
2150
2151
static const char * const stat_record_usage[] = {
2152
"perf stat record [<options>]",
2153
NULL,
2154
};
2155
2156
static void init_features(struct perf_session *session)
2157
{
2158
int feat;
2159
2160
for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
2161
perf_header__set_feat(&session->header, feat);
2162
2163
perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
2164
perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
2165
perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
2166
perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
2167
perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
2168
}
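/*
 * Note: 'perf stat record' produces no samples, so after enabling every
 * header feature above, the sample-oriented ones (directory format, build
 * IDs, tracing data, branch stacks, auxtrace) are cleared again and only the
 * remaining features end up in the perf.data header.
 */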
2169
2170
static int __cmd_record(const struct option stat_options[], struct opt_aggr_mode *opt_mode,
2171
int argc, const char **argv)
2172
{
2173
struct perf_session *session;
2174
struct perf_data *data = &perf_stat.data;
2175
2176
argc = parse_options(argc, argv, stat_options, stat_record_usage,
2177
PARSE_OPT_STOP_AT_NON_OPTION);
2178
stat_config.aggr_mode = opt_aggr_mode_to_aggr_mode(opt_mode);
2179
2180
if (output_name)
2181
data->path = output_name;
2182
2183
if (stat_config.run_count != 1 || forever) {
2184
pr_err("Cannot use -r option with perf stat record.\n");
2185
return -1;
2186
}
2187
2188
session = perf_session__new(data, NULL);
2189
if (IS_ERR(session)) {
2190
pr_err("Perf session creation failed\n");
2191
return PTR_ERR(session);
2192
}
2193
2194
init_features(session);
2195
2196
session->evlist = evsel_list;
2197
perf_stat.session = session;
2198
perf_stat.record = true;
2199
return argc;
2200
}
2201
2202
static int process_stat_round_event(const struct perf_tool *tool __maybe_unused,
2203
struct perf_session *session,
2204
union perf_event *event)
2205
{
2206
struct perf_record_stat_round *stat_round = &event->stat_round;
2207
struct timespec tsh, *ts = NULL;
2208
struct perf_env *env = perf_session__env(session);
2209
const char **argv = env->cmdline_argv;
2210
int argc = env->nr_cmdline;
2211
2212
process_counters();
2213
2214
if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
2215
update_stats(stat_config.walltime_nsecs_stats, stat_round->time);
2216
2217
if (stat_config.interval && stat_round->time) {
2218
tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
2219
tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
2220
ts = &tsh;
2221
}
2222
2223
print_counters(ts, argc, argv);
2224
return 0;
2225
}
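/*
 * Note on the handler above: stat_round->time is a nanosecond timestamp, so
 * it is split into tv_sec/tv_nsec with NSEC_PER_SEC before being passed to
 * print_counters(), which lets 'perf stat report' reproduce the interval
 * output of a 'perf stat record -I <ms>' session.
 */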
2226
2227
static
2228
int process_stat_config_event(const struct perf_tool *tool,
2229
struct perf_session *session,
2230
union perf_event *event)
2231
{
2232
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
2233
2234
perf_event__read_stat_config(&stat_config, &event->stat_config);
2235
2236
if (perf_cpu_map__is_empty(st->cpus)) {
2237
if (st->aggr_mode != AGGR_UNSET)
2238
pr_warning("warning: processing task data, aggregation mode not set\n");
2239
} else if (st->aggr_mode != AGGR_UNSET) {
2240
stat_config.aggr_mode = st->aggr_mode;
2241
}
2242
2243
if (perf_stat.data.is_pipe)
2244
perf_stat_init_aggr_mode();
2245
else
2246
perf_stat_init_aggr_mode_file(st);
2247
2248
if (stat_config.aggr_map) {
2249
int nr_aggr = stat_config.aggr_map->nr;
2250
2251
if (evlist__alloc_aggr_stats(session->evlist, nr_aggr) < 0) {
2252
pr_err("cannot allocate aggr counts\n");
2253
return -1;
2254
}
2255
}
2256
return 0;
2257
}
2258
2259
static int set_maps(struct perf_stat *st)
2260
{
2261
if (!st->cpus || !st->threads)
2262
return 0;
2263
2264
if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
2265
return -EINVAL;
2266
2267
perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
2268
2269
if (evlist__alloc_stats(&stat_config, evsel_list, /*alloc_raw=*/true))
2270
return -ENOMEM;
2271
2272
st->maps_allocated = true;
2273
return 0;
2274
}
2275
2276
static
2277
int process_thread_map_event(const struct perf_tool *tool,
2278
struct perf_session *session __maybe_unused,
2279
union perf_event *event)
2280
{
2281
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
2282
2283
if (st->threads) {
2284
pr_warning("Extra thread map event, ignoring.\n");
2285
return 0;
2286
}
2287
2288
st->threads = thread_map__new_event(&event->thread_map);
2289
if (!st->threads)
2290
return -ENOMEM;
2291
2292
return set_maps(st);
2293
}
2294
2295
static
2296
int process_cpu_map_event(const struct perf_tool *tool,
2297
struct perf_session *session __maybe_unused,
2298
union perf_event *event)
2299
{
2300
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
2301
struct perf_cpu_map *cpus;
2302
2303
if (st->cpus) {
2304
pr_warning("Extra cpu map event, ignoring.\n");
2305
return 0;
2306
}
2307
2308
cpus = cpu_map__new_data(&event->cpu_map.data);
2309
if (!cpus)
2310
return -ENOMEM;
2311
2312
st->cpus = cpus;
2313
return set_maps(st);
2314
}
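/*
 * Note: set_maps() does nothing until both the thread map and the CPU map
 * events have been seen, so whichever of the two handlers above runs last
 * performs the evlist map setup and stats allocation; extra map events are
 * ignored with a warning and a second allocation trips the WARN_ONCE() in
 * set_maps().
 */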
2315
2316
static const char * const stat_report_usage[] = {
2317
"perf stat report [<options>]",
2318
NULL,
2319
};
2320
2321
static struct perf_stat perf_stat = {
2322
.aggr_mode = AGGR_UNSET,
2323
.aggr_level = 0,
2324
};
2325
2326
static int __cmd_report(int argc, const char **argv)
2327
{
2328
struct perf_session *session;
2329
const struct option options[] = {
2330
OPT_STRING('i', "input", &input_name, "file", "input file name"),
2331
OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
2332
"aggregate counts per processor socket", AGGR_SOCKET),
2333
OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
2334
"aggregate counts per processor die", AGGR_DIE),
2335
OPT_SET_UINT(0, "per-cluster", &perf_stat.aggr_mode,
2336
"aggregate counts perf processor cluster", AGGR_CLUSTER),
2337
OPT_CALLBACK_OPTARG(0, "per-cache", &perf_stat.aggr_mode, &perf_stat.aggr_level,
2338
"cache level",
2339
"aggregate count at this cache level (Default: LLC)",
2340
parse_cache_level),
2341
OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
2342
"aggregate counts per physical processor core", AGGR_CORE),
2343
OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
2344
"aggregate counts per numa node", AGGR_NODE),
2345
OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
2346
"disable CPU count aggregation", AGGR_NONE),
2347
OPT_END()
2348
};
2349
struct stat st;
2350
int ret;
2351
2352
argc = parse_options(argc, argv, options, stat_report_usage, 0);
2353
2354
if (!input_name || !strlen(input_name)) {
2355
if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
2356
input_name = "-";
2357
else
2358
input_name = "perf.data";
2359
}
2360
2361
perf_stat.data.path = input_name;
2362
perf_stat.data.mode = PERF_DATA_MODE_READ;
2363
2364
perf_tool__init(&perf_stat.tool, /*ordered_events=*/false);
2365
perf_stat.tool.attr = perf_event__process_attr;
2366
perf_stat.tool.event_update = perf_event__process_event_update;
2367
perf_stat.tool.thread_map = process_thread_map_event;
2368
perf_stat.tool.cpu_map = process_cpu_map_event;
2369
perf_stat.tool.stat_config = process_stat_config_event;
2370
perf_stat.tool.stat = perf_event__process_stat_event;
2371
perf_stat.tool.stat_round = process_stat_round_event;
2372
2373
session = perf_session__new(&perf_stat.data, &perf_stat.tool);
2374
if (IS_ERR(session))
2375
return PTR_ERR(session);
2376
2377
perf_stat.session = session;
2378
stat_config.output = stderr;
2379
evlist__delete(evsel_list);
2380
evsel_list = session->evlist;
2381
2382
ret = perf_session__process_events(session);
2383
if (ret)
2384
return ret;
2385
2386
perf_session__delete(session);
2387
return 0;
2388
}
2389
2390
static void setup_system_wide(int forks)
2391
{
2392
/*
2393
* Make system wide (-a) the default target if
2394
* no target was specified and one of the following
2395
* conditions is met:
2396
*
2397
* - there's no workload specified
2398
* - there is a workload specified but all requested
2399
* events are system wide events
2400
*/
2401
if (!target__none(&target))
2402
return;
2403
2404
if (!forks)
2405
target.system_wide = true;
2406
else {
2407
struct evsel *counter;
2408
2409
evlist__for_each_entry(evsel_list, counter) {
2410
if (!counter->core.requires_cpu &&
2411
!evsel__name_is(counter, "duration_time")) {
2412
return;
2413
}
2414
}
2415
2416
if (evsel_list->core.nr_entries)
2417
target.system_wide = true;
2418
}
2419
}
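/*
 * Illustrative example (the event is an assumption about the platform): a
 * run such as
 *
 *   perf stat -e power/energy-pkg/ -- ./workload
 *
 * would still be promoted to system-wide here if every requested evsel sets
 * core.requires_cpu, since the loop above only bails out when it finds a
 * counter that can be tied to the forked workload.
 */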
2420
2421
#ifdef HAVE_ARCH_X86_64_SUPPORT
2422
static int parse_tpebs_mode(const struct option *opt, const char *str,
2423
int unset __maybe_unused)
2424
{
2425
enum tpebs_mode *mode = opt->value;
2426
2427
if (!strcasecmp("mean", str)) {
2428
*mode = TPEBS_MODE__MEAN;
2429
return 0;
2430
}
2431
if (!strcasecmp("min", str)) {
2432
*mode = TPEBS_MODE__MIN;
2433
return 0;
2434
}
2435
if (!strcasecmp("max", str)) {
2436
*mode = TPEBS_MODE__MAX;
2437
return 0;
2438
}
2439
if (!strcasecmp("last", str)) {
2440
*mode = TPEBS_MODE__LAST;
2441
return 0;
2442
}
2443
return -1;
2444
}
2445
#endif // HAVE_ARCH_X86_64_SUPPORT
2446
2447
int cmd_stat(int argc, const char **argv)
2448
{
2449
struct opt_aggr_mode opt_mode = {};
2450
struct option stat_options[] = {
2451
OPT_BOOLEAN('T', "transaction", &transaction_run,
2452
"hardware transaction statistics"),
2453
OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
2454
"event selector. use 'perf list' to list available events",
2455
parse_events_option),
2456
OPT_CALLBACK(0, "filter", &evsel_list, "filter",
2457
"event filter", parse_filter),
2458
OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
2459
"child tasks do not inherit counters"),
2460
OPT_STRING('p', "pid", &target.pid, "pid",
2461
"stat events on existing process id"),
2462
OPT_STRING('t', "tid", &target.tid, "tid",
2463
"stat events on existing thread id"),
2464
#ifdef HAVE_BPF_SKEL
2465
OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
2466
"stat events on existing bpf program id"),
2467
OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf,
2468
"use bpf program to count events"),
2469
OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path",
2470
"path to perf_event_attr map"),
2471
#endif
2472
OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
2473
"system-wide collection from all CPUs"),
2474
OPT_BOOLEAN(0, "scale", &stat_config.scale,
2475
"Use --no-scale to disable counter scaling for multiplexing"),
2476
OPT_INCR('v', "verbose", &verbose,
2477
"be more verbose (show counter open errors, etc)"),
2478
OPT_INTEGER('r', "repeat", &stat_config.run_count,
2479
"repeat command and print average + stddev (max: 100, forever: 0)"),
2480
OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
2481
"display details about each run (only with -r option)"),
2482
OPT_BOOLEAN('n', "null", &stat_config.null_run,
2483
"null run - dont start any counters"),
2484
OPT_INCR('d', "detailed", &detailed_run,
2485
"detailed run - start a lot of events"),
2486
OPT_BOOLEAN('S', "sync", &sync_run,
2487
"call sync() before starting a run"),
2488
OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
2489
"print large numbers with thousands\' separators",
2490
stat__set_big_num),
2491
OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
2492
"list of cpus to monitor in system-wide"),
2493
OPT_BOOLEAN('A', "no-aggr", &opt_mode.no_aggr,
2494
"disable aggregation across CPUs or PMUs"),
2495
OPT_BOOLEAN(0, "no-merge", &opt_mode.no_aggr,
2496
"disable aggregation the same as -A or -no-aggr"),
2497
OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
2498
"Merge identical named hybrid events"),
2499
OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
2500
"print counts with custom separator"),
2501
OPT_BOOLEAN('j', "json-output", &stat_config.json_output,
2502
"print counts in JSON format"),
2503
OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
2504
"monitor event in cgroup name only", parse_stat_cgroups),
2505
OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
2506
"expand events for each cgroup"),
2507
OPT_STRING('o', "output", &output_name, "file", "output file name"),
2508
OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
2509
OPT_INTEGER(0, "log-fd", &output_fd,
2510
"log output to fd, instead of stderr"),
2511
OPT_STRING(0, "pre", &pre_cmd, "command",
2512
"command to run prior to the measured command"),
2513
OPT_STRING(0, "post", &post_cmd, "command",
2514
"command to run after to the measured command"),
2515
OPT_UINTEGER('I', "interval-print", &stat_config.interval,
2516
"print counts at regular interval in ms "
2517
"(overhead is possible for values <= 100ms)"),
2518
OPT_INTEGER(0, "interval-count", &stat_config.times,
2519
"print counts for fixed number of times"),
2520
OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
2521
"clear screen in between new interval"),
2522
OPT_UINTEGER(0, "timeout", &stat_config.timeout,
2523
"stop workload and print counts after a timeout period in ms (>= 10ms)"),
2524
OPT_BOOLEAN(0, "per-socket", &opt_mode.socket,
2525
"aggregate counts per processor socket"),
2526
OPT_BOOLEAN(0, "per-die", &opt_mode.die, "aggregate counts per processor die"),
2527
OPT_BOOLEAN(0, "per-cluster", &opt_mode.cluster,
2528
"aggregate counts per processor cluster"),
2529
OPT_CALLBACK_OPTARG(0, "per-cache", &opt_mode, &stat_config.aggr_level,
2530
"cache level", "aggregate count at this cache level (Default: LLC)",
2531
parse_cache_level),
2532
OPT_BOOLEAN(0, "per-core", &opt_mode.core,
2533
"aggregate counts per physical processor core"),
2534
OPT_BOOLEAN(0, "per-thread", &opt_mode.thread, "aggregate counts per thread"),
2535
OPT_BOOLEAN(0, "per-node", &opt_mode.node, "aggregate counts per numa node"),
2536
OPT_INTEGER('D', "delay", &target.initial_delay,
2537
"ms to wait before starting measurement after program start (-1: start with events disabled)"),
2538
OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
2539
"Only print computed metrics. No raw values", enable_metric_only),
2540
OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
2541
"don't group metric events, impacts multiplexing"),
2542
OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
2543
"don't try to share events between metrics in a group"),
2544
OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold,
2545
"disable adding events for the metric threshold calculation"),
2546
OPT_BOOLEAN(0, "topdown", &topdown_run,
2547
"measure top-down statistics"),
2548
#ifdef HAVE_ARCH_X86_64_SUPPORT
2549
OPT_BOOLEAN(0, "record-tpebs", &tpebs_recording,
2550
"enable recording for tpebs when retire_latency required"),
2551
OPT_CALLBACK(0, "tpebs-mode", &tpebs_mode, "tpebs-mode",
2552
"Mode of TPEBS recording: mean, min or max",
2553
parse_tpebs_mode),
2554
#endif
2555
OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
2556
"Set the metrics level for the top-down statistics (0: max level)"),
2557
OPT_BOOLEAN(0, "smi-cost", &smi_cost,
2558
"measure SMI cost"),
2559
OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
2560
"monitor specified metrics or metric groups (separated by ,)",
2561
append_metric_groups),
2562
OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
2563
"Configure all used events to run in kernel space.",
2564
PARSE_OPT_EXCLUSIVE),
2565
OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
2566
"Configure all used events to run in user space.",
2567
PARSE_OPT_EXCLUSIVE),
2568
OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
2569
"Use with 'percore' event qualifier to show the event "
2570
"counts of one hardware thread by sum up total hardware "
2571
"threads of same physical core"),
2572
OPT_BOOLEAN(0, "summary", &stat_config.summary,
2573
"print summary for interval mode"),
2574
OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
2575
"don't print 'summary' for CSV summary output"),
2576
OPT_BOOLEAN(0, "quiet", &quiet,
2577
"don't print any output, messages or warnings (useful with record)"),
2578
OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
2579
"Only enable events on applying cpu with this type "
2580
"for hybrid platform (e.g. core or atom)",
2581
parse_cputype),
2582
#ifdef HAVE_LIBPFM
2583
OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
2584
"libpfm4 event selector. use 'perf list' to list available events",
2585
parse_libpfm_events_option),
2586
#endif
2587
OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
2588
"Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
2589
"\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
2590
"\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
2591
parse_control_option),
2592
OPT_CALLBACK_OPTARG(0, "iostat", &evsel_list, &stat_config, "default",
2593
"measure I/O performance metrics provided by arch/platform",
2594
iostat_parse),
2595
OPT_END()
2596
};
2597
const char * const stat_usage[] = {
2598
"perf stat [<options>] [<command>]",
2599
NULL
2600
};
2601
int status = -EINVAL, run_idx, err;
2602
const char *mode;
2603
FILE *output = stderr;
2604
unsigned int interval, timeout;
2605
const char * const stat_subcommands[] = { "record", "report" };
2606
char errbuf[BUFSIZ];
2607
struct evsel *counter;
2608
2609
setlocale(LC_ALL, "");
2610
2611
evsel_list = evlist__new();
2612
if (evsel_list == NULL)
2613
return -ENOMEM;
2614
2615
parse_events__shrink_config_terms();
2616
2617
/* String-parsing callback-based options would segfault when negated */
2618
set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG);
2619
set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG);
2620
set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG);
2621
2622
argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
2623
(const char **) stat_usage,
2624
PARSE_OPT_STOP_AT_NON_OPTION);
2625
2626
stat_config.aggr_mode = opt_aggr_mode_to_aggr_mode(&opt_mode);
2627
2628
if (stat_config.csv_sep) {
2629
stat_config.csv_output = true;
2630
if (!strcmp(stat_config.csv_sep, "\\t"))
2631
stat_config.csv_sep = "\t";
2632
} else
2633
stat_config.csv_sep = DEFAULT_SEPARATOR;
2634
2635
if (argc && strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
2636
argc = __cmd_record(stat_options, &opt_mode, argc, argv);
2637
if (argc < 0)
2638
return -1;
2639
} else if (argc && strlen(argv[0]) > 2 && strstarts("report", argv[0]))
2640
return __cmd_report(argc, argv);
2641
2642
interval = stat_config.interval;
2643
timeout = stat_config.timeout;
2644
2645
/*
2646
* For record command the -o is already taken care of.
2647
*/
2648
if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
2649
output = NULL;
2650
2651
if (output_name && output_fd) {
2652
fprintf(stderr, "cannot use both --output and --log-fd\n");
2653
parse_options_usage(stat_usage, stat_options, "o", 1);
2654
parse_options_usage(NULL, stat_options, "log-fd", 0);
2655
goto out;
2656
}
2657
2658
if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
2659
fprintf(stderr, "--metric-only is not supported with --per-thread\n");
2660
goto out;
2661
}
2662
2663
if (stat_config.metric_only && stat_config.run_count > 1) {
2664
fprintf(stderr, "--metric-only is not supported with -r\n");
2665
goto out;
2666
}
2667
2668
if (stat_config.csv_output || (stat_config.metric_only && stat_config.json_output)) {
2669
/*
2670
* Current CSV and metric-only JSON output doesn't display the
2671
* metric threshold so don't compute it.
2672
*/
2673
stat_config.metric_no_threshold = true;
2674
}
2675
2676
if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
2677
fprintf(stderr, "--table is only supported with -r\n");
2678
parse_options_usage(stat_usage, stat_options, "r", 1);
2679
parse_options_usage(NULL, stat_options, "table", 0);
2680
goto out;
2681
}
2682
2683
if (output_fd < 0) {
2684
fprintf(stderr, "argument to --log-fd must be a > 0\n");
2685
parse_options_usage(stat_usage, stat_options, "log-fd", 0);
2686
goto out;
2687
}
2688
2689
if (!output && !quiet) {
2690
struct timespec tm;
2691
mode = append_file ? "a" : "w";
2692
2693
output = fopen(output_name, mode);
2694
if (!output) {
2695
perror("failed to create output file");
2696
return -1;
2697
}
2698
if (!stat_config.json_output) {
2699
clock_gettime(CLOCK_REALTIME, &tm);
2700
fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
2701
}
2702
} else if (output_fd > 0) {
2703
mode = append_file ? "a" : "w";
2704
output = fdopen(output_fd, mode);
2705
if (!output) {
2706
perror("Failed opening logfd");
2707
return -errno;
2708
}
2709
}
2710
2711
if (stat_config.interval_clear && !isatty(fileno(output))) {
2712
fprintf(stderr, "--interval-clear does not work with output\n");
2713
parse_options_usage(stat_usage, stat_options, "o", 1);
2714
parse_options_usage(NULL, stat_options, "log-fd", 0);
2715
parse_options_usage(NULL, stat_options, "interval-clear", 0);
2716
return -1;
2717
}
2718
2719
stat_config.output = output;
2720
2721
/*
2722
* let the spreadsheet do the pretty-printing
2723
*/
2724
if (stat_config.csv_output) {
2725
/* User explicitly passed -B? */
2726
if (big_num_opt == 1) {
2727
fprintf(stderr, "-B option not supported with -x\n");
2728
parse_options_usage(stat_usage, stat_options, "B", 1);
2729
parse_options_usage(NULL, stat_options, "x", 1);
2730
goto out;
2731
} else /* Nope, so disable big number formatting */
2732
stat_config.big_num = false;
2733
} else if (big_num_opt == 0) /* User passed --no-big-num */
2734
stat_config.big_num = false;
2735
2736
target.inherit = !stat_config.no_inherit;
2737
err = target__validate(&target);
2738
if (err) {
2739
target__strerror(&target, err, errbuf, BUFSIZ);
2740
pr_warning("%s\n", errbuf);
2741
}
2742
2743
setup_system_wide(argc);
2744
2745
/*
2746
* Display user/system times only for single
2747
* run and when there's specified tracee.
2748
*/
2749
if ((stat_config.run_count == 1) && target__none(&target))
2750
stat_config.ru_display = true;
2751
2752
if (stat_config.run_count < 0) {
2753
pr_err("Run count must be a positive number\n");
2754
parse_options_usage(stat_usage, stat_options, "r", 1);
2755
goto out;
2756
} else if (stat_config.run_count == 0) {
2757
forever = true;
2758
stat_config.run_count = 1;
2759
}
2760
2761
if (stat_config.walltime_run_table) {
2762
stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
2763
if (!stat_config.walltime_run) {
2764
pr_err("failed to setup -r option");
2765
goto out;
2766
}
2767
}
2768
2769
if ((stat_config.aggr_mode == AGGR_THREAD) &&
2770
!target__has_task(&target)) {
2771
if (!target.system_wide || target.cpu_list) {
2772
fprintf(stderr, "The --per-thread option is only "
2773
"available when monitoring via -p -t -a "
2774
"options or only --per-thread.\n");
2775
parse_options_usage(NULL, stat_options, "p", 1);
2776
parse_options_usage(NULL, stat_options, "t", 1);
2777
goto out;
2778
}
2779
}
2780
2781
/*
2782
* no_aggr and cgroup modes are for system-wide monitoring only;
2783
* --per-thread is aggregated per thread, so we don't mix it with CPU mode.
2784
*/
2785
if (((stat_config.aggr_mode != AGGR_GLOBAL &&
2786
stat_config.aggr_mode != AGGR_THREAD) ||
2787
(nr_cgroups || stat_config.cgroup_list)) &&
2788
!target__has_cpu(&target)) {
2789
fprintf(stderr, "both cgroup and no-aggregation "
2790
"modes only available in system-wide mode\n");
2791
2792
parse_options_usage(stat_usage, stat_options, "G", 1);
2793
parse_options_usage(NULL, stat_options, "A", 1);
2794
parse_options_usage(NULL, stat_options, "a", 1);
2795
parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
2796
goto out;
2797
}
2798
2799
if (stat_config.iostat_run) {
2800
status = iostat_prepare(evsel_list, &stat_config);
2801
if (status)
2802
goto out;
2803
if (iostat_mode == IOSTAT_LIST) {
2804
iostat_list(evsel_list, &stat_config);
2805
goto out;
2806
} else if (verbose > 0)
2807
iostat_list(evsel_list, &stat_config);
2808
if (iostat_mode == IOSTAT_RUN && !target__has_cpu(&target))
2809
target.system_wide = true;
2810
}
2811
2812
if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
2813
target.per_thread = true;
2814
2815
stat_config.system_wide = target.system_wide;
2816
if (target.cpu_list) {
2817
stat_config.user_requested_cpu_list = strdup(target.cpu_list);
2818
if (!stat_config.user_requested_cpu_list) {
2819
status = -ENOMEM;
2820
goto out;
2821
}
2822
}
2823
2824
/*
2825
* Metric parsing needs to be delayed as metrics may optimize events
2826
* knowing the target is system-wide.
2827
*/
2828
if (metrics) {
2829
const char *pmu = parse_events_option_args.pmu_filter ?: "all";
2830
int ret = metricgroup__parse_groups(evsel_list, pmu, metrics,
2831
stat_config.metric_no_group,
2832
stat_config.metric_no_merge,
2833
stat_config.metric_no_threshold,
2834
stat_config.user_requested_cpu_list,
2835
stat_config.system_wide,
2836
stat_config.hardware_aware_grouping);
2837
2838
zfree(&metrics);
2839
if (ret) {
2840
status = ret;
2841
goto out;
2842
}
2843
}
2844
2845
if (add_default_events())
2846
goto out;
2847
2848
if (stat_config.cgroup_list) {
2849
if (nr_cgroups > 0) {
2850
pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
2851
parse_options_usage(stat_usage, stat_options, "G", 1);
2852
parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
2853
goto out;
2854
}
2855
2856
if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list, true) < 0) {
2857
parse_options_usage(stat_usage, stat_options,
2858
"for-each-cgroup", 0);
2859
goto out;
2860
}
2861
}
2862
#ifdef HAVE_BPF_SKEL
2863
if (target.use_bpf && nr_cgroups &&
2864
(evsel_list->core.nr_entries / nr_cgroups) > BPERF_CGROUP__MAX_EVENTS) {
2865
pr_warning("Disabling BPF counters due to more events (%d) than the max (%d)\n",
2866
evsel_list->core.nr_entries / nr_cgroups, BPERF_CGROUP__MAX_EVENTS);
2867
target.use_bpf = false;
2868
}
2869
#endif // HAVE_BPF_SKEL
2870
evlist__warn_user_requested_cpus(evsel_list, target.cpu_list);
2871
2872
evlist__for_each_entry(evsel_list, counter) {
2873
/*
2874
* Setup BPF counters to require CPUs as any(-1) isn't
2875
* supported. evlist__create_maps below will propagate this
2876
* information to the evsels. Note, evsel__is_bperf isn't yet
2877
* set up, and this change must happen early, so directly use
2878
* the bpf_counter variable and target information.
2879
*/
2880
if ((counter->bpf_counter || target.use_bpf) && !target__has_cpu(&target))
2881
counter->core.requires_cpu = true;
2882
}
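/*
 * Illustrative example (hypothetical pid, BPF skeleton support assumed):
 *
 *   perf stat --bpf-counters -p <pid>
 *
 * hits the loop above: no CPU list was given, so requires_cpu is forced on
 * every counter before evlist__create_maps() so the BPF counters get real
 * CPU contexts instead of cpu == -1.
 */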
2883
2884
if (evlist__create_maps(evsel_list, &target) < 0) {
2885
if (target__has_task(&target)) {
2886
pr_err("Problems finding threads of monitor\n");
2887
parse_options_usage(stat_usage, stat_options, "p", 1);
2888
parse_options_usage(NULL, stat_options, "t", 1);
2889
} else if (target__has_cpu(&target)) {
2890
perror("failed to parse CPUs map");
2891
parse_options_usage(stat_usage, stat_options, "C", 1);
2892
parse_options_usage(NULL, stat_options, "a", 1);
2893
}
2894
goto out;
2895
}
2896
2897
evlist__check_cpu_maps(evsel_list);
2898
2899
/*
2900
* Initialize thread_map with comm names,
2901
* so we could print it out on output.
2902
*/
2903
if (stat_config.aggr_mode == AGGR_THREAD) {
2904
thread_map__read_comms(evsel_list->core.threads);
2905
}
2906
2907
if (stat_config.aggr_mode == AGGR_NODE)
2908
cpu__setup_cpunode_map();
2909
2910
if (stat_config.times && interval)
2911
interval_count = true;
2912
else if (stat_config.times && !interval) {
2913
pr_err("interval-count option should be used together with "
2914
"interval-print.\n");
2915
parse_options_usage(stat_usage, stat_options, "interval-count", 0);
2916
parse_options_usage(stat_usage, stat_options, "I", 1);
2917
goto out;
2918
}
2919
2920
if (timeout && timeout < 100) {
2921
if (timeout < 10) {
2922
pr_err("timeout must be >= 10ms.\n");
2923
parse_options_usage(stat_usage, stat_options, "timeout", 0);
2924
goto out;
2925
} else
2926
pr_warning("timeout < 100ms. "
2927
"The overhead percentage could be high in some cases. "
2928
"Please proceed with caution.\n");
2929
}
2930
if (timeout && interval) {
2931
pr_err("timeout option is not supported with interval-print.\n");
2932
parse_options_usage(stat_usage, stat_options, "timeout", 0);
2933
parse_options_usage(stat_usage, stat_options, "I", 1);
2934
goto out;
2935
}
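/*
 * Summary of the interval/timeout checks above (values in ms):
 * --interval-count requires -I; --timeout below 10 is rejected and below
 * 100 only triggers an overhead warning; --timeout cannot be combined with
 * -I. For example, 'perf stat -I 1000 --interval-count 5 -- ./workload'
 * prints five one-second readings.
 */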
2936
2937
if (perf_stat_init_aggr_mode())
2938
goto out;
2939
2940
if (evlist__alloc_stats(&stat_config, evsel_list, interval))
2941
goto out;
2942
2943
/*
2944
* Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
2945
* while preventing older tools from showing confusing messages.
2946
*
2947
* However for pipe sessions we need to keep it zero,
2948
* because script's perf_evsel__check_attr is triggered
2949
* by attr->sample_type != 0, and we can't run it on
2950
* stat sessions.
2951
*/
2952
stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe);
2953
2954
/*
2955
* We don't want to block the signals - that would cause
2956
* child tasks to inherit that and Ctrl-C would not work.
2957
* What we want is for Ctrl-C to work in the exec()-ed
2958
* task, but being ignored by perf stat itself:
2959
*/
2960
atexit(sig_atexit);
2961
if (!forever)
2962
signal(SIGINT, skip_signal);
2963
signal(SIGCHLD, skip_signal);
2964
signal(SIGALRM, skip_signal);
2965
signal(SIGABRT, skip_signal);
2966
2967
if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack))
2968
goto out;
2969
2970
/* Enable ignoring missing threads when -p option is defined. */
2971
evlist__first(evsel_list)->ignore_missing_thread = target.pid;
2972
status = 0;
2973
for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
2974
if (stat_config.run_count != 1 && verbose > 0)
2975
fprintf(output, "[ perf stat: executing run #%d ... ]\n",
2976
run_idx + 1);
2977
2978
if (run_idx != 0)
2979
evlist__reset_prev_raw_counts(evsel_list);
2980
2981
status = run_perf_stat(argc, argv, run_idx);
2982
if (status < 0)
2983
break;
2984
2985
if (forever && !interval) {
2986
print_counters(NULL, argc, argv);
2987
perf_stat__reset_stats();
2988
}
2989
}
2990
2991
if (!forever && status != -1 && (!interval || stat_config.summary)) {
2992
if (stat_config.run_count > 1)
2993
evlist__copy_res_stats(&stat_config, evsel_list);
2994
print_counters(NULL, argc, argv);
2995
}
2996
2997
evlist__finalize_ctlfd(evsel_list);
2998
2999
if (STAT_RECORD) {
3000
/*
3001
* We synthesize the kernel mmap record just so that older tools
3002
* don't emit warnings about not being able to resolve symbols
3003
* due to /proc/sys/kernel/kptr_restrict settings and instead provide
3004
* a saner message about no samples being in the perf.data file.
3005
*
3006
* This also serves to suppress a warning about f_header.data.size == 0
3007
* in header.c at the moment 'perf stat record' gets introduced, which
3008
* is not really needed once we start adding the stat specific PERF_RECORD_
3009
* records, but the need to suppress the kptr_restrict messages in older
3010
* tools remains -acme
3011
*/
3012
int fd = perf_data__fd(&perf_stat.data);
3013
3014
err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
3015
process_synthesized_event,
3016
&perf_stat.session->machines.host);
3017
if (err) {
3018
pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
3019
"older tools may produce warnings about this file\n.");
3020
}
3021
3022
if (!interval) {
3023
if (WRITE_STAT_ROUND_EVENT(stat_config.walltime_nsecs_stats->max, FINAL))
3024
pr_err("failed to write stat round event\n");
3025
}
3026
3027
if (!perf_stat.data.is_pipe) {
3028
perf_stat.session->header.data_size += perf_stat.bytes_written;
3029
perf_session__write_header(perf_stat.session, evsel_list, fd, true);
3030
}
3031
3032
evlist__close(evsel_list);
3033
perf_session__delete(perf_stat.session);
3034
}
3035
3036
perf_stat__exit_aggr_mode();
3037
evlist__free_stats(evsel_list);
3038
out:
3039
if (stat_config.iostat_run)
3040
iostat_release(evsel_list);
3041
3042
zfree(&stat_config.walltime_run);
3043
zfree(&stat_config.user_requested_cpu_list);
3044
3045
if (smi_cost && smi_reset)
3046
sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
3047
3048
evlist__delete(evsel_list);
3049
3050
evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close);
3051
3052
/* Only the low byte of status becomes the exit code. */
3053
return abs(status);
3054
}
3055
3056