GitHub Repository: torvalds/linux
Path: blob/master/tools/perf/builtin-record.c
// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include <internal/xyarray.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/mutex.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/stat.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/pmu.h"
#include "util/pmus.h"
#include "util/clockid.h"
#include "util/off_cpu.h"
#include "util/bpf-filter.h"
#include "util/strbuf.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
        bool enabled;
        bool signal;
        unsigned long size;
        unsigned long time;
        const char *str;
        bool set;
        char **filenames;
        int num_files;
        int cur_file;
};

struct thread_mask {
        struct mmap_cpu_mask maps;
        struct mmap_cpu_mask affinity;
};

struct record_thread {
        pid_t tid;
        struct thread_mask *mask;
        struct {
                int msg[2];
                int ack[2];
        } pipes;
        struct fdarray pollfd;
        int ctlfd_pos;
        int nr_mmaps;
        struct mmap **maps;
        struct mmap **overwrite_maps;
        struct record *rec;
        unsigned long long samples;
        unsigned long waking;
        u64 bytes_written;
        u64 bytes_transferred;
        u64 bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
        THREAD_MSG__UNDEFINED = 0,
        THREAD_MSG__READY,
        THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
        "UNDEFINED", "READY"
};

enum thread_spec {
        THREAD_SPEC__UNDEFINED = 0,
        THREAD_SPEC__CPU,
        THREAD_SPEC__CORE,
        THREAD_SPEC__PACKAGE,
        THREAD_SPEC__NUMA,
        THREAD_SPEC__USER,
        THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
        "undefined", "cpu", "core", "package", "numa", "user"
};

struct pollfd_index_map {
        int evlist_pollfd_index;
        int thread_pollfd_index;
};

struct record {
        struct perf_tool tool;
        struct record_opts opts;
        u64 bytes_written;
        u64 thread_bytes_written;
        struct perf_data data;
        struct auxtrace_record *itr;
        struct evlist *evlist;
        struct perf_session *session;
        struct evlist *sb_evlist;
        pthread_t thread_id;
        int realtime_prio;
        bool latency;
        bool switch_output_event_set;
        bool no_buildid;
        bool no_buildid_set;
        bool no_buildid_cache;
        bool no_buildid_cache_set;
        bool buildid_all;
        bool buildid_mmap;
        bool buildid_mmap_set;
        bool timestamp_filename;
        bool timestamp_boundary;
        bool off_cpu;
        const char *filter_action;
        const char *uid_str;
        struct switch_output switch_output;
        unsigned long long samples;
        unsigned long output_max_size; /* = 0: unlimited */
        struct perf_debuginfod debuginfod;
        int nr_threads;
        struct thread_mask *thread_masks;
        struct record_thread *thread_data;
        struct pollfd_index_map *index_map;
        size_t index_map_sz;
        size_t index_map_cnt;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
        "SYS", "NODE", "CPU"
};

static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
                                  struct perf_sample *sample, struct machine *machine);
static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
                                   struct perf_sample *sample, struct machine *machine);
static int process_timestamp_boundary(const struct perf_tool *tool,
                                      union perf_event *event,
                                      struct perf_sample *sample,
                                      struct machine *machine);

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
        return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
        return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
        return rec->switch_output.signal &&
               trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
        return rec->switch_output.size &&
               trigger_is_ready(&switch_output_trigger) &&
               (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
        return rec->switch_output.time &&
               trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
        return rec->bytes_written + rec->thread_bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
        return rec->output_max_size &&
               (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
                         void *bf, size_t size)
{
        struct perf_data_file *file = &rec->session->data->file;

        if (map && map->file)
                file = map->file;

        if (perf_data_file__write(file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        if (map && map->file) {
                thread->bytes_written += size;
                rec->thread_bytes_written += size;
        } else {
                rec->bytes_written += size;
        }

        if (record__output_max_size_exceeded(rec) && !done) {
                fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
                                " stopping session ]\n",
                                record__bytes_written(rec) >> 10);
                done = 1;
        }

        if (switch_output_size(rec))
                trigger_hit(&switch_output_trigger);

        return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
                             void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
                             void *buf, size_t size, off_t off)
{
        int rc;

        cblock->aio_fildes = trace_fd;
        cblock->aio_buf = buf;
        cblock->aio_nbytes = size;
        cblock->aio_offset = off;
        cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

        do {
                rc = aio_write(cblock);
                if (rc == 0) {
                        break;
                } else if (errno != EAGAIN) {
                        cblock->aio_fildes = -1;
                        pr_err("failed to queue perf data, error: %m\n");
                        break;
                }
        } while (1);

        return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
        void *rem_buf;
        off_t rem_off;
        size_t rem_size;
        int rc, aio_errno;
        ssize_t aio_ret, written;

        aio_errno = aio_error(cblock);
        if (aio_errno == EINPROGRESS)
                return 0;

        written = aio_ret = aio_return(cblock);
        if (aio_ret < 0) {
                if (aio_errno != EINTR)
                        pr_err("failed to write perf data, error: %m\n");
                written = 0;
        }

        rem_size = cblock->aio_nbytes - written;

        if (rem_size == 0) {
                cblock->aio_fildes = -1;
                /*
                 * md->refcount is incremented in record__aio_pushfn() for
                 * every aio write request started in record__aio_push() so
                 * decrement it because the request is now complete.
                 */
                perf_mmap__put(&md->core);
                rc = 1;
        } else {
                /*
                 * aio write request may require restart with the
                 * remainder if the kernel didn't write whole
                 * chunk at once.
                 */
                rem_off = cblock->aio_offset + written;
                rem_buf = (void *)(cblock->aio_buf + written);
                record__aio_write(cblock, cblock->aio_fildes,
                                  rem_buf, rem_size, rem_off);
                rc = 0;
        }

        return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
        struct aiocb **aiocb = md->aio.aiocb;
        struct aiocb *cblocks = md->aio.cblocks;
        struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
        int i, do_suspend;

        do {
                do_suspend = 0;
                for (i = 0; i < md->aio.nr_cblocks; ++i) {
                        if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
                                if (sync_all)
                                        aiocb[i] = NULL;
                                else
                                        return i;
                        } else {
                                /*
                                 * Started aio write is not complete yet
                                 * so it has to be waited before the
                                 * next allocation.
                                 */
                                aiocb[i] = &cblocks[i];
                                do_suspend = 1;
                        }
                }
                if (!do_suspend)
                        return -1;

                while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
                        if (!(errno == EAGAIN || errno == EINTR))
                                pr_err("failed to sync perf data, error: %m\n");
                }
        } while (1);
}

struct record_aio {
        struct record *rec;
        void *data;
        size_t size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
        struct record_aio *aio = to;

        /*
         * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
         * to release space in the kernel buffer as fast as possible, calling
         * perf_mmap__consume() from perf_mmap__push() function.
         *
         * That lets the kernel proceed with storing more profiling data into
         * the kernel buffer earlier than other per-cpu kernel buffers are handled.
         *
         * Copying can be done in two steps in case the chunk of profiling data
         * crosses the upper bound of the kernel buffer. In this case we first move
         * part of data from map->start till the upper bound and then the remainder
         * from the beginning of the kernel buffer till the end of the data chunk.
         */

        if (record__comp_enabled(aio->rec)) {
                ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
                                                   mmap__mmap_len(map) - aio->size,
                                                   buf, size);
                if (compressed < 0)
                        return (int)compressed;

                size = compressed;
        } else {
                memcpy(aio->data + aio->size, buf, size);
        }

        if (!aio->size) {
                /*
                 * Increment map->refcount to guard map->aio.data[] buffer
                 * from premature deallocation because map object can be
                 * released earlier than aio write request started on
                 * map->aio.data[] buffer is complete.
                 *
                 * perf_mmap__put() is done at record__aio_complete()
                 * after started aio request completion or at record__aio_push()
                 * if the request failed to start.
                 */
                perf_mmap__get(&map->core);
        }

        aio->size += size;

        return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
        int ret, idx;
        int trace_fd = rec->session->data->file.fd;
        struct record_aio aio = { .rec = rec, .size = 0 };

        /*
         * Call record__aio_sync() to wait till map->aio.data[] buffer
         * becomes available after previous aio write operation.
         */

        idx = record__aio_sync(map, false);
        aio.data = map->aio.data[idx];
        ret = perf_mmap__push(map, &aio, record__aio_pushfn);
        if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
                return ret;

        rec->samples++;
        ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
        if (!ret) {
                *off += aio.size;
                rec->bytes_written += aio.size;
                if (switch_output_size(rec))
                        trigger_hit(&switch_output_trigger);
        } else {
                /*
                 * Decrement map->refcount incremented in record__aio_pushfn()
                 * back if record__aio_write() operation failed to start, otherwise
                 * map->refcount is decremented in record__aio_complete() after
                 * aio write operation finishes successfully.
                 */
                perf_mmap__put(&map->core);
        }

        return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
        return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
        lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
        int i;
        struct evlist *evlist = rec->evlist;
        struct mmap *maps = evlist->mmap;

        if (!record__aio_enabled(rec))
                return;

        for (i = 0; i < evlist->core.nr_mmaps; i++) {
                struct mmap *map = &maps[i];

                if (map->core.base)
                        record__aio_sync(map, true);
        }
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
                             const char *str,
                             int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;

        if (unset) {
                opts->nr_cblocks = 0;
        } else {
                if (str)
                        opts->nr_cblocks = strtol(str, NULL, 0);
                if (!opts->nr_cblocks)
                        opts->nr_cblocks = nr_cblocks_default;
        }

        return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
                            off_t *off __maybe_unused)
{
        return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
        return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
        return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
                                    const char *str,
                                    int unset)
{
        int flush_max;
        struct record_opts *opts = (struct record_opts *)opt->value;
        static struct parse_tag tags[] = {
                { .tag = 'B', .mult = 1 },
                { .tag = 'K', .mult = 1 << 10 },
                { .tag = 'M', .mult = 1 << 20 },
                { .tag = 'G', .mult = 1 << 30 },
                { .tag = 0 },
        };

        if (unset)
                return 0;

        if (str) {
                opts->mmap_flush = parse_tag_value(str, tags);
                if (opts->mmap_flush == (int)-1)
                        opts->mmap_flush = strtol(str, NULL, 0);
        }

        if (!opts->mmap_flush)
                opts->mmap_flush = MMAP_FLUSH_DEFAULT;

        flush_max = evlist__mmap_size(opts->mmap_pages);
        flush_max /= 4;
        if (opts->mmap_flush > flush_max)
                opts->mmap_flush = flush_max;

        return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = opt->value;

        if (unset) {
                opts->comp_level = 0;
        } else {
                if (str)
                        opts->comp_level = strtol(str, NULL, 0);
                if (!opts->comp_level)
                        opts->comp_level = comp_level_default;
        }

        return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
        return rec->opts.comp_level > 0;
}

static int process_synthesized_event(const struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, NULL, event, event->header.size);
}

static struct mutex synth_lock;

static int process_locked_synthesized_event(const struct perf_tool *tool,
                                            union perf_event *event,
                                            struct perf_sample *sample __maybe_unused,
                                            struct machine *machine __maybe_unused)
{
        int ret;

        mutex_lock(&synth_lock);
        ret = process_synthesized_event(tool, event, sample, machine);
        mutex_unlock(&synth_lock);
        return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
        struct record *rec = to;

        if (record__comp_enabled(rec)) {
                struct perf_record_compressed2 *event = map->data;
                size_t padding = 0;
                u8 pad[8] = {0};
                ssize_t compressed = zstd_compress(rec->session, map, map->data,
                                                   mmap__mmap_len(map), bf, size);

                if (compressed < 0)
                        return (int)compressed;

                bf = event;
                thread->samples++;

                /*
                 * The record from `zstd_compress` is not 8 bytes aligned, which would cause asan
                 * error. We make it aligned here.
                 */
                event->data_size = compressed - sizeof(struct perf_record_compressed2);
                event->header.size = PERF_ALIGN(compressed, sizeof(u64));
                padding = event->header.size - compressed;
                return record__write(rec, map, bf, compressed) ||
                       record__write(rec, map, &pad, padding);
        }

        thread->samples++;
        return record__write(rec, map, bf, size);
}

682
static volatile sig_atomic_t signr = -1;
683
static volatile sig_atomic_t child_finished;
684
#ifdef HAVE_EVENTFD_SUPPORT
685
static volatile sig_atomic_t done_fd = -1;
686
#endif
687
688
static void sig_handler(int sig)
689
{
690
if (sig == SIGCHLD)
691
child_finished = 1;
692
else
693
signr = sig;
694
695
done = 1;
696
#ifdef HAVE_EVENTFD_SUPPORT
697
if (done_fd >= 0) {
698
u64 tmp = 1;
699
int orig_errno = errno;
700
701
/*
702
* It is possible for this signal handler to run after done is
703
* checked in the main loop, but before the perf counter fds are
704
* polled. If this happens, the poll() will continue to wait
705
* even though done is set, and will only break out if either
706
* another signal is received, or the counters are ready for
707
* read. To ensure the poll() doesn't sleep when done is set,
708
* use an eventfd (done_fd) to wake up the poll().
709
*/
710
if (write(done_fd, &tmp, sizeof(tmp)) < 0)
711
pr_err("failed to signal wakeup fd, error: %m\n");
712
713
errno = orig_errno;
714
}
715
#endif // HAVE_EVENTFD_SUPPORT
716
}
717
718
static void sigsegv_handler(int sig)
719
{
720
perf_hooks__recover();
721
sighandler_dump_stack(sig);
722
}
723
724
static void record__sig_exit(void)
725
{
726
if (signr == -1)
727
return;
728
729
signal(signr, SIG_DFL);
730
raise(signr);
731
}
732
733
static int record__process_auxtrace(const struct perf_tool *tool,
734
struct mmap *map,
735
union perf_event *event, void *data1,
736
size_t len1, void *data2, size_t len2)
737
{
738
struct record *rec = container_of(tool, struct record, tool);
739
struct perf_data *data = &rec->data;
740
size_t padding;
741
u8 pad[8] = {0};
742
743
if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
744
off_t file_offset;
745
int fd = perf_data__fd(data);
746
int err;
747
748
file_offset = lseek(fd, 0, SEEK_CUR);
749
if (file_offset == -1)
750
return -1;
751
err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
752
event, file_offset);
753
if (err)
754
return err;
755
}
756
757
/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
758
padding = (len1 + len2) & 7;
759
if (padding)
760
padding = 8 - padding;
761
762
record__write(rec, map, event, event->header.size);
763
record__write(rec, map, data1, len1);
764
if (len2)
765
record__write(rec, map, data2, len2);
766
record__write(rec, map, &pad, padding);
767
768
return 0;
769
}
770
771
static int record__auxtrace_mmap_read(struct record *rec,
772
struct mmap *map)
773
{
774
int ret;
775
776
ret = auxtrace_mmap__read(map, rec->itr,
777
perf_session__env(rec->session),
778
&rec->tool,
779
record__process_auxtrace);
780
if (ret < 0)
781
return ret;
782
783
if (ret)
784
rec->samples++;
785
786
return 0;
787
}
788
789
static int record__auxtrace_mmap_read_snapshot(struct record *rec,
790
struct mmap *map)
791
{
792
int ret;
793
794
ret = auxtrace_mmap__read_snapshot(map, rec->itr,
795
perf_session__env(rec->session),
796
&rec->tool,
797
record__process_auxtrace,
798
rec->opts.auxtrace_snapshot_size);
799
if (ret < 0)
800
return ret;
801
802
if (ret)
803
rec->samples++;
804
805
return 0;
806
}
807
808
static int record__auxtrace_read_snapshot_all(struct record *rec)
809
{
810
int i;
811
int rc = 0;
812
813
for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
814
struct mmap *map = &rec->evlist->mmap[i];
815
816
if (!map->auxtrace_mmap.base)
817
continue;
818
819
if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
820
rc = -1;
821
goto out;
822
}
823
}
824
out:
825
return rc;
826
}
827
828
static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
829
{
830
pr_debug("Recording AUX area tracing snapshot\n");
831
if (record__auxtrace_read_snapshot_all(rec) < 0) {
832
trigger_error(&auxtrace_snapshot_trigger);
833
} else {
834
if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
835
trigger_error(&auxtrace_snapshot_trigger);
836
else
837
trigger_ready(&auxtrace_snapshot_trigger);
838
}
839
}
840
841
static int record__auxtrace_snapshot_exit(struct record *rec)
842
{
843
if (trigger_is_error(&auxtrace_snapshot_trigger))
844
return 0;
845
846
if (!auxtrace_record__snapshot_started &&
847
auxtrace_record__snapshot_start(rec->itr))
848
return -1;
849
850
record__read_auxtrace_snapshot(rec, true);
851
if (trigger_is_error(&auxtrace_snapshot_trigger))
852
return -1;
853
854
return 0;
855
}
856
857
static int record__auxtrace_init(struct record *rec)
858
{
859
int err;
860
861
if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
862
&& record__threads_enabled(rec)) {
863
pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
864
return -EINVAL;
865
}
866
867
if (!rec->itr) {
868
rec->itr = auxtrace_record__init(rec->evlist, &err);
869
if (err)
870
return err;
871
}
872
873
err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
874
rec->opts.auxtrace_snapshot_opts);
875
if (err)
876
return err;
877
878
err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
879
rec->opts.auxtrace_sample_opts);
880
if (err)
881
return err;
882
883
err = auxtrace_parse_aux_action(rec->evlist);
884
if (err)
885
return err;
886
887
return auxtrace_parse_filters(rec->evlist);
888
}
889
890
static int record__config_text_poke(struct evlist *evlist)
891
{
892
struct evsel *evsel;
893
894
/* Nothing to do if text poke is already configured */
895
evlist__for_each_entry(evlist, evsel) {
896
if (evsel->core.attr.text_poke)
897
return 0;
898
}
899
900
evsel = evlist__add_dummy_on_all_cpus(evlist);
901
if (!evsel)
902
return -ENOMEM;
903
904
evsel->core.attr.text_poke = 1;
905
evsel->core.attr.ksymbol = 1;
906
evsel->immediate = true;
907
evsel__set_sample_bit(evsel, TIME);
908
909
return 0;
910
}
911
912
static int record__config_off_cpu(struct record *rec)
913
{
914
return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
915
}
916
917
static bool record__tracking_system_wide(struct record *rec)
918
{
919
struct evlist *evlist = rec->evlist;
920
struct evsel *evsel;
921
922
/*
923
* If non-dummy evsel exists, system_wide sideband is need to
924
* help parse sample information.
925
* For example, PERF_EVENT_MMAP event to help parse symbol,
926
* and PERF_EVENT_COMM event to help parse task executable name.
927
*/
928
evlist__for_each_entry(evlist, evsel) {
929
if (!evsel__is_dummy_event(evsel))
930
return true;
931
}
932
933
return false;
934
}
935
936
static int record__config_tracking_events(struct record *rec)
937
{
938
struct record_opts *opts = &rec->opts;
939
struct evlist *evlist = rec->evlist;
940
bool system_wide = false;
941
struct evsel *evsel;
942
943
/*
944
* For initial_delay, system wide or a hybrid system, we need to add
945
* tracking event so that we can track PERF_RECORD_MMAP to cover the
946
* delay of waiting or event synthesis.
947
*/
948
if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
949
perf_pmus__num_core_pmus() > 1) {
950
/*
951
* User space tasks can migrate between CPUs, so when tracing
952
* selected CPUs, sideband for all CPUs is still needed.
953
*/
954
if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
955
system_wide = true;
956
957
evsel = evlist__findnew_tracking_event(evlist, system_wide);
958
if (!evsel)
959
return -ENOMEM;
960
961
/*
962
* Enable the tracking event when the process is forked for
963
* initial_delay, immediately for system wide.
964
*/
965
if (opts->target.initial_delay && !evsel->immediate &&
966
!target__has_cpu(&opts->target))
967
evsel->core.attr.enable_on_exec = 1;
968
else
969
evsel->immediate = 1;
970
}
971
972
return 0;
973
}
974
975
static bool record__kcore_readable(struct machine *machine)
976
{
977
char kcore[PATH_MAX];
978
int fd;
979
980
scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
981
982
fd = open(kcore, O_RDONLY);
983
if (fd < 0)
984
return false;
985
986
close(fd);
987
988
return true;
989
}
990
991
static int record__kcore_copy(struct machine *machine, struct perf_data *data)
992
{
993
char from_dir[PATH_MAX];
994
char kcore_dir[PATH_MAX];
995
int ret;
996
997
snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
998
999
ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
1000
if (ret)
1001
return ret;
1002
1003
return kcore_copy(from_dir, kcore_dir);
1004
}
1005
1006
static void record__thread_data_init_pipes(struct record_thread *thread_data)
1007
{
1008
thread_data->pipes.msg[0] = -1;
1009
thread_data->pipes.msg[1] = -1;
1010
thread_data->pipes.ack[0] = -1;
1011
thread_data->pipes.ack[1] = -1;
1012
}
1013
1014
static int record__thread_data_open_pipes(struct record_thread *thread_data)
1015
{
1016
if (pipe(thread_data->pipes.msg))
1017
return -EINVAL;
1018
1019
if (pipe(thread_data->pipes.ack)) {
1020
close(thread_data->pipes.msg[0]);
1021
thread_data->pipes.msg[0] = -1;
1022
close(thread_data->pipes.msg[1]);
1023
thread_data->pipes.msg[1] = -1;
1024
return -EINVAL;
1025
}
1026
1027
pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1028
thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1029
thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1030
1031
return 0;
1032
}
1033
1034
static void record__thread_data_close_pipes(struct record_thread *thread_data)
1035
{
1036
if (thread_data->pipes.msg[0] != -1) {
1037
close(thread_data->pipes.msg[0]);
1038
thread_data->pipes.msg[0] = -1;
1039
}
1040
if (thread_data->pipes.msg[1] != -1) {
1041
close(thread_data->pipes.msg[1]);
1042
thread_data->pipes.msg[1] = -1;
1043
}
1044
if (thread_data->pipes.ack[0] != -1) {
1045
close(thread_data->pipes.ack[0]);
1046
thread_data->pipes.ack[0] = -1;
1047
}
1048
if (thread_data->pipes.ack[1] != -1) {
1049
close(thread_data->pipes.ack[1]);
1050
thread_data->pipes.ack[1] = -1;
1051
}
1052
}
1053
1054
static bool evlist__per_thread(struct evlist *evlist)
1055
{
1056
return cpu_map__is_dummy(evlist->core.user_requested_cpus);
1057
}
1058
1059
static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
1060
{
1061
int m, tm, nr_mmaps = evlist->core.nr_mmaps;
1062
struct mmap *mmap = evlist->mmap;
1063
struct mmap *overwrite_mmap = evlist->overwrite_mmap;
1064
struct perf_cpu_map *cpus = evlist->core.all_cpus;
1065
bool per_thread = evlist__per_thread(evlist);
1066
1067
if (per_thread)
1068
thread_data->nr_mmaps = nr_mmaps;
1069
else
1070
thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
1071
thread_data->mask->maps.nbits);
1072
if (mmap) {
1073
thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1074
if (!thread_data->maps)
1075
return -ENOMEM;
1076
}
1077
if (overwrite_mmap) {
1078
thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1079
if (!thread_data->overwrite_maps) {
1080
zfree(&thread_data->maps);
1081
return -ENOMEM;
1082
}
1083
}
1084
pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1085
thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1086
1087
for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1088
if (per_thread ||
1089
test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1090
if (thread_data->maps) {
1091
thread_data->maps[tm] = &mmap[m];
1092
pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1093
thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1094
}
1095
if (thread_data->overwrite_maps) {
1096
thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1097
pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1098
thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1099
}
1100
tm++;
1101
}
1102
}
1103
1104
return 0;
1105
}
1106
1107
static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1108
{
1109
int f, tm, pos;
1110
struct mmap *map, *overwrite_map;
1111
1112
fdarray__init(&thread_data->pollfd, 64);
1113
1114
for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1115
map = thread_data->maps ? thread_data->maps[tm] : NULL;
1116
overwrite_map = thread_data->overwrite_maps ?
1117
thread_data->overwrite_maps[tm] : NULL;
1118
1119
for (f = 0; f < evlist->core.pollfd.nr; f++) {
1120
void *ptr = evlist->core.pollfd.priv[f].ptr;
1121
1122
if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1123
pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1124
&evlist->core.pollfd);
1125
if (pos < 0)
1126
return pos;
1127
pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1128
thread_data, pos, evlist->core.pollfd.entries[f].fd);
1129
}
1130
}
1131
}
1132
1133
return 0;
1134
}
1135
1136
static void record__free_thread_data(struct record *rec)
1137
{
1138
int t;
1139
struct record_thread *thread_data = rec->thread_data;
1140
1141
if (thread_data == NULL)
1142
return;
1143
1144
for (t = 0; t < rec->nr_threads; t++) {
1145
record__thread_data_close_pipes(&thread_data[t]);
1146
zfree(&thread_data[t].maps);
1147
zfree(&thread_data[t].overwrite_maps);
1148
fdarray__exit(&thread_data[t].pollfd);
1149
}
1150
1151
zfree(&rec->thread_data);
1152
}
1153
1154
static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1155
int evlist_pollfd_index,
1156
int thread_pollfd_index)
1157
{
1158
size_t x = rec->index_map_cnt;
1159
1160
if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1161
return -ENOMEM;
1162
rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1163
rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1164
rec->index_map_cnt += 1;
1165
return 0;
1166
}
1167
1168
static int record__update_evlist_pollfd_from_thread(struct record *rec,
1169
struct evlist *evlist,
1170
struct record_thread *thread_data)
1171
{
1172
struct pollfd *e_entries = evlist->core.pollfd.entries;
1173
struct pollfd *t_entries = thread_data->pollfd.entries;
1174
int err = 0;
1175
size_t i;
1176
1177
for (i = 0; i < rec->index_map_cnt; i++) {
1178
int e_pos = rec->index_map[i].evlist_pollfd_index;
1179
int t_pos = rec->index_map[i].thread_pollfd_index;
1180
1181
if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1182
e_entries[e_pos].events != t_entries[t_pos].events) {
1183
pr_err("Thread and evlist pollfd index mismatch\n");
1184
err = -EINVAL;
1185
continue;
1186
}
1187
e_entries[e_pos].revents = t_entries[t_pos].revents;
1188
}
1189
return err;
1190
}
1191
1192
static int record__dup_non_perf_events(struct record *rec,
1193
struct evlist *evlist,
1194
struct record_thread *thread_data)
1195
{
1196
struct fdarray *fda = &evlist->core.pollfd;
1197
int i, ret;
1198
1199
for (i = 0; i < fda->nr; i++) {
1200
if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1201
continue;
1202
ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1203
if (ret < 0) {
1204
pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1205
return ret;
1206
}
1207
pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1208
thread_data, ret, fda->entries[i].fd);
1209
ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1210
if (ret < 0) {
1211
pr_err("Failed to map thread and evlist pollfd indexes\n");
1212
return ret;
1213
}
1214
}
1215
return 0;
1216
}
1217
1218
static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1219
{
1220
int t, ret;
1221
struct record_thread *thread_data;
1222
1223
rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1224
if (!rec->thread_data) {
1225
pr_err("Failed to allocate thread data\n");
1226
return -ENOMEM;
1227
}
1228
thread_data = rec->thread_data;
1229
1230
for (t = 0; t < rec->nr_threads; t++)
1231
record__thread_data_init_pipes(&thread_data[t]);
1232
1233
for (t = 0; t < rec->nr_threads; t++) {
1234
thread_data[t].rec = rec;
1235
thread_data[t].mask = &rec->thread_masks[t];
1236
ret = record__thread_data_init_maps(&thread_data[t], evlist);
1237
if (ret) {
1238
pr_err("Failed to initialize thread[%d] maps\n", t);
1239
goto out_free;
1240
}
1241
ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1242
if (ret) {
1243
pr_err("Failed to initialize thread[%d] pollfd\n", t);
1244
goto out_free;
1245
}
1246
if (t) {
1247
thread_data[t].tid = -1;
1248
ret = record__thread_data_open_pipes(&thread_data[t]);
1249
if (ret) {
1250
pr_err("Failed to open thread[%d] communication pipes\n", t);
1251
goto out_free;
1252
}
1253
ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1254
POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1255
if (ret < 0) {
1256
pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1257
goto out_free;
1258
}
1259
thread_data[t].ctlfd_pos = ret;
1260
pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1261
thread_data, thread_data[t].ctlfd_pos,
1262
thread_data[t].pipes.msg[0]);
1263
} else {
1264
thread_data[t].tid = gettid();
1265
1266
ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
1267
if (ret < 0)
1268
goto out_free;
1269
1270
thread_data[t].ctlfd_pos = -1; /* Not used */
1271
}
1272
}
1273
1274
return 0;
1275
1276
out_free:
1277
record__free_thread_data(rec);
1278
1279
return ret;
1280
}
1281
1282
static int record__mmap_evlist(struct record *rec,
1283
struct evlist *evlist)
1284
{
1285
int i, ret;
1286
struct record_opts *opts = &rec->opts;
1287
bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1288
opts->auxtrace_sample_mode;
1289
char msg[512];
1290
1291
if (opts->affinity != PERF_AFFINITY_SYS)
1292
cpu__setup_cpunode_map();
1293
1294
if (evlist__mmap_ex(evlist, opts->mmap_pages,
1295
opts->auxtrace_mmap_pages,
1296
auxtrace_overwrite,
1297
opts->nr_cblocks, opts->affinity,
1298
opts->mmap_flush, opts->comp_level) < 0) {
1299
if (errno == EPERM) {
1300
pr_err("Permission error mapping pages.\n"
1301
"Consider increasing "
1302
"/proc/sys/kernel/perf_event_mlock_kb,\n"
1303
"or try again with a smaller value of -m/--mmap_pages.\n"
1304
"(current value: %u,%u)\n",
1305
opts->mmap_pages, opts->auxtrace_mmap_pages);
1306
return -errno;
1307
} else {
1308
pr_err("failed to mmap with %d (%s)\n", errno,
1309
str_error_r(errno, msg, sizeof(msg)));
1310
if (errno)
1311
return -errno;
1312
else
1313
return -EINVAL;
1314
}
1315
}
1316
1317
if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1318
return -1;
1319
1320
ret = record__alloc_thread_data(rec, evlist);
1321
if (ret)
1322
return ret;
1323
1324
if (record__threads_enabled(rec)) {
1325
ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1326
if (ret) {
1327
pr_err("Failed to create data directory: %s\n", strerror(-ret));
1328
return ret;
1329
}
1330
for (i = 0; i < evlist->core.nr_mmaps; i++) {
1331
if (evlist->mmap)
1332
evlist->mmap[i].file = &rec->data.dir.files[i];
1333
if (evlist->overwrite_mmap)
1334
evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1335
}
1336
}
1337
1338
return 0;
1339
}
1340
1341
static int record__mmap(struct record *rec)
1342
{
1343
return record__mmap_evlist(rec, rec->evlist);
1344
}
1345
1346
static int record__open(struct record *rec)
1347
{
1348
char msg[BUFSIZ];
1349
struct evsel *pos;
1350
struct evlist *evlist = rec->evlist;
1351
struct perf_session *session = rec->session;
1352
struct record_opts *opts = &rec->opts;
1353
int rc = 0;
1354
bool skipped = false;
1355
bool removed_tracking = false;
1356
1357
evlist__for_each_entry(evlist, pos) {
1358
if (removed_tracking) {
1359
/*
1360
* Normally the head of the list has tracking enabled
1361
* for sideband data like mmaps. If this event is
1362
* removed, make sure to add tracking to the next
1363
* processed event.
1364
*/
1365
if (!pos->tracking) {
1366
pos->tracking = true;
1367
evsel__config(pos, opts, &callchain_param);
1368
}
1369
removed_tracking = false;
1370
}
1371
try_again:
1372
if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1373
bool report_error = true;
1374
1375
if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
1376
if (verbose > 0)
1377
ui__warning("%s\n", msg);
1378
goto try_again;
1379
}
1380
if ((errno == EINVAL || errno == EBADF) &&
1381
pos->core.leader != &pos->core &&
1382
pos->weak_group) {
1383
pos = evlist__reset_weak_group(evlist, pos, true);
1384
goto try_again;
1385
}
1386
#if defined(__aarch64__) || defined(__arm__)
1387
if (strstr(evsel__name(pos), "cycles")) {
1388
struct evsel *pos2;
1389
/*
1390
* Unfortunately ARM has many events named
1391
* "cycles" on PMUs like the system-level (L3)
1392
* cache which don't support sampling. Only
1393
* display such failures to open when there is
1394
* only 1 cycles event or verbose is enabled.
1395
*/
1396
evlist__for_each_entry(evlist, pos2) {
1397
if (pos2 == pos)
1398
continue;
1399
if (strstr(evsel__name(pos2), "cycles")) {
1400
report_error = false;
1401
break;
1402
}
1403
}
1404
}
1405
#endif
1406
if (report_error || verbose > 0) {
1407
ui__error("Failure to open event '%s' on PMU '%s' which will be "
1408
"removed.\n%s\n",
1409
evsel__name(pos), evsel__pmu_name(pos), msg);
1410
}
1411
if (pos->tracking)
1412
removed_tracking = true;
1413
pos->skippable = true;
1414
skipped = true;
1415
}
1416
}
1417
1418
if (skipped) {
1419
struct evsel *tmp;
1420
int idx = 0;
1421
bool evlist_empty = true;
1422
1423
/* Remove evsels that failed to open and update indices. */
1424
evlist__for_each_entry_safe(evlist, tmp, pos) {
1425
if (pos->skippable) {
1426
evlist__remove(evlist, pos);
1427
continue;
1428
}
1429
1430
/*
1431
* Note, dummy events may be command line parsed or
1432
* added by the tool. We care about supporting `perf
1433
* record -e dummy` which may be used as a permission
1434
* check. Dummy events that are added to the command
1435
* line and opened along with other events that fail,
1436
* will still fail as if the dummy events were tool
1437
* added events for the sake of code simplicity.
1438
*/
1439
if (!evsel__is_dummy_event(pos))
1440
evlist_empty = false;
1441
}
1442
evlist__for_each_entry(evlist, pos) {
1443
pos->core.idx = idx++;
1444
}
1445
/* If list is empty then fail. */
1446
if (evlist_empty) {
1447
ui__error("Failure to open any events for recording.\n");
1448
rc = -1;
1449
goto out;
1450
}
1451
}
1452
if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1453
pr_warning(
1454
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1455
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1456
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1457
"file is not found in the buildid cache or in the vmlinux path.\n\n"
1458
"Samples in kernel modules won't be resolved at all.\n\n"
1459
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1460
"even with a suitable vmlinux or kallsyms file.\n\n");
1461
}
1462
1463
if (evlist__apply_filters(evlist, &pos, &opts->target)) {
1464
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1465
pos->filter ?: "BPF", evsel__name(pos), errno,
1466
str_error_r(errno, msg, sizeof(msg)));
1467
rc = -1;
1468
goto out;
1469
}
1470
1471
rc = record__mmap(rec);
1472
if (rc)
1473
goto out;
1474
1475
session->evlist = evlist;
1476
perf_session__set_id_hdr_size(session);
1477
out:
1478
return rc;
1479
}
1480
1481
static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1482
{
1483
if (rec->evlist->first_sample_time == 0)
1484
rec->evlist->first_sample_time = sample_time;
1485
1486
if (sample_time)
1487
rec->evlist->last_sample_time = sample_time;
1488
}
1489
1490
static int process_sample_event(const struct perf_tool *tool,
1491
union perf_event *event,
1492
struct perf_sample *sample,
1493
struct evsel *evsel,
1494
struct machine *machine)
1495
{
1496
struct record *rec = container_of(tool, struct record, tool);
1497
1498
set_timestamp_boundary(rec, sample->time);
1499
1500
if (rec->buildid_all)
1501
return 0;
1502
1503
rec->samples++;
1504
return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1505
}
1506
1507
static int process_buildids(struct record *rec)
1508
{
1509
struct perf_session *session = rec->session;
1510
1511
if (perf_data__size(&rec->data) == 0)
1512
return 0;
1513
1514
/*
1515
* During this process, it'll load kernel map and replace the
1516
* dso->long_name to a real pathname it found. In this case
1517
* we prefer the vmlinux path like
1518
* /lib/modules/3.16.4/build/vmlinux
1519
*
1520
* rather than build-id path (in debug directory).
1521
* $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1522
*/
1523
symbol_conf.ignore_vmlinux_buildid = true;
1524
1525
/*
1526
* If --buildid-all is given, it marks all DSO regardless of hits,
1527
* so no need to process samples. But if timestamp_boundary is enabled,
1528
* it still needs to walk on all samples to get the timestamps of
1529
* first/last samples.
1530
*/
1531
if (rec->buildid_all && !rec->timestamp_boundary)
1532
rec->tool.sample = process_event_sample_stub;
1533
1534
return perf_session__process_events(session);
1535
}
1536
1537
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1538
{
1539
int err;
1540
struct perf_tool *tool = data;
1541
/*
1542
*As for guest kernel when processing subcommand record&report,
1543
*we arrange module mmap prior to guest kernel mmap and trigger
1544
*a preload dso because default guest module symbols are loaded
1545
*from guest kallsyms instead of /lib/modules/XXX/XXX. This
1546
*method is used to avoid symbol missing when the first addr is
1547
*in module instead of in guest kernel.
1548
*/
1549
err = perf_event__synthesize_modules(tool, process_synthesized_event,
1550
machine);
1551
if (err < 0)
1552
pr_err("Couldn't record guest kernel [%d]'s reference"
1553
" relocation symbol.\n", machine->pid);
1554
1555
/*
1556
* We use _stext for guest kernel because guest kernel's /proc/kallsyms
1557
* have no _text sometimes.
1558
*/
1559
err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1560
machine);
1561
if (err < 0)
1562
pr_err("Couldn't record guest kernel [%d]'s reference"
1563
" relocation symbol.\n", machine->pid);
1564
}
1565
1566
static struct perf_event_header finished_round_event = {
1567
.size = sizeof(struct perf_event_header),
1568
.type = PERF_RECORD_FINISHED_ROUND,
1569
};
1570
1571
static struct perf_event_header finished_init_event = {
1572
.size = sizeof(struct perf_event_header),
1573
.type = PERF_RECORD_FINISHED_INIT,
1574
};
1575
1576
static void record__adjust_affinity(struct record *rec, struct mmap *map)
1577
{
1578
if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1579
!bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1580
thread->mask->affinity.nbits)) {
1581
bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1582
bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1583
map->affinity_mask.bits, thread->mask->affinity.nbits);
1584
sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1585
(cpu_set_t *)thread->mask->affinity.bits);
1586
if (verbose == 2) {
1587
pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1588
mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1589
}
1590
}
1591
}
1592
1593
static size_t process_comp_header(void *record, size_t increment)
1594
{
1595
struct perf_record_compressed2 *event = record;
1596
size_t size = sizeof(*event);
1597
1598
if (increment) {
1599
event->header.size += increment;
1600
return increment;
1601
}
1602
1603
event->header.type = PERF_RECORD_COMPRESSED2;
1604
event->header.size = size;
1605
1606
return size;
1607
}
1608
1609
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
1610
void *dst, size_t dst_size, void *src, size_t src_size)
1611
{
1612
ssize_t compressed;
1613
size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
1614
struct zstd_data *zstd_data = &session->zstd_data;
1615
1616
if (map && map->file)
1617
zstd_data = &map->zstd_data;
1618
1619
compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1620
max_record_size, process_comp_header);
1621
if (compressed < 0)
1622
return compressed;
1623
1624
if (map && map->file) {
1625
thread->bytes_transferred += src_size;
1626
thread->bytes_compressed += compressed;
1627
} else {
1628
session->bytes_transferred += src_size;
1629
session->bytes_compressed += compressed;
1630
}
1631
1632
return compressed;
1633
}
1634
1635
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1636
bool overwrite, bool synch)
1637
{
1638
u64 bytes_written = rec->bytes_written;
1639
int i;
1640
int rc = 0;
1641
int nr_mmaps;
1642
struct mmap **maps;
1643
int trace_fd = rec->data.file.fd;
1644
off_t off = 0;
1645
1646
if (!evlist)
1647
return 0;
1648
1649
nr_mmaps = thread->nr_mmaps;
1650
maps = overwrite ? thread->overwrite_maps : thread->maps;
1651
1652
if (!maps)
1653
return 0;
1654
1655
if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1656
return 0;
1657
1658
if (record__aio_enabled(rec))
1659
off = record__aio_get_pos(trace_fd);
1660
1661
for (i = 0; i < nr_mmaps; i++) {
1662
u64 flush = 0;
1663
struct mmap *map = maps[i];
1664
1665
if (map->core.base) {
1666
record__adjust_affinity(rec, map);
1667
if (synch) {
1668
flush = map->core.flush;
1669
map->core.flush = 1;
1670
}
1671
if (!record__aio_enabled(rec)) {
1672
if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1673
if (synch)
1674
map->core.flush = flush;
1675
rc = -1;
1676
goto out;
1677
}
1678
} else {
1679
if (record__aio_push(rec, map, &off) < 0) {
1680
record__aio_set_pos(trace_fd, off);
1681
if (synch)
1682
map->core.flush = flush;
1683
rc = -1;
1684
goto out;
1685
}
1686
}
1687
if (synch)
1688
map->core.flush = flush;
1689
}
1690
1691
if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1692
!rec->opts.auxtrace_sample_mode &&
1693
record__auxtrace_mmap_read(rec, map) != 0) {
1694
rc = -1;
1695
goto out;
1696
}
1697
}
1698
1699
if (record__aio_enabled(rec))
1700
record__aio_set_pos(trace_fd, off);
1701
1702
/*
1703
* Mark the round finished in case we wrote
1704
* at least one event.
1705
*
1706
* No need for round events in directory mode,
1707
* because per-cpu maps and files have data
1708
* sorted by kernel.
1709
*/
1710
if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1711
rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1712
1713
if (overwrite)
1714
evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1715
out:
1716
return rc;
1717
}
1718
1719
static int record__mmap_read_all(struct record *rec, bool synch)
1720
{
1721
int err;
1722
1723
err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1724
if (err)
1725
return err;
1726
1727
return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1728
}
1729
1730
static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1731
void *arg __maybe_unused)
1732
{
1733
struct perf_mmap *map = fda->priv[fd].ptr;
1734
1735
if (map)
1736
perf_mmap__put(map);
1737
}
1738
1739
static void *record__thread(void *arg)
1740
{
1741
enum thread_msg msg = THREAD_MSG__READY;
1742
bool terminate = false;
1743
struct fdarray *pollfd;
1744
int err, ctlfd_pos;
1745
1746
thread = arg;
1747
thread->tid = gettid();
1748
1749
err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1750
if (err == -1)
1751
pr_warning("threads[%d]: failed to notify on start: %s\n",
1752
thread->tid, strerror(errno));
1753
1754
pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1755
1756
pollfd = &thread->pollfd;
1757
ctlfd_pos = thread->ctlfd_pos;
1758
1759
for (;;) {
1760
unsigned long long hits = thread->samples;
1761
1762
if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1763
break;
1764
1765
if (hits == thread->samples) {
1766
1767
err = fdarray__poll(pollfd, -1);
1768
/*
1769
* Propagate error, only if there's any. Ignore positive
1770
* number of returned events and interrupt error.
1771
*/
1772
if (err > 0 || (err < 0 && errno == EINTR))
1773
err = 0;
1774
thread->waking++;
1775
1776
if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1777
record__thread_munmap_filtered, NULL) == 0)
1778
break;
1779
}
1780
1781
if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1782
terminate = true;
1783
close(thread->pipes.msg[0]);
1784
thread->pipes.msg[0] = -1;
1785
pollfd->entries[ctlfd_pos].fd = -1;
1786
pollfd->entries[ctlfd_pos].events = 0;
1787
}
1788
1789
pollfd->entries[ctlfd_pos].revents = 0;
1790
}
1791
record__mmap_read_all(thread->rec, true);
1792
1793
err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1794
if (err == -1)
1795
pr_warning("threads[%d]: failed to notify on termination: %s\n",
1796
thread->tid, strerror(errno));
1797
1798
return NULL;
1799
}
1800
1801
static void record__init_features(struct record *rec)
1802
{
1803
struct perf_session *session = rec->session;
1804
int feat;
1805
1806
for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1807
perf_header__set_feat(&session->header, feat);
1808
1809
if (rec->no_buildid)
1810
perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1811
1812
if (!have_tracepoints(&rec->evlist->core.entries))
1813
perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1814
1815
if (!rec->opts.branch_stack)
1816
perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1817
1818
if (!rec->opts.full_auxtrace)
1819
perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1820
1821
if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1822
perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1823
1824
if (!rec->opts.use_clockid)
1825
perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1826
1827
if (!record__threads_enabled(rec))
1828
perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1829
1830
if (!record__comp_enabled(rec))
1831
perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1832
1833
perf_header__clear_feat(&session->header, HEADER_STAT);
1834
}
1835
1836
static void
1837
record__finish_output(struct record *rec)
1838
{
1839
int i;
1840
struct perf_data *data = &rec->data;
1841
int fd = perf_data__fd(data);
1842
1843
if (data->is_pipe) {
1844
/* Just to display approx. size */
1845
data->file.size = rec->bytes_written;
1846
return;
1847
}
1848
1849
rec->session->header.data_size += rec->bytes_written;
1850
data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1851
if (record__threads_enabled(rec)) {
1852
for (i = 0; i < data->dir.nr; i++)
1853
data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1854
}
1855
1856
/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
1857
if (!rec->no_buildid || !rec->no_buildid_cache) {
1858
process_buildids(rec);
1859
1860
if (rec->buildid_all)
1861
perf_session__dsos_hit_all(rec->session);
1862
}
1863
perf_session__write_header(rec->session, rec->evlist, fd, true);
1864
perf_session__cache_build_ids(rec->session);
1865
}
1866
1867
static int record__synthesize_workload(struct record *rec, bool tail)
1868
{
1869
int err;
1870
struct perf_thread_map *thread_map;
1871
bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1872
1873
if (rec->opts.tail_synthesize != tail)
1874
return 0;
1875
1876
thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1877
if (thread_map == NULL)
1878
return -1;
1879
1880
err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1881
process_synthesized_event,
1882
&rec->session->machines.host,
1883
needs_mmap,
1884
rec->opts.sample_address);
1885
perf_thread_map__put(thread_map);
1886
return err;
1887
}
1888
1889
static int write_finished_init(struct record *rec, bool tail)
1890
{
1891
if (rec->opts.tail_synthesize != tail)
1892
return 0;
1893
1894
return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1895
}
1896
1897
static int record__synthesize(struct record *rec, bool tail);
1898
1899
static int
1900
record__switch_output(struct record *rec, bool at_exit)
1901
{
1902
struct perf_data *data = &rec->data;
1903
char *new_filename = NULL;
1904
int fd, err;
1905
1906
/* Same Size: "2015122520103046"*/
1907
char timestamp[] = "InvalidTimestamp";
1908
1909
record__aio_mmap_read_sync(rec);
1910
1911
write_finished_init(rec, true);
1912
1913
record__synthesize(rec, true);
1914
if (target__none(&rec->opts.target))
1915
record__synthesize_workload(rec, true);
1916
1917
rec->samples = 0;
1918
record__finish_output(rec);
1919
err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1920
if (err) {
1921
pr_err("Failed to get current timestamp\n");
1922
return -EINVAL;
1923
}
1924
1925
fd = perf_data__switch(data, timestamp,
1926
rec->session->header.data_offset,
1927
at_exit, &new_filename);
1928
if (fd >= 0 && !at_exit) {
1929
rec->bytes_written = 0;
1930
rec->session->header.data_size = 0;
1931
}
1932
1933
if (!quiet) {
1934
fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1935
data->path, timestamp);
1936
}
1937
1938
if (rec->switch_output.num_files) {
1939
int n = rec->switch_output.cur_file + 1;
1940
1941
if (n >= rec->switch_output.num_files)
1942
n = 0;
1943
rec->switch_output.cur_file = n;
1944
if (rec->switch_output.filenames[n]) {
1945
remove(rec->switch_output.filenames[n]);
1946
zfree(&rec->switch_output.filenames[n]);
1947
}
1948
rec->switch_output.filenames[n] = new_filename;
1949
} else {
1950
free(new_filename);
1951
}
1952
1953
/* Output tracking events */
1954
if (!at_exit) {
1955
record__synthesize(rec, false);
1956
1957
/*
1958
* In 'perf record --switch-output' without -a,
1959
* record__synthesize() in record__switch_output() won't
1960
* generate tracking events because there's no thread_map
1961
* in evlist. Which causes newly created perf.data doesn't
1962
* contain map and comm information.
1963
* Create a fake thread_map and directly call
1964
* perf_event__synthesize_thread_map() for those events.
1965
*/
1966
if (target__none(&rec->opts.target))
1967
record__synthesize_workload(rec, false);
1968
write_finished_init(rec, false);
1969
}
1970
return fd;
1971
}
1972
1973
static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1974
struct perf_record_lost_samples *lost,
1975
int cpu_idx, int thread_idx, u64 lost_count,
1976
u16 misc_flag)
1977
{
1978
struct perf_sample_id *sid;
1979
struct perf_sample sample;
1980
int id_hdr_size;
1981
1982
perf_sample__init(&sample, /*all=*/true);
1983
lost->lost = lost_count;
1984
if (evsel->core.ids) {
1985
sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1986
sample.id = sid->id;
1987
}
1988
1989
id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
1990
evsel->core.attr.sample_type, &sample);
1991
lost->header.size = sizeof(*lost) + id_hdr_size;
1992
lost->header.misc = misc_flag;
1993
record__write(rec, NULL, lost, lost->header.size);
1994
perf_sample__exit(&sample);
1995
}
1996
1997
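/*
 * Read the lost-sample counts from each event's counters and from the
 * BPF filter, emitting PERF_RECORD_LOST_SAMPLES events for non-zero
 * counts.
 */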
static void record__read_lost_samples(struct record *rec)
1998
{
1999
struct perf_session *session = rec->session;
2000
struct perf_record_lost_samples_and_ids lost;
2001
struct evsel *evsel;
2002
2003
/* there was an error during record__open */
2004
if (session->evlist == NULL)
2005
return;
2006
2007
evlist__for_each_entry(session->evlist, evsel) {
2008
struct xyarray *xy = evsel->core.sample_id;
2009
u64 lost_count;
2010
2011
if (xy == NULL || evsel->core.fd == NULL)
2012
continue;
2013
if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
2014
xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
2015
pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
2016
continue;
2017
}
2018
2019
for (int x = 0; x < xyarray__max_x(xy); x++) {
2020
for (int y = 0; y < xyarray__max_y(xy); y++) {
2021
struct perf_counts_values count;
2022
2023
if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
2024
pr_debug("read LOST count failed\n");
2025
return;
2026
}
2027
2028
if (count.lost) {
2029
memset(&lost, 0, sizeof(lost));
2030
lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
2031
__record__save_lost_samples(rec, evsel, &lost.lost,
2032
x, y, count.lost, 0);
2033
}
2034
}
2035
}
2036
2037
lost_count = perf_bpf_filter__lost_count(evsel);
2038
if (lost_count) {
2039
memset(&lost, 0, sizeof(lost));
2040
lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
2041
__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
2042
PERF_RECORD_MISC_LOST_SAMPLES_BPF);
2043
}
2044
}
2045
}
2046
2047
static volatile sig_atomic_t workload_exec_errno;
2048
2049
/*
2050
* evlist__prepare_workload will send a SIGUSR1
2051
* if the fork fails, since we asked for it by setting its
2052
* want_signal to true.
2053
*/
2054
static void workload_exec_failed_signal(int signo __maybe_unused,
2055
siginfo_t *info,
2056
void *ucontext __maybe_unused)
2057
{
2058
workload_exec_errno = info->si_value.sival_int;
2059
done = 1;
2060
child_finished = 1;
2061
}
2062
2063
static void snapshot_sig_handler(int sig);
2064
static void alarm_sig_handler(int sig);
2065
2066
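/*
 * Pick the first mmap'ed perf_event_mmap_page, used as the reference
 * for synthesizing the time conversion event below.
 */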
static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
2067
{
2068
if (evlist) {
2069
if (evlist->mmap && evlist->mmap[0].core.base)
2070
return evlist->mmap[0].core.base;
2071
if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
2072
return evlist->overwrite_mmap[0].core.base;
2073
}
2074
return NULL;
2075
}
2076
2077
static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
2078
{
2079
const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2080
if (pc)
2081
return pc;
2082
return NULL;
2083
}
2084
2085
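/*
 * Synthesize the non-sample metadata events (time conversion, id index,
 * auxtrace info, kernel and module mmaps, thread and cpu maps, BPF and
 * cgroup events, existing threads); for pipe output the events needed
 * by the pipe header are written first. Runs at the start of the
 * record, or at the end when --tail-synthesize is used.
 */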
static int record__synthesize(struct record *rec, bool tail)
2086
{
2087
struct perf_session *session = rec->session;
2088
struct machine *machine = &session->machines.host;
2089
struct perf_data *data = &rec->data;
2090
struct record_opts *opts = &rec->opts;
2091
struct perf_tool *tool = &rec->tool;
2092
int err = 0;
2093
event_op f = process_synthesized_event;
2094
2095
if (rec->opts.tail_synthesize != tail)
2096
return 0;
2097
2098
if (data->is_pipe) {
2099
err = perf_event__synthesize_for_pipe(tool, session, data,
2100
process_synthesized_event);
2101
if (err < 0)
2102
goto out;
2103
2104
rec->bytes_written += err;
2105
}
2106
2107
err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2108
process_synthesized_event, machine);
2109
if (err)
2110
goto out;
2111
2112
/* Synthesize id_index before auxtrace_info */
2113
err = perf_event__synthesize_id_index(tool,
2114
process_synthesized_event,
2115
session->evlist, machine);
2116
if (err)
2117
goto out;
2118
2119
if (rec->opts.full_auxtrace) {
2120
err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2121
session, process_synthesized_event);
2122
if (err)
2123
goto out;
2124
}
2125
2126
if (!evlist__exclude_kernel(rec->evlist)) {
2127
err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2128
machine);
2129
WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2130
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2131
"Check /proc/kallsyms permission or run as root.\n");
2132
2133
err = perf_event__synthesize_modules(tool, process_synthesized_event,
2134
machine);
2135
WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2136
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2137
"Check /proc/modules permission or run as root.\n");
2138
}
2139
2140
if (perf_guest) {
2141
machines__process_guests(&session->machines,
2142
perf_event__synthesize_guest_os, tool);
2143
}
2144
2145
err = perf_event__synthesize_extra_attr(&rec->tool,
2146
rec->evlist,
2147
process_synthesized_event,
2148
data->is_pipe);
2149
if (err)
2150
goto out;
2151
2152
err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2153
process_synthesized_event,
2154
NULL);
2155
if (err < 0) {
2156
pr_err("Couldn't synthesize thread map.\n");
2157
return err;
2158
}
2159
2160
err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2161
process_synthesized_event, NULL);
2162
if (err < 0) {
2163
pr_err("Couldn't synthesize cpu map.\n");
2164
return err;
2165
}
2166
2167
err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2168
machine, opts);
2169
if (err < 0) {
2170
pr_warning("Couldn't synthesize bpf events.\n");
2171
err = 0;
2172
}
2173
2174
if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2175
err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2176
machine);
2177
if (err < 0) {
2178
pr_warning("Couldn't synthesize cgroup events.\n");
2179
err = 0;
2180
}
2181
}
2182
2183
if (rec->opts.nr_threads_synthesize > 1) {
2184
mutex_init(&synth_lock);
2185
perf_set_multithreaded();
2186
f = process_locked_synthesized_event;
2187
}
2188
2189
if (rec->opts.synth & PERF_SYNTH_TASK) {
2190
bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2191
2192
err = __machine__synthesize_threads(machine, tool, &opts->target,
2193
rec->evlist->core.threads,
2194
f, needs_mmap, opts->sample_address,
2195
rec->opts.nr_threads_synthesize);
2196
}
2197
2198
if (rec->opts.nr_threads_synthesize > 1) {
2199
perf_set_singlethreaded();
2200
mutex_destroy(&synth_lock);
2201
}
2202
2203
out:
2204
return err;
2205
}
2206
2207
static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused)
2208
{
2209
#ifdef HAVE_LIBBPF_SUPPORT
2210
perf_event__synthesize_final_bpf_metadata(rec->session,
2211
process_synthesized_event);
2212
#endif
2213
}
2214
2215
static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2216
{
2217
struct record *rec = data;
2218
pthread_kill(rec->thread_id, SIGUSR2);
2219
return 0;
2220
}
2221
2222
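/*
 * Set up the side band evlist: hook up the SIGUSR2 callback for
 * --switch-output-event and, unless --no-bpf-event, add the
 * PERF_RECORD_BPF_EVENT side band event, then start the side band
 * thread.
 */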
static int record__setup_sb_evlist(struct record *rec)
2223
{
2224
struct record_opts *opts = &rec->opts;
2225
2226
if (rec->sb_evlist != NULL) {
2227
/*
2228
* We get here if --switch-output-event populated the
2229
* sb_evlist, so associate a callback that will send a SIGUSR2
2230
* to the main thread.
2231
*/
2232
evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2233
rec->thread_id = pthread_self();
2234
}
2235
#ifdef HAVE_LIBBPF_SUPPORT
2236
if (!opts->no_bpf_event) {
2237
if (rec->sb_evlist == NULL) {
2238
rec->sb_evlist = evlist__new();
2239
2240
if (rec->sb_evlist == NULL) {
2241
pr_err("Couldn't create side band evlist.\n.");
2242
return -1;
2243
}
2244
}
2245
2246
if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
2247
pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
2248
return -1;
2249
}
2250
}
2251
#endif
2252
if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2253
pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2254
opts->no_bpf_event = true;
2255
}
2256
2257
return 0;
2258
}
2259
2260
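/*
 * When --clockid is used, store the clockid and a pair of reference
 * timestamps (gettimeofday() and clock_gettime()) in the perf_env so
 * the two clocks can be related later.
 */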
static int record__init_clock(struct record *rec)
2261
{
2262
struct perf_session *session = rec->session;
2263
struct timespec ref_clockid;
2264
struct timeval ref_tod;
2265
struct perf_env *env = perf_session__env(session);
2266
u64 ref;
2267
2268
if (!rec->opts.use_clockid)
2269
return 0;
2270
2271
if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2272
env->clock.clockid_res_ns = rec->opts.clockid_res_ns;
2273
2274
env->clock.clockid = rec->opts.clockid;
2275
2276
if (gettimeofday(&ref_tod, NULL) != 0) {
2277
pr_err("gettimeofday failed, cannot set reference time.\n");
2278
return -1;
2279
}
2280
2281
if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2282
pr_err("clock_gettime failed, cannot set reference time.\n");
2283
return -1;
2284
}
2285
2286
ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2287
(u64) ref_tod.tv_usec * NSEC_PER_USEC;
2288
2289
env->clock.tod_ns = ref;
2290
2291
ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2292
(u64) ref_clockid.tv_nsec;
2293
2294
env->clock.clockid_ns = ref;
2295
return 0;
2296
}
2297
2298
static void hit_auxtrace_snapshot_trigger(struct record *rec)
2299
{
2300
if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2301
trigger_hit(&auxtrace_snapshot_trigger);
2302
auxtrace_record__snapshot_started = 1;
2303
if (auxtrace_record__snapshot_start(rec->itr))
2304
trigger_error(&auxtrace_snapshot_trigger);
2305
}
2306
}
2307
2308
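/*
 * Ask a streaming thread to terminate by closing the write end of its
 * message pipe, then wait for the THREAD_MSG__READY ack on its ack pipe.
 */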
static int record__terminate_thread(struct record_thread *thread_data)
2309
{
2310
int err;
2311
enum thread_msg ack = THREAD_MSG__UNDEFINED;
2312
pid_t tid = thread_data->tid;
2313
2314
close(thread_data->pipes.msg[1]);
2315
thread_data->pipes.msg[1] = -1;
2316
err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2317
if (err > 0)
2318
pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2319
else
2320
pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2321
thread->tid, tid);
2322
2323
return 0;
2324
}
2325
2326
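/*
 * Start the parallel trace streaming threads with all signals blocked,
 * pinning each one to its configured affinity mask, and wait for the
 * THREAD_MSG__READY notification from each before continuing.
 */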
static int record__start_threads(struct record *rec)
2327
{
2328
int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2329
struct record_thread *thread_data = rec->thread_data;
2330
sigset_t full, mask;
2331
pthread_t handle;
2332
pthread_attr_t attrs;
2333
2334
thread = &thread_data[0];
2335
2336
if (!record__threads_enabled(rec))
2337
return 0;
2338
2339
sigfillset(&full);
2340
if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2341
pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2342
return -1;
2343
}
2344
2345
pthread_attr_init(&attrs);
2346
pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2347
2348
for (t = 1; t < nr_threads; t++) {
2349
enum thread_msg msg = THREAD_MSG__UNDEFINED;
2350
2351
#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2352
pthread_attr_setaffinity_np(&attrs,
2353
MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2354
(cpu_set_t *)(thread_data[t].mask->affinity.bits));
2355
#endif
2356
if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2357
for (tt = 1; tt < t; tt++)
2358
record__terminate_thread(&thread_data[t]);
2359
pr_err("Failed to start threads: %s\n", strerror(errno));
2360
ret = -1;
2361
goto out_err;
2362
}
2363
2364
err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2365
if (err > 0)
2366
pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2367
thread_msg_tags[msg]);
2368
else
2369
pr_warning("threads[%d]: failed to receive start notification from %d\n",
2370
thread->tid, rec->thread_data[t].tid);
2371
}
2372
2373
sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2374
(cpu_set_t *)thread->mask->affinity.bits);
2375
2376
pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2377
2378
out_err:
2379
pthread_attr_destroy(&attrs);
2380
2381
if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2382
pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2383
ret = -1;
2384
}
2385
2386
return ret;
2387
}
2388
2389
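/*
 * Terminate the streaming threads and fold their per-thread sample,
 * transferred and compressed byte counts back into the record/session
 * totals.
 */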
static int record__stop_threads(struct record *rec)
2390
{
2391
int t;
2392
struct record_thread *thread_data = rec->thread_data;
2393
2394
for (t = 1; t < rec->nr_threads; t++)
2395
record__terminate_thread(&thread_data[t]);
2396
2397
for (t = 0; t < rec->nr_threads; t++) {
2398
rec->samples += thread_data[t].samples;
2399
if (!record__threads_enabled(rec))
2400
continue;
2401
rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2402
rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2403
pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2404
thread_data[t].samples, thread_data[t].waking);
2405
if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2406
pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2407
thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2408
else
2409
pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2410
}
2411
2412
return 0;
2413
}
2414
2415
static unsigned long record__waking(struct record *rec)
2416
{
2417
int t;
2418
unsigned long waking = 0;
2419
struct record_thread *thread_data = rec->thread_data;
2420
2421
for (t = 0; t < rec->nr_threads; t++)
2422
waking += thread_data[t].waking;
2423
2424
return waking;
2425
}
2426
2427
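/*
 * The main body of 'perf record': set up signal handlers, the session,
 * the workload and the ring buffers, synthesize the initial metadata,
 * then loop reading the mmaps until done and write the final output.
 */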
static int __cmd_record(struct record *rec, int argc, const char **argv)
2428
{
2429
int err;
2430
int status = 0;
2431
const bool forks = argc > 0;
2432
struct perf_tool *tool = &rec->tool;
2433
struct record_opts *opts = &rec->opts;
2434
struct perf_data *data = &rec->data;
2435
struct perf_session *session;
2436
bool disabled = false, draining = false;
2437
int fd;
2438
float ratio = 0;
2439
enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2440
struct perf_env *env;
2441
2442
atexit(record__sig_exit);
2443
signal(SIGCHLD, sig_handler);
2444
signal(SIGINT, sig_handler);
2445
signal(SIGTERM, sig_handler);
2446
signal(SIGSEGV, sigsegv_handler);
2447
2448
if (rec->opts.record_cgroup) {
2449
#ifndef HAVE_FILE_HANDLE
2450
pr_err("cgroup tracking is not supported\n");
2451
return -1;
2452
#endif
2453
}
2454
2455
if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2456
signal(SIGUSR2, snapshot_sig_handler);
2457
if (rec->opts.auxtrace_snapshot_mode)
2458
trigger_on(&auxtrace_snapshot_trigger);
2459
if (rec->switch_output.enabled)
2460
trigger_on(&switch_output_trigger);
2461
} else {
2462
signal(SIGUSR2, SIG_IGN);
2463
}
2464
2465
perf_tool__init(tool, /*ordered_events=*/true);
2466
tool->sample = process_sample_event;
2467
tool->fork = perf_event__process_fork;
2468
tool->exit = perf_event__process_exit;
2469
tool->comm = perf_event__process_comm;
2470
tool->namespaces = perf_event__process_namespaces;
2471
tool->mmap = build_id__process_mmap;
2472
tool->mmap2 = build_id__process_mmap2;
2473
tool->itrace_start = process_timestamp_boundary;
2474
tool->aux = process_timestamp_boundary;
2475
tool->namespace_events = rec->opts.record_namespaces;
2476
tool->cgroup_events = rec->opts.record_cgroup;
2477
session = perf_session__new(data, tool);
2478
if (IS_ERR(session)) {
2479
pr_err("Perf session creation failed.\n");
2480
return PTR_ERR(session);
2481
}
2482
env = perf_session__env(session);
2483
if (record__threads_enabled(rec)) {
2484
if (perf_data__is_pipe(&rec->data)) {
2485
pr_err("Parallel trace streaming is not available in pipe mode.\n");
2486
return -1;
2487
}
2488
if (rec->opts.full_auxtrace) {
2489
pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2490
return -1;
2491
}
2492
}
2493
2494
fd = perf_data__fd(data);
2495
rec->session = session;
2496
2497
if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2498
pr_err("Compression initialization failed.\n");
2499
return -1;
2500
}
2501
#ifdef HAVE_EVENTFD_SUPPORT
2502
done_fd = eventfd(0, EFD_NONBLOCK);
2503
if (done_fd < 0) {
2504
pr_err("Failed to create wakeup eventfd, error: %m\n");
2505
status = -1;
2506
goto out_delete_session;
2507
}
2508
err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2509
if (err < 0) {
2510
pr_err("Failed to add wakeup eventfd to poll list\n");
2511
status = err;
2512
goto out_delete_session;
2513
}
2514
#endif // HAVE_EVENTFD_SUPPORT
2515
2516
env->comp_type = PERF_COMP_ZSTD;
2517
env->comp_level = rec->opts.comp_level;
2518
2519
if (rec->opts.kcore &&
2520
!record__kcore_readable(&session->machines.host)) {
2521
pr_err("ERROR: kcore is not readable.\n");
2522
return -1;
2523
}
2524
2525
if (record__init_clock(rec))
2526
return -1;
2527
2528
record__init_features(rec);
2529
2530
if (forks) {
2531
err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2532
workload_exec_failed_signal);
2533
if (err < 0) {
2534
pr_err("Couldn't run the workload!\n");
2535
status = err;
2536
goto out_delete_session;
2537
}
2538
}
2539
2540
/*
2541
* If we have just a single event and are sending data
2542
* through a pipe, we need to force the id allocation,
2543
* because we synthesize the event name through the pipe
2544
* and need the id for that.
2545
*/
2546
if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2547
rec->opts.sample_id = true;
2548
2549
if (rec->timestamp_filename && perf_data__is_pipe(data)) {
2550
rec->timestamp_filename = false;
2551
pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
2552
}
2553
2554
/*
2555
* Use global stat_config that is zero meaning aggr_mode is AGGR_NONE
2556
* and hybrid_merge is false.
2557
*/
2558
evlist__uniquify_evsel_names(rec->evlist, &stat_config);
2559
2560
evlist__config(rec->evlist, opts, &callchain_param);
2561
2562
/* Debug message used by test scripts */
2563
pr_debug3("perf record opening and mmapping events\n");
2564
if (record__open(rec) != 0) {
2565
err = -1;
2566
goto out_free_threads;
2567
}
2568
/* Debug message used by test scripts */
2569
pr_debug3("perf record done opening and mmapping events\n");
2570
env->comp_mmap_len = session->evlist->core.mmap_len;
2571
2572
if (rec->opts.kcore) {
2573
err = record__kcore_copy(&session->machines.host, data);
2574
if (err) {
2575
pr_err("ERROR: Failed to copy kcore\n");
2576
goto out_free_threads;
2577
}
2578
}
2579
2580
/*
2581
* Normally perf_session__new would do this, but it doesn't have the
2582
* evlist.
2583
*/
2584
if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2585
pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2586
rec->tool.ordered_events = false;
2587
}
2588
2589
if (evlist__nr_groups(rec->evlist) == 0)
2590
perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2591
2592
if (data->is_pipe) {
2593
err = perf_header__write_pipe(fd);
2594
if (err < 0)
2595
goto out_free_threads;
2596
} else {
2597
err = perf_session__write_header(session, rec->evlist, fd, false);
2598
if (err < 0)
2599
goto out_free_threads;
2600
}
2601
2602
err = -1;
2603
if (!rec->no_buildid
2604
&& !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2605
pr_err("Couldn't generate buildids. "
2606
"Use --no-buildid to profile anyway.\n");
2607
goto out_free_threads;
2608
}
2609
2610
if (!evlist__needs_bpf_sb_event(rec->evlist))
2611
opts->no_bpf_event = true;
2612
2613
err = record__setup_sb_evlist(rec);
2614
if (err)
2615
goto out_free_threads;
2616
2617
err = record__synthesize(rec, false);
2618
if (err < 0)
2619
goto out_free_threads;
2620
2621
if (rec->realtime_prio) {
2622
struct sched_param param;
2623
2624
param.sched_priority = rec->realtime_prio;
2625
if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2626
pr_err("Could not set realtime priority.\n");
2627
err = -1;
2628
goto out_free_threads;
2629
}
2630
}
2631
2632
if (record__start_threads(rec))
2633
goto out_free_threads;
2634
2635
/*
2636
* When perf is starting the traced process, all the events
2637
* (apart from group members) have enable_on_exec=1 set,
2638
* so don't spoil it by prematurely enabling them.
2639
*/
2640
if (!target__none(&opts->target) && !opts->target.initial_delay)
2641
evlist__enable(rec->evlist);
2642
2643
/*
2644
* offcpu-time does not call execve, so enable_on_exe wouldn't work
2645
* when recording a workload, do it manually
2646
*/
2647
if (rec->off_cpu)
2648
evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);
2649
2650
/*
2651
* Let the child rip
2652
*/
2653
if (forks) {
2654
struct machine *machine = &session->machines.host;
2655
union perf_event *event;
2656
pid_t tgid;
2657
2658
event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2659
if (event == NULL) {
2660
err = -ENOMEM;
2661
goto out_child;
2662
}
2663
2664
/*
2665
* Some H/W events are generated before the COMM event,
2666
* which is emitted during exec(), so perf script
2667
* cannot see a correct process name for those events.
2668
* Synthesize a COMM event to prevent that.
2669
*/
2670
tgid = perf_event__synthesize_comm(tool, event,
2671
rec->evlist->workload.pid,
2672
process_synthesized_event,
2673
machine);
2674
free(event);
2675
2676
if (tgid == -1)
2677
goto out_child;
2678
2679
event = malloc(sizeof(event->namespaces) +
2680
(NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2681
machine->id_hdr_size);
2682
if (event == NULL) {
2683
err = -ENOMEM;
2684
goto out_child;
2685
}
2686
2687
/*
2688
* Synthesize NAMESPACES event for the command specified.
2689
*/
2690
perf_event__synthesize_namespaces(tool, event,
2691
rec->evlist->workload.pid,
2692
tgid, process_synthesized_event,
2693
machine);
2694
free(event);
2695
2696
evlist__start_workload(rec->evlist);
2697
}
2698
2699
if (opts->target.initial_delay) {
2700
pr_info(EVLIST_DISABLED_MSG);
2701
if (opts->target.initial_delay > 0) {
2702
usleep(opts->target.initial_delay * USEC_PER_MSEC);
2703
evlist__enable(rec->evlist);
2704
pr_info(EVLIST_ENABLED_MSG);
2705
}
2706
}
2707
2708
err = event_enable_timer__start(rec->evlist->eet);
2709
if (err)
2710
goto out_child;
2711
2712
/* Debug message used by test scripts */
2713
pr_debug3("perf record has started\n");
2714
fflush(stderr);
2715
2716
trigger_ready(&auxtrace_snapshot_trigger);
2717
trigger_ready(&switch_output_trigger);
2718
perf_hooks__invoke_record_start();
2719
2720
/*
2721
* Must write FINISHED_INIT so it will be seen after all other
2722
* synthesized user events, but before any regular events.
2723
*/
2724
err = write_finished_init(rec, false);
2725
if (err < 0)
2726
goto out_child;
2727
2728
for (;;) {
2729
unsigned long long hits = thread->samples;
2730
2731
/*
2732
* rec->evlist->bkw_mmap_state is possible to be
2733
* BKW_MMAP_EMPTY here: when done == true and
2734
* hits != rec->samples in previous round.
2735
*
2736
* evlist__toggle_bkw_mmap ensure we never
2737
* convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2738
*/
2739
if (trigger_is_hit(&switch_output_trigger) || done || draining)
2740
evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2741
2742
if (record__mmap_read_all(rec, false) < 0) {
2743
trigger_error(&auxtrace_snapshot_trigger);
2744
trigger_error(&switch_output_trigger);
2745
err = -1;
2746
goto out_child;
2747
}
2748
2749
if (auxtrace_record__snapshot_started) {
2750
auxtrace_record__snapshot_started = 0;
2751
if (!trigger_is_error(&auxtrace_snapshot_trigger))
2752
record__read_auxtrace_snapshot(rec, false);
2753
if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2754
pr_err("AUX area tracing snapshot failed\n");
2755
err = -1;
2756
goto out_child;
2757
}
2758
}
2759
2760
if (trigger_is_hit(&switch_output_trigger)) {
2761
/*
2762
* If switch_output_trigger is hit, the data in
2763
* overwritable ring buffer should have been collected,
2764
* so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2765
*
2766
* If SIGUSR2 is raised after or during record__mmap_read_all(),
2767
* record__mmap_read_all() didn't collect data from the
2768
* overwritable ring buffer. Read again.
2769
*/
2770
if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2771
continue;
2772
trigger_ready(&switch_output_trigger);
2773
2774
/*
2775
* Reenable events in overwrite ring buffer after
2776
* record__mmap_read_all(): we should have collected
2777
* data from it.
2778
*/
2779
evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2780
2781
if (!quiet)
2782
fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2783
record__waking(rec));
2784
thread->waking = 0;
2785
fd = record__switch_output(rec, false);
2786
if (fd < 0) {
2787
pr_err("Failed to switch to new file\n");
2788
trigger_error(&switch_output_trigger);
2789
err = fd;
2790
goto out_child;
2791
}
2792
2793
/* re-arm the alarm */
2794
if (rec->switch_output.time)
2795
alarm(rec->switch_output.time);
2796
}
2797
2798
if (hits == thread->samples) {
2799
if (done || draining)
2800
break;
2801
err = fdarray__poll(&thread->pollfd, -1);
2802
/*
2803
* Propagate the error only if there is one. Ignore a positive
2804
* number of returned events and interrupt errors.
2805
*/
2806
if (err > 0 || (err < 0 && errno == EINTR))
2807
err = 0;
2808
thread->waking++;
2809
2810
if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2811
record__thread_munmap_filtered, NULL) == 0)
2812
draining = true;
2813
2814
err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2815
if (err)
2816
goto out_child;
2817
}
2818
2819
if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2820
switch (cmd) {
2821
case EVLIST_CTL_CMD_SNAPSHOT:
2822
hit_auxtrace_snapshot_trigger(rec);
2823
evlist__ctlfd_ack(rec->evlist);
2824
break;
2825
case EVLIST_CTL_CMD_STOP:
2826
done = 1;
2827
break;
2828
case EVLIST_CTL_CMD_ACK:
2829
case EVLIST_CTL_CMD_UNSUPPORTED:
2830
case EVLIST_CTL_CMD_ENABLE:
2831
case EVLIST_CTL_CMD_DISABLE:
2832
case EVLIST_CTL_CMD_EVLIST:
2833
case EVLIST_CTL_CMD_PING:
2834
default:
2835
break;
2836
}
2837
}
2838
2839
err = event_enable_timer__process(rec->evlist->eet);
2840
if (err < 0)
2841
goto out_child;
2842
if (err) {
2843
err = 0;
2844
done = 1;
2845
}
2846
2847
/*
2848
* When perf is starting the traced process, at the end events
2849
* die with the process and we wait for that. Thus no need to
2850
* disable events in this case.
2851
*/
2852
if (done && !disabled && !target__none(&opts->target)) {
2853
trigger_off(&auxtrace_snapshot_trigger);
2854
evlist__disable(rec->evlist);
2855
disabled = true;
2856
}
2857
}
2858
2859
trigger_off(&auxtrace_snapshot_trigger);
2860
trigger_off(&switch_output_trigger);
2861
2862
record__synthesize_final_bpf_metadata(rec);
2863
2864
if (opts->auxtrace_snapshot_on_exit)
2865
record__auxtrace_snapshot_exit(rec);
2866
2867
if (forks && workload_exec_errno) {
2868
char msg[STRERR_BUFSIZE];
2869
const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2870
struct strbuf sb = STRBUF_INIT;
2871
2872
evlist__format_evsels(rec->evlist, &sb, 2048);
2873
2874
pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2875
sb.buf, argv[0], emsg);
2876
strbuf_release(&sb);
2877
err = -1;
2878
goto out_child;
2879
}
2880
2881
if (!quiet)
2882
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2883
record__waking(rec));
2884
2885
write_finished_init(rec, true);
2886
2887
if (target__none(&rec->opts.target))
2888
record__synthesize_workload(rec, true);
2889
2890
out_child:
2891
record__stop_threads(rec);
2892
record__mmap_read_all(rec, true);
2893
out_free_threads:
2894
record__free_thread_data(rec);
2895
evlist__finalize_ctlfd(rec->evlist);
2896
record__aio_mmap_read_sync(rec);
2897
2898
if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2899
ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2900
env->comp_ratio = ratio + 0.5;
2901
}
2902
2903
if (forks) {
2904
int exit_status;
2905
2906
if (!child_finished)
2907
kill(rec->evlist->workload.pid, SIGTERM);
2908
2909
wait(&exit_status);
2910
2911
if (err < 0)
2912
status = err;
2913
else if (WIFEXITED(exit_status))
2914
status = WEXITSTATUS(exit_status);
2915
else if (WIFSIGNALED(exit_status))
2916
signr = WTERMSIG(exit_status);
2917
} else
2918
status = err;
2919
2920
if (rec->off_cpu)
2921
rec->bytes_written += off_cpu_write(rec->session);
2922
2923
record__read_lost_samples(rec);
2924
/* this will be recalculated during process_buildids() */
2925
rec->samples = 0;
2926
2927
if (!err) {
2928
record__synthesize(rec, true);
2929
if (!rec->timestamp_filename) {
2930
record__finish_output(rec);
2931
} else {
2932
fd = record__switch_output(rec, true);
2933
if (fd < 0) {
2934
status = fd;
2935
goto out_delete_session;
2936
}
2937
}
2938
}
2939
2940
perf_hooks__invoke_record_end();
2941
2942
if (!err && !quiet) {
2943
char samples[128];
2944
const char *postfix = rec->timestamp_filename ?
2945
".<timestamp>" : "";
2946
2947
if (rec->samples && !rec->opts.full_auxtrace)
2948
scnprintf(samples, sizeof(samples),
2949
" (%" PRIu64 " samples)", rec->samples);
2950
else
2951
samples[0] = '\0';
2952
2953
fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2954
perf_data__size(data) / 1024.0 / 1024.0,
2955
data->path, postfix, samples);
2956
if (ratio) {
2957
fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2958
rec->session->bytes_transferred / 1024.0 / 1024.0,
2959
ratio);
2960
}
2961
fprintf(stderr, " ]\n");
2962
}
2963
2964
out_delete_session:
2965
#ifdef HAVE_EVENTFD_SUPPORT
2966
if (done_fd >= 0) {
2967
fd = done_fd;
2968
done_fd = -1;
2969
2970
close(fd);
2971
}
2972
#endif
2973
zstd_fini(&session->zstd_data);
2974
if (!opts->no_bpf_event)
2975
evlist__stop_sb_thread(rec->sb_evlist);
2976
2977
perf_session__delete(session);
2978
return status;
2979
}
2980
2981
static void callchain_debug(struct callchain_param *callchain)
2982
{
2983
static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2984
2985
pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2986
2987
if (callchain->record_mode == CALLCHAIN_DWARF)
2988
pr_debug("callchain: stack dump size %d\n",
2989
callchain->dump_size);
2990
}
2991
2992
int record_opts__parse_callchain(struct record_opts *record,
2993
struct callchain_param *callchain,
2994
const char *arg, bool unset)
2995
{
2996
int ret;
2997
callchain->enabled = !unset;
2998
2999
/* --no-call-graph */
3000
if (unset) {
3001
callchain->record_mode = CALLCHAIN_NONE;
3002
pr_debug("callchain: disabled\n");
3003
return 0;
3004
}
3005
3006
ret = parse_callchain_record_opt(arg, callchain);
3007
if (!ret) {
3008
/* Enable data address sampling for DWARF unwind. */
3009
if (callchain->record_mode == CALLCHAIN_DWARF)
3010
record->sample_address = true;
3011
callchain_debug(callchain);
3012
}
3013
3014
return ret;
3015
}
3016
3017
int record_parse_callchain_opt(const struct option *opt,
3018
const char *arg,
3019
int unset)
3020
{
3021
return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
3022
}
3023
3024
int record_callchain_opt(const struct option *opt,
3025
const char *arg __maybe_unused,
3026
int unset __maybe_unused)
3027
{
3028
struct callchain_param *callchain = opt->value;
3029
3030
callchain->enabled = true;
3031
3032
if (callchain->record_mode == CALLCHAIN_NONE)
3033
callchain->record_mode = CALLCHAIN_FP;
3034
3035
callchain_debug(callchain);
3036
return 0;
3037
}
3038
3039
static int perf_record_config(const char *var, const char *value, void *cb)
3040
{
3041
struct record *rec = cb;
3042
3043
if (!strcmp(var, "record.build-id")) {
3044
if (!strcmp(value, "cache"))
3045
rec->no_buildid_cache = false;
3046
else if (!strcmp(value, "no-cache"))
3047
rec->no_buildid_cache = true;
3048
else if (!strcmp(value, "skip"))
3049
rec->no_buildid = rec->no_buildid_cache = true;
3050
else if (!strcmp(value, "mmap"))
3051
rec->buildid_mmap = true;
3052
else if (!strcmp(value, "no-mmap"))
3053
rec->buildid_mmap = false;
3054
else
3055
return -1;
3056
return 0;
3057
}
3058
if (!strcmp(var, "record.call-graph")) {
3059
var = "call-graph.record-mode";
3060
return perf_default_config(var, value, cb);
3061
}
3062
#ifdef HAVE_AIO_SUPPORT
3063
if (!strcmp(var, "record.aio")) {
3064
rec->opts.nr_cblocks = strtol(value, NULL, 0);
3065
if (!rec->opts.nr_cblocks)
3066
rec->opts.nr_cblocks = nr_cblocks_default;
3067
}
3068
#endif
3069
if (!strcmp(var, "record.debuginfod")) {
3070
rec->debuginfod.urls = strdup(value);
3071
if (!rec->debuginfod.urls)
3072
return -ENOMEM;
3073
rec->debuginfod.set = true;
3074
}
3075
3076
return 0;
3077
}
3078
3079
static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
3080
{
3081
struct record *rec = (struct record *)opt->value;
3082
3083
return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
3084
}
3085
3086
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
3087
{
3088
struct record_opts *opts = (struct record_opts *)opt->value;
3089
3090
if (unset || !str)
3091
return 0;
3092
3093
if (!strcasecmp(str, "node"))
3094
opts->affinity = PERF_AFFINITY_NODE;
3095
else if (!strcasecmp(str, "cpu"))
3096
opts->affinity = PERF_AFFINITY_CPU;
3097
3098
return 0;
3099
}
3100
3101
static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3102
{
3103
mask->nbits = nr_bits;
3104
mask->bits = bitmap_zalloc(mask->nbits);
3105
if (!mask->bits)
3106
return -ENOMEM;
3107
3108
return 0;
3109
}
3110
3111
static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3112
{
3113
bitmap_free(mask->bits);
3114
mask->nbits = 0;
3115
}
3116
3117
static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3118
{
3119
int ret;
3120
3121
ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3122
if (ret) {
3123
mask->affinity.bits = NULL;
3124
return ret;
3125
}
3126
3127
ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3128
if (ret) {
3129
record__mmap_cpu_mask_free(&mask->maps);
3130
mask->maps.bits = NULL;
3131
}
3132
3133
return ret;
3134
}
3135
3136
static void record__thread_mask_free(struct thread_mask *mask)
3137
{
3138
record__mmap_cpu_mask_free(&mask->maps);
3139
record__mmap_cpu_mask_free(&mask->affinity);
3140
}
3141
3142
static int record__parse_threads(const struct option *opt, const char *str, int unset)
3143
{
3144
int s;
3145
struct record_opts *opts = opt->value;
3146
3147
if (unset || !str || !strlen(str)) {
3148
opts->threads_spec = THREAD_SPEC__CPU;
3149
} else {
3150
for (s = 1; s < THREAD_SPEC__MAX; s++) {
3151
if (s == THREAD_SPEC__USER) {
3152
opts->threads_user_spec = strdup(str);
3153
if (!opts->threads_user_spec)
3154
return -ENOMEM;
3155
opts->threads_spec = THREAD_SPEC__USER;
3156
break;
3157
}
3158
if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3159
opts->threads_spec = s;
3160
break;
3161
}
3162
}
3163
}
3164
3165
if (opts->threads_spec == THREAD_SPEC__USER)
3166
pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3167
else
3168
pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3169
3170
return 0;
3171
}
3172
3173
static int parse_output_max_size(const struct option *opt,
3174
const char *str, int unset)
3175
{
3176
unsigned long *s = (unsigned long *)opt->value;
3177
static struct parse_tag tags_size[] = {
3178
{ .tag = 'B', .mult = 1 },
3179
{ .tag = 'K', .mult = 1 << 10 },
3180
{ .tag = 'M', .mult = 1 << 20 },
3181
{ .tag = 'G', .mult = 1 << 30 },
3182
{ .tag = 0 },
3183
};
3184
unsigned long val;
3185
3186
if (unset) {
3187
*s = 0;
3188
return 0;
3189
}
3190
3191
val = parse_tag_value(str, tags_size);
3192
if (val != (unsigned long) -1) {
3193
*s = val;
3194
return 0;
3195
}
3196
3197
return -1;
3198
}
3199
3200
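/* Parse the -m/--mmap-pages argument: "pages" or "pages,auxtrace_pages". */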
static int record__parse_mmap_pages(const struct option *opt,
3201
const char *str,
3202
int unset __maybe_unused)
3203
{
3204
struct record_opts *opts = opt->value;
3205
char *s, *p;
3206
unsigned int mmap_pages;
3207
int ret;
3208
3209
if (!str)
3210
return -EINVAL;
3211
3212
s = strdup(str);
3213
if (!s)
3214
return -ENOMEM;
3215
3216
p = strchr(s, ',');
3217
if (p)
3218
*p = '\0';
3219
3220
if (*s) {
3221
ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3222
if (ret)
3223
goto out_free;
3224
opts->mmap_pages = mmap_pages;
3225
}
3226
3227
if (!p) {
3228
ret = 0;
3229
goto out_free;
3230
}
3231
3232
ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3233
if (ret)
3234
goto out_free;
3235
3236
opts->auxtrace_mmap_pages = mmap_pages;
3237
3238
out_free:
3239
free(s);
3240
return ret;
3241
}
3242
3243
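/* Parse the --off-cpu-thresh value in milliseconds and store it in nanoseconds. */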
static int record__parse_off_cpu_thresh(const struct option *opt,
3244
const char *str,
3245
int unset __maybe_unused)
3246
{
3247
struct record_opts *opts = opt->value;
3248
char *endptr;
3249
u64 off_cpu_thresh_ms;
3250
3251
if (!str)
3252
return -EINVAL;
3253
3254
off_cpu_thresh_ms = strtoull(str, &endptr, 10);
3255
3256
/* strtoull() returned 0 but the string isn't "0": parsing failed */
3257
if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0")))
3258
return -EINVAL;
3259
else
3260
opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC;
3261
3262
return 0;
3263
}
3264
3265
void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3266
{
3267
}
3268
3269
static int parse_control_option(const struct option *opt,
3270
const char *str,
3271
int unset __maybe_unused)
3272
{
3273
struct record_opts *opts = opt->value;
3274
3275
return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3276
}
3277
3278
static void switch_output_size_warn(struct record *rec)
3279
{
3280
u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3281
struct switch_output *s = &rec->switch_output;
3282
3283
wakeup_size /= 2;
3284
3285
if (s->size < wakeup_size) {
3286
char buf[100];
3287
3288
unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3289
pr_warning("WARNING: switch-output data size lower than "
3290
"wakeup kernel buffer size (%s) "
3291
"expect bigger perf.data sizes\n", buf);
3292
}
3293
}
3294
3295
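/*
 * Parse the --switch-output argument: "signal", a size threshold
 * (B/K/M/G) or a time threshold (s/m/h/d). Any of these implies
 * timestamped output file names.
 */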
static int switch_output_setup(struct record *rec)
3296
{
3297
struct switch_output *s = &rec->switch_output;
3298
static struct parse_tag tags_size[] = {
3299
{ .tag = 'B', .mult = 1 },
3300
{ .tag = 'K', .mult = 1 << 10 },
3301
{ .tag = 'M', .mult = 1 << 20 },
3302
{ .tag = 'G', .mult = 1 << 30 },
3303
{ .tag = 0 },
3304
};
3305
static struct parse_tag tags_time[] = {
3306
{ .tag = 's', .mult = 1 },
3307
{ .tag = 'm', .mult = 60 },
3308
{ .tag = 'h', .mult = 60*60 },
3309
{ .tag = 'd', .mult = 60*60*24 },
3310
{ .tag = 0 },
3311
};
3312
unsigned long val;
3313
3314
/*
3315
* If we're using --switch-output-event, then we imply
3316
* --switch-output=signal, as we'll send a SIGUSR2 from the side band
3317
* thread to its parent.
3318
*/
3319
if (rec->switch_output_event_set) {
3320
if (record__threads_enabled(rec)) {
3321
pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3322
return 0;
3323
}
3324
goto do_signal;
3325
}
3326
3327
if (!s->set)
3328
return 0;
3329
3330
if (record__threads_enabled(rec)) {
3331
pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3332
return 0;
3333
}
3334
3335
if (!strcmp(s->str, "signal")) {
3336
do_signal:
3337
s->signal = true;
3338
pr_debug("switch-output with SIGUSR2 signal\n");
3339
goto enabled;
3340
}
3341
3342
val = parse_tag_value(s->str, tags_size);
3343
if (val != (unsigned long) -1) {
3344
s->size = val;
3345
pr_debug("switch-output with %s size threshold\n", s->str);
3346
goto enabled;
3347
}
3348
3349
val = parse_tag_value(s->str, tags_time);
3350
if (val != (unsigned long) -1) {
3351
s->time = val;
3352
pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3353
s->str, s->time);
3354
goto enabled;
3355
}
3356
3357
return -1;
3358
3359
enabled:
3360
rec->timestamp_filename = true;
3361
s->enabled = true;
3362
3363
if (s->size && !rec->opts.no_buffering)
3364
switch_output_size_warn(rec);
3365
3366
return 0;
3367
}
3368
3369
static const char * const __record_usage[] = {
3370
"perf record [<options>] [<command>]",
3371
"perf record [<options>] -- <command> [<options>]",
3372
NULL
3373
};
3374
const char * const *record_usage = __record_usage;
3375
3376
static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
3377
struct perf_sample *sample, struct machine *machine)
3378
{
3379
/*
3380
* We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3381
* no need to add them twice.
3382
*/
3383
if (!(event->header.misc & PERF_RECORD_MISC_USER))
3384
return 0;
3385
return perf_event__process_mmap(tool, event, sample, machine);
3386
}
3387
3388
static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
3389
struct perf_sample *sample, struct machine *machine)
3390
{
3391
/*
3392
* We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3393
* no need to add them twice.
3394
*/
3395
if (!(event->header.misc & PERF_RECORD_MISC_USER))
3396
return 0;
3397
3398
return perf_event__process_mmap2(tool, event, sample, machine);
3399
}
3400
3401
static int process_timestamp_boundary(const struct perf_tool *tool,
3402
union perf_event *event __maybe_unused,
3403
struct perf_sample *sample,
3404
struct machine *machine __maybe_unused)
3405
{
3406
struct record *rec = container_of(tool, struct record, tool);
3407
3408
set_timestamp_boundary(rec, sample->time);
3409
return 0;
3410
}
3411
3412
static int parse_record_synth_option(const struct option *opt,
3413
const char *str,
3414
int unset __maybe_unused)
3415
{
3416
struct record_opts *opts = opt->value;
3417
char *p = strdup(str);
3418
3419
if (p == NULL)
3420
return -1;
3421
3422
opts->synth = parse_synth_opt(p);
3423
free(p);
3424
3425
if (opts->synth < 0) {
3426
pr_err("Invalid synth option: %s\n", str);
3427
return -1;
3428
}
3429
return 0;
3430
}
3431
3432
/*
3433
* XXX Ideally would be local to cmd_record() and passed to a record__new
3434
* because we need to have access to it in record__exit, that is called
3435
* after cmd_record() exits, but since record_options need to be accessible to
3436
* builtin-script, leave it here.
3437
*
3438
* At least we don't touch it in all the other functions here directly.
3439
*
3440
* Just say no to tons of global variables, sigh.
3441
*/
3442
static struct record record = {
3443
.opts = {
3444
.sample_time = true,
3445
.mmap_pages = UINT_MAX,
3446
.user_freq = UINT_MAX,
3447
.user_interval = ULLONG_MAX,
3448
.freq = 4000,
3449
.target = {
3450
.uses_mmap = true,
3451
.default_per_cpu = true,
3452
},
3453
.mmap_flush = MMAP_FLUSH_DEFAULT,
3454
.nr_threads_synthesize = 1,
3455
.ctl_fd = -1,
3456
.ctl_fd_ack = -1,
3457
.synth = PERF_SYNTH_ALL,
3458
.off_cpu_thresh_ns = OFFCPU_THRESH,
3459
},
3460
.buildid_mmap = true,
3461
};
3462
3463
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3464
"\n\t\t\t\tDefault: fp";
3465
3466
static bool dry_run;
3467
3468
static struct parse_events_option_args parse_events_option_args = {
3469
.evlistp = &record.evlist,
3470
};
3471
3472
static struct parse_events_option_args switch_output_parse_events_option_args = {
3473
.evlistp = &record.sb_evlist,
3474
};
3475
3476
/*
3477
* XXX Will stay a global variable till we fix builtin-script.c to stop messing
3478
* with it and switch to use the library functions in perf_evlist that came
3479
* from builtin-record.c, i.e. use record_opts,
3480
* evlist__prepare_workload, etc instead of fork+exec'ing 'perf record',
3481
* using pipes, etc.
3482
*/
3483
static struct option __record_options[] = {
3484
OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3485
"event selector. use 'perf list' to list available events",
3486
parse_events_option),
3487
OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3488
"event filter", parse_filter),
3489
OPT_BOOLEAN(0, "latency", &record.latency,
3490
"Enable data collection for latency profiling.\n"
3491
"\t\t\t Use perf report --latency for latency-centric profile."),
3492
OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3493
NULL, "don't record events from perf itself",
3494
exclude_perf),
3495
OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3496
"record events on existing process id"),
3497
OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3498
"record events on existing thread id"),
3499
OPT_INTEGER('r', "realtime", &record.realtime_prio,
3500
"collect data with this RT SCHED_FIFO priority"),
3501
OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3502
"collect data without buffering"),
3503
OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3504
"collect raw sample records from all opened counters"),
3505
OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3506
"system-wide collection from all CPUs"),
3507
OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3508
"list of cpus to monitor"),
3509
OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3510
OPT_STRING('o', "output", &record.data.path, "file",
3511
"output file name"),
3512
OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3513
&record.opts.no_inherit_set,
3514
"child tasks do not inherit counters"),
3515
OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3516
"synthesize non-sample events at the end of output"),
3517
OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3518
OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3519
OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3520
"Fail if the specified frequency can't be used"),
3521
OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3522
"profile at this frequency",
3523
record__parse_freq),
3524
OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3525
"number of mmap data pages and AUX area tracing mmap pages",
3526
record__parse_mmap_pages),
3527
OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3528
"Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3529
record__mmap_flush_parse),
3530
OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3531
NULL, "enables call-graph recording" ,
3532
&record_callchain_opt),
3533
OPT_CALLBACK(0, "call-graph", &record.opts,
3534
"record_mode[,record_size]", record_callchain_help,
3535
&record_parse_callchain_opt),
3536
OPT_INCR('v', "verbose", &verbose,
3537
"be more verbose (show counter open errors, etc)"),
3538
OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3539
OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3540
"per thread counts"),
3541
OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3542
OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3543
"Record the sample physical addresses"),
3544
OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3545
"Record the sampled data address data page size"),
3546
OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3547
"Record the sampled code address (ip) page size"),
3548
OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src,
3549
"Record the data source for memory operations"),
3550
OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3551
OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3552
"Record the sample identifier"),
3553
OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3554
&record.opts.sample_time_set,
3555
"Record the sample timestamps"),
3556
OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3557
"Record the sample period"),
3558
OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3559
"don't sample"),
3560
OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3561
&record.no_buildid_cache_set,
3562
"do not update the buildid cache"),
3563
OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3564
&record.no_buildid_set,
3565
"do not collect buildids in perf.data"),
3566
OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3567
"monitor event in cgroup name only",
3568
parse_cgroups),
3569
OPT_CALLBACK('D', "delay", &record, "ms",
3570
"ms to wait before starting measurement after program start (-1: start with events disabled), "
3571
"or ranges of time to enable events e.g. '-D 10-20,30-40'",
3572
record__parse_event_enable_time),
3573
OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3574
OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
3575
3576
OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3577
"branch any", "sample any taken branches",
3578
parse_branch_stack),
3579
3580
OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3581
"branch filter mask", "branch stack filter modes",
3582
parse_branch_stack),
3583
OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3584
"sample by weight (on special events only)"),
3585
OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3586
"sample transaction flags (special events only)"),
3587
OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3588
"use per-thread mmaps"),
3589
OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3590
"sample selected machine registers on interrupt,"
3591
" use '-I?' to list register names", parse_intr_regs),
3592
OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3593
"sample selected machine registers in user space,"
3594
" use '--user-regs=?' to list register names", parse_user_regs),
3595
OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3596
"Record running/enabled time of read (:S) events"),
3597
OPT_CALLBACK('k', "clockid", &record.opts,
3598
"clockid", "clockid to use for events, see clock_gettime()",
3599
parse_clockid),
3600
OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3601
"opts", "AUX area tracing Snapshot Mode", ""),
3602
OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3603
"opts", "sample AUX area", ""),
3604
OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3605
"per thread proc mmap processing timeout in ms"),
3606
OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3607
"Record namespaces events"),
3608
OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3609
"Record cgroup events"),
3610
OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3611
&record.opts.record_switch_events_set,
3612
"Record context switch events"),
3613
OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3614
"Configure all used events to run in kernel space.",
3615
PARSE_OPT_EXCLUSIVE),
3616
OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3617
"Configure all used events to run in user space.",
3618
PARSE_OPT_EXCLUSIVE),
3619
OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3620
"collect kernel callchains"),
3621
OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3622
"collect user callchains"),
3623
OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3624
"file", "vmlinux pathname"),
3625
OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3626
"Record build-id of all DSOs regardless of hits"),
3627
OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set,
3628
"Record build-id in mmap events and skip build-id processing."),
3629
OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3630
"append timestamp to output filename"),
3631
OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3632
"Record timestamp boundary (time of first/last samples)"),
3633
OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3634
&record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3635
"Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3636
"signal"),
3637
OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3638
&record.switch_output_event_set, "switch output event",
3639
"switch output event selector. use 'perf list' to list available events",
3640
parse_events_option_new_evlist),
3641
OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3642
"Limit number of switch output generated files"),
3643
OPT_BOOLEAN(0, "dry-run", &dry_run,
3644
"Parse options then exit"),
3645
#ifdef HAVE_AIO_SUPPORT
3646
OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3647
&nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3648
record__aio_parse),
3649
#endif
3650
OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3651
"Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3652
record__parse_affinity),
3653
#ifdef HAVE_ZSTD_SUPPORT
3654
OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3655
"Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3656
record__parse_comp_level),
3657
#endif
3658
OPT_CALLBACK(0, "max-size", &record.output_max_size,
3659
"size", "Limit the maximum size of the output file", parse_output_max_size),
3660
OPT_UINTEGER(0, "num-thread-synthesize",
3661
&record.opts.nr_threads_synthesize,
3662
"number of threads to run for event synthesis"),
3663
#ifdef HAVE_LIBPFM
3664
OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3665
"libpfm4 event selector. use 'perf list' to list available events",
3666
parse_libpfm_events_option),
3667
#endif
3668
OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3669
"Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3670
"\t\t\t 'snapshot': AUX area tracing snapshot).\n"
3671
"\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3672
"\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3673
parse_control_option),
3674
OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3675
"Fine-tune event synthesis: default=all", parse_record_synth_option),
3676
OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3677
&record.debuginfod.set, "debuginfod urls",
3678
"Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3679
"system"),
3680
OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3681
"write collected trace data into several data files using parallel threads",
3682
record__parse_threads),
3683
OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3684
OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
3685
"BPF filter action"),
3686
OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
3687
"Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
3688
record__parse_off_cpu_thresh),
3689
OPT_END()
3690
};
3691
3692
struct option *record_options = __record_options;
3693
3694
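/* Set a bit in the mask for each CPU in the map; dummy maps are left empty. */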
static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3695
{
3696
struct perf_cpu cpu;
3697
int idx;
3698
3699
if (cpu_map__is_dummy(cpus))
3700
return 0;
3701
3702
perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
3703
/* Return -ENODEV if the input cpu is greater than the max cpu */
3704
if ((unsigned long)cpu.cpu > mask->nbits)
3705
return -ENODEV;
3706
__set_bit(cpu.cpu, mask->bits);
3707
}
3708
3709
return 0;
3710
}
3711
3712
static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3713
{
3714
struct perf_cpu_map *cpus;
3715
3716
cpus = perf_cpu_map__new(mask_spec);
3717
if (!cpus)
3718
return -ENOMEM;
3719
3720
bitmap_zero(mask->bits, mask->nbits);
3721
if (record__mmap_cpu_mask_init(mask, cpus))
3722
return -ENODEV;
3723
3724
perf_cpu_map__put(cpus);
3725
3726
return 0;
3727
}
3728
3729
static void record__free_thread_masks(struct record *rec, int nr_threads)
3730
{
3731
int t;
3732
3733
if (rec->thread_masks)
3734
for (t = 0; t < nr_threads; t++)
3735
record__thread_mask_free(&rec->thread_masks[t]);
3736
3737
zfree(&rec->thread_masks);
3738
}
3739
3740
static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
{
	int t, ret;

	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
	if (!rec->thread_masks) {
		pr_err("Failed to allocate thread masks\n");
		return -ENOMEM;
	}

	for (t = 0; t < nr_threads; t++) {
		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
		if (ret) {
			pr_err("Failed to allocate thread masks[%d]\n", t);
			goto out_free;
		}
	}

	return 0;

out_free:
	record__free_thread_masks(rec, nr_threads);

	return ret;
}

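/*
 * --threads=cpu: one data streaming thread per CPU in the evlist CPU map,
 * with both its maps and affinity masks set to that single CPU.
 */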
static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);

	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	rec->nr_threads = nr_cpus;
	pr_debug("nr_threads: %d\n", rec->nr_threads);

	for (t = 0; t < rec->nr_threads; t++) {
		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
		if (verbose > 0) {
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
		}
	}

	return 0;
}

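/*
 * Build one thread mask per maps/affinity spec pair: each spec is parsed
 * as a CPU list, intersected with the CPUs being recorded, and rejected
 * if the result is empty or overlaps a previously accepted spec.
 */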
static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
					  const char **maps_spec, const char **affinity_spec,
					  u32 nr_spec)
{
	u32 s;
	int ret = 0, t = 0;
	struct mmap_cpu_mask cpus_mask;
	struct thread_mask thread_mask, full_mask, *thread_masks;

	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate CPUs mask\n");
		return ret;
	}

	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
	if (ret) {
		pr_err("Failed to init cpu mask\n");
		goto out_free_cpu_mask;
	}

	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate full mask\n");
		goto out_free_cpu_mask;
	}

	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate thread mask\n");
		goto out_free_full_and_cpu_masks;
	}

	for (s = 0; s < nr_spec; s++) {
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
		if (ret) {
			pr_err("Failed to initialize maps thread mask\n");
			goto out_free;
		}
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
		if (ret) {
			pr_err("Failed to initialize affinity thread mask\n");
			goto out_free;
		}

		/* ignore invalid CPUs but do not allow empty masks */
		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
				cpus_mask.bits, thread_mask.maps.nbits)) {
			pr_err("Empty maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
				cpus_mask.bits, thread_mask.affinity.nbits)) {
			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		/* do not allow intersection with other masks (full_mask) */
		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
				      thread_mask.maps.nbits)) {
			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
				      thread_mask.affinity.nbits)) {
			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
			  thread_mask.maps.bits, full_mask.maps.nbits);
		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
			  thread_mask.affinity.bits, full_mask.maps.nbits);

		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
		if (!thread_masks) {
			pr_err("Failed to reallocate thread masks\n");
			ret = -ENOMEM;
			goto out_free;
		}
		rec->thread_masks = thread_masks;
		rec->thread_masks[t] = thread_mask;
		if (verbose > 0) {
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
		}
		t++;
		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
		if (ret) {
			pr_err("Failed to allocate thread mask\n");
			goto out_free_full_and_cpu_masks;
		}
	}
	rec->nr_threads = t;
	pr_debug("nr_threads: %d\n", rec->nr_threads);
	if (!rec->nr_threads)
		ret = -EINVAL;

out_free:
	record__thread_mask_free(&thread_mask);
out_free_full_and_cpu_masks:
	record__thread_mask_free(&full_mask);
out_free_cpu_mask:
	record__mmap_cpu_mask_free(&cpus_mask);

	return ret;
}

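/* --threads=core: one maps/affinity spec per core, taken from the CPU topology. */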
static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
					     topo->core_cpus_list, topo->core_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

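/* --threads=package: one maps/affinity spec per physical package (socket). */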
static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
					     topo->package_cpus_list, topo->package_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

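/* --threads=numa: one maps/affinity spec per NUMA node, using the node CPU lists. */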
static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	u32 s;
	int ret;
	const char **spec;
	struct numa_topology *topo;

	topo = numa_topology__new();
	if (!topo) {
		pr_err("Failed to allocate NUMA topology\n");
		return -ENOMEM;
	}

	spec = zalloc(topo->nr * sizeof(char *));
	if (!spec) {
		pr_err("Failed to allocate NUMA spec\n");
		ret = -ENOMEM;
		goto out_delete_topo;
	}
	for (s = 0; s < topo->nr; s++)
		spec[s] = topo->nodes[s].cpus;

	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);

	zfree(&spec);

out_delete_topo:
	numa_topology__delete(topo);

	return ret;
}

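/*
 * --threads=<spec>: user-provided masks, written as colon-separated
 * "maps/affinity" pairs, e.g. "0-3/0-3:4-7/4-7".
 */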
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2(" maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2(" affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}

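/* No --threads: a single data streaming thread covering all CPUs being recorded. */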
static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;

	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
		return -ENODEV;

	rec->nr_threads = 1;

	return 0;
}

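/*
 * Pick the thread mask initialization matching the --threads mode, or fall
 * back to a single thread when parallel streaming is not enabled.
 */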
static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	if (evlist__per_thread(rec->evlist)) {
		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
		return -EINVAL;
	}

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

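/*
 * Entry point of 'perf record': parse and sanity-check options, set up the
 * event list, target, auxtrace, build-id and threading configuration, then
 * hand control to __cmd_record().
 */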
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	/* Disable eager loading of kernel symbols that adds overhead to perf record. */
	symbol_conf.lazy_load_kernel_maps = true;
	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}

	if (record.latency) {
		/*
		 * There is no fundamental reason why latency profiling
		 * can't work for system-wide mode, but exact semantics
		 * and details are to be defined.
		 * See the following thread for details:
		 * https://lore.kernel.org/all/[email protected]/
		 */
		if (record.opts.target.system_wide) {
			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
			err = -EINVAL;
			goto out_opts;
		}
		record.opts.record_switch_events = true;
	}

	if (rec->buildid_mmap && !perf_can_record_build_id()) {
		pr_warning("Missing support for build id in kernel mmap events.\n"
			   "Disable this warning with --no-buildid-mmap\n");
		rec->buildid_mmap = false;
	}

	if (rec->buildid_mmap) {
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build-ID table in the header. */
		rec->no_buildid = true;
	} else {
		pr_debug("Disabling build id in synthesized mmap2 events.\n");
		symbol_conf.no_buildid_mmap2 = true;
	}

	if (rec->no_buildid_set && rec->no_buildid) {
		/* -B implies -N for historic reasons. */
		rec->no_buildid_cache = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	if (rec->filter_action) {
		if (!strcmp(rec->filter_action, "pin"))
			err = perf_bpf_filter__pin();
		else if (!strcmp(rec->filter_action, "unpin"))
			err = perf_bpf_filter__unpin();
		else {
			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
			err = -EINVAL;
		}
		goto out_opts;
	}

	/* For backward compatibility, -d implies --mem-info */
	if (rec->opts.sample_address)
		rec->opts.sample_data_src = true;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = -ENOMEM;

	if (rec->no_buildid_cache) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly using
		 *
		 * perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		struct evlist *def_evlist = evlist__new_default();

		if (!def_evlist)
			goto out;

		evlist__splice_list_tail(rec->evlist, &def_evlist->core.entries);
		evlist__delete(def_evlist);
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	if (rec->uid_str) {
		uid_t uid = parse_uid(rec->uid_str);

		if (uid == UINT_MAX) {
			ui__error("Invalid User: %s", rec->uid_str);
			err = -EINVAL;
			goto out;
		}
		err = parse_uid_filter(rec->evlist, uid);
		if (err)
			goto out;

		/* User ID filtering implies system wide. */
		rec->opts.target.system_wide = true;
	}

	/* Enable ignoring missing threads when -p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.pid;

	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
		if (rec->opts.target.pid != NULL) {
			pr_err("Couldn't create thread/CPU maps: %s\n",
				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
			goto out;
		}
		else
			usage_with_options(record_usage, record_options);
	}

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace, as that would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (rec->off_cpu) {
		err = record__config_off_cpu(rec);
		if (err) {
			pr_err("record__config_off_cpu failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__config_tracking_events(rec);
	if (err) {
		pr_err("record__config_tracking_events failed, error %d\n", err);
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	evlist__delete(rec->evlist);
	return err;
}

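/*
 * Signal handler used to request an AUX area tracing snapshot and, when
 * --switch-output=signal is in effect, a switch to a new output file.
 */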
static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

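/*
 * SIGALRM handler armed by the --switch-output time setting in cmd_record();
 * triggers a switch to a new output file when the timer fires.
 */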
static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}