Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/perf/builtin-record.c
26282 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* builtin-record.c
4
*
5
* Builtin record command: Record the profile of a workload
6
* (or a CPU, or a PID) into the perf.data output file - for
7
* later analysis via perf report.
8
*/
9
#include "builtin.h"
10
11
#include "util/build-id.h"
12
#include <subcmd/parse-options.h>
13
#include <internal/xyarray.h>
14
#include "util/parse-events.h"
15
#include "util/config.h"
16
17
#include "util/callchain.h"
18
#include "util/cgroup.h"
19
#include "util/header.h"
20
#include "util/event.h"
21
#include "util/evlist.h"
22
#include "util/evsel.h"
23
#include "util/debug.h"
24
#include "util/mmap.h"
25
#include "util/mutex.h"
26
#include "util/target.h"
27
#include "util/session.h"
28
#include "util/tool.h"
29
#include "util/stat.h"
30
#include "util/symbol.h"
31
#include "util/record.h"
32
#include "util/cpumap.h"
33
#include "util/thread_map.h"
34
#include "util/data.h"
35
#include "util/perf_regs.h"
36
#include "util/auxtrace.h"
37
#include "util/tsc.h"
38
#include "util/parse-branch-options.h"
39
#include "util/parse-regs-options.h"
40
#include "util/perf_api_probe.h"
41
#include "util/trigger.h"
42
#include "util/perf-hooks.h"
43
#include "util/cpu-set-sched.h"
44
#include "util/synthetic-events.h"
45
#include "util/time-utils.h"
46
#include "util/units.h"
47
#include "util/bpf-event.h"
48
#include "util/util.h"
49
#include "util/pfm.h"
50
#include "util/pmu.h"
51
#include "util/pmus.h"
52
#include "util/clockid.h"
53
#include "util/off_cpu.h"
54
#include "util/bpf-filter.h"
55
#include "util/strbuf.h"
56
#include "asm/bug.h"
57
#include "perf.h"
58
#include "cputopo.h"
59
60
#include <errno.h>
61
#include <inttypes.h>
62
#include <locale.h>
63
#include <poll.h>
64
#include <pthread.h>
65
#include <unistd.h>
66
#ifndef HAVE_GETTID
67
#include <syscall.h>
68
#endif
69
#include <sched.h>
70
#include <signal.h>
71
#ifdef HAVE_EVENTFD_SUPPORT
72
#include <sys/eventfd.h>
73
#endif
74
#include <sys/mman.h>
75
#include <sys/wait.h>
76
#include <sys/types.h>
77
#include <sys/stat.h>
78
#include <fcntl.h>
79
#include <linux/err.h>
80
#include <linux/string.h>
81
#include <linux/time64.h>
82
#include <linux/zalloc.h>
83
#include <linux/bitmap.h>
84
#include <sys/time.h>
85
86
struct switch_output {
87
bool enabled;
88
bool signal;
89
unsigned long size;
90
unsigned long time;
91
const char *str;
92
bool set;
93
char **filenames;
94
int num_files;
95
int cur_file;
96
};
97
98
struct thread_mask {
99
struct mmap_cpu_mask maps;
100
struct mmap_cpu_mask affinity;
101
};
102
103
struct record_thread {
104
pid_t tid;
105
struct thread_mask *mask;
106
struct {
107
int msg[2];
108
int ack[2];
109
} pipes;
110
struct fdarray pollfd;
111
int ctlfd_pos;
112
int nr_mmaps;
113
struct mmap **maps;
114
struct mmap **overwrite_maps;
115
struct record *rec;
116
unsigned long long samples;
117
unsigned long waking;
118
u64 bytes_written;
119
u64 bytes_transferred;
120
u64 bytes_compressed;
121
};
122
123
static __thread struct record_thread *thread;
124
125
enum thread_msg {
126
THREAD_MSG__UNDEFINED = 0,
127
THREAD_MSG__READY,
128
THREAD_MSG__MAX,
129
};
130
131
static const char *thread_msg_tags[THREAD_MSG__MAX] = {
132
"UNDEFINED", "READY"
133
};
134
135
enum thread_spec {
136
THREAD_SPEC__UNDEFINED = 0,
137
THREAD_SPEC__CPU,
138
THREAD_SPEC__CORE,
139
THREAD_SPEC__PACKAGE,
140
THREAD_SPEC__NUMA,
141
THREAD_SPEC__USER,
142
THREAD_SPEC__MAX,
143
};
144
145
static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
146
"undefined", "cpu", "core", "package", "numa", "user"
147
};
148
149
struct pollfd_index_map {
150
int evlist_pollfd_index;
151
int thread_pollfd_index;
152
};
153
154
struct record {
155
struct perf_tool tool;
156
struct record_opts opts;
157
u64 bytes_written;
158
u64 thread_bytes_written;
159
struct perf_data data;
160
struct auxtrace_record *itr;
161
struct evlist *evlist;
162
struct perf_session *session;
163
struct evlist *sb_evlist;
164
pthread_t thread_id;
165
int realtime_prio;
166
bool latency;
167
bool switch_output_event_set;
168
bool no_buildid;
169
bool no_buildid_set;
170
bool no_buildid_cache;
171
bool no_buildid_cache_set;
172
bool buildid_all;
173
bool buildid_mmap;
174
bool buildid_mmap_set;
175
bool timestamp_filename;
176
bool timestamp_boundary;
177
bool off_cpu;
178
const char *filter_action;
179
const char *uid_str;
180
struct switch_output switch_output;
181
unsigned long long samples;
182
unsigned long output_max_size; /* = 0: unlimited */
183
struct perf_debuginfod debuginfod;
184
int nr_threads;
185
struct thread_mask *thread_masks;
186
struct record_thread *thread_data;
187
struct pollfd_index_map *index_map;
188
size_t index_map_sz;
189
size_t index_map_cnt;
190
};
191
192
static volatile int done;
193
194
static volatile int auxtrace_record__snapshot_started;
195
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
196
static DEFINE_TRIGGER(switch_output_trigger);
197
198
static const char *affinity_tags[PERF_AFFINITY_MAX] = {
199
"SYS", "NODE", "CPU"
200
};
201
202
static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
203
struct perf_sample *sample, struct machine *machine);
204
static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
205
struct perf_sample *sample, struct machine *machine);
206
static int process_timestamp_boundary(const struct perf_tool *tool,
207
union perf_event *event,
208
struct perf_sample *sample,
209
struct machine *machine);
210
211
#ifndef HAVE_GETTID
212
static inline pid_t gettid(void)
213
{
214
return (pid_t)syscall(__NR_gettid);
215
}
216
#endif
217
218
static int record__threads_enabled(struct record *rec)
219
{
220
return rec->opts.threads_spec;
221
}
222
223
static bool switch_output_signal(struct record *rec)
224
{
225
return rec->switch_output.signal &&
226
trigger_is_ready(&switch_output_trigger);
227
}
228
229
static bool switch_output_size(struct record *rec)
230
{
231
return rec->switch_output.size &&
232
trigger_is_ready(&switch_output_trigger) &&
233
(rec->bytes_written >= rec->switch_output.size);
234
}
235
236
static bool switch_output_time(struct record *rec)
237
{
238
return rec->switch_output.time &&
239
trigger_is_ready(&switch_output_trigger);
240
}
241
242
static u64 record__bytes_written(struct record *rec)
243
{
244
return rec->bytes_written + rec->thread_bytes_written;
245
}
246
247
static bool record__output_max_size_exceeded(struct record *rec)
248
{
249
return rec->output_max_size &&
250
(record__bytes_written(rec) >= rec->output_max_size);
251
}
252
253
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
254
void *bf, size_t size)
255
{
256
struct perf_data_file *file = &rec->session->data->file;
257
258
if (map && map->file)
259
file = map->file;
260
261
if (perf_data_file__write(file, bf, size) < 0) {
262
pr_err("failed to write perf data, error: %m\n");
263
return -1;
264
}
265
266
if (map && map->file) {
267
thread->bytes_written += size;
268
rec->thread_bytes_written += size;
269
} else {
270
rec->bytes_written += size;
271
}
272
273
if (record__output_max_size_exceeded(rec) && !done) {
274
fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
275
" stopping session ]\n",
276
record__bytes_written(rec) >> 10);
277
done = 1;
278
}
279
280
if (switch_output_size(rec))
281
trigger_hit(&switch_output_trigger);
282
283
return 0;
284
}
285
286
static int record__aio_enabled(struct record *rec);
287
static int record__comp_enabled(struct record *rec);
288
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
289
void *dst, size_t dst_size, void *src, size_t src_size);
290
291
#ifdef HAVE_AIO_SUPPORT
292
static int record__aio_write(struct aiocb *cblock, int trace_fd,
293
void *buf, size_t size, off_t off)
294
{
295
int rc;
296
297
cblock->aio_fildes = trace_fd;
298
cblock->aio_buf = buf;
299
cblock->aio_nbytes = size;
300
cblock->aio_offset = off;
301
cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
302
303
do {
304
rc = aio_write(cblock);
305
if (rc == 0) {
306
break;
307
} else if (errno != EAGAIN) {
308
cblock->aio_fildes = -1;
309
pr_err("failed to queue perf data, error: %m\n");
310
break;
311
}
312
} while (1);
313
314
return rc;
315
}
316
317
static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
318
{
319
void *rem_buf;
320
off_t rem_off;
321
size_t rem_size;
322
int rc, aio_errno;
323
ssize_t aio_ret, written;
324
325
aio_errno = aio_error(cblock);
326
if (aio_errno == EINPROGRESS)
327
return 0;
328
329
written = aio_ret = aio_return(cblock);
330
if (aio_ret < 0) {
331
if (aio_errno != EINTR)
332
pr_err("failed to write perf data, error: %m\n");
333
written = 0;
334
}
335
336
rem_size = cblock->aio_nbytes - written;
337
338
if (rem_size == 0) {
339
cblock->aio_fildes = -1;
340
/*
341
* md->refcount is incremented in record__aio_pushfn() for
342
* every aio write request started in record__aio_push() so
343
* decrement it because the request is now complete.
344
*/
345
perf_mmap__put(&md->core);
346
rc = 1;
347
} else {
348
/*
349
* aio write request may require restart with the
350
* remainder if the kernel didn't write whole
351
* chunk at once.
352
*/
353
rem_off = cblock->aio_offset + written;
354
rem_buf = (void *)(cblock->aio_buf + written);
355
record__aio_write(cblock, cblock->aio_fildes,
356
rem_buf, rem_size, rem_off);
357
rc = 0;
358
}
359
360
return rc;
361
}
362
363
static int record__aio_sync(struct mmap *md, bool sync_all)
364
{
365
struct aiocb **aiocb = md->aio.aiocb;
366
struct aiocb *cblocks = md->aio.cblocks;
367
struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
368
int i, do_suspend;
369
370
do {
371
do_suspend = 0;
372
for (i = 0; i < md->aio.nr_cblocks; ++i) {
373
if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
374
if (sync_all)
375
aiocb[i] = NULL;
376
else
377
return i;
378
} else {
379
/*
380
* Started aio write is not complete yet
381
* so it has to be waited before the
382
* next allocation.
383
*/
384
aiocb[i] = &cblocks[i];
385
do_suspend = 1;
386
}
387
}
388
if (!do_suspend)
389
return -1;
390
391
while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
392
if (!(errno == EAGAIN || errno == EINTR))
393
pr_err("failed to sync perf data, error: %m\n");
394
}
395
} while (1);
396
}
397
398
struct record_aio {
399
struct record *rec;
400
void *data;
401
size_t size;
402
};
403
404
static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
405
{
406
struct record_aio *aio = to;
407
408
/*
409
* map->core.base data pointed by buf is copied into free map->aio.data[] buffer
410
* to release space in the kernel buffer as fast as possible, calling
411
* perf_mmap__consume() from perf_mmap__push() function.
412
*
413
* That lets the kernel to proceed with storing more profiling data into
414
* the kernel buffer earlier than other per-cpu kernel buffers are handled.
415
*
416
* Coping can be done in two steps in case the chunk of profiling data
417
* crosses the upper bound of the kernel buffer. In this case we first move
418
* part of data from map->start till the upper bound and then the remainder
419
* from the beginning of the kernel buffer till the end of the data chunk.
420
*/
421
422
if (record__comp_enabled(aio->rec)) {
423
ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
424
mmap__mmap_len(map) - aio->size,
425
buf, size);
426
if (compressed < 0)
427
return (int)compressed;
428
429
size = compressed;
430
} else {
431
memcpy(aio->data + aio->size, buf, size);
432
}
433
434
if (!aio->size) {
435
/*
436
* Increment map->refcount to guard map->aio.data[] buffer
437
* from premature deallocation because map object can be
438
* released earlier than aio write request started on
439
* map->aio.data[] buffer is complete.
440
*
441
* perf_mmap__put() is done at record__aio_complete()
442
* after started aio request completion or at record__aio_push()
443
* if the request failed to start.
444
*/
445
perf_mmap__get(&map->core);
446
}
447
448
aio->size += size;
449
450
return size;
451
}
452
453
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
454
{
455
int ret, idx;
456
int trace_fd = rec->session->data->file.fd;
457
struct record_aio aio = { .rec = rec, .size = 0 };
458
459
/*
460
* Call record__aio_sync() to wait till map->aio.data[] buffer
461
* becomes available after previous aio write operation.
462
*/
463
464
idx = record__aio_sync(map, false);
465
aio.data = map->aio.data[idx];
466
ret = perf_mmap__push(map, &aio, record__aio_pushfn);
467
if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
468
return ret;
469
470
rec->samples++;
471
ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
472
if (!ret) {
473
*off += aio.size;
474
rec->bytes_written += aio.size;
475
if (switch_output_size(rec))
476
trigger_hit(&switch_output_trigger);
477
} else {
478
/*
479
* Decrement map->refcount incremented in record__aio_pushfn()
480
* back if record__aio_write() operation failed to start, otherwise
481
* map->refcount is decremented in record__aio_complete() after
482
* aio write operation finishes successfully.
483
*/
484
perf_mmap__put(&map->core);
485
}
486
487
return ret;
488
}
489
490
static off_t record__aio_get_pos(int trace_fd)
491
{
492
return lseek(trace_fd, 0, SEEK_CUR);
493
}
494
495
static void record__aio_set_pos(int trace_fd, off_t pos)
496
{
497
lseek(trace_fd, pos, SEEK_SET);
498
}
499
500
static void record__aio_mmap_read_sync(struct record *rec)
501
{
502
int i;
503
struct evlist *evlist = rec->evlist;
504
struct mmap *maps = evlist->mmap;
505
506
if (!record__aio_enabled(rec))
507
return;
508
509
for (i = 0; i < evlist->core.nr_mmaps; i++) {
510
struct mmap *map = &maps[i];
511
512
if (map->core.base)
513
record__aio_sync(map, true);
514
}
515
}
516
517
static int nr_cblocks_default = 1;
518
static int nr_cblocks_max = 4;
519
520
static int record__aio_parse(const struct option *opt,
521
const char *str,
522
int unset)
523
{
524
struct record_opts *opts = (struct record_opts *)opt->value;
525
526
if (unset) {
527
opts->nr_cblocks = 0;
528
} else {
529
if (str)
530
opts->nr_cblocks = strtol(str, NULL, 0);
531
if (!opts->nr_cblocks)
532
opts->nr_cblocks = nr_cblocks_default;
533
}
534
535
return 0;
536
}
537
#else /* HAVE_AIO_SUPPORT */
538
static int nr_cblocks_max = 0;
539
540
static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
541
off_t *off __maybe_unused)
542
{
543
return -1;
544
}
545
546
static off_t record__aio_get_pos(int trace_fd __maybe_unused)
547
{
548
return -1;
549
}
550
551
static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
552
{
553
}
554
555
static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
556
{
557
}
558
#endif
559
560
static int record__aio_enabled(struct record *rec)
561
{
562
return rec->opts.nr_cblocks > 0;
563
}
564
565
#define MMAP_FLUSH_DEFAULT 1
566
static int record__mmap_flush_parse(const struct option *opt,
567
const char *str,
568
int unset)
569
{
570
int flush_max;
571
struct record_opts *opts = (struct record_opts *)opt->value;
572
static struct parse_tag tags[] = {
573
{ .tag = 'B', .mult = 1 },
574
{ .tag = 'K', .mult = 1 << 10 },
575
{ .tag = 'M', .mult = 1 << 20 },
576
{ .tag = 'G', .mult = 1 << 30 },
577
{ .tag = 0 },
578
};
579
580
if (unset)
581
return 0;
582
583
if (str) {
584
opts->mmap_flush = parse_tag_value(str, tags);
585
if (opts->mmap_flush == (int)-1)
586
opts->mmap_flush = strtol(str, NULL, 0);
587
}
588
589
if (!opts->mmap_flush)
590
opts->mmap_flush = MMAP_FLUSH_DEFAULT;
591
592
flush_max = evlist__mmap_size(opts->mmap_pages);
593
flush_max /= 4;
594
if (opts->mmap_flush > flush_max)
595
opts->mmap_flush = flush_max;
596
597
return 0;
598
}
599
600
#ifdef HAVE_ZSTD_SUPPORT
601
static unsigned int comp_level_default = 1;
602
603
static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
604
{
605
struct record_opts *opts = opt->value;
606
607
if (unset) {
608
opts->comp_level = 0;
609
} else {
610
if (str)
611
opts->comp_level = strtol(str, NULL, 0);
612
if (!opts->comp_level)
613
opts->comp_level = comp_level_default;
614
}
615
616
return 0;
617
}
618
#endif
619
static unsigned int comp_level_max = 22;
620
621
static int record__comp_enabled(struct record *rec)
622
{
623
return rec->opts.comp_level > 0;
624
}
625
626
static int process_synthesized_event(const struct perf_tool *tool,
627
union perf_event *event,
628
struct perf_sample *sample __maybe_unused,
629
struct machine *machine __maybe_unused)
630
{
631
struct record *rec = container_of(tool, struct record, tool);
632
return record__write(rec, NULL, event, event->header.size);
633
}
634
635
static struct mutex synth_lock;
636
637
static int process_locked_synthesized_event(const struct perf_tool *tool,
638
union perf_event *event,
639
struct perf_sample *sample __maybe_unused,
640
struct machine *machine __maybe_unused)
641
{
642
int ret;
643
644
mutex_lock(&synth_lock);
645
ret = process_synthesized_event(tool, event, sample, machine);
646
mutex_unlock(&synth_lock);
647
return ret;
648
}
649
650
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
651
{
652
struct record *rec = to;
653
654
if (record__comp_enabled(rec)) {
655
struct perf_record_compressed2 *event = map->data;
656
size_t padding = 0;
657
u8 pad[8] = {0};
658
ssize_t compressed = zstd_compress(rec->session, map, map->data,
659
mmap__mmap_len(map), bf, size);
660
661
if (compressed < 0)
662
return (int)compressed;
663
664
bf = event;
665
thread->samples++;
666
667
/*
668
* The record from `zstd_compress` is not 8 bytes aligned, which would cause asan
669
* error. We make it aligned here.
670
*/
671
event->data_size = compressed - sizeof(struct perf_record_compressed2);
672
event->header.size = PERF_ALIGN(compressed, sizeof(u64));
673
padding = event->header.size - compressed;
674
return record__write(rec, map, bf, compressed) ||
675
record__write(rec, map, &pad, padding);
676
}
677
678
thread->samples++;
679
return record__write(rec, map, bf, size);
680
}
681
682
static volatile sig_atomic_t signr = -1;
683
static volatile sig_atomic_t child_finished;
684
#ifdef HAVE_EVENTFD_SUPPORT
685
static volatile sig_atomic_t done_fd = -1;
686
#endif
687
688
static void sig_handler(int sig)
689
{
690
if (sig == SIGCHLD)
691
child_finished = 1;
692
else
693
signr = sig;
694
695
done = 1;
696
#ifdef HAVE_EVENTFD_SUPPORT
697
if (done_fd >= 0) {
698
u64 tmp = 1;
699
int orig_errno = errno;
700
701
/*
702
* It is possible for this signal handler to run after done is
703
* checked in the main loop, but before the perf counter fds are
704
* polled. If this happens, the poll() will continue to wait
705
* even though done is set, and will only break out if either
706
* another signal is received, or the counters are ready for
707
* read. To ensure the poll() doesn't sleep when done is set,
708
* use an eventfd (done_fd) to wake up the poll().
709
*/
710
if (write(done_fd, &tmp, sizeof(tmp)) < 0)
711
pr_err("failed to signal wakeup fd, error: %m\n");
712
713
errno = orig_errno;
714
}
715
#endif // HAVE_EVENTFD_SUPPORT
716
}
717
718
static void sigsegv_handler(int sig)
719
{
720
perf_hooks__recover();
721
sighandler_dump_stack(sig);
722
}
723
724
static void record__sig_exit(void)
725
{
726
if (signr == -1)
727
return;
728
729
signal(signr, SIG_DFL);
730
raise(signr);
731
}
732
733
#ifdef HAVE_AUXTRACE_SUPPORT
734
735
static int record__process_auxtrace(const struct perf_tool *tool,
736
struct mmap *map,
737
union perf_event *event, void *data1,
738
size_t len1, void *data2, size_t len2)
739
{
740
struct record *rec = container_of(tool, struct record, tool);
741
struct perf_data *data = &rec->data;
742
size_t padding;
743
u8 pad[8] = {0};
744
745
if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
746
off_t file_offset;
747
int fd = perf_data__fd(data);
748
int err;
749
750
file_offset = lseek(fd, 0, SEEK_CUR);
751
if (file_offset == -1)
752
return -1;
753
err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
754
event, file_offset);
755
if (err)
756
return err;
757
}
758
759
/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
760
padding = (len1 + len2) & 7;
761
if (padding)
762
padding = 8 - padding;
763
764
record__write(rec, map, event, event->header.size);
765
record__write(rec, map, data1, len1);
766
if (len2)
767
record__write(rec, map, data2, len2);
768
record__write(rec, map, &pad, padding);
769
770
return 0;
771
}
772
773
static int record__auxtrace_mmap_read(struct record *rec,
774
struct mmap *map)
775
{
776
int ret;
777
778
ret = auxtrace_mmap__read(map, rec->itr,
779
perf_session__env(rec->session),
780
&rec->tool,
781
record__process_auxtrace);
782
if (ret < 0)
783
return ret;
784
785
if (ret)
786
rec->samples++;
787
788
return 0;
789
}
790
791
static int record__auxtrace_mmap_read_snapshot(struct record *rec,
792
struct mmap *map)
793
{
794
int ret;
795
796
ret = auxtrace_mmap__read_snapshot(map, rec->itr,
797
perf_session__env(rec->session),
798
&rec->tool,
799
record__process_auxtrace,
800
rec->opts.auxtrace_snapshot_size);
801
if (ret < 0)
802
return ret;
803
804
if (ret)
805
rec->samples++;
806
807
return 0;
808
}
809
810
static int record__auxtrace_read_snapshot_all(struct record *rec)
811
{
812
int i;
813
int rc = 0;
814
815
for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
816
struct mmap *map = &rec->evlist->mmap[i];
817
818
if (!map->auxtrace_mmap.base)
819
continue;
820
821
if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
822
rc = -1;
823
goto out;
824
}
825
}
826
out:
827
return rc;
828
}
829
830
static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
831
{
832
pr_debug("Recording AUX area tracing snapshot\n");
833
if (record__auxtrace_read_snapshot_all(rec) < 0) {
834
trigger_error(&auxtrace_snapshot_trigger);
835
} else {
836
if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
837
trigger_error(&auxtrace_snapshot_trigger);
838
else
839
trigger_ready(&auxtrace_snapshot_trigger);
840
}
841
}
842
843
static int record__auxtrace_snapshot_exit(struct record *rec)
844
{
845
if (trigger_is_error(&auxtrace_snapshot_trigger))
846
return 0;
847
848
if (!auxtrace_record__snapshot_started &&
849
auxtrace_record__snapshot_start(rec->itr))
850
return -1;
851
852
record__read_auxtrace_snapshot(rec, true);
853
if (trigger_is_error(&auxtrace_snapshot_trigger))
854
return -1;
855
856
return 0;
857
}
858
859
static int record__auxtrace_init(struct record *rec)
860
{
861
int err;
862
863
if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
864
&& record__threads_enabled(rec)) {
865
pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
866
return -EINVAL;
867
}
868
869
if (!rec->itr) {
870
rec->itr = auxtrace_record__init(rec->evlist, &err);
871
if (err)
872
return err;
873
}
874
875
err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
876
rec->opts.auxtrace_snapshot_opts);
877
if (err)
878
return err;
879
880
err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
881
rec->opts.auxtrace_sample_opts);
882
if (err)
883
return err;
884
885
err = auxtrace_parse_aux_action(rec->evlist);
886
if (err)
887
return err;
888
889
return auxtrace_parse_filters(rec->evlist);
890
}
891
892
#else
893
894
static inline
895
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
896
struct mmap *map __maybe_unused)
897
{
898
return 0;
899
}
900
901
static inline
902
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
903
bool on_exit __maybe_unused)
904
{
905
}
906
907
static inline
908
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
909
{
910
return 0;
911
}
912
913
static inline
914
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
915
{
916
return 0;
917
}
918
919
static int record__auxtrace_init(struct record *rec __maybe_unused)
920
{
921
return 0;
922
}
923
924
#endif
925
926
static int record__config_text_poke(struct evlist *evlist)
927
{
928
struct evsel *evsel;
929
930
/* Nothing to do if text poke is already configured */
931
evlist__for_each_entry(evlist, evsel) {
932
if (evsel->core.attr.text_poke)
933
return 0;
934
}
935
936
evsel = evlist__add_dummy_on_all_cpus(evlist);
937
if (!evsel)
938
return -ENOMEM;
939
940
evsel->core.attr.text_poke = 1;
941
evsel->core.attr.ksymbol = 1;
942
evsel->immediate = true;
943
evsel__set_sample_bit(evsel, TIME);
944
945
return 0;
946
}
947
948
static int record__config_off_cpu(struct record *rec)
949
{
950
return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
951
}
952
953
static bool record__tracking_system_wide(struct record *rec)
954
{
955
struct evlist *evlist = rec->evlist;
956
struct evsel *evsel;
957
958
/*
959
* If non-dummy evsel exists, system_wide sideband is need to
960
* help parse sample information.
961
* For example, PERF_EVENT_MMAP event to help parse symbol,
962
* and PERF_EVENT_COMM event to help parse task executable name.
963
*/
964
evlist__for_each_entry(evlist, evsel) {
965
if (!evsel__is_dummy_event(evsel))
966
return true;
967
}
968
969
return false;
970
}
971
972
static int record__config_tracking_events(struct record *rec)
973
{
974
struct record_opts *opts = &rec->opts;
975
struct evlist *evlist = rec->evlist;
976
bool system_wide = false;
977
struct evsel *evsel;
978
979
/*
980
* For initial_delay, system wide or a hybrid system, we need to add
981
* tracking event so that we can track PERF_RECORD_MMAP to cover the
982
* delay of waiting or event synthesis.
983
*/
984
if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
985
perf_pmus__num_core_pmus() > 1) {
986
987
/*
988
* User space tasks can migrate between CPUs, so when tracing
989
* selected CPUs, sideband for all CPUs is still needed.
990
*/
991
if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
992
system_wide = true;
993
994
evsel = evlist__findnew_tracking_event(evlist, system_wide);
995
if (!evsel)
996
return -ENOMEM;
997
998
/*
999
* Enable the tracking event when the process is forked for
1000
* initial_delay, immediately for system wide.
1001
*/
1002
if (opts->target.initial_delay && !evsel->immediate &&
1003
!target__has_cpu(&opts->target))
1004
evsel->core.attr.enable_on_exec = 1;
1005
else
1006
evsel->immediate = 1;
1007
}
1008
1009
return 0;
1010
}
1011
1012
static bool record__kcore_readable(struct machine *machine)
1013
{
1014
char kcore[PATH_MAX];
1015
int fd;
1016
1017
scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
1018
1019
fd = open(kcore, O_RDONLY);
1020
if (fd < 0)
1021
return false;
1022
1023
close(fd);
1024
1025
return true;
1026
}
1027
1028
static int record__kcore_copy(struct machine *machine, struct perf_data *data)
1029
{
1030
char from_dir[PATH_MAX];
1031
char kcore_dir[PATH_MAX];
1032
int ret;
1033
1034
snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
1035
1036
ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
1037
if (ret)
1038
return ret;
1039
1040
return kcore_copy(from_dir, kcore_dir);
1041
}
1042
1043
static void record__thread_data_init_pipes(struct record_thread *thread_data)
1044
{
1045
thread_data->pipes.msg[0] = -1;
1046
thread_data->pipes.msg[1] = -1;
1047
thread_data->pipes.ack[0] = -1;
1048
thread_data->pipes.ack[1] = -1;
1049
}
1050
1051
static int record__thread_data_open_pipes(struct record_thread *thread_data)
1052
{
1053
if (pipe(thread_data->pipes.msg))
1054
return -EINVAL;
1055
1056
if (pipe(thread_data->pipes.ack)) {
1057
close(thread_data->pipes.msg[0]);
1058
thread_data->pipes.msg[0] = -1;
1059
close(thread_data->pipes.msg[1]);
1060
thread_data->pipes.msg[1] = -1;
1061
return -EINVAL;
1062
}
1063
1064
pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1065
thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1066
thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1067
1068
return 0;
1069
}
1070
1071
static void record__thread_data_close_pipes(struct record_thread *thread_data)
1072
{
1073
if (thread_data->pipes.msg[0] != -1) {
1074
close(thread_data->pipes.msg[0]);
1075
thread_data->pipes.msg[0] = -1;
1076
}
1077
if (thread_data->pipes.msg[1] != -1) {
1078
close(thread_data->pipes.msg[1]);
1079
thread_data->pipes.msg[1] = -1;
1080
}
1081
if (thread_data->pipes.ack[0] != -1) {
1082
close(thread_data->pipes.ack[0]);
1083
thread_data->pipes.ack[0] = -1;
1084
}
1085
if (thread_data->pipes.ack[1] != -1) {
1086
close(thread_data->pipes.ack[1]);
1087
thread_data->pipes.ack[1] = -1;
1088
}
1089
}
1090
1091
static bool evlist__per_thread(struct evlist *evlist)
1092
{
1093
return cpu_map__is_dummy(evlist->core.user_requested_cpus);
1094
}
1095
1096
static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
1097
{
1098
int m, tm, nr_mmaps = evlist->core.nr_mmaps;
1099
struct mmap *mmap = evlist->mmap;
1100
struct mmap *overwrite_mmap = evlist->overwrite_mmap;
1101
struct perf_cpu_map *cpus = evlist->core.all_cpus;
1102
bool per_thread = evlist__per_thread(evlist);
1103
1104
if (per_thread)
1105
thread_data->nr_mmaps = nr_mmaps;
1106
else
1107
thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
1108
thread_data->mask->maps.nbits);
1109
if (mmap) {
1110
thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1111
if (!thread_data->maps)
1112
return -ENOMEM;
1113
}
1114
if (overwrite_mmap) {
1115
thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1116
if (!thread_data->overwrite_maps) {
1117
zfree(&thread_data->maps);
1118
return -ENOMEM;
1119
}
1120
}
1121
pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1122
thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1123
1124
for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1125
if (per_thread ||
1126
test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1127
if (thread_data->maps) {
1128
thread_data->maps[tm] = &mmap[m];
1129
pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1130
thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1131
}
1132
if (thread_data->overwrite_maps) {
1133
thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1134
pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1135
thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1136
}
1137
tm++;
1138
}
1139
}
1140
1141
return 0;
1142
}
1143
1144
static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1145
{
1146
int f, tm, pos;
1147
struct mmap *map, *overwrite_map;
1148
1149
fdarray__init(&thread_data->pollfd, 64);
1150
1151
for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1152
map = thread_data->maps ? thread_data->maps[tm] : NULL;
1153
overwrite_map = thread_data->overwrite_maps ?
1154
thread_data->overwrite_maps[tm] : NULL;
1155
1156
for (f = 0; f < evlist->core.pollfd.nr; f++) {
1157
void *ptr = evlist->core.pollfd.priv[f].ptr;
1158
1159
if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1160
pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1161
&evlist->core.pollfd);
1162
if (pos < 0)
1163
return pos;
1164
pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1165
thread_data, pos, evlist->core.pollfd.entries[f].fd);
1166
}
1167
}
1168
}
1169
1170
return 0;
1171
}
1172
1173
static void record__free_thread_data(struct record *rec)
1174
{
1175
int t;
1176
struct record_thread *thread_data = rec->thread_data;
1177
1178
if (thread_data == NULL)
1179
return;
1180
1181
for (t = 0; t < rec->nr_threads; t++) {
1182
record__thread_data_close_pipes(&thread_data[t]);
1183
zfree(&thread_data[t].maps);
1184
zfree(&thread_data[t].overwrite_maps);
1185
fdarray__exit(&thread_data[t].pollfd);
1186
}
1187
1188
zfree(&rec->thread_data);
1189
}
1190
1191
static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1192
int evlist_pollfd_index,
1193
int thread_pollfd_index)
1194
{
1195
size_t x = rec->index_map_cnt;
1196
1197
if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1198
return -ENOMEM;
1199
rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1200
rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1201
rec->index_map_cnt += 1;
1202
return 0;
1203
}
1204
1205
static int record__update_evlist_pollfd_from_thread(struct record *rec,
1206
struct evlist *evlist,
1207
struct record_thread *thread_data)
1208
{
1209
struct pollfd *e_entries = evlist->core.pollfd.entries;
1210
struct pollfd *t_entries = thread_data->pollfd.entries;
1211
int err = 0;
1212
size_t i;
1213
1214
for (i = 0; i < rec->index_map_cnt; i++) {
1215
int e_pos = rec->index_map[i].evlist_pollfd_index;
1216
int t_pos = rec->index_map[i].thread_pollfd_index;
1217
1218
if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1219
e_entries[e_pos].events != t_entries[t_pos].events) {
1220
pr_err("Thread and evlist pollfd index mismatch\n");
1221
err = -EINVAL;
1222
continue;
1223
}
1224
e_entries[e_pos].revents = t_entries[t_pos].revents;
1225
}
1226
return err;
1227
}
1228
1229
static int record__dup_non_perf_events(struct record *rec,
1230
struct evlist *evlist,
1231
struct record_thread *thread_data)
1232
{
1233
struct fdarray *fda = &evlist->core.pollfd;
1234
int i, ret;
1235
1236
for (i = 0; i < fda->nr; i++) {
1237
if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1238
continue;
1239
ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1240
if (ret < 0) {
1241
pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1242
return ret;
1243
}
1244
pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1245
thread_data, ret, fda->entries[i].fd);
1246
ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1247
if (ret < 0) {
1248
pr_err("Failed to map thread and evlist pollfd indexes\n");
1249
return ret;
1250
}
1251
}
1252
return 0;
1253
}
1254
1255
static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1256
{
1257
int t, ret;
1258
struct record_thread *thread_data;
1259
1260
rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1261
if (!rec->thread_data) {
1262
pr_err("Failed to allocate thread data\n");
1263
return -ENOMEM;
1264
}
1265
thread_data = rec->thread_data;
1266
1267
for (t = 0; t < rec->nr_threads; t++)
1268
record__thread_data_init_pipes(&thread_data[t]);
1269
1270
for (t = 0; t < rec->nr_threads; t++) {
1271
thread_data[t].rec = rec;
1272
thread_data[t].mask = &rec->thread_masks[t];
1273
ret = record__thread_data_init_maps(&thread_data[t], evlist);
1274
if (ret) {
1275
pr_err("Failed to initialize thread[%d] maps\n", t);
1276
goto out_free;
1277
}
1278
ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1279
if (ret) {
1280
pr_err("Failed to initialize thread[%d] pollfd\n", t);
1281
goto out_free;
1282
}
1283
if (t) {
1284
thread_data[t].tid = -1;
1285
ret = record__thread_data_open_pipes(&thread_data[t]);
1286
if (ret) {
1287
pr_err("Failed to open thread[%d] communication pipes\n", t);
1288
goto out_free;
1289
}
1290
ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1291
POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1292
if (ret < 0) {
1293
pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1294
goto out_free;
1295
}
1296
thread_data[t].ctlfd_pos = ret;
1297
pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1298
thread_data, thread_data[t].ctlfd_pos,
1299
thread_data[t].pipes.msg[0]);
1300
} else {
1301
thread_data[t].tid = gettid();
1302
1303
ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
1304
if (ret < 0)
1305
goto out_free;
1306
1307
thread_data[t].ctlfd_pos = -1; /* Not used */
1308
}
1309
}
1310
1311
return 0;
1312
1313
out_free:
1314
record__free_thread_data(rec);
1315
1316
return ret;
1317
}
1318
1319
static int record__mmap_evlist(struct record *rec,
1320
struct evlist *evlist)
1321
{
1322
int i, ret;
1323
struct record_opts *opts = &rec->opts;
1324
bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1325
opts->auxtrace_sample_mode;
1326
char msg[512];
1327
1328
if (opts->affinity != PERF_AFFINITY_SYS)
1329
cpu__setup_cpunode_map();
1330
1331
if (evlist__mmap_ex(evlist, opts->mmap_pages,
1332
opts->auxtrace_mmap_pages,
1333
auxtrace_overwrite,
1334
opts->nr_cblocks, opts->affinity,
1335
opts->mmap_flush, opts->comp_level) < 0) {
1336
if (errno == EPERM) {
1337
pr_err("Permission error mapping pages.\n"
1338
"Consider increasing "
1339
"/proc/sys/kernel/perf_event_mlock_kb,\n"
1340
"or try again with a smaller value of -m/--mmap_pages.\n"
1341
"(current value: %u,%u)\n",
1342
opts->mmap_pages, opts->auxtrace_mmap_pages);
1343
return -errno;
1344
} else {
1345
pr_err("failed to mmap with %d (%s)\n", errno,
1346
str_error_r(errno, msg, sizeof(msg)));
1347
if (errno)
1348
return -errno;
1349
else
1350
return -EINVAL;
1351
}
1352
}
1353
1354
if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1355
return -1;
1356
1357
ret = record__alloc_thread_data(rec, evlist);
1358
if (ret)
1359
return ret;
1360
1361
if (record__threads_enabled(rec)) {
1362
ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1363
if (ret) {
1364
pr_err("Failed to create data directory: %s\n", strerror(-ret));
1365
return ret;
1366
}
1367
for (i = 0; i < evlist->core.nr_mmaps; i++) {
1368
if (evlist->mmap)
1369
evlist->mmap[i].file = &rec->data.dir.files[i];
1370
if (evlist->overwrite_mmap)
1371
evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1372
}
1373
}
1374
1375
return 0;
1376
}
1377
1378
static int record__mmap(struct record *rec)
1379
{
1380
return record__mmap_evlist(rec, rec->evlist);
1381
}
1382
1383
static int record__open(struct record *rec)
1384
{
1385
char msg[BUFSIZ];
1386
struct evsel *pos;
1387
struct evlist *evlist = rec->evlist;
1388
struct perf_session *session = rec->session;
1389
struct record_opts *opts = &rec->opts;
1390
int rc = 0;
1391
1392
evlist__for_each_entry(evlist, pos) {
1393
try_again:
1394
if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1395
if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
1396
if (verbose > 0)
1397
ui__warning("%s\n", msg);
1398
goto try_again;
1399
}
1400
if ((errno == EINVAL || errno == EBADF) &&
1401
pos->core.leader != &pos->core &&
1402
pos->weak_group) {
1403
pos = evlist__reset_weak_group(evlist, pos, true);
1404
goto try_again;
1405
}
1406
rc = -errno;
1407
evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1408
ui__error("%s\n", msg);
1409
goto out;
1410
}
1411
1412
pos->supported = true;
1413
}
1414
1415
if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1416
pr_warning(
1417
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1418
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1419
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1420
"file is not found in the buildid cache or in the vmlinux path.\n\n"
1421
"Samples in kernel modules won't be resolved at all.\n\n"
1422
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1423
"even with a suitable vmlinux or kallsyms file.\n\n");
1424
}
1425
1426
if (evlist__apply_filters(evlist, &pos, &opts->target)) {
1427
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1428
pos->filter ?: "BPF", evsel__name(pos), errno,
1429
str_error_r(errno, msg, sizeof(msg)));
1430
rc = -1;
1431
goto out;
1432
}
1433
1434
rc = record__mmap(rec);
1435
if (rc)
1436
goto out;
1437
1438
session->evlist = evlist;
1439
perf_session__set_id_hdr_size(session);
1440
out:
1441
return rc;
1442
}
1443
1444
static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1445
{
1446
if (rec->evlist->first_sample_time == 0)
1447
rec->evlist->first_sample_time = sample_time;
1448
1449
if (sample_time)
1450
rec->evlist->last_sample_time = sample_time;
1451
}
1452
1453
static int process_sample_event(const struct perf_tool *tool,
1454
union perf_event *event,
1455
struct perf_sample *sample,
1456
struct evsel *evsel,
1457
struct machine *machine)
1458
{
1459
struct record *rec = container_of(tool, struct record, tool);
1460
1461
set_timestamp_boundary(rec, sample->time);
1462
1463
if (rec->buildid_all)
1464
return 0;
1465
1466
rec->samples++;
1467
return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1468
}
1469
1470
static int process_buildids(struct record *rec)
1471
{
1472
struct perf_session *session = rec->session;
1473
1474
if (perf_data__size(&rec->data) == 0)
1475
return 0;
1476
1477
/*
1478
* During this process, it'll load kernel map and replace the
1479
* dso->long_name to a real pathname it found. In this case
1480
* we prefer the vmlinux path like
1481
* /lib/modules/3.16.4/build/vmlinux
1482
*
1483
* rather than build-id path (in debug directory).
1484
* $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1485
*/
1486
symbol_conf.ignore_vmlinux_buildid = true;
1487
1488
/*
1489
* If --buildid-all is given, it marks all DSO regardless of hits,
1490
* so no need to process samples. But if timestamp_boundary is enabled,
1491
* it still needs to walk on all samples to get the timestamps of
1492
* first/last samples.
1493
*/
1494
if (rec->buildid_all && !rec->timestamp_boundary)
1495
rec->tool.sample = process_event_sample_stub;
1496
1497
return perf_session__process_events(session);
1498
}
1499
1500
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1501
{
1502
int err;
1503
struct perf_tool *tool = data;
1504
/*
1505
*As for guest kernel when processing subcommand record&report,
1506
*we arrange module mmap prior to guest kernel mmap and trigger
1507
*a preload dso because default guest module symbols are loaded
1508
*from guest kallsyms instead of /lib/modules/XXX/XXX. This
1509
*method is used to avoid symbol missing when the first addr is
1510
*in module instead of in guest kernel.
1511
*/
1512
err = perf_event__synthesize_modules(tool, process_synthesized_event,
1513
machine);
1514
if (err < 0)
1515
pr_err("Couldn't record guest kernel [%d]'s reference"
1516
" relocation symbol.\n", machine->pid);
1517
1518
/*
1519
* We use _stext for guest kernel because guest kernel's /proc/kallsyms
1520
* have no _text sometimes.
1521
*/
1522
err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1523
machine);
1524
if (err < 0)
1525
pr_err("Couldn't record guest kernel [%d]'s reference"
1526
" relocation symbol.\n", machine->pid);
1527
}
1528
1529
static struct perf_event_header finished_round_event = {
1530
.size = sizeof(struct perf_event_header),
1531
.type = PERF_RECORD_FINISHED_ROUND,
1532
};
1533
1534
static struct perf_event_header finished_init_event = {
1535
.size = sizeof(struct perf_event_header),
1536
.type = PERF_RECORD_FINISHED_INIT,
1537
};
1538
1539
static void record__adjust_affinity(struct record *rec, struct mmap *map)
1540
{
1541
if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1542
!bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1543
thread->mask->affinity.nbits)) {
1544
bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1545
bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1546
map->affinity_mask.bits, thread->mask->affinity.nbits);
1547
sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1548
(cpu_set_t *)thread->mask->affinity.bits);
1549
if (verbose == 2) {
1550
pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1551
mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1552
}
1553
}
1554
}
1555
1556
static size_t process_comp_header(void *record, size_t increment)
1557
{
1558
struct perf_record_compressed2 *event = record;
1559
size_t size = sizeof(*event);
1560
1561
if (increment) {
1562
event->header.size += increment;
1563
return increment;
1564
}
1565
1566
event->header.type = PERF_RECORD_COMPRESSED2;
1567
event->header.size = size;
1568
1569
return size;
1570
}
1571
1572
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
1573
void *dst, size_t dst_size, void *src, size_t src_size)
1574
{
1575
ssize_t compressed;
1576
size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed2) - 1;
1577
struct zstd_data *zstd_data = &session->zstd_data;
1578
1579
if (map && map->file)
1580
zstd_data = &map->zstd_data;
1581
1582
compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1583
max_record_size, process_comp_header);
1584
if (compressed < 0)
1585
return compressed;
1586
1587
if (map && map->file) {
1588
thread->bytes_transferred += src_size;
1589
thread->bytes_compressed += compressed;
1590
} else {
1591
session->bytes_transferred += src_size;
1592
session->bytes_compressed += compressed;
1593
}
1594
1595
return compressed;
1596
}
1597
1598
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1599
bool overwrite, bool synch)
1600
{
1601
u64 bytes_written = rec->bytes_written;
1602
int i;
1603
int rc = 0;
1604
int nr_mmaps;
1605
struct mmap **maps;
1606
int trace_fd = rec->data.file.fd;
1607
off_t off = 0;
1608
1609
if (!evlist)
1610
return 0;
1611
1612
nr_mmaps = thread->nr_mmaps;
1613
maps = overwrite ? thread->overwrite_maps : thread->maps;
1614
1615
if (!maps)
1616
return 0;
1617
1618
if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1619
return 0;
1620
1621
if (record__aio_enabled(rec))
1622
off = record__aio_get_pos(trace_fd);
1623
1624
for (i = 0; i < nr_mmaps; i++) {
1625
u64 flush = 0;
1626
struct mmap *map = maps[i];
1627
1628
if (map->core.base) {
1629
record__adjust_affinity(rec, map);
1630
if (synch) {
1631
flush = map->core.flush;
1632
map->core.flush = 1;
1633
}
1634
if (!record__aio_enabled(rec)) {
1635
if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1636
if (synch)
1637
map->core.flush = flush;
1638
rc = -1;
1639
goto out;
1640
}
1641
} else {
1642
if (record__aio_push(rec, map, &off) < 0) {
1643
record__aio_set_pos(trace_fd, off);
1644
if (synch)
1645
map->core.flush = flush;
1646
rc = -1;
1647
goto out;
1648
}
1649
}
1650
if (synch)
1651
map->core.flush = flush;
1652
}
1653
1654
if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1655
!rec->opts.auxtrace_sample_mode &&
1656
record__auxtrace_mmap_read(rec, map) != 0) {
1657
rc = -1;
1658
goto out;
1659
}
1660
}
1661
1662
if (record__aio_enabled(rec))
1663
record__aio_set_pos(trace_fd, off);
1664
1665
/*
1666
* Mark the round finished in case we wrote
1667
* at least one event.
1668
*
1669
* No need for round events in directory mode,
1670
* because per-cpu maps and files have data
1671
* sorted by kernel.
1672
*/
1673
if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1674
rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1675
1676
if (overwrite)
1677
evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1678
out:
1679
return rc;
1680
}
1681
1682
static int record__mmap_read_all(struct record *rec, bool synch)
1683
{
1684
int err;
1685
1686
err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1687
if (err)
1688
return err;
1689
1690
return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1691
}
1692
1693
static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1694
void *arg __maybe_unused)
1695
{
1696
struct perf_mmap *map = fda->priv[fd].ptr;
1697
1698
if (map)
1699
perf_mmap__put(map);
1700
}
1701
1702
static void *record__thread(void *arg)
1703
{
1704
enum thread_msg msg = THREAD_MSG__READY;
1705
bool terminate = false;
1706
struct fdarray *pollfd;
1707
int err, ctlfd_pos;
1708
1709
thread = arg;
1710
thread->tid = gettid();
1711
1712
err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1713
if (err == -1)
1714
pr_warning("threads[%d]: failed to notify on start: %s\n",
1715
thread->tid, strerror(errno));
1716
1717
pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1718
1719
pollfd = &thread->pollfd;
1720
ctlfd_pos = thread->ctlfd_pos;
1721
1722
for (;;) {
1723
unsigned long long hits = thread->samples;
1724
1725
if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1726
break;
1727
1728
if (hits == thread->samples) {
1729
1730
err = fdarray__poll(pollfd, -1);
1731
/*
1732
* Propagate error, only if there's any. Ignore positive
1733
* number of returned events and interrupt error.
1734
*/
1735
if (err > 0 || (err < 0 && errno == EINTR))
1736
err = 0;
1737
thread->waking++;
1738
1739
if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1740
record__thread_munmap_filtered, NULL) == 0)
1741
break;
1742
}
1743
1744
if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1745
terminate = true;
1746
close(thread->pipes.msg[0]);
1747
thread->pipes.msg[0] = -1;
1748
pollfd->entries[ctlfd_pos].fd = -1;
1749
pollfd->entries[ctlfd_pos].events = 0;
1750
}
1751
1752
pollfd->entries[ctlfd_pos].revents = 0;
1753
}
1754
record__mmap_read_all(thread->rec, true);
1755
1756
err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1757
if (err == -1)
1758
pr_warning("threads[%d]: failed to notify on termination: %s\n",
1759
thread->tid, strerror(errno));
1760
1761
return NULL;
1762
}
1763
1764
static void record__init_features(struct record *rec)
1765
{
1766
struct perf_session *session = rec->session;
1767
int feat;
1768
1769
for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1770
perf_header__set_feat(&session->header, feat);
1771
1772
if (rec->no_buildid)
1773
perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1774
1775
if (!have_tracepoints(&rec->evlist->core.entries))
1776
perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1777
1778
if (!rec->opts.branch_stack)
1779
perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1780
1781
if (!rec->opts.full_auxtrace)
1782
perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1783
1784
if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1785
perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1786
1787
if (!rec->opts.use_clockid)
1788
perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1789
1790
if (!record__threads_enabled(rec))
1791
perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1792
1793
if (!record__comp_enabled(rec))
1794
perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1795
1796
perf_header__clear_feat(&session->header, HEADER_STAT);
1797
}
1798
1799
static void
1800
record__finish_output(struct record *rec)
1801
{
1802
int i;
1803
struct perf_data *data = &rec->data;
1804
int fd = perf_data__fd(data);
1805
1806
if (data->is_pipe) {
1807
/* Just to display approx. size */
1808
data->file.size = rec->bytes_written;
1809
return;
1810
}
1811
1812
rec->session->header.data_size += rec->bytes_written;
1813
data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1814
if (record__threads_enabled(rec)) {
1815
for (i = 0; i < data->dir.nr; i++)
1816
data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1817
}
1818
1819
/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
1820
if (!rec->no_buildid) {
1821
process_buildids(rec);
1822
1823
if (rec->buildid_all)
1824
perf_session__dsos_hit_all(rec->session);
1825
}
1826
perf_session__write_header(rec->session, rec->evlist, fd, true);
1827
1828
return;
1829
}
1830
1831
static int record__synthesize_workload(struct record *rec, bool tail)
1832
{
1833
int err;
1834
struct perf_thread_map *thread_map;
1835
bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1836
1837
if (rec->opts.tail_synthesize != tail)
1838
return 0;
1839
1840
thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1841
if (thread_map == NULL)
1842
return -1;
1843
1844
err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1845
process_synthesized_event,
1846
&rec->session->machines.host,
1847
needs_mmap,
1848
rec->opts.sample_address);
1849
perf_thread_map__put(thread_map);
1850
return err;
1851
}
1852
1853
static int write_finished_init(struct record *rec, bool tail)
1854
{
1855
if (rec->opts.tail_synthesize != tail)
1856
return 0;
1857
1858
return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1859
}
1860
1861
static int record__synthesize(struct record *rec, bool tail);
1862
1863
static int
1864
record__switch_output(struct record *rec, bool at_exit)
1865
{
1866
struct perf_data *data = &rec->data;
1867
char *new_filename = NULL;
1868
int fd, err;
1869
1870
/* Same Size: "2015122520103046"*/
1871
char timestamp[] = "InvalidTimestamp";
1872
1873
record__aio_mmap_read_sync(rec);
1874
1875
write_finished_init(rec, true);
1876
1877
record__synthesize(rec, true);
1878
if (target__none(&rec->opts.target))
1879
record__synthesize_workload(rec, true);
1880
1881
rec->samples = 0;
1882
record__finish_output(rec);
1883
err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1884
if (err) {
1885
pr_err("Failed to get current timestamp\n");
1886
return -EINVAL;
1887
}
1888
1889
fd = perf_data__switch(data, timestamp,
1890
rec->session->header.data_offset,
1891
at_exit, &new_filename);
1892
if (fd >= 0 && !at_exit) {
1893
rec->bytes_written = 0;
1894
rec->session->header.data_size = 0;
1895
}
1896
1897
if (!quiet) {
1898
fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1899
data->path, timestamp);
1900
}
1901
1902
if (rec->switch_output.num_files) {
1903
int n = rec->switch_output.cur_file + 1;
1904
1905
if (n >= rec->switch_output.num_files)
1906
n = 0;
1907
rec->switch_output.cur_file = n;
1908
if (rec->switch_output.filenames[n]) {
1909
remove(rec->switch_output.filenames[n]);
1910
zfree(&rec->switch_output.filenames[n]);
1911
}
1912
rec->switch_output.filenames[n] = new_filename;
1913
} else {
1914
free(new_filename);
1915
}
1916
1917
/* Output tracking events */
1918
if (!at_exit) {
1919
record__synthesize(rec, false);
1920
1921
/*
1922
* In 'perf record --switch-output' without -a,
1923
* record__synthesize() in record__switch_output() won't
1924
* generate tracking events because there's no thread_map
1925
* in evlist. Which causes newly created perf.data doesn't
1926
* contain map and comm information.
1927
* Create a fake thread_map and directly call
1928
* perf_event__synthesize_thread_map() for those events.
1929
*/
1930
if (target__none(&rec->opts.target))
1931
record__synthesize_workload(rec, false);
1932
write_finished_init(rec, false);
1933
}
1934
return fd;
1935
}
1936
1937
static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1938
struct perf_record_lost_samples *lost,
1939
int cpu_idx, int thread_idx, u64 lost_count,
1940
u16 misc_flag)
1941
{
1942
struct perf_sample_id *sid;
1943
struct perf_sample sample;
1944
int id_hdr_size;
1945
1946
perf_sample__init(&sample, /*all=*/true);
1947
lost->lost = lost_count;
1948
if (evsel->core.ids) {
1949
sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1950
sample.id = sid->id;
1951
}
1952
1953
id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
1954
evsel->core.attr.sample_type, &sample);
1955
lost->header.size = sizeof(*lost) + id_hdr_size;
1956
lost->header.misc = misc_flag;
1957
record__write(rec, NULL, lost, lost->header.size);
1958
perf_sample__exit(&sample);
1959
}
1960
1961
static void record__read_lost_samples(struct record *rec)
1962
{
1963
struct perf_session *session = rec->session;
1964
struct perf_record_lost_samples_and_ids lost;
1965
struct evsel *evsel;
1966
1967
/* there was an error during record__open */
1968
if (session->evlist == NULL)
1969
return;
1970
1971
evlist__for_each_entry(session->evlist, evsel) {
1972
struct xyarray *xy = evsel->core.sample_id;
1973
u64 lost_count;
1974
1975
if (xy == NULL || evsel->core.fd == NULL)
1976
continue;
1977
if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
1978
xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
1979
pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
1980
continue;
1981
}
1982
1983
for (int x = 0; x < xyarray__max_x(xy); x++) {
1984
for (int y = 0; y < xyarray__max_y(xy); y++) {
1985
struct perf_counts_values count;
1986
1987
if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
1988
pr_debug("read LOST count failed\n");
1989
return;
1990
}
1991
1992
if (count.lost) {
1993
memset(&lost, 0, sizeof(lost));
1994
lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
1995
__record__save_lost_samples(rec, evsel, &lost.lost,
1996
x, y, count.lost, 0);
1997
}
1998
}
1999
}
2000
2001
lost_count = perf_bpf_filter__lost_count(evsel);
2002
if (lost_count) {
2003
memset(&lost, 0, sizeof(lost));
2004
lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
2005
__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
2006
PERF_RECORD_MISC_LOST_SAMPLES_BPF);
2007
}
2008
}
2009
}
2010
2011
static volatile sig_atomic_t workload_exec_errno;
2012
2013
/*
2014
* evlist__prepare_workload will send a SIGUSR1
2015
* if the fork fails, since we asked by setting its
2016
* want_signal to true.
2017
*/
2018
static void workload_exec_failed_signal(int signo __maybe_unused,
2019
siginfo_t *info,
2020
void *ucontext __maybe_unused)
2021
{
2022
workload_exec_errno = info->si_value.sival_int;
2023
done = 1;
2024
child_finished = 1;
2025
}
2026
2027
static void snapshot_sig_handler(int sig);
2028
static void alarm_sig_handler(int sig);
2029
2030
static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
2031
{
2032
if (evlist) {
2033
if (evlist->mmap && evlist->mmap[0].core.base)
2034
return evlist->mmap[0].core.base;
2035
if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
2036
return evlist->overwrite_mmap[0].core.base;
2037
}
2038
return NULL;
2039
}
2040
2041
static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
2042
{
2043
const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2044
if (pc)
2045
return pc;
2046
return NULL;
2047
}
2048
2049
static int record__synthesize(struct record *rec, bool tail)
2050
{
2051
struct perf_session *session = rec->session;
2052
struct machine *machine = &session->machines.host;
2053
struct perf_data *data = &rec->data;
2054
struct record_opts *opts = &rec->opts;
2055
struct perf_tool *tool = &rec->tool;
2056
int err = 0;
2057
event_op f = process_synthesized_event;
2058
2059
if (rec->opts.tail_synthesize != tail)
2060
return 0;
2061
2062
if (data->is_pipe) {
2063
err = perf_event__synthesize_for_pipe(tool, session, data,
2064
process_synthesized_event);
2065
if (err < 0)
2066
goto out;
2067
2068
rec->bytes_written += err;
2069
}
2070
2071
err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2072
process_synthesized_event, machine);
2073
if (err)
2074
goto out;
2075
2076
/* Synthesize id_index before auxtrace_info */
2077
err = perf_event__synthesize_id_index(tool,
2078
process_synthesized_event,
2079
session->evlist, machine);
2080
if (err)
2081
goto out;
2082
2083
if (rec->opts.full_auxtrace) {
2084
err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2085
session, process_synthesized_event);
2086
if (err)
2087
goto out;
2088
}
2089
2090
if (!evlist__exclude_kernel(rec->evlist)) {
2091
err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2092
machine);
2093
WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2094
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2095
"Check /proc/kallsyms permission or run as root.\n");
2096
2097
err = perf_event__synthesize_modules(tool, process_synthesized_event,
2098
machine);
2099
WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2100
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2101
"Check /proc/modules permission or run as root.\n");
2102
}
2103
2104
if (perf_guest) {
2105
machines__process_guests(&session->machines,
2106
perf_event__synthesize_guest_os, tool);
2107
}
2108
2109
err = perf_event__synthesize_extra_attr(&rec->tool,
2110
rec->evlist,
2111
process_synthesized_event,
2112
data->is_pipe);
2113
if (err)
2114
goto out;
2115
2116
err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2117
process_synthesized_event,
2118
NULL);
2119
if (err < 0) {
2120
pr_err("Couldn't synthesize thread map.\n");
2121
return err;
2122
}
2123
2124
err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2125
process_synthesized_event, NULL);
2126
if (err < 0) {
2127
pr_err("Couldn't synthesize cpu map.\n");
2128
return err;
2129
}
2130
2131
err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2132
machine, opts);
2133
if (err < 0) {
2134
pr_warning("Couldn't synthesize bpf events.\n");
2135
err = 0;
2136
}
2137
2138
if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2139
err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2140
machine);
2141
if (err < 0) {
2142
pr_warning("Couldn't synthesize cgroup events.\n");
2143
err = 0;
2144
}
2145
}
2146
2147
if (rec->opts.nr_threads_synthesize > 1) {
2148
mutex_init(&synth_lock);
2149
perf_set_multithreaded();
2150
f = process_locked_synthesized_event;
2151
}
2152
2153
if (rec->opts.synth & PERF_SYNTH_TASK) {
2154
bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2155
2156
err = __machine__synthesize_threads(machine, tool, &opts->target,
2157
rec->evlist->core.threads,
2158
f, needs_mmap, opts->sample_address,
2159
rec->opts.nr_threads_synthesize);
2160
}
2161
2162
if (rec->opts.nr_threads_synthesize > 1) {
2163
perf_set_singlethreaded();
2164
mutex_destroy(&synth_lock);
2165
}
2166
2167
out:
2168
return err;
2169
}
2170
2171
static void record__synthesize_final_bpf_metadata(struct record *rec __maybe_unused)
2172
{
2173
#ifdef HAVE_LIBBPF_SUPPORT
2174
perf_event__synthesize_final_bpf_metadata(rec->session,
2175
process_synthesized_event);
2176
#endif
2177
}
2178
2179
static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2180
{
2181
struct record *rec = data;
2182
pthread_kill(rec->thread_id, SIGUSR2);
2183
return 0;
2184
}
2185
2186
static int record__setup_sb_evlist(struct record *rec)
2187
{
2188
struct record_opts *opts = &rec->opts;
2189
2190
if (rec->sb_evlist != NULL) {
2191
/*
2192
* We get here if --switch-output-event populated the
2193
* sb_evlist, so associate a callback that will send a SIGUSR2
2194
* to the main thread.
2195
*/
2196
evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2197
rec->thread_id = pthread_self();
2198
}
2199
#ifdef HAVE_LIBBPF_SUPPORT
2200
if (!opts->no_bpf_event) {
2201
if (rec->sb_evlist == NULL) {
2202
rec->sb_evlist = evlist__new();
2203
2204
if (rec->sb_evlist == NULL) {
2205
pr_err("Couldn't create side band evlist.\n.");
2206
return -1;
2207
}
2208
}
2209
2210
if (evlist__add_bpf_sb_event(rec->sb_evlist, perf_session__env(rec->session))) {
2211
pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
2212
return -1;
2213
}
2214
}
2215
#endif
2216
if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2217
pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2218
opts->no_bpf_event = true;
2219
}
2220
2221
return 0;
2222
}
2223
2224
static int record__init_clock(struct record *rec)
2225
{
2226
struct perf_session *session = rec->session;
2227
struct timespec ref_clockid;
2228
struct timeval ref_tod;
2229
struct perf_env *env = perf_session__env(session);
2230
u64 ref;
2231
2232
if (!rec->opts.use_clockid)
2233
return 0;
2234
2235
if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2236
env->clock.clockid_res_ns = rec->opts.clockid_res_ns;
2237
2238
env->clock.clockid = rec->opts.clockid;
2239
2240
if (gettimeofday(&ref_tod, NULL) != 0) {
2241
pr_err("gettimeofday failed, cannot set reference time.\n");
2242
return -1;
2243
}
2244
2245
if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2246
pr_err("clock_gettime failed, cannot set reference time.\n");
2247
return -1;
2248
}
2249
2250
ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2251
(u64) ref_tod.tv_usec * NSEC_PER_USEC;
2252
2253
env->clock.tod_ns = ref;
2254
2255
ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2256
(u64) ref_clockid.tv_nsec;
2257
2258
env->clock.clockid_ns = ref;
2259
return 0;
2260
}
2261
2262
static void hit_auxtrace_snapshot_trigger(struct record *rec)
2263
{
2264
if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2265
trigger_hit(&auxtrace_snapshot_trigger);
2266
auxtrace_record__snapshot_started = 1;
2267
if (auxtrace_record__snapshot_start(rec->itr))
2268
trigger_error(&auxtrace_snapshot_trigger);
2269
}
2270
}
2271
2272
static int record__terminate_thread(struct record_thread *thread_data)
2273
{
2274
int err;
2275
enum thread_msg ack = THREAD_MSG__UNDEFINED;
2276
pid_t tid = thread_data->tid;
2277
2278
close(thread_data->pipes.msg[1]);
2279
thread_data->pipes.msg[1] = -1;
2280
err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2281
if (err > 0)
2282
pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2283
else
2284
pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2285
thread->tid, tid);
2286
2287
return 0;
2288
}
2289
2290
static int record__start_threads(struct record *rec)
2291
{
2292
int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2293
struct record_thread *thread_data = rec->thread_data;
2294
sigset_t full, mask;
2295
pthread_t handle;
2296
pthread_attr_t attrs;
2297
2298
thread = &thread_data[0];
2299
2300
if (!record__threads_enabled(rec))
2301
return 0;
2302
2303
sigfillset(&full);
2304
if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2305
pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2306
return -1;
2307
}
2308
2309
pthread_attr_init(&attrs);
2310
pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2311
2312
for (t = 1; t < nr_threads; t++) {
2313
enum thread_msg msg = THREAD_MSG__UNDEFINED;
2314
2315
#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2316
pthread_attr_setaffinity_np(&attrs,
2317
MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2318
(cpu_set_t *)(thread_data[t].mask->affinity.bits));
2319
#endif
2320
if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2321
for (tt = 1; tt < t; tt++)
2322
record__terminate_thread(&thread_data[t]);
2323
pr_err("Failed to start threads: %s\n", strerror(errno));
2324
ret = -1;
2325
goto out_err;
2326
}
2327
2328
err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2329
if (err > 0)
2330
pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2331
thread_msg_tags[msg]);
2332
else
2333
pr_warning("threads[%d]: failed to receive start notification from %d\n",
2334
thread->tid, rec->thread_data[t].tid);
2335
}
2336
2337
sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2338
(cpu_set_t *)thread->mask->affinity.bits);
2339
2340
pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2341
2342
out_err:
2343
pthread_attr_destroy(&attrs);
2344
2345
if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2346
pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2347
ret = -1;
2348
}
2349
2350
return ret;
2351
}
2352
2353
static int record__stop_threads(struct record *rec)
2354
{
2355
int t;
2356
struct record_thread *thread_data = rec->thread_data;
2357
2358
for (t = 1; t < rec->nr_threads; t++)
2359
record__terminate_thread(&thread_data[t]);
2360
2361
for (t = 0; t < rec->nr_threads; t++) {
2362
rec->samples += thread_data[t].samples;
2363
if (!record__threads_enabled(rec))
2364
continue;
2365
rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2366
rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2367
pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2368
thread_data[t].samples, thread_data[t].waking);
2369
if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2370
pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2371
thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2372
else
2373
pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2374
}
2375
2376
return 0;
2377
}
2378
2379
static unsigned long record__waking(struct record *rec)
2380
{
2381
int t;
2382
unsigned long waking = 0;
2383
struct record_thread *thread_data = rec->thread_data;
2384
2385
for (t = 0; t < rec->nr_threads; t++)
2386
waking += thread_data[t].waking;
2387
2388
return waking;
2389
}
2390
2391
static int __cmd_record(struct record *rec, int argc, const char **argv)
2392
{
2393
int err;
2394
int status = 0;
2395
const bool forks = argc > 0;
2396
struct perf_tool *tool = &rec->tool;
2397
struct record_opts *opts = &rec->opts;
2398
struct perf_data *data = &rec->data;
2399
struct perf_session *session;
2400
bool disabled = false, draining = false;
2401
int fd;
2402
float ratio = 0;
2403
enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2404
struct perf_env *env;
2405
2406
atexit(record__sig_exit);
2407
signal(SIGCHLD, sig_handler);
2408
signal(SIGINT, sig_handler);
2409
signal(SIGTERM, sig_handler);
2410
signal(SIGSEGV, sigsegv_handler);
2411
2412
if (rec->opts.record_cgroup) {
2413
#ifndef HAVE_FILE_HANDLE
2414
pr_err("cgroup tracking is not supported\n");
2415
return -1;
2416
#endif
2417
}
2418
2419
if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2420
signal(SIGUSR2, snapshot_sig_handler);
2421
if (rec->opts.auxtrace_snapshot_mode)
2422
trigger_on(&auxtrace_snapshot_trigger);
2423
if (rec->switch_output.enabled)
2424
trigger_on(&switch_output_trigger);
2425
} else {
2426
signal(SIGUSR2, SIG_IGN);
2427
}
2428
2429
perf_tool__init(tool, /*ordered_events=*/true);
2430
tool->sample = process_sample_event;
2431
tool->fork = perf_event__process_fork;
2432
tool->exit = perf_event__process_exit;
2433
tool->comm = perf_event__process_comm;
2434
tool->namespaces = perf_event__process_namespaces;
2435
tool->mmap = build_id__process_mmap;
2436
tool->mmap2 = build_id__process_mmap2;
2437
tool->itrace_start = process_timestamp_boundary;
2438
tool->aux = process_timestamp_boundary;
2439
tool->namespace_events = rec->opts.record_namespaces;
2440
tool->cgroup_events = rec->opts.record_cgroup;
2441
session = perf_session__new(data, tool);
2442
if (IS_ERR(session)) {
2443
pr_err("Perf session creation failed.\n");
2444
return PTR_ERR(session);
2445
}
2446
env = perf_session__env(session);
2447
if (record__threads_enabled(rec)) {
2448
if (perf_data__is_pipe(&rec->data)) {
2449
pr_err("Parallel trace streaming is not available in pipe mode.\n");
2450
return -1;
2451
}
2452
if (rec->opts.full_auxtrace) {
2453
pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2454
return -1;
2455
}
2456
}
2457
2458
fd = perf_data__fd(data);
2459
rec->session = session;
2460
2461
if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2462
pr_err("Compression initialization failed.\n");
2463
return -1;
2464
}
2465
#ifdef HAVE_EVENTFD_SUPPORT
2466
done_fd = eventfd(0, EFD_NONBLOCK);
2467
if (done_fd < 0) {
2468
pr_err("Failed to create wakeup eventfd, error: %m\n");
2469
status = -1;
2470
goto out_delete_session;
2471
}
2472
err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2473
if (err < 0) {
2474
pr_err("Failed to add wakeup eventfd to poll list\n");
2475
status = err;
2476
goto out_delete_session;
2477
}
2478
#endif // HAVE_EVENTFD_SUPPORT
2479
2480
env->comp_type = PERF_COMP_ZSTD;
2481
env->comp_level = rec->opts.comp_level;
2482
2483
if (rec->opts.kcore &&
2484
!record__kcore_readable(&session->machines.host)) {
2485
pr_err("ERROR: kcore is not readable.\n");
2486
return -1;
2487
}
2488
2489
if (record__init_clock(rec))
2490
return -1;
2491
2492
record__init_features(rec);
2493
2494
if (forks) {
2495
err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2496
workload_exec_failed_signal);
2497
if (err < 0) {
2498
pr_err("Couldn't run the workload!\n");
2499
status = err;
2500
goto out_delete_session;
2501
}
2502
}
2503
2504
/*
2505
* If we have just single event and are sending data
2506
* through pipe, we need to force the ids allocation,
2507
* because we synthesize event name through the pipe
2508
* and need the id for that.
2509
*/
2510
if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2511
rec->opts.sample_id = true;
2512
2513
if (rec->timestamp_filename && perf_data__is_pipe(data)) {
2514
rec->timestamp_filename = false;
2515
pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
2516
}
2517
2518
/*
2519
* Use global stat_config that is zero meaning aggr_mode is AGGR_NONE
2520
* and hybrid_merge is false.
2521
*/
2522
evlist__uniquify_evsel_names(rec->evlist, &stat_config);
2523
2524
evlist__config(rec->evlist, opts, &callchain_param);
2525
2526
/* Debug message used by test scripts */
2527
pr_debug3("perf record opening and mmapping events\n");
2528
if (record__open(rec) != 0) {
2529
err = -1;
2530
goto out_free_threads;
2531
}
2532
/* Debug message used by test scripts */
2533
pr_debug3("perf record done opening and mmapping events\n");
2534
env->comp_mmap_len = session->evlist->core.mmap_len;
2535
2536
if (rec->opts.kcore) {
2537
err = record__kcore_copy(&session->machines.host, data);
2538
if (err) {
2539
pr_err("ERROR: Failed to copy kcore\n");
2540
goto out_free_threads;
2541
}
2542
}
2543
2544
/*
2545
* Normally perf_session__new would do this, but it doesn't have the
2546
* evlist.
2547
*/
2548
if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2549
pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2550
rec->tool.ordered_events = false;
2551
}
2552
2553
if (evlist__nr_groups(rec->evlist) == 0)
2554
perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2555
2556
if (data->is_pipe) {
2557
err = perf_header__write_pipe(fd);
2558
if (err < 0)
2559
goto out_free_threads;
2560
} else {
2561
err = perf_session__write_header(session, rec->evlist, fd, false);
2562
if (err < 0)
2563
goto out_free_threads;
2564
}
2565
2566
err = -1;
2567
if (!rec->no_buildid
2568
&& !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2569
pr_err("Couldn't generate buildids. "
2570
"Use --no-buildid to profile anyway.\n");
2571
goto out_free_threads;
2572
}
2573
2574
if (!evlist__needs_bpf_sb_event(rec->evlist))
2575
opts->no_bpf_event = true;
2576
2577
err = record__setup_sb_evlist(rec);
2578
if (err)
2579
goto out_free_threads;
2580
2581
err = record__synthesize(rec, false);
2582
if (err < 0)
2583
goto out_free_threads;
2584
2585
if (rec->realtime_prio) {
2586
struct sched_param param;
2587
2588
param.sched_priority = rec->realtime_prio;
2589
if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2590
pr_err("Could not set realtime priority.\n");
2591
err = -1;
2592
goto out_free_threads;
2593
}
2594
}
2595
2596
if (record__start_threads(rec))
2597
goto out_free_threads;
2598
2599
/*
2600
* When perf is starting the traced process, all the events
2601
* (apart from group members) have enable_on_exec=1 set,
2602
* so don't spoil it by prematurely enabling them.
2603
*/
2604
if (!target__none(&opts->target) && !opts->target.initial_delay)
2605
evlist__enable(rec->evlist);
2606
2607
/*
2608
* offcpu-time does not call execve, so enable_on_exe wouldn't work
2609
* when recording a workload, do it manually
2610
*/
2611
if (rec->off_cpu)
2612
evlist__enable_evsel(rec->evlist, (char *)OFFCPU_EVENT);
2613
2614
/*
2615
* Let the child rip
2616
*/
2617
if (forks) {
2618
struct machine *machine = &session->machines.host;
2619
union perf_event *event;
2620
pid_t tgid;
2621
2622
event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2623
if (event == NULL) {
2624
err = -ENOMEM;
2625
goto out_child;
2626
}
2627
2628
/*
2629
* Some H/W events are generated before COMM event
2630
* which is emitted during exec(), so perf script
2631
* cannot see a correct process name for those events.
2632
* Synthesize COMM event to prevent it.
2633
*/
2634
tgid = perf_event__synthesize_comm(tool, event,
2635
rec->evlist->workload.pid,
2636
process_synthesized_event,
2637
machine);
2638
free(event);
2639
2640
if (tgid == -1)
2641
goto out_child;
2642
2643
event = malloc(sizeof(event->namespaces) +
2644
(NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2645
machine->id_hdr_size);
2646
if (event == NULL) {
2647
err = -ENOMEM;
2648
goto out_child;
2649
}
2650
2651
/*
2652
* Synthesize NAMESPACES event for the command specified.
2653
*/
2654
perf_event__synthesize_namespaces(tool, event,
2655
rec->evlist->workload.pid,
2656
tgid, process_synthesized_event,
2657
machine);
2658
free(event);
2659
2660
evlist__start_workload(rec->evlist);
2661
}
2662
2663
if (opts->target.initial_delay) {
2664
pr_info(EVLIST_DISABLED_MSG);
2665
if (opts->target.initial_delay > 0) {
2666
usleep(opts->target.initial_delay * USEC_PER_MSEC);
2667
evlist__enable(rec->evlist);
2668
pr_info(EVLIST_ENABLED_MSG);
2669
}
2670
}
2671
2672
err = event_enable_timer__start(rec->evlist->eet);
2673
if (err)
2674
goto out_child;
2675
2676
/* Debug message used by test scripts */
2677
pr_debug3("perf record has started\n");
2678
fflush(stderr);
2679
2680
trigger_ready(&auxtrace_snapshot_trigger);
2681
trigger_ready(&switch_output_trigger);
2682
perf_hooks__invoke_record_start();
2683
2684
/*
2685
* Must write FINISHED_INIT so it will be seen after all other
2686
* synthesized user events, but before any regular events.
2687
*/
2688
err = write_finished_init(rec, false);
2689
if (err < 0)
2690
goto out_child;
2691
2692
for (;;) {
2693
unsigned long long hits = thread->samples;
2694
2695
/*
2696
* rec->evlist->bkw_mmap_state is possible to be
2697
* BKW_MMAP_EMPTY here: when done == true and
2698
* hits != rec->samples in previous round.
2699
*
2700
* evlist__toggle_bkw_mmap ensure we never
2701
* convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2702
*/
2703
if (trigger_is_hit(&switch_output_trigger) || done || draining)
2704
evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2705
2706
if (record__mmap_read_all(rec, false) < 0) {
2707
trigger_error(&auxtrace_snapshot_trigger);
2708
trigger_error(&switch_output_trigger);
2709
err = -1;
2710
goto out_child;
2711
}
2712
2713
if (auxtrace_record__snapshot_started) {
2714
auxtrace_record__snapshot_started = 0;
2715
if (!trigger_is_error(&auxtrace_snapshot_trigger))
2716
record__read_auxtrace_snapshot(rec, false);
2717
if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2718
pr_err("AUX area tracing snapshot failed\n");
2719
err = -1;
2720
goto out_child;
2721
}
2722
}
2723
2724
if (trigger_is_hit(&switch_output_trigger)) {
2725
/*
2726
* If switch_output_trigger is hit, the data in
2727
* overwritable ring buffer should have been collected,
2728
* so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2729
*
2730
* If SIGUSR2 raise after or during record__mmap_read_all(),
2731
* record__mmap_read_all() didn't collect data from
2732
* overwritable ring buffer. Read again.
2733
*/
2734
if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2735
continue;
2736
trigger_ready(&switch_output_trigger);
2737
2738
/*
2739
* Reenable events in overwrite ring buffer after
2740
* record__mmap_read_all(): we should have collected
2741
* data from it.
2742
*/
2743
evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2744
2745
if (!quiet)
2746
fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2747
record__waking(rec));
2748
thread->waking = 0;
2749
fd = record__switch_output(rec, false);
2750
if (fd < 0) {
2751
pr_err("Failed to switch to new file\n");
2752
trigger_error(&switch_output_trigger);
2753
err = fd;
2754
goto out_child;
2755
}
2756
2757
/* re-arm the alarm */
2758
if (rec->switch_output.time)
2759
alarm(rec->switch_output.time);
2760
}
2761
2762
if (hits == thread->samples) {
2763
if (done || draining)
2764
break;
2765
err = fdarray__poll(&thread->pollfd, -1);
2766
/*
2767
* Propagate error, only if there's any. Ignore positive
2768
* number of returned events and interrupt error.
2769
*/
2770
if (err > 0 || (err < 0 && errno == EINTR))
2771
err = 0;
2772
thread->waking++;
2773
2774
if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2775
record__thread_munmap_filtered, NULL) == 0)
2776
draining = true;
2777
2778
err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2779
if (err)
2780
goto out_child;
2781
}
2782
2783
if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2784
switch (cmd) {
2785
case EVLIST_CTL_CMD_SNAPSHOT:
2786
hit_auxtrace_snapshot_trigger(rec);
2787
evlist__ctlfd_ack(rec->evlist);
2788
break;
2789
case EVLIST_CTL_CMD_STOP:
2790
done = 1;
2791
break;
2792
case EVLIST_CTL_CMD_ACK:
2793
case EVLIST_CTL_CMD_UNSUPPORTED:
2794
case EVLIST_CTL_CMD_ENABLE:
2795
case EVLIST_CTL_CMD_DISABLE:
2796
case EVLIST_CTL_CMD_EVLIST:
2797
case EVLIST_CTL_CMD_PING:
2798
default:
2799
break;
2800
}
2801
}
2802
2803
err = event_enable_timer__process(rec->evlist->eet);
2804
if (err < 0)
2805
goto out_child;
2806
if (err) {
2807
err = 0;
2808
done = 1;
2809
}
2810
2811
/*
2812
* When perf is starting the traced process, at the end events
2813
* die with the process and we wait for that. Thus no need to
2814
* disable events in this case.
2815
*/
2816
if (done && !disabled && !target__none(&opts->target)) {
2817
trigger_off(&auxtrace_snapshot_trigger);
2818
evlist__disable(rec->evlist);
2819
disabled = true;
2820
}
2821
}
2822
2823
trigger_off(&auxtrace_snapshot_trigger);
2824
trigger_off(&switch_output_trigger);
2825
2826
record__synthesize_final_bpf_metadata(rec);
2827
2828
if (opts->auxtrace_snapshot_on_exit)
2829
record__auxtrace_snapshot_exit(rec);
2830
2831
if (forks && workload_exec_errno) {
2832
char msg[STRERR_BUFSIZE];
2833
const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2834
struct strbuf sb = STRBUF_INIT;
2835
2836
evlist__format_evsels(rec->evlist, &sb, 2048);
2837
2838
pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2839
sb.buf, argv[0], emsg);
2840
strbuf_release(&sb);
2841
err = -1;
2842
goto out_child;
2843
}
2844
2845
if (!quiet)
2846
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2847
record__waking(rec));
2848
2849
write_finished_init(rec, true);
2850
2851
if (target__none(&rec->opts.target))
2852
record__synthesize_workload(rec, true);
2853
2854
out_child:
2855
record__stop_threads(rec);
2856
record__mmap_read_all(rec, true);
2857
out_free_threads:
2858
record__free_thread_data(rec);
2859
evlist__finalize_ctlfd(rec->evlist);
2860
record__aio_mmap_read_sync(rec);
2861
2862
if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2863
ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2864
env->comp_ratio = ratio + 0.5;
2865
}
2866
2867
if (forks) {
2868
int exit_status;
2869
2870
if (!child_finished)
2871
kill(rec->evlist->workload.pid, SIGTERM);
2872
2873
wait(&exit_status);
2874
2875
if (err < 0)
2876
status = err;
2877
else if (WIFEXITED(exit_status))
2878
status = WEXITSTATUS(exit_status);
2879
else if (WIFSIGNALED(exit_status))
2880
signr = WTERMSIG(exit_status);
2881
} else
2882
status = err;
2883
2884
if (rec->off_cpu)
2885
rec->bytes_written += off_cpu_write(rec->session);
2886
2887
record__read_lost_samples(rec);
2888
record__synthesize(rec, true);
2889
/* this will be recalculated during process_buildids() */
2890
rec->samples = 0;
2891
2892
if (!err) {
2893
if (!rec->timestamp_filename) {
2894
record__finish_output(rec);
2895
} else {
2896
fd = record__switch_output(rec, true);
2897
if (fd < 0) {
2898
status = fd;
2899
goto out_delete_session;
2900
}
2901
}
2902
}
2903
2904
perf_hooks__invoke_record_end();
2905
2906
if (!err && !quiet) {
2907
char samples[128];
2908
const char *postfix = rec->timestamp_filename ?
2909
".<timestamp>" : "";
2910
2911
if (rec->samples && !rec->opts.full_auxtrace)
2912
scnprintf(samples, sizeof(samples),
2913
" (%" PRIu64 " samples)", rec->samples);
2914
else
2915
samples[0] = '\0';
2916
2917
fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2918
perf_data__size(data) / 1024.0 / 1024.0,
2919
data->path, postfix, samples);
2920
if (ratio) {
2921
fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2922
rec->session->bytes_transferred / 1024.0 / 1024.0,
2923
ratio);
2924
}
2925
fprintf(stderr, " ]\n");
2926
}
2927
2928
out_delete_session:
2929
#ifdef HAVE_EVENTFD_SUPPORT
2930
if (done_fd >= 0) {
2931
fd = done_fd;
2932
done_fd = -1;
2933
2934
close(fd);
2935
}
2936
#endif
2937
zstd_fini(&session->zstd_data);
2938
if (!opts->no_bpf_event)
2939
evlist__stop_sb_thread(rec->sb_evlist);
2940
2941
perf_session__delete(session);
2942
return status;
2943
}
2944
2945
static void callchain_debug(struct callchain_param *callchain)
2946
{
2947
static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2948
2949
pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2950
2951
if (callchain->record_mode == CALLCHAIN_DWARF)
2952
pr_debug("callchain: stack dump size %d\n",
2953
callchain->dump_size);
2954
}
2955
2956
int record_opts__parse_callchain(struct record_opts *record,
2957
struct callchain_param *callchain,
2958
const char *arg, bool unset)
2959
{
2960
int ret;
2961
callchain->enabled = !unset;
2962
2963
/* --no-call-graph */
2964
if (unset) {
2965
callchain->record_mode = CALLCHAIN_NONE;
2966
pr_debug("callchain: disabled\n");
2967
return 0;
2968
}
2969
2970
ret = parse_callchain_record_opt(arg, callchain);
2971
if (!ret) {
2972
/* Enable data address sampling for DWARF unwind. */
2973
if (callchain->record_mode == CALLCHAIN_DWARF)
2974
record->sample_address = true;
2975
callchain_debug(callchain);
2976
}
2977
2978
return ret;
2979
}
2980
2981
int record_parse_callchain_opt(const struct option *opt,
2982
const char *arg,
2983
int unset)
2984
{
2985
return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2986
}
2987
2988
int record_callchain_opt(const struct option *opt,
2989
const char *arg __maybe_unused,
2990
int unset __maybe_unused)
2991
{
2992
struct callchain_param *callchain = opt->value;
2993
2994
callchain->enabled = true;
2995
2996
if (callchain->record_mode == CALLCHAIN_NONE)
2997
callchain->record_mode = CALLCHAIN_FP;
2998
2999
callchain_debug(callchain);
3000
return 0;
3001
}
3002
3003
static int perf_record_config(const char *var, const char *value, void *cb)
3004
{
3005
struct record *rec = cb;
3006
3007
if (!strcmp(var, "record.build-id")) {
3008
if (!strcmp(value, "cache"))
3009
rec->no_buildid_cache = false;
3010
else if (!strcmp(value, "no-cache"))
3011
rec->no_buildid_cache = true;
3012
else if (!strcmp(value, "skip"))
3013
rec->no_buildid = true;
3014
else if (!strcmp(value, "mmap"))
3015
rec->buildid_mmap = true;
3016
else if (!strcmp(value, "no-mmap"))
3017
rec->buildid_mmap = false;
3018
else
3019
return -1;
3020
return 0;
3021
}
3022
if (!strcmp(var, "record.call-graph")) {
3023
var = "call-graph.record-mode";
3024
return perf_default_config(var, value, cb);
3025
}
3026
#ifdef HAVE_AIO_SUPPORT
3027
if (!strcmp(var, "record.aio")) {
3028
rec->opts.nr_cblocks = strtol(value, NULL, 0);
3029
if (!rec->opts.nr_cblocks)
3030
rec->opts.nr_cblocks = nr_cblocks_default;
3031
}
3032
#endif
3033
if (!strcmp(var, "record.debuginfod")) {
3034
rec->debuginfod.urls = strdup(value);
3035
if (!rec->debuginfod.urls)
3036
return -ENOMEM;
3037
rec->debuginfod.set = true;
3038
}
3039
3040
return 0;
3041
}
3042
3043
static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
3044
{
3045
struct record *rec = (struct record *)opt->value;
3046
3047
return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
3048
}
3049
3050
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
3051
{
3052
struct record_opts *opts = (struct record_opts *)opt->value;
3053
3054
if (unset || !str)
3055
return 0;
3056
3057
if (!strcasecmp(str, "node"))
3058
opts->affinity = PERF_AFFINITY_NODE;
3059
else if (!strcasecmp(str, "cpu"))
3060
opts->affinity = PERF_AFFINITY_CPU;
3061
3062
return 0;
3063
}
3064
3065
static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3066
{
3067
mask->nbits = nr_bits;
3068
mask->bits = bitmap_zalloc(mask->nbits);
3069
if (!mask->bits)
3070
return -ENOMEM;
3071
3072
return 0;
3073
}
3074
3075
static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3076
{
3077
bitmap_free(mask->bits);
3078
mask->nbits = 0;
3079
}
3080
3081
static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3082
{
3083
int ret;
3084
3085
ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3086
if (ret) {
3087
mask->affinity.bits = NULL;
3088
return ret;
3089
}
3090
3091
ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3092
if (ret) {
3093
record__mmap_cpu_mask_free(&mask->maps);
3094
mask->maps.bits = NULL;
3095
}
3096
3097
return ret;
3098
}
3099
3100
static void record__thread_mask_free(struct thread_mask *mask)
3101
{
3102
record__mmap_cpu_mask_free(&mask->maps);
3103
record__mmap_cpu_mask_free(&mask->affinity);
3104
}
3105
3106
static int record__parse_threads(const struct option *opt, const char *str, int unset)
3107
{
3108
int s;
3109
struct record_opts *opts = opt->value;
3110
3111
if (unset || !str || !strlen(str)) {
3112
opts->threads_spec = THREAD_SPEC__CPU;
3113
} else {
3114
for (s = 1; s < THREAD_SPEC__MAX; s++) {
3115
if (s == THREAD_SPEC__USER) {
3116
opts->threads_user_spec = strdup(str);
3117
if (!opts->threads_user_spec)
3118
return -ENOMEM;
3119
opts->threads_spec = THREAD_SPEC__USER;
3120
break;
3121
}
3122
if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3123
opts->threads_spec = s;
3124
break;
3125
}
3126
}
3127
}
3128
3129
if (opts->threads_spec == THREAD_SPEC__USER)
3130
pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3131
else
3132
pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3133
3134
return 0;
3135
}
3136
3137
static int parse_output_max_size(const struct option *opt,
3138
const char *str, int unset)
3139
{
3140
unsigned long *s = (unsigned long *)opt->value;
3141
static struct parse_tag tags_size[] = {
3142
{ .tag = 'B', .mult = 1 },
3143
{ .tag = 'K', .mult = 1 << 10 },
3144
{ .tag = 'M', .mult = 1 << 20 },
3145
{ .tag = 'G', .mult = 1 << 30 },
3146
{ .tag = 0 },
3147
};
3148
unsigned long val;
3149
3150
if (unset) {
3151
*s = 0;
3152
return 0;
3153
}
3154
3155
val = parse_tag_value(str, tags_size);
3156
if (val != (unsigned long) -1) {
3157
*s = val;
3158
return 0;
3159
}
3160
3161
return -1;
3162
}
3163
3164
static int record__parse_mmap_pages(const struct option *opt,
3165
const char *str,
3166
int unset __maybe_unused)
3167
{
3168
struct record_opts *opts = opt->value;
3169
char *s, *p;
3170
unsigned int mmap_pages;
3171
int ret;
3172
3173
if (!str)
3174
return -EINVAL;
3175
3176
s = strdup(str);
3177
if (!s)
3178
return -ENOMEM;
3179
3180
p = strchr(s, ',');
3181
if (p)
3182
*p = '\0';
3183
3184
if (*s) {
3185
ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3186
if (ret)
3187
goto out_free;
3188
opts->mmap_pages = mmap_pages;
3189
}
3190
3191
if (!p) {
3192
ret = 0;
3193
goto out_free;
3194
}
3195
3196
ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3197
if (ret)
3198
goto out_free;
3199
3200
opts->auxtrace_mmap_pages = mmap_pages;
3201
3202
out_free:
3203
free(s);
3204
return ret;
3205
}
3206
3207
static int record__parse_off_cpu_thresh(const struct option *opt,
3208
const char *str,
3209
int unset __maybe_unused)
3210
{
3211
struct record_opts *opts = opt->value;
3212
char *endptr;
3213
u64 off_cpu_thresh_ms;
3214
3215
if (!str)
3216
return -EINVAL;
3217
3218
off_cpu_thresh_ms = strtoull(str, &endptr, 10);
3219
3220
/* the threshold isn't string "0", yet strtoull() returns 0, parsing failed */
3221
if (*endptr || (off_cpu_thresh_ms == 0 && strcmp(str, "0")))
3222
return -EINVAL;
3223
else
3224
opts->off_cpu_thresh_ns = off_cpu_thresh_ms * NSEC_PER_MSEC;
3225
3226
return 0;
3227
}
3228
3229
void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3230
{
3231
}
3232
3233
static int parse_control_option(const struct option *opt,
3234
const char *str,
3235
int unset __maybe_unused)
3236
{
3237
struct record_opts *opts = opt->value;
3238
3239
return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3240
}
3241
3242
static void switch_output_size_warn(struct record *rec)
3243
{
3244
u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3245
struct switch_output *s = &rec->switch_output;
3246
3247
wakeup_size /= 2;
3248
3249
if (s->size < wakeup_size) {
3250
char buf[100];
3251
3252
unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3253
pr_warning("WARNING: switch-output data size lower than "
3254
"wakeup kernel buffer size (%s) "
3255
"expect bigger perf.data sizes\n", buf);
3256
}
3257
}
3258
3259
static int switch_output_setup(struct record *rec)
3260
{
3261
struct switch_output *s = &rec->switch_output;
3262
static struct parse_tag tags_size[] = {
3263
{ .tag = 'B', .mult = 1 },
3264
{ .tag = 'K', .mult = 1 << 10 },
3265
{ .tag = 'M', .mult = 1 << 20 },
3266
{ .tag = 'G', .mult = 1 << 30 },
3267
{ .tag = 0 },
3268
};
3269
static struct parse_tag tags_time[] = {
3270
{ .tag = 's', .mult = 1 },
3271
{ .tag = 'm', .mult = 60 },
3272
{ .tag = 'h', .mult = 60*60 },
3273
{ .tag = 'd', .mult = 60*60*24 },
3274
{ .tag = 0 },
3275
};
3276
unsigned long val;
3277
3278
/*
3279
* If we're using --switch-output-events, then we imply its
3280
* --switch-output=signal, as we'll send a SIGUSR2 from the side band
3281
* thread to its parent.
3282
*/
3283
if (rec->switch_output_event_set) {
3284
if (record__threads_enabled(rec)) {
3285
pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3286
return 0;
3287
}
3288
goto do_signal;
3289
}
3290
3291
if (!s->set)
3292
return 0;
3293
3294
if (record__threads_enabled(rec)) {
3295
pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3296
return 0;
3297
}
3298
3299
if (!strcmp(s->str, "signal")) {
3300
do_signal:
3301
s->signal = true;
3302
pr_debug("switch-output with SIGUSR2 signal\n");
3303
goto enabled;
3304
}
3305
3306
val = parse_tag_value(s->str, tags_size);
3307
if (val != (unsigned long) -1) {
3308
s->size = val;
3309
pr_debug("switch-output with %s size threshold\n", s->str);
3310
goto enabled;
3311
}
3312
3313
val = parse_tag_value(s->str, tags_time);
3314
if (val != (unsigned long) -1) {
3315
s->time = val;
3316
pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3317
s->str, s->time);
3318
goto enabled;
3319
}
3320
3321
return -1;
3322
3323
enabled:
3324
rec->timestamp_filename = true;
3325
s->enabled = true;
3326
3327
if (s->size && !rec->opts.no_buffering)
3328
switch_output_size_warn(rec);
3329
3330
return 0;
3331
}
3332
3333
static const char * const __record_usage[] = {
3334
"perf record [<options>] [<command>]",
3335
"perf record [<options>] -- <command> [<options>]",
3336
NULL
3337
};
3338
const char * const *record_usage = __record_usage;
3339
3340
static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
3341
struct perf_sample *sample, struct machine *machine)
3342
{
3343
/*
3344
* We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3345
* no need to add them twice.
3346
*/
3347
if (!(event->header.misc & PERF_RECORD_MISC_USER))
3348
return 0;
3349
return perf_event__process_mmap(tool, event, sample, machine);
3350
}
3351
3352
static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
3353
struct perf_sample *sample, struct machine *machine)
3354
{
3355
/*
3356
* We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3357
* no need to add them twice.
3358
*/
3359
if (!(event->header.misc & PERF_RECORD_MISC_USER))
3360
return 0;
3361
3362
return perf_event__process_mmap2(tool, event, sample, machine);
3363
}
3364
3365
static int process_timestamp_boundary(const struct perf_tool *tool,
3366
union perf_event *event __maybe_unused,
3367
struct perf_sample *sample,
3368
struct machine *machine __maybe_unused)
3369
{
3370
struct record *rec = container_of(tool, struct record, tool);
3371
3372
set_timestamp_boundary(rec, sample->time);
3373
return 0;
3374
}
3375
3376
static int parse_record_synth_option(const struct option *opt,
3377
const char *str,
3378
int unset __maybe_unused)
3379
{
3380
struct record_opts *opts = opt->value;
3381
char *p = strdup(str);
3382
3383
if (p == NULL)
3384
return -1;
3385
3386
opts->synth = parse_synth_opt(p);
3387
free(p);
3388
3389
if (opts->synth < 0) {
3390
pr_err("Invalid synth option: %s\n", str);
3391
return -1;
3392
}
3393
return 0;
3394
}
3395
3396
/*
3397
* XXX Ideally would be local to cmd_record() and passed to a record__new
3398
* because we need to have access to it in record__exit, that is called
3399
* after cmd_record() exits, but since record_options need to be accessible to
3400
* builtin-script, leave it here.
3401
*
3402
* At least we don't ouch it in all the other functions here directly.
3403
*
3404
* Just say no to tons of global variables, sigh.
3405
*/
3406
static struct record record = {
3407
.opts = {
3408
.sample_time = true,
3409
.mmap_pages = UINT_MAX,
3410
.user_freq = UINT_MAX,
3411
.user_interval = ULLONG_MAX,
3412
.freq = 4000,
3413
.target = {
3414
.uses_mmap = true,
3415
.default_per_cpu = true,
3416
},
3417
.mmap_flush = MMAP_FLUSH_DEFAULT,
3418
.nr_threads_synthesize = 1,
3419
.ctl_fd = -1,
3420
.ctl_fd_ack = -1,
3421
.synth = PERF_SYNTH_ALL,
3422
.off_cpu_thresh_ns = OFFCPU_THRESH,
3423
},
3424
.buildid_mmap = true,
3425
};
3426
3427
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3428
"\n\t\t\t\tDefault: fp";
3429
3430
static bool dry_run;
3431
3432
static struct parse_events_option_args parse_events_option_args = {
3433
.evlistp = &record.evlist,
3434
};
3435
3436
static struct parse_events_option_args switch_output_parse_events_option_args = {
3437
.evlistp = &record.sb_evlist,
3438
};
3439
3440
/*
3441
* XXX Will stay a global variable till we fix builtin-script.c to stop messing
3442
* with it and switch to use the library functions in perf_evlist that came
3443
* from builtin-record.c, i.e. use record_opts,
3444
* evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3445
* using pipes, etc.
3446
*/
3447
static struct option __record_options[] = {
3448
OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3449
"event selector. use 'perf list' to list available events",
3450
parse_events_option),
3451
OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3452
"event filter", parse_filter),
3453
OPT_BOOLEAN(0, "latency", &record.latency,
3454
"Enable data collection for latency profiling.\n"
3455
"\t\t\t Use perf report --latency for latency-centric profile."),
3456
OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3457
NULL, "don't record events from perf itself",
3458
exclude_perf),
3459
OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3460
"record events on existing process id"),
3461
OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3462
"record events on existing thread id"),
3463
OPT_INTEGER('r', "realtime", &record.realtime_prio,
3464
"collect data with this RT SCHED_FIFO priority"),
3465
OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3466
"collect data without buffering"),
3467
OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3468
"collect raw sample records from all opened counters"),
3469
OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3470
"system-wide collection from all CPUs"),
3471
OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3472
"list of cpus to monitor"),
3473
OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3474
OPT_STRING('o', "output", &record.data.path, "file",
3475
"output file name"),
3476
OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3477
&record.opts.no_inherit_set,
3478
"child tasks do not inherit counters"),
3479
OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3480
"synthesize non-sample events at the end of output"),
3481
OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3482
OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3483
OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3484
"Fail if the specified frequency can't be used"),
3485
OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3486
"profile at this frequency",
3487
record__parse_freq),
3488
OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3489
"number of mmap data pages and AUX area tracing mmap pages",
3490
record__parse_mmap_pages),
3491
OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3492
"Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3493
record__mmap_flush_parse),
3494
OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3495
NULL, "enables call-graph recording" ,
3496
&record_callchain_opt),
3497
OPT_CALLBACK(0, "call-graph", &record.opts,
3498
"record_mode[,record_size]", record_callchain_help,
3499
&record_parse_callchain_opt),
3500
OPT_INCR('v', "verbose", &verbose,
3501
"be more verbose (show counter open errors, etc)"),
3502
OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3503
OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3504
"per thread counts"),
3505
OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3506
OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3507
"Record the sample physical addresses"),
3508
OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3509
"Record the sampled data address data page size"),
3510
OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3511
"Record the sampled code address (ip) page size"),
3512
OPT_BOOLEAN(0, "sample-mem-info", &record.opts.sample_data_src,
3513
"Record the data source for memory operations"),
3514
OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3515
OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3516
"Record the sample identifier"),
3517
OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3518
&record.opts.sample_time_set,
3519
"Record the sample timestamps"),
3520
OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3521
"Record the sample period"),
3522
OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3523
"don't sample"),
3524
OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3525
&record.no_buildid_cache_set,
3526
"do not update the buildid cache"),
3527
OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3528
&record.no_buildid_set,
3529
"do not collect buildids in perf.data"),
3530
OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3531
"monitor event in cgroup name only",
3532
parse_cgroups),
3533
OPT_CALLBACK('D', "delay", &record, "ms",
3534
"ms to wait before starting measurement after program start (-1: start with events disabled), "
3535
"or ranges of time to enable events e.g. '-D 10-20,30-40'",
3536
record__parse_event_enable_time),
3537
OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3538
OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
3539
3540
OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3541
"branch any", "sample any taken branches",
3542
parse_branch_stack),
3543
3544
OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3545
"branch filter mask", "branch stack filter modes",
3546
parse_branch_stack),
3547
OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3548
"sample by weight (on special events only)"),
3549
OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3550
"sample transaction flags (special events only)"),
3551
OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3552
"use per-thread mmaps"),
3553
OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3554
"sample selected machine registers on interrupt,"
3555
" use '-I?' to list register names", parse_intr_regs),
3556
OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3557
"sample selected machine registers in user space,"
3558
" use '--user-regs=?' to list register names", parse_user_regs),
3559
OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3560
"Record running/enabled time of read (:S) events"),
3561
OPT_CALLBACK('k', "clockid", &record.opts,
3562
"clockid", "clockid to use for events, see clock_gettime()",
3563
parse_clockid),
3564
OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3565
"opts", "AUX area tracing Snapshot Mode", ""),
3566
OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3567
"opts", "sample AUX area", ""),
3568
OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3569
"per thread proc mmap processing timeout in ms"),
3570
OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3571
"Record namespaces events"),
3572
OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3573
"Record cgroup events"),
3574
OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3575
&record.opts.record_switch_events_set,
3576
"Record context switch events"),
3577
OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3578
"Configure all used events to run in kernel space.",
3579
PARSE_OPT_EXCLUSIVE),
3580
OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3581
"Configure all used events to run in user space.",
3582
PARSE_OPT_EXCLUSIVE),
3583
OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3584
"collect kernel callchains"),
3585
OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3586
"collect user callchains"),
3587
OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3588
"file", "vmlinux pathname"),
3589
OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3590
"Record build-id of all DSOs regardless of hits"),
3591
OPT_BOOLEAN_SET(0, "buildid-mmap", &record.buildid_mmap, &record.buildid_mmap_set,
3592
"Record build-id in mmap events and skip build-id processing."),
3593
OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3594
"append timestamp to output filename"),
3595
OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3596
"Record timestamp boundary (time of first/last samples)"),
3597
OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3598
&record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3599
"Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3600
"signal"),
3601
OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3602
&record.switch_output_event_set, "switch output event",
3603
"switch output event selector. use 'perf list' to list available events",
3604
parse_events_option_new_evlist),
3605
OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3606
"Limit number of switch output generated files"),
3607
OPT_BOOLEAN(0, "dry-run", &dry_run,
3608
"Parse options then exit"),
3609
#ifdef HAVE_AIO_SUPPORT
3610
OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3611
&nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3612
record__aio_parse),
3613
#endif
3614
OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3615
"Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3616
record__parse_affinity),
3617
#ifdef HAVE_ZSTD_SUPPORT
3618
OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3619
"Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3620
record__parse_comp_level),
3621
#endif
3622
OPT_CALLBACK(0, "max-size", &record.output_max_size,
3623
"size", "Limit the maximum size of the output file", parse_output_max_size),
3624
OPT_UINTEGER(0, "num-thread-synthesize",
3625
&record.opts.nr_threads_synthesize,
3626
"number of threads to run for event synthesis"),
3627
#ifdef HAVE_LIBPFM
3628
OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3629
"libpfm4 event selector. use 'perf list' to list available events",
3630
parse_libpfm_events_option),
3631
#endif
3632
OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3633
"Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3634
"\t\t\t 'snapshot': AUX area tracing snapshot).\n"
3635
"\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3636
"\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3637
parse_control_option),
3638
OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3639
"Fine-tune event synthesis: default=all", parse_record_synth_option),
3640
OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3641
&record.debuginfod.set, "debuginfod urls",
3642
"Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3643
"system"),
3644
OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3645
"write collected trace data into several data files using parallel threads",
3646
record__parse_threads),
3647
OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3648
OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
3649
"BPF filter action"),
3650
OPT_CALLBACK(0, "off-cpu-thresh", &record.opts, "ms",
3651
"Dump off-cpu samples if off-cpu time exceeds this threshold (in milliseconds). (Default: 500ms)",
3652
record__parse_off_cpu_thresh),
3653
OPT_END()
3654
};
3655
3656
struct option *record_options = __record_options;
3657
3658
static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3659
{
3660
struct perf_cpu cpu;
3661
int idx;
3662
3663
if (cpu_map__is_dummy(cpus))
3664
return 0;
3665
3666
perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
3667
/* Return ENODEV is input cpu is greater than max cpu */
3668
if ((unsigned long)cpu.cpu > mask->nbits)
3669
return -ENODEV;
3670
__set_bit(cpu.cpu, mask->bits);
3671
}
3672
3673
return 0;
3674
}
3675
3676
static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3677
{
3678
struct perf_cpu_map *cpus;
3679
3680
cpus = perf_cpu_map__new(mask_spec);
3681
if (!cpus)
3682
return -ENOMEM;
3683
3684
bitmap_zero(mask->bits, mask->nbits);
3685
if (record__mmap_cpu_mask_init(mask, cpus))
3686
return -ENODEV;
3687
3688
perf_cpu_map__put(cpus);
3689
3690
return 0;
3691
}
3692
3693
static void record__free_thread_masks(struct record *rec, int nr_threads)
3694
{
3695
int t;
3696
3697
if (rec->thread_masks)
3698
for (t = 0; t < nr_threads; t++)
3699
record__thread_mask_free(&rec->thread_masks[t]);
3700
3701
zfree(&rec->thread_masks);
3702
}
3703
3704
static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3705
{
3706
int t, ret;
3707
3708
rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3709
if (!rec->thread_masks) {
3710
pr_err("Failed to allocate thread masks\n");
3711
return -ENOMEM;
3712
}
3713
3714
for (t = 0; t < nr_threads; t++) {
3715
ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3716
if (ret) {
3717
pr_err("Failed to allocate thread masks[%d]\n", t);
3718
goto out_free;
3719
}
3720
}
3721
3722
return 0;
3723
3724
out_free:
3725
record__free_thread_masks(rec, nr_threads);
3726
3727
return ret;
3728
}
3729
3730
static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3731
{
3732
int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3733
3734
ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3735
if (ret)
3736
return ret;
3737
3738
rec->nr_threads = nr_cpus;
3739
pr_debug("nr_threads: %d\n", rec->nr_threads);
3740
3741
for (t = 0; t < rec->nr_threads; t++) {
3742
__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3743
__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3744
if (verbose > 0) {
3745
pr_debug("thread_masks[%d]: ", t);
3746
mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3747
pr_debug("thread_masks[%d]: ", t);
3748
mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3749
}
3750
}
3751
3752
return 0;
3753
}
3754
3755
static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3756
const char **maps_spec, const char **affinity_spec,
3757
u32 nr_spec)
3758
{
3759
u32 s;
3760
int ret = 0, t = 0;
3761
struct mmap_cpu_mask cpus_mask;
3762
struct thread_mask thread_mask, full_mask, *thread_masks;
3763
3764
ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3765
if (ret) {
3766
pr_err("Failed to allocate CPUs mask\n");
3767
return ret;
3768
}
3769
3770
ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3771
if (ret) {
3772
pr_err("Failed to init cpu mask\n");
3773
goto out_free_cpu_mask;
3774
}
3775
3776
ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3777
if (ret) {
3778
pr_err("Failed to allocate full mask\n");
3779
goto out_free_cpu_mask;
3780
}
3781
3782
ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3783
if (ret) {
3784
pr_err("Failed to allocate thread mask\n");
3785
goto out_free_full_and_cpu_masks;
3786
}
3787
3788
for (s = 0; s < nr_spec; s++) {
3789
ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3790
if (ret) {
3791
pr_err("Failed to initialize maps thread mask\n");
3792
goto out_free;
3793
}
3794
ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3795
if (ret) {
3796
pr_err("Failed to initialize affinity thread mask\n");
3797
goto out_free;
3798
}
3799
3800
/* ignore invalid CPUs but do not allow empty masks */
3801
if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3802
cpus_mask.bits, thread_mask.maps.nbits)) {
3803
pr_err("Empty maps mask: %s\n", maps_spec[s]);
3804
ret = -EINVAL;
3805
goto out_free;
3806
}
3807
if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3808
cpus_mask.bits, thread_mask.affinity.nbits)) {
3809
pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3810
ret = -EINVAL;
3811
goto out_free;
3812
}
3813
3814
/* do not allow intersection with other masks (full_mask) */
3815
if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3816
thread_mask.maps.nbits)) {
3817
pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3818
ret = -EINVAL;
3819
goto out_free;
3820
}
3821
if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3822
thread_mask.affinity.nbits)) {
3823
pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3824
ret = -EINVAL;
3825
goto out_free;
3826
}
3827
3828
bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3829
thread_mask.maps.bits, full_mask.maps.nbits);
3830
bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3831
thread_mask.affinity.bits, full_mask.maps.nbits);
3832
3833
thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3834
if (!thread_masks) {
3835
pr_err("Failed to reallocate thread masks\n");
3836
ret = -ENOMEM;
3837
goto out_free;
3838
}
3839
rec->thread_masks = thread_masks;
3840
rec->thread_masks[t] = thread_mask;
3841
if (verbose > 0) {
3842
pr_debug("thread_masks[%d]: ", t);
3843
mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3844
pr_debug("thread_masks[%d]: ", t);
3845
mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3846
}
3847
t++;
3848
ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3849
if (ret) {
3850
pr_err("Failed to allocate thread mask\n");
3851
goto out_free_full_and_cpu_masks;
3852
}
3853
}
3854
rec->nr_threads = t;
3855
pr_debug("nr_threads: %d\n", rec->nr_threads);
3856
if (!rec->nr_threads)
3857
ret = -EINVAL;
3858
3859
out_free:
3860
record__thread_mask_free(&thread_mask);
3861
out_free_full_and_cpu_masks:
3862
record__thread_mask_free(&full_mask);
3863
out_free_cpu_mask:
3864
record__mmap_cpu_mask_free(&cpus_mask);
3865
3866
return ret;
3867
}
3868
3869
static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3870
{
3871
int ret;
3872
struct cpu_topology *topo;
3873
3874
topo = cpu_topology__new();
3875
if (!topo) {
3876
pr_err("Failed to allocate CPU topology\n");
3877
return -ENOMEM;
3878
}
3879
3880
ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3881
topo->core_cpus_list, topo->core_cpus_lists);
3882
cpu_topology__delete(topo);
3883
3884
return ret;
3885
}
3886
3887
static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3888
{
3889
int ret;
3890
struct cpu_topology *topo;
3891
3892
topo = cpu_topology__new();
3893
if (!topo) {
3894
pr_err("Failed to allocate CPU topology\n");
3895
return -ENOMEM;
3896
}
3897
3898
ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3899
topo->package_cpus_list, topo->package_cpus_lists);
3900
cpu_topology__delete(topo);
3901
3902
return ret;
3903
}
3904
3905
static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3906
{
3907
u32 s;
3908
int ret;
3909
const char **spec;
3910
struct numa_topology *topo;
3911
3912
topo = numa_topology__new();
3913
if (!topo) {
3914
pr_err("Failed to allocate NUMA topology\n");
3915
return -ENOMEM;
3916
}
3917
3918
spec = zalloc(topo->nr * sizeof(char *));
3919
if (!spec) {
3920
pr_err("Failed to allocate NUMA spec\n");
3921
ret = -ENOMEM;
3922
goto out_delete_topo;
3923
}
3924
for (s = 0; s < topo->nr; s++)
3925
spec[s] = topo->nodes[s].cpus;
3926
3927
ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3928
3929
zfree(&spec);
3930
3931
out_delete_topo:
3932
numa_topology__delete(topo);
3933
3934
return ret;
3935
}
3936
3937
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3938
{
3939
int t, ret;
3940
u32 s, nr_spec = 0;
3941
char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3942
char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3943
3944
for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3945
spec = strtok_r(user_spec, ":", &spec_ptr);
3946
if (spec == NULL)
3947
break;
3948
pr_debug2("threads_spec[%d]: %s\n", t, spec);
3949
mask = strtok_r(spec, "/", &mask_ptr);
3950
if (mask == NULL)
3951
break;
3952
pr_debug2(" maps mask: %s\n", mask);
3953
tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3954
if (!tmp_spec) {
3955
pr_err("Failed to reallocate maps spec\n");
3956
ret = -ENOMEM;
3957
goto out_free;
3958
}
3959
maps_spec = tmp_spec;
3960
maps_spec[nr_spec] = dup_mask = strdup(mask);
3961
if (!maps_spec[nr_spec]) {
3962
pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3963
ret = -ENOMEM;
3964
goto out_free;
3965
}
3966
mask = strtok_r(NULL, "/", &mask_ptr);
3967
if (mask == NULL) {
3968
pr_err("Invalid thread maps or affinity specs\n");
3969
ret = -EINVAL;
3970
goto out_free;
3971
}
3972
pr_debug2(" affinity mask: %s\n", mask);
3973
tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3974
if (!tmp_spec) {
3975
pr_err("Failed to reallocate affinity spec\n");
3976
ret = -ENOMEM;
3977
goto out_free;
3978
}
3979
affinity_spec = tmp_spec;
3980
affinity_spec[nr_spec] = strdup(mask);
3981
if (!affinity_spec[nr_spec]) {
3982
pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3983
ret = -ENOMEM;
3984
goto out_free;
3985
}
3986
dup_mask = NULL;
3987
nr_spec++;
3988
}
3989
3990
ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3991
(const char **)affinity_spec, nr_spec);
3992
3993
out_free:
3994
free(dup_mask);
3995
for (s = 0; s < nr_spec; s++) {
3996
if (maps_spec)
3997
free(maps_spec[s]);
3998
if (affinity_spec)
3999
free(affinity_spec[s]);
4000
}
4001
free(affinity_spec);
4002
free(maps_spec);
4003
4004
return ret;
4005
}
4006
4007
static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
4008
{
4009
int ret;
4010
4011
ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
4012
if (ret)
4013
return ret;
4014
4015
if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
4016
return -ENODEV;
4017
4018
rec->nr_threads = 1;
4019
4020
return 0;
4021
}
4022
4023
static int record__init_thread_masks(struct record *rec)
4024
{
4025
int ret = 0;
4026
struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
4027
4028
if (!record__threads_enabled(rec))
4029
return record__init_thread_default_masks(rec, cpus);
4030
4031
if (evlist__per_thread(rec->evlist)) {
4032
pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
4033
return -EINVAL;
4034
}
4035
4036
switch (rec->opts.threads_spec) {
4037
case THREAD_SPEC__CPU:
4038
ret = record__init_thread_cpu_masks(rec, cpus);
4039
break;
4040
case THREAD_SPEC__CORE:
4041
ret = record__init_thread_core_masks(rec, cpus);
4042
break;
4043
case THREAD_SPEC__PACKAGE:
4044
ret = record__init_thread_package_masks(rec, cpus);
4045
break;
4046
case THREAD_SPEC__NUMA:
4047
ret = record__init_thread_numa_masks(rec, cpus);
4048
break;
4049
case THREAD_SPEC__USER:
4050
ret = record__init_thread_user_masks(rec, cpus);
4051
break;
4052
default:
4053
break;
4054
}
4055
4056
return ret;
4057
}
4058
4059
int cmd_record(int argc, const char **argv)
4060
{
4061
int err;
4062
struct record *rec = &record;
4063
char errbuf[BUFSIZ];
4064
4065
setlocale(LC_ALL, "");
4066
4067
#ifndef HAVE_BPF_SKEL
4068
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
4069
set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
4070
# undef set_nobuild
4071
#endif
4072
4073
/* Disable eager loading of kernel symbols that adds overhead to perf record. */
4074
symbol_conf.lazy_load_kernel_maps = true;
4075
rec->opts.affinity = PERF_AFFINITY_SYS;
4076
4077
rec->evlist = evlist__new();
4078
if (rec->evlist == NULL)
4079
return -ENOMEM;
4080
4081
err = perf_config(perf_record_config, rec);
4082
if (err)
4083
return err;
4084
4085
argc = parse_options(argc, argv, record_options, record_usage,
4086
PARSE_OPT_STOP_AT_NON_OPTION);
4087
if (quiet)
4088
perf_quiet_option();
4089
4090
err = symbol__validate_sym_arguments();
4091
if (err)
4092
return err;
4093
4094
perf_debuginfod_setup(&record.debuginfod);
4095
4096
/* Make system wide (-a) the default target. */
4097
if (!argc && target__none(&rec->opts.target))
4098
rec->opts.target.system_wide = true;
4099
4100
if (nr_cgroups && !rec->opts.target.system_wide) {
4101
usage_with_options_msg(record_usage, record_options,
4102
"cgroup monitoring only available in system-wide mode");
4103
4104
}
4105
4106
if (record.latency) {
4107
/*
4108
* There is no fundamental reason why latency profiling
4109
* can't work for system-wide mode, but exact semantics
4110
* and details are to be defined.
4111
* See the following thread for details:
4112
* https://lore.kernel.org/all/[email protected]/
4113
*/
4114
if (record.opts.target.system_wide) {
4115
pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
4116
err = -EINVAL;
4117
goto out_opts;
4118
}
4119
record.opts.record_switch_events = true;
4120
}
4121
4122
if (!rec->buildid_mmap) {
4123
pr_debug("Disabling build id in synthesized mmap2 events.\n");
4124
symbol_conf.no_buildid_mmap2 = true;
4125
} else if (rec->buildid_mmap_set) {
4126
/*
4127
* Explicitly passing --buildid-mmap disables buildid processing
4128
* and cache generation.
4129
*/
4130
rec->no_buildid = true;
4131
}
4132
if (rec->buildid_mmap && !perf_can_record_build_id()) {
4133
pr_warning("Missing support for build id in kernel mmap events.\n"
4134
"Disable this warning with --no-buildid-mmap\n");
4135
rec->buildid_mmap = false;
4136
}
4137
if (rec->buildid_mmap) {
4138
/* Enable perf_event_attr::build_id bit. */
4139
rec->opts.build_id = true;
4140
}
4141
4142
if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4143
pr_err("Kernel has no cgroup sampling support.\n");
4144
err = -EINVAL;
4145
goto out_opts;
4146
}
4147
4148
if (rec->opts.kcore)
4149
rec->opts.text_poke = true;
4150
4151
if (rec->opts.kcore || record__threads_enabled(rec))
4152
rec->data.is_dir = true;
4153
4154
if (record__threads_enabled(rec)) {
4155
if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4156
pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4157
goto out_opts;
4158
}
4159
if (record__aio_enabled(rec)) {
4160
pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4161
goto out_opts;
4162
}
4163
}
4164
4165
if (rec->opts.comp_level != 0) {
4166
pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4167
rec->no_buildid = true;
4168
}
4169
4170
if (rec->opts.record_switch_events &&
4171
!perf_can_record_switch_events()) {
4172
ui__error("kernel does not support recording context switch events\n");
4173
parse_options_usage(record_usage, record_options, "switch-events", 0);
4174
err = -EINVAL;
4175
goto out_opts;
4176
}
4177
4178
if (switch_output_setup(rec)) {
4179
parse_options_usage(record_usage, record_options, "switch-output", 0);
4180
err = -EINVAL;
4181
goto out_opts;
4182
}
4183
4184
if (rec->switch_output.time) {
4185
signal(SIGALRM, alarm_sig_handler);
4186
alarm(rec->switch_output.time);
4187
}
4188
4189
if (rec->switch_output.num_files) {
4190
rec->switch_output.filenames = calloc(rec->switch_output.num_files,
4191
sizeof(char *));
4192
if (!rec->switch_output.filenames) {
4193
err = -EINVAL;
4194
goto out_opts;
4195
}
4196
}
4197
4198
if (rec->timestamp_filename && record__threads_enabled(rec)) {
4199
rec->timestamp_filename = false;
4200
pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4201
}
4202
4203
if (rec->filter_action) {
4204
if (!strcmp(rec->filter_action, "pin"))
4205
err = perf_bpf_filter__pin();
4206
else if (!strcmp(rec->filter_action, "unpin"))
4207
err = perf_bpf_filter__unpin();
4208
else {
4209
pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
4210
err = -EINVAL;
4211
}
4212
goto out_opts;
4213
}
4214
4215
/* For backward compatibility, -d implies --mem-info */
4216
if (rec->opts.sample_address)
4217
rec->opts.sample_data_src = true;
4218
4219
/*
4220
* Allow aliases to facilitate the lookup of symbols for address
4221
* filters. Refer to auxtrace_parse_filters().
4222
*/
4223
symbol_conf.allow_aliases = true;
4224
4225
symbol__init(NULL);
4226
4227
err = record__auxtrace_init(rec);
4228
if (err)
4229
goto out;
4230
4231
if (dry_run)
4232
goto out;
4233
4234
err = -ENOMEM;
4235
4236
if (rec->no_buildid_cache || rec->no_buildid) {
4237
disable_buildid_cache();
4238
} else if (rec->switch_output.enabled) {
4239
/*
4240
* In 'perf record --switch-output', disable buildid
4241
* generation by default to reduce data file switching
4242
* overhead. Still generate buildid if they are required
4243
* explicitly using
4244
*
4245
* perf record --switch-output --no-no-buildid \
4246
* --no-no-buildid-cache
4247
*
4248
* Following code equals to:
4249
*
4250
* if ((rec->no_buildid || !rec->no_buildid_set) &&
4251
* (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4252
* disable_buildid_cache();
4253
*/
4254
bool disable = true;
4255
4256
if (rec->no_buildid_set && !rec->no_buildid)
4257
disable = false;
4258
if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4259
disable = false;
4260
if (disable) {
4261
rec->no_buildid = true;
4262
rec->no_buildid_cache = true;
4263
disable_buildid_cache();
4264
}
4265
}
4266
4267
if (record.opts.overwrite)
4268
record.opts.tail_synthesize = true;
4269
4270
if (rec->evlist->core.nr_entries == 0) {
4271
err = parse_event(rec->evlist, "cycles:P");
4272
if (err)
4273
goto out;
4274
}
4275
4276
if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4277
rec->opts.no_inherit = true;
4278
4279
err = target__validate(&rec->opts.target);
4280
if (err) {
4281
target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4282
ui__warning("%s\n", errbuf);
4283
}
4284
4285
if (rec->uid_str) {
4286
uid_t uid = parse_uid(rec->uid_str);
4287
4288
if (uid == UINT_MAX) {
4289
ui__error("Invalid User: %s", rec->uid_str);
4290
err = -EINVAL;
4291
goto out;
4292
}
4293
err = parse_uid_filter(rec->evlist, uid);
4294
if (err)
4295
goto out;
4296
4297
/* User ID filtering implies system wide. */
4298
rec->opts.target.system_wide = true;
4299
}
4300
4301
/* Enable ignoring missing threads when -p option is defined. */
4302
rec->opts.ignore_missing_thread = rec->opts.target.pid;
4303
4304
evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4305
4306
if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4307
arch__add_leaf_frame_record_opts(&rec->opts);
4308
4309
err = -ENOMEM;
4310
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4311
if (rec->opts.target.pid != NULL) {
4312
pr_err("Couldn't create thread/CPU maps: %s\n",
4313
errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4314
goto out;
4315
}
4316
else
4317
usage_with_options(record_usage, record_options);
4318
}
4319
4320
err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4321
if (err)
4322
goto out;
4323
4324
/*
4325
* We take all buildids when the file contains
4326
* AUX area tracing data because we do not decode the
4327
* trace because it would take too long.
4328
*/
4329
if (rec->opts.full_auxtrace)
4330
rec->buildid_all = true;
4331
4332
if (rec->opts.text_poke) {
4333
err = record__config_text_poke(rec->evlist);
4334
if (err) {
4335
pr_err("record__config_text_poke failed, error %d\n", err);
4336
goto out;
4337
}
4338
}
4339
4340
if (rec->off_cpu) {
4341
err = record__config_off_cpu(rec);
4342
if (err) {
4343
pr_err("record__config_off_cpu failed, error %d\n", err);
4344
goto out;
4345
}
4346
}
4347
4348
if (record_opts__config(&rec->opts)) {
4349
err = -EINVAL;
4350
goto out;
4351
}
4352
4353
err = record__config_tracking_events(rec);
4354
if (err) {
4355
pr_err("record__config_tracking_events failed, error %d\n", err);
4356
goto out;
4357
}
4358
4359
err = record__init_thread_masks(rec);
4360
if (err) {
4361
pr_err("Failed to initialize parallel data streaming masks\n");
4362
goto out;
4363
}
4364
4365
if (rec->opts.nr_cblocks > nr_cblocks_max)
4366
rec->opts.nr_cblocks = nr_cblocks_max;
4367
pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4368
4369
pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4370
pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4371
4372
if (rec->opts.comp_level > comp_level_max)
4373
rec->opts.comp_level = comp_level_max;
4374
pr_debug("comp level: %d\n", rec->opts.comp_level);
4375
4376
err = __cmd_record(&record, argc, argv);
4377
out:
4378
record__free_thread_masks(rec, rec->nr_threads);
4379
rec->nr_threads = 0;
4380
symbol__exit();
4381
auxtrace_record__free(rec->itr);
4382
out_opts:
4383
evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4384
evlist__delete(rec->evlist);
4385
return err;
4386
}
4387
4388
static void snapshot_sig_handler(int sig __maybe_unused)
4389
{
4390
struct record *rec = &record;
4391
4392
hit_auxtrace_snapshot_trigger(rec);
4393
4394
if (switch_output_signal(rec))
4395
trigger_hit(&switch_output_trigger);
4396
}
4397
4398
static void alarm_sig_handler(int sig __maybe_unused)
4399
{
4400
struct record *rec = &record;
4401
4402
if (switch_output_time(rec))
4403
trigger_hit(&switch_output_trigger);
4404
}
4405
4406