GitHub Repository: torvalds/linux
Path: blob/master/tools/perf/builtin-inject.c
// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-inject.c
 *
 * Builtin inject command: Examine the live mode (stdin) event stream
 * and repipe it to stdout while optionally injecting additional
 * events into it.
 */
#include "builtin.h"

#include "util/color.h"
#include "util/dso.h"
#include "util/vdso.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/map.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/debug.h"
#include "util/build-id.h"
#include "util/data.h"
#include "util/auxtrace.h"
#include "util/jit.h"
#include "util/string2.h"
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/thread.h"
#include "util/namespaces.h"
#include "util/util.h"
#include "util/tsc.h"

#include <internal/lib.h>

#include <linux/err.h>
#include <subcmd/parse-options.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */

#include <linux/list.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <linux/hash.h>
#include <ctype.h>
#include <errno.h>
#include <signal.h>
#include <inttypes.h>

struct guest_event {
	struct perf_sample	sample;
	union perf_event	*event;
	char			*event_buf;
};

struct guest_id {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node	node;
	u64			id;
	u64			host_id;
	u32			vcpu;
};

struct guest_tid {
	/* hlist_node must be first, see free_hlist() */
	struct hlist_node	node;
	/* Thread ID of QEMU thread */
	u32			tid;
	u32			vcpu;
};

struct guest_vcpu {
	/* Current host CPU */
	u32			cpu;
	/* Thread ID of QEMU thread */
	u32			tid;
};

struct guest_session {
	char			*perf_data_file;
	u32			machine_pid;
	u64			time_offset;
	double			time_scale;
	struct perf_tool	tool;
	struct perf_data	data;
	struct perf_session	*session;
	char			*tmp_file_name;
	int			tmp_fd;
	struct perf_tsc_conversion host_tc;
	struct perf_tsc_conversion guest_tc;
	bool			copy_kcore_dir;
	bool			have_tc;
	bool			fetched;
	bool			ready;
	u16			dflt_id_hdr_size;
	u64			dflt_id;
	u64			highest_id;
	/* Array of guest_vcpu */
	struct guest_vcpu	*vcpu;
	size_t			vcpu_cnt;
	/* Hash table for guest_id */
	struct hlist_head	heads[PERF_EVLIST__HLIST_SIZE];
	/* Hash table for guest_tid */
	struct hlist_head	tids[PERF_EVLIST__HLIST_SIZE];
	/* Place to stash next guest event */
	struct guest_event	ev;
};

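/*
 * A guest_session is built from a guest perf.data file (see --guest-data
 * below). Guest events are first processed into a temporary file, then
 * merged into the host event stream in timestamp order, with guest sample
 * IDs remapped onto unused host IDs via the heads[] and tids[] hash tables
 * above.
 */
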
enum build_id_rewrite_style {
	BID_RWS__NONE = 0,
	BID_RWS__INJECT_HEADER_LAZY,
	BID_RWS__INJECT_HEADER_ALL,
	BID_RWS__MMAP2_BUILDID_ALL,
	BID_RWS__MMAP2_BUILDID_LAZY,
};

struct perf_inject {
	struct perf_tool	tool;
	struct perf_session	*session;
	enum build_id_rewrite_style build_id_style;
	bool			sched_stat;
	bool			have_auxtrace;
	bool			strip;
	bool			jit_mode;
	bool			in_place_update;
	bool			in_place_update_dry_run;
	bool			copy_kcore_dir;
	const char		*input_name;
	struct perf_data	output;
	u64			bytes_written;
	u64			aux_id;
	struct list_head	samples;
	struct itrace_synth_opts itrace_synth_opts;
	char			*event_copy;
	struct perf_file_section secs[HEADER_FEAT_BITS];
	struct guest_session	guest_session;
	struct strlist		*known_build_ids;
	const struct evsel	*mmap_evsel;
};

struct event_entry {
	struct list_head node;
	u32		 tid;
	union perf_event event[];
};

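/*
 * How the build_id_rewrite_style values are used by the code below:
 * BID_RWS__INJECT_HEADER_* synthesize PERF_RECORD_HEADER_BUILD_ID events,
 * while BID_RWS__MMAP2_BUILDID_* rewrite mmap2 events to carry the build ID
 * inline (PERF_RECORD_MISC_MMAP_BUILD_ID). The *_LAZY variants emit a build
 * ID only once a sample actually hits the DSO; the *_ALL variants emit one
 * for every mmap seen.
 */
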
static int tool__inject_build_id(const struct perf_tool *tool,
				 struct perf_sample *sample,
				 struct machine *machine,
				 const struct evsel *evsel,
				 __u16 misc,
				 const char *filename,
				 struct dso *dso, u32 flags);
static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
				       struct perf_sample *sample,
				       struct machine *machine,
				       const struct evsel *evsel,
				       __u16 misc,
				       __u32 pid, __u32 tid,
				       __u64 start, __u64 len, __u64 pgoff,
				       struct dso *dso,
				       __u32 prot, __u32 flags,
				       const char *filename);

static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
{
	ssize_t size;

	size = perf_data__write(&inject->output, buf, sz);
	if (size < 0)
		return -errno;

	inject->bytes_written += size;
	return 0;
}

static int perf_event__repipe_synth(const struct perf_tool *tool,
				    union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	return output_bytes(inject, event, event->header.size);
}

static int perf_event__repipe_oe_synth(const struct perf_tool *tool,
				       union perf_event *event,
				       struct ordered_events *oe __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

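/*
 * Handlers receive only the perf_tool callback table, so the enclosing
 * perf_inject state is recovered with container_of(), e.g.:
 *
 *	struct perf_inject *inject =
 *		container_of(tool, struct perf_inject, tool);
 *
 * This works because the tool is embedded (not pointed to) in perf_inject;
 * the same pattern recovers a guest_session from its embedded tool below.
 */
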
#ifdef HAVE_JITDUMP
static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused,
			       union perf_event *event __maybe_unused,
			       struct ordered_events *oe __maybe_unused)
{
	return 0;
}
#endif

static int perf_event__repipe_op2_synth(const struct perf_tool *tool,
					struct perf_session *session __maybe_unused,
					union perf_event *event)
{
	return perf_event__repipe_synth(tool, event);
}

static int perf_event__repipe_op4_synth(const struct perf_tool *tool,
					struct perf_session *session __maybe_unused,
					union perf_event *event,
					u64 data __maybe_unused,
					const char *str __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

static int perf_event__repipe_attr(const struct perf_tool *tool,
				   union perf_event *event,
				   struct evlist **pevlist)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);
	int ret;

	ret = perf_event__process_attr(tool, event, pevlist);
	if (ret)
		return ret;

	/* If the output isn't a pipe then the attributes will be written as part of the header. */
	if (!inject->output.is_pipe)
		return 0;

	return perf_event__repipe_synth(tool, event);
}

static int perf_event__repipe_event_update(const struct perf_tool *tool,
					   union perf_event *event,
					   struct evlist **pevlist __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
{
	char buf[4096];
	ssize_t ssz;
	int ret;

	while (size > 0) {
		ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf)));
		if (ssz < 0)
			return -errno;
		ret = output_bytes(inject, buf, ssz);
		if (ret)
			return ret;
		size -= ssz;
	}

	return 0;
}

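/*
 * Output can be a pipe or a regular perf.data file. For a file, attributes
 * and feature sections are rewritten into the header at the end, so
 * perf_event__repipe_attr() only repipes attr records in pipe mode.
 */
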
static s64 perf_event__repipe_auxtrace(const struct perf_tool *tool,
				       struct perf_session *session,
				       union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret;

	inject->have_auxtrace = true;

	if (!inject->output.is_pipe) {
		off_t offset;

		offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
		if (offset == -1)
			return -errno;
		ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
						     event, offset);
		if (ret < 0)
			return ret;
	}

	if (perf_data__is_pipe(session->data) || !session->one_mmap) {
		ret = output_bytes(inject, event, event->header.size);
		if (ret < 0)
			return ret;
		ret = copy_bytes(inject, session->data,
				 event->auxtrace.size);
	} else {
		ret = output_bytes(inject, event,
				   event->header.size + event->auxtrace.size);
	}
	if (ret < 0)
		return ret;

	return event->auxtrace.size;
}

static int perf_event__repipe(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	return perf_event__repipe_synth(tool, event);
}

static int perf_event__drop(const struct perf_tool *tool __maybe_unused,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static int perf_event__drop_aux(const struct perf_tool *tool,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample,
				struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	if (!inject->aux_id)
		inject->aux_id = sample->id;

	return 0;
}

static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
				 union perf_event *event,
				 struct perf_sample *sample)
{
	size_t sz1 = sample->aux_sample.data - (void *)event;
	size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
	union perf_event *ev;

	if (inject->event_copy == NULL) {
		inject->event_copy = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!inject->event_copy)
			return ERR_PTR(-ENOMEM);
	}
	ev = (union perf_event *)inject->event_copy;
	if (sz1 > event->header.size || sz2 > event->header.size ||
	    sz1 + sz2 > event->header.size ||
	    sz1 < sizeof(struct perf_event_header) + sizeof(u64))
		return event;

	memcpy(ev, event, sz1);
	memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
	ev->header.size = sz1 + sz2;
	((u64 *)((void *)ev + sz1))[-1] = 0;

	return ev;
}

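/*
 * Event layout handled by perf_inject__cut_auxtrace_sample():
 *
 *	|<---- sz1 ---->|<-- aux_sample.size -->|<---- sz2 ---->|
 *	[ header ++ pre ][    AUX sample data   ][     post     ]
 *
 * The copy keeps the two outer pieces and drops the embedded AUX data, then
 * zeroes the u64 size field that immediately precedes where the data was,
 * so the trimmed sample stays self-consistent.
 */
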
typedef int (*inject_handler)(const struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample,
			      struct evsel *evsel,
			      struct machine *machine);

static int perf_event__repipe_sample(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject,
						  tool);

	if (evsel && evsel->handler) {
		inject_handler f = evsel->handler;
		return f(tool, event, sample, evsel, machine);
	}

	build_id__mark_dso_hit(tool, event, sample, evsel, machine);

	if (inject->itrace_synth_opts.set && sample->aux_sample.size) {
		event = perf_inject__cut_auxtrace_sample(inject, event, sample);
		if (IS_ERR(event))
			return PTR_ERR(event);
	}

	return perf_event__repipe_synth(tool, event);
}

static struct dso *findnew_dso(int pid, int tid, const char *filename,
			       const struct dso_id *id, struct machine *machine)
{
	struct thread *thread;
	struct nsinfo *nsi = NULL;
	struct nsinfo *nnsi;
	struct dso *dso;
	bool vdso;

	thread = machine__findnew_thread(machine, pid, tid);
	if (thread == NULL) {
		pr_err("cannot find or create a task %d/%d.\n", tid, pid);
		return NULL;
	}

	vdso = is_vdso_map(filename);
	nsi = nsinfo__get(thread__nsinfo(thread));

	if (vdso) {
		/* The vdso maps are always on the host and not the
		 * container. Ensure that we don't use setns to look
		 * them up.
		 */
		nnsi = nsinfo__copy(nsi);
		if (nnsi) {
			nsinfo__put(nsi);
			nsinfo__clear_need_setns(nnsi);
			nsi = nnsi;
		}
		dso = machine__findnew_vdso(machine, thread);
	} else {
		dso = machine__findnew_dso_id(machine, filename, id);
	}

	if (dso) {
		mutex_lock(dso__lock(dso));
		dso__set_nsinfo(dso, nsi);
		mutex_unlock(dso__lock(dso));
	} else
		nsinfo__put(nsi);

	thread__put(thread);
	return dso;
}

/*
 * The evsel used for the sample ID for mmap events. Typically stashed when
 * processing mmap events. If not stashed, search the evlist for the first mmap
 * gathering event.
 */
static const struct evsel *inject__mmap_evsel(struct perf_inject *inject)
{
	struct evsel *pos;

	if (inject->mmap_evsel)
		return inject->mmap_evsel;

	evlist__for_each_entry(inject->session->evlist, pos) {
		if (pos->core.attr.mmap) {
			inject->mmap_evsel = pos;
			return pos;
		}
	}
	pr_err("No mmap events found\n");
	return NULL;
}

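/*
 * Reference counting convention used throughout this file: lookups such as
 * machine__findnew_thread() and nsinfo__get() return counted references
 * that must be released with the matching thread__put(), nsinfo__put() or
 * dso__put() on every exit path, as in findnew_dso() above.
 */
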
static int perf_event__repipe_common_mmap(const struct perf_tool *tool,
					  union perf_event *event,
					  struct perf_sample *sample,
					  struct machine *machine,
					  __u32 pid, __u32 tid,
					  __u64 start, __u64 len, __u64 pgoff,
					  __u32 flags, __u32 prot,
					  const char *filename,
					  const struct dso_id *dso_id,
					  int (*perf_event_process)(const struct perf_tool *tool,
								    union perf_event *event,
								    struct perf_sample *sample,
								    struct machine *machine))
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct dso *dso = NULL;
	bool dso_sought = false;

#ifdef HAVE_JITDUMP
	if (inject->jit_mode) {
		u64 n = 0;
		int ret;

		/* If jit marker, then inject jit mmaps and generate ELF images. */
		ret = jit_process(inject->session, &inject->output, machine,
				  filename, pid, tid, &n);
		if (ret < 0)
			return ret;
		if (ret) {
			inject->bytes_written += n;
			return 0;
		}
	}
#endif
	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		dso = findnew_dso(pid, tid, filename, dso_id, machine);
		dso_sought = true;
		if (dso) {
			/* mark it not to inject build-id */
			dso__set_hit(dso);
		}
	}
	if (inject->build_id_style == BID_RWS__INJECT_HEADER_ALL) {
		if (!dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}

		if (dso && !dso__hit(dso)) {
			struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);

			if (evsel) {
				dso__set_hit(dso);
				tool__inject_build_id(tool, sample, machine, evsel,
						      /*misc=*/sample->cpumode,
						      filename, dso, flags);
			}
		}
	} else {
		int err;

		/*
		 * Remember the evsel for lazy build id generation. It is used
		 * for the sample id header type.
		 */
		if ((inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		     inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) &&
		    !inject->mmap_evsel)
			inject->mmap_evsel = evlist__event2evsel(inject->session->evlist, event);

		/* Create the thread, map, etc. Not done for the unordered inject all case. */
		err = perf_event_process(tool, event, sample, machine);

		if (err) {
			dso__put(dso);
			return err;
		}
	}
	if ((inject->build_id_style == BID_RWS__MMAP2_BUILDID_ALL) &&
	    !(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
		struct evsel *evsel = evlist__event2evsel(inject->session->evlist, event);

		if (evsel && !dso_sought) {
			dso = findnew_dso(pid, tid, filename, dso_id, machine);
			dso_sought = true;
		}
		if (evsel && dso &&
		    !tool__inject_mmap2_build_id(tool, sample, machine, evsel,
						 sample->cpumode | PERF_RECORD_MISC_MMAP_BUILD_ID,
						 pid, tid, start, len, pgoff,
						 dso,
						 prot, flags,
						 filename)) {
			/* Injected mmap2 so no need to repipe. */
			dso__put(dso);
			return 0;
		}
	}
	dso__put(dso);
	if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY)
		return 0;

	return perf_event__repipe(tool, event, sample, machine);
}

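/*
 * Summary of the flow above, per mmap/mmap2 event:
 * - jit_mode: hand jitdump marker files to jit_process(), which may replace
 *   the event with synthesized mmaps and ELF images;
 * - *_ALL styles: emit a build ID immediately (header event or rewritten
 *   mmap2);
 * - *_LAZY styles: just stash the evsel and process the event, deferring
 *   build ID emission to the first sample that hits the DSO;
 * - BID_RWS__MMAP2_BUILDID_LAZY also suppresses repiping the original event,
 *   since a build-id-carrying mmap2 is synthesized later instead.
 */
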
static int perf_event__repipe_mmap(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	return perf_event__repipe_common_mmap(
		tool, event, sample, machine,
		event->mmap.pid, event->mmap.tid,
		event->mmap.start, event->mmap.len, event->mmap.pgoff,
		/*flags=*/0, PROT_EXEC,
		event->mmap.filename, /*dso_id=*/NULL,
		perf_event__process_mmap);
}

static int perf_event__repipe_mmap2(const struct perf_tool *tool,
				    union perf_event *event,
				    struct perf_sample *sample,
				    struct machine *machine)
{
	struct dso_id id = dso_id_empty;

	if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		build_id__init(&id.build_id, event->mmap2.build_id, event->mmap2.build_id_size);
	} else {
		id.maj = event->mmap2.maj;
		id.min = event->mmap2.min;
		id.ino = event->mmap2.ino;
		id.ino_generation = event->mmap2.ino_generation;
		id.mmap2_valid = true;
		id.mmap2_ino_generation_valid = true;
	}

	return perf_event__repipe_common_mmap(
		tool, event, sample, machine,
		event->mmap2.pid, event->mmap2.tid,
		event->mmap2.start, event->mmap2.len, event->mmap2.pgoff,
		event->mmap2.flags, event->mmap2.prot,
		event->mmap2.filename, &id,
		perf_event__process_mmap2);
}

static int perf_event__repipe_fork(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_fork(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_comm(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_comm(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_namespaces(const struct perf_tool *tool,
					 union perf_event *event,
					 struct perf_sample *sample,
					 struct machine *machine)
{
	int err = perf_event__process_namespaces(tool, event, sample, machine);

	perf_event__repipe(tool, event, sample, machine);

	return err;
}

static int perf_event__repipe_exit(const struct perf_tool *tool,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct machine *machine)
{
	int err;

	err = perf_event__process_exit(tool, event, sample, machine);
	perf_event__repipe(tool, event, sample, machine);

	return err;
}

#ifdef HAVE_LIBTRACEEVENT
static int perf_event__repipe_tracing_data(const struct perf_tool *tool,
					   struct perf_session *session,
					   union perf_event *event)
{
	perf_event__repipe_synth(tool, event);

	return perf_event__process_tracing_data(tool, session, event);
}
#endif

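/*
 * The fork/comm/namespaces/exit variants above both process the event, so
 * the machine/thread state stays in sync for later DSO and map lookups,
 * and repipe it unchanged to the output.
 */
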
static int dso__read_build_id(struct dso *dso)
{
	struct nscookie nsc;
	struct build_id bid = { .size = 0, };

	if (dso__has_build_id(dso))
		return 0;

	mutex_lock(dso__lock(dso));
	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
	if (filename__read_build_id(dso__long_name(dso), &bid) > 0)
		dso__set_build_id(dso, &bid);
	else if (dso__nsinfo(dso)) {
		char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));

		if (new_name && filename__read_build_id(new_name, &bid) > 0)
			dso__set_build_id(dso, &bid);
		free(new_name);
	}
	nsinfo__mountns_exit(&nsc);
	mutex_unlock(dso__lock(dso));

	return dso__has_build_id(dso) ? 0 : -1;
}

static struct strlist *perf_inject__parse_known_build_ids(
	const char *known_build_ids_string)
{
	struct str_node *pos, *tmp;
	struct strlist *known_build_ids;
	int bid_len;

	known_build_ids = strlist__new(known_build_ids_string, NULL);
	if (known_build_ids == NULL)
		return NULL;
	strlist__for_each_entry_safe(pos, tmp, known_build_ids) {
		const char *build_id, *dso_name;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		if (dso_name == NULL) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		bid_len = dso_name - pos->s;
		dso_name = skip_spaces(dso_name);
		if (bid_len % 2 != 0 || bid_len >= SBUILD_ID_SIZE) {
			strlist__remove(known_build_ids, pos);
			continue;
		}
		for (int ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			if (!isxdigit(build_id[2 * ix]) ||
			    !isxdigit(build_id[2 * ix + 1])) {
				strlist__remove(known_build_ids, pos);
				break;
			}
		}
	}
	return known_build_ids;
}

static bool perf_inject__lookup_known_build_id(struct perf_inject *inject,
					       struct dso *dso)
{
	struct str_node *pos;

	strlist__for_each_entry(pos, inject->known_build_ids) {
		struct build_id bid;
		const char *build_id, *dso_name;
		size_t bid_len;

		build_id = skip_spaces(pos->s);
		dso_name = strchr(build_id, ' ');
		bid_len = dso_name - pos->s;
		if (bid_len > sizeof(bid.data))
			bid_len = sizeof(bid.data);
		dso_name = skip_spaces(dso_name);
		if (strcmp(dso__long_name(dso), dso_name))
			continue;
		for (size_t ix = 0; 2 * ix + 1 < bid_len; ++ix) {
			bid.data[ix] = (hex(build_id[2 * ix]) << 4 |
					hex(build_id[2 * ix + 1]));
		}
		bid.size = bid_len / 2;
		dso__set_build_id(dso, &bid);
		return true;
	}
	return false;
}

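/*
 * Each known-build-ids entry handled above is an even-length hex build ID
 * followed by a DSO path, e.g. (values purely illustrative):
 *
 *	"f1e2d3c4b5a69788f1e2d3c4b5a69788f1e2d3c4 /usr/lib/libfoo.so"
 *
 * Malformed entries (no space, odd hex length, non-hex digits) are silently
 * dropped during parsing rather than reported as errors.
 */
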
static int tool__inject_build_id(const struct perf_tool *tool,
				 struct perf_sample *sample,
				 struct machine *machine,
				 const struct evsel *evsel,
				 __u16 misc,
				 const char *filename,
				 struct dso *dso, u32 flags)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int err;

	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
		return 0;
	if (is_no_dso_memory(filename))
		return 0;

	if (inject->known_build_ids != NULL &&
	    perf_inject__lookup_known_build_id(inject, dso))
		return 1;

	if (dso__read_build_id(dso) < 0) {
		pr_debug("no build_id found for %s\n", filename);
		return -1;
	}

	err = perf_event__synthesize_build_id(tool, sample, machine,
					      perf_event__repipe,
					      evsel, misc, dso__bid(dso),
					      filename);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", filename);
		return -1;
	}

	return 0;
}

static int tool__inject_mmap2_build_id(const struct perf_tool *tool,
				       struct perf_sample *sample,
				       struct machine *machine,
				       const struct evsel *evsel,
				       __u16 misc,
				       __u32 pid, __u32 tid,
				       __u64 start, __u64 len, __u64 pgoff,
				       struct dso *dso,
				       __u32 prot, __u32 flags,
				       const char *filename)
{
	int err;

	/* Return to repipe anonymous maps. */
	if (is_anon_memory(filename) || flags & MAP_HUGETLB)
		return 1;
	if (is_no_dso_memory(filename))
		return 1;

	if (dso__read_build_id(dso)) {
		pr_debug("no build_id found for %s\n", filename);
		return -1;
	}

	err = perf_event__synthesize_mmap2_build_id(tool, sample, machine,
						    perf_event__repipe,
						    evsel,
						    misc, pid, tid,
						    start, len, pgoff,
						    dso__bid(dso),
						    prot, flags,
						    filename);
	if (err) {
		pr_err("Can't synthesize build_id event for %s\n", filename);
		return -1;
	}
	return 0;
}

static int mark_dso_hit(const struct perf_inject *inject,
			const struct perf_tool *tool,
			struct perf_sample *sample,
			struct machine *machine,
			const struct evsel *mmap_evsel,
			struct map *map, bool sample_in_dso)
{
	struct dso *dso;
	u16 misc = sample->cpumode;

	if (!map)
		return 0;

	if (!sample_in_dso) {
		u16 guest_mask = PERF_RECORD_MISC_GUEST_KERNEL |
				 PERF_RECORD_MISC_GUEST_USER;

		if ((misc & guest_mask) != 0) {
			misc &= PERF_RECORD_MISC_HYPERVISOR;
			misc |= __map__is_kernel(map)
				? PERF_RECORD_MISC_GUEST_KERNEL
				: PERF_RECORD_MISC_GUEST_USER;
		} else {
			misc &= PERF_RECORD_MISC_HYPERVISOR;
			misc |= __map__is_kernel(map)
				? PERF_RECORD_MISC_KERNEL
				: PERF_RECORD_MISC_USER;
		}
	}
	dso = map__dso(map);
	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY) {
		if (dso && !dso__hit(dso)) {
			dso__set_hit(dso);
			tool__inject_build_id(tool, sample, machine,
					      mmap_evsel, misc, dso__long_name(dso), dso,
					      map__flags(map));
		}
	} else if (inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		if (!map__hit(map)) {
			const struct build_id null_bid = { .size = 0 };
			const struct build_id *bid = dso ? dso__bid(dso) : &null_bid;
			const char *filename = dso ? dso__long_name(dso) : "";

			map__set_hit(map);
			perf_event__synthesize_mmap2_build_id(tool, sample, machine,
							      perf_event__repipe,
							      mmap_evsel,
							      misc,
							      sample->pid, sample->tid,
							      map__start(map),
							      map__end(map) - map__start(map),
							      map__pgoff(map),
							      bid,
							      map__prot(map),
							      map__flags(map),
							      filename);
		}
	}
	return 0;
}

struct mark_dso_hit_args {
	const struct perf_inject *inject;
	const struct perf_tool *tool;
	struct perf_sample *sample;
	struct machine *machine;
	const struct evsel *mmap_evsel;
};

static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data)
{
	struct mark_dso_hit_args *args = data;
	struct map *map = node->ms.map;

	return mark_dso_hit(args->inject, args->tool, args->sample, args->machine,
			    args->mmap_evsel, map, /*sample_in_dso=*/false);
}

int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event,
			       struct perf_sample *sample,
			       struct evsel *evsel __maybe_unused,
			       struct machine *machine)
{
	struct addr_location al;
	struct thread *thread;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct mark_dso_hit_args args = {
		.inject = inject,
		.tool = tool,
		/*
		 * Use the parsed sample data of the sample event, which will
		 * have a later timestamp than the mmap event.
		 */
		.sample = sample,
		.machine = machine,
		.mmap_evsel = inject__mmap_evsel(inject),
	};

	addr_location__init(&al);
	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	if (thread == NULL) {
		pr_err("problem processing %d event, skipping it.\n",
		       event->header.type);
		goto repipe;
	}

	if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
		mark_dso_hit(inject, tool, sample, machine, args.mmap_evsel, al.map,
			     /*sample_in_dso=*/true);
	}

	sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH,
					/*symbols=*/false, mark_dso_hit_callback, &args);

	thread__put(thread);
repipe:
	perf_event__repipe(tool, event, sample, machine);
	addr_location__exit(&al);
	return 0;
}

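/*
 * Lazy build ID injection in a nutshell: for every sample, resolve the IP
 * (and each callchain node) to a map, and the first time a DSO or map is
 * hit, synthesize its build ID just ahead of the sample in the output
 * stream. DSOs that never receive a sample are skipped entirely.
 */
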
static int perf_inject__sched_process_exit(const struct perf_tool *tool,
					   union perf_event *event __maybe_unused,
					   struct perf_sample *sample,
					   struct evsel *evsel __maybe_unused,
					   struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct event_entry *ent;

	list_for_each_entry(ent, &inject->samples, node) {
		if (sample->tid == ent->tid) {
			list_del_init(&ent->node);
			free(ent);
			break;
		}
	}

	return 0;
}

static int perf_inject__sched_switch(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample,
				     struct evsel *evsel,
				     struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct event_entry *ent;

	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);

	ent = malloc(event->header.size + sizeof(struct event_entry));
	if (ent == NULL) {
		color_fprintf(stderr, PERF_COLOR_RED,
			      "Not enough memory to process sched switch event!");
		return -1;
	}

	ent->tid = sample->tid;
	memcpy(&ent->event, event, event->header.size);
	list_add(&ent->node, &inject->samples);
	return 0;
}

#ifdef HAVE_LIBTRACEEVENT
static int perf_inject__sched_stat(const struct perf_tool *tool,
				   union perf_event *event __maybe_unused,
				   struct perf_sample *sample,
				   struct evsel *evsel,
				   struct machine *machine)
{
	struct event_entry *ent;
	union perf_event *event_sw;
	struct perf_sample sample_sw;
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	u32 pid = evsel__intval(evsel, sample, "pid");

	list_for_each_entry(ent, &inject->samples, node) {
		if (pid == ent->tid)
			goto found;
	}

	return 0;
found:
	event_sw = &ent->event[0];
	evsel__parse_sample(evsel, event_sw, &sample_sw);

	sample_sw.period = sample->period;
	sample_sw.time = sample->time;
	perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
				      evsel->core.attr.read_format, &sample_sw);
	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
}
#endif

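/*
 * --sched-stat operation: each sched:sched_switch sample is stashed per tid
 * in inject->samples. When a sched_stat sample later arrives for that pid,
 * the stashed switch event is re-emitted with the stat sample's period and
 * timestamp, so the cost is attributed to the context of the switched task.
 */
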
static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
{
	if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
		return NULL;
	return &gs->vcpu[vcpu];
}

static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
{
	ssize_t ret = writen(gs->tmp_fd, buf, sz);

	return ret < 0 ? ret : 0;
}

static int guest_session__repipe(const struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample __maybe_unused,
				 struct machine *machine __maybe_unused)
{
	struct guest_session *gs = container_of(tool, struct guest_session, tool);

	return guest_session__output_bytes(gs, event, event->header.size);
}

static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
{
	struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid));
	int hash;

	if (!guest_tid)
		return -ENOMEM;

	guest_tid->tid = tid;
	guest_tid->vcpu = vcpu;
	hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&guest_tid->node, &gs->tids[hash]);

	return 0;
}

static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
				 union perf_event *event,
				 u64 offset __maybe_unused, void *data)
{
	struct guest_session *gs = data;
	unsigned int vcpu;
	struct guest_vcpu *guest_vcpu;
	int ret;

	if (event->header.type != PERF_RECORD_COMM ||
	    event->comm.pid != gs->machine_pid)
		return 0;

	/*
	 * The QEMU option -name debug-threads=on causes thread names to be
	 * formatted as below, although it is not an ABI. libvirt also seems
	 * to use this by default. Here we rely on it to tell us which thread
	 * is which VCPU.
	 */
	ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
	if (ret <= 0)
		return ret;
	pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
		 event->comm.tid, event->comm.comm, vcpu);
	if (vcpu > INT_MAX) {
		pr_err("Invalid VCPU %u\n", vcpu);
		return -EINVAL;
	}
	guest_vcpu = guest_session__vcpu(gs, vcpu);
	if (!guest_vcpu)
		return -ENOMEM;
	if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
		pr_err("Fatal error: Two threads found with the same VCPU\n");
		return -EINVAL;
	}
	guest_vcpu->tid = event->comm.tid;

	return guest_session__map_tid(gs, event->comm.tid, vcpu);
}

static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
{
	return perf_session__peek_events(session, session->header.data_offset,
					 session->header.data_size,
					 host_peek_vm_comms_cb, gs);
}

static bool evlist__is_id_used(struct evlist *evlist, u64 id)
{
	return evlist__id2sid(evlist, id);
}

static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
{
	do {
		gs->highest_id += 1;
	} while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));

	return gs->highest_id;
}

static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
{
	struct guest_id *guest_id = zalloc(sizeof(*guest_id));
	int hash;

	if (!guest_id)
		return -ENOMEM;

	guest_id->id = id;
	guest_id->host_id = host_id;
	guest_id->vcpu = vcpu;
	hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&guest_id->node, &gs->heads[hash]);

	return 0;
}

static u64 evlist__find_highest_id(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 highest_id = 1;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j < evsel->core.ids; j++) {
			u64 id = evsel->core.id[j];

			if (id > highest_id)
				highest_id = id;
		}
	}

	return highest_id;
}

static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
{
	struct evlist *evlist = gs->session->evlist;
	struct evsel *evsel;
	int ret;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j < evsel->core.ids; j++) {
			struct perf_sample_id *sid;
			u64 host_id;
			u64 id;

			id = evsel->core.id[j];
			sid = evlist__id2sid(evlist, id);
			if (!sid || sid->cpu.cpu == -1)
				continue;
			host_id = guest_session__allocate_new_id(gs, host_evlist);
			ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
			if (ret)
				return ret;
		}
	}

	return 0;
}

static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
{
	struct hlist_head *head;
	struct guest_id *guest_id;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &gs->heads[hash];

	hlist_for_each_entry(guest_id, head, node)
		if (guest_id->id == id)
			return guest_id;

	return NULL;
}

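/*
 * ID remapping: guest sample IDs can collide with host ones, so each guest
 * (id, vcpu) pair is assigned a fresh host ID starting just past the
 * highest ID the host evlist already uses. guest_session__lookup_id() is
 * then the single translation point when guest events are rewritten.
 */
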
static int process_attr(const struct perf_tool *tool, union perf_event *event,
			struct perf_sample *sample __maybe_unused,
			struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	return perf_event__process_attr(tool, event, &inject->session->evlist);
}

static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	struct perf_event_attr attr = evsel->core.attr;
	u64 *id_array;
	u32 *vcpu_array;
	int ret = -ENOMEM;
	u32 i;

	id_array = calloc(evsel->core.ids, sizeof(*id_array));
	if (!id_array)
		return -ENOMEM;

	vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
	if (!vcpu_array)
		goto out;

	for (i = 0; i < evsel->core.ids; i++) {
		u64 id = evsel->core.id[i];
		struct guest_id *guest_id = guest_session__lookup_id(gs, id);

		if (!guest_id) {
			pr_err("Failed to find guest id %"PRIu64"\n", id);
			ret = -EINVAL;
			goto out;
		}
		id_array[i] = guest_id->host_id;
		vcpu_array[i] = guest_id->vcpu;
	}

	attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
	attr.exclude_host = 1;
	attr.exclude_guest = 0;

	ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
					  id_array, process_attr);
	if (ret)
		pr_err("Failed to add guest attr.\n");

	for (i = 0; i < evsel->core.ids; i++) {
		struct perf_sample_id *sid;
		u32 vcpu = vcpu_array[i];

		sid = evlist__id2sid(inject->session->evlist, id_array[i]);
		/* Guest event is per-thread from the host point of view */
		sid->cpu.cpu = -1;
		sid->tid = gs->vcpu[vcpu].tid;
		sid->machine_pid = gs->machine_pid;
		sid->vcpu.cpu = vcpu;
	}
out:
	free(vcpu_array);
	free(id_array);
	return ret;
}

static int guest_session__add_attrs(struct guest_session *gs)
{
	struct evlist *evlist = gs->session->evlist;
	struct evsel *evsel;
	int ret;

	evlist__for_each_entry(evlist, evsel) {
		ret = guest_session__add_attr(gs, evsel);
		if (ret)
			return ret;
	}

	return 0;
}

static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
{
	struct perf_session *session = inject->session;
	struct evlist *evlist = session->evlist;
	struct machine *machine = &session->machines.host;
	size_t from = evlist->core.nr_entries - new_cnt;

	return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
						 evlist, machine, from);
}

static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
{
	struct hlist_head *head;
	struct guest_tid *guest_tid;
	int hash;

	hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
	head = &gs->tids[hash];

	hlist_for_each_entry(guest_tid, head, node)
		if (guest_tid->tid == tid)
			return guest_tid;

	return NULL;
}

static bool dso__is_in_kernel_space(struct dso *dso)
{
	if (dso__is_vdso(dso))
		return false;

	return dso__is_kcore(dso) ||
	       dso__kernel(dso) ||
	       is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN);
}

static u64 evlist__first_id(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.ids)
			return evsel->core.id[0];
	}
	return 0;
}

static int process_build_id(const struct perf_tool *tool,
			    union perf_event *event,
			    struct perf_sample *sample __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);

	return perf_event__process_build_id(tool, inject->session, event);
}

static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
{
	struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
	struct perf_sample synth_sample = {
		.pid	   = -1,
		.tid	   = -1,
		.time	   = -1,
		.stream_id = -1,
		.cpu	   = -1,
		.period	   = 1,
		.cpumode   = dso__is_in_kernel_space(dso)
			     ? PERF_RECORD_MISC_GUEST_KERNEL
			     : PERF_RECORD_MISC_GUEST_USER,
	};

	if (!machine)
		return -ENOMEM;

	dso__set_hit(dso);

	return perf_event__synthesize_build_id(&inject->tool, &synth_sample, machine,
					       process_build_id, inject__mmap_evsel(inject),
					       /*misc=*/synth_sample.cpumode,
					       dso__bid(dso), dso__long_name(dso));
}

static int guest_session__add_build_ids_cb(struct dso *dso, void *data)
{
	struct guest_session *gs = data;
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	if (!dso__has_build_id(dso))
		return 0;

	return synthesize_build_id(inject, dso, gs->machine_pid);
}

static int guest_session__add_build_ids(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	/* Build IDs will be put in the Build ID feature section */
	perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);

	return dsos__for_each_dso(&gs->session->machines.host.dsos,
				  guest_session__add_build_ids_cb,
				  gs);
}

static int guest_session__ksymbol_event(const struct perf_tool *tool,
					union perf_event *event,
					struct perf_sample *sample __maybe_unused,
					struct machine *machine __maybe_unused)
{
	struct guest_session *gs = container_of(tool, struct guest_session, tool);

	/* Only support out-of-line i.e. no BPF support */
	if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
		return 0;

	return guest_session__output_bytes(gs, event, event->header.size);
}

static int guest_session__start(struct guest_session *gs, const char *name, bool force)
{
	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
	struct perf_session *session;
	int ret;

	/* Only these events will be injected */
	gs->tool.mmap		= guest_session__repipe;
	gs->tool.mmap2		= guest_session__repipe;
	gs->tool.comm		= guest_session__repipe;
	gs->tool.fork		= guest_session__repipe;
	gs->tool.exit		= guest_session__repipe;
	gs->tool.lost		= guest_session__repipe;
	gs->tool.context_switch	= guest_session__repipe;
	gs->tool.ksymbol	= guest_session__ksymbol_event;
	gs->tool.text_poke	= guest_session__repipe;
	/*
	 * Processing a build ID creates a struct dso with that build ID. Later,
	 * all guest dsos are iterated and the build IDs processed into the host
	 * session where they will be output to the Build ID feature section
	 * when the perf.data file header is written.
	 */
	gs->tool.build_id	= perf_event__process_build_id;
	/* Process the id index to know what VCPU an ID belongs to */
	gs->tool.id_index	= perf_event__process_id_index;

	gs->tool.ordered_events	= true;
	gs->tool.ordering_requires_timestamps = true;

	gs->data.path	= name;
	gs->data.force	= force;
	gs->data.mode	= PERF_DATA_MODE_READ;

	session = perf_session__new(&gs->data, &gs->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);
	gs->session = session;

	/*
	 * Initial events have zero'd ID samples. Get default ID sample size
	 * used for removing them.
	 */
	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
	/* And default ID for adding back a host-compatible ID sample */
	gs->dflt_id = evlist__first_id(session->evlist);
	if (!gs->dflt_id) {
		pr_err("Guest data has no sample IDs");
		return -EINVAL;
	}

	/* Temporary file for guest events */
	gs->tmp_file_name = strdup(tmp_file_name);
	if (!gs->tmp_file_name)
		return -ENOMEM;
	gs->tmp_fd = mkstemp(gs->tmp_file_name);
	if (gs->tmp_fd < 0)
		return -errno;

	if (zstd_init(&gs->session->zstd_data, 0) < 0)
		pr_warning("Guest session decompression initialization failed.\n");

	/*
	 * perf does not support processing 2 sessions simultaneously, so output
	 * guest events to a temporary file.
	 */
	ret = perf_session__process_events(gs->session);
	if (ret)
		return ret;

	if (lseek(gs->tmp_fd, 0, SEEK_SET))
		return -errno;

	return 0;
}

/* Free hlist nodes assuming hlist_node is the first member of hlist entries */
static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
{
	struct hlist_node *pos, *n;
	size_t i;

	for (i = 0; i < hlist_sz; ++i) {
		hlist_for_each_safe(pos, n, &heads[i]) {
			hlist_del(pos);
			free(pos);
		}
	}
}

static void guest_session__exit(struct guest_session *gs)
{
	if (gs->session) {
		perf_session__delete(gs->session);
		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
	}
	if (gs->tmp_file_name) {
		if (gs->tmp_fd >= 0)
			close(gs->tmp_fd);
		unlink(gs->tmp_file_name);
		zfree(&gs->tmp_file_name);
	}
	zfree(&gs->vcpu);
	zfree(&gs->perf_data_file);
}

static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
{
	tc->time_shift		= time_conv->time_shift;
	tc->time_mult		= time_conv->time_mult;
	tc->time_zero		= time_conv->time_zero;
	tc->time_cycles		= time_conv->time_cycles;
	tc->time_mask		= time_conv->time_mask;
	tc->cap_user_time_zero	= time_conv->cap_user_time_zero;
	tc->cap_user_time_short	= time_conv->cap_user_time_short;
}

static void guest_session__get_tc(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
	get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
}

static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
{
	u64 tsc;

	if (!guest_time) {
		*host_time = 0;
		return;
	}

	if (gs->guest_tc.cap_user_time_zero)
		tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
	else
		tsc = guest_time;

	/*
	 * This is the correct order of operations for x86 if the TSC Offset and
	 * Multiplier values are used.
	 */
	tsc -= gs->time_offset;
	tsc /= gs->time_scale;

	if (gs->host_tc.cap_user_time_zero)
		*host_time = tsc_to_perf_time(tsc, &gs->host_tc);
	else
		*host_time = tsc;
}

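/*
 * When both conversions are available, the full guest-to-host clock mapping
 * above is:
 *
 *	host_time = tsc_to_perf_time(
 *			(perf_time_to_tsc(guest_time, guest_tc) - time_offset)
 *				/ time_scale,
 *			host_tc);
 *
 * with time_offset and time_scale taken from the --guest-data option.
 */
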
static int guest_session__fetch(struct guest_session *gs)
{
	void *buf;
	struct perf_event_header *hdr;
	size_t hdr_sz = sizeof(*hdr);
	ssize_t ret;

	buf = gs->ev.event_buf;
	if (!buf) {
		buf = malloc(PERF_SAMPLE_MAX_SIZE);
		if (!buf)
			return -ENOMEM;
		gs->ev.event_buf = buf;
	}
	hdr = buf;
	ret = readn(gs->tmp_fd, buf, hdr_sz);
	if (ret < 0)
		return ret;

	if (!ret) {
		/* Zero size means EOF */
		hdr->size = 0;
		return 0;
	}

	buf += hdr_sz;

	ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
	if (ret < 0)
		return ret;

	gs->ev.event = (union perf_event *)gs->ev.event_buf;
	gs->ev.sample.time = 0;

	if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
		pr_err("Unexpected type fetching guest event");
		return 0;
	}

	ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
	if (ret) {
		pr_err("Parse failed fetching guest event");
		return ret;
	}

	if (!gs->have_tc) {
		guest_session__get_tc(gs);
		gs->have_tc = true;
	}

	guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);

	return 0;
}

static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
				    const struct perf_sample *sample)
{
	struct evsel *evsel;
	void *array;
	int ret;

	evsel = evlist__id2evsel(evlist, sample->id);
	array = ev;

	if (!evsel) {
		pr_err("No evsel for id %"PRIu64"\n", sample->id);
		return -EINVAL;
	}

	array += ev->header.size;
	ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
	if (ret < 0)
		return ret;

	if (ret & 7) {
		pr_err("Bad id sample size %d\n", ret);
		return -EINVAL;
	}

	ev->header.size += ret;

	return 0;
}

static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	int ret;

	if (!gs->ready)
		return 0;

	while (1) {
		struct perf_sample *sample;
		struct guest_id *guest_id;
		union perf_event *ev;
		u16 id_hdr_size;
		u8 cpumode;
		u64 id;

		if (!gs->fetched) {
			ret = guest_session__fetch(gs);
			if (ret)
				return ret;
			gs->fetched = true;
		}

		ev = gs->ev.event;
		sample = &gs->ev.sample;

		if (!ev->header.size)
			return 0; /* EOF */

		if (sample->time > timestamp)
			return 0;

		/* Change cpumode to guest */
		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
		if (cpumode & PERF_RECORD_MISC_USER)
			cpumode = PERF_RECORD_MISC_GUEST_USER;
		else
			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
		ev->header.misc |= cpumode;

		id = sample->id;
		if (!id) {
			id = gs->dflt_id;
			id_hdr_size = gs->dflt_id_hdr_size;
		} else {
			struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);

			id_hdr_size = evsel__id_hdr_size(evsel);
		}

		if (id_hdr_size & 7) {
			pr_err("Bad id_hdr_size %u\n", id_hdr_size);
			return -EINVAL;
		}

		if (ev->header.size & 7) {
			pr_err("Bad event size %u\n", ev->header.size);
			return -EINVAL;
		}

		/* Remove guest id sample */
		ev->header.size -= id_hdr_size;

		if (ev->header.size & 7) {
			pr_err("Bad raw event size %u\n", ev->header.size);
			return -EINVAL;
		}

		guest_id = guest_session__lookup_id(gs, id);
		if (!guest_id) {
			pr_err("Guest event with unknown id %llu\n",
			       (unsigned long long)id);
			return -EINVAL;
		}

		/* Change to host ID to avoid conflicting ID values */
		sample->id = guest_id->host_id;
		sample->stream_id = guest_id->host_id;

		if (sample->cpu != (u32)-1) {
			if (sample->cpu >= gs->vcpu_cnt) {
				pr_err("Guest event with unknown VCPU %u\n",
				       sample->cpu);
				return -EINVAL;
			}
			/* Change to host CPU instead of guest VCPU */
			sample->cpu = gs->vcpu[sample->cpu].cpu;
		}

		/* New id sample with new ID and CPU */
		ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
		if (ret)
			return ret;

		if (ev->header.size & 7) {
			pr_err("Bad new event size %u\n", ev->header.size);
			return -EINVAL;
		}

		gs->fetched = false;

		ret = output_bytes(inject, ev, ev->header.size);
		if (ret)
			return ret;
	}
}

static int guest_session__flush_events(struct guest_session *gs)
{
	return guest_session__inject_events(gs, -1);
}

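/*
 * For each injected guest event, the loop above: (1) rewrites the cpumode
 * to a guest one, (2) strips the guest ID sample from the record tail,
 * (3) swaps in the remapped host ID and the host CPU the VCPU was last seen
 * running on, and (4) appends a freshly synthesized ID sample before
 * writing the event out. guest_session__flush_events() drains everything by
 * passing timestamp -1, which compares as the maximum u64.
 */
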
static int host__repipe(const struct perf_tool *tool,
			union perf_event *event,
			struct perf_sample *sample,
			struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret;

	ret = guest_session__inject_events(&inject->guest_session, sample->time);
	if (ret)
		return ret;

	return perf_event__repipe(tool, event, sample, machine);
}

static int host__finished_init(const struct perf_tool *tool, struct perf_session *session,
			       union perf_event *event)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	struct guest_session *gs = &inject->guest_session;
	int ret;

	/*
	 * Peek through host COMM events to find QEMU threads and the VCPU they
	 * are running.
	 */
	ret = host_peek_vm_comms(session, gs);
	if (ret)
		return ret;

	if (!gs->vcpu_cnt) {
		pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
		return -EINVAL;
	}

	/*
	 * Allocate new (unused) host sample IDs and map them to the guest IDs.
	 */
	gs->highest_id = evlist__find_highest_id(session->evlist);
	ret = guest_session__map_ids(gs, session->evlist);
	if (ret)
		return ret;

	ret = guest_session__add_attrs(gs);
	if (ret)
		return ret;

	ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
	if (ret) {
		pr_err("Failed to synthesize id_index\n");
		return ret;
	}

	ret = guest_session__add_build_ids(gs);
	if (ret) {
		pr_err("Failed to add guest build IDs\n");
		return ret;
	}

	gs->ready = true;

	ret = guest_session__inject_events(gs, 0);
	if (ret)
		return ret;

	return perf_event__repipe_op2_synth(tool, session, event);
}

/*
 * Obey finished-round ordering. The FINISHED_ROUND event is first processed
 * which flushes host events to file up until the last flush time. Then inject
 * guest events up to the same time. Finally write out the FINISHED_ROUND event
 * itself.
 */
static int host__finished_round(const struct perf_tool *tool,
				union perf_event *event,
				struct ordered_events *oe)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	int ret = perf_event__process_finished_round(tool, event, oe);
	u64 timestamp = ordered_events__last_flush_time(oe);

	if (ret)
		return ret;

	ret = guest_session__inject_events(&inject->guest_session, timestamp);
	if (ret)
		return ret;

	return perf_event__repipe_oe_synth(tool, event, oe);
}

static int host__context_switch(const struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	struct guest_session *gs = &inject->guest_session;
	u32 pid = event->context_switch.next_prev_pid;
	u32 tid = event->context_switch.next_prev_tid;
	struct guest_tid *guest_tid;
	u32 vcpu;

	if (out || pid != gs->machine_pid)
		goto out;

	guest_tid = guest_session__lookup_tid(gs, tid);
	if (!guest_tid)
		goto out;

	if (sample->cpu == (u32)-1) {
		pr_err("Switch event does not have CPU\n");
		return -EINVAL;
	}

	vcpu = guest_tid->vcpu;
	if (vcpu >= gs->vcpu_cnt)
		return -EINVAL;

	/* Guest is switching in, record which CPU the VCPU is now running on */
	gs->vcpu[vcpu].cpu = sample->cpu;
out:
	return host__repipe(tool, event, sample, machine);
}

static void sig_handler(int sig __maybe_unused)
{
	session_done = 1;
}

static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	const char *name = evsel__name(evsel);

	if (!(attr->sample_type & sample_type)) {
		pr_err("Samples for %s event do not have %s attribute set.",
		       name, sample_msg);
		return -EINVAL;
	}

	return 0;
}

static int drop_sample(const struct perf_tool *tool __maybe_unused,
		       union perf_event *event __maybe_unused,
		       struct perf_sample *sample __maybe_unused,
		       struct evsel *evsel __maybe_unused,
		       struct machine *machine __maybe_unused)
{
	return 0;
}

static void strip_init(struct perf_inject *inject)
{
	struct evlist *evlist = inject->session->evlist;
	struct evsel *evsel;

	inject->tool.context_switch = perf_event__drop;

	evlist__for_each_entry(evlist, evsel)
		evsel->handler = drop_sample;
}

static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	const char *args;
	char *dry_run;

	if (unset)
		return 0;

	inject->itrace_synth_opts.set = true;
	inject->itrace_synth_opts.vm_time_correlation = true;
	inject->in_place_update = true;

	if (!str)
		return 0;

	dry_run = skip_spaces(str);
	if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
		inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
		inject->in_place_update_dry_run = true;
		args = dry_run + strlen("dry-run");
	} else {
		args = str;
	}

	inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);

	return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
}

static int parse_guest_data(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	struct guest_session *gs = &inject->guest_session;
	char *tok;
	char *s;

	if (unset)
		return 0;

	if (!str)
		goto bad_args;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	gs->perf_data_file = strsep(&s, ",");
	if (!gs->perf_data_file)
		goto bad_args;

	gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
	if (gs->copy_kcore_dir)
		inject->output.is_dir = true;

	tok = strsep(&s, ",");
	if (!tok)
		goto bad_args;
	gs->machine_pid = strtoul(tok, NULL, 0);
	if (!inject->guest_session.machine_pid)
		goto bad_args;

	gs->time_scale = 1;

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_offset = strtoull(tok, NULL, 0);

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_scale = strtod(tok, NULL);
	if (!gs->time_scale)
		goto bad_args;
out:
	return 0;

bad_args:
	pr_err("--guest-data option requires guest perf.data file name, "
	       "guest machine PID, and optionally guest timestamp offset, "
	       "and guest timestamp scale factor, separated by commas.\n");
	return -1;
}

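/*
 * Example invocation (file names and values purely illustrative):
 *
 *	perf inject --guest-data=guest.data,12345,0x1000,1.01 \
 *		-i host.data -o injected.data
 *
 * i.e. <guest perf.data>,<machine PID>[,<time offset>[,<time scale>]],
 * matching the strsep() parsing above.
 */
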
static int save_section_info_cb(struct perf_file_section *section,
				struct perf_header *ph __maybe_unused,
				int feat, int fd __maybe_unused, void *data)
{
	struct perf_inject *inject = data;

	inject->secs[feat] = *section;
	return 0;
}

static int save_section_info(struct perf_inject *inject)
{
	struct perf_header *header = &inject->session->header;
	int fd = perf_data__fd(inject->session->data);

	return perf_header__process_sections(header, fd, inject, save_section_info_cb);
}
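
/*
 * Decide which header feature sections can be copied verbatim from the
 * input file: information that describes the machine or software is kept,
 * whereas anything that injection may change must be regenerated.
 */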
static bool keep_feat(int feat)
{
	switch (feat) {
	/* Keep original information that describes the machine or software */
	case HEADER_TRACING_DATA:
	case HEADER_HOSTNAME:
	case HEADER_OSRELEASE:
	case HEADER_VERSION:
	case HEADER_ARCH:
	case HEADER_NRCPUS:
	case HEADER_CPUDESC:
	case HEADER_CPUID:
	case HEADER_TOTAL_MEM:
	case HEADER_CPU_TOPOLOGY:
	case HEADER_NUMA_TOPOLOGY:
	case HEADER_PMU_MAPPINGS:
	case HEADER_CACHE:
	case HEADER_MEM_TOPOLOGY:
	case HEADER_CLOCKID:
	case HEADER_BPF_PROG_INFO:
	case HEADER_BPF_BTF:
	case HEADER_CPU_PMU_CAPS:
	case HEADER_CLOCK_DATA:
	case HEADER_HYBRID_TOPOLOGY:
	case HEADER_PMU_CAPS:
		return true;
	/* Information that can be updated */
	case HEADER_BUILD_ID:
	case HEADER_CMDLINE:
	case HEADER_EVENT_DESC:
	case HEADER_BRANCH_STACK:
	case HEADER_GROUP_DESC:
	case HEADER_AUXTRACE:
	case HEADER_STAT:
	case HEADER_SAMPLE_TIME:
	case HEADER_DIR_FORMAT:
	case HEADER_COMPRESSED:
	default:
		return false;
	}
}
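
/* Read exactly sz bytes at offset offs, treating a short read as an error. */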
static int read_file(int fd, u64 offs, void *buf, size_t sz)
{
	ssize_t ret = preadn(fd, buf, sz, offs);

	if (ret < 0)
		return -errno;
	if ((size_t)ret != sz)
		return -EINVAL;
	return 0;
}
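
/*
 * Copy one saved feature section from the input file to the output via the
 * given feat_writer.
 */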
static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
{
	int fd = perf_data__fd(inject->session->data);
	u64 offs = inject->secs[feat].offset;
	size_t sz = inject->secs[feat].size;
	void *buf = malloc(sz);
	int ret;

	if (!buf)
		return -ENOMEM;

	ret = read_file(fd, offs, buf, sz);
	if (ret)
		goto out_free;

	ret = fw->write(fw, buf, sz);
out_free:
	free(buf);
	return ret;
}

struct inject_fc {
	struct feat_copier fc;
	struct perf_inject *inject;
};

static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
{
	struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
	struct perf_inject *inject = inj_fc->inject;
	int ret;

	if (!inject->secs[feat].offset ||
	    !keep_feat(feat))
		return 0;

	ret = feat_copy(inject, feat, fw);
	if (ret < 0)
		return ret;

	return 1; /* Feature section copied */
}
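
/* Copy kcore_dir subdirectories from the input directory to the output directory. */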
static int copy_kcore_dir(struct perf_inject *inject)
{
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
		       inject->input_name, inject->output.path);
	if (ret < 0)
		return ret;
	pr_debug("%s\n", cmd);
	ret = system(cmd);
	free(cmd);
	return ret;
}

static int guest_session__copy_kcore_dir(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
		       gs->perf_data_file, inject->output.path, gs->machine_pid);
	if (ret < 0)
		return ret;
	pr_debug("%s\n", cmd);
	ret = system(cmd);
	free(cmd);
	return ret;
}
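
/* In-place update modifies the input file directly, so there is no output fd. */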
static int output_fd(struct perf_inject *inject)
{
	return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
}
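
/*
 * Main injection pass: pick event handlers according to the selected mode
 * (build-id injection, sched-stat merging, itrace synthesis, VM time
 * correlation, or guest data injection), process all events, then rewrite
 * the header and copy any kcore directories.
 */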
static int __cmd_inject(struct perf_inject *inject)
{
	int ret = -EINVAL;
	struct guest_session *gs = &inject->guest_session;
	struct perf_session *session = inject->session;
	int fd = output_fd(inject);
	u64 output_data_offset = perf_session__data_offset(session->evlist);
	/*
	 * Pipe input hasn't loaded the attributes and will handle them as
	 * events. So that the attributes don't overlap the data, write the
	 * attributes after the data.
	 */
	bool write_attrs_after_data = !inject->output.is_pipe && inject->session->data->is_pipe;

	signal(SIGINT, sig_handler);

	if (inject->build_id_style != BID_RWS__NONE || inject->sched_stat ||
	    inject->itrace_synth_opts.set) {
		inject->tool.mmap = perf_event__repipe_mmap;
		inject->tool.mmap2 = perf_event__repipe_mmap2;
		inject->tool.fork = perf_event__repipe_fork;
#ifdef HAVE_LIBTRACEEVENT
		inject->tool.tracing_data = perf_event__repipe_tracing_data;
#endif
	}

	if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
	    inject->build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		inject->tool.sample = perf_event__inject_buildid;
	} else if (inject->sched_stat) {
		struct evsel *evsel;

		evlist__for_each_entry(session->evlist, evsel) {
			const char *name = evsel__name(evsel);

			if (!strcmp(name, "sched:sched_switch")) {
				if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
					return -EINVAL;

				evsel->handler = perf_inject__sched_switch;
			} else if (!strcmp(name, "sched:sched_process_exit"))
				evsel->handler = perf_inject__sched_process_exit;
#ifdef HAVE_LIBTRACEEVENT
			else if (!strncmp(name, "sched:sched_stat_", 17))
				evsel->handler = perf_inject__sched_stat;
#endif
		}
	} else if (inject->itrace_synth_opts.vm_time_correlation) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		memset(&inject->tool, 0, sizeof(inject->tool));
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
	} else if (inject->itrace_synth_opts.set) {
		session->itrace_synth_opts = &inject->itrace_synth_opts;
		inject->itrace_synth_opts.inject = true;
		inject->tool.comm = perf_event__repipe_comm;
		inject->tool.namespaces = perf_event__repipe_namespaces;
		inject->tool.exit = perf_event__repipe_exit;
		inject->tool.id_index = perf_event__process_id_index;
		inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
		inject->tool.auxtrace = perf_event__process_auxtrace;
		inject->tool.aux = perf_event__drop_aux;
		inject->tool.itrace_start = perf_event__drop_aux;
		inject->tool.aux_output_hw_id = perf_event__drop_aux;
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Allow space in the header for new attributes */
		output_data_offset = roundup(8192 + session->header.data_offset, 4096);
		if (inject->strip)
			strip_init(inject);
	} else if (gs->perf_data_file) {
		char *name = gs->perf_data_file;

		/*
		 * Not strictly necessary, but keep these events in order wrt
		 * guest events.
		 */
		inject->tool.mmap = host__repipe;
		inject->tool.mmap2 = host__repipe;
		inject->tool.comm = host__repipe;
		inject->tool.fork = host__repipe;
		inject->tool.exit = host__repipe;
		inject->tool.lost = host__repipe;
		inject->tool.context_switch = host__repipe;
		inject->tool.ksymbol = host__repipe;
		inject->tool.text_poke = host__repipe;
		/*
		 * Once the host session has initialized, set up sample ID
		 * mapping and feed in guest attrs, build IDs and initial
		 * events.
		 */
		inject->tool.finished_init = host__finished_init;
		/* Obey finished round ordering */
		inject->tool.finished_round = host__finished_round;
		/* Keep track of which CPU a VCPU is running on */
		inject->tool.context_switch = host__context_switch;
		/*
		 * Must order events to be able to obey finished round
		 * ordering.
		 */
		inject->tool.ordered_events = true;
		inject->tool.ordering_requires_timestamps = true;
		/* Set up a separate session to process guest perf.data file */
		ret = guest_session__start(gs, name, session->data->force);
		if (ret) {
			pr_err("Failed to process %s, error %d\n", name, ret);
			return ret;
		}
		/* Allow space in the header for guest attributes */
		output_data_offset += gs->session->header.data_offset;
		output_data_offset = roundup(output_data_offset, 4096);
	}

	if (!inject->itrace_synth_opts.set)
		auxtrace_index__free(&session->auxtrace_index);

	if (!inject->output.is_pipe && !inject->in_place_update)
		lseek(fd, output_data_offset, SEEK_SET);

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	if (gs->session) {
		/*
		 * Remaining guest events have later timestamps. Flush them
		 * out to file.
		 */
		ret = guest_session__flush_events(gs);
		if (ret) {
			pr_err("Failed to flush guest events\n");
			return ret;
		}
	}

	if (!inject->output.is_pipe && !inject->in_place_update) {
		struct inject_fc inj_fc = {
			.fc.copy = feat_copy_cb,
			.inject = inject,
		};

		if (inject->build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
		    inject->build_id_style == BID_RWS__INJECT_HEADER_ALL)
			perf_header__set_feat(&session->header, HEADER_BUILD_ID);
		/*
		 * Keep all buildids when there is unprocessed AUX data because
		 * it is not known which ones the AUX trace hits.
		 */
		if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
		    inject->have_auxtrace && !inject->itrace_synth_opts.set)
			perf_session__dsos_hit_all(session);
		/*
		 * The AUX areas have been removed and replaced with
		 * synthesized hardware events, so clear the feature flag.
		 */
		if (inject->itrace_synth_opts.set) {
			perf_header__clear_feat(&session->header,
						HEADER_AUXTRACE);
			if (inject->itrace_synth_opts.last_branch ||
			    inject->itrace_synth_opts.add_last_branch)
				perf_header__set_feat(&session->header,
						      HEADER_BRANCH_STACK);
		}
		session->header.data_offset = output_data_offset;
		session->header.data_size = inject->bytes_written;
		perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc,
					    write_attrs_after_data);

		if (inject->copy_kcore_dir) {
			ret = copy_kcore_dir(inject);
			if (ret) {
				pr_err("Failed to copy kcore\n");
				return ret;
			}
		}
		if (gs->copy_kcore_dir) {
			ret = guest_session__copy_kcore_dir(gs);
			if (ret) {
				pr_err("Failed to copy guest kcore\n");
				return ret;
			}
		}
	}

	return ret;
}
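
/*
 * Entry point for "perf inject": parse options, open the input and output
 * perf.data files, install the default repipe handlers, and run
 * __cmd_inject().
 */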
int cmd_inject(int argc, const char **argv)
{
	struct perf_inject inject = {
		.input_name = "-",
		.samples = LIST_HEAD_INIT(inject.samples),
		.output = {
			.path = "-",
			.mode = PERF_DATA_MODE_WRITE,
			.use_stdio = true,
		},
	};
	struct perf_data data = {
		.mode = PERF_DATA_MODE_READ,
		.use_stdio = true,
	};
	int ret;
	const char *known_build_ids = NULL;
	bool build_ids = false;
	bool build_id_all = false;
	bool mmap2_build_ids = false;
	bool mmap2_build_id_all = false;

	struct option options[] = {
		OPT_BOOLEAN('b', "build-ids", &build_ids,
			    "Inject build-ids into the output stream"),
		OPT_BOOLEAN(0, "buildid-all", &build_id_all,
			    "Inject build-ids of all DSOs into the output stream"),
		OPT_BOOLEAN('B', "mmap2-buildids", &mmap2_build_ids,
			    "Drop unused mmap events, make others mmap2 with build IDs"),
		OPT_BOOLEAN(0, "mmap2-buildid-all", &mmap2_build_id_all,
			    "Rewrite all mmap events as mmap2 events with build IDs"),
		OPT_STRING(0, "known-build-ids", &known_build_ids,
			   "buildid path [,buildid path...]",
			   "build-ids to use for given paths"),
		OPT_STRING('i', "input", &inject.input_name, "file",
			   "input file name"),
		OPT_STRING('o', "output", &inject.output.path, "file",
			   "output file name"),
		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
			    "Merge sched-stat and sched-switch for getting events "
			    "where and how long tasks slept"),
#ifdef HAVE_JITDUMP
		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
#endif
		OPT_INCR('v', "verbose", &verbose,
			 "be more verbose (show build ids, etc)"),
		OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
			   "file", "vmlinux pathname"),
		OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
			    "don't load vmlinux even if found"),
		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
			   "kallsyms pathname"),
		OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
				    NULL, "opts", "Instruction Tracing options\n"
				    ITRACE_HELP,
				    itrace_parse_synth_opts),
		OPT_BOOLEAN(0, "strip", &inject.strip,
			    "strip non-synthesized events (use with --itrace)"),
		OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
				    "correlate time between VM guests and the host",
				    parse_vm_time_correlation),
		OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
				    "inject events from a guest perf.data file",
				    parse_guest_data),
		OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
			   "guest mount directory under which every guest os"
			   " instance has a subdir"),
		OPT_END()
	};
	const char * const inject_usage[] = {
		"perf inject [<options>]",
		NULL
	};
	bool ordered_events;

	if (!inject.itrace_synth_opts.set) {
		/* Disable eager loading of kernel symbols that adds overhead to perf inject. */
		symbol_conf.lazy_load_kernel_maps = true;
	}

#ifndef HAVE_JITDUMP
	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
#endif
	argc = parse_options(argc, argv, options, inject_usage, 0);

	/*
	 * Any (unrecognized) arguments left?
	 */
	if (argc)
		usage_with_options(inject_usage, options);

	if (inject.strip && !inject.itrace_synth_opts.set) {
		pr_err("--strip option requires --itrace option\n");
		return -1;
	}

	if (symbol__validate_sym_arguments())
		return -1;

	if (inject.in_place_update) {
		if (!strcmp(inject.input_name, "-")) {
			pr_err("Input file name required for in-place updating\n");
			return -1;
		}
		if (strcmp(inject.output.path, "-")) {
			pr_err("Output file name must not be specified for in-place updating\n");
			return -1;
		}
		if (!data.force && !inject.in_place_update_dry_run) {
			pr_err("The input file would be updated in place, "
			       "the --force option is required.\n");
			return -1;
		}
		if (!inject.in_place_update_dry_run)
			data.in_place_update = true;
	} else {
		if (strcmp(inject.output.path, "-") && !inject.strip &&
		    has_kcore_dir(inject.input_name)) {
			inject.output.is_dir = true;
			inject.copy_kcore_dir = true;
		}
		if (perf_data__open(&inject.output)) {
			perror("failed to create output file");
			return -1;
		}
	}
	if (mmap2_build_ids)
		inject.build_id_style = BID_RWS__MMAP2_BUILDID_LAZY;
	if (mmap2_build_id_all)
		inject.build_id_style = BID_RWS__MMAP2_BUILDID_ALL;
	if (build_ids)
		inject.build_id_style = BID_RWS__INJECT_HEADER_LAZY;
	if (build_id_all)
		inject.build_id_style = BID_RWS__INJECT_HEADER_ALL;

	data.path = inject.input_name;

	ordered_events = inject.jit_mode || inject.sched_stat ||
			 inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
			 inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY;
	perf_tool__init(&inject.tool, ordered_events);
	inject.tool.sample = perf_event__repipe_sample;
	inject.tool.read = perf_event__repipe_sample;
	inject.tool.mmap = perf_event__repipe;
	inject.tool.mmap2 = perf_event__repipe;
	inject.tool.comm = perf_event__repipe;
	inject.tool.namespaces = perf_event__repipe;
	inject.tool.cgroup = perf_event__repipe;
	inject.tool.fork = perf_event__repipe;
	inject.tool.exit = perf_event__repipe;
	inject.tool.lost = perf_event__repipe;
	inject.tool.lost_samples = perf_event__repipe;
	inject.tool.aux = perf_event__repipe;
	inject.tool.itrace_start = perf_event__repipe;
	inject.tool.aux_output_hw_id = perf_event__repipe;
	inject.tool.context_switch = perf_event__repipe;
	inject.tool.throttle = perf_event__repipe;
	inject.tool.unthrottle = perf_event__repipe;
	inject.tool.ksymbol = perf_event__repipe;
	inject.tool.bpf = perf_event__repipe;
	inject.tool.text_poke = perf_event__repipe;
	inject.tool.attr = perf_event__repipe_attr;
	inject.tool.event_update = perf_event__repipe_event_update;
	inject.tool.tracing_data = perf_event__repipe_op2_synth;
	inject.tool.finished_round = perf_event__repipe_oe_synth;
	inject.tool.build_id = perf_event__repipe_op2_synth;
	inject.tool.id_index = perf_event__repipe_op2_synth;
	inject.tool.auxtrace_info = perf_event__repipe_op2_synth;
	inject.tool.auxtrace_error = perf_event__repipe_op2_synth;
	inject.tool.time_conv = perf_event__repipe_op2_synth;
	inject.tool.thread_map = perf_event__repipe_op2_synth;
	inject.tool.cpu_map = perf_event__repipe_op2_synth;
	inject.tool.stat_config = perf_event__repipe_op2_synth;
	inject.tool.stat = perf_event__repipe_op2_synth;
	inject.tool.stat_round = perf_event__repipe_op2_synth;
	inject.tool.feature = perf_event__repipe_op2_synth;
	inject.tool.finished_init = perf_event__repipe_op2_synth;
	inject.tool.compressed = perf_event__repipe_op4_synth;
	inject.tool.auxtrace = perf_event__repipe_auxtrace;
	inject.tool.bpf_metadata = perf_event__repipe_op2_synth;
	inject.tool.dont_split_sample_group = true;
	inject.tool.merge_deferred_callchains = false;
	inject.session = __perf_session__new(&data, &inject.tool,
					     /*trace_event_repipe=*/inject.output.is_pipe,
					     /*host_env=*/NULL);

	if (IS_ERR(inject.session)) {
		ret = PTR_ERR(inject.session);
		goto out_close_output;
	}

	if (zstd_init(&(inject.session->zstd_data), 0) < 0)
		pr_warning("Decompression initialization failed.\n");

	/* Save original section info before feature bits change */
	ret = save_section_info(&inject);
	if (ret)
		goto out_delete;

	if (inject.output.is_pipe) {
		ret = perf_header__write_pipe(perf_data__fd(&inject.output));
		if (ret < 0) {
			pr_err("Couldn't write a new pipe header.\n");
			goto out_delete;
		}

		/*
		 * If the input is already a pipe then the features and
		 * attributes don't need synthesizing, they will be present in
		 * the input.
		 */
		if (!data.is_pipe) {
			ret = perf_event__synthesize_for_pipe(&inject.tool,
							      inject.session,
							      &inject.output,
							      perf_event__repipe);
			if (ret < 0)
				goto out_delete;
		}
	}

	if (inject.build_id_style == BID_RWS__INJECT_HEADER_LAZY ||
	    inject.build_id_style == BID_RWS__MMAP2_BUILDID_LAZY) {
		/*
		 * Ensure the mmap records are ordered correctly, especially
		 * those for jitted code. We cannot generate the buildid hit
		 * list and inject the jit mmaps at the same time for now.
		 */
		inject.tool.ordering_requires_timestamps = true;
	}
	if (inject.build_id_style != BID_RWS__NONE && known_build_ids != NULL) {
		inject.known_build_ids =
			perf_inject__parse_known_build_ids(known_build_ids);

		if (inject.known_build_ids == NULL) {
			pr_err("Couldn't parse known build ids.\n");
			ret = -EINVAL;
			goto out_delete;
		}
	}

#ifdef HAVE_JITDUMP
	if (inject.jit_mode) {
		inject.tool.mmap2 = perf_event__repipe_mmap2;
		inject.tool.mmap = perf_event__repipe_mmap;
		inject.tool.ordering_requires_timestamps = true;
		/*
		 * JIT MMAP injection injects all MMAP events in one go, so it
		 * does not obey finished_round semantics.
		 */
		inject.tool.finished_round = perf_event__drop_oe;
	}
#endif
	ret = symbol__init(perf_session__env(inject.session));
	if (ret < 0)
		goto out_delete;

	ret = __cmd_inject(&inject);

	guest_session__exit(&inject.guest_session);

out_delete:
	strlist__delete(inject.known_build_ids);
	zstd_fini(&(inject.session->zstd_data));
	perf_session__delete(inject.session);
out_close_output:
	if (!inject.in_place_update)
		perf_data__close(&inject.output);
	free(inject.itrace_synth_opts.vm_tm_corr_args);
	free(inject.event_copy);
	free(inject.guest_session.ev.event_buf);
	return ret;
}