GitHub Repository: torvalds/linux
Path: blob/master/tools/perf/builtin-lock.c
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include "builtin.h"
#include "perf.h"

#include "util/evlist.h" // for struct evsel_str_handler
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/target.h"
#include "util/cgroup.h"
#include "util/callchain.h"
#include "util/lock-contention.h"
#include "util/bpf_skel/lock_data.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/tracepoint.h"

#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/data.h"
#include "util/string2.h"
#include "util/map.h"
#include "util/util.h"

#include <stdio.h>
#include <sys/types.h>
#include <sys/prctl.h>
#include <semaphore.h>
#include <math.h>
#include <limits.h>
#include <ctype.h>

#include <linux/list.h>
#include <linux/hash.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <linux/err.h>
#include <linux/stringify.h>

static struct perf_session *session;
static struct target target;

static struct rb_root thread_stats;

static bool combine_locks;
static bool show_thread_stats;
static bool show_lock_addrs;
static bool show_lock_owner;
static bool show_lock_cgroups;
static bool use_bpf;
static unsigned long bpf_map_entries = MAX_ENTRIES;
static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
static const char *output_name = NULL;
static FILE *lock_output;

static struct lock_filter filters;
static struct lock_delay *delays;
static int nr_delays;

static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;

static struct thread_stat *thread_stat_find(u32 tid)
{
	struct rb_node *node;
	struct thread_stat *st;

	node = thread_stats.rb_node;
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		if (st->tid == tid)
			return st;
		else if (tid < st->tid)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

static void thread_stat_insert(struct thread_stat *new)
{
	struct rb_node **rb = &thread_stats.rb_node;
	struct rb_node *parent = NULL;
	struct thread_stat *p;

	while (*rb) {
		p = container_of(*rb, struct thread_stat, rb);
		parent = *rb;

		if (new->tid < p->tid)
			rb = &(*rb)->rb_left;
		else if (new->tid > p->tid)
			rb = &(*rb)->rb_right;
		else
			BUG_ON("inserting invalid thread_stat\n");
	}

	rb_link_node(&new->rb, parent, rb);
	rb_insert_color(&new->rb, &thread_stats);
}

static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
{
	struct thread_stat *st;

	st = thread_stat_find(tid);
	if (st)
		return st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	thread_stat_insert(st);

	return st;
}

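/*
 * thread_stat_findnew is a one-shot function pointer: the first call
 * goes through thread_stat_findnew_first(), which seeds the empty
 * rb-tree without searching it and then redirects the pointer to
 * thread_stat_findnew_after_first() for every later lookup.
 */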
static struct thread_stat *thread_stat_findnew_first(u32 tid);
static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
	thread_stat_findnew_first;

static struct thread_stat *thread_stat_findnew_first(u32 tid)
{
	struct thread_stat *st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
	rb_insert_color(&st->rb, &thread_stats);

	thread_stat_findnew = thread_stat_findnew_after_first;
	return st;
}

/* build a simple key-compare function: returns whether one is bigger than two */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					    struct lock_stat *two)	\
	{								\
		return one->member > two->member;			\
	}

SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)

static int lock_stat_key_wait_time_min(struct lock_stat *one,
				       struct lock_stat *two)
{
	u64 s1 = one->wait_time_min;
	u64 s2 = two->wait_time_min;
	if (s1 == ULLONG_MAX)
		s1 = 0;
	if (s2 == ULLONG_MAX)
		s2 = 0;
	return s1 > s2;
}

struct lock_key {
	/*
	 * name: the value specified by the user;
	 * this should be simpler than the raw name of the member,
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char *name;
	/* header: the string printed on the header line */
	const char *header;
	/* len: the printing width of the field */
	int len;
	/* key: a pointer to a function to compare two lock stats for sorting */
	int (*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to a function to print a given lock stat */
	void (*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry to link this */
	struct list_head list;
};

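/*
 * Pretty-print a duration using the largest unit that fits: for example,
 * 1500000 ns is printed as "1.50 ms".  A field length of 0 means CSV
 * output, which prints the raw nanosecond value instead.
 */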
static void lock_stat_key_print_time(unsigned long long nsec, int len)
{
	static const struct {
		float base;
		const char *unit;
	} table[] = {
		{ 1e9 * 3600, "h " },
		{ 1e9 * 60, "m " },
		{ 1e9, "s " },
		{ 1e6, "ms" },
		{ 1e3, "us" },
		{ 0, NULL },
	};

	/* for CSV output */
	if (len == 0) {
		fprintf(lock_output, "%llu", nsec);
		return;
	}

	for (int i = 0; table[i].unit; i++) {
		if (nsec < table[i].base)
			continue;

		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
		return;
	}

	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
}

#define PRINT_KEY(member)						\
	static void lock_stat_key_print_ ## member(struct lock_key *key, \
						   struct lock_stat *ls) \
	{								\
		fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member); \
	}

#define PRINT_TIME(member)						\
	static void lock_stat_key_print_ ## member(struct lock_key *key, \
						   struct lock_stat *ls) \
	{								\
		lock_stat_key_print_time((unsigned long long)ls->member, key->len); \
	}

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)

static void lock_stat_key_print_wait_time_min(struct lock_key *key,
					      struct lock_stat *ls)
{
	u64 wait_time = ls->wait_time_min;

	if (wait_time == ULLONG_MAX)
		wait_time = 0;

	lock_stat_key_print_time(wait_time, key->len);
}


static const char *sort_key = "acquired";

static int (*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root sorted; /* place to store intermediate data */
static struct rb_root result; /* place to store sorted data */

static LIST_HEAD(lock_keys);
static const char *output_fields;

#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* more complicated comparisons should go here */
	{ }
};

static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* more complicated comparisons should go here */
	{ }
};

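/*
 * Resolve the user-requested sort key against the key table and make
 * sure the selected key is also linked into the output fields.
 */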
static int select_key(bool contention)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (!strcmp(keys[i].name, sort_key)) {
			compare = keys[i].key;

			/* selected key should be in the output fields */
			if (list_empty(&keys[i].list))
				list_add_tail(&keys[i].list, &lock_keys);

			return 0;
		}
	}

	pr_err("Unknown compare key: %s\n", sort_key);
	return -1;
}

static int add_output_field(bool contention, char *name)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (strcmp(keys[i].name, name))
			continue;

		/* prevent double link */
		if (list_empty(&keys[i].list))
			list_add_tail(&keys[i].list, &lock_keys);

		return 0;
	}

	pr_err("Unknown output field: %s\n", name);
	return -1;
}

static int setup_output_field(bool contention, const char *str)
{
	char *tok, *tmp, *orig;
	int i, ret = 0;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	/* no output field given: use all of them */
	if (str == NULL) {
		for (i = 0; keys[i].name; i++)
			list_add_tail(&keys[i].list, &lock_keys);
		return 0;
	}

	for (i = 0; keys[i].name; i++)
		INIT_LIST_HEAD(&keys[i].list);

	orig = tmp = strdup(str);
	if (orig == NULL)
		return -ENOMEM;

	while ((tok = strsep(&tmp, ",")) != NULL) {
		ret = add_output_field(contention, tok);
		if (ret < 0)
			break;
	}
	free(orig);

	return ret;
}

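/*
 * Merge lock_stat entries that share the same name into a single entry
 * in the "sorted" tree; merged sources are marked ->combined so they
 * are skipped when building the final result tree.
 */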
static void combine_lock_stats(struct lock_stat *st)
{
	struct rb_node **rb = &sorted.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;
	int ret;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (st->name && p->name)
			ret = strcmp(st->name, p->name);
		else
			ret = !!st->name - !!p->name;

		if (ret == 0) {
			p->nr_acquired += st->nr_acquired;
			p->nr_contended += st->nr_contended;
			p->wait_time_total += st->wait_time_total;

			if (p->nr_contended)
				p->avg_wait_time = p->wait_time_total / p->nr_contended;

			if (p->wait_time_min > st->wait_time_min)
				p->wait_time_min = st->wait_time_min;
			if (p->wait_time_max < st->wait_time_max)
				p->wait_time_max = st->wait_time_max;

			p->broken |= st->broken;
			st->combined = 1;
			return;
		}

		if (ret < 0)
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &sorted);
}

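/*
 * The result rb-tree works as a priority queue: insert_to() orders
 * entries with the given compare callback and pop_from() repeatedly
 * takes the leftmost (i.e. biggest-first) element.
 */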
static void insert_to(struct rb_root *rr, struct lock_stat *st,
		      int (*bigger)(struct lock_stat *, struct lock_stat *))
{
	struct rb_node **rb = &rr->rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (bigger(st, p))
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, rr);
}

static inline void insert_to_result(struct lock_stat *st,
				    int (*bigger)(struct lock_stat *,
						  struct lock_stat *))
{
	if (combine_locks && st->combined)
		return;
	insert_to(&result, st, bigger);
}

static inline struct lock_stat *pop_from(struct rb_root *rr)
{
	struct rb_node *node = rr->rb_node;

	if (!node)
		return NULL;

	while (node->rb_left)
		node = node->rb_left;

	rb_erase(node, rr);
	return container_of(node, struct lock_stat, rb);
}

/* returns the leftmost element of result and erases it */
static struct lock_stat *pop_from_result(void)
{
	return pop_from(&result);
}

struct trace_lock_handler {
	/* used with CONFIG_LOCKDEP */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used with CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* used with CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* used with CONFIG_LOCKDEP */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};

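/*
 * Per-thread lock sequences track the state machine of each lock address
 * (uninitialized -> acquiring -> acquired/contended -> released) so that
 * out-of-order or lost events can be detected and counted as "broken".
 */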
static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
{
	struct lock_seq_stat *seq;

	list_for_each_entry(seq, &ts->seq_list, list) {
		if (seq->addr == addr)
			return seq;
	}

	seq = zalloc(sizeof(struct lock_seq_stat));
	if (!seq) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	seq->state = SEQ_STATE_UNINITIALIZED;
	seq->addr = addr;

	list_add(&seq->list, &ts->seq_list);
	return seq;
}

enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

static int bad_hist[BROKEN_MAX];

enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};

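/*
 * Map an event to its aggregation key: the lock address in address mode
 * or the thread id in task mode.  Caller-based aggregation needs a
 * callchain and is handled by get_key_by_aggr_mode() below.
 */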
static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
{
	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		*key = addr;
		break;
	case LOCK_AGGR_TASK:
		*key = tid;
		break;
	case LOCK_AGGR_CALLER:
	case LOCK_AGGR_CGROUP:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}
	return 0;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);

static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
				struct perf_sample *sample)
{
	if (aggr_mode == LOCK_AGGR_CALLER) {
		*key = callchain_id(evsel, sample);
		return 0;
	}
	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
}

static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}

static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
				  char *buf, int size)
{
	u64 offset;

	if (map == NULL || sym == NULL) {
		buf[0] = '\0';
		return 0;
	}

	offset = map__map_ip(map, ip) - sym->start;

	if (offset)
		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
	else
		return strlcpy(buf, sym->name, size);
}

static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
				  char *buf, int size)
{
	struct thread *thread;
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct symbol *sym;
	int skip = 0;
	int ret;

	/* lock names will be replaced with the task name later */
	if (show_thread_stats)
		return -1;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	if (ret != 0) {
		thread__put(thread);
		return -1;
	}

	callchain_cursor_commit(cursor);
	thread__put(thread);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		sym = node->ms.sym;
		if (sym && !machine__is_lock_function(machine, node->ip)) {
			get_symbol_name_offset(node->ms.map, sym, node->ip,
					       buf, size);
			return 0;
		}

next:
		callchain_cursor_advance(cursor);
	}
	return -1;
}

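/*
 * Build the LOCK_AGGR_CALLER aggregation key by XOR-hashing the
 * instruction pointers of the callchain, skipping the lock functions
 * themselves, so contention from the same call site maps to one entry.
 */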
static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct thread *thread;
	u64 hash = 0;
	int skip = 0;
	int ret;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();
	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	thread__put(thread);

	if (ret != 0)
		return -1;

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
			goto next;

		hash ^= hash_long((unsigned long)node->ip, 64);

next:
		callchain_cursor_advance(cursor);
	}
	return hash;
}

static u64 *get_callstack(struct perf_sample *sample, int max_stack)
{
	u64 *callstack;
	u64 i;
	int c;

	callstack = calloc(max_stack, sizeof(*callstack));
	if (callstack == NULL)
		return NULL;

	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
		u64 ip = sample->callchain->ips[i];

		if (ip >= PERF_CONTEXT_MAX)
			continue;

		callstack[c++] = ip;
	}
	return callstack;
}

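/*
 * Handle lock:contention_begin: resolve the aggregation key, apply the
 * lock type/address/callstack filters, and start (or continue) the
 * per-thread contention sequence for this lock address.
 */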
static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	unsigned int flags = evsel__intval(evsel, sample, "flags");
	u64 key;
	int i, ret;
	static bool kmap_loaded;
	struct machine *machine = &session->machines.host;
	struct map *kmap;
	struct symbol *sym;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	if (!kmap_loaded) {
		unsigned long *addrs;

		/* make sure it loads the kernel map to find lock symbols */
		map__load(machine__kernel_map(machine));
		kmap_loaded = true;

		/* convert (kernel) symbols to addresses */
		for (i = 0; i < filters.nr_syms; i++) {
			sym = machine__find_kernel_symbol_by_name(machine,
								  filters.syms[i],
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   filters.syms[i]);
				continue;
			}

			addrs = realloc(filters.addrs,
					(filters.nr_addrs + 1) * sizeof(*addrs));
			if (addrs == NULL) {
				pr_warning("memory allocation failure\n");
				return -ENOMEM;
			}

			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
			filters.addrs = addrs;
		}
	}

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *name = "";

		switch (aggr_mode) {
		case LOCK_AGGR_ADDR:
			sym = machine__find_kernel_symbol(machine, key, &kmap);
			if (sym)
				name = sym->name;
			break;
		case LOCK_AGGR_CALLER:
			name = buf;
			if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
				name = "Unknown";
			break;
		case LOCK_AGGR_CGROUP:
		case LOCK_AGGR_TASK:
		default:
			break;
		}

		ls = lock_stat_findnew(key, name, flags);
		if (!ls)
			return -ENOMEM;
	}

	if (filters.nr_types) {
		bool found = false;

		for (i = 0; i < filters.nr_types; i++) {
			if (flags == filters.types[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (filters.nr_addrs) {
		bool found = false;

		for (i = 0; i < filters.nr_addrs; i++) {
			if (addr == filters.addrs[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (needs_callstack()) {
		u64 *callstack = get_callstack(sample, max_stack_depth);
		if (callstack == NULL)
			return -ENOMEM;

		if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
			free(callstack);
			return 0;
		}

		if (ls->callstack == NULL)
			ls->callstack = callstack;
		else
			free(callstack);
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * A nested contention-begin can happen with mutex spinning;
		 * keep the original contention-begin event and ignore the
		 * second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}

static int report_lock_contention_end_event(struct evsel *evsel,
					    struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	ls = lock_stat_find(key);
	if (!ls)
		return 0;

	ts = thread_stat_find(sample->tid);
	if (!ts)
		return 0;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
	return 0;
}

/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
static struct trace_lock_handler report_lock_ops = {
	.acquire_event = report_lock_acquire_event,
	.acquired_event = report_lock_acquired_event,
	.contended_event = report_lock_contended_event,
	.release_event = report_lock_release_event,
	.contention_begin_event = report_lock_contention_begin_event,
	.contention_end_event = report_lock_contention_end_event,
};

static struct trace_lock_handler contention_lock_ops = {
	.contention_begin_event = report_lock_contention_begin_event,
	.contention_end_event = report_lock_contention_end_event,
};


static struct trace_lock_handler *trace_handler;

static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquire_event)
		return trace_handler->acquire_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquired_event)
		return trace_handler->acquired_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contended_event)
		return trace_handler->contended_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->release_event)
		return trace_handler->release_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_begin_event)
		return trace_handler->contention_begin_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_end_event)
		return trace_handler->contention_end_event(evsel, sample);
	return 0;
}

static void print_bad_events(int bad, int total)
{
	/* output for debug; this has to be removed */
	int i;
	int broken = 0;
	const char *name[4] =
		{ "acquire", "acquired", "contended", "release" };

	for (i = 0; i < BROKEN_MAX; i++)
		broken += bad_hist[i];

	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
		return;

	fprintf(lock_output, "\n=== output for debug ===\n\n");
	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
	fprintf(lock_output, "histogram of events caused bad sequence\n");
	for (i = 0; i < BROKEN_MAX; i++)
		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
}

/* TODO: various ways to print, coloring, nano or milli sec */
static void print_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	char cut_name[20];
	int bad, total, printed;

	if (!quiet) {
		fprintf(lock_output, "%20s ", "Name");
		list_for_each_entry(key, &lock_keys, list)
			fprintf(lock_output, "%*s ", key->len, key->header);
		fprintf(lock_output, "\n\n");
	}

	bad = total = printed = 0;
	while ((st = pop_from_result())) {
		total++;
		if (st->broken)
			bad++;
		if (!st->nr_acquired)
			continue;

		bzero(cut_name, 20);

		if (strlen(st->name) < 20) {
			/* output raw name */
			const char *name = st->name;

			if (show_thread_stats) {
				struct thread *t;

				/* st->addr contains the tid of the thread */
				t = perf_session__findnew(session, st->addr);
				name = thread__comm_str(t);
			}

			fprintf(lock_output, "%20s ", name);
		} else {
			strncpy(cut_name, st->name, 16);
			cut_name[16] = '.';
			cut_name[17] = '.';
			cut_name[18] = '.';
			cut_name[19] = '\0';
			/* cut off the name to preserve the output style */
			fprintf(lock_output, "%20s ", cut_name);
		}

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			fprintf(lock_output, " ");
		}
		fprintf(lock_output, "\n");

		if (++printed >= print_nr_entries)
			break;
	}

	print_bad_events(bad, total);
}

static bool info_threads, info_map;

static void dump_threads(void)
{
	struct thread_stat *st;
	struct rb_node *node;
	struct thread *t;

	fprintf(lock_output, "%10s: comm\n", "Thread ID");

	node = rb_first(&thread_stats);
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		t = perf_session__findnew(session, st->tid);
		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
		node = rb_next(node);
		thread__put(t);
	}
}

static int compare_maps(struct lock_stat *a, struct lock_stat *b)
{
	int ret;

	if (a->name && b->name)
		ret = strcmp(a->name, b->name);
	else
		ret = !!a->name - !!b->name;

	if (!ret)
		return a->addr < b->addr;
	else
		return ret < 0;
}

static void dump_map(void)
{
	unsigned int i;
	struct lock_stat *st;

	fprintf(lock_output, "Address of instance: name of class\n");
	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare_maps);
		}
	}

	while ((st = pop_from_result()))
		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
}

static void dump_info(void)
{
	if (info_threads)
		dump_threads();

	if (info_map) {
		if (info_threads)
			fputc('\n', lock_output);
		dump_map();
	}
}

static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
};

static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end", evsel__process_contention_end, },
};

static int process_event_update(const struct perf_tool *tool,
				union perf_event *event,
				struct evlist **pevlist)
{
	int ret;

	ret = perf_event__process_event_update(tool, event, pevlist);
	if (ret < 0)
		return ret;

	/* this can return -EEXIST since we call it for each evsel */
	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
	return 0;
}

typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);

static int process_sample_event(const struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	thread__put(thread);

	return err;
}

static void combine_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	if (!combine_locks)
		return;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			combine_lock_stats(st);
		}
	}
}

static void sort_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare);
		}
	}
}

static const struct {
	unsigned int flags;
	/*
	 * Name of the lock flags (access), with delimiter ':'.
	 * For example, rwsem:R or rwsem:W.
	 */
	const char *flags_name;
	/* Name of the lock (type), for example, rwlock or rwsem. */
	const char *lock_name;
} lock_type_table[] = {
	{ 0, "semaphore", "semaphore" },
	{ LCB_F_SPIN, "spinlock", "spinlock" },
	{ LCB_F_SPIN | LCB_F_READ, "rwlock:R", "rwlock" },
	{ LCB_F_SPIN | LCB_F_WRITE, "rwlock:W", "rwlock" },
	{ LCB_F_READ, "rwsem:R", "rwsem" },
	{ LCB_F_WRITE, "rwsem:W", "rwsem" },
	{ LCB_F_RT, "rt-mutex", "rt-mutex" },
	{ LCB_F_RT | LCB_F_READ, "rwlock-rt:R", "rwlock-rt" },
	{ LCB_F_RT | LCB_F_WRITE, "rwlock-rt:W", "rwlock-rt" },
	{ LCB_F_PERCPU | LCB_F_READ, "pcpu-sem:R", "percpu-rwsem" },
	{ LCB_F_PERCPU | LCB_F_WRITE, "pcpu-sem:W", "percpu-rwsem" },
	{ LCB_F_MUTEX, "mutex", "mutex" },
	{ LCB_F_MUTEX | LCB_F_SPIN, "mutex", "mutex" },
	/* alias for optimistic spinning only */
	{ LCB_F_MUTEX | LCB_F_SPIN, "mutex:spin", "mutex-spin" },
};

static const char *get_type_flags_name(unsigned int flags)
{
	flags &= LCB_F_TYPE_MASK;

	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
		if (lock_type_table[i].flags == flags)
			return lock_type_table[i].flags_name;
	}
	return "unknown";
}

static const char *get_type_lock_name(unsigned int flags)
{
	flags &= LCB_F_TYPE_MASK;

	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
		if (lock_type_table[i].flags == flags)
			return lock_type_table[i].lock_name;
	}
	return "unknown";
}

static void lock_filter_finish(void)
{
	zfree(&filters.types);
	filters.nr_types = 0;

	zfree(&filters.addrs);
	filters.nr_addrs = 0;

	for (int i = 0; i < filters.nr_syms; i++)
		free(filters.syms[i]);

	zfree(&filters.syms);
	filters.nr_syms = 0;

	zfree(&filters.cgrps);
	filters.nr_cgrps = 0;

	for (int i = 0; i < filters.nr_slabs; i++)
		free(filters.slabs[i]);

	zfree(&filters.slabs);
	filters.nr_slabs = 0;
}

static void sort_contention_result(void)
{
	sort_result();
}

static void print_header_stdio(void)
{
	struct lock_key *key;

	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%*s ", key->len, key->header);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		fprintf(lock_output, " %10s %s\n\n", "pid",
			show_lock_owner ? "owner" : "comm");
		break;
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, " %10s %s\n\n", "type", "caller");
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, " %16s %s\n\n", "address", "symbol");
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, " %s\n\n", "cgroup");
		break;
	default:
		break;
	}
}

static void print_header_csv(const char *sep)
{
	struct lock_key *key;

	fprintf(lock_output, "# output: ");
	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%s%s ", key->header, sep);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		fprintf(lock_output, "%s%s %s\n", "pid", sep,
			show_lock_owner ? "owner" : "comm");
		break;
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
		if (verbose > 0)
			fprintf(lock_output, "%s %s", sep, "stacktrace");
		fprintf(lock_output, "\n");
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, "%s\n", "cgroup");
		break;
	default:
		break;
	}
}

static void print_header(void)
{
	if (!quiet) {
		if (symbol_conf.field_sep)
			print_header_csv(symbol_conf.field_sep);
		else
			print_header_stdio();
	}
}

static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
{
	struct lock_key *key;
	struct thread *t;
	int pid;

	list_for_each_entry(key, &lock_keys, list) {
		key->print(key, st);
		fprintf(lock_output, " ");
	}

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, " %10s %s\n", get_type_flags_name(st->flags), st->name);
		break;
	case LOCK_AGGR_TASK:
		pid = st->addr;
		t = perf_session__findnew(session, pid);
		fprintf(lock_output, " %10d %s\n",
			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, " %016llx %s (%s)\n", (unsigned long long)st->addr,
			st->name, get_type_lock_name(st->flags));
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, " %s\n", st->name);
		break;
	default:
		break;
	}

	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
		struct map *kmap;
		struct symbol *sym;
		char buf[128];
		u64 ip;

		for (int i = 0; i < max_stack_depth; i++) {
			if (!st->callstack || !st->callstack[i])
				break;

			ip = st->callstack[i];
			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
			fprintf(lock_output, "\t\t\t%#lx %s\n", (unsigned long)ip, buf);
		}
	}
}

static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
				const char *sep)
{
	struct lock_key *key;
	struct thread *t;
	int pid;

	list_for_each_entry(key, &lock_keys, list) {
		key->print(key, st);
		fprintf(lock_output, "%s ", sep);
	}

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "%s%s %s", get_type_flags_name(st->flags), sep, st->name);
		if (verbose <= 0)
			fprintf(lock_output, "\n");
		break;
	case LOCK_AGGR_TASK:
		pid = st->addr;
		t = perf_session__findnew(session, pid);
		fprintf(lock_output, "%d%s %s\n", pid, sep,
			pid == -1 ? "Unknown" : thread__comm_str(t));
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
			st->name, sep, get_type_lock_name(st->flags));
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, "%s\n", st->name);
		break;
	default:
		break;
	}

	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
		struct map *kmap;
		struct symbol *sym;
		char buf[128];
		u64 ip;

		for (int i = 0; i < max_stack_depth; i++) {
			if (!st->callstack || !st->callstack[i])
				break;

			ip = st->callstack[i];
			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long)ip, buf);
		}
		fprintf(lock_output, "\n");
	}
}

static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
{
	if (symbol_conf.field_sep)
		print_lock_stat_csv(con, st, symbol_conf.field_sep);
	else
		print_lock_stat_stdio(con, st);
}

static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
{
	/* output for debug; this has to be removed */
	int broken = fails->task + fails->stack + fails->time + fails->data;

	if (!use_bpf)
		print_bad_events(bad, total);

	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
		return;

	total += broken;
	fprintf(lock_output, "\n=== output for debug ===\n\n");
	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);

	fprintf(lock_output, "histogram of failure reasons\n");
	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
}

static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
			     const char *sep)
{
	/* output for debug; this has to be removed */
	if (use_bpf)
		bad = fails->task + fails->stack + fails->time + fails->data;

	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
		return;

	total += bad;
	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);

	if (use_bpf) {
		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
	} else {
		int i;
		const char *name[4] = { "acquire", "acquired", "contended", "release" };

		for (i = 0; i < BROKEN_MAX; i++)
			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
	}
	fprintf(lock_output, "\n");
}

static void print_footer(int total, int bad, struct lock_contention_fails *fails)
{
	if (symbol_conf.field_sep)
		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
	else
		print_footer_stdio(total, bad, fails);
}

static void print_contention_result(struct lock_contention *con)
{
	struct lock_stat *st;
	int bad, total, printed;

	if (!quiet)
		print_header();

	bad = total = printed = 0;

	while ((st = pop_from_result())) {
		total += use_bpf ? st->nr_contended : 1;
		if (st->broken)
			bad++;

		if (!st->wait_time_total)
			continue;

		print_lock_stat(con, st);

		if (++printed >= print_nr_entries)
			break;
	}

	if (con->owner && con->save_callstack && verbose > 0) {
		struct rb_root root = RB_ROOT;

		if (symbol_conf.field_sep)
			fprintf(lock_output, "# owner stack trace:\n");
		else
			fprintf(lock_output, "\n=== owner stack trace ===\n\n");
		while ((st = pop_owner_stack_trace(con)))
			insert_to(&root, st, compare);

		while ((st = pop_from(&root))) {
			print_lock_stat(con, st);
			free(st);
		}
	}

	if (print_nr_entries) {
		/* update the total/bad stats */
		while ((st = pop_from_result())) {
			total += use_bpf ? st->nr_contended : 1;
			if (st->broken)
				bad++;
		}
	}
	/* some entries are collected but hidden by the callstack filter */
	total += con->nr_filtered;

	print_footer(total, bad, &con->fails);
}

static bool force;
1853
1854
static int __cmd_report(bool display_info)
1855
{
1856
int err = -EINVAL;
1857
struct perf_tool eops;
1858
struct perf_data data = {
1859
.path = input_name,
1860
.mode = PERF_DATA_MODE_READ,
1861
.force = force,
1862
};
1863
1864
perf_tool__init(&eops, /*ordered_events=*/true);
1865
eops.attr = perf_event__process_attr;
1866
eops.event_update = process_event_update;
1867
eops.sample = process_sample_event;
1868
eops.comm = perf_event__process_comm;
1869
eops.mmap = perf_event__process_mmap;
1870
eops.namespaces = perf_event__process_namespaces;
1871
eops.tracing_data = perf_event__process_tracing_data;
1872
session = perf_session__new(&data, &eops);
1873
if (IS_ERR(session)) {
1874
pr_err("Initializing perf session failed\n");
1875
return PTR_ERR(session);
1876
}
1877
1878
symbol_conf.allow_aliases = true;
1879
symbol__init(perf_session__env(session));
1880
1881
if (!data.is_pipe) {
1882
if (!perf_session__has_traces(session, "lock record"))
1883
goto out_delete;
1884
1885
if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
1886
pr_err("Initializing perf session tracepoint handlers failed\n");
1887
goto out_delete;
1888
}
1889
1890
if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
1891
pr_err("Initializing perf session tracepoint handlers failed\n");
1892
goto out_delete;
1893
}
1894
}
1895
1896
if (setup_output_field(false, output_fields))
1897
goto out_delete;
1898
1899
if (select_key(false))
1900
goto out_delete;
1901
1902
if (show_thread_stats)
1903
aggr_mode = LOCK_AGGR_TASK;
1904
1905
err = perf_session__process_events(session);
1906
if (err)
1907
goto out_delete;
1908
1909
setup_pager();
1910
if (display_info) /* used for info subcommand */
1911
dump_info();
1912
else {
1913
combine_result();
1914
sort_result();
1915
print_result();
1916
}
1917
1918
out_delete:
1919
perf_session__delete(session);
1920
return err;
1921
}
1922
1923
static void sighandler(int sig __maybe_unused)
1924
{
1925
}
1926
1927
static int check_lock_contention_options(const struct option *options,
1928
const char * const *usage)
1929
1930
{
1931
if (show_thread_stats && show_lock_addrs) {
1932
pr_err("Cannot use thread and addr mode together\n");
1933
parse_options_usage(usage, options, "threads", 0);
1934
parse_options_usage(NULL, options, "lock-addr", 0);
1935
return -1;
1936
}
1937
1938
if (show_lock_owner && !use_bpf) {
1939
pr_err("Lock owners are available only with BPF\n");
1940
parse_options_usage(usage, options, "lock-owner", 0);
1941
parse_options_usage(NULL, options, "use-bpf", 0);
1942
return -1;
1943
}
1944
1945
if (show_lock_owner && show_lock_addrs) {
1946
pr_err("Cannot use owner and addr mode together\n");
1947
parse_options_usage(usage, options, "lock-owner", 0);
1948
parse_options_usage(NULL, options, "lock-addr", 0);
1949
return -1;
1950
}
1951
1952
if (show_lock_cgroups && !use_bpf) {
1953
pr_err("Cgroups are available only with BPF\n");
1954
parse_options_usage(usage, options, "lock-cgroup", 0);
1955
parse_options_usage(NULL, options, "use-bpf", 0);
1956
return -1;
1957
}
1958
1959
if (show_lock_cgroups && show_lock_addrs) {
1960
pr_err("Cannot use cgroup and addr mode together\n");
1961
parse_options_usage(usage, options, "lock-cgroup", 0);
1962
parse_options_usage(NULL, options, "lock-addr", 0);
1963
return -1;
1964
}
1965
1966
if (show_lock_cgroups && show_thread_stats) {
1967
pr_err("Cannot use cgroup and thread mode together\n");
1968
parse_options_usage(usage, options, "lock-cgroup", 0);
1969
parse_options_usage(NULL, options, "threads", 0);
1970
return -1;
1971
}
1972
1973
if (symbol_conf.field_sep) {
1974
if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
1975
strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
1976
strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
1977
pr_err("Cannot use the separator that is already used\n");
1978
parse_options_usage(usage, options, "x", 1);
1979
return -1;
1980
}
1981
}
1982
1983
if (show_lock_owner && !show_thread_stats) {
1984
pr_warning("Now -o try to show owner's callstack instead of pid and comm.\n");
1985
pr_warning("Please use -t option too to keep the old behavior.\n");
1986
}
1987
1988
return 0;
1989
}
1990
1991
static int __cmd_contention(int argc, const char **argv)
1992
{
1993
int err = -EINVAL;
1994
struct perf_tool eops;
1995
struct perf_data data = {
1996
.path = input_name,
1997
.mode = PERF_DATA_MODE_READ,
1998
.force = force,
1999
};
2000
struct lock_contention con = {
2001
.target = &target,
2002
.map_nr_entries = bpf_map_entries,
2003
.max_stack = max_stack_depth,
2004
.stack_skip = stack_skip,
2005
.filters = &filters,
2006
.delays = delays,
2007
.nr_delays = nr_delays,
2008
.save_callstack = needs_callstack(),
2009
.owner = show_lock_owner,
2010
.cgroups = RB_ROOT,
2011
};
2012
2013
lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2014
if (!lockhash_table)
2015
return -ENOMEM;
2016
2017
con.result = &lockhash_table[0];
2018
2019
perf_tool__init(&eops, /*ordered_events=*/true);
2020
eops.attr = perf_event__process_attr;
2021
eops.event_update = process_event_update;
2022
eops.sample = process_sample_event;
2023
eops.comm = perf_event__process_comm;
2024
eops.mmap = perf_event__process_mmap;
2025
eops.tracing_data = perf_event__process_tracing_data;
2026
2027
session = perf_session__new(use_bpf ? NULL : &data, &eops);
2028
if (IS_ERR(session)) {
2029
pr_err("Initializing perf session failed\n");
2030
err = PTR_ERR(session);
2031
session = NULL;
2032
goto out_delete;
2033
}
2034
2035
con.machine = &session->machines.host;
2036
2037
con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
2038
show_lock_addrs ? LOCK_AGGR_ADDR :
2039
show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER;
2040
2041
if (con.aggr_mode == LOCK_AGGR_CALLER)
2042
con.save_callstack = true;
2043
2044
symbol_conf.allow_aliases = true;
2045
symbol__init(perf_session__env(session));
2046
2047
if (use_bpf) {
2048
err = target__validate(&target);
2049
if (err) {
2050
char errbuf[512];
2051
2052
target__strerror(&target, err, errbuf, 512);
2053
pr_err("%s\n", errbuf);
2054
goto out_delete;
2055
}
2056
2057
signal(SIGINT, sighandler);
2058
signal(SIGCHLD, sighandler);
2059
signal(SIGTERM, sighandler);
2060
2061
con.evlist = evlist__new();
2062
if (con.evlist == NULL) {
2063
err = -ENOMEM;
2064
goto out_delete;
2065
}
2066
2067
err = evlist__create_maps(con.evlist, &target);
2068
if (err < 0)
2069
goto out_delete;
2070
2071
if (argc) {
2072
err = evlist__prepare_workload(con.evlist, &target,
2073
argv, false, NULL);
2074
if (err < 0)
2075
goto out_delete;
2076
}
2077
2078
err = lock_contention_prepare(&con);
2079
if (err < 0) {
2080
pr_err("lock contention BPF setup failed\n");
2081
goto out_delete;
2082
}
2083
} else if (!data.is_pipe) {
2084
if (!perf_session__has_traces(session, "lock record"))
2085
goto out_delete;
2086
2087
if (!evlist__find_evsel_by_str(session->evlist,
2088
"lock:contention_begin")) {
2089
pr_err("lock contention evsel not found\n");
2090
goto out_delete;
2091
}
2092
2093
if (perf_session__set_tracepoints_handlers(session,
2094
contention_tracepoints)) {
2095
pr_err("Initializing perf session tracepoint handlers failed\n");
2096
goto out_delete;
2097
}
2098
}
2099
2100
err = setup_output_field(true, output_fields);
2101
if (err) {
2102
pr_err("Failed to setup output field\n");
2103
goto out_delete;
2104
}
2105
2106
err = select_key(true);
2107
if (err)
2108
goto out_delete;
2109
2110
if (symbol_conf.field_sep) {
2111
int i;
2112
struct lock_key *keys = contention_keys;
2113
2114
/* do not align output in CSV format */
2115
for (i = 0; keys[i].name; i++)
2116
keys[i].len = 0;
2117
}
2118
2119
	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result(&con);

out_delete:
	lock_filter_finish();
	evlist__delete(con.evlist);
	lock_contention_finish(&con);
	perf_session__delete(session);
	zfree(&lockhash_table);
	return err;
}
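
/*
 * Forward to 'perf record' with a synthesized command line.  With
 * CONFIG_LOCKDEP/CONFIG_LOCK_STAT the lock_tracepoints are used; otherwise
 * fall back to the contention tracepoints plus frame-pointer callchains.
 * The result is roughly (tracepoint list abbreviated for illustration):
 *
 *   perf record -R -m 1024 -c 1 --synth task -e <each tracepoint> \
 *               [--call-graph fp,<CONTENTION_STACK_DEPTH>] <user args>
 */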
static int __cmd_record(int argc, const char **argv)
{
	const char *record_args[] = {
		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
	};
	const char *callgraph_args[] = {
		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
	};
	unsigned int rec_argc, i, j, ret;
	unsigned int nr_tracepoints;
	unsigned int nr_callgraph_args = 0;
	const char **rec_argv;
	bool has_lock_stat = true;

	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
			pr_debug("tracepoint %s is not enabled. "
				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
				 lock_tracepoints[i].name);
			has_lock_stat = false;
			break;
		}
	}

	if (has_lock_stat)
		goto setup_args;

	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
			pr_err("tracepoint %s is not enabled.\n",
			       contention_tracepoints[i].name);
			return 1;
		}
	}

	nr_callgraph_args = ARRAY_SIZE(callgraph_args);

setup_args:
	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;

	if (has_lock_stat)
		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
	else
		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);

	/* factor of 2 is for -e in front of each tracepoint */
	rec_argc += 2 * nr_tracepoints;

	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	for (j = 0; j < nr_tracepoints; j++) {
		rec_argv[i++] = "-e";
		rec_argv[i++] = has_lock_stat
			? lock_tracepoints[j].name
			: contention_tracepoints[j].name;
	}

	for (j = 0; j < nr_callgraph_args; j++, i++)
		rec_argv[i] = callgraph_args[j];

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	ret = cmd_record(i, rec_argv);
	free(rec_argv);
	return ret;
}
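
/*
 * Parses -M/--map-nr-entries, e.g. "perf lock contention -b -M 16384";
 * accepts any base strtoul() understands.
 */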
static int parse_map_entry(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	unsigned long val;
	char *endptr;

	errno = 0;
	val = strtoul(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*len = val;
	return 0;
}
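
/*
 * Parses --max-stack, e.g. "--max-stack 16"; the value is capped by the
 * kernel's perf_event_max_stack sysctl via sysctl__max_stack().
 */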
static int parse_max_stack(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	int *len = opt->value;	/* points at the int max_stack_depth */
	long val;
	char *endptr;

	errno = 0;
	val = strtol(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid max stack depth: %s\n", str);
		return -1;
	}

	if (val < 0 || val > sysctl__max_stack()) {
		pr_err("invalid max stack depth: %ld\n", val);
		return -1;
	}

	*len = val;
	return 0;
}
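
/*
 * The filter lists below (types, addrs, syms, slabs, cgroups) all grow one
 * element at a time with realloc(); they hold small, user-supplied lists,
 * so the quadratic growth pattern is not a concern here.
 */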
static bool add_lock_type(unsigned int flags)
{
	unsigned int *tmp;

	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
	if (tmp == NULL)
		return false;

	tmp[filters.nr_types++] = flags;
	filters.types = tmp;
	return true;
}
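
/*
 * Parses -Y/--type-filter, a comma- or space-separated list of lock names,
 * e.g. "-Y rwsem,mutex" (names shown for illustration; valid entries come
 * from lock_type_table).  A token containing ':' is matched against flags
 * names instead.
 */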
static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		bool found = false;

		/* `tok` is a flags name if it contains ':'. */
		if (strchr(tok, ':')) {
			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
				if (!strcmp(lock_type_table[i].flags_name, tok) &&
				    add_lock_type(lock_type_table[i].flags)) {
					found = true;
					break;
				}
			}

			if (!found) {
				pr_err("Unknown lock flags name: %s\n", tok);
				free(s);
				return -1;
			}

			continue;
		}

		/*
		 * Otherwise `tok` is a lock name.
		 * A single lock name could map to multiple flags.
		 * Replace the alias `pcpu-sem` with the actual name `percpu-rwsem`.
		 */
		if (!strcmp(tok, "pcpu-sem"))
			tok = (char *)"percpu-rwsem";
		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
			if (!strcmp(lock_type_table[i].lock_name, tok)) {
				if (add_lock_type(lock_type_table[i].flags)) {
					found = true;
				} else {
					free(s);
					return -1;
				}
			}
		}

		if (!found) {
			pr_err("Unknown lock name: %s\n", tok);
			free(s);
			return -1;
		}
	}

	free(s);
	return 0;
}
static bool add_lock_addr(unsigned long addr)
{
	unsigned long *tmp;

	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp[filters.nr_addrs++] = addr;
	filters.addrs = tmp;
	return true;
}

static bool add_lock_sym(char *name)
{
	char **tmp;
	char *sym = strdup(name);

	if (sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		free(sym);
		return false;
	}

	tmp[filters.nr_syms++] = sym;
	filters.syms = tmp;
	return true;
}

static bool add_lock_slab(char *name)
{
	char **tmp;
	char *sym = strdup(name);

	if (sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		free(sym);
		return false;
	}

	tmp[filters.nr_slabs++] = sym;
	filters.slabs = tmp;
	return true;
}
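
/*
 * Parses -L/--lock-filter.  Each token is tried as a hex address first,
 * then as "&<slab name>" for slab objects, and finally as a symbol name,
 * e.g. (hypothetical values) "-L 0xffff92e980d0c600,&task_struct,jiffies".
 */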
static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;
	u64 addr;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		char *end;

		addr = strtoul(tok, &end, 16);
		if (*end == '\0') {
			if (!add_lock_addr(addr)) {
				ret = -1;
				break;
			}
			continue;
		}

		if (*tok == '&') {
			if (!add_lock_slab(tok + 1)) {
				ret = -1;
				break;
			}
			continue;
		}

		/*
		 * At this moment, we don't have kernel symbols.  Save the
		 * symbols in a separate list and resolve them to addresses
		 * later.
		 */
		if (!add_lock_sym(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}
static int parse_output(const struct option *opt, const char *str,
			int unset __maybe_unused)
{
	const char **name = (const char **)opt->value;

	if (str == NULL)
		return -1;

	lock_output = fopen(str, "w");
	if (lock_output == NULL) {
		pr_err("Cannot open %s\n", str);
		return -1;
	}

	*name = str;
	return 0;
}
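
/*
 * Resolves a cgroup path to its id for the -G/--cgroup-filter option,
 * e.g. "-G /system.slice" (path shown for illustration only).
 */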
static bool add_lock_cgroup(char *name)
{
	u64 *tmp;
	struct cgroup *cgrp;

	cgrp = cgroup__new(name, /*do_open=*/false);
	if (cgrp == NULL) {
		pr_err("Failed to create cgroup: %s\n", name);
		return false;
	}

	if (read_cgroup_id(cgrp) < 0) {
		pr_err("Failed to read cgroup id for %s\n", name);
		cgroup__put(cgrp);
		return false;
	}

	tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		cgroup__put(cgrp);
		return false;
	}

	tmp[filters.nr_cgrps++] = cgrp->id;
	filters.cgrps = tmp;
	cgroup__put(cgrp);
	return true;
}

static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str,
			       int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (!add_lock_cgroup(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}
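
/*
 * Parses one -J/--inject-delay spec of the form TIME@FUNC, where TIME takes
 * an optional ns/us/ms suffix (no suffix means ns) and is capped at 10ms,
 * e.g. "-J 100us@mutex_lock" (function name here is only an example).
 */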
static bool add_lock_delay(char *spec)
{
	char *at, *pos;
	struct lock_delay *tmp;
	unsigned long duration;

	at = strchr(spec, '@');
	if (at == NULL) {
		pr_err("lock delay should have '@' sign: %s\n", spec);
		return false;
	}
	if (at == spec) {
		pr_err("lock delay should have time before '@': %s\n", spec);
		return false;
	}

	*at = '\0';
	duration = strtoul(spec, &pos, 0);
	if (!strcmp(pos, "ns"))
		duration *= 1;
	else if (!strcmp(pos, "us"))
		duration *= 1000;
	else if (!strcmp(pos, "ms"))
		duration *= 1000 * 1000;
	else if (*pos) {
		pr_err("invalid delay time: %s@%s\n", spec, at + 1);
		return false;
	}

	if (duration > 10 * 1000 * 1000) {
		pr_err("lock delay is too long: %s (> 10ms)\n", spec);
		return false;
	}

	tmp = realloc(delays, (nr_delays + 1) * sizeof(*delays));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}
	delays = tmp;

	delays[nr_delays].sym = strdup(at + 1);
	if (delays[nr_delays].sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}
	delays[nr_delays].time = duration;

	nr_delays++;
	return true;
}
static int parse_lock_delay(const struct option *opt __maybe_unused, const char *str,
			    int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (!add_lock_delay(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}
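
/*
 * Entry point: dispatches to the record/report/script/info/contention
 * subcommands.  Typical invocations (sketches, not an exhaustive list):
 *
 *   perf lock record -- <workload>
 *   perf lock report -k wait_total
 *   perf lock contention -a -b sleep 3
 */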
int cmd_lock(int argc, const char **argv)
{
	const struct option lock_options[] = {
		OPT_STRING('i', "input", &input_name, "file", "input file name"),
		OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
		OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
		OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
		OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
		OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
			   "file", "vmlinux pathname"),
		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
			   "file", "kallsyms pathname"),
		OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
		OPT_END()
	};

	const struct option info_options[] = {
		OPT_BOOLEAN('t', "threads", &info_threads,
			    "dump the thread list in perf.data"),
		OPT_BOOLEAN('m', "map", &info_map,
			    "dump the map of lock instances (address:name table)"),
		OPT_PARENT(lock_options)
	};

	const struct option report_options[] = {
		OPT_STRING('k', "key", &sort_key, "acquired",
			   "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
		OPT_STRING('F', "field", &output_fields, NULL,
			   "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
		/* TODO: type */
		OPT_BOOLEAN('c', "combine-locks", &combine_locks,
			    "combine locks in the same class"),
		OPT_BOOLEAN('t', "threads", &show_thread_stats,
			    "show per-thread lock stats"),
		OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
		OPT_PARENT(lock_options)
	};

	struct option contention_options[] = {
		OPT_STRING('k', "key", &sort_key, "wait_total",
			   "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
		OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
			   "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
		OPT_BOOLEAN('t', "threads", &show_thread_stats,
			    "show per-thread lock stats"),
		OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
		OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
			    "System-wide collection from all CPUs"),
		OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
			   "List of cpus to monitor"),
		OPT_STRING('p', "pid", &target.pid, "pid",
			   "Trace on existing process id"),
		OPT_STRING(0, "tid", &target.tid, "tid",
			   "Trace on existing thread id (exclusive to --pid)"),
		OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
			     "Max number of BPF map entries", parse_map_entry),
		OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
			     "Set the maximum stack depth when collecting lock contention, "
			     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
		OPT_INTEGER(0, "stack-skip", &stack_skip,
			    "Set the number of stack depth to skip when finding a lock caller, "
			    "Default: " __stringify(CONTENTION_STACK_SKIP)),
		OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
		OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
		OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
			     "Filter specific type of locks", parse_lock_type),
		OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
			     "Filter specific address/symbol of locks", parse_lock_addr),
		OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
			     "Filter specific function in the callstack", parse_call_stack),
		OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
		OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
				   "print result in CSV format with custom separator"),
		OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"),
		OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS",
			     "Filter specific cgroups", parse_cgroup_filter),
		OPT_CALLBACK('J', "inject-delay", NULL, "TIME@FUNC",
			     "Inject delays to specific locks", parse_lock_delay),
		OPT_PARENT(lock_options)
	};

	const char * const info_usage[] = {
		"perf lock info [<options>]",
		NULL
	};
	const char *const lock_subcommands[] = { "record", "report", "script",
						 "info", "contention", NULL };
	const char *lock_usage[] = {
		NULL,
		NULL
	};
	const char * const report_usage[] = {
		"perf lock report [<options>]",
		NULL
	};
	const char * const contention_usage[] = {
		"perf lock contention [<options>]",
		NULL
	};
	unsigned int i;
	int rc = 0;

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_HLIST_HEAD(lockhash_table + i);

	lock_output = stderr;
	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);
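
	/*
	 * strstarts() checks that argv[0] is a prefix of the subcommand name,
	 * so any abbreviation longer than two characters works, e.g.
	 * "perf lock con" selects the contention subcommand.
	 */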
	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		return __cmd_record(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		rc = __cmd_report(false);
	} else if (!strcmp(argv[0], "script")) {
		/* Aliased to 'perf script' */
		rc = cmd_script(argc, argv);
	} else if (!strcmp(argv[0], "info")) {
		if (argc) {
			argc = parse_options(argc, argv,
					     info_options, info_usage, 0);
			if (argc)
				usage_with_options(info_usage, info_options);
		}

		/* If neither threads nor map requested, display both */
		if (!info_threads && !info_map) {
			info_threads = true;
			info_map = true;
		}

		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		rc = __cmd_report(true);
	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
		trace_handler = &contention_lock_ops;
		sort_key = "wait_total";
		output_fields = "contended,wait_total,wait_max,avg_wait";

#ifndef HAVE_BPF_SKEL
		set_option_nobuild(contention_options, 'b', "use-bpf",
				   "no BUILD_BPF_SKEL=1", false);
#endif
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
		}

		if (check_lock_contention_options(contention_options,
						  contention_usage) < 0)
			return -1;

		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	/* free usage string allocated by parse_options_subcommand */
	free((void *)lock_usage[0]);

	zfree(&lockhash_table);
	return rc;
}