Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c
39488 views
1
/*
2
* CDDL HEADER START
3
*
4
* The contents of this file are subject to the terms of the
5
* Common Development and Distribution License (the "License").
6
* You may not use this file except in compliance with the License.
7
*
8
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9
* or http://www.opensolaris.org/os/licensing.
10
* See the License for the specific language governing permissions
11
* and limitations under the License.
12
*
13
* When distributing Covered Code, include this CDDL HEADER in each
14
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15
* If applicable, add the following below this CDDL HEADER, with the
16
* fields enclosed by brackets "[]" replaced with your own identifying
17
* information: Portions Copyright [yyyy] [name of copyright owner]
18
*
19
* CDDL HEADER END
20
*/
21
/*
22
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23
* Use is subject to license terms.
24
*/
25
26
#pragma ident "%Z%%M% %I% %E% SMI"
27
28
#include <stdio.h>
29
#include <stddef.h>
30
#include <stdlib.h>
31
#include <stdarg.h>
32
#include <string.h>
33
#include <strings.h>
34
#include <ctype.h>
35
#include <fcntl.h>
36
#include <unistd.h>
37
#include <errno.h>
38
#include <limits.h>
39
#include <sys/types.h>
40
#include <sys/modctl.h>
41
#include <sys/stat.h>
42
#include <sys/wait.h>
43
#include <dtrace.h>
44
#include <sys/lockstat.h>
45
#include <alloca.h>
46
#include <signal.h>
47
#include <assert.h>
48
49
#ifdef illumos
50
#define GETOPT_EOF EOF
51
#else
52
#include <sys/time.h>
53
#include <sys/resource.h>
54
55
#define mergesort(a, b, c, d) lsmergesort(a, b, c, d)
56
#define GETOPT_EOF (-1)
57
58
typedef uintptr_t pc_t;
59
#endif
60
61
#define LOCKSTAT_OPTSTR "x:bths:n:d:i:l:f:e:ckwWgCHEATID:RpPo:V"
62
63
#define LS_MAX_STACK_DEPTH 50
64
#define LS_MAX_EVENTS 64
65
66
/*
 * One lock-statistics record.
 *
 * Field order is significant: the LS_BASIC, LS_TIME, LS_HIST and LS_STACK()
 * macros are offsetof() expressions into this structure, so a record of a
 * given experiment type only occupies the leading part of the struct up to
 * the corresponding member.
 */
typedef struct lsrec {
67
struct lsrec *ls_next; /* next in hash chain */
68
#ifdef illumos
69
uintptr_t ls_lock; /* lock address */
70
#else
71
char *ls_lock; /* lock name */
72
#endif
73
uintptr_t ls_caller; /* caller address */
74
uint32_t ls_count; /* cumulative event count */
75
uint32_t ls_event; /* type of event */
76
uintptr_t ls_refcnt; /* cumulative reference count */
77
uint64_t ls_time; /* cumulative event duration */
78
uint32_t ls_hist[64]; /* log2(duration) histogram */
79
/* stack trace entries; LS_STACK(depth) sizes a record holding depth of them */
uintptr_t ls_stack[LS_MAX_STACK_DEPTH];
80
} lsrec_t;
81
82
/*
 * Cursor handed to the DTrace walk/consume callbacks (process_aggregate(),
 * process_trace()): it tracks where the next record is written and how many
 * records have been produced so far.
 */
typedef struct lsdata {
83
struct lsrec *lsd_next; /* next available */
84
int lsd_count; /* number of records */
85
} lsdata_t;
86
87
/*
88
* Definitions for the types of experiments which can be run. They are
89
* listed in increasing order of memory cost and processing time cost.
90
* The numerical value of each type is the number of bytes needed per record.
91
*/
92
#define LS_BASIC offsetof(lsrec_t, ls_time)
93
#define LS_TIME offsetof(lsrec_t, ls_hist[0])
94
#define LS_HIST offsetof(lsrec_t, ls_stack[0])
95
#define LS_STACK(depth) offsetof(lsrec_t, ls_stack[depth])
96
97
static void report_stats(FILE *, lsrec_t **, size_t, uint64_t, uint64_t);
98
static void report_trace(FILE *, lsrec_t **);
99
100
extern int symtab_init(void);
101
extern char *addr_to_sym(uintptr_t, uintptr_t *, size_t *);
102
extern uintptr_t sym_to_addr(char *name);
103
extern size_t sym_size(char *name);
104
extern char *strtok_r(char *, const char *, char **);
105
106
#define DEFAULT_NRECS 10000
107
#define DEFAULT_HZ 97
108
#define MAX_HZ 1000
109
#define MIN_AGGSIZE (16 * 1024)
110
#define MAX_AGGSIZE (32 * 1024 * 1024)
111
112
/*
 * File-scope state.  Notes on the members whose use is visible in this file:
 *
 *   g_stkdepth      stack depth requested with -s; the comparators compare
 *                   this many ls_stack[] entries
 *   g_recsize       bytes per lsrec_t record (one of the LS_* values)
 *   g_nrecs         maximum number of records (DEFAULT_NRECS unless -n)
 *   g_min_duration  per-event minimum duration, indexed by event number
 *   g_dtp           libdtrace handle returned by dtrace_open()
 *   g_predicate     extra D predicate applied to non-interrupt events
 *   g_ipredicate    extra D predicate applied to interrupt ('I') events
 *   g_prog          generated D program text, grown by dprog_add()
 *   g_proglen       size of g_prog including its terminating NUL
 *   g_dropped       number of drops reported to drophandler()
 *   g_tracing       non-zero: emit trace() actions instead of aggregations
 *   g_Vflag         non-zero: print the D program before compiling it
 */
static int g_stkdepth;
113
static int g_topn = INT_MAX;
114
static hrtime_t g_elapsed;
115
static int g_rates = 0;
116
static int g_pflag = 0;
117
static int g_Pflag = 0;
118
static int g_wflag = 0;
119
static int g_Wflag = 0;
120
static int g_cflag = 0;
121
static int g_kflag = 0;
122
static int g_gflag = 0;
123
static int g_Vflag = 0;
124
static int g_tracing = 0;
125
static size_t g_recsize;
126
static size_t g_nrecs;
127
static int g_nrecs_used;
128
static uchar_t g_enabled[LS_MAX_EVENTS];
129
static hrtime_t g_min_duration[LS_MAX_EVENTS];
130
static dtrace_hdl_t *g_dtp;
131
static char *g_predicate;
132
static char *g_ipredicate;
133
static char *g_prog;
134
static int g_proglen;
135
static int g_dropped;
136
137
/*
 * Static description of one lockstat event; g_event_info[] holds one of
 * these per event number.
 */
typedef struct ls_event_info {
138
/* event class letter: 'C', 'H', 'I' or 'E' (see show_events() callers) */
char ev_type;
139
/* NOTE(review): presumably the report column header -- confirm at use site */
char ev_lhdr[20];
140
/* human-readable description, printed by show_events() */
char ev_desc[80];
141
/* NOTE(review): presumably the unit label for the measured value -- confirm */
char ev_units[10];
142
/* DTrace probe description emitted into the D program; empty if unused */
char ev_name[DTRACE_NAMELEN];
143
/* optional D predicate for this event, or NULL */
char *ev_predicate;
144
/* optional "acquire" probe; when set, an extra D clause is generated */
char *ev_acquire;
145
} ls_event_info_t;
146
147
/*
 * Event table, indexed by event number (the number shown by show_events()
 * and accepted by -e).  Initializer order is: ev_type, ev_lhdr, ev_desc,
 * ev_units, ev_name, ev_predicate, ev_acquire; omitted trailing members are
 * zero/NULL.  Entries described as "Unknown event" have an empty ev_name,
 * so dprog_addevent() generates nothing for them.
 */
static ls_event_info_t g_event_info[LS_MAX_EVENTS] = {
148
/* Contention events ('C'): 0-31 */
{ 'C', "Lock", "Adaptive mutex spin", "nsec",
149
"lockstat:::adaptive-spin" },
150
{ 'C', "Lock", "Adaptive mutex block", "nsec",
151
"lockstat:::adaptive-block" },
152
{ 'C', "Lock", "Spin lock spin", "nsec",
153
"lockstat:::spin-spin" },
154
{ 'C', "Lock", "Thread lock spin", "nsec",
155
"lockstat:::thread-spin" },
156
{ 'C', "Lock", "R/W writer blocked by writer", "nsec",
157
"lockstat:::rw-block", "arg2 == 0 && arg3 == 1" },
158
{ 'C', "Lock", "R/W writer blocked by readers", "nsec",
159
"lockstat:::rw-block", "arg2 == 0 && arg3 == 0 && arg4" },
160
{ 'C', "Lock", "R/W reader blocked by writer", "nsec",
161
"lockstat:::rw-block", "arg2 == 1 && arg3 == 1" },
162
{ 'C', "Lock", "R/W reader blocked by write wanted", "nsec",
163
"lockstat:::rw-block", "arg2 == 1 && arg3 == 0 && arg4" },
164
{ 'C', "Lock", "R/W writer spin on writer", "nsec",
165
"lockstat:::rw-spin", "arg2 == 0 && arg3 == 1" },
166
{ 'C', "Lock", "R/W writer spin on readers", "nsec",
167
"lockstat:::rw-spin", "arg2 == 0 && arg3 == 0 && arg4" },
168
{ 'C', "Lock", "R/W reader spin on writer", "nsec",
169
"lockstat:::rw-spin", "arg2 == 1 && arg3 == 1" },
170
{ 'C', "Lock", "R/W reader spin on write wanted", "nsec",
171
"lockstat:::rw-spin", "arg2 == 1 && arg3 == 0 && arg4" },
172
{ 'C', "Lock", "SX exclusive block", "nsec",
173
"lockstat:::sx-block", "arg2 == 0" },
174
{ 'C', "Lock", "SX shared block", "nsec",
175
"lockstat:::sx-block", "arg2 == 1" },
176
{ 'C', "Lock", "SX exclusive spin", "nsec",
177
"lockstat:::sx-spin", "arg2 == 0" },
178
{ 'C', "Lock", "SX shared spin", "nsec",
179
"lockstat:::sx-spin", "arg2 == 1" },
180
{ 'C', "Lock", "lockmgr writer blocked by writer", "nsec",
181
"lockstat:::lockmgr-block", "arg2 == 0 && arg3 == 1" },
182
{ 'C', "Lock", "lockmgr writer blocked by readers", "nsec",
183
"lockstat:::lockmgr-block", "arg2 == 0 && arg3 == 0 && arg4" },
184
{ 'C', "Lock", "lockmgr reader blocked by writer", "nsec",
185
"lockstat:::lockmgr-block", "arg2 == 1 && arg3 == 1" },
186
{ 'C', "Lock", "lockmgr reader blocked by write wanted", "nsec",
187
"lockstat:::lockmgr-block", "arg2 == 1 && arg3 == 0 && arg4" },
188
{ 'C', "Lock", "Unknown event (type 20)", "units" },
189
{ 'C', "Lock", "Unknown event (type 21)", "units" },
190
{ 'C', "Lock", "Unknown event (type 22)", "units" },
191
{ 'C', "Lock", "Unknown event (type 23)", "units" },
192
{ 'C', "Lock", "Unknown event (type 24)", "units" },
193
{ 'C', "Lock", "Unknown event (type 25)", "units" },
194
{ 'C', "Lock", "Unknown event (type 26)", "units" },
195
{ 'C', "Lock", "Unknown event (type 27)", "units" },
196
{ 'C', "Lock", "Unknown event (type 28)", "units" },
197
{ 'C', "Lock", "Unknown event (type 29)", "units" },
198
{ 'C', "Lock", "Unknown event (type 30)", "units" },
199
{ 'C', "Lock", "Unknown event (type 31)", "units" },
200
/* Hold events ('H'): 32-55; ev_acquire names the matching acquire probe */
{ 'H', "Lock", "Adaptive mutex hold", "nsec",
201
"lockstat:::adaptive-release", NULL,
202
"lockstat:::adaptive-acquire" },
203
{ 'H', "Lock", "Spin lock hold", "nsec",
204
"lockstat:::spin-release", NULL,
205
"lockstat:::spin-acquire" },
206
{ 'H', "Lock", "R/W writer hold", "nsec",
207
"lockstat:::rw-release", "arg1 == 0",
208
"lockstat:::rw-acquire" },
209
{ 'H', "Lock", "R/W reader hold", "nsec",
210
"lockstat:::rw-release", "arg1 == 1",
211
"lockstat:::rw-acquire" },
212
{ 'H', "Lock", "SX shared hold", "nsec",
213
"lockstat:::sx-release", "arg1 == 1",
214
"lockstat:::sx-acquire" },
215
{ 'H', "Lock", "SX exclusive hold", "nsec",
216
"lockstat:::sx-release", "arg1 == 0",
217
"lockstat:::sx-acquire" },
218
{ 'H', "Lock", "lockmgr shared hold", "nsec",
219
"lockstat:::lockmgr-release", "arg1 == 1",
220
"lockstat:::lockmgr-acquire" },
221
{ 'H', "Lock", "lockmgr exclusive hold", "nsec",
222
"lockstat:::lockmgr-release,lockstat:::lockmgr-disown", "arg1 == 0",
223
"lockstat:::lockmgr-acquire" },
224
{ 'H', "Lock", "Unknown event (type 40)", "units" },
225
{ 'H', "Lock", "Unknown event (type 41)", "units" },
226
{ 'H', "Lock", "Unknown event (type 42)", "units" },
227
{ 'H', "Lock", "Unknown event (type 43)", "units" },
228
{ 'H', "Lock", "Unknown event (type 44)", "units" },
229
{ 'H', "Lock", "Unknown event (type 45)", "units" },
230
{ 'H', "Lock", "Unknown event (type 46)", "units" },
231
{ 'H', "Lock", "Unknown event (type 47)", "units" },
232
{ 'H', "Lock", "Unknown event (type 48)", "units" },
233
{ 'H', "Lock", "Unknown event (type 49)", "units" },
234
{ 'H', "Lock", "Unknown event (type 50)", "units" },
235
{ 'H', "Lock", "Unknown event (type 51)", "units" },
236
{ 'H', "Lock", "Unknown event (type 52)", "units" },
237
{ 'H', "Lock", "Unknown event (type 53)", "units" },
238
{ 'H', "Lock", "Unknown event (type 54)", "units" },
239
{ 'H', "Lock", "Unknown event (type 55)", "units" },
240
/* Interrupt events ('I'): 56-59 */
#ifdef illumos
241
{ 'I', "CPU+PIL", "Profiling interrupt", "nsec",
242
#else
243
{ 'I', "CPU+Pri_Class", "Profiling interrupt", "nsec",
244
#endif
245
"profile:::profile-97", NULL },
246
{ 'I', "Lock", "Unknown event (type 57)", "units" },
247
{ 'I', "Lock", "Unknown event (type 58)", "units" },
248
{ 'I', "Lock", "Unknown event (type 59)", "units" },
249
/* Error events ('E'): 60-63 */
{ 'E', "Lock", "Recursive lock entry detected", "(N/A)",
250
"lockstat:::rw-release", NULL, "lockstat:::rw-acquire" },
251
{ 'E', "Lock", "Lockstat enter failure", "(N/A)" },
252
{ 'E', "Lock", "Lockstat exit failure", "nsec" },
253
{ 'E', "Lock", "Lockstat record failure", "(N/A)" },
254
};
255
256
#ifndef illumos
257
/*
 * Short names for priority classes (non-illumos builds only).
 * NOTE(review): presumably indexed by the td_pri_class value that the
 * interrupt-event D clause folds into its key -- confirm at the use site.
 */
static char *g_pri_class[] = {
258
"",
259
"Intr",
260
"RealT",
261
"TShar",
262
"Idle"
263
};
264
#endif
265
266
/*
 * Report a fatal error and exit with status 2.
 *
 * Writes "lockstat: " followed by the printf-style message to stderr.  When
 * do_perror is non-zero, ": <strerror(errno)>" is appended, using the errno
 * value that was current on entry.  Never returns.
 */
static void
fail(int do_perror, const char *message, ...)
{
	/* Capture errno first: the stdio calls below may overwrite it. */
	int saved_errno = errno;
	va_list ap;

	(void) fputs("lockstat: ", stderr);

	va_start(ap, message);
	(void) vfprintf(stderr, message, ap);
	va_end(ap);

	if (do_perror != 0)
		(void) fprintf(stderr, ": %s", strerror(saved_errno));

	(void) fputc('\n', stderr);
	exit(2);
}
281
282
static void
283
dfail(const char *message, ...)
284
{
285
va_list args;
286
287
va_start(args, message);
288
(void) fprintf(stderr, "lockstat: ");
289
(void) vfprintf(stderr, message, args);
290
va_end(args);
291
(void) fprintf(stderr, ": %s\n",
292
dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
293
294
exit(2);
295
}
296
297
static void
298
show_events(char event_type, char *desc)
299
{
300
int i, first = -1, last;
301
302
for (i = 0; i < LS_MAX_EVENTS; i++) {
303
ls_event_info_t *evp = &g_event_info[i];
304
if (evp->ev_type != event_type ||
305
strncmp(evp->ev_desc, "Unknown event", 13) == 0)
306
continue;
307
if (first == -1)
308
first = i;
309
last = i;
310
}
311
312
(void) fprintf(stderr,
313
"\n%s events (lockstat -%c or lockstat -e %d-%d):\n\n",
314
desc, event_type, first, last);
315
316
for (i = first; i <= last; i++)
317
(void) fprintf(stderr,
318
"%4d = %s\n", i, g_event_info[i].ev_desc);
319
}
320
321
static void
322
usage(void)
323
{
324
(void) fprintf(stderr,
325
"Usage: lockstat [options] command [args]\n"
326
"\nGeneral options:\n\n"
327
" -V print the corresponding D program\n"
328
"\nEvent selection options:\n\n"
329
" -C watch contention events [on by default]\n"
330
" -E watch error events [off by default]\n"
331
" -H watch hold events [off by default]\n"
332
" -I watch interrupt events [off by default]\n"
333
" -A watch all lock events [equivalent to -CH]\n"
334
" -e event_list only watch the specified events (shown below);\n"
335
" <event_list> is a comma-separated list of\n"
336
" events or ranges of events, e.g. 1,4-7,35\n"
337
" -i rate interrupt rate for -I [default: %d Hz]\n"
338
"\nData gathering options:\n\n"
339
" -b basic statistics (lock, caller, event count)\n"
340
" -t timing for all events [default]\n"
341
" -h histograms for event times\n"
342
" -s depth stack traces <depth> deep\n"
343
" -x opt[=val] enable or modify DTrace options\n"
344
"\nData filtering options:\n\n"
345
" -n nrecords maximum number of data records [default: %d]\n"
346
" -l lock[,size] only watch <lock>, which can be specified as a\n"
347
" symbolic name or hex address; <size> defaults\n"
348
" to the ELF symbol size if available, 1 if not\n"
349
" -f func[,size] only watch events generated by <func>\n"
350
" -d duration only watch events longer than <duration>\n"
351
" -T trace (rather than sample) events\n"
352
"\nData reporting options:\n\n"
353
#ifdef illumos
354
" -c coalesce lock data for arrays like pse_mutex[]\n"
355
#endif
356
" -k coalesce PCs within functions\n"
357
" -g show total events generated by function\n"
358
" -w wherever: don't distinguish events by caller\n"
359
" -W whichever: don't distinguish events by lock\n"
360
" -R display rates rather than counts\n"
361
" -p parsable output format (awk(1)-friendly)\n"
362
" -P sort lock data by (count * avg_time) product\n"
363
" -D n only display top <n> events of each type\n"
364
" -o filename send output to <filename>\n",
365
DEFAULT_HZ, DEFAULT_NRECS);
366
367
show_events('C', "Contention");
368
show_events('H', "Hold-time");
369
show_events('I', "Interrupt");
370
show_events('E', "Error");
371
(void) fprintf(stderr, "\n");
372
373
exit(1);
374
}
375
376
static int
377
lockcmp(lsrec_t *a, lsrec_t *b)
378
{
379
int i;
380
381
if (a->ls_event < b->ls_event)
382
return (-1);
383
if (a->ls_event > b->ls_event)
384
return (1);
385
386
for (i = g_stkdepth - 1; i >= 0; i--) {
387
if (a->ls_stack[i] < b->ls_stack[i])
388
return (-1);
389
if (a->ls_stack[i] > b->ls_stack[i])
390
return (1);
391
}
392
393
if (a->ls_caller < b->ls_caller)
394
return (-1);
395
if (a->ls_caller > b->ls_caller)
396
return (1);
397
398
#ifdef illumos
399
if (a->ls_lock < b->ls_lock)
400
return (-1);
401
if (a->ls_lock > b->ls_lock)
402
return (1);
403
404
return (0);
405
#else
406
return (strcmp(a->ls_lock, b->ls_lock));
407
#endif
408
}
409
410
static int
411
countcmp(lsrec_t *a, lsrec_t *b)
412
{
413
if (a->ls_event < b->ls_event)
414
return (-1);
415
if (a->ls_event > b->ls_event)
416
return (1);
417
418
return (b->ls_count - a->ls_count);
419
}
420
421
static int
422
timecmp(lsrec_t *a, lsrec_t *b)
423
{
424
if (a->ls_event < b->ls_event)
425
return (-1);
426
if (a->ls_event > b->ls_event)
427
return (1);
428
429
if (a->ls_time < b->ls_time)
430
return (1);
431
if (a->ls_time > b->ls_time)
432
return (-1);
433
434
return (0);
435
}
436
437
static int
438
lockcmp_anywhere(lsrec_t *a, lsrec_t *b)
439
{
440
if (a->ls_event < b->ls_event)
441
return (-1);
442
if (a->ls_event > b->ls_event)
443
return (1);
444
445
#ifdef illumos
446
if (a->ls_lock < b->ls_lock)
447
return (-1);
448
if (a->ls_lock > b->ls_lock)
449
return (1);
450
451
return (0);
452
#else
453
return (strcmp(a->ls_lock, b->ls_lock));
454
#endif
455
}
456
457
static int
458
lock_and_count_cmp_anywhere(lsrec_t *a, lsrec_t *b)
459
{
460
#ifndef illumos
461
int cmp;
462
#endif
463
464
if (a->ls_event < b->ls_event)
465
return (-1);
466
if (a->ls_event > b->ls_event)
467
return (1);
468
469
#ifdef illumos
470
if (a->ls_lock < b->ls_lock)
471
return (-1);
472
if (a->ls_lock > b->ls_lock)
473
return (1);
474
#else
475
cmp = strcmp(a->ls_lock, b->ls_lock);
476
if (cmp != 0)
477
return (cmp);
478
#endif
479
480
return (b->ls_count - a->ls_count);
481
}
482
483
static int
484
sitecmp_anylock(lsrec_t *a, lsrec_t *b)
485
{
486
int i;
487
488
if (a->ls_event < b->ls_event)
489
return (-1);
490
if (a->ls_event > b->ls_event)
491
return (1);
492
493
for (i = g_stkdepth - 1; i >= 0; i--) {
494
if (a->ls_stack[i] < b->ls_stack[i])
495
return (-1);
496
if (a->ls_stack[i] > b->ls_stack[i])
497
return (1);
498
}
499
500
if (a->ls_caller < b->ls_caller)
501
return (-1);
502
if (a->ls_caller > b->ls_caller)
503
return (1);
504
505
return (0);
506
}
507
508
static int
509
site_and_count_cmp_anylock(lsrec_t *a, lsrec_t *b)
510
{
511
int i;
512
513
if (a->ls_event < b->ls_event)
514
return (-1);
515
if (a->ls_event > b->ls_event)
516
return (1);
517
518
for (i = g_stkdepth - 1; i >= 0; i--) {
519
if (a->ls_stack[i] < b->ls_stack[i])
520
return (-1);
521
if (a->ls_stack[i] > b->ls_stack[i])
522
return (1);
523
}
524
525
if (a->ls_caller < b->ls_caller)
526
return (-1);
527
if (a->ls_caller > b->ls_caller)
528
return (1);
529
530
return (b->ls_count - a->ls_count);
531
}
532
533
static void
534
lsmergesort(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **a, lsrec_t **b, int n)
535
{
536
int m = n / 2;
537
int i, j;
538
539
if (m > 1)
540
lsmergesort(cmp, a, b, m);
541
if (n - m > 1)
542
lsmergesort(cmp, a + m, b + m, n - m);
543
for (i = m; i > 0; i--)
544
b[i - 1] = a[i - 1];
545
for (j = m - 1; j < n - 1; j++)
546
b[n + m - j - 2] = a[j + 1];
547
while (i < j)
548
*a++ = cmp(b[i], b[j]) < 0 ? b[i++] : b[j--];
549
*a = b[i];
550
}
551
552
static void
553
coalesce(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **lock, int n)
554
{
555
int i, j;
556
lsrec_t *target, *current;
557
558
target = lock[0];
559
560
for (i = 1; i < n; i++) {
561
current = lock[i];
562
if (cmp(current, target) != 0) {
563
target = current;
564
continue;
565
}
566
current->ls_event = LS_MAX_EVENTS;
567
target->ls_count += current->ls_count;
568
target->ls_refcnt += current->ls_refcnt;
569
if (g_recsize < LS_TIME)
570
continue;
571
target->ls_time += current->ls_time;
572
if (g_recsize < LS_HIST)
573
continue;
574
for (j = 0; j < 64; j++)
575
target->ls_hist[j] += current->ls_hist[j];
576
}
577
}
578
579
/*
 * If *addrp resolves to a symbol and lies within that symbol's size,
 * rewrite *addrp to the start of the symbol; otherwise leave it unchanged.
 */
static void
coalesce_symbol(uintptr_t *addrp)
{
	uintptr_t offset;
	size_t length;

	if (addr_to_sym(*addrp, &offset, &length) == NULL)
		return;

	if (offset < length)
		*addrp -= offset;
}
588
589
/*
 * AND a new term onto the D predicate string *pred.
 *
 *   pred   in/out: heap-allocated predicate string, or NULL for "none yet";
 *          replaced with a newly allocated string (the old one is freed)
 *   what   left-hand side of the term; if NULL the call is a no-op
 *   cmp    comparison operator text, or NULL to use "what" as the whole term
 *   value  right-hand side, formatted with %p when cmp is non-NULL
 *
 * An empty or NULL predicate becomes just the new term; otherwise the
 * result is "(<old>) && (<term>)".
 *
 * Allocation failure is fatal (message to stderr, exit status 2); the
 * buffer size accounts for the length of cmp and all formatting is bounded.
 */
static void
predicate_add(char **pred, char *what, char *cmp, uintptr_t value)
{
	const char *old;
	char *new;
	size_t newlen;

	if (what == NULL)
		return;

	/* A NULL predicate is treated exactly like an empty one. */
	old = (*pred != NULL) ? *pred : "";

	/* 32 bytes of slack cover the %p text, separators and the NUL. */
	newlen = strlen(old) + strlen(what) +
	    (cmp != NULL ? strlen(cmp) : 0) + 32 + strlen("( && )");

	if ((new = malloc(newlen)) == NULL) {
		(void) fprintf(stderr,
		    "lockstat: failed to allocate predicate: %s\n",
		    strerror(errno));
		exit(2);
	}

	if (old[0] != '\0') {
		if (cmp != NULL) {
			(void) snprintf(new, newlen, "(%s) && (%s %s %p)",
			    old, what, cmp, (void *)value);
		} else {
			(void) snprintf(new, newlen, "(%s) && (%s)",
			    old, what);
		}
	} else {
		if (cmp != NULL) {
			(void) snprintf(new, newlen, "%s %s %p",
			    what, cmp, (void *)value);
		} else {
			(void) snprintf(new, newlen, "%s", what);
		}
	}

	free(*pred);
	*pred = new;
}
626
627
/*
 * Release the predicate string *pred (which may be NULL) and reset the
 * caller's pointer to NULL.
 */
static void
predicate_destroy(char **pred)
{
	char *old = *pred;

	*pred = NULL;
	free(old);
}
633
634
/*
 * OR an address-range test onto the D filter expression *filt.
 *
 *   filt   in/out: heap-allocated filter string, or NULL for "none yet";
 *          replaced with a newly allocated string (the old one is freed)
 *   what   D expression being range-checked
 *   base   inclusive lower bound of the range
 *   size   length of the range; the upper bound (base + size) is exclusive
 *
 * The appended term is "(<what> >= <base> && <what> < <base+size>)",
 * preceded by " || " when the filter was not empty.
 *
 * The term is formatted directly into a heap buffer sized from its inputs
 * (no fixed-size stack buffer), every write is bounded, and allocation
 * failure is fatal (message to stderr, exit status 2).
 */
static void
filter_add(char **filt, char *what, uintptr_t base, size_t size)
{
	const char *old, *sep;
	char *new;
	size_t newlen;

	/* A NULL filter is treated exactly like an empty one. */
	old = (*filt != NULL) ? *filt : "";
	sep = (old[0] != '\0') ? " || " : "";

	/* 64 bytes of slack cover both %p values, punctuation and the NUL. */
	newlen = strlen(old) + 2 * strlen(what) + 64;

	if ((new = malloc(newlen)) == NULL) {
		(void) fprintf(stderr,
		    "lockstat: failed to allocate filter: %s\n",
		    strerror(errno));
		exit(2);
	}

#ifdef illumos
	(void) snprintf(new, newlen, "%s%s(%s >= 0x%p && %s < 0x%p)",
	    old, sep, what, (void *)base, what, (void *)(base + size));
#else
	(void) snprintf(new, newlen, "%s%s(%s >= %p && %s < %p)",
	    old, sep, what, (void *)base, what, (void *)(base + size));
#endif

	free(*filt);
	*filt = new;
}
660
661
/*
 * Release the filter string *filt (which may be NULL) and reset the
 * caller's pointer to NULL.
 */
static void
filter_destroy(char **filt)
{
	char *old = *filt;

	*filt = NULL;
	free(old);
}
667
668
static void
669
dprog_add(const char *fmt, ...)
670
{
671
va_list args;
672
int size, offs;
673
char c;
674
675
va_start(args, fmt);
676
size = vsnprintf(&c, 1, fmt, args) + 1;
677
va_end(args);
678
679
if (g_proglen == 0) {
680
offs = 0;
681
} else {
682
offs = g_proglen - 1;
683
}
684
685
g_proglen = offs + size;
686
687
if ((g_prog = realloc(g_prog, g_proglen)) == NULL)
688
fail(1, "failed to reallocate program text");
689
690
va_start(args, fmt);
691
(void) vsnprintf(&g_prog[offs], size, fmt, args);
692
va_end(args);
693
}
694
695
/*
696
* This function may read like an open sewer, but keep in mind that programs
697
* that generate other programs are rarely pretty. If one has the unenviable
698
* task of maintaining or -- worse -- extending this code, use the -V option
699
* to examine the D program as generated by this function.
700
*/
701
static void
702
dprog_addevent(int event)
703
{
704
ls_event_info_t *info = &g_event_info[event];
705
char *pred = NULL;
706
char stack[20];
707
const char *arg0, *caller;
708
char *arg1 = "arg1";
709
char buf[80];
710
hrtime_t dur;
711
int depth;
712
713
if (info->ev_name[0] == '\0')
714
return;
715
716
if (info->ev_type == 'I') {
717
/*
718
* For interrupt events, arg0 (normally the lock pointer) is
719
* the CPU address plus the current pil, and arg1 (normally
720
* the number of nanoseconds) is the number of nanoseconds
721
* late -- and it's stored in arg2.
722
*/
723
#ifdef illumos
724
arg0 = "(uintptr_t)curthread->t_cpu + \n"
725
"\t curthread->t_cpu->cpu_profile_pil";
726
#else
727
arg0 = "(uintptr_t)(curthread->td_oncpu << 16) + \n"
728
"\t 0x01000000 + curthread->td_pri_class";
729
#endif
730
caller = "(uintptr_t)arg0";
731
arg1 = "arg2";
732
} else {
733
#ifdef illumos
734
arg0 = "(uintptr_t)arg0";
735
#else
736
arg0 = "stringof(args[0]->lock_object.lo_name)";
737
#endif
738
caller = "caller";
739
}
740
741
if (g_recsize > LS_HIST) {
742
for (depth = 0; g_recsize > LS_STACK(depth); depth++)
743
continue;
744
745
if (g_tracing) {
746
(void) sprintf(stack, "\tstack(%d);\n", depth);
747
} else {
748
(void) sprintf(stack, ", stack(%d)", depth);
749
}
750
} else {
751
(void) sprintf(stack, "");
752
}
753
754
if (info->ev_acquire != NULL) {
755
/*
756
* If this is a hold event, we need to generate an additional
757
* clause for the acquire; the clause for the release will be
758
* generated with the aggregating statement, below.
759
*/
760
dprog_add("%s\n", info->ev_acquire);
761
predicate_add(&pred, info->ev_predicate, NULL, 0);
762
predicate_add(&pred, g_predicate, NULL, 0);
763
if (pred != NULL)
764
dprog_add("/%s/\n", pred);
765
766
dprog_add("{\n");
767
(void) sprintf(buf, "self->ev%d[(uintptr_t)arg0]", event);
768
769
if (info->ev_type == 'H') {
770
dprog_add("\t%s = timestamp;\n", buf);
771
} else {
772
/*
773
* If this isn't a hold event, it's the recursive
774
* error event. For this, we simply bump the
775
* thread-local, per-lock count.
776
*/
777
dprog_add("\t%s++;\n", buf);
778
}
779
780
dprog_add("}\n\n");
781
predicate_destroy(&pred);
782
pred = NULL;
783
784
if (info->ev_type == 'E') {
785
/*
786
* If this is the recursive lock error event, we need
787
* to generate an additional clause to decrement the
788
* thread-local, per-lock count. This assures that we
789
* only execute the aggregating clause if we have
790
* recursive entry.
791
*/
792
dprog_add("%s\n", info->ev_name);
793
dprog_add("/%s/\n{\n\t%s--;\n}\n\n", buf, buf);
794
}
795
796
predicate_add(&pred, buf, NULL, 0);
797
798
if (info->ev_type == 'H') {
799
(void) sprintf(buf, "timestamp -\n\t "
800
"self->ev%d[(uintptr_t)arg0]", event);
801
}
802
803
arg1 = buf;
804
} else {
805
predicate_add(&pred, info->ev_predicate, NULL, 0);
806
if (info->ev_type != 'I')
807
predicate_add(&pred, g_predicate, NULL, 0);
808
else
809
predicate_add(&pred, g_ipredicate, NULL, 0);
810
}
811
812
if ((dur = g_min_duration[event]) != 0)
813
predicate_add(&pred, arg1, ">=", dur);
814
815
dprog_add("%s\n", info->ev_name);
816
817
if (pred != NULL)
818
dprog_add("/%s/\n", pred);
819
predicate_destroy(&pred);
820
821
dprog_add("{\n");
822
823
if (g_tracing) {
824
dprog_add("\ttrace(%dULL);\n", event);
825
dprog_add("\ttrace(%s);\n", arg0);
826
dprog_add("\ttrace(%s);\n", caller);
827
dprog_add(stack);
828
} else {
829
/*
830
* The ordering here is important: when we process the
831
* aggregate, we count on the fact that @avg appears before
832
* @hist in program order to assure that @avg is assigned the
833
* first aggregation variable ID and @hist assigned the
834
* second; see the comment in process_aggregate() for details.
835
*/
836
dprog_add("\t@avg[%dULL, %s, %s%s] = avg(%s);\n",
837
event, arg0, caller, stack, arg1);
838
839
if (g_recsize >= LS_HIST) {
840
dprog_add("\t@hist[%dULL, %s, %s%s] = quantize"
841
"(%s);\n", event, arg0, caller, stack, arg1);
842
}
843
}
844
845
if (info->ev_acquire != NULL)
846
dprog_add("\tself->ev%d[arg0] = 0;\n", event);
847
848
dprog_add("}\n\n");
849
}
850
851
static void
852
dprog_compile()
853
{
854
dtrace_prog_t *prog;
855
dtrace_proginfo_t info;
856
857
if (g_Vflag) {
858
(void) fprintf(stderr, "lockstat: vvvv D program vvvv\n");
859
(void) fputs(g_prog, stderr);
860
(void) fprintf(stderr, "lockstat: ^^^^ D program ^^^^\n");
861
}
862
863
if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
864
DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
865
dfail("failed to compile program");
866
867
if (dtrace_program_exec(g_dtp, prog, &info) == -1)
868
dfail("failed to enable probes");
869
870
if (dtrace_go(g_dtp) != 0)
871
dfail("couldn't start tracing");
872
}
873
874
/*
 * Signal handler installed for SIGUSR1 by status_init().  It deliberately
 * does nothing.
 * NOTE(review): presumably the signal only needs to be delivered (e.g. to
 * interrupt a blocking call) -- confirm against the caller's wait loop.
 */
static void
#ifdef illumos
status_fire(void)
#else
status_fire(int i)
#endif
{
	/* Intentionally empty. */
}
881
882
static void
883
status_init(void)
884
{
885
dtrace_optval_t val, status, agg;
886
struct sigaction act;
887
struct itimerspec ts;
888
struct sigevent ev;
889
timer_t tid;
890
891
if (dtrace_getopt(g_dtp, "statusrate", &status) == -1)
892
dfail("failed to get 'statusrate'");
893
894
if (dtrace_getopt(g_dtp, "aggrate", &agg) == -1)
895
dfail("failed to get 'statusrate'");
896
897
/*
898
* We would want to awaken at a rate that is the GCD of the statusrate
899
* and the aggrate -- but that seems a bit absurd. Instead, we'll
900
* simply awaken at a rate that is the more frequent of the two, which
901
* assures that we're never later than the interval implied by the
902
* more frequent rate.
903
*/
904
val = status < agg ? status : agg;
905
906
(void) sigemptyset(&act.sa_mask);
907
act.sa_flags = 0;
908
act.sa_handler = status_fire;
909
(void) sigaction(SIGUSR1, &act, NULL);
910
911
ev.sigev_notify = SIGEV_SIGNAL;
912
ev.sigev_signo = SIGUSR1;
913
914
if (timer_create(CLOCK_REALTIME, &ev, &tid) == -1)
915
dfail("cannot create CLOCK_REALTIME timer");
916
917
ts.it_value.tv_sec = val / NANOSEC;
918
ts.it_value.tv_nsec = val % NANOSEC;
919
ts.it_interval = ts.it_value;
920
921
if (timer_settime(tid, TIMER_RELTIME, &ts, NULL) == -1)
922
dfail("cannot set time on CLOCK_REALTIME timer");
923
}
924
925
static void
926
status_check(void)
927
{
928
if (!g_tracing && dtrace_aggregate_snap(g_dtp) != 0)
929
dfail("failed to snap aggregate");
930
931
if (dtrace_status(g_dtp) == -1)
932
dfail("dtrace_status()");
933
}
934
935
static void
936
lsrec_fill(lsrec_t *lsrec, const dtrace_recdesc_t *rec, int nrecs, caddr_t data)
937
{
938
bzero(lsrec, g_recsize);
939
lsrec->ls_count = 1;
940
941
if ((g_recsize > LS_HIST && nrecs < 4) || (nrecs < 3))
942
fail(0, "truncated DTrace record");
943
944
if (rec->dtrd_size != sizeof (uint64_t))
945
fail(0, "bad event size in first record");
946
947
/* LINTED - alignment */
948
lsrec->ls_event = (uint32_t)*((uint64_t *)(data + rec->dtrd_offset));
949
rec++;
950
951
#ifdef illumos
952
if (rec->dtrd_size != sizeof (uintptr_t))
953
fail(0, "bad lock address size in second record");
954
955
/* LINTED - alignment */
956
lsrec->ls_lock = *((uintptr_t *)(data + rec->dtrd_offset));
957
rec++;
958
#else
959
lsrec->ls_lock = strdup((const char *)(data + rec->dtrd_offset));
960
rec++;
961
#endif
962
963
if (rec->dtrd_size != sizeof (uintptr_t))
964
fail(0, "bad caller size in third record");
965
966
/* LINTED - alignment */
967
lsrec->ls_caller = *((uintptr_t *)(data + rec->dtrd_offset));
968
rec++;
969
970
if (g_recsize > LS_HIST) {
971
int frames, i;
972
pc_t *stack;
973
974
frames = rec->dtrd_size / sizeof (pc_t);
975
/* LINTED - alignment */
976
stack = (pc_t *)(data + rec->dtrd_offset);
977
978
for (i = 1; i < frames; i++)
979
lsrec->ls_stack[i - 1] = stack[i];
980
}
981
}
982
983
/*ARGSUSED*/
984
static int
985
count_aggregate(const dtrace_aggdata_t *agg, void *arg)
986
{
987
*((size_t *)arg) += 1;
988
989
return (DTRACE_AGGWALK_NEXT);
990
}
991
992
static int
993
process_aggregate(const dtrace_aggdata_t *agg, void *arg)
994
{
995
const dtrace_aggdesc_t *aggdesc = agg->dtada_desc;
996
caddr_t data = agg->dtada_data;
997
lsdata_t *lsdata = arg;
998
lsrec_t *lsrec = lsdata->lsd_next;
999
const dtrace_recdesc_t *rec;
1000
uint64_t *avg, *quantized;
1001
int i, j;
1002
1003
assert(lsdata->lsd_count < g_nrecs);
1004
1005
/*
1006
* Aggregation variable IDs are guaranteed to be generated in program
1007
* order, and they are guaranteed to start from DTRACE_AGGVARIDNONE
1008
* plus one. As "avg" appears before "hist" in program order, we know
1009
* that "avg" will be allocated the first aggregation variable ID, and
1010
* "hist" will be allocated the second aggregation variable ID -- and
1011
* we therefore use the aggregation variable ID to differentiate the
1012
* cases.
1013
*/
1014
if (aggdesc->dtagd_varid > DTRACE_AGGVARIDNONE + 1) {
1015
/*
1016
* If this is the histogram entry. We'll copy the quantized
1017
* data into lc_hist, and jump over the rest.
1018
*/
1019
rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
1020
1021
if (aggdesc->dtagd_varid != DTRACE_AGGVARIDNONE + 2)
1022
fail(0, "bad variable ID in aggregation record");
1023
1024
if (rec->dtrd_size !=
1025
DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
1026
fail(0, "bad quantize size in aggregation record");
1027
1028
/* LINTED - alignment */
1029
quantized = (uint64_t *)(data + rec->dtrd_offset);
1030
1031
for (i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
1032
i < DTRACE_QUANTIZE_NBUCKETS; i++, j++)
1033
lsrec->ls_hist[j] = quantized[i];
1034
1035
goto out;
1036
}
1037
1038
lsrec_fill(lsrec, &aggdesc->dtagd_rec[1],
1039
aggdesc->dtagd_nrecs - 1, data);
1040
1041
rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
1042
1043
if (rec->dtrd_size != 2 * sizeof (uint64_t))
1044
fail(0, "bad avg size in aggregation record");
1045
1046
/* LINTED - alignment */
1047
avg = (uint64_t *)(data + rec->dtrd_offset);
1048
lsrec->ls_count = (uint32_t)avg[0];
1049
lsrec->ls_time = (uintptr_t)avg[1];
1050
1051
if (g_recsize >= LS_HIST)
1052
return (DTRACE_AGGWALK_NEXT);
1053
1054
out:
1055
lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
1056
lsdata->lsd_count++;
1057
1058
return (DTRACE_AGGWALK_NEXT);
1059
}
1060
1061
static int
1062
process_trace(const dtrace_probedata_t *pdata, void *arg)
1063
{
1064
lsdata_t *lsdata = arg;
1065
lsrec_t *lsrec = lsdata->lsd_next;
1066
dtrace_eprobedesc_t *edesc = pdata->dtpda_edesc;
1067
caddr_t data = pdata->dtpda_data;
1068
1069
if (lsdata->lsd_count >= g_nrecs)
1070
return (DTRACE_CONSUME_NEXT);
1071
1072
lsrec_fill(lsrec, edesc->dtepd_rec, edesc->dtepd_nrecs, data);
1073
1074
lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
1075
lsdata->lsd_count++;
1076
1077
return (DTRACE_CONSUME_NEXT);
1078
}
1079
1080
static int
1081
process_data(FILE *out, char *data)
1082
{
1083
lsdata_t lsdata;
1084
1085
/* LINTED - alignment */
1086
lsdata.lsd_next = (lsrec_t *)data;
1087
lsdata.lsd_count = 0;
1088
1089
if (g_tracing) {
1090
if (dtrace_consume(g_dtp, out,
1091
process_trace, NULL, &lsdata) != 0)
1092
dfail("failed to consume buffer");
1093
1094
return (lsdata.lsd_count);
1095
}
1096
1097
if (dtrace_aggregate_walk_keyvarsorted(g_dtp,
1098
process_aggregate, &lsdata) != 0)
1099
dfail("failed to walk aggregate");
1100
1101
return (lsdata.lsd_count);
1102
}
1103
1104
/*ARGSUSED*/
1105
static int
1106
drophandler(const dtrace_dropdata_t *data, void *arg)
1107
{
1108
g_dropped++;
1109
(void) fprintf(stderr, "lockstat: warning: %s", data->dtdda_msg);
1110
return (DTRACE_HANDLE_OK);
1111
}
1112
1113
int
1114
main(int argc, char **argv)
1115
{
1116
char *data_buf;
1117
lsrec_t *lsp, **current, **first, **sort_buf, **merge_buf;
1118
FILE *out = stdout;
1119
int c;
1120
pid_t child;
1121
int status;
1122
int i, j;
1123
hrtime_t duration;
1124
char *addrp, *offp, *sizep, *evp, *lastp, *p;
1125
uintptr_t addr;
1126
size_t size, off;
1127
int events_specified = 0;
1128
int exec_errno = 0;
1129
uint32_t event;
1130
char *filt = NULL, *ifilt = NULL;
1131
static uint64_t ev_count[LS_MAX_EVENTS + 1];
1132
static uint64_t ev_time[LS_MAX_EVENTS + 1];
1133
dtrace_optval_t aggsize;
1134
char aggstr[10];
1135
long ncpus;
1136
int dynvar = 0;
1137
int err;
1138
1139
if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
1140
fail(0, "cannot open dtrace library: %s",
1141
dtrace_errmsg(NULL, err));
1142
}
1143
1144
if (dtrace_handle_drop(g_dtp, &drophandler, NULL) == -1)
1145
dfail("couldn't establish drop handler");
1146
1147
if (symtab_init() == -1)
1148
fail(1, "can't load kernel symbols");
1149
1150
g_nrecs = DEFAULT_NRECS;
1151
1152
while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
1153
switch (c) {
1154
case 'b':
1155
g_recsize = LS_BASIC;
1156
break;
1157
1158
case 't':
1159
g_recsize = LS_TIME;
1160
break;
1161
1162
case 'h':
1163
g_recsize = LS_HIST;
1164
break;
1165
1166
case 's':
1167
if (!isdigit(optarg[0]))
1168
usage();
1169
g_stkdepth = atoi(optarg);
1170
if (g_stkdepth > LS_MAX_STACK_DEPTH)
1171
fail(0, "max stack depth is %d",
1172
LS_MAX_STACK_DEPTH);
1173
g_recsize = LS_STACK(g_stkdepth);
1174
break;
1175
1176
case 'n':
1177
if (!isdigit(optarg[0]))
1178
usage();
1179
g_nrecs = atoi(optarg);
1180
break;
1181
1182
case 'd':
1183
if (!isdigit(optarg[0]))
1184
usage();
1185
duration = atoll(optarg);
1186
1187
/*
1188
* XXX -- durations really should be per event
1189
* since the units are different, but it's hard
1190
* to express this nicely in the interface.
1191
* Not clear yet what the cleanest solution is.
1192
*/
1193
for (i = 0; i < LS_MAX_EVENTS; i++)
1194
if (g_event_info[i].ev_type != 'E')
1195
g_min_duration[i] = duration;
1196
1197
break;
1198
1199
case 'i':
1200
if (!isdigit(optarg[0]))
1201
usage();
1202
i = atoi(optarg);
1203
if (i <= 0)
1204
usage();
1205
if (i > MAX_HZ)
1206
fail(0, "max interrupt rate is %d Hz", MAX_HZ);
1207
1208
for (j = 0; j < LS_MAX_EVENTS; j++)
1209
if (strcmp(g_event_info[j].ev_desc,
1210
"Profiling interrupt") == 0)
1211
break;
1212
1213
(void) sprintf(g_event_info[j].ev_name,
1214
"profile:::profile-%d", i);
1215
break;
1216
1217
case 'l':
1218
case 'f':
1219
addrp = strtok(optarg, ",");
1220
sizep = strtok(NULL, ",");
1221
addrp = strtok(optarg, ",+");
1222
offp = strtok(NULL, ",");
1223
1224
size = sizep ? strtoul(sizep, NULL, 0) : 1;
1225
off = offp ? strtoul(offp, NULL, 0) : 0;
1226
1227
if (addrp[0] == '0') {
1228
addr = strtoul(addrp, NULL, 16) + off;
1229
} else {
1230
addr = sym_to_addr(addrp) + off;
1231
if (sizep == NULL)
1232
size = sym_size(addrp) - off;
1233
if (addr - off == 0)
1234
fail(0, "symbol '%s' not found", addrp);
1235
if (size == 0)
1236
size = 1;
1237
}
1238
1239
1240
if (c == 'l') {
1241
filter_add(&filt, "arg0", addr, size);
1242
} else {
1243
filter_add(&filt, "caller", addr, size);
1244
filter_add(&ifilt, "arg0", addr, size);
1245
}
1246
break;
1247
1248
case 'e':
1249
evp = strtok_r(optarg, ",", &lastp);
1250
while (evp) {
1251
int ev1, ev2;
1252
char *evp2;
1253
1254
(void) strtok(evp, "-");
1255
evp2 = strtok(NULL, "-");
1256
ev1 = atoi(evp);
1257
ev2 = evp2 ? atoi(evp2) : ev1;
1258
if ((uint_t)ev1 >= LS_MAX_EVENTS ||
1259
(uint_t)ev2 >= LS_MAX_EVENTS || ev1 > ev2)
1260
fail(0, "-e events out of range");
1261
for (i = ev1; i <= ev2; i++)
1262
g_enabled[i] = 1;
1263
evp = strtok_r(NULL, ",", &lastp);
1264
}
1265
events_specified = 1;
1266
break;
1267
1268
#ifdef illumos
1269
case 'c':
1270
g_cflag = 1;
1271
break;
1272
#endif
1273
1274
case 'k':
1275
g_kflag = 1;
1276
break;
1277
1278
case 'w':
1279
g_wflag = 1;
1280
break;
1281
1282
case 'W':
1283
g_Wflag = 1;
1284
break;
1285
1286
case 'g':
1287
g_gflag = 1;
1288
break;
1289
1290
case 'C':
1291
case 'E':
1292
case 'H':
1293
case 'I':
1294
for (i = 0; i < LS_MAX_EVENTS; i++)
1295
if (g_event_info[i].ev_type == c)
1296
g_enabled[i] = 1;
1297
events_specified = 1;
1298
break;
1299
1300
case 'A':
1301
for (i = 0; i < LS_MAX_EVENTS; i++)
1302
if (strchr("CH", g_event_info[i].ev_type))
1303
g_enabled[i] = 1;
1304
events_specified = 1;
1305
break;
1306
1307
case 'T':
1308
g_tracing = 1;
1309
break;
1310
1311
case 'D':
1312
if (!isdigit(optarg[0]))
1313
usage();
1314
g_topn = atoi(optarg);
1315
break;
1316
1317
case 'R':
1318
g_rates = 1;
1319
break;
1320
1321
case 'p':
1322
g_pflag = 1;
1323
break;
1324
1325
case 'P':
1326
g_Pflag = 1;
1327
break;
1328
1329
case 'o':
1330
if ((out = fopen(optarg, "w")) == NULL)
1331
fail(1, "error opening file");
1332
break;
1333
1334
case 'V':
1335
g_Vflag = 1;
1336
break;
1337
1338
default:
1339
if (strchr(LOCKSTAT_OPTSTR, c) == NULL)
1340
usage();
1341
}
1342
}
1343
1344
if (filt != NULL) {
1345
predicate_add(&g_predicate, filt, NULL, 0);
1346
filter_destroy(&filt);
1347
}
1348
1349
if (ifilt != NULL) {
1350
predicate_add(&g_ipredicate, ifilt, NULL, 0);
1351
filter_destroy(&ifilt);
1352
}
1353
1354
if (g_recsize == 0) {
1355
if (g_gflag) {
1356
g_stkdepth = LS_MAX_STACK_DEPTH;
1357
g_recsize = LS_STACK(g_stkdepth);
1358
} else {
1359
g_recsize = LS_TIME;
1360
}
1361
}
1362
1363
if (g_gflag && g_recsize <= LS_STACK(0))
1364
fail(0, "'-g' requires at least '-s 1' data gathering");
1365
1366
/*
1367
* Make sure the alignment is reasonable
1368
*/
1369
g_recsize = -(-g_recsize & -sizeof (uint64_t));
1370
1371
for (i = 0; i < LS_MAX_EVENTS; i++) {
1372
/*
1373
* If no events were specified, enable -C.
1374
*/
1375
if (!events_specified && g_event_info[i].ev_type == 'C')
1376
g_enabled[i] = 1;
1377
}
1378
1379
for (i = 0; i < LS_MAX_EVENTS; i++) {
1380
if (!g_enabled[i])
1381
continue;
1382
1383
if (g_event_info[i].ev_acquire != NULL) {
1384
/*
1385
* If we've enabled a hold event, we must explicitly
1386
* allocate dynamic variable space.
1387
*/
1388
dynvar = 1;
1389
}
1390
1391
dprog_addevent(i);
1392
}
1393
1394
/*
1395
* Make sure there are remaining arguments to specify a child command
1396
* to execute.
1397
*/
1398
if (argc <= optind)
1399
usage();
1400
1401
if ((ncpus = sysconf(_SC_NPROCESSORS_ONLN)) == -1)
1402
dfail("couldn't determine number of online CPUs");
1403
1404
/*
1405
* By default, we set our data buffer size to be the number of records
1406
* multiplied by the size of the record, doubled to account for some
1407
* DTrace slop and divided by the number of CPUs. We silently clamp
1408
* the aggregation size at both a minimum and a maximum to prevent
1409
* absurdly low or high values.
1410
*/
1411
if ((aggsize = (g_nrecs * g_recsize * 2) / ncpus) < MIN_AGGSIZE)
1412
aggsize = MIN_AGGSIZE;
1413
1414
if (aggsize > MAX_AGGSIZE)
1415
aggsize = MAX_AGGSIZE;
1416
1417
(void) sprintf(aggstr, "%lld", (long long)aggsize);
1418
1419
if (!g_tracing) {
1420
if (dtrace_setopt(g_dtp, "bufsize", "4k") == -1)
1421
dfail("failed to set 'bufsize'");
1422
1423
if (dtrace_setopt(g_dtp, "aggsize", aggstr) == -1)
1424
dfail("failed to set 'aggsize'");
1425
1426
if (dynvar) {
1427
/*
1428
* If we're using dynamic variables, we set our
1429
* dynamic variable size to be one megabyte per CPU,
1430
* with a hard-limit of 32 megabytes. This may still
1431
* be too small in some cases, but it can be tuned
1432
* manually via -x if need be.
1433
*/
1434
(void) sprintf(aggstr, "%ldm", ncpus < 32 ? ncpus : 32);
1435
1436
if (dtrace_setopt(g_dtp, "dynvarsize", aggstr) == -1)
1437
dfail("failed to set 'dynvarsize'");
1438
}
1439
} else {
1440
if (dtrace_setopt(g_dtp, "bufsize", aggstr) == -1)
1441
dfail("failed to set 'bufsize'");
1442
}
1443
1444
if (dtrace_setopt(g_dtp, "statusrate", "10sec") == -1)
1445
dfail("failed to set 'statusrate'");
1446
1447
optind = 1;
1448
while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
1449
switch (c) {
1450
case 'x':
1451
if ((p = strchr(optarg, '=')) != NULL)
1452
*p++ = '\0';
1453
1454
if (dtrace_setopt(g_dtp, optarg, p) != 0)
1455
dfail("failed to set -x %s", optarg);
1456
break;
1457
}
1458
}
1459
1460
argc -= optind;
1461
argv += optind;
1462
1463
dprog_compile();
1464
status_init();
1465
1466
g_elapsed = -gethrtime();
1467
1468
/*
1469
* Spawn the specified command and wait for it to complete.
1470
*/
1471
child = fork();
1472
if (child == -1)
1473
fail(1, "cannot fork");
1474
if (child == 0) {
1475
(void) dtrace_close(g_dtp);
1476
(void) execvp(argv[0], &argv[0]);
1477
exec_errno = errno;
1478
exit(127);
1479
}
1480
1481
#ifdef illumos
1482
while (waitpid(child, &status, WEXITED) != child)
1483
#else
1484
while (waitpid(child, &status, 0) != child)
1485
#endif
1486
status_check();
1487
1488
g_elapsed += gethrtime();
1489
1490
if (WIFEXITED(status)) {
1491
if (WEXITSTATUS(status) != 0) {
1492
if (exec_errno != 0) {
1493
errno = exec_errno;
1494
fail(1, "could not execute %s", argv[0]);
1495
}
1496
(void) fprintf(stderr,
1497
"lockstat: warning: %s exited with code %d\n",
1498
argv[0], WEXITSTATUS(status));
1499
}
1500
} else {
1501
(void) fprintf(stderr,
1502
"lockstat: warning: %s died on signal %d\n",
1503
argv[0], WTERMSIG(status));
1504
}
1505
1506
if (dtrace_stop(g_dtp) == -1)
1507
dfail("failed to stop dtrace");
1508
1509
/*
1510
* Before we read out the results, we need to allocate our buffer.
1511
* If we're tracing, then we'll just use the precalculated size. If
1512
* we're not, then we'll take a snapshot of the aggregate, and walk
1513
* it to count the number of records.
1514
*/
1515
if (!g_tracing) {
1516
if (dtrace_aggregate_snap(g_dtp) != 0)
1517
dfail("failed to snap aggregate");
1518
1519
g_nrecs = 0;
1520
1521
if (dtrace_aggregate_walk(g_dtp,
1522
count_aggregate, &g_nrecs) != 0)
1523
dfail("failed to walk aggregate");
1524
}
1525
1526
#ifdef illumos
1527
if ((data_buf = memalign(sizeof (uint64_t),
1528
(g_nrecs + 1) * g_recsize)) == NULL)
1529
#else
1530
if (posix_memalign((void **)&data_buf, sizeof (uint64_t),
1531
(g_nrecs + 1) * g_recsize) )
1532
#endif
1533
fail(1, "Memory allocation failed");
1534
1535
/*
1536
* Read out the DTrace data.
1537
*/
1538
g_nrecs_used = process_data(out, data_buf);
1539
1540
if (g_nrecs_used > g_nrecs || g_dropped)
1541
(void) fprintf(stderr, "lockstat: warning: "
1542
"ran out of data records (use -n for more)\n");
1543
1544
/* LINTED - alignment */
1545
for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1546
/* LINTED - alignment */
1547
lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
1548
ev_count[lsp->ls_event] += lsp->ls_count;
1549
ev_time[lsp->ls_event] += lsp->ls_time;
1550
}
1551
1552
/*
1553
* If -g was specified, convert stacks into individual records.
1554
*/
1555
if (g_gflag) {
1556
lsrec_t *newlsp, *oldlsp;
1557
1558
#ifdef illumos
1559
newlsp = memalign(sizeof (uint64_t),
1560
g_nrecs_used * LS_TIME * (g_stkdepth + 1));
1561
#else
1562
posix_memalign((void **)&newlsp, sizeof (uint64_t),
1563
g_nrecs_used * LS_TIME * (g_stkdepth + 1));
1564
#endif
1565
if (newlsp == NULL)
1566
fail(1, "Cannot allocate space for -g processing");
1567
lsp = newlsp;
1568
/* LINTED - alignment */
1569
for (i = 0, oldlsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1570
/* LINTED - alignment */
1571
oldlsp = (lsrec_t *)((char *)oldlsp + g_recsize)) {
1572
int fr;
1573
int caller_in_stack = 0;
1574
1575
if (oldlsp->ls_count == 0)
1576
continue;
1577
1578
for (fr = 0; fr < g_stkdepth; fr++) {
1579
if (oldlsp->ls_stack[fr] == 0)
1580
break;
1581
if (oldlsp->ls_stack[fr] == oldlsp->ls_caller)
1582
caller_in_stack = 1;
1583
bcopy(oldlsp, lsp, LS_TIME);
1584
lsp->ls_caller = oldlsp->ls_stack[fr];
1585
#ifndef illumos
1586
lsp->ls_lock = strdup(oldlsp->ls_lock);
1587
#endif
1588
/* LINTED - alignment */
1589
lsp = (lsrec_t *)((char *)lsp + LS_TIME);
1590
}
1591
if (!caller_in_stack) {
1592
bcopy(oldlsp, lsp, LS_TIME);
1593
/* LINTED - alignment */
1594
lsp = (lsrec_t *)((char *)lsp + LS_TIME);
1595
}
1596
#ifndef illumos
1597
free(oldlsp->ls_lock);
1598
#endif
1599
}
1600
g_nrecs = g_nrecs_used =
1601
((uintptr_t)lsp - (uintptr_t)newlsp) / LS_TIME;
1602
g_recsize = LS_TIME;
1603
g_stkdepth = 0;
1604
free(data_buf);
1605
data_buf = (char *)newlsp;
1606
}
1607
1608
if ((sort_buf = calloc(2 * (g_nrecs + 1),
1609
sizeof (void *))) == NULL)
1610
fail(1, "Sort buffer allocation failed");
1611
merge_buf = sort_buf + (g_nrecs + 1);
1612
1613
/*
1614
* Build the sort buffer, discarding zero-count records along the way.
1615
*/
1616
/* LINTED - alignment */
1617
for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1618
/* LINTED - alignment */
1619
lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
1620
if (lsp->ls_count == 0)
1621
lsp->ls_event = LS_MAX_EVENTS;
1622
sort_buf[i] = lsp;
1623
}
1624
1625
if (g_nrecs_used == 0)
1626
exit(0);
1627
1628
/*
1629
* Add a sentinel after the last record
1630
*/
1631
sort_buf[i] = lsp;
1632
lsp->ls_event = LS_MAX_EVENTS;
1633
1634
if (g_tracing) {
1635
report_trace(out, sort_buf);
1636
return (0);
1637
}
1638
1639
/*
1640
* Application of -g may have resulted in multiple records
1641
* with the same signature; coalesce them.
1642
*/
1643
if (g_gflag) {
1644
mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
1645
coalesce(lockcmp, sort_buf, g_nrecs_used);
1646
}
1647
1648
/*
1649
* Coalesce locks within the same symbol if -c option specified.
1650
* Coalesce PCs within the same function if -k option specified.
1651
*/
1652
if (g_cflag || g_kflag) {
1653
for (i = 0; i < g_nrecs_used; i++) {
1654
int fr;
1655
lsp = sort_buf[i];
1656
#ifdef illumos
1657
if (g_cflag)
1658
coalesce_symbol(&lsp->ls_lock);
1659
#endif
1660
if (g_kflag) {
1661
for (fr = 0; fr < g_stkdepth; fr++)
1662
coalesce_symbol(&lsp->ls_stack[fr]);
1663
coalesce_symbol(&lsp->ls_caller);
1664
}
1665
}
1666
mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
1667
coalesce(lockcmp, sort_buf, g_nrecs_used);
1668
}
1669
1670
/*
1671
* Coalesce callers if -w option specified
1672
*/
1673
if (g_wflag) {
1674
mergesort(lock_and_count_cmp_anywhere,
1675
sort_buf, merge_buf, g_nrecs_used);
1676
coalesce(lockcmp_anywhere, sort_buf, g_nrecs_used);
1677
}
1678
1679
/*
1680
* Coalesce locks if -W option specified
1681
*/
1682
if (g_Wflag) {
1683
mergesort(site_and_count_cmp_anylock,
1684
sort_buf, merge_buf, g_nrecs_used);
1685
coalesce(sitecmp_anylock, sort_buf, g_nrecs_used);
1686
}
1687
1688
/*
1689
* Sort data by contention count (ls_count) or total time (ls_time),
1690
* depending on g_Pflag. Override g_Pflag if time wasn't measured.
1691
*/
1692
if (g_recsize < LS_TIME)
1693
g_Pflag = 0;
1694
1695
if (g_Pflag)
1696
mergesort(timecmp, sort_buf, merge_buf, g_nrecs_used);
1697
else
1698
mergesort(countcmp, sort_buf, merge_buf, g_nrecs_used);
1699
1700
/*
1701
* Display data by event type
1702
*/
1703
first = &sort_buf[0];
1704
while ((event = (*first)->ls_event) < LS_MAX_EVENTS) {
1705
current = first;
1706
while ((lsp = *current)->ls_event == event)
1707
current++;
1708
report_stats(out, first, current - first, ev_count[event],
1709
ev_time[event]);
1710
first = current;
1711
}
1712
1713
#ifndef illumos
1714
/*
1715
* Free lock name buffers
1716
*/
1717
for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1718
lsp = (lsrec_t *)((char *)lsp + g_recsize))
1719
free(lsp->ls_lock);
1720
#endif
1721
1722
return (0);
1723
}
1724
1725
static char *
1726
format_symbol(char *buf, uintptr_t addr, int show_size)
1727
{
1728
uintptr_t symoff;
1729
char *symname;
1730
size_t symsize;
1731
1732
symname = addr_to_sym(addr, &symoff, &symsize);
1733
1734
if (show_size && symoff == 0)
1735
(void) sprintf(buf, "%s[%ld]", symname, (long)symsize);
1736
else if (symoff == 0)
1737
(void) sprintf(buf, "%s", symname);
1738
else if (symoff < 16 && bcmp(symname, "cpu[", 4) == 0) /* CPU+PIL */
1739
#ifdef illumos
1740
(void) sprintf(buf, "%s+%ld", symname, (long)symoff);
1741
#else
1742
(void) sprintf(buf, "%s+%s", symname, g_pri_class[(int)symoff]);
1743
#endif
1744
else if (symoff <= symsize || (symoff < 256 && addr != symoff))
1745
(void) sprintf(buf, "%s+0x%llx", symname,
1746
(unsigned long long)symoff);
1747
else
1748
(void) sprintf(buf, "0x%llx", (unsigned long long)addr);
1749
return (buf);
1750
}
1751
1752
static void
1753
report_stats(FILE *out, lsrec_t **sort_buf, size_t nrecs, uint64_t total_count,
1754
uint64_t total_time)
1755
{
1756
uint32_t event = sort_buf[0]->ls_event;
1757
lsrec_t *lsp;
1758
double ptotal = 0.0;
1759
double percent;
1760
int i, j, fr;
1761
int displayed;
1762
int first_bin, last_bin, max_bin_count, total_bin_count;
1763
int rectype;
1764
char buf[256];
1765
char lhdr[80], chdr[80];
1766
1767
rectype = g_recsize;
1768
1769
if (g_topn == 0) {
1770
(void) fprintf(out, "%20llu %s\n",
1771
g_rates == 0 ? total_count :
1772
((unsigned long long)total_count * NANOSEC) / g_elapsed,
1773
g_event_info[event].ev_desc);
1774
return;
1775
}
1776
1777
(void) sprintf(lhdr, "%s%s",
1778
g_Wflag ? "Hottest " : "", g_event_info[event].ev_lhdr);
1779
(void) sprintf(chdr, "%s%s",
1780
g_wflag ? "Hottest " : "", "Caller");
1781
1782
if (!g_pflag)
1783
(void) fprintf(out,
1784
"\n%s: %.0f events in %.3f seconds (%.0f events/sec)\n\n",
1785
g_event_info[event].ev_desc, (double)total_count,
1786
(double)g_elapsed / NANOSEC,
1787
(double)total_count * NANOSEC / g_elapsed);
1788
1789
if (!g_pflag && rectype < LS_HIST) {
1790
(void) sprintf(buf, "%s", g_event_info[event].ev_units);
1791
(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
1792
g_rates ? "ops/s" : "Count",
1793
g_gflag ? "genr" : "indv",
1794
"cuml", "rcnt", rectype >= LS_TIME ? buf : "", lhdr, chdr);
1795
(void) fprintf(out, "---------------------------------"
1796
"----------------------------------------------\n");
1797
}
1798
1799
displayed = 0;
1800
for (i = 0; i < nrecs; i++) {
1801
lsp = sort_buf[i];
1802
1803
if (displayed++ >= g_topn)
1804
break;
1805
1806
if (g_pflag) {
1807
int j;
1808
1809
(void) fprintf(out, "%u %u",
1810
lsp->ls_event, lsp->ls_count);
1811
#ifdef illumos
1812
(void) fprintf(out, " %s",
1813
format_symbol(buf, lsp->ls_lock, g_cflag));
1814
#else
1815
(void) fprintf(out, " %s", lsp->ls_lock);
1816
#endif
1817
(void) fprintf(out, " %s",
1818
format_symbol(buf, lsp->ls_caller, 0));
1819
(void) fprintf(out, " %f",
1820
(double)lsp->ls_refcnt / lsp->ls_count);
1821
if (rectype >= LS_TIME)
1822
(void) fprintf(out, " %llu",
1823
(unsigned long long)lsp->ls_time);
1824
if (rectype >= LS_HIST) {
1825
for (j = 0; j < 64; j++)
1826
(void) fprintf(out, " %u",
1827
lsp->ls_hist[j]);
1828
}
1829
for (j = 0; j < LS_MAX_STACK_DEPTH; j++) {
1830
if (rectype <= LS_STACK(j) ||
1831
lsp->ls_stack[j] == 0)
1832
break;
1833
(void) fprintf(out, " %s",
1834
format_symbol(buf, lsp->ls_stack[j], 0));
1835
}
1836
(void) fprintf(out, "\n");
1837
continue;
1838
}
1839
1840
if (rectype >= LS_HIST) {
1841
(void) fprintf(out, "---------------------------------"
1842
"----------------------------------------------\n");
1843
(void) sprintf(buf, "%s",
1844
g_event_info[event].ev_units);
1845
(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
1846
g_rates ? "ops/s" : "Count",
1847
g_gflag ? "genr" : "indv",
1848
"cuml", "rcnt", buf, lhdr, chdr);
1849
}
1850
1851
if (g_Pflag && total_time != 0)
1852
percent = (lsp->ls_time * 100.00) / total_time;
1853
else
1854
percent = (lsp->ls_count * 100.00) / total_count;
1855
1856
ptotal += percent;
1857
1858
if (rectype >= LS_TIME)
1859
(void) sprintf(buf, "%llu",
1860
(unsigned long long)(lsp->ls_time / lsp->ls_count));
1861
else
1862
buf[0] = '\0';
1863
1864
(void) fprintf(out, "%5llu ",
1865
g_rates == 0 ? lsp->ls_count :
1866
((uint64_t)lsp->ls_count * NANOSEC) / g_elapsed);
1867
1868
(void) fprintf(out, "%3.0f%% ", percent);
1869
1870
if (g_gflag)
1871
(void) fprintf(out, "---- ");
1872
else
1873
(void) fprintf(out, "%3.0f%% ", ptotal);
1874
1875
(void) fprintf(out, "%4.2f %8s ",
1876
(double)lsp->ls_refcnt / lsp->ls_count, buf);
1877
1878
#ifdef illumos
1879
(void) fprintf(out, "%-22s ",
1880
format_symbol(buf, lsp->ls_lock, g_cflag));
1881
#else
1882
(void) fprintf(out, "%-22s ", lsp->ls_lock);
1883
#endif
1884
1885
(void) fprintf(out, "%-24s\n",
1886
format_symbol(buf, lsp->ls_caller, 0));
1887
1888
if (rectype < LS_HIST)
1889
continue;
1890
1891
(void) fprintf(out, "\n");
1892
(void) fprintf(out, "%10s %31s %-9s %-24s\n",
1893
g_event_info[event].ev_units,
1894
"------ Time Distribution ------",
1895
g_rates ? "ops/s" : "count",
1896
rectype > LS_STACK(0) ? "Stack" : "");
1897
1898
first_bin = 0;
1899
while (lsp->ls_hist[first_bin] == 0)
1900
first_bin++;
1901
1902
last_bin = 63;
1903
while (lsp->ls_hist[last_bin] == 0)
1904
last_bin--;
1905
1906
max_bin_count = 0;
1907
total_bin_count = 0;
1908
for (j = first_bin; j <= last_bin; j++) {
1909
total_bin_count += lsp->ls_hist[j];
1910
if (lsp->ls_hist[j] > max_bin_count)
1911
max_bin_count = lsp->ls_hist[j];
1912
}
1913
1914
/*
1915
* If we went a few frames below the caller, ignore them
1916
*/
1917
for (fr = 3; fr > 0; fr--)
1918
if (lsp->ls_stack[fr] == lsp->ls_caller)
1919
break;
1920
1921
for (j = first_bin; j <= last_bin; j++) {
1922
uint_t depth = (lsp->ls_hist[j] * 30) / total_bin_count;
1923
(void) fprintf(out, "%10llu |%s%s %-9u ",
1924
1ULL << j,
1925
"@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 30 - depth,
1926
" " + depth,
1927
g_rates == 0 ? lsp->ls_hist[j] :
1928
(uint_t)(((uint64_t)lsp->ls_hist[j] * NANOSEC) /
1929
g_elapsed));
1930
if (rectype <= LS_STACK(fr) || lsp->ls_stack[fr] == 0) {
1931
(void) fprintf(out, "\n");
1932
continue;
1933
}
1934
(void) fprintf(out, "%-24s\n",
1935
format_symbol(buf, lsp->ls_stack[fr], 0));
1936
fr++;
1937
}
1938
while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
1939
(void) fprintf(out, "%15s %-36s %-24s\n", "", "",
1940
format_symbol(buf, lsp->ls_stack[fr], 0));
1941
fr++;
1942
}
1943
}
1944
1945
if (!g_pflag)
1946
(void) fprintf(out, "---------------------------------"
1947
"----------------------------------------------\n");
1948
1949
(void) fflush(out);
1950
}
1951
1952
static void
1953
report_trace(FILE *out, lsrec_t **sort_buf)
1954
{
1955
lsrec_t *lsp;
1956
int i, fr;
1957
int rectype;
1958
char buf[256], buf2[256];
1959
1960
rectype = g_recsize;
1961
1962
if (!g_pflag) {
1963
(void) fprintf(out, "%5s %7s %11s %-24s %-24s\n",
1964
"Event", "Time", "Owner", "Lock", "Caller");
1965
(void) fprintf(out, "---------------------------------"
1966
"----------------------------------------------\n");
1967
}
1968
1969
for (i = 0; i < g_nrecs_used; i++) {
1970
1971
lsp = sort_buf[i];
1972
1973
if (lsp->ls_event >= LS_MAX_EVENTS || lsp->ls_count == 0)
1974
continue;
1975
1976
(void) fprintf(out, "%2d %10llu %11p %-24s %-24s\n",
1977
lsp->ls_event, (unsigned long long)lsp->ls_time,
1978
(void *)lsp->ls_next,
1979
#ifdef illumos
1980
format_symbol(buf, lsp->ls_lock, 0),
1981
#else
1982
lsp->ls_lock,
1983
#endif
1984
format_symbol(buf2, lsp->ls_caller, 0));
1985
1986
if (rectype <= LS_STACK(0))
1987
continue;
1988
1989
/*
1990
* If we went a few frames below the caller, ignore them
1991
*/
1992
for (fr = 3; fr > 0; fr--)
1993
if (lsp->ls_stack[fr] == lsp->ls_caller)
1994
break;
1995
1996
while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
1997
(void) fprintf(out, "%53s %-24s\n", "",
1998
format_symbol(buf, lsp->ls_stack[fr], 0));
1999
fr++;
2000
}
2001
(void) fprintf(out, "\n");
2002
}
2003
2004
(void) fflush(out);
2005
}
2006
2007