Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/accounting/getdelays.c
26285 views
1
// SPDX-License-Identifier: GPL-2.0
/* getdelays.c
 *
 * Utility to get per-pid and per-tgid delay accounting statistics
 * Also illustrates usage of the taskstats interface
 *
 * Copyright (C) Shailabh Nagar, IBM Corp. 2005
 * Copyright (C) Balbir Singh, IBM Corp. 2006
 * Copyright (c) Jay Lan, SGI. 2006
 *
 * Compile with
 *	gcc -I/usr/src/linux/include getdelays.c -o getdelays
 */
14
15
#include <stdio.h>
16
#include <stdlib.h>
17
#include <errno.h>
18
#include <unistd.h>
19
#include <poll.h>
20
#include <string.h>
21
#include <fcntl.h>
22
#include <sys/types.h>
23
#include <sys/stat.h>
24
#include <sys/socket.h>
25
#include <sys/wait.h>
26
#include <signal.h>
27
28
#include <linux/genetlink.h>
29
#include <linux/taskstats.h>
30
#include <linux/cgroupstats.h>
31
32
/*
 * Generic macros for dealing with netlink sockets. Might be duplicated
 * elsewhere. It is recommended that commercial grade applications use
 * libnl or libnetlink and use the interfaces provided by the library
 *
 * GENLMSG_DATA(glh)    - first byte after the netlink header and the
 *                        generic netlink header of message glh
 * GENLMSG_PAYLOAD(glh) - netlink payload length of glh minus the generic
 *                        netlink header
 * NLA_DATA(na)         - first byte after the header of attribute na
 * NLA_PAYLOAD(len)     - len minus the attribute header length
 *
 * Pointer arithmetic is done on char * rather than void * (arithmetic on
 * void * is a GNU extension), and macro arguments are parenthesized so
 * that expression arguments expand as expected.
 */
#define GENLMSG_DATA(glh)	((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
#define GENLMSG_PAYLOAD(glh)	(NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
#define NLA_DATA(na)		((void *)((char *)(na) + NLA_HDRLEN))
#define NLA_PAYLOAD(len)	((len) - NLA_HDRLEN)
41
42
/*
 * err() - print a printf-style message on stderr, then terminate.
 * @code: status passed to exit()
 * @fmt:  printf-style format string; its arguments, if any, follow
 */
#define err(code, fmt, ...)				\
	do {						\
		fprintf(stderr, fmt, ##__VA_ARGS__);	\
		exit(code);				\
	} while (0)
47
48
/* Command line state shared by main() and the helpers in this file. */

/* -r: receive buffer size applied to the netlink socket; 0 leaves it alone */
int rcvbufsz;

/* Buffer that get_family_id() fills with the genetlink family name */
char name[100];

/* -v: non-zero enables the PRINTF() debug output */
int dbg;

/* Report selectors, set by -d, -i and -q respectively */
int print_delays, print_io_accounting, print_task_context_switch_counts;
54
55
/*
 * PRINTF() - printf() that only produces output when dbg is non-zero.
 *
 * The body is wrapped in do { } while (0) so that the expansion plus the
 * caller's trailing semicolon form exactly one statement; a bare { } block
 * breaks "if (x) PRINTF(...); else ...".
 */
#define PRINTF(fmt, arg...)			\
	do {					\
		if (dbg)			\
			printf(fmt, ##arg);	\
	} while (0)
60
61
/* Largest response requested, or message sent, in bytes */
#define MAX_MSG_SIZE	1024
/* Upper bound on the number of cpus named in a cpumask argument */
#define MAX_CPUS	32

/*
 * Buffer layout used for every message sent or received: netlink header,
 * generic netlink header, then MAX_MSG_SIZE bytes for the attributes.
 */
struct msgtemplate {
	struct nlmsghdr		n;
	struct genlmsghdr	g;
	char			buf[MAX_MSG_SIZE];
};

/* Text of the -m argument, kept for register/deregister requests */
char cpumask[100 + 6 * MAX_CPUS];
73
74
/*
 * usage() - print the command line synopsis and a one-line description of
 * every option accepted by main()'s getopt() string on stderr.
 */
static void usage(void)
{
	fprintf(stderr, "getdelays [-dilqv] [-w logfile] [-r bufsize] "
			"[-m cpumask] [-t tgid] [-p pid] [-C container] "
			"[-c cmd]\n");
	fprintf(stderr, " -d: print delayacct stats\n");
	fprintf(stderr, " -i: print IO accounting (works only with -p)\n");
	fprintf(stderr, " -l: listen forever\n");
	fprintf(stderr, " -q: print task/process context switch counts\n");
	fprintf(stderr, " -v: debug on\n");
	fprintf(stderr, " -w: write received stats to logfile\n");
	fprintf(stderr, " -r: receive buffer size for the netlink socket\n");
	fprintf(stderr, " -m: cpumask to register for\n");
	fprintf(stderr, " -t: tgid to query\n");
	fprintf(stderr, " -p: pid to query\n");
	fprintf(stderr, " -C: container path\n");
	fprintf(stderr, " -c: fork and exec cmd, then query its pid\n");
}
84
85
/*
86
* Create a raw netlink socket and bind
87
*/
88
static int create_nl_socket(int protocol)
89
{
90
int fd;
91
struct sockaddr_nl local;
92
93
fd = socket(AF_NETLINK, SOCK_RAW, protocol);
94
if (fd < 0)
95
return -1;
96
97
if (rcvbufsz)
98
if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
99
&rcvbufsz, sizeof(rcvbufsz)) < 0) {
100
fprintf(stderr, "Unable to set socket rcv buf size to %d\n",
101
rcvbufsz);
102
goto error;
103
}
104
105
memset(&local, 0, sizeof(local));
106
local.nl_family = AF_NETLINK;
107
108
if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
109
goto error;
110
111
return fd;
112
error:
113
close(fd);
114
return -1;
115
}
116
117
118
static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
119
__u8 genl_cmd, __u16 nla_type,
120
void *nla_data, int nla_len)
121
{
122
struct nlattr *na;
123
struct sockaddr_nl nladdr;
124
int r, buflen;
125
char *buf;
126
127
struct msgtemplate msg;
128
129
msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
130
msg.n.nlmsg_type = nlmsg_type;
131
msg.n.nlmsg_flags = NLM_F_REQUEST;
132
msg.n.nlmsg_seq = 0;
133
msg.n.nlmsg_pid = nlmsg_pid;
134
msg.g.cmd = genl_cmd;
135
msg.g.version = 0x1;
136
na = (struct nlattr *) GENLMSG_DATA(&msg);
137
na->nla_type = nla_type;
138
na->nla_len = nla_len + NLA_HDRLEN;
139
memcpy(NLA_DATA(na), nla_data, nla_len);
140
msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
141
142
buf = (char *) &msg;
143
buflen = msg.n.nlmsg_len ;
144
memset(&nladdr, 0, sizeof(nladdr));
145
nladdr.nl_family = AF_NETLINK;
146
while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
147
sizeof(nladdr))) < buflen) {
148
if (r > 0) {
149
buf += r;
150
buflen -= r;
151
} else if (errno != EAGAIN)
152
return -1;
153
}
154
return 0;
155
}
156
157
158
/*
159
* Probe the controller in genetlink to find the family id
160
* for the TASKSTATS family
161
*/
162
static int get_family_id(int sd)
163
{
164
struct {
165
struct nlmsghdr n;
166
struct genlmsghdr g;
167
char buf[256];
168
} ans;
169
170
int id = 0, rc;
171
struct nlattr *na;
172
int rep_len;
173
174
strcpy(name, TASKSTATS_GENL_NAME);
175
rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
176
CTRL_ATTR_FAMILY_NAME, (void *)name,
177
strlen(TASKSTATS_GENL_NAME)+1);
178
if (rc < 0)
179
return 0; /* sendto() failure? */
180
181
rep_len = recv(sd, &ans, sizeof(ans), 0);
182
if (ans.n.nlmsg_type == NLMSG_ERROR ||
183
(rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
184
return 0;
185
186
na = (struct nlattr *) GENLMSG_DATA(&ans);
187
na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
188
if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
189
id = *(__u16 *) NLA_DATA(na);
190
}
191
return id;
192
}
193
194
/*
 * Scale delay totals for printing in milliseconds: both macros divide by
 * 1000000 (so the inputs are presumably nanoseconds - the callers print
 * the result with an "ms" suffix).
 *
 * average_ms(t, c) additionally divides by the event count c; a count of
 * zero is treated as 1 so the division is always defined. Note that c is
 * evaluated twice.
 *
 * Arguments are parenthesized so that expression arguments such as
 * "a + b" are scaled as a whole.
 */
#define average_ms(t, c)	((t) / 1000000ULL / ((c) ? (c) : 1))
#define delay_ms(t)		((t) / 1000000ULL)
196
197
/*
 * Version compatibility note:
 * Field availability depends on taskstats version (t->version),
 * corresponding to TASKSTATS_VERSION in kernel headers
 * see include/uapi/linux/taskstats.h
 *
 * Version feature mapping:
 * version >= 11 - supports COMPACT statistics
 * version >= 13 - supports WPCOPY statistics
 * version >= 14 - supports IRQ statistics
 * version >= 16 - supports *_max and *_min delay statistics
 *
 * Always verify version before accessing version-dependent fields
 * to maintain backward compatibility.
 */

/*
 * PRINT_CPU_DELAY() - print a header row and a value row with the CPU
 * counters of *t.
 * @version: taskstats version of the record; from version 16 on the
 *           "delay max" and "delay min" columns are printed as well
 * @t:       pointer to the record providing the cpu_* fields
 *
 * Relies on average_ms() and delay_ms() being defined at the point of use.
 */
#define PRINT_CPU_DELAY(version, t) \
	do { \
		if ((version) >= 16) { \
			printf("%-10s%15s%15s%15s%15s%15s%15s%15s\n", \
				"CPU", "count", "real total", "virtual total", \
				"delay total", "delay average", "delay max", "delay min"); \
			printf(" %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n", \
				(unsigned long long)(t)->cpu_count, \
				(unsigned long long)(t)->cpu_run_real_total, \
				(unsigned long long)(t)->cpu_run_virtual_total, \
				(unsigned long long)(t)->cpu_delay_total, \
				average_ms((double)(t)->cpu_delay_total, (t)->cpu_count), \
				delay_ms((double)(t)->cpu_delay_max), \
				delay_ms((double)(t)->cpu_delay_min)); \
		} else { \
			printf("%-10s%15s%15s%15s%15s%15s\n", \
				"CPU", "count", "real total", "virtual total", \
				"delay total", "delay average"); \
			printf(" %15llu%15llu%15llu%15llu%15.3fms\n", \
				(unsigned long long)(t)->cpu_count, \
				(unsigned long long)(t)->cpu_run_real_total, \
				(unsigned long long)(t)->cpu_run_virtual_total, \
				(unsigned long long)(t)->cpu_delay_total, \
				average_ms((double)(t)->cpu_delay_total, (t)->cpu_count)); \
		} \
	} while (0)
238
/*
 * PRINT_FILED_DELAY() - print a header row and a value row for one delay
 * category of *t.
 * @name:    label printed in the first column of the header row
 * @version: taskstats version of the record; from version 16 on the
 *           "delay max" and "delay min" columns are printed as well
 * @t:       pointer to the record
 * @count, @total, @max, @min: names of the members of *t to print; @max
 *           and @min are only accessed when version >= 16
 *
 * Relies on average_ms() and delay_ms() being defined at the point of use.
 * ("FILED" is kept as spelled because callers use this name.)
 */
#define PRINT_FILED_DELAY(name, version, t, count, total, max, min) \
	do { \
		if ((version) >= 16) { \
			printf("%-10s%15s%15s%15s%15s%15s\n", \
				(name), "count", "delay total", "delay average", \
				"delay max", "delay min"); \
			printf(" %15llu%15llu%15.3fms%13.6fms%13.6fms\n", \
				(unsigned long long)(t)->count, \
				(unsigned long long)(t)->total, \
				average_ms((double)(t)->total, (t)->count), \
				delay_ms((double)(t)->max), \
				delay_ms((double)(t)->min)); \
		} else { \
			printf("%-10s%15s%15s%15s\n", \
				(name), "count", "delay total", "delay average"); \
			printf(" %15llu%15llu%15.3fms\n", \
				(unsigned long long)(t)->count, \
				(unsigned long long)(t)->total, \
				average_ms((double)(t)->total, (t)->count)); \
		} \
	} while (0)
259
260
static void print_delayacct(struct taskstats *t)
261
{
262
printf("\n\n");
263
264
PRINT_CPU_DELAY(t->version, t);
265
266
PRINT_FILED_DELAY("IO", t->version, t,
267
blkio_count, blkio_delay_total,
268
blkio_delay_max, blkio_delay_min);
269
270
PRINT_FILED_DELAY("SWAP", t->version, t,
271
swapin_count, swapin_delay_total,
272
swapin_delay_max, swapin_delay_min);
273
274
PRINT_FILED_DELAY("RECLAIM", t->version, t,
275
freepages_count, freepages_delay_total,
276
freepages_delay_max, freepages_delay_min);
277
278
PRINT_FILED_DELAY("THRASHING", t->version, t,
279
thrashing_count, thrashing_delay_total,
280
thrashing_delay_max, thrashing_delay_min);
281
282
if (t->version >= 11) {
283
PRINT_FILED_DELAY("COMPACT", t->version, t,
284
compact_count, compact_delay_total,
285
compact_delay_max, compact_delay_min);
286
}
287
288
if (t->version >= 13) {
289
PRINT_FILED_DELAY("WPCOPY", t->version, t,
290
wpcopy_count, wpcopy_delay_total,
291
wpcopy_delay_max, wpcopy_delay_min);
292
}
293
294
if (t->version >= 14) {
295
PRINT_FILED_DELAY("IRQ", t->version, t,
296
irq_count, irq_delay_total,
297
irq_delay_max, irq_delay_min);
298
}
299
}
300
301
/*
 * task_context_switch_counts() - print the context switch counters of a
 * record: a header row followed by the nvcsw and nivcsw values.
 * @t: taskstats record to print
 */
static void task_context_switch_counts(struct taskstats *t)
{
	unsigned long long voluntary = t->nvcsw;
	unsigned long long involuntary = t->nivcsw;

	printf("\n\nTask %15s%15s\n", "voluntary", "nonvoluntary");
	printf(" %15llu%15llu\n", voluntary, involuntary);
}
308
309
/*
 * print_cgroupstats() - print the task state counters of a cgroupstats
 * record on a single line.
 * @c: cgroupstats record to print
 */
static void print_cgroupstats(struct cgroupstats *c)
{
	unsigned long long sleeping = c->nr_sleeping;
	unsigned long long blocked = c->nr_io_wait;
	unsigned long long running = c->nr_running;
	unsigned long long stopped = c->nr_stopped;
	unsigned long long uninterruptible = c->nr_uninterruptible;

	printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
		"uninterruptible %llu\n",
		sleeping, blocked, running, stopped, uninterruptible);
}
318
319
320
/*
 * print_ioacct() - print the IO byte counters of a record, prefixed with
 * the command name stored in it.
 * @t: taskstats record to print
 */
static void print_ioacct(struct taskstats *t)
{
	unsigned long long bytes_read = t->read_bytes;
	unsigned long long bytes_written = t->write_bytes;
	unsigned long long bytes_cancelled = t->cancelled_write_bytes;

	printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
		t->ac_comm, bytes_read, bytes_written, bytes_cancelled);
}
328
329
int main(int argc, char *argv[])
330
{
331
int c, rc, rep_len, aggr_len, len2;
332
int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC;
333
__u16 id;
334
__u32 mypid;
335
336
struct nlattr *na;
337
int nl_sd = -1;
338
int len = 0;
339
pid_t tid = 0;
340
pid_t rtid = 0;
341
342
int fd = 0;
343
int write_file = 0;
344
int maskset = 0;
345
char *logfile = NULL;
346
int loop = 0;
347
int containerset = 0;
348
char *containerpath = NULL;
349
int cfd = 0;
350
int forking = 0;
351
sigset_t sigset;
352
353
struct msgtemplate msg;
354
355
while (!forking) {
356
c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:c:");
357
if (c < 0)
358
break;
359
360
switch (c) {
361
case 'd':
362
printf("print delayacct stats ON\n");
363
print_delays = 1;
364
break;
365
case 'i':
366
printf("printing IO accounting\n");
367
print_io_accounting = 1;
368
break;
369
case 'q':
370
printf("printing task/process context switch rates\n");
371
print_task_context_switch_counts = 1;
372
break;
373
case 'C':
374
containerset = 1;
375
containerpath = optarg;
376
break;
377
case 'w':
378
logfile = strdup(optarg);
379
printf("write to file %s\n", logfile);
380
write_file = 1;
381
break;
382
case 'r':
383
rcvbufsz = atoi(optarg);
384
printf("receive buf size %d\n", rcvbufsz);
385
if (rcvbufsz < 0)
386
err(1, "Invalid rcv buf size\n");
387
break;
388
case 'm':
389
strncpy(cpumask, optarg, sizeof(cpumask));
390
cpumask[sizeof(cpumask) - 1] = '\0';
391
maskset = 1;
392
printf("cpumask %s maskset %d\n", cpumask, maskset);
393
break;
394
case 't':
395
tid = atoi(optarg);
396
if (!tid)
397
err(1, "Invalid tgid\n");
398
cmd_type = TASKSTATS_CMD_ATTR_TGID;
399
break;
400
case 'p':
401
tid = atoi(optarg);
402
if (!tid)
403
err(1, "Invalid pid\n");
404
cmd_type = TASKSTATS_CMD_ATTR_PID;
405
break;
406
case 'c':
407
408
/* Block SIGCHLD for sigwait() later */
409
if (sigemptyset(&sigset) == -1)
410
err(1, "Failed to empty sigset");
411
if (sigaddset(&sigset, SIGCHLD))
412
err(1, "Failed to set sigchld in sigset");
413
sigprocmask(SIG_BLOCK, &sigset, NULL);
414
415
/* fork/exec a child */
416
tid = fork();
417
if (tid < 0)
418
err(1, "Fork failed\n");
419
if (tid == 0)
420
if (execvp(argv[optind - 1],
421
&argv[optind - 1]) < 0)
422
exit(-1);
423
424
/* Set the command type and avoid further processing */
425
cmd_type = TASKSTATS_CMD_ATTR_PID;
426
forking = 1;
427
break;
428
case 'v':
429
printf("debug on\n");
430
dbg = 1;
431
break;
432
case 'l':
433
printf("listen forever\n");
434
loop = 1;
435
break;
436
default:
437
usage();
438
exit(-1);
439
}
440
}
441
442
if (write_file) {
443
fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
444
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
445
if (fd == -1) {
446
perror("Cannot open output file\n");
447
exit(1);
448
}
449
}
450
451
nl_sd = create_nl_socket(NETLINK_GENERIC);
452
if (nl_sd < 0)
453
err(1, "error creating Netlink socket\n");
454
455
456
mypid = getpid();
457
id = get_family_id(nl_sd);
458
if (!id) {
459
fprintf(stderr, "Error getting family id, errno %d\n", errno);
460
goto err;
461
}
462
PRINTF("family id %d\n", id);
463
464
if (maskset) {
465
rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
466
TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
467
&cpumask, strlen(cpumask) + 1);
468
PRINTF("Sent register cpumask, retval %d\n", rc);
469
if (rc < 0) {
470
fprintf(stderr, "error sending register cpumask\n");
471
goto err;
472
}
473
}
474
475
if (tid && containerset) {
476
fprintf(stderr, "Select either -t or -C, not both\n");
477
goto err;
478
}
479
480
/*
481
* If we forked a child, wait for it to exit. Cannot use waitpid()
482
* as all the delicious data would be reaped as part of the wait
483
*/
484
if (tid && forking) {
485
int sig_received;
486
sigwait(&sigset, &sig_received);
487
}
488
489
if (tid) {
490
rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
491
cmd_type, &tid, sizeof(__u32));
492
PRINTF("Sent pid/tgid, retval %d\n", rc);
493
if (rc < 0) {
494
fprintf(stderr, "error sending tid/tgid cmd\n");
495
goto done;
496
}
497
}
498
499
if (containerset) {
500
cfd = open(containerpath, O_RDONLY);
501
if (cfd < 0) {
502
perror("error opening container file");
503
goto err;
504
}
505
rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
506
CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32));
507
if (rc < 0) {
508
perror("error sending cgroupstats command");
509
goto err;
510
}
511
}
512
if (!maskset && !tid && !containerset) {
513
usage();
514
goto err;
515
}
516
517
do {
518
rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
519
PRINTF("received %d bytes\n", rep_len);
520
521
if (rep_len < 0) {
522
fprintf(stderr, "nonfatal reply error: errno %d\n",
523
errno);
524
continue;
525
}
526
if (msg.n.nlmsg_type == NLMSG_ERROR ||
527
!NLMSG_OK((&msg.n), rep_len)) {
528
struct nlmsgerr *err = NLMSG_DATA(&msg);
529
fprintf(stderr, "fatal reply error, errno %d\n",
530
err->error);
531
goto done;
532
}
533
534
PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
535
sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
536
537
538
rep_len = GENLMSG_PAYLOAD(&msg.n);
539
540
na = (struct nlattr *) GENLMSG_DATA(&msg);
541
len = 0;
542
while (len < rep_len) {
543
len += NLA_ALIGN(na->nla_len);
544
switch (na->nla_type) {
545
case TASKSTATS_TYPE_AGGR_TGID:
546
/* Fall through */
547
case TASKSTATS_TYPE_AGGR_PID:
548
aggr_len = NLA_PAYLOAD(na->nla_len);
549
len2 = 0;
550
/* For nested attributes, na follows */
551
na = (struct nlattr *) NLA_DATA(na);
552
while (len2 < aggr_len) {
553
switch (na->nla_type) {
554
case TASKSTATS_TYPE_PID:
555
rtid = *(int *) NLA_DATA(na);
556
if (print_delays)
557
printf("PID\t%d\n", rtid);
558
break;
559
case TASKSTATS_TYPE_TGID:
560
rtid = *(int *) NLA_DATA(na);
561
if (print_delays)
562
printf("TGID\t%d\n", rtid);
563
break;
564
case TASKSTATS_TYPE_STATS:
565
if (print_delays)
566
print_delayacct((struct taskstats *) NLA_DATA(na));
567
if (print_io_accounting)
568
print_ioacct((struct taskstats *) NLA_DATA(na));
569
if (print_task_context_switch_counts)
570
task_context_switch_counts((struct taskstats *) NLA_DATA(na));
571
if (fd) {
572
if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
573
err(1,"write error\n");
574
}
575
}
576
if (!loop)
577
goto done;
578
break;
579
case TASKSTATS_TYPE_NULL:
580
break;
581
default:
582
fprintf(stderr, "Unknown nested"
583
" nla_type %d\n",
584
na->nla_type);
585
break;
586
}
587
len2 += NLA_ALIGN(na->nla_len);
588
na = (struct nlattr *)((char *)na +
589
NLA_ALIGN(na->nla_len));
590
}
591
break;
592
593
case CGROUPSTATS_TYPE_CGROUP_STATS:
594
print_cgroupstats(NLA_DATA(na));
595
break;
596
default:
597
fprintf(stderr, "Unknown nla_type %d\n",
598
na->nla_type);
599
case TASKSTATS_TYPE_NULL:
600
break;
601
}
602
na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
603
}
604
} while (loop);
605
done:
606
if (maskset) {
607
rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
608
TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
609
&cpumask, strlen(cpumask) + 1);
610
printf("Sent deregister mask, retval %d\n", rc);
611
if (rc < 0)
612
err(rc, "error sending deregister cpumask\n");
613
}
614
err:
615
close(nl_sd);
616
if (fd)
617
close(fd);
618
if (cfd)
619
close(cfd);
620
return 0;
621
}
622
623