Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/accounting/delaytop.c
26285 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* delaytop.c - system-wide delay monitoring tool.
4
*
5
* This tool provides real-time monitoring and statistics of
6
* system, container, and task-level delays, including CPU,
7
 * memory, IO, and IRQ. It runs interactively (top-like) and can
 * output delay information for the whole system, specific
 * containers (cgroups), or individual tasks (PIDs).
10
*
11
* Key features:
12
* - Collects per-task delay accounting statistics via taskstats.
13
* - Collects system-wide PSI information.
14
* - Supports sorting, filtering.
15
 * - Supports interactive mode (screen refresh).
16
*
17
* Copyright (C) Fan Yu, ZTE Corp. 2025
18
* Copyright (C) Wang Yaxin, ZTE Corp. 2025
19
*
20
* Compile with
21
* gcc -I/usr/src/linux/include delaytop.c -o delaytop
22
*/
23
24
#include <stdio.h>
25
#include <stdlib.h>
26
#include <string.h>
27
#include <errno.h>
28
#include <unistd.h>
29
#include <fcntl.h>
30
#include <getopt.h>
31
#include <signal.h>
32
#include <time.h>
33
#include <dirent.h>
34
#include <ctype.h>
35
#include <stdbool.h>
36
#include <sys/types.h>
37
#include <sys/stat.h>
38
#include <sys/socket.h>
39
#include <sys/select.h>
40
#include <termios.h>
41
#include <limits.h>
42
#include <linux/genetlink.h>
43
#include <linux/taskstats.h>
44
#include <linux/cgroupstats.h>
45
46
/* PSI (pressure stall information) source files under /proc. */
#define PSI_CPU_SOME "/proc/pressure/cpu"
#define PSI_CPU_FULL "/proc/pressure/cpu"
#define PSI_MEMORY_SOME "/proc/pressure/memory"
#define PSI_MEMORY_FULL "/proc/pressure/memory"
#define PSI_IO_SOME "/proc/pressure/io"
#define PSI_IO_FULL "/proc/pressure/io"
#define PSI_IRQ_FULL "/proc/pressure/irq"

/* Netlink attribute iteration/access helpers. */
#define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len)))
#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
#define NLA_PAYLOAD(len) (len - NLA_HDRLEN)

/* Generic netlink payload helpers. */
#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)

#define TASK_COMM_LEN 16	/* size of task_info.command buffer */
#define MAX_MSG_SIZE 1024	/* netlink message payload buffer size */
#define MAX_TASKS 1000		/* upper bound on tracked tasks */
/* Copy one taskstats field into the tasks[] slot being filled. */
#define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field
/* fprintf wrapper that evaluates to true when the write succeeded. */
#define BOOL_FPRINT(stream, fmt, ...) \
	({ \
		int ret = fprintf(stream, fmt, ##__VA_ARGS__); \
		ret >= 0; \
	})
/* "label  avg10%/avg60%/avg300%/total(ms)" output line. */
#define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
71
72
/* Program settings structure, filled by parse_args(). */
struct config {
	int delay;		/* Update interval in seconds */
	int iterations;		/* Number of iterations, 0 == infinite */
	int max_processes;	/* Maximum number of processes to show */
	char sort_field;	/* Field to sort by (currently only 'c' = CPU delay) */
	int output_one_time;	/* Output once and exit */
	int monitor_pid;	/* Monitor specific PID (0 == all PIDs) */
	char *container_path;	/* Path to container cgroup (strdup'd, freed in main) */
};
82
83
/* PSI statistics structure */
/*
 * One snapshot of /proc/pressure/{cpu,memory,io,irq}.
 * The avg10/avg60/avg300 fields are the kernel-reported percentage
 * averages; *_total is the cumulative stall-time counter from the
 * same line (display_results() divides it by 1000 and labels it ms).
 */
struct psi_stats {
	double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300;
	unsigned long long cpu_some_total;
	double cpu_full_avg10, cpu_full_avg60, cpu_full_avg300;
	unsigned long long cpu_full_total;
	double memory_some_avg10, memory_some_avg60, memory_some_avg300;
	unsigned long long memory_some_total;
	double memory_full_avg10, memory_full_avg60, memory_full_avg300;
	unsigned long long memory_full_total;
	double io_some_avg10, io_some_avg60, io_some_avg300;
	unsigned long long io_some_total;
	double io_full_avg10, io_full_avg60, io_full_avg300;
	unsigned long long io_full_total;
	double irq_full_avg10, irq_full_avg60, irq_full_avg300;
	unsigned long long irq_full_total;
};
100
101
/* Task delay information structure */
/*
 * Per-task taskstats sample.  The *_count fields are numbers of delay
 * events; the *_delay_total fields are cumulative delays in nanoseconds
 * (average_ms() divides by 1e6 to obtain milliseconds per event).
 */
struct task_info {
	int pid;
	int tgid;
	char command[TASK_COMM_LEN];	/* from /proc/<pid>/comm */
	unsigned long long cpu_count;
	unsigned long long cpu_delay_total;
	unsigned long long blkio_count;
	unsigned long long blkio_delay_total;
	unsigned long long swapin_count;
	unsigned long long swapin_delay_total;
	unsigned long long freepages_count;
	unsigned long long freepages_delay_total;
	unsigned long long thrashing_count;
	unsigned long long thrashing_delay_total;
	unsigned long long compact_count;
	unsigned long long compact_delay_total;
	unsigned long long wpcopy_count;
	unsigned long long wpcopy_delay_total;
	unsigned long long irq_count;
	unsigned long long irq_delay_total;
};
123
124
/* Container statistics structure, filled from struct cgroupstats. */
struct container_stats {
	int nr_sleeping;	/* Number of sleeping processes */
	int nr_running;		/* Number of running processes */
	int nr_stopped;		/* Number of stopped processes */
	int nr_uninterruptible;	/* Number of uninterruptible processes */
	int nr_io_wait;		/* Number of processes in IO wait */
};
132
133
/* Global variables */
static struct config cfg;			/* parsed command-line settings */
static struct psi_stats psi;			/* latest PSI snapshot */
static struct task_info tasks[MAX_TASKS];	/* per-task delay samples */
static int task_count;				/* valid entries in tasks[] */
static int running = 1;				/* main-loop flag, cleared on 'q' */
static struct container_stats container_stats;	/* latest cgroupstats sample */

/* Netlink socket variables */
static int nl_sd = -1;		/* NETLINK_GENERIC socket fd */
static int family_id;		/* taskstats genetlink family id */

/* Set terminal to non-canonical mode for q-to-quit */
static struct termios orig_termios;	/* saved attrs, restored by disable_raw_mode() */
147
static void enable_raw_mode(void)
148
{
149
struct termios raw;
150
151
tcgetattr(STDIN_FILENO, &orig_termios);
152
raw = orig_termios;
153
raw.c_lflag &= ~(ICANON | ECHO);
154
tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw);
155
}
156
/* Restore the terminal attributes saved by enable_raw_mode(). */
static void disable_raw_mode(void)
{
	tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
}
160
161
/* Display usage information and command line options, then exit(0). */
static void usage(void)
{
	fputs("Usage: delaytop [Options]\n"
	      "Options:\n"
	      " -h, --help Show this help message and exit\n"
	      " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
	      " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
	      " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
	      " -o, --once Display once and exit\n"
	      " -p, --pid=PID Monitor only the specified PID\n"
	      " -C, --container=PATH Monitor the container at specified cgroup path\n",
	      stdout);
	exit(0);
}
175
176
/* Parse command line arguments and set configuration */
177
static void parse_args(int argc, char **argv)
178
{
179
int c;
180
struct option long_options[] = {
181
{"help", no_argument, 0, 'h'},
182
{"delay", required_argument, 0, 'd'},
183
{"iterations", required_argument, 0, 'n'},
184
{"pid", required_argument, 0, 'p'},
185
{"once", no_argument, 0, 'o'},
186
{"processes", required_argument, 0, 'P'},
187
{"container", required_argument, 0, 'C'},
188
{0, 0, 0, 0}
189
};
190
191
/* Set defaults */
192
cfg.delay = 2;
193
cfg.iterations = 0;
194
cfg.max_processes = 20;
195
cfg.sort_field = 'c'; /* Default sort by CPU delay */
196
cfg.output_one_time = 0;
197
cfg.monitor_pid = 0; /* 0 means monitor all PIDs */
198
cfg.container_path = NULL;
199
200
while (1) {
201
int option_index = 0;
202
203
c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index);
204
if (c == -1)
205
break;
206
207
switch (c) {
208
case 'h':
209
usage();
210
break;
211
case 'd':
212
cfg.delay = atoi(optarg);
213
if (cfg.delay < 1) {
214
fprintf(stderr, "Error: delay must be >= 1.\n");
215
exit(1);
216
}
217
break;
218
case 'n':
219
cfg.iterations = atoi(optarg);
220
if (cfg.iterations < 0) {
221
fprintf(stderr, "Error: iterations must be >= 0.\n");
222
exit(1);
223
}
224
break;
225
case 'p':
226
cfg.monitor_pid = atoi(optarg);
227
if (cfg.monitor_pid < 1) {
228
fprintf(stderr, "Error: pid must be >= 1.\n");
229
exit(1);
230
}
231
break;
232
case 'o':
233
cfg.output_one_time = 1;
234
break;
235
case 'P':
236
cfg.max_processes = atoi(optarg);
237
if (cfg.max_processes < 1) {
238
fprintf(stderr, "Error: processes must be >= 1.\n");
239
exit(1);
240
}
241
if (cfg.max_processes > MAX_TASKS) {
242
fprintf(stderr, "Warning: processes capped to %d.\n",
243
MAX_TASKS);
244
cfg.max_processes = MAX_TASKS;
245
}
246
break;
247
case 'C':
248
cfg.container_path = strdup(optarg);
249
break;
250
default:
251
fprintf(stderr, "Try 'delaytop --help' for more information.\n");
252
exit(1);
253
}
254
}
255
}
256
257
/*
 * Create a raw NETLINK_GENERIC socket and bind it.
 * Returns the socket fd on success, -1 on failure.
 */
static int create_nl_socket(void)
{
	struct sockaddr_nl addr;
	int sock;

	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
	if (sock < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.nl_family = AF_NETLINK;

	if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) == 0)
		return sock;

	fprintf(stderr, "Failed to bind socket when create nl_socket\n");
	close(sock);
	return -1;
}
278
279
/*
 * Send a single-attribute generic netlink request on socket @sd.
 *
 * @sd:         netlink socket from create_nl_socket()
 * @nlmsg_type: netlink message type (genl family id, or GENL_ID_CTRL)
 * @nlmsg_pid:  sender port id (caller's pid)
 * @genl_cmd:   generic netlink command
 * @nla_type:   type of the single attribute carried by the message
 * @nla_data:   attribute payload
 * @nla_len:    payload length in bytes
 *
 * Returns 0 on success, -1 on a send error other than EAGAIN.
 */
static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
		    __u8 genl_cmd, __u16 nla_type,
		    void *nla_data, int nla_len)
{
	struct sockaddr_nl nladdr;
	struct nlattr *na;
	int r, buflen;
	char *buf;

	struct {
		struct nlmsghdr n;
		struct genlmsghdr g;
		char buf[MAX_MSG_SIZE];
	} msg;

	msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
	msg.n.nlmsg_type = nlmsg_type;
	msg.n.nlmsg_flags = NLM_F_REQUEST;
	msg.n.nlmsg_seq = 0;
	msg.n.nlmsg_pid = nlmsg_pid;
	msg.g.cmd = genl_cmd;
	msg.g.version = 0x1;
	/* Place the single attribute right after the genetlink header. */
	na = (struct nlattr *) GENLMSG_DATA(&msg);
	na->nla_type = nla_type;
	na->nla_len = nla_len + NLA_HDRLEN;
	memcpy(NLA_DATA(na), nla_data, nla_len);
	msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);

	buf = (char *) &msg;
	buflen = msg.n.nlmsg_len;
	memset(&nladdr, 0, sizeof(nladdr));
	nladdr.nl_family = AF_NETLINK;
	/* Retry partial sends and EAGAIN until the whole message is out. */
	while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
			   sizeof(nladdr))) < buflen) {
		if (r > 0) {
			buf += r;
			buflen -= r;
		} else if (errno != EAGAIN)
			return -1;
	}
	return 0;
}
322
323
/*
 * Look up the generic netlink family id for TASKSTATS_GENL_NAME.
 *
 * Sends CTRL_CMD_GETFAMILY on @sd and parses the reply.
 * Returns the family id, or 0 on any failure.
 */
static int get_family_id(int sd)
{
	struct {
		struct nlmsghdr n;
		struct genlmsghdr g;
		char buf[256];
	} ans;

	int id = 0, rc;
	struct nlattr *na;
	int rep_len;
	char name[100];

	strncpy(name, TASKSTATS_GENL_NAME, sizeof(name) - 1);
	name[sizeof(name) - 1] = '\0';
	rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
		      CTRL_ATTR_FAMILY_NAME, (void *)name,
		      strlen(TASKSTATS_GENL_NAME)+1);
	if (rc < 0) {
		fprintf(stderr, "Failed to send cmd for family id\n");
		return 0;
	}

	rep_len = recv(sd, &ans, sizeof(ans), 0);
	/*
	 * Test rep_len first: if recv() failed, ans holds indeterminate
	 * data and must not be inspected (the original checked
	 * ans.n.nlmsg_type before rep_len).
	 */
	if (rep_len < 0 || ans.n.nlmsg_type == NLMSG_ERROR ||
	    !NLMSG_OK((&ans.n), rep_len)) {
		fprintf(stderr, "Failed to receive response for family id\n");
		return 0;
	}

	/* Skip the first attribute (family name) to reach the family id. */
	na = (struct nlattr *) GENLMSG_DATA(&ans);
	na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
	if (na->nla_type == CTRL_ATTR_FAMILY_ID)
		id = *(__u16 *) NLA_DATA(na);
	return id;
}
360
361
static void read_psi_stats(void)
362
{
363
FILE *fp;
364
char line[256];
365
int ret = 0;
366
/* Zero all fields */
367
memset(&psi, 0, sizeof(psi));
368
/* CPU pressure */
369
fp = fopen(PSI_CPU_SOME, "r");
370
if (fp) {
371
while (fgets(line, sizeof(line), fp)) {
372
if (strncmp(line, "some", 4) == 0) {
373
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
374
&psi.cpu_some_avg10, &psi.cpu_some_avg60,
375
&psi.cpu_some_avg300, &psi.cpu_some_total);
376
if (ret != 4)
377
fprintf(stderr, "Failed to parse CPU some PSI data\n");
378
} else if (strncmp(line, "full", 4) == 0) {
379
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
380
&psi.cpu_full_avg10, &psi.cpu_full_avg60,
381
&psi.cpu_full_avg300, &psi.cpu_full_total);
382
if (ret != 4)
383
fprintf(stderr, "Failed to parse CPU full PSI data\n");
384
}
385
}
386
fclose(fp);
387
}
388
/* Memory pressure */
389
fp = fopen(PSI_MEMORY_SOME, "r");
390
if (fp) {
391
while (fgets(line, sizeof(line), fp)) {
392
if (strncmp(line, "some", 4) == 0) {
393
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
394
&psi.memory_some_avg10, &psi.memory_some_avg60,
395
&psi.memory_some_avg300, &psi.memory_some_total);
396
if (ret != 4)
397
fprintf(stderr, "Failed to parse Memory some PSI data\n");
398
} else if (strncmp(line, "full", 4) == 0) {
399
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
400
&psi.memory_full_avg10, &psi.memory_full_avg60,
401
&psi.memory_full_avg300, &psi.memory_full_total);
402
}
403
if (ret != 4)
404
fprintf(stderr, "Failed to parse Memory full PSI data\n");
405
}
406
fclose(fp);
407
}
408
/* IO pressure */
409
fp = fopen(PSI_IO_SOME, "r");
410
if (fp) {
411
while (fgets(line, sizeof(line), fp)) {
412
if (strncmp(line, "some", 4) == 0) {
413
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
414
&psi.io_some_avg10, &psi.io_some_avg60,
415
&psi.io_some_avg300, &psi.io_some_total);
416
if (ret != 4)
417
fprintf(stderr, "Failed to parse IO some PSI data\n");
418
} else if (strncmp(line, "full", 4) == 0) {
419
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
420
&psi.io_full_avg10, &psi.io_full_avg60,
421
&psi.io_full_avg300, &psi.io_full_total);
422
if (ret != 4)
423
fprintf(stderr, "Failed to parse IO full PSI data\n");
424
}
425
}
426
fclose(fp);
427
}
428
/* IRQ pressure (only full) */
429
fp = fopen(PSI_IRQ_FULL, "r");
430
if (fp) {
431
while (fgets(line, sizeof(line), fp)) {
432
if (strncmp(line, "full", 4) == 0) {
433
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
434
&psi.irq_full_avg10, &psi.irq_full_avg60,
435
&psi.irq_full_avg300, &psi.irq_full_total);
436
if (ret != 4)
437
fprintf(stderr, "Failed to parse IRQ full PSI data\n");
438
}
439
}
440
fclose(fp);
441
}
442
}
443
444
/*
 * Read the command name of @pid from /proc/<pid>/comm into @comm_buf
 * (at most @buf_size bytes, trailing newline stripped).
 * Returns 0 on success, -1 if the file cannot be opened or read.
 */
static int read_comm(int pid, char *comm_buf, size_t buf_size)
{
	char path[64];
	FILE *fp;
	int rc = -1;

	snprintf(path, sizeof(path), "/proc/%d/comm", pid);
	fp = fopen(path, "r");
	if (!fp) {
		fprintf(stderr, "Failed to open comm file /proc/%d/comm\n", pid);
		return rc;
	}

	if (fgets(comm_buf, buf_size, fp)) {
		size_t n = strlen(comm_buf);

		if (n > 0 && comm_buf[n - 1] == '\n')
			comm_buf[n - 1] = '\0';
		rc = 0;
	}

	fclose(fp);

	return rc;
}
469
470
/*
 * Query taskstats for @pid over the genetlink socket and append the
 * result to the global tasks[] array (up to MAX_TASKS entries).
 *
 * @pid:  process id to query
 * @comm: command name already read from /proc/<pid>/comm
 *
 * The per-task stats arrive nested: a TASKSTATS_TYPE_AGGR_PID
 * attribute containing a TASKSTATS_TYPE_STATS attribute with the
 * struct taskstats payload.
 */
static void fetch_and_fill_task_info(int pid, const char *comm)
{
	struct {
		struct nlmsghdr n;
		struct genlmsghdr g;
		char buf[MAX_MSG_SIZE];
	} resp;
	struct taskstats stats;
	struct nlattr *nested;
	struct nlattr *na;
	int nested_len;
	int nl_len;
	int rc;

	/* Send request for task stats */
	if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET,
		     TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) {
		fprintf(stderr, "Failed to send request for task stats\n");
		return;
	}

	/* Receive response */
	rc = recv(nl_sd, &resp, sizeof(resp), 0);
	if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) {
		fprintf(stderr, "Failed to receive response for task stats\n");
		return;
	}

	/* Parse response: walk top-level attributes, then the nest. */
	nl_len = GENLMSG_PAYLOAD(&resp.n);
	na = (struct nlattr *) GENLMSG_DATA(&resp);
	while (nl_len > 0) {
		if (na->nla_type == TASKSTATS_TYPE_AGGR_PID) {
			nested = (struct nlattr *) NLA_DATA(na);
			nested_len = NLA_PAYLOAD(na->nla_len);
			while (nested_len > 0) {
				if (nested->nla_type == TASKSTATS_TYPE_STATS) {
					/* Copy the payload out of the message buffer. */
					memcpy(&stats, NLA_DATA(nested), sizeof(stats));
					if (task_count < MAX_TASKS) {
						tasks[task_count].pid = pid;
						tasks[task_count].tgid = pid;
						strncpy(tasks[task_count].command, comm,
							TASK_COMM_LEN - 1);
						tasks[task_count].command[TASK_COMM_LEN - 1] = '\0';
						SET_TASK_STAT(task_count, cpu_count);
						SET_TASK_STAT(task_count, cpu_delay_total);
						SET_TASK_STAT(task_count, blkio_count);
						SET_TASK_STAT(task_count, blkio_delay_total);
						SET_TASK_STAT(task_count, swapin_count);
						SET_TASK_STAT(task_count, swapin_delay_total);
						SET_TASK_STAT(task_count, freepages_count);
						SET_TASK_STAT(task_count, freepages_delay_total);
						SET_TASK_STAT(task_count, thrashing_count);
						SET_TASK_STAT(task_count, thrashing_delay_total);
						SET_TASK_STAT(task_count, compact_count);
						SET_TASK_STAT(task_count, compact_delay_total);
						SET_TASK_STAT(task_count, wpcopy_count);
						SET_TASK_STAT(task_count, wpcopy_delay_total);
						SET_TASK_STAT(task_count, irq_count);
						SET_TASK_STAT(task_count, irq_delay_total);
						task_count++;
					}
					break;
				}
				nested_len -= NLA_ALIGN(nested->nla_len);
				nested = NLA_NEXT(nested);
			}
		}
		nl_len -= NLA_ALIGN(na->nla_len);
		na = NLA_NEXT(na);
	}
	return;
}
543
544
static void get_task_delays(void)
545
{
546
char comm[TASK_COMM_LEN];
547
struct dirent *entry;
548
DIR *dir;
549
int pid;
550
551
task_count = 0;
552
if (cfg.monitor_pid > 0) {
553
if (read_comm(cfg.monitor_pid, comm, sizeof(comm)) == 0)
554
fetch_and_fill_task_info(cfg.monitor_pid, comm);
555
return;
556
}
557
558
dir = opendir("/proc");
559
if (!dir) {
560
fprintf(stderr, "Error opening /proc directory\n");
561
return;
562
}
563
564
while ((entry = readdir(dir)) != NULL && task_count < MAX_TASKS) {
565
if (!isdigit(entry->d_name[0]))
566
continue;
567
pid = atoi(entry->d_name);
568
if (pid == 0)
569
continue;
570
if (read_comm(pid, comm, sizeof(comm)) != 0)
571
continue;
572
fetch_and_fill_task_info(pid, comm);
573
}
574
closedir(dir);
575
}
576
577
/*
 * Convert a cumulative delay (nanoseconds) and an event count into
 * the mean delay per event in milliseconds.  Returns 0 for an empty
 * sample to avoid dividing by zero.
 */
static double average_ms(unsigned long long total, unsigned long long count)
{
	double total_ms;

	if (!count)
		return 0;
	total_ms = (double)total / 1000000.0;
	return total_ms / count;
}
584
585
/* Comparison function for sorting tasks */
586
static int compare_tasks(const void *a, const void *b)
587
{
588
const struct task_info *t1 = (const struct task_info *)a;
589
const struct task_info *t2 = (const struct task_info *)b;
590
double avg1, avg2;
591
592
switch (cfg.sort_field) {
593
case 'c': /* CPU */
594
avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count);
595
avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count);
596
if (avg1 != avg2)
597
return avg2 > avg1 ? 1 : -1;
598
return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
599
600
default:
601
return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
602
}
603
}
604
605
/* Sort tasks by selected field */
606
static void sort_tasks(void)
607
{
608
if (task_count > 0)
609
qsort(tasks, task_count, sizeof(struct task_info), compare_tasks);
610
}
611
612
/* Get container statistics via cgroupstats */
613
static void get_container_stats(void)
614
{
615
int rc, cfd;
616
struct {
617
struct nlmsghdr n;
618
struct genlmsghdr g;
619
char buf[MAX_MSG_SIZE];
620
} req, resp;
621
struct nlattr *na;
622
int nl_len;
623
struct cgroupstats stats;
624
625
/* Check if container path is set */
626
if (!cfg.container_path)
627
return;
628
629
/* Open container cgroup */
630
cfd = open(cfg.container_path, O_RDONLY);
631
if (cfd < 0) {
632
fprintf(stderr, "Error opening container path: %s\n", cfg.container_path);
633
return;
634
}
635
636
/* Send request for container stats */
637
if (send_cmd(nl_sd, family_id, getpid(), CGROUPSTATS_CMD_GET,
638
CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)) < 0) {
639
fprintf(stderr, "Failed to send request for container stats\n");
640
close(cfd);
641
return;
642
}
643
644
/* Receive response */
645
rc = recv(nl_sd, &resp, sizeof(resp), 0);
646
if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) {
647
fprintf(stderr, "Failed to receive response for container stats\n");
648
close(cfd);
649
return;
650
}
651
652
/* Parse response */
653
nl_len = GENLMSG_PAYLOAD(&resp.n);
654
na = (struct nlattr *) GENLMSG_DATA(&resp);
655
while (nl_len > 0) {
656
if (na->nla_type == CGROUPSTATS_TYPE_CGROUP_STATS) {
657
/* Get the cgroupstats structure */
658
memcpy(&stats, NLA_DATA(na), sizeof(stats));
659
660
/* Fill container stats */
661
container_stats.nr_sleeping = stats.nr_sleeping;
662
container_stats.nr_running = stats.nr_running;
663
container_stats.nr_stopped = stats.nr_stopped;
664
container_stats.nr_uninterruptible = stats.nr_uninterruptible;
665
container_stats.nr_io_wait = stats.nr_io_wait;
666
break;
667
}
668
nl_len -= NLA_ALIGN(na->nla_len);
669
na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
670
}
671
672
close(cfd);
673
}
674
675
/* Display results to stdout or log file */
676
static void display_results(void)
677
{
678
time_t now = time(NULL);
679
struct tm *tm_now = localtime(&now);
680
FILE *out = stdout;
681
char timestamp[32];
682
bool suc = true;
683
int i, count;
684
685
/* Clear terminal screen */
686
suc &= BOOL_FPRINT(out, "\033[H\033[J");
687
688
/* PSI output (one-line, no cat style) */
689
suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n");
690
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
691
"CPU some:",
692
psi.cpu_some_avg10,
693
psi.cpu_some_avg60,
694
psi.cpu_some_avg300,
695
psi.cpu_some_total / 1000);
696
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
697
"CPU full:",
698
psi.cpu_full_avg10,
699
psi.cpu_full_avg60,
700
psi.cpu_full_avg300,
701
psi.cpu_full_total / 1000);
702
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
703
"Memory full:",
704
psi.memory_full_avg10,
705
psi.memory_full_avg60,
706
psi.memory_full_avg300,
707
psi.memory_full_total / 1000);
708
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
709
"Memory some:",
710
psi.memory_some_avg10,
711
psi.memory_some_avg60,
712
psi.memory_some_avg300,
713
psi.memory_some_total / 1000);
714
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
715
"IO full:",
716
psi.io_full_avg10,
717
psi.io_full_avg60,
718
psi.io_full_avg300,
719
psi.io_full_total / 1000);
720
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
721
"IO some:",
722
psi.io_some_avg10,
723
psi.io_some_avg60,
724
psi.io_some_avg300,
725
psi.io_some_total / 1000);
726
suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
727
"IRQ full:",
728
psi.irq_full_avg10,
729
psi.irq_full_avg60,
730
psi.irq_full_avg300,
731
psi.irq_full_total / 1000);
732
733
if (cfg.container_path) {
734
suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path);
735
suc &= BOOL_FPRINT(out, "Processes: running=%d, sleeping=%d, ",
736
container_stats.nr_running, container_stats.nr_sleeping);
737
suc &= BOOL_FPRINT(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n",
738
container_stats.nr_stopped, container_stats.nr_uninterruptible,
739
container_stats.nr_io_wait);
740
}
741
suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n",
742
cfg.max_processes);
743
suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND");
744
suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n",
745
"CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)",
746
"THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)");
747
748
suc &= BOOL_FPRINT(out, "-----------------------------------------------");
749
suc &= BOOL_FPRINT(out, "----------------------------------------------\n");
750
count = task_count < cfg.max_processes ? task_count : cfg.max_processes;
751
752
for (i = 0; i < count; i++) {
753
suc &= BOOL_FPRINT(out, "%5d %5d %-15s",
754
tasks[i].pid, tasks[i].tgid, tasks[i].command);
755
suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n",
756
average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count),
757
average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count),
758
average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count),
759
average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count),
760
average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count),
761
average_ms(tasks[i].compact_delay_total, tasks[i].compact_count),
762
average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count),
763
average_ms(tasks[i].irq_delay_total, tasks[i].irq_count));
764
}
765
766
suc &= BOOL_FPRINT(out, "\n");
767
768
if (!suc)
769
perror("Error writing to output");
770
}
771
772
/*
 * Main function: set up the taskstats netlink channel, then loop
 * collecting PSI, container and per-task delay data and rendering it
 * until iterations are exhausted, -o was given, or 'q' is pressed.
 */
int main(int argc, char **argv)
{
	int iterations = 0;
	int use_q_quit = 0;	/* interactive mode: raw tty + 'q' handling */

	/* Parse command line arguments */
	parse_args(argc, argv);

	/* Setup netlink socket */
	nl_sd = create_nl_socket();
	if (nl_sd < 0) {
		fprintf(stderr, "Error creating netlink socket\n");
		exit(1);
	}

	/* Get family ID for taskstats via netlink */
	family_id = get_family_id(nl_sd);
	if (!family_id) {
		fprintf(stderr, "Error getting taskstats family ID\n");
		close(nl_sd);
		exit(1);
	}

	/* Interactive mode: raw terminal so a bare 'q' keypress quits. */
	if (!cfg.output_one_time) {
		use_q_quit = 1;
		enable_raw_mode();
		printf("Press 'q' to quit.\n");
		fflush(stdout);
	}

	/* Main loop */
	while (running) {
		/* Read PSI statistics */
		read_psi_stats();

		/* Get container stats if container path provided */
		if (cfg.container_path)
			get_container_stats();

		/* Get task delays */
		get_task_delays();

		/* Sort tasks */
		sort_tasks();

		/* Display results to stdout or log file */
		display_results();

		/* Check for iterations */
		if (cfg.iterations > 0 && ++iterations >= cfg.iterations)
			break;

		/* Exit if output_one_time is set */
		if (cfg.output_one_time)
			break;

		/* Wait up to cfg.delay seconds, waking early on a keypress. */
		if (use_q_quit) {
			struct timeval tv = {cfg.delay, 0};
			fd_set readfds;

			FD_ZERO(&readfds);
			FD_SET(STDIN_FILENO, &readfds);
			int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv);

			if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
				char ch = 0;

				/* NOTE(review): read() return value is ignored;
				 * ch stays 0 on failure, which is harmless here. */
				read(STDIN_FILENO, &ch, 1);
				if (ch == 'q' || ch == 'Q') {
					running = 0;
					break;
				}
			}
		} else {
			sleep(cfg.delay);
		}
	}

	/* Restore terminal mode */
	if (use_q_quit)
		disable_raw_mode();

	/* Cleanup */
	close(nl_sd);
	if (cfg.container_path)
		free(cfg.container_path);

	return 0;
}
863
864