Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/workqueue/wq_monitor.py
26278 views
1
#!/usr/bin/env drgn
2
#
3
# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
4
# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
5
6
# Help text handed to argparse. The parser below uses RawTextHelpFormatter,
# which prints this string verbatim, so the column layout here is exactly
# what the user sees with --help.
desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.

  total    Total number of work items executed by the workqueue.

  infl     The number of currently in-flight work items.

  CPUtime  Total CPU time consumed by the workqueue in seconds. This is
           sampled from scheduler ticks and only provides ballpark
           measurement. "nohz_full=" CPUs are excluded from measurement.

  CPUitsv  The number of times a concurrency-managed work item hogged CPU
           longer than the threshold (workqueue.cpu_intensive_thresh_us)
           and got excluded from concurrency management to avoid stalling
           other work items.

  CMW/RPR  For per-cpu workqueues, the number of concurrency-management
           wake-ups while executing a work item of the workqueue. For
           unbound workqueues, the number of times a worker was repatriated
           to its affinity scope after being migrated to an off-scope CPU by
           the scheduler.

  mayday   The number of times the rescuer was requested while waiting for
           new worker creation.

  rescued  The number of work items executed by the rescuer.
"""
34
35
import signal
36
import re
37
import time
38
import json
39
40
import drgn
41
from drgn.helpers.linux.list import list_for_each_entry
42
43
import argparse
44
# Command-line interface: zero or more workqueue name regexes, the polling
# interval, and a switch selecting JSON instead of the table output.
parser = argparse.ArgumentParser(
    description=desc,
    formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument(
    'workqueue',
    metavar='REGEX',
    nargs='*',
    help='Target workqueue name patterns (all if empty)',
)
parser.add_argument(
    '-i', '--interval',
    metavar='SECS',
    type=float,
    default=1,
    help='Monitoring interval (0 to print once and exit)',
)
parser.add_argument(
    '-j', '--json',
    action='store_true',
    help='Output in json',
)
args = parser.parse_args()
53
54
# NOTE(review): `prog` is not defined in this file; presumably it is supplied
# by the drgn launcher named in the shebang line -- confirm.

# List head that main() walks to visit every workqueue.
workqueues = prog['workqueues']

# Workqueue flag bits tested against wq.flags.
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']

# Indices into the per-pool_workqueue stats array.
# work items started execution
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED']
# work items completed execution
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED']
# total CPU time consumed
PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME']
# wq_cpu_intensive_thresh_us violations
PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE']
# concurrency-management worker wakeups
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP']
# unbound workers brought back into scope
PWQ_STAT_REPATRIATED = prog['PWQ_STAT_REPATRIATED']
# maydays to rescuer
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY']
# linked work items executed by rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED']
# number of entries in the stats array
PWQ_NR_STATS = prog['PWQ_NR_STATS']
68
69
class WqStats:
    """Snapshot of one workqueue's counters, summed over its pool_workqueues.

    Attributes:
        name: Workqueue name decoded from the kernel string.
        unbound: True when WQ_UNBOUND is set in the workqueue flags.
        mem_reclaim: True when WQ_MEM_RECLAIM is set in the workqueue flags.
        stats: List of PWQ_NR_STATS integer totals, indexed by PWQ_STAT_*.
    """

    def __init__(self, wq):
        """Read the name, flags and accumulated per-pwq counters from *wq*."""
        self.name = wq.name.string_().decode()
        # Coerce to plain bool so the values returned by dict() can be
        # serialized regardless of what the flag comparison yields.
        self.unbound = bool((wq.flags & WQ_UNBOUND) != 0)
        self.mem_reclaim = bool((wq.flags & WQ_MEM_RECLAIM) != 0)
        self.stats = [0] * PWQ_NR_STATS
        # Each pool_workqueue linked on wq.pwqs keeps its own counters; the
        # workqueue-level figure is their sum.
        for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(PWQ_NR_STATS):
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the raw counters as a dict, tagged with timestamp *now*."""
        return {
            'timestamp': now,
            'name': self.name,
            'unbound': self.unbound,
            'mem_reclaim': self.mem_reclaim,
            'started': self.stats[PWQ_STAT_STARTED],
            'completed': self.stats[PWQ_STAT_COMPLETED],
            'cpu_time': self.stats[PWQ_STAT_CPU_TIME],
            'cpu_intensive': self.stats[PWQ_STAT_CPU_INTENSIVE],
            'cm_wakeup': self.stats[PWQ_STAT_CM_WAKEUP],
            'repatriated': self.stats[PWQ_STAT_REPATRIATED],
            'mayday': self.stats[PWQ_STAT_MAYDAY],
            'rescued': self.stats[PWQ_STAT_RESCUED],
        }

    # Takes no instance state; declared static so it can be called through
    # either the class or an instance.
    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str()'s widths."""
        return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\
               f'{"CPUitsv":>7} {"CMW/RPR":>7} {"mayday":>7} {"rescued":>7}'

    def table_row_str(self):
        """Return this workqueue's counters formatted as one table row.

        Columns that do not apply to the workqueue are shown as '-':
        CPUitsv for unbound workqueues, mayday/rescued for workqueues
        without WQ_MEM_RECLAIM.
        """
        cpu_intensive = '-'
        cmw_rpr = '-'
        mayday = '-'
        rescued = '-'

        # The CMW/RPR column is shared: repatriations for unbound
        # workqueues, concurrency-management wakeups otherwise.
        if self.unbound:
            cmw_rpr = str(self.stats[PWQ_STAT_REPATRIATED])
        else:
            cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
            cmw_rpr = str(self.stats[PWQ_STAT_CM_WAKEUP])

        if self.mem_reclaim:
            mayday = str(self.stats[PWQ_STAT_MAYDAY])
            rescued = str(self.stats[PWQ_STAT_RESCUED])

        started = self.stats[PWQ_STAT_STARTED]
        # The counters are read one after another from a live kernel, so
        # completed may momentarily exceed started; clamp at zero.
        in_flight = max(started - self.stats[PWQ_STAT_COMPLETED], 0)
        # The help text documents CPUtime in seconds; the raw counter is
        # scaled down by 1e6 (presumably microseconds -- confirm).
        cpu_secs = self.stats[PWQ_STAT_CPU_TIME] / 1000000

        # Long names keep their last 24 characters so the columns stay
        # aligned with the header.
        return f'{self.name[-24:]:24} ' \
               f'{started:8} ' \
               f'{in_flight:5} ' \
               f'{cpu_secs:8.1f} ' \
               f'{cpu_intensive:>7} ' \
               f'{cmw_rpr:>7} ' \
               f'{mayday:>7} ' \
               f'{rescued:>7} '
122
123
# Set to True by sigint_handler(); polled by the monitoring loop in main().
exit_req = False
124
125
def sigint_handler(signr, frame):
    """Signal handler that raises the module-level exit_req flag.

    Both arguments are the standard handler parameters and are unused.
    """
    global exit_req
    exit_req = True
128
129
def main():
    """Print statistics for every matching workqueue, once or periodically.

    Reads the parsed command line from the module-level `args`: the
    workqueue patterns are OR-ed into one regex used to filter by name,
    `--json` selects one JSON object per workqueue instead of table rows,
    and `--interval` sets the pause between passes (0 prints a single pass
    and returns). The loop also ends once sigint_handler() sets exit_req.
    """
    # handle args
    table_fmt = not args.json
    interval = args.interval

    # A single alternation covers all patterns; an empty result means
    # "no filtering".
    re_str = '|'.join(args.workqueue)
    filter_re = re.compile(re_str) if re_str else None

    # monitoring loop
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Serialize explicitly: printing the dict itself would emit
                # Python repr (single quotes, True/False), not JSON.
                print(json.dumps(stats.dict(now)))

        if interval == 0:
            break
        time.sleep(interval)
166
167
# Start monitoring only when this file is run as the main script.
if __name__ == "__main__":
    main()
169
170