desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.
total Total number of work items executed by the workqueue.
infl The number of currently in-flight work items.
CPUtime Total CPU time consumed by the workqueue in seconds. This is
sampled from scheduler ticks and only provides ballpark
measurement. "nohz_full=" CPUs are excluded from measurement.
CPUitsv The number of times a concurrency-managed work item hogged CPU
longer than the threshold (workqueue.cpu_intensive_thresh_us)
and got excluded from concurrency management to avoid stalling
other work items.
CMW/RPR For per-cpu workqueues, the number of concurrency-management
wake-ups while executing a work item of the workqueue. For
unbound workqueues, the number of times a worker was repatriated
to its affinity scope after being migrated to an off-scope CPU by
the scheduler.
mayday The number of times the rescuer was requested while waiting for
new worker creation.
rescued The number of work items executed by the rescuer.
"""
import signal
import re
import time
import json
import drgn
from drgn.helpers.linux.list import list_for_each_entry
import argparse
parser = argparse.ArgumentParser(description=desc,
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('workqueue', metavar='REGEX', nargs='*',
help='Target workqueue name patterns (all if empty)')
parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
help='Monitoring interval (0 to print once and exit)')
parser.add_argument('-j', '--json', action='store_true',
help='Output in json')
args = parser.parse_args()
workqueues = prog['workqueues']
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED']
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED']
PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME']
PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE']
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP']
PWQ_STAT_REPATRIATED = prog['PWQ_STAT_REPATRIATED']
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY']
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED']
PWQ_NR_STATS = prog['PWQ_NR_STATS']
class WqStats:
def __init__(self, wq):
self.name = wq.name.string_().decode()
self.unbound = wq.flags & WQ_UNBOUND != 0
self.mem_reclaim = wq.flags & WQ_MEM_RECLAIM != 0
self.stats = [0] * PWQ_NR_STATS
for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'):
for i in range(PWQ_NR_STATS):
self.stats[i] += int(pwq.stats[i])
def dict(self, now):
return { 'timestamp' : now,
'name' : self.name,
'unbound' : self.unbound,
'mem_reclaim' : self.mem_reclaim,
'started' : self.stats[PWQ_STAT_STARTED],
'completed' : self.stats[PWQ_STAT_COMPLETED],
'cpu_time' : self.stats[PWQ_STAT_CPU_TIME],
'cpu_intensive' : self.stats[PWQ_STAT_CPU_INTENSIVE],
'cm_wakeup' : self.stats[PWQ_STAT_CM_WAKEUP],
'repatriated' : self.stats[PWQ_STAT_REPATRIATED],
'mayday' : self.stats[PWQ_STAT_MAYDAY],
'rescued' : self.stats[PWQ_STAT_RESCUED], }
def table_header_str():
return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\
f'{"CPUitsv":>7} {"CMW/RPR":>7} {"mayday":>7} {"rescued":>7}'
def table_row_str(self):
cpu_intensive = '-'
cmw_rpr = '-'
mayday = '-'
rescued = '-'
if self.unbound:
cmw_rpr = str(self.stats[PWQ_STAT_REPATRIATED]);
else:
cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
cmw_rpr = str(self.stats[PWQ_STAT_CM_WAKEUP])
if self.mem_reclaim:
mayday = str(self.stats[PWQ_STAT_MAYDAY])
rescued = str(self.stats[PWQ_STAT_RESCUED])
out = f'{self.name[-24:]:24} ' \
f'{self.stats[PWQ_STAT_STARTED]:8} ' \
f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \
f'{self.stats[PWQ_STAT_CPU_TIME] / 1000000:8.1f} ' \
f'{cpu_intensive:>7} ' \
f'{cmw_rpr:>7} ' \
f'{mayday:>7} ' \
f'{rescued:>7} '
return out.rstrip(':')
exit_req = False
def sigint_handler(signr, frame):
global exit_req
exit_req = True
def main():
table_fmt = not args.json
interval = args.interval
re_str = None
if args.workqueue:
for r in args.workqueue:
if re_str is None:
re_str = r
else:
re_str += '|' + r
filter_re = re.compile(re_str) if re_str else None
signal.signal(signal.SIGINT, sigint_handler)
while not exit_req:
now = time.time()
if table_fmt:
print()
print(WqStats.table_header_str())
for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
stats = WqStats(wq)
if filter_re and not filter_re.search(stats.name):
continue
if table_fmt:
print(stats.table_row_str())
else:
print(stats.dict(now))
if interval == 0:
break
time.sleep(interval)
if __name__ == "__main__":
main()