GitHub Repository: torvalds/linux
Path: blob/master/kernel/hung_task.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Detect Hung Task
 *
 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
 *
 */

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/export.h>
#include <linux/panic_notifier.h>
#include <linux/sysctl.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/sched/sysctl.h>
#include <linux/hung_task.h>
#include <linux/rwsem.h>

#include <trace/events/sched.h>

/*
 * The number of tasks checked:
 */
static int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;

/*
 * Total number of tasks detected as hung since boot:
 */
static unsigned long __read_mostly sysctl_hung_task_detect_count;

/*
 * Limit the number of tasks checked in a batch.
 *
 * This value controls the preemptibility of khungtaskd since preemption
 * is disabled during the critical section. It also controls the size of
 * the RCU grace period. So it needs an upper bound.
 */
#define HUNG_TASK_LOCK_BREAK (HZ / 10)
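
/*
 * Note: HUNG_TASK_LOCK_BREAK is a jiffies value, so HZ / 10 is roughly
 * 100ms regardless of the configured tick rate. The scan loop in
 * check_hung_uninterruptible_tasks() calls rcu_lock_break() once a
 * batch has been running for longer than this, bounding both the
 * non-preemptible stretch and the RCU grace-period extension.
 */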

/*
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
EXPORT_SYMBOL_GPL(sysctl_hung_task_timeout_secs);

/*
 * Zero (default value) means use sysctl_hung_task_timeout_secs:
 */
static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;

static int __read_mostly sysctl_hung_task_warnings = 10;

static int __read_mostly did_panic;
static bool hung_task_show_lock;
static bool hung_task_call_panic;
static bool hung_task_show_all_bt;

static struct task_struct *watchdog_task;

#ifdef CONFIG_SMP
/*
 * Should we dump all CPUs' backtraces in a hung task event?
 * Defaults to 0, can be changed via sysctl.
 */
static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
#else
#define sysctl_hung_task_all_cpu_backtrace 0
#endif /* CONFIG_SMP */

/*
 * Should we panic (and reboot, if panic_timeout= is set) when a
 * hung task is detected:
 */
static unsigned int __read_mostly sysctl_hung_task_panic =
	IS_ENABLED(CONFIG_BOOTPARAM_HUNG_TASK_PANIC);

static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}

static struct notifier_block panic_block = {
	.notifier_call = hung_task_panic,
};
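
/*
 * hung_task_panic() runs from the panic notifier chain registered in
 * hung_task_init(): once any panic is underway, did_panic makes
 * check_hung_uninterruptible_tasks() bail out, so no further hung-task
 * reports pile on top of the crash output.
 */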
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
static void debug_show_blocker(struct task_struct *task)
{
	struct task_struct *g, *t;
	unsigned long owner, blocker, blocker_type;
	const char *rwsem_blocked_by, *rwsem_blocked_as;

	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");

	blocker = READ_ONCE(task->blocker);
	if (!blocker)
		return;

	blocker_type = hung_task_get_blocker_type(blocker);

	switch (blocker_type) {
	case BLOCKER_TYPE_MUTEX:
		owner = mutex_get_owner(hung_task_blocker_to_lock(blocker));
		break;
	case BLOCKER_TYPE_SEM:
		owner = sem_last_holder(hung_task_blocker_to_lock(blocker));
		break;
	case BLOCKER_TYPE_RWSEM_READER:
	case BLOCKER_TYPE_RWSEM_WRITER:
		owner = (unsigned long)rwsem_owner(
				hung_task_blocker_to_lock(blocker));
		rwsem_blocked_as = (blocker_type == BLOCKER_TYPE_RWSEM_READER) ?
				   "reader" : "writer";
		rwsem_blocked_by = is_rwsem_reader_owned(
				hung_task_blocker_to_lock(blocker)) ?
				   "reader" : "writer";
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	if (unlikely(!owner)) {
		switch (blocker_type) {
		case BLOCKER_TYPE_MUTEX:
			pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
			       task->comm, task->pid);
			break;
		case BLOCKER_TYPE_SEM:
			pr_err("INFO: task %s:%d is blocked on a semaphore, but the last holder is not found.\n",
			       task->comm, task->pid);
			break;
		case BLOCKER_TYPE_RWSEM_READER:
		case BLOCKER_TYPE_RWSEM_WRITER:
			pr_err("INFO: task %s:%d is blocked on an rw-semaphore, but the owner is not found.\n",
			       task->comm, task->pid);
			break;
		}
		return;
	}

	/* Ensure the owner information is correct. */
	for_each_process_thread(g, t) {
		if ((unsigned long)t != owner)
			continue;

		switch (blocker_type) {
		case BLOCKER_TYPE_MUTEX:
			pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n",
			       task->comm, task->pid, t->comm, t->pid);
			break;
		case BLOCKER_TYPE_SEM:
			pr_err("INFO: task %s:%d is blocked on a semaphore likely last held by task %s:%d.\n",
			       task->comm, task->pid, t->comm, t->pid);
			break;
		case BLOCKER_TYPE_RWSEM_READER:
		case BLOCKER_TYPE_RWSEM_WRITER:
			pr_err("INFO: task %s:%d <%s> is blocked on an rw-semaphore likely owned by task %s:%d <%s>.\n",
			       task->comm, task->pid, rwsem_blocked_as, t->comm,
			       t->pid, rwsem_blocked_by);
			break;
		}
		sched_show_task(t);
		return;
	}
}
#else
static inline void debug_show_blocker(struct task_struct *task)
{
}
#endif

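
/*
 * The task->blocker word encodes both the blocked-on lock and its type;
 * hung_task_get_blocker_type() and hung_task_blocker_to_lock() (from
 * <linux/hung_task.h>) decode it so the likely owner or last holder can
 * be looked up per lock type. The walk over all threads above doubles
 * as a sanity check that the recorded owner is still a live task before
 * its backtrace is dumped.
 */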

static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;

	/*
	 * Ensure the task is not frozen.
	 * Also, skip vfork and any other user process that freezer should skip.
	 */
	if (unlikely(READ_ONCE(t->__state) & TASK_FROZEN))
		return;

	/*
	 * When a freshly created task is scheduled once, it changes its state
	 * to TASK_UNINTERRUPTIBLE without having ever been switched out, so
	 * it mustn't be checked.
	 */
	if (unlikely(!switch_count))
		return;

	if (switch_count != t->last_switch_count) {
		t->last_switch_count = switch_count;
		t->last_switch_time = jiffies;
		return;
	}
	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
		return;

	/*
	 * This counter tracks the total number of tasks detected as hung
	 * since boot.
	 */
	sysctl_hung_task_detect_count++;

	trace_sched_process_hang(t);

	if (sysctl_hung_task_panic) {
		console_verbose();
		hung_task_show_lock = true;
		hung_task_call_panic = true;
	}

	/*
	 * Ok, the task did not get scheduled for more than 2 minutes
	 * (the default timeout), complain:
	 */
	if (sysctl_hung_task_warnings || hung_task_call_panic) {
		if (sysctl_hung_task_warnings > 0)
			sysctl_hung_task_warnings--;
		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
		       t->comm, t->pid, (jiffies - t->last_switch_time) / HZ);
		pr_err("      %s %s %.*s\n",
		       print_tainted(), init_utsname()->release,
		       (int)strcspn(init_utsname()->version, " "),
		       init_utsname()->version);
		if (t->flags & PF_POSTCOREDUMP)
			pr_err("      Blocked by coredump.\n");
		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
		       " disables this message.\n");
		sched_show_task(t);
		debug_show_blocker(t);
		hung_task_show_lock = true;

		if (sysctl_hung_task_all_cpu_backtrace)
			hung_task_show_all_bt = true;
		if (!sysctl_hung_task_warnings)
			pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
	}

	touch_nmi_watchdog();
}
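
/*
 * A task is reported only if both of the following hold at scan time:
 * its combined context-switch count (nvcsw + nivcsw) is unchanged since
 * the scan that recorded last_switch_count, and more than 'timeout'
 * seconds of jiffies have passed since last_switch_time. A task that
 * keeps scheduling, however briefly, resets the clock on every scan.
 */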

/*
 * To avoid extending the RCU grace period for an unbounded amount of time,
 * periodically exit the critical section and enter a new one.
 *
 * For preemptible RCU it is sufficient to call rcu_read_unlock in order
 * to exit the grace period. For classic RCU, a reschedule is required.
 */
static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
	bool can_cont;

	get_task_struct(g);
	get_task_struct(t);
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
	can_cont = pid_alive(g) && pid_alive(t);
	put_task_struct(t);
	put_task_struct(g);

	return can_cont;
}
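
/*
 * The get_task_struct() reference pair is what makes the break safe:
 * g and t may be unlinked from the task list while the RCU read lock is
 * dropped, but the references keep the structures themselves alive. If
 * pid_alive() then fails for either task, the caller cannot trust its
 * iterator position and must abandon the scan until the next cycle.
 */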

/*
 * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
 * a really long time (120 seconds by default). If that happens, print
 * out a warning.
 */
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
	int max_count = sysctl_hung_task_check_count;
	unsigned long last_break = jiffies;
	struct task_struct *g, *t;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if (test_taint(TAINT_DIE) || did_panic)
		return;

	hung_task_show_lock = false;
	rcu_read_lock();
	for_each_process_thread(g, t) {
		unsigned int state;

		if (!max_count--)
			goto unlock;
		if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
			if (!rcu_lock_break(g, t))
				goto unlock;
			last_break = jiffies;
		}
		/*
		 * skip the TASK_KILLABLE tasks -- these can be killed
		 * skip the TASK_IDLE tasks -- those are genuinely idle
		 */
		state = READ_ONCE(t->__state);
		if ((state & TASK_UNINTERRUPTIBLE) &&
		    !(state & TASK_WAKEKILL) &&
		    !(state & TASK_NOLOAD))
			check_hung_task(t, timeout);
	}
 unlock:
	rcu_read_unlock();
	if (hung_task_show_lock)
		debug_show_all_locks();

	if (hung_task_show_all_bt) {
		hung_task_show_all_bt = false;
		trigger_all_cpu_backtrace();
	}

	if (hung_task_call_panic)
		panic("hung_task: blocked tasks");
}
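
/*
 * The state test above selects plain D state only: TASK_KILLABLE is
 * TASK_WAKEKILL | TASK_UNINTERRUPTIBLE and TASK_IDLE is
 * TASK_NOLOAD | TASK_UNINTERRUPTIBLE, so masking out TASK_WAKEKILL and
 * TASK_NOLOAD excludes killable waiters and idle kthreads respectively.
 */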

static long hung_timeout_jiffies(unsigned long last_checked,
				 unsigned long timeout)
{
	/* timeout of 0 will disable the watchdog */
	return timeout ? last_checked - jiffies + timeout * HZ :
		MAX_SCHEDULE_TIMEOUT;
}
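
/*
 * Example, assuming HZ=1000 for illustration: with timeout=120 and a
 * last check 30 seconds ago, this returns 120000 - 30000 = 90000
 * jiffies, i.e. sleep for 90 more seconds. Once the full interval has
 * elapsed the result is <= 0 and the watchdog scans immediately. The
 * unsigned subtraction is safe across jiffies wraparound by design.
 */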

#ifdef CONFIG_SYSCTL
/*
 * Process updating of timeout sysctl
 */
static int proc_dohung_task_timeout_secs(const struct ctl_table *table, int write,
					 void *buffer,
					 size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);

	if (ret || !write)
		goto out;

	wake_up_process(watchdog_task);

 out:
	return ret;
}

/*
 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
 * and hung_task_check_interval_secs
 */
static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
static const struct ctl_table hung_task_sysctls[] = {
#ifdef CONFIG_SMP
	{
		.procname = "hung_task_all_cpu_backtrace",
		.data = &sysctl_hung_task_all_cpu_backtrace,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
#endif /* CONFIG_SMP */
	{
		.procname = "hung_task_panic",
		.data = &sysctl_hung_task_panic,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
	{
		.procname = "hung_task_check_count",
		.data = &sysctl_hung_task_check_count,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
	},
	{
		.procname = "hung_task_timeout_secs",
		.data = &sysctl_hung_task_timeout_secs,
		.maxlen = sizeof(unsigned long),
		.mode = 0644,
		.proc_handler = proc_dohung_task_timeout_secs,
		.extra2 = (void *)&hung_task_timeout_max,
	},
	{
		.procname = "hung_task_check_interval_secs",
		.data = &sysctl_hung_task_check_interval_secs,
		.maxlen = sizeof(unsigned long),
		.mode = 0644,
		.proc_handler = proc_dohung_task_timeout_secs,
		.extra2 = (void *)&hung_task_timeout_max,
	},
	{
		.procname = "hung_task_warnings",
		.data = &sysctl_hung_task_warnings,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_NEG_ONE,
	},
	{
		.procname = "hung_task_detect_count",
		.data = &sysctl_hung_task_detect_count,
		.maxlen = sizeof(unsigned long),
		.mode = 0444,
		.proc_handler = proc_doulongvec_minmax,
	},
};

static void __init hung_task_sysctl_init(void)
{
	register_sysctl_init("kernel", hung_task_sysctls);
}
#else
#define hung_task_sysctl_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
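
/*
 * All of the above land under /proc/sys/kernel/. For example (values
 * are illustrative):
 *
 *   echo 300 > /proc/sys/kernel/hung_task_timeout_secs   # report after 5 min
 *   echo 1   > /proc/sys/kernel/hung_task_panic          # panic on detection
 *   echo 0   > /proc/sys/kernel/hung_task_timeout_secs   # disable checking
 *
 * hung_task_detect_count is read-only (mode 0444) and just exposes the
 * since-boot counter incremented in check_hung_task().
 */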

static atomic_t reset_hung_task = ATOMIC_INIT(0);

void reset_hung_task_detector(void)
{
	atomic_set(&reset_hung_task, 1);
}
EXPORT_SYMBOL_GPL(reset_hung_task_detector);

static bool hung_detector_suspended;

static int hungtask_pm_notify(struct notifier_block *self,
			      unsigned long action, void *hcpu)
{
	switch (action) {
	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
	case PM_RESTORE_PREPARE:
		hung_detector_suspended = true;
		break;
	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
	case PM_POST_RESTORE:
		hung_detector_suspended = false;
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}
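
/*
 * Both flags above are consumed by the watchdog loop below: a pending
 * reset_hung_task makes it skip exactly one scan (the atomic_xchg both
 * reads and clears the flag), while hung_detector_suspended suppresses
 * scans for the whole suspend/hibernate window, where stale
 * last_switch_time values could otherwise yield spurious reports.
 */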

/*
 * kthread which checks for tasks stuck in D state
 */
static int watchdog(void *dummy)
{
	unsigned long hung_last_checked = jiffies;

	set_user_nice(current, 0);

	for ( ; ; ) {
		unsigned long timeout = sysctl_hung_task_timeout_secs;
		unsigned long interval = sysctl_hung_task_check_interval_secs;
		long t;

		if (interval == 0)
			interval = timeout;
		interval = min_t(unsigned long, interval, timeout);
		t = hung_timeout_jiffies(hung_last_checked, interval);
		if (t <= 0) {
			if (!atomic_xchg(&reset_hung_task, 0) &&
			    !hung_detector_suspended)
				check_hung_uninterruptible_tasks(timeout);
			hung_last_checked = jiffies;
			continue;
		}
		schedule_timeout_interruptible(t);
	}

	return 0;
}
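
/*
 * With the defaults (hung_task_check_interval_secs=0, so the interval
 * falls back to the timeout, commonly 120s), khungtaskd wakes roughly
 * once per timeout period. Setting a smaller interval tightens the
 * worst-case detection latency, which otherwise approaches
 * interval + timeout seconds after the last context switch; the
 * interval is clamped so it never exceeds the timeout.
 */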

static int __init hung_task_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	/* Disable hung task detector on suspend */
	pm_notifier(hungtask_pm_notify, 0);

	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
	hung_task_sysctl_init();

	return 0;
}
subsys_initcall(hung_task_init);