Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/kernel/locking/rtmutex.c
25923 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* RT-Mutexes: simple blocking mutual exclusion locks with PI support
4
*
5
* started by Ingo Molnar and Thomas Gleixner.
6
*
7
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <[email protected]>
8
* Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <[email protected]>
9
* Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
10
* Copyright (C) 2006 Esben Nielsen
11
* Adaptive Spinlocks:
12
* Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
13
* and Peter Morreale,
14
* Adaptive Spinlocks simplification:
15
* Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <[email protected]>
16
*
17
* See Documentation/locking/rt-mutex-design.rst for details.
18
*/
19
#include <linux/sched.h>
20
#include <linux/sched/debug.h>
21
#include <linux/sched/deadline.h>
22
#include <linux/sched/signal.h>
23
#include <linux/sched/rt.h>
24
#include <linux/sched/wake_q.h>
25
#include <linux/ww_mutex.h>
26
27
#include <trace/events/lock.h>
28
29
#include "rtmutex_common.h"
30
#include "lock_events.h"
31
32
/*
 * ww_mutex glue.
 *
 * When WW_RT is defined, build_ww_mutex() is a compile-time true and
 * "ww_mutex.h" is included. Otherwise build_ww_mutex() is a compile-time
 * false, ww_container_of() yields NULL and the ww_mutex hooks used by
 * this file are the empty stand-ins below.
 */
#ifdef WW_RT
# define build_ww_mutex()	(true)
# define ww_container_of(rtm)	container_of(rtm, struct ww_mutex, base)
# include "ww_mutex.h"
#else
# define build_ww_mutex()	(false)
# define ww_container_of(rtm)	NULL

/* Stand-in: does nothing and reports success (0). */
static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
					struct rt_mutex *lock,
					struct ww_acquire_ctx *ww_ctx,
					struct wake_q_head *wake_q)
{
	return 0;
}

/* Stand-in: does nothing. */
static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
					    struct ww_acquire_ctx *ww_ctx,
					    struct wake_q_head *wake_q)
{
}

/* Stand-in: does nothing. */
static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
					  struct ww_acquire_ctx *ww_ctx)
{
}

/* Stand-in: does nothing and reports success (0). */
static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
					struct rt_mutex_waiter *waiter,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

#endif
67
68
/*
69
* lock->owner state tracking:
70
*
71
* lock->owner holds the task_struct pointer of the owner. Bit 0
72
* is used to keep track of the "lock has waiters" state.
73
*
74
* owner bit0
75
* NULL 0 lock is free (fast acquire possible)
76
* NULL 1 lock is free and has waiters and the top waiter
77
* is going to take the lock*
78
* taskpointer 0 lock is held (fast release possible)
79
* taskpointer 1 lock is held and has waiters**
80
*
81
* The fast atomic compare exchange based acquire and release is only
82
* possible when bit 0 of lock->owner is 0.
83
*
84
* (*) It also can be a transitional state when grabbing the lock
85
* with ->wait_lock held. To prevent any fast path cmpxchg to the lock,
86
* we need to set the bit0 before looking at the lock, and the owner may be
87
* NULL in this small time, hence this can be a transitional state.
88
*
89
* (**) There is a small time when bit 0 is set but there are no
90
* waiters. This can happen when grabbing the lock in the slow path.
91
* To prevent a cmpxchg of the owner releasing the lock, we need to
92
* set this bit before looking at the lock.
93
*/
94
95
static __always_inline struct task_struct *
96
rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
97
{
98
unsigned long val = (unsigned long)owner;
99
100
if (rt_mutex_has_waiters(lock))
101
val |= RT_MUTEX_HAS_WAITERS;
102
103
return (struct task_struct *)val;
104
}
105
106
static __always_inline void
107
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
108
{
109
/*
110
* lock->wait_lock is held but explicit acquire semantics are needed
111
* for a new lock owner so WRITE_ONCE is insufficient.
112
*/
113
xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
114
}
115
116
static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
117
{
118
/* lock->wait_lock is held so the unlock provides release semantics. */
119
WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
120
}
121
122
static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
123
{
124
lock->owner = (struct task_struct *)
125
((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
126
}
127
128
static __always_inline void
129
fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
130
{
131
unsigned long owner, *p = (unsigned long *) &lock->owner;
132
133
if (rt_mutex_has_waiters(lock))
134
return;
135
136
/*
137
* The rbtree has no waiters enqueued, now make sure that the
138
* lock->owner still has the waiters bit set, otherwise the
139
* following can happen:
140
*
141
* CPU 0 CPU 1 CPU2
142
* l->owner=T1
143
* rt_mutex_lock(l)
144
* lock(l->lock)
145
* l->owner = T1 | HAS_WAITERS;
146
* enqueue(T2)
147
* boost()
148
* unlock(l->lock)
149
* block()
150
*
151
* rt_mutex_lock(l)
152
* lock(l->lock)
153
* l->owner = T1 | HAS_WAITERS;
154
* enqueue(T3)
155
* boost()
156
* unlock(l->lock)
157
* block()
158
* signal(->T2) signal(->T3)
159
* lock(l->lock)
160
* dequeue(T2)
161
* deboost()
162
* unlock(l->lock)
163
* lock(l->lock)
164
* dequeue(T3)
165
* ==> wait list is empty
166
* deboost()
167
* unlock(l->lock)
168
* lock(l->lock)
169
* fixup_rt_mutex_waiters()
170
* if (wait_list_empty(l) {
171
* l->owner = owner
172
* owner = l->owner & ~HAS_WAITERS;
173
* ==> l->owner = T1
174
* }
175
* lock(l->lock)
176
* rt_mutex_unlock(l) fixup_rt_mutex_waiters()
177
* if (wait_list_empty(l) {
178
* owner = l->owner & ~HAS_WAITERS;
179
* cmpxchg(l->owner, T1, NULL)
180
* ===> Success (l->owner = NULL)
181
*
182
* l->owner = owner
183
* ==> l->owner = T1
184
* }
185
*
186
* With the check for the waiter bit in place T3 on CPU2 will not
187
* overwrite. All tasks fiddling with the waiters bit are
188
* serialized by l->lock, so nothing else can modify the waiters
189
* bit. If the bit is set then nothing can change l->owner either
190
* so the simple RMW is safe. The cmpxchg() will simply fail if it
191
* happens in the middle of the RMW because the waiters bit is
192
* still set.
193
*/
194
owner = READ_ONCE(*p);
195
if (owner & RT_MUTEX_HAS_WAITERS) {
196
/*
197
* See rt_mutex_set_owner() and rt_mutex_clear_owner() on
198
* why xchg_acquire() is used for updating owner for
199
* locking and WRITE_ONCE() for unlocking.
200
*
201
* WRITE_ONCE() would work for the acquire case too, but
202
* in case that the lock acquisition failed it might
203
* force other lockers into the slow path unnecessarily.
204
*/
205
if (acquire_lock)
206
xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
207
else
208
WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
209
}
210
}
211
212
/*
213
* We can speed up the acquire/release, if there's no debugging state to be
214
* set up.
215
*/
216
#ifndef CONFIG_DEBUG_RT_MUTEXES
217
/*
 * Fast path helper: try to switch lock->owner from @old to @new with
 * acquire ordering. Returns the result of try_cmpxchg_acquire().
 */
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	struct task_struct *expected = old;

	return try_cmpxchg_acquire(&lock->owner, &expected, new);
}
223
224
/* Fast path acquire: succeeds only if lock->owner can go from NULL to current. */
static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
{
	struct task_struct *me = current;

	return rt_mutex_cmpxchg_acquire(lock, NULL, me);
}
228
229
/*
 * Fast path helper: try to switch lock->owner from @old to @new with
 * release ordering. Returns the result of try_cmpxchg_release().
 */
static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	struct task_struct *expected = old;

	return try_cmpxchg_release(&lock->owner, &expected, new);
}
235
236
/*
237
* Callers must hold the ->wait_lock -- which is the whole purpose as we force
238
* all future threads that attempt to [Rmw] the lock to the slowpath. As such
239
* relaxed semantics suffice.
240
*/
241
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
242
{
243
unsigned long *p = (unsigned long *) &lock->owner;
244
unsigned long owner, new;
245
246
owner = READ_ONCE(*p);
247
do {
248
new = owner | RT_MUTEX_HAS_WAITERS;
249
} while (!try_cmpxchg_relaxed(p, &owner, new));
250
251
/*
252
* The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
253
* operations in the event of contention. Ensure the successful
254
* cmpxchg is visible.
255
*/
256
smp_mb__after_atomic();
257
}
258
259
/*
260
* Safe fastpath aware unlock:
261
* 1) Clear the waiters bit
262
* 2) Drop lock->wait_lock
263
* 3) Try to unlock the lock with cmpxchg
264
*/
265
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
266
unsigned long flags)
267
__releases(lock->wait_lock)
268
{
269
struct task_struct *owner = rt_mutex_owner(lock);
270
271
clear_rt_mutex_waiters(lock);
272
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
273
/*
274
* If a new waiter comes in between the unlock and the cmpxchg
275
* we have two situations:
276
*
277
* unlock(wait_lock);
278
* lock(wait_lock);
279
* cmpxchg(p, owner, 0) == owner
280
* mark_rt_mutex_waiters(lock);
281
* acquire(lock);
282
* or:
283
*
284
* unlock(wait_lock);
285
* lock(wait_lock);
286
* mark_rt_mutex_waiters(lock);
287
*
288
* cmpxchg(p, owner, 0) != owner
289
* enqueue_waiter();
290
* unlock(wait_lock);
291
* lock(wait_lock);
292
* wake waiter();
293
* unlock(wait_lock);
294
* lock(wait_lock);
295
* acquire(lock);
296
*/
297
return rt_mutex_cmpxchg_release(lock, owner, NULL);
298
}
299
300
#else
301
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
302
struct task_struct *old,
303
struct task_struct *new)
304
{
305
return false;
306
307
}
308
309
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock);
310
311
static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
312
{
313
/*
314
* With debug enabled rt_mutex_cmpxchg trylock() will always fail.
315
*
316
* Avoid unconditionally taking the slow path by using
317
* rt_mutex_slow_trylock() which is covered by the debug code and can
318
* acquire a non-contended rtmutex.
319
*/
320
return rt_mutex_slowtrylock(lock);
321
}
322
323
static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
324
struct task_struct *old,
325
struct task_struct *new)
326
{
327
return false;
328
}
329
330
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
331
{
332
lock->owner = (struct task_struct *)
333
((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
334
}
335
336
/*
337
* Simple slow path only version: lock->owner is protected by lock->wait_lock.
338
*/
339
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
340
unsigned long flags)
341
__releases(lock->wait_lock)
342
{
343
lock->owner = NULL;
344
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
345
return true;
346
}
347
#endif
348
349
static __always_inline int __waiter_prio(struct task_struct *task)
350
{
351
int prio = task->prio;
352
353
if (!rt_or_dl_prio(prio))
354
return DEFAULT_PRIO;
355
356
return prio;
357
}
358
359
/*
360
* Update the waiter->tree copy of the sort keys.
361
*/
362
static __always_inline void
363
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
364
{
365
lockdep_assert_held(&waiter->lock->wait_lock);
366
lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));
367
368
waiter->tree.prio = __waiter_prio(task);
369
waiter->tree.deadline = task->dl.deadline;
370
}
371
372
/*
373
* Update the waiter->pi_tree copy of the sort keys (from the tree copy).
374
*/
375
static __always_inline void
376
waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
377
{
378
lockdep_assert_held(&waiter->lock->wait_lock);
379
lockdep_assert_held(&task->pi_lock);
380
lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));
381
382
waiter->pi_tree.prio = waiter->tree.prio;
383
waiter->pi_tree.deadline = waiter->tree.deadline;
384
}
385
386
/*
 * Build a temporary sort key (task_to_waiter_node) or a temporary waiter
 * carrying only that key (task_to_waiter) from task @p.
 *
 * Only use with rt_waiter_node_{less,equal}()
 *
 * Each macro expands to the address of a compound literal. The whole
 * expansion is parenthesized so that it behaves like an ordinary pointer
 * expression no matter which operator is applied to it at the use site.
 * @p is evaluated more than once, so pass an expression without side
 * effects.
 */
#define task_to_waiter_node(p)	\
	(&(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline })
#define task_to_waiter(p)	\
	(&(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) })
393
394
static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
395
struct rt_waiter_node *right)
396
{
397
if (left->prio < right->prio)
398
return 1;
399
400
/*
401
* If both waiters have dl_prio(), we check the deadlines of the
402
* associated tasks.
403
* If left waiter has a dl_prio(), and we didn't return 1 above,
404
* then right waiter has a dl_prio() too.
405
*/
406
if (dl_prio(left->prio))
407
return dl_time_before(left->deadline, right->deadline);
408
409
return 0;
410
}
411
412
static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
413
struct rt_waiter_node *right)
414
{
415
if (left->prio != right->prio)
416
return 0;
417
418
/*
419
* If both waiters have dl_prio(), we check the deadlines of the
420
* associated tasks.
421
* If left waiter has a dl_prio(), and we didn't return 0 above,
422
* then right waiter has a dl_prio() too.
423
*/
424
if (dl_prio(left->prio))
425
return left->deadline == right->deadline;
426
427
return 1;
428
}
429
430
static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
431
struct rt_mutex_waiter *top_waiter)
432
{
433
if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
434
return true;
435
436
#ifdef RT_MUTEX_BUILD_SPINLOCKS
437
/*
438
* Note that RT tasks are excluded from same priority (lateral)
439
* steals to prevent the introduction of an unbounded latency.
440
*/
441
if (rt_or_dl_prio(waiter->tree.prio))
442
return false;
443
444
return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
445
#else
446
return false;
447
#endif
448
}
449
450
#define __node_2_waiter(node) \
451
rb_entry((node), struct rt_mutex_waiter, tree.entry)
452
453
static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
454
{
455
struct rt_mutex_waiter *aw = __node_2_waiter(a);
456
struct rt_mutex_waiter *bw = __node_2_waiter(b);
457
458
if (rt_waiter_node_less(&aw->tree, &bw->tree))
459
return 1;
460
461
if (!build_ww_mutex())
462
return 0;
463
464
if (rt_waiter_node_less(&bw->tree, &aw->tree))
465
return 0;
466
467
/* NOTE: relies on waiter->ww_ctx being set before insertion */
468
if (aw->ww_ctx) {
469
if (!bw->ww_ctx)
470
return 1;
471
472
return (signed long)(aw->ww_ctx->stamp -
473
bw->ww_ctx->stamp) < 0;
474
}
475
476
return 0;
477
}
478
479
static __always_inline void
480
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
481
{
482
lockdep_assert_held(&lock->wait_lock);
483
484
rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
485
}
486
487
static __always_inline void
488
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
489
{
490
lockdep_assert_held(&lock->wait_lock);
491
492
if (RB_EMPTY_NODE(&waiter->tree.entry))
493
return;
494
495
rb_erase_cached(&waiter->tree.entry, &lock->waiters);
496
RB_CLEAR_NODE(&waiter->tree.entry);
497
}
498
499
#define __node_2_rt_node(node) \
500
rb_entry((node), struct rt_waiter_node, entry)
501
502
static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
503
{
504
return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
505
}
506
507
static __always_inline void
508
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
509
{
510
lockdep_assert_held(&task->pi_lock);
511
512
rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
513
}
514
515
static __always_inline void
516
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
517
{
518
lockdep_assert_held(&task->pi_lock);
519
520
if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
521
return;
522
523
rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
524
RB_CLEAR_NODE(&waiter->pi_tree.entry);
525
}
526
527
static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
528
struct task_struct *p)
529
{
530
struct task_struct *pi_task = NULL;
531
532
lockdep_assert_held(&lock->wait_lock);
533
lockdep_assert(rt_mutex_owner(lock) == p);
534
lockdep_assert_held(&p->pi_lock);
535
536
if (task_has_pi_waiters(p))
537
pi_task = task_top_pi_waiter(p)->task;
538
539
rt_mutex_setprio(p, pi_task);
540
}
541
542
/* RT mutex specific wake_q wrappers */
543
static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
544
struct task_struct *task,
545
unsigned int wake_state)
546
{
547
if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
548
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
549
WARN_ON_ONCE(wqh->rtlock_task);
550
get_task_struct(task);
551
wqh->rtlock_task = task;
552
} else {
553
wake_q_add(&wqh->head, task);
554
}
555
}
556
557
static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
558
struct rt_mutex_waiter *w)
559
{
560
rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
561
}
562
563
static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
564
{
565
if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
566
wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
567
put_task_struct(wqh->rtlock_task);
568
wqh->rtlock_task = NULL;
569
}
570
571
if (!wake_q_empty(&wqh->head))
572
wake_up_q(&wqh->head);
573
574
/* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
575
preempt_enable();
576
}
577
578
/*
579
* Deadlock detection is conditional:
580
*
581
* If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
582
* if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
583
*
584
* If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
585
* conducted independent of the detect argument.
586
*
587
* If the waiter argument is NULL this indicates the deboost path and
588
* deadlock detection is disabled independent of the detect argument
589
* and the config settings.
590
*/
591
static __always_inline bool
592
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
593
enum rtmutex_chainwalk chwalk)
594
{
595
if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
596
return waiter != NULL;
597
return chwalk == RT_MUTEX_FULL_CHAINWALK;
598
}
599
600
static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
601
{
602
return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
603
}
604
605
/*
606
* Adjust the priority chain. Also used for deadlock detection.
607
* Decreases task's usage by one - may thus free the task.
608
*
609
* @task: the task owning the mutex (owner) for which a chain walk is
610
* probably needed
611
* @chwalk: do we have to carry out deadlock detection?
612
* @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
613
* things for a task that has just got its priority adjusted, and
614
* is waiting on a mutex)
615
* @next_lock: the mutex on which the owner of @orig_lock was blocked before
616
* we dropped its pi_lock. Is never dereferenced, only used for
617
* comparison to detect lock chain changes.
618
* @orig_waiter: rt_mutex_waiter struct for the task that has just donated
619
* its priority to the mutex owner (can be NULL in the case
620
* depicted above or if the top waiter is gone away and we are
621
* actually deboosting the owner)
622
* @top_task: the current top waiter
623
*
624
* Returns 0 or -EDEADLK.
625
*
626
* Chain walk basics and protection scope
627
*
628
* [R] refcount on task
629
* [Pn] task->pi_lock held
630
* [L] rtmutex->wait_lock held
631
*
632
* Normal locking order:
633
*
634
* rtmutex->wait_lock
635
* task->pi_lock
636
*
637
* Step Description Protected by
638
* function arguments:
639
* @task [R]
640
* @orig_lock if != NULL @top_task is blocked on it
641
* @next_lock Unprotected. Cannot be
642
* dereferenced. Only used for
643
* comparison.
644
* @orig_waiter if != NULL @top_task is blocked on it
645
* @top_task current, or in case of proxy
646
* locking protected by calling
647
* code
648
* again:
649
* loop_sanity_check();
650
* retry:
651
* [1] lock(task->pi_lock); [R] acquire [P1]
652
* [2] waiter = task->pi_blocked_on; [P1]
653
* [3] check_exit_conditions_1(); [P1]
654
* [4] lock = waiter->lock; [P1]
655
* [5] if (!try_lock(lock->wait_lock)) { [P1] try to acquire [L]
656
* unlock(task->pi_lock); release [P1]
657
* goto retry;
658
* }
659
* [6] check_exit_conditions_2(); [P1] + [L]
660
* [7] requeue_lock_waiter(lock, waiter); [P1] + [L]
661
* [8] unlock(task->pi_lock); release [P1]
662
* put_task_struct(task); release [R]
663
* [9] check_exit_conditions_3(); [L]
664
* [10] task = owner(lock); [L]
665
* get_task_struct(task); [L] acquire [R]
666
* lock(task->pi_lock); [L] acquire [P2]
667
* [11] requeue_pi_waiter(tsk, waiters(lock));[P2] + [L]
668
* [12] check_exit_conditions_4(); [P2] + [L]
669
* [13] unlock(task->pi_lock); release [P2]
670
* unlock(lock->wait_lock); release [L]
671
* goto again;
672
*
673
* Where P1 is the blocking task and P2 is the lock owner; going up one step
674
* the owner becomes the next blocked task etc..
675
*
676
*
677
*/
678
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
679
enum rtmutex_chainwalk chwalk,
680
struct rt_mutex_base *orig_lock,
681
struct rt_mutex_base *next_lock,
682
struct rt_mutex_waiter *orig_waiter,
683
struct task_struct *top_task)
684
{
685
struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
686
struct rt_mutex_waiter *prerequeue_top_waiter;
687
/* depth counts the chain walk steps and is checked against max_lock_depth. */
int ret = 0, depth = 0;
688
struct rt_mutex_base *lock;
689
bool detect_deadlock;
690
/* Cleared once the walk only continues for deadlock detection. */
bool requeue = true;
691
692
detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
693
694
/*
695
* The (de)boosting is a step by step approach with a lot of
696
* pitfalls. We want this to be preemptible and we want to hold a
697
* maximum of two locks per step. So we have to check
698
* carefully whether things change under us.
699
*/
700
again:
701
/*
702
* We limit the lock chain length for each invocation.
703
*/
704
if (++depth > max_lock_depth) {
705
static int prev_max;
706
707
/*
708
* Print this only once. If the admin changes the limit,
709
* print a new message when reaching the limit again.
710
*/
711
if (prev_max != max_lock_depth) {
712
prev_max = max_lock_depth;
713
printk(KERN_WARNING "Maximum lock depth %d reached "
714
"task: %s (%d)\n", max_lock_depth,
715
top_task->comm, task_pid_nr(top_task));
716
}
717
/* No locks are held here; only the reference on @task must be dropped. */
put_task_struct(task);
718
719
return -EDEADLK;
720
}
721
722
/*
723
* We are fully preemptible here and only hold the refcount on
724
* @task. So everything can have changed under us since the
725
* caller or our own code below (goto retry/again) dropped all
726
* locks.
727
*/
728
retry:
729
/*
730
* [1] Task cannot go away as we did a get_task() before !
731
*/
732
raw_spin_lock_irq(&task->pi_lock);
733
734
/*
735
* [2] Get the waiter on which @task is blocked on.
736
*/
737
waiter = task->pi_blocked_on;
738
739
/*
740
* [3] check_exit_conditions_1() protected by task->pi_lock.
741
*/
742
743
/*
744
* Check whether the end of the boosting chain has been
745
* reached or the state of the chain has changed while we
746
* dropped the locks.
747
*/
748
if (!waiter)
749
goto out_unlock_pi;
750
751
/*
752
* Check the orig_waiter state. After we dropped the locks,
753
* the previous owner of the lock might have released the lock.
754
*/
755
if (orig_waiter && !rt_mutex_owner(orig_lock))
756
goto out_unlock_pi;
757
758
/*
759
* We dropped all locks after taking a refcount on @task, so
760
* the task might have moved on in the lock chain or even left
761
* the chain completely and blocks now on an unrelated lock or
762
* on @orig_lock.
763
*
764
* We stored the lock on which @task was blocked in @next_lock,
765
* so we can detect the chain change.
766
*/
767
if (next_lock != waiter->lock)
768
goto out_unlock_pi;
769
770
/*
771
* There could be 'spurious' loops in the lock graph due to ww_mutex,
772
* consider:
773
*
774
* P1: A, ww_A, ww_B
775
* P2: ww_B, ww_A
776
* P3: A
777
*
778
* P3 should not return -EDEADLK because it gets trapped in the cycle
779
* created by P1 and P2 (which will resolve -- and runs into
780
* max_lock_depth above). Therefore disable detect_deadlock such that
781
* the below termination condition can trigger once all relevant tasks
782
* are boosted.
783
*
784
* Even when we start with ww_mutex we can disable deadlock detection,
785
* since we would suppress a ww_mutex induced deadlock at [6] anyway.
786
* Suppressing it here however is not sufficient since we might still
787
* hit [6] due to adjustment driven iteration.
788
*
789
* NOTE: if someone were to create a deadlock between 2 ww_classes we'd
790
* utterly fail to report it; lockdep should.
791
*/
792
if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
793
detect_deadlock = false;
794
795
/*
796
* Drop out, when the task has no waiters. Note,
797
* top_waiter can be NULL, when we are in the deboosting
798
* mode!
799
*/
800
if (top_waiter) {
801
if (!task_has_pi_waiters(task))
802
goto out_unlock_pi;
803
/*
804
* If deadlock detection is off, we stop here if we
805
* are not the top pi waiter of the task. If deadlock
806
* detection is enabled we continue, but stop the
807
* requeueing in the chain walk.
808
*/
809
if (top_waiter != task_top_pi_waiter(task)) {
810
if (!detect_deadlock)
811
goto out_unlock_pi;
812
else
813
requeue = false;
814
}
815
}
816
817
/*
818
* If the waiter priority is the same as the task priority
819
* then there is no further priority adjustment necessary. If
820
* deadlock detection is off, we stop the chain walk. If its
821
* enabled we continue, but stop the requeueing in the chain
822
* walk.
823
*/
824
if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
825
if (!detect_deadlock)
826
goto out_unlock_pi;
827
else
828
requeue = false;
829
}
830
831
/*
832
* [4] Get the next lock; per holding task->pi_lock we can't unblock
833
* and guarantee @lock's existence.
834
*/
835
lock = waiter->lock;
836
/*
837
* [5] We need to trylock here as we are holding task->pi_lock,
838
* which is the reverse lock order versus the other rtmutex
839
* operations.
840
*
841
* Per the above, holding task->pi_lock guarantees lock exists, so
842
* inverting this lock order is infeasible from a life-time
843
* perspective.
844
*/
845
if (!raw_spin_trylock(&lock->wait_lock)) {
846
raw_spin_unlock_irq(&task->pi_lock);
847
cpu_relax();
848
goto retry;
849
}
850
851
/*
852
* [6] check_exit_conditions_2() protected by task->pi_lock and
853
* lock->wait_lock.
854
*
855
* Deadlock detection. If the lock is the same as the original
856
* lock which caused us to walk the lock chain or if the
857
* current lock is owned by the task which initiated the chain
858
* walk, we detected a deadlock.
859
*/
860
if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
861
ret = -EDEADLK;
862
863
/*
864
* When the deadlock is due to ww_mutex; also see above. Don't
865
* report the deadlock and instead let the ww_mutex wound/die
866
* logic pick which of the contending threads gets -EDEADLK.
867
*
868
* NOTE: assumes the cycle only contains a single ww_class; any
869
* other configuration and we fail to report; also, see
870
* lockdep.
871
*/
872
if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
873
ret = 0;
874
875
raw_spin_unlock(&lock->wait_lock);
876
goto out_unlock_pi;
877
}
878
879
/*
880
* If we just follow the lock chain for deadlock detection, no
881
* need to do all the requeue operations. To avoid a truckload
882
* of conditionals around the various places below, just do the
883
* minimum chain walk checks.
884
*/
885
if (!requeue) {
886
/*
887
* No requeue[7] here. Just release @task [8]
888
*/
889
raw_spin_unlock(&task->pi_lock);
890
put_task_struct(task);
891
892
/*
893
* [9] check_exit_conditions_3 protected by lock->wait_lock.
894
* If there is no owner of the lock, end of chain.
895
*/
896
if (!rt_mutex_owner(lock)) {
897
raw_spin_unlock_irq(&lock->wait_lock);
898
return 0;
899
}
900
901
/* [10] Grab the next task, i.e. owner of @lock */
902
task = get_task_struct(rt_mutex_owner(lock));
903
raw_spin_lock(&task->pi_lock);
904
905
/*
906
* No requeue [11] here. We just do deadlock detection.
907
*
908
* [12] Store whether owner is blocked
909
* itself. Decision is made after dropping the locks
910
*/
911
next_lock = task_blocked_on_lock(task);
912
/*
913
* Get the top waiter for the next iteration
914
*/
915
top_waiter = rt_mutex_top_waiter(lock);
916
917
/* [13] Drop locks */
918
raw_spin_unlock(&task->pi_lock);
919
raw_spin_unlock_irq(&lock->wait_lock);
920
921
/* If owner is not blocked, end of chain. */
922
if (!next_lock)
923
goto out_put_task;
924
goto again;
925
}
926
927
/*
928
* Store the current top waiter before doing the requeue
929
* operation on @lock. We need it for the boost/deboost
930
* decision below.
931
*/
932
prerequeue_top_waiter = rt_mutex_top_waiter(lock);
933
934
/* [7] Requeue the waiter in the lock waiter tree. */
935
rt_mutex_dequeue(lock, waiter);
936
937
/*
938
* Update the waiter prio fields now that we're dequeued.
939
*
940
* These values can have changed through either:
941
*
942
* sys_sched_set_scheduler() / sys_sched_setattr()
943
*
944
* or
945
*
946
* DL CBS enforcement advancing the effective deadline.
947
*/
948
waiter_update_prio(waiter, task);
949
950
rt_mutex_enqueue(lock, waiter);
951
952
/*
953
* [8] Release the (blocking) task in preparation for
954
* taking the owner task in [10].
955
*
956
* Since we hold lock->wait_lock, task cannot unblock, even if we
957
* release task->pi_lock.
958
*/
959
raw_spin_unlock(&task->pi_lock);
960
put_task_struct(task);
961
962
/*
963
* [9] check_exit_conditions_3 protected by lock->wait_lock.
964
*
965
* We must abort the chain walk if there is no lock owner even
966
* in the dead lock detection case, as we have nothing to
967
* follow here. This is the end of the chain we are walking.
968
*/
969
if (!rt_mutex_owner(lock)) {
970
/*
971
* If the requeue [7] above changed the top waiter,
972
* then we need to wake the new top waiter up to try
973
* to get the lock.
974
*/
975
top_waiter = rt_mutex_top_waiter(lock);
976
if (prerequeue_top_waiter != top_waiter)
977
wake_up_state(top_waiter->task, top_waiter->wake_state);
978
raw_spin_unlock_irq(&lock->wait_lock);
979
return 0;
980
}
981
982
/*
983
* [10] Grab the next task, i.e. the owner of @lock
984
*
985
* Per holding lock->wait_lock and checking for !owner above, there
986
* must be an owner and it cannot go away.
987
*/
988
task = get_task_struct(rt_mutex_owner(lock));
989
raw_spin_lock(&task->pi_lock);
990
991
/* [11] requeue the pi waiters if necessary */
992
if (waiter == rt_mutex_top_waiter(lock)) {
993
/*
994
* The waiter became the new top (highest priority)
995
* waiter on the lock. Replace the previous top waiter
996
* in the owner tasks pi waiters tree with this waiter
997
* and adjust the priority of the owner.
998
*/
999
rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
1000
waiter_clone_prio(waiter, task);
1001
rt_mutex_enqueue_pi(task, waiter);
1002
rt_mutex_adjust_prio(lock, task);
1003
1004
} else if (prerequeue_top_waiter == waiter) {
1005
/*
1006
* The waiter was the top waiter on the lock, but is
1007
* no longer the top priority waiter. Replace waiter in
1008
* the owner tasks pi waiters tree with the new top
1009
* (highest priority) waiter and adjust the priority
1010
* of the owner.
1011
* The new top waiter is stored in @waiter so that
1012
* @waiter == @top_waiter evaluates to true below and
1013
* we continue to deboost the rest of the chain.
1014
*/
1015
rt_mutex_dequeue_pi(task, waiter);
1016
waiter = rt_mutex_top_waiter(lock);
1017
waiter_clone_prio(waiter, task);
1018
rt_mutex_enqueue_pi(task, waiter);
1019
rt_mutex_adjust_prio(lock, task);
1020
} else {
1021
/*
1022
* Nothing changed. No need to do any priority
1023
* adjustment.
1024
*/
1025
}
1026
1027
/*
1028
* [12] check_exit_conditions_4() protected by task->pi_lock
1029
* and lock->wait_lock. The actual decisions are made after we
1030
* dropped the locks.
1031
*
1032
* Check whether the task which owns the current lock is pi
1033
* blocked itself. If yes we store a pointer to the lock for
1034
* the lock chain change detection above. After we dropped
1035
* task->pi_lock next_lock cannot be dereferenced anymore.
1036
*/
1037
next_lock = task_blocked_on_lock(task);
1038
/*
1039
* Store the top waiter of @lock for the end of chain walk
1040
* decision below.
1041
*/
1042
top_waiter = rt_mutex_top_waiter(lock);
1043
1044
/* [13] Drop the locks */
1045
raw_spin_unlock(&task->pi_lock);
1046
raw_spin_unlock_irq(&lock->wait_lock);
1047
1048
/*
1049
* Make the actual exit decisions [12], based on the stored
1050
* values.
1051
*
1052
* We reached the end of the lock chain. Stop right here. No
1053
* point to go back just to figure that out.
1054
*/
1055
if (!next_lock)
1056
goto out_put_task;
1057
1058
/*
1059
* If the current waiter is not the top waiter on the lock,
1060
* then we can stop the chain walk here if we are not in full
1061
* deadlock detection mode.
1062
*/
1063
if (!detect_deadlock && waiter != top_waiter)
1064
goto out_put_task;
1065
1066
goto again;
1067
1068
/* Exit with task->pi_lock held and interrupts disabled. */
out_unlock_pi:
1069
raw_spin_unlock_irq(&task->pi_lock);
1070
/* Exit with no locks held; only the reference on @task is left to drop. */
out_put_task:
1071
put_task_struct(task);
1072
1073
return ret;
1074
}
1075
1076
/*
1077
* Try to take an rt-mutex
1078
*
1079
* Must be called with lock->wait_lock held and interrupts disabled
1080
*
1081
* @lock: The lock to be acquired.
1082
* @task: The task which wants to acquire the lock
1083
* @waiter: The waiter that is queued to the lock's wait tree if the
1084
* callsite called task_blocked_on_lock(), otherwise NULL
1085
*/
1086
static int __sched
1087
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
1088
struct rt_mutex_waiter *waiter)
1089
{
1090
lockdep_assert_held(&lock->wait_lock);
1091
1092
/*
1093
* Before testing whether we can acquire @lock, we set the
1094
* RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
1095
* other tasks which try to modify @lock into the slow path
1096
* and they serialize on @lock->wait_lock.
1097
*
1098
* The RT_MUTEX_HAS_WAITERS bit can have a transitional state
1099
* as explained at the top of this file if and only if:
1100
*
1101
* - There is a lock owner. The caller must fixup the
1102
* transient state if it does a trylock or leaves the lock
1103
* function due to a signal or timeout.
1104
*
1105
* - @task acquires the lock and there are no other
1106
* waiters. This is undone in rt_mutex_set_owner(@task) at
1107
* the end of this function.
1108
*/
1109
mark_rt_mutex_waiters(lock);
1110
1111
/*
1112
* If @lock has an owner, give up.
1113
*/
1114
if (rt_mutex_owner(lock))
1115
return 0;
1116
1117
/*
1118
* If @waiter != NULL, @task has already enqueued the waiter
1119
* into @lock waiter tree. If @waiter == NULL then this is a
1120
* trylock attempt.
1121
*/
1122
if (waiter) {
1123
struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
1124
1125
/*
1126
* If waiter is the highest priority waiter of @lock,
1127
* or allowed to steal it, take it over.
1128
*/
1129
if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
1130
/*
1131
* We can acquire the lock. Remove the waiter from the
1132
* lock waiters tree.
1133
*/
1134
rt_mutex_dequeue(lock, waiter);
1135
} else {
1136
return 0;
1137
}
1138
} else {
1139
/*
1140
* If the lock has waiters already we check whether @task is
1141
* eligible to take over the lock.
1142
*
1143
* If there are no other waiters, @task can acquire
1144
* the lock. @task->pi_blocked_on is NULL, so it does
1145
* not need to be dequeued.
1146
*/
1147
if (rt_mutex_has_waiters(lock)) {
1148
/* Check whether the trylock can steal it. */
1149
if (!rt_mutex_steal(task_to_waiter(task),
1150
rt_mutex_top_waiter(lock)))
1151
return 0;
1152
1153
/*
1154
* The current top waiter stays enqueued. We
1155
* don't have to change anything in the lock
1156
* waiters order.
1157
*/
1158
} else {
1159
/*
1160
* No waiters. Take the lock without the
1161
* pi_lock dance.@task->pi_blocked_on is NULL
1162
* and we have no waiters to enqueue in @task
1163
* pi waiters tree.
1164
*/
1165
goto takeit;
1166
}
1167
}
1168
1169
/*
1170
* Clear @task->pi_blocked_on. Requires protection by
1171
* @task->pi_lock. Redundant operation for the @waiter == NULL
1172
* case, but conditionals are more expensive than a redundant
1173
* store.
1174
*/
1175
raw_spin_lock(&task->pi_lock);
1176
task->pi_blocked_on = NULL;
1177
/*
1178
* Finish the lock acquisition. @task is the new owner. If
1179
* other waiters exist we have to insert the highest priority
1180
* waiter into @task->pi_waiters tree.
1181
*/
1182
if (rt_mutex_has_waiters(lock))
1183
rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
1184
raw_spin_unlock(&task->pi_lock);
1185
1186
takeit:
1187
/*
1188
* This either preserves the RT_MUTEX_HAS_WAITERS bit if there
1189
* are still waiters or clears it.
1190
*/
1191
rt_mutex_set_owner(lock, task);
1192
1193
return 1;
1194
}
1195
1196
/*
1197
* Task blocks on lock.
1198
*
1199
* Prepare waiter and propagate pi chain
1200
*
1201
* This must be called with lock->wait_lock held and interrupts disabled
1202
*/
1203
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
1204
struct rt_mutex_waiter *waiter,
1205
struct task_struct *task,
1206
struct ww_acquire_ctx *ww_ctx,
1207
enum rtmutex_chainwalk chwalk,
1208
struct wake_q_head *wake_q)
1209
{
1210
struct task_struct *owner = rt_mutex_owner(lock);
1211
struct rt_mutex_waiter *top_waiter = waiter;
1212
struct rt_mutex_base *next_lock;
1213
int chain_walk = 0, res;
1214
1215
lockdep_assert_held(&lock->wait_lock);
1216
1217
/*
1218
* Early deadlock detection. We really don't want the task to
1219
* enqueue on itself just to untangle the mess later. It's not
1220
* only an optimization. We drop the locks, so another waiter
1221
* can come in before the chain walk detects the deadlock. So
1222
* the other will detect the deadlock and return -EDEADLOCK,
1223
* which is wrong, as the other waiter is not in a deadlock
1224
* situation.
1225
*
1226
* Except for ww_mutex, in that case the chain walk must already deal
1227
* with spurious cycles, see the comments at [3] and [6].
1228
*/
1229
if (owner == task && !(build_ww_mutex() && ww_ctx))
1230
return -EDEADLK;
1231
1232
raw_spin_lock(&task->pi_lock);
1233
waiter->task = task;
1234
waiter->lock = lock;
1235
waiter_update_prio(waiter, task);
1236
waiter_clone_prio(waiter, task);
1237
1238
/* Get the top priority waiter on the lock */
1239
if (rt_mutex_has_waiters(lock))
1240
top_waiter = rt_mutex_top_waiter(lock);
1241
rt_mutex_enqueue(lock, waiter);
1242
1243
task->pi_blocked_on = waiter;
1244
1245
raw_spin_unlock(&task->pi_lock);
1246
1247
if (build_ww_mutex() && ww_ctx) {
1248
struct rt_mutex *rtm;
1249
1250
/* Check whether the waiter should back out immediately */
1251
rtm = container_of(lock, struct rt_mutex, rtmutex);
1252
res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q);
1253
if (res) {
1254
raw_spin_lock(&task->pi_lock);
1255
rt_mutex_dequeue(lock, waiter);
1256
task->pi_blocked_on = NULL;
1257
raw_spin_unlock(&task->pi_lock);
1258
return res;
1259
}
1260
}
1261
1262
if (!owner)
1263
return 0;
1264
1265
raw_spin_lock(&owner->pi_lock);
1266
if (waiter == rt_mutex_top_waiter(lock)) {
1267
rt_mutex_dequeue_pi(owner, top_waiter);
1268
rt_mutex_enqueue_pi(owner, waiter);
1269
1270
rt_mutex_adjust_prio(lock, owner);
1271
if (owner->pi_blocked_on)
1272
chain_walk = 1;
1273
} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
1274
chain_walk = 1;
1275
}
1276
1277
/* Store the lock on which owner is blocked or NULL */
1278
next_lock = task_blocked_on_lock(owner);
1279
1280
raw_spin_unlock(&owner->pi_lock);
1281
/*
1282
* Even if full deadlock detection is on, if the owner is not
1283
* blocked itself, we can avoid finding this out in the chain
1284
* walk.
1285
*/
1286
if (!chain_walk || !next_lock)
1287
return 0;
1288
1289
/*
1290
* The owner can't disappear while holding a lock,
1291
* so the owner struct is protected by wait_lock.
1292
* Gets dropped in rt_mutex_adjust_prio_chain()!
1293
*/
1294
get_task_struct(owner);
1295
1296
raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q);
1297
1298
res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
1299
next_lock, waiter, task);
1300
1301
raw_spin_lock_irq(&lock->wait_lock);
1302
1303
return res;
1304
}
1305
1306
/*
1307
* Remove the top waiter from the current tasks pi waiter tree and
1308
* queue it up.
1309
*
1310
* Called with lock->wait_lock held and interrupts disabled.
1311
*/
1312
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
1313
struct rt_mutex_base *lock)
1314
{
1315
struct rt_mutex_waiter *waiter;
1316
1317
lockdep_assert_held(&lock->wait_lock);
1318
1319
raw_spin_lock(&current->pi_lock);
1320
1321
waiter = rt_mutex_top_waiter(lock);
1322
1323
/*
1324
* Remove it from current->pi_waiters and deboost.
1325
*
1326
* We must in fact deboost here in order to ensure we call
1327
* rt_mutex_setprio() to update p->pi_top_task before the
1328
* task unblocks.
1329
*/
1330
rt_mutex_dequeue_pi(current, waiter);
1331
rt_mutex_adjust_prio(lock, current);
1332
1333
/*
1334
* As we are waking up the top waiter, and the waiter stays
1335
* queued on the lock until it gets the lock, this lock
1336
* obviously has waiters. Just set the bit here and this has
1337
* the added benefit of forcing all new tasks into the
1338
* slow path making sure no task of lower priority than
1339
* the top waiter can steal this lock.
1340
*/
1341
lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
1342
1343
/*
1344
* We deboosted before waking the top waiter task such that we don't
1345
* run two tasks with the 'same' priority (and ensure the
1346
* p->pi_top_task pointer points to a blocked task). This however can
1347
* lead to priority inversion if we would get preempted after the
1348
* deboost but before waking our donor task, hence the preempt_disable()
1349
* before unlock.
1350
*
1351
* Pairs with preempt_enable() in rt_mutex_wake_up_q();
1352
*/
1353
preempt_disable();
1354
rt_mutex_wake_q_add(wqh, waiter);
1355
raw_spin_unlock(&current->pi_lock);
1356
}
1357
1358
/*
 * Trylock attempt for the current task. Returns the result of
 * try_to_take_rt_mutex(): 1 if the lock was acquired, 0 if not.
 */
static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	int acquired;

	acquired = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * The lock waiters bit was set unconditionally by
	 * try_to_take_rt_mutex(); tidy it up again.
	 */
	fixup_rt_mutex_waiters(lock, true);

	return acquired;
}
1370
1371
/*
1372
* Slow path try-lock function:
1373
*/
1374
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1375
{
1376
unsigned long flags;
1377
int ret;
1378
1379
/*
1380
* If the lock already has an owner we fail to get the lock.
1381
* This can be done without taking the @lock->wait_lock as
1382
* it is only being read, and this is a trylock anyway.
1383
*/
1384
if (rt_mutex_owner(lock))
1385
return 0;
1386
1387
/*
1388
* The mutex has currently no owner. Lock the wait lock and try to
1389
* acquire the lock. We use irqsave here to support early boot calls.
1390
*/
1391
raw_spin_lock_irqsave(&lock->wait_lock, flags);
1392
1393
ret = __rt_mutex_slowtrylock(lock);
1394
1395
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1396
1397
return ret;
1398
}
1399
1400
static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
1401
{
1402
if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1403
return 1;
1404
1405
return rt_mutex_slowtrylock(lock);
1406
}
1407
1408
/*
1409
* Slow path to release a rt-mutex.
1410
*/
1411
static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
1412
{
1413
DEFINE_RT_WAKE_Q(wqh);
1414
unsigned long flags;
1415
1416
/* irqsave required to support early boot calls */
1417
raw_spin_lock_irqsave(&lock->wait_lock, flags);
1418
1419
debug_rt_mutex_unlock(lock);
1420
1421
/*
1422
* We must be careful here if the fast path is enabled. If we
1423
* have no waiters queued we cannot set owner to NULL here
1424
* because of:
1425
*
1426
* foo->lock->owner = NULL;
1427
* rtmutex_lock(foo->lock); <- fast path
1428
* free = atomic_dec_and_test(foo->refcnt);
1429
* rtmutex_unlock(foo->lock); <- fast path
1430
* if (free)
1431
* kfree(foo);
1432
* raw_spin_unlock(foo->lock->wait_lock);
1433
*
1434
* So for the fastpath enabled kernel:
1435
*
1436
* Nothing can set the waiters bit as long as we hold
1437
* lock->wait_lock. So we do the following sequence:
1438
*
1439
* owner = rt_mutex_owner(lock);
1440
* clear_rt_mutex_waiters(lock);
1441
* raw_spin_unlock(&lock->wait_lock);
1442
* if (cmpxchg(&lock->owner, owner, 0) == owner)
1443
* return;
1444
* goto retry;
1445
*
1446
* The fastpath disabled variant is simple as all access to
1447
* lock->owner is serialized by lock->wait_lock:
1448
*
1449
* lock->owner = NULL;
1450
* raw_spin_unlock(&lock->wait_lock);
1451
*/
1452
while (!rt_mutex_has_waiters(lock)) {
1453
/* Drops lock->wait_lock ! */
1454
if (unlock_rt_mutex_safe(lock, flags) == true)
1455
return;
1456
/* Relock the rtmutex and try again */
1457
raw_spin_lock_irqsave(&lock->wait_lock, flags);
1458
}
1459
1460
/*
1461
* The wakeup next waiter path does not suffer from the above
1462
* race. See the comments there.
1463
*
1464
* Queue the next waiter for wakeup once we release the wait_lock.
1465
*/
1466
mark_wakeup_next_waiter(&wqh, lock);
1467
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1468
1469
rt_mutex_wake_up_q(&wqh);
1470
}
1471
1472
static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
1473
{
1474
if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
1475
return;
1476
1477
rt_mutex_slowunlock(lock);
1478
}
1479
1480
#ifdef CONFIG_SMP
1481
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1482
struct rt_mutex_waiter *waiter,
1483
struct task_struct *owner)
1484
{
1485
bool res = true;
1486
1487
rcu_read_lock();
1488
for (;;) {
1489
/* If owner changed, trylock again. */
1490
if (owner != rt_mutex_owner(lock))
1491
break;
1492
/*
1493
* Ensure that @owner is dereferenced after checking that
1494
* the lock owner still matches @owner. If that fails,
1495
* @owner might point to freed memory. If it still matches,
1496
* the rcu_read_lock() ensures the memory stays valid.
1497
*/
1498
barrier();
1499
/*
1500
* Stop spinning when:
1501
* - the lock owner has been scheduled out
1502
* - current is not longer the top waiter
1503
* - current is requested to reschedule (redundant
1504
* for CONFIG_PREEMPT_RCU=y)
1505
* - the VCPU on which owner runs is preempted
1506
*/
1507
if (!owner_on_cpu(owner) || need_resched() ||
1508
!rt_mutex_waiter_is_top_waiter(lock, waiter)) {
1509
res = false;
1510
break;
1511
}
1512
cpu_relax();
1513
}
1514
rcu_read_unlock();
1515
return res;
1516
}
1517
#else
1518
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1519
struct rt_mutex_waiter *waiter,
1520
struct task_struct *owner)
1521
{
1522
return false;
1523
}
1524
#endif
1525
1526
#ifdef RT_MUTEX_BUILD_MUTEX
1527
/*
1528
* Functions required for:
1529
* - rtmutex, futex on all kernels
1530
* - mutex and rwsem substitutions on RT kernels
1531
*/
1532
1533
/*
1534
* Remove a waiter from a lock and give up
1535
*
1536
* Must be called with lock->wait_lock held and interrupts disabled. It must
1537
* have just failed to try_to_take_rt_mutex().
1538
*/
1539
static void __sched remove_waiter(struct rt_mutex_base *lock,
1540
struct rt_mutex_waiter *waiter)
1541
{
1542
bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
1543
struct task_struct *owner = rt_mutex_owner(lock);
1544
struct rt_mutex_base *next_lock;
1545
1546
lockdep_assert_held(&lock->wait_lock);
1547
1548
raw_spin_lock(&current->pi_lock);
1549
rt_mutex_dequeue(lock, waiter);
1550
current->pi_blocked_on = NULL;
1551
raw_spin_unlock(&current->pi_lock);
1552
1553
/*
1554
* Only update priority if the waiter was the highest priority
1555
* waiter of the lock and there is an owner to update.
1556
*/
1557
if (!owner || !is_top_waiter)
1558
return;
1559
1560
raw_spin_lock(&owner->pi_lock);
1561
1562
rt_mutex_dequeue_pi(owner, waiter);
1563
1564
if (rt_mutex_has_waiters(lock))
1565
rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
1566
1567
rt_mutex_adjust_prio(lock, owner);
1568
1569
/* Store the lock on which owner is blocked or NULL */
1570
next_lock = task_blocked_on_lock(owner);
1571
1572
raw_spin_unlock(&owner->pi_lock);
1573
1574
/*
1575
* Don't walk the chain, if the owner task is not blocked
1576
* itself.
1577
*/
1578
if (!next_lock)
1579
return;
1580
1581
/* gets dropped in rt_mutex_adjust_prio_chain()! */
1582
get_task_struct(owner);
1583
1584
raw_spin_unlock_irq(&lock->wait_lock);
1585
1586
rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
1587
next_lock, NULL, current);
1588
1589
raw_spin_lock_irq(&lock->wait_lock);
1590
}
1591
1592
/**
1593
* rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
1594
* @lock: the rt_mutex to take
1595
* @ww_ctx: WW mutex context pointer
1596
* @state: the state the task should block in (TASK_INTERRUPTIBLE
1597
* or TASK_UNINTERRUPTIBLE)
1598
* @timeout: the pre-initialized and started timer, or NULL for none
1599
* @waiter: the pre-initialized rt_mutex_waiter
1600
* @wake_q: wake_q of tasks to wake when we drop the lock->wait_lock
1601
*
1602
* Must be called with lock->wait_lock held and interrupts disabled
1603
*/
1604
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
1605
struct ww_acquire_ctx *ww_ctx,
1606
unsigned int state,
1607
struct hrtimer_sleeper *timeout,
1608
struct rt_mutex_waiter *waiter,
1609
struct wake_q_head *wake_q)
1610
__releases(&lock->wait_lock) __acquires(&lock->wait_lock)
1611
{
1612
struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
1613
struct task_struct *owner;
1614
int ret = 0;
1615
1616
lockevent_inc(rtmutex_slow_block);
1617
for (;;) {
1618
/* Try to acquire the lock: */
1619
if (try_to_take_rt_mutex(lock, current, waiter)) {
1620
lockevent_inc(rtmutex_slow_acq3);
1621
break;
1622
}
1623
1624
if (timeout && !timeout->task) {
1625
ret = -ETIMEDOUT;
1626
break;
1627
}
1628
if (signal_pending_state(state, current)) {
1629
ret = -EINTR;
1630
break;
1631
}
1632
1633
if (build_ww_mutex() && ww_ctx) {
1634
ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
1635
if (ret)
1636
break;
1637
}
1638
1639
if (waiter == rt_mutex_top_waiter(lock))
1640
owner = rt_mutex_owner(lock);
1641
else
1642
owner = NULL;
1643
raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q);
1644
1645
if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) {
1646
lockevent_inc(rtmutex_slow_sleep);
1647
rt_mutex_schedule();
1648
}
1649
1650
raw_spin_lock_irq(&lock->wait_lock);
1651
set_current_state(state);
1652
}
1653
1654
__set_current_state(TASK_RUNNING);
1655
return ret;
1656
}
1657
1658
/*
 * Deal with a deadlock which the caller did not ask to be told about.
 *
 * Returns without doing anything when @res is not -EDEADLOCK, when
 * @detect_deadlock is set, or when @w belongs to a ww_mutex acquire
 * context. Otherwise lock->wait_lock is dropped, a warning is emitted and
 * the task sleeps in an endless loop, i.e. the function does not return.
 */
static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
					     struct rt_mutex_base *lock,
					     struct rt_mutex_waiter *w)
{
	/* Not a deadlock at all: nothing to do here. */
	if (res != -EDEADLOCK)
		return;

	/* The caller requested deadlock detection and handles the result. */
	if (detect_deadlock)
		return;

	if (build_ww_mutex() && w->ww_ctx)
		return;

	raw_spin_unlock_irq(&lock->wait_lock);

	WARN(1, "rtmutex deadlock detected\n");

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		rt_mutex_schedule();
	}
}
1682
/**
1683
* __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
1684
* @lock: The rtmutex to block lock
1685
* @ww_ctx: WW mutex context pointer
1686
* @state: The task state for sleeping
1687
* @chwalk: Indicator whether full or partial chainwalk is requested
1688
* @waiter: Initializer waiter for blocking
1689
* @wake_q: The wake_q to wake tasks after we release the wait_lock
1690
*/
1691
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
1692
struct ww_acquire_ctx *ww_ctx,
1693
unsigned int state,
1694
enum rtmutex_chainwalk chwalk,
1695
struct rt_mutex_waiter *waiter,
1696
struct wake_q_head *wake_q)
1697
{
1698
struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
1699
struct ww_mutex *ww = ww_container_of(rtm);
1700
int ret;
1701
1702
lockdep_assert_held(&lock->wait_lock);
1703
lockevent_inc(rtmutex_slowlock);
1704
1705
/* Try to acquire the lock again: */
1706
if (try_to_take_rt_mutex(lock, current, NULL)) {
1707
if (build_ww_mutex() && ww_ctx) {
1708
__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
1709
ww_mutex_lock_acquired(ww, ww_ctx);
1710
}
1711
lockevent_inc(rtmutex_slow_acq1);
1712
return 0;
1713
}
1714
1715
set_current_state(state);
1716
1717
trace_contention_begin(lock, LCB_F_RT);
1718
1719
ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk, wake_q);
1720
if (likely(!ret))
1721
ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter, wake_q);
1722
1723
if (likely(!ret)) {
1724
/* acquired the lock */
1725
if (build_ww_mutex() && ww_ctx) {
1726
if (!ww_ctx->is_wait_die)
1727
__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
1728
ww_mutex_lock_acquired(ww, ww_ctx);
1729
}
1730
lockevent_inc(rtmutex_slow_acq2);
1731
} else {
1732
__set_current_state(TASK_RUNNING);
1733
remove_waiter(lock, waiter);
1734
rt_mutex_handle_deadlock(ret, chwalk, lock, waiter);
1735
lockevent_inc(rtmutex_deadlock);
1736
}
1737
1738
/*
1739
* try_to_take_rt_mutex() sets the waiter bit
1740
* unconditionally. We might have to fix that up.
1741
*/
1742
fixup_rt_mutex_waiters(lock, true);
1743
1744
trace_contention_end(lock, ret);
1745
1746
return ret;
1747
}
1748
1749
static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
1750
struct ww_acquire_ctx *ww_ctx,
1751
unsigned int state,
1752
struct wake_q_head *wake_q)
1753
{
1754
struct rt_mutex_waiter waiter;
1755
int ret;
1756
1757
rt_mutex_init_waiter(&waiter);
1758
waiter.ww_ctx = ww_ctx;
1759
1760
ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
1761
&waiter, wake_q);
1762
1763
debug_rt_mutex_free_waiter(&waiter);
1764
lockevent_cond_inc(rtmutex_slow_wake, !wake_q_empty(wake_q));
1765
return ret;
1766
}
1767
1768
/*
1769
* rt_mutex_slowlock - Locking slowpath invoked when fast path fails
1770
* @lock: The rtmutex to block lock
1771
* @ww_ctx: WW mutex context pointer
1772
* @state: The task state for sleeping
1773
*/
1774
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
1775
struct ww_acquire_ctx *ww_ctx,
1776
unsigned int state)
1777
{
1778
DEFINE_WAKE_Q(wake_q);
1779
unsigned long flags;
1780
int ret;
1781
1782
/*
1783
* Do all pre-schedule work here, before we queue a waiter and invoke
1784
* PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would
1785
* otherwise recurse back into task_blocks_on_rt_mutex() through
1786
* rtlock_slowlock() and will then enqueue a second waiter for this
1787
* same task and things get really confusing real fast.
1788
*/
1789
rt_mutex_pre_schedule();
1790
1791
/*
1792
* Technically we could use raw_spin_[un]lock_irq() here, but this can
1793
* be called in early boot if the cmpxchg() fast path is disabled
1794
* (debug, no architecture support). In this case we will acquire the
1795
* rtmutex with lock->wait_lock held. But we cannot unconditionally
1796
* enable interrupts in that early boot case. So we need to use the
1797
* irqsave/restore variants.
1798
*/
1799
raw_spin_lock_irqsave(&lock->wait_lock, flags);
1800
ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state, &wake_q);
1801
raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
1802
rt_mutex_post_schedule();
1803
1804
return ret;
1805
}
1806
1807
static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
1808
unsigned int state)
1809
{
1810
lockdep_assert(!current->pi_blocked_on);
1811
1812
if (likely(rt_mutex_try_acquire(lock)))
1813
return 0;
1814
1815
return rt_mutex_slowlock(lock, NULL, state);
1816
}
1817
#endif /* RT_MUTEX_BUILD_MUTEX */
1818
1819
#ifdef RT_MUTEX_BUILD_SPINLOCKS
1820
/*
1821
* Functions required for spin/rw_lock substitution on RT kernels
1822
*/
1823
1824
/**
1825
* rtlock_slowlock_locked - Slow path lock acquisition for RT locks
1826
* @lock: The underlying RT mutex
1827
* @wake_q: The wake_q to wake tasks after we release the wait_lock
1828
*/
1829
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock,
1830
struct wake_q_head *wake_q)
1831
__releases(&lock->wait_lock) __acquires(&lock->wait_lock)
1832
{
1833
struct rt_mutex_waiter waiter;
1834
struct task_struct *owner;
1835
1836
lockdep_assert_held(&lock->wait_lock);
1837
lockevent_inc(rtlock_slowlock);
1838
1839
if (try_to_take_rt_mutex(lock, current, NULL)) {
1840
lockevent_inc(rtlock_slow_acq1);
1841
return;
1842
}
1843
1844
rt_mutex_init_rtlock_waiter(&waiter);
1845
1846
/* Save current state and set state to TASK_RTLOCK_WAIT */
1847
current_save_and_set_rtlock_wait_state();
1848
1849
trace_contention_begin(lock, LCB_F_RT);
1850
1851
task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK, wake_q);
1852
1853
for (;;) {
1854
/* Try to acquire the lock again */
1855
if (try_to_take_rt_mutex(lock, current, &waiter)) {
1856
lockevent_inc(rtlock_slow_acq2);
1857
break;
1858
}
1859
1860
if (&waiter == rt_mutex_top_waiter(lock))
1861
owner = rt_mutex_owner(lock);
1862
else
1863
owner = NULL;
1864
raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q);
1865
1866
if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner)) {
1867
lockevent_inc(rtlock_slow_sleep);
1868
schedule_rtlock();
1869
}
1870
1871
raw_spin_lock_irq(&lock->wait_lock);
1872
set_current_state(TASK_RTLOCK_WAIT);
1873
}
1874
1875
/* Restore the task state */
1876
current_restore_rtlock_saved_state();
1877
1878
/*
1879
* try_to_take_rt_mutex() sets the waiter bit unconditionally.
1880
* We might have to fix that up:
1881
*/
1882
fixup_rt_mutex_waiters(lock, true);
1883
debug_rt_mutex_free_waiter(&waiter);
1884
1885
trace_contention_end(lock, 0);
1886
lockevent_cond_inc(rtlock_slow_wake, !wake_q_empty(wake_q));
1887
}
1888
1889
/*
 * Slow path entry for RT locks: runs rtlock_slowlock_locked() under
 * lock->wait_lock and passes the collected wake_q to the unlock.
 */
static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
	DEFINE_WAKE_Q(pending_wakeups);
	unsigned long irqflags;

	raw_spin_lock_irqsave(&lock->wait_lock, irqflags);
	rtlock_slowlock_locked(lock, &pending_wakeups);
	raw_spin_unlock_irqrestore_wake(&lock->wait_lock, irqflags, &pending_wakeups);
}
1898
1899
#endif /* RT_MUTEX_BUILD_SPINLOCKS */
1900
1901