// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <[email protected]>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <[email protected]>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 * Adaptive Spinlocks:
 *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 *				     and Peter Morreale,
 * Adaptive Spinlocks simplification:
 *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <[email protected]>
 *
 *  See Documentation/locking/rt-mutex-design.rst for details.
 */
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/deadline.h>
#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/wake_q.h>
#include <linux/ww_mutex.h>

#include <trace/events/lock.h>

#include "rtmutex_common.h"
#include "lock_events.h"

#ifndef WW_RT
# define build_ww_mutex()	(false)
# define ww_container_of(rtm)	NULL

static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
					struct rt_mutex *lock,
					struct ww_acquire_ctx *ww_ctx,
					struct wake_q_head *wake_q)
{
	return 0;
}

static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
					    struct ww_acquire_ctx *ww_ctx,
					    struct wake_q_head *wake_q)
{
}

static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
					  struct ww_acquire_ctx *ww_ctx)
{
}

static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
					struct rt_mutex_waiter *waiter,
					struct ww_acquire_ctx *ww_ctx)
{
	return 0;
}

#else
# define build_ww_mutex()	(true)
# define ww_container_of(rtm)	container_of(rtm, struct ww_mutex, base)
# include "ww_mutex.h"
#endif

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *			is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set the bit0 before looking at the lock, and the owner may be
 * NULL in this small time, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */

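/*
 * Illustrative note (not relied upon by the code below): for a lock held
 * by task @t with waiters queued, lock->owner is the tagged pointer
 * ((unsigned long)t | RT_MUTEX_HAS_WAITERS); rt_mutex_owner() masks the
 * waiters bit off again when reading the owner.
 */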
static __always_inline struct task_struct *
rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	return (struct task_struct *)val;
}

static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
{
	/*
	 * lock->wait_lock is held but explicit acquire semantics are needed
	 * for a new lock owner so WRITE_ONCE is insufficient.
	 */
	xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
}

static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
{
	/* lock->wait_lock is held so the unlock provides release semantics. */
	WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
}

static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static __always_inline void
fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	if (rt_mutex_has_waiters(lock))
		return;

	/*
	 * The rbtree has no waiters enqueued, now make sure that the
	 * lock->owner still has the waiters bit set, otherwise the
	 * following can happen:
	 *
	 * CPU 0	CPU 1		CPU2
	 * l->owner=T1
	 *		rt_mutex_lock(l)
	 *		lock(l->lock)
	 *		l->owner = T1 | HAS_WAITERS;
	 *		enqueue(T2)
	 *		boost()
	 *		  unlock(l->lock)
	 *		block()
	 *
	 *				rt_mutex_lock(l)
	 *				lock(l->lock)
	 *				l->owner = T1 | HAS_WAITERS;
	 *				enqueue(T3)
	 *				boost()
	 *				  unlock(l->lock)
	 *				block()
	 *		signal(->T2)	signal(->T3)
	 *		lock(l->lock)
	 *		dequeue(T2)
	 *		deboost()
	 *		  unlock(l->lock)
	 *				lock(l->lock)
	 *				dequeue(T3)
	 *				 ==> wait list is empty
	 *				deboost()
	 *				 unlock(l->lock)
	 *		lock(l->lock)
	 *		fixup_rt_mutex_waiters()
	 *		  if (wait_list_empty(l) {
	 *		    l->owner = owner
	 *		    owner = l->owner & ~HAS_WAITERS;
	 *		      ==> l->owner = T1
	 *		  }
	 *				lock(l->lock)
	 * rt_mutex_unlock(l)		fixup_rt_mutex_waiters()
	 *				  if (wait_list_empty(l) {
	 *				    owner = l->owner & ~HAS_WAITERS;
	 * cmpxchg(l->owner, T1, NULL)
	 *  ===> Success (l->owner = NULL)
	 *
	 *				    l->owner = owner
	 *				      ==> l->owner = T1
	 *				  }
	 *
	 * With the check for the waiter bit in place T3 on CPU2 will not
	 * overwrite. All tasks fiddling with the waiters bit are
	 * serialized by l->lock, so nothing else can modify the waiters
	 * bit. If the bit is set then nothing can change l->owner either
	 * so the simple RMW is safe. The cmpxchg() will simply fail if it
	 * happens in the middle of the RMW because the waiters bit is
	 * still set.
	 */
	owner = READ_ONCE(*p);
	if (owner & RT_MUTEX_HAS_WAITERS) {
		/*
		 * See rt_mutex_set_owner() and rt_mutex_clear_owner() on
		 * why xchg_acquire() is used for updating owner for
		 * locking and WRITE_ONCE() for unlocking.
		 *
		 * WRITE_ONCE() would work for the acquire case too, but
		 * in case that the lock acquisition failed it might
		 * force other lockers into the slow path unnecessarily.
		 */
		if (acquire_lock)
			xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
		else
			WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
	}
}

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_acquire(&lock->owner, &old, new);
}

static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
{
	return rt_mutex_cmpxchg_acquire(lock, NULL, current);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return try_cmpxchg_release(&lock->owner, &old, new);
}

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	unsigned long *p = (unsigned long *) &lock->owner;
	unsigned long owner, new;

	owner = READ_ONCE(*p);
	do {
		new = owner | RT_MUTEX_HAS_WAITERS;
	} while (!try_cmpxchg_relaxed(p, &owner, new));

	/*
	 * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
	 * operations in the event of contention. Ensure the successful
	 * cmpxchg is visible.
	 */
	smp_mb__after_atomic();
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}

static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock);

static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
{
	/*
	 * With debug enabled rt_mutex_cmpxchg trylock() will always fail.
	 *
	 * Avoid unconditionally taking the slow path by using
	 * rt_mutex_slowtrylock() which is covered by the debug code and can
	 * acquire a non-contended rtmutex.
	 */
	return rt_mutex_slowtrylock(lock);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)
{
	return false;
}

static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
						 unsigned long flags)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	return true;
}
#endif

static __always_inline int __waiter_prio(struct task_struct *task)
{
	int prio = task->prio;

	if (!rt_or_dl_prio(prio))
		return DEFAULT_PRIO;

	return prio;
}

/*
 * Update the waiter->tree copy of the sort keys.
 */
static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
	lockdep_assert_held(&waiter->lock->wait_lock);
	lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));

	waiter->tree.prio = __waiter_prio(task);
	waiter->tree.deadline = task->dl.deadline;
}

/*
 * Update the waiter->pi_tree copy of the sort keys (from the tree copy).
 */
static __always_inline void
waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
	lockdep_assert_held(&waiter->lock->wait_lock);
	lockdep_assert_held(&task->pi_lock);
	lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));

	waiter->pi_tree.prio = waiter->tree.prio;
	waiter->pi_tree.deadline = waiter->tree.deadline;
}

/*
 * Only use with rt_waiter_node_{less,equal}()
 */
#define task_to_waiter_node(p)	\
	&(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
#define task_to_waiter(p)	\
	&(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }

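/*
 * Ordering sketch (illustrative): a waiter with prio 10 sorts before one
 * with prio 20, i.e. a lower prio value means a higher priority. Deadline
 * waiters all share the same prio value, so ties between them are broken
 * by the earlier absolute deadline via dl_time_before().
 */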
static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
					       struct rt_waiter_node *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return dl_time_before(left->deadline, right->deadline);

	return 0;
}

static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
						struct rt_waiter_node *right)
{
	if (left->prio != right->prio)
		return 0;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 0 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return left->deadline == right->deadline;

	return 1;
}

static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
				  struct rt_mutex_waiter *top_waiter)
{
	if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
		return true;

#ifdef RT_MUTEX_BUILD_SPINLOCKS
	/*
	 * Note that RT tasks are excluded from same priority (lateral)
	 * steals to prevent the introduction of an unbounded latency.
	 */
	if (rt_or_dl_prio(waiter->tree.prio))
		return false;

	return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
#else
	return false;
#endif
}

#define __node_2_waiter(node) \
	rb_entry((node), struct rt_mutex_waiter, tree.entry)

static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
	struct rt_mutex_waiter *aw = __node_2_waiter(a);
	struct rt_mutex_waiter *bw = __node_2_waiter(b);

	if (rt_waiter_node_less(&aw->tree, &bw->tree))
		return 1;

	if (!build_ww_mutex())
		return 0;

	if (rt_waiter_node_less(&bw->tree, &aw->tree))
		return 0;

	/* NOTE: relies on waiter->ww_ctx being set before insertion */
	if (aw->ww_ctx) {
		if (!bw->ww_ctx)
			return 1;

		return (signed long)(aw->ww_ctx->stamp -
				     bw->ww_ctx->stamp) < 0;
	}

	return 0;
}

static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
}

static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	if (RB_EMPTY_NODE(&waiter->tree.entry))
		return;

	rb_erase_cached(&waiter->tree.entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree.entry);
}

#define __node_2_rt_node(node) \
	rb_entry((node), struct rt_waiter_node, entry)

static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
{
	return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
}

static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&task->pi_lock);

	rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
}

static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&task->pi_lock);

	if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
		return;

	rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree.entry);
}

static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
						 struct task_struct *p)
{
	struct task_struct *pi_task = NULL;

	lockdep_assert_held(&lock->wait_lock);
	lockdep_assert(rt_mutex_owner(lock) == p);
	lockdep_assert_held(&p->pi_lock);

	if (task_has_pi_waiters(p))
		pi_task = task_top_pi_waiter(p)->task;

	rt_mutex_setprio(p, pi_task);
}

/* RT mutex specific wake_q wrappers */
static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
						     struct task_struct *task,
						     unsigned int wake_state)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
		if (IS_ENABLED(CONFIG_PROVE_LOCKING))
			WARN_ON_ONCE(wqh->rtlock_task);
		get_task_struct(task);
		wqh->rtlock_task = task;
	} else {
		wake_q_add(&wqh->head, task);
	}
}

static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
						struct rt_mutex_waiter *w)
{
	rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
}

static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
		wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
		put_task_struct(wqh->rtlock_task);
		wqh->rtlock_task = NULL;
	}

	if (!wake_q_empty(&wqh->head))
		wake_up_q(&wqh->head);

	/* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
	preempt_enable();
}

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static __always_inline bool
rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
			      enum rtmutex_chainwalk chwalk)
{
	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
		return waiter != NULL;
	return chwalk == RT_MUTEX_FULL_CHAINWALK;
}

static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter is gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [Pn] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Normal locking order:
 *
 *   rtmutex->wait_lock
 *     task->pi_lock
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 * again:
 *	loop_sanity_check();
 * retry:
 * [1]	lock(task->pi_lock);			[R] acquire [P1]
 * [2]	waiter = task->pi_blocked_on;		[P1]
 * [3]	check_exit_conditions_1();		[P1]
 * [4]	lock = waiter->lock;			[P1]
 * [5]	if (!try_lock(lock->wait_lock)) {	[P1] try to acquire [L]
 *	  unlock(task->pi_lock);		release [P1]
 *	  goto retry;
 *	}
 * [6]	check_exit_conditions_2();		[P1] + [L]
 * [7]	requeue_lock_waiter(lock, waiter);	[P1] + [L]
 * [8]	unlock(task->pi_lock);			release [P1]
 *	put_task_struct(task);			release [R]
 * [9]	check_exit_conditions_3();		[L]
 * [10]	task = owner(lock);			[L]
 *	get_task_struct(task);			[L] acquire [R]
 *	lock(task->pi_lock);			[L] acquire [P2]
 * [11]	requeue_pi_waiter(tsk, waiters(lock));	[P2] + [L]
 * [12]	check_exit_conditions_4();		[P2] + [L]
 * [13]	unlock(task->pi_lock);			release [P2]
 *	unlock(lock->wait_lock);		release [L]
 *	goto again;
 *
 * Where P1 is the blocking task and P2 is the lock owner; going up one step
 * the owner becomes the next blocked task etc.
 *
 */
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
					      enum rtmutex_chainwalk chwalk,
					      struct rt_mutex_base *orig_lock,
					      struct rt_mutex_base *next_lock,
					      struct rt_mutex_waiter *orig_waiter,
					      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int ret = 0, depth = 0;
	struct rt_mutex_base *lock;
	bool detect_deadlock;
	bool requeue = true;

	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}

	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
 retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before !
	 */
	raw_spin_lock_irq(&task->pi_lock);

	/*
	 * [2] Get the waiter on which @task is blocked on.
	 */
	waiter = task->pi_blocked_on;

	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * There could be 'spurious' loops in the lock graph due to ww_mutex,
	 * consider:
	 *
	 *   P1: A, ww_A, ww_B
	 *   P2: ww_B, ww_A
	 *   P3: A
	 *
	 * P3 should not return -EDEADLK because it gets trapped in the cycle
	 * created by P1 and P2 (which will resolve -- and runs into
	 * max_lock_depth above). Therefore disable detect_deadlock such that
	 * the below termination condition can trigger once all relevant tasks
	 * are boosted.
	 *
	 * Even when we start with ww_mutex we can disable deadlock detection,
	 * since we would suppress a ww_mutex induced deadlock at [6] anyway.
	 * Suppressing it here however is not sufficient since we might still
	 * hit [6] due to adjustment driven iteration.
	 *
	 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
	 * utterly fail to report it; lockdep should.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
		detect_deadlock = false;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task. If deadlock
		 * detection is enabled we continue, but stop the
		 * requeueing in the chain walk.
		 */
		if (top_waiter != task_top_pi_waiter(task)) {
			if (!detect_deadlock)
				goto out_unlock_pi;
			else
				requeue = false;
		}
	}

	/*
	 * If the waiter priority is the same as the task priority
	 * then there is no further priority adjustment necessary. If
	 * deadlock detection is off, we stop the chain walk. If it's
	 * enabled we continue, but stop the requeueing in the chain
	 * walk.
	 */
	if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
		if (!detect_deadlock)
			goto out_unlock_pi;
		else
			requeue = false;
	}

	/*
	 * [4] Get the next lock; per holding task->pi_lock we can't unblock
	 * and guarantee @lock's existence.
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 *
	 * Per the above, holding task->pi_lock guarantees lock exists, so
	 * inverting this lock order is infeasible from a life-time
	 * perspective.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irq(&task->pi_lock);
		cpu_relax();
		goto retry;
	}

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		ret = -EDEADLK;

		/*
		 * When the deadlock is due to ww_mutex; also see above. Don't
		 * report the deadlock and instead let the ww_mutex wound/die
		 * logic pick which of the contending threads gets -EDEADLK.
		 *
		 * NOTE: assumes the cycle only contains a single ww_class; any
		 * other configuration and we fail to report; also, see
		 * lockdep.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
			ret = 0;

		raw_spin_unlock(&lock->wait_lock);
		goto out_unlock_pi;
	}

	/*
	 * If we just follow the lock chain for deadlock detection, no
	 * need to do all the requeue operations. To avoid a truckload
	 * of conditionals around the various places below, just do the
	 * minimum chain walk checks.
	 */
	if (!requeue) {
		/*
		 * No requeue[7] here. Just release @task [8]
		 */
		raw_spin_unlock(&task->pi_lock);
		put_task_struct(task);

		/*
		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
		 * If there is no owner of the lock, end of chain.
		 */
		if (!rt_mutex_owner(lock)) {
			raw_spin_unlock_irq(&lock->wait_lock);
			return 0;
		}

		/* [10] Grab the next task, i.e. owner of @lock */
		task = get_task_struct(rt_mutex_owner(lock));
		raw_spin_lock(&task->pi_lock);

		/*
		 * No requeue [11] here. We just do deadlock detection.
		 *
		 * [12] Store whether owner is blocked
		 * itself. Decision is made after dropping the locks
		 */
		next_lock = task_blocked_on_lock(task);
		/*
		 * Get the top waiter for the next iteration
		 */
		top_waiter = rt_mutex_top_waiter(lock);

		/* [13] Drop locks */
		raw_spin_unlock(&task->pi_lock);
		raw_spin_unlock_irq(&lock->wait_lock);

		/* If owner is not blocked, end of chain. */
		if (!next_lock)
			goto out_put_task;
		goto again;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter tree. */
	rt_mutex_dequeue(lock, waiter);

	/*
	 * Update the waiter prio fields now that we're dequeued.
	 *
	 * These values can have changed through either:
	 *
	 *   sys_sched_set_scheduler() / sys_sched_setattr()
	 *
	 * or
	 *
	 *   DL CBS enforcement advancing the effective deadline.
	 */
	waiter_update_prio(waiter, task);

	rt_mutex_enqueue(lock, waiter);

	/*
	 * [8] Release the (blocking) task in preparation for
	 * taking the owner task in [10].
	 *
	 * Since we hold lock->wait_lock, task cannot unblock, even if we
	 * release task->pi_lock.
	 */
	raw_spin_unlock(&task->pi_lock);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the deadlock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		top_waiter = rt_mutex_top_waiter(lock);
		if (prerequeue_top_waiter != top_waiter)
			wake_up_state(top_waiter->task, top_waiter->wake_state);
		raw_spin_unlock_irq(&lock->wait_lock);
		return 0;
	}

	/*
	 * [10] Grab the next task, i.e. the owner of @lock
	 *
	 * Per holding lock->wait_lock and checking for !owner above, there
	 * must be an owner and it cannot go away.
	 */
	task = get_task_struct(rt_mutex_owner(lock));
	raw_spin_lock(&task->pi_lock);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner tasks pi waiters tree with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		waiter_clone_prio(waiter, task);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(lock, task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner tasks pi waiters tree with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		waiter_clone_prio(waiter, task);
		rt_mutex_enqueue_pi(task, waiter);
		rt_mutex_adjust_prio(lock, task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock(&task->pi_lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irq(&task->pi_lock);
 out_put_task:
	put_task_struct(task);

	return ret;
}

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
		     struct rt_mutex_waiter *waiter)
{
	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter tree. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);

		/*
		 * If waiter is the highest priority waiter of @lock,
		 * or allowed to steal it, take it over.
		 */
		if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
			/*
			 * We can acquire the lock. Remove the waiter from the
			 * lock waiters tree.
			 */
			rt_mutex_dequeue(lock, waiter);
		} else {
			return 0;
		}
	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/* Check whether the trylock can steal it. */
			if (!rt_mutex_steal(task_to_waiter(task),
					    rt_mutex_top_waiter(lock)))
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters tree.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock(&task->pi_lock);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters tree.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock(&task->pi_lock);

takeit:
	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	return 1;
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
					   struct rt_mutex_waiter *waiter,
					   struct task_struct *task,
					   struct ww_acquire_ctx *ww_ctx,
					   enum rtmutex_chainwalk chwalk,
					   struct wake_q_head *wake_q)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex_base *next_lock;
	int chain_walk = 0, res;

	lockdep_assert_held(&lock->wait_lock);

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 *
	 * Except for ww_mutex, in that case the chain walk must already deal
	 * with spurious cycles, see the comments at [3] and [6].
	 */
	if (owner == task && !(build_ww_mutex() && ww_ctx))
		return -EDEADLK;

	raw_spin_lock(&task->pi_lock);
	waiter->task = task;
	waiter->lock = lock;
	waiter_update_prio(waiter, task);
	waiter_clone_prio(waiter, task);

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock(&task->pi_lock);

	if (build_ww_mutex() && ww_ctx) {
		struct rt_mutex *rtm;

		/* Check whether the waiter should back out immediately */
		rtm = container_of(lock, struct rt_mutex, rtmutex);
		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q);
		if (res) {
			raw_spin_lock(&task->pi_lock);
			rt_mutex_dequeue(lock, waiter);
			task->pi_blocked_on = NULL;
			raw_spin_unlock(&task->pi_lock);
			return res;
		}
	}

	if (!owner)
		return 0;

	raw_spin_lock(&owner->pi_lock);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		rt_mutex_adjust_prio(lock, owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q);

	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
					 next_lock, waiter, task);

	raw_spin_lock_irq(&lock->wait_lock);

	return res;
}

/*
 * Remove the top waiter from the current tasks pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
					    struct rt_mutex_base *lock)
{
	struct rt_mutex_waiter *waiter;

	lockdep_assert_held(&lock->wait_lock);

	raw_spin_lock(&current->pi_lock);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters and deboost.
	 *
	 * We must in fact deboost here in order to ensure we call
	 * rt_mutex_setprio() to update p->pi_top_task before the
	 * task unblocks.
	 */
	rt_mutex_dequeue_pi(current, waiter);
	rt_mutex_adjust_prio(lock, current);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	/*
	 * We deboosted before waking the top waiter task such that we don't
	 * run two tasks with the 'same' priority (and ensure the
	 * p->pi_top_task pointer points to a blocked task). This however can
	 * lead to priority inversion if we would get preempted after the
	 * deboost but before waking our donor task, hence the preempt_disable()
	 * before unlock.
	 *
	 * Pairs with preempt_enable() in rt_mutex_wake_up_q();
	 */
	preempt_disable();
	rt_mutex_wake_q_add(wqh, waiter);
	raw_spin_unlock(&current->pi_lock);
}

static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	int ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock, true);

	return ret;
}

/*
 * Slow path try-lock function:
 */
static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
{
	unsigned long flags;
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and try to
	 * acquire the lock. We use irqsave here to support early boot calls.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	ret = __rt_mutex_slowtrylock(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		return 1;

	return rt_mutex_slowtrylock(lock);
}

/*
 * Slow path to release a rt-mutex.
 */
static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
{
	DEFINE_RT_WAKE_Q(wqh);
	unsigned long flags;

	/* irqsave required to support early boot calls */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	debug_rt_mutex_unlock(lock);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock);   <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			if (free)
	 *				kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock, flags) == true)
			return;
		/* Relock the rtmutex and try again */
		raw_spin_lock_irqsave(&lock->wait_lock, flags);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 *
	 * Queue the next waiter for wakeup once we release the wait_lock.
	 */
	mark_wakeup_next_waiter(&wqh, lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	rt_mutex_wake_up_q(&wqh);
}

static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
{
	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
		return;

	rt_mutex_slowunlock(lock);
}

#ifdef CONFIG_SMP
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	bool res = true;

	rcu_read_lock();
	for (;;) {
		/* If owner changed, trylock again. */
		if (owner != rt_mutex_owner(lock))
			break;
		/*
		 * Ensure that @owner is dereferenced after checking that
		 * the lock owner still matches @owner. If that fails,
		 * @owner might point to freed memory. If it still matches,
		 * the rcu_read_lock() ensures the memory stays valid.
		 */
		barrier();
		/*
		 * Stop spinning when:
		 *  - the lock owner has been scheduled out
		 *  - current is no longer the top waiter
		 *  - current is requested to reschedule (redundant
		 *    for CONFIG_PREEMPT_RCU=y)
		 *  - the VCPU on which owner runs is preempted
		 */
		if (!owner_on_cpu(owner) || need_resched() ||
		    !rt_mutex_waiter_is_top_waiter(lock, waiter)) {
			res = false;
			break;
		}
		cpu_relax();
	}
	rcu_read_unlock();
	return res;
}
#else
static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter,
				  struct task_struct *owner)
{
	return false;
}
#endif

#ifdef RT_MUTEX_BUILD_MUTEX
/*
 * Functions required for:
 *	- rtmutex, futex on all kernels
 *	- mutex and rwsem substitutions on RT kernels
 */

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. It must
 * have just failed to try_to_take_rt_mutex().
 */
static void __sched remove_waiter(struct rt_mutex_base *lock,
				  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_base *next_lock;

	lockdep_assert_held(&lock->wait_lock);

	raw_spin_lock(&current->pi_lock);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock(&current->pi_lock);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock(&owner->pi_lock);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	rt_mutex_adjust_prio(lock, owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
				   next_lock, NULL, current);

	raw_spin_lock_irq(&lock->wait_lock);
}

/**
 * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
 * @lock:	the rt_mutex to take
 * @ww_ctx:	WW mutex context pointer
 * @state:	the state the task should block in (TASK_INTERRUPTIBLE
 *		or TASK_UNINTERRUPTIBLE)
 * @timeout:	the pre-initialized and started timer, or NULL for none
 * @waiter:	the pre-initialized rt_mutex_waiter
 * @wake_q:	wake_q of tasks to wake when we drop the lock->wait_lock
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
					   struct ww_acquire_ctx *ww_ctx,
					   unsigned int state,
					   struct hrtimer_sleeper *timeout,
					   struct rt_mutex_waiter *waiter,
					   struct wake_q_head *wake_q)
	__releases(&lock->wait_lock) __acquires(&lock->wait_lock)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct task_struct *owner;
	int ret = 0;

	lockevent_inc(rtmutex_slow_block);
	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter)) {
			lockevent_inc(rtmutex_slow_acq3);
			break;
		}

		if (timeout && !timeout->task) {
			ret = -ETIMEDOUT;
			break;
		}
		if (signal_pending_state(state, current)) {
			ret = -EINTR;
			break;
		}

		if (build_ww_mutex() && ww_ctx) {
			ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
			if (ret)
				break;
		}

		if (waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q);

		if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) {
			lockevent_inc(rtmutex_slow_sleep);
			rt_mutex_schedule();
		}

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(state);
	}

	__set_current_state(TASK_RUNNING);
	return ret;
}

static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
					     struct rt_mutex_base *lock,
					     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	if (build_ww_mutex() && w->ww_ctx)
		return;

	raw_spin_unlock_irq(&lock->wait_lock);

	WARN(1, "rtmutex deadlock detected\n");

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		rt_mutex_schedule();
	}
}

/**
 * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 * @chwalk:	Indicator whether full or partial chainwalk is requested
 * @waiter:	Initialized waiter for blocking
 * @wake_q:	The wake_q to wake tasks after we release the wait_lock
 */
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
				       struct ww_acquire_ctx *ww_ctx,
				       unsigned int state,
				       enum rtmutex_chainwalk chwalk,
				       struct rt_mutex_waiter *waiter,
				       struct wake_q_head *wake_q)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct ww_mutex *ww = ww_container_of(rtm);
	int ret;

	lockdep_assert_held(&lock->wait_lock);
	lockevent_inc(rtmutex_slowlock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		if (build_ww_mutex() && ww_ctx) {
			__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
		lockevent_inc(rtmutex_slow_acq1);
		return 0;
	}

	set_current_state(state);

	trace_contention_begin(lock, LCB_F_RT);

	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk, wake_q);
	if (likely(!ret))
		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter, wake_q);

	if (likely(!ret)) {
		/* acquired the lock */
		if (build_ww_mutex() && ww_ctx) {
			if (!ww_ctx->is_wait_die)
				__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
		lockevent_inc(rtmutex_slow_acq2);
	} else {
		__set_current_state(TASK_RUNNING);
		remove_waiter(lock, waiter);
		rt_mutex_handle_deadlock(ret, chwalk, lock, waiter);
		lockevent_inc(rtmutex_deadlock);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock, true);

	trace_contention_end(lock, ret);

	return ret;
}

static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
					     struct ww_acquire_ctx *ww_ctx,
					     unsigned int state,
					     struct wake_q_head *wake_q)
{
	struct rt_mutex_waiter waiter;
	int ret;

	rt_mutex_init_waiter(&waiter);
	waiter.ww_ctx = ww_ctx;

	ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
				  &waiter, wake_q);

	debug_rt_mutex_free_waiter(&waiter);
	lockevent_cond_inc(rtmutex_slow_wake, !wake_q_empty(wake_q));
	return ret;
}

/*
 * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
 * @lock:	The rtmutex to block lock
 * @ww_ctx:	WW mutex context pointer
 * @state:	The task state for sleeping
 */
static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
				     struct ww_acquire_ctx *ww_ctx,
				     unsigned int state)
{
	DEFINE_WAKE_Q(wake_q);
	unsigned long flags;
	int ret;

	/*
	 * Do all pre-schedule work here, before we queue a waiter and invoke
	 * PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would
	 * otherwise recurse back into task_blocks_on_rt_mutex() through
	 * rtlock_slowlock() and will then enqueue a second waiter for this
	 * same task and things get really confusing real fast.
	 */
	rt_mutex_pre_schedule();

	/*
	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
	 * be called in early boot if the cmpxchg() fast path is disabled
	 * (debug, no architecture support). In this case we will acquire the
	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
	 * enable interrupts in that early boot case. So we need to use the
	 * irqsave/restore variants.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state, &wake_q);
	raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
	rt_mutex_post_schedule();

	return ret;
}

static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
					   unsigned int state)
{
	lockdep_assert(!current->pi_blocked_on);

	if (likely(rt_mutex_try_acquire(lock)))
		return 0;

	return rt_mutex_slowlock(lock, NULL, state);
}
#endif /* RT_MUTEX_BUILD_MUTEX */

#ifdef RT_MUTEX_BUILD_SPINLOCKS
/*
 * Functions required for spin/rw_lock substitution on RT kernels
 */

/**
 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
 * @lock:	The underlying RT mutex
 * @wake_q:	The wake_q to wake tasks after we release the wait_lock
 */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock,
					   struct wake_q_head *wake_q)
	__releases(&lock->wait_lock) __acquires(&lock->wait_lock)
{
	struct rt_mutex_waiter waiter;
	struct task_struct *owner;

	lockdep_assert_held(&lock->wait_lock);
	lockevent_inc(rtlock_slowlock);

	if (try_to_take_rt_mutex(lock, current, NULL)) {
		lockevent_inc(rtlock_slow_acq1);
		return;
	}

	rt_mutex_init_rtlock_waiter(&waiter);

	/* Save current state and set state to TASK_RTLOCK_WAIT */
	current_save_and_set_rtlock_wait_state();

	trace_contention_begin(lock, LCB_F_RT);

	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK, wake_q);

	for (;;) {
		/* Try to acquire the lock again */
		if (try_to_take_rt_mutex(lock, current, &waiter)) {
			lockevent_inc(rtlock_slow_acq2);
			break;
		}

		if (&waiter == rt_mutex_top_waiter(lock))
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q);

		if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner)) {
			lockevent_inc(rtlock_slow_sleep);
			schedule_rtlock();
		}

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(TASK_RTLOCK_WAIT);
	}

	/* Restore the task state */
	current_restore_rtlock_saved_state();

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally.
	 * We might have to fix that up:
	 */
	fixup_rt_mutex_waiters(lock, true);
	debug_rt_mutex_free_waiter(&waiter);

	trace_contention_end(lock, 0);
	lockevent_cond_inc(rtlock_slow_wake, !wake_q_empty(wake_q));
}

static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	rtlock_slowlock_locked(lock, &wake_q);
	raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
}

#endif /* RT_MUTEX_BUILD_SPINLOCKS */