// SPDX-License-Identifier: GPL-2.0-only
/*
 * rtmutex API
 */
#include <linux/spinlock.h>
#include <linux/export.h>

#define RT_MUTEX_BUILD_MUTEX
#include "rtmutex.c"

/*
 * Max number of times we'll walk the boosting chain:
 */
int max_lock_depth = 1024;

static const struct ctl_table rtmutex_sysctl_table[] = {
	{
		.procname	= "max_lock_depth",
		.data		= &max_lock_depth,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};

static int __init init_rtmutex_sysctl(void)
{
	register_sysctl_init("kernel", rtmutex_sysctl_table);
	return 0;
}

subsys_initcall(init_rtmutex_sysctl);

/*
 * Debug aware fast / slowpath lock, trylock, unlock
 *
 * The atomic acquire/release ops are compiled away when either the
 * architecture does not support cmpxchg or when debugging is enabled.
 */
static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock,
						  unsigned int state,
						  struct lockdep_map *nest_lock,
						  unsigned int subclass)
{
	int ret;

	might_sleep();
	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, _RET_IP_);
	ret = __rt_mutex_lock(&lock->rtmutex, state);
	if (ret)
		mutex_release(&lock->dep_map, _RET_IP_);
	return ret;
}

void rt_mutex_base_init(struct rt_mutex_base *rtb)
{
	__rt_mutex_base_init(rtb);
}
EXPORT_SYMBOL(rt_mutex_base_init);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
 * rt_mutex_lock_nested - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 * @subclass: the lockdep subclass
 */
void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
{
	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);

void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock)
{
	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0);
}
EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock);

#else /* !CONFIG_DEBUG_LOCK_ALLOC */

/**
 * rt_mutex_lock - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 */
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
#endif

/**
 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
 *
 * @lock: the rt_mutex to be locked
 *
 * Returns:
 *  0 on success
 * -EINTR when interrupted by a signal
 */
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
	return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, NULL, 0);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);

/**
 * rt_mutex_lock_killable - lock a rt_mutex killable
 *
 * @lock: the rt_mutex to be locked
 *
 * Returns:
 *  0 on success
 * -EINTR when interrupted by a fatal signal
 */
int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
{
	return __rt_mutex_lock_common(lock, TASK_KILLABLE, NULL, 0);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
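
/*
 * Illustrative usage sketch (hypothetical caller and lock, not part of this
 * API): the interruptible and killable variants above return -EINTR when a
 * signal (a fatal signal for the killable variant) arrives before the lock
 * is acquired, so callers must be able to back out without owning the lock.
 *
 *	static DEFINE_RT_MUTEX(example_lock);	// hypothetical, <linux/rtmutex.h>
 *
 *	static int example_do_work(void)
 *	{
 *		int ret;
 *
 *		ret = rt_mutex_lock_interruptible(&example_lock);
 *		if (ret)
 *			return ret;		// -EINTR, lock is not held
 *
 *		// ... critical section ...
 *
 *		rt_mutex_unlock(&example_lock);
 *		return 0;
 *	}
 */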

/**
 * rt_mutex_trylock - try to lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 *
 * This function can only be called in thread context. It's safe to call it
 * from atomic regions, but not from hard or soft interrupt context.
 *
 * Returns:
 *  1 on success
 *  0 on contention
 */
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
	int ret;

	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
		return 0;

	ret = __rt_mutex_trylock(&lock->rtmutex);
	if (ret)
		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);

	return ret;
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);

/**
 * rt_mutex_unlock - unlock a rt_mutex
 *
 * @lock: the rt_mutex to be unlocked
 */
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
	mutex_release(&lock->dep_map, _RET_IP_);
	__rt_mutex_unlock(&lock->rtmutex);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);

/*
 * Futex variants, must not use fastpath.
 */
int __sched rt_mutex_futex_trylock(struct rt_mutex_base *lock)
{
	return rt_mutex_slowtrylock(lock);
}

int __sched __rt_mutex_futex_trylock(struct rt_mutex_base *lock)
{
	return __rt_mutex_slowtrylock(lock);
}

/**
 * __rt_mutex_futex_unlock - Futex variant of unlock. Since the futex
 * variants do not use the fast-path, it can be simple and will not need
 * to retry.
 *
 * @lock: The rt_mutex to be unlocked
 * @wqh:  The wake queue head from which to get the next lock waiter
 */
bool __sched __rt_mutex_futex_unlock(struct rt_mutex_base *lock,
				     struct rt_wake_q_head *wqh)
{
	lockdep_assert_held(&lock->wait_lock);

	debug_rt_mutex_unlock(lock);

	if (!rt_mutex_has_waiters(lock)) {
		lock->owner = NULL;
		return false; /* done */
	}

	/*
	 * mark_wakeup_next_waiter() deboosts and retains preemption
	 * disabled when dropping the wait_lock, to avoid inversion prior
	 * to the wakeup. preempt_disable() therein pairs with the
	 * preempt_enable() in rt_mutex_postunlock().
	 */
	mark_wakeup_next_waiter(wqh, lock);

	return true; /* call postunlock() */
}

void __sched rt_mutex_futex_unlock(struct rt_mutex_base *lock)
{
	DEFINE_RT_WAKE_Q(wqh);
	unsigned long flags;
	bool postunlock;

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	postunlock = __rt_mutex_futex_unlock(lock, &wqh);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	if (postunlock)
		rt_mutex_postunlock(&wqh);
}

/**
 * __rt_mutex_init - initialize the rt_mutex
 *
 * @lock: The rt_mutex to be initialized
 * @name: The lock name used for debugging
 * @key:  The lock class key used for debugging
 *
 * Initialize the rt_mutex to unlocked state.
 *
 * Initializing of a locked rt_mutex is not allowed
 */
void __sched __rt_mutex_init(struct rt_mutex *lock, const char *name,
			     struct lock_class_key *key)
{
	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
	__rt_mutex_base_init(&lock->rtmutex);
	lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
}
EXPORT_SYMBOL_GPL(__rt_mutex_init);
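
/*
 * Illustrative trylock sketch (hypothetical caller and lock): per the comment
 * on rt_mutex_trylock() above, the trylock may be used where sleeping in
 * rt_mutex_lock() is not an option, but never from hard or soft interrupt
 * context. On contention it returns 0 and the caller needs a fallback path.
 *
 *	static DEFINE_RT_MUTEX(example_lock);	// hypothetical, <linux/rtmutex.h>
 *
 *	static bool example_try_update(void)
 *	{
 *		if (!rt_mutex_trylock(&example_lock))
 *			return false;		// contended, retry later
 *
 *		// ... short critical section ...
 *
 *		rt_mutex_unlock(&example_lock);
 *		return true;
 *	}
 */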

/**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 *				proxy owner
 *
 * @lock: the rt_mutex to be locked
 * @proxy_owner: the task to set as owner
 *
 * No locking. Caller has to do serializing itself
 *
 * Special API call for PI-futex support. This initializes the rtmutex and
 * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not
 * possible at this point because the pi_state which contains the rtmutex
 * is not yet visible to other tasks.
 */
void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
					struct task_struct *proxy_owner)
{
	static struct lock_class_key pi_futex_key;

	__rt_mutex_base_init(lock);
	/*
	 * On PREEMPT_RT the futex hashbucket spinlock becomes 'sleeping'
	 * and rtmutex based. That causes a lockdep false positive, because
	 * some of the futex functions invoke spin_unlock(&hb->lock) with
	 * the wait_lock of the rtmutex associated to the pi_futex held.
	 * spin_unlock() in turn takes wait_lock of the rtmutex on which
	 * the spinlock is based, which makes lockdep notice a lock
	 * recursion. Give the futex/rtmutex wait_lock a separate key.
	 */
	lockdep_set_class(&lock->wait_lock, &pi_futex_key);
	rt_mutex_set_owner(lock, proxy_owner);
}

/**
 * rt_mutex_proxy_unlock - release a lock on behalf of owner
 *
 * @lock: the rt_mutex to be locked
 *
 * No locking. Caller has to do serializing itself
 *
 * Special API call for PI-futex support. This just cleans up the rtmutex
 * (debugging) state. Concurrent operations on this rt_mutex are not
 * possible because it belongs to the pi_state which is about to be freed
 * and it is no longer visible to other tasks.
 */
void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
{
	debug_rt_mutex_proxy_unlock(lock);
	rt_mutex_clear_owner(lock);
}

/**
 * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:	the rt_mutex to take
 * @waiter:	the pre-initialized rt_mutex_waiter
 * @task:	the task to prepare
 * @wake_q:	the wake_q to wake tasks after we release the wait_lock
 *
 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
 *
 * NOTE: does _NOT_ remove the @waiter on failure; must either call
 * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for PI-futex support.
 */
int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
					struct rt_mutex_waiter *waiter,
					struct task_struct *task,
					struct wake_q_head *wake_q)
{
	int ret;

	lockdep_assert_held(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, task, NULL))
		return 1;

	/* We enforce deadlock detection for futexes */
	ret = task_blocks_on_rt_mutex(lock, waiter, task, NULL,
				      RT_MUTEX_FULL_CHAINWALK, wake_q);

	if (ret && !rt_mutex_owner(lock)) {
		/*
		 * Reset the return value. We might have
		 * returned with -EDEADLK and the owner
		 * released the lock while we were walking the
		 * pi chain. Let the waiter sort it out.
		 */
		ret = 0;
	}

	return ret;
}

/**
 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:	the rt_mutex to take
 * @waiter:	the pre-initialized rt_mutex_waiter
 * @task:	the task to prepare
 *
 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
 *
 * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
 * on failure.
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for PI-futex support.
 */
int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
				      struct rt_mutex_waiter *waiter,
				      struct task_struct *task)
{
	int ret;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irq(&lock->wait_lock);
	ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q);
	if (unlikely(ret))
		remove_waiter(lock, waiter);
	preempt_disable();
	raw_spin_unlock_irq(&lock->wait_lock);
	wake_up_q(&wake_q);
	preempt_enable();

	return ret;
}

/**
 * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
 * @lock:	the rt_mutex we were woken on
 * @to:		the timeout, null if none. hrtimer should already have
 *		been started.
 * @waiter:	the pre-initialized rt_mutex_waiter
 *
 * Wait for the lock acquisition started on our behalf by
 * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
 * rt_mutex_cleanup_proxy_lock().
 *
 * Returns:
 *  0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT
 *
 * Special API call for PI-futex support
 */
int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
				     struct hrtimer_sleeper *to,
				     struct rt_mutex_waiter *waiter)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);
	/* sleep on the mutex */
	set_current_state(TASK_INTERRUPTIBLE);
	ret = rt_mutex_slowlock_block(lock, NULL, TASK_INTERRUPTIBLE, to, waiter, NULL);
	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock, true);
	raw_spin_unlock_irq(&lock->wait_lock);

	return ret;
}

/**
 * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
 * @lock:	the rt_mutex we were woken on
 * @waiter:	the pre-initialized rt_mutex_waiter
 *
 * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
 * rt_mutex_wait_proxy_lock().
 *
 * Unless we acquired the lock, we're still enqueued on the wait-list and can
 * in fact still be granted ownership until we're removed. Therefore we can
 * find we are in fact the owner and must disregard the
 * rt_mutex_wait_proxy_lock() failure.
 *
 * Returns:
 *  true  - did the cleanup, we are done.
 *  false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
 *          caller should disregard its return value.
 *
 * Special API call for PI-futex support
 */
bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
					 struct rt_mutex_waiter *waiter)
{
	bool cleanup = false;

	raw_spin_lock_irq(&lock->wait_lock);
	/*
	 * Do an unconditional try-lock, this deals with the lock stealing
	 * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
	 * sets a NULL owner.
	 *
	 * We're not interested in the return value, because the subsequent
	 * test on rt_mutex_owner() will infer that. If the trylock succeeded,
	 * we will own the lock and it will have removed the waiter. If we
	 * failed the trylock, we're still not owner and we need to remove
	 * ourselves.
	 */
	try_to_take_rt_mutex(lock, current, waiter);
	/*
	 * Unless we're the owner, we're still enqueued on the wait_list.
	 * So check if we became owner, if not, take us off the wait_list.
	 */
	if (rt_mutex_owner(lock) != current) {
		remove_waiter(lock, waiter);
		cleanup = true;
	}
	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock, false);

	raw_spin_unlock_irq(&lock->wait_lock);

	return cleanup;
}
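
/*
 * Illustrative call sequence sketch (hypothetical names): the proxy-lock API
 * above is consumed by the PI-futex code (kernel/futex/) in roughly this
 * shape. The waker/requeue side enqueues @waiter on behalf of @task; the
 * woken task later waits for the acquisition and, on failure, must run the
 * cleanup to learn whether it became owner anyway.
 *
 *	// waker/requeue side
 *	ret = rt_mutex_start_proxy_lock(&pi_mutex, &waiter, task);
 *	// ret == 1: lock acquired for @task, wake it up
 *	// ret == 0: @task is now enqueued as a waiter
 *	// ret <  0: error, the waiter has been removed again
 *
 *	// woken task side
 *	ret = rt_mutex_wait_proxy_lock(&pi_mutex, timeout, &waiter);
 *	if (ret && !rt_mutex_cleanup_proxy_lock(&pi_mutex, &waiter))
 *		ret = 0;	// became owner despite the failure
 *
 * pi_mutex, waiter, task and timeout are stand-ins for the futex pi_state
 * rtmutex and the per-waiter state of the real callers.
 */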

/*
 * Recheck the pi chain, in case we got a priority setting
 *
 * Called from sched_setscheduler
 */
void __sched rt_mutex_adjust_pi(struct task_struct *task)
{
	struct rt_mutex_waiter *waiter;
	struct rt_mutex_base *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	if (!waiter || rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}
	next_lock = waiter->lock;
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);

	rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
				   next_lock, NULL, task);
}

/*
 * Performs the wakeup of the top-waiter and re-enables preemption.
 */
void __sched rt_mutex_postunlock(struct rt_wake_q_head *wqh)
{
	rt_mutex_wake_up_q(wqh);
}

#ifdef CONFIG_DEBUG_RT_MUTEXES
void rt_mutex_debug_task_free(struct task_struct *task)
{
	DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root));
	DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
}
#endif

#ifdef CONFIG_PREEMPT_RT
/* Mutexes */
void __mutex_rt_init(struct mutex *mutex, const char *name,
		     struct lock_class_key *key)
{
	debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
	lockdep_init_map_wait(&mutex->dep_map, name, key, 0, LD_WAIT_SLEEP);
}
EXPORT_SYMBOL(__mutex_rt_init);

static __always_inline int __mutex_lock_common(struct mutex *lock,
					       unsigned int state,
					       unsigned int subclass,
					       struct lockdep_map *nest_lock,
					       unsigned long ip)
{
	int ret;

	might_sleep();
	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
	ret = __rt_mutex_lock(&lock->rtmutex, state);
	if (ret)
		mutex_release(&lock->dep_map, ip);
	else
		lock_acquired(&lock->dep_map, ip);
	return ret;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);

void __sched _mutex_lock_nest_lock(struct mutex *lock,
				   struct lockdep_map *nest_lock)
{
	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest_lock, _RET_IP_);
}
EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);

int __sched mutex_lock_interruptible_nested(struct mutex *lock,
					    unsigned int subclass)
{
	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);

int __sched _mutex_lock_killable(struct mutex *lock, unsigned int subclass,
				 struct lockdep_map *nest_lock)
{
	return __mutex_lock_common(lock, TASK_KILLABLE, subclass, nest_lock, _RET_IP_);
}
EXPORT_SYMBOL_GPL(_mutex_lock_killable);
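
/*
 * Illustrative lockdep-annotation sketch (hypothetical objects): when two
 * mutexes of the same lock class have to be held at once, callers pass an
 * explicit subclass so the nesting variants above can tell lockdep that the
 * second acquisition is intentional rather than a self-deadlock. The caller
 * still has to establish a deadlock-free locking order (e.g. by address)
 * itself.
 *
 *	static void example_transfer(struct example_obj *src,	// hypothetical type
 *				     struct example_obj *dst)
 *	{
 *		mutex_lock(&src->lock);
 *		mutex_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);
 *
 *		// ... move state from src to dst ...
 *
 *		mutex_unlock(&dst->lock);
 *		mutex_unlock(&src->lock);
 *	}
 */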

void __sched mutex_lock_io_nested(struct mutex *lock, unsigned int subclass)
{
	int token;

	might_sleep();

	token = io_schedule_prepare();
	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
	io_schedule_finish(token);
}
EXPORT_SYMBOL_GPL(mutex_lock_io_nested);

int __sched _mutex_trylock_nest_lock(struct mutex *lock,
				     struct lockdep_map *nest_lock)
{
	int ret;

	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
		return 0;

	ret = __rt_mutex_trylock(&lock->rtmutex);
	if (ret)
		mutex_acquire_nest(&lock->dep_map, 0, 1, nest_lock, _RET_IP_);

	return ret;
}
EXPORT_SYMBOL_GPL(_mutex_trylock_nest_lock);
#else /* CONFIG_DEBUG_LOCK_ALLOC */

void __sched mutex_lock(struct mutex *lock)
{
	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
}
EXPORT_SYMBOL(mutex_lock);

int __sched mutex_lock_interruptible(struct mutex *lock)
{
	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
}
EXPORT_SYMBOL(mutex_lock_interruptible);

int __sched mutex_lock_killable(struct mutex *lock)
{
	return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
}
EXPORT_SYMBOL(mutex_lock_killable);

void __sched mutex_lock_io(struct mutex *lock)
{
	int token = io_schedule_prepare();

	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
	io_schedule_finish(token);
}
EXPORT_SYMBOL(mutex_lock_io);

int __sched mutex_trylock(struct mutex *lock)
{
	if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
		return 0;

	return __rt_mutex_trylock(&lock->rtmutex);
}
EXPORT_SYMBOL(mutex_trylock);
#endif /* !CONFIG_DEBUG_LOCK_ALLOC */

void __sched mutex_unlock(struct mutex *lock)
{
	mutex_release(&lock->dep_map, _RET_IP_);
	__rt_mutex_unlock(&lock->rtmutex);
}
EXPORT_SYMBOL(mutex_unlock);

#endif /* CONFIG_PREEMPT_RT */
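
/*
 * Illustrative mutex_lock_io() sketch (hypothetical caller): the
 * io_schedule_prepare()/io_schedule_finish() bracketing in mutex_lock_io()
 * and mutex_lock_io_nested() above makes time spent blocked on the rtmutex
 * count as iowait, matching the behaviour of the non-PREEMPT_RT mutex.
 *
 *	static void example_read_cached(struct example_cache *cache)	// hypothetical
 *	{
 *		// The lock is typically held across slow storage accesses,
 *		// so waiters want their sleep accounted as I/O wait.
 *		mutex_lock_io(&cache->lock);
 *		// ... consume data filled in by the previous holder ...
 *		mutex_unlock(&cache->lock);
 *	}
 */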