// SPDX-License-Identifier: GPL-2.0-only
/*
 * RT-specific reader/writer semaphores and reader/writer locks
 *
 * down_write/write_lock()
 *  1) Lock rtmutex
 *  2) Remove the reader BIAS to force readers into the slow path
 *  3) Wait until all readers have left the critical section
 *  4) Mark it write locked
 *
 * up_write/write_unlock()
 *  1) Remove the write locked marker
 *  2) Set the reader BIAS, so readers can use the fast path again
 *  3) Unlock rtmutex, to release blocked readers
 *
 * down_read/read_lock()
 *  1) Try fast path acquisition (reader BIAS is set)
 *  2) Take rtmutex::wait_lock, which protects the writelocked flag
 *  3) If !writelocked, acquire it for read
 *  4) If writelocked, block on rtmutex
 *  5) Unlock rtmutex, goto 1)
 *
 * up_read/read_unlock()
 *  1) Try fast path release (reader count != 1)
 *  2) Wake the writer waiting in down_write()/write_lock() #3
 *
 * down_read/read_lock() #3 has the consequence that rw semaphores and rw
 * locks on RT are not writer fair, but writers, which should be avoided in
 * RT tasks (think mmap_sem), are subject to the rtmutex priority/DL
 * inheritance mechanism.
 *
 * It's possible to make the rw primitives writer fair by keeping a list of
 * active readers. A blocked writer would force all newly incoming readers
 * to block on the rtmutex, but the rtmutex would have to be proxy locked
 * for one reader after the other. We can't use multi-reader inheritance
 * because there is no way to support that with SCHED_DEADLINE.
 * Implementing the one by one reader boosting/handover mechanism is a
 * major surgery for a very dubious value.
 *
 * The risk of writer starvation is there, but the pathological use cases
 * which trigger it are not necessarily the typical RT workloads.
 *
 * Fast-path orderings:
 * The lock/unlock of readers can run in fast paths: lock and unlock are only
 * atomic ops, and there is no inner lock to provide ACQUIRE and RELEASE
 * semantics of rwbase_rt. Atomic ops should thus provide _acquire()
 * and _release() (or stronger).
 *
 * Common code shared between RT rw_semaphore and rwlock
 */
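
/*
 * Illustrative sketch of the fast-path pairing described above: a
 * simplified model for this comment only (hypothetical example_* names,
 * no retry loop), not part of this file's code.
 *
 *	static bool example_read_trylock(atomic_t *readers)
 *	{
 *		int r = atomic_read(readers);
 *
 *		// The fast path works only while READER_BIAS is set, i.e.
 *		// while the counter is negative. The _acquire pairs with
 *		// the _release in example_write_unlock() below, so the
 *		// reader observes the previous writer's stores.
 *		return r < 0 && atomic_try_cmpxchg_acquire(readers, &r, r + 1);
 *	}
 *
 *	static void example_write_unlock(atomic_t *readers)
 *	{
 *		// Restoring READER_BIAS re-enables the reader fast path;
 *		// _release publishes the write-side critical section.
 *		(void)atomic_add_return_release(READER_BIAS - WRITER_BIAS, readers);
 *	}
 */
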
static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb)
{
	int r;

	/*
	 * Increment the reader count if sem->readers < 0, i.e. READER_BIAS
	 * is set.
	 */
	for (r = atomic_read(&rwb->readers); r < 0;) {
		if (likely(atomic_try_cmpxchg_acquire(&rwb->readers, &r, r + 1)))
			return 1;
	}
	return 0;
}

static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
				      unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	DEFINE_WAKE_Q(wake_q);
	int ret;

	rwbase_pre_schedule();
	raw_spin_lock_irq(&rtm->wait_lock);

	/*
	 * Call into the slow lock path with the rtmutex->wait_lock
	 * held, so this can't result in the following race:
	 *
	 * Reader1		Reader2		Writer
	 *			down_read()
	 *					down_write()
	 *					rtmutex_lock(m)
	 *					wait()
	 * down_read()
	 * unlock(m->wait_lock)
	 *			up_read()
	 *			wake(Writer)
	 *					lock(m->wait_lock)
	 *					sem->writelocked=true
	 *					unlock(m->wait_lock)
	 *
	 *					up_write()
	 *					sem->writelocked=false
	 *					rtmutex_unlock(m)
	 *			down_read()
	 *					down_write()
	 *					rtmutex_lock(m)
	 *					wait()
	 * rtmutex_lock(m)
	 *
	 * That would put Reader1 behind the writer waiting on
	 * Reader2 to call up_read(), which might be unbounded.
	 */

	trace_contention_begin(rwb, LCB_F_RT | LCB_F_READ);

	/*
	 * For rwlocks this returns 0 unconditionally, so the below
	 * !ret conditionals are optimized out.
	 */
	ret = rwbase_rtmutex_slowlock_locked(rtm, state, &wake_q);

	/*
	 * On success the rtmutex is held, so there can't be a writer
	 * active. Increment the reader count and immediately drop the
	 * rtmutex again.
	 *
	 * rtmutex->wait_lock has to be unlocked in any case of course.
	 */
	if (!ret)
		atomic_inc(&rwb->readers);

	preempt_disable();
	raw_spin_unlock_irq(&rtm->wait_lock);
	wake_up_q(&wake_q);
	preempt_enable();

	if (!ret)
		rwbase_rtmutex_unlock(rtm);

	trace_contention_end(rwb, ret);
	rwbase_post_schedule();
	return ret;
}

static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb,
					    unsigned int state)
{
	lockdep_assert(!current->pi_blocked_on);

	if (rwbase_read_trylock(rwb))
		return 0;

	return __rwbase_read_lock(rwb, state);
}
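
/*
 * Usage sketch (illustrative, not part of this file): on PREEMPT_RT the
 * rw_semaphore and rwlock wrappers funnel into rwbase_read_lock() and
 * rwbase_read_unlock(), so an ordinary reader section such as
 *
 *	down_read(&mm->mmap_lock);
 *	... walk the VMA tree ...
 *	up_read(&mm->mmap_lock);
 *
 * costs one acquire cmpxchg plus one fully ordered dec in the uncontended
 * case, and only falls back to __rwbase_read_lock() and blocks on the
 * rtmutex once a writer has removed READER_BIAS.
 */
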
static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
					 unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	struct task_struct *owner;
	DEFINE_RT_WAKE_Q(wqh);

	raw_spin_lock_irq(&rtm->wait_lock);
	/*
	 * Wake the writer, i.e. the rtmutex owner. It might release the
	 * rtmutex concurrently in the fast path (due to a signal), but to
	 * clean up rwb->readers it needs to acquire rtm->wait_lock. The
	 * worst case which can happen is a spurious wakeup.
	 */
	owner = rt_mutex_owner(rtm);
	if (owner)
		rt_mutex_wake_q_add_task(&wqh, owner, state);

	/* Pairs with the preempt_enable() in rt_mutex_wake_up_q() */
	preempt_disable();
	raw_spin_unlock_irq(&rtm->wait_lock);
	rt_mutex_wake_up_q(&wqh);
}

static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
					       unsigned int state)
{
	/*
	 * rwb->readers can only hit 0 when a writer is waiting for the
	 * active readers to leave the critical section.
	 *
	 * dec_and_test() is fully ordered, provides RELEASE.
	 */
	if (unlikely(atomic_dec_and_test(&rwb->readers)))
		__rwbase_read_unlock(rwb, state);
}

static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias,
					 unsigned long flags)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;

	/*
	 * _release() is needed in case a reader is in the fast path, pairing
	 * with atomic_try_cmpxchg_acquire() in rwbase_read_trylock().
	 */
	(void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers);
	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
	rwbase_rtmutex_unlock(rtm);
}

static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
}

static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	/* Release it and account current as reader */
	__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
}
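
/*
 * Worked example of the write-side counter transitions (a sketch; the BIAS
 * constants live in the rwbase_rt header, here it only matters that
 * READER_BIAS occupies the sign bit while WRITER_BIAS is a distinct,
 * non-negative value):
 *
 *	unlocked, no readers:	readers == READER_BIAS (negative)
 *	rwbase_write_lock():	atomic_sub(READER_BIAS)
 *				-> readers == number of active readers (>= 0,
 *				   the reader fast path now fails)
 *	readers drain to 0:	__rwbase_write_trylock() sets WRITER_BIAS
 *	rwbase_write_unlock():	adds READER_BIAS - WRITER_BIAS
 *				-> readers == READER_BIAS again
 *	rwbase_write_downgrade(): adds READER_BIAS - (WRITER_BIAS - 1)
 *				-> readers == READER_BIAS + 1, i.e. the fast
 *				   path is re-enabled with current accounted
 *				   as the one remaining reader
 */
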
static inline bool __rwbase_write_trylock(struct rwbase_rt *rwb)
{
	/* Can do without CAS because we're serialized by wait_lock. */
	lockdep_assert_held(&rwb->rtmutex.wait_lock);

	/*
	 * _acquire is needed in case the reader is in the fast path, pairing
	 * with rwbase_read_unlock(), provides ACQUIRE.
	 */
	if (!atomic_read_acquire(&rwb->readers)) {
		atomic_set(&rwb->readers, WRITER_BIAS);
		return 1;
	}

	return 0;
}

static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
				     unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	/* Take the rtmutex as a first step */
	if (rwbase_rtmutex_lock_state(rtm, state))
		return -EINTR;

	/* Force readers into slow path */
	atomic_sub(READER_BIAS, &rwb->readers);

	rwbase_pre_schedule();

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	if (__rwbase_write_trylock(rwb))
		goto out_unlock;

	rwbase_set_and_save_current_state(state);
	trace_contention_begin(rwb, LCB_F_RT | LCB_F_WRITE);
	for (;;) {
		/* Optimized out for rwlocks */
		if (rwbase_signal_pending_state(state, current)) {
			rwbase_restore_current_state();
			__rwbase_write_unlock(rwb, 0, flags);
			rwbase_post_schedule();
			trace_contention_end(rwb, -EINTR);
			return -EINTR;
		}

		if (__rwbase_write_trylock(rwb))
			break;

		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
		rwbase_schedule();
		raw_spin_lock_irqsave(&rtm->wait_lock, flags);

		set_current_state(state);
	}
	rwbase_restore_current_state();
	trace_contention_end(rwb, 0);

out_unlock:
	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
	rwbase_post_schedule();
	return 0;
}

static inline int rwbase_write_trylock(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	if (!rwbase_rtmutex_trylock(rtm))
		return 0;

	atomic_sub(READER_BIAS, &rwb->readers);

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	if (__rwbase_write_trylock(rwb)) {
		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
		return 1;
	}
	__rwbase_write_unlock(rwb, 0, flags);
	return 0;
}
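
/*
 * Usage sketch (illustrative, not taken from this file): the signal check
 * in rwbase_write_lock() is what backs the killable rwsem variants on RT,
 * e.g.:
 *
 *	if (down_write_killable(&mm->mmap_lock))
 *		return -EINTR;
 *	... modify the VMA tree ...
 *	up_write(&mm->mmap_lock);
 *
 * On a fatal signal, rwbase_write_lock() restores READER_BIAS via
 * __rwbase_write_unlock(rwb, 0, flags) and returns -EINTR, so readers are
 * not left stuck in the slow path. For rwlocks the signal check is
 * compiled out, as noted above.
 */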