// SPDX-License-Identifier: GPL-2.0
/* kernel/locking/rwsem.c: R/W semaphores, public implementation
 *
 * Written by David Howells ([email protected]).
 * Derived from asm-i386/semaphore.h
 *
 * Writer lock-stealing by Alex Shi <[email protected]>
 * and Michel Lespinasse <[email protected]>
 *
 * Optimistic spinning by Tim Chen <[email protected]>
 * and Davidlohr Bueso <[email protected]>. Based on mutexes.
 *
 * Rwsem count bit fields re-definition and rwsem rearchitecture by
 * Waiman Long <[email protected]> and
 * Peter Zijlstra <[email protected]>.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include <linux/hung_task.h>
#include <trace/events/lock.h>

#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"

/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set.
 *  - Bit 0: RWSEM_READER_OWNED - rwsem may be owned by readers (just a hint)
 *  - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
 *
 * When the rwsem is reader-owned and a spinning writer has timed out,
 * the nonspinnable bit will be set to disable optimistic spinning.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer
 * into the owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it will also put its task_struct
 * pointer into the owner field with the RWSEM_READER_OWNED bit set.
 * On unlock, the owner field will largely be left untouched. So
 * for a free or reader-owned rwsem, the owner value may contain
 * information about the last reader that acquired the rwsem.
 *
 * That information may be helpful in debugging cases where the system
 * seems to hang on a reader-owned rwsem, especially if only one reader
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
 *
 * A fast path reader optimistic lock stealing is supported when the rwsem
 * was previously owned by a writer and the following conditions are met:
 *  - rwsem is not currently writer owned
 *  - the handoff isn't set.
 */
#define RWSEM_READER_OWNED (1UL << 0)
#define RWSEM_NONSPINNABLE (1UL << 1)
#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
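
/*
 * Illustrative sketch (not part of the kernel build): how the owner word
 * packs a task_struct pointer together with the two flag bits above. The
 * alignment of task_struct guarantees that the low bits of the pointer
 * are free to hold flags, so both fit in one atomic long:
 *
 *	unsigned long owner = atomic_long_read(&sem->owner);
 *	struct task_struct *task =
 *		(struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
 *	bool reader_owned = owner & RWSEM_READER_OWNED;
 *	bool nonspinnable = owner & RWSEM_NONSPINNABLE;
 *
 * This mirrors what rwsem_owner_flags() further down does and is shown
 * here only to make the encoding explicit.
 */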

#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
        if (!debug_locks_silent && \
            WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
                #c, atomic_long_read(&(sem)->count), \
                (unsigned long) sem->magic, \
                atomic_long_read(&(sem)->owner), (long)current, \
                list_empty(&(sem)->wait_list) ? "" : "not ")) \
                        debug_locks_off(); \
        } while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif

/*
 * On 64-bit architectures, the bit definitions of the count are:
 *
 * Bit 0 - writer locked bit
 * Bit 1 - waiters present bit
 * Bit 2 - lock handoff bit
 * Bits 3-7 - reserved
 * Bits 8-62 - 55-bit reader count
 * Bit 63 - read fail bit
 *
 * On 32-bit architectures, the bit definitions of the count are:
 *
 * Bit 0 - writer locked bit
 * Bit 1 - waiters present bit
 * Bit 2 - lock handoff bit
 * Bits 3-7 - reserved
 * Bits 8-30 - 23-bit reader count
 * Bit 31 - read fail bit
 *
 * It is not likely that the most significant bit (read fail bit) will ever
 * be set. This guard bit is still checked anyway in the down_read() fastpath
 * just in case we need to use up more of the reader bits for other purposes
 * in the future.
 *
 * atomic_long_fetch_add() is used to obtain the reader lock, whereas
 * atomic_long_cmpxchg() will be used to obtain the writer lock.
 *
 * There are three places where the lock handoff bit may be set or cleared.
 * 1) rwsem_mark_wake() for readers -- set, clear
 * 2) rwsem_try_write_lock() for writers -- set, clear
 * 3) rwsem_del_waiter() -- clear
 *
 * For all the above cases, wait_lock will be held. A writer must also
 * be the first one in the wait_list to be eligible for setting the handoff
 * bit. So concurrent setting/clearing of handoff bit is not possible.
 */
#define RWSEM_WRITER_LOCKED (1UL << 0)
#define RWSEM_FLAG_WAITERS (1UL << 1)
#define RWSEM_FLAG_HANDOFF (1UL << 2)
#define RWSEM_FLAG_READFAIL (1UL << (BITS_PER_LONG - 1))

#define RWSEM_READER_SHIFT 8
#define RWSEM_READER_BIAS (1UL << RWSEM_READER_SHIFT)
#define RWSEM_READER_MASK (~(RWSEM_READER_BIAS - 1))
#define RWSEM_WRITER_MASK RWSEM_WRITER_LOCKED
#define RWSEM_LOCK_MASK (RWSEM_WRITER_MASK|RWSEM_READER_MASK)
#define RWSEM_READ_FAILED_MASK (RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
                                RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)
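
/*
 * Illustrative sketch (not part of the kernel build): decomposing a count
 * value with the masks above. For example, a count of 0x203 has the writer
 * locked bit and the waiters bit set plus a reader count of 2
 * (0x200 >> RWSEM_READER_SHIFT), typically readers that are about to back
 * out after losing the race:
 *
 *	long count   = atomic_long_read(&sem->count);
 *	bool writer  = count & RWSEM_WRITER_MASK;
 *	bool waiters = count & RWSEM_FLAG_WAITERS;
 *	bool handoff = count & RWSEM_FLAG_HANDOFF;
 *	int  readers = count >> RWSEM_READER_SHIFT;
 *
 * A reader fastpath acquisition is an unconditional addition of
 * RWSEM_READER_BIAS followed by a check of the RWSEM_READ_FAILED_MASK bits
 * in the returned value; see rwsem_read_trylock() below.
 */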

/*
 * All writes to owner are protected by WRITE_ONCE() to make sure that
 * store tearing can't happen as optimistic spinners may read and use
 * the owner value concurrently without lock. Reads from owner, however,
 * may not need READ_ONCE() as long as the pointer value is only used
 * for comparison and isn't being dereferenced.
 *
 * Both rwsem_{set,clear}_owner() functions should be in the same
 * preempt disable section as the atomic op that changes sem->count.
 */
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
        lockdep_assert_preemption_disabled();
        atomic_long_set(&sem->owner, (long)current);
}

static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
        lockdep_assert_preemption_disabled();
        atomic_long_set(&sem->owner, 0);
}

/*
 * Test the flags in the owner field.
 */
static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
{
        return atomic_long_read(&sem->owner) & flags;
}

/*
 * The task_struct pointer of the last owning reader will be left in
 * the owner field.
 *
 * Note that the owner value just indicates the task has owned the rwsem
 * previously; it may not be the real owner or one of the real owners
 * anymore when that field is examined, so take it with a grain of salt.
 *
 * The reader non-spinnable bit is preserved.
 */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
                                            struct task_struct *owner)
{
        unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
                (atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE);

        atomic_long_set(&sem->owner, val);
}

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
        __rwsem_set_reader_owned(sem, current);
}

#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER)
/*
 * Return just the real task structure pointer of the owner
 */
struct task_struct *rwsem_owner(struct rw_semaphore *sem)
{
        return (struct task_struct *)
                (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Return true if the rwsem is owned by a reader.
 */
bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
        /*
         * Check the count to see if it is write-locked.
         */
        long count = atomic_long_read(&sem->count);

        if (count & RWSEM_WRITER_MASK)
                return false;
        return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
}

/*
 * With CONFIG_DEBUG_RWSEMS or CONFIG_DETECT_HUNG_TASK_BLOCKER configured,
 * this makes sure that the owner field of a reader-owned rwsem either
 * points to a real reader-owner or gets cleared. The only exception is
 * when the unlock is done by up_read_non_owner().
 */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
        unsigned long val = atomic_long_read(&sem->owner);

        while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
                if (atomic_long_try_cmpxchg(&sem->owner, &val,
                                            val & RWSEM_OWNER_FLAGS_MASK))
                        return;
        }
}
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

/*
 * Set the RWSEM_NONSPINNABLE bit if the RWSEM_READER_OWNED flag
 * remains set. Otherwise, the operation will be aborted.
 */
static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
{
        unsigned long owner = atomic_long_read(&sem->owner);

        do {
                if (!(owner & RWSEM_READER_OWNED))
                        break;
                if (owner & RWSEM_NONSPINNABLE)
                        break;
        } while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
                                          owner | RWSEM_NONSPINNABLE));
}

static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
{
        *cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);

        if (WARN_ON_ONCE(*cntp < 0))
                rwsem_set_nonspinnable(sem);

        if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
                rwsem_set_reader_owned(sem);
                return true;
        }

        return false;
}

static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
{
        long tmp = RWSEM_UNLOCKED_VALUE;

        if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
                rwsem_set_owner(sem);
                return true;
        }

        return false;
}

/*
 * Return the real task structure pointer of the owner and the embedded
 * flags in the owner. pflags must be non-NULL.
 */
static inline struct task_struct *
rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
{
        unsigned long owner = atomic_long_read(&sem->owner);

        *pflags = owner & RWSEM_OWNER_FLAGS_MASK;
        return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Guide to the rw_semaphore's count field.
 *
 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
 * by a writer.
 *
 * The lock is owned by readers when
 * (1) the RWSEM_WRITER_LOCKED bit isn't set in count,
 * (2) some of the reader bits are set in count, and
 * (3) the owner field has the RWSEM_READER_OWNED bit set.
 *
 * Having some reader bits set is not enough to guarantee a reader-owned
 * lock as the readers may be in the process of backing out from the count
 * and a writer has just released the lock. So another writer may steal
 * the lock immediately after that.
 */

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
                  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        /*
         * Make sure we are not reinitializing a held semaphore:
         */
        debug_check_no_locks_freed((void *)sem, sizeof(*sem));
        lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
#ifdef CONFIG_DEBUG_RWSEMS
        sem->magic = sem;
#endif
        atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
        raw_spin_lock_init(&sem->wait_lock);
        INIT_LIST_HEAD(&sem->wait_list);
        atomic_long_set(&sem->owner, 0L);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
        osq_lock_init(&sem->osq);
#endif
}
EXPORT_SYMBOL(__init_rwsem);
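
/*
 * Illustrative usage sketch (not part of the kernel build): the public
 * rwsem API as seen by callers. "my_sem" is a made-up name used only for
 * this example.
 *
 *	static DECLARE_RWSEM(my_sem);		// static initialization
 *	// or, for an embedded/dynamically allocated semaphore:
 *	//	init_rwsem(&my_sem);
 *
 *	down_read(&my_sem);			// shared (reader) lock
 *	... read the protected data ...
 *	up_read(&my_sem);
 *
 *	down_write(&my_sem);			// exclusive (writer) lock
 *	... modify the protected data ...
 *	up_write(&my_sem);
 *
 * DECLARE_RWSEM() and init_rwsem() come from <linux/rwsem.h> and end up
 * calling __init_rwsem() above.
 */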

enum rwsem_waiter_type {
        RWSEM_WAITING_FOR_WRITE,
        RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
        struct list_head list;
        struct task_struct *task;
        enum rwsem_waiter_type type;
        unsigned long timeout;
        bool handoff_set;
};
#define rwsem_first_waiter(sem) \
        list_first_entry(&sem->wait_list, struct rwsem_waiter, list)

enum rwsem_wake_type {
        RWSEM_WAKE_ANY,         /* Wake whatever's at head of wait list */
        RWSEM_WAKE_READERS,     /* Wake readers only */
        RWSEM_WAKE_READ_OWNED   /* Waker thread holds the read lock */
};

/*
 * The typical HZ value is either 250 or 1000. So set the minimum waiting
 * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
 * queue before initiating the handoff protocol.
 */
#define RWSEM_WAIT_TIMEOUT DIV_ROUND_UP(HZ, 250)
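
/*
 * Worked example of the timeout above: DIV_ROUND_UP(HZ, 250) evaluates to
 * 1 jiffy for HZ=250 (4ms), 4 jiffies for HZ=1000 (also 4ms) and 1 jiffy
 * for HZ=100 (10ms, the "1 jiffy if that is higher than 4ms" case).
 */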
359
360
/*
361
* Magic number to batch-wakeup waiting readers, even when writers are
362
* also present in the queue. This both limits the amount of work the
363
* waking thread must do and also prevents any potential counter overflow,
364
* however unlikely.
365
*/
366
#define MAX_READERS_WAKEUP 0x100
367
368
static inline void
369
rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
370
{
371
lockdep_assert_held(&sem->wait_lock);
372
list_add_tail(&waiter->list, &sem->wait_list);
373
/* caller will set RWSEM_FLAG_WAITERS */
374
}
375
376
/*
377
* Remove a waiter from the wait_list and clear flags.
378
*
379
* Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
380
* this function. Modify with care.
381
*
382
* Return: true if wait_list isn't empty and false otherwise
383
*/
384
static inline bool
385
rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
386
{
387
lockdep_assert_held(&sem->wait_lock);
388
list_del(&waiter->list);
389
if (likely(!list_empty(&sem->wait_list)))
390
return true;
391
392
atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
393
return false;
394
}
395
396
/*
 * handle the lock release when processes blocked on it can now run
 * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
 *   have been set.
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
 *   to actually wake up the blocked task(s) and drop the reference count,
 *   preferably when the wait_lock is released
 * - woken process blocks are discarded from the list after having their
 *   task zeroed
 * - writers are only marked woken if downgrading is false
 *
 * Implies rwsem_del_waiter() for all woken readers.
 */
410
static void rwsem_mark_wake(struct rw_semaphore *sem,
411
enum rwsem_wake_type wake_type,
412
struct wake_q_head *wake_q)
413
{
414
struct rwsem_waiter *waiter, *tmp;
415
long oldcount, woken = 0, adjustment = 0;
416
struct list_head wlist;
417
418
lockdep_assert_held(&sem->wait_lock);
419
420
/*
421
* Take a peek at the queue head waiter such that we can determine
422
* the wakeup(s) to perform.
423
*/
424
waiter = rwsem_first_waiter(sem);
425
426
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
427
if (wake_type == RWSEM_WAKE_ANY) {
428
/*
 * Mark writer at the front of the queue for wakeup.
 * Until the task is actually awoken later by the
 * caller, other writers are able to steal it.
 * Readers, on the other hand, will block as they
 * will notice the queued writer.
 */
435
wake_q_add(wake_q, waiter->task);
436
lockevent_inc(rwsem_wake_writer);
437
}
438
439
return;
440
}
441
442
/*
443
* No reader wakeup if there are too many of them already.
444
*/
445
if (unlikely(atomic_long_read(&sem->count) < 0))
446
return;
447
448
/*
449
* Writers might steal the lock before we grant it to the next reader.
450
* We prefer to do the first reader grant before counting readers
451
* so we can bail out early if a writer stole the lock.
452
*/
453
if (wake_type != RWSEM_WAKE_READ_OWNED) {
454
struct task_struct *owner;
455
456
adjustment = RWSEM_READER_BIAS;
457
oldcount = atomic_long_fetch_add(adjustment, &sem->count);
458
if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
459
/*
460
* When we've been waiting "too" long (for writers
461
* to give up the lock), request a HANDOFF to
462
* force the issue.
463
*/
464
if (time_after(jiffies, waiter->timeout)) {
465
if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
466
adjustment -= RWSEM_FLAG_HANDOFF;
467
lockevent_inc(rwsem_rlock_handoff);
468
}
469
waiter->handoff_set = true;
470
}
471
472
atomic_long_add(-adjustment, &sem->count);
473
return;
474
}
475
/*
476
* Set it to reader-owned to give spinners an early
477
* indication that readers now have the lock.
478
* The reader nonspinnable bit seen at slowpath entry of
479
* the reader is copied over.
480
*/
481
owner = waiter->task;
482
__rwsem_set_reader_owned(sem, owner);
483
}
484
485
/*
 * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
 * queue. We know that at least one reader will be woken, as we
 * accounted for that above. Note we increment the 'active part' of
 * the count by the number of readers before waking any processes up.
 *
 * This is an adaptation of the phase-fair R/W locks where at the
 * reader phase (first waiter is a reader), all readers are eligible
 * to acquire the lock at the same time irrespective of their order
 * in the queue. The writers acquire the lock according to their
 * order in the queue.
 *
 * We have to do wakeup in 2 passes to prevent the possibility that
 * the reader count may be decremented before it is incremented. This
 * is because the to-be-woken waiter may not have slept yet. So it
 * may see waiter->task cleared, finish its critical section and
 * do an unlock before the reader count increment.
 *
 * 1) Collect the read-waiters in a separate list, count them and
 *    fully increment the reader count in rwsem.
 * 2) For each waiter in the new list, clear waiter->task and
 *    put them into wake_q to be woken up later.
 */
508
INIT_LIST_HEAD(&wlist);
509
list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
510
if (waiter->type == RWSEM_WAITING_FOR_WRITE)
511
continue;
512
513
woken++;
514
list_move_tail(&waiter->list, &wlist);
515
516
/*
517
* Limit # of readers that can be woken up per wakeup call.
518
*/
519
if (unlikely(woken >= MAX_READERS_WAKEUP))
520
break;
521
}
522
523
adjustment = woken * RWSEM_READER_BIAS - adjustment;
524
lockevent_cond_inc(rwsem_wake_reader, woken);
525
526
oldcount = atomic_long_read(&sem->count);
527
if (list_empty(&sem->wait_list)) {
528
/*
529
* Combined with list_move_tail() above, this implies
530
* rwsem_del_waiter().
531
*/
532
adjustment -= RWSEM_FLAG_WAITERS;
533
if (oldcount & RWSEM_FLAG_HANDOFF)
534
adjustment -= RWSEM_FLAG_HANDOFF;
535
} else if (woken) {
536
/*
537
* When we've woken a reader, we no longer need to force
538
* writers to give up the lock and we can clear HANDOFF.
539
*/
540
if (oldcount & RWSEM_FLAG_HANDOFF)
541
adjustment -= RWSEM_FLAG_HANDOFF;
542
}
543
544
if (adjustment)
545
atomic_long_add(adjustment, &sem->count);
546
547
/* 2nd pass */
548
list_for_each_entry_safe(waiter, tmp, &wlist, list) {
549
struct task_struct *tsk;
550
551
tsk = waiter->task;
552
get_task_struct(tsk);
553
554
/*
555
* Ensure calling get_task_struct() before setting the reader
556
* waiter to nil such that rwsem_down_read_slowpath() cannot
557
* race with do_exit() by always holding a reference count
558
* to the task to wakeup.
559
*/
560
smp_store_release(&waiter->task, NULL);
561
/*
562
* Ensure issuing the wakeup (either by us or someone else)
563
* after setting the reader waiter to nil.
564
*/
565
wake_q_add_safe(wake_q, tsk);
566
}
567
}
568
569
/*
 * Remove a waiter and try to wake up other waiters in the wait queue.
 * This function is called from the out_nolock path of both the reader and
 * writer slowpaths with wait_lock held. It releases the wait_lock and
 * optionally wakes up waiters before it returns.
 */
575
static inline void
576
rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
577
struct wake_q_head *wake_q)
578
__releases(&sem->wait_lock)
579
{
580
bool first = rwsem_first_waiter(sem) == waiter;
581
582
wake_q_init(wake_q);
583
584
/*
585
* If the wait_list isn't empty and the waiter to be deleted is
586
* the first waiter, we wake up the remaining waiters as they may
587
* be eligible to acquire or spin on the lock.
588
*/
589
if (rwsem_del_waiter(sem, waiter) && first)
590
rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
591
raw_spin_unlock_irq(&sem->wait_lock);
592
if (!wake_q_empty(wake_q))
593
wake_up_q(wake_q);
594
}
595
596
/*
597
* This function must be called with the sem->wait_lock held to prevent
598
* race conditions between checking the rwsem wait list and setting the
599
* sem->count accordingly.
600
*
601
* Implies rwsem_del_waiter() on success.
602
*/
603
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
604
struct rwsem_waiter *waiter)
605
{
606
struct rwsem_waiter *first = rwsem_first_waiter(sem);
607
long count, new;
608
609
lockdep_assert_held(&sem->wait_lock);
610
611
count = atomic_long_read(&sem->count);
612
do {
613
bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
614
615
if (has_handoff) {
616
/*
 * Honor the handoff bit and yield only when the first
 * waiter is the one that set it. Otherwise, we
 * still try to acquire the rwsem.
 */
621
if (first->handoff_set && (waiter != first))
622
return false;
623
}
624
625
new = count;
626
627
if (count & RWSEM_LOCK_MASK) {
628
/*
 * A waiter (first or not) can set the handoff bit
 * if it is an RT task or has waited in the wait
 * queue for too long.
 */
633
if (has_handoff || (!rt_or_dl_task(waiter->task) &&
634
!time_after(jiffies, waiter->timeout)))
635
return false;
636
637
new |= RWSEM_FLAG_HANDOFF;
638
} else {
639
new |= RWSEM_WRITER_LOCKED;
640
new &= ~RWSEM_FLAG_HANDOFF;
641
642
if (list_is_singular(&sem->wait_list))
643
new &= ~RWSEM_FLAG_WAITERS;
644
}
645
} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
646
647
/*
648
* We have either acquired the lock with handoff bit cleared or set
649
* the handoff bit. Only the first waiter can have its handoff_set
650
* set here to enable optimistic spinning in slowpath loop.
651
*/
652
if (new & RWSEM_FLAG_HANDOFF) {
653
first->handoff_set = true;
654
lockevent_inc(rwsem_wlock_handoff);
655
return false;
656
}
657
658
/*
659
* Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
660
* success.
661
*/
662
list_del(&waiter->list);
663
rwsem_set_owner(sem);
664
return true;
665
}
666
667
/*
668
* The rwsem_spin_on_owner() function returns the following 4 values
669
* depending on the lock owner state.
670
* OWNER_NULL : owner is currently NULL
671
* OWNER_WRITER: when owner changes and is a writer
672
* OWNER_READER: when owner changes and the new owner may be a reader.
673
* OWNER_NONSPINNABLE:
674
* when optimistic spinning has to stop because either the
675
* owner stops running, is unknown, or its timeslice has
676
* been used up.
677
*/
678
enum owner_state {
679
OWNER_NULL = 1 << 0,
680
OWNER_WRITER = 1 << 1,
681
OWNER_READER = 1 << 2,
682
OWNER_NONSPINNABLE = 1 << 3,
683
};
684
685
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
686
/*
687
* Try to acquire write lock before the writer has been put on wait queue.
688
*/
689
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
690
{
691
long count = atomic_long_read(&sem->count);
692
693
while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
694
if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
695
count | RWSEM_WRITER_LOCKED)) {
696
rwsem_set_owner(sem);
697
lockevent_inc(rwsem_opt_lock);
698
return true;
699
}
700
}
701
return false;
702
}
703
704
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
705
{
706
struct task_struct *owner;
707
unsigned long flags;
708
bool ret = true;
709
710
if (need_resched()) {
711
lockevent_inc(rwsem_opt_fail);
712
return false;
713
}
714
715
/*
 * Disabled preemption is equivalent to an RCU read-side critical
 * section, thus the task_struct structure won't go away.
 */
719
owner = rwsem_owner_flags(sem, &flags);
720
/*
721
* Don't check the read-owner as the entry may be stale.
722
*/
723
if ((flags & RWSEM_NONSPINNABLE) ||
724
(owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
725
ret = false;
726
727
lockevent_cond_inc(rwsem_opt_fail, !ret);
728
return ret;
729
}
730
731
static inline enum owner_state
732
rwsem_owner_state(struct task_struct *owner, unsigned long flags)
733
{
734
if (flags & RWSEM_NONSPINNABLE)
735
return OWNER_NONSPINNABLE;
736
737
if (flags & RWSEM_READER_OWNED)
738
return OWNER_READER;
739
740
return owner ? OWNER_WRITER : OWNER_NULL;
741
}
742
743
static noinline enum owner_state
744
rwsem_spin_on_owner(struct rw_semaphore *sem)
745
{
746
struct task_struct *new, *owner;
747
unsigned long flags, new_flags;
748
enum owner_state state;
749
750
lockdep_assert_preemption_disabled();
751
752
owner = rwsem_owner_flags(sem, &flags);
753
state = rwsem_owner_state(owner, flags);
754
if (state != OWNER_WRITER)
755
return state;
756
757
for (;;) {
758
/*
 * When a waiting writer sets the handoff flag, it may spin
 * on the owner as well. Once that writer acquires the lock,
 * we can spin on it. So we don't need to quit even when the
 * handoff bit is set.
 */
764
new = rwsem_owner_flags(sem, &new_flags);
765
if ((new != owner) || (new_flags != flags)) {
766
state = rwsem_owner_state(new, new_flags);
767
break;
768
}
769
770
/*
 * Ensure we emit the owner->on_cpu dereference _after_
 * checking that sem->owner still matches owner. If that
 * fails, owner might point to free()d memory. If it still
 * matches, our spinning context has already disabled
 * preemption, which is equivalent to an RCU read-side
 * critical section and ensures the memory stays valid.
 */
778
barrier();
779
780
if (need_resched() || !owner_on_cpu(owner)) {
781
state = OWNER_NONSPINNABLE;
782
break;
783
}
784
785
cpu_relax();
786
}
787
788
return state;
789
}
790
791
/*
 * Calculate reader-owned rwsem spinning threshold for writer
 *
 * The more readers own the rwsem, the longer it will take for them to
 * wind down and free the rwsem. So the empirical formula used to
 * determine the actual spinning time limit here is:
 *
 *   Spinning threshold = (10 + nr_readers/2)us
 *
 * The limit is capped to a maximum of 25us (30 readers). This is just
 * a heuristic and is subject to change in the future.
 */
803
static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
804
{
805
long count = atomic_long_read(&sem->count);
806
int readers = count >> RWSEM_READER_SHIFT;
807
u64 delta;
808
809
if (readers > 30)
810
readers = 30;
811
delta = (20 + readers) * NSEC_PER_USEC / 2;
812
813
return sched_clock() + delta;
814
}
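/*
 * Worked example of the heuristic above, delta = (20 + readers) * NSEC_PER_USEC / 2:
 * 0 readers give a 10us budget, 10 readers give 15us and the 30-reader cap
 * gives the 25us maximum quoted in the comment.
 */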
815
816
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
817
{
818
bool taken = false;
819
int prev_owner_state = OWNER_NULL;
820
int loop = 0;
821
u64 rspin_threshold = 0;
822
823
/* sem->wait_lock should not be held when doing optimistic spinning */
824
if (!osq_lock(&sem->osq))
825
goto done;
826
827
/*
828
* Optimistically spin on the owner field and attempt to acquire the
829
* lock whenever the owner changes. Spinning will be stopped when:
830
* 1) the owning writer isn't running; or
831
* 2) readers own the lock and spinning time has exceeded limit.
832
*/
833
for (;;) {
834
enum owner_state owner_state;
835
836
owner_state = rwsem_spin_on_owner(sem);
837
if (owner_state == OWNER_NONSPINNABLE)
838
break;
839
840
/*
841
* Try to acquire the lock
842
*/
843
taken = rwsem_try_write_lock_unqueued(sem);
844
845
if (taken)
846
break;
847
848
/*
849
* Time-based reader-owned rwsem optimistic spinning
850
*/
851
if (owner_state == OWNER_READER) {
852
/*
853
* Re-initialize rspin_threshold every time when
854
* the owner state changes from non-reader to reader.
855
* This allows a writer to steal the lock in between
856
* 2 reader phases and have the threshold reset at
857
* the beginning of the 2nd reader phase.
858
*/
859
if (prev_owner_state != OWNER_READER) {
860
if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
861
break;
862
rspin_threshold = rwsem_rspin_threshold(sem);
863
loop = 0;
864
}
865
866
/*
867
* Check time threshold once every 16 iterations to
868
* avoid calling sched_clock() too frequently so
869
* as to reduce the average latency between the times
870
* when the lock becomes free and when the spinner
871
* is ready to do a trylock.
872
*/
873
else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
874
rwsem_set_nonspinnable(sem);
875
lockevent_inc(rwsem_opt_nospin);
876
break;
877
}
878
}
879
880
/*
 * An RT task cannot do optimistic spinning if it cannot
 * be sure the lock holder is running or live-lock may
 * happen if the current task and the lock holder happen
 * to run on the same CPU. However, aborting optimistic
 * spinning while a NULL owner is detected may miss some
 * opportunity where spinning can continue without causing
 * a problem.
 *
 * There are 2 possible cases where an RT task may be able
 * to continue spinning.
 *
 * 1) The lock owner is in the process of releasing the
 *    lock, sem->owner is cleared but the lock has not
 *    been released yet.
 * 2) The lock was free and owner cleared, but another
 *    task just comes in and acquires the lock before
 *    we try to get it. The new owner may be a spinnable
 *    writer.
 *
 * To take advantage of the two scenarios listed above, the RT
 * task is made to retry one more time to see if it can
 * acquire the lock or continue spinning on the new owning
 * writer. Of course, if the time lag is long enough or the
 * new owner is not a writer or spinnable, the RT task will
 * quit spinning.
 *
 * If the owner is a writer, the need_resched() check is
 * done inside rwsem_spin_on_owner(). If the owner is not
 * a writer, the need_resched() check needs to be done here.
 */
911
if (owner_state != OWNER_WRITER) {
912
if (need_resched())
913
break;
914
if (rt_or_dl_task(current) &&
915
(prev_owner_state != OWNER_WRITER))
916
break;
917
}
918
prev_owner_state = owner_state;
919
920
/*
921
* The cpu_relax() call is a compiler barrier which forces
922
* everything in this loop to be re-loaded. We don't need
923
* memory barriers as we'll eventually observe the right
924
* values at the cost of a few extra spins.
925
*/
926
cpu_relax();
927
}
928
osq_unlock(&sem->osq);
929
done:
930
lockevent_cond_inc(rwsem_opt_fail, !taken);
931
return taken;
932
}
933
934
/*
935
* Clear the owner's RWSEM_NONSPINNABLE bit if it is set. This should
936
* only be called when the reader count reaches 0.
937
*/
938
static inline void clear_nonspinnable(struct rw_semaphore *sem)
939
{
940
if (unlikely(rwsem_test_oflags(sem, RWSEM_NONSPINNABLE)))
941
atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
942
}
943
944
#else
945
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
946
{
947
return false;
948
}
949
950
static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
951
{
952
return false;
953
}
954
955
static inline void clear_nonspinnable(struct rw_semaphore *sem) { }
956
957
static inline enum owner_state
958
rwsem_spin_on_owner(struct rw_semaphore *sem)
959
{
960
return OWNER_NONSPINNABLE;
961
}
962
#endif
963
964
/*
 * Prepare to wake up waiter(s) in the wait queue by putting them into the
 * given wake_q if the rwsem lock owner isn't a writer. If the rwsem is
 * likely reader-owned, wake up the read lock waiters at the front of the
 * queue; otherwise, wake up whichever waiter is at the front.
 *
 * This is called from both the reader and writer slow paths.
 */
972
static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
973
struct wake_q_head *wake_q)
974
{
975
enum rwsem_wake_type wake_type;
976
977
if (count & RWSEM_WRITER_MASK)
978
return;
979
980
if (count & RWSEM_READER_MASK) {
981
wake_type = RWSEM_WAKE_READERS;
982
} else {
983
wake_type = RWSEM_WAKE_ANY;
984
clear_nonspinnable(sem);
985
}
986
rwsem_mark_wake(sem, wake_type, wake_q);
987
}
988
989
/*
990
* Wait for the read lock to be granted
991
*/
992
static struct rw_semaphore __sched *
993
rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state)
994
{
995
long adjustment = -RWSEM_READER_BIAS;
996
long rcnt = (count >> RWSEM_READER_SHIFT);
997
struct rwsem_waiter waiter;
998
DEFINE_WAKE_Q(wake_q);
999
1000
/*
1001
* To prevent a constant stream of readers from starving a sleeping
1002
* writer, don't attempt optimistic lock stealing if the lock is
1003
* very likely owned by readers.
1004
*/
1005
if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) &&
1006
(rcnt > 1) && !(count & RWSEM_WRITER_LOCKED))
1007
goto queue;
1008
1009
/*
1010
* Reader optimistic lock stealing.
1011
*/
1012
if (!(count & (RWSEM_WRITER_LOCKED | RWSEM_FLAG_HANDOFF))) {
1013
rwsem_set_reader_owned(sem);
1014
lockevent_inc(rwsem_rlock_steal);
1015
1016
/*
1017
* Wake up other readers in the wait queue if it is
1018
* the first reader.
1019
*/
1020
if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
1021
raw_spin_lock_irq(&sem->wait_lock);
1022
if (!list_empty(&sem->wait_list))
1023
rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
1024
&wake_q);
1025
raw_spin_unlock_irq(&sem->wait_lock);
1026
wake_up_q(&wake_q);
1027
}
1028
return sem;
1029
}
1030
1031
queue:
1032
waiter.task = current;
1033
waiter.type = RWSEM_WAITING_FOR_READ;
1034
waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1035
waiter.handoff_set = false;
1036
1037
raw_spin_lock_irq(&sem->wait_lock);
1038
if (list_empty(&sem->wait_list)) {
1039
/*
1040
* In case the wait queue is empty and the lock isn't owned
1041
* by a writer, this reader can exit the slowpath and return
1042
* immediately as its RWSEM_READER_BIAS has already been set
1043
* in the count.
1044
*/
1045
if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) {
1046
/* Provide lock ACQUIRE */
1047
smp_acquire__after_ctrl_dep();
1048
raw_spin_unlock_irq(&sem->wait_lock);
1049
rwsem_set_reader_owned(sem);
1050
lockevent_inc(rwsem_rlock_fast);
1051
return sem;
1052
}
1053
adjustment += RWSEM_FLAG_WAITERS;
1054
}
1055
rwsem_add_waiter(sem, &waiter);
1056
1057
/* we're now waiting on the lock, but no longer actively locking */
1058
count = atomic_long_add_return(adjustment, &sem->count);
1059
1060
rwsem_cond_wake_waiter(sem, count, &wake_q);
1061
raw_spin_unlock_irq(&sem->wait_lock);
1062
1063
if (!wake_q_empty(&wake_q))
1064
wake_up_q(&wake_q);
1065
1066
trace_contention_begin(sem, LCB_F_READ);
1067
set_current_state(state);
1068
1069
if (state == TASK_UNINTERRUPTIBLE)
1070
hung_task_set_blocker(sem, BLOCKER_TYPE_RWSEM_READER);
1071
1072
/* wait to be given the lock */
1073
for (;;) {
1074
if (!smp_load_acquire(&waiter.task)) {
1075
/* Matches rwsem_mark_wake()'s smp_store_release(). */
1076
break;
1077
}
1078
if (signal_pending_state(state, current)) {
1079
raw_spin_lock_irq(&sem->wait_lock);
1080
if (waiter.task)
1081
goto out_nolock;
1082
raw_spin_unlock_irq(&sem->wait_lock);
1083
/* Ordered by sem->wait_lock against rwsem_mark_wake(). */
1084
break;
1085
}
1086
schedule_preempt_disabled();
1087
lockevent_inc(rwsem_sleep_reader);
1088
set_current_state(state);
1089
}
1090
1091
if (state == TASK_UNINTERRUPTIBLE)
1092
hung_task_clear_blocker();
1093
1094
__set_current_state(TASK_RUNNING);
1095
lockevent_inc(rwsem_rlock);
1096
trace_contention_end(sem, 0);
1097
return sem;
1098
1099
out_nolock:
1100
rwsem_del_wake_waiter(sem, &waiter, &wake_q);
1101
__set_current_state(TASK_RUNNING);
1102
lockevent_inc(rwsem_rlock_fail);
1103
trace_contention_end(sem, -EINTR);
1104
return ERR_PTR(-EINTR);
1105
}
1106
1107
/*
1108
* Wait until we successfully acquire the write lock
1109
*/
1110
static struct rw_semaphore __sched *
1111
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
1112
{
1113
struct rwsem_waiter waiter;
1114
DEFINE_WAKE_Q(wake_q);
1115
1116
/* do optimistic spinning and steal lock if possible */
1117
if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
1118
/* rwsem_optimistic_spin() implies ACQUIRE on success */
1119
return sem;
1120
}
1121
1122
/*
1123
* Optimistic spinning failed, proceed to the slowpath
1124
* and block until we can acquire the sem.
1125
*/
1126
waiter.task = current;
1127
waiter.type = RWSEM_WAITING_FOR_WRITE;
1128
waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1129
waiter.handoff_set = false;
1130
1131
raw_spin_lock_irq(&sem->wait_lock);
1132
rwsem_add_waiter(sem, &waiter);
1133
1134
/* we're now waiting on the lock */
1135
if (rwsem_first_waiter(sem) != &waiter) {
1136
rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
1137
&wake_q);
1138
if (!wake_q_empty(&wake_q)) {
1139
/*
1140
* We want to minimize wait_lock hold time especially
1141
* when a large number of readers are to be woken up.
1142
*/
1143
raw_spin_unlock_irq(&sem->wait_lock);
1144
wake_up_q(&wake_q);
1145
raw_spin_lock_irq(&sem->wait_lock);
1146
}
1147
} else {
1148
atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
1149
}
1150
1151
/* wait until we successfully acquire the lock */
1152
set_current_state(state);
1153
trace_contention_begin(sem, LCB_F_WRITE);
1154
1155
if (state == TASK_UNINTERRUPTIBLE)
1156
hung_task_set_blocker(sem, BLOCKER_TYPE_RWSEM_WRITER);
1157
1158
for (;;) {
1159
if (rwsem_try_write_lock(sem, &waiter)) {
1160
/* rwsem_try_write_lock() implies ACQUIRE on success */
1161
break;
1162
}
1163
1164
raw_spin_unlock_irq(&sem->wait_lock);
1165
1166
if (signal_pending_state(state, current))
1167
goto out_nolock;
1168
1169
/*
 * After setting the handoff bit and failing to acquire
 * the lock, attempt to spin on the owner to accelerate the
 * lock transfer. If the previous owner is an on-cpu writer
 * and it has just released the lock, OWNER_NULL will be
 * returned. In this case, we attempt to acquire the lock
 * again without sleeping.
 */
1177
if (waiter.handoff_set) {
1178
enum owner_state owner_state;
1179
1180
owner_state = rwsem_spin_on_owner(sem);
1181
if (owner_state == OWNER_NULL)
1182
goto trylock_again;
1183
}
1184
1185
schedule_preempt_disabled();
1186
lockevent_inc(rwsem_sleep_writer);
1187
set_current_state(state);
1188
trylock_again:
1189
raw_spin_lock_irq(&sem->wait_lock);
1190
}
1191
1192
if (state == TASK_UNINTERRUPTIBLE)
1193
hung_task_clear_blocker();
1194
1195
__set_current_state(TASK_RUNNING);
1196
raw_spin_unlock_irq(&sem->wait_lock);
1197
lockevent_inc(rwsem_wlock);
1198
trace_contention_end(sem, 0);
1199
return sem;
1200
1201
out_nolock:
1202
__set_current_state(TASK_RUNNING);
1203
raw_spin_lock_irq(&sem->wait_lock);
1204
rwsem_del_wake_waiter(sem, &waiter, &wake_q);
1205
lockevent_inc(rwsem_wlock_fail);
1206
trace_contention_end(sem, -EINTR);
1207
return ERR_PTR(-EINTR);
1208
}
1209
1210
/*
1211
* handle waking up a waiter on the semaphore
1212
* - up_read/up_write has decremented the active part of count if we come here
1213
*/
1214
static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
1215
{
1216
unsigned long flags;
1217
DEFINE_WAKE_Q(wake_q);
1218
1219
raw_spin_lock_irqsave(&sem->wait_lock, flags);
1220
1221
if (!list_empty(&sem->wait_list))
1222
rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
1223
1224
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1225
wake_up_q(&wake_q);
1226
1227
return sem;
1228
}
1229
1230
/*
1231
* downgrade a write lock into a read lock
1232
* - caller incremented waiting part of count and discovered it still negative
1233
* - just wake up any readers at the front of the queue
1234
*/
1235
static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
1236
{
1237
unsigned long flags;
1238
DEFINE_WAKE_Q(wake_q);
1239
1240
raw_spin_lock_irqsave(&sem->wait_lock, flags);
1241
1242
if (!list_empty(&sem->wait_list))
1243
rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
1244
1245
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
1246
wake_up_q(&wake_q);
1247
1248
return sem;
1249
}
1250
1251
/*
1252
* lock for reading
1253
*/
1254
static __always_inline int __down_read_common(struct rw_semaphore *sem, int state)
1255
{
1256
int ret = 0;
1257
long count;
1258
1259
preempt_disable();
1260
if (!rwsem_read_trylock(sem, &count)) {
1261
if (IS_ERR(rwsem_down_read_slowpath(sem, count, state))) {
1262
ret = -EINTR;
1263
goto out;
1264
}
1265
DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1266
}
1267
out:
1268
preempt_enable();
1269
return ret;
1270
}
1271
1272
static __always_inline void __down_read(struct rw_semaphore *sem)
1273
{
1274
__down_read_common(sem, TASK_UNINTERRUPTIBLE);
1275
}
1276
1277
static __always_inline int __down_read_interruptible(struct rw_semaphore *sem)
1278
{
1279
return __down_read_common(sem, TASK_INTERRUPTIBLE);
1280
}
1281
1282
static __always_inline int __down_read_killable(struct rw_semaphore *sem)
1283
{
1284
return __down_read_common(sem, TASK_KILLABLE);
1285
}
1286
1287
static inline int __down_read_trylock(struct rw_semaphore *sem)
1288
{
1289
int ret = 0;
1290
long tmp;
1291
1292
DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1293
1294
preempt_disable();
1295
tmp = atomic_long_read(&sem->count);
1296
while (!(tmp & RWSEM_READ_FAILED_MASK)) {
1297
if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
1298
tmp + RWSEM_READER_BIAS)) {
1299
rwsem_set_reader_owned(sem);
1300
ret = 1;
1301
break;
1302
}
1303
}
1304
preempt_enable();
1305
return ret;
1306
}
1307
1308
/*
1309
* lock for writing
1310
*/
1311
static __always_inline int __down_write_common(struct rw_semaphore *sem, int state)
1312
{
1313
int ret = 0;
1314
1315
preempt_disable();
1316
if (unlikely(!rwsem_write_trylock(sem))) {
1317
if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
1318
ret = -EINTR;
1319
}
1320
preempt_enable();
1321
return ret;
1322
}
1323
1324
static __always_inline void __down_write(struct rw_semaphore *sem)
1325
{
1326
__down_write_common(sem, TASK_UNINTERRUPTIBLE);
1327
}
1328
1329
static __always_inline int __down_write_killable(struct rw_semaphore *sem)
1330
{
1331
return __down_write_common(sem, TASK_KILLABLE);
1332
}
1333
1334
static inline int __down_write_trylock(struct rw_semaphore *sem)
1335
{
1336
int ret;
1337
1338
preempt_disable();
1339
DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1340
ret = rwsem_write_trylock(sem);
1341
preempt_enable();
1342
1343
return ret;
1344
}
1345
1346
/*
1347
* unlock after reading
1348
*/
1349
static inline void __up_read(struct rw_semaphore *sem)
1350
{
1351
long tmp;
1352
1353
DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1354
DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1355
1356
preempt_disable();
1357
rwsem_clear_reader_owned(sem);
1358
tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
1359
DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
1360
if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
1361
RWSEM_FLAG_WAITERS)) {
1362
clear_nonspinnable(sem);
1363
rwsem_wake(sem);
1364
}
1365
preempt_enable();
1366
}
1367
1368
/*
1369
* unlock after writing
1370
*/
1371
static inline void __up_write(struct rw_semaphore *sem)
1372
{
1373
long tmp;
1374
1375
DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1376
/*
1377
* sem->owner may differ from current if the ownership is transferred
1378
* to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
1379
*/
1380
DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
1381
!rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
1382
1383
preempt_disable();
1384
rwsem_clear_owner(sem);
1385
tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
1386
if (unlikely(tmp & RWSEM_FLAG_WAITERS))
1387
rwsem_wake(sem);
1388
preempt_enable();
1389
}
1390
1391
/*
1392
* downgrade write lock to read lock
1393
*/
1394
static inline void __downgrade_write(struct rw_semaphore *sem)
1395
{
1396
long tmp;
1397
1398
/*
1399
* When downgrading from exclusive to shared ownership,
1400
* anything inside the write-locked region cannot leak
1401
* into the read side. In contrast, anything in the
1402
* read-locked region is ok to be re-ordered into the
1403
* write side. As such, rely on RELEASE semantics.
1404
*/
1405
DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
1406
preempt_disable();
1407
tmp = atomic_long_fetch_add_release(
1408
-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
1409
rwsem_set_reader_owned(sem);
1410
if (tmp & RWSEM_FLAG_WAITERS)
1411
rwsem_downgrade_wake(sem);
1412
preempt_enable();
1413
}
1414
1415
#else /* !CONFIG_PREEMPT_RT */
1416
1417
#define RT_MUTEX_BUILD_MUTEX
1418
#include "rtmutex.c"
1419
1420
#define rwbase_set_and_save_current_state(state) \
1421
set_current_state(state)
1422
1423
#define rwbase_restore_current_state() \
1424
__set_current_state(TASK_RUNNING)
1425
1426
#define rwbase_rtmutex_lock_state(rtm, state) \
1427
__rt_mutex_lock(rtm, state)
1428
1429
#define rwbase_rtmutex_slowlock_locked(rtm, state, wq) \
1430
__rt_mutex_slowlock_locked(rtm, NULL, state, wq)
1431
1432
#define rwbase_rtmutex_unlock(rtm) \
1433
__rt_mutex_unlock(rtm)
1434
1435
#define rwbase_rtmutex_trylock(rtm) \
1436
__rt_mutex_trylock(rtm)
1437
1438
#define rwbase_signal_pending_state(state, current) \
1439
signal_pending_state(state, current)
1440
1441
#define rwbase_pre_schedule() \
1442
rt_mutex_pre_schedule()
1443
1444
#define rwbase_schedule() \
1445
rt_mutex_schedule()
1446
1447
#define rwbase_post_schedule() \
1448
rt_mutex_post_schedule()
1449
1450
#include "rwbase_rt.c"
1451
1452
void __init_rwsem(struct rw_semaphore *sem, const char *name,
1453
struct lock_class_key *key)
1454
{
1455
init_rwbase_rt(&(sem)->rwbase);
1456
1457
#ifdef CONFIG_DEBUG_LOCK_ALLOC
1458
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
1459
lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
1460
#endif
1461
}
1462
EXPORT_SYMBOL(__init_rwsem);
1463
1464
static inline void __down_read(struct rw_semaphore *sem)
1465
{
1466
rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
1467
}
1468
1469
static inline int __down_read_interruptible(struct rw_semaphore *sem)
1470
{
1471
return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE);
1472
}
1473
1474
static inline int __down_read_killable(struct rw_semaphore *sem)
1475
{
1476
return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE);
1477
}
1478
1479
static inline int __down_read_trylock(struct rw_semaphore *sem)
1480
{
1481
return rwbase_read_trylock(&sem->rwbase);
1482
}
1483
1484
static inline void __up_read(struct rw_semaphore *sem)
1485
{
1486
rwbase_read_unlock(&sem->rwbase, TASK_NORMAL);
1487
}
1488
1489
static inline void __sched __down_write(struct rw_semaphore *sem)
1490
{
1491
rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
1492
}
1493
1494
static inline int __sched __down_write_killable(struct rw_semaphore *sem)
1495
{
1496
return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE);
1497
}
1498
1499
static inline int __down_write_trylock(struct rw_semaphore *sem)
1500
{
1501
return rwbase_write_trylock(&sem->rwbase);
1502
}
1503
1504
static inline void __up_write(struct rw_semaphore *sem)
1505
{
1506
rwbase_write_unlock(&sem->rwbase);
1507
}
1508
1509
static inline void __downgrade_write(struct rw_semaphore *sem)
1510
{
1511
rwbase_write_downgrade(&sem->rwbase);
1512
}
1513
1514
/* Debug stubs for the common API */
1515
#define DEBUG_RWSEMS_WARN_ON(c, sem)
1516
1517
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
1518
struct task_struct *owner)
1519
{
1520
}
1521
1522
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
1523
{
1524
int count = atomic_read(&sem->rwbase.readers);
1525
1526
return count < 0 && count != READER_BIAS;
1527
}
1528
1529
#endif /* CONFIG_PREEMPT_RT */
1530
1531
/*
1532
* lock for reading
1533
*/
1534
void __sched down_read(struct rw_semaphore *sem)
1535
{
1536
might_sleep();
1537
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
1538
1539
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
1540
}
1541
EXPORT_SYMBOL(down_read);
1542
1543
int __sched down_read_interruptible(struct rw_semaphore *sem)
1544
{
1545
might_sleep();
1546
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
1547
1548
if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {
1549
rwsem_release(&sem->dep_map, _RET_IP_);
1550
return -EINTR;
1551
}
1552
1553
return 0;
1554
}
1555
EXPORT_SYMBOL(down_read_interruptible);
1556
1557
int __sched down_read_killable(struct rw_semaphore *sem)
1558
{
1559
might_sleep();
1560
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
1561
1562
if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
1563
rwsem_release(&sem->dep_map, _RET_IP_);
1564
return -EINTR;
1565
}
1566
1567
return 0;
1568
}
1569
EXPORT_SYMBOL(down_read_killable);
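/*
 * Illustrative usage sketch (not part of the kernel build): the killable
 * and interruptible variants return -EINTR when a (fatal) signal ends the
 * wait, so the caller must check the return value. "my_sem" is a made-up
 * name used only for this example.
 *
 *	if (down_read_killable(&my_sem))
 *		return -EINTR;
 *	... read-side critical section ...
 *	up_read(&my_sem);
 *
 * down_read_interruptible() and down_write_killable() follow the same
 * pattern.
 */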
1570
1571
/*
1572
* trylock for reading -- returns 1 if successful, 0 if contention
1573
*/
1574
int down_read_trylock(struct rw_semaphore *sem)
1575
{
1576
int ret = __down_read_trylock(sem);
1577
1578
if (ret == 1)
1579
rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
1580
return ret;
1581
}
1582
EXPORT_SYMBOL(down_read_trylock);
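/*
 * Illustrative usage sketch (not part of the kernel build): the trylocks
 * return 1 on success and 0 on contention (the opposite sense of the
 * killable variants, which return 0 on success). "my_sem" is a made-up
 * name used only for this example.
 *
 *	if (down_read_trylock(&my_sem)) {
 *		... read-side critical section ...
 *		up_read(&my_sem);
 *	} else {
 *		... lock is contended: defer the work or take a slow path ...
 *	}
 */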
1583
1584
/*
1585
* lock for writing
1586
*/
1587
void __sched down_write(struct rw_semaphore *sem)
1588
{
1589
might_sleep();
1590
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
1591
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1592
}
1593
EXPORT_SYMBOL(down_write);
1594
1595
/*
1596
* lock for writing
1597
*/
1598
int __sched down_write_killable(struct rw_semaphore *sem)
1599
{
1600
might_sleep();
1601
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
1602
1603
if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
1604
__down_write_killable)) {
1605
rwsem_release(&sem->dep_map, _RET_IP_);
1606
return -EINTR;
1607
}
1608
1609
return 0;
1610
}
1611
EXPORT_SYMBOL(down_write_killable);
1612
1613
/*
1614
* trylock for writing -- returns 1 if successful, 0 if contention
1615
*/
1616
int down_write_trylock(struct rw_semaphore *sem)
1617
{
1618
int ret = __down_write_trylock(sem);
1619
1620
if (ret == 1)
1621
rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
1622
1623
return ret;
1624
}
1625
EXPORT_SYMBOL(down_write_trylock);
1626
1627
/*
1628
* release a read lock
1629
*/
1630
void up_read(struct rw_semaphore *sem)
1631
{
1632
rwsem_release(&sem->dep_map, _RET_IP_);
1633
__up_read(sem);
1634
}
1635
EXPORT_SYMBOL(up_read);
1636
1637
/*
1638
* release a write lock
1639
*/
1640
void up_write(struct rw_semaphore *sem)
1641
{
1642
rwsem_release(&sem->dep_map, _RET_IP_);
1643
__up_write(sem);
1644
}
1645
EXPORT_SYMBOL(up_write);
1646
1647
/*
1648
* downgrade write lock to read lock
1649
*/
1650
void downgrade_write(struct rw_semaphore *sem)
1651
{
1652
lock_downgrade(&sem->dep_map, _RET_IP_);
1653
__downgrade_write(sem);
1654
}
1655
EXPORT_SYMBOL(downgrade_write);
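/*
 * Illustrative usage sketch (not part of the kernel build): a writer that
 * has finished updating but still needs a stable view of the data can
 * downgrade without ever dropping the lock, which lets queued readers in
 * early. "my_sem" is a made-up name used only for this example.
 *
 *	down_write(&my_sem);
 *	... modify the protected data ...
 *	downgrade_write(&my_sem);	// now held for read
 *	... keep reading the now-stable data ...
 *	up_read(&my_sem);
 *
 * There is deliberately no reverse (read-to-write upgrade) operation, as
 * two readers upgrading at the same time would deadlock.
 */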
1656
1657
#ifdef CONFIG_DEBUG_LOCK_ALLOC
1658
1659
void down_read_nested(struct rw_semaphore *sem, int subclass)
1660
{
1661
might_sleep();
1662
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
1663
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
1664
}
1665
EXPORT_SYMBOL(down_read_nested);
1666
1667
int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
1668
{
1669
might_sleep();
1670
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
1671
1672
if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
1673
rwsem_release(&sem->dep_map, _RET_IP_);
1674
return -EINTR;
1675
}
1676
1677
return 0;
1678
}
1679
EXPORT_SYMBOL(down_read_killable_nested);
1680
1681
void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
1682
{
1683
might_sleep();
1684
rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
1685
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1686
}
1687
EXPORT_SYMBOL(_down_write_nest_lock);
1688
1689
void down_read_non_owner(struct rw_semaphore *sem)
1690
{
1691
might_sleep();
1692
__down_read(sem);
1693
/*
 * The owner value for a reader-owned lock is mostly for debugging
 * purposes only and is not critical to the correct functioning of
 * the rwsem. So it is perfectly fine to set it in a preempt-enabled
 * context here.
 */
1699
__rwsem_set_reader_owned(sem, NULL);
1700
}
1701
EXPORT_SYMBOL(down_read_non_owner);
1702
1703
void down_write_nested(struct rw_semaphore *sem, int subclass)
1704
{
1705
might_sleep();
1706
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
1707
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1708
}
1709
EXPORT_SYMBOL(down_write_nested);
1710
1711
int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
1712
{
1713
might_sleep();
1714
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
1715
1716
if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
1717
__down_write_killable)) {
1718
rwsem_release(&sem->dep_map, _RET_IP_);
1719
return -EINTR;
1720
}
1721
1722
return 0;
1723
}
1724
EXPORT_SYMBOL(down_write_killable_nested);
1725
1726
void up_read_non_owner(struct rw_semaphore *sem)
1727
{
1728
DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1729
__up_read(sem);
1730
}
1731
EXPORT_SYMBOL(up_read_non_owner);
1732
1733
#endif
1734
1735