GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/compat/linux/linux_futex.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009-2021 Dmitry Chagin <[email protected]>
 * Copyright (c) 2008 Roman Divacky
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sysent.h>
#include <sys/vnode.h>
#include <sys/umtxvar.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_futex.h>
#include <compat/linux/linux_misc.h>
#include <compat/linux/linux_time.h>
#include <compat/linux/linux_util.h>

#define FUTEX_SHARED 0x8 /* shared futex */
#define FUTEX_UNOWNED 0

#define GET_SHARED(a) (a->flags & FUTEX_SHARED) ? AUTO_SHARE : THREAD_SHARE

static int futex_atomic_op(struct thread *, int, uint32_t *, int *);
static int handle_futex_death(struct thread *td, struct linux_emuldata *,
    uint32_t *, unsigned int, bool);
static int fetch_robust_entry(struct linux_robust_list **,
    struct linux_robust_list **, unsigned int *);

struct linux_futex_args {
        uint32_t *uaddr;
        int32_t op;
        uint32_t flags;
        bool clockrt;
        uint32_t val;
        struct timespec *ts;
        uint32_t *uaddr2;
        uint32_t val3;
        bool val3_compare;
        struct timespec kts;
};

static inline int futex_key_get(const void *, int, int, struct umtx_key *);
static void linux_umtx_abs_timeout_init(struct umtx_abs_timeout *,
    struct linux_futex_args *);
static int linux_futex(struct thread *, struct linux_futex_args *);
static int linux_futex_wait(struct thread *, struct linux_futex_args *);
static int linux_futex_wake(struct thread *, struct linux_futex_args *);
static int linux_futex_requeue(struct thread *, struct linux_futex_args *);
static int linux_futex_wakeop(struct thread *, struct linux_futex_args *);
static int linux_futex_lock_pi(struct thread *, bool, struct linux_futex_args *);
static int linux_futex_unlock_pi(struct thread *, bool,
    struct linux_futex_args *);
static int futex_wake_pi(struct thread *, uint32_t *, bool);

static int
futex_key_get(const void *uaddr, int type, int share, struct umtx_key *key)
{

        /* Check that the futex address is 32-bit aligned. */
        if (!__is_aligned(uaddr, sizeof(uint32_t)))
                return (EINVAL);
        return (umtx_key_get(uaddr, type, share, key));
}

int
futex_wake(struct thread *td, uint32_t *uaddr, int val, bool shared)
{
        struct linux_futex_args args;

        bzero(&args, sizeof(args));
        args.op = LINUX_FUTEX_WAKE;
        args.uaddr = uaddr;
        args.flags = shared == true ? FUTEX_SHARED : 0;
        args.val = val;
        args.val3 = FUTEX_BITSET_MATCH_ANY;

        return (linux_futex_wake(td, &args));
}

static int
futex_wake_pi(struct thread *td, uint32_t *uaddr, bool shared)
{
        struct linux_futex_args args;

        bzero(&args, sizeof(args));
        args.op = LINUX_FUTEX_UNLOCK_PI;
        args.uaddr = uaddr;
        args.flags = shared == true ? FUTEX_SHARED : 0;

        return (linux_futex_unlock_pi(td, true, &args));
}

static int
futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr,
    int *res)
{
        int op = (encoded_op >> 28) & 7;
        int cmp = (encoded_op >> 24) & 15;
        int oparg = (encoded_op << 8) >> 20;
        int cmparg = (encoded_op << 20) >> 20;
        int oldval = 0, ret;

        if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
                oparg = 1 << oparg;

        switch (op) {
        case FUTEX_OP_SET:
                ret = futex_xchgl(oparg, uaddr, &oldval);
                break;
        case FUTEX_OP_ADD:
                ret = futex_addl(oparg, uaddr, &oldval);
                break;
        case FUTEX_OP_OR:
                ret = futex_orl(oparg, uaddr, &oldval);
                break;
        case FUTEX_OP_ANDN:
                ret = futex_andl(~oparg, uaddr, &oldval);
                break;
        case FUTEX_OP_XOR:
                ret = futex_xorl(oparg, uaddr, &oldval);
                break;
        default:
                ret = ENOSYS;
                break;
        }

        if (ret != 0)
                return (ret);

        switch (cmp) {
        case FUTEX_OP_CMP_EQ:
                *res = (oldval == cmparg);
                break;
        case FUTEX_OP_CMP_NE:
                *res = (oldval != cmparg);
                break;
        case FUTEX_OP_CMP_LT:
                *res = (oldval < cmparg);
                break;
        case FUTEX_OP_CMP_GE:
                *res = (oldval >= cmparg);
                break;
        case FUTEX_OP_CMP_LE:
                *res = (oldval <= cmparg);
                break;
        case FUTEX_OP_CMP_GT:
                *res = (oldval > cmparg);
                break;
        default:
                ret = ENOSYS;
        }

        return (ret);
}
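
/*
 * For illustration: the encoded_op word decoded above packs four fields,
 * op in bits 31-28 (bit 31 being FUTEX_OP_OPARG_SHIFT), cmp in bits 27-24,
 * oparg in bits 23-12, and cmparg in bits 11-0; the shift-left/shift-right
 * pairs sign-extend the two 12-bit fields.  A sketch of how a Linux caller
 * typically builds such a word, mirroring the FUTEX_OP() macro from the
 * Linux uapi <linux/futex.h> (shown here only as an example, not used by
 * this file):
 *
 *        #define FUTEX_OP(op, oparg, cmp, cmparg)                \
 *            (((op & 0xf) << 28) | ((cmp & 0xf) << 24) |         \
 *             ((oparg & 0xfff) << 12) | (cmparg & 0xfff))
 *
 *        // "store 1 into *uaddr2 and report whether the old value was 0"
 *        int encoded_op = FUTEX_OP(FUTEX_OP_SET, 1, FUTEX_OP_CMP_EQ, 0);
 */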

static int
linux_futex(struct thread *td, struct linux_futex_args *args)
{
        struct linux_pemuldata *pem;
        struct proc *p;

        if (args->op & LINUX_FUTEX_PRIVATE_FLAG) {
                args->flags = 0;
                args->op &= ~LINUX_FUTEX_PRIVATE_FLAG;
        } else
                args->flags = FUTEX_SHARED;

        args->clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME;
        args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME;

        if (args->clockrt &&
            args->op != LINUX_FUTEX_WAIT_BITSET &&
            args->op != LINUX_FUTEX_WAIT_REQUEUE_PI &&
            args->op != LINUX_FUTEX_LOCK_PI2)
                return (ENOSYS);

        switch (args->op) {
        case LINUX_FUTEX_WAIT:
                args->val3 = FUTEX_BITSET_MATCH_ANY;
                /* FALLTHROUGH */

        case LINUX_FUTEX_WAIT_BITSET:
                LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x",
                    args->uaddr, args->val, args->val3);

                return (linux_futex_wait(td, args));

        case LINUX_FUTEX_WAKE:
                args->val3 = FUTEX_BITSET_MATCH_ANY;
                /* FALLTHROUGH */

        case LINUX_FUTEX_WAKE_BITSET:
                LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x",
                    args->uaddr, args->val, args->val3);

                return (linux_futex_wake(td, args));

        case LINUX_FUTEX_REQUEUE:
                /*
                 * Glibc does not use this operation since version 2.3.3,
                 * as it is racy and was replaced by the FUTEX_CMP_REQUEUE
                 * operation. Glibc versions prior to 2.3.3 fall back to
                 * FUTEX_WAKE when FUTEX_REQUEUE returns EINVAL.
                 */
                pem = pem_find(td->td_proc);
                if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
                        linux_msg(td, "unsupported FUTEX_REQUEUE");
                        pem->flags |= LINUX_XDEPR_REQUEUEOP;
                }

                /*
                 * The above notwithstanding, musl libc does make use of the
                 * FUTEX_REQUEUE operation, so allow it for brands which set
                 * the LINUX_BI_FUTEX_REQUEUE bit of the Brandinfo flags.
                 */
                p = td->td_proc;
                Elf_Brandinfo *bi = p->p_elf_brandinfo;
                if (bi == NULL || ((bi->flags & LINUX_BI_FUTEX_REQUEUE)) == 0)
                        return (EINVAL);
                args->val3_compare = false;
                /* FALLTHROUGH */

        case LINUX_FUTEX_CMP_REQUEUE:
                LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p "
                    "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x",
                    args->uaddr, args->val, args->val3, args->uaddr2,
                    args->ts);

                return (linux_futex_requeue(td, args));

        case LINUX_FUTEX_WAKE_OP:
                LINUX_CTR5(sys_futex, "WAKE_OP "
                    "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x",
                    args->uaddr, args->val, args->uaddr2, args->val3,
                    args->ts);

                return (linux_futex_wakeop(td, args));

        case LINUX_FUTEX_LOCK_PI:
                args->clockrt = true;
                /* FALLTHROUGH */

        case LINUX_FUTEX_LOCK_PI2:
                LINUX_CTR2(sys_futex, "LOCKPI uaddr %p val 0x%x",
                    args->uaddr, args->val);

                return (linux_futex_lock_pi(td, false, args));

        case LINUX_FUTEX_UNLOCK_PI:
                LINUX_CTR1(sys_futex, "UNLOCKPI uaddr %p",
                    args->uaddr);

                return (linux_futex_unlock_pi(td, false, args));

        case LINUX_FUTEX_TRYLOCK_PI:
                LINUX_CTR1(sys_futex, "TRYLOCKPI uaddr %p",
                    args->uaddr);

                return (linux_futex_lock_pi(td, true, args));

        /*
         * The current implementations of FUTEX_WAIT_REQUEUE_PI and
         * FUTEX_CMP_REQUEUE_PI can no longer be used to implement condition
         * variables. A detailed explanation can be found here:
         *
         * https://sourceware.org/bugzilla/show_bug.cgi?id=13165
         * and here http://austingroupbugs.net/view.php?id=609
         *
         * And since commit
         * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=ed19993b5b0d05d62cc883571519a67dae481a14
         * glibc does not use them.
         */
        case LINUX_FUTEX_WAIT_REQUEUE_PI:
                /* not yet implemented */
                pem = pem_find(td->td_proc);
                if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
                        linux_msg(td, "unsupported FUTEX_WAIT_REQUEUE_PI");
                        pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
                }
                return (ENOSYS);

        case LINUX_FUTEX_CMP_REQUEUE_PI:
                /* not yet implemented */
                pem = pem_find(td->td_proc);
                if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
                        linux_msg(td, "unsupported FUTEX_CMP_REQUEUE_PI");
                        pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
                }
                return (ENOSYS);

        default:
                linux_msg(td, "unsupported futex op %d", args->op);
                return (ENOSYS);
        }
}

/*
 * PI futex protocol:
 * - A futex word value of 0 means unlocked.
 * - A futex word value of TID means locked by that thread.
 * Userspace uses atomic ops to lock/unlock these futexes without entering the
 * kernel. If the lock-acquire fastpath fails (the transition from 0 to TID
 * fails), then FUTEX_LOCK_PI is called.
 * The kernel atomically sets the FUTEX_WAITERS bit in the futex word value
 * and, if no other waiters exist, looks up the thread that owns the futex (it
 * has put its own TID into the futex value) and makes this thread the owner
 * of the internal pi-aware lock object (mutex). Then the kernel tries to lock
 * the internal lock object, on which it blocks. Once it returns, it has the
 * mutex acquired, it sets the futex value to its own TID, and it returns (the
 * futex value then contains FUTEX_WAITERS|TID).
 * The unlock fastpath fails (because the FUTEX_WAITERS bit is set) and
 * FUTEX_UNLOCK_PI is called.
 * If a futex is found to be held at exit time, the kernel sets the OWNER_DIED
 * bit of the futex word and wakes up the next futex waiter (if any); the
 * WAITERS bit is preserved.
 * If the OWNER_DIED bit is set, the kernel sanity checks the futex word value
 * against the internal futex state and, if they agree, acquires the futex.
 */
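
/*
 * For illustration, a minimal userspace sketch of the fastpath described
 * above, assuming C11-style atomics and the raw Linux futex(2) syscall
 * ("tid" is the caller's Linux thread id, "futex" points at the 32-bit
 * futex word; error handling omitted):
 *
 *        uint32_t expected = 0;                        (FUTEX_UNOWNED)
 *        if (!atomic_compare_exchange_strong(futex, &expected, tid))
 *                syscall(SYS_futex, futex, FUTEX_LOCK_PI, 0, NULL, NULL, 0);
 *        ... critical section ...
 *        expected = tid;
 *        if (!atomic_compare_exchange_strong(futex, &expected, 0))
 *                syscall(SYS_futex, futex, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0);
 *
 * The two slow paths are what arrive here as LINUX_FUTEX_LOCK_PI and
 * LINUX_FUTEX_UNLOCK_PI.
 */
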
static int
linux_futex_lock_pi(struct thread *td, bool try, struct linux_futex_args *args)
{
        struct umtx_abs_timeout timo;
        struct linux_emuldata *em;
        struct umtx_pi *pi, *new_pi;
        struct thread *td1;
        struct umtx_q *uq;
        int error, rv;
        uint32_t owner, old_owner;

        em = em_find(td);
        uq = td->td_umtxq;
        error = futex_key_get(args->uaddr, TYPE_PI_FUTEX, GET_SHARED(args),
            &uq->uq_key);
        if (error != 0)
                return (error);
        if (args->ts != NULL)
                linux_umtx_abs_timeout_init(&timo, args);

        umtxq_lock(&uq->uq_key);
        pi = umtx_pi_lookup(&uq->uq_key);
        if (pi == NULL) {
                new_pi = umtx_pi_alloc(M_NOWAIT);
                if (new_pi == NULL) {
                        umtxq_unlock(&uq->uq_key);
                        new_pi = umtx_pi_alloc(M_WAITOK);
                        umtxq_lock(&uq->uq_key);
                        pi = umtx_pi_lookup(&uq->uq_key);
                        if (pi != NULL) {
                                umtx_pi_free(new_pi);
                                new_pi = NULL;
                        }
                }
                if (new_pi != NULL) {
                        new_pi->pi_key = uq->uq_key;
                        umtx_pi_insert(new_pi);
                        pi = new_pi;
                }
        }
        umtx_pi_ref(pi);
        umtxq_unlock(&uq->uq_key);
        for (;;) {
                /* Try the uncontested case first. */
                rv = casueword32(args->uaddr, FUTEX_UNOWNED, &owner, em->em_tid);
                /* The acquire succeeded. */
                if (rv == 0) {
                        error = 0;
                        break;
                }
                if (rv == -1) {
                        error = EFAULT;
                        break;
                }

                /*
                 * Nobody owns it, but the acquire failed. This can happen
                 * with ll/sc atomics.
                 */
                if (owner == FUTEX_UNOWNED) {
                        error = thread_check_susp(td, true);
                        if (error != 0)
                                break;
                        continue;
                }

                /*
                 * Avoid overwriting a possible error from sleep due
                 * to a pending signal with the suspension check result.
                 */
                if (error == 0) {
                        error = thread_check_susp(td, true);
                        if (error != 0)
                                break;
                }

                /* The futex word at *uaddr is already locked by the caller. */
                if ((owner & FUTEX_TID_MASK) == em->em_tid) {
                        error = EDEADLK;
                        break;
                }

                /*
                 * The futex owner died; handle_futex_death() set the
                 * OWNER_DIED bit and cleared the tid. Try to acquire it.
                 */
                if ((owner & FUTEX_TID_MASK) == FUTEX_UNOWNED) {
                        old_owner = owner;
                        owner = owner & (FUTEX_WAITERS | FUTEX_OWNER_DIED);
                        owner |= em->em_tid;
                        rv = casueword32(args->uaddr, old_owner, &owner, owner);
                        if (rv == -1) {
                                error = EFAULT;
                                break;
                        }
                        if (rv == 1) {
                                if (error == 0) {
                                        error = thread_check_susp(td, true);
                                        if (error != 0)
                                                break;
                                }

                                /*
                                 * If this failed, the lock could have
                                 * changed; restart.
                                 */
                                continue;
                        }

                        umtxq_lock(&uq->uq_key);
                        umtxq_busy(&uq->uq_key);
                        error = umtx_pi_claim(pi, td);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        if (error != 0) {
                                /*
                                 * Since we're going to return an
                                 * error, restore the futex to its
                                 * previous, unowned state to avoid
                                 * compounding the problem.
                                 */
                                (void)casuword32(args->uaddr, owner, old_owner);
                        }
                        break;
                }

                /*
                 * Inconsistent state: OWNER_DIED is set and the tid is not 0.
                 * Linux does some checks of the futex state; we return EINVAL,
                 * as user space can take care of this.
                 */
                if ((owner & FUTEX_OWNER_DIED) != FUTEX_UNOWNED) {
                        error = EINVAL;
                        break;
                }

                if (try != 0) {
                        error = EBUSY;
                        break;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        break;

                umtxq_busy_unlocked(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space knows
                 * to use the system call for unlock. If this fails, either
                 * someone else has acquired the lock or it has been released.
                 */
                rv = casueword32(args->uaddr, owner, &owner,
                    owner | FUTEX_WAITERS);
                if (rv == -1) {
                        umtxq_unbusy_unlocked(&uq->uq_key);
                        error = EFAULT;
                        break;
                }
                if (rv == 1) {
                        umtxq_unbusy_unlocked(&uq->uq_key);
                        error = thread_check_susp(td, true);
                        if (error != 0)
                                break;

                        /*
                         * The lock changed and we need to retry, or we
                         * lost a race to the thread unlocking the umtx.
                         */
                        continue;
                }

                /*
                 * Substitute the Linux thread id with the native thread id to
                 * avoid refactoring the code of umtxq_sleep_pi().
                 */
                td1 = linux_tdfind(td, owner & FUTEX_TID_MASK, -1);
                if (td1 != NULL) {
                        owner = td1->td_tid;
                        PROC_UNLOCK(td1->td_proc);
                } else {
                        umtxq_unbusy_unlocked(&uq->uq_key);
                        error = EINVAL;
                        break;
                }

                umtxq_lock(&uq->uq_key);

                /* We set the contested bit; sleep. */
                error = umtxq_sleep_pi(uq, pi, owner, "futexp",
                    args->ts == NULL ? NULL : &timo,
                    (args->flags & FUTEX_SHARED) != 0);
                if (error != 0)
                        continue;

                error = thread_check_susp(td, false);
                if (error != 0)
                        break;
        }

        umtxq_lock(&uq->uq_key);
        umtx_pi_unref(pi);
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
        return (error);
}

static int
linux_futex_unlock_pi(struct thread *td, bool rb, struct linux_futex_args *args)
{
        struct linux_emuldata *em;
        struct umtx_key key;
        uint32_t old, owner, new_owner;
        int count, error;

        em = em_find(td);

        /*
         * Make sure we own this mtx.
         */
        error = fueword32(args->uaddr, &owner);
        if (error == -1)
                return (EFAULT);
        if (!rb && (owner & FUTEX_TID_MASK) != em->em_tid)
                return (EPERM);

        error = futex_key_get(args->uaddr, TYPE_PI_FUTEX, GET_SHARED(args), &key);
        if (error != 0)
                return (error);
        umtxq_lock(&key);
        umtxq_busy(&key);
        error = umtx_pi_drop(td, &key, rb, &count);
        if (error != 0 || rb) {
                umtxq_unbusy(&key);
                umtxq_unlock(&key);
                umtx_key_release(&key);
                return (error);
        }
        umtxq_unlock(&key);

        /*
         * When unlocking the futex, it must be marked as unowned if
         * only zero or one thread is waiting for it.
         * Otherwise, it must be marked as contested.
         */
        if (count > 1)
                new_owner = FUTEX_WAITERS;
        else
                new_owner = FUTEX_UNOWNED;

again:
        error = casueword32(args->uaddr, owner, &old, new_owner);
        if (error == 1) {
                error = thread_check_susp(td, false);
                if (error == 0)
                        goto again;
        }
        umtxq_unbusy_unlocked(&key);
        umtx_key_release(&key);
        if (error == -1)
                return (EFAULT);
        if (error == 0 && old != owner)
                return (EINVAL);
        return (error);
}

static int
linux_futex_wakeop(struct thread *td, struct linux_futex_args *args)
{
        struct umtx_key key, key2;
        int nrwake, op_ret, ret;
        int error, count;

        if (args->uaddr == args->uaddr2)
                return (EINVAL);

        error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key);
        if (error != 0)
                return (error);
        error = futex_key_get(args->uaddr2, TYPE_FUTEX, GET_SHARED(args), &key2);
        if (error != 0) {
                umtx_key_release(&key);
                return (error);
        }
        umtxq_busy_unlocked(&key);
        error = futex_atomic_op(td, args->val3, args->uaddr2, &op_ret);
        umtxq_lock(&key);
        umtxq_unbusy(&key);
        if (error != 0)
                goto out;
        ret = umtxq_signal_mask(&key, args->val, args->val3);
        if (op_ret > 0) {
                nrwake = (int)(unsigned long)args->ts;
                umtxq_lock(&key2);
                count = umtxq_count(&key2);
                if (count > 0)
                        ret += umtxq_signal_mask(&key2, nrwake, args->val3);
                else
                        ret += umtxq_signal_mask(&key, nrwake, args->val3);
                umtxq_unlock(&key2);
        }
        td->td_retval[0] = ret;
out:
        umtxq_unlock(&key);
        umtx_key_release(&key2);
        umtx_key_release(&key);
        return (error);
}
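
/*
 * Note that the second wake count (nrwake2) reaches this handler through the
 * timeout slot ("nrwake = (int)(unsigned long)args->ts" above), matching the
 * Linux convention that the fourth futex(2) argument carries a plain integer
 * for operations that take no timeout.  An illustrative raw Linux-side call
 * that exercises this path (hypothetical addresses and counts):
 *
 *        // Wake one waiter on uaddr; atomically store 0 into *uaddr2 and,
 *        // because the old value of *uaddr2 is compared with CMP_NE 0, also
 *        // wake one waiter on uaddr2 if that old value was nonzero.
 *        syscall(SYS_futex, uaddr, FUTEX_WAKE_OP, 1, (void *)(long)1,
 *            uaddr2, FUTEX_OP(FUTEX_OP_SET, 0, FUTEX_OP_CMP_NE, 0));
 */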

static int
linux_futex_requeue(struct thread *td, struct linux_futex_args *args)
{
        int nrwake, nrrequeue;
        struct umtx_key key, key2;
        int error;
        uint32_t uval;

        /*
         * Linux allows this; we do not, as it is an incorrect use of the
         * declared ABI, so return EINVAL.
         */
        if (args->uaddr == args->uaddr2)
                return (EINVAL);

        nrrequeue = (int)(unsigned long)args->ts;
        nrwake = args->val;
        /*
         * Sanity check to prevent signed integer overflow;
         * see Linux CVE-2018-6927.
         */
        if (nrwake < 0 || nrrequeue < 0)
                return (EINVAL);

        error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key);
        if (error != 0)
                return (error);
        error = futex_key_get(args->uaddr2, TYPE_FUTEX, GET_SHARED(args), &key2);
        if (error != 0) {
                umtx_key_release(&key);
                return (error);
        }
        umtxq_busy_unlocked(&key);
        error = fueword32(args->uaddr, &uval);
        if (error != 0)
                error = EFAULT;
        else if (args->val3_compare == true && uval != args->val3)
                error = EWOULDBLOCK;
        umtxq_lock(&key);
        umtxq_unbusy(&key);
        if (error == 0) {
                umtxq_lock(&key2);
                td->td_retval[0] = umtxq_requeue(&key, nrwake, &key2, nrrequeue);
                umtxq_unlock(&key2);
        }
        umtxq_unlock(&key);
        umtx_key_release(&key2);
        umtx_key_release(&key);
        return (error);
}

static int
linux_futex_wake(struct thread *td, struct linux_futex_args *args)
{
        struct umtx_key key;
        int error;

        if (args->val3 == 0)
                return (EINVAL);

        error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key);
        if (error != 0)
                return (error);
        umtxq_lock(&key);
        td->td_retval[0] = umtxq_signal_mask(&key, args->val, args->val3);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        return (0);
}

static int
linux_futex_wait(struct thread *td, struct linux_futex_args *args)
{
        struct umtx_abs_timeout timo;
        struct umtx_q *uq;
        uint32_t uval;
        int error;

        if (args->val3 == 0)
                return (EINVAL);

        uq = td->td_umtxq;
        error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args),
            &uq->uq_key);
        if (error != 0)
                return (error);
        if (args->ts != NULL)
                linux_umtx_abs_timeout_init(&timo, args);
        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        uq->uq_bitset = args->val3;
        umtxq_insert(uq);
        umtxq_unlock(&uq->uq_key);
        error = fueword32(args->uaddr, &uval);
        if (error != 0)
                error = EFAULT;
        else if (uval != args->val)
                error = EWOULDBLOCK;
        umtxq_lock(&uq->uq_key);
        umtxq_unbusy(&uq->uq_key);
        if (error == 0) {
                error = umtxq_sleep(uq, "futex",
                    args->ts == NULL ? NULL : &timo);
                if ((uq->uq_flags & UQF_UMTXQ) == 0)
                        error = 0;
                else
                        umtxq_remove(uq);
        } else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
                umtxq_remove(uq);
        }
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
        if (error == ERESTART)
                error = EINTR;
        return (error);
}

static void
linux_umtx_abs_timeout_init(struct umtx_abs_timeout *timo,
    struct linux_futex_args *args)
{
        int clockid, absolute;

        /*
         * The FUTEX_CLOCK_REALTIME option bit can be employed only with the
         * FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI, and FUTEX_LOCK_PI2
         * operations.
         * For FUTEX_WAIT, the timeout is interpreted as a relative value; for
         * the other futex operations it is interpreted as an absolute value.
         * If the FUTEX_CLOCK_REALTIME option bit is set, the Linux kernel
         * measures the timeout against the CLOCK_REALTIME clock; otherwise it
         * measures the timeout against the CLOCK_MONOTONIC clock.
         */
        clockid = args->clockrt ? CLOCK_REALTIME : CLOCK_MONOTONIC;
        absolute = args->op == LINUX_FUTEX_WAIT ? false : true;
        umtx_abs_timeout_init(timo, clockid, absolute, args->ts);
}
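
/*
 * For illustration, the two timeout conventions described above as a Linux
 * caller would use them (values are examples only):
 *
 *        struct timespec rel = { .tv_sec = 1 };  // relative: one second from now
 *        syscall(SYS_futex, uaddr, FUTEX_WAIT, expected, &rel, NULL, 0);
 *
 *        struct timespec abs;                    // absolute CLOCK_REALTIME deadline
 *        clock_gettime(CLOCK_REALTIME, &abs);
 *        abs.tv_sec += 1;
 *        syscall(SYS_futex, uaddr, FUTEX_WAIT_BITSET | FUTEX_CLOCK_REALTIME,
 *            expected, &abs, NULL, FUTEX_BITSET_MATCH_ANY);
 */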

int
linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
{
        struct linux_futex_args fargs = {
                .uaddr = args->uaddr,
                .op = args->op,
                .val = args->val,
                .ts = NULL,
                .uaddr2 = args->uaddr2,
                .val3 = args->val3,
                .val3_compare = true,
        };
        int error;

        switch (args->op & LINUX_FUTEX_CMD_MASK) {
        case LINUX_FUTEX_WAIT:
        case LINUX_FUTEX_WAIT_BITSET:
        case LINUX_FUTEX_LOCK_PI:
        case LINUX_FUTEX_LOCK_PI2:
                if (args->timeout != NULL) {
                        error = linux_get_timespec(&fargs.kts, args->timeout);
                        if (error != 0)
                                return (error);
                        fargs.ts = &fargs.kts;
                }
                break;
        default:
                fargs.ts = PTRIN(args->timeout);
        }
        return (linux_futex(td, &fargs));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_sys_futex_time64(struct thread *td,
    struct linux_sys_futex_time64_args *args)
{
        struct linux_futex_args fargs = {
                .uaddr = args->uaddr,
                .op = args->op,
                .val = args->val,
                .ts = NULL,
                .uaddr2 = args->uaddr2,
                .val3 = args->val3,
                .val3_compare = true,
        };
        int error;

        switch (args->op & LINUX_FUTEX_CMD_MASK) {
        case LINUX_FUTEX_WAIT:
        case LINUX_FUTEX_WAIT_BITSET:
        case LINUX_FUTEX_LOCK_PI:
        case LINUX_FUTEX_LOCK_PI2:
                if (args->timeout != NULL) {
                        error = linux_get_timespec64(&fargs.kts, args->timeout);
                        if (error != 0)
                                return (error);
                        fargs.ts = &fargs.kts;
                }
                break;
        default:
                fargs.ts = PTRIN(args->timeout);
        }
        return (linux_futex(td, &fargs));
}
#endif

int
linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
{
        struct linux_emuldata *em;

        if (args->len != sizeof(struct linux_robust_list_head))
                return (EINVAL);

        em = em_find(td);
        em->robust_futexes = args->head;

        return (0);
}

int
linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
{
        struct linux_emuldata *em;
        struct linux_robust_list_head *head;
        l_size_t len;
        struct thread *td2;
        int error;

        if (!args->pid) {
                em = em_find(td);
                KASSERT(em != NULL, ("get_robust_list: emuldata not found.\n"));
                head = em->robust_futexes;
        } else {
                td2 = linux_tdfind(td, args->pid, -1);
                if (td2 == NULL)
                        return (ESRCH);
                if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) {
                        PROC_UNLOCK(td2->td_proc);
                        return (EPERM);
                }

                em = em_find(td2);
                KASSERT(em != NULL, ("get_robust_list: emuldata not found.\n"));
                /* XXX: ptrace? */
                if (priv_check(td, PRIV_CRED_SETUID) ||
                    priv_check(td, PRIV_CRED_SETEUID) ||
                    p_candebug(td, td2->td_proc)) {
                        PROC_UNLOCK(td2->td_proc);
                        return (EPERM);
                }
                head = em->robust_futexes;

                PROC_UNLOCK(td2->td_proc);
        }

        len = sizeof(struct linux_robust_list_head);
        error = copyout(&len, args->len, sizeof(l_size_t));
        if (error != 0)
                return (EFAULT);

        return (copyout(&head, args->head, sizeof(l_uintptr_t)));
}
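
/*
 * For orientation, a sketch of the userspace structure that these two system
 * calls register and report.  It mirrors the Linux UAPI struct
 * robust_list_head (FreeBSD's struct linux_robust_list_head describes the
 * same layout, with the pending pointer named pending_list); glibc registers
 * one such head per thread at startup:
 *
 *        struct robust_list { struct robust_list *next; };
 *        struct robust_list_head {
 *                struct robust_list list;        // circular list of held robust mutexes
 *                long futex_offset;              // offset from a list entry to its futex word
 *                struct robust_list *list_op_pending;    // lock/unlock in progress
 *        };
 *
 *        syscall(SYS_set_robust_list, &head, sizeof(head));
 */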

static int
handle_futex_death(struct thread *td, struct linux_emuldata *em, uint32_t *uaddr,
    unsigned int pi, bool pending_op)
{
        uint32_t uval, nval, mval;
        int error;

retry:
        error = fueword32(uaddr, &uval);
        if (error != 0)
                return (EFAULT);

        /*
         * Special case for regular (non PI) futexes. The unlock path in
         * user space has two race scenarios:
         *
         * 1. The unlock path releases the user space futex value and
         *    before it can execute the futex() syscall to wake up
         *    waiters it is killed.
         *
         * 2. A woken up waiter is killed before it can acquire the
         *    futex in user space.
         *
         * In both cases the TID validation below prevents a wakeup of
         * potential waiters which can cause these waiters to block
         * forever.
         *
         * In both cases it is safe to attempt waking up a potential
         * waiter without touching the user space futex value and trying
         * to set the OWNER_DIED bit.
         */
        if (pending_op && !pi && !uval) {
                (void)futex_wake(td, uaddr, 1, true);
                return (0);
        }

        if ((uval & FUTEX_TID_MASK) == em->em_tid) {
                mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
                error = casueword32(uaddr, uval, &nval, mval);
                if (error == -1)
                        return (EFAULT);
                if (error == 1) {
                        error = thread_check_susp(td, false);
                        if (error != 0)
                                return (error);
                        goto retry;
                }

                if (!pi && (uval & FUTEX_WAITERS)) {
                        error = futex_wake(td, uaddr, 1, true);
                        if (error != 0)
                                return (error);
                } else if (pi && (uval & FUTEX_WAITERS)) {
                        error = futex_wake_pi(td, uaddr, true);
                        if (error != 0)
                                return (error);
                }
        }

        return (0);
}
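
/*
 * A worked example of the word transformation above, using the Linux futex
 * bit values (FUTEX_WAITERS 0x80000000, FUTEX_OWNER_DIED 0x40000000,
 * FUTEX_TID_MASK 0x3fffffff); the concrete tid is hypothetical:
 *
 *        uval = 0x80000457       (dead owner's tid 0x457, with waiters queued)
 *        mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED = 0xc0000000
 *
 * The next thread to acquire the futex observes FUTEX_OWNER_DIED and can
 * report it to the application, e.g. as pthread_mutex_lock() returning
 * EOWNERDEAD for a robust mutex.
 */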

static int
fetch_robust_entry(struct linux_robust_list **entry,
    struct linux_robust_list **head, unsigned int *pi)
{
        l_ulong uentry;
        int error;

        error = copyin((const void *)head, &uentry, sizeof(uentry));
        if (error != 0)
                return (EFAULT);

        *entry = (void *)(uentry & ~1UL);
        *pi = uentry & 1;

        return (0);
}

#define LINUX_HANDLE_DEATH_PENDING true
#define LINUX_HANDLE_DEATH_LIST false

/* This walks the list of robust futexes releasing them. */
void
release_futexes(struct thread *td, struct linux_emuldata *em)
{
        struct linux_robust_list_head *head;
        struct linux_robust_list *entry, *next_entry, *pending;
        unsigned int limit = 2048, pi, next_pi, pip;
        uint32_t *uaddr;
        l_long futex_offset;
        int error;

        head = em->robust_futexes;
        if (head == NULL)
                return;

        if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi))
                return;

        error = copyin(&head->futex_offset, &futex_offset,
            sizeof(futex_offset));
        if (error != 0)
                return;

        if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip))
                return;

        while (entry != &head->list) {
                error = fetch_robust_entry(&next_entry, PTRIN(&entry->next),
                    &next_pi);

                /*
                 * A pending lock might already be on the list, so
                 * don't process it twice.
                 */
                if (entry != pending) {
                        uaddr = (uint32_t *)((caddr_t)entry + futex_offset);
                        if (handle_futex_death(td, em, uaddr, pi,
                            LINUX_HANDLE_DEATH_LIST))
                                return;
                }
                if (error != 0)
                        return;

                entry = next_entry;
                pi = next_pi;

                if (!--limit)
                        break;

                sched_relinquish(curthread);
        }

        if (pending) {
                uaddr = (uint32_t *)((caddr_t)pending + futex_offset);
                (void)handle_futex_death(td, em, uaddr, pip,
                    LINUX_HANDLE_DEATH_PENDING);
        }
}