GitHub Repository: torvalds/linux
Path: blob/master/io_uring/register.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Code related to the io_uring_register() syscall
 *
 * Copyright (C) 2023 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/refcount.h>
#include <linux/bits.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "filetable.h"
#include "io_uring.h"
#include "opdef.h"
#include "tctx.h"
#include "rsrc.h"
#include "sqpoll.h"
#include "register.h"
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
#include "eventfd.h"
#include "msg_ring.h"
#include "memmap.h"
#include "zcrx.h"
#include "query.h"

#define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
				 IORING_REGISTER_LAST + IORING_OP_LAST)

static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
			   unsigned nr_args)
{
	struct io_uring_probe *p;
	size_t size;
	int i, ret;

	if (nr_args > IORING_OP_LAST)
		nr_args = IORING_OP_LAST;

	size = struct_size(p, ops, nr_args);
	p = memdup_user(arg, size);
	if (IS_ERR(p))
		return PTR_ERR(p);
	ret = -EINVAL;
	if (memchr_inv(p, 0, size))
		goto out;

	p->last_op = IORING_OP_LAST - 1;

	for (i = 0; i < nr_args; i++) {
		p->ops[i].op = i;
		if (io_uring_op_supported(i))
			p->ops[i].flags = IO_URING_OP_SUPPORTED;
	}
	p->ops_len = i;

	ret = 0;
	if (copy_to_user(arg, p, size))
		ret = -EFAULT;
out:
	kfree(p);
	return ret;
}
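
/*
 * Illustrative userspace sketch (not part of this file): probing which
 * opcodes the running kernel supports via IORING_REGISTER_PROBE. The probe
 * buffer must be zeroed, since io_probe() above rejects any non-zero input,
 * and nr_args is the number of ops entries (at most 256). Raw syscall shown
 * for clarity; liburing's io_uring_get_probe() wraps this. ring_fd is
 * assumed to be an existing io_uring file descriptor.
 *
 *	struct io_uring_probe *p;
 *	size_t len = sizeof(*p) + 256 * sizeof(struct io_uring_probe_op);
 *
 *	p = calloc(1, len);
 *	if (syscall(__NR_io_uring_register, ring_fd, IORING_REGISTER_PROBE,
 *		    p, 256) == 0) {
 *		for (int i = 0; i < p->ops_len; i++)
 *			if (p->ops[i].flags & IO_URING_OP_SUPPORTED)
 *				printf("op %u supported\n", p->ops[i].op);
 *	}
 *	free(p);
 */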

int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
	const struct cred *creds;

	creds = xa_erase(&ctx->personalities, id);
	if (creds) {
		put_cred(creds);
		return 0;
	}

	return -EINVAL;
}


static int io_register_personality(struct io_ring_ctx *ctx)
{
	const struct cred *creds;
	u32 id;
	int ret;

	creds = get_current_cred();

	ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
			XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
	if (ret < 0) {
		put_cred(creds);
		return ret;
	}
	return id;
}
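
/*
 * Illustrative userspace sketch (not part of this file): registering the
 * caller's current credentials and later dropping them. On success the
 * register call returns a personality id that SQEs can reference through
 * sqe->personality; unregistering passes that id back in nr_args, matching
 * io_unregister_personality() above. ring_fd is an assumed io_uring fd.
 *
 *	int id = syscall(__NR_io_uring_register, ring_fd,
 *			 IORING_REGISTER_PERSONALITY, NULL, 0);
 *
 *	syscall(__NR_io_uring_register, ring_fd,
 *		IORING_UNREGISTER_PERSONALITY, NULL, id);
 */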

static __cold int io_parse_restrictions(void __user *arg, unsigned int nr_args,
					struct io_restriction *restrictions)
{
	struct io_uring_restriction *res;
	size_t size;
	int i, ret;

	if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
		return -EINVAL;

	size = array_size(nr_args, sizeof(*res));
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	res = memdup_user(arg, size);
	if (IS_ERR(res))
		return PTR_ERR(res);

	ret = -EINVAL;

	for (i = 0; i < nr_args; i++) {
		switch (res[i].opcode) {
		case IORING_RESTRICTION_REGISTER_OP:
			if (res[i].register_op >= IORING_REGISTER_LAST)
				goto err;
			__set_bit(res[i].register_op, restrictions->register_op);
			break;
		case IORING_RESTRICTION_SQE_OP:
			if (res[i].sqe_op >= IORING_OP_LAST)
				goto err;
			__set_bit(res[i].sqe_op, restrictions->sqe_op);
			break;
		case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
			restrictions->sqe_flags_allowed = res[i].sqe_flags;
			break;
		case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
			restrictions->sqe_flags_required = res[i].sqe_flags;
			break;
		default:
			goto err;
		}
	}

	ret = 0;

err:
	kfree(res);
	return ret;
}

static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
					   void __user *arg, unsigned int nr_args)
{
	int ret;

	/* Restrictions allowed only if rings started disabled */
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	/* We allow only a single restrictions registration */
	if (ctx->restrictions.registered)
		return -EBUSY;

	ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions);
	/* Reset all restrictions if an error happened */
	if (ret != 0)
		memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
	else
		ctx->restrictions.registered = true;
	return ret;
}

static int io_register_enable_rings(struct io_ring_ctx *ctx)
{
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
		WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
		/*
		 * Lazy activation attempts would fail if it was polled before
		 * submitter_task is set.
		 */
		if (wq_has_sleeper(&ctx->poll_wq))
			io_activate_pollwq(ctx);
	}

	if (ctx->restrictions.registered)
		ctx->restricted = 1;

	ctx->flags &= ~IORING_SETUP_R_DISABLED;
	if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
		wake_up(&ctx->sq_data->wait);
	return 0;
}
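
/*
 * Illustrative userspace sketch (not part of this file): the intended flow
 * for the two helpers above. A ring created with IORING_SETUP_R_DISABLED
 * registers its restrictions exactly once, then enables the ring. Field
 * names follow the io_uring_restriction layout in the uAPI header; ring_fd
 * is an assumed io_uring fd.
 *
 *	struct io_uring_restriction res[2] = {};
 *
 *	res[0].opcode = IORING_RESTRICTION_SQE_OP;
 *	res[0].sqe_op = IORING_OP_READV;
 *	res[1].opcode = IORING_RESTRICTION_REGISTER_OP;
 *	res[1].register_op = IORING_REGISTER_BUFFERS;
 *
 *	syscall(__NR_io_uring_register, ring_fd,
 *		IORING_REGISTER_RESTRICTIONS, res, 2);
 *	syscall(__NR_io_uring_register, ring_fd,
 *		IORING_REGISTER_ENABLE_RINGS, NULL, 0);
 */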

static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx,
					 cpumask_var_t new_mask)
{
	int ret;

	if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
		ret = io_wq_cpu_affinity(current->io_uring, new_mask);
	} else {
		mutex_unlock(&ctx->uring_lock);
		ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask);
		mutex_lock(&ctx->uring_lock);
	}

	return ret;
}

static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
				       void __user *arg, unsigned len)
{
	cpumask_var_t new_mask;
	int ret;

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(new_mask);
	if (len > cpumask_size())
		len = cpumask_size();

#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		ret = compat_get_bitmap(cpumask_bits(new_mask),
					(const compat_ulong_t __user *)arg,
					len * 8 /* CHAR_BIT */);
	else
#endif
		ret = copy_from_user(new_mask, arg, len);

	if (ret) {
		free_cpumask_var(new_mask);
		return -EFAULT;
	}

	ret = __io_register_iowq_aff(ctx, new_mask);
	free_cpumask_var(new_mask);
	return ret;
}

static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
{
	return __io_register_iowq_aff(ctx, NULL);
}

static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
					       void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_tctx_node *node;
	struct io_uring_task *tctx = NULL;
	struct io_sq_data *sqd = NULL;
	__u32 new_count[2];
	int i, ret;

	if (copy_from_user(new_count, arg, sizeof(new_count)))
		return -EFAULT;
	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i] > INT_MAX)
			return -EINVAL;

	if (ctx->flags & IORING_SETUP_SQPOLL) {
		sqd = ctx->sq_data;
		if (sqd) {
			struct task_struct *tsk;

			/*
			 * Observe the correct sqd->lock -> ctx->uring_lock
			 * ordering. Fine to drop uring_lock here, we hold
			 * a ref to the ctx.
			 */
			refcount_inc(&sqd->refs);
			mutex_unlock(&ctx->uring_lock);
			mutex_lock(&sqd->lock);
			mutex_lock(&ctx->uring_lock);
			tsk = sqpoll_task_locked(sqd);
			if (tsk)
				tctx = tsk->io_uring;
		}
	} else {
		tctx = current->io_uring;
	}

	BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));

	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i])
			ctx->iowq_limits[i] = new_count[i];
	ctx->iowq_limits_set = true;

	if (tctx && tctx->io_wq) {
		ret = io_wq_max_workers(tctx->io_wq, new_count);
		if (ret)
			goto err;
	} else {
		memset(new_count, 0, sizeof(new_count));
	}

	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}

	if (copy_to_user(arg, new_count, sizeof(new_count)))
		return -EFAULT;

	/* that's it for SQPOLL, only the SQPOLL task creates requests */
	if (sqd)
		return 0;

	/* now propagate the restriction to all registered users */
	mutex_lock(&ctx->tctx_lock);
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		tctx = node->task->io_uring;
		if (WARN_ON_ONCE(!tctx->io_wq))
			continue;

		for (i = 0; i < ARRAY_SIZE(new_count); i++)
			new_count[i] = ctx->iowq_limits[i];
		/* ignore errors, it always returns zero anyway */
		(void)io_wq_max_workers(tctx->io_wq, new_count);
	}
	mutex_unlock(&ctx->tctx_lock);
	return 0;
err:
	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}
	return ret;
}
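
/*
 * Illustrative userspace sketch (not part of this file): capping io-wq
 * worker counts. new_count[0] limits bounded workers, new_count[1]
 * unbounded ones; a zero entry leaves that limit untouched, and the array
 * is copied back to the caller afterwards, mirroring the copy_to_user()
 * above. ring_fd is an assumed io_uring fd.
 *
 *	unsigned int counts[2] = { 8, 0 };	// cap bounded, keep unbounded
 *
 *	syscall(__NR_io_uring_register, ring_fd,
 *		IORING_REGISTER_IOWQ_MAX_WORKERS, counts, 2);
 */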

static int io_register_clock(struct io_ring_ctx *ctx,
			     struct io_uring_clock_register __user *arg)
{
	struct io_uring_clock_register reg;

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;

	switch (reg.clockid) {
	case CLOCK_MONOTONIC:
		ctx->clock_offset = 0;
		break;
	case CLOCK_BOOTTIME:
		ctx->clock_offset = TK_OFFS_BOOT;
		break;
	default:
		return -EINVAL;
	}

	ctx->clockid = reg.clockid;
	return 0;
}
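
/*
 * Illustrative userspace sketch (not part of this file): switching the
 * ring's wait clock to CLOCK_BOOTTIME. Only CLOCK_MONOTONIC and
 * CLOCK_BOOTTIME are accepted above, the reserved fields must stay zero,
 * and nr_args must be 0. ring_fd is an assumed io_uring fd.
 *
 *	struct io_uring_clock_register reg = { .clockid = CLOCK_BOOTTIME };
 *
 *	syscall(__NR_io_uring_register, ring_fd, IORING_REGISTER_CLOCK,
 *		&reg, 0);
 */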

/*
 * State to maintain until we can swap. Both new and old state, used for
 * either mapping or freeing.
 */
struct io_ring_ctx_rings {
	struct io_rings *rings;
	struct io_uring_sqe *sq_sqes;

	struct io_mapped_region sq_region;
	struct io_mapped_region ring_region;
};

static void io_register_free_rings(struct io_ring_ctx *ctx,
				   struct io_ring_ctx_rings *r)
{
	io_free_region(ctx->user, &r->sq_region);
	io_free_region(ctx->user, &r->ring_region);
}

#define swap_old(ctx, o, n, field)		\
	do {					\
		(o).field = (ctx)->field;	\
		(ctx)->field = (n).field;	\
	} while (0)

#define RESIZE_FLAGS	(IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP)
#define COPY_FLAGS	(IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \
			 IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP | \
			 IORING_SETUP_CQE_MIXED | IORING_SETUP_SQE_MIXED)

static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_ctx_config config;
	struct io_uring_region_desc rd;
	struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
	unsigned i, tail, old_head;
	struct io_uring_params *p = &config.p;
	struct io_rings_layout *rl = &config.layout;
	int ret;

	memset(&config, 0, sizeof(config));

	/* limited to DEFER_TASKRUN for now */
	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
		return -EINVAL;
	if (copy_from_user(p, arg, sizeof(*p)))
		return -EFAULT;
	if (p->flags & ~RESIZE_FLAGS)
		return -EINVAL;

	/* properties that are always inherited */
	p->flags |= (ctx->flags & COPY_FLAGS);

	ret = io_prepare_config(&config);
	if (unlikely(ret))
		return ret;

	memset(&rd, 0, sizeof(rd));
	rd.size = PAGE_ALIGN(rl->rings_size);
	if (p->flags & IORING_SETUP_NO_MMAP) {
		rd.user_addr = p->cq_off.user_addr;
		rd.flags |= IORING_MEM_REGION_TYPE_USER;
	}
	ret = io_create_region(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
	if (ret)
		return ret;

	n.rings = io_region_get_ptr(&n.ring_region);

	/*
	 * At this point n.rings is shared with userspace, just like o.rings
	 * is as well. While we don't expect userspace to modify it while
	 * a resize is in progress, and it's most likely that userspace will
	 * shoot itself in the foot if it does, we can't always assume good
	 * intent... Use read/write once helpers from here on to indicate the
	 * shared nature of it.
	 */
	WRITE_ONCE(n.rings->sq_ring_mask, p->sq_entries - 1);
	WRITE_ONCE(n.rings->cq_ring_mask, p->cq_entries - 1);
	WRITE_ONCE(n.rings->sq_ring_entries, p->sq_entries);
	WRITE_ONCE(n.rings->cq_ring_entries, p->cq_entries);

	if (copy_to_user(arg, p, sizeof(*p))) {
		io_register_free_rings(ctx, &n);
		return -EFAULT;
	}

	memset(&rd, 0, sizeof(rd));
	rd.size = PAGE_ALIGN(rl->sq_size);
	if (p->flags & IORING_SETUP_NO_MMAP) {
		rd.user_addr = p->sq_off.user_addr;
		rd.flags |= IORING_MEM_REGION_TYPE_USER;
	}
	ret = io_create_region(ctx, &n.sq_region, &rd, IORING_OFF_SQES);
	if (ret) {
		io_register_free_rings(ctx, &n);
		return ret;
	}
	n.sq_sqes = io_region_get_ptr(&n.sq_region);

	/*
	 * If using SQPOLL, park the thread
	 */
	if (ctx->sq_data) {
		mutex_unlock(&ctx->uring_lock);
		io_sq_thread_park(ctx->sq_data);
		mutex_lock(&ctx->uring_lock);
	}

	/*
	 * We'll do the swap. Grab the ctx->mmap_lock, which will exclude
	 * any new mmap's on the ring fd. Clear out existing mappings to prevent
	 * mmap from seeing them, as we'll unmap them. Any attempt to mmap
	 * existing rings beyond this point will fail. Not that it could proceed
	 * at this point anyway, as the io_uring mmap side needs to grab the
	 * ctx->mmap_lock as well. Likewise, hold the completion lock over the
	 * duration of the actual swap.
	 */
	mutex_lock(&ctx->mmap_lock);
	spin_lock(&ctx->completion_lock);
	o.rings = ctx->rings;
	ctx->rings = NULL;
	o.sq_sqes = ctx->sq_sqes;
	ctx->sq_sqes = NULL;

	/*
	 * Now copy SQ and CQ entries, if any. If either of the destination
	 * rings can't hold what is already there, then fail the operation.
	 */
	tail = READ_ONCE(o.rings->sq.tail);
	old_head = READ_ONCE(o.rings->sq.head);
	if (tail - old_head > p->sq_entries)
		goto overflow;
	for (i = old_head; i < tail; i++) {
		unsigned src_head = i & (ctx->sq_entries - 1);
		unsigned dst_head = i & (p->sq_entries - 1);

		n.sq_sqes[dst_head] = o.sq_sqes[src_head];
	}
	WRITE_ONCE(n.rings->sq.head, old_head);
	WRITE_ONCE(n.rings->sq.tail, tail);

	tail = READ_ONCE(o.rings->cq.tail);
	old_head = READ_ONCE(o.rings->cq.head);
	if (tail - old_head > p->cq_entries) {
overflow:
		/* restore old rings, and return -EOVERFLOW via cleanup path */
		ctx->rings = o.rings;
		ctx->sq_sqes = o.sq_sqes;
		to_free = &n;
		ret = -EOVERFLOW;
		goto out;
	}
	for (i = old_head; i < tail; i++) {
		unsigned src_head = i & (ctx->cq_entries - 1);
		unsigned dst_head = i & (p->cq_entries - 1);

		n.rings->cqes[dst_head] = o.rings->cqes[src_head];
	}
	WRITE_ONCE(n.rings->cq.head, old_head);
	WRITE_ONCE(n.rings->cq.tail, tail);
	/* invalidate cached cqe refill */
	ctx->cqe_cached = ctx->cqe_sentinel = NULL;

	WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped));
	atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags));
	WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags));
	WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow));

	/* all done, store old pointers and assign new ones */
	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
		ctx->sq_array = (u32 *)((char *)n.rings + rl->sq_array_offset);

	ctx->sq_entries = p->sq_entries;
	ctx->cq_entries = p->cq_entries;

	ctx->rings = n.rings;
	ctx->sq_sqes = n.sq_sqes;
	swap_old(ctx, o, n, ring_region);
	swap_old(ctx, o, n, sq_region);
	to_free = &o;
	ret = 0;
out:
	spin_unlock(&ctx->completion_lock);
	mutex_unlock(&ctx->mmap_lock);
	io_register_free_rings(ctx, to_free);

	if (ctx->sq_data)
		io_sq_thread_unpark(ctx->sq_data);

	return ret;
}
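
/*
 * Illustrative userspace sketch (not part of this file): growing the CQ
 * ring of a DEFER_TASKRUN ring. Only the RESIZE_FLAGS may be set by the
 * caller, the COPY_FLAGS properties are inherited from the existing ring,
 * and the updated io_uring_params is copied back to the caller. ring_fd is
 * an assumed io_uring fd.
 *
 *	struct io_uring_params p = {
 *		.sq_entries	= 64,
 *		.cq_entries	= 1024,
 *		.flags		= IORING_SETUP_CQSIZE,
 *	};
 *
 *	syscall(__NR_io_uring_register, ring_fd,
 *		IORING_REGISTER_RESIZE_RINGS, &p, 1);
 */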

static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
{
	struct io_uring_mem_region_reg __user *reg_uptr = uarg;
	struct io_uring_mem_region_reg reg;
	struct io_uring_region_desc __user *rd_uptr;
	struct io_uring_region_desc rd;
	struct io_mapped_region region = {};
	int ret;

	if (io_region_is_set(&ctx->param_region))
		return -EBUSY;
	if (copy_from_user(&reg, reg_uptr, sizeof(reg)))
		return -EFAULT;
	rd_uptr = u64_to_user_ptr(reg.region_uptr);
	if (copy_from_user(&rd, rd_uptr, sizeof(rd)))
		return -EFAULT;
	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;
	if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG)
		return -EINVAL;

	/*
	 * This ensures there are no waiters. Waiters are unlocked and it's
	 * hard to synchronise with them, especially if we need to initialise
	 * the region.
	 */
	if ((reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) &&
	    !(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EINVAL;

	ret = io_create_region(ctx, &region, &rd, IORING_MAP_OFF_PARAM_REGION);
	if (ret)
		return ret;
	if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
		io_free_region(ctx->user, &region);
		return -EFAULT;
	}

	if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) {
		ctx->cq_wait_arg = io_region_get_ptr(&region);
		ctx->cq_wait_size = rd.size;
	}

	io_region_publish(ctx, &region, &ctx->param_region);
	return 0;
}

static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
			       void __user *arg, unsigned nr_args)
	__releases(ctx->uring_lock)
	__acquires(ctx->uring_lock)
{
	int ret;

	/*
	 * We don't quiesce the refs for register anymore and so it can't be
	 * dying as we're holding a file ref here.
	 */
	if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
		return -ENXIO;

	if (ctx->submitter_task && ctx->submitter_task != current)
		return -EEXIST;

	if (ctx->restricted) {
		opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
		if (!test_bit(opcode, ctx->restrictions.register_op))
			return -EACCES;
	}

	switch (opcode) {
	case IORING_REGISTER_BUFFERS:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_BUFFERS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_buffers_unregister(ctx);
		break;
	case IORING_REGISTER_FILES:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_FILES:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_files_unregister(ctx);
		break;
	case IORING_REGISTER_FILES_UPDATE:
		ret = io_register_files_update(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_EVENTFD:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 0);
		break;
	case IORING_REGISTER_EVENTFD_ASYNC:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 1);
		break;
	case IORING_UNREGISTER_EVENTFD:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_eventfd_unregister(ctx);
		break;
	case IORING_REGISTER_PROBE:
		ret = -EINVAL;
		if (!arg || nr_args > 256)
			break;
		ret = io_probe(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_personality(ctx);
		break;
	case IORING_UNREGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg)
			break;
		ret = io_unregister_personality(ctx, nr_args);
		break;
	case IORING_REGISTER_ENABLE_RINGS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_enable_rings(ctx);
		break;
	case IORING_REGISTER_RESTRICTIONS:
		ret = io_register_restrictions(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_FILES2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_FILES_UPDATE2:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_BUFFERS2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_BUFFERS_UPDATE:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (!arg || !nr_args)
			break;
		ret = io_register_iowq_aff(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_unregister_iowq_aff(ctx);
		break;
	case IORING_REGISTER_IOWQ_MAX_WORKERS:
		ret = -EINVAL;
		if (!arg || nr_args != 2)
			break;
		ret = io_register_iowq_max_workers(ctx, arg);
		break;
	case IORING_REGISTER_RING_FDS:
		ret = io_ringfd_register(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_RING_FDS:
		ret = io_ringfd_unregister(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_ring(ctx, arg);
		break;
	case IORING_UNREGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_unregister_pbuf_ring(ctx, arg);
		break;
	case IORING_REGISTER_SYNC_CANCEL:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_sync_cancel(ctx, arg);
		break;
	case IORING_REGISTER_FILE_ALLOC_RANGE:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_file_alloc_range(ctx, arg);
		break;
	case IORING_REGISTER_PBUF_STATUS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_status(ctx, arg);
		break;
	case IORING_REGISTER_NAPI:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_napi(ctx, arg);
		break;
	case IORING_UNREGISTER_NAPI:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_unregister_napi(ctx, arg);
		break;
	case IORING_REGISTER_CLOCK:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_clock(ctx, arg);
		break;
	case IORING_REGISTER_CLONE_BUFFERS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_clone_buffers(ctx, arg);
		break;
	case IORING_REGISTER_ZCRX_IFQ:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_zcrx_ifq(ctx, arg);
		break;
	case IORING_REGISTER_RESIZE_RINGS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_resize_rings(ctx, arg);
		break;
	case IORING_REGISTER_MEM_REGION:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_mem_region(ctx, arg);
		break;
	case IORING_REGISTER_QUERY:
		ret = io_query(arg, nr_args);
		break;
	case IORING_REGISTER_ZCRX_CTRL:
		ret = io_zcrx_ctrl(ctx, arg, nr_args);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * Given an 'fd' value, return the ctx associated with it. If 'registered' is
 * true, then the registered index is used. Otherwise, the normal fd table.
 * Caller must call fput() on the returned file, unless it's an ERR_PTR.
 */
struct file *io_uring_register_get_file(unsigned int fd, bool registered)
{
	struct file *file;

	if (registered) {
		/*
		 * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
		 * need only dereference our task private array to find it.
		 */
		struct io_uring_task *tctx = current->io_uring;

		if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
			return ERR_PTR(-EINVAL);
		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
		file = tctx->registered_rings[fd];
		if (file)
			get_file(file);
	} else {
		file = fget(fd);
	}

	if (unlikely(!file))
		return ERR_PTR(-EBADF);
	if (io_is_uring_fops(file))
		return file;
	fput(file);
	return ERR_PTR(-EOPNOTSUPP);
}

static int io_uring_register_send_msg_ring(void __user *arg, unsigned int nr_args)
{
	struct io_uring_sqe sqe;

	if (!arg || nr_args != 1)
		return -EINVAL;
	if (copy_from_user(&sqe, arg, sizeof(sqe)))
		return -EFAULT;
	/* no flags supported */
	if (sqe.flags)
		return -EINVAL;
	if (sqe.opcode != IORING_OP_MSG_RING)
		return -EINVAL;

	return io_uring_sync_msg_ring(&sqe);
}

/*
 * "blind" registration opcodes are ones where there's no ring given, and
 * hence the source fd must be -1.
 */
static int io_uring_register_blind(unsigned int opcode, void __user *arg,
				   unsigned int nr_args)
{
	switch (opcode) {
	case IORING_REGISTER_SEND_MSG_RING:
		return io_uring_register_send_msg_ring(arg, nr_args);
	case IORING_REGISTER_QUERY:
		return io_query(arg, nr_args);
	}
	return -EINVAL;
}

SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
		void __user *, arg, unsigned int, nr_args)
{
	struct io_ring_ctx *ctx;
	long ret = -EBADF;
	struct file *file;
	bool use_registered_ring;

	use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
	opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;

	if (opcode >= IORING_REGISTER_LAST)
		return -EINVAL;

	if (fd == -1)
		return io_uring_register_blind(opcode, arg, nr_args);

	file = io_uring_register_get_file(fd, use_registered_ring);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	mutex_lock(&ctx->uring_lock);
	ret = __io_uring_register(ctx, opcode, arg, nr_args);

	trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
				ctx->buf_table.nr, ret);
	mutex_unlock(&ctx->uring_lock);

	fput(file);
	return ret;
}
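
/*
 * Illustrative userspace sketch (not part of this file): the calling
 * convention for the syscall defined above. OR-ing in
 * IORING_REGISTER_USE_REGISTERED_RING makes 'fd' an index into the ring fds
 * registered with IORING_REGISTER_RING_FDS rather than a normal file
 * descriptor; passing fd == -1 routes to the "blind" opcodes.
 *
 *	// 0 here is a registered ring index, not a file descriptor
 *	syscall(__NR_io_uring_register, 0,
 *		IORING_UNREGISTER_EVENTFD | IORING_REGISTER_USE_REGISTERED_RING,
 *		NULL, 0);
 */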