GitHub Repository: torvalds/linux
Path: blob/master/io_uring/napi.c
// SPDX-License-Identifier: GPL-2.0

#include "io_uring.h"
#include "napi.h"

#ifdef CONFIG_NET_RX_BUSY_POLL

/* Timeout for cleanout of stale entries. */
#define NAPI_TIMEOUT		(60 * SEC_CONVERSION)

struct io_napi_entry {
	unsigned int		napi_id;
	struct list_head	list;

	unsigned long		timeout;
	struct hlist_node	node;

	struct rcu_head		rcu;
};
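
/*
 * Each tracked NAPI instance lives in two structures at once: a hash
 * table keyed by napi_id for fast lookups, and a plain list that the
 * busy poll loop iterates. Readers traverse both under RCU; writers
 * serialize on ctx->napi_lock.
 */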
static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
					       unsigned int napi_id)
{
	struct io_napi_entry *e;

	hlist_for_each_entry_rcu(e, hash_list, node) {
		if (e->napi_id != napi_id)
			continue;
		return e;
	}

	return NULL;
}

static inline ktime_t net_to_ktime(unsigned long t)
{
	/* napi approximating usecs, reverse busy_loop_current_time */
	return ns_to_ktime(t << 10);
}
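
/*
 * Add a NAPI id to the tracking structures. The first lookup runs under
 * RCU only; if the id is already tracked, its timeout is refreshed.
 * Otherwise a new entry is inserted under ctx->napi_lock, re-checking
 * for a concurrent insertion of the same id.
 */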
int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
{
	struct hlist_head *hash_list;
	struct io_napi_entry *e;

	/* Non-NAPI IDs can be rejected. */
	if (!napi_id_valid(napi_id))
		return -EINVAL;

	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];

	scoped_guard(rcu) {
		e = io_napi_hash_find(hash_list, napi_id);
		if (e) {
			WRITE_ONCE(e->timeout, jiffies + NAPI_TIMEOUT);
			return -EEXIST;
		}
	}

	e = kmalloc(sizeof(*e), GFP_NOWAIT);
	if (!e)
		return -ENOMEM;

	e->napi_id = napi_id;
	e->timeout = jiffies + NAPI_TIMEOUT;

	/*
	 * guard(spinlock) is not used here so the lock can be dropped
	 * manually before calling kfree().
	 */
	spin_lock(&ctx->napi_lock);
	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
		spin_unlock(&ctx->napi_lock);
		kfree(e);
		return -EEXIST;
	}

	hlist_add_tail_rcu(&e->node, hash_list);
	list_add_tail_rcu(&e->list, &ctx->napi_list);
	spin_unlock(&ctx->napi_lock);
	return 0;
}
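
/*
 * Remove a statically tracked NAPI id. Deletion is serialized on
 * ctx->napi_lock, but the entry itself is only freed after an RCU grace
 * period so that concurrent lockless iterators never see freed memory.
 */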
static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id)
{
	struct hlist_head *hash_list;
	struct io_napi_entry *e;

	/* Non-NAPI IDs can be rejected. */
	if (!napi_id_valid(napi_id))
		return -EINVAL;

	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
	guard(spinlock)(&ctx->napi_lock);
	e = io_napi_hash_find(hash_list, napi_id);
	if (!e)
		return -ENOENT;

	list_del_rcu(&e->list);
	hash_del_rcu(&e->node);
	kfree_rcu(e, rcu);
	return 0;
}
static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;

	guard(spinlock)(&ctx->napi_lock);
	/*
	 * list_for_each_entry_safe() is not required as long as:
	 * 1. list_del_rcu() does not reset the deleted node next pointer
	 * 2. kfree_rcu() delays the memory freeing until the next quiescent
	 *    state
	 */
	list_for_each_entry(e, &ctx->napi_list, list) {
		if (time_after(jiffies, READ_ONCE(e->timeout))) {
			list_del_rcu(&e->list);
			hash_del_rcu(&e->node);
			kfree_rcu(e, rcu);
		}
	}
}

static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
{
	if (is_stale)
		__io_napi_remove_stale(ctx);
}
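
/*
 * A zero busy poll window (bp == 0) means "time out immediately";
 * otherwise the loop ends once the current time passes start_time + bp.
 */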
static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
					     ktime_t bp)
{
	if (bp) {
		ktime_t end_time = ktime_add(start_time, bp);
		ktime_t now = net_to_ktime(busy_loop_current_time());

		return ktime_after(now, end_time);
	}

	return true;
}

static bool io_napi_busy_loop_should_end(void *data,
					 unsigned long start_time)
{
	struct io_wait_queue *iowq = data;

	if (signal_pending(current))
		return true;
	if (io_should_wake(iowq) || io_has_work(iowq->ctx))
		return true;
	if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
				      iowq->napi_busy_poll_dt))
		return true;

	return false;
}
/*
 * Static tracking never reports stale entries: the list is managed
 * explicitly via add/del requests, so entries are not aged out.
 */
static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx,
					 bool (*loop_end)(void *, unsigned long),
					 void *loop_end_arg)
{
	struct io_napi_entry *e;

	list_for_each_entry_rcu(e, &ctx->napi_list, list)
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
	return false;
}
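
/*
 * Dynamic tracking ages entries out: any entry whose timeout has
 * expired is flagged as stale so the caller can prune it once the RCU
 * read section has ended.
 */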
static bool
dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
			      bool (*loop_end)(void *, unsigned long),
			      void *loop_end_arg)
{
	struct io_napi_entry *e;
	bool is_stale = false;

	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);

		if (time_after(jiffies, READ_ONCE(e->timeout)))
			is_stale = true;
	}

	return is_stale;
}
static inline bool
__io_napi_do_busy_loop(struct io_ring_ctx *ctx,
		       bool (*loop_end)(void *, unsigned long),
		       void *loop_end_arg)
{
	if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC)
		return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
	return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
}
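
/*
 * Busy poll until the caller should wake up, a signal is pending, or
 * the poll window expires. With a single tracked id the loop-end check
 * runs inside napi_busy_loop_rcu() itself, so one outer pass suffices.
 */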
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
				       struct io_wait_queue *iowq)
{
	unsigned long start_time = busy_loop_current_time();
	bool (*loop_end)(void *, unsigned long) = NULL;
	void *loop_end_arg = NULL;
	bool is_stale = false;

	/* Singular lists use a different napi loop end check function and are
	 * only executed once.
	 */
	if (list_is_singular(&ctx->napi_list)) {
		loop_end = io_napi_busy_loop_should_end;
		loop_end_arg = iowq;
	}

	scoped_guard(rcu) {
		do {
			is_stale = __io_napi_do_busy_loop(ctx, loop_end,
							  loop_end_arg);
		} while (!io_napi_busy_loop_should_end(iowq, start_time) &&
			 !loop_end_arg);
	}

	io_napi_remove_stale(ctx, is_stale);
}
/*
 * io_napi_init() - Init napi settings
 * @ctx: pointer to io-uring context structure
 *
 * Init napi settings in the io-uring context.
 */
void io_napi_init(struct io_ring_ctx *ctx)
{
	u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;

	INIT_LIST_HEAD(&ctx->napi_list);
	spin_lock_init(&ctx->napi_lock);
	ctx->napi_prefer_busy_poll = false;
	ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
	ctx->napi_track_mode = IO_URING_NAPI_TRACKING_INACTIVE;
}
/*
 * io_napi_free() - Deallocate napi
 * @ctx: pointer to io-uring context structure
 *
 * Free the napi list and the hash table in the io-uring context.
 */
void io_napi_free(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;

	guard(spinlock)(&ctx->napi_lock);
	list_for_each_entry(e, &ctx->napi_list, list) {
		hash_del_rcu(&e->node);
		kfree_rcu(e, rcu);
	}
	INIT_LIST_HEAD_RCU(&ctx->napi_list);
}
static int io_napi_register_napi(struct io_ring_ctx *ctx,
				 struct io_uring_napi *napi)
{
	switch (napi->op_param) {
	case IO_URING_NAPI_TRACKING_DYNAMIC:
	case IO_URING_NAPI_TRACKING_STATIC:
		break;
	default:
		return -EINVAL;
	}
	/* clean the napi list for new settings */
	io_napi_free(ctx);
	WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
	WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
	return 0;
}
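
/*
 * Note: userspace reaches the handlers below through io_uring_register(2)
 * with IORING_REGISTER_NAPI / IORING_UNREGISTER_NAPI, passing a struct
 * io_uring_napi that describes the requested operation.
 */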
/*
 * io_register_napi() - Register napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Register napi in the io-uring context.
 */
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll,
		.op_param	  = ctx->napi_track_mode
	};
	struct io_uring_napi napi;

	if (ctx->flags & IORING_SETUP_IOPOLL)
		return -EINVAL;
	if (copy_from_user(&napi, arg, sizeof(napi)))
		return -EFAULT;
	if (napi.pad[0] || napi.pad[1] || napi.resv)
		return -EINVAL;

	if (copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	switch (napi.opcode) {
	case IO_URING_NAPI_REGISTER_OP:
		return io_napi_register_napi(ctx, &napi);
	case IO_URING_NAPI_STATIC_ADD_ID:
		if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
			return -EINVAL;
		return __io_napi_add_id(ctx, napi.op_param);
	case IO_URING_NAPI_STATIC_DEL_ID:
		if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
			return -EINVAL;
		return __io_napi_del_id(ctx, napi.op_param);
	default:
		return -EINVAL;
	}
}
/*
 * io_unregister_napi() - Unregister napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Unregister napi. If arg has been specified copy the busy poll timeout and
 * prefer busy poll setting to the passed in structure.
 */
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};

	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
	WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
	return 0;
}
/*
 * __io_napi_busy_loop() - execute busy poll loop
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 *
 * Execute the busy poll loop and remove any stale entries afterwards.
 */
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
{
	if (ctx->flags & IORING_SETUP_SQPOLL)
		return;

	/* never busy poll past the task's wait timeout */
	iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
	if (iowq->timeout != KTIME_MAX) {
		ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx));

		iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
	}

	iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);
	io_napi_blocking_busy_loop(ctx, iowq);
}
/*
 * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
 * @ctx: pointer to io-uring context structure
 *
 * Execute the napi busy poll loop for the sqpoll thread and remove any
 * stale entries afterwards. Returns 1 if the loop was run, 0 otherwise.
 */
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
{
	bool is_stale = false;

	if (!READ_ONCE(ctx->napi_busy_poll_dt))
		return 0;
	if (list_empty_careful(&ctx->napi_list))
		return 0;

	scoped_guard(rcu) {
		is_stale = __io_napi_do_busy_loop(ctx, NULL, NULL);
	}

	io_napi_remove_stale(ctx, is_stale);
	return 1;
}

#endif