GitHub Repository: awilliam/linux-vfio
Path: blob/master/fs/eventfd.c
/*
 *  fs/eventfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <[email protected]>
 *
 */

#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/anon_inodes.h>
#include <linux/syscalls.h>
#include <linux/module.h>
#include <linux/kref.h>
#include <linux/eventfd.h>

struct eventfd_ctx {
	struct kref kref;
	wait_queue_head_t wqh;
	/*
	 * Every time that a write(2) is performed on an eventfd, the
	 * value of the __u64 being written is added to "count" and a
	 * wakeup is performed on "wqh". A read(2) will return the "count"
	 * value to userspace, and will reset "count" to zero. The kernel
	 * side eventfd_signal() also adds to the "count" counter and
	 * issues a wakeup.
	 */
	__u64 count;
	unsigned int flags;
};

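/*
 * Editor's note (not part of the original file): a minimal userspace
 * sketch of the counter semantics described above. Each write(2) adds
 * its __u64 value to the counter; a read(2) of a non-semaphore eventfd
 * returns the accumulated value and resets the counter to zero.
 *
 *	#include <sys/eventfd.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		uint64_t val;
 *		int fd = eventfd(0, 0);          // initial counter of 0
 *
 *		val = 3;
 *		write(fd, &val, sizeof(val));    // counter = 3
 *		val = 4;
 *		write(fd, &val, sizeof(val));    // counter = 7
 *
 *		read(fd, &val, sizeof(val));     // returns 7, counter reset to 0
 *		printf("read %llu\n", (unsigned long long)val);
 *		close(fd);
 *		return 0;
 *	}
 */
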
/**
 * eventfd_signal - Adds @n to the eventfd counter.
 * @ctx: [in] Pointer to the eventfd context.
 * @n: [in] Value of the counter to be added to the eventfd internal counter.
 *          The value cannot be negative.
 *
 * This function is supposed to be called by the kernel in paths that do not
 * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
 * value, and we signal this as an overflow condition by returning a POLLERR
 * to poll(2).
 *
 * Returns @n in case of success, a non-negative number lower than @n in case
 * of overflow, or the following error codes:
 *
 * -EINVAL    : The value of @n is negative.
 */
int eventfd_signal(struct eventfd_ctx *ctx, int n)
{
	unsigned long flags;

	if (n < 0)
		return -EINVAL;
	spin_lock_irqsave(&ctx->wqh.lock, flags);
	if (ULLONG_MAX - ctx->count < n)
		n = (int) (ULLONG_MAX - ctx->count);
	ctx->count += n;
	if (waitqueue_active(&ctx->wqh))
		wake_up_locked_poll(&ctx->wqh, POLLIN);
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return n;
}
EXPORT_SYMBOL_GPL(eventfd_signal);

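/*
 * Editor's note (not part of the original file): a hedged sketch of how
 * kernel code typically consumes this interface. The surrounding driver
 * (my_dev here) and its fields are hypothetical; only eventfd_signal()
 * and its "add 1, wake POLLIN waiters" behaviour come from the code
 * above. Because eventfd_signal() never sleeps, calling it from an
 * interrupt handler like this is safe.
 *
 *	static irqreturn_t my_dev_irq_handler(int irq, void *data)
 *	{
 *		struct my_dev *dev = data;
 *
 *		// Notify the userspace waiter; counter overflow is
 *		// reported to poll(2) as POLLERR, not propagated here.
 *		if (dev->trigger)
 *			eventfd_signal(dev->trigger, 1);
 *		return IRQ_HANDLED;
 *	}
 */
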
static void eventfd_free_ctx(struct eventfd_ctx *ctx)
{
	kfree(ctx);
}

static void eventfd_free(struct kref *kref)
{
	struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);

	eventfd_free_ctx(ctx);
}

/**
 * eventfd_ctx_get - Acquires a reference to the internal eventfd context.
 * @ctx: [in] Pointer to the eventfd context.
 *
 * Returns: In case of success, returns a pointer to the eventfd context.
 */
struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
{
	kref_get(&ctx->kref);
	return ctx;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_get);

/**
 * eventfd_ctx_put - Releases a reference to the internal eventfd context.
 * @ctx: [in] Pointer to eventfd context.
 *
 * The eventfd context reference must have been previously acquired either
 * with eventfd_ctx_get() or eventfd_ctx_fdget().
 */
void eventfd_ctx_put(struct eventfd_ctx *ctx)
{
	kref_put(&ctx->kref, eventfd_free);
}
EXPORT_SYMBOL_GPL(eventfd_ctx_put);

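/*
 * Editor's note (not part of the original file): the kref above gives
 * the context a lifetime independent of the file descriptor, so every
 * eventfd_ctx_get()/eventfd_ctx_fdget() must be balanced by an
 * eventfd_ctx_put(). A minimal sketch of the expected pairing:
 *
 *	struct eventfd_ctx *ctx = eventfd_ctx_fdget(fd); // takes a reference
 *	if (IS_ERR(ctx))
 *		return PTR_ERR(ctx);
 *	eventfd_signal(ctx, 1);                          // ... use it ...
 *	eventfd_ctx_put(ctx);                            // drop the reference
 */
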
static int eventfd_release(struct inode *inode, struct file *file)
{
	struct eventfd_ctx *ctx = file->private_data;

	wake_up_poll(&ctx->wqh, POLLHUP);
	eventfd_ctx_put(ctx);
	return 0;
}

static unsigned int eventfd_poll(struct file *file, poll_table *wait)
{
	struct eventfd_ctx *ctx = file->private_data;
	unsigned int events = 0;
	unsigned long flags;

	poll_wait(file, &ctx->wqh, wait);

	spin_lock_irqsave(&ctx->wqh.lock, flags);
	if (ctx->count > 0)
		events |= POLLIN;
	if (ctx->count == ULLONG_MAX)
		events |= POLLERR;
	if (ULLONG_MAX - 1 > ctx->count)
		events |= POLLOUT;
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return events;
}

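/*
 * Editor's note (not part of the original file): the readiness rules
 * above map directly onto poll(2) from userspace. A minimal sketch:
 *
 *	#include <sys/eventfd.h>
 *	#include <poll.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *
 *	int wait_for_event(int efd)
 *	{
 *		struct pollfd pfd = { .fd = efd, .events = POLLIN };
 *		uint64_t val;
 *
 *		if (poll(&pfd, 1, -1) < 0)       // POLLIN: counter > 0
 *			return -1;
 *		if (pfd.revents & POLLERR)       // counter hit ULLONG_MAX
 *			return -1;
 *		return read(efd, &val, sizeof(val)) == sizeof(val) ? 0 : -1;
 *	}
 */
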
static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
{
	*cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
	ctx->count -= *cnt;
}

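/*
 * Editor's note (not part of the original file): this helper is where
 * EFD_SEMAPHORE changes behaviour. A sketch of the difference as seen
 * from userspace (reusing the headers from the earlier sketch):
 *
 *	uint64_t val = 3;
 *	int fd = eventfd(0, EFD_SEMAPHORE);
 *
 *	write(fd, &val, sizeof(val));    // counter = 3
 *	read(fd, &val, sizeof(val));     // val = 1, counter = 2
 *	read(fd, &val, sizeof(val));     // val = 1, counter = 1
 *	// without EFD_SEMAPHORE, the first read would return 3
 *	// and reset the counter to zero
 */
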
/**
 * eventfd_ctx_remove_wait_queue - Reads the current counter and removes the wait queue.
 * @ctx: [in] Pointer to eventfd context.
 * @wait: [in] Wait queue to be removed.
 * @cnt: [out] Pointer to the 64-bit counter value.
 *
 * Returns %0 if successful, or the following error codes:
 *
 * -EAGAIN      : The operation would have blocked.
 *
 * This is used to atomically remove a wait queue entry from the eventfd wait
 * queue head, and read/reset the counter value.
 */
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
				  __u64 *cnt)
{
	unsigned long flags;

	spin_lock_irqsave(&ctx->wqh.lock, flags);
	eventfd_ctx_do_read(ctx, cnt);
	__remove_wait_queue(&ctx->wqh, wait);
	if (*cnt != 0 && waitqueue_active(&ctx->wqh))
		wake_up_locked_poll(&ctx->wqh, POLLOUT);
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return *cnt != 0 ? 0 : -EAGAIN;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);

/**
 * eventfd_ctx_read - Reads the eventfd counter or waits if it is zero.
 * @ctx: [in] Pointer to eventfd context.
 * @no_wait: [in] Non-zero if the operation should not block.
 * @cnt: [out] Pointer to the 64-bit counter value.
 *
 * Returns %0 if successful, or the following error codes:
 *
 * -EAGAIN      : The operation would have blocked but @no_wait was non-zero.
 * -ERESTARTSYS : A signal interrupted the wait operation.
 *
 * If @no_wait is zero, the function might sleep until the eventfd internal
 * counter becomes greater than zero.
 */
ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
{
	ssize_t res;
	DECLARE_WAITQUEUE(wait, current);

	spin_lock_irq(&ctx->wqh.lock);
	*cnt = 0;
	res = -EAGAIN;
	if (ctx->count > 0)
		res = 0;
	else if (!no_wait) {
		__add_wait_queue(&ctx->wqh, &wait);
		for (;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ctx->count > 0) {
				res = 0;
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (likely(res == 0)) {
		eventfd_ctx_do_read(ctx, cnt);
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked_poll(&ctx->wqh, POLLOUT);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return res;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_read);

static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 cnt;

	if (count < sizeof(cnt))
		return -EINVAL;
	res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
	if (res < 0)
		return res;

	return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
}

static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
			     loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 ucnt;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ucnt))
		return -EINVAL;
	if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
		return -EFAULT;
	if (ucnt == ULLONG_MAX)
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	res = -EAGAIN;
	if (ULLONG_MAX - ctx->count > ucnt)
		res = sizeof(ucnt);
	else if (!(file->f_flags & O_NONBLOCK)) {
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ULLONG_MAX - ctx->count > ucnt) {
				res = sizeof(ucnt);
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (likely(res > 0)) {
		ctx->count += ucnt;
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked_poll(&ctx->wqh, POLLIN);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return res;
}

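/*
 * Editor's note (not part of the original file): two corner cases of the
 * write path above, sketched from userspace. The literal value
 * 0xffffffffffffffff is rejected outright, and a write that would not
 * leave the counter strictly below ULLONG_MAX blocks (or fails with
 * EAGAIN under O_NONBLOCK) until a read makes room:
 *
 *	uint64_t val = UINT64_MAX;       // == ULLONG_MAX
 *	write(fd, &val, sizeof(val));    // fails with EINVAL
 *
 *	val = UINT64_MAX - 1;
 *	write(fd, &val, sizeof(val));    // ok: counter = ULLONG_MAX - 1
 *	write(fd, &val, sizeof(val));    // blocks: no room for more
 */
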
static const struct file_operations eventfd_fops = {
	.release	= eventfd_release,
	.poll		= eventfd_poll,
	.read		= eventfd_read,
	.write		= eventfd_write,
	.llseek		= noop_llseek,
};

/**
 * eventfd_fget - Acquires a reference to an eventfd file descriptor.
 * @fd: [in] Eventfd file descriptor.
 *
 * Returns a pointer to the eventfd file structure in case of success, or the
 * following error pointers:
 *
 * -EBADF    : Invalid @fd file descriptor.
 * -EINVAL   : The @fd file descriptor is not an eventfd file.
 */
struct file *eventfd_fget(int fd)
{
	struct file *file;

	file = fget(fd);
	if (!file)
		return ERR_PTR(-EBADF);
	if (file->f_op != &eventfd_fops) {
		fput(file);
		return ERR_PTR(-EINVAL);
	}

	return file;
}
EXPORT_SYMBOL_GPL(eventfd_fget);

/**
 * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context.
 * @fd: [in] Eventfd file descriptor.
 *
 * Returns a pointer to the internal eventfd context, otherwise the error
 * pointers returned by the following functions:
 *
 * eventfd_fget
 */
struct eventfd_ctx *eventfd_ctx_fdget(int fd)
{
	struct file *file;
	struct eventfd_ctx *ctx;

	file = eventfd_fget(fd);
	if (IS_ERR(file))
		return (struct eventfd_ctx *) file;
	ctx = eventfd_ctx_get(file->private_data);
	fput(file);

	return ctx;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);

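/*
 * Editor's note (not part of the original file): a hedged sketch of the
 * pattern eventfd_ctx_fdget() enables. Userspace hands a driver an
 * eventfd fd; the driver converts it to a context it can hold and signal
 * long after the fd itself may have been closed. The my_dev struct, its
 * trigger field and the helper name are hypothetical.
 *
 *	static long my_dev_set_trigger(struct my_dev *dev, int __user *argp)
 *	{
 *		struct eventfd_ctx *trigger;
 *		int fd;
 *
 *		if (get_user(fd, argp))
 *			return -EFAULT;
 *		trigger = eventfd_ctx_fdget(fd);    // ref outlives the fd
 *		if (IS_ERR(trigger))
 *			return PTR_ERR(trigger);
 *		dev->trigger = trigger;             // released later with
 *		return 0;                           // eventfd_ctx_put()
 *	}
 */
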
/**
 * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context.
 * @file: [in] Eventfd file pointer.
 *
 * Returns a pointer to the internal eventfd context, otherwise the error
 * pointer:
 *
 * -EINVAL   : The @file pointer is not an eventfd file.
 */
struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
{
	if (file->f_op != &eventfd_fops)
		return ERR_PTR(-EINVAL);

	return eventfd_ctx_get(file->private_data);
}
EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);

/**
 * eventfd_file_create - Creates an eventfd file pointer.
 * @count: Initial eventfd counter value.
 * @flags: Flags for the eventfd file.
 *
 * This function creates an eventfd file pointer, without installing it into
 * the fd table. This is useful when the eventfd file is used during the
 * initialization of data structures that require extra setup after the eventfd
 * creation. So the eventfd creation is split into the file pointer creation
 * phase, and the file descriptor installation phase. In this way races with
 * userspace closing the newly installed file descriptor can be avoided.
 * Returns an eventfd file pointer, or a proper error pointer.
 */
struct file *eventfd_file_create(unsigned int count, int flags)
{
	struct file *file;
	struct eventfd_ctx *ctx;

	/* Check the EFD_* constants for consistency. */
	BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);

	if (flags & ~EFD_FLAGS_SET)
		return ERR_PTR(-EINVAL);

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	kref_init(&ctx->kref);
	init_waitqueue_head(&ctx->wqh);
	ctx->count = count;
	ctx->flags = flags;

	file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx,
				  O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
	if (IS_ERR(file))
		eventfd_free_ctx(ctx);

	return file;
}

SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
	int fd, error;
	struct file *file;

	error = get_unused_fd_flags(flags & EFD_SHARED_FCNTL_FLAGS);
	if (error < 0)
		return error;
	fd = error;

	file = eventfd_file_create(count, flags);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		goto err_put_unused_fd;
	}
	fd_install(fd, file);

	return fd;

err_put_unused_fd:
	put_unused_fd(fd);

	return error;
}

SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
	return sys_eventfd2(count, 0);
}