GitHub Repository: Kitware/CMake
Path: blob/master/Utilities/cmlibuv/src/unix/epoll.c
/* Copyright libuv contributors. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "uv.h"
#include "internal.h"
#include <errno.h>
#include <sys/epoll.h>

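/* Create the epoll descriptor that backs this event loop. Falls back to
 * epoll_create() plus an explicit close-on-exec step on kernels that lack
 * epoll_create1() or reject the O_CLOEXEC flag.
 */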
int uv__epoll_init(uv_loop_t* loop) {
  int fd;
  fd = epoll_create1(O_CLOEXEC);

  /* epoll_create1() can fail either because it's not implemented (old kernel)
   * or because it doesn't understand the O_CLOEXEC flag.
   */
  if (fd == -1 && (errno == ENOSYS || errno == EINVAL)) {
    fd = epoll_create(256);

    if (fd != -1)
      uv__cloexec(fd, 1);
  }

  loop->backend_fd = fd;
  if (fd == -1)
    return UV__ERR(errno);

  return 0;
}


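/* Invalidate any events still queued for `fd` in the events array that
 * uv__io_poll() stashes in loop->watchers[loop->nwatchers], then remove the
 * descriptor from the epoll set so no further events are reported for it.
 */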
void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) {
  struct epoll_event* events;
  struct epoll_event dummy;
  uintptr_t i;
  uintptr_t nfds;

  assert(loop->watchers != NULL);
  assert(fd >= 0);

  events = (struct epoll_event*) loop->watchers[loop->nwatchers];
  nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1];
  if (events != NULL)
    /* Invalidate events with same file descriptor */
    for (i = 0; i < nfds; i++)
      if (events[i].data.fd == fd)
        events[i].data.fd = -1;

  /* Remove the file descriptor from the epoll.
   * This avoids a problem where the same file description remains open
   * in another process, causing repeated junk epoll events.
   *
   * We pass in a dummy epoll_event, to work around a bug in old kernels.
   */
  if (loop->backend_fd >= 0) {
    /* Work around a bug in kernels 3.10 to 3.19 where passing a struct that
     * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings.
     */
    memset(&dummy, 0, sizeof(dummy));
    epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy);
  }
}


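/* Check whether `fd` can be watched with epoll: tentatively add it to the
 * epoll set (EEXIST means it is already registered, which is fine) and remove
 * it again on success. Returns 0 if the descriptor is pollable, UV__ERR(errno)
 * otherwise.
 */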
int uv__io_check_fd(uv_loop_t* loop, int fd) {
  struct epoll_event e;
  int rc;

  memset(&e, 0, sizeof(e));
  e.events = POLLIN;
  e.data.fd = -1;

  rc = 0;
  if (epoll_ctl(loop->backend_fd, EPOLL_CTL_ADD, fd, &e))
    if (errno != EEXIST)
      rc = UV__ERR(errno);

  if (rc == 0)
    if (epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &e))
      abort();

  return rc;
}


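/* Poll for I/O for at most `timeout` milliseconds (-1 means block until an
 * event arrives, 0 means do not block) and invoke the callbacks of watchers
 * that have pending events.
 */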
void uv__io_poll(uv_loop_t* loop, int timeout) {
  /* A bug in kernels < 2.6.37 makes timeouts larger than ~30 minutes
   * effectively infinite on 32-bit architectures. To avoid blocking
   * indefinitely, we cap the timeout and poll again if necessary.
   *
   * Note that "30 minutes" is a simplification because it depends on
   * the value of CONFIG_HZ. The magic constant assumes CONFIG_HZ=1200,
   * that being the largest value I have seen in the wild (and only once.)
   */
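  /* 1789569 ms is roughly 2^31 / 1200, i.e. presumably the largest timeout
   * whose conversion to jiffies still fits in a signed 32-bit value when
   * CONFIG_HZ=1200.
   */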
  static const int max_safe_timeout = 1789569;
  static int no_epoll_pwait_cached;
  static int no_epoll_wait_cached;
  int no_epoll_pwait;
  int no_epoll_wait;
  struct epoll_event events[1024];
  struct epoll_event* pe;
  struct epoll_event e;
  int real_timeout;
  QUEUE* q;
  uv__io_t* w;
  sigset_t sigset;
  uint64_t sigmask;
  uint64_t base;
  int have_signals;
  int nevents;
  int count;
  int nfds;
  int fd;
  int op;
  int i;
  int user_timeout;
  int reset_timeout;

  if (loop->nfds == 0) {
    assert(QUEUE_EMPTY(&loop->watcher_queue));
    return;
  }

  memset(&e, 0, sizeof(e));

  while (!QUEUE_EMPTY(&loop->watcher_queue)) {
    q = QUEUE_HEAD(&loop->watcher_queue);
    QUEUE_REMOVE(q);
    QUEUE_INIT(q);

    w = QUEUE_DATA(q, uv__io_t, watcher_queue);
    assert(w->pevents != 0);
    assert(w->fd >= 0);
    assert(w->fd < (int) loop->nwatchers);

    e.events = w->pevents;
    e.data.fd = w->fd;

    if (w->events == 0)
      op = EPOLL_CTL_ADD;
    else
      op = EPOLL_CTL_MOD;

    /* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching
     * events, skip the syscall and squelch the events after epoll_wait().
     */
    if (epoll_ctl(loop->backend_fd, op, w->fd, &e)) {
      if (errno != EEXIST)
        abort();

      assert(op == EPOLL_CTL_ADD);

      /* We've reactivated a file descriptor that's been watched before. */
      if (epoll_ctl(loop->backend_fd, EPOLL_CTL_MOD, w->fd, &e))
        abort();
    }

    w->events = w->pevents;
  }

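  /* UV_LOOP_BLOCK_SIGPROF is set when the loop was configured with
   * uv_loop_configure(loop, UV_LOOP_BLOCK_SIGNAL, SIGPROF); blocking SIGPROF
   * around the poll call avoids premature wakeups caused by a sampling
   * profiler's timer signals.
   */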
  sigmask = 0;
  if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
    sigemptyset(&sigset);
    sigaddset(&sigset, SIGPROF);
    sigmask |= 1 << (SIGPROF - 1);
  }

  assert(timeout >= -1);
  base = loop->time;
  count = 48; /* Benchmarks suggest this gives the best throughput. */
  real_timeout = timeout;

  if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) {
    reset_timeout = 1;
    user_timeout = timeout;
    timeout = 0;
  } else {
    reset_timeout = 0;
    user_timeout = 0;
  }

  /* You could argue there is a dependency between these two but
   * ultimately we don't care about their ordering with respect
   * to one another. Worst case, we make a few system calls that
   * could have been avoided because another thread already knows
   * they fail with ENOSYS. Hardly the end of the world.
   */
  no_epoll_pwait = uv__load_relaxed(&no_epoll_pwait_cached);
  no_epoll_wait = uv__load_relaxed(&no_epoll_wait_cached);

  for (;;) {
    /* Only need to set the provider_entry_time if timeout != 0. The function
     * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME.
     */
    if (timeout != 0)
      uv__metrics_set_provider_entry_time(loop);

    /* See the comment for max_safe_timeout for an explanation of why
     * this is necessary. Executive summary: kernel bug workaround.
     */
    if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout)
      timeout = max_safe_timeout;

    if (sigmask != 0 && no_epoll_pwait != 0)
      if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
        abort();

    if (no_epoll_wait != 0 || (sigmask != 0 && no_epoll_pwait == 0)) {
      nfds = epoll_pwait(loop->backend_fd,
                         events,
                         ARRAY_SIZE(events),
                         timeout,
                         &sigset);
      if (nfds == -1 && errno == ENOSYS) {
        uv__store_relaxed(&no_epoll_pwait_cached, 1);
        no_epoll_pwait = 1;
      }
    } else {
      nfds = epoll_wait(loop->backend_fd,
                        events,
                        ARRAY_SIZE(events),
                        timeout);
      if (nfds == -1 && errno == ENOSYS) {
        uv__store_relaxed(&no_epoll_wait_cached, 1);
        no_epoll_wait = 1;
      }
    }

    if (sigmask != 0 && no_epoll_pwait != 0)
      if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL))
        abort();

    /* Update loop->time unconditionally. It's tempting to skip the update when
     * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the
     * operating system didn't reschedule our process while in the syscall.
     */
    SAVE_ERRNO(uv__update_time(loop));

    if (nfds == 0) {
      assert(timeout != -1);

      if (reset_timeout != 0) {
        timeout = user_timeout;
        reset_timeout = 0;
      }

      if (timeout == -1)
        continue;

      if (timeout == 0)
        return;

      /* We may have been inside the system call for longer than |timeout|
       * milliseconds so we need to update the timestamp to avoid drift.
       */
      goto update_timeout;
    }

    if (nfds == -1) {
      if (errno == ENOSYS) {
        /* epoll_wait() or epoll_pwait() failed, try the other system call. */
        assert(no_epoll_wait == 0 || no_epoll_pwait == 0);
        continue;
      }

      if (errno != EINTR)
        abort();

      if (reset_timeout != 0) {
        timeout = user_timeout;
        reset_timeout = 0;
      }

      if (timeout == -1)
        continue;

      if (timeout == 0)
        return;

      /* Interrupted by a signal. Update timeout and poll again. */
      goto update_timeout;
    }

    have_signals = 0;
    nevents = 0;

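    /* Stash a pointer to the events array (and its length) in the two extra
     * slots at the end of loop->watchers so that uv__platform_invalidate_fd()
     * can invalidate entries for descriptors closed by the callbacks below.
     */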
    {
      /* Squelch a -Waddress-of-packed-member warning with gcc >= 9. */
      union {
        struct epoll_event* events;
        uv__io_t* watchers;
      } x;

      x.events = events;
      assert(loop->watchers != NULL);
      loop->watchers[loop->nwatchers] = x.watchers;
      loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds;
    }

    for (i = 0; i < nfds; i++) {
      pe = events + i;
      fd = pe->data.fd;

      /* Skip invalidated events, see uv__platform_invalidate_fd */
      if (fd == -1)
        continue;

      assert(fd >= 0);
      assert((unsigned) fd < loop->nwatchers);

      w = loop->watchers[fd];

      if (w == NULL) {
        /* File descriptor that we've stopped watching, disarm it.
         *
         * Ignore all errors because we may be racing with another thread
         * when the file descriptor is closed.
         */
        epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, pe);
        continue;
      }

      /* Give users only events they're interested in. Prevents spurious
       * callbacks when a previous callback invocation in this loop has stopped
       * the current watcher. Also filters out events that the user has not
       * requested us to watch.
       */
      pe->events &= w->pevents | POLLERR | POLLHUP;

      /* Work around an epoll quirk where it sometimes reports just the
       * EPOLLERR or EPOLLHUP event. In order to force the event loop to
       * move forward, we merge in the read/write events that the watcher
       * is interested in; uv__read() and uv__write() will then deal with
       * the error or hangup in the usual fashion.
       *
       * Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user
       * reads the available data, calls uv_read_stop(), then sometime later
       * calls uv_read_start() again. By then, libuv has forgotten about the
       * hangup and the kernel won't report EPOLLIN again because there's
       * nothing left to read. If anything, libuv is to blame here. The
       * current hack is just a quick bandaid; to properly fix it, libuv
       * needs to remember the error/hangup event. We should get that for
       * free when we switch over to edge-triggered I/O.
       */
      if (pe->events == POLLERR || pe->events == POLLHUP)
        pe->events |=
          w->pevents & (POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI);

      if (pe->events != 0) {
        /* Run signal watchers last. This also affects child process watchers
         * because those are implemented in terms of signal watchers.
         */
        if (w == &loop->signal_io_watcher) {
          have_signals = 1;
        } else {
          uv__metrics_update_idle_time(loop);
          w->cb(loop, w, pe->events);
        }

        nevents++;
      }
    }

    if (reset_timeout != 0) {
      timeout = user_timeout;
      reset_timeout = 0;
    }

    if (have_signals != 0) {
      uv__metrics_update_idle_time(loop);
      loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);
    }

    loop->watchers[loop->nwatchers] = NULL;
    loop->watchers[loop->nwatchers + 1] = NULL;

    if (have_signals != 0)
      return; /* Event loop should cycle now so don't poll again. */

    if (nevents != 0) {
      if (nfds == ARRAY_SIZE(events) && --count != 0) {
        /* Poll for more events but don't block this time. */
        timeout = 0;
        continue;
      }
      return;
    }

    if (timeout == 0)
      return;

    if (timeout == -1)
      continue;

update_timeout:
    assert(timeout > 0);

    real_timeout -= (loop->time - base);
    if (real_timeout <= 0)
      return;

    timeout = real_timeout;
  }
}