/* Copyright libuv contributors. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "uv.h"
#include "internal.h"
#include <errno.h>
#include <sys/epoll.h>

int uv__epoll_init(uv_loop_t* loop) {
  int fd;
  fd = epoll_create1(O_CLOEXEC);

  /* epoll_create1() can fail either because it's not implemented (old kernel)
   * or because it doesn't understand the O_CLOEXEC flag.
   */
  if (fd == -1 && (errno == ENOSYS || errno == EINVAL)) {
    fd = epoll_create(256);

    if (fd != -1)
      uv__cloexec(fd, 1);
  }

  loop->backend_fd = fd;
  if (fd == -1)
    return UV__ERR(errno);

  return 0;
}


void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) {
  struct epoll_event* events;
  struct epoll_event dummy;
  uintptr_t i;
  uintptr_t nfds;

  assert(loop->watchers != NULL);
  assert(fd >= 0);

  events = (struct epoll_event*) loop->watchers[loop->nwatchers];
  nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1];
  if (events != NULL)
    /* Invalidate events with same file descriptor */
    for (i = 0; i < nfds; i++)
      if (events[i].data.fd == fd)
        events[i].data.fd = -1;

  /* Remove the file descriptor from the epoll.
   * This avoids a problem where the same file description remains open
   * in another process, causing repeated junk epoll events.
   *
   * We pass in a dummy epoll_event, to work around a bug in old kernels.
   */
  if (loop->backend_fd >= 0) {
    /* Work around a bug in kernels 3.10 to 3.19 where passing a struct that
     * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings.
     */
    memset(&dummy, 0, sizeof(dummy));
    epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy);
  }
}


int uv__io_check_fd(uv_loop_t* loop, int fd) {
  struct epoll_event e;
  int rc;

  memset(&e, 0, sizeof(e));
  e.events = POLLIN;
  e.data.fd = -1;

  rc = 0;
  if (epoll_ctl(loop->backend_fd, EPOLL_CTL_ADD, fd, &e))
    if (errno != EEXIST)
      rc = UV__ERR(errno);

  if (rc == 0)
    if (epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &e))
      abort();

  return rc;
}


void uv__io_poll(uv_loop_t* loop, int timeout) {
  /* A bug in kernels < 2.6.37 makes timeouts larger than ~30 minutes
   * effectively infinite on 32-bit architectures. To avoid blocking
   * indefinitely, we cap the timeout and poll again if necessary.
   *
   * Note that "30 minutes" is a simplification because it depends on
   * the value of CONFIG_HZ. The magic constant assumes CONFIG_HZ=1200,
   * that being the largest value I have seen in the wild (and only once.)
   */
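  /* A plausible derivation of the magic constant below: with CONFIG_HZ=1200,
   * INT32_MAX / 1200 == 1789569, i.e. a timeout of 1789569 ms (about 29.8
   * minutes) is the largest value whose conversion to jiffies still fits in
   * a signed 32-bit integer, hence the "~30 minutes" above.
   */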
  static const int max_safe_timeout = 1789569;
  static int no_epoll_pwait_cached;
  static int no_epoll_wait_cached;
  int no_epoll_pwait;
  int no_epoll_wait;
  struct epoll_event events[1024];
  struct epoll_event* pe;
  struct epoll_event e;
  int real_timeout;
  QUEUE* q;
  uv__io_t* w;
  sigset_t sigset;
  uint64_t sigmask;
  uint64_t base;
  int have_signals;
  int nevents;
  int count;
  int nfds;
  int fd;
  int op;
  int i;
  int user_timeout;
  int reset_timeout;

  if (loop->nfds == 0) {
    assert(QUEUE_EMPTY(&loop->watcher_queue));
    return;
  }

  memset(&e, 0, sizeof(e));

  while (!QUEUE_EMPTY(&loop->watcher_queue)) {
    q = QUEUE_HEAD(&loop->watcher_queue);
    QUEUE_REMOVE(q);
    QUEUE_INIT(q);

    w = QUEUE_DATA(q, uv__io_t, watcher_queue);
    assert(w->pevents != 0);
    assert(w->fd >= 0);
    assert(w->fd < (int) loop->nwatchers);

    e.events = w->pevents;
    e.data.fd = w->fd;

    if (w->events == 0)
      op = EPOLL_CTL_ADD;
    else
      op = EPOLL_CTL_MOD;

    /* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching
     * events, skip the syscall and squelch the events after epoll_wait().
     */
    if (epoll_ctl(loop->backend_fd, op, w->fd, &e)) {
      if (errno != EEXIST)
        abort();

      assert(op == EPOLL_CTL_ADD);

      /* We've reactivated a file descriptor that's been watched before. */
      if (epoll_ctl(loop->backend_fd, EPOLL_CTL_MOD, w->fd, &e))
        abort();
    }

    w->events = w->pevents;
  }

  sigmask = 0;
  if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
    sigemptyset(&sigset);
    sigaddset(&sigset, SIGPROF);
    sigmask |= 1 << (SIGPROF - 1);
  }

  assert(timeout >= -1);
  base = loop->time;
  count = 48; /* Benchmarks suggest this gives the best throughput. */
  real_timeout = timeout;
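  /* When the loop was configured with UV_METRICS_IDLE_TIME, stash the
   * caller's timeout in user_timeout and make the first pass non-blocking;
   * reset_timeout later restores the real timeout before we block for real.
   * (Presumably this keeps work that is already pending from being counted
   * as idle time.)
   */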
  if (uv__get_internal_fields(loop)->flags & UV_METRICS_IDLE_TIME) {
    reset_timeout = 1;
    user_timeout = timeout;
    timeout = 0;
  } else {
    reset_timeout = 0;
    user_timeout = 0;
  }

  /* You could argue there is a dependency between these two but
   * ultimately we don't care about their ordering with respect
   * to one another. Worst case, we make a few system calls that
   * could have been avoided because another thread already knows
   * they fail with ENOSYS. Hardly the end of the world.
   */
  no_epoll_pwait = uv__load_relaxed(&no_epoll_pwait_cached);
  no_epoll_wait = uv__load_relaxed(&no_epoll_wait_cached);

  for (;;) {
    /* Only need to set the provider_entry_time if timeout != 0. The function
     * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME.
     */
    if (timeout != 0)
      uv__metrics_set_provider_entry_time(loop);

    /* See the comment for max_safe_timeout for an explanation of why
     * this is necessary. Executive summary: kernel bug workaround.
     */
    if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout)
      timeout = max_safe_timeout;

    if (sigmask != 0 && no_epoll_pwait != 0)
      if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
        abort();

    if (no_epoll_wait != 0 || (sigmask != 0 && no_epoll_pwait == 0)) {
      nfds = epoll_pwait(loop->backend_fd,
                         events,
                         ARRAY_SIZE(events),
                         timeout,
                         &sigset);
      if (nfds == -1 && errno == ENOSYS) {
        uv__store_relaxed(&no_epoll_pwait_cached, 1);
        no_epoll_pwait = 1;
      }
    } else {
      nfds = epoll_wait(loop->backend_fd,
                        events,
                        ARRAY_SIZE(events),
                        timeout);
      if (nfds == -1 && errno == ENOSYS) {
        uv__store_relaxed(&no_epoll_wait_cached, 1);
        no_epoll_wait = 1;
      }
    }

    if (sigmask != 0 && no_epoll_pwait != 0)
      if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL))
        abort();

    /* Update loop->time unconditionally. It's tempting to skip the update when
     * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the
     * operating system didn't reschedule our process while in the syscall.
     */
    SAVE_ERRNO(uv__update_time(loop));

    if (nfds == 0) {
      assert(timeout != -1);

      if (reset_timeout != 0) {
        timeout = user_timeout;
        reset_timeout = 0;
      }

      if (timeout == -1)
        continue;

      if (timeout == 0)
        return;

      /* We may have been inside the system call for longer than |timeout|
       * milliseconds so we need to update the timestamp to avoid drift.
       */
      goto update_timeout;
    }

    if (nfds == -1) {
      if (errno == ENOSYS) {
        /* epoll_wait() or epoll_pwait() failed, try the other system call. */
        assert(no_epoll_wait == 0 || no_epoll_pwait == 0);
        continue;
      }

      if (errno != EINTR)
        abort();

      if (reset_timeout != 0) {
        timeout = user_timeout;
        reset_timeout = 0;
      }

      if (timeout == -1)
        continue;

      if (timeout == 0)
        return;

      /* Interrupted by a signal. Update timeout and poll again. */
      goto update_timeout;
    }

    have_signals = 0;
    nevents = 0;
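    /* Stash the events array and its length in the two spare watcher slots;
     * uv__platform_invalidate_fd() reads them back so that, if a callback
     * below closes a file descriptor, any not-yet-processed event for that
     * fd has its data.fd set to -1 and is skipped by the loop further down.
     */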
    {
      /* Squelch a -Waddress-of-packed-member warning with gcc >= 9. */
      union {
        struct epoll_event* events;
        uv__io_t* watchers;
      } x;

      x.events = events;
      assert(loop->watchers != NULL);
      loop->watchers[loop->nwatchers] = x.watchers;
      loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds;
    }

    for (i = 0; i < nfds; i++) {
      pe = events + i;
      fd = pe->data.fd;

      /* Skip invalidated events, see uv__platform_invalidate_fd */
      if (fd == -1)
        continue;

      assert(fd >= 0);
      assert((unsigned) fd < loop->nwatchers);

      w = loop->watchers[fd];

      if (w == NULL) {
        /* File descriptor that we've stopped watching, disarm it.
         *
         * Ignore all errors because we may be racing with another thread
         * when the file descriptor is closed.
         */
        epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, pe);
        continue;
      }

      /* Give users only events they're interested in. Prevents spurious
       * callbacks when a previous callback invocation in this loop has
       * stopped the current watcher. Also filters out events that the user
       * has not requested us to watch.
       */
      pe->events &= w->pevents | POLLERR | POLLHUP;

      /* Work around an epoll quirk where it sometimes reports just the
       * EPOLLERR or EPOLLHUP event. In order to force the event loop to
       * move forward, we merge in the read/write events that the watcher
       * is interested in; uv__read() and uv__write() will then deal with
       * the error or hangup in the usual fashion.
       *
       * Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user
       * reads the available data, calls uv_read_stop(), then sometime later
       * calls uv_read_start() again. By then, libuv has forgotten about the
       * hangup and the kernel won't report EPOLLIN again because there's
       * nothing left to read. If anything, libuv is to blame here. The
       * current hack is just a quick bandaid; to properly fix it, libuv
       * needs to remember the error/hangup event. We should get that for
       * free when we switch over to edge-triggered I/O.
       */
      if (pe->events == POLLERR || pe->events == POLLHUP)
        pe->events |=
          w->pevents & (POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI);

      if (pe->events != 0) {
        /* Run signal watchers last. This also affects child process watchers
         * because those are implemented in terms of signal watchers.
         */
        if (w == &loop->signal_io_watcher) {
          have_signals = 1;
        } else {
          uv__metrics_update_idle_time(loop);
          w->cb(loop, w, pe->events);
        }

        nevents++;
      }
    }

    if (reset_timeout != 0) {
      timeout = user_timeout;
      reset_timeout = 0;
    }

    if (have_signals != 0) {
      uv__metrics_update_idle_time(loop);
      loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);
    }

    loop->watchers[loop->nwatchers] = NULL;
    loop->watchers[loop->nwatchers + 1] = NULL;

    if (have_signals != 0)
      return;  /* Event loop should cycle now so don't poll again. */

    if (nevents != 0) {
      if (nfds == ARRAY_SIZE(events) && --count != 0) {
        /* Poll for more events but don't block this time. */
        timeout = 0;
        continue;
      }
      return;
    }

    if (timeout == 0)
      return;

    if (timeout == -1)
      continue;

update_timeout:
    assert(timeout > 0);

    real_timeout -= (loop->time - base);
    if (real_timeout <= 0)
      return;

    timeout = real_timeout;
  }
}