Path: blob/main/crypto/openssl/ssl/quic/quic_reactor.c
/*
 * Copyright 2022-2026 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include "internal/quic_reactor.h"
#include "internal/common.h"
#include "internal/thread_arch.h"
#include <assert.h>

#if defined(OPENSSL_SYS_WINDOWS)
#include <winsock2.h>
#include <mstcpip.h>
#include <mswsock.h>
#endif

/*
 * Core I/O Reactor Framework
 * ==========================
 */
static void rtor_notify_other_threads(QUIC_REACTOR *rtor);

int ossl_quic_reactor_init(QUIC_REACTOR *rtor,
                           void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg,
                                           uint32_t flags),
                           void *tick_cb_arg,
                           CRYPTO_MUTEX *mutex,
                           OSSL_TIME initial_tick_deadline,
                           uint64_t flags)
{
    rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    rtor->net_read_desired = 0;
    rtor->net_write_desired = 0;
    rtor->can_poll_r = 0;
    rtor->can_poll_w = 0;
    rtor->tick_deadline = initial_tick_deadline;

    rtor->tick_cb = tick_cb;
    rtor->tick_cb_arg = tick_cb_arg;
    rtor->mutex = mutex;

    rtor->cur_blocking_waiters = 0;

    if ((flags & QUIC_REACTOR_FLAG_USE_NOTIFIER) != 0) {
        if (!ossl_rio_notifier_init(&rtor->notifier))
            return 0;

        if ((rtor->notifier_cv = ossl_crypto_condvar_new()) == NULL) {
            ossl_rio_notifier_cleanup(&rtor->notifier);
            return 0;
        }

        rtor->have_notifier = 1;
    } else {
        rtor->have_notifier = 0;
    }

    return 1;
}

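/*
 * Illustrative sketch only (not code from this file): a caller such as the
 * QUIC channel/engine code supplies a tick callback and then initialises the
 * reactor roughly as follows. The callback name my_tick, the variables rtor,
 * mutex and the err label are hypothetical; the QUIC_TICK_RESULT fields shown
 * are the ones consumed by ossl_quic_reactor_tick() below.
 *
 *   static void my_tick(QUIC_TICK_RESULT *res, void *arg, uint32_t flags)
 *   {
 *       res->net_read_desired     = 1;
 *       res->net_write_desired    = 0;
 *       res->tick_deadline        = ossl_time_infinite();
 *       res->notify_other_threads = 0;
 *   }
 *
 *   if (!ossl_quic_reactor_init(&rtor, my_tick, NULL, mutex,
 *                               ossl_time_infinite(),
 *                               QUIC_REACTOR_FLAG_USE_NOTIFIER))
 *       goto err;
 */
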
void ossl_quic_reactor_cleanup(QUIC_REACTOR *rtor)
{
    if (rtor == NULL)
        return;

    if (rtor->have_notifier) {
        ossl_rio_notifier_cleanup(&rtor->notifier);
        rtor->have_notifier = 0;

        ossl_crypto_condvar_free(&rtor->notifier_cv);
    }
}

#if defined(OPENSSL_SYS_WINDOWS)
/*
 * On Windows, recvfrom() may return WSAECONNRESET when the destination port
 * used in a preceding call to sendto() is no longer reachable. The reset
 * error received on the UDP socket takes the whole port down. This behavior
 * must be suppressed for QUIC so that QUIC applications can rely on the QUIC
 * protocol itself to detect network failures.
 */
static void rtor_configure_winsock(BIO_POLL_DESCRIPTOR *bpd)
{
    BOOL bNewBehavior = FALSE;
    DWORD dwBytesReturned = 0;

    if (bpd->type == BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD) {
        WSAIoctl(bpd->value.fd, SIO_UDP_CONNRESET, &bNewBehavior,
                 sizeof(bNewBehavior), NULL, 0, &dwBytesReturned, NULL, NULL);
        WSAIoctl(bpd->value.fd, SIO_UDP_NETRESET, &bNewBehavior,
                 sizeof(bNewBehavior), NULL, 0, &dwBytesReturned, NULL, NULL);
    }
}
#endif

void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *r)
{
    if (r == NULL)
        rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    else
        rtor->poll_r = *r;

#if defined(OPENSSL_SYS_WINDOWS)
    rtor_configure_winsock(&rtor->poll_r);
#endif

    rtor->can_poll_r
        = ossl_quic_reactor_can_support_poll_descriptor(rtor, &rtor->poll_r);
}

void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *w)
{
    if (w == NULL)
        rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    else
        rtor->poll_w = *w;

#if defined(OPENSSL_SYS_WINDOWS)
    rtor_configure_winsock(&rtor->poll_w);
#endif

    rtor->can_poll_w
        = ossl_quic_reactor_can_support_poll_descriptor(rtor, &rtor->poll_w);
}

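/*
 * Illustrative sketch only: the descriptors passed to the setters above are
 * plain BIO_POLL_DESCRIPTOR structures wrapping a socket FD. The variables
 * sock_fd and rtor below are hypothetical; real callers obtain these
 * descriptors from the network BIOs.
 *
 *   BIO_POLL_DESCRIPTOR d;
 *
 *   d.type     = BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD;
 *   d.value.fd = sock_fd;
 *   ossl_quic_reactor_set_poll_r(&rtor, &d);
 *   ossl_quic_reactor_set_poll_w(&rtor, &d);
 */
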
const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(const QUIC_REACTOR *rtor)
{
    return &rtor->poll_r;
}

const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(const QUIC_REACTOR *rtor)
{
    return &rtor->poll_w;
}

int ossl_quic_reactor_can_support_poll_descriptor(const QUIC_REACTOR *rtor,
                                                  const BIO_POLL_DESCRIPTOR *d)
{
    return d->type == BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD;
}

int ossl_quic_reactor_can_poll_r(const QUIC_REACTOR *rtor)
{
    return rtor->can_poll_r;
}

int ossl_quic_reactor_can_poll_w(const QUIC_REACTOR *rtor)
{
    return rtor->can_poll_w;
}

int ossl_quic_reactor_net_read_desired(QUIC_REACTOR *rtor)
{
    return rtor->net_read_desired;
}

int ossl_quic_reactor_net_write_desired(QUIC_REACTOR *rtor)
{
    return rtor->net_write_desired;
}

OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor)
{
    return rtor->tick_deadline;
}

int ossl_quic_reactor_tick(QUIC_REACTOR *rtor, uint32_t flags)
{
    QUIC_TICK_RESULT res = { 0 };

    /*
     * Note that the tick callback cannot fail; this is intentional. Arguably
     * it does not make that much sense for ticking to 'fail' (in the sense of
     * an explicit error indicated to the user) because ticking is by its
     * nature best effort. If something fatal happens with a connection we can
     * report it on the next actual application I/O call.
     */
    rtor->tick_cb(&res, rtor->tick_cb_arg, flags);

    rtor->net_read_desired = res.net_read_desired;
    rtor->net_write_desired = res.net_write_desired;
    rtor->tick_deadline = res.tick_deadline;
    if (res.notify_other_threads)
        rtor_notify_other_threads(rtor);

    return 1;
}

RIO_NOTIFIER *ossl_quic_reactor_get0_notifier(QUIC_REACTOR *rtor)
{
    return rtor->have_notifier ? &rtor->notifier : NULL;
}

/*
 * Blocking I/O Adaptation Layer
 * =============================
 */

/*
 * Utility which can be used to poll on up to two FDs. This is designed to
 * support use of split FDs (e.g. with SSL_set_rfd and SSL_set_wfd where
 * different FDs are used for read and write).
 *
 * Generally use of poll(2) is preferred where available. Windows, however,
 * hasn't traditionally offered poll(2), only select(2). WSAPoll() was
 * introduced in Vista but has seemingly been buggy until relatively recent
 * versions of Windows 10. Moreover we support XP so this is not a suitable
 * target anyway. However, the traditional issues with select(2) turn out not
 * to be an issue on Windows; whereas traditional *NIX select(2) uses a bitmap
 * of FDs (and thus is limited in the magnitude of the FDs expressible),
 * Windows select(2) is very different. In Windows, socket handles are not
 * allocated contiguously from zero and thus this bitmap approach was
 * infeasible. Thus in adapting the Berkeley sockets API to Windows a
 * different approach was taken whereby the fd_set contains a fixed length
 * array of socket handles and an integer indicating how many entries are
 * valid; thus Windows select() ironically is actually much more like *NIX
 * poll(2) than *NIX select(2). In any case, this means that the relevant
 * limit for Windows select() is the number of FDs being polled, not the
 * magnitude of those FDs. Since we only poll for two FDs here, this limit
 * does not concern us.
 *
 * Usage: rfd and wfd may be the same or different. Either or both may also be
 * -1. If rfd_want_read is 1, rfd is polled for readability, and if
 * wfd_want_write is 1, wfd is polled for writability. Note that since any
 * passed FD is always polled for error conditions, setting rfd_want_read=0
 * and wfd_want_write=0 is not the same as passing -1 for both FDs.
 *
 * deadline is a timestamp to return at. If it is ossl_time_infinite(), the
 * call never times out.
 *
 * Returns 0 on error and 1 on success. Timeout expiry is considered a success
 * condition. We don't elaborate our return values here because the way we are
 * actually using this doesn't currently care.
 *
 * If mutex is non-NULL, it is assumed to be held for write and is unlocked
 * for the duration of the call.
 *
 * Precondition: mutex is NULL or is held for write (unchecked)
 * Postcondition: mutex is NULL or is held for write (unless
 *                CRYPTO_THREAD_write_lock fails)
 */
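/*
 * Illustrative sketch only (hypothetical variables sock_fd, deadline and ok):
 * waiting for readability on a single socket until a deadline, with no
 * separate write FD, no notifier FD and no mutex, would look like:
 *
 *   ok = poll_two_fds(sock_fd, 1, INVALID_SOCKET, 0,
 *                     INVALID_SOCKET, deadline, NULL);
 */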
static int poll_two_fds(int rfd, int rfd_want_read,
                        int wfd, int wfd_want_write,
                        int notify_rfd,
                        OSSL_TIME deadline,
                        CRYPTO_MUTEX *mutex)
{
#if defined(OPENSSL_SYS_WINDOWS) || !defined(POLLIN)
    fd_set rfd_set, wfd_set, efd_set;
    OSSL_TIME now, timeout;
    struct timeval tv, *ptv;
    int maxfd, pres;

#ifndef OPENSSL_SYS_WINDOWS
    /*
     * On Windows there is no relevant limit to the magnitude of a fd value
     * (see above). On *NIX the fd_set uses a bitmap and we must check the
     * limit.
     */
    if (rfd >= FD_SETSIZE || wfd >= FD_SETSIZE)
        return 0;
#endif

    FD_ZERO(&rfd_set);
    FD_ZERO(&wfd_set);
    FD_ZERO(&efd_set);

    if (rfd != INVALID_SOCKET && rfd_want_read)
        openssl_fdset(rfd, &rfd_set);
    if (wfd != INVALID_SOCKET && wfd_want_write)
        openssl_fdset(wfd, &wfd_set);

    /* Always check for error conditions. */
    if (rfd != INVALID_SOCKET)
        openssl_fdset(rfd, &efd_set);
    if (wfd != INVALID_SOCKET)
        openssl_fdset(wfd, &efd_set);

    /* Check for notifier FD readability. */
    if (notify_rfd != INVALID_SOCKET) {
        openssl_fdset(notify_rfd, &rfd_set);
        openssl_fdset(notify_rfd, &efd_set);
    }

    maxfd = rfd;
    if (wfd > maxfd)
        maxfd = wfd;
    if (notify_rfd > maxfd)
        maxfd = notify_rfd;

    if (!ossl_assert(rfd != INVALID_SOCKET || wfd != INVALID_SOCKET
                     || !ossl_time_is_infinite(deadline)))
        /* Do not block forever; should not happen. */
        return 0;

    /*
     * The mutex dance (unlock before and re-lock after the poll/select call)
     * is potentially problematic. It may create a situation where two threads
     * arrive at select/poll with the same file descriptors. We just need to
     * be aware of this.
     */
#if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_unlock(mutex);
#endif

    do {
        /*
         * select expects a timeout, not a deadline, so do the conversion.
         * Update for each call to ensure the correct value is used if we
         * repeat due to EINTR.
         */
        if (ossl_time_is_infinite(deadline)) {
            ptv = NULL;
        } else {
            now = ossl_time_now();
            /*
             * ossl_time_subtract saturates to zero so we don't need to check
             * if now > deadline.
             */
            timeout = ossl_time_subtract(deadline, now);
            tv = ossl_time_to_timeval(timeout);
            ptv = &tv;
        }

        pres = select(maxfd + 1, &rfd_set, &wfd_set, &efd_set, ptv);
    } while (pres == -1 && get_last_socket_error_is_eintr());

#if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_lock(mutex);
#endif

    return pres < 0 ? 0 : 1;
#else
    int pres, timeout_ms;
    OSSL_TIME now, timeout;
    struct pollfd pfds[3] = { 0 };
    size_t npfd = 0;

    if (rfd == wfd) {
        pfds[npfd].fd = rfd;
        pfds[npfd].events = (rfd_want_read ? POLLIN : 0)
                            | (wfd_want_write ? POLLOUT : 0);
        if (rfd >= 0 && pfds[npfd].events != 0)
            ++npfd;
    } else {
        pfds[npfd].fd = rfd;
        pfds[npfd].events = (rfd_want_read ? POLLIN : 0);
        if (rfd >= 0 && pfds[npfd].events != 0)
            ++npfd;

        pfds[npfd].fd = wfd;
        pfds[npfd].events = (wfd_want_write ? POLLOUT : 0);
        if (wfd >= 0 && pfds[npfd].events != 0)
            ++npfd;
    }

    if (notify_rfd >= 0) {
        pfds[npfd].fd = notify_rfd;
        pfds[npfd].events = POLLIN;
        ++npfd;
    }

    if (!ossl_assert(npfd != 0 || !ossl_time_is_infinite(deadline)))
        /* Do not block forever; should not happen. */
        return 0;

#if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_unlock(mutex);
#endif

    do {
        if (ossl_time_is_infinite(deadline)) {
            timeout_ms = -1;
        } else {
            now = ossl_time_now();
            timeout = ossl_time_subtract(deadline, now);
            timeout_ms = ossl_time2ms(timeout);
        }

        pres = poll(pfds, npfd, timeout_ms);
    } while (pres == -1 && get_last_socket_error_is_eintr());

#if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_lock(mutex);
#endif

    return pres < 0 ? 0 : 1;
#endif
}

static int poll_descriptor_to_fd(const BIO_POLL_DESCRIPTOR *d, int *fd)
{
    if (d == NULL || d->type == BIO_POLL_DESCRIPTOR_TYPE_NONE) {
        *fd = INVALID_SOCKET;
        return 1;
    }

    if (d->type != BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD
        || d->value.fd == INVALID_SOCKET)
        return 0;

    *fd = d->value.fd;
    return 1;
}

/*
 * Poll up to two abstract poll descriptors, as well as an optional notify FD.
 * Currently we only support poll descriptors which represent FDs.
 *
 * If mutex is non-NULL, it is assumed to be a lock currently held for write
 * and is unlocked for the duration of any wait.
 *
 * Precondition: mutex is NULL or is held for write (unchecked)
 * Postcondition: mutex is NULL or is held for write (unless
 *                CRYPTO_THREAD_write_lock fails)
 */
static int poll_two_descriptors(const BIO_POLL_DESCRIPTOR *r, int r_want_read,
                                const BIO_POLL_DESCRIPTOR *w, int w_want_write,
                                int notify_rfd,
                                OSSL_TIME deadline,
                                CRYPTO_MUTEX *mutex)
{
    int rfd, wfd;

    if (!poll_descriptor_to_fd(r, &rfd)
        || !poll_descriptor_to_fd(w, &wfd))
        return 0;

    return poll_two_fds(rfd, r_want_read, wfd, w_want_write,
                        notify_rfd, deadline, mutex);
}

/*
 * Notify other threads currently blocking in
 * ossl_quic_reactor_block_until_pred() calls that a predicate they are using
 * might now be met due to state changes.
 *
 * This function must be called after state changes which might cause a
 * predicate in another thread to now be met (i.e., ticking). It is a no-op if
 * inter-thread notification is not being used.
 *
 * The reactor mutex must be held while calling this function.
 */
static void rtor_notify_other_threads(QUIC_REACTOR *rtor)
{
    if (!rtor->have_notifier)
        return;

    /*
     * This function is called when we have done anything on this thread which
     * might allow a predicate for a block_until_pred call on another thread
     * to now be met.
     *
     * When this happens, we need to wake those threads using the notifier.
     * However, we do not want to wake *this* thread (if/when it subsequently
     * enters block_until_pred) due to the notifier FD becoming readable.
     * Therefore, signal the notifier, and use a CV to detect when all other
     * threads have woken.
     */

    if (rtor->cur_blocking_waiters == 0)
        /* Nothing to do in this case. */
        return;

    /* Signal the notifier to wake up all threads. */
    if (!rtor->signalled_notifier) {
        ossl_rio_notifier_signal(&rtor->notifier);
        rtor->signalled_notifier = 1;
    }

    /*
     * Wait on the CV until all threads have finished the first phase of the
     * wakeup process and the last thread out has taken responsibility for
     * unsignalling the notifier.
     */
    while (rtor->signalled_notifier)
        ossl_crypto_condvar_wait(rtor->notifier_cv, rtor->mutex);
}

/*
 * Block until a predicate function evaluates to true.
 *
 * If mutex is non-NULL, it is assumed to be a lock currently held for write
 * and is unlocked for the duration of any wait.
 *
 * Precondition: Must hold channel write lock (unchecked)
 * Precondition: mutex is NULL or is held for write (unchecked)
 * Postcondition: mutex is NULL or is held for write (unless
 *                CRYPTO_THREAD_write_lock fails)
 */
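/*
 * For illustration only (hypothetical caller code, not part of this file):
 * a typical predicate is a cheap check over state protected by the reactor
 * mutex, for example:
 *
 *   static int waiter_is_done(void *arg)
 *   {
 *       struct my_waiter_st *w = arg;
 *
 *       return w->done;
 *   }
 *
 *   ossl_quic_reactor_block_until_pred(rtor, waiter_is_done, &waiter, 0);
 */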
int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor,
                                       int (*pred)(void *arg), void *pred_arg,
                                       uint32_t flags)
{
    int res, net_read_desired, net_write_desired, notifier_fd;
    OSSL_TIME tick_deadline;

    notifier_fd
        = (rtor->have_notifier ? ossl_rio_notifier_as_fd(&rtor->notifier)
                               : INVALID_SOCKET);

    for (;;) {
        if ((flags & SKIP_FIRST_TICK) != 0)
            flags &= ~SKIP_FIRST_TICK;
        else
            /* best effort */
            ossl_quic_reactor_tick(rtor, 0);

        if ((res = pred(pred_arg)) != 0)
            return res;

        net_read_desired = ossl_quic_reactor_net_read_desired(rtor);
        net_write_desired = ossl_quic_reactor_net_write_desired(rtor);
        tick_deadline = ossl_quic_reactor_get_tick_deadline(rtor);
        if (!net_read_desired && !net_write_desired
            && ossl_time_is_infinite(tick_deadline))
            /* Can't wait if there is nothing to wait for. */
            return 0;

        ossl_quic_reactor_enter_blocking_section(rtor);

        res = poll_two_descriptors(ossl_quic_reactor_get_poll_r(rtor),
                                   net_read_desired,
                                   ossl_quic_reactor_get_poll_w(rtor),
                                   net_write_desired,
                                   notifier_fd,
                                   tick_deadline,
                                   rtor->mutex);

        /*
         * We have now exited the OS poller call. The notifier may still be
         * signalled (rtor->signalled_notifier), and other threads may still
         * be blocking. This means that cur_blocking_waiters may still be
         * non-zero. As such, we cannot unsignal the notifier until all
         * threads have had an opportunity to wake up.
         *
         * At the same time, we cannot unsignal only once cur_blocking_waiters
         * reaches zero, because that condition may never occur reliably.
         * Consider the following scenario:
         *
         * T1 enters block_until_pred, cur_blocking_waiters -> 1
         * T2 enters block_until_pred, cur_blocking_waiters -> 2
         * T3 enters block_until_pred, cur_blocking_waiters -> 3
         *
         * T4 enters block_until_pred, does not block, ticks,
         * sees that cur_blocking_waiters > 0 and signals the notifier
         *
         * T3 wakes, cur_blocking_waiters -> 2
         * T3 predicate is not satisfied, cur_blocking_waiters -> 3, block again
         *
         * Notifier is still signalled, so T3 immediately wakes again
         * and is stuck repeating the above steps.
         *
         * T1, T2 are also woken by the notifier but never see
         * cur_blocking_waiters drop to 0, so never unsignal the notifier.
         *
         * As such, a two-phase approach is used to unsignal the notifier:
         *
         * First, all of the poll_two_descriptors calls on all threads are
         * allowed to exit due to the notifier being signalled.
         *
         * Second, the thread which happened to be the one which decremented
         * cur_blocking_waiters to 0 unsignals the notifier and is then
         * responsible for broadcasting on a CV to indicate to the other
         * threads that the synchronised wakeup has been completed. Other
         * threads wait for this CV to be signalled.
         */
        ossl_quic_reactor_leave_blocking_section(rtor);

        if (!res)
            /*
             * We don't actually care why the call succeeded (timeout, FD
             * readiness); we just call reactor_tick and start trying to do
             * I/O things again. If poll_two_fds returns 0, this is some other
             * non-timeout failure and we should stop here.
             *
             * TODO(QUIC FUTURE): In the future we could avoid unnecessary
             * syscalls by not retrying network I/O that isn't ready based
             * on the result of the poll call. However this might be difficult
             * because it requires we do the call to poll(2) or equivalent
             * syscall ourselves, whereas in the general case the application
             * does the polling and just calls SSL_handle_events().
             * Implementing this optimisation in the future will probably
             * therefore require API changes.
             */
            return 0;
    }

    return res;
}

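/*
 * The helpers below implement the two phases described above. Callers such as
 * ossl_quic_reactor_block_until_pred() bracket the OS poll call with
 * enter/leave. The last thread to leave a blocking section while the notifier
 * is signalled unsignals it and broadcasts on notifier_cv, releasing both the
 * other waiters and rtor_notify_other_threads().
 */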
void ossl_quic_reactor_enter_blocking_section(QUIC_REACTOR *rtor)
{
    ++rtor->cur_blocking_waiters;
}

void ossl_quic_reactor_leave_blocking_section(QUIC_REACTOR *rtor)
{
    assert(rtor->cur_blocking_waiters > 0);
    --rtor->cur_blocking_waiters;

    if (rtor->have_notifier && rtor->signalled_notifier) {
        if (rtor->cur_blocking_waiters == 0) {
            ossl_rio_notifier_unsignal(&rtor->notifier);
            rtor->signalled_notifier = 0;

            /*
             * Release the other threads which have woken up (and possibly
             * rtor_notify_other_threads as well).
             */
            ossl_crypto_condvar_broadcast(rtor->notifier_cv);
        } else {
            /* We are not the last waiter out - so wait for that one. */
            while (rtor->signalled_notifier)
                ossl_crypto_condvar_wait(rtor->notifier_cv, rtor->mutex);
        }
    }
}