Path: blob/main/crypto/openssl/ssl/quic/quic_reactor.c
/*
 * Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include "internal/quic_reactor.h"
#include "internal/common.h"
#include "internal/thread_arch.h"
#include <assert.h>

/*
 * Core I/O Reactor Framework
 * ==========================
 */
static void rtor_notify_other_threads(QUIC_REACTOR *rtor);

int ossl_quic_reactor_init(QUIC_REACTOR *rtor,
                           void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg,
                                           uint32_t flags),
                           void *tick_cb_arg,
                           CRYPTO_MUTEX *mutex,
                           OSSL_TIME initial_tick_deadline,
                           uint64_t flags)
{
    rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    rtor->net_read_desired = 0;
    rtor->net_write_desired = 0;
    rtor->can_poll_r = 0;
    rtor->can_poll_w = 0;
    rtor->tick_deadline = initial_tick_deadline;

    rtor->tick_cb = tick_cb;
    rtor->tick_cb_arg = tick_cb_arg;
    rtor->mutex = mutex;

    rtor->cur_blocking_waiters = 0;

    if ((flags & QUIC_REACTOR_FLAG_USE_NOTIFIER) != 0) {
        if (!ossl_rio_notifier_init(&rtor->notifier))
            return 0;

        if ((rtor->notifier_cv = ossl_crypto_condvar_new()) == NULL) {
            ossl_rio_notifier_cleanup(&rtor->notifier);
            return 0;
        }

        rtor->have_notifier = 1;
    } else {
        rtor->have_notifier = 0;
    }

    return 1;
}

void ossl_quic_reactor_cleanup(QUIC_REACTOR *rtor)
{
    if (rtor == NULL)
        return;

    if (rtor->have_notifier) {
        ossl_rio_notifier_cleanup(&rtor->notifier);
        rtor->have_notifier = 0;

        ossl_crypto_condvar_free(&rtor->notifier_cv);
    }
}

void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *r)
{
    if (r == NULL)
        rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    else
        rtor->poll_r = *r;

    rtor->can_poll_r
        = ossl_quic_reactor_can_support_poll_descriptor(rtor, &rtor->poll_r);
}

void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *w)
{
    if (w == NULL)
        rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    else
        rtor->poll_w = *w;

    rtor->can_poll_w
        = ossl_quic_reactor_can_support_poll_descriptor(rtor, &rtor->poll_w);
}

const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(const QUIC_REACTOR *rtor)
{
    return &rtor->poll_r;
}

const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(const QUIC_REACTOR *rtor)
{
    return &rtor->poll_w;
}

int ossl_quic_reactor_can_support_poll_descriptor(const QUIC_REACTOR *rtor,
                                                  const BIO_POLL_DESCRIPTOR *d)
{
    return d->type == BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD;
}

int ossl_quic_reactor_can_poll_r(const QUIC_REACTOR *rtor)
{
    return rtor->can_poll_r;
}

int ossl_quic_reactor_can_poll_w(const QUIC_REACTOR *rtor)
{
    return rtor->can_poll_w;
}

int ossl_quic_reactor_net_read_desired(QUIC_REACTOR *rtor)
{
    return rtor->net_read_desired;
}

int ossl_quic_reactor_net_write_desired(QUIC_REACTOR *rtor)
{
    return rtor->net_write_desired;
}

OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor)
{
    return rtor->tick_deadline;
}

int ossl_quic_reactor_tick(QUIC_REACTOR *rtor, uint32_t flags)
{
    QUIC_TICK_RESULT res = {0};

    /*
     * Note that the tick callback cannot fail; this is intentional. Arguably it
     * does not make that much sense for ticking to 'fail' (in the sense of an
     * explicit error indicated to the user) because ticking is by its nature
     * best effort. If something fatal happens with a connection we can report
     * it on the next actual application I/O call.
     */
    rtor->tick_cb(&res, rtor->tick_cb_arg, flags);

    rtor->net_read_desired = res.net_read_desired;
    rtor->net_write_desired = res.net_write_desired;
    rtor->tick_deadline = res.tick_deadline;
    if (res.notify_other_threads)
        rtor_notify_other_threads(rtor);

    return 1;
}
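
/*
 * Illustrative only (hypothetical callback, not part of this file): a tick
 * callback fills in the QUIC_TICK_RESULT fields consumed above. A trivial
 * callback which never wants network I/O and never needs to be ticked again
 * might look like:
 *
 *     static void null_tick(QUIC_TICK_RESULT *res, void *arg, uint32_t flags)
 *     {
 *         res->net_read_desired     = 0;
 *         res->net_write_desired    = 0;
 *         res->notify_other_threads = 0;
 *         res->tick_deadline        = ossl_time_infinite();
 *     }
 *
 * A real callback requests read/write polling as needed and reports the time
 * of the next protocol event via tick_deadline.
 */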

RIO_NOTIFIER *ossl_quic_reactor_get0_notifier(QUIC_REACTOR *rtor)
{
    return rtor->have_notifier ? &rtor->notifier : NULL;
}

/*
 * Blocking I/O Adaptation Layer
 * =============================
 */

/*
 * Utility which can be used to poll on up to two FDs. This is designed to
 * support use of split FDs (e.g. with SSL_set_rfd and SSL_set_wfd where
 * different FDs are used for read and write).
 *
 * Generally use of poll(2) is preferred where available. Windows, however,
 * hasn't traditionally offered poll(2), only select(2). WSAPoll() was
 * introduced in Vista but has seemingly been buggy until relatively recent
 * versions of Windows 10. Moreover we support XP so this is not a suitable
 * target anyway. However, the traditional issues with select(2) turn out not to
 * be an issue on Windows; whereas traditional *NIX select(2) uses a bitmap of
 * FDs (and thus is limited in the magnitude of the FDs expressible), Windows
 * select(2) is very different. In Windows, socket handles are not allocated
 * contiguously from zero and thus this bitmap approach was infeasible. Thus in
 * adapting the Berkeley sockets API to Windows a different approach was taken
 * whereby the fd_set contains a fixed length array of socket handles and an
 * integer indicating how many entries are valid; thus Windows select()
 * ironically is actually much more like *NIX poll(2) than *NIX select(2). In
 * any case, this means that the relevant limit for Windows select() is the
 * number of FDs being polled, not the magnitude of those FDs. Since we only
 * poll for two FDs here, this limit does not concern us.
 *
 * Usage: rfd and wfd may be the same or different. Either or both may also be
 * -1. If rfd_want_read is 1, rfd is polled for readability, and if
 * wfd_want_write is 1, wfd is polled for writability. Note that since any
 * passed FD is always polled for error conditions, setting rfd_want_read=0 and
 * wfd_want_write=0 is not the same as passing -1 for both FDs.
 *
 * deadline is a timestamp to return at. If it is ossl_time_infinite(), the call
 * never times out.
 *
 * Returns 0 on error and 1 on success. Timeout expiry is considered a success
 * condition. We don't elaborate our return values here because the way we are
 * actually using this doesn't currently care.
 *
 * If mutex is non-NULL, it is assumed to be held for write and is unlocked for
 * the duration of the call.
 *
 * Precondition: mutex is NULL or is held for write (unchecked)
 * Postcondition: mutex is NULL or is held for write (unless
 *                CRYPTO_THREAD_write_lock fails)
 */
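
/*
 * Illustrative only (hypothetical caller; the real call path goes through
 * poll_two_descriptors below): waiting up to 100ms for a single socket to
 * become readable or writable might look like:
 *
 *     OSSL_TIME deadline = ossl_time_add(ossl_time_now(), ossl_ms2time(100));
 *
 *     if (!poll_two_fds(fd, 1, fd, 1, INVALID_SOCKET, deadline, NULL))
 *         return 0;   (reached only on a non-timeout poller failure)
 *
 * Passing the same fd for rfd and wfd is supported; the poll(2) path below
 * coalesces the two into a single pollfd entry. Since timeout expiry also
 * returns 1, a caller which cares about the distinction must track the
 * deadline itself.
 */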
static int poll_two_fds(int rfd, int rfd_want_read,
                        int wfd, int wfd_want_write,
                        int notify_rfd,
                        OSSL_TIME deadline,
                        CRYPTO_MUTEX *mutex)
{
#if defined(OPENSSL_SYS_WINDOWS) || !defined(POLLIN)
    fd_set rfd_set, wfd_set, efd_set;
    OSSL_TIME now, timeout;
    struct timeval tv, *ptv;
    int maxfd, pres;

# ifndef OPENSSL_SYS_WINDOWS
    /*
     * On Windows there is no relevant limit to the magnitude of a fd value (see
     * above). On *NIX the fd_set uses a bitmap and we must check the limit.
     */
    if (rfd >= FD_SETSIZE || wfd >= FD_SETSIZE)
        return 0;
# endif

    FD_ZERO(&rfd_set);
    FD_ZERO(&wfd_set);
    FD_ZERO(&efd_set);

    if (rfd != INVALID_SOCKET && rfd_want_read)
        openssl_fdset(rfd, &rfd_set);
    if (wfd != INVALID_SOCKET && wfd_want_write)
        openssl_fdset(wfd, &wfd_set);

    /* Always check for error conditions. */
    if (rfd != INVALID_SOCKET)
        openssl_fdset(rfd, &efd_set);
    if (wfd != INVALID_SOCKET)
        openssl_fdset(wfd, &efd_set);

    /* Check for notifier FD readability. */
    if (notify_rfd != INVALID_SOCKET) {
        openssl_fdset(notify_rfd, &rfd_set);
        openssl_fdset(notify_rfd, &efd_set);
    }

    maxfd = rfd;
    if (wfd > maxfd)
        maxfd = wfd;
    if (notify_rfd > maxfd)
        maxfd = notify_rfd;

    if (!ossl_assert(rfd != INVALID_SOCKET || wfd != INVALID_SOCKET
                     || !ossl_time_is_infinite(deadline)))
        /* Do not block forever; should not happen. */
        return 0;

    /*
     * The mutex dance (unlock/re-lock around poll/select) is
     * potentially problematic. This may create a situation where
     * two threads arrive at select/poll with the same file
     * descriptors. We just need to be aware of this.
     */
# if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_unlock(mutex);
# endif

    do {
        /*
         * select expects a timeout, not a deadline, so do the conversion.
         * Update for each call to ensure the correct value is used if we repeat
         * due to EINTR.
         */
        if (ossl_time_is_infinite(deadline)) {
            ptv = NULL;
        } else {
            now = ossl_time_now();
            /*
             * ossl_time_subtract saturates to zero so we don't need to check if
             * now > deadline.
             */
            timeout = ossl_time_subtract(deadline, now);
            tv = ossl_time_to_timeval(timeout);
            ptv = &tv;
        }

        pres = select(maxfd + 1, &rfd_set, &wfd_set, &efd_set, ptv);
    } while (pres == -1 && get_last_socket_error_is_eintr());

# if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_lock(mutex);
# endif

    return pres < 0 ? 0 : 1;
#else
    int pres, timeout_ms;
    OSSL_TIME now, timeout;
    struct pollfd pfds[3] = {0};
    size_t npfd = 0;

    if (rfd == wfd) {
        pfds[npfd].fd = rfd;
        pfds[npfd].events = (rfd_want_read ? POLLIN : 0)
                            | (wfd_want_write ? POLLOUT : 0);
        if (rfd >= 0 && pfds[npfd].events != 0)
            ++npfd;
    } else {
        pfds[npfd].fd = rfd;
        pfds[npfd].events = (rfd_want_read ? POLLIN : 0);
        if (rfd >= 0 && pfds[npfd].events != 0)
            ++npfd;

        pfds[npfd].fd = wfd;
        pfds[npfd].events = (wfd_want_write ? POLLOUT : 0);
        if (wfd >= 0 && pfds[npfd].events != 0)
            ++npfd;
    }

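    /*
     * pfds[] now holds at most two network descriptor entries; an entry was
     * only committed (npfd advanced) when its fd was valid and at least one
     * event was requested, so descriptors with nothing to wait for are never
     * passed to poll(2). The notifier FD, if present, occupies a third slot.
     */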
    if (notify_rfd >= 0) {
        pfds[npfd].fd = notify_rfd;
        pfds[npfd].events = POLLIN;
        ++npfd;
    }

    if (!ossl_assert(npfd != 0 || !ossl_time_is_infinite(deadline)))
        /* Do not block forever; should not happen. */
        return 0;

# if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_unlock(mutex);
# endif

    do {
        if (ossl_time_is_infinite(deadline)) {
            timeout_ms = -1;
        } else {
            now = ossl_time_now();
            timeout = ossl_time_subtract(deadline, now);
            timeout_ms = ossl_time2ms(timeout);
        }

        pres = poll(pfds, npfd, timeout_ms);
    } while (pres == -1 && get_last_socket_error_is_eintr());

# if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_lock(mutex);
# endif

    return pres < 0 ? 0 : 1;
#endif
}

static int poll_descriptor_to_fd(const BIO_POLL_DESCRIPTOR *d, int *fd)
{
    if (d == NULL || d->type == BIO_POLL_DESCRIPTOR_TYPE_NONE) {
        *fd = INVALID_SOCKET;
        return 1;
    }

    if (d->type != BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD
        || d->value.fd == INVALID_SOCKET)
        return 0;

    *fd = d->value.fd;
    return 1;
}

/*
 * Poll up to two abstract poll descriptors, as well as an optional notify FD.
 * Currently we only support poll descriptors which represent FDs.
 *
 * If mutex is non-NULL, it is assumed to be a lock currently held for write and
 * is unlocked for the duration of any wait.
 *
 * Precondition: mutex is NULL or is held for write (unchecked)
 * Postcondition: mutex is NULL or is held for write (unless
 *                CRYPTO_THREAD_write_lock fails)
 */
static int poll_two_descriptors(const BIO_POLL_DESCRIPTOR *r, int r_want_read,
                                const BIO_POLL_DESCRIPTOR *w, int w_want_write,
                                int notify_rfd,
                                OSSL_TIME deadline,
                                CRYPTO_MUTEX *mutex)
{
    int rfd, wfd;

    if (!poll_descriptor_to_fd(r, &rfd)
        || !poll_descriptor_to_fd(w, &wfd))
        return 0;

    return poll_two_fds(rfd, r_want_read, wfd, w_want_write,
                        notify_rfd, deadline, mutex);
}

/*
 * Notify other threads currently blocking in
 * ossl_quic_reactor_block_until_pred() calls that a predicate they are using
 * might now be met due to state changes.
 *
 * This function must be called after state changes which might cause a
 * predicate in another thread to now be met (i.e., ticking). It is a no-op if
 * inter-thread notification is not being used.
 *
 * The reactor mutex must be held while calling this function.
 */
static void rtor_notify_other_threads(QUIC_REACTOR *rtor)
{
    if (!rtor->have_notifier)
        return;

    /*
     * This function is called when we have done anything on this thread which
     * might allow a predicate for a block_until_pred call on another thread to
     * now be met.
     *
     * When this happens, we need to wake those threads using the notifier.
     * However, we do not want to wake *this* thread (if/when it subsequently
     * enters block_until_pred) due to the notifier FD becoming readable.
     * Therefore, signal the notifier, and use a CV to detect when all other
     * threads have woken.
     */

    if (rtor->cur_blocking_waiters == 0)
        /* Nothing to do in this case. */
        return;

    /* Signal the notifier to wake up all threads. */
    if (!rtor->signalled_notifier) {
        ossl_rio_notifier_signal(&rtor->notifier);
        rtor->signalled_notifier = 1;
    }

    /*
     * Wait on the CV until all threads have finished the first phase of the
     * wakeup process and the last thread out has taken responsibility for
     * unsignalling the notifier.
     */
    while (rtor->signalled_notifier)
        ossl_crypto_condvar_wait(rtor->notifier_cv, rtor->mutex);
}

/*
 * Block until a predicate function evaluates to true.
 *
 * If mutex is non-NULL, it is assumed to be a lock currently held for write and
 * is unlocked for the duration of any wait.
 *
 * Precondition: Must hold channel write lock (unchecked)
 * Precondition: mutex is NULL or is held for write (unchecked)
 * Postcondition: mutex is NULL or is held for write (unless
 *                CRYPTO_THREAD_write_lock fails)
 */
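
/*
 * Illustrative only (hypothetical caller, not part of this file): a caller
 * waiting for some mutex-protected condition to become true might supply a
 * predicate such as:
 *
 *     static int is_done(void *arg)
 *     {
 *         struct example_ctx *ctx = arg;    (hypothetical caller state)
 *
 *         return ctx->done;                 (nonzero ends the wait)
 *     }
 *
 *     ...
 *     if (!ossl_quic_reactor_block_until_pred(rtor, is_done, &ctx, 0))
 *         goto err;    (nothing to wait for, or poller failure)
 *
 * The predicate runs with the reactor mutex held, immediately after each
 * tick, so it may safely inspect state protected by that mutex.
 */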
int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor,
                                       int (*pred)(void *arg), void *pred_arg,
                                       uint32_t flags)
{
    int res, net_read_desired, net_write_desired, notifier_fd;
    OSSL_TIME tick_deadline;

    notifier_fd
        = (rtor->have_notifier ? ossl_rio_notifier_as_fd(&rtor->notifier)
           : INVALID_SOCKET);

    for (;;) {
        if ((flags & SKIP_FIRST_TICK) != 0)
            flags &= ~SKIP_FIRST_TICK;
        else
            /* best effort */
            ossl_quic_reactor_tick(rtor, 0);

        if ((res = pred(pred_arg)) != 0)
            return res;

        net_read_desired = ossl_quic_reactor_net_read_desired(rtor);
        net_write_desired = ossl_quic_reactor_net_write_desired(rtor);
        tick_deadline = ossl_quic_reactor_get_tick_deadline(rtor);
        if (!net_read_desired && !net_write_desired
            && ossl_time_is_infinite(tick_deadline))
            /* Can't wait if there is nothing to wait for. */
            return 0;

        ossl_quic_reactor_enter_blocking_section(rtor);

        res = poll_two_descriptors(ossl_quic_reactor_get_poll_r(rtor),
                                   net_read_desired,
                                   ossl_quic_reactor_get_poll_w(rtor),
                                   net_write_desired,
                                   notifier_fd,
                                   tick_deadline,
                                   rtor->mutex);

        /*
         * We have now exited the OS poller call. We may have signalled the
         * notifier (rtor->signalled_notifier), and other threads may still be
         * blocking. This means that cur_blocking_waiters may still be
         * non-zero. As such, we cannot unsignal the notifier until all threads
         * have had an opportunity to wake up.
         *
         * At the same time, we cannot unsignal in the case where
         * cur_blocking_waiters is now zero because this condition may not
         * occur reliably. Consider the following scenario:
         *
         *   T1 enters block_until_pred, cur_blocking_waiters -> 1
         *   T2 enters block_until_pred, cur_blocking_waiters -> 2
         *   T3 enters block_until_pred, cur_blocking_waiters -> 3
         *
         *   T4 enters block_until_pred, does not block, ticks,
         *     sees that cur_blocking_waiters > 0 and signals the notifier
         *
         *   T3 wakes, cur_blocking_waiters -> 2
         *   T3 predicate is not satisfied, cur_blocking_waiters -> 3, block again
         *
         *   Notifier is still signalled, so T3 immediately wakes again
         *     and is stuck repeating the above steps.
         *
         *   T1, T2 are also woken by the notifier but never see
         *     cur_blocking_waiters drop to 0, so never unsignal the notifier.
         *
         * As such, a two-phase approach is chosen when unsignalling the
         * notifier:
         *
         *   First, all of the poll_two_descriptors calls on all threads are
         *   allowed to exit due to the notifier being signalled.
         *
         *   Second, the thread which happened to be the one which decremented
         *   cur_blocking_waiters to 0 unsignals the notifier and is then
         *   responsible for broadcasting to a CV to indicate to the other
         *   threads that the synchronised wakeup has been completed. Other
         *   threads wait for this CV to be signalled.
         */
        ossl_quic_reactor_leave_blocking_section(rtor);

        if (!res)
            /*
             * We don't actually care why the call succeeded (timeout, FD
             * readiness); we just call reactor_tick and start trying to do I/O
             * things again. If poll_two_fds returns 0, this is some other
             * non-timeout failure and we should stop here.
             *
             * TODO(QUIC FUTURE): In the future we could avoid unnecessary
             * syscalls by not retrying network I/O that isn't ready based
             * on the result of the poll call. However this might be difficult
             * because it requires we do the call to poll(2) or equivalent
             * syscall ourselves, whereas in the general case the application
             * does the polling and just calls SSL_handle_events().
             * Implementing this optimisation in the future will probably
             * therefore require API changes.
             */
            return 0;
    }

    return res;
}
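
/*
 * These two functions bracket any OS-level wait performed on the reactor's
 * poll descriptors (see the poll_two_descriptors call above).
 * cur_blocking_waiters counts the threads which a notifier signal must wake,
 * and the leave call implements the second phase of the synchronised wakeup
 * described above. Callers are expected to hold the reactor mutex, as
 * ossl_quic_reactor_block_until_pred does; the mutex is only dropped inside
 * the poller itself.
 */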
void ossl_quic_reactor_enter_blocking_section(QUIC_REACTOR *rtor)
{
    ++rtor->cur_blocking_waiters;
}

void ossl_quic_reactor_leave_blocking_section(QUIC_REACTOR *rtor)
{
    assert(rtor->cur_blocking_waiters > 0);
    --rtor->cur_blocking_waiters;

    if (rtor->have_notifier && rtor->signalled_notifier) {
        if (rtor->cur_blocking_waiters == 0) {
            ossl_rio_notifier_unsignal(&rtor->notifier);
            rtor->signalled_notifier = 0;

            /*
             * Release the other threads which have woken up (and possibly
             * rtor_notify_other_threads as well).
             */
            ossl_crypto_condvar_broadcast(rtor->notifier_cv);
        } else {
            /* We are not the last waiter out - so wait for that one. */
            while (rtor->signalled_notifier)
                ossl_crypto_condvar_wait(rtor->notifier_cv, rtor->mutex);
        }
    }
}
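
/*
 * Illustrative timeline (hypothetical, not a fixed schedule): two waiters W1
 * and W2 plus a ticking thread T, all serialised by the reactor mutex except
 * while inside the OS poller:
 *
 *     W1, W2: enter_blocking_section (cur_blocking_waiters -> 2), block in
 *             the poller
 *     T:      ticks, rtor_notify_other_threads signals the notifier and
 *             waits on notifier_cv
 *     W1:     wakes, leave_blocking_section (waiters -> 1), not last out,
 *             waits on notifier_cv
 *     W2:     wakes, leave_blocking_section (waiters -> 0), last out:
 *             unsignals the notifier and broadcasts notifier_cv
 *     T, W1:  resume with the notifier quiescent, so no thread spins on a
 *             stale signal.
 */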