Path: blob/main/crypto/openssl/ssl/quic/quic_reactor.c
/*
 * Copyright 2022-2026 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include "internal/quic_reactor.h"
#include "internal/common.h"
#include "internal/thread_arch.h"
#include <assert.h>

#if defined(OPENSSL_SYS_WINDOWS)
#include <winsock2.h>
#include <mstcpip.h>
#include <mswsock.h>
#endif

/*
 * Core I/O Reactor Framework
 * ==========================
 */
static void rtor_notify_other_threads(QUIC_REACTOR *rtor);

int ossl_quic_reactor_init(QUIC_REACTOR *rtor,
                           void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg,
                                           uint32_t flags),
                           void *tick_cb_arg,
                           CRYPTO_MUTEX *mutex,
                           OSSL_TIME initial_tick_deadline,
                           uint64_t flags)
{
    rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    rtor->net_read_desired = 0;
    rtor->net_write_desired = 0;
    rtor->can_poll_r = 0;
    rtor->can_poll_w = 0;
    rtor->tick_deadline = initial_tick_deadline;

    rtor->tick_cb = tick_cb;
    rtor->tick_cb_arg = tick_cb_arg;
    rtor->mutex = mutex;

    rtor->cur_blocking_waiters = 0;

    if ((flags & QUIC_REACTOR_FLAG_USE_NOTIFIER) != 0) {
        if (!ossl_rio_notifier_init(&rtor->notifier))
            return 0;

        if ((rtor->notifier_cv = ossl_crypto_condvar_new()) == NULL) {
            ossl_rio_notifier_cleanup(&rtor->notifier);
            return 0;
        }

        rtor->have_notifier = 1;
    } else {
        rtor->have_notifier = 0;
    }

    return 1;
}

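/*
 * Illustrative sketch only (not code from this file): a caller such as the
 * QUIC channel/engine code supplies a tick callback and then initialises the
 * reactor roughly as follows. The callback name my_tick, the variables rtor,
 * mutex and the err label are hypothetical; the QUIC_TICK_RESULT fields shown
 * are the ones consumed by ossl_quic_reactor_tick() below.
 *
 *   static void my_tick(QUIC_TICK_RESULT *res, void *arg, uint32_t flags)
 *   {
 *       res->net_read_desired     = 1;
 *       res->net_write_desired    = 0;
 *       res->tick_deadline        = ossl_time_infinite();
 *       res->notify_other_threads = 0;
 *   }
 *
 *   if (!ossl_quic_reactor_init(&rtor, my_tick, NULL, mutex,
 *                               ossl_time_infinite(),
 *                               QUIC_REACTOR_FLAG_USE_NOTIFIER))
 *       goto err;
 */
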
void ossl_quic_reactor_cleanup(QUIC_REACTOR *rtor)
{
    if (rtor == NULL)
        return;

    if (rtor->have_notifier) {
        ossl_rio_notifier_cleanup(&rtor->notifier);
        rtor->have_notifier = 0;

        ossl_crypto_condvar_free(&rtor->notifier_cv);
    }
}

#if defined(OPENSSL_SYS_WINDOWS)
/*
 * On Windows, recvfrom() may return WSAECONNRESET when the destination port
 * used in a preceding call to sendto() is no longer reachable. The reset
 * error received on the UDP socket takes the whole port down. This behavior
 * must be suppressed for QUIC so that QUIC applications can rely on the QUIC
 * protocol itself to detect network failures.
 */
static void rtor_configure_winsock(BIO_POLL_DESCRIPTOR *bpd)
{
    BOOL bNewBehavior = FALSE;
    DWORD dwBytesReturned = 0;

    if (bpd->type == BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD) {
        WSAIoctl(bpd->value.fd, SIO_UDP_CONNRESET, &bNewBehavior,
                 sizeof(bNewBehavior), NULL, 0, &dwBytesReturned, NULL, NULL);
        WSAIoctl(bpd->value.fd, SIO_UDP_NETRESET, &bNewBehavior,
                 sizeof(bNewBehavior), NULL, 0, &dwBytesReturned, NULL, NULL);
    }
}
#endif

void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *r)
{
    if (r == NULL)
        rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    else
        rtor->poll_r = *r;

#if defined(OPENSSL_SYS_WINDOWS)
    rtor_configure_winsock(&rtor->poll_r);
#endif

    rtor->can_poll_r
        = ossl_quic_reactor_can_support_poll_descriptor(rtor, &rtor->poll_r);
}

void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *w)
{
    if (w == NULL)
        rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    else
        rtor->poll_w = *w;

#if defined(OPENSSL_SYS_WINDOWS)
    rtor_configure_winsock(&rtor->poll_w);
#endif

    rtor->can_poll_w
        = ossl_quic_reactor_can_support_poll_descriptor(rtor, &rtor->poll_w);
}

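/*
 * Illustrative sketch only: the descriptors passed to the setters above are
 * plain BIO_POLL_DESCRIPTOR structures wrapping a socket FD. The variables
 * sock_fd and rtor below are hypothetical; real callers obtain these
 * descriptors from the network BIOs.
 *
 *   BIO_POLL_DESCRIPTOR d;
 *
 *   d.type     = BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD;
 *   d.value.fd = sock_fd;
 *   ossl_quic_reactor_set_poll_r(&rtor, &d);
 *   ossl_quic_reactor_set_poll_w(&rtor, &d);
 */
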
const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(const QUIC_REACTOR *rtor)
{
    return &rtor->poll_r;
}

const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(const QUIC_REACTOR *rtor)
{
    return &rtor->poll_w;
}

int ossl_quic_reactor_can_support_poll_descriptor(const QUIC_REACTOR *rtor,
                                                  const BIO_POLL_DESCRIPTOR *d)
{
    return d->type == BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD;
}

int ossl_quic_reactor_can_poll_r(const QUIC_REACTOR *rtor)
{
    return rtor->can_poll_r;
}

int ossl_quic_reactor_can_poll_w(const QUIC_REACTOR *rtor)
{
    return rtor->can_poll_w;
}

int ossl_quic_reactor_net_read_desired(QUIC_REACTOR *rtor)
{
    return rtor->net_read_desired;
}

int ossl_quic_reactor_net_write_desired(QUIC_REACTOR *rtor)
{
    return rtor->net_write_desired;
}

OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor)
{
    return rtor->tick_deadline;
}

int ossl_quic_reactor_tick(QUIC_REACTOR *rtor, uint32_t flags)
{
    QUIC_TICK_RESULT res = { 0 };

    /*
     * Note that the tick callback cannot fail; this is intentional. Arguably
     * it does not make that much sense for ticking to 'fail' (in the sense of
     * an explicit error indicated to the user) because ticking is by its
     * nature best effort. If something fatal happens with a connection we can
     * report it on the next actual application I/O call.
     */
    rtor->tick_cb(&res, rtor->tick_cb_arg, flags);

    rtor->net_read_desired = res.net_read_desired;
    rtor->net_write_desired = res.net_write_desired;
    rtor->tick_deadline = res.tick_deadline;
    if (res.notify_other_threads)
        rtor_notify_other_threads(rtor);

    return 1;
}

RIO_NOTIFIER *ossl_quic_reactor_get0_notifier(QUIC_REACTOR *rtor)
{
    return rtor->have_notifier ? &rtor->notifier : NULL;
}

/*
 * Blocking I/O Adaptation Layer
 * =============================
 */

/*
 * Utility which can be used to poll on up to two FDs. This is designed to
 * support use of split FDs (e.g. with SSL_set_rfd and SSL_set_wfd where
 * different FDs are used for read and write).
 *
 * Generally use of poll(2) is preferred where available. Windows, however,
 * hasn't traditionally offered poll(2), only select(2). WSAPoll() was
 * introduced in Vista but has seemingly been buggy until relatively recent
 * versions of Windows 10. Moreover we support XP so this is not a suitable
 * target anyway. However, the traditional issues with select(2) turn out not
 * to be an issue on Windows; whereas traditional *NIX select(2) uses a bitmap
 * of FDs (and thus is limited in the magnitude of the FDs expressible),
 * Windows select(2) is very different. In Windows, socket handles are not
 * allocated contiguously from zero and thus this bitmap approach was
 * infeasible. Thus in adapting the Berkeley sockets API to Windows a
 * different approach was taken whereby the fd_set contains a fixed length
 * array of socket handles and an integer indicating how many entries are
 * valid; thus Windows select() ironically is actually much more like *NIX
 * poll(2) than *NIX select(2). In any case, this means that the relevant
 * limit for Windows select() is the number of FDs being polled, not the
 * magnitude of those FDs. Since we only poll for two FDs here, this limit
 * does not concern us.
 *
 * Usage: rfd and wfd may be the same or different. Either or both may also be
 * -1. If rfd_want_read is 1, rfd is polled for readability, and if
 * wfd_want_write is 1, wfd is polled for writability. Note that since any
 * passed FD is always polled for error conditions, setting rfd_want_read=0
 * and wfd_want_write=0 is not the same as passing -1 for both FDs.
 *
 * deadline is a timestamp to return at. If it is ossl_time_infinite(), the
 * call never times out.
 *
 * Returns 0 on error and 1 on success. Timeout expiry is considered a success
 * condition. We don't elaborate our return values here because the way we are
 * actually using this doesn't currently care.
 *
 * If mutex is non-NULL, it is assumed to be held for write and is unlocked
 * for the duration of the call.
 *
 * Precondition: mutex is NULL or is held for write (unchecked)
 * Postcondition: mutex is NULL or is held for write (unless
 *                CRYPTO_THREAD_write_lock fails)
 */
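/*
 * Illustrative sketch only (hypothetical variables sock_fd, deadline and ok):
 * waiting for readability on a single socket until a deadline, with no
 * separate write FD, no notifier FD and no mutex, would look like:
 *
 *   ok = poll_two_fds(sock_fd, 1, INVALID_SOCKET, 0,
 *                     INVALID_SOCKET, deadline, NULL);
 */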
static int poll_two_fds(int rfd, int rfd_want_read,
                        int wfd, int wfd_want_write,
                        int notify_rfd,
                        OSSL_TIME deadline,
                        CRYPTO_MUTEX *mutex)
{
#if defined(OPENSSL_SYS_WINDOWS) || !defined(POLLIN)
    fd_set rfd_set, wfd_set, efd_set;
    OSSL_TIME now, timeout;
    struct timeval tv, *ptv;
    int maxfd, pres;

#ifndef OPENSSL_SYS_WINDOWS
    /*
     * On Windows there is no relevant limit to the magnitude of a fd value
     * (see above). On *NIX the fd_set uses a bitmap and we must check the
     * limit.
     */
    if (rfd >= FD_SETSIZE || wfd >= FD_SETSIZE)
        return 0;
#endif

    FD_ZERO(&rfd_set);
    FD_ZERO(&wfd_set);
    FD_ZERO(&efd_set);

    if (rfd != INVALID_SOCKET && rfd_want_read)
        openssl_fdset(rfd, &rfd_set);
    if (wfd != INVALID_SOCKET && wfd_want_write)
        openssl_fdset(wfd, &wfd_set);

    /* Always check for error conditions. */
    if (rfd != INVALID_SOCKET)
        openssl_fdset(rfd, &efd_set);
    if (wfd != INVALID_SOCKET)
        openssl_fdset(wfd, &efd_set);

    /* Check for notifier FD readability. */
    if (notify_rfd != INVALID_SOCKET) {
        openssl_fdset(notify_rfd, &rfd_set);
        openssl_fdset(notify_rfd, &efd_set);
    }

    maxfd = rfd;
    if (wfd > maxfd)
        maxfd = wfd;
    if (notify_rfd > maxfd)
        maxfd = notify_rfd;

    if (!ossl_assert(rfd != INVALID_SOCKET || wfd != INVALID_SOCKET
                     || !ossl_time_is_infinite(deadline)))
        /* Do not block forever; should not happen. */
        return 0;

    /*
     * The mutex dance (unlock before and re-lock after the poll/select call)
     * is potentially problematic. It may create a situation where two threads
     * arrive at select/poll with the same file descriptors. We just need to
     * be aware of this.
     */
#if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_unlock(mutex);
#endif

    do {
        /*
         * select expects a timeout, not a deadline, so do the conversion.
         * Update for each call to ensure the correct value is used if we
         * repeat due to EINTR.
         */
        if (ossl_time_is_infinite(deadline)) {
            ptv = NULL;
        } else {
            now = ossl_time_now();
            /*
             * ossl_time_subtract saturates to zero so we don't need to check
             * if now > deadline.
             */
            timeout = ossl_time_subtract(deadline, now);
            tv = ossl_time_to_timeval(timeout);
            ptv = &tv;
        }

        pres = select(maxfd + 1, &rfd_set, &wfd_set, &efd_set, ptv);
    } while (pres == -1 && get_last_socket_error_is_eintr());

#if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_lock(mutex);
#endif

    return pres < 0 ? 0 : 1;
#else
    int pres, timeout_ms;
    OSSL_TIME now, timeout;
    struct pollfd pfds[3] = { 0 };
    size_t npfd = 0;

    if (rfd == wfd) {
        pfds[npfd].fd = rfd;
        pfds[npfd].events = (rfd_want_read ? POLLIN : 0)
                            | (wfd_want_write ? POLLOUT : 0);
        if (rfd >= 0 && pfds[npfd].events != 0)
            ++npfd;
    } else {
        pfds[npfd].fd = rfd;
        pfds[npfd].events = (rfd_want_read ? POLLIN : 0);
        if (rfd >= 0 && pfds[npfd].events != 0)
            ++npfd;

        pfds[npfd].fd = wfd;
        pfds[npfd].events = (wfd_want_write ? POLLOUT : 0);
        if (wfd >= 0 && pfds[npfd].events != 0)
            ++npfd;
    }

    if (notify_rfd >= 0) {
        pfds[npfd].fd = notify_rfd;
        pfds[npfd].events = POLLIN;
        ++npfd;
    }

    if (!ossl_assert(npfd != 0 || !ossl_time_is_infinite(deadline)))
        /* Do not block forever; should not happen. */
        return 0;

#if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_unlock(mutex);
#endif

    do {
        if (ossl_time_is_infinite(deadline)) {
            timeout_ms = -1;
        } else {
            now = ossl_time_now();
            timeout = ossl_time_subtract(deadline, now);
            timeout_ms = ossl_time2ms(timeout);
        }

        pres = poll(pfds, npfd, timeout_ms);
    } while (pres == -1 && get_last_socket_error_is_eintr());

#if defined(OPENSSL_THREADS)
    if (mutex != NULL)
        ossl_crypto_mutex_lock(mutex);
#endif

    return pres < 0 ? 0 : 1;
#endif
}

static int poll_descriptor_to_fd(const BIO_POLL_DESCRIPTOR *d, int *fd)
{
    if (d == NULL || d->type == BIO_POLL_DESCRIPTOR_TYPE_NONE) {
        *fd = INVALID_SOCKET;
        return 1;
    }

    if (d->type != BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD
        || d->value.fd == INVALID_SOCKET)
        return 0;

    *fd = d->value.fd;
    return 1;
}

/*
 * Poll up to two abstract poll descriptors, as well as an optional notify FD.
 * Currently we only support poll descriptors which represent FDs.
 *
 * If mutex is non-NULL, it is assumed to be a lock currently held for write
 * and is unlocked for the duration of any wait.
 *
 * Precondition: mutex is NULL or is held for write (unchecked)
 * Postcondition: mutex is NULL or is held for write (unless
 *                CRYPTO_THREAD_write_lock fails)
 */
static int poll_two_descriptors(const BIO_POLL_DESCRIPTOR *r, int r_want_read,
                                const BIO_POLL_DESCRIPTOR *w, int w_want_write,
                                int notify_rfd,
                                OSSL_TIME deadline,
                                CRYPTO_MUTEX *mutex)
{
    int rfd, wfd;

    if (!poll_descriptor_to_fd(r, &rfd)
        || !poll_descriptor_to_fd(w, &wfd))
        return 0;

    return poll_two_fds(rfd, r_want_read, wfd, w_want_write,
                        notify_rfd, deadline, mutex);
}

/*
 * Notify other threads currently blocking in
 * ossl_quic_reactor_block_until_pred() calls that a predicate they are using
 * might now be met due to state changes.
 *
 * This function must be called after state changes which might cause a
 * predicate in another thread to now be met (i.e., ticking). It is a no-op if
 * inter-thread notification is not being used.
 *
 * The reactor mutex must be held while calling this function.
 */
static void rtor_notify_other_threads(QUIC_REACTOR *rtor)
{
    if (!rtor->have_notifier)
        return;

    /*
     * This function is called when we have done anything on this thread which
     * might allow a predicate for a block_until_pred call on another thread
     * to now be met.
     *
     * When this happens, we need to wake those threads using the notifier.
     * However, we do not want to wake *this* thread (if/when it subsequently
     * enters block_until_pred) due to the notifier FD becoming readable.
     * Therefore, signal the notifier, and use a CV to detect when all other
     * threads have woken.
     */

    if (rtor->cur_blocking_waiters == 0)
        /* Nothing to do in this case. */
        return;

    /* Signal the notifier to wake up all threads. */
    if (!rtor->signalled_notifier) {
        ossl_rio_notifier_signal(&rtor->notifier);
        rtor->signalled_notifier = 1;
    }

    /*
     * Wait on the CV until all threads have finished the first phase of the
     * wakeup process and the last thread out has taken responsibility for
     * unsignalling the notifier.
     */
    while (rtor->signalled_notifier)
        ossl_crypto_condvar_wait(rtor->notifier_cv, rtor->mutex);
}

/*
 * Block until a predicate function evaluates to true.
 *
 * If mutex is non-NULL, it is assumed to be a lock currently held for write
 * and is unlocked for the duration of any wait.
 *
 * Precondition: Must hold channel write lock (unchecked)
 * Precondition: mutex is NULL or is held for write (unchecked)
 * Postcondition: mutex is NULL or is held for write (unless
 *                CRYPTO_THREAD_write_lock fails)
 */
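/*
 * For illustration only (hypothetical caller code, not part of this file):
 * a typical predicate is a cheap check over state protected by the reactor
 * mutex, for example:
 *
 *   static int waiter_is_done(void *arg)
 *   {
 *       struct my_waiter_st *w = arg;
 *
 *       return w->done;
 *   }
 *
 *   ossl_quic_reactor_block_until_pred(rtor, waiter_is_done, &waiter, 0);
 */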
int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor,
                                       int (*pred)(void *arg), void *pred_arg,
                                       uint32_t flags)
{
    int res, net_read_desired, net_write_desired, notifier_fd;
    OSSL_TIME tick_deadline;

    notifier_fd
        = (rtor->have_notifier ? ossl_rio_notifier_as_fd(&rtor->notifier)
                               : INVALID_SOCKET);

    for (;;) {
        if ((flags & SKIP_FIRST_TICK) != 0)
            flags &= ~SKIP_FIRST_TICK;
        else
            /* best effort */
            ossl_quic_reactor_tick(rtor, 0);

        if ((res = pred(pred_arg)) != 0)
            return res;

        net_read_desired = ossl_quic_reactor_net_read_desired(rtor);
        net_write_desired = ossl_quic_reactor_net_write_desired(rtor);
        tick_deadline = ossl_quic_reactor_get_tick_deadline(rtor);
        if (!net_read_desired && !net_write_desired
            && ossl_time_is_infinite(tick_deadline))
            /* Can't wait if there is nothing to wait for. */
            return 0;

        ossl_quic_reactor_enter_blocking_section(rtor);

        res = poll_two_descriptors(ossl_quic_reactor_get_poll_r(rtor),
                                   net_read_desired,
                                   ossl_quic_reactor_get_poll_w(rtor),
                                   net_write_desired,
                                   notifier_fd,
                                   tick_deadline,
                                   rtor->mutex);

        /*
         * We have now exited the OS poller call. The notifier may still be
         * signalled (rtor->signalled_notifier), and other threads may still
         * be blocking. This means that cur_blocking_waiters may still be
         * non-zero. As such, we cannot unsignal the notifier until all
         * threads have had an opportunity to wake up.
         *
         * At the same time, we cannot unsignal only once cur_blocking_waiters
         * reaches zero, because that condition may never occur reliably.
         * Consider the following scenario:
         *
         * T1 enters block_until_pred, cur_blocking_waiters -> 1
         * T2 enters block_until_pred, cur_blocking_waiters -> 2
         * T3 enters block_until_pred, cur_blocking_waiters -> 3
         *
         * T4 enters block_until_pred, does not block, ticks,
         * sees that cur_blocking_waiters > 0 and signals the notifier
         *
         * T3 wakes, cur_blocking_waiters -> 2
         * T3 predicate is not satisfied, cur_blocking_waiters -> 3, block again
         *
         * Notifier is still signalled, so T3 immediately wakes again
         * and is stuck repeating the above steps.
         *
         * T1, T2 are also woken by the notifier but never see
         * cur_blocking_waiters drop to 0, so never unsignal the notifier.
         *
         * As such, a two-phase approach is used to unsignal the notifier:
         *
         * First, all of the poll_two_descriptors calls on all threads are
         * allowed to exit due to the notifier being signalled.
         *
         * Second, the thread which happened to be the one which decremented
         * cur_blocking_waiters to 0 unsignals the notifier and is then
         * responsible for broadcasting on a CV to indicate to the other
         * threads that the synchronised wakeup has been completed. Other
         * threads wait for this CV to be signalled.
         */
        ossl_quic_reactor_leave_blocking_section(rtor);

        if (!res)
            /*
             * We don't actually care why the call succeeded (timeout, FD
             * readiness); we just call reactor_tick and start trying to do
             * I/O things again. If poll_two_fds returns 0, this is some other
             * non-timeout failure and we should stop here.
             *
             * TODO(QUIC FUTURE): In the future we could avoid unnecessary
             * syscalls by not retrying network I/O that isn't ready based
             * on the result of the poll call. However this might be difficult
             * because it requires we do the call to poll(2) or equivalent
             * syscall ourselves, whereas in the general case the application
             * does the polling and just calls SSL_handle_events().
             * Implementing this optimisation in the future will probably
             * therefore require API changes.
             */
            return 0;
    }

    return res;
}

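/*
 * The helpers below implement the two phases described above. Callers such as
 * ossl_quic_reactor_block_until_pred() bracket the OS poll call with
 * enter/leave. The last thread to leave a blocking section while the notifier
 * is signalled unsignals it and broadcasts on notifier_cv, releasing both the
 * other waiters and rtor_notify_other_threads().
 */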
void ossl_quic_reactor_enter_blocking_section(QUIC_REACTOR *rtor)
{
    ++rtor->cur_blocking_waiters;
}

void ossl_quic_reactor_leave_blocking_section(QUIC_REACTOR *rtor)
{
    assert(rtor->cur_blocking_waiters > 0);
    --rtor->cur_blocking_waiters;

    if (rtor->have_notifier && rtor->signalled_notifier) {
        if (rtor->cur_blocking_waiters == 0) {
            ossl_rio_notifier_unsignal(&rtor->notifier);
            rtor->signalled_notifier = 0;

            /*
             * Release the other threads which have woken up (and possibly
             * rtor_notify_other_threads as well).
             */
            ossl_crypto_condvar_broadcast(rtor->notifier_cv);
        } else {
            /* We are not the last waiter out - so wait for that one. */
            while (rtor->signalled_notifier)
                ossl_crypto_condvar_wait(rtor->notifier_cv, rtor->mutex);
        }
    }
}