/* Path: src/java.base/windows/native/libnio/ch/wepoll.c */
/*
 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * This file is available under and governed by the GNU General Public
 * License version 2 only, as published by the Free Software Foundation.
 * However, the following notice accompanied the original version of this
 * file and, per its terms, should not be removed:
 *
 * wepoll - epoll for Windows
 * https://github.com/piscisaureus/wepoll
 *
 * Copyright 2012-2020, Bert Belder <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef WEPOLL_EXPORT
#define WEPOLL_EXPORT
#endif

#include <stdint.h>

/* Event bits mirroring Linux epoll's EPOLL* values where they exist. */
enum EPOLL_EVENTS {
  EPOLLIN = (int) (1U << 0),
  EPOLLPRI = (int) (1U << 1),
  EPOLLOUT = (int) (1U << 2),
  EPOLLERR = (int) (1U << 3),
  EPOLLHUP = (int) (1U << 4),
  EPOLLRDNORM = (int) (1U << 6),
  EPOLLRDBAND = (int) (1U << 7),
  EPOLLWRNORM = (int) (1U << 8),
  EPOLLWRBAND = (int) (1U << 9),
  EPOLLMSG = (int) (1U << 10), /* Never reported. */
  EPOLLRDHUP = (int) (1U << 13),
  EPOLLONESHOT = (int) (1U << 31)
};
*/78EPOLLRDHUP = (int) (1U << 13),79EPOLLONESHOT = (int) (1U << 31)80};8182#define EPOLLIN (1U << 0)83#define EPOLLPRI (1U << 1)84#define EPOLLOUT (1U << 2)85#define EPOLLERR (1U << 3)86#define EPOLLHUP (1U << 4)87#define EPOLLRDNORM (1U << 6)88#define EPOLLRDBAND (1U << 7)89#define EPOLLWRNORM (1U << 8)90#define EPOLLWRBAND (1U << 9)91#define EPOLLMSG (1U << 10)92#define EPOLLRDHUP (1U << 13)93#define EPOLLONESHOT (1U << 31)9495#define EPOLL_CTL_ADD 196#define EPOLL_CTL_MOD 297#define EPOLL_CTL_DEL 39899typedef void* HANDLE;100typedef uintptr_t SOCKET;101102typedef union epoll_data {103void* ptr;104int fd;105uint32_t u32;106uint64_t u64;107SOCKET sock; /* Windows specific */108HANDLE hnd; /* Windows specific */109} epoll_data_t;110111struct epoll_event {112uint32_t events; /* Epoll events and flags */113epoll_data_t data; /* User data variable */114};115116#ifdef __cplusplus117extern "C" {118#endif119120WEPOLL_EXPORT HANDLE epoll_create(int size);121WEPOLL_EXPORT HANDLE epoll_create1(int flags);122123WEPOLL_EXPORT int epoll_close(HANDLE ephnd);124125WEPOLL_EXPORT int epoll_ctl(HANDLE ephnd,126int op,127SOCKET sock,128struct epoll_event* event);129130WEPOLL_EXPORT int epoll_wait(HANDLE ephnd,131struct epoll_event* events,132int maxevents,133int timeout);134135#ifdef __cplusplus136} /* extern "C" */137#endif138139#include <assert.h>140141#include <stdlib.h>142143#define WEPOLL_INTERNAL static144#define WEPOLL_INTERNAL_EXTERN static145146#if defined(__clang__)147#pragma clang diagnostic push148#pragma clang diagnostic ignored "-Wnonportable-system-include-path"149#pragma clang diagnostic ignored "-Wreserved-id-macro"150#elif defined(_MSC_VER)151#pragma warning(push, 1)152#endif153154#undef WIN32_LEAN_AND_MEAN155#define WIN32_LEAN_AND_MEAN156157#undef _WIN32_WINNT158#define _WIN32_WINNT 0x0600159160#include <winsock2.h>161#include <ws2tcpip.h>162#include <windows.h>163164#if defined(__clang__)165#pragma clang diagnostic pop166#elif defined(_MSC_VER)167#pragma 
warning(pop)168#endif169170WEPOLL_INTERNAL int nt_global_init(void);171172typedef LONG NTSTATUS;173typedef NTSTATUS* PNTSTATUS;174175#ifndef NT_SUCCESS176#define NT_SUCCESS(status) (((NTSTATUS)(status)) >= 0)177#endif178179#ifndef STATUS_SUCCESS180#define STATUS_SUCCESS ((NTSTATUS) 0x00000000L)181#endif182183#ifndef STATUS_PENDING184#define STATUS_PENDING ((NTSTATUS) 0x00000103L)185#endif186187#ifndef STATUS_CANCELLED188#define STATUS_CANCELLED ((NTSTATUS) 0xC0000120L)189#endif190191#ifndef STATUS_NOT_FOUND192#define STATUS_NOT_FOUND ((NTSTATUS) 0xC0000225L)193#endif194195typedef struct _IO_STATUS_BLOCK {196NTSTATUS Status;197ULONG_PTR Information;198} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;199200typedef VOID(NTAPI* PIO_APC_ROUTINE)(PVOID ApcContext,201PIO_STATUS_BLOCK IoStatusBlock,202ULONG Reserved);203204typedef struct _UNICODE_STRING {205USHORT Length;206USHORT MaximumLength;207PWSTR Buffer;208} UNICODE_STRING, *PUNICODE_STRING;209210#define RTL_CONSTANT_STRING(s) \211{ sizeof(s) - sizeof((s)[0]), sizeof(s), s }212213typedef struct _OBJECT_ATTRIBUTES {214ULONG Length;215HANDLE RootDirectory;216PUNICODE_STRING ObjectName;217ULONG Attributes;218PVOID SecurityDescriptor;219PVOID SecurityQualityOfService;220} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES;221222#define RTL_CONSTANT_OBJECT_ATTRIBUTES(ObjectName, Attributes) \223{ sizeof(OBJECT_ATTRIBUTES), NULL, ObjectName, Attributes, NULL, NULL }224225#ifndef FILE_OPEN226#define FILE_OPEN 0x00000001UL227#endif228229#define KEYEDEVENT_WAIT 0x00000001UL230#define KEYEDEVENT_WAKE 0x00000002UL231#define KEYEDEVENT_ALL_ACCESS \232(STANDARD_RIGHTS_REQUIRED | KEYEDEVENT_WAIT | KEYEDEVENT_WAKE)233234#define NT_NTDLL_IMPORT_LIST(X) \235X(NTSTATUS, \236NTAPI, \237NtCancelIoFileEx, \238(HANDLE FileHandle, \239PIO_STATUS_BLOCK IoRequestToCancel, \240PIO_STATUS_BLOCK IoStatusBlock)) \241\242X(NTSTATUS, \243NTAPI, \244NtCreateFile, \245(PHANDLE FileHandle, \246ACCESS_MASK DesiredAccess, \247POBJECT_ATTRIBUTES ObjectAttributes, 
\248PIO_STATUS_BLOCK IoStatusBlock, \249PLARGE_INTEGER AllocationSize, \250ULONG FileAttributes, \251ULONG ShareAccess, \252ULONG CreateDisposition, \253ULONG CreateOptions, \254PVOID EaBuffer, \255ULONG EaLength)) \256\257X(NTSTATUS, \258NTAPI, \259NtCreateKeyedEvent, \260(PHANDLE KeyedEventHandle, \261ACCESS_MASK DesiredAccess, \262POBJECT_ATTRIBUTES ObjectAttributes, \263ULONG Flags)) \264\265X(NTSTATUS, \266NTAPI, \267NtDeviceIoControlFile, \268(HANDLE FileHandle, \269HANDLE Event, \270PIO_APC_ROUTINE ApcRoutine, \271PVOID ApcContext, \272PIO_STATUS_BLOCK IoStatusBlock, \273ULONG IoControlCode, \274PVOID InputBuffer, \275ULONG InputBufferLength, \276PVOID OutputBuffer, \277ULONG OutputBufferLength)) \278\279X(NTSTATUS, \280NTAPI, \281NtReleaseKeyedEvent, \282(HANDLE KeyedEventHandle, \283PVOID KeyValue, \284BOOLEAN Alertable, \285PLARGE_INTEGER Timeout)) \286\287X(NTSTATUS, \288NTAPI, \289NtWaitForKeyedEvent, \290(HANDLE KeyedEventHandle, \291PVOID KeyValue, \292BOOLEAN Alertable, \293PLARGE_INTEGER Timeout)) \294\295X(ULONG, WINAPI, RtlNtStatusToDosError, (NTSTATUS Status))296297#define X(return_type, attributes, name, parameters) \298WEPOLL_INTERNAL_EXTERN return_type(attributes* name) parameters;299NT_NTDLL_IMPORT_LIST(X)300#undef X301302#define AFD_POLL_RECEIVE 0x0001303#define AFD_POLL_RECEIVE_EXPEDITED 0x0002304#define AFD_POLL_SEND 0x0004305#define AFD_POLL_DISCONNECT 0x0008306#define AFD_POLL_ABORT 0x0010307#define AFD_POLL_LOCAL_CLOSE 0x0020308#define AFD_POLL_ACCEPT 0x0080309#define AFD_POLL_CONNECT_FAIL 0x0100310311typedef struct _AFD_POLL_HANDLE_INFO {312HANDLE Handle;313ULONG Events;314NTSTATUS Status;315} AFD_POLL_HANDLE_INFO, *PAFD_POLL_HANDLE_INFO;316317typedef struct _AFD_POLL_INFO {318LARGE_INTEGER Timeout;319ULONG NumberOfHandles;320ULONG Exclusive;321AFD_POLL_HANDLE_INFO Handles[1];322} AFD_POLL_INFO, *PAFD_POLL_INFO;323324WEPOLL_INTERNAL int afd_create_device_handle(HANDLE iocp_handle,325HANDLE* afd_device_handle_out);326327WEPOLL_INTERNAL 
int afd_poll(HANDLE afd_device_handle,328AFD_POLL_INFO* poll_info,329IO_STATUS_BLOCK* io_status_block);330WEPOLL_INTERNAL int afd_cancel_poll(HANDLE afd_device_handle,331IO_STATUS_BLOCK* io_status_block);332333#define return_map_error(value) \334do { \335err_map_win_error(); \336return (value); \337} while (0)338339#define return_set_error(value, error) \340do { \341err_set_win_error(error); \342return (value); \343} while (0)344345WEPOLL_INTERNAL void err_map_win_error(void);346WEPOLL_INTERNAL void err_set_win_error(DWORD error);347WEPOLL_INTERNAL int err_check_handle(HANDLE handle);348349#define IOCTL_AFD_POLL 0x00012024350351static UNICODE_STRING afd__device_name =352RTL_CONSTANT_STRING(L"\\Device\\Afd\\Wepoll");353354static OBJECT_ATTRIBUTES afd__device_attributes =355RTL_CONSTANT_OBJECT_ATTRIBUTES(&afd__device_name, 0);356357int afd_create_device_handle(HANDLE iocp_handle,358HANDLE* afd_device_handle_out) {359HANDLE afd_device_handle;360IO_STATUS_BLOCK iosb;361NTSTATUS status;362363/* By opening \Device\Afd without specifying any extended attributes, we'll364* get a handle that lets us talk to the AFD driver, but that doesn't have an365* associated endpoint (so it's not a socket). */366status = NtCreateFile(&afd_device_handle,367SYNCHRONIZE,368&afd__device_attributes,369&iosb,370NULL,3710,372FILE_SHARE_READ | FILE_SHARE_WRITE,373FILE_OPEN,3740,375NULL,3760);377if (status != STATUS_SUCCESS)378return_set_error(-1, RtlNtStatusToDosError(status));379380if (CreateIoCompletionPort(afd_device_handle, iocp_handle, 0, 0) == NULL)381goto error;382383if (!SetFileCompletionNotificationModes(afd_device_handle,384FILE_SKIP_SET_EVENT_ON_HANDLE))385goto error;386387*afd_device_handle_out = afd_device_handle;388return 0;389390error:391CloseHandle(afd_device_handle);392return_map_error(-1);393}394395int afd_poll(HANDLE afd_device_handle,396AFD_POLL_INFO* poll_info,397IO_STATUS_BLOCK* io_status_block) {398NTSTATUS status;399400/* Blocking operation is not supported. 
*/401assert(io_status_block != NULL);402403io_status_block->Status = STATUS_PENDING;404status = NtDeviceIoControlFile(afd_device_handle,405NULL,406NULL,407io_status_block,408io_status_block,409IOCTL_AFD_POLL,410poll_info,411sizeof *poll_info,412poll_info,413sizeof *poll_info);414415if (status == STATUS_SUCCESS)416return 0;417else if (status == STATUS_PENDING)418return_set_error(-1, ERROR_IO_PENDING);419else420return_set_error(-1, RtlNtStatusToDosError(status));421}422423int afd_cancel_poll(HANDLE afd_device_handle,424IO_STATUS_BLOCK* io_status_block) {425NTSTATUS cancel_status;426IO_STATUS_BLOCK cancel_iosb;427428/* If the poll operation has already completed or has been cancelled earlier,429* there's nothing left for us to do. */430if (io_status_block->Status != STATUS_PENDING)431return 0;432433cancel_status =434NtCancelIoFileEx(afd_device_handle, io_status_block, &cancel_iosb);435436/* NtCancelIoFileEx() may return STATUS_NOT_FOUND if the operation completed437* just before calling NtCancelIoFileEx(). This is not an error. 
*/438if (cancel_status == STATUS_SUCCESS || cancel_status == STATUS_NOT_FOUND)439return 0;440else441return_set_error(-1, RtlNtStatusToDosError(cancel_status));442}443444WEPOLL_INTERNAL int epoll_global_init(void);445446WEPOLL_INTERNAL int init(void);447448typedef struct port_state port_state_t;449typedef struct queue queue_t;450typedef struct sock_state sock_state_t;451typedef struct ts_tree_node ts_tree_node_t;452453WEPOLL_INTERNAL port_state_t* port_new(HANDLE* iocp_handle_out);454WEPOLL_INTERNAL int port_close(port_state_t* port_state);455WEPOLL_INTERNAL int port_delete(port_state_t* port_state);456457WEPOLL_INTERNAL int port_wait(port_state_t* port_state,458struct epoll_event* events,459int maxevents,460int timeout);461462WEPOLL_INTERNAL int port_ctl(port_state_t* port_state,463int op,464SOCKET sock,465struct epoll_event* ev);466467WEPOLL_INTERNAL int port_register_socket(port_state_t* port_state,468sock_state_t* sock_state,469SOCKET socket);470WEPOLL_INTERNAL void port_unregister_socket(port_state_t* port_state,471sock_state_t* sock_state);472WEPOLL_INTERNAL sock_state_t* port_find_socket(port_state_t* port_state,473SOCKET socket);474475WEPOLL_INTERNAL void port_request_socket_update(port_state_t* port_state,476sock_state_t* sock_state);477WEPOLL_INTERNAL void port_cancel_socket_update(port_state_t* port_state,478sock_state_t* sock_state);479480WEPOLL_INTERNAL void port_add_deleted_socket(port_state_t* port_state,481sock_state_t* sock_state);482WEPOLL_INTERNAL void port_remove_deleted_socket(port_state_t* port_state,483sock_state_t* sock_state);484485WEPOLL_INTERNAL HANDLE port_get_iocp_handle(port_state_t* port_state);486WEPOLL_INTERNAL queue_t* port_get_poll_group_queue(port_state_t* port_state);487488WEPOLL_INTERNAL port_state_t* port_state_from_handle_tree_node(489ts_tree_node_t* tree_node);490WEPOLL_INTERNAL ts_tree_node_t* port_state_to_handle_tree_node(491port_state_t* port_state);492493/* The reflock is a special kind of lock that normally prevents a 
chunk of494* memory from being freed, but does allow the chunk of memory to eventually be495* released in a coordinated fashion.496*497* Under normal operation, threads increase and decrease the reference count,498* which are wait-free operations.499*500* Exactly once during the reflock's lifecycle, a thread holding a reference to501* the lock may "destroy" the lock; this operation blocks until all other502* threads holding a reference to the lock have dereferenced it. After503* "destroy" returns, the calling thread may assume that no other threads have504* a reference to the lock.505*506* Attemmpting to lock or destroy a lock after reflock_unref_and_destroy() has507* been called is invalid and results in undefined behavior. Therefore the user508* should use another lock to guarantee that this can't happen.509*/510511typedef struct reflock {512volatile long state; /* 32-bit Interlocked APIs operate on `long` values. */513} reflock_t;514515WEPOLL_INTERNAL int reflock_global_init(void);516517WEPOLL_INTERNAL void reflock_init(reflock_t* reflock);518WEPOLL_INTERNAL void reflock_ref(reflock_t* reflock);519WEPOLL_INTERNAL void reflock_unref(reflock_t* reflock);520WEPOLL_INTERNAL void reflock_unref_and_destroy(reflock_t* reflock);521522#include <stdbool.h>523524/* N.b.: the tree functions do not set errno or LastError when they fail. Each525* of the API functions has at most one failure mode. It is up to the caller to526* set an appropriate error code when necessary. 
*/527528typedef struct tree tree_t;529typedef struct tree_node tree_node_t;530531typedef struct tree {532tree_node_t* root;533} tree_t;534535typedef struct tree_node {536tree_node_t* left;537tree_node_t* right;538tree_node_t* parent;539uintptr_t key;540bool red;541} tree_node_t;542543WEPOLL_INTERNAL void tree_init(tree_t* tree);544WEPOLL_INTERNAL void tree_node_init(tree_node_t* node);545546WEPOLL_INTERNAL int tree_add(tree_t* tree, tree_node_t* node, uintptr_t key);547WEPOLL_INTERNAL void tree_del(tree_t* tree, tree_node_t* node);548549WEPOLL_INTERNAL tree_node_t* tree_find(const tree_t* tree, uintptr_t key);550WEPOLL_INTERNAL tree_node_t* tree_root(const tree_t* tree);551552typedef struct ts_tree {553tree_t tree;554SRWLOCK lock;555} ts_tree_t;556557typedef struct ts_tree_node {558tree_node_t tree_node;559reflock_t reflock;560} ts_tree_node_t;561562WEPOLL_INTERNAL void ts_tree_init(ts_tree_t* rtl);563WEPOLL_INTERNAL void ts_tree_node_init(ts_tree_node_t* node);564565WEPOLL_INTERNAL int ts_tree_add(ts_tree_t* ts_tree,566ts_tree_node_t* node,567uintptr_t key);568569WEPOLL_INTERNAL ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree,570uintptr_t key);571WEPOLL_INTERNAL ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree,572uintptr_t key);573574WEPOLL_INTERNAL void ts_tree_node_unref(ts_tree_node_t* node);575WEPOLL_INTERNAL void ts_tree_node_unref_and_destroy(ts_tree_node_t* node);576577static ts_tree_t epoll__handle_tree;578579int epoll_global_init(void) {580ts_tree_init(&epoll__handle_tree);581return 0;582}583584static HANDLE epoll__create(void) {585port_state_t* port_state;586HANDLE ephnd;587ts_tree_node_t* tree_node;588589if (init() < 0)590return NULL;591592port_state = port_new(&ephnd);593if (port_state == NULL)594return NULL;595596tree_node = port_state_to_handle_tree_node(port_state);597if (ts_tree_add(&epoll__handle_tree, tree_node, (uintptr_t) ephnd) < 0) {598/* This should never happen. 
*/599port_delete(port_state);600return_set_error(NULL, ERROR_ALREADY_EXISTS);601}602603return ephnd;604}605606HANDLE epoll_create(int size) {607if (size <= 0)608return_set_error(NULL, ERROR_INVALID_PARAMETER);609610return epoll__create();611}612613HANDLE epoll_create1(int flags) {614if (flags != 0)615return_set_error(NULL, ERROR_INVALID_PARAMETER);616617return epoll__create();618}619620int epoll_close(HANDLE ephnd) {621ts_tree_node_t* tree_node;622port_state_t* port_state;623624if (init() < 0)625return -1;626627tree_node = ts_tree_del_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);628if (tree_node == NULL) {629err_set_win_error(ERROR_INVALID_PARAMETER);630goto err;631}632633port_state = port_state_from_handle_tree_node(tree_node);634port_close(port_state);635636ts_tree_node_unref_and_destroy(tree_node);637638return port_delete(port_state);639640err:641err_check_handle(ephnd);642return -1;643}644645int epoll_ctl(HANDLE ephnd, int op, SOCKET sock, struct epoll_event* ev) {646ts_tree_node_t* tree_node;647port_state_t* port_state;648int r;649650if (init() < 0)651return -1;652653tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);654if (tree_node == NULL) {655err_set_win_error(ERROR_INVALID_PARAMETER);656goto err;657}658659port_state = port_state_from_handle_tree_node(tree_node);660r = port_ctl(port_state, op, sock, ev);661662ts_tree_node_unref(tree_node);663664if (r < 0)665goto err;666667return 0;668669err:670/* On Linux, in the case of epoll_ctl(), EBADF takes priority over other671* errors. Wepoll mimics this behavior. 
*/672err_check_handle(ephnd);673err_check_handle((HANDLE) sock);674return -1;675}676677int epoll_wait(HANDLE ephnd,678struct epoll_event* events,679int maxevents,680int timeout) {681ts_tree_node_t* tree_node;682port_state_t* port_state;683int num_events;684685if (maxevents <= 0)686return_set_error(-1, ERROR_INVALID_PARAMETER);687688if (init() < 0)689return -1;690691tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);692if (tree_node == NULL) {693err_set_win_error(ERROR_INVALID_PARAMETER);694goto err;695}696697port_state = port_state_from_handle_tree_node(tree_node);698num_events = port_wait(port_state, events, maxevents, timeout);699700ts_tree_node_unref(tree_node);701702if (num_events < 0)703goto err;704705return num_events;706707err:708err_check_handle(ephnd);709return -1;710}711712#include <errno.h>713714#define ERR__ERRNO_MAPPINGS(X) \715X(ERROR_ACCESS_DENIED, EACCES) \716X(ERROR_ALREADY_EXISTS, EEXIST) \717X(ERROR_BAD_COMMAND, EACCES) \718X(ERROR_BAD_EXE_FORMAT, ENOEXEC) \719X(ERROR_BAD_LENGTH, EACCES) \720X(ERROR_BAD_NETPATH, ENOENT) \721X(ERROR_BAD_NET_NAME, ENOENT) \722X(ERROR_BAD_NET_RESP, ENETDOWN) \723X(ERROR_BAD_PATHNAME, ENOENT) \724X(ERROR_BROKEN_PIPE, EPIPE) \725X(ERROR_CANNOT_MAKE, EACCES) \726X(ERROR_COMMITMENT_LIMIT, ENOMEM) \727X(ERROR_CONNECTION_ABORTED, ECONNABORTED) \728X(ERROR_CONNECTION_ACTIVE, EISCONN) \729X(ERROR_CONNECTION_REFUSED, ECONNREFUSED) \730X(ERROR_CRC, EACCES) \731X(ERROR_DIR_NOT_EMPTY, ENOTEMPTY) \732X(ERROR_DISK_FULL, ENOSPC) \733X(ERROR_DUP_NAME, EADDRINUSE) \734X(ERROR_FILENAME_EXCED_RANGE, ENOENT) \735X(ERROR_FILE_NOT_FOUND, ENOENT) \736X(ERROR_GEN_FAILURE, EACCES) \737X(ERROR_GRACEFUL_DISCONNECT, EPIPE) \738X(ERROR_HOST_DOWN, EHOSTUNREACH) \739X(ERROR_HOST_UNREACHABLE, EHOSTUNREACH) \740X(ERROR_INSUFFICIENT_BUFFER, EFAULT) \741X(ERROR_INVALID_ADDRESS, EADDRNOTAVAIL) \742X(ERROR_INVALID_FUNCTION, EINVAL) \743X(ERROR_INVALID_HANDLE, EBADF) \744X(ERROR_INVALID_NETNAME, EADDRNOTAVAIL) 
\745X(ERROR_INVALID_PARAMETER, EINVAL) \746X(ERROR_INVALID_USER_BUFFER, EMSGSIZE) \747X(ERROR_IO_PENDING, EINPROGRESS) \748X(ERROR_LOCK_VIOLATION, EACCES) \749X(ERROR_MORE_DATA, EMSGSIZE) \750X(ERROR_NETNAME_DELETED, ECONNABORTED) \751X(ERROR_NETWORK_ACCESS_DENIED, EACCES) \752X(ERROR_NETWORK_BUSY, ENETDOWN) \753X(ERROR_NETWORK_UNREACHABLE, ENETUNREACH) \754X(ERROR_NOACCESS, EFAULT) \755X(ERROR_NONPAGED_SYSTEM_RESOURCES, ENOMEM) \756X(ERROR_NOT_ENOUGH_MEMORY, ENOMEM) \757X(ERROR_NOT_ENOUGH_QUOTA, ENOMEM) \758X(ERROR_NOT_FOUND, ENOENT) \759X(ERROR_NOT_LOCKED, EACCES) \760X(ERROR_NOT_READY, EACCES) \761X(ERROR_NOT_SAME_DEVICE, EXDEV) \762X(ERROR_NOT_SUPPORTED, ENOTSUP) \763X(ERROR_NO_MORE_FILES, ENOENT) \764X(ERROR_NO_SYSTEM_RESOURCES, ENOMEM) \765X(ERROR_OPERATION_ABORTED, EINTR) \766X(ERROR_OUT_OF_PAPER, EACCES) \767X(ERROR_PAGED_SYSTEM_RESOURCES, ENOMEM) \768X(ERROR_PAGEFILE_QUOTA, ENOMEM) \769X(ERROR_PATH_NOT_FOUND, ENOENT) \770X(ERROR_PIPE_NOT_CONNECTED, EPIPE) \771X(ERROR_PORT_UNREACHABLE, ECONNRESET) \772X(ERROR_PROTOCOL_UNREACHABLE, ENETUNREACH) \773X(ERROR_REM_NOT_LIST, ECONNREFUSED) \774X(ERROR_REQUEST_ABORTED, EINTR) \775X(ERROR_REQ_NOT_ACCEP, EWOULDBLOCK) \776X(ERROR_SECTOR_NOT_FOUND, EACCES) \777X(ERROR_SEM_TIMEOUT, ETIMEDOUT) \778X(ERROR_SHARING_VIOLATION, EACCES) \779X(ERROR_TOO_MANY_NAMES, ENOMEM) \780X(ERROR_TOO_MANY_OPEN_FILES, EMFILE) \781X(ERROR_UNEXP_NET_ERR, ECONNABORTED) \782X(ERROR_WAIT_NO_CHILDREN, ECHILD) \783X(ERROR_WORKING_SET_QUOTA, ENOMEM) \784X(ERROR_WRITE_PROTECT, EACCES) \785X(ERROR_WRONG_DISK, EACCES) \786X(WSAEACCES, EACCES) \787X(WSAEADDRINUSE, EADDRINUSE) \788X(WSAEADDRNOTAVAIL, EADDRNOTAVAIL) \789X(WSAEAFNOSUPPORT, EAFNOSUPPORT) \790X(WSAECONNABORTED, ECONNABORTED) \791X(WSAECONNREFUSED, ECONNREFUSED) \792X(WSAECONNRESET, ECONNRESET) \793X(WSAEDISCON, EPIPE) \794X(WSAEFAULT, EFAULT) \795X(WSAEHOSTDOWN, EHOSTUNREACH) \796X(WSAEHOSTUNREACH, EHOSTUNREACH) \797X(WSAEINPROGRESS, EBUSY) \798X(WSAEINTR, EINTR) \799X(WSAEINVAL, EINVAL) 
\800X(WSAEISCONN, EISCONN) \801X(WSAEMSGSIZE, EMSGSIZE) \802X(WSAENETDOWN, ENETDOWN) \803X(WSAENETRESET, EHOSTUNREACH) \804X(WSAENETUNREACH, ENETUNREACH) \805X(WSAENOBUFS, ENOMEM) \806X(WSAENOTCONN, ENOTCONN) \807X(WSAENOTSOCK, ENOTSOCK) \808X(WSAEOPNOTSUPP, EOPNOTSUPP) \809X(WSAEPROCLIM, ENOMEM) \810X(WSAESHUTDOWN, EPIPE) \811X(WSAETIMEDOUT, ETIMEDOUT) \812X(WSAEWOULDBLOCK, EWOULDBLOCK) \813X(WSANOTINITIALISED, ENETDOWN) \814X(WSASYSNOTREADY, ENETDOWN) \815X(WSAVERNOTSUPPORTED, ENOSYS)816817static errno_t err__map_win_error_to_errno(DWORD error) {818switch (error) {819#define X(error_sym, errno_sym) \820case error_sym: \821return errno_sym;822ERR__ERRNO_MAPPINGS(X)823#undef X824}825return EINVAL;826}827828void err_map_win_error(void) {829errno = err__map_win_error_to_errno(GetLastError());830}831832void err_set_win_error(DWORD error) {833SetLastError(error);834errno = err__map_win_error_to_errno(error);835}836837int err_check_handle(HANDLE handle) {838DWORD flags;839840/* GetHandleInformation() succeeds when passed INVALID_HANDLE_VALUE, so check841* for this condition explicitly. 
*/842if (handle == INVALID_HANDLE_VALUE)843return_set_error(-1, ERROR_INVALID_HANDLE);844845if (!GetHandleInformation(handle, &flags))846return_map_error(-1);847848return 0;849}850851#include <stddef.h>852853#define array_count(a) (sizeof(a) / (sizeof((a)[0])))854855#define container_of(ptr, type, member) \856((type*) ((uintptr_t) (ptr) - offsetof(type, member)))857858#define unused_var(v) ((void) (v))859860/* Polyfill `inline` for older versions of msvc (up to Visual Studio 2013) */861#if defined(_MSC_VER) && _MSC_VER < 1900862#define inline __inline863#endif864865WEPOLL_INTERNAL int ws_global_init(void);866WEPOLL_INTERNAL SOCKET ws_get_base_socket(SOCKET socket);867868static bool init__done = false;869static INIT_ONCE init__once = INIT_ONCE_STATIC_INIT;870871static BOOL CALLBACK init__once_callback(INIT_ONCE* once,872void* parameter,873void** context) {874unused_var(once);875unused_var(parameter);876unused_var(context);877878/* N.b. that initialization order matters here. */879if (ws_global_init() < 0 || nt_global_init() < 0 ||880reflock_global_init() < 0 || epoll_global_init() < 0)881return FALSE;882883init__done = true;884return TRUE;885}886887int init(void) {888if (!init__done &&889!InitOnceExecuteOnce(&init__once, init__once_callback, NULL, NULL))890/* `InitOnceExecuteOnce()` itself is infallible, and it doesn't set any891* error code when the once-callback returns FALSE. We return -1 here to892* indicate that global initialization failed; the failing init function is893* resposible for setting `errno` and calling `SetLastError()`. 
*/894return -1;895896return 0;897}898899/* Set up a workaround for the following problem:900* FARPROC addr = GetProcAddress(...);901* MY_FUNC func = (MY_FUNC) addr; <-- GCC 8 warning/error.902* MY_FUNC func = (MY_FUNC) (void*) addr; <-- MSVC warning/error.903* To compile cleanly with either compiler, do casts with this "bridge" type:904* MY_FUNC func = (MY_FUNC) (nt__fn_ptr_cast_t) addr; */905#ifdef __GNUC__906typedef void* nt__fn_ptr_cast_t;907#else908typedef FARPROC nt__fn_ptr_cast_t;909#endif910911#define X(return_type, attributes, name, parameters) \912WEPOLL_INTERNAL return_type(attributes* name) parameters = NULL;913NT_NTDLL_IMPORT_LIST(X)914#undef X915916int nt_global_init(void) {917HMODULE ntdll;918FARPROC fn_ptr;919920ntdll = GetModuleHandleW(L"ntdll.dll");921if (ntdll == NULL)922return -1;923924#define X(return_type, attributes, name, parameters) \925fn_ptr = GetProcAddress(ntdll, #name); \926if (fn_ptr == NULL) \927return -1; \928name = (return_type(attributes*) parameters)(nt__fn_ptr_cast_t) fn_ptr;929NT_NTDLL_IMPORT_LIST(X)930#undef X931932return 0;933}934935#include <string.h>936937typedef struct poll_group poll_group_t;938939typedef struct queue_node queue_node_t;940941WEPOLL_INTERNAL poll_group_t* poll_group_acquire(port_state_t* port);942WEPOLL_INTERNAL void poll_group_release(poll_group_t* poll_group);943944WEPOLL_INTERNAL void poll_group_delete(poll_group_t* poll_group);945946WEPOLL_INTERNAL poll_group_t* poll_group_from_queue_node(947queue_node_t* queue_node);948WEPOLL_INTERNAL HANDLE949poll_group_get_afd_device_handle(poll_group_t* poll_group);950951typedef struct queue_node {952queue_node_t* prev;953queue_node_t* next;954} queue_node_t;955956typedef struct queue {957queue_node_t head;958} queue_t;959960WEPOLL_INTERNAL void queue_init(queue_t* queue);961WEPOLL_INTERNAL void queue_node_init(queue_node_t* node);962963WEPOLL_INTERNAL queue_node_t* queue_first(const queue_t* queue);964WEPOLL_INTERNAL queue_node_t* queue_last(const queue_t* 
queue);965966WEPOLL_INTERNAL void queue_prepend(queue_t* queue, queue_node_t* node);967WEPOLL_INTERNAL void queue_append(queue_t* queue, queue_node_t* node);968WEPOLL_INTERNAL void queue_move_to_start(queue_t* queue, queue_node_t* node);969WEPOLL_INTERNAL void queue_move_to_end(queue_t* queue, queue_node_t* node);970WEPOLL_INTERNAL void queue_remove(queue_node_t* node);971972WEPOLL_INTERNAL bool queue_is_empty(const queue_t* queue);973WEPOLL_INTERNAL bool queue_is_enqueued(const queue_node_t* node);974975#define POLL_GROUP__MAX_GROUP_SIZE 32976977typedef struct poll_group {978port_state_t* port_state;979queue_node_t queue_node;980HANDLE afd_device_handle;981size_t group_size;982} poll_group_t;983984static poll_group_t* poll_group__new(port_state_t* port_state) {985HANDLE iocp_handle = port_get_iocp_handle(port_state);986queue_t* poll_group_queue = port_get_poll_group_queue(port_state);987988poll_group_t* poll_group = malloc(sizeof *poll_group);989if (poll_group == NULL)990return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);991992memset(poll_group, 0, sizeof *poll_group);993994queue_node_init(&poll_group->queue_node);995poll_group->port_state = port_state;996997if (afd_create_device_handle(iocp_handle, &poll_group->afd_device_handle) <9980) {999free(poll_group);1000return NULL;1001}10021003queue_append(poll_group_queue, &poll_group->queue_node);10041005return poll_group;1006}10071008void poll_group_delete(poll_group_t* poll_group) {1009assert(poll_group->group_size == 0);1010CloseHandle(poll_group->afd_device_handle);1011queue_remove(&poll_group->queue_node);1012free(poll_group);1013}10141015poll_group_t* poll_group_from_queue_node(queue_node_t* queue_node) {1016return container_of(queue_node, poll_group_t, queue_node);1017}10181019HANDLE poll_group_get_afd_device_handle(poll_group_t* poll_group) {1020return poll_group->afd_device_handle;1021}10221023poll_group_t* poll_group_acquire(port_state_t* port_state) {1024queue_t* poll_group_queue = 
port_get_poll_group_queue(port_state);1025poll_group_t* poll_group =1026!queue_is_empty(poll_group_queue)1027? container_of(1028queue_last(poll_group_queue), poll_group_t, queue_node)1029: NULL;10301031if (poll_group == NULL ||1032poll_group->group_size >= POLL_GROUP__MAX_GROUP_SIZE)1033poll_group = poll_group__new(port_state);1034if (poll_group == NULL)1035return NULL;10361037if (++poll_group->group_size == POLL_GROUP__MAX_GROUP_SIZE)1038queue_move_to_start(poll_group_queue, &poll_group->queue_node);10391040return poll_group;1041}10421043void poll_group_release(poll_group_t* poll_group) {1044port_state_t* port_state = poll_group->port_state;1045queue_t* poll_group_queue = port_get_poll_group_queue(port_state);10461047poll_group->group_size--;1048assert(poll_group->group_size < POLL_GROUP__MAX_GROUP_SIZE);10491050queue_move_to_end(poll_group_queue, &poll_group->queue_node);10511052/* Poll groups are currently only freed when the epoll port is closed. */1053}10541055WEPOLL_INTERNAL sock_state_t* sock_new(port_state_t* port_state,1056SOCKET socket);1057WEPOLL_INTERNAL void sock_delete(port_state_t* port_state,1058sock_state_t* sock_state);1059WEPOLL_INTERNAL void sock_force_delete(port_state_t* port_state,1060sock_state_t* sock_state);10611062WEPOLL_INTERNAL int sock_set_event(port_state_t* port_state,1063sock_state_t* sock_state,1064const struct epoll_event* ev);10651066WEPOLL_INTERNAL int sock_update(port_state_t* port_state,1067sock_state_t* sock_state);1068WEPOLL_INTERNAL int sock_feed_event(port_state_t* port_state,1069IO_STATUS_BLOCK* io_status_block,1070struct epoll_event* ev);10711072WEPOLL_INTERNAL sock_state_t* sock_state_from_queue_node(1073queue_node_t* queue_node);1074WEPOLL_INTERNAL queue_node_t* sock_state_to_queue_node(1075sock_state_t* sock_state);1076WEPOLL_INTERNAL sock_state_t* sock_state_from_tree_node(1077tree_node_t* tree_node);1078WEPOLL_INTERNAL tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state);10791080#define 
PORT__MAX_ON_STACK_COMPLETIONS 25610811082typedef struct port_state {1083HANDLE iocp_handle;1084tree_t sock_tree;1085queue_t sock_update_queue;1086queue_t sock_deleted_queue;1087queue_t poll_group_queue;1088ts_tree_node_t handle_tree_node;1089CRITICAL_SECTION lock;1090size_t active_poll_count;1091} port_state_t;10921093static inline port_state_t* port__alloc(void) {1094port_state_t* port_state = malloc(sizeof *port_state);1095if (port_state == NULL)1096return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);10971098return port_state;1099}11001101static inline void port__free(port_state_t* port) {1102assert(port != NULL);1103free(port);1104}11051106static inline HANDLE port__create_iocp(void) {1107HANDLE iocp_handle =1108CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);1109if (iocp_handle == NULL)1110return_map_error(NULL);11111112return iocp_handle;1113}11141115port_state_t* port_new(HANDLE* iocp_handle_out) {1116port_state_t* port_state;1117HANDLE iocp_handle;11181119port_state = port__alloc();1120if (port_state == NULL)1121goto err1;11221123iocp_handle = port__create_iocp();1124if (iocp_handle == NULL)1125goto err2;11261127memset(port_state, 0, sizeof *port_state);11281129port_state->iocp_handle = iocp_handle;1130tree_init(&port_state->sock_tree);1131queue_init(&port_state->sock_update_queue);1132queue_init(&port_state->sock_deleted_queue);1133queue_init(&port_state->poll_group_queue);1134ts_tree_node_init(&port_state->handle_tree_node);1135InitializeCriticalSection(&port_state->lock);11361137*iocp_handle_out = iocp_handle;1138return port_state;11391140err2:1141port__free(port_state);1142err1:1143return NULL;1144}11451146static inline int port__close_iocp(port_state_t* port_state) {1147HANDLE iocp_handle = port_state->iocp_handle;1148port_state->iocp_handle = NULL;11491150if (!CloseHandle(iocp_handle))1151return_map_error(-1);11521153return 0;1154}11551156int port_close(port_state_t* port_state) {1157int 
result;11581159EnterCriticalSection(&port_state->lock);1160result = port__close_iocp(port_state);1161LeaveCriticalSection(&port_state->lock);11621163return result;1164}11651166int port_delete(port_state_t* port_state) {1167tree_node_t* tree_node;1168queue_node_t* queue_node;11691170/* At this point the IOCP port should have been closed. */1171assert(port_state->iocp_handle == NULL);11721173while ((tree_node = tree_root(&port_state->sock_tree)) != NULL) {1174sock_state_t* sock_state = sock_state_from_tree_node(tree_node);1175sock_force_delete(port_state, sock_state);1176}11771178while ((queue_node = queue_first(&port_state->sock_deleted_queue)) != NULL) {1179sock_state_t* sock_state = sock_state_from_queue_node(queue_node);1180sock_force_delete(port_state, sock_state);1181}11821183while ((queue_node = queue_first(&port_state->poll_group_queue)) != NULL) {1184poll_group_t* poll_group = poll_group_from_queue_node(queue_node);1185poll_group_delete(poll_group);1186}11871188assert(queue_is_empty(&port_state->sock_update_queue));11891190DeleteCriticalSection(&port_state->lock);11911192port__free(port_state);11931194return 0;1195}11961197static int port__update_events(port_state_t* port_state) {1198queue_t* sock_update_queue = &port_state->sock_update_queue;11991200/* Walk the queue, submitting new poll requests for every socket that needs1201* it. */1202while (!queue_is_empty(sock_update_queue)) {1203queue_node_t* queue_node = queue_first(sock_update_queue);1204sock_state_t* sock_state = sock_state_from_queue_node(queue_node);12051206if (sock_update(port_state, sock_state) < 0)1207return -1;12081209/* sock_update() removes the socket from the update queue. 
*/1210}12111212return 0;1213}12141215static inline void port__update_events_if_polling(port_state_t* port_state) {1216if (port_state->active_poll_count > 0)1217port__update_events(port_state);1218}12191220static inline int port__feed_events(port_state_t* port_state,1221struct epoll_event* epoll_events,1222OVERLAPPED_ENTRY* iocp_events,1223DWORD iocp_event_count) {1224int epoll_event_count = 0;1225DWORD i;12261227for (i = 0; i < iocp_event_count; i++) {1228IO_STATUS_BLOCK* io_status_block =1229(IO_STATUS_BLOCK*) iocp_events[i].lpOverlapped;1230struct epoll_event* ev = &epoll_events[epoll_event_count];12311232epoll_event_count += sock_feed_event(port_state, io_status_block, ev);1233}12341235return epoll_event_count;1236}12371238static inline int port__poll(port_state_t* port_state,1239struct epoll_event* epoll_events,1240OVERLAPPED_ENTRY* iocp_events,1241DWORD maxevents,1242DWORD timeout) {1243DWORD completion_count;12441245if (port__update_events(port_state) < 0)1246return -1;12471248port_state->active_poll_count++;12491250LeaveCriticalSection(&port_state->lock);12511252BOOL r = GetQueuedCompletionStatusEx(port_state->iocp_handle,1253iocp_events,1254maxevents,1255&completion_count,1256timeout,1257FALSE);12581259EnterCriticalSection(&port_state->lock);12601261port_state->active_poll_count--;12621263if (!r)1264return_map_error(-1);12651266return port__feed_events(1267port_state, epoll_events, iocp_events, completion_count);1268}12691270int port_wait(port_state_t* port_state,1271struct epoll_event* events,1272int maxevents,1273int timeout) {1274OVERLAPPED_ENTRY stack_iocp_events[PORT__MAX_ON_STACK_COMPLETIONS];1275OVERLAPPED_ENTRY* iocp_events;1276uint64_t due = 0;1277DWORD gqcs_timeout;1278int result;12791280/* Check whether `maxevents` is in range. */1281if (maxevents <= 0)1282return_set_error(-1, ERROR_INVALID_PARAMETER);12831284/* Decide whether the IOCP completion list can live on the stack, or allocate1285* memory for it on the heap. 
*/1286if ((size_t) maxevents <= array_count(stack_iocp_events)) {1287iocp_events = stack_iocp_events;1288} else if ((iocp_events =1289malloc((size_t) maxevents * sizeof *iocp_events)) == NULL) {1290iocp_events = stack_iocp_events;1291maxevents = array_count(stack_iocp_events);1292}12931294/* Compute the timeout for GetQueuedCompletionStatus, and the wait end1295* time, if the user specified a timeout other than zero or infinite. */1296if (timeout > 0) {1297due = GetTickCount64() + (uint64_t) timeout;1298gqcs_timeout = (DWORD) timeout;1299} else if (timeout == 0) {1300gqcs_timeout = 0;1301} else {1302gqcs_timeout = INFINITE;1303}13041305EnterCriticalSection(&port_state->lock);13061307/* Dequeue completion packets until either at least one interesting event1308* has been discovered, or the timeout is reached. */1309for (;;) {1310uint64_t now;13111312result = port__poll(1313port_state, events, iocp_events, (DWORD) maxevents, gqcs_timeout);1314if (result < 0 || result > 0)1315break; /* Result, error, or time-out. */13161317if (timeout < 0)1318continue; /* When timeout is negative, never time out. */13191320/* Update time. */1321now = GetTickCount64();13221323/* Do not allow the due time to be in the past. */1324if (now >= due) {1325SetLastError(WAIT_TIMEOUT);1326break;1327}13281329/* Recompute time-out argument for GetQueuedCompletionStatus. 
*/1330gqcs_timeout = (DWORD)(due - now);1331}13321333port__update_events_if_polling(port_state);13341335LeaveCriticalSection(&port_state->lock);13361337if (iocp_events != stack_iocp_events)1338free(iocp_events);13391340if (result >= 0)1341return result;1342else if (GetLastError() == WAIT_TIMEOUT)1343return 0;1344else1345return -1;1346}13471348static inline int port__ctl_add(port_state_t* port_state,1349SOCKET sock,1350struct epoll_event* ev) {1351sock_state_t* sock_state = sock_new(port_state, sock);1352if (sock_state == NULL)1353return -1;13541355if (sock_set_event(port_state, sock_state, ev) < 0) {1356sock_delete(port_state, sock_state);1357return -1;1358}13591360port__update_events_if_polling(port_state);13611362return 0;1363}13641365static inline int port__ctl_mod(port_state_t* port_state,1366SOCKET sock,1367struct epoll_event* ev) {1368sock_state_t* sock_state = port_find_socket(port_state, sock);1369if (sock_state == NULL)1370return -1;13711372if (sock_set_event(port_state, sock_state, ev) < 0)1373return -1;13741375port__update_events_if_polling(port_state);13761377return 0;1378}13791380static inline int port__ctl_del(port_state_t* port_state, SOCKET sock) {1381sock_state_t* sock_state = port_find_socket(port_state, sock);1382if (sock_state == NULL)1383return -1;13841385sock_delete(port_state, sock_state);13861387return 0;1388}13891390static inline int port__ctl_op(port_state_t* port_state,1391int op,1392SOCKET sock,1393struct epoll_event* ev) {1394switch (op) {1395case EPOLL_CTL_ADD:1396return port__ctl_add(port_state, sock, ev);1397case EPOLL_CTL_MOD:1398return port__ctl_mod(port_state, sock, ev);1399case EPOLL_CTL_DEL:1400return port__ctl_del(port_state, sock);1401default:1402return_set_error(-1, ERROR_INVALID_PARAMETER);1403}1404}14051406int port_ctl(port_state_t* port_state,1407int op,1408SOCKET sock,1409struct epoll_event* ev) {1410int result;14111412EnterCriticalSection(&port_state->lock);1413result = port__ctl_op(port_state, op, sock, 
ev);1414LeaveCriticalSection(&port_state->lock);14151416return result;1417}14181419int port_register_socket(port_state_t* port_state,1420sock_state_t* sock_state,1421SOCKET socket) {1422if (tree_add(&port_state->sock_tree,1423sock_state_to_tree_node(sock_state),1424socket) < 0)1425return_set_error(-1, ERROR_ALREADY_EXISTS);1426return 0;1427}14281429void port_unregister_socket(port_state_t* port_state,1430sock_state_t* sock_state) {1431tree_del(&port_state->sock_tree, sock_state_to_tree_node(sock_state));1432}14331434sock_state_t* port_find_socket(port_state_t* port_state, SOCKET socket) {1435tree_node_t* tree_node = tree_find(&port_state->sock_tree, socket);1436if (tree_node == NULL)1437return_set_error(NULL, ERROR_NOT_FOUND);1438return sock_state_from_tree_node(tree_node);1439}14401441void port_request_socket_update(port_state_t* port_state,1442sock_state_t* sock_state) {1443if (queue_is_enqueued(sock_state_to_queue_node(sock_state)))1444return;1445queue_append(&port_state->sock_update_queue,1446sock_state_to_queue_node(sock_state));1447}14481449void port_cancel_socket_update(port_state_t* port_state,1450sock_state_t* sock_state) {1451unused_var(port_state);1452if (!queue_is_enqueued(sock_state_to_queue_node(sock_state)))1453return;1454queue_remove(sock_state_to_queue_node(sock_state));1455}14561457void port_add_deleted_socket(port_state_t* port_state,1458sock_state_t* sock_state) {1459if (queue_is_enqueued(sock_state_to_queue_node(sock_state)))1460return;1461queue_append(&port_state->sock_deleted_queue,1462sock_state_to_queue_node(sock_state));1463}14641465void port_remove_deleted_socket(port_state_t* port_state,1466sock_state_t* sock_state) {1467unused_var(port_state);1468if (!queue_is_enqueued(sock_state_to_queue_node(sock_state)))1469return;1470queue_remove(sock_state_to_queue_node(sock_state));1471}14721473HANDLE port_get_iocp_handle(port_state_t* port_state) {1474assert(port_state->iocp_handle != NULL);1475return port_state->iocp_handle;1476}14771478queue_t* 
port_get_poll_group_queue(port_state_t* port_state) {1479return &port_state->poll_group_queue;1480}14811482port_state_t* port_state_from_handle_tree_node(ts_tree_node_t* tree_node) {1483return container_of(tree_node, port_state_t, handle_tree_node);1484}14851486ts_tree_node_t* port_state_to_handle_tree_node(port_state_t* port_state) {1487return &port_state->handle_tree_node;1488}14891490void queue_init(queue_t* queue) {1491queue_node_init(&queue->head);1492}14931494void queue_node_init(queue_node_t* node) {1495node->prev = node;1496node->next = node;1497}14981499static inline void queue__detach_node(queue_node_t* node) {1500node->prev->next = node->next;1501node->next->prev = node->prev;1502}15031504queue_node_t* queue_first(const queue_t* queue) {1505return !queue_is_empty(queue) ? queue->head.next : NULL;1506}15071508queue_node_t* queue_last(const queue_t* queue) {1509return !queue_is_empty(queue) ? queue->head.prev : NULL;1510}15111512void queue_prepend(queue_t* queue, queue_node_t* node) {1513node->next = queue->head.next;1514node->prev = &queue->head;1515node->next->prev = node;1516queue->head.next = node;1517}15181519void queue_append(queue_t* queue, queue_node_t* node) {1520node->next = &queue->head;1521node->prev = queue->head.prev;1522node->prev->next = node;1523queue->head.prev = node;1524}15251526void queue_move_to_start(queue_t* queue, queue_node_t* node) {1527queue__detach_node(node);1528queue_prepend(queue, node);1529}15301531void queue_move_to_end(queue_t* queue, queue_node_t* node) {1532queue__detach_node(node);1533queue_append(queue, node);1534}15351536void queue_remove(queue_node_t* node) {1537queue__detach_node(node);1538queue_node_init(node);1539}15401541bool queue_is_empty(const queue_t* queue) {1542return !queue_is_enqueued(&queue->head);1543}15441545bool queue_is_enqueued(const queue_node_t* node) {1546return node->prev != node;1547}15481549#define REFLOCK__REF ((long) 0x00000001UL)1550#define REFLOCK__REF_MASK ((long) 
0x0fffffffUL)1551#define REFLOCK__DESTROY ((long) 0x10000000UL)1552#define REFLOCK__DESTROY_MASK ((long) 0xf0000000UL)1553#define REFLOCK__POISON ((long) 0x300dead0UL)15541555static HANDLE reflock__keyed_event = NULL;15561557int reflock_global_init(void) {1558NTSTATUS status = NtCreateKeyedEvent(1559&reflock__keyed_event, KEYEDEVENT_ALL_ACCESS, NULL, 0);1560if (status != STATUS_SUCCESS)1561return_set_error(-1, RtlNtStatusToDosError(status));1562return 0;1563}15641565void reflock_init(reflock_t* reflock) {1566reflock->state = 0;1567}15681569static void reflock__signal_event(void* address) {1570NTSTATUS status =1571NtReleaseKeyedEvent(reflock__keyed_event, address, FALSE, NULL);1572if (status != STATUS_SUCCESS)1573abort();1574}15751576static void reflock__await_event(void* address) {1577NTSTATUS status =1578NtWaitForKeyedEvent(reflock__keyed_event, address, FALSE, NULL);1579if (status != STATUS_SUCCESS)1580abort();1581}15821583void reflock_ref(reflock_t* reflock) {1584long state = InterlockedAdd(&reflock->state, REFLOCK__REF);15851586/* Verify that the counter didn't overflow and the lock isn't destroyed. */1587assert((state & REFLOCK__DESTROY_MASK) == 0);1588unused_var(state);1589}15901591void reflock_unref(reflock_t* reflock) {1592long state = InterlockedAdd(&reflock->state, -REFLOCK__REF);15931594/* Verify that the lock was referenced and not already destroyed. */1595assert((state & REFLOCK__DESTROY_MASK & ~REFLOCK__DESTROY) == 0);15961597if (state == REFLOCK__DESTROY)1598reflock__signal_event(reflock);1599}16001601void reflock_unref_and_destroy(reflock_t* reflock) {1602long state =1603InterlockedAdd(&reflock->state, REFLOCK__DESTROY - REFLOCK__REF);1604long ref_count = state & REFLOCK__REF_MASK;16051606/* Verify that the lock was referenced and not already destroyed. 
*/1607assert((state & REFLOCK__DESTROY_MASK) == REFLOCK__DESTROY);16081609if (ref_count != 0)1610reflock__await_event(reflock);16111612state = InterlockedExchange(&reflock->state, REFLOCK__POISON);1613assert(state == REFLOCK__DESTROY);1614}16151616#define SOCK__KNOWN_EPOLL_EVENTS \1617(EPOLLIN | EPOLLPRI | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM | \1618EPOLLRDBAND | EPOLLWRNORM | EPOLLWRBAND | EPOLLMSG | EPOLLRDHUP)16191620typedef enum sock__poll_status {1621SOCK__POLL_IDLE = 0,1622SOCK__POLL_PENDING,1623SOCK__POLL_CANCELLED1624} sock__poll_status_t;16251626typedef struct sock_state {1627IO_STATUS_BLOCK io_status_block;1628AFD_POLL_INFO poll_info;1629queue_node_t queue_node;1630tree_node_t tree_node;1631poll_group_t* poll_group;1632SOCKET base_socket;1633epoll_data_t user_data;1634uint32_t user_events;1635uint32_t pending_events;1636sock__poll_status_t poll_status;1637bool delete_pending;1638} sock_state_t;16391640static inline sock_state_t* sock__alloc(void) {1641sock_state_t* sock_state = malloc(sizeof *sock_state);1642if (sock_state == NULL)1643return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);1644return sock_state;1645}16461647static inline void sock__free(sock_state_t* sock_state) {1648assert(sock_state != NULL);1649free(sock_state);1650}16511652static inline int sock__cancel_poll(sock_state_t* sock_state) {1653assert(sock_state->poll_status == SOCK__POLL_PENDING);16541655if (afd_cancel_poll(poll_group_get_afd_device_handle(sock_state->poll_group),1656&sock_state->io_status_block) < 0)1657return -1;16581659sock_state->poll_status = SOCK__POLL_CANCELLED;1660sock_state->pending_events = 0;1661return 0;1662}16631664sock_state_t* sock_new(port_state_t* port_state, SOCKET socket) {1665SOCKET base_socket;1666poll_group_t* poll_group;1667sock_state_t* sock_state;16681669if (socket == 0 || socket == INVALID_SOCKET)1670return_set_error(NULL, ERROR_INVALID_HANDLE);16711672base_socket = ws_get_base_socket(socket);1673if (base_socket == INVALID_SOCKET)1674return 
NULL;16751676poll_group = poll_group_acquire(port_state);1677if (poll_group == NULL)1678return NULL;16791680sock_state = sock__alloc();1681if (sock_state == NULL)1682goto err1;16831684memset(sock_state, 0, sizeof *sock_state);16851686sock_state->base_socket = base_socket;1687sock_state->poll_group = poll_group;16881689tree_node_init(&sock_state->tree_node);1690queue_node_init(&sock_state->queue_node);16911692if (port_register_socket(port_state, sock_state, socket) < 0)1693goto err2;16941695return sock_state;16961697err2:1698sock__free(sock_state);1699err1:1700poll_group_release(poll_group);17011702return NULL;1703}17041705static int sock__delete(port_state_t* port_state,1706sock_state_t* sock_state,1707bool force) {1708if (!sock_state->delete_pending) {1709if (sock_state->poll_status == SOCK__POLL_PENDING)1710sock__cancel_poll(sock_state);17111712port_cancel_socket_update(port_state, sock_state);1713port_unregister_socket(port_state, sock_state);17141715sock_state->delete_pending = true;1716}17171718/* If the poll request still needs to complete, the sock_state object can't1719* be free()d yet. `sock_feed_event()` or `port_close()` will take care1720* of this later. */1721if (force || sock_state->poll_status == SOCK__POLL_IDLE) {1722/* Free the sock_state now. */1723port_remove_deleted_socket(port_state, sock_state);1724poll_group_release(sock_state->poll_group);1725sock__free(sock_state);1726} else {1727/* Free the socket later. 
*/1728port_add_deleted_socket(port_state, sock_state);1729}17301731return 0;1732}17331734void sock_delete(port_state_t* port_state, sock_state_t* sock_state) {1735sock__delete(port_state, sock_state, false);1736}17371738void sock_force_delete(port_state_t* port_state, sock_state_t* sock_state) {1739sock__delete(port_state, sock_state, true);1740}17411742int sock_set_event(port_state_t* port_state,1743sock_state_t* sock_state,1744const struct epoll_event* ev) {1745/* EPOLLERR and EPOLLHUP are always reported, even when not requested by the1746* caller. However they are disabled after a event has been reported for a1747* socket for which the EPOLLONESHOT flag was set. */1748uint32_t events = ev->events | EPOLLERR | EPOLLHUP;17491750sock_state->user_events = events;1751sock_state->user_data = ev->data;17521753if ((events & SOCK__KNOWN_EPOLL_EVENTS & ~sock_state->pending_events) != 0)1754port_request_socket_update(port_state, sock_state);17551756return 0;1757}17581759static inline DWORD sock__epoll_events_to_afd_events(uint32_t epoll_events) {1760/* Always monitor for AFD_POLL_LOCAL_CLOSE, which is triggered when the1761* socket is closed with closesocket() or CloseHandle(). 
*/1762DWORD afd_events = AFD_POLL_LOCAL_CLOSE;17631764if (epoll_events & (EPOLLIN | EPOLLRDNORM))1765afd_events |= AFD_POLL_RECEIVE | AFD_POLL_ACCEPT;1766if (epoll_events & (EPOLLPRI | EPOLLRDBAND))1767afd_events |= AFD_POLL_RECEIVE_EXPEDITED;1768if (epoll_events & (EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND))1769afd_events |= AFD_POLL_SEND;1770if (epoll_events & (EPOLLIN | EPOLLRDNORM | EPOLLRDHUP))1771afd_events |= AFD_POLL_DISCONNECT;1772if (epoll_events & EPOLLHUP)1773afd_events |= AFD_POLL_ABORT;1774if (epoll_events & EPOLLERR)1775afd_events |= AFD_POLL_CONNECT_FAIL;17761777return afd_events;1778}17791780static inline uint32_t sock__afd_events_to_epoll_events(DWORD afd_events) {1781uint32_t epoll_events = 0;17821783if (afd_events & (AFD_POLL_RECEIVE | AFD_POLL_ACCEPT))1784epoll_events |= EPOLLIN | EPOLLRDNORM;1785if (afd_events & AFD_POLL_RECEIVE_EXPEDITED)1786epoll_events |= EPOLLPRI | EPOLLRDBAND;1787if (afd_events & AFD_POLL_SEND)1788epoll_events |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;1789if (afd_events & AFD_POLL_DISCONNECT)1790epoll_events |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;1791if (afd_events & AFD_POLL_ABORT)1792epoll_events |= EPOLLHUP;1793if (afd_events & AFD_POLL_CONNECT_FAIL)1794/* Linux reports all these events after connect() has failed. */1795epoll_events |=1796EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLRDNORM | EPOLLWRNORM | EPOLLRDHUP;17971798return epoll_events;1799}18001801int sock_update(port_state_t* port_state, sock_state_t* sock_state) {1802assert(!sock_state->delete_pending);18031804if ((sock_state->poll_status == SOCK__POLL_PENDING) &&1805(sock_state->user_events & SOCK__KNOWN_EPOLL_EVENTS &1806~sock_state->pending_events) == 0) {1807/* All the events the user is interested in are already being monitored by1808* the pending poll operation. It might spuriously complete because of an1809* event that we're no longer interested in; when that happens we'll submit1810* a new poll operation with the updated event mask. 
*/18111812} else if (sock_state->poll_status == SOCK__POLL_PENDING) {1813/* A poll operation is already pending, but it's not monitoring for all the1814* events that the user is interested in. Therefore, cancel the pending1815* poll operation; when we receive it's completion package, a new poll1816* operation will be submitted with the correct event mask. */1817if (sock__cancel_poll(sock_state) < 0)1818return -1;18191820} else if (sock_state->poll_status == SOCK__POLL_CANCELLED) {1821/* The poll operation has already been cancelled, we're still waiting for1822* it to return. For now, there's nothing that needs to be done. */18231824} else if (sock_state->poll_status == SOCK__POLL_IDLE) {1825/* No poll operation is pending; start one. */1826sock_state->poll_info.Exclusive = FALSE;1827sock_state->poll_info.NumberOfHandles = 1;1828sock_state->poll_info.Timeout.QuadPart = INT64_MAX;1829sock_state->poll_info.Handles[0].Handle = (HANDLE) sock_state->base_socket;1830sock_state->poll_info.Handles[0].Status = 0;1831sock_state->poll_info.Handles[0].Events =1832sock__epoll_events_to_afd_events(sock_state->user_events);18331834if (afd_poll(poll_group_get_afd_device_handle(sock_state->poll_group),1835&sock_state->poll_info,1836&sock_state->io_status_block) < 0) {1837switch (GetLastError()) {1838case ERROR_IO_PENDING:1839/* Overlapped poll operation in progress; this is expected. */1840break;1841case ERROR_INVALID_HANDLE:1842/* Socket closed; it'll be dropped from the epoll set. */1843return sock__delete(port_state, sock_state, false);1844default:1845/* Other errors are propagated to the caller. */1846return_map_error(-1);1847}1848}18491850/* The poll request was successfully submitted. */1851sock_state->poll_status = SOCK__POLL_PENDING;1852sock_state->pending_events = sock_state->user_events;18531854} else {1855/* Unreachable. 
*/1856assert(false);1857}18581859port_cancel_socket_update(port_state, sock_state);1860return 0;1861}18621863int sock_feed_event(port_state_t* port_state,1864IO_STATUS_BLOCK* io_status_block,1865struct epoll_event* ev) {1866sock_state_t* sock_state =1867container_of(io_status_block, sock_state_t, io_status_block);1868AFD_POLL_INFO* poll_info = &sock_state->poll_info;1869uint32_t epoll_events = 0;18701871sock_state->poll_status = SOCK__POLL_IDLE;1872sock_state->pending_events = 0;18731874if (sock_state->delete_pending) {1875/* Socket has been deleted earlier and can now be freed. */1876return sock__delete(port_state, sock_state, false);18771878} else if (io_status_block->Status == STATUS_CANCELLED) {1879/* The poll request was cancelled by CancelIoEx. */18801881} else if (!NT_SUCCESS(io_status_block->Status)) {1882/* The overlapped request itself failed in an unexpected way. */1883epoll_events = EPOLLERR;18841885} else if (poll_info->NumberOfHandles < 1) {1886/* This poll operation succeeded but didn't report any socket events. */18871888} else if (poll_info->Handles[0].Events & AFD_POLL_LOCAL_CLOSE) {1889/* The poll operation reported that the socket was closed. */1890return sock__delete(port_state, sock_state, false);18911892} else {1893/* Events related to our socket were reported. */1894epoll_events =1895sock__afd_events_to_epoll_events(poll_info->Handles[0].Events);1896}18971898/* Requeue the socket so a new poll request will be submitted. */1899port_request_socket_update(port_state, sock_state);19001901/* Filter out events that the user didn't ask for. */1902epoll_events &= sock_state->user_events;19031904/* Return if there are no epoll events to report. */1905if (epoll_events == 0)1906return 0;19071908/* If the the socket has the EPOLLONESHOT flag set, unmonitor all events,1909* even EPOLLERR and EPOLLHUP. But always keep looking for closed sockets. 
*/1910if (sock_state->user_events & EPOLLONESHOT)1911sock_state->user_events = 0;19121913ev->data = sock_state->user_data;1914ev->events = epoll_events;1915return 1;1916}19171918sock_state_t* sock_state_from_queue_node(queue_node_t* queue_node) {1919return container_of(queue_node, sock_state_t, queue_node);1920}19211922queue_node_t* sock_state_to_queue_node(sock_state_t* sock_state) {1923return &sock_state->queue_node;1924}19251926sock_state_t* sock_state_from_tree_node(tree_node_t* tree_node) {1927return container_of(tree_node, sock_state_t, tree_node);1928}19291930tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state) {1931return &sock_state->tree_node;1932}19331934void ts_tree_init(ts_tree_t* ts_tree) {1935tree_init(&ts_tree->tree);1936InitializeSRWLock(&ts_tree->lock);1937}19381939void ts_tree_node_init(ts_tree_node_t* node) {1940tree_node_init(&node->tree_node);1941reflock_init(&node->reflock);1942}19431944int ts_tree_add(ts_tree_t* ts_tree, ts_tree_node_t* node, uintptr_t key) {1945int r;19461947AcquireSRWLockExclusive(&ts_tree->lock);1948r = tree_add(&ts_tree->tree, &node->tree_node, key);1949ReleaseSRWLockExclusive(&ts_tree->lock);19501951return r;1952}19531954static inline ts_tree_node_t* ts_tree__find_node(ts_tree_t* ts_tree,1955uintptr_t key) {1956tree_node_t* tree_node = tree_find(&ts_tree->tree, key);1957if (tree_node == NULL)1958return NULL;19591960return container_of(tree_node, ts_tree_node_t, tree_node);1961}19621963ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree, uintptr_t key) {1964ts_tree_node_t* ts_tree_node;19651966AcquireSRWLockExclusive(&ts_tree->lock);19671968ts_tree_node = ts_tree__find_node(ts_tree, key);1969if (ts_tree_node != NULL) {1970tree_del(&ts_tree->tree, &ts_tree_node->tree_node);1971reflock_ref(&ts_tree_node->reflock);1972}19731974ReleaseSRWLockExclusive(&ts_tree->lock);19751976return ts_tree_node;1977}19781979ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree, uintptr_t key) {1980ts_tree_node_t* 
ts_tree_node;19811982AcquireSRWLockShared(&ts_tree->lock);19831984ts_tree_node = ts_tree__find_node(ts_tree, key);1985if (ts_tree_node != NULL)1986reflock_ref(&ts_tree_node->reflock);19871988ReleaseSRWLockShared(&ts_tree->lock);19891990return ts_tree_node;1991}19921993void ts_tree_node_unref(ts_tree_node_t* node) {1994reflock_unref(&node->reflock);1995}19961997void ts_tree_node_unref_and_destroy(ts_tree_node_t* node) {1998reflock_unref_and_destroy(&node->reflock);1999}20002001void tree_init(tree_t* tree) {2002memset(tree, 0, sizeof *tree);2003}20042005void tree_node_init(tree_node_t* node) {2006memset(node, 0, sizeof *node);2007}20082009#define TREE__ROTATE(cis, trans) \2010tree_node_t* p = node; \2011tree_node_t* q = node->trans; \2012tree_node_t* parent = p->parent; \2013\2014if (parent) { \2015if (parent->left == p) \2016parent->left = q; \2017else \2018parent->right = q; \2019} else { \2020tree->root = q; \2021} \2022\2023q->parent = parent; \2024p->parent = q; \2025p->trans = q->cis; \2026if (p->trans) \2027p->trans->parent = p; \2028q->cis = p;20292030static inline void tree__rotate_left(tree_t* tree, tree_node_t* node) {2031TREE__ROTATE(left, right)2032}20332034static inline void tree__rotate_right(tree_t* tree, tree_node_t* node) {2035TREE__ROTATE(right, left)2036}20372038#define TREE__INSERT_OR_DESCEND(side) \2039if (parent->side) { \2040parent = parent->side; \2041} else { \2042parent->side = node; \2043break; \2044}20452046#define TREE__REBALANCE_AFTER_INSERT(cis, trans) \2047tree_node_t* grandparent = parent->parent; \2048tree_node_t* uncle = grandparent->trans; \2049\2050if (uncle && uncle->red) { \2051parent->red = uncle->red = false; \2052grandparent->red = true; \2053node = grandparent; \2054} else { \2055if (node == parent->trans) { \2056tree__rotate_##cis(tree, parent); \2057node = parent; \2058parent = node->parent; \2059} \2060parent->red = false; \2061grandparent->red = true; \2062tree__rotate_##trans(tree, grandparent); \2063}20642065int 
tree_add(tree_t* tree, tree_node_t* node, uintptr_t key) {2066tree_node_t* parent;20672068parent = tree->root;2069if (parent) {2070for (;;) {2071if (key < parent->key) {2072TREE__INSERT_OR_DESCEND(left)2073} else if (key > parent->key) {2074TREE__INSERT_OR_DESCEND(right)2075} else {2076return -1;2077}2078}2079} else {2080tree->root = node;2081}20822083node->key = key;2084node->left = node->right = NULL;2085node->parent = parent;2086node->red = true;20872088for (; parent && parent->red; parent = node->parent) {2089if (parent == parent->parent->left) {2090TREE__REBALANCE_AFTER_INSERT(left, right)2091} else {2092TREE__REBALANCE_AFTER_INSERT(right, left)2093}2094}2095tree->root->red = false;20962097return 0;2098}20992100#define TREE__REBALANCE_AFTER_REMOVE(cis, trans) \2101tree_node_t* sibling = parent->trans; \2102\2103if (sibling->red) { \2104sibling->red = false; \2105parent->red = true; \2106tree__rotate_##cis(tree, parent); \2107sibling = parent->trans; \2108} \2109if ((sibling->left && sibling->left->red) || \2110(sibling->right && sibling->right->red)) { \2111if (!sibling->trans || !sibling->trans->red) { \2112sibling->cis->red = false; \2113sibling->red = true; \2114tree__rotate_##trans(tree, sibling); \2115sibling = parent->trans; \2116} \2117sibling->red = parent->red; \2118parent->red = sibling->trans->red = false; \2119tree__rotate_##cis(tree, parent); \2120node = tree->root; \2121break; \2122} \2123sibling->red = true;21242125void tree_del(tree_t* tree, tree_node_t* node) {2126tree_node_t* parent = node->parent;2127tree_node_t* left = node->left;2128tree_node_t* right = node->right;2129tree_node_t* next;2130bool red;21312132if (!left) {2133next = right;2134} else if (!right) {2135next = left;2136} else {2137next = right;2138while (next->left)2139next = next->left;2140}21412142if (parent) {2143if (parent->left == node)2144parent->left = next;2145else2146parent->right = next;2147} else {2148tree->root = next;2149}21502151if (left && right) {2152red = 
next->red;2153next->red = node->red;2154next->left = left;2155left->parent = next;2156if (next != right) {2157parent = next->parent;2158next->parent = node->parent;2159node = next->right;2160parent->left = node;2161next->right = right;2162right->parent = next;2163} else {2164next->parent = parent;2165parent = next;2166node = next->right;2167}2168} else {2169red = node->red;2170node = next;2171}21722173if (node)2174node->parent = parent;2175if (red)2176return;2177if (node && node->red) {2178node->red = false;2179return;2180}21812182do {2183if (node == tree->root)2184break;2185if (node == parent->left) {2186TREE__REBALANCE_AFTER_REMOVE(left, right)2187} else {2188TREE__REBALANCE_AFTER_REMOVE(right, left)2189}2190node = parent;2191parent = parent->parent;2192} while (!node->red);21932194if (node)2195node->red = false;2196}21972198tree_node_t* tree_find(const tree_t* tree, uintptr_t key) {2199tree_node_t* node = tree->root;2200while (node) {2201if (key < node->key)2202node = node->left;2203else if (key > node->key)2204node = node->right;2205else2206return node;2207}2208return NULL;2209}22102211tree_node_t* tree_root(const tree_t* tree) {2212return tree->root;2213}22142215#ifndef SIO_BSP_HANDLE_POLL2216#define SIO_BSP_HANDLE_POLL 0x4800001D2217#endif22182219#ifndef SIO_BASE_HANDLE2220#define SIO_BASE_HANDLE 0x480000222221#endif22222223int ws_global_init(void) {2224int r;2225WSADATA wsa_data;22262227r = WSAStartup(MAKEWORD(2, 2), &wsa_data);2228if (r != 0)2229return_set_error(-1, (DWORD) r);22302231return 0;2232}22332234static inline SOCKET ws__ioctl_get_bsp_socket(SOCKET socket, DWORD ioctl) {2235SOCKET bsp_socket;2236DWORD bytes;22372238if (WSAIoctl(socket,2239ioctl,2240NULL,22410,2242&bsp_socket,2243sizeof bsp_socket,2244&bytes,2245NULL,2246NULL) != SOCKET_ERROR)2247return bsp_socket;2248else2249return INVALID_SOCKET;2250}22512252SOCKET ws_get_base_socket(SOCKET socket) {2253SOCKET base_socket;2254DWORD error;22552256for (;;) {2257base_socket = 
ws__ioctl_get_bsp_socket(socket, SIO_BASE_HANDLE);2258if (base_socket != INVALID_SOCKET)2259return base_socket;22602261error = GetLastError();2262if (error == WSAENOTSOCK)2263return_set_error(INVALID_SOCKET, error);22642265/* Even though Microsoft documentation clearly states that LSPs should2266* never intercept the `SIO_BASE_HANDLE` ioctl [1], Komodia based LSPs do2267* so anyway, breaking it, with the apparent intention of preventing LSP2268* bypass [2]. Fortunately they don't handle `SIO_BSP_HANDLE_POLL`, which2269* will at least let us obtain the socket associated with the next winsock2270* protocol chain entry. If this succeeds, loop around and call2271* `SIO_BASE_HANDLE` again with the returned BSP socket, to make sure that2272* we unwrap all layers and retrieve the actual base socket.2273* [1] https://docs.microsoft.com/en-us/windows/win32/winsock/winsock-ioctls2274* [2] https://www.komodia.com/newwiki/index.php?title=Komodia%27s_Redirector_bug_fixes#Version_2.2.2.62275*/2276base_socket = ws__ioctl_get_bsp_socket(socket, SIO_BSP_HANDLE_POLL);2277if (base_socket != INVALID_SOCKET && base_socket != socket)2278socket = base_socket;2279else2280return_set_error(INVALID_SOCKET, error);2281}2282}228322842285