GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/java.base/windows/native/libnio/ch/wepoll.c
/*
 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * This file is available under and governed by the GNU General Public
 * License version 2 only, as published by the Free Software Foundation.
 * However, the following notice accompanied the original version of this
 * file and, per its terms, should not be removed:
 *
 * wepoll - epoll for Windows
 * https://github.com/piscisaureus/wepoll
 *
 * Copyright 2012-2020, Bert Belder <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef WEPOLL_EXPORT
#define WEPOLL_EXPORT
#endif

#include <stdint.h>

enum EPOLL_EVENTS {
  EPOLLIN = (int) (1U << 0),
  EPOLLPRI = (int) (1U << 1),
  EPOLLOUT = (int) (1U << 2),
  EPOLLERR = (int) (1U << 3),
  EPOLLHUP = (int) (1U << 4),
  EPOLLRDNORM = (int) (1U << 6),
  EPOLLRDBAND = (int) (1U << 7),
  EPOLLWRNORM = (int) (1U << 8),
  EPOLLWRBAND = (int) (1U << 9),
  EPOLLMSG = (int) (1U << 10), /* Never reported. */
  EPOLLRDHUP = (int) (1U << 13),
  EPOLLONESHOT = (int) (1U << 31)
};

#define EPOLLIN (1U << 0)
#define EPOLLPRI (1U << 1)
#define EPOLLOUT (1U << 2)
#define EPOLLERR (1U << 3)
#define EPOLLHUP (1U << 4)
#define EPOLLRDNORM (1U << 6)
#define EPOLLRDBAND (1U << 7)
#define EPOLLWRNORM (1U << 8)
#define EPOLLWRBAND (1U << 9)
#define EPOLLMSG (1U << 10)
#define EPOLLRDHUP (1U << 13)
#define EPOLLONESHOT (1U << 31)

#define EPOLL_CTL_ADD 1
#define EPOLL_CTL_MOD 2
#define EPOLL_CTL_DEL 3

typedef void* HANDLE;
typedef uintptr_t SOCKET;

typedef union epoll_data {
  void* ptr;
  int fd;
  uint32_t u32;
  uint64_t u64;
  SOCKET sock; /* Windows specific */
  HANDLE hnd;  /* Windows specific */
} epoll_data_t;

struct epoll_event {
  uint32_t events;   /* Epoll events and flags */
  epoll_data_t data; /* User data variable */
};

#ifdef __cplusplus
extern "C" {
#endif

WEPOLL_EXPORT HANDLE epoll_create(int size);
WEPOLL_EXPORT HANDLE epoll_create1(int flags);

WEPOLL_EXPORT int epoll_close(HANDLE ephnd);

WEPOLL_EXPORT int epoll_ctl(HANDLE ephnd,
                            int op,
                            SOCKET sock,
                            struct epoll_event* event);

WEPOLL_EXPORT int epoll_wait(HANDLE ephnd,
                             struct epoll_event* events,
                             int maxevents,
                             int timeout);

#ifdef __cplusplus
} /* extern "C" */
#endif
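
/*
 * Illustrative usage sketch (not part of the original wepoll sources): the
 * API above mirrors Linux epoll, except that the epoll port is a HANDLE and
 * the registered objects are SOCKETs. Assuming `s` is a socket created
 * elsewhere, a caller would do roughly:
 *
 *   HANDLE ephnd = epoll_create1(0);            // NULL on failure
 *   struct epoll_event ev = {0};
 *   ev.events = EPOLLIN | EPOLLRDHUP;
 *   ev.data.sock = s;
 *   epoll_ctl(ephnd, EPOLL_CTL_ADD, s, &ev);    // -1 on failure
 *
 *   struct epoll_event out[8];
 *   int n = epoll_wait(ephnd, out, 8, 1000);    // 0 on timeout, -1 on error
 *
 *   epoll_ctl(ephnd, EPOLL_CTL_DEL, s, NULL);
 *   epoll_close(ephnd);
 */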
139
140
#include <assert.h>
141
142
#include <stdlib.h>
143
144
#define WEPOLL_INTERNAL static
145
#define WEPOLL_INTERNAL_EXTERN static
146
147
#if defined(__clang__)
148
#pragma clang diagnostic push
149
#pragma clang diagnostic ignored "-Wnonportable-system-include-path"
150
#pragma clang diagnostic ignored "-Wreserved-id-macro"
151
#elif defined(_MSC_VER)
152
#pragma warning(push, 1)
153
#endif
154
155
#undef WIN32_LEAN_AND_MEAN
156
#define WIN32_LEAN_AND_MEAN
157
158
#undef _WIN32_WINNT
159
#define _WIN32_WINNT 0x0600
160
161
#include <winsock2.h>
162
#include <ws2tcpip.h>
163
#include <windows.h>
164
165
#if defined(__clang__)
166
#pragma clang diagnostic pop
167
#elif defined(_MSC_VER)
168
#pragma warning(pop)
169
#endif
170
171
WEPOLL_INTERNAL int nt_global_init(void);
172
173
typedef LONG NTSTATUS;
174
typedef NTSTATUS* PNTSTATUS;
175
176
#ifndef NT_SUCCESS
177
#define NT_SUCCESS(status) (((NTSTATUS)(status)) >= 0)
178
#endif
179
180
#ifndef STATUS_SUCCESS
181
#define STATUS_SUCCESS ((NTSTATUS) 0x00000000L)
182
#endif
183
184
#ifndef STATUS_PENDING
185
#define STATUS_PENDING ((NTSTATUS) 0x00000103L)
186
#endif
187
188
#ifndef STATUS_CANCELLED
189
#define STATUS_CANCELLED ((NTSTATUS) 0xC0000120L)
190
#endif
191
192
#ifndef STATUS_NOT_FOUND
193
#define STATUS_NOT_FOUND ((NTSTATUS) 0xC0000225L)
194
#endif
195
196
typedef struct _IO_STATUS_BLOCK {
197
NTSTATUS Status;
198
ULONG_PTR Information;
199
} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;
200
201
typedef VOID(NTAPI* PIO_APC_ROUTINE)(PVOID ApcContext,
202
PIO_STATUS_BLOCK IoStatusBlock,
203
ULONG Reserved);
204
205
typedef struct _UNICODE_STRING {
206
USHORT Length;
207
USHORT MaximumLength;
208
PWSTR Buffer;
209
} UNICODE_STRING, *PUNICODE_STRING;
210
211
#define RTL_CONSTANT_STRING(s) \
212
{ sizeof(s) - sizeof((s)[0]), sizeof(s), s }
213
214
typedef struct _OBJECT_ATTRIBUTES {
215
ULONG Length;
216
HANDLE RootDirectory;
217
PUNICODE_STRING ObjectName;
218
ULONG Attributes;
219
PVOID SecurityDescriptor;
220
PVOID SecurityQualityOfService;
221
} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES;
222
223
#define RTL_CONSTANT_OBJECT_ATTRIBUTES(ObjectName, Attributes) \
224
{ sizeof(OBJECT_ATTRIBUTES), NULL, ObjectName, Attributes, NULL, NULL }
225
226
#ifndef FILE_OPEN
227
#define FILE_OPEN 0x00000001UL
228
#endif
229
230
#define KEYEDEVENT_WAIT 0x00000001UL
231
#define KEYEDEVENT_WAKE 0x00000002UL
232
#define KEYEDEVENT_ALL_ACCESS \
233
(STANDARD_RIGHTS_REQUIRED | KEYEDEVENT_WAIT | KEYEDEVENT_WAKE)
234
235
#define NT_NTDLL_IMPORT_LIST(X) \
236
X(NTSTATUS, \
237
NTAPI, \
238
NtCancelIoFileEx, \
239
(HANDLE FileHandle, \
240
PIO_STATUS_BLOCK IoRequestToCancel, \
241
PIO_STATUS_BLOCK IoStatusBlock)) \
242
\
243
X(NTSTATUS, \
244
NTAPI, \
245
NtCreateFile, \
246
(PHANDLE FileHandle, \
247
ACCESS_MASK DesiredAccess, \
248
POBJECT_ATTRIBUTES ObjectAttributes, \
249
PIO_STATUS_BLOCK IoStatusBlock, \
250
PLARGE_INTEGER AllocationSize, \
251
ULONG FileAttributes, \
252
ULONG ShareAccess, \
253
ULONG CreateDisposition, \
254
ULONG CreateOptions, \
255
PVOID EaBuffer, \
256
ULONG EaLength)) \
257
\
258
X(NTSTATUS, \
259
NTAPI, \
260
NtCreateKeyedEvent, \
261
(PHANDLE KeyedEventHandle, \
262
ACCESS_MASK DesiredAccess, \
263
POBJECT_ATTRIBUTES ObjectAttributes, \
264
ULONG Flags)) \
265
\
266
X(NTSTATUS, \
267
NTAPI, \
268
NtDeviceIoControlFile, \
269
(HANDLE FileHandle, \
270
HANDLE Event, \
271
PIO_APC_ROUTINE ApcRoutine, \
272
PVOID ApcContext, \
273
PIO_STATUS_BLOCK IoStatusBlock, \
274
ULONG IoControlCode, \
275
PVOID InputBuffer, \
276
ULONG InputBufferLength, \
277
PVOID OutputBuffer, \
278
ULONG OutputBufferLength)) \
279
\
280
X(NTSTATUS, \
281
NTAPI, \
282
NtReleaseKeyedEvent, \
283
(HANDLE KeyedEventHandle, \
284
PVOID KeyValue, \
285
BOOLEAN Alertable, \
286
PLARGE_INTEGER Timeout)) \
287
\
288
X(NTSTATUS, \
289
NTAPI, \
290
NtWaitForKeyedEvent, \
291
(HANDLE KeyedEventHandle, \
292
PVOID KeyValue, \
293
BOOLEAN Alertable, \
294
PLARGE_INTEGER Timeout)) \
295
\
296
X(ULONG, WINAPI, RtlNtStatusToDosError, (NTSTATUS Status))
297
298
#define X(return_type, attributes, name, parameters) \
299
WEPOLL_INTERNAL_EXTERN return_type(attributes* name) parameters;
300
NT_NTDLL_IMPORT_LIST(X)
301
#undef X
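/* Illustrative note (not in the original source): with the X() definition
 * above, each NT_NTDLL_IMPORT_LIST() entry expands to a static function
 * pointer. For example, the RtlNtStatusToDosError entry becomes roughly:
 *
 *   static ULONG (WINAPI* RtlNtStatusToDosError)(NTSTATUS Status);
 *
 * nt_global_init() below resolves these pointers from ntdll.dll with
 * GetProcAddress(). */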
302
303
#define AFD_POLL_RECEIVE 0x0001
304
#define AFD_POLL_RECEIVE_EXPEDITED 0x0002
305
#define AFD_POLL_SEND 0x0004
306
#define AFD_POLL_DISCONNECT 0x0008
307
#define AFD_POLL_ABORT 0x0010
308
#define AFD_POLL_LOCAL_CLOSE 0x0020
309
#define AFD_POLL_ACCEPT 0x0080
310
#define AFD_POLL_CONNECT_FAIL 0x0100
311
312
typedef struct _AFD_POLL_HANDLE_INFO {
313
HANDLE Handle;
314
ULONG Events;
315
NTSTATUS Status;
316
} AFD_POLL_HANDLE_INFO, *PAFD_POLL_HANDLE_INFO;
317
318
typedef struct _AFD_POLL_INFO {
319
LARGE_INTEGER Timeout;
320
ULONG NumberOfHandles;
321
ULONG Exclusive;
322
AFD_POLL_HANDLE_INFO Handles[1];
323
} AFD_POLL_INFO, *PAFD_POLL_INFO;
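/* Illustrative note (not in the original source): AFD_POLL_INFO ends with a
 * one-element Handles[] array (the usual trailing-array idiom); wepoll only
 * ever polls a single socket per request, so the fixed-size struct suffices.
 * A request is filled in roughly like this (see sock_update() below):
 *
 *   AFD_POLL_INFO info;
 *   info.Exclusive = FALSE;
 *   info.NumberOfHandles = 1;
 *   info.Timeout.QuadPart = INT64_MAX;
 *   info.Handles[0].Handle = (HANDLE) base_socket;
 *   info.Handles[0].Status = 0;
 *   info.Handles[0].Events = AFD_POLL_RECEIVE | AFD_POLL_SEND;
 */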
324
325
WEPOLL_INTERNAL int afd_create_device_handle(HANDLE iocp_handle,
326
HANDLE* afd_device_handle_out);
327
328
WEPOLL_INTERNAL int afd_poll(HANDLE afd_device_handle,
329
AFD_POLL_INFO* poll_info,
330
IO_STATUS_BLOCK* io_status_block);
331
WEPOLL_INTERNAL int afd_cancel_poll(HANDLE afd_device_handle,
332
IO_STATUS_BLOCK* io_status_block);
333
334
#define return_map_error(value) \
335
do { \
336
err_map_win_error(); \
337
return (value); \
338
} while (0)
339
340
#define return_set_error(value, error) \
341
do { \
342
err_set_win_error(error); \
343
return (value); \
344
} while (0)
345
346
WEPOLL_INTERNAL void err_map_win_error(void);
347
WEPOLL_INTERNAL void err_set_win_error(DWORD error);
348
WEPOLL_INTERNAL int err_check_handle(HANDLE handle);
349
350
#define IOCTL_AFD_POLL 0x00012024
351
352
static UNICODE_STRING afd__device_name =
353
RTL_CONSTANT_STRING(L"\\Device\\Afd\\Wepoll");
354
355
static OBJECT_ATTRIBUTES afd__device_attributes =
356
RTL_CONSTANT_OBJECT_ATTRIBUTES(&afd__device_name, 0);
357
358
int afd_create_device_handle(HANDLE iocp_handle,
359
HANDLE* afd_device_handle_out) {
360
HANDLE afd_device_handle;
361
IO_STATUS_BLOCK iosb;
362
NTSTATUS status;
363
364
/* By opening \Device\Afd without specifying any extended attributes, we'll
365
* get a handle that lets us talk to the AFD driver, but that doesn't have an
366
* associated endpoint (so it's not a socket). */
367
status = NtCreateFile(&afd_device_handle,
368
SYNCHRONIZE,
369
&afd__device_attributes,
370
&iosb,
371
NULL,
372
0,
373
FILE_SHARE_READ | FILE_SHARE_WRITE,
374
FILE_OPEN,
375
0,
376
NULL,
377
0);
378
if (status != STATUS_SUCCESS)
379
return_set_error(-1, RtlNtStatusToDosError(status));
380
381
if (CreateIoCompletionPort(afd_device_handle, iocp_handle, 0, 0) == NULL)
382
goto error;
383
384
if (!SetFileCompletionNotificationModes(afd_device_handle,
385
FILE_SKIP_SET_EVENT_ON_HANDLE))
386
goto error;
387
388
*afd_device_handle_out = afd_device_handle;
389
return 0;
390
391
error:
392
CloseHandle(afd_device_handle);
393
return_map_error(-1);
394
}
395
396
int afd_poll(HANDLE afd_device_handle,
397
AFD_POLL_INFO* poll_info,
398
IO_STATUS_BLOCK* io_status_block) {
399
NTSTATUS status;
400
401
/* Blocking operation is not supported. */
402
assert(io_status_block != NULL);
403
404
io_status_block->Status = STATUS_PENDING;
405
status = NtDeviceIoControlFile(afd_device_handle,
406
NULL,
407
NULL,
408
io_status_block,
409
io_status_block,
410
IOCTL_AFD_POLL,
411
poll_info,
412
sizeof *poll_info,
413
poll_info,
414
sizeof *poll_info);
415
416
if (status == STATUS_SUCCESS)
417
return 0;
418
else if (status == STATUS_PENDING)
419
return_set_error(-1, ERROR_IO_PENDING);
420
else
421
return_set_error(-1, RtlNtStatusToDosError(status));
422
}
423
424
int afd_cancel_poll(HANDLE afd_device_handle,
425
IO_STATUS_BLOCK* io_status_block) {
426
NTSTATUS cancel_status;
427
IO_STATUS_BLOCK cancel_iosb;
428
429
/* If the poll operation has already completed or has been cancelled earlier,
430
* there's nothing left for us to do. */
431
if (io_status_block->Status != STATUS_PENDING)
432
return 0;
433
434
cancel_status =
435
NtCancelIoFileEx(afd_device_handle, io_status_block, &cancel_iosb);
436
437
/* NtCancelIoFileEx() may return STATUS_NOT_FOUND if the operation completed
438
* just before calling NtCancelIoFileEx(). This is not an error. */
439
if (cancel_status == STATUS_SUCCESS || cancel_status == STATUS_NOT_FOUND)
440
return 0;
441
else
442
return_set_error(-1, RtlNtStatusToDosError(cancel_status));
443
}
444
445
WEPOLL_INTERNAL int epoll_global_init(void);
446
447
WEPOLL_INTERNAL int init(void);
448
449
typedef struct port_state port_state_t;
450
typedef struct queue queue_t;
451
typedef struct sock_state sock_state_t;
452
typedef struct ts_tree_node ts_tree_node_t;
453
454
WEPOLL_INTERNAL port_state_t* port_new(HANDLE* iocp_handle_out);
455
WEPOLL_INTERNAL int port_close(port_state_t* port_state);
456
WEPOLL_INTERNAL int port_delete(port_state_t* port_state);
457
458
WEPOLL_INTERNAL int port_wait(port_state_t* port_state,
459
struct epoll_event* events,
460
int maxevents,
461
int timeout);
462
463
WEPOLL_INTERNAL int port_ctl(port_state_t* port_state,
464
int op,
465
SOCKET sock,
466
struct epoll_event* ev);
467
468
WEPOLL_INTERNAL int port_register_socket(port_state_t* port_state,
469
sock_state_t* sock_state,
470
SOCKET socket);
471
WEPOLL_INTERNAL void port_unregister_socket(port_state_t* port_state,
472
sock_state_t* sock_state);
473
WEPOLL_INTERNAL sock_state_t* port_find_socket(port_state_t* port_state,
474
SOCKET socket);
475
476
WEPOLL_INTERNAL void port_request_socket_update(port_state_t* port_state,
477
sock_state_t* sock_state);
478
WEPOLL_INTERNAL void port_cancel_socket_update(port_state_t* port_state,
479
sock_state_t* sock_state);
480
481
WEPOLL_INTERNAL void port_add_deleted_socket(port_state_t* port_state,
482
sock_state_t* sock_state);
483
WEPOLL_INTERNAL void port_remove_deleted_socket(port_state_t* port_state,
484
sock_state_t* sock_state);
485
486
WEPOLL_INTERNAL HANDLE port_get_iocp_handle(port_state_t* port_state);
487
WEPOLL_INTERNAL queue_t* port_get_poll_group_queue(port_state_t* port_state);
488
489
WEPOLL_INTERNAL port_state_t* port_state_from_handle_tree_node(
490
ts_tree_node_t* tree_node);
491
WEPOLL_INTERNAL ts_tree_node_t* port_state_to_handle_tree_node(
492
port_state_t* port_state);
493
494
/* The reflock is a special kind of lock that normally prevents a chunk of
 * memory from being freed, but does allow the chunk of memory to eventually be
 * released in a coordinated fashion.
 *
 * Under normal operation, threads increase and decrease the reference count;
 * both operations are wait-free.
 *
 * Exactly once during the reflock's lifecycle, a thread holding a reference to
 * the lock may "destroy" the lock; this operation blocks until all other
 * threads holding a reference to the lock have dereferenced it. After
 * "destroy" returns, the calling thread may assume that no other threads have
 * a reference to the lock.
 *
 * Attempting to lock or destroy a lock after reflock_unref_and_destroy() has
 * been called is invalid and results in undefined behavior. Therefore the user
 * should use another lock to guarantee that this can't happen.
 */
511
512
typedef struct reflock {
513
volatile long state; /* 32-bit Interlocked APIs operate on `long` values. */
514
} reflock_t;
515
516
WEPOLL_INTERNAL int reflock_global_init(void);
517
518
WEPOLL_INTERNAL void reflock_init(reflock_t* reflock);
519
WEPOLL_INTERNAL void reflock_ref(reflock_t* reflock);
520
WEPOLL_INTERNAL void reflock_unref(reflock_t* reflock);
521
WEPOLL_INTERNAL void reflock_unref_and_destroy(reflock_t* reflock);
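/* Illustrative note (not in the original source): a typical reflock lifecycle
 * combines the calls above as follows:
 *
 *   reflock_init(&obj->reflock);               // once, when obj is created
 *   reflock_ref(&obj->reflock);                // before each use of obj
 *   reflock_unref(&obj->reflock);              // after each use of obj
 *   reflock_unref_and_destroy(&obj->reflock);  // exactly once; blocks until
 *                                              // all other refs are gone
 *   free(obj);                                 // now safe to release
 */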
522
523
#include <stdbool.h>
524
525
/* N.b.: the tree functions do not set errno or LastError when they fail. Each
526
* of the API functions has at most one failure mode. It is up to the caller to
527
* set an appropriate error code when necessary. */
528
529
typedef struct tree tree_t;
530
typedef struct tree_node tree_node_t;
531
532
typedef struct tree {
533
tree_node_t* root;
534
} tree_t;
535
536
typedef struct tree_node {
537
tree_node_t* left;
538
tree_node_t* right;
539
tree_node_t* parent;
540
uintptr_t key;
541
bool red;
542
} tree_node_t;
543
544
WEPOLL_INTERNAL void tree_init(tree_t* tree);
545
WEPOLL_INTERNAL void tree_node_init(tree_node_t* node);
546
547
WEPOLL_INTERNAL int tree_add(tree_t* tree, tree_node_t* node, uintptr_t key);
548
WEPOLL_INTERNAL void tree_del(tree_t* tree, tree_node_t* node);
549
550
WEPOLL_INTERNAL tree_node_t* tree_find(const tree_t* tree, uintptr_t key);
551
WEPOLL_INTERNAL tree_node_t* tree_root(const tree_t* tree);
552
553
typedef struct ts_tree {
554
tree_t tree;
555
SRWLOCK lock;
556
} ts_tree_t;
557
558
typedef struct ts_tree_node {
559
tree_node_t tree_node;
560
reflock_t reflock;
561
} ts_tree_node_t;
562
563
WEPOLL_INTERNAL void ts_tree_init(ts_tree_t* rtl);
564
WEPOLL_INTERNAL void ts_tree_node_init(ts_tree_node_t* node);
565
566
WEPOLL_INTERNAL int ts_tree_add(ts_tree_t* ts_tree,
567
ts_tree_node_t* node,
568
uintptr_t key);
569
570
WEPOLL_INTERNAL ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree,
571
uintptr_t key);
572
WEPOLL_INTERNAL ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree,
573
uintptr_t key);
574
575
WEPOLL_INTERNAL void ts_tree_node_unref(ts_tree_node_t* node);
576
WEPOLL_INTERNAL void ts_tree_node_unref_and_destroy(ts_tree_node_t* node);
577
578
static ts_tree_t epoll__handle_tree;
579
580
int epoll_global_init(void) {
581
ts_tree_init(&epoll__handle_tree);
582
return 0;
583
}
584
585
static HANDLE epoll__create(void) {
586
port_state_t* port_state;
587
HANDLE ephnd;
588
ts_tree_node_t* tree_node;
589
590
if (init() < 0)
591
return NULL;
592
593
port_state = port_new(&ephnd);
594
if (port_state == NULL)
595
return NULL;
596
597
tree_node = port_state_to_handle_tree_node(port_state);
598
if (ts_tree_add(&epoll__handle_tree, tree_node, (uintptr_t) ephnd) < 0) {
599
/* This should never happen. */
600
port_delete(port_state);
601
return_set_error(NULL, ERROR_ALREADY_EXISTS);
602
}
603
604
return ephnd;
605
}
606
607
HANDLE epoll_create(int size) {
608
if (size <= 0)
609
return_set_error(NULL, ERROR_INVALID_PARAMETER);
610
611
return epoll__create();
612
}
613
614
HANDLE epoll_create1(int flags) {
615
if (flags != 0)
616
return_set_error(NULL, ERROR_INVALID_PARAMETER);
617
618
return epoll__create();
619
}
620
621
int epoll_close(HANDLE ephnd) {
622
ts_tree_node_t* tree_node;
623
port_state_t* port_state;
624
625
if (init() < 0)
626
return -1;
627
628
tree_node = ts_tree_del_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);
629
if (tree_node == NULL) {
630
err_set_win_error(ERROR_INVALID_PARAMETER);
631
goto err;
632
}
633
634
port_state = port_state_from_handle_tree_node(tree_node);
635
port_close(port_state);
636
637
ts_tree_node_unref_and_destroy(tree_node);
638
639
return port_delete(port_state);
640
641
err:
642
err_check_handle(ephnd);
643
return -1;
644
}
645
646
int epoll_ctl(HANDLE ephnd, int op, SOCKET sock, struct epoll_event* ev) {
647
ts_tree_node_t* tree_node;
648
port_state_t* port_state;
649
int r;
650
651
if (init() < 0)
652
return -1;
653
654
tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);
655
if (tree_node == NULL) {
656
err_set_win_error(ERROR_INVALID_PARAMETER);
657
goto err;
658
}
659
660
port_state = port_state_from_handle_tree_node(tree_node);
661
r = port_ctl(port_state, op, sock, ev);
662
663
ts_tree_node_unref(tree_node);
664
665
if (r < 0)
666
goto err;
667
668
return 0;
669
670
err:
671
/* On Linux, in the case of epoll_ctl(), EBADF takes priority over other
672
* errors. Wepoll mimics this behavior. */
673
err_check_handle(ephnd);
674
err_check_handle((HANDLE) sock);
675
return -1;
676
}
677
678
int epoll_wait(HANDLE ephnd,
679
struct epoll_event* events,
680
int maxevents,
681
int timeout) {
682
ts_tree_node_t* tree_node;
683
port_state_t* port_state;
684
int num_events;
685
686
if (maxevents <= 0)
687
return_set_error(-1, ERROR_INVALID_PARAMETER);
688
689
if (init() < 0)
690
return -1;
691
692
tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t) ephnd);
693
if (tree_node == NULL) {
694
err_set_win_error(ERROR_INVALID_PARAMETER);
695
goto err;
696
}
697
698
port_state = port_state_from_handle_tree_node(tree_node);
699
num_events = port_wait(port_state, events, maxevents, timeout);
700
701
ts_tree_node_unref(tree_node);
702
703
if (num_events < 0)
704
goto err;
705
706
return num_events;
707
708
err:
709
err_check_handle(ephnd);
710
return -1;
711
}
712
713
#include <errno.h>
714
715
#define ERR__ERRNO_MAPPINGS(X) \
716
X(ERROR_ACCESS_DENIED, EACCES) \
717
X(ERROR_ALREADY_EXISTS, EEXIST) \
718
X(ERROR_BAD_COMMAND, EACCES) \
719
X(ERROR_BAD_EXE_FORMAT, ENOEXEC) \
720
X(ERROR_BAD_LENGTH, EACCES) \
721
X(ERROR_BAD_NETPATH, ENOENT) \
722
X(ERROR_BAD_NET_NAME, ENOENT) \
723
X(ERROR_BAD_NET_RESP, ENETDOWN) \
724
X(ERROR_BAD_PATHNAME, ENOENT) \
725
X(ERROR_BROKEN_PIPE, EPIPE) \
726
X(ERROR_CANNOT_MAKE, EACCES) \
727
X(ERROR_COMMITMENT_LIMIT, ENOMEM) \
728
X(ERROR_CONNECTION_ABORTED, ECONNABORTED) \
729
X(ERROR_CONNECTION_ACTIVE, EISCONN) \
730
X(ERROR_CONNECTION_REFUSED, ECONNREFUSED) \
731
X(ERROR_CRC, EACCES) \
732
X(ERROR_DIR_NOT_EMPTY, ENOTEMPTY) \
733
X(ERROR_DISK_FULL, ENOSPC) \
734
X(ERROR_DUP_NAME, EADDRINUSE) \
735
X(ERROR_FILENAME_EXCED_RANGE, ENOENT) \
736
X(ERROR_FILE_NOT_FOUND, ENOENT) \
737
X(ERROR_GEN_FAILURE, EACCES) \
738
X(ERROR_GRACEFUL_DISCONNECT, EPIPE) \
739
X(ERROR_HOST_DOWN, EHOSTUNREACH) \
740
X(ERROR_HOST_UNREACHABLE, EHOSTUNREACH) \
741
X(ERROR_INSUFFICIENT_BUFFER, EFAULT) \
742
X(ERROR_INVALID_ADDRESS, EADDRNOTAVAIL) \
743
X(ERROR_INVALID_FUNCTION, EINVAL) \
744
X(ERROR_INVALID_HANDLE, EBADF) \
745
X(ERROR_INVALID_NETNAME, EADDRNOTAVAIL) \
746
X(ERROR_INVALID_PARAMETER, EINVAL) \
747
X(ERROR_INVALID_USER_BUFFER, EMSGSIZE) \
748
X(ERROR_IO_PENDING, EINPROGRESS) \
749
X(ERROR_LOCK_VIOLATION, EACCES) \
750
X(ERROR_MORE_DATA, EMSGSIZE) \
751
X(ERROR_NETNAME_DELETED, ECONNABORTED) \
752
X(ERROR_NETWORK_ACCESS_DENIED, EACCES) \
753
X(ERROR_NETWORK_BUSY, ENETDOWN) \
754
X(ERROR_NETWORK_UNREACHABLE, ENETUNREACH) \
755
X(ERROR_NOACCESS, EFAULT) \
756
X(ERROR_NONPAGED_SYSTEM_RESOURCES, ENOMEM) \
757
X(ERROR_NOT_ENOUGH_MEMORY, ENOMEM) \
758
X(ERROR_NOT_ENOUGH_QUOTA, ENOMEM) \
759
X(ERROR_NOT_FOUND, ENOENT) \
760
X(ERROR_NOT_LOCKED, EACCES) \
761
X(ERROR_NOT_READY, EACCES) \
762
X(ERROR_NOT_SAME_DEVICE, EXDEV) \
763
X(ERROR_NOT_SUPPORTED, ENOTSUP) \
764
X(ERROR_NO_MORE_FILES, ENOENT) \
765
X(ERROR_NO_SYSTEM_RESOURCES, ENOMEM) \
766
X(ERROR_OPERATION_ABORTED, EINTR) \
767
X(ERROR_OUT_OF_PAPER, EACCES) \
768
X(ERROR_PAGED_SYSTEM_RESOURCES, ENOMEM) \
769
X(ERROR_PAGEFILE_QUOTA, ENOMEM) \
770
X(ERROR_PATH_NOT_FOUND, ENOENT) \
771
X(ERROR_PIPE_NOT_CONNECTED, EPIPE) \
772
X(ERROR_PORT_UNREACHABLE, ECONNRESET) \
773
X(ERROR_PROTOCOL_UNREACHABLE, ENETUNREACH) \
774
X(ERROR_REM_NOT_LIST, ECONNREFUSED) \
775
X(ERROR_REQUEST_ABORTED, EINTR) \
776
X(ERROR_REQ_NOT_ACCEP, EWOULDBLOCK) \
777
X(ERROR_SECTOR_NOT_FOUND, EACCES) \
778
X(ERROR_SEM_TIMEOUT, ETIMEDOUT) \
779
X(ERROR_SHARING_VIOLATION, EACCES) \
780
X(ERROR_TOO_MANY_NAMES, ENOMEM) \
781
X(ERROR_TOO_MANY_OPEN_FILES, EMFILE) \
782
X(ERROR_UNEXP_NET_ERR, ECONNABORTED) \
783
X(ERROR_WAIT_NO_CHILDREN, ECHILD) \
784
X(ERROR_WORKING_SET_QUOTA, ENOMEM) \
785
X(ERROR_WRITE_PROTECT, EACCES) \
786
X(ERROR_WRONG_DISK, EACCES) \
787
X(WSAEACCES, EACCES) \
788
X(WSAEADDRINUSE, EADDRINUSE) \
789
X(WSAEADDRNOTAVAIL, EADDRNOTAVAIL) \
790
X(WSAEAFNOSUPPORT, EAFNOSUPPORT) \
791
X(WSAECONNABORTED, ECONNABORTED) \
792
X(WSAECONNREFUSED, ECONNREFUSED) \
793
X(WSAECONNRESET, ECONNRESET) \
794
X(WSAEDISCON, EPIPE) \
795
X(WSAEFAULT, EFAULT) \
796
X(WSAEHOSTDOWN, EHOSTUNREACH) \
797
X(WSAEHOSTUNREACH, EHOSTUNREACH) \
798
X(WSAEINPROGRESS, EBUSY) \
799
X(WSAEINTR, EINTR) \
800
X(WSAEINVAL, EINVAL) \
801
X(WSAEISCONN, EISCONN) \
802
X(WSAEMSGSIZE, EMSGSIZE) \
803
X(WSAENETDOWN, ENETDOWN) \
804
X(WSAENETRESET, EHOSTUNREACH) \
805
X(WSAENETUNREACH, ENETUNREACH) \
806
X(WSAENOBUFS, ENOMEM) \
807
X(WSAENOTCONN, ENOTCONN) \
808
X(WSAENOTSOCK, ENOTSOCK) \
809
X(WSAEOPNOTSUPP, EOPNOTSUPP) \
810
X(WSAEPROCLIM, ENOMEM) \
811
X(WSAESHUTDOWN, EPIPE) \
812
X(WSAETIMEDOUT, ETIMEDOUT) \
813
X(WSAEWOULDBLOCK, EWOULDBLOCK) \
814
X(WSANOTINITIALISED, ENETDOWN) \
815
X(WSASYSNOTREADY, ENETDOWN) \
816
X(WSAVERNOTSUPPORTED, ENOSYS)
817
818
static errno_t err__map_win_error_to_errno(DWORD error) {
819
switch (error) {
820
#define X(error_sym, errno_sym) \
821
case error_sym: \
822
return errno_sym;
823
ERR__ERRNO_MAPPINGS(X)
824
#undef X
825
}
826
return EINVAL;
827
}
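/* Illustrative note (not in the original source): inside the switch above,
 * each ERR__ERRNO_MAPPINGS() pair expands to a plain case label, e.g. the
 * (ERROR_ACCESS_DENIED, EACCES) entry becomes:
 *
 *   case ERROR_ACCESS_DENIED:
 *     return EACCES;
 */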
828
829
void err_map_win_error(void) {
830
errno = err__map_win_error_to_errno(GetLastError());
831
}
832
833
void err_set_win_error(DWORD error) {
834
SetLastError(error);
835
errno = err__map_win_error_to_errno(error);
836
}
837
838
int err_check_handle(HANDLE handle) {
839
DWORD flags;
840
841
/* GetHandleInformation() succeeds when passed INVALID_HANDLE_VALUE, so check
842
* for this condition explicitly. */
843
if (handle == INVALID_HANDLE_VALUE)
844
return_set_error(-1, ERROR_INVALID_HANDLE);
845
846
if (!GetHandleInformation(handle, &flags))
847
return_map_error(-1);
848
849
return 0;
850
}
851
852
#include <stddef.h>
853
854
#define array_count(a) (sizeof(a) / (sizeof((a)[0])))
855
856
#define container_of(ptr, type, member) \
857
((type*) ((uintptr_t) (ptr) - offsetof(type, member)))
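/* Illustrative note (not in the original source): container_of() recovers a
 * pointer to an enclosing struct from a pointer to one of its members, e.g.
 * for a hypothetical wrapper type:
 *
 *   typedef struct item { int value; queue_node_t queue_node; } item_t;
 *   item_t* item = container_of(queue_node_ptr, item_t, queue_node);
 *
 * The *_from_queue_node() / *_from_tree_node() helpers below are built on it. */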
858
859
#define unused_var(v) ((void) (v))
860
861
/* Polyfill `inline` for older versions of msvc (up to Visual Studio 2013) */
862
#if defined(_MSC_VER) && _MSC_VER < 1900
863
#define inline __inline
864
#endif
865
866
WEPOLL_INTERNAL int ws_global_init(void);
867
WEPOLL_INTERNAL SOCKET ws_get_base_socket(SOCKET socket);
868
869
static bool init__done = false;
870
static INIT_ONCE init__once = INIT_ONCE_STATIC_INIT;
871
872
static BOOL CALLBACK init__once_callback(INIT_ONCE* once,
873
void* parameter,
874
void** context) {
875
unused_var(once);
876
unused_var(parameter);
877
unused_var(context);
878
879
/* N.b. that initialization order matters here. */
880
if (ws_global_init() < 0 || nt_global_init() < 0 ||
881
reflock_global_init() < 0 || epoll_global_init() < 0)
882
return FALSE;
883
884
init__done = true;
885
return TRUE;
886
}
887
888
int init(void) {
889
if (!init__done &&
890
!InitOnceExecuteOnce(&init__once, init__once_callback, NULL, NULL))
891
/* `InitOnceExecuteOnce()` itself is infallible, and it doesn't set any
 * error code when the once-callback returns FALSE. We return -1 here to
 * indicate that global initialization failed; the failing init function is
 * responsible for setting `errno` and calling `SetLastError()`. */
895
return -1;
896
897
return 0;
898
}
899
900
/* Set up a workaround for the following problem:
901
* FARPROC addr = GetProcAddress(...);
902
* MY_FUNC func = (MY_FUNC) addr; <-- GCC 8 warning/error.
903
* MY_FUNC func = (MY_FUNC) (void*) addr; <-- MSVC warning/error.
904
* To compile cleanly with either compiler, do casts with this "bridge" type:
905
* MY_FUNC func = (MY_FUNC) (nt__fn_ptr_cast_t) addr; */
906
#ifdef __GNUC__
907
typedef void* nt__fn_ptr_cast_t;
908
#else
909
typedef FARPROC nt__fn_ptr_cast_t;
910
#endif
911
912
#define X(return_type, attributes, name, parameters) \
913
WEPOLL_INTERNAL return_type(attributes* name) parameters = NULL;
914
NT_NTDLL_IMPORT_LIST(X)
915
#undef X
916
917
int nt_global_init(void) {
918
HMODULE ntdll;
919
FARPROC fn_ptr;
920
921
ntdll = GetModuleHandleW(L"ntdll.dll");
922
if (ntdll == NULL)
923
return -1;
924
925
#define X(return_type, attributes, name, parameters) \
926
fn_ptr = GetProcAddress(ntdll, #name); \
927
if (fn_ptr == NULL) \
928
return -1; \
929
name = (return_type(attributes*) parameters)(nt__fn_ptr_cast_t) fn_ptr;
930
NT_NTDLL_IMPORT_LIST(X)
931
#undef X
932
933
return 0;
934
}
935
936
#include <string.h>
937
938
typedef struct poll_group poll_group_t;
939
940
typedef struct queue_node queue_node_t;
941
942
WEPOLL_INTERNAL poll_group_t* poll_group_acquire(port_state_t* port);
943
WEPOLL_INTERNAL void poll_group_release(poll_group_t* poll_group);
944
945
WEPOLL_INTERNAL void poll_group_delete(poll_group_t* poll_group);
946
947
WEPOLL_INTERNAL poll_group_t* poll_group_from_queue_node(
948
queue_node_t* queue_node);
949
WEPOLL_INTERNAL HANDLE
950
poll_group_get_afd_device_handle(poll_group_t* poll_group);
951
952
typedef struct queue_node {
953
queue_node_t* prev;
954
queue_node_t* next;
955
} queue_node_t;
956
957
typedef struct queue {
958
queue_node_t head;
959
} queue_t;
960
961
WEPOLL_INTERNAL void queue_init(queue_t* queue);
962
WEPOLL_INTERNAL void queue_node_init(queue_node_t* node);
963
964
WEPOLL_INTERNAL queue_node_t* queue_first(const queue_t* queue);
965
WEPOLL_INTERNAL queue_node_t* queue_last(const queue_t* queue);
966
967
WEPOLL_INTERNAL void queue_prepend(queue_t* queue, queue_node_t* node);
968
WEPOLL_INTERNAL void queue_append(queue_t* queue, queue_node_t* node);
969
WEPOLL_INTERNAL void queue_move_to_start(queue_t* queue, queue_node_t* node);
970
WEPOLL_INTERNAL void queue_move_to_end(queue_t* queue, queue_node_t* node);
971
WEPOLL_INTERNAL void queue_remove(queue_node_t* node);
972
973
WEPOLL_INTERNAL bool queue_is_empty(const queue_t* queue);
974
WEPOLL_INTERNAL bool queue_is_enqueued(const queue_node_t* node);
975
976
#define POLL_GROUP__MAX_GROUP_SIZE 32
977
978
typedef struct poll_group {
979
port_state_t* port_state;
980
queue_node_t queue_node;
981
HANDLE afd_device_handle;
982
size_t group_size;
983
} poll_group_t;
984
985
static poll_group_t* poll_group__new(port_state_t* port_state) {
986
HANDLE iocp_handle = port_get_iocp_handle(port_state);
987
queue_t* poll_group_queue = port_get_poll_group_queue(port_state);
988
989
poll_group_t* poll_group = malloc(sizeof *poll_group);
990
if (poll_group == NULL)
991
return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);
992
993
memset(poll_group, 0, sizeof *poll_group);
994
995
queue_node_init(&poll_group->queue_node);
996
poll_group->port_state = port_state;
997
998
if (afd_create_device_handle(iocp_handle, &poll_group->afd_device_handle) <
999
0) {
1000
free(poll_group);
1001
return NULL;
1002
}
1003
1004
queue_append(poll_group_queue, &poll_group->queue_node);
1005
1006
return poll_group;
1007
}
1008
1009
void poll_group_delete(poll_group_t* poll_group) {
1010
assert(poll_group->group_size == 0);
1011
CloseHandle(poll_group->afd_device_handle);
1012
queue_remove(&poll_group->queue_node);
1013
free(poll_group);
1014
}
1015
1016
poll_group_t* poll_group_from_queue_node(queue_node_t* queue_node) {
1017
return container_of(queue_node, poll_group_t, queue_node);
1018
}
1019
1020
HANDLE poll_group_get_afd_device_handle(poll_group_t* poll_group) {
1021
return poll_group->afd_device_handle;
1022
}
1023
1024
poll_group_t* poll_group_acquire(port_state_t* port_state) {
1025
queue_t* poll_group_queue = port_get_poll_group_queue(port_state);
1026
poll_group_t* poll_group =
1027
!queue_is_empty(poll_group_queue)
1028
? container_of(
1029
queue_last(poll_group_queue), poll_group_t, queue_node)
1030
: NULL;
1031
1032
if (poll_group == NULL ||
1033
poll_group->group_size >= POLL_GROUP__MAX_GROUP_SIZE)
1034
poll_group = poll_group__new(port_state);
1035
if (poll_group == NULL)
1036
return NULL;
1037
1038
if (++poll_group->group_size == POLL_GROUP__MAX_GROUP_SIZE)
1039
queue_move_to_start(poll_group_queue, &poll_group->queue_node);
1040
1041
return poll_group;
1042
}
1043
1044
void poll_group_release(poll_group_t* poll_group) {
1045
port_state_t* port_state = poll_group->port_state;
1046
queue_t* poll_group_queue = port_get_poll_group_queue(port_state);
1047
1048
poll_group->group_size--;
1049
assert(poll_group->group_size < POLL_GROUP__MAX_GROUP_SIZE);
1050
1051
queue_move_to_end(poll_group_queue, &poll_group->queue_node);
1052
1053
/* Poll groups are currently only freed when the epoll port is closed. */
1054
}
1055
1056
WEPOLL_INTERNAL sock_state_t* sock_new(port_state_t* port_state,
1057
SOCKET socket);
1058
WEPOLL_INTERNAL void sock_delete(port_state_t* port_state,
1059
sock_state_t* sock_state);
1060
WEPOLL_INTERNAL void sock_force_delete(port_state_t* port_state,
1061
sock_state_t* sock_state);
1062
1063
WEPOLL_INTERNAL int sock_set_event(port_state_t* port_state,
1064
sock_state_t* sock_state,
1065
const struct epoll_event* ev);
1066
1067
WEPOLL_INTERNAL int sock_update(port_state_t* port_state,
1068
sock_state_t* sock_state);
1069
WEPOLL_INTERNAL int sock_feed_event(port_state_t* port_state,
1070
IO_STATUS_BLOCK* io_status_block,
1071
struct epoll_event* ev);
1072
1073
WEPOLL_INTERNAL sock_state_t* sock_state_from_queue_node(
1074
queue_node_t* queue_node);
1075
WEPOLL_INTERNAL queue_node_t* sock_state_to_queue_node(
1076
sock_state_t* sock_state);
1077
WEPOLL_INTERNAL sock_state_t* sock_state_from_tree_node(
1078
tree_node_t* tree_node);
1079
WEPOLL_INTERNAL tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state);
1080
1081
#define PORT__MAX_ON_STACK_COMPLETIONS 256
1082
1083
typedef struct port_state {
1084
HANDLE iocp_handle;
1085
tree_t sock_tree;
1086
queue_t sock_update_queue;
1087
queue_t sock_deleted_queue;
1088
queue_t poll_group_queue;
1089
ts_tree_node_t handle_tree_node;
1090
CRITICAL_SECTION lock;
1091
size_t active_poll_count;
1092
} port_state_t;
1093
1094
static inline port_state_t* port__alloc(void) {
1095
port_state_t* port_state = malloc(sizeof *port_state);
1096
if (port_state == NULL)
1097
return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);
1098
1099
return port_state;
1100
}
1101
1102
static inline void port__free(port_state_t* port) {
1103
assert(port != NULL);
1104
free(port);
1105
}
1106
1107
static inline HANDLE port__create_iocp(void) {
1108
HANDLE iocp_handle =
1109
CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
1110
if (iocp_handle == NULL)
1111
return_map_error(NULL);
1112
1113
return iocp_handle;
1114
}
1115
1116
port_state_t* port_new(HANDLE* iocp_handle_out) {
1117
port_state_t* port_state;
1118
HANDLE iocp_handle;
1119
1120
port_state = port__alloc();
1121
if (port_state == NULL)
1122
goto err1;
1123
1124
iocp_handle = port__create_iocp();
1125
if (iocp_handle == NULL)
1126
goto err2;
1127
1128
memset(port_state, 0, sizeof *port_state);
1129
1130
port_state->iocp_handle = iocp_handle;
1131
tree_init(&port_state->sock_tree);
1132
queue_init(&port_state->sock_update_queue);
1133
queue_init(&port_state->sock_deleted_queue);
1134
queue_init(&port_state->poll_group_queue);
1135
ts_tree_node_init(&port_state->handle_tree_node);
1136
InitializeCriticalSection(&port_state->lock);
1137
1138
*iocp_handle_out = iocp_handle;
1139
return port_state;
1140
1141
err2:
1142
port__free(port_state);
1143
err1:
1144
return NULL;
1145
}
1146
1147
static inline int port__close_iocp(port_state_t* port_state) {
1148
HANDLE iocp_handle = port_state->iocp_handle;
1149
port_state->iocp_handle = NULL;
1150
1151
if (!CloseHandle(iocp_handle))
1152
return_map_error(-1);
1153
1154
return 0;
1155
}
1156
1157
int port_close(port_state_t* port_state) {
1158
int result;
1159
1160
EnterCriticalSection(&port_state->lock);
1161
result = port__close_iocp(port_state);
1162
LeaveCriticalSection(&port_state->lock);
1163
1164
return result;
1165
}
1166
1167
int port_delete(port_state_t* port_state) {
1168
tree_node_t* tree_node;
1169
queue_node_t* queue_node;
1170
1171
/* At this point the IOCP port should have been closed. */
1172
assert(port_state->iocp_handle == NULL);
1173
1174
while ((tree_node = tree_root(&port_state->sock_tree)) != NULL) {
1175
sock_state_t* sock_state = sock_state_from_tree_node(tree_node);
1176
sock_force_delete(port_state, sock_state);
1177
}
1178
1179
while ((queue_node = queue_first(&port_state->sock_deleted_queue)) != NULL) {
1180
sock_state_t* sock_state = sock_state_from_queue_node(queue_node);
1181
sock_force_delete(port_state, sock_state);
1182
}
1183
1184
while ((queue_node = queue_first(&port_state->poll_group_queue)) != NULL) {
1185
poll_group_t* poll_group = poll_group_from_queue_node(queue_node);
1186
poll_group_delete(poll_group);
1187
}
1188
1189
assert(queue_is_empty(&port_state->sock_update_queue));
1190
1191
DeleteCriticalSection(&port_state->lock);
1192
1193
port__free(port_state);
1194
1195
return 0;
1196
}
1197
1198
static int port__update_events(port_state_t* port_state) {
1199
queue_t* sock_update_queue = &port_state->sock_update_queue;
1200
1201
/* Walk the queue, submitting new poll requests for every socket that needs
1202
* it. */
1203
while (!queue_is_empty(sock_update_queue)) {
1204
queue_node_t* queue_node = queue_first(sock_update_queue);
1205
sock_state_t* sock_state = sock_state_from_queue_node(queue_node);
1206
1207
if (sock_update(port_state, sock_state) < 0)
1208
return -1;
1209
1210
/* sock_update() removes the socket from the update queue. */
1211
}
1212
1213
return 0;
1214
}
1215
1216
static inline void port__update_events_if_polling(port_state_t* port_state) {
1217
if (port_state->active_poll_count > 0)
1218
port__update_events(port_state);
1219
}
1220
1221
static inline int port__feed_events(port_state_t* port_state,
1222
struct epoll_event* epoll_events,
1223
OVERLAPPED_ENTRY* iocp_events,
1224
DWORD iocp_event_count) {
1225
int epoll_event_count = 0;
1226
DWORD i;
1227
1228
for (i = 0; i < iocp_event_count; i++) {
1229
IO_STATUS_BLOCK* io_status_block =
1230
(IO_STATUS_BLOCK*) iocp_events[i].lpOverlapped;
1231
struct epoll_event* ev = &epoll_events[epoll_event_count];
1232
1233
epoll_event_count += sock_feed_event(port_state, io_status_block, ev);
1234
}
1235
1236
return epoll_event_count;
1237
}
1238
1239
static inline int port__poll(port_state_t* port_state,
1240
struct epoll_event* epoll_events,
1241
OVERLAPPED_ENTRY* iocp_events,
1242
DWORD maxevents,
1243
DWORD timeout) {
1244
DWORD completion_count;
1245
1246
if (port__update_events(port_state) < 0)
1247
return -1;
1248
1249
port_state->active_poll_count++;
1250
1251
LeaveCriticalSection(&port_state->lock);
1252
1253
BOOL r = GetQueuedCompletionStatusEx(port_state->iocp_handle,
1254
iocp_events,
1255
maxevents,
1256
&completion_count,
1257
timeout,
1258
FALSE);
1259
1260
EnterCriticalSection(&port_state->lock);
1261
1262
port_state->active_poll_count--;
1263
1264
if (!r)
1265
return_map_error(-1);
1266
1267
return port__feed_events(
1268
port_state, epoll_events, iocp_events, completion_count);
1269
}
1270
1271
int port_wait(port_state_t* port_state,
1272
struct epoll_event* events,
1273
int maxevents,
1274
int timeout) {
1275
OVERLAPPED_ENTRY stack_iocp_events[PORT__MAX_ON_STACK_COMPLETIONS];
1276
OVERLAPPED_ENTRY* iocp_events;
1277
uint64_t due = 0;
1278
DWORD gqcs_timeout;
1279
int result;
1280
1281
/* Check whether `maxevents` is in range. */
1282
if (maxevents <= 0)
1283
return_set_error(-1, ERROR_INVALID_PARAMETER);
1284
1285
/* Decide whether the IOCP completion list can live on the stack, or allocate
1286
* memory for it on the heap. */
1287
if ((size_t) maxevents <= array_count(stack_iocp_events)) {
1288
iocp_events = stack_iocp_events;
1289
} else if ((iocp_events =
1290
malloc((size_t) maxevents * sizeof *iocp_events)) == NULL) {
1291
iocp_events = stack_iocp_events;
1292
maxevents = array_count(stack_iocp_events);
1293
}
1294
1295
/* Compute the timeout for GetQueuedCompletionStatus, and the wait end
1296
* time, if the user specified a timeout other than zero or infinite. */
1297
if (timeout > 0) {
1298
due = GetTickCount64() + (uint64_t) timeout;
1299
gqcs_timeout = (DWORD) timeout;
1300
} else if (timeout == 0) {
1301
gqcs_timeout = 0;
1302
} else {
1303
gqcs_timeout = INFINITE;
1304
}
1305
1306
EnterCriticalSection(&port_state->lock);
1307
1308
/* Dequeue completion packets until either at least one interesting event
1309
* has been discovered, or the timeout is reached. */
1310
for (;;) {
1311
uint64_t now;
1312
1313
result = port__poll(
1314
port_state, events, iocp_events, (DWORD) maxevents, gqcs_timeout);
1315
if (result < 0 || result > 0)
1316
break; /* Result, error, or time-out. */
1317
1318
if (timeout < 0)
1319
continue; /* When timeout is negative, never time out. */
1320
1321
/* Update time. */
1322
now = GetTickCount64();
1323
1324
/* Do not allow the due time to be in the past. */
1325
if (now >= due) {
1326
SetLastError(WAIT_TIMEOUT);
1327
break;
1328
}
1329
1330
/* Recompute time-out argument for GetQueuedCompletionStatus. */
1331
gqcs_timeout = (DWORD)(due - now);
1332
}
1333
1334
port__update_events_if_polling(port_state);
1335
1336
LeaveCriticalSection(&port_state->lock);
1337
1338
if (iocp_events != stack_iocp_events)
1339
free(iocp_events);
1340
1341
if (result >= 0)
1342
return result;
1343
else if (GetLastError() == WAIT_TIMEOUT)
1344
return 0;
1345
else
1346
return -1;
1347
}
1348
1349
static inline int port__ctl_add(port_state_t* port_state,
1350
SOCKET sock,
1351
struct epoll_event* ev) {
1352
sock_state_t* sock_state = sock_new(port_state, sock);
1353
if (sock_state == NULL)
1354
return -1;
1355
1356
if (sock_set_event(port_state, sock_state, ev) < 0) {
1357
sock_delete(port_state, sock_state);
1358
return -1;
1359
}
1360
1361
port__update_events_if_polling(port_state);
1362
1363
return 0;
1364
}
1365
1366
static inline int port__ctl_mod(port_state_t* port_state,
1367
SOCKET sock,
1368
struct epoll_event* ev) {
1369
sock_state_t* sock_state = port_find_socket(port_state, sock);
1370
if (sock_state == NULL)
1371
return -1;
1372
1373
if (sock_set_event(port_state, sock_state, ev) < 0)
1374
return -1;
1375
1376
port__update_events_if_polling(port_state);
1377
1378
return 0;
1379
}
1380
1381
static inline int port__ctl_del(port_state_t* port_state, SOCKET sock) {
1382
sock_state_t* sock_state = port_find_socket(port_state, sock);
1383
if (sock_state == NULL)
1384
return -1;
1385
1386
sock_delete(port_state, sock_state);
1387
1388
return 0;
1389
}
1390
1391
static inline int port__ctl_op(port_state_t* port_state,
1392
int op,
1393
SOCKET sock,
1394
struct epoll_event* ev) {
1395
switch (op) {
1396
case EPOLL_CTL_ADD:
1397
return port__ctl_add(port_state, sock, ev);
1398
case EPOLL_CTL_MOD:
1399
return port__ctl_mod(port_state, sock, ev);
1400
case EPOLL_CTL_DEL:
1401
return port__ctl_del(port_state, sock);
1402
default:
1403
return_set_error(-1, ERROR_INVALID_PARAMETER);
1404
}
1405
}
1406
1407
int port_ctl(port_state_t* port_state,
1408
int op,
1409
SOCKET sock,
1410
struct epoll_event* ev) {
1411
int result;
1412
1413
EnterCriticalSection(&port_state->lock);
1414
result = port__ctl_op(port_state, op, sock, ev);
1415
LeaveCriticalSection(&port_state->lock);
1416
1417
return result;
1418
}
1419
1420
int port_register_socket(port_state_t* port_state,
1421
sock_state_t* sock_state,
1422
SOCKET socket) {
1423
if (tree_add(&port_state->sock_tree,
1424
sock_state_to_tree_node(sock_state),
1425
socket) < 0)
1426
return_set_error(-1, ERROR_ALREADY_EXISTS);
1427
return 0;
1428
}
1429
1430
void port_unregister_socket(port_state_t* port_state,
1431
sock_state_t* sock_state) {
1432
tree_del(&port_state->sock_tree, sock_state_to_tree_node(sock_state));
1433
}
1434
1435
sock_state_t* port_find_socket(port_state_t* port_state, SOCKET socket) {
1436
tree_node_t* tree_node = tree_find(&port_state->sock_tree, socket);
1437
if (tree_node == NULL)
1438
return_set_error(NULL, ERROR_NOT_FOUND);
1439
return sock_state_from_tree_node(tree_node);
1440
}
1441
1442
void port_request_socket_update(port_state_t* port_state,
1443
sock_state_t* sock_state) {
1444
if (queue_is_enqueued(sock_state_to_queue_node(sock_state)))
1445
return;
1446
queue_append(&port_state->sock_update_queue,
1447
sock_state_to_queue_node(sock_state));
1448
}
1449
1450
void port_cancel_socket_update(port_state_t* port_state,
1451
sock_state_t* sock_state) {
1452
unused_var(port_state);
1453
if (!queue_is_enqueued(sock_state_to_queue_node(sock_state)))
1454
return;
1455
queue_remove(sock_state_to_queue_node(sock_state));
1456
}
1457
1458
void port_add_deleted_socket(port_state_t* port_state,
1459
sock_state_t* sock_state) {
1460
if (queue_is_enqueued(sock_state_to_queue_node(sock_state)))
1461
return;
1462
queue_append(&port_state->sock_deleted_queue,
1463
sock_state_to_queue_node(sock_state));
1464
}
1465
1466
void port_remove_deleted_socket(port_state_t* port_state,
1467
sock_state_t* sock_state) {
1468
unused_var(port_state);
1469
if (!queue_is_enqueued(sock_state_to_queue_node(sock_state)))
1470
return;
1471
queue_remove(sock_state_to_queue_node(sock_state));
1472
}
1473
1474
HANDLE port_get_iocp_handle(port_state_t* port_state) {
1475
assert(port_state->iocp_handle != NULL);
1476
return port_state->iocp_handle;
1477
}
1478
1479
queue_t* port_get_poll_group_queue(port_state_t* port_state) {
1480
return &port_state->poll_group_queue;
1481
}
1482
1483
port_state_t* port_state_from_handle_tree_node(ts_tree_node_t* tree_node) {
1484
return container_of(tree_node, port_state_t, handle_tree_node);
1485
}
1486
1487
ts_tree_node_t* port_state_to_handle_tree_node(port_state_t* port_state) {
1488
return &port_state->handle_tree_node;
1489
}
1490
1491
void queue_init(queue_t* queue) {
1492
queue_node_init(&queue->head);
1493
}
1494
1495
void queue_node_init(queue_node_t* node) {
1496
node->prev = node;
1497
node->next = node;
1498
}
1499
1500
static inline void queue__detach_node(queue_node_t* node) {
1501
node->prev->next = node->next;
1502
node->next->prev = node->prev;
1503
}
1504
1505
queue_node_t* queue_first(const queue_t* queue) {
1506
return !queue_is_empty(queue) ? queue->head.next : NULL;
1507
}
1508
1509
queue_node_t* queue_last(const queue_t* queue) {
1510
return !queue_is_empty(queue) ? queue->head.prev : NULL;
1511
}
1512
1513
void queue_prepend(queue_t* queue, queue_node_t* node) {
1514
node->next = queue->head.next;
1515
node->prev = &queue->head;
1516
node->next->prev = node;
1517
queue->head.next = node;
1518
}
1519
1520
void queue_append(queue_t* queue, queue_node_t* node) {
1521
node->next = &queue->head;
1522
node->prev = queue->head.prev;
1523
node->prev->next = node;
1524
queue->head.prev = node;
1525
}
1526
1527
void queue_move_to_start(queue_t* queue, queue_node_t* node) {
1528
queue__detach_node(node);
1529
queue_prepend(queue, node);
1530
}
1531
1532
void queue_move_to_end(queue_t* queue, queue_node_t* node) {
1533
queue__detach_node(node);
1534
queue_append(queue, node);
1535
}
1536
1537
void queue_remove(queue_node_t* node) {
1538
queue__detach_node(node);
1539
queue_node_init(node);
1540
}
1541
1542
bool queue_is_empty(const queue_t* queue) {
1543
return !queue_is_enqueued(&queue->head);
1544
}
1545
1546
bool queue_is_enqueued(const queue_node_t* node) {
1547
return node->prev != node;
1548
}
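/* Illustrative note (not in the original source): the queue is an intrusive,
 * circular, doubly-linked list with a sentinel head node. A detached node
 * points at itself, so queue_is_enqueued() is simply `node->prev != node`,
 * and queue_is_empty() reuses that test on the sentinel head. */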
1549
1550
#define REFLOCK__REF ((long) 0x00000001UL)
1551
#define REFLOCK__REF_MASK ((long) 0x0fffffffUL)
1552
#define REFLOCK__DESTROY ((long) 0x10000000UL)
1553
#define REFLOCK__DESTROY_MASK ((long) 0xf0000000UL)
1554
#define REFLOCK__POISON ((long) 0x300dead0UL)
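/* Illustrative note (not in the original source): the reflock state packs the
 * reference count into the low 28 bits (REFLOCK__REF_MASK) and uses bit 28
 * (REFLOCK__DESTROY) to flag a destroy in progress; e.g. a state value of
 * 0x10000002 means "destroy pending, two references still outstanding". */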
1555
1556
static HANDLE reflock__keyed_event = NULL;
1557
1558
int reflock_global_init(void) {
1559
NTSTATUS status = NtCreateKeyedEvent(
1560
&reflock__keyed_event, KEYEDEVENT_ALL_ACCESS, NULL, 0);
1561
if (status != STATUS_SUCCESS)
1562
return_set_error(-1, RtlNtStatusToDosError(status));
1563
return 0;
1564
}
1565
1566
void reflock_init(reflock_t* reflock) {
1567
reflock->state = 0;
1568
}
1569
1570
static void reflock__signal_event(void* address) {
1571
NTSTATUS status =
1572
NtReleaseKeyedEvent(reflock__keyed_event, address, FALSE, NULL);
1573
if (status != STATUS_SUCCESS)
1574
abort();
1575
}
1576
1577
static void reflock__await_event(void* address) {
1578
NTSTATUS status =
1579
NtWaitForKeyedEvent(reflock__keyed_event, address, FALSE, NULL);
1580
if (status != STATUS_SUCCESS)
1581
abort();
1582
}
1583
1584
void reflock_ref(reflock_t* reflock) {
1585
long state = InterlockedAdd(&reflock->state, REFLOCK__REF);
1586
1587
/* Verify that the counter didn't overflow and the lock isn't destroyed. */
1588
assert((state & REFLOCK__DESTROY_MASK) == 0);
1589
unused_var(state);
1590
}
1591
1592
void reflock_unref(reflock_t* reflock) {
1593
long state = InterlockedAdd(&reflock->state, -REFLOCK__REF);
1594
1595
/* Verify that the lock was referenced and not already destroyed. */
1596
assert((state & REFLOCK__DESTROY_MASK & ~REFLOCK__DESTROY) == 0);
1597
1598
if (state == REFLOCK__DESTROY)
1599
reflock__signal_event(reflock);
1600
}
1601
1602
void reflock_unref_and_destroy(reflock_t* reflock) {
1603
long state =
1604
InterlockedAdd(&reflock->state, REFLOCK__DESTROY - REFLOCK__REF);
1605
long ref_count = state & REFLOCK__REF_MASK;
1606
1607
/* Verify that the lock was referenced and not already destroyed. */
1608
assert((state & REFLOCK__DESTROY_MASK) == REFLOCK__DESTROY);
1609
1610
if (ref_count != 0)
1611
reflock__await_event(reflock);
1612
1613
state = InterlockedExchange(&reflock->state, REFLOCK__POISON);
1614
assert(state == REFLOCK__DESTROY);
1615
}
1616
1617
#define SOCK__KNOWN_EPOLL_EVENTS \
1618
(EPOLLIN | EPOLLPRI | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM | \
1619
EPOLLRDBAND | EPOLLWRNORM | EPOLLWRBAND | EPOLLMSG | EPOLLRDHUP)
1620
1621
typedef enum sock__poll_status {
1622
SOCK__POLL_IDLE = 0,
1623
SOCK__POLL_PENDING,
1624
SOCK__POLL_CANCELLED
1625
} sock__poll_status_t;
1626
1627
typedef struct sock_state {
1628
IO_STATUS_BLOCK io_status_block;
1629
AFD_POLL_INFO poll_info;
1630
queue_node_t queue_node;
1631
tree_node_t tree_node;
1632
poll_group_t* poll_group;
1633
SOCKET base_socket;
1634
epoll_data_t user_data;
1635
uint32_t user_events;
1636
uint32_t pending_events;
1637
sock__poll_status_t poll_status;
1638
bool delete_pending;
1639
} sock_state_t;
1640
1641
static inline sock_state_t* sock__alloc(void) {
1642
sock_state_t* sock_state = malloc(sizeof *sock_state);
1643
if (sock_state == NULL)
1644
return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);
1645
return sock_state;
1646
}
1647
1648
static inline void sock__free(sock_state_t* sock_state) {
1649
assert(sock_state != NULL);
1650
free(sock_state);
1651
}
1652
1653
static inline int sock__cancel_poll(sock_state_t* sock_state) {
1654
assert(sock_state->poll_status == SOCK__POLL_PENDING);
1655
1656
if (afd_cancel_poll(poll_group_get_afd_device_handle(sock_state->poll_group),
1657
&sock_state->io_status_block) < 0)
1658
return -1;
1659
1660
sock_state->poll_status = SOCK__POLL_CANCELLED;
1661
sock_state->pending_events = 0;
1662
return 0;
1663
}
1664
1665
sock_state_t* sock_new(port_state_t* port_state, SOCKET socket) {
1666
SOCKET base_socket;
1667
poll_group_t* poll_group;
1668
sock_state_t* sock_state;
1669
1670
if (socket == 0 || socket == INVALID_SOCKET)
1671
return_set_error(NULL, ERROR_INVALID_HANDLE);
1672
1673
base_socket = ws_get_base_socket(socket);
1674
if (base_socket == INVALID_SOCKET)
1675
return NULL;
1676
1677
poll_group = poll_group_acquire(port_state);
1678
if (poll_group == NULL)
1679
return NULL;
1680
1681
sock_state = sock__alloc();
1682
if (sock_state == NULL)
1683
goto err1;
1684
1685
memset(sock_state, 0, sizeof *sock_state);
1686
1687
sock_state->base_socket = base_socket;
1688
sock_state->poll_group = poll_group;
1689
1690
tree_node_init(&sock_state->tree_node);
1691
queue_node_init(&sock_state->queue_node);
1692
1693
if (port_register_socket(port_state, sock_state, socket) < 0)
1694
goto err2;
1695
1696
return sock_state;
1697
1698
err2:
1699
sock__free(sock_state);
1700
err1:
1701
poll_group_release(poll_group);
1702
1703
return NULL;
1704
}
1705
1706
static int sock__delete(port_state_t* port_state,
1707
sock_state_t* sock_state,
1708
bool force) {
1709
if (!sock_state->delete_pending) {
1710
if (sock_state->poll_status == SOCK__POLL_PENDING)
1711
sock__cancel_poll(sock_state);
1712
1713
port_cancel_socket_update(port_state, sock_state);
1714
port_unregister_socket(port_state, sock_state);
1715
1716
sock_state->delete_pending = true;
1717
}
1718
1719
/* If the poll request still needs to complete, the sock_state object can't
1720
* be free()d yet. `sock_feed_event()` or `port_close()` will take care
1721
* of this later. */
1722
if (force || sock_state->poll_status == SOCK__POLL_IDLE) {
1723
/* Free the sock_state now. */
1724
port_remove_deleted_socket(port_state, sock_state);
1725
poll_group_release(sock_state->poll_group);
1726
sock__free(sock_state);
1727
} else {
1728
/* Free the socket later. */
1729
port_add_deleted_socket(port_state, sock_state);
1730
}
1731
1732
return 0;
1733
}
1734
1735
void sock_delete(port_state_t* port_state, sock_state_t* sock_state) {
1736
sock__delete(port_state, sock_state, false);
1737
}
1738
1739
void sock_force_delete(port_state_t* port_state, sock_state_t* sock_state) {
1740
sock__delete(port_state, sock_state, true);
1741
}
1742
1743
int sock_set_event(port_state_t* port_state,
1744
sock_state_t* sock_state,
1745
const struct epoll_event* ev) {
1746
/* EPOLLERR and EPOLLHUP are always reported, even when not requested by the
 * caller. However, they are disabled after an event has been reported for a
 * socket for which the EPOLLONESHOT flag was set. */
1749
uint32_t events = ev->events | EPOLLERR | EPOLLHUP;
1750
1751
sock_state->user_events = events;
1752
sock_state->user_data = ev->data;
1753
1754
if ((events & SOCK__KNOWN_EPOLL_EVENTS & ~sock_state->pending_events) != 0)
1755
port_request_socket_update(port_state, sock_state);
1756
1757
return 0;
1758
}
1759
1760
static inline DWORD sock__epoll_events_to_afd_events(uint32_t epoll_events) {
1761
/* Always monitor for AFD_POLL_LOCAL_CLOSE, which is triggered when the
1762
* socket is closed with closesocket() or CloseHandle(). */
1763
DWORD afd_events = AFD_POLL_LOCAL_CLOSE;
1764
1765
if (epoll_events & (EPOLLIN | EPOLLRDNORM))
1766
afd_events |= AFD_POLL_RECEIVE | AFD_POLL_ACCEPT;
1767
if (epoll_events & (EPOLLPRI | EPOLLRDBAND))
1768
afd_events |= AFD_POLL_RECEIVE_EXPEDITED;
1769
if (epoll_events & (EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND))
1770
afd_events |= AFD_POLL_SEND;
1771
if (epoll_events & (EPOLLIN | EPOLLRDNORM | EPOLLRDHUP))
1772
afd_events |= AFD_POLL_DISCONNECT;
1773
if (epoll_events & EPOLLHUP)
1774
afd_events |= AFD_POLL_ABORT;
1775
if (epoll_events & EPOLLERR)
1776
afd_events |= AFD_POLL_CONNECT_FAIL;
1777
1778
return afd_events;
1779
}
1780
1781
static inline uint32_t sock__afd_events_to_epoll_events(DWORD afd_events) {
1782
uint32_t epoll_events = 0;
1783
1784
if (afd_events & (AFD_POLL_RECEIVE | AFD_POLL_ACCEPT))
1785
epoll_events |= EPOLLIN | EPOLLRDNORM;
1786
if (afd_events & AFD_POLL_RECEIVE_EXPEDITED)
1787
epoll_events |= EPOLLPRI | EPOLLRDBAND;
1788
if (afd_events & AFD_POLL_SEND)
1789
epoll_events |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
1790
if (afd_events & AFD_POLL_DISCONNECT)
1791
epoll_events |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
1792
if (afd_events & AFD_POLL_ABORT)
1793
epoll_events |= EPOLLHUP;
1794
if (afd_events & AFD_POLL_CONNECT_FAIL)
1795
/* Linux reports all these events after connect() has failed. */
1796
epoll_events |=
1797
EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLRDNORM | EPOLLWRNORM | EPOLLRDHUP;
1798
1799
return epoll_events;
1800
}
1801
1802
int sock_update(port_state_t* port_state, sock_state_t* sock_state) {
1803
assert(!sock_state->delete_pending);
1804
1805
if ((sock_state->poll_status == SOCK__POLL_PENDING) &&
1806
(sock_state->user_events & SOCK__KNOWN_EPOLL_EVENTS &
1807
~sock_state->pending_events) == 0) {
1808
/* All the events the user is interested in are already being monitored by
1809
* the pending poll operation. It might spuriously complete because of an
1810
* event that we're no longer interested in; when that happens we'll submit
1811
* a new poll operation with the updated event mask. */
1812
1813
} else if (sock_state->poll_status == SOCK__POLL_PENDING) {
1814
/* A poll operation is already pending, but it's not monitoring for all the
 * events that the user is interested in. Therefore, cancel the pending
 * poll operation; when we receive its completion packet, a new poll
 * operation will be submitted with the correct event mask. */
1818
if (sock__cancel_poll(sock_state) < 0)
1819
return -1;
1820
1821
} else if (sock_state->poll_status == SOCK__POLL_CANCELLED) {
1822
/* The poll operation has already been cancelled, we're still waiting for
1823
* it to return. For now, there's nothing that needs to be done. */
1824
1825
} else if (sock_state->poll_status == SOCK__POLL_IDLE) {
1826
/* No poll operation is pending; start one. */
1827
sock_state->poll_info.Exclusive = FALSE;
1828
sock_state->poll_info.NumberOfHandles = 1;
1829
sock_state->poll_info.Timeout.QuadPart = INT64_MAX;
1830
sock_state->poll_info.Handles[0].Handle = (HANDLE) sock_state->base_socket;
1831
sock_state->poll_info.Handles[0].Status = 0;
1832
sock_state->poll_info.Handles[0].Events =
1833
sock__epoll_events_to_afd_events(sock_state->user_events);
1834
1835
if (afd_poll(poll_group_get_afd_device_handle(sock_state->poll_group),
1836
&sock_state->poll_info,
1837
&sock_state->io_status_block) < 0) {
1838
switch (GetLastError()) {
1839
case ERROR_IO_PENDING:
1840
/* Overlapped poll operation in progress; this is expected. */
1841
break;
1842
case ERROR_INVALID_HANDLE:
1843
/* Socket closed; it'll be dropped from the epoll set. */
1844
return sock__delete(port_state, sock_state, false);
1845
default:
1846
/* Other errors are propagated to the caller. */
1847
return_map_error(-1);
1848
}
1849
}
1850
1851
/* The poll request was successfully submitted. */
1852
sock_state->poll_status = SOCK__POLL_PENDING;
1853
sock_state->pending_events = sock_state->user_events;
1854
1855
} else {
1856
/* Unreachable. */
1857
assert(false);
1858
}
1859
1860
port_cancel_socket_update(port_state, sock_state);
1861
return 0;
1862
}
1863
1864
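/* sock_feed_event() is the completion side of the state machine above: it is
 * invoked with the IO_STATUS_BLOCK of a finished AFD poll operation, recovers
 * the owning sock_state_t with container_of(), and turns the reported AFD
 * events back into an epoll_event for the caller. It returns 1 when `ev` was
 * filled in, and 0 when there is nothing to report. */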
int sock_feed_event(port_state_t* port_state,
                    IO_STATUS_BLOCK* io_status_block,
                    struct epoll_event* ev) {
  sock_state_t* sock_state =
      container_of(io_status_block, sock_state_t, io_status_block);
  AFD_POLL_INFO* poll_info = &sock_state->poll_info;
  uint32_t epoll_events = 0;

  sock_state->poll_status = SOCK__POLL_IDLE;
  sock_state->pending_events = 0;

  if (sock_state->delete_pending) {
    /* Socket has been deleted earlier and can now be freed. */
    return sock__delete(port_state, sock_state, false);

  } else if (io_status_block->Status == STATUS_CANCELLED) {
    /* The poll request was cancelled by CancelIoEx. */

  } else if (!NT_SUCCESS(io_status_block->Status)) {
    /* The overlapped request itself failed in an unexpected way. */
    epoll_events = EPOLLERR;

  } else if (poll_info->NumberOfHandles < 1) {
    /* This poll operation succeeded but didn't report any socket events. */

  } else if (poll_info->Handles[0].Events & AFD_POLL_LOCAL_CLOSE) {
    /* The poll operation reported that the socket was closed. */
    return sock__delete(port_state, sock_state, false);

  } else {
    /* Events related to our socket were reported. */
    epoll_events =
        sock__afd_events_to_epoll_events(poll_info->Handles[0].Events);
  }

  /* Requeue the socket so a new poll request will be submitted. */
  port_request_socket_update(port_state, sock_state);

  /* Filter out events that the user didn't ask for. */
  epoll_events &= sock_state->user_events;

  /* Return if there are no epoll events to report. */
  if (epoll_events == 0)
    return 0;

  /* If the socket has the EPOLLONESHOT flag set, unmonitor all events,
   * even EPOLLERR and EPOLLHUP. But always keep looking for closed sockets. */
  if (sock_state->user_events & EPOLLONESHOT)
    sock_state->user_events = 0;

  ev->data = sock_state->user_data;
  ev->events = epoll_events;
  return 1;
}
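/* The helpers below convert between a sock_state_t and the queue_node_t /
 * tree_node_t structures embedded in it, so the same sock_state can be linked
 * into the queues and trees maintained by the port code without any extra
 * allocations. */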
sock_state_t* sock_state_from_queue_node(queue_node_t* queue_node) {
  return container_of(queue_node, sock_state_t, queue_node);
}

queue_node_t* sock_state_to_queue_node(sock_state_t* sock_state) {
  return &sock_state->queue_node;
}

sock_state_t* sock_state_from_tree_node(tree_node_t* tree_node) {
  return container_of(tree_node, sock_state_t, tree_node);
}

tree_node_t* sock_state_to_tree_node(sock_state_t* sock_state) {
  return &sock_state->tree_node;
}
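/* ts_tree_t is a thread-safe wrapper around tree_t: the tree itself is
 * protected by an SRW lock, and every node carries a reflock so that a lookup
 * can pin the node before the lock is released. A typical (illustrative)
 * lookup therefore looks like:
 *
 *   ts_tree_node_t* node = ts_tree_find_and_ref(&some_tree, key);
 *   if (node != NULL) {
 *     ...use the node...
 *     ts_tree_node_unref(node);
 *   }
 *
 * ts_tree_del_and_ref() additionally removes the node from the tree while the
 * exclusive lock is held, so the caller ends up holding the only reference
 * path to it. */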
void ts_tree_init(ts_tree_t* ts_tree) {
  tree_init(&ts_tree->tree);
  InitializeSRWLock(&ts_tree->lock);
}

void ts_tree_node_init(ts_tree_node_t* node) {
  tree_node_init(&node->tree_node);
  reflock_init(&node->reflock);
}

int ts_tree_add(ts_tree_t* ts_tree, ts_tree_node_t* node, uintptr_t key) {
  int r;

  AcquireSRWLockExclusive(&ts_tree->lock);
  r = tree_add(&ts_tree->tree, &node->tree_node, key);
  ReleaseSRWLockExclusive(&ts_tree->lock);

  return r;
}

static inline ts_tree_node_t* ts_tree__find_node(ts_tree_t* ts_tree,
                                                 uintptr_t key) {
  tree_node_t* tree_node = tree_find(&ts_tree->tree, key);
  if (tree_node == NULL)
    return NULL;

  return container_of(tree_node, ts_tree_node_t, tree_node);
}

ts_tree_node_t* ts_tree_del_and_ref(ts_tree_t* ts_tree, uintptr_t key) {
  ts_tree_node_t* ts_tree_node;

  AcquireSRWLockExclusive(&ts_tree->lock);

  ts_tree_node = ts_tree__find_node(ts_tree, key);
  if (ts_tree_node != NULL) {
    tree_del(&ts_tree->tree, &ts_tree_node->tree_node);
    reflock_ref(&ts_tree_node->reflock);
  }

  ReleaseSRWLockExclusive(&ts_tree->lock);

  return ts_tree_node;
}

ts_tree_node_t* ts_tree_find_and_ref(ts_tree_t* ts_tree, uintptr_t key) {
  ts_tree_node_t* ts_tree_node;

  AcquireSRWLockShared(&ts_tree->lock);

  ts_tree_node = ts_tree__find_node(ts_tree, key);
  if (ts_tree_node != NULL)
    reflock_ref(&ts_tree_node->reflock);

  ReleaseSRWLockShared(&ts_tree->lock);

  return ts_tree_node;
}

void ts_tree_node_unref(ts_tree_node_t* node) {
  reflock_unref(&node->reflock);
}

void ts_tree_node_unref_and_destroy(ts_tree_node_t* node) {
  reflock_unref_and_destroy(&node->reflock);
}
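/* What follows is a small intrusive red-black tree keyed by uintptr_t. Nodes
 * are embedded in their owning structures (see the container_of() adapters
 * above), so the tree never allocates memory of its own. */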
void tree_init(tree_t* tree) {
  memset(tree, 0, sizeof *tree);
}

void tree_node_init(tree_node_t* node) {
  memset(node, 0, sizeof *node);
}
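/* TREE__ROTATE(cis, trans) expands to the body of a tree rotation. `cis` is
 * the direction of the rotation and `trans` is the opposite side: the node's
 * `trans` child `q` takes the node's place, the node becomes `q`'s `cis`
 * child, and `q`'s former `cis` subtree is reattached as the node's `trans`
 * child. tree__rotate_left() instantiates it as TREE__ROTATE(left, right) and
 * tree__rotate_right() as TREE__ROTATE(right, left). */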
#define TREE__ROTATE(cis, trans)   \
  tree_node_t* p = node;           \
  tree_node_t* q = node->trans;    \
  tree_node_t* parent = p->parent; \
                                   \
  if (parent) {                    \
    if (parent->left == p)         \
      parent->left = q;            \
    else                           \
      parent->right = q;           \
  } else {                         \
    tree->root = q;                \
  }                                \
                                   \
  q->parent = parent;              \
  p->parent = q;                   \
  p->trans = q->cis;               \
  if (p->trans)                    \
    p->trans->parent = p;          \
  q->cis = p;

static inline void tree__rotate_left(tree_t* tree, tree_node_t* node) {
  TREE__ROTATE(left, right)
}

static inline void tree__rotate_right(tree_t* tree, tree_node_t* node) {
  TREE__ROTATE(right, left)
}

#define TREE__INSERT_OR_DESCEND(side) \
  if (parent->side) {                 \
    parent = parent->side;            \
  } else {                            \
    parent->side = node;              \
    break;                            \
  }
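/* TREE__REBALANCE_AFTER_INSERT(cis, trans) implements one step of the
 * standard red-black insertion fixup, specialized for the case where the
 * parent is the grandparent's `cis` child. If the uncle is red, parent and
 * uncle are recolored black, the grandparent becomes red, and the violation
 * moves up the tree; if the uncle is black, one or two rotations restore the
 * invariants and the loop in tree_add() terminates. */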
#define TREE__REBALANCE_AFTER_INSERT(cis, trans) \
  tree_node_t* grandparent = parent->parent;     \
  tree_node_t* uncle = grandparent->trans;       \
                                                 \
  if (uncle && uncle->red) {                     \
    parent->red = uncle->red = false;            \
    grandparent->red = true;                     \
    node = grandparent;                          \
  } else {                                       \
    if (node == parent->trans) {                 \
      tree__rotate_##cis(tree, parent);          \
      node = parent;                             \
      parent = node->parent;                     \
    }                                            \
    parent->red = false;                         \
    grandparent->red = true;                     \
    tree__rotate_##trans(tree, grandparent);     \
  }
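/* tree_add() does a plain binary-search-tree descent on the key, refuses
 * duplicate keys by returning -1, links the new node in as a red leaf, and
 * then walks back up applying TREE__REBALANCE_AFTER_INSERT until no red
 * parent remains. The root is repainted black at the end. Returns 0 on
 * success. */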
int tree_add(tree_t* tree, tree_node_t* node, uintptr_t key) {
  tree_node_t* parent;

  parent = tree->root;
  if (parent) {
    for (;;) {
      if (key < parent->key) {
        TREE__INSERT_OR_DESCEND(left)
      } else if (key > parent->key) {
        TREE__INSERT_OR_DESCEND(right)
      } else {
        return -1;
      }
    }
  } else {
    tree->root = node;
  }

  node->key = key;
  node->left = node->right = NULL;
  node->parent = parent;
  node->red = true;

  for (; parent && parent->red; parent = node->parent) {
    if (parent == parent->parent->left) {
      TREE__REBALANCE_AFTER_INSERT(left, right)
    } else {
      TREE__REBALANCE_AFTER_INSERT(right, left)
    }
  }
  tree->root->red = false;

  return 0;
}
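/* TREE__REBALANCE_AFTER_REMOVE(cis, trans) is one step of the red-black
 * deletion fixup for a "double black" node that is its parent's `cis` child:
 * a red sibling is first rotated out of the way, then either the sibling is
 * simply recolored red (pushing the problem up to the parent), or one or two
 * rotations around a red sibling child restore the invariants and the loop in
 * tree_del() breaks. */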
#define TREE__REBALANCE_AFTER_REMOVE(cis, trans)   \
  tree_node_t* sibling = parent->trans;            \
                                                   \
  if (sibling->red) {                              \
    sibling->red = false;                          \
    parent->red = true;                            \
    tree__rotate_##cis(tree, parent);              \
    sibling = parent->trans;                       \
  }                                                \
  if ((sibling->left && sibling->left->red) ||     \
      (sibling->right && sibling->right->red)) {   \
    if (!sibling->trans || !sibling->trans->red) { \
      sibling->cis->red = false;                   \
      sibling->red = true;                         \
      tree__rotate_##trans(tree, sibling);         \
      sibling = parent->trans;                     \
    }                                              \
    sibling->red = parent->red;                    \
    parent->red = sibling->trans->red = false;     \
    tree__rotate_##cis(tree, parent);              \
    node = tree->root;                             \
    break;                                         \
  }                                                \
  sibling->red = true;
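/* tree_del() unlinks `node` from the tree. If the node has two children, its
 * in-order successor (the leftmost node of the right subtree) is spliced into
 * its place and inherits its color; otherwise its single child (or NULL)
 * replaces it directly. If a black node was effectively removed, the loop at
 * the bottom applies TREE__REBALANCE_AFTER_REMOVE to restore the red-black
 * invariants. */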
void tree_del(tree_t* tree, tree_node_t* node) {
  tree_node_t* parent = node->parent;
  tree_node_t* left = node->left;
  tree_node_t* right = node->right;
  tree_node_t* next;
  bool red;

  if (!left) {
    next = right;
  } else if (!right) {
    next = left;
  } else {
    next = right;
    while (next->left)
      next = next->left;
  }

  if (parent) {
    if (parent->left == node)
      parent->left = next;
    else
      parent->right = next;
  } else {
    tree->root = next;
  }

  if (left && right) {
    red = next->red;
    next->red = node->red;
    next->left = left;
    left->parent = next;
    if (next != right) {
      parent = next->parent;
      next->parent = node->parent;
      node = next->right;
      parent->left = node;
      next->right = right;
      right->parent = next;
    } else {
      next->parent = parent;
      parent = next;
      node = next->right;
    }
  } else {
    red = node->red;
    node = next;
  }

  if (node)
    node->parent = parent;
  if (red)
    return;
  if (node && node->red) {
    node->red = false;
    return;
  }

  do {
    if (node == tree->root)
      break;
    if (node == parent->left) {
      TREE__REBALANCE_AFTER_REMOVE(left, right)
    } else {
      TREE__REBALANCE_AFTER_REMOVE(right, left)
    }
    node = parent;
    parent = parent->parent;
  } while (!node->red);

  if (node)
    node->red = false;
}

tree_node_t* tree_find(const tree_t* tree, uintptr_t key) {
  tree_node_t* node = tree->root;
  while (node) {
    if (key < node->key)
      node = node->left;
    else if (key > node->key)
      node = node->right;
    else
      return node;
  }
  return NULL;
}

tree_node_t* tree_root(const tree_t* tree) {
  return tree->root;
}
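/* SIO_BSP_HANDLE_POLL and SIO_BASE_HANDLE are standard winsock ioctl codes;
 * they are (re)defined here, presumably so the file also builds with SDK
 * headers that do not provide them. */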
#ifndef SIO_BSP_HANDLE_POLL
#define SIO_BSP_HANDLE_POLL 0x4800001D
#endif

#ifndef SIO_BASE_HANDLE
#define SIO_BASE_HANDLE 0x48000022
#endif

int ws_global_init(void) {
  int r;
  WSADATA wsa_data;

  r = WSAStartup(MAKEWORD(2, 2), &wsa_data);
  if (r != 0)
    return_set_error(-1, (DWORD) r);

  return 0;
}

static inline SOCKET ws__ioctl_get_bsp_socket(SOCKET socket, DWORD ioctl) {
  SOCKET bsp_socket;
  DWORD bytes;

  if (WSAIoctl(socket,
               ioctl,
               NULL,
               0,
               &bsp_socket,
               sizeof bsp_socket,
               &bytes,
               NULL,
               NULL) != SOCKET_ERROR)
    return bsp_socket;
  else
    return INVALID_SOCKET;
}
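/* ws_get_base_socket() resolves the base provider socket that sits underneath
 * any layered service providers (LSPs) wrapping `socket`; the AFD poll
 * machinery above operates on this base handle (see sock_state->base_socket).
 * It normally succeeds with a single SIO_BASE_HANDLE ioctl; the loop below
 * only matters when an LSP interferes with that ioctl. */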
SOCKET ws_get_base_socket(SOCKET socket) {
  SOCKET base_socket;
  DWORD error;

  for (;;) {
    base_socket = ws__ioctl_get_bsp_socket(socket, SIO_BASE_HANDLE);
    if (base_socket != INVALID_SOCKET)
      return base_socket;

    error = GetLastError();
    if (error == WSAENOTSOCK)
      return_set_error(INVALID_SOCKET, error);

    /* Even though Microsoft documentation clearly states that LSPs should
     * never intercept the `SIO_BASE_HANDLE` ioctl [1], Komodia-based LSPs do
     * so anyway, breaking it, with the apparent intention of preventing LSP
     * bypass [2]. Fortunately they don't handle `SIO_BSP_HANDLE_POLL`, which
     * will at least let us obtain the socket associated with the next winsock
     * protocol chain entry. If this succeeds, loop around and call
     * `SIO_BASE_HANDLE` again with the returned BSP socket, to make sure that
     * we unwrap all layers and retrieve the actual base socket.
     * [1] https://docs.microsoft.com/en-us/windows/win32/winsock/winsock-ioctls
     * [2] https://www.komodia.com/newwiki/index.php?title=Komodia%27s_Redirector_bug_fixes#Version_2.2.2.6
     */
    base_socket = ws__ioctl_get_bsp_socket(socket, SIO_BSP_HANDLE_POLL);
    if (base_socket != INVALID_SOCKET && base_socket != socket)
      socket = base_socket;
    else
      return_set_error(INVALID_SOCKET, error);
  }
}