/*
 * wepoll - epoll for Windows
 * https://github.com/piscisaureus/wepoll
 *
 * Copyright 2012-2020, Bert Belder <bertbelder@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef WEPOLL_EXPORT
#define WEPOLL_EXPORT
#endif
#include <stdint.h>
enum EPOLL_EVENTS {
  EPOLLIN = (int)(1U << 0),
  EPOLLPRI = (int)(1U << 1),
  EPOLLOUT = (int)(1U << 2),
  EPOLLERR = (int)(1U << 3),
  EPOLLHUP = (int)(1U << 4),
  EPOLLRDNORM = (int)(1U << 6),
  EPOLLRDBAND = (int)(1U << 7),
  EPOLLWRNORM = (int)(1U << 8),
  EPOLLWRBAND = (int)(1U << 9),
  EPOLLMSG = (int)(1U << 10), /* Never reported. */
  EPOLLRDHUP = (int)(1U << 13),
  EPOLLONESHOT = (int)(1U << 31)
};
#define EPOLLIN (1U << 0)
#define EPOLLPRI (1U << 1)
#define EPOLLOUT (1U << 2)
#define EPOLLERR (1U << 3)
#define EPOLLHUP (1U << 4)
#define EPOLLRDNORM (1U << 6)
#define EPOLLRDBAND (1U << 7)
#define EPOLLWRNORM (1U << 8)
#define EPOLLWRBAND (1U << 9)
#define EPOLLMSG (1U << 10)
#define EPOLLRDHUP (1U << 13)
#define EPOLLONESHOT (1U << 31)
#define EPOLL_CTL_ADD 1
#define EPOLL_CTL_MOD 2
#define EPOLL_CTL_DEL 3
typedef void *HANDLE;
typedef uintptr_t SOCKET;
typedef union epoll_data {
  void *ptr;
  int fd;
  uint32_t u32;
  uint64_t u64;
  SOCKET sock; /* Windows specific */
  HANDLE hnd; /* Windows specific */
} epoll_data_t;
struct epoll_event {
  uint32_t events; /* Epoll events and flags */
  epoll_data_t data; /* User data variable */
};
#ifdef __cplusplus
extern "C" {
#endif
WEPOLL_EXPORT HANDLE epoll_create(int size);
WEPOLL_EXPORT HANDLE epoll_create1(int flags);
WEPOLL_EXPORT int epoll_close(HANDLE ephnd);
WEPOLL_EXPORT int epoll_ctl(HANDLE ephnd, int op, SOCKET sock, struct epoll_event *event);
WEPOLL_EXPORT int epoll_wait(HANDLE ephnd, struct epoll_event *events, int maxevents, int timeout);
#ifdef __cplusplus
} /* extern "C" */
#endif
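/* Illustrative usage sketch (not part of wepoll itself): the API declared
 * above mirrors Linux epoll, except that an epoll port is a HANDLE and only
 * SOCKETs can be watched. Assuming a hypothetical connected SOCKET `sock` and
 * Winsock already initialized, a caller would typically do:
 *
 *   HANDLE ephnd = epoll_create1(0);
 *   struct epoll_event ev = {0}, out[8];
 *   ev.events = EPOLLIN | EPOLLOUT;
 *   ev.data.sock = sock;
 *   epoll_ctl(ephnd, EPOLL_CTL_ADD, sock, &ev);
 *   int n = epoll_wait(ephnd, out, 8, 1000);   (blocks for up to 1000 ms)
 *   epoll_close(ephnd);
 */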
#include <assert.h>
#include <stdlib.h>
#define WEPOLL_INTERNAL static
#define WEPOLL_INTERNAL_EXTERN static
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wnonportable-system-include-path"
#pragma clang diagnostic ignored "-Wreserved-id-macro"
#elif defined(_MSC_VER)
#pragma warning(push, 1)
#endif
#undef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#undef _WIN32_WINNT
#define _WIN32_WINNT 0x0600
#include <windows.h>
#include <winsock2.h>
#include <ws2tcpip.h>
#if defined(__clang__)
#pragma clang diagnostic pop
#elif defined(_MSC_VER)
#pragma warning(pop)
#endif
WEPOLL_INTERNAL int nt_global_init(void);
typedef LONG NTSTATUS;
typedef NTSTATUS *PNTSTATUS;
#ifndef NT_SUCCESS
#define NT_SUCCESS(status) (((NTSTATUS)(status)) >= 0)
#endif
#ifndef STATUS_SUCCESS
#define STATUS_SUCCESS ((NTSTATUS)0x00000000L)
#endif
#ifndef STATUS_PENDING
#define STATUS_PENDING ((NTSTATUS)0x00000103L)
#endif
#ifndef STATUS_CANCELLED
#define STATUS_CANCELLED ((NTSTATUS)0xC0000120L)
#endif
#ifndef STATUS_NOT_FOUND
#define STATUS_NOT_FOUND ((NTSTATUS)0xC0000225L)
#endif
typedef struct _IO_STATUS_BLOCK {
  NTSTATUS Status;
  ULONG_PTR Information;
} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;
typedef VOID(NTAPI *PIO_APC_ROUTINE)(PVOID ApcContext, PIO_STATUS_BLOCK IoStatusBlock, ULONG Reserved);
typedef struct _UNICODE_STRING {
  USHORT Length;
  USHORT MaximumLength;
  PWSTR Buffer;
} UNICODE_STRING, *PUNICODE_STRING;
#define RTL_CONSTANT_STRING(s) \
  { sizeof(s) - sizeof((s)[0]), sizeof(s), s }
typedef struct _OBJECT_ATTRIBUTES {
  ULONG Length;
  HANDLE RootDirectory;
  PUNICODE_STRING ObjectName;
  ULONG Attributes;
  PVOID SecurityDescriptor;
  PVOID SecurityQualityOfService;
} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES;
#define RTL_CONSTANT_OBJECT_ATTRIBUTES(ObjectName, Attributes) \
  { sizeof(OBJECT_ATTRIBUTES), NULL, ObjectName, Attributes, NULL, NULL }
#ifndef FILE_OPEN
#define FILE_OPEN 0x00000001UL
#endif
#define KEYEDEVENT_WAIT 0x00000001UL
#define KEYEDEVENT_WAKE 0x00000002UL
#define KEYEDEVENT_ALL_ACCESS (STANDARD_RIGHTS_REQUIRED | KEYEDEVENT_WAIT | KEYEDEVENT_WAKE)
#define NT_NTDLL_IMPORT_LIST(X) \
  X(NTSTATUS, NTAPI, NtCancelIoFileEx, \
    (HANDLE FileHandle, PIO_STATUS_BLOCK IoRequestToCancel, PIO_STATUS_BLOCK IoStatusBlock)) \
  \
  X(NTSTATUS, NTAPI, NtCreateFile, \
    (PHANDLE FileHandle, ACCESS_MASK DesiredAccess, POBJECT_ATTRIBUTES ObjectAttributes, \
     PIO_STATUS_BLOCK IoStatusBlock, PLARGE_INTEGER AllocationSize, ULONG FileAttributes, ULONG ShareAccess, \
     ULONG CreateDisposition, ULONG CreateOptions, PVOID EaBuffer, ULONG EaLength)) \
  \
  X(NTSTATUS, NTAPI, NtCreateKeyedEvent, \
    (PHANDLE KeyedEventHandle, ACCESS_MASK DesiredAccess, POBJECT_ATTRIBUTES ObjectAttributes, ULONG Flags)) \
  \
  X(NTSTATUS, NTAPI, NtDeviceIoControlFile, \
    (HANDLE FileHandle, HANDLE Event, PIO_APC_ROUTINE ApcRoutine, PVOID ApcContext, PIO_STATUS_BLOCK IoStatusBlock, \
     ULONG IoControlCode, PVOID InputBuffer, ULONG InputBufferLength, PVOID OutputBuffer, ULONG OutputBufferLength)) \
  \
  X(NTSTATUS, NTAPI, NtReleaseKeyedEvent, \
    (HANDLE KeyedEventHandle, PVOID KeyValue, BOOLEAN Alertable, PLARGE_INTEGER Timeout)) \
  \
  X(NTSTATUS, NTAPI, NtWaitForKeyedEvent, \
    (HANDLE KeyedEventHandle, PVOID KeyValue, BOOLEAN Alertable, PLARGE_INTEGER Timeout)) \
  \
  X(ULONG, WINAPI, RtlNtStatusToDosError, (NTSTATUS Status))
#define X(return_type, attributes, name, parameters) WEPOLL_INTERNAL_EXTERN return_type(attributes *name) parameters;
NT_NTDLL_IMPORT_LIST(X)
#undef X
#define AFD_POLL_RECEIVE 0x0001
#define AFD_POLL_RECEIVE_EXPEDITED 0x0002
#define AFD_POLL_SEND 0x0004
#define AFD_POLL_DISCONNECT 0x0008
#define AFD_POLL_ABORT 0x0010
#define AFD_POLL_LOCAL_CLOSE 0x0020
#define AFD_POLL_ACCEPT 0x0080
#define AFD_POLL_CONNECT_FAIL 0x0100
typedef struct _AFD_POLL_HANDLE_INFO {
  HANDLE Handle;
  ULONG Events;
  NTSTATUS Status;
} AFD_POLL_HANDLE_INFO, *PAFD_POLL_HANDLE_INFO;
typedef struct _AFD_POLL_INFO {
  LARGE_INTEGER Timeout;
  ULONG NumberOfHandles;
  ULONG Exclusive;
  AFD_POLL_HANDLE_INFO Handles[1];
} AFD_POLL_INFO, *PAFD_POLL_INFO;
WEPOLL_INTERNAL int afd_create_device_handle(HANDLE iocp_handle, HANDLE *afd_device_handle_out);
WEPOLL_INTERNAL int afd_poll(HANDLE afd_device_handle, AFD_POLL_INFO *poll_info, IO_STATUS_BLOCK *io_status_block);
WEPOLL_INTERNAL int afd_cancel_poll(HANDLE afd_device_handle, IO_STATUS_BLOCK *io_status_block);
#define return_map_error(value) \
  do { \
    err_map_win_error(); \
    return (value); \
  } while (0)
#define return_set_error(value, error) \
  do { \
    err_set_win_error(error); \
    return (value); \
  } while (0)
WEPOLL_INTERNAL void err_map_win_error(void);
WEPOLL_INTERNAL void err_set_win_error(DWORD error);
WEPOLL_INTERNAL int err_check_handle(HANDLE handle);
#define IOCTL_AFD_POLL 0x00012024
static UNICODE_STRING afd__device_name = RTL_CONSTANT_STRING(L"\\Device\\Afd\\Wepoll");
static OBJECT_ATTRIBUTES afd__device_attributes = RTL_CONSTANT_OBJECT_ATTRIBUTES(&afd__device_name, 0);
int afd_create_device_handle(HANDLE iocp_handle, HANDLE *afd_device_handle_out) {
  HANDLE afd_device_handle;
  IO_STATUS_BLOCK iosb;
  NTSTATUS status;
  /* By opening \Device\Afd without specifying any extended attributes, we'll
   * get a handle that lets us talk to the AFD driver, but that doesn't have an
   * associated endpoint (so it's not a socket). */
  status = NtCreateFile(
      &afd_device_handle, SYNCHRONIZE, &afd__device_attributes, &iosb, NULL, 0, FILE_SHARE_READ | FILE_SHARE_WRITE,
      FILE_OPEN, 0, NULL, 0);
  if (status != STATUS_SUCCESS)
    return_set_error(-1, RtlNtStatusToDosError(status));
  if (CreateIoCompletionPort(afd_device_handle, iocp_handle, 0, 0) == NULL)
    goto error;
  if (!SetFileCompletionNotificationModes(afd_device_handle, FILE_SKIP_SET_EVENT_ON_HANDLE))
    goto error;
  *afd_device_handle_out = afd_device_handle;
  return 0;
error:
  CloseHandle(afd_device_handle);
  return_map_error(-1);
}
int afd_poll(HANDLE afd_device_handle, AFD_POLL_INFO *poll_info, IO_STATUS_BLOCK *io_status_block) {
  NTSTATUS status;
  /* Blocking operation is not supported. */
  assert(io_status_block != NULL);
  io_status_block->Status = STATUS_PENDING;
  status = NtDeviceIoControlFile(
      afd_device_handle, NULL, NULL, io_status_block, io_status_block, IOCTL_AFD_POLL, poll_info, sizeof *poll_info,
      poll_info, sizeof *poll_info);
  if (status == STATUS_SUCCESS)
    return 0;
  else if (status == STATUS_PENDING)
    return_set_error(-1, ERROR_IO_PENDING);
  else
    return_set_error(-1, RtlNtStatusToDosError(status));
}
int afd_cancel_poll(HANDLE afd_device_handle, IO_STATUS_BLOCK *io_status_block) {
  NTSTATUS cancel_status;
  IO_STATUS_BLOCK cancel_iosb;
  /* If the poll operation has already completed or has been cancelled earlier,
   * there's nothing left for us to do. */
  if (io_status_block->Status != STATUS_PENDING)
    return 0;
  cancel_status = NtCancelIoFileEx(afd_device_handle, io_status_block, &cancel_iosb);
  /* NtCancelIoFileEx() may return STATUS_NOT_FOUND if the operation completed
   * just before calling NtCancelIoFileEx(). This is not an error. */
  if (cancel_status == STATUS_SUCCESS || cancel_status == STATUS_NOT_FOUND)
    return 0;
  else
    return_set_error(-1, RtlNtStatusToDosError(cancel_status));
}
WEPOLL_INTERNAL int epoll_global_init(void);
WEPOLL_INTERNAL int init(void);
typedef struct port_state port_state_t;
typedef struct queue queue_t;
typedef struct sock_state sock_state_t;
typedef struct ts_tree_node ts_tree_node_t;
WEPOLL_INTERNAL port_state_t *port_new(HANDLE *iocp_handle_out);
WEPOLL_INTERNAL int port_close(port_state_t *port_state);
WEPOLL_INTERNAL int port_delete(port_state_t *port_state);
WEPOLL_INTERNAL int port_wait(port_state_t *port_state, struct epoll_event *events, int maxevents, int timeout);
WEPOLL_INTERNAL int port_ctl(port_state_t *port_state, int op, SOCKET sock, struct epoll_event *ev);
WEPOLL_INTERNAL int port_register_socket(port_state_t *port_state, sock_state_t *sock_state, SOCKET socket);
WEPOLL_INTERNAL void port_unregister_socket(port_state_t *port_state, sock_state_t *sock_state);
WEPOLL_INTERNAL sock_state_t *port_find_socket(port_state_t *port_state, SOCKET socket);
WEPOLL_INTERNAL void port_request_socket_update(port_state_t *port_state, sock_state_t *sock_state);
WEPOLL_INTERNAL void port_cancel_socket_update(port_state_t *port_state, sock_state_t *sock_state);
WEPOLL_INTERNAL void port_add_deleted_socket(port_state_t *port_state, sock_state_t *sock_state);
WEPOLL_INTERNAL void port_remove_deleted_socket(port_state_t *port_state, sock_state_t *sock_state);
WEPOLL_INTERNAL HANDLE port_get_iocp_handle(port_state_t *port_state);
WEPOLL_INTERNAL queue_t *port_get_poll_group_queue(port_state_t *port_state);
WEPOLL_INTERNAL port_state_t *port_state_from_handle_tree_node(ts_tree_node_t *tree_node);
WEPOLL_INTERNAL ts_tree_node_t *port_state_to_handle_tree_node(port_state_t *port_state);
/* The reflock is a special kind of lock that normally prevents a chunk of
 * memory from being freed, but does allow the chunk of memory to eventually be
 * released in a coordinated fashion.
 *
 * Under normal operation, threads increase and decrease the reference count;
 * these are wait-free operations.
 *
 * Exactly once during the reflock's lifecycle, a thread holding a reference to
 * the lock may "destroy" the lock; this operation blocks until all other
 * threads holding a reference to the lock have dereferenced it. After
 * "destroy" returns, the calling thread may assume that no other threads have
 * a reference to the lock.
 *
 * Attempting to lock or destroy a lock after reflock_unref_and_destroy() has
 * been called is invalid and results in undefined behavior. Therefore the user
 * should use another lock to guarantee that this can't happen.
 */
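/* Illustrative calling pattern (a sketch, not part of the implementation),
 * assuming a hypothetical object `obj` that embeds a reflock:
 *
 *   reflock_ref(&obj->reflock);                 (any thread, before using obj)
 *   ...use obj...
 *   reflock_unref(&obj->reflock);               (same thread, when done)
 *
 *   reflock_unref_and_destroy(&obj->reflock);   (exactly one thread, in place of
 *                                                its final unref; returns only
 *                                                once no other references remain)
 *   free(obj);
 */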
typedef struct reflock {
  volatile long state; /* 32-bit Interlocked APIs operate on `long` values. */
} reflock_t;
WEPOLL_INTERNAL int reflock_global_init(void);
WEPOLL_INTERNAL void reflock_init(reflock_t *reflock);
WEPOLL_INTERNAL void reflock_ref(reflock_t *reflock);
WEPOLL_INTERNAL void reflock_unref(reflock_t *reflock);
WEPOLL_INTERNAL void reflock_unref_and_destroy(reflock_t *reflock);
#include <stdbool.h>
/* N.b.: the tree functions do not set errno or LastError when they fail. Each
 * of the API functions has at most one failure mode. It is up to the caller to
 * set an appropriate error code when necessary. */
typedef struct tree tree_t;
typedef struct tree_node tree_node_t;
typedef struct tree {
  tree_node_t *root;
} tree_t;
typedef struct tree_node {
  tree_node_t *left;
  tree_node_t *right;
  tree_node_t *parent;
  uintptr_t key;
  bool red;
} tree_node_t;
WEPOLL_INTERNAL void tree_init(tree_t *tree);
WEPOLL_INTERNAL void tree_node_init(tree_node_t *node);
WEPOLL_INTERNAL int tree_add(tree_t *tree, tree_node_t *node, uintptr_t key);
WEPOLL_INTERNAL void tree_del(tree_t *tree, tree_node_t *node);
WEPOLL_INTERNAL tree_node_t *tree_find(const tree_t *tree, uintptr_t key);
WEPOLL_INTERNAL tree_node_t *tree_root(const tree_t *tree);
typedef struct ts_tree {
  tree_t tree;
  SRWLOCK lock;
} ts_tree_t;
typedef struct ts_tree_node {
  tree_node_t tree_node;
  reflock_t reflock;
} ts_tree_node_t;
WEPOLL_INTERNAL void ts_tree_init(ts_tree_t *rtl);
WEPOLL_INTERNAL void ts_tree_node_init(ts_tree_node_t *node);
WEPOLL_INTERNAL int ts_tree_add(ts_tree_t *ts_tree, ts_tree_node_t *node, uintptr_t key);
WEPOLL_INTERNAL ts_tree_node_t *ts_tree_del_and_ref(ts_tree_t *ts_tree, uintptr_t key);
WEPOLL_INTERNAL ts_tree_node_t *ts_tree_find_and_ref(ts_tree_t *ts_tree, uintptr_t key);
WEPOLL_INTERNAL void ts_tree_node_unref(ts_tree_node_t *node);
WEPOLL_INTERNAL void ts_tree_node_unref_and_destroy(ts_tree_node_t *node);
static ts_tree_t epoll__handle_tree;
int epoll_global_init(void) {
  ts_tree_init(&epoll__handle_tree);
  return 0;
}
static HANDLE epoll__create(void) {
  port_state_t *port_state;
  HANDLE ephnd;
  ts_tree_node_t *tree_node;
  if (init() < 0)
    return NULL;
  port_state = port_new(&ephnd);
  if (port_state == NULL)
    return NULL;
  tree_node = port_state_to_handle_tree_node(port_state);
  if (ts_tree_add(&epoll__handle_tree, tree_node, (uintptr_t)ephnd) < 0) {
    /* This should never happen. */
    port_delete(port_state);
    return_set_error(NULL, ERROR_ALREADY_EXISTS);
  }
  return ephnd;
}
HANDLE epoll_create(int size) {
  if (size <= 0)
    return_set_error(NULL, ERROR_INVALID_PARAMETER);
  return epoll__create();
}
HANDLE epoll_create1(int flags) {
  if (flags != 0)
    return_set_error(NULL, ERROR_INVALID_PARAMETER);
  return epoll__create();
}
int epoll_close(HANDLE ephnd) {
  ts_tree_node_t *tree_node;
  port_state_t *port_state;
  if (init() < 0)
    return -1;
  tree_node = ts_tree_del_and_ref(&epoll__handle_tree, (uintptr_t)ephnd);
  if (tree_node == NULL) {
    err_set_win_error(ERROR_INVALID_PARAMETER);
    goto err;
  }
  port_state = port_state_from_handle_tree_node(tree_node);
  port_close(port_state);
  ts_tree_node_unref_and_destroy(tree_node);
  return port_delete(port_state);
err:
  err_check_handle(ephnd);
  return -1;
}
int epoll_ctl(HANDLE ephnd, int op, SOCKET sock, struct epoll_event *ev) {
  ts_tree_node_t *tree_node;
  port_state_t *port_state;
  int r;
  if (init() < 0)
    return -1;
  tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t)ephnd);
  if (tree_node == NULL) {
    err_set_win_error(ERROR_INVALID_PARAMETER);
    goto err;
  }
  port_state = port_state_from_handle_tree_node(tree_node);
  r = port_ctl(port_state, op, sock, ev);
  ts_tree_node_unref(tree_node);
  if (r < 0)
    goto err;
  return 0;
err:
  /* On Linux, in the case of epoll_ctl(), EBADF takes priority over other
   * errors. Wepoll mimics this behavior. */
  err_check_handle(ephnd);
  err_check_handle((HANDLE)sock);
  return -1;
}
int epoll_wait(HANDLE ephnd, struct epoll_event *events, int maxevents, int timeout) {
  ts_tree_node_t *tree_node;
  port_state_t *port_state;
  int num_events;
  if (maxevents <= 0)
    return_set_error(-1, ERROR_INVALID_PARAMETER);
  if (init() < 0)
    return -1;
  tree_node = ts_tree_find_and_ref(&epoll__handle_tree, (uintptr_t)ephnd);
  if (tree_node == NULL) {
    err_set_win_error(ERROR_INVALID_PARAMETER);
    goto err;
  }
  port_state = port_state_from_handle_tree_node(tree_node);
  num_events = port_wait(port_state, events, maxevents, timeout);
  ts_tree_node_unref(tree_node);
  if (num_events < 0)
    goto err;
  return num_events;
err:
  err_check_handle(ephnd);
  return -1;
}
#include <errno.h>
#define ERR__ERRNO_MAPPINGS(X) \
  X(ERROR_ACCESS_DENIED, EACCES) \
  X(ERROR_ALREADY_EXISTS, EEXIST) \
  X(ERROR_BAD_COMMAND, EACCES) \
  X(ERROR_BAD_EXE_FORMAT, ENOEXEC) \
  X(ERROR_BAD_LENGTH, EACCES) \
  X(ERROR_BAD_NETPATH, ENOENT) \
  X(ERROR_BAD_NET_NAME, ENOENT) \
  X(ERROR_BAD_NET_RESP, ENETDOWN) \
  X(ERROR_BAD_PATHNAME, ENOENT) \
  X(ERROR_BROKEN_PIPE, EPIPE) \
  X(ERROR_CANNOT_MAKE, EACCES) \
  X(ERROR_COMMITMENT_LIMIT, ENOMEM) \
  X(ERROR_CONNECTION_ABORTED, ECONNABORTED) \
  X(ERROR_CONNECTION_ACTIVE, EISCONN) \
  X(ERROR_CONNECTION_REFUSED, ECONNREFUSED) \
  X(ERROR_CRC, EACCES) \
  X(ERROR_DIR_NOT_EMPTY, ENOTEMPTY) \
  X(ERROR_DISK_FULL, ENOSPC) \
  X(ERROR_DUP_NAME, EADDRINUSE) \
  X(ERROR_FILENAME_EXCED_RANGE, ENOENT) \
  X(ERROR_FILE_NOT_FOUND, ENOENT) \
  X(ERROR_GEN_FAILURE, EACCES) \
  X(ERROR_GRACEFUL_DISCONNECT, EPIPE) \
  X(ERROR_HOST_DOWN, EHOSTUNREACH) \
  X(ERROR_HOST_UNREACHABLE, EHOSTUNREACH) \
  X(ERROR_INSUFFICIENT_BUFFER, EFAULT) \
  X(ERROR_INVALID_ADDRESS, EADDRNOTAVAIL) \
  X(ERROR_INVALID_FUNCTION, EINVAL) \
  X(ERROR_INVALID_HANDLE, EBADF) \
  X(ERROR_INVALID_NETNAME, EADDRNOTAVAIL) \
  X(ERROR_INVALID_PARAMETER, EINVAL) \
  X(ERROR_INVALID_USER_BUFFER, EMSGSIZE) \
  X(ERROR_IO_PENDING, EINPROGRESS) \
  X(ERROR_LOCK_VIOLATION, EACCES) \
  X(ERROR_MORE_DATA, EMSGSIZE) \
  X(ERROR_NETNAME_DELETED, ECONNABORTED) \
  X(ERROR_NETWORK_ACCESS_DENIED, EACCES) \
  X(ERROR_NETWORK_BUSY, ENETDOWN) \
  X(ERROR_NETWORK_UNREACHABLE, ENETUNREACH) \
  X(ERROR_NOACCESS, EFAULT) \
  X(ERROR_NONPAGED_SYSTEM_RESOURCES, ENOMEM) \
  X(ERROR_NOT_ENOUGH_MEMORY, ENOMEM) \
  X(ERROR_NOT_ENOUGH_QUOTA, ENOMEM) \
  X(ERROR_NOT_FOUND, ENOENT) \
  X(ERROR_NOT_LOCKED, EACCES) \
  X(ERROR_NOT_READY, EACCES) \
  X(ERROR_NOT_SAME_DEVICE, EXDEV) \
  X(ERROR_NOT_SUPPORTED, ENOTSUP) \
  X(ERROR_NO_MORE_FILES, ENOENT) \
  X(ERROR_NO_SYSTEM_RESOURCES, ENOMEM) \
  X(ERROR_OPERATION_ABORTED, EINTR) \
  X(ERROR_OUT_OF_PAPER, EACCES) \
  X(ERROR_PAGED_SYSTEM_RESOURCES, ENOMEM) \
  X(ERROR_PAGEFILE_QUOTA, ENOMEM) \
  X(ERROR_PATH_NOT_FOUND, ENOENT) \
  X(ERROR_PIPE_NOT_CONNECTED, EPIPE) \
  X(ERROR_PORT_UNREACHABLE, ECONNRESET) \
  X(ERROR_PROTOCOL_UNREACHABLE, ENETUNREACH) \
  X(ERROR_REM_NOT_LIST, ECONNREFUSED) \
  X(ERROR_REQUEST_ABORTED, EINTR) \
  X(ERROR_REQ_NOT_ACCEP, EWOULDBLOCK) \
  X(ERROR_SECTOR_NOT_FOUND, EACCES) \
  X(ERROR_SEM_TIMEOUT, ETIMEDOUT) \
  X(ERROR_SHARING_VIOLATION, EACCES) \
  X(ERROR_TOO_MANY_NAMES, ENOMEM) \
  X(ERROR_TOO_MANY_OPEN_FILES, EMFILE) \
  X(ERROR_UNEXP_NET_ERR, ECONNABORTED) \
  X(ERROR_WAIT_NO_CHILDREN, ECHILD) \
  X(ERROR_WORKING_SET_QUOTA, ENOMEM) \
  X(ERROR_WRITE_PROTECT, EACCES) \
  X(ERROR_WRONG_DISK, EACCES) \
  X(WSAEACCES, EACCES) \
  X(WSAEADDRINUSE, EADDRINUSE) \
  X(WSAEADDRNOTAVAIL, EADDRNOTAVAIL) \
  X(WSAEAFNOSUPPORT, EAFNOSUPPORT) \
  X(WSAECONNABORTED, ECONNABORTED) \
  X(WSAECONNREFUSED, ECONNREFUSED) \
  X(WSAECONNRESET, ECONNRESET) \
  X(WSAEDISCON, EPIPE) \
  X(WSAEFAULT, EFAULT) \
  X(WSAEHOSTDOWN, EHOSTUNREACH) \
  X(WSAEHOSTUNREACH, EHOSTUNREACH) \
  X(WSAEINPROGRESS, EBUSY) \
  X(WSAEINTR, EINTR) \
  X(WSAEINVAL, EINVAL) \
  X(WSAEISCONN, EISCONN) \
  X(WSAEMSGSIZE, EMSGSIZE) \
  X(WSAENETDOWN, ENETDOWN) \
  X(WSAENETRESET, EHOSTUNREACH) \
  X(WSAENETUNREACH, ENETUNREACH) \
  X(WSAENOBUFS, ENOMEM) \
  X(WSAENOTCONN, ENOTCONN) \
  X(WSAENOTSOCK, ENOTSOCK) \
  X(WSAEOPNOTSUPP, EOPNOTSUPP) \
  X(WSAEPROCLIM, ENOMEM) \
  X(WSAESHUTDOWN, EPIPE) \
  X(WSAETIMEDOUT, ETIMEDOUT) \
  X(WSAEWOULDBLOCK, EWOULDBLOCK) \
  X(WSANOTINITIALISED, ENETDOWN) \
  X(WSASYSNOTREADY, ENETDOWN) \
  X(WSAVERNOTSUPPORTED, ENOSYS)
static errno_t err__map_win_error_to_errno(DWORD error) {
  switch (error) {
#define X(error_sym, errno_sym) \
  case error_sym: return errno_sym;
    ERR__ERRNO_MAPPINGS(X)
#undef X
  }
  return EINVAL;
}
void err_map_win_error(void) {
  errno = err__map_win_error_to_errno(GetLastError());
}
void err_set_win_error(DWORD error) {
  SetLastError(error);
  errno = err__map_win_error_to_errno(error);
}
int err_check_handle(HANDLE handle) {
  DWORD flags;
  /* GetHandleInformation() succeeds when passed INVALID_HANDLE_VALUE, so check
   * for this condition explicitly. */
  if (handle == INVALID_HANDLE_VALUE)
    return_set_error(-1, ERROR_INVALID_HANDLE);
  if (!GetHandleInformation(handle, &flags))
    return_map_error(-1);
  return 0;
}
#include <stddef.h>
#define array_count(a) (sizeof(a) / (sizeof((a)[0])))
#define container_of(ptr, type, member) ((type *)((uintptr_t)(ptr)-offsetof(type, member)))
#define unused_var(v) ((void)(v))
/* Polyfill `inline` for older versions of msvc (up to Visual Studio 2013) */
#if defined(_MSC_VER) && _MSC_VER < 1900
#define inline __inline
#endif
WEPOLL_INTERNAL int ws_global_init(void);
WEPOLL_INTERNAL SOCKET ws_get_base_socket(SOCKET socket);
static bool init__done = false;
static INIT_ONCE init__once = INIT_ONCE_STATIC_INIT;
static BOOL CALLBACK init__once_callback(INIT_ONCE *once, void *parameter, void **context) {
  unused_var(once);
  unused_var(parameter);
  unused_var(context);
  /* N.b. that initialization order matters here. */
  if (ws_global_init() < 0 || nt_global_init() < 0 || reflock_global_init() < 0 || epoll_global_init() < 0)
    return FALSE;
  init__done = true;
  return TRUE;
}
int init(void) {
  if (!init__done && !InitOnceExecuteOnce(&init__once, init__once_callback, NULL, NULL))
    /* `InitOnceExecuteOnce()` itself is infallible, and it doesn't set any
     * error code when the once-callback returns FALSE. We return -1 here to
     * indicate that global initialization failed; the failing init function is
     * responsible for setting `errno` and calling `SetLastError()`. */
    return -1;
  return 0;
}
/* Set up a workaround for the following problem:
 *   FARPROC addr = GetProcAddress(...);
 *   MY_FUNC func = (MY_FUNC) addr;          <-- GCC 8 warning/error.
 *   MY_FUNC func = (MY_FUNC) (void*) addr;  <-- MSVC warning/error.
 * To compile cleanly with either compiler, do casts with this "bridge" type:
 *   MY_FUNC func = (MY_FUNC) (nt__fn_ptr_cast_t) addr; */
#ifdef __GNUC__
typedef void *nt__fn_ptr_cast_t;
#else
typedef FARPROC nt__fn_ptr_cast_t;
#endif
#define X(return_type, attributes, name, parameters) WEPOLL_INTERNAL return_type(attributes *name) parameters = NULL;
NT_NTDLL_IMPORT_LIST(X)
#undef X
int nt_global_init(void) {
  HMODULE ntdll;
  FARPROC fn_ptr;
  ntdll = GetModuleHandleW(L"ntdll.dll");
  if (ntdll == NULL)
    return -1;
#define X(return_type, attributes, name, parameters) \
  fn_ptr = GetProcAddress(ntdll, #name); \
  if (fn_ptr == NULL) \
    return -1; \
  name = (return_type(attributes *) parameters)(nt__fn_ptr_cast_t)fn_ptr;
  NT_NTDLL_IMPORT_LIST(X)
#undef X
  return 0;
}
#include <string.h>
typedef struct poll_group poll_group_t;
typedef struct queue_node queue_node_t;
WEPOLL_INTERNAL poll_group_t *poll_group_acquire(port_state_t *port);
WEPOLL_INTERNAL void poll_group_release(poll_group_t *poll_group);
WEPOLL_INTERNAL void poll_group_delete(poll_group_t *poll_group);
WEPOLL_INTERNAL poll_group_t *poll_group_from_queue_node(queue_node_t *queue_node);
WEPOLL_INTERNAL HANDLE poll_group_get_afd_device_handle(poll_group_t *poll_group);
typedef struct queue_node {
  queue_node_t *prev;
  queue_node_t *next;
} queue_node_t;
typedef struct queue {
  queue_node_t head;
} queue_t;
WEPOLL_INTERNAL void queue_init(queue_t *queue);
WEPOLL_INTERNAL void queue_node_init(queue_node_t *node);
WEPOLL_INTERNAL queue_node_t *queue_first(const queue_t *queue);
WEPOLL_INTERNAL queue_node_t *queue_last(const queue_t *queue);
WEPOLL_INTERNAL void queue_prepend(queue_t *queue, queue_node_t *node);
WEPOLL_INTERNAL void queue_append(queue_t *queue, queue_node_t *node);
WEPOLL_INTERNAL void queue_move_to_start(queue_t *queue, queue_node_t *node);
WEPOLL_INTERNAL void queue_move_to_end(queue_t *queue, queue_node_t *node);
WEPOLL_INTERNAL void queue_remove(queue_node_t *node);
WEPOLL_INTERNAL bool queue_is_empty(const queue_t *queue);
WEPOLL_INTERNAL bool queue_is_enqueued(const queue_node_t *node);
#define POLL_GROUP__MAX_GROUP_SIZE 32
typedef struct poll_group {
  port_state_t *port_state;
  queue_node_t queue_node;
  HANDLE afd_device_handle;
  size_t group_size;
} poll_group_t;
static poll_group_t *poll_group__new(port_state_t *port_state) {
  HANDLE iocp_handle = port_get_iocp_handle(port_state);
  queue_t *poll_group_queue = port_get_poll_group_queue(port_state);
  poll_group_t *poll_group = malloc(sizeof *poll_group);
  if (poll_group == NULL)
    return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);
  memset(poll_group, 0, sizeof *poll_group);
  queue_node_init(&poll_group->queue_node);
  poll_group->port_state = port_state;
  if (afd_create_device_handle(iocp_handle, &poll_group->afd_device_handle) < 0) {
    free(poll_group);
    return NULL;
  }
  queue_append(poll_group_queue, &poll_group->queue_node);
  return poll_group;
}
void poll_group_delete(poll_group_t *poll_group) {
  assert(poll_group->group_size == 0);
  CloseHandle(poll_group->afd_device_handle);
  queue_remove(&poll_group->queue_node);
  free(poll_group);
}
poll_group_t *poll_group_from_queue_node(queue_node_t *queue_node) {
  return container_of(queue_node, poll_group_t, queue_node);
}
HANDLE poll_group_get_afd_device_handle(poll_group_t *poll_group) {
  return poll_group->afd_device_handle;
}
poll_group_t *poll_group_acquire(port_state_t *port_state) {
  queue_t *poll_group_queue = port_get_poll_group_queue(port_state);
  poll_group_t *poll_group = !queue_is_empty(poll_group_queue)
      ? container_of(queue_last(poll_group_queue), poll_group_t, queue_node)
      : NULL;
  if (poll_group == NULL || poll_group->group_size >= POLL_GROUP__MAX_GROUP_SIZE)
    poll_group = poll_group__new(port_state);
  if (poll_group == NULL)
    return NULL;
  if (++poll_group->group_size == POLL_GROUP__MAX_GROUP_SIZE)
    queue_move_to_start(poll_group_queue, &poll_group->queue_node);
  return poll_group;
}
void poll_group_release(poll_group_t *poll_group) {
  port_state_t *port_state = poll_group->port_state;
  queue_t *poll_group_queue = port_get_poll_group_queue(port_state);
  poll_group->group_size--;
  assert(poll_group->group_size < POLL_GROUP__MAX_GROUP_SIZE);
  queue_move_to_end(poll_group_queue, &poll_group->queue_node);
  /* Poll groups are currently only freed when the epoll port is closed. */
}
WEPOLL_INTERNAL sock_state_t *sock_new(port_state_t *port_state, SOCKET socket);
WEPOLL_INTERNAL void sock_delete(port_state_t *port_state, sock_state_t *sock_state);
WEPOLL_INTERNAL void sock_force_delete(port_state_t *port_state, sock_state_t *sock_state);
WEPOLL_INTERNAL int sock_set_event(port_state_t *port_state, sock_state_t *sock_state, const struct epoll_event *ev);
WEPOLL_INTERNAL int sock_update(port_state_t *port_state, sock_state_t *sock_state);
WEPOLL_INTERNAL int sock_feed_event(port_state_t *port_state, IO_STATUS_BLOCK *io_status_block, struct epoll_event *ev);
WEPOLL_INTERNAL sock_state_t *sock_state_from_queue_node(queue_node_t *queue_node);
WEPOLL_INTERNAL queue_node_t *sock_state_to_queue_node(sock_state_t *sock_state);
WEPOLL_INTERNAL sock_state_t *sock_state_from_tree_node(tree_node_t *tree_node);
WEPOLL_INTERNAL tree_node_t *sock_state_to_tree_node(sock_state_t *sock_state);
#define PORT__MAX_ON_STACK_COMPLETIONS 256
typedef struct port_state {
  HANDLE iocp_handle;
  tree_t sock_tree;
  queue_t sock_update_queue;
  queue_t sock_deleted_queue;
  queue_t poll_group_queue;
  ts_tree_node_t handle_tree_node;
  CRITICAL_SECTION lock;
  size_t active_poll_count;
} port_state_t;
static inline port_state_t *port__alloc(void) {
  port_state_t *port_state = malloc(sizeof *port_state);
  if (port_state == NULL)
    return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);
  return port_state;
}
static inline void port__free(port_state_t *port) {
  assert(port != NULL);
  free(port);
}
static inline HANDLE port__create_iocp(void) {
  HANDLE iocp_handle = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
  if (iocp_handle == NULL)
    return_map_error(NULL);
  return iocp_handle;
}
port_state_t *port_new(HANDLE *iocp_handle_out) {
  port_state_t *port_state;
  HANDLE iocp_handle;
  port_state = port__alloc();
  if (port_state == NULL)
    goto err1;
  iocp_handle = port__create_iocp();
  if (iocp_handle == NULL)
    goto err2;
  memset(port_state, 0, sizeof *port_state);
  port_state->iocp_handle = iocp_handle;
  tree_init(&port_state->sock_tree);
  queue_init(&port_state->sock_update_queue);
  queue_init(&port_state->sock_deleted_queue);
  queue_init(&port_state->poll_group_queue);
  ts_tree_node_init(&port_state->handle_tree_node);
  InitializeCriticalSection(&port_state->lock);
  *iocp_handle_out = iocp_handle;
  return port_state;
err2:
  port__free(port_state);
err1:
  return NULL;
}
static inline int port__close_iocp(port_state_t *port_state) {
  HANDLE iocp_handle = port_state->iocp_handle;
  port_state->iocp_handle = NULL;
  if (!CloseHandle(iocp_handle))
    return_map_error(-1);
  return 0;
}
int port_close(port_state_t *port_state) {
  int result;
  EnterCriticalSection(&port_state->lock);
  result = port__close_iocp(port_state);
  LeaveCriticalSection(&port_state->lock);
  return result;
}
int port_delete(port_state_t *port_state) {
  tree_node_t *tree_node;
  queue_node_t *queue_node;
  /* At this point the IOCP port should have been closed. */
  assert(port_state->iocp_handle == NULL);
  while ((tree_node = tree_root(&port_state->sock_tree)) != NULL) {
    sock_state_t *sock_state = sock_state_from_tree_node(tree_node);
    sock_force_delete(port_state, sock_state);
  }
  while ((queue_node = queue_first(&port_state->sock_deleted_queue)) != NULL) {
    sock_state_t *sock_state = sock_state_from_queue_node(queue_node);
    sock_force_delete(port_state, sock_state);
  }
  while ((queue_node = queue_first(&port_state->poll_group_queue)) != NULL) {
    poll_group_t *poll_group = poll_group_from_queue_node(queue_node);
    poll_group_delete(poll_group);
  }
  assert(queue_is_empty(&port_state->sock_update_queue));
  DeleteCriticalSection(&port_state->lock);
  port__free(port_state);
  return 0;
}
static int port__update_events(port_state_t *port_state) {
  queue_t *sock_update_queue = &port_state->sock_update_queue;
  /* Walk the queue, submitting new poll requests for every socket that needs
   * it. */
  while (!queue_is_empty(sock_update_queue)) {
    queue_node_t *queue_node = queue_first(sock_update_queue);
    sock_state_t *sock_state = sock_state_from_queue_node(queue_node);
    if (sock_update(port_state, sock_state) < 0)
      return -1;
    /* sock_update() removes the socket from the update queue. */
  }
  return 0;
}
static inline void port__update_events_if_polling(port_state_t *port_state) {
  if (port_state->active_poll_count > 0)
    port__update_events(port_state);
}
static inline int port__feed_events(
    port_state_t *port_state, struct epoll_event *epoll_events, OVERLAPPED_ENTRY *iocp_events, DWORD iocp_event_count) {
  int epoll_event_count = 0;
  DWORD i;
  for (i = 0; i < iocp_event_count; i++) {
    IO_STATUS_BLOCK *io_status_block = (IO_STATUS_BLOCK *)iocp_events[i].lpOverlapped;
    struct epoll_event *ev = &epoll_events[epoll_event_count];
    epoll_event_count += sock_feed_event(port_state, io_status_block, ev);
  }
  return epoll_event_count;
}
static inline int port__poll(
    port_state_t *port_state, struct epoll_event *epoll_events, OVERLAPPED_ENTRY *iocp_events, DWORD maxevents,
    DWORD timeout) {
  DWORD completion_count;
  if (port__update_events(port_state) < 0)
    return -1;
  port_state->active_poll_count++;
  LeaveCriticalSection(&port_state->lock);
  BOOL r = GetQueuedCompletionStatusEx(
      port_state->iocp_handle, iocp_events, maxevents, &completion_count, timeout, FALSE);
  EnterCriticalSection(&port_state->lock);
  port_state->active_poll_count--;
  if (!r)
    return_map_error(-1);
  return port__feed_events(port_state, epoll_events, iocp_events, completion_count);
}
int port_wait(port_state_t *port_state, struct epoll_event *events, int maxevents, int timeout) {
  OVERLAPPED_ENTRY stack_iocp_events[PORT__MAX_ON_STACK_COMPLETIONS];
  OVERLAPPED_ENTRY *iocp_events;
  uint64_t due = 0;
  DWORD gqcs_timeout;
  int result;
  /* Check whether `maxevents` is in range. */
  if (maxevents <= 0)
    return_set_error(-1, ERROR_INVALID_PARAMETER);
  /* Decide whether the IOCP completion list can live on the stack, or allocate
   * memory for it on the heap. */
  if ((size_t)maxevents <= array_count(stack_iocp_events)) {
    iocp_events = stack_iocp_events;
  } else if ((iocp_events = malloc((size_t)maxevents * sizeof *iocp_events)) == NULL) {
    iocp_events = stack_iocp_events;
    maxevents = array_count(stack_iocp_events);
  }
  /* Compute the timeout for GetQueuedCompletionStatus, and the wait end
   * time, if the user specified a timeout other than zero or infinite. */
  if (timeout > 0) {
    due = GetTickCount64() + (uint64_t)timeout;
    gqcs_timeout = (DWORD)timeout;
  } else if (timeout == 0) {
    gqcs_timeout = 0;
  } else {
    gqcs_timeout = INFINITE;
  }
  EnterCriticalSection(&port_state->lock);
  /* Dequeue completion packets until either at least one interesting event
   * has been discovered, or the timeout is reached. */
  for (;;) {
    uint64_t now;
    result = port__poll(port_state, events, iocp_events, (DWORD)maxevents, gqcs_timeout);
    if (result < 0 || result > 0)
      break; /* Result, error, or time-out. */
    if (timeout < 0)
      continue; /* When timeout is negative, never time out. */
    /* Update time. */
    now = GetTickCount64();
    /* Do not allow the due time to be in the past. */
    if (now >= due) {
      SetLastError(WAIT_TIMEOUT);
      break;
    }
    /* Recompute time-out argument for GetQueuedCompletionStatus. */
    gqcs_timeout = (DWORD)(due - now);
  }
  port__update_events_if_polling(port_state);
  LeaveCriticalSection(&port_state->lock);
  if (iocp_events != stack_iocp_events)
    free(iocp_events);
  if (result >= 0)
    return result;
  else if (GetLastError() == WAIT_TIMEOUT)
    return 0;
  else
    return -1;
}
static inline int port__ctl_add(port_state_t *port_state, SOCKET sock, struct epoll_event *ev) {
  sock_state_t *sock_state = sock_new(port_state, sock);
  if (sock_state == NULL)
    return -1;
  if (sock_set_event(port_state, sock_state, ev) < 0) {
    sock_delete(port_state, sock_state);
    return -1;
  }
  port__update_events_if_polling(port_state);
  return 0;
}
static inline int port__ctl_mod(port_state_t *port_state, SOCKET sock, struct epoll_event *ev) {
  sock_state_t *sock_state = port_find_socket(port_state, sock);
  if (sock_state == NULL)
    return -1;
  if (sock_set_event(port_state, sock_state, ev) < 0)
    return -1;
  port__update_events_if_polling(port_state);
  return 0;
}
static inline int port__ctl_del(port_state_t *port_state, SOCKET sock) {
  sock_state_t *sock_state = port_find_socket(port_state, sock);
  if (sock_state == NULL)
    return -1;
  sock_delete(port_state, sock_state);
  return 0;
}
static inline int port__ctl_op(port_state_t *port_state, int op, SOCKET sock, struct epoll_event *ev) {
  switch (op) {
    case EPOLL_CTL_ADD: return port__ctl_add(port_state, sock, ev);
    case EPOLL_CTL_MOD: return port__ctl_mod(port_state, sock, ev);
    case EPOLL_CTL_DEL: return port__ctl_del(port_state, sock);
    default: return_set_error(-1, ERROR_INVALID_PARAMETER);
  }
}
int port_ctl(port_state_t *port_state, int op, SOCKET sock, struct epoll_event *ev) {
  int result;
  EnterCriticalSection(&port_state->lock);
  result = port__ctl_op(port_state, op, sock, ev);
  LeaveCriticalSection(&port_state->lock);
  return result;
}
int port_register_socket(port_state_t *port_state, sock_state_t *sock_state, SOCKET socket) {
  if (tree_add(&port_state->sock_tree, sock_state_to_tree_node(sock_state), socket) < 0)
    return_set_error(-1, ERROR_ALREADY_EXISTS);
  return 0;
}
void port_unregister_socket(port_state_t *port_state, sock_state_t *sock_state) {
  tree_del(&port_state->sock_tree, sock_state_to_tree_node(sock_state));
}
sock_state_t *port_find_socket(port_state_t *port_state, SOCKET socket) {
  tree_node_t *tree_node = tree_find(&port_state->sock_tree, socket);
  if (tree_node == NULL)
    return_set_error(NULL, ERROR_NOT_FOUND);
  return sock_state_from_tree_node(tree_node);
}
void port_request_socket_update(port_state_t *port_state, sock_state_t *sock_state) {
  if (queue_is_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_append(&port_state->sock_update_queue, sock_state_to_queue_node(sock_state));
}
void port_cancel_socket_update(port_state_t *port_state, sock_state_t *sock_state) {
  unused_var(port_state);
  if (!queue_is_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_remove(sock_state_to_queue_node(sock_state));
}
void port_add_deleted_socket(port_state_t *port_state, sock_state_t *sock_state) {
  if (queue_is_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_append(&port_state->sock_deleted_queue, sock_state_to_queue_node(sock_state));
}
void port_remove_deleted_socket(port_state_t *port_state, sock_state_t *sock_state) {
  unused_var(port_state);
  if (!queue_is_enqueued(sock_state_to_queue_node(sock_state)))
    return;
  queue_remove(sock_state_to_queue_node(sock_state));
}
HANDLE port_get_iocp_handle(port_state_t *port_state) {
  assert(port_state->iocp_handle != NULL);
  return port_state->iocp_handle;
}
queue_t *port_get_poll_group_queue(port_state_t *port_state) {
  return &port_state->poll_group_queue;
}
port_state_t *port_state_from_handle_tree_node(ts_tree_node_t *tree_node) {
  return container_of(tree_node, port_state_t, handle_tree_node);
}
ts_tree_node_t *port_state_to_handle_tree_node(port_state_t *port_state) {
  return &port_state->handle_tree_node;
}
void queue_init(queue_t *queue) {
  queue_node_init(&queue->head);
}
void queue_node_init(queue_node_t *node) {
  node->prev = node;
  node->next = node;
}
static inline void queue__detach_node(queue_node_t *node) {
  node->prev->next = node->next;
  node->next->prev = node->prev;
}
queue_node_t *queue_first(const queue_t *queue) {
  return !queue_is_empty(queue) ? queue->head.next : NULL;
}
queue_node_t *queue_last(const queue_t *queue) {
  return !queue_is_empty(queue) ? queue->head.prev : NULL;
}
void queue_prepend(queue_t *queue, queue_node_t *node) {
  node->next = queue->head.next;
  node->prev = &queue->head;
  node->next->prev = node;
  queue->head.next = node;
}
void queue_append(queue_t *queue, queue_node_t *node) {
  node->next = &queue->head;
  node->prev = queue->head.prev;
  node->prev->next = node;
  queue->head.prev = node;
}
void queue_move_to_start(queue_t *queue, queue_node_t *node) {
  queue__detach_node(node);
  queue_prepend(queue, node);
}
void queue_move_to_end(queue_t *queue, queue_node_t *node) {
  queue__detach_node(node);
  queue_append(queue, node);
}
void queue_remove(queue_node_t *node) {
  queue__detach_node(node);
  queue_node_init(node);
}
bool queue_is_empty(const queue_t *queue) {
  return !queue_is_enqueued(&queue->head);
}
bool queue_is_enqueued(const queue_node_t *node) {
  return node->prev != node;
}
#define REFLOCK__REF ((long)0x00000001UL)
#define REFLOCK__REF_MASK ((long)0x0fffffffUL)
#define REFLOCK__DESTROY ((long)0x10000000UL)
#define REFLOCK__DESTROY_MASK ((long)0xf0000000UL)
#define REFLOCK__POISON ((long)0x300dead0UL)
static HANDLE reflock__keyed_event = NULL;
int reflock_global_init(void) {
  NTSTATUS status = NtCreateKeyedEvent(&reflock__keyed_event, KEYEDEVENT_ALL_ACCESS, NULL, 0);
  if (status != STATUS_SUCCESS)
    return_set_error(-1, RtlNtStatusToDosError(status));
  return 0;
}
void reflock_init(reflock_t *reflock) {
  reflock->state = 0;
}
static void reflock__signal_event(void *address) {
  NTSTATUS status = NtReleaseKeyedEvent(reflock__keyed_event, address, FALSE, NULL);
  if (status != STATUS_SUCCESS)
    abort();
}
static void reflock__await_event(void *address) {
  NTSTATUS status = NtWaitForKeyedEvent(reflock__keyed_event, address, FALSE, NULL);
  if (status != STATUS_SUCCESS)
    abort();
}
void reflock_ref(reflock_t *reflock) {
  long state = InterlockedAdd(&reflock->state, REFLOCK__REF);
  /* Verify that the counter didn't overflow and the lock isn't destroyed. */
  assert((state & REFLOCK__DESTROY_MASK) == 0);
  unused_var(state);
}
void reflock_unref(reflock_t *reflock) {
  long state = InterlockedAdd(&reflock->state, -REFLOCK__REF);
  /* Verify that the lock was referenced and not already destroyed. */
  assert((state & REFLOCK__DESTROY_MASK & ~REFLOCK__DESTROY) == 0);
  if (state == REFLOCK__DESTROY)
    reflock__signal_event(reflock);
}
void reflock_unref_and_destroy(reflock_t *reflock) {
  long state = InterlockedAdd(&reflock->state, REFLOCK__DESTROY - REFLOCK__REF);
  long ref_count = state & REFLOCK__REF_MASK;
  /* Verify that the lock was referenced and not already destroyed. */
  assert((state & REFLOCK__DESTROY_MASK) == REFLOCK__DESTROY);
  if (ref_count != 0)
    reflock__await_event(reflock);
  state = InterlockedExchange(&reflock->state, REFLOCK__POISON);
  assert(state == REFLOCK__DESTROY);
}
#define SOCK__KNOWN_EPOLL_EVENTS \
  (EPOLLIN | EPOLLPRI | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDNORM | EPOLLRDBAND | EPOLLWRNORM | EPOLLWRBAND \
   | EPOLLMSG | EPOLLRDHUP)
typedef enum sock__poll_status { SOCK__POLL_IDLE = 0, SOCK__POLL_PENDING, SOCK__POLL_CANCELLED } sock__poll_status_t;
typedef struct sock_state {
  IO_STATUS_BLOCK io_status_block;
  AFD_POLL_INFO poll_info;
  queue_node_t queue_node;
  tree_node_t tree_node;
  poll_group_t *poll_group;
  SOCKET base_socket;
  epoll_data_t user_data;
  uint32_t user_events;
  uint32_t pending_events;
  sock__poll_status_t poll_status;
  bool delete_pending;
} sock_state_t;
static inline sock_state_t *sock__alloc(void) {
  sock_state_t *sock_state = malloc(sizeof *sock_state);
  if (sock_state == NULL)
    return_set_error(NULL, ERROR_NOT_ENOUGH_MEMORY);
  return sock_state;
}
static inline void sock__free(sock_state_t *sock_state) {
  assert(sock_state != NULL);
  free(sock_state);
}
static inline int sock__cancel_poll(sock_state_t *sock_state) {
  assert(sock_state->poll_status == SOCK__POLL_PENDING);
  if (afd_cancel_poll(poll_group_get_afd_device_handle(sock_state->poll_group), &sock_state->io_status_block) < 0)
    return -1;
  sock_state->poll_status = SOCK__POLL_CANCELLED;
  sock_state->pending_events = 0;
  return 0;
}
sock_state_t *sock_new(port_state_t *port_state, SOCKET socket) {
  SOCKET base_socket;
  poll_group_t *poll_group;
  sock_state_t *sock_state;
  if (socket == 0 || socket == INVALID_SOCKET)
    return_set_error(NULL, ERROR_INVALID_HANDLE);
  base_socket = ws_get_base_socket(socket);
  if (base_socket == INVALID_SOCKET)
    return NULL;
  poll_group = poll_group_acquire(port_state);
  if (poll_group == NULL)
    return NULL;
  sock_state = sock__alloc();
  if (sock_state == NULL)
    goto err1;
  memset(sock_state, 0, sizeof *sock_state);
  sock_state->base_socket = base_socket;
  sock_state->poll_group = poll_group;
  tree_node_init(&sock_state->tree_node);
  queue_node_init(&sock_state->queue_node);
  if (port_register_socket(port_state, sock_state, socket) < 0)
    goto err2;
  return sock_state;
err2:
  sock__free(sock_state);
err1:
  poll_group_release(poll_group);
  return NULL;
}
static int sock__delete(port_state_t *port_state, sock_state_t *sock_state, bool force) {
  if (!sock_state->delete_pending) {
    if (sock_state->poll_status == SOCK__POLL_PENDING)
      sock__cancel_poll(sock_state);
    port_cancel_socket_update(port_state, sock_state);
    port_unregister_socket(port_state, sock_state);
    sock_state->delete_pending = true;
  }
  /* If the poll request still needs to complete, the sock_state object can't
   * be free()d yet. `sock_feed_event()` or `port_close()` will take care
   * of this later. */
  if (force || sock_state->poll_status == SOCK__POLL_IDLE) {
    /* Free the sock_state now. */
    port_remove_deleted_socket(port_state, sock_state);
    poll_group_release(sock_state->poll_group);
    sock__free(sock_state);
  } else {
    /* Free the socket later. */
    port_add_deleted_socket(port_state, sock_state);
  }
  return 0;
}
void sock_delete(port_state_t *port_state, sock_state_t *sock_state) {
  sock__delete(port_state, sock_state, false);
}
void sock_force_delete(port_state_t *port_state, sock_state_t *sock_state) {
  sock__delete(port_state, sock_state, true);
}
int sock_set_event(port_state_t *port_state, sock_state_t *sock_state, const struct epoll_event *ev) {
  /* EPOLLERR and EPOLLHUP are always reported, even when not requested by the
   * caller. However, they are disabled after an event has been reported for a
   * socket for which the EPOLLONESHOT flag was set. */
  uint32_t events = ev->events | EPOLLERR | EPOLLHUP;
  sock_state->user_events = events;
  sock_state->user_data = ev->data;
  if ((events & SOCK__KNOWN_EPOLL_EVENTS & ~sock_state->pending_events) != 0)
    port_request_socket_update(port_state, sock_state);
  return 0;
}
static inline DWORD sock__epoll_events_to_afd_events(uint32_t epoll_events) {
  /* Always monitor for AFD_POLL_LOCAL_CLOSE, which is triggered when the
   * socket is closed with closesocket() or CloseHandle(). */
  DWORD afd_events = AFD_POLL_LOCAL_CLOSE;
  if (epoll_events & (EPOLLIN | EPOLLRDNORM))
    afd_events |= AFD_POLL_RECEIVE | AFD_POLL_ACCEPT;
  if (epoll_events & (EPOLLPRI | EPOLLRDBAND))
    afd_events |= AFD_POLL_RECEIVE_EXPEDITED;
  if (epoll_events & (EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND))
    afd_events |= AFD_POLL_SEND;
  if (epoll_events & (EPOLLIN | EPOLLRDNORM | EPOLLRDHUP))
    afd_events |= AFD_POLL_DISCONNECT;
  if (epoll_events & EPOLLHUP)
    afd_events |= AFD_POLL_ABORT;
  if (epoll_events & EPOLLERR)
    afd_events |= AFD_POLL_CONNECT_FAIL;
  return afd_events;
}
static inline uint32_t sock__afd_events_to_epoll_events(DWORD afd_events) {
  uint32_t epoll_events = 0;
  if (afd_events & (AFD_POLL_RECEIVE | AFD_POLL_ACCEPT))
    epoll_events |= EPOLLIN | EPOLLRDNORM;
  if (afd_events & AFD_POLL_RECEIVE_EXPEDITED)
    epoll_events |= EPOLLPRI | EPOLLRDBAND;
  if (afd_events & AFD_POLL_SEND)
    epoll_events |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
  if (afd_events & AFD_POLL_DISCONNECT)
    epoll_events |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
  if (afd_events & AFD_POLL_ABORT)
    epoll_events |= EPOLLHUP;
  if (afd_events & AFD_POLL_CONNECT_FAIL)
    /* Linux reports all these events after connect() has failed. */
    epoll_events |= EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLRDNORM | EPOLLWRNORM | EPOLLRDHUP;
  return epoll_events;
}
int sock_update(port_state_t *port_state, sock_state_t *sock_state) {
  assert(!sock_state->delete_pending);
  if ((sock_state->poll_status == SOCK__POLL_PENDING)
      && (sock_state->user_events & SOCK__KNOWN_EPOLL_EVENTS & ~sock_state->pending_events) == 0) {
    /* All the events the user is interested in are already being monitored by
     * the pending poll operation. It might spuriously complete because of an
     * event that we're no longer interested in; when that happens we'll submit
     * a new poll operation with the updated event mask. */
  } else if (sock_state->poll_status == SOCK__POLL_PENDING) {
    /* A poll operation is already pending, but it's not monitoring for all the
     * events that the user is interested in. Therefore, cancel the pending
     * poll operation; when we receive its completion packet, a new poll
     * operation will be submitted with the correct event mask. */
  1262. if (sock__cancel_poll(sock_state) < 0)
  1263. return -1;
  1264. } else if (sock_state->poll_status == SOCK__POLL_CANCELLED) {
  1265. /* The poll operation has already been cancelled, we're still waiting for
  1266. * it to return. For now, there's nothing that needs to be done. */
  1267. } else if (sock_state->poll_status == SOCK__POLL_IDLE) {
  1268. /* No poll operation is pending; start one. */
  1269. sock_state->poll_info.Exclusive = FALSE;
  1270. sock_state->poll_info.NumberOfHandles = 1;
  1271. sock_state->poll_info.Timeout.QuadPart = INT64_MAX;
  1272. sock_state->poll_info.Handles[0].Handle = (HANDLE)sock_state->base_socket;
  1273. sock_state->poll_info.Handles[0].Status = 0;
  1274. sock_state->poll_info.Handles[0].Events = sock__epoll_events_to_afd_events(sock_state->user_events);
  1275. if (afd_poll(
  1276. poll_group_get_afd_device_handle(sock_state->poll_group), &sock_state->poll_info,
  1277. &sock_state->io_status_block)
  1278. < 0) {
  1279. switch (GetLastError()) {
  1280. case ERROR_IO_PENDING:
  1281. /* Overlapped poll operation in progress; this is expected. */
  1282. break;
  1283. case ERROR_INVALID_HANDLE:
  1284. /* Socket closed; it'll be dropped from the epoll set. */
  1285. return sock__delete(port_state, sock_state, false);
  1286. default:
  1287. /* Other errors are propagated to the caller. */
  1288. return_map_error(-1);
  1289. }
  1290. }
  1291. /* The poll request was successfully submitted. */
  1292. sock_state->poll_status = SOCK__POLL_PENDING;
  1293. sock_state->pending_events = sock_state->user_events;
  1294. } else {
  1295. /* Unreachable. */
  1296. assert(false);
  1297. }
  1298. port_cancel_socket_update(port_state, sock_state);
  1299. return 0;
  1300. }
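/* sock_update() drives the per-socket poll state machine. Roughly:
 *
 *   SOCK__POLL_IDLE       -- afd_poll() submitted --------------> SOCK__POLL_PENDING
 *   SOCK__POLL_PENDING    -- sock__cancel_poll(), mask changed --> SOCK__POLL_CANCELLED
 *   PENDING or CANCELLED  -- completion handled in sock_feed_event() --> SOCK__POLL_IDLE
 *
 * On success every branch falls through to port_cancel_socket_update(), which
 * takes the socket off the port's update queue until the next change. */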
  1301. int sock_feed_event(port_state_t *port_state, IO_STATUS_BLOCK *io_status_block, struct epoll_event *ev) {
  1302. sock_state_t *sock_state = container_of(io_status_block, sock_state_t, io_status_block);
  1303. AFD_POLL_INFO *poll_info = &sock_state->poll_info;
  1304. uint32_t epoll_events = 0;
  1305. sock_state->poll_status = SOCK__POLL_IDLE;
  1306. sock_state->pending_events = 0;
  1307. if (sock_state->delete_pending) {
  1308. /* Socket has been deleted earlier and can now be freed. */
  1309. return sock__delete(port_state, sock_state, false);
  1310. } else if (io_status_block->Status == STATUS_CANCELLED) {
  1311. /* The poll request was cancelled by CancelIoEx. */
  1312. } else if (!NT_SUCCESS(io_status_block->Status)) {
  1313. /* The overlapped request itself failed in an unexpected way. */
  1314. epoll_events = EPOLLERR;
  1315. } else if (poll_info->NumberOfHandles < 1) {
  1316. /* This poll operation succeeded but didn't report any socket events. */
  1317. } else if (poll_info->Handles[0].Events & AFD_POLL_LOCAL_CLOSE) {
  1318. /* The poll operation reported that the socket was closed. */
  1319. return sock__delete(port_state, sock_state, false);
  1320. } else {
  1321. /* Events related to our socket were reported. */
  1322. epoll_events = sock__afd_events_to_epoll_events(poll_info->Handles[0].Events);
  1323. }
  1324. /* Requeue the socket so a new poll request will be submitted. */
  1325. port_request_socket_update(port_state, sock_state);
  1326. /* Filter out events that the user didn't ask for. */
  1327. epoll_events &= sock_state->user_events;
  1328. /* Return if there are no epoll events to report. */
  1329. if (epoll_events == 0)
  1330. return 0;
  1331. * If the socket has the EPOLLONESHOT flag set, unmonitor all events,
  1332. * even EPOLLERR and EPOLLHUP. But always keep looking for closed sockets. */
  1333. if (sock_state->user_events & EPOLLONESHOT)
  1334. sock_state->user_events = 0;
  1335. ev->data = sock_state->user_data;
  1336. ev->events = epoll_events;
  1337. return 1;
  1338. }
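/* Example of the EPOLLONESHOT path through sock_feed_event(): a socket is
 * registered with EPOLLIN | EPOLLONESHOT. When data arrives the AFD poll
 * completes with AFD_POLL_RECEIVE, EPOLLIN is reported once and user_events
 * is cleared. Because the socket was already requeued above, the next
 * sock_update() still submits a poll, but one that only watches
 * AFD_POLL_LOCAL_CLOSE, so the handle stays tracked until the caller re-arms
 * it with EPOLL_CTL_MOD or removes it with EPOLL_CTL_DEL. */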
  1339. sock_state_t *sock_state_from_queue_node(queue_node_t *queue_node) {
  1340. return container_of(queue_node, sock_state_t, queue_node);
  1341. }
  1342. queue_node_t *sock_state_to_queue_node(sock_state_t *sock_state) {
  1343. return &sock_state->queue_node;
  1344. }
  1345. sock_state_t *sock_state_from_tree_node(tree_node_t *tree_node) {
  1346. return container_of(tree_node, sock_state_t, tree_node);
  1347. }
  1348. tree_node_t *sock_state_to_tree_node(sock_state_t *sock_state) {
  1349. return &sock_state->tree_node;
  1350. }
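/* These accessors use container_of() to map between a sock_state_t and the
 * queue/tree nodes embedded in it, so the intrusive queue and tree code never
 * has to know about sock_state_t itself. For any sock_state_t* ss:
 *
 *   sock_state_from_queue_node(sock_state_to_queue_node(ss)) == ss
 *   sock_state_from_tree_node(sock_state_to_tree_node(ss))   == ss
 */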
  1351. void ts_tree_init(ts_tree_t *ts_tree) {
  1352. tree_init(&ts_tree->tree);
  1353. InitializeSRWLock(&ts_tree->lock);
  1354. }
  1355. void ts_tree_node_init(ts_tree_node_t *node) {
  1356. tree_node_init(&node->tree_node);
  1357. reflock_init(&node->reflock);
  1358. }
  1359. int ts_tree_add(ts_tree_t *ts_tree, ts_tree_node_t *node, uintptr_t key) {
  1360. int r;
  1361. AcquireSRWLockExclusive(&ts_tree->lock);
  1362. r = tree_add(&ts_tree->tree, &node->tree_node, key);
  1363. ReleaseSRWLockExclusive(&ts_tree->lock);
  1364. return r;
  1365. }
  1366. static inline ts_tree_node_t *ts_tree__find_node(ts_tree_t *ts_tree, uintptr_t key) {
  1367. tree_node_t *tree_node = tree_find(&ts_tree->tree, key);
  1368. if (tree_node == NULL)
  1369. return NULL;
  1370. return container_of(tree_node, ts_tree_node_t, tree_node);
  1371. }
  1372. ts_tree_node_t *ts_tree_del_and_ref(ts_tree_t *ts_tree, uintptr_t key) {
  1373. ts_tree_node_t *ts_tree_node;
  1374. AcquireSRWLockExclusive(&ts_tree->lock);
  1375. ts_tree_node = ts_tree__find_node(ts_tree, key);
  1376. if (ts_tree_node != NULL) {
  1377. tree_del(&ts_tree->tree, &ts_tree_node->tree_node);
  1378. reflock_ref(&ts_tree_node->reflock);
  1379. }
  1380. ReleaseSRWLockExclusive(&ts_tree->lock);
  1381. return ts_tree_node;
  1382. }
  1383. ts_tree_node_t *ts_tree_find_and_ref(ts_tree_t *ts_tree, uintptr_t key) {
  1384. ts_tree_node_t *ts_tree_node;
  1385. AcquireSRWLockShared(&ts_tree->lock);
  1386. ts_tree_node = ts_tree__find_node(ts_tree, key);
  1387. if (ts_tree_node != NULL)
  1388. reflock_ref(&ts_tree_node->reflock);
  1389. ReleaseSRWLockShared(&ts_tree->lock);
  1390. return ts_tree_node;
  1391. }
  1392. void ts_tree_node_unref(ts_tree_node_t *node) {
  1393. reflock_unref(&node->reflock);
  1394. }
  1395. void ts_tree_node_unref_and_destroy(ts_tree_node_t *node) {
  1396. reflock_unref_and_destroy(&node->reflock);
  1397. }
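/* Design note on the thread-safe tree: the SRW lock only protects the tree
 * structure and is held just long enough to find or unlink a node, while the
 * per-node reflock is taken before the lock is released so the node stays
 * alive while a caller uses it unlocked. A typical lookup (with an
 * illustrative tree pointer and key) looks like:
 *
 *   ts_tree_node_t* node = ts_tree_find_and_ref(tree, (uintptr_t) handle);
 *   if (node != NULL) {
 *     ... use the node without holding the SRW lock ...
 *     ts_tree_node_unref(node);
 *   }
 *
 * ts_tree_del_and_ref() pairs with ts_tree_node_unref_and_destroy() when the
 * node is being torn down for good. */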
  1398. void tree_init(tree_t *tree) {
  1399. memset(tree, 0, sizeof *tree);
  1400. }
  1401. void tree_node_init(tree_node_t *node) {
  1402. memset(node, 0, sizeof *node);
  1403. }
  1404. #define TREE__ROTATE(cis, trans) \
  1405. tree_node_t *p = node; \
  1406. tree_node_t *q = node->trans; \
  1407. tree_node_t *parent = p->parent; \
  1408. \
  1409. if (parent) { \
  1410. if (parent->left == p) \
  1411. parent->left = q; \
  1412. else \
  1413. parent->right = q; \
  1414. } else { \
  1415. tree->root = q; \
  1416. } \
  1417. \
  1418. q->parent = parent; \
  1419. p->parent = q; \
  1420. p->trans = q->cis; \
  1421. if (p->trans) \
  1422. p->trans->parent = p; \
  1423. q->cis = p;
  1424. static inline void tree__rotate_left(tree_t *tree, tree_node_t *node) {
  1425. TREE__ROTATE(left, right)
  1426. }
  1427. static inline void tree__rotate_right(tree_t *tree, tree_node_t *node) {
  1428. TREE__ROTATE(right, left)
  1429. }
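/* TREE__ROTATE is the standard red-black tree rotation. For
 * tree__rotate_left(tree, p) the pointers are rewired like this, where a, b
 * and c stand for arbitrary subtrees:
 *
 *        parent                 parent
 *          |                      |
 *          p                      q
 *         / \     rotate left    / \
 *        a   q    ---------->   p   c
 *           / \                / \
 *          b   c              a   b
 *
 * tree__rotate_right() is the mirror image; neither changes the in-order
 * ordering of keys. */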
  1430. #define TREE__INSERT_OR_DESCEND(side) \
  1431. if (parent->side) { \
  1432. parent = parent->side; \
  1433. } else { \
  1434. parent->side = node; \
  1435. break; \
  1436. }
  1437. #define TREE__REBALANCE_AFTER_INSERT(cis, trans) \
  1438. tree_node_t *grandparent = parent->parent; \
  1439. tree_node_t *uncle = grandparent->trans; \
  1440. \
  1441. if (uncle && uncle->red) { \
  1442. parent->red = uncle->red = false; \
  1443. grandparent->red = true; \
  1444. node = grandparent; \
  1445. } else { \
  1446. if (node == parent->trans) { \
  1447. tree__rotate_##cis(tree, parent); \
  1448. node = parent; \
  1449. parent = node->parent; \
  1450. } \
  1451. parent->red = false; \
  1452. grandparent->red = true; \
  1453. tree__rotate_##trans(tree, grandparent); \
  1454. }
  1455. int tree_add(tree_t *tree, tree_node_t *node, uintptr_t key) {
  1456. tree_node_t *parent;
  1457. parent = tree->root;
  1458. if (parent) {
  1459. for (;;) {
  1460. if (key < parent->key) {
  1461. TREE__INSERT_OR_DESCEND(left)
  1462. } else if (key > parent->key) {
  1463. TREE__INSERT_OR_DESCEND(right)
  1464. } else {
  1465. return -1;
  1466. }
  1467. }
  1468. } else {
  1469. tree->root = node;
  1470. }
  1471. node->key = key;
  1472. node->left = node->right = NULL;
  1473. node->parent = parent;
  1474. node->red = true;
  1475. for (; parent && parent->red; parent = node->parent) {
  1476. if (parent == parent->parent->left) {
  1477. TREE__REBALANCE_AFTER_INSERT(left, right)
  1478. } else {
  1479. TREE__REBALANCE_AFTER_INSERT(right, left)
  1480. }
  1481. }
  1482. tree->root->red = false;
  1483. return 0;
  1484. }
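/* tree_add() is a textbook red-black insertion: the new node starts out red,
 * and the loop above restores the "no red node has a red parent" invariant by
 * recoloring (red uncle) or rotating (black uncle), walking at most O(log n)
 * levels up; the root is forced black at the end. Duplicate keys are rejected
 * with -1, which callers use to detect an already-registered key. */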
  1485. #define TREE__REBALANCE_AFTER_REMOVE(cis, trans) \
  1486. tree_node_t *sibling = parent->trans; \
  1487. \
  1488. if (sibling->red) { \
  1489. sibling->red = false; \
  1490. parent->red = true; \
  1491. tree__rotate_##cis(tree, parent); \
  1492. sibling = parent->trans; \
  1493. } \
  1494. if ((sibling->left && sibling->left->red) || (sibling->right && sibling->right->red)) { \
  1495. if (!sibling->trans || !sibling->trans->red) { \
  1496. sibling->cis->red = false; \
  1497. sibling->red = true; \
  1498. tree__rotate_##trans(tree, sibling); \
  1499. sibling = parent->trans; \
  1500. } \
  1501. sibling->red = parent->red; \
  1502. parent->red = sibling->trans->red = false; \
  1503. tree__rotate_##cis(tree, parent); \
  1504. node = tree->root; \
  1505. break; \
  1506. } \
  1507. sibling->red = true;
  1508. void tree_del(tree_t *tree, tree_node_t *node) {
  1509. tree_node_t *parent = node->parent;
  1510. tree_node_t *left = node->left;
  1511. tree_node_t *right = node->right;
  1512. tree_node_t *next;
  1513. bool red;
  1514. if (!left) {
  1515. next = right;
  1516. } else if (!right) {
  1517. next = left;
  1518. } else {
  1519. next = right;
  1520. while (next->left)
  1521. next = next->left;
  1522. }
  1523. if (parent) {
  1524. if (parent->left == node)
  1525. parent->left = next;
  1526. else
  1527. parent->right = next;
  1528. } else {
  1529. tree->root = next;
  1530. }
  1531. if (left && right) {
  1532. red = next->red;
  1533. next->red = node->red;
  1534. next->left = left;
  1535. left->parent = next;
  1536. if (next != right) {
  1537. parent = next->parent;
  1538. next->parent = node->parent;
  1539. node = next->right;
  1540. parent->left = node;
  1541. next->right = right;
  1542. right->parent = next;
  1543. } else {
  1544. next->parent = parent;
  1545. parent = next;
  1546. node = next->right;
  1547. }
  1548. } else {
  1549. red = node->red;
  1550. node = next;
  1551. }
  1552. if (node)
  1553. node->parent = parent;
  1554. if (red)
  1555. return;
  1556. if (node && node->red) {
  1557. node->red = false;
  1558. return;
  1559. }
  1560. do {
  1561. if (node == tree->root)
  1562. break;
  1563. if (node == parent->left) {
  1564. TREE__REBALANCE_AFTER_REMOVE(left, right)
  1565. } else {
  1566. TREE__REBALANCE_AFTER_REMOVE(right, left)
  1567. }
  1568. node = parent;
  1569. parent = parent->parent;
  1570. } while (!node->red);
  1571. if (node)
  1572. node->red = false;
  1573. }
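/* tree_del() follows the classic red-black removal scheme: the node is first
 * unlinked, or swapped with its in-order successor when it has two children,
 * and only if the removed position was black does the do/while loop apply the
 * TREE__REBALANCE_AFTER_REMOVE cases to restore equal black heights. Note
 * that the successor swap relinks the nodes themselves instead of copying
 * keys, which matters here because each tree_node_t is embedded in a larger
 * structure (e.g. sock_state_t) that must keep owning its own node. */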
  1574. tree_node_t *tree_find(const tree_t *tree, uintptr_t key) {
  1575. tree_node_t *node = tree->root;
  1576. while (node) {
  1577. if (key < node->key)
  1578. node = node->left;
  1579. else if (key > node->key)
  1580. node = node->right;
  1581. else
  1582. return node;
  1583. }
  1584. return NULL;
  1585. }
  1586. tree_node_t *tree_root(const tree_t *tree) {
  1587. return tree->root;
  1588. }
  1589. #ifndef SIO_BSP_HANDLE_POLL
  1590. #define SIO_BSP_HANDLE_POLL 0x4800001D
  1591. #endif
  1592. #ifndef SIO_BASE_HANDLE
  1593. #define SIO_BASE_HANDLE 0x48000022
  1594. #endif
  1595. int ws_global_init(void) {
  1596. int r;
  1597. WSADATA wsa_data;
  1598. r = WSAStartup(MAKEWORD(2, 2), &wsa_data);
  1599. if (r != 0)
  1600. return_set_error(-1, (DWORD)r);
  1601. return 0;
  1602. }
  1603. static inline SOCKET ws__ioctl_get_bsp_socket(SOCKET socket, DWORD ioctl) {
  1604. SOCKET bsp_socket;
  1605. DWORD bytes;
  1606. if (WSAIoctl(socket, ioctl, NULL, 0, &bsp_socket, sizeof bsp_socket, &bytes, NULL, NULL) != SOCKET_ERROR)
  1607. return bsp_socket;
  1608. else
  1609. return INVALID_SOCKET;
  1610. }
  1611. SOCKET ws_get_base_socket(SOCKET socket) {
  1612. SOCKET base_socket;
  1613. DWORD error;
  1614. for (;;) {
  1615. base_socket = ws__ioctl_get_bsp_socket(socket, SIO_BASE_HANDLE);
  1616. if (base_socket != INVALID_SOCKET)
  1617. return base_socket;
  1618. error = GetLastError();
  1619. if (error == WSAENOTSOCK)
  1620. return_set_error(INVALID_SOCKET, error);
  1621. /* Even though Microsoft documentation clearly states that LSPs should
  1622. * never intercept the `SIO_BASE_HANDLE` ioctl [1], Komodia-based LSPs do
  1623. * so anyway, breaking it, with the apparent intention of preventing LSP
  1624. * bypass [2]. Fortunately they don't handle `SIO_BSP_HANDLE_POLL`, which
  1625. * will at least let us obtain the socket associated with the next winsock
  1626. * protocol chain entry. If this succeeds, loop around and call
  1627. * `SIO_BASE_HANDLE` again with the returned BSP socket, to make sure that
  1628. * we unwrap all layers and retrieve the actual base socket.
  1629. * [1] https://docs.microsoft.com/en-us/windows/win32/winsock/winsock-ioctls
  1630. * [2] https://www.komodia.com/newwiki/index.php?title=Komodia%27s_Redirector_bug_fixes#Version_2.2.2.6
  1631. */
  1632. base_socket = ws__ioctl_get_bsp_socket(socket, SIO_BSP_HANDLE_POLL);
  1633. if (base_socket != INVALID_SOCKET && base_socket != socket)
  1634. socket = base_socket;
  1635. else
  1636. return_set_error(INVALID_SOCKET, error);
  1637. }
  1638. }
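/* Sketch of how the base-socket lookup is typically used when a socket is
 * registered with a port (the real caller lives earlier in this file; the
 * surrounding names here are illustrative only):
 *
 *   SOCKET base_socket = ws_get_base_socket(user_socket);
 *   if (base_socket == INVALID_SOCKET)
 *     return NULL;  // the Winsock error has already been recorded
 *   // AFD poll operations are then issued against base_socket rather than
 *   // the user-visible (possibly LSP-wrapped) handle.
 *
 * The retry loop above peels one LSP layer per iteration via
 * SIO_BSP_HANDLE_POLL until SIO_BASE_HANDLE succeeds or no further unwrapping
 * is possible, so arbitrarily deep provider chains are handled. */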