Rework WinAFD event code (#811)

We've had reports of user-after-free type crashes in Windows cleanup
code for the Event Thread. In evaluating the code, it appeared there
were some memory leaks on per-connection handles that may have remained
open during shutdown, while trying to resolve that it became apparent
the methodology chosen may not have been the right one for interfacing
with the Windows AFD system as stability issues were seen during this
debugging process.

Since this system is completely undocumented, there was no clear
resolution path other than to switch to the *other* methodology which
involves directly opening `\Device\Afd`, rather than spawning a "peer
socket" to use to queue AFD operations.

The original methodology chosen more closely resembled what is employed
by [libuv](https://github.com/libuv/libuv) and given its widespread use
was the reason it was used. The new methodology more closely resembles
[wepoll](https://github.com/piscisaureus/wepoll).

Its not clear if there are any scalability or performance advantages or
disadvantages for either method. They both seem like different ways to
do the same thing, but this current way does seem more stable.

Fixes #798
Fix By: Brad House (@bradh352)
v1.29
Brad House 4 months ago
parent 9582812952
commit 76d09afd1d
  1. 2
      appveyor.yml
  2. 2
      ci/test.sh
  3. 59
      src/lib/ares_event_thread.c
  4. 792
      src/lib/ares_event_win32.c
  5. 58
      src/lib/ares_event_win32.h
  6. 2
      test/ares-test-mock-et.cc

@ -132,7 +132,7 @@ test_script:
- if "%SYSTEM%" == "CONSOLE" if "%CONFTOOL%" == "CMAKE" if not "%SKIP_TESTS%" == "yes" cd %TESTDIR%
- if "%SYSTEM%" == "CONSOLE" if "%CONFTOOL%" == "CMAKE" if not "%SKIP_TESTS%" == "yes" .\adig.exe www.google.com
- if "%SYSTEM%" == "CONSOLE" if "%CONFTOOL%" == "CMAKE" if not "%SKIP_TESTS%" == "yes" .\ahost.exe www.google.com
- if "%SYSTEM%" == "CONSOLE" if "%CONFTOOL%" == "CMAKE" if not "%SKIP_TESTS%" == "yes" .\arestest.exe -4 -v --gtest_filter=-*LiveSearchTXT*:*LiveSearchANY*:*LiveGetLocalhostByAddr*
- if "%SYSTEM%" == "CONSOLE" if "%CONFTOOL%" == "CMAKE" if not "%SKIP_TESTS%" == "yes" .\arestest.exe --gtest_filter=-*LiveSearchTXT*:*LiveSearchANY*:*LiveGetLocalhostByAddr*
- if "%SYSTEM%" == "CONSOLE" if "%CONFTOOL%" == "CMAKE" if not "%SKIP_TESTS%" == "yes" .\dnsdump.exe "%APPVEYOR_BUILD_FOLDER%\test\fuzzinput\answer_a" "%APPVEYOR_BUILD_FOLDER%\test\fuzzinput\answer_aaaa"
#on_finish:

@ -33,7 +33,7 @@ fi
$TEST_WRAP "${TOOLSBIN}/adig" www.google.com
$TEST_WRAP "${TOOLSBIN}/ahost" www.google.com
cd "${TESTSBIN}"
$TEST_WRAP ./arestest -4 -v $TEST_FILTER
$TEST_WRAP ./arestest -4 $TEST_FILTER
./aresfuzz ${TESTDIR}/fuzzinput/*
./aresfuzzname ${TESTDIR}/fuzznames/*
./dnsdump "${TESTDIR}/fuzzinput/answer_a" "${TESTDIR}/fuzzinput/answer_aaaa"

@ -264,6 +264,35 @@ static void ares_event_process_updates(ares_event_thread_t *e)
}
}
static void ares_event_thread_cleanup(ares_event_thread_t *e)
{
/* Manually free any updates that weren't processed */
if (e->ev_updates != NULL) {
ares__llist_node_t *node;
while ((node = ares__llist_node_first(e->ev_updates)) != NULL) {
ares_event_destroy_cb(ares__llist_node_claim(node));
}
ares__llist_destroy(e->ev_updates);
e->ev_updates = NULL;
}
if (e->ev_sock_handles != NULL) {
ares__htable_asvp_destroy(e->ev_sock_handles);
e->ev_sock_handles = NULL;
}
if (e->ev_cust_handles != NULL) {
ares__htable_vpvp_destroy(e->ev_cust_handles);
e->ev_cust_handles = NULL;
}
if (e->ev_sys != NULL && e->ev_sys->destroy != NULL) {
e->ev_sys->destroy(e);
e->ev_sys = NULL;
}
}
static void *ares_event_thread(void *arg)
{
ares_event_thread_t *e = arg;
@ -298,14 +327,16 @@ static void *ares_event_thread(void *arg)
ares__thread_mutex_lock(e->mutex);
}
/* Lets cleanup while we're in the thread itself */
ares_event_thread_cleanup(e);
ares__thread_mutex_unlock(e->mutex);
return NULL;
}
static void ares_event_thread_destroy_int(ares_event_thread_t *e)
{
ares__llist_node_t *node;
/* Wake thread and tell it to shutdown if it exists */
ares__thread_mutex_lock(e->mutex);
if (e->isup) {
@ -316,26 +347,14 @@ static void ares_event_thread_destroy_int(ares_event_thread_t *e)
/* Wait for thread to shutdown */
if (e->thread) {
ares__thread_join(e->thread, NULL);
void *rv = NULL;
ares__thread_join(e->thread, &rv);
e->thread = NULL;
}
/* Manually free any updates that weren't processed */
while ((node = ares__llist_node_first(e->ev_updates)) != NULL) {
ares_event_destroy_cb(ares__llist_node_claim(node));
}
ares__llist_destroy(e->ev_updates);
e->ev_updates = NULL;
ares__htable_asvp_destroy(e->ev_sock_handles);
e->ev_sock_handles = NULL;
ares__htable_vpvp_destroy(e->ev_cust_handles);
e->ev_cust_handles = NULL;
if (e->ev_sys && e->ev_sys->destroy) {
e->ev_sys->destroy(e);
}
/* If the event thread ever got to the point of starting, this is a no-op
* as it runs this same cleanup when it shuts down */
ares_event_thread_cleanup(e);
ares__thread_mutex_destroy(e->mutex);
e->mutex = NULL;
@ -352,6 +371,8 @@ void ares_event_thread_destroy(ares_channel_t *channel)
}
ares_event_thread_destroy_int(e);
channel->sock_state_cb_data = NULL;
channel->sock_state_cb = NULL;
}
static const ares_event_sys_t *ares_event_fetch_sys(ares_evsys_t evsys)

File diff suppressed because it is too large Load Diff

@ -67,17 +67,53 @@ typedef VOID(NTAPI *PIO_APC_ROUTINE)(PVOID ApcContext,
# define STATUS_CANCELLED ((NTSTATUS)0xC0000120L)
# define STATUS_NOT_FOUND ((NTSTATUS)0xC0000225L)
typedef struct _UNICODE_STRING {
USHORT Length;
USHORT MaximumLength;
LPCWSTR Buffer;
} UNICODE_STRING, *PUNICODE_STRING;
typedef struct _OBJECT_ATTRIBUTES {
ULONG Length;
HANDLE RootDirectory;
PUNICODE_STRING ObjectName;
ULONG Attributes;
PVOID SecurityDescriptor;
PVOID SecurityQualityOfService;
} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES;
# ifndef FILE_OPEN
# define FILE_OPEN 0x00000001UL
# endif
/* Not sure what headers might have these */
# define IOCTL_AFD_POLL 0x00012024
# define AFD_POLL_RECEIVE 0x0001
# define AFD_POLL_RECEIVE_EXPEDITED 0x0002
# define AFD_POLL_SEND 0x0004
# define AFD_POLL_DISCONNECT 0x0008
# define AFD_POLL_ABORT 0x0010
# define AFD_POLL_LOCAL_CLOSE 0x0020
# define AFD_POLL_ACCEPT 0x0080
# define AFD_POLL_CONNECT_FAIL 0x0100
# define AFD_POLL_RECEIVE_BIT 0
# define AFD_POLL_RECEIVE (1 << AFD_POLL_RECEIVE_BIT)
# define AFD_POLL_RECEIVE_EXPEDITED_BIT 1
# define AFD_POLL_RECEIVE_EXPEDITED (1 << AFD_POLL_RECEIVE_EXPEDITED_BIT)
# define AFD_POLL_SEND_BIT 2
# define AFD_POLL_SEND (1 << AFD_POLL_SEND_BIT)
# define AFD_POLL_DISCONNECT_BIT 3
# define AFD_POLL_DISCONNECT (1 << AFD_POLL_DISCONNECT_BIT)
# define AFD_POLL_ABORT_BIT 4
# define AFD_POLL_ABORT (1 << AFD_POLL_ABORT_BIT)
# define AFD_POLL_LOCAL_CLOSE_BIT 5
# define AFD_POLL_LOCAL_CLOSE (1 << AFD_POLL_LOCAL_CLOSE_BIT)
# define AFD_POLL_CONNECT_BIT 6
# define AFD_POLL_CONNECT (1 << AFD_POLL_CONNECT_BIT)
# define AFD_POLL_ACCEPT_BIT 7
# define AFD_POLL_ACCEPT (1 << AFD_POLL_ACCEPT_BIT)
# define AFD_POLL_CONNECT_FAIL_BIT 8
# define AFD_POLL_CONNECT_FAIL (1 << AFD_POLL_CONNECT_FAIL_BIT)
# define AFD_POLL_QOS_BIT 9
# define AFD_POLL_QOS (1 << AFD_POLL_QOS_BIT)
# define AFD_POLL_GROUP_QOS_BIT 10
# define AFD_POLL_GROUP_QOS (1 << AFD_POLL_GROUP_QOS_BIT)
# define AFD_NUM_POLL_EVENTS 11
# define AFD_POLL_ALL ((1 << AFD_NUM_POLL_EVENTS) - 1)
typedef struct _AFD_POLL_HANDLE_INFO {
HANDLE Handle;
@ -101,6 +137,12 @@ typedef NTSTATUS(NTAPI *NtDeviceIoControlFile_t)(
PIO_STATUS_BLOCK IoStatusBlock, ULONG IoControlCode, PVOID InputBuffer,
ULONG InputBufferLength, PVOID OutputBuffer, ULONG OutputBufferLength);
typedef NTSTATUS(NTAPI *NtCreateFile_t)(
PHANDLE FileHandle, ACCESS_MASK DesiredAccess,
POBJECT_ATTRIBUTES ObjectAttributes, PIO_STATUS_BLOCK IoStatusBlock,
PLARGE_INTEGER AllocationSize, ULONG FileAttributes, ULONG ShareAccess,
ULONG CreateDisposition, ULONG CreateOptions, PVOID EaBuffer, ULONG EaLength);
/* On UWP/Windows Store, these definitions aren't there for some reason */
# ifndef SIO_BSP_HANDLE_POLL
# define SIO_BSP_HANDLE_POLL 0x4800001D

@ -143,7 +143,9 @@ static int sock_cb_count = 0;
static int SocketConnectCallback(ares_socket_t fd, int type, void *data) {
int rc = *(int*)data;
if (verbose) std::cerr << "SocketConnectCallback(" << fd << ") invoked" << std::endl;
(void)type;
sock_cb_count++;
if (verbose) std::cerr << "SocketConnectCallback(fd: " << fd << ", cnt: " << sock_cb_count << ") invoked" << std::endl;
return rc;
}

Loading…
Cancel
Save