From d4ecf95be5d41a70c5de1d7b7a145d3f2bd7b905 Mon Sep 17 00:00:00 2001 From: Brad House Date: Sun, 18 Aug 2024 12:37:40 -0400 Subject: [PATCH] connection handling modularization (#857) The main purpose of this PR is to modularize the communication library, and streamline the code paths for TCP and UDP into a single flow. This will help us add additional flows such as TLS, and also make sure these communication functions return a known set of error codes so that additional error codes can be added for new flows in the future. It also adds a new optional callback of `ares_set_notify_pending_write_callback()` that will assist in aggregating data from multiple queries into a single write operation. This doesn't apply to UDP, but on TCP and especially TLS in the future this can be a significant win. This is automatically applied for the Event Thread. It also fixes a long-standing issue where, if a UDP connection became saturated and started returning `EWOULDBLOCK` or `EAGAIN`, it was treated as a failure. Since this is more in line with the TCP code now, it will wait on a write event to retry. Finally, there are additional cleanups due to not needing to be able to retrieve socket errnos all over the place. 
Authored-By: Brad House (@bradh352) --- docs/Makefile.inc | 3 + docs/ares_inet_pton.3 | 7 +- docs/ares_process_fd.3 | 3 + docs/ares_process_pending_write.3 | 3 + docs/ares_set_notify_pending_write_callback.3 | 62 ++ include/ares.h | 8 + src/lib/ares__close_sockets.c | 9 +- src/lib/ares__socket.c | 561 +++++++++++++++--- src/lib/ares__sortaddrinfo.c | 18 +- src/lib/ares_destroy.c | 2 - src/lib/ares_inet_net_pton.h | 4 - src/lib/ares_init.c | 40 +- src/lib/ares_private.h | 139 +++-- src/lib/ares_process.c | 542 ++++++++--------- src/lib/ares_setup.h | 136 +---- src/lib/ares_update_servers.c | 12 - src/lib/event/ares_event.h | 2 + src/lib/event/ares_event_thread.c | 33 +- src/lib/inet_net_pton.c | 37 +- src/lib/inet_ntop.c | 29 +- src/lib/legacy/ares_fds.c | 5 +- src/lib/legacy/ares_getsock.c | 3 +- src/lib/str/ares__buf.c | 2 +- 23 files changed, 1016 insertions(+), 644 deletions(-) create mode 100644 docs/ares_process_fd.3 create mode 100644 docs/ares_process_pending_write.3 create mode 100644 docs/ares_set_notify_pending_write_callback.3 diff --git a/docs/Makefile.inc b/docs/Makefile.inc index 46e30346..4941d079 100644 --- a/docs/Makefile.inc +++ b/docs/Makefile.inc @@ -114,6 +114,8 @@ MANPAGES = ares_cancel.3 \ ares_parse_txt_reply.3 \ ares_parse_uri_reply.3 \ ares_process.3 \ + ares_process_fd.3 \ + ares_process_pending_write.3 \ ares_query.3 \ ares_query_dnsrec.3 \ ares_queue.3 \ @@ -128,6 +130,7 @@ MANPAGES = ares_cancel.3 \ ares_set_local_dev.3 \ ares_set_local_ip4.3 \ ares_set_local_ip6.3 \ + ares_set_notify_pending_write_callback.3 \ ares_set_server_state_callback.3 \ ares_set_servers.3 \ ares_set_servers_csv.3 \ diff --git a/docs/ares_inet_pton.3 b/docs/ares_inet_pton.3 index 5b7b8010..34b2df06 100644 --- a/docs/ares_inet_pton.3 +++ b/docs/ares_inet_pton.3 @@ -9,7 +9,7 @@ ares_inet_pton \- convert an IPv4 or IPv6 address from text to binary form .nf #include -const char *ares_inet_pton(int \fIaf\fP, const char *\fIsrc\fP, void *\fIdst\fP); +int 
ares_inet_pton(int \fIaf\fP, const char *\fIsrc\fP, void *\fIdst\fP); .fi .SH DESCRIPTION This is a portable version with the identical functionality of the commonly @@ -22,6 +22,11 @@ shall be supported. The \fBsrc\fP argument points to the string being passed in. The \fBdst\fP argument points to a buffer into which the function stores the numeric address; this shall be large enough to hold the numeric address (32 bits for AF_INET, 128 bits for AF_INET6). + +It returns 1 if the address was valid for the specified address family, or 0 +if the address was not parseable in the specified address family, or -1 if +some system error occurred (in which case errno will have been set). + .SH SEE ALSO .BR ares_init (3), .BR ares_inet_ntop (3) diff --git a/docs/ares_process_fd.3 b/docs/ares_process_fd.3 new file mode 100644 index 00000000..94e50f41 --- /dev/null +++ b/docs/ares_process_fd.3 @@ -0,0 +1,3 @@ +.\" Copyright (C) 2023 The c-ares project and its contributors. +.\" SPDX-License-Identifier: MIT +.so man3/ares_process.3 diff --git a/docs/ares_process_pending_write.3 b/docs/ares_process_pending_write.3 new file mode 100644 index 00000000..93ad3fa4 --- /dev/null +++ b/docs/ares_process_pending_write.3 @@ -0,0 +1,3 @@ +.\" Copyright (C) 2023 The c-ares project and its contributors. +.\" SPDX-License-Identifier: MIT +.so man3/ares_set_notify_pending_write_callback.3 diff --git a/docs/ares_set_notify_pending_write_callback.3 b/docs/ares_set_notify_pending_write_callback.3 new file mode 100644 index 00000000..0b50a276 --- /dev/null +++ b/docs/ares_set_notify_pending_write_callback.3 @@ -0,0 +1,62 @@ +.\" +.\" Copyright 2024 by the c-ares project and its contributors +.\" SPDX-License-Identifier: MIT +.\" +.TH ARES_SET_NOTIFY_PENDING_WRITE_CALLBACK 3 "13 Aug 2024" +.SH NAME +ares_set_notify_pending_write_callback, ares_process_pending_write \- Function +for setting a callback which is triggered when there is potential pending data +which needs to be written. 
+.SH SYNOPSIS +.nf +#include <ares.h> + +typedef void (*ares_notify_pending_write_callback)(void *\fIdata\fP); + +void ares_set_notify_pending_write_callback( + ares_channel_t *\fIchannel\fP, + ares_notify_pending_write_callback \fIcallback\fP, + void *\fIuser_data\fP); + +void ares_process_pending_write(ares_channel_t *\fIchannel\fP); + +.fi + +.SH DESCRIPTION +The \fBares_set_notify_pending_write_callback(3)\fP function sets a callback +function \fIcallback\fP in the given ares channel handle \fIchannel\fP that +is invoked whenever there is new pending TCP data to be written. Since TCP +is stream based, if there are multiple queries being enqueued back to back they +can be sent as one large buffer. Normally a \fBsend(2)\fP syscall operation +would be triggered for each query. + +When setting this callback, an event will be triggered when data is buffered, +but not written. This event is used to wake the caller's event loop which +should call \fBares_process_pending_write(3)\fP using the channel associated +with the callback. Each invocation of the callback must result in a call +to \fBares_process_pending_write(3)\fP from the caller's event loop otherwise +stalls and timeouts may occur. The callback \fBmust not\fP call +\fBares_process_pending_write(3)\fP directly as otherwise it would invalidate +any advantage of this use-case. + +This is considered an optimization, especially when using TLS-based connections +which add additional overhead to the data stream. Due to the asynchronous nature +of c-ares, there is no way to identify when a caller may be finished enqueuing +queries via any of the possible public API calls such as +\fBares_getaddrinfo(3)\fP or \fBares_search_dnsrec(3)\fP, so this is an +enhancement to try to group query send operations together and will rely on the +signaling latency involved in waking the user's event loop. + +If no callback is set, data will be written immediately to the socket, thus +bypassing this optimization. 
+ +This option cannot be used with \fIARES_OPT_EVENT_THREAD\fP passed to +\fBares_init_options(3)\fP since the user has no event loop. This optimization +is automatically enabled when using the Event Thread as it sets the callback +for its own internal signaling. + +.SH AVAILABILITY +This function was first introduced in c-ares version 1.34.0. + +.SH SEE ALSO +.BR ares_init_options (3) diff --git a/include/ares.h b/include/ares.h index 95fc2440..780c5e6a 100644 --- a/include/ares.h +++ b/include/ares.h @@ -460,6 +460,8 @@ typedef void (*ares_server_state_callback)(const char *server_string, ares_bool_t success, int flags, void *data); +typedef void (*ares_notify_pending_write_callback)(void *data); + CARES_EXTERN int ares_library_init(int flags); CARES_EXTERN int ares_library_init_mem(int flags, void *(*amalloc)(size_t size), @@ -526,6 +528,12 @@ CARES_EXTERN void ares_server_state_callback callback, void *user_data); +CARES_EXTERN void ares_set_notify_pending_write_callback( + ares_channel_t *channel, ares_notify_pending_write_callback callback, + void *user_data); + +CARES_EXTERN void ares_process_pending_write(ares_channel_t *channel); + CARES_EXTERN int ares_set_sortlist(ares_channel_t *channel, const char *sortstr); diff --git a/src/lib/ares__close_sockets.c b/src/lib/ares__close_sockets.c index 27bbaacf..a3575ed4 100644 --- a/src/lib/ares__close_sockets.c +++ b/src/lib/ares__close_sockets.c @@ -52,18 +52,19 @@ void ares__close_connection(ares_conn_t *conn, ares_status_t requeue_status) ares__htable_asvp_remove(channel->connnode_by_socket, conn->fd); if (conn->flags & ARES_CONN_FLAG_TCP) { - /* Reset any existing input and output buffer. 
*/ - ares__buf_consume(server->tcp_parser, ares__buf_len(server->tcp_parser)); - ares__buf_consume(server->tcp_send, ares__buf_len(server->tcp_send)); server->tcp_conn = NULL; } + ares__buf_destroy(conn->in_buf); + ares__buf_destroy(conn->out_buf); + /* Requeue queries to other connections */ ares__requeue_queries(conn, requeue_status); ares__llist_destroy(conn->queries_to_conn); - SOCK_STATE_CALLBACK(channel, conn->fd, 0, 0); + ares__conn_sock_state_cb_update(conn, ARES_CONN_STATE_NONE); + ares__close_socket(channel, conn->fd); ares_free(conn); diff --git a/src/lib/ares__socket.c b/src/lib/ares__socket.c index 86e281fc..3605aabf 100644 --- a/src/lib/ares__socket.c +++ b/src/lib/ares__socket.c @@ -79,6 +79,94 @@ #endif +/* Macro SOCKERRNO / SET_SOCKERRNO() returns / sets the *socket-related* errno + * (or equivalent) on this platform to hide platform details to code using it. + */ +#ifdef USE_WINSOCK +# define SOCKERRNO ((int)WSAGetLastError()) +# define SET_SOCKERRNO(x) (WSASetLastError((int)(x))) +#else +# define SOCKERRNO (errno) +# define SET_SOCKERRNO(x) (errno = (x)) +#endif + +/* Portable error number symbolic names defined to Winsock error codes. 
*/ +#ifdef USE_WINSOCK +# undef EBADF /* override definition in errno.h */ +# define EBADF WSAEBADF +# undef EINTR /* override definition in errno.h */ +# define EINTR WSAEINTR +# undef EINVAL /* override definition in errno.h */ +# define EINVAL WSAEINVAL +# undef EWOULDBLOCK /* override definition in errno.h */ +# define EWOULDBLOCK WSAEWOULDBLOCK +# undef EINPROGRESS /* override definition in errno.h */ +# define EINPROGRESS WSAEINPROGRESS +# undef EALREADY /* override definition in errno.h */ +# define EALREADY WSAEALREADY +# undef ENOTSOCK /* override definition in errno.h */ +# define ENOTSOCK WSAENOTSOCK +# undef EDESTADDRREQ /* override definition in errno.h */ +# define EDESTADDRREQ WSAEDESTADDRREQ +# undef EMSGSIZE /* override definition in errno.h */ +# define EMSGSIZE WSAEMSGSIZE +# undef EPROTOTYPE /* override definition in errno.h */ +# define EPROTOTYPE WSAEPROTOTYPE +# undef ENOPROTOOPT /* override definition in errno.h */ +# define ENOPROTOOPT WSAENOPROTOOPT +# undef EPROTONOSUPPORT /* override definition in errno.h */ +# define EPROTONOSUPPORT WSAEPROTONOSUPPORT +# define ESOCKTNOSUPPORT WSAESOCKTNOSUPPORT +# undef EOPNOTSUPP /* override definition in errno.h */ +# define EOPNOTSUPP WSAEOPNOTSUPP +# define EPFNOSUPPORT WSAEPFNOSUPPORT +# undef EAFNOSUPPORT /* override definition in errno.h */ +# define EAFNOSUPPORT WSAEAFNOSUPPORT +# undef EADDRINUSE /* override definition in errno.h */ +# define EADDRINUSE WSAEADDRINUSE +# undef EADDRNOTAVAIL /* override definition in errno.h */ +# define EADDRNOTAVAIL WSAEADDRNOTAVAIL +# undef ENETDOWN /* override definition in errno.h */ +# define ENETDOWN WSAENETDOWN +# undef ENETUNREACH /* override definition in errno.h */ +# define ENETUNREACH WSAENETUNREACH +# undef ENETRESET /* override definition in errno.h */ +# define ENETRESET WSAENETRESET +# undef ECONNABORTED /* override definition in errno.h */ +# define ECONNABORTED WSAECONNABORTED +# undef ECONNRESET /* override definition in errno.h */ +# define 
ECONNRESET WSAECONNRESET +# undef ENOBUFS /* override definition in errno.h */ +# define ENOBUFS WSAENOBUFS +# undef EISCONN /* override definition in errno.h */ +# define EISCONN WSAEISCONN +# undef ENOTCONN /* override definition in errno.h */ +# define ENOTCONN WSAENOTCONN +# define ESHUTDOWN WSAESHUTDOWN +# define ETOOMANYREFS WSAETOOMANYREFS +# undef ETIMEDOUT /* override definition in errno.h */ +# define ETIMEDOUT WSAETIMEDOUT +# undef ECONNREFUSED /* override definition in errno.h */ +# define ECONNREFUSED WSAECONNREFUSED +# undef ELOOP /* override definition in errno.h */ +# define ELOOP WSAELOOP +# ifndef ENAMETOOLONG /* possible previous definition in errno.h */ +# define ENAMETOOLONG WSAENAMETOOLONG +# endif +# define EHOSTDOWN WSAEHOSTDOWN +# undef EHOSTUNREACH /* override definition in errno.h */ +# define EHOSTUNREACH WSAEHOSTUNREACH +# ifndef ENOTEMPTY /* possible previous definition in errno.h */ +# define ENOTEMPTY WSAENOTEMPTY +# endif +# define EPROCLIM WSAEPROCLIM +# define EUSERS WSAEUSERS +# define EDQUOT WSAEDQUOT +# define ESTALE WSAESTALE +# define EREMOTE WSAEREMOTE +#endif + + #ifndef HAVE_WRITEV /* Structure for scatter/gather I/O. */ struct iovec { @@ -87,63 +175,200 @@ struct iovec { }; #endif - -/* Return 1 if the specified error number describes a readiness error, or 0 - * otherwise. This is mostly for HP-UX, which could return EAGAIN or - * EWOULDBLOCK. See this man page - * - * http://devrsrc1.external.hp.com/STKS/cgi-bin/man2html? 
- * manpage=/usr/share/man/man2.Z/send.2 - */ -ares_bool_t ares__socket_try_again(int errnum) +static ares_conn_err_t ares__socket_deref_error(int err) { -#if !defined EWOULDBLOCK && !defined EAGAIN -# error "Neither EWOULDBLOCK nor EAGAIN defined" + switch (err) { +#if defined(EWOULDBLOCK) + case EWOULDBLOCK: + return ARES_CONN_ERR_WOULDBLOCK; +#endif +#if defined(EAGAIN) && (!defined(EWOULDBLOCK) || EAGAIN != EWOULDBLOCK) + case EAGAIN: + return ARES_CONN_ERR_WOULDBLOCK; #endif + case EINPROGRESS: + return ARES_CONN_ERR_WOULDBLOCK; + case ENETDOWN: + return ARES_CONN_ERR_NETDOWN; + case ENETUNREACH: + return ARES_CONN_ERR_NETUNREACH; + case ECONNABORTED: + return ARES_CONN_ERR_CONNABORTED; + case ECONNRESET: + return ARES_CONN_ERR_CONNRESET; + case ECONNREFUSED: + return ARES_CONN_ERR_CONNREFUSED; + case ETIMEDOUT: + return ARES_CONN_ERR_CONNTIMEDOUT; + case EHOSTDOWN: + return ARES_CONN_ERR_HOSTDOWN; + case EHOSTUNREACH: + return ARES_CONN_ERR_HOSTUNREACH; + case EINTR: + return ARES_CONN_ERR_INTERRUPT; + case EAFNOSUPPORT: + return ARES_CONN_ERR_AFNOSUPPORT; + case EADDRNOTAVAIL: + return ARES_CONN_ERR_BADADDR; + default: + break; + } -#ifdef EWOULDBLOCK - if (errnum == EWOULDBLOCK) { - return ARES_TRUE; + return ARES_CONN_ERR_FAILURE; +} + +static ares_bool_t same_address(const struct sockaddr *sa, + const struct ares_addr *aa) +{ + const void *addr1; + const void *addr2; + + if (sa->sa_family == aa->family) { + switch (aa->family) { + case AF_INET: + addr1 = &aa->addr.addr4; + addr2 = &(CARES_INADDR_CAST(const struct sockaddr_in *, sa))->sin_addr; + if (memcmp(addr1, addr2, sizeof(aa->addr.addr4)) == 0) { + return ARES_TRUE; /* match */ + } + break; + case AF_INET6: + addr1 = &aa->addr.addr6; + addr2 = + &(CARES_INADDR_CAST(const struct sockaddr_in6 *, sa))->sin6_addr; + if (memcmp(addr1, addr2, sizeof(aa->addr.addr6)) == 0) { + return ARES_TRUE; /* match */ + } + break; + default: + break; /* LCOV_EXCL_LINE */ + } } -#endif + return ARES_FALSE; /* different 
*/ +} -#if defined EAGAIN && EAGAIN != EWOULDBLOCK - if (errnum == EAGAIN) { - return ARES_TRUE; +void ares__conn_sock_state_cb_update(ares_conn_t *conn, + ares_conn_state_flags_t flags) +{ + ares_channel_t *channel = conn->server->channel; + + if ((conn->state_flags & ARES_CONN_STATE_CBFLAGS) != flags && + channel->sock_state_cb) { + channel->sock_state_cb(channel->sock_state_cb_data, conn->fd, + flags & ARES_CONN_STATE_READ ? 1 : 0, + flags & ARES_CONN_STATE_WRITE ? 1 : 0); } -#endif - return ARES_FALSE; + conn->state_flags &= ~((unsigned int)ARES_CONN_STATE_CBFLAGS); + conn->state_flags |= flags; } -ares_ssize_t ares__socket_recv(ares_channel_t *channel, ares_socket_t s, - void *data, size_t data_len) +ares_conn_err_t ares__socket_recv(ares_channel_t *channel, ares_socket_t s, + ares_bool_t is_tcp, void *data, + size_t data_len, size_t *read_bytes) { + ares_ssize_t rv; + + *read_bytes = 0; + if (channel->sock_funcs && channel->sock_funcs->arecvfrom) { - return channel->sock_funcs->arecvfrom(s, data, data_len, 0, 0, 0, - channel->sock_func_cb_data); + rv = channel->sock_funcs->arecvfrom(s, data, data_len, 0, 0, 0, + channel->sock_func_cb_data); + } else { + rv = (ares_ssize_t)recv((RECV_TYPE_ARG1)s, (RECV_TYPE_ARG2)data, + (RECV_TYPE_ARG3)data_len, (RECV_TYPE_ARG4)(0)); } - return (ares_ssize_t)recv((RECV_TYPE_ARG1)s, (RECV_TYPE_ARG2)data, - (RECV_TYPE_ARG3)data_len, (RECV_TYPE_ARG4)(0)); + if (rv > 0) { + *read_bytes = (size_t)rv; + return ARES_CONN_ERR_SUCCESS; + } + + if (rv == 0) { + /* UDP allows 0-byte packets and is connectionless, so this is success */ + if (!is_tcp) { + return ARES_CONN_ERR_SUCCESS; + } else { + return ARES_CONN_ERR_CONNCLOSED; + } + } + + /* If we're here, rv<0 */ + return ares__socket_deref_error(SOCKERRNO); } -ares_ssize_t ares__socket_recvfrom(ares_channel_t *channel, ares_socket_t s, - void *data, size_t data_len, int flags, - struct sockaddr *from, - ares_socklen_t *from_len) +ares_conn_err_t ares__socket_recvfrom(ares_channel_t 
*channel, ares_socket_t s, + ares_bool_t is_tcp, void *data, + size_t data_len, int flags, + struct sockaddr *from, + ares_socklen_t *from_len, + size_t *read_bytes) { - if (channel->sock_funcs && channel->sock_funcs->arecvfrom) { - return channel->sock_funcs->arecvfrom(s, data, data_len, flags, from, - from_len, channel->sock_func_cb_data); - } + ares_ssize_t rv; + if (channel->sock_funcs && channel->sock_funcs->arecvfrom) { + rv = channel->sock_funcs->arecvfrom(s, data, data_len, flags, from, + from_len, channel->sock_func_cb_data); + } else { #ifdef HAVE_RECVFROM - return (ares_ssize_t)recvfrom(s, data, (RECVFROM_TYPE_ARG3)data_len, flags, + rv = (ares_ssize_t)recvfrom(s, data, (RECVFROM_TYPE_ARG3)data_len, flags, from, from_len); #else - return ares__socket_recv(channel, s, data, data_len); + return ares__socket_recv(channel, s, is_tcp, data, data_len, read_bytes); #endif + } + + if (rv > 0) { + *read_bytes = (size_t)rv; + return ARES_CONN_ERR_SUCCESS; + } + + if (rv == 0) { + /* UDP allows 0-byte packets and is connectionless, so this is success */ + if (!is_tcp) { + return ARES_CONN_ERR_SUCCESS; + } else { + return ARES_CONN_ERR_CONNCLOSED; + } + } + + /* If we're here, rv<0 */ + return ares__socket_deref_error(SOCKERRNO); +} + +ares_conn_err_t ares__conn_read(ares_conn_t *conn, void *data, size_t len, + size_t *read_bytes) +{ + ares_channel_t *channel = conn->server->channel; + ares_conn_err_t err; + + if (!(conn->flags & ARES_CONN_FLAG_TCP)) { + struct sockaddr_storage sa_storage; + ares_socklen_t salen = sizeof(sa_storage); + + memset(&sa_storage, 0, sizeof(sa_storage)); + + err = + ares__socket_recvfrom(channel, conn->fd, ARES_FALSE, data, len, 0, + (struct sockaddr *)&sa_storage, &salen, read_bytes); + +#ifdef HAVE_RECVFROM + if (err == ARES_CONN_ERR_SUCCESS && + !same_address((struct sockaddr *)&sa_storage, &conn->server->addr)) { + err = ARES_CONN_ERR_WOULDBLOCK; + } +#endif + } else { + err = + ares__socket_recv(channel, conn->fd, ARES_TRUE, data, len, 
read_bytes); + } + + /* Toggle connected state if needed */ + if (err == ARES_CONN_ERR_SUCCESS) { + conn->state_flags |= ARES_CONN_STATE_CONNECTED; + } + + return err; } /* Use like: @@ -230,10 +455,23 @@ static ares_status_t ares_conn_set_self_ip(ares_conn_t *conn, ares_bool_t early) return ARES_SUCCESS; } -ares_ssize_t ares__conn_write(ares_conn_t *conn, const void *data, size_t len) +ares_conn_err_t ares__conn_write(ares_conn_t *conn, const void *data, + size_t len, size_t *written) { ares_channel_t *channel = conn->server->channel; int flags = 0; + ares_ssize_t rv; + ares_bool_t is_tfo = ARES_FALSE; + ares_conn_err_t err = ARES_CONN_ERR_SUCCESS; + + *written = 0; + + /* Don't try to write if not doing initial TFO and not connected */ + if (conn->flags & ARES_CONN_FLAG_TCP && + !(conn->state_flags & ARES_CONN_STATE_CONNECTED) && + !(conn->flags & ARES_CONN_FLAG_TFO_INITIAL)) { + return ARES_CONN_ERR_WOULDBLOCK; + } #ifdef HAVE_MSG_NOSIGNAL flags |= MSG_NOSIGNAL; @@ -243,41 +481,162 @@ ares_ssize_t ares__conn_write(ares_conn_t *conn, const void *data, size_t len) struct iovec vec; vec.iov_base = (void *)((size_t)data); /* Cast off const */ vec.iov_len = len; - return channel->sock_funcs->asendv(conn->fd, &vec, 1, - channel->sock_func_cb_data); + rv = channel->sock_funcs->asendv(conn->fd, &vec, 1, + channel->sock_func_cb_data); + if (rv <= 0) { + err = ares__socket_deref_error(SOCKERRNO); + } else { + *written = (size_t)rv; + } + goto done; } if (conn->flags & ARES_CONN_FLAG_TFO_INITIAL) { conn->flags &= ~((unsigned int)ARES_CONN_FLAG_TFO_INITIAL); + is_tfo = ARES_TRUE; #if defined(TFO_USE_SENDTO) && TFO_USE_SENDTO { struct sockaddr_storage sa_storage; ares_socklen_t salen = sizeof(sa_storage); struct sockaddr *sa = (struct sockaddr *)&sa_storage; - ares_status_t status; - ares_ssize_t rv; - status = ares__conn_set_sockaddr(conn, sa, &salen); - if (status != ARES_SUCCESS) { - return status; + if (ares__conn_set_sockaddr(conn, sa, &salen) != ARES_SUCCESS) { + 
return ARES_CONN_ERR_FAILURE; } rv = (ares_ssize_t)sendto((SEND_TYPE_ARG1)conn->fd, (SEND_TYPE_ARG2)data, (SEND_TYPE_ARG3)len, (SEND_TYPE_ARG4)flags, sa, salen); + if (rv <= 0) { + err = ares__socket_deref_error(SOCKERRNO); + } else { + *written = (size_t)rv; + } /* If using TFO, we might not have been able to get an IP earlier, since * we hadn't informed the OS of the destination. When using sendto() * now we have so we should be able to fetch it */ - ares_conn_set_self_ip(conn, ARES_TRUE); - return rv; + ares_conn_set_self_ip(conn, ARES_FALSE); + goto done; } #endif } - return (ares_ssize_t)send((SEND_TYPE_ARG1)conn->fd, (SEND_TYPE_ARG2)data, - (SEND_TYPE_ARG3)len, (SEND_TYPE_ARG4)flags); + rv = (ares_ssize_t)send((SEND_TYPE_ARG1)conn->fd, (SEND_TYPE_ARG2)data, + (SEND_TYPE_ARG3)len, (SEND_TYPE_ARG4)flags); + if (rv <= 0) { + err = ares__socket_deref_error(SOCKERRNO); + } else { + *written = (size_t)rv; + } + goto done; + +done: + if (err == ARES_CONN_ERR_SUCCESS && len == *written) { + /* Wrote all data, make sure we're not listening for write events unless + * using TFO, in which case we'll need a write event to know when + * we're connected. */ + ares__conn_sock_state_cb_update( + conn, ARES_CONN_STATE_READ | + (is_tfo ? 
ARES_CONN_STATE_WRITE : ARES_CONN_STATE_NONE)); + } else if (err == ARES_CONN_ERR_WOULDBLOCK) { + /* Need to wait on more buffer space to write */ + ares__conn_sock_state_cb_update(conn, ARES_CONN_STATE_READ | + ARES_CONN_STATE_WRITE); + } + + return err; +} + +ares_status_t ares__conn_flush(ares_conn_t *conn) +{ + const unsigned char *data; + size_t data_len; + size_t count; + ares_conn_err_t err; + ares_status_t status; + ares_bool_t tfo = ARES_FALSE; + + if (conn == NULL) { + return ARES_EFORMERR; + } + + if (conn->flags & ARES_CONN_FLAG_TFO_INITIAL) { + tfo = ARES_TRUE; + } + + do { + if (ares__buf_len(conn->out_buf) == 0) { + status = ARES_SUCCESS; + goto done; + } + + if (conn->flags & ARES_CONN_FLAG_TCP) { + data = ares__buf_peek(conn->out_buf, &data_len); + } else { + unsigned short msg_len; + + /* Read length, then provide buffer without length */ + ares__buf_tag(conn->out_buf); + status = ares__buf_fetch_be16(conn->out_buf, &msg_len); + if (status != ARES_SUCCESS) { + return status; + } + ares__buf_tag_rollback(conn->out_buf); + + data = ares__buf_peek(conn->out_buf, &data_len); + if (data_len < msg_len + 2) { + status = ARES_EFORMERR; + goto done; + } + data += 2; + data_len = msg_len; + } + + err = ares__conn_write(conn, data, data_len, &count); + if (err != ARES_CONN_ERR_SUCCESS) { + if (err != ARES_CONN_ERR_WOULDBLOCK) { + status = ARES_ECONNREFUSED; + goto done; + } + status = ARES_SUCCESS; + goto done; + } + + /* UDP didn't send the length prefix so augment that here */ + if (!(conn->flags & ARES_CONN_FLAG_TCP)) { + count += 2; + } + + /* Strip data written from the buffer */ + ares__buf_consume(conn->out_buf, (size_t)count); + status = ARES_SUCCESS; + + /* Loop only for UDP since we have to send per-packet. 
We already + * sent everything we could if using tcp */ + } while (!(conn->flags & ARES_CONN_FLAG_TCP)); + +done: + if (status == ARES_SUCCESS) { + ares_conn_state_flags_t flags = ARES_CONN_STATE_READ; + + /* When using TFO, we need to enable waiting on a write event to + * be notified of when a connection is actually established */ + if (tfo) { + flags |= ARES_CONN_STATE_WRITE; + } + + /* If using TCP and not all data was written (partial write), that means + * we need to also wait on a write event */ + if (conn->flags & ARES_CONN_FLAG_TCP && ares__buf_len(conn->out_buf)) { + flags |= ARES_CONN_STATE_WRITE; + } + + ares__conn_sock_state_cb_update(conn, flags); + } + + return status; } /* @@ -515,11 +874,12 @@ static ares_status_t ares__conn_connect(ares_conn_t *conn, struct sockaddr *sa, return ARES_SUCCESS; #elif defined(TFO_USE_CONNECTX) && TFO_USE_CONNECTX { - int rv; - int err; + int rv; + ares_conn_err_t err; do { sa_endpoints_t endpoints; + memset(&endpoints, 0, sizeof(endpoints)); endpoints.sae_dstaddr = sa; endpoints.sae_dstaddrlen = salen; @@ -528,12 +888,16 @@ static ares_status_t ares__conn_connect(ares_conn_t *conn, struct sockaddr *sa, CONNECT_DATA_IDEMPOTENT | CONNECT_RESUME_ON_READ_WRITE, NULL, 0, NULL, NULL); - err = SOCKERRNO; - if (rv == -1 && err != EINPROGRESS && err != EWOULDBLOCK) { - return ARES_ECONNREFUSED; + if (rv < 0) { + err = ares__socket_deref_error(SOCKERRNO); + } else { + break; } - } while (rv == -1 && err == EINTR); + if (err != ARES_CONN_ERR_WOULDBLOCK && err != ARES_CONN_ERR_INTERRUPT) { + return ARES_ECONNREFUSED; + } + } while (err == ARES_CONN_ERR_INTERRUPT); } return ARES_SUCCESS; #elif defined(TFO_SUPPORTED) && TFO_SUPPORTED @@ -555,6 +919,7 @@ ares_status_t ares__open_connection(ares_conn_t **conn_out, ares_conn_t *conn; ares__llist_node_t *node = NULL; int stype = is_tcp ? 
SOCK_STREAM : SOCK_DGRAM; + ares_conn_state_flags_t state_flags; *conn_out = NULL; @@ -568,6 +933,16 @@ ares_status_t ares__open_connection(ares_conn_t **conn_out, conn->server = server; conn->queries_to_conn = ares__llist_create(NULL); conn->flags = is_tcp ? ARES_CONN_FLAG_TCP : ARES_CONN_FLAG_NONE; + conn->out_buf = ares__buf_create(); + conn->in_buf = ares__buf_create(); + + if (conn->queries_to_conn == NULL || conn->out_buf == NULL || + conn->in_buf == NULL) { + /* LCOV_EXCL_START: OutOfMemory */ + status = ARES_ENOMEM; + goto done; + /* LCOV_EXCL_STOP */ + } /* Enable TFO if the OS supports it and we were passed in data to send during * the connect. It might be disabled later if an error is encountered. Make @@ -577,13 +952,6 @@ ares_status_t ares__open_connection(ares_conn_t **conn_out, conn->flags |= ARES_CONN_FLAG_TFO; } - if (conn->queries_to_conn == NULL) { - /* LCOV_EXCL_START: OutOfMemory */ - status = ARES_ENOMEM; - goto done; - /* LCOV_EXCL_STOP */ - } - /* Convert into the struct sockaddr structure needed by the OS */ status = ares__conn_set_sockaddr(conn, sa, &salen); if (status != ARES_SUCCESS) { @@ -591,8 +959,8 @@ ares_status_t ares__open_connection(ares_conn_t **conn_out, } /* Acquire a socket. */ - conn->fd = ares__open_socket(channel, server->addr.family, stype, 0); - if (conn->fd == ARES_SOCKET_BAD) { + if (ares__open_socket(&conn->fd, channel, server->addr.family, stype, 0) != + ARES_CONN_ERR_SUCCESS) { status = ARES_ECONNREFUSED; goto done; } @@ -633,7 +1001,7 @@ ares_status_t ares__open_connection(ares_conn_t **conn_out, } /* Need to store our own ip for DNS cookie support */ - status = ares_conn_set_self_ip(conn, ARES_FALSE); + status = ares_conn_set_self_ip(conn, ARES_TRUE); if (status != ARES_SUCCESS) { goto done; /* LCOV_EXCL_LINE: UntestablePath */ } @@ -662,7 +1030,20 @@ ares_status_t ares__open_connection(ares_conn_t **conn_out, /* LCOV_EXCL_STOP */ } - SOCK_STATE_CALLBACK(channel, conn->fd, 1, is_tcp ? 
1 : 0); + state_flags = ARES_CONN_STATE_READ; + + /* Get notified on connect if using TCP */ + if (conn->flags & ARES_CONN_FLAG_TCP) { + state_flags |= ARES_CONN_STATE_WRITE; + } + + /* Do not attempt to update sock state callbacks on TFO until *after* the + * initial write is performed. Due to the notification event, it's possible + * an erroneous read can come in before the attempt to write the data which + * might be used to set the ip address */ + if (!(conn->flags & ARES_CONN_FLAG_TFO_INITIAL)) { + ares__conn_sock_state_cb_update(conn, state_flags); + } if (is_tcp) { server->tcp_conn = conn; @@ -673,6 +1054,8 @@ done: ares__llist_node_claim(node); ares__llist_destroy(conn->queries_to_conn); ares__close_socket(channel, conn->fd); + ares__buf_destroy(conn->out_buf); + ares__buf_destroy(conn->in_buf); ares_free(conn); } else { *conn_out = conn; @@ -680,15 +1063,27 @@ done: return status; } -ares_socket_t ares__open_socket(ares_channel_t *channel, int af, int type, - int protocol) +ares_conn_err_t ares__open_socket(ares_socket_t *sock, ares_channel_t *channel, + int af, int type, int protocol) { + ares_socket_t s; + + *sock = ARES_SOCKET_BAD; + if (channel->sock_funcs && channel->sock_funcs->asocket) { - return channel->sock_funcs->asocket(af, type, protocol, - channel->sock_func_cb_data); + s = channel->sock_funcs->asocket(af, type, protocol, + channel->sock_func_cb_data); + } else { + s = socket(af, type, protocol); } - return socket(af, type, protocol); + if (s == ARES_SOCKET_BAD) { + return ares__socket_deref_error(SOCKERRNO); + } + + *sock = s; + + return ARES_CONN_ERR_SUCCESS; } ares_status_t ares__connect_socket(ares_channel_t *channel, @@ -696,8 +1091,8 @@ ares_status_t ares__connect_socket(ares_channel_t *channel, const struct sockaddr *addr, ares_socklen_t addrlen) { - int rv; - int err; + int rv; + ares_conn_err_t err; do { if (channel->sock_funcs && channel->sock_funcs->aconnect) { @@ -707,13 +1102,16 @@ ares_status_t ares__connect_socket(ares_channel_t 
*channel, rv = connect(sockfd, addr, addrlen); } - err = SOCKERRNO; + if (rv < 0) { + err = ares__socket_deref_error(SOCKERRNO); + } else { + break; + } - if (rv == -1 && err != EINPROGRESS && err != EWOULDBLOCK) { + if (err != ARES_CONN_ERR_WOULDBLOCK && err != ARES_CONN_ERR_INTERRUPT) { return ARES_ECONNREFUSED; } - - } while (rv == -1 && err == EINTR); + } while (err == ARES_CONN_ERR_INTERRUPT); return ARES_SUCCESS; } @@ -762,3 +1160,14 @@ void ares_set_socket_functions(ares_channel_t *channel, channel->sock_funcs = funcs; channel->sock_func_cb_data = data; } + +void ares_set_notify_pending_write_callback( + ares_channel_t *channel, ares_notify_pending_write_callback callback, + void *user_data) +{ + if (channel == NULL || channel->optmask & ARES_OPT_EVENT_THREAD) { + return; + } + channel->notify_pending_write_cb = callback; + channel->notify_pending_write_cb_data = user_data; +} diff --git a/src/lib/ares__sortaddrinfo.c b/src/lib/ares__sortaddrinfo.c index 1aab81ec..2933871f 100644 --- a/src/lib/ares__sortaddrinfo.c +++ b/src/lib/ares__sortaddrinfo.c @@ -345,8 +345,9 @@ static int rfc6724_compare(const void *ptr1, const void *ptr2) static int find_src_addr(ares_channel_t *channel, const struct sockaddr *addr, struct sockaddr *src_addr) { - ares_socket_t sock; - ares_socklen_t len; + ares_socket_t sock; + ares_socklen_t len; + ares_conn_err_t err; switch (addr->sa_family) { case AF_INET: @@ -360,13 +361,12 @@ static int find_src_addr(ares_channel_t *channel, const struct sockaddr *addr, return 0; } - sock = ares__open_socket(channel, addr->sa_family, SOCK_DGRAM, IPPROTO_UDP); - if (sock == ARES_SOCKET_BAD) { - if (SOCKERRNO == EAFNOSUPPORT) { - return 0; - } else { - return -1; - } + err = + ares__open_socket(&sock, channel, addr->sa_family, SOCK_DGRAM, IPPROTO_UDP); + if (err == ARES_CONN_ERR_AFNOSUPPORT) { + return 0; + } else if (err != ARES_CONN_ERR_SUCCESS) { + return -1; } if (ares__connect_socket(channel, sock, addr, len) != ARES_SUCCESS) { diff --git 
a/src/lib/ares_destroy.c b/src/lib/ares_destroy.c index d75b5e22..2d333b33 100644 --- a/src/lib/ares_destroy.c +++ b/src/lib/ares_destroy.c @@ -142,8 +142,6 @@ void ares__destroy_server(ares_server_t *server) ares__close_sockets(server); ares__llist_destroy(server->connections); - ares__buf_destroy(server->tcp_parser); - ares__buf_destroy(server->tcp_send); ares_free(server); } diff --git a/src/lib/ares_inet_net_pton.h b/src/lib/ares_inet_net_pton.h index 0a52855b..e3ed83a3 100644 --- a/src/lib/ares_inet_net_pton.h +++ b/src/lib/ares_inet_net_pton.h @@ -26,10 +26,6 @@ #ifndef HEADER_CARES_INET_NET_PTON_H #define HEADER_CARES_INET_NET_PTON_H -#ifdef HAVE_INET_NET_PTON -# define ares_inet_net_pton(w, x, y, z) inet_net_pton(w, x, y, z) -#else int ares_inet_net_pton(int af, const char *src, void *dst, size_t size); -#endif #endif /* HEADER_CARES_INET_NET_PTON_H */ diff --git a/src/lib/ares_init.c b/src/lib/ares_init.c index 6dc5f4f9..da6dd48d 100644 --- a/src/lib/ares_init.c +++ b/src/lib/ares_init.c @@ -171,15 +171,6 @@ static ares_status_t init_by_defaults(ares_channel_t *channel) } } -#if defined(USE_WINSOCK) -# define toolong(x) (x == -1) && (SOCKERRNO == WSAEFAULT) -#elif defined(ENAMETOOLONG) -# define toolong(x) \ - (x == -1) && ((SOCKERRNO == ENAMETOOLONG) || (SOCKERRNO == EINVAL)) -#else -# define toolong(x) (x == -1) && (SOCKERRNO == EINVAL) -#endif - if (channel->ndomains == 0) { /* Derive a default domain search list from the kernel hostname, * or set it to empty if the hostname isn't helpful. 
@@ -187,9 +178,7 @@ static ares_status_t init_by_defaults(ares_channel_t *channel) #ifndef HAVE_GETHOSTNAME channel->ndomains = 0; /* default to none */ #else - GETHOSTNAME_TYPE_ARG2 lenv = 64; - size_t len = 64; - int res; + size_t len = 256; channel->ndomains = 0; /* default to none */ hostname = ares_malloc(len); @@ -198,28 +187,11 @@ static ares_status_t init_by_defaults(ares_channel_t *channel) goto error; /* LCOV_EXCL_LINE: OutOfMemory */ } - do { - res = gethostname(hostname, lenv); - - if (toolong(res)) { - char *p; - len *= 2; - lenv *= 2; - p = ares_realloc(hostname, len); - if (!p) { - rc = ARES_ENOMEM; /* LCOV_EXCL_LINE: OutOfMemory */ - goto error; /* LCOV_EXCL_LINE: OutOfMemory */ - } - hostname = p; - continue; - } else if (res) { - /* Lets not treat a gethostname failure as critical, since we - * are ok if gethostname doesn't even exist */ - *hostname = '\0'; - break; - } - - } while (res != 0); + if (gethostname(hostname, (GETHOSTNAME_TYPE_ARG2)len) != 0) { + /* Lets not treat a gethostname failure as critical, since we + * are ok if gethostname doesn't even exist */ + *hostname = '\0'; + } dot = strchr(hostname, '.'); if (dot) { diff --git a/src/lib/ares_private.h b/src/lib/ares_private.h index b85ecb5e..ecb161d7 100644 --- a/src/lib/ares_private.h +++ b/src/lib/ares_private.h @@ -172,15 +172,38 @@ typedef enum { ARES_CONN_FLAG_TFO_INITIAL = 1 << 2 } ares_conn_flags_t; +typedef enum { + ARES_CONN_STATE_NONE = 0, + ARES_CONN_STATE_READ = 1 << 0, + ARES_CONN_STATE_WRITE = 1 << 1, + ARES_CONN_STATE_CONNECTED = 1 << 2, /* This doesn't get a callback */ + ARES_CONN_STATE_CBFLAGS = ARES_CONN_STATE_READ | ARES_CONN_STATE_WRITE +} ares_conn_state_flags_t; + struct ares_conn { - ares_server_t *server; - ares_socket_t fd; - struct ares_addr self_ip; - ares_conn_flags_t flags; + ares_server_t *server; + ares_socket_t fd; + struct ares_addr self_ip; + ares_conn_flags_t flags; + ares_conn_state_flags_t state_flags; + + /*! 
Outbound buffered data that is not yet sent. Exists as one contiguous + * stream in TCP format (big endian 16bit length prefix followed by DNS + * wire-format message). For TCP this can be sent as-is, UDP this must + * be sent per-packet (stripping the length prefix) */ + ares__buf_t *out_buf; + + /*! Inbound buffered data that is not yet parsed. Exists as one contiguous + * stream in TCP format (big endian 16bit length prefix followed by DNS + * wire-format message). TCP may have partial data and this needs to be + * handled gracefully, but UDP will always have a full message */ + ares__buf_t *in_buf; + /* total number of queries run on this connection since it was established */ - size_t total_queries; + size_t total_queries; + /* list of outstanding queries to this connection */ - ares__llist_t *queries_to_conn; + ares__llist_t *queries_to_conn; }; #ifdef _MSC_VER @@ -271,13 +294,6 @@ struct ares_server { /* The next time when we will retry this server if it has hit failures */ ares_timeval_t next_retry_time; - /* TCP buffer since multiple responses can come back in one read, or partial - * in a read */ - ares__buf_t *tcp_parser; - - /* TCP output queue */ - ares__buf_t *tcp_send; - /*! 
Buckets for collecting metrics about the server */ ares_server_metrics_t metrics[ARES_METRIC_COUNT]; @@ -403,6 +419,10 @@ struct ares_channeldata { const struct ares_socket_functions *sock_funcs; void *sock_func_cb_data; + ares_notify_pending_write_callback notify_pending_write_cb; + void *notify_pending_write_cb_data; + ares_bool_t notify_pending_write; + /* Path for resolv.conf file, configurable via ares_options */ char *resolvconf_path; @@ -609,34 +629,63 @@ ares_status_t ares__open_connection(ares_conn_t **conn_out, ares_bool_t ares_sockaddr_to_ares_addr(struct ares_addr *ares_addr, unsigned short *port, const struct sockaddr *sockaddr); -ares_socket_t ares__open_socket(ares_channel_t *channel, int af, int type, - int protocol); -ares_bool_t ares__socket_try_again(int errnum); -ares_ssize_t ares__conn_write(ares_conn_t *conn, const void *data, size_t len); -ares_ssize_t ares__socket_recvfrom(ares_channel_t *channel, ares_socket_t s, - void *data, size_t data_len, int flags, - struct sockaddr *from, - ares_socklen_t *from_len); -ares_ssize_t ares__socket_recv(ares_channel_t *channel, ares_socket_t s, - void *data, size_t data_len); -void ares__close_socket(ares_channel_t *channel, ares_socket_t s); -ares_status_t ares__connect_socket(ares_channel_t *channel, - ares_socket_t sockfd, - const struct sockaddr *addr, - ares_socklen_t addrlen); -void ares__destroy_server(ares_server_t *server); - -ares_status_t ares__servers_update(ares_channel_t *channel, - ares__llist_t *server_list, - ares_bool_t user_specified); -ares_status_t ares__sconfig_append(ares__llist_t **sconfig, - const struct ares_addr *addr, - unsigned short udp_port, - unsigned short tcp_port, - const char *ll_iface); -ares_status_t ares__sconfig_append_fromstr(ares__llist_t **sconfig, - const char *str, - ares_bool_t ignore_invalid); + +/*! 
Socket errors */ +typedef enum { + ARES_CONN_ERR_SUCCESS = 0, /*!< Success */ + ARES_CONN_ERR_WOULDBLOCK = 1, /*!< Operation would block */ + ARES_CONN_ERR_CONNCLOSED = 2, /*!< Connection closed (gracefully) */ + ARES_CONN_ERR_CONNABORTED = 3, /*!< Connection Aborted */ + ARES_CONN_ERR_CONNRESET = 4, /*!< Connection Reset */ + ARES_CONN_ERR_CONNREFUSED = 5, /*!< Connection Refused */ + ARES_CONN_ERR_CONNTIMEDOUT = 6, /*!< Connection Timed Out */ + ARES_CONN_ERR_HOSTDOWN = 7, /*!< Host Down */ + ARES_CONN_ERR_HOSTUNREACH = 8, /*!< Host Unreachable */ + ARES_CONN_ERR_NETDOWN = 9, /*!< Network Down */ + ARES_CONN_ERR_NETUNREACH = 10, /*!< Network Unreachable */ + ARES_CONN_ERR_INTERRUPT = 11, /*!< Call interrupted by signal, repeat */ + ARES_CONN_ERR_AFNOSUPPORT = 12, /*!< Address family not supported */ + ARES_CONN_ERR_BADADDR = 13, /*!< Bad Address / Unavailable */ + ARES_CONN_ERR_FAILURE = 99 /*!< Generic failure */ +} ares_conn_err_t; + +ares_conn_err_t ares__open_socket(ares_socket_t *sock, ares_channel_t *channel, + int af, int type, int protocol); +ares_bool_t ares__socket_try_again(int errnum); +ares_conn_err_t ares__conn_write(ares_conn_t *conn, const void *data, + size_t len, size_t *written); +ares_status_t ares__conn_flush(ares_conn_t *conn); +ares_conn_err_t ares__conn_read(ares_conn_t *conn, void *data, size_t len, + size_t *read_bytes); +void ares__conn_sock_state_cb_update(ares_conn_t *conn, + ares_conn_state_flags_t flags); +ares_conn_err_t ares__socket_recv(ares_channel_t *channel, ares_socket_t s, + ares_bool_t is_tcp, void *data, + size_t data_len, size_t *read_bytes); +ares_conn_err_t ares__socket_recvfrom(ares_channel_t *channel, ares_socket_t s, + ares_bool_t is_tcp, void *data, + size_t data_len, int flags, + struct sockaddr *from, + ares_socklen_t *from_len, + size_t *read_bytes); +void ares__close_socket(ares_channel_t *channel, ares_socket_t s); +ares_status_t ares__connect_socket(ares_channel_t *channel, + ares_socket_t sockfd, + const 
struct sockaddr *addr, + ares_socklen_t addrlen); +void ares__destroy_server(ares_server_t *server); + +ares_status_t ares__servers_update(ares_channel_t *channel, + ares__llist_t *server_list, + ares_bool_t user_specified); +ares_status_t ares__sconfig_append(ares__llist_t **sconfig, + const struct ares_addr *addr, + unsigned short udp_port, + unsigned short tcp_port, + const char *ll_iface); +ares_status_t ares__sconfig_append_fromstr(ares__llist_t **sconfig, + const char *str, + ares_bool_t ignore_invalid); ares_status_t ares_in_addr_to_server_config_llist(const struct in_addr *servers, size_t nservers, ares__llist_t **llist); @@ -730,14 +779,6 @@ ares_status_t ares__dns_name_write(ares__buf_t *buf, ares__llist_t **list, */ void ares_queue_notify_empty(ares_channel_t *channel); - -#define SOCK_STATE_CALLBACK(c, s, r, w) \ - do { \ - if ((c)->sock_state_cb) { \ - (c)->sock_state_cb((c)->sock_state_cb_data, (s), (r), (w)); \ - } \ - } while (0) - #define ARES_CONFIG_CHECK(x) \ (x && x->lookups && ares__slist_len(x->servers) > 0 && x->timeout > 0 && \ x->tries > 0) diff --git a/src/lib/ares_process.c b/src/lib/ares_process.c index 65ee673f..1fb48bfa 100644 --- a/src/lib/ares_process.c +++ b/src/lib/ares_process.c @@ -46,23 +46,20 @@ static void timeadd(ares_timeval_t *now, size_t millisecs); -static void write_tcp_data(ares_channel_t *channel, fd_set *write_fds, - ares_socket_t write_fd); -static void read_packets(ares_channel_t *channel, fd_set *read_fds, +static void process_write(ares_channel_t *channel, fd_set *write_fds, + ares_socket_t write_fd); +static void process_read(ares_channel_t *channel, fd_set *read_fds, ares_socket_t read_fd, const ares_timeval_t *now); static void process_timeouts(ares_channel_t *channel, const ares_timeval_t *now); static ares_status_t process_answer(ares_channel_t *channel, const unsigned char *abuf, size_t alen, - ares_conn_t *conn, ares_bool_t tcp, + ares_conn_t *conn, const ares_timeval_t *now); static void 
handle_conn_error(ares_conn_t *conn, ares_bool_t critical_failure, ares_status_t failure_status); - static ares_bool_t same_questions(const ares_query_t *query, const ares_dns_record_t *arec); -static ares_bool_t same_address(const struct sockaddr *sa, - const struct ares_addr *aa); static void end_query(ares_channel_t *channel, ares_server_t *server, ares_query_t *query, ares_status_t status, const ares_dns_record_t *dnsrec); @@ -204,12 +201,10 @@ static void processfds(ares_channel_t *channel, fd_set *read_fds, } ares__channel_lock(channel); - ares__tvnow(&now); - read_packets(channel, read_fds, read_fd, &now); + process_read(channel, read_fds, read_fd, &now); process_timeouts(channel, &now); - /* Write last as the other 2 operations might have triggered writes */ - write_tcp_data(channel, write_fds, write_fd); + process_write(channel, write_fds, write_fd); /* See if any connections should be cleaned up */ ares__check_cleanup_conns(channel); @@ -235,105 +230,233 @@ void ares_process_fd(ares_channel_t *channel, processfds(channel, NULL, read_fd, NULL, write_fd); } +static ares_socket_t *channel_socket_list(const ares_channel_t *channel, + size_t *num) +{ + ares__slist_node_t *snode; + ares__array_t *arr = ares__array_create(sizeof(ares_socket_t), NULL); + + *num = 0; + + if (arr == NULL) { + return NULL; /* LCOV_EXCL_LINE: OutOfMemory */ + } + + for (snode = ares__slist_node_first(channel->servers); snode != NULL; + snode = ares__slist_node_next(snode)) { + ares_server_t *server = ares__slist_node_val(snode); + ares__llist_node_t *node; + + for (node = ares__llist_node_first(server->connections); node != NULL; + node = ares__llist_node_next(node)) { + const ares_conn_t *conn = ares__llist_node_val(node); + ares_socket_t *sptr; + ares_status_t status; + + if (conn->fd == ARES_SOCKET_BAD) { + continue; + } + + status = ares__array_insert_last((void **)&sptr, arr); + if (status != ARES_SUCCESS) { + ares__array_destroy(arr); /* LCOV_EXCL_LINE: OutOfMemory */ + return 
NULL; /* LCOV_EXCL_LINE: OutOfMemory */ + } + *sptr = conn->fd; + } + } + + return ares__array_finish(arr, num); +} + /* If any TCP sockets select true for writing, write out queued data * we have for them. */ -static void write_tcp_data(ares_channel_t *channel, fd_set *write_fds, - ares_socket_t write_fd) +static void ares_notify_write(ares_conn_t *conn) { - ares__slist_node_t *node; + ares_status_t status; + + /* Mark as connected if we got here and TFO Initial not set */ + if (!(conn->flags & ARES_CONN_FLAG_TFO_INITIAL)) { + conn->state_flags |= ARES_CONN_STATE_CONNECTED; + } + + status = ares__conn_flush(conn); + if (status != ARES_SUCCESS) { + handle_conn_error(conn, ARES_TRUE, status); + } +} - if (!write_fds && (write_fd == ARES_SOCKET_BAD)) { +static void process_write(ares_channel_t *channel, fd_set *write_fds, + ares_socket_t write_fd) +{ + size_t i; + ares_socket_t *socketlist = NULL; + size_t num_sockets = 0; + ares__llist_node_t *node = NULL; + + if (!write_fds && write_fd == ARES_SOCKET_BAD) { /* no possible action */ return; } - for (node = ares__slist_node_first(channel->servers); node != NULL; - node = ares__slist_node_next(node)) { - ares_server_t *server = ares__slist_node_val(node); - const unsigned char *data; - size_t data_len; - ares_ssize_t count; - - /* Make sure server has data to send and is selected in write_fds or - write_fd. 
*/ - if (ares__buf_len(server->tcp_send) == 0 || server->tcp_conn == NULL) { - continue; + /* Single socket specified */ + if (!write_fds) { + node = ares__htable_asvp_get_direct(channel->connnode_by_socket, write_fd); + if (node == NULL) { + return; } - if (write_fds) { - if (!FD_ISSET(server->tcp_conn->fd, write_fds)) { - continue; - } - } else { - if (server->tcp_conn->fd != write_fd) { - continue; - } - } + ares_notify_write(ares__llist_node_val(node)); + return; + } - if (write_fds) { - /* If there's an error and we close this socket, then open - * another with the same fd to talk to another server, then we - * don't want to think that it was the new socket that was - * ready. This is not disastrous, but is likely to result in - * extra system calls and confusion. */ - FD_CLR(server->tcp_conn->fd, write_fds); - } + /* There is no good way to iterate across an fd_set, instead we must pull a + * list of all known fds, and iterate across that checking against the fd_set. + */ + socketlist = channel_socket_list(channel, &num_sockets); - data = ares__buf_peek(server->tcp_send, &data_len); - count = ares__conn_write(server->tcp_conn, data, data_len); - if (count <= 0) { - if (!ares__socket_try_again(SOCKERRNO)) { - handle_conn_error(server->tcp_conn, ARES_TRUE, ARES_ECONNREFUSED); - } + for (i = 0; i < num_sockets; i++) { + if (!FD_ISSET(socketlist[i], write_fds)) { continue; } - /* Strip data written from the buffer */ - ares__buf_consume(server->tcp_send, (size_t)count); + /* If there's an error and we close this socket, then open + * another with the same fd to talk to another server, then we + * don't want to think that it was the new socket that was + * ready. This is not disastrous, but is likely to result in + * extra system calls and confusion. 
*/ + FD_CLR(socketlist[i], write_fds); - /* Notify state callback all data is written */ - if (ares__buf_len(server->tcp_send) == 0) { - SOCK_STATE_CALLBACK(channel, server->tcp_conn->fd, 1, 0); + node = + ares__htable_asvp_get_direct(channel->connnode_by_socket, socketlist[i]); + if (node == NULL) { + return; } + + ares_notify_write(ares__llist_node_val(node)); } + + ares_free(socketlist); } -/* If any TCP socket selects true for reading, read some data, - * allocate a buffer if we finish reading the length word, and process - * a packet if we finish reading one. - */ -static void read_tcp_data(ares_channel_t *channel, ares_conn_t *conn, - const ares_timeval_t *now) +void ares_process_pending_write(ares_channel_t *channel) { - ares_ssize_t count; - ares_server_t *server = conn->server; + ares__slist_node_t *node; - /* Fetch buffer to store data we are reading */ - size_t ptr_len = 65535; - unsigned char *ptr; + if (channel == NULL) { + return; + } + + ares__channel_lock(channel); + if (!channel->notify_pending_write) { + ares__channel_unlock(channel); + return; + } - ptr = ares__buf_append_start(server->tcp_parser, &ptr_len); + /* Set as untriggerd before calling into ares__conn_flush(), this is + * because its possible ares__conn_flush() might cause additional data to + * be enqueued if there is some form of exception so it will need to recurse. + */ + channel->notify_pending_write = ARES_FALSE; - if (ptr == NULL) { - handle_conn_error(conn, ARES_FALSE /* not critical to connection */, - ARES_SUCCESS); - return; /* bail out on malloc failure. 
TODO: make this - function return error codes */ + for (node = ares__slist_node_first(channel->servers); node != NULL; + node = ares__slist_node_next(node)) { + ares_server_t *server = ares__slist_node_val(node); + ares_conn_t *conn = server->tcp_conn; + ares_status_t status; + + if (conn == NULL) { + continue; + } + + /* Enqueue any pending data if there is any */ + status = ares__conn_flush(conn); + if (status != ARES_SUCCESS) { + handle_conn_error(conn, ARES_TRUE, status); + } } - /* Read from socket */ - count = ares__socket_recv(channel, conn->fd, ptr, ptr_len); - if (count <= 0) { - ares__buf_append_finish(server->tcp_parser, 0); - if (!(count == -1 && ares__socket_try_again(SOCKERRNO))) { - handle_conn_error(conn, ARES_TRUE, ARES_ECONNREFUSED); + ares__channel_unlock(channel); +} + +static ares_status_t read_conn_packets(ares_conn_t *conn) +{ + ares_bool_t read_again; + ares_conn_err_t err; + ares_channel_t *channel = conn->server->channel; + + do { + size_t count; + size_t len = 65535; + unsigned char *ptr; + size_t start_len = ares__buf_len(conn->in_buf); + + /* If UDP, lets write out a placeholder for the length indicator */ + if (!(conn->flags & ARES_CONN_FLAG_TCP)) { + if (ares__buf_append_be16(conn->in_buf, 0) != ARES_SUCCESS) { + handle_conn_error(conn, ARES_FALSE /* not critical to connection */, + ARES_SUCCESS); + return ARES_ENOMEM; + } } - return; + + /* Get a buffer of sufficient size */ + ptr = ares__buf_append_start(conn->in_buf, &len); + + if (ptr == NULL) { + handle_conn_error(conn, ARES_FALSE /* not critical to connection */, + ARES_SUCCESS); + return ARES_ENOMEM; + } + + /* Read from socket */ + err = ares__conn_read(conn, ptr, len, &count); + + if (err != ARES_CONN_ERR_SUCCESS) { + ares__buf_append_finish(conn->in_buf, 0); + if (!(conn->flags & ARES_CONN_FLAG_TCP)) { + ares__buf_set_length(conn->in_buf, start_len); + } + break; + } + + /* Record amount of data read */ + ares__buf_append_finish(conn->in_buf, (size_t)count); + + /* Only loop 
if we're not overwriting socket functions, and are using UDP + * or are using TCP and read the maximum buffer size */ + read_again = ARES_FALSE; + if (channel->sock_funcs == NULL) { + if (!(conn->flags & ARES_CONN_FLAG_TCP)) { + read_again = ARES_TRUE; + } else if (count == len) { + read_again = ARES_TRUE; + } + } + + /* If UDP, overwrite length */ + if (!(conn->flags & ARES_CONN_FLAG_TCP)) { + len = ares__buf_len(conn->in_buf); + ares__buf_set_length(conn->in_buf, start_len); + ares__buf_append_be16(conn->in_buf, (unsigned short)count); + ares__buf_set_length(conn->in_buf, len); + } + /* Try to read again only if *we* set up the socket, otherwise it may be + * a blocking socket and would cause recvfrom to hang. */ + } while (read_again); + + if (err != ARES_CONN_ERR_SUCCESS && err != ARES_CONN_ERR_WOULDBLOCK) { + handle_conn_error(conn, ARES_TRUE, ARES_ECONNREFUSED); + return ARES_ECONNREFUSED; } - /* Record amount of data read */ - ares__buf_append_finish(server->tcp_parser, (size_t)count); + return ARES_SUCCESS; +} + +static void read_answers(ares_conn_t *conn, const ares_timeval_t *now) +{ + ares_channel_t *channel = conn->server->channel; /* Process all queued answers */ while (1) { @@ -343,24 +466,24 @@ static void read_tcp_data(ares_channel_t *channel, ares_conn_t *conn, ares_status_t status; /* Tag so we can roll back */ - ares__buf_tag(server->tcp_parser); + ares__buf_tag(conn->in_buf); /* Read length indicator */ - if (ares__buf_fetch_be16(server->tcp_parser, &dns_len) != ARES_SUCCESS) { - ares__buf_tag_rollback(server->tcp_parser); + if (ares__buf_fetch_be16(conn->in_buf, &dns_len) != ARES_SUCCESS) { + ares__buf_tag_rollback(conn->in_buf); break; } /* Not enough data for a full response yet */ - if (ares__buf_consume(server->tcp_parser, dns_len) != ARES_SUCCESS) { - ares__buf_tag_rollback(server->tcp_parser); + if (ares__buf_consume(conn->in_buf, dns_len) != ARES_SUCCESS) { + ares__buf_tag_rollback(conn->in_buf); break; } /* Can't fail except for misuse 
*/ - data = ares__buf_tag_fetch(server->tcp_parser, &data_len); + data = ares__buf_tag_fetch(conn->in_buf, &data_len); if (data == NULL || data_len < 2) { - ares__buf_tag_clear(server->tcp_parser); + ares__buf_tag_clear(conn->in_buf); break; } @@ -369,126 +492,36 @@ static void read_tcp_data(ares_channel_t *channel, ares_conn_t *conn, data_len -= 2; /* We finished reading this answer; process it */ - status = process_answer(channel, data, data_len, conn, ARES_TRUE, now); + status = process_answer(channel, data, data_len, conn, now); if (status != ARES_SUCCESS) { handle_conn_error(conn, ARES_TRUE, status); return; } /* Since we processed the answer, clear the tag so space can be reclaimed */ - ares__buf_tag_clear(server->tcp_parser); + ares__buf_tag_clear(conn->in_buf); } } -static ares_socket_t *channel_socket_list(const ares_channel_t *channel, - size_t *num) +static void read_conn(ares_conn_t *conn, const ares_timeval_t *now) { - ares__slist_node_t *snode; - ares__array_t *arr = ares__array_create(sizeof(ares_socket_t), NULL); - - *num = 0; - - if (arr == NULL) { - return NULL; /* LCOV_EXCL_LINE: OutOfMemory */ - } - - for (snode = ares__slist_node_first(channel->servers); snode != NULL; - snode = ares__slist_node_next(snode)) { - ares_server_t *server = ares__slist_node_val(snode); - ares__llist_node_t *node; - - for (node = ares__llist_node_first(server->connections); node != NULL; - node = ares__llist_node_next(node)) { - const ares_conn_t *conn = ares__llist_node_val(node); - ares_socket_t *sptr; - ares_status_t status; - - if (conn->fd == ARES_SOCKET_BAD) { - continue; - } - - status = ares__array_insert_last((void **)&sptr, arr); - if (status != ARES_SUCCESS) { - ares__array_destroy(arr); /* LCOV_EXCL_LINE: OutOfMemory */ - return NULL; /* LCOV_EXCL_LINE: OutOfMemory */ - } - *sptr = conn->fd; - } + /* TODO: There might be a potential issue here where there was a read that + * read some data, then looped and read again and got a disconnect. 
+ * Right now, that would cause a resend instead of processing the data + * we have. This is fairly unlikely to occur due to only looping if + * a full buffer of 65535 bytes was read. */ + if (read_conn_packets(conn) != ARES_SUCCESS) { + return; } - - return ares__array_finish(arr, num); -} - -/* If any UDP sockets select true for reading, process them. */ -static void read_udp_packets_fd(ares_channel_t *channel, ares_conn_t *conn, - const ares_timeval_t *now) -{ - ares_ssize_t read_len; - unsigned char buf[MAXENDSSZ + 1]; - -#ifdef HAVE_RECVFROM - ares_socklen_t fromlen; - - union { - struct sockaddr sa; - struct sockaddr_in sa4; - struct sockaddr_in6 sa6; - } from; - - memset(&from, 0, sizeof(from)); -#endif - - /* To reduce event loop overhead, read and process as many - * packets as we can. */ - do { - if (conn->fd == ARES_SOCKET_BAD) { - read_len = -1; - } else { - if (conn->server->addr.family == AF_INET) { - fromlen = sizeof(from.sa4); - } else { - fromlen = sizeof(from.sa6); - } - read_len = ares__socket_recvfrom(channel, conn->fd, (void *)buf, - sizeof(buf), 0, &from.sa, &fromlen); - } - - if (read_len == 0) { - /* UDP is connectionless, so result code of 0 is a 0-length UDP - * packet, and not an indication the connection is closed like on - * tcp */ - continue; - } else if (read_len < 0) { - if (ares__socket_try_again(SOCKERRNO)) { - break; - } - - handle_conn_error(conn, ARES_TRUE, ARES_ECONNREFUSED); - return; -#ifdef HAVE_RECVFROM - } else if (!same_address(&from.sa, &conn->server->addr)) { - /* The address the response comes from does not match the address we - * sent the request to. Someone may be attempting to perform a cache - * poisoning attack. */ - continue; -#endif - - } else { - process_answer(channel, buf, (size_t)read_len, conn, ARES_FALSE, now); - } - - /* Try to read again only if *we* set up the socket, otherwise it may be - * a blocking socket and would cause recvfrom to hang. 
*/ - } while (read_len >= 0 && channel->sock_funcs == NULL); + read_answers(conn, now); } -static void read_packets(ares_channel_t *channel, fd_set *read_fds, +static void process_read(ares_channel_t *channel, fd_set *read_fds, ares_socket_t read_fd, const ares_timeval_t *now) { size_t i; ares_socket_t *socketlist = NULL; size_t num_sockets = 0; - ares_conn_t *conn = NULL; ares__llist_node_t *node = NULL; if (!read_fds && (read_fd == ARES_SOCKET_BAD)) { @@ -503,13 +536,7 @@ static void read_packets(ares_channel_t *channel, fd_set *read_fds, return; } - conn = ares__llist_node_val(node); - - if (conn->flags & ARES_CONN_FLAG_TCP) { - read_tcp_data(channel, conn, now); - } else { - read_udp_packets_fd(channel, conn, now); - } + read_conn(ares__llist_node_val(node), now); return; } @@ -537,13 +564,7 @@ static void read_packets(ares_channel_t *channel, fd_set *read_fds, return; } - conn = ares__llist_node_val(node); - - if (conn->flags & ARES_CONN_FLAG_TCP) { - read_tcp_data(channel, conn, now); - } else { - read_udp_packets_fd(channel, conn, now); - } + read_conn(ares__llist_node_val(node), now); } ares_free(socketlist); @@ -607,7 +628,7 @@ done: * the connection to be terminated after this call. */ static ares_status_t process_answer(ares_channel_t *channel, const unsigned char *abuf, size_t alen, - ares_conn_t *conn, ares_bool_t tcp, + ares_conn_t *conn, const ares_timeval_t *now) { ares_query_t *query; @@ -618,6 +639,11 @@ static ares_status_t process_answer(ares_channel_t *channel, ares_status_t status; ares_bool_t is_cached = ARES_FALSE; + /* UDP can have 0-byte messages, drop them to the ground */ + if (alen == 0) { + return ARES_SUCCESS; + } + /* Parse the response */ status = ares_dns_parse(abuf, alen, 0, &rdnsrec); if (status != ARES_SUCCESS) { @@ -681,7 +707,8 @@ static ares_status_t process_answer(ares_channel_t *channel, * don't accept the packet, and switch the query to TCP if we hadn't * done so already. 
*/ - if (ares_dns_record_get_flags(rdnsrec) & ARES_FLAG_TC && !tcp && + if (ares_dns_record_get_flags(rdnsrec) & ARES_FLAG_TC && + !(conn->flags & ARES_CONN_FLAG_TCP) && !(channel->flags & ARES_FLAG_IGNTC)) { query->using_tcp = ARES_TRUE; ares__send_query(query, now); @@ -971,9 +998,6 @@ static ares_status_t ares__conn_query_write(ares_conn_t *conn, ares_query_t *query, const ares_timeval_t *now) { - unsigned char *qbuf = NULL; - size_t qbuf_len = 0; - ares_ssize_t len; ares_server_t *server = conn->server; ares_channel_t *channel = server->channel; ares_status_t status; @@ -983,64 +1007,33 @@ static ares_status_t ares__conn_query_write(ares_conn_t *conn, return status; } - if (conn->flags & ARES_CONN_FLAG_TCP) { - size_t prior_len = ares__buf_len(server->tcp_send); - - status = ares_dns_write_buf_tcp(query->query, server->tcp_send); - if (status != ARES_SUCCESS) { - return status; - } - - if (conn->flags & ARES_CONN_FLAG_TFO_INITIAL) { - /* When using TFO, we need to put it on the wire immediately. */ - size_t data_len; - const unsigned char *data = NULL; - - data = ares__buf_peek(server->tcp_send, &data_len); - len = ares__conn_write(conn, data, data_len); - if (len <= 0) { - if (ares__socket_try_again(SOCKERRNO)) { - /* This means we must not have qualified for TFO, keep the data - * buffered, wait on write signal. */ - return ARES_SUCCESS; - } - - /* TCP TFO might delay failure. 
Reflect that here */ - return ARES_ECONNREFUSED; - } - - /* Consume what was written */ - ares__buf_consume(server->tcp_send, (size_t)len); - return ARES_SUCCESS; - } - - if (prior_len == 0) { - SOCK_STATE_CALLBACK(channel, conn->fd, 1, 1); - } - - return ARES_SUCCESS; - } - - /* UDP Here */ - status = ares_dns_write(query->query, &qbuf, &qbuf_len); + /* We write using the TCP format even for UDP, we just strip the length + * before putting on the wire */ + status = ares_dns_write_buf_tcp(query->query, conn->out_buf); if (status != ARES_SUCCESS) { return status; } - len = ares__conn_write(conn, qbuf, qbuf_len); - ares_free(qbuf); + /* Not pending a TFO write and not connected, so we can't even try to + * write until we get a signal */ + if (conn->flags & ARES_CONN_FLAG_TCP && + !(conn->state_flags & ARES_CONN_STATE_CONNECTED) && + !(conn->flags & ARES_CONN_FLAG_TFO_INITIAL)) { + return ARES_SUCCESS; + } - if (len == -1) { - if (ares__socket_try_again(SOCKERRNO)) { - return ARES_ESERVFAIL; - } - /* UDP is connection-less, but we might receive an ICMP unreachable which - * means we can't talk to the remote host at all and that will be - * reflected here */ - return ARES_ECONNREFUSED; + /* Delay actual write if possible (TCP only, and only if callback + * configured) */ + if (channel->notify_pending_write_cb && !channel->notify_pending_write && + conn->flags & ARES_CONN_FLAG_TCP) { + channel->notify_pending_write = ARES_TRUE; + channel->notify_pending_write_cb(channel->notify_pending_write_cb_data); + return ARES_SUCCESS; } - return ARES_SUCCESS; + /* Unfortunately we need to write right away and can't aggregate multiple + * queries into a single write. 
*/ + return ares__conn_flush(conn); } ares_status_t ares__send_query(ares_query_t *query, const ares_timeval_t *now) @@ -1150,6 +1143,7 @@ ares_status_t ares__send_query(ares_query_t *query, const ares_timeval_t *now) query->conn = conn; conn->total_queries++; + return ARES_SUCCESS; } @@ -1213,36 +1207,6 @@ done: return rv; } -static ares_bool_t same_address(const struct sockaddr *sa, - const struct ares_addr *aa) -{ - const void *addr1; - const void *addr2; - - if (sa->sa_family == aa->family) { - switch (aa->family) { - case AF_INET: - addr1 = &aa->addr.addr4; - addr2 = &(CARES_INADDR_CAST(const struct sockaddr_in *, sa))->sin_addr; - if (memcmp(addr1, addr2, sizeof(aa->addr.addr4)) == 0) { - return ARES_TRUE; /* match */ - } - break; - case AF_INET6: - addr1 = &aa->addr.addr6; - addr2 = - &(CARES_INADDR_CAST(const struct sockaddr_in6 *, sa))->sin6_addr; - if (memcmp(addr1, addr2, sizeof(aa->addr.addr6)) == 0) { - return ARES_TRUE; /* match */ - } - break; - default: - break; /* LCOV_EXCL_LINE */ - } - } - return ARES_FALSE; /* different */ -} - static void ares_detach_query(ares_query_t *query) { /* Remove the query from all the lists in which it is linked */ diff --git a/src/lib/ares_setup.h b/src/lib/ares_setup.h index b6ce077f..a0bcce12 100644 --- a/src/lib/ares_setup.h +++ b/src/lib/ares_setup.h @@ -199,35 +199,12 @@ #endif -#ifdef __hpux -# if !defined(_XOPEN_SOURCE_EXTENDED) || defined(_KERNEL) -# ifdef _APP32_64BIT_OFF_T -# define OLD_APP32_64BIT_OFF_T _APP32_64BIT_OFF_T -# undef _APP32_64BIT_OFF_T -# else -# undef OLD_APP32_64BIT_OFF_T -# endif -# endif -#endif - -#ifdef __hpux -# if !defined(_XOPEN_SOURCE_EXTENDED) || defined(_KERNEL) -# ifdef OLD_APP32_64BIT_OFF_T -# define _APP32_64BIT_OFF_T OLD_APP32_64BIT_OFF_T -# undef OLD_APP32_64BIT_OFF_T -# endif -# endif -#endif - - -/* - * Definition of timeval struct for platforms that don't have it. - */ +/* Definition of timeval struct for platforms that don't have it. 
*/ #ifndef HAVE_STRUCT_TIMEVAL struct timeval { - long tv_sec; - long tv_usec; + ares_int64_t tv_sec; + long tv_usec; }; #endif @@ -257,111 +234,4 @@ struct timeval { } while (0) #endif -/* - * Macro SOCKERRNO / SET_SOCKERRNO() returns / sets the *socket-related* errno - * (or equivalent) on this platform to hide platform details to code using it. - */ - -#ifdef USE_WINSOCK -# define SOCKERRNO ((int)WSAGetLastError()) -# define SET_SOCKERRNO(x) (WSASetLastError((int)(x))) -#else -# define SOCKERRNO (errno) -# define SET_SOCKERRNO(x) (errno = (x)) -#endif - - -/* - * Macro ERRNO / SET_ERRNO() returns / sets the NOT *socket-related* errno - * (or equivalent) on this platform to hide platform details to code using it. - */ - -#if defined(WIN32) && !defined(WATT32) -# define ERRNO ((int)GetLastError()) -# define SET_ERRNO(x) (SetLastError((DWORD)(x))) -#else -# define ERRNO (errno) -# define SET_ERRNO(x) (errno = (x)) -#endif - - -/* - * Portable error number symbolic names defined to Winsock error codes. 
- */ - -#ifdef USE_WINSOCK -# undef EBADF /* override definition in errno.h */ -# define EBADF WSAEBADF -# undef EINTR /* override definition in errno.h */ -# define EINTR WSAEINTR -# undef EINVAL /* override definition in errno.h */ -# define EINVAL WSAEINVAL -# undef EWOULDBLOCK /* override definition in errno.h */ -# define EWOULDBLOCK WSAEWOULDBLOCK -# undef EINPROGRESS /* override definition in errno.h */ -# define EINPROGRESS WSAEINPROGRESS -# undef EALREADY /* override definition in errno.h */ -# define EALREADY WSAEALREADY -# undef ENOTSOCK /* override definition in errno.h */ -# define ENOTSOCK WSAENOTSOCK -# undef EDESTADDRREQ /* override definition in errno.h */ -# define EDESTADDRREQ WSAEDESTADDRREQ -# undef EMSGSIZE /* override definition in errno.h */ -# define EMSGSIZE WSAEMSGSIZE -# undef EPROTOTYPE /* override definition in errno.h */ -# define EPROTOTYPE WSAEPROTOTYPE -# undef ENOPROTOOPT /* override definition in errno.h */ -# define ENOPROTOOPT WSAENOPROTOOPT -# undef EPROTONOSUPPORT /* override definition in errno.h */ -# define EPROTONOSUPPORT WSAEPROTONOSUPPORT -# define ESOCKTNOSUPPORT WSAESOCKTNOSUPPORT -# undef EOPNOTSUPP /* override definition in errno.h */ -# define EOPNOTSUPP WSAEOPNOTSUPP -# define EPFNOSUPPORT WSAEPFNOSUPPORT -# undef EAFNOSUPPORT /* override definition in errno.h */ -# define EAFNOSUPPORT WSAEAFNOSUPPORT -# undef EADDRINUSE /* override definition in errno.h */ -# define EADDRINUSE WSAEADDRINUSE -# undef EADDRNOTAVAIL /* override definition in errno.h */ -# define EADDRNOTAVAIL WSAEADDRNOTAVAIL -# undef ENETDOWN /* override definition in errno.h */ -# define ENETDOWN WSAENETDOWN -# undef ENETUNREACH /* override definition in errno.h */ -# define ENETUNREACH WSAENETUNREACH -# undef ENETRESET /* override definition in errno.h */ -# define ENETRESET WSAENETRESET -# undef ECONNABORTED /* override definition in errno.h */ -# define ECONNABORTED WSAECONNABORTED -# undef ECONNRESET /* override definition in errno.h */ -# 
define ECONNRESET WSAECONNRESET -# undef ENOBUFS /* override definition in errno.h */ -# define ENOBUFS WSAENOBUFS -# undef EISCONN /* override definition in errno.h */ -# define EISCONN WSAEISCONN -# undef ENOTCONN /* override definition in errno.h */ -# define ENOTCONN WSAENOTCONN -# define ESHUTDOWN WSAESHUTDOWN -# define ETOOMANYREFS WSAETOOMANYREFS -# undef ETIMEDOUT /* override definition in errno.h */ -# define ETIMEDOUT WSAETIMEDOUT -# undef ECONNREFUSED /* override definition in errno.h */ -# define ECONNREFUSED WSAECONNREFUSED -# undef ELOOP /* override definition in errno.h */ -# define ELOOP WSAELOOP -# ifndef ENAMETOOLONG /* possible previous definition in errno.h */ -# define ENAMETOOLONG WSAENAMETOOLONG -# endif -# define EHOSTDOWN WSAEHOSTDOWN -# undef EHOSTUNREACH /* override definition in errno.h */ -# define EHOSTUNREACH WSAEHOSTUNREACH -# ifndef ENOTEMPTY /* possible previous definition in errno.h */ -# define ENOTEMPTY WSAENOTEMPTY -# endif -# define EPROCLIM WSAEPROCLIM -# define EUSERS WSAEUSERS -# define EDQUOT WSAEDQUOT -# define ESTALE WSAESTALE -# define EREMOTE WSAEREMOTE -#endif - #endif /* __ARES_SETUP_H */ diff --git a/src/lib/ares_update_servers.c b/src/lib/ares_update_servers.c index 639f79d8..1be026bb 100644 --- a/src/lib/ares_update_servers.c +++ b/src/lib/ares_update_servers.c @@ -608,18 +608,6 @@ static ares_status_t ares__server_create(ares_channel_t *channel, server->ll_scope = sconfig->ll_scope; } - server->tcp_parser = ares__buf_create(); - if (server->tcp_parser == NULL) { - status = ARES_ENOMEM; /* LCOV_EXCL_LINE: OutOfMemory */ - goto done; /* LCOV_EXCL_LINE: OutOfMemory */ - } - - server->tcp_send = ares__buf_create(); - if (server->tcp_send == NULL) { - status = ARES_ENOMEM; /* LCOV_EXCL_LINE: OutOfMemory */ - goto done; /* LCOV_EXCL_LINE: OutOfMemory */ - } - server->connections = ares__llist_create(NULL); if (server->connections == NULL) { status = ARES_ENOMEM; /* LCOV_EXCL_LINE: OutOfMemory */ diff --git 
a/src/lib/event/ares_event.h b/src/lib/event/ares_event.h index 317731fc..db5fc91a 100644 --- a/src/lib/event/ares_event.h +++ b/src/lib/event/ares_event.h @@ -96,6 +96,8 @@ struct ares_event_thread { /*! Reference to the ares channel, for being able to call things like * ares_timeout() and ares_process_fd(). */ ares_channel_t *channel; + /*! Whether or not on the next loop we should process a pending write */ + ares_bool_t process_pending_write; /*! Not-yet-processed event handle updates. These will get enqueued by a * thread other than the event thread itself. The event thread will then * be woken then process these updates itself */ diff --git a/src/lib/event/ares_event_thread.c b/src/lib/event/ares_event_thread.c index 8b332e9b..7441be1e 100644 --- a/src/lib/event/ares_event_thread.c +++ b/src/lib/event/ares_event_thread.c @@ -216,6 +216,17 @@ static void ares_event_thread_sockstate_cb(void *data, ares_socket_t socket_fd, NULL, NULL, NULL); } +static void ares_event_thread_notifywrite_cb(void *data) +{ + ares_event_thread_t *e = data; + + ares__thread_mutex_lock(e->mutex); + e->process_pending_write = ARES_TRUE; + ares__thread_mutex_unlock(e->mutex); + + ares_event_thread_wake(e); +} + static void ares_event_process_updates(ares_event_thread_t *e) { ares__llist_node_t *node; @@ -310,6 +321,7 @@ static void *ares_event_thread(void *arg) struct timeval tv; const struct timeval *tvout; unsigned long timeout_ms = 0; /* 0 = unlimited */ + ares_bool_t process_pending_write; ares_event_process_updates(e); @@ -326,6 +338,15 @@ static void *ares_event_thread(void *arg) e->ev_sys->wait(e, timeout_ms); + /* Process pending write operation */ + ares__thread_mutex_lock(e->mutex); + process_pending_write = e->process_pending_write; + e->process_pending_write = ARES_FALSE; + ares__thread_mutex_unlock(e->mutex); + if (process_pending_write) { + ares_process_pending_write(e->channel); + } + /* Each iteration should do timeout processing */ if (e->isup) { 
ares_process_fd(e->channel, ARES_SOCKET_BAD, ARES_SOCKET_BAD); @@ -379,8 +400,10 @@ void ares_event_thread_destroy(ares_channel_t *channel) } ares_event_thread_destroy_int(e); - channel->sock_state_cb_data = NULL; - channel->sock_state_cb = NULL; + channel->sock_state_cb_data = NULL; + channel->sock_state_cb = NULL; + channel->notify_pending_write_cb = NULL; + channel->notify_pending_write_cb_data = NULL; } static const ares_event_sys_t *ares_event_fetch_sys(ares_evsys_t evsys) @@ -483,8 +506,10 @@ ares_status_t ares_event_thread_init(ares_channel_t *channel) return ARES_ENOTIMP; /* LCOV_EXCL_LINE: UntestablePath */ } - channel->sock_state_cb = ares_event_thread_sockstate_cb; - channel->sock_state_cb_data = e; + channel->sock_state_cb = ares_event_thread_sockstate_cb; + channel->sock_state_cb_data = e; + channel->notify_pending_write_cb = ares_event_thread_notifywrite_cb; + channel->notify_pending_write_cb_data = e; if (!e->ev_sys->init(e)) { /* LCOV_EXCL_START: UntestablePath */ diff --git a/src/lib/inet_net_pton.c b/src/lib/inet_net_pton.c index 5356778c..7f9095a3 100644 --- a/src/lib/inet_net_pton.c +++ b/src/lib/inet_net_pton.c @@ -32,6 +32,20 @@ #include "ares_ipv6.h" #include "ares_inet_net_pton.h" +#ifdef USE_WINSOCK +# define SOCKERRNO ((int)WSAGetLastError()) +# define SET_SOCKERRNO(x) (WSASetLastError((int)(x))) +# undef EMSGSIZE +# define EMSGSIZE WSAEMSGSIZE +# undef ENOENT +# define ENOENT WSA_INVALID_PARAMETER +# undef EAFNOSUPPORT +# define EAFNOSUPPORT WSAEAFNOSUPPORT +#else +# define SOCKERRNO (errno) +# define SET_SOCKERRNO(x) (errno = (x)) +#endif + const struct ares_in6_addr ares_in6addr_any = { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } }; @@ -195,11 +209,11 @@ static int ares_inet_net_pton_ipv4(const char *src, unsigned char *dst, return bits; enoent: - SET_ERRNO(ENOENT); + SET_SOCKERRNO(ENOENT); return -1; emsgsize: - SET_ERRNO(EMSGSIZE); + SET_SOCKERRNO(EMSGSIZE); return -1; } @@ -343,7 +357,7 @@ static int ares_inet_pton6(const 
char *src, unsigned char *dst) return 1; enoent: - SET_ERRNO(ENOENT); + SET_SOCKERRNO(ENOENT); return -1; } @@ -358,7 +372,7 @@ static int ares_inet_net_pton_ipv6(const char *src, unsigned char *dst, char *sep; if (ares_strlen(src) >= sizeof buf) { - SET_ERRNO(EMSGSIZE); + SET_SOCKERRNO(EMSGSIZE); return -1; } ares_strcpy(buf, src, sizeof buf); @@ -377,14 +391,14 @@ static int ares_inet_net_pton_ipv6(const char *src, unsigned char *dst, bits = 128; } else { if (!getbits(sep, &bits)) { - SET_ERRNO(ENOENT); + SET_SOCKERRNO(ENOENT); return -1; } } bytes = (bits + 7) / 8; if (bytes > size) { - SET_ERRNO(EMSGSIZE); + SET_SOCKERRNO(EMSGSIZE); return -1; } memcpy(dst, &in6, bytes); @@ -401,13 +415,9 @@ static int ares_inet_net_pton_ipv6(const char *src, unsigned char *dst, * number of bits, either imputed classfully or specified with /CIDR, * or -1 if some failure occurred (check errno). ENOENT means it was * not a valid network specification. - * note: - * On Windows we store the error in the thread errno, not - * in the winsock error code. This is to avoid losing the - * actual last winsock error. So use macro ERRNO to fetch the - * errno this function sets when returning (-1), not SOCKERRNO. * author: * Paul Vixie (ISC), June 1996 + * */ int ares_inet_net_pton(int af, const char *src, void *dst, size_t size) { @@ -417,7 +427,6 @@ int ares_inet_net_pton(int af, const char *src, void *dst, size_t size) case AF_INET6: return ares_inet_net_pton_ipv6(src, dst, size); default: - SET_ERRNO(EAFNOSUPPORT); return -1; } } @@ -432,11 +441,11 @@ int ares_inet_pton(int af, const char *src, void *dst) } else if (af == AF_INET6) { size = sizeof(struct ares_in6_addr); } else { - SET_ERRNO(EAFNOSUPPORT); + SET_SOCKERRNO(EAFNOSUPPORT); return -1; } result = ares_inet_net_pton(af, src, dst, size); - if (result == -1 && ERRNO == ENOENT) { + if (result == -1 && SOCKERRNO == ENOENT) { return 0; } return (result > -1) ? 
1 : -1; diff --git a/src/lib/inet_ntop.c b/src/lib/inet_ntop.c index 6f96b92c..79b6c0fa 100644 --- a/src/lib/inet_ntop.c +++ b/src/lib/inet_ntop.c @@ -29,6 +29,22 @@ #include "ares_nameser.h" #include "ares_ipv6.h" +#ifdef USE_WINSOCK +# define SOCKERRNO ((int)WSAGetLastError()) +# define SET_SOCKERRNO(x) (WSASetLastError((int)(x))) +# undef EMSGSIZE +# define EMSGSIZE WSAEMSGSIZE +# undef ENOENT +# define ENOENT WSA_INVALID_PARAMETER +# undef EAFNOSUPPORT +# define EAFNOSUPPORT WSAEAFNOSUPPORT +# undef ENOSPC +# define ENOSPC WSA_INVALID_PARAMETER +#else +# define SOCKERRNO (errno) +# define SET_SOCKERRNO(x) (errno = (x)) +#endif + /* * WARNING: Don't even consider trying to compile this on a system where * sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX. @@ -42,11 +58,6 @@ static const char *inet_ntop6(const unsigned char *src, char *dst, size_t size); * convert a network format address to presentation format. * return: * pointer to presentation format address (`dst'), or NULL (see errno). - * note: - * On Windows we store the error in the thread errno, not - * in the winsock error code. This is to avoid losing the - * actual last winsock error. So use macro ERRNO to fetch the - * errno this function sets when returning NULL, not SOCKERRNO. * author: * Paul Vixie, 1996. 
*/ @@ -61,7 +72,7 @@ const char *ares_inet_ntop(int af, const void *src, char *dst, default: break; } - SET_ERRNO(EAFNOSUPPORT); + SET_SOCKERRNO(EAFNOSUPPORT); return NULL; } @@ -82,13 +93,13 @@ static const char *inet_ntop4(const unsigned char *src, char *dst, size_t size) char tmp[sizeof("255.255.255.255")]; if (size < sizeof(tmp)) { - SET_ERRNO(ENOSPC); + SET_SOCKERRNO(ENOSPC); return NULL; } if ((size_t)snprintf(tmp, sizeof(tmp), fmt, src[0], src[1], src[2], src[3]) >= size) { - SET_ERRNO(ENOSPC); + SET_SOCKERRNO(ENOSPC); return NULL; } ares_strcpy(dst, tmp, size); @@ -200,7 +211,7 @@ static const char *inet_ntop6(const unsigned char *src, char *dst, size_t size) * Check for overflow, copy, and we're done. */ if ((size_t)(tp - tmp) > size) { - SET_ERRNO(ENOSPC); + SET_SOCKERRNO(ENOSPC); return NULL; } ares_strcpy(dst, tmp, size); diff --git a/src/lib/legacy/ares_fds.c b/src/lib/legacy/ares_fds.c index 3aedd2c9..710f8db1 100644 --- a/src/lib/legacy/ares_fds.c +++ b/src/lib/legacy/ares_fds.c @@ -68,8 +68,9 @@ int ares_fds(const ares_channel_t *channel, fd_set *read_fds, fd_set *write_fds) nfds = conn->fd + 1; } - /* TCP only wait on write if we have buffered data */ - if (conn->flags & ARES_CONN_FLAG_TCP && ares__buf_len(server->tcp_send)) { + /* TCP only wait on write if we have the flag set */ + if (conn->flags & ARES_CONN_FLAG_TCP && + conn->state_flags & ARES_CONN_STATE_WRITE) { FD_SET(conn->fd, write_fds); } } diff --git a/src/lib/legacy/ares_getsock.c b/src/lib/legacy/ares_getsock.c index 8c8476fa..1c0ae105 100644 --- a/src/lib/legacy/ares_getsock.c +++ b/src/lib/legacy/ares_getsock.c @@ -71,7 +71,8 @@ int ares_getsock(const ares_channel_t *channel, ares_socket_t *socks, bitmap |= ARES_GETSOCK_READABLE(setbits, sockindex); } - if (conn->flags & ARES_CONN_FLAG_TCP && ares__buf_len(server->tcp_send)) { + if (conn->flags & ARES_CONN_FLAG_TCP && + conn->state_flags & ARES_CONN_STATE_WRITE) { /* then the tcp socket is also writable! 
*/ bitmap |= ARES_GETSOCK_WRITABLE(setbits, sockindex); } diff --git a/src/lib/str/ares__buf.c b/src/lib/str/ares__buf.c index b855260a..08b0ed76 100644 --- a/src/lib/str/ares__buf.c +++ b/src/lib/str/ares__buf.c @@ -1234,7 +1234,7 @@ ares_status_t ares__buf_load_file(const char *filename, ares__buf_t *buf) fp = fopen(filename, "rb"); if (fp == NULL) { - int error = ERRNO; + int error = errno; switch (error) { case ENOENT: case ESRCH: