Prevent complex recursion during query requeing and connection cleanup

c-ares utilizes recursion for some operations, and some of these
processes can have unintended side effects, such as if a callback
is called that then recurses into the same function. This can cause
strange cleanup conditions that lead to crashes.

Try to disassociate queries with connections as early as possible and
move cleaning up unneeded connections to its own scan rather than
trying to detect each time a query is disassociated from a connection.

Fix By: Brad House (@bradh352)
v1.27
Brad House 7 months ago
parent e7ea46e1bb
commit 902bed5dae
  1. 72
      src/lib/ares__close_sockets.c
  2. 8
      src/lib/ares_cancel.c
  3. 3
      src/lib/ares_private.h
  4. 42
      src/lib/ares_process.c
  5. 2
      test/ares-test-mock-et.cc

@ -81,33 +81,59 @@ void ares__close_sockets(struct server_state *server)
}
}
void ares__check_cleanup_conn(const ares_channel_t *channel,
struct server_connection *conn)
void ares__check_cleanup_conns(const ares_channel_t *channel)
{
ares_bool_t do_cleanup = ARES_FALSE;
ares__slist_node_t *snode;
if (channel == NULL || conn == NULL) {
if (channel == NULL) {
return;
}
if (ares__llist_len(conn->queries_to_conn)) {
return;
}
/* If we are configured not to stay open, close it out */
if (!(channel->flags & ARES_FLAG_STAYOPEN)) {
do_cleanup = ARES_TRUE;
}
/* If the udp connection hit its max queries, always close it */
if (!conn->is_tcp && channel->udp_max_queries > 0 &&
conn->total_queries >= channel->udp_max_queries) {
do_cleanup = ARES_TRUE;
/* Iterate across each server */
for (snode = ares__slist_node_first(channel->servers); snode != NULL;
snode = ares__slist_node_next(snode)) {
struct server_state *server = ares__slist_node_val(snode);
ares__llist_node_t *cnode;
/* Iterate across each connection */
cnode = ares__llist_node_first(server->connections);
while (cnode != NULL) {
ares__llist_node_t *next = ares__llist_node_next(cnode);
struct server_connection *conn = ares__llist_node_val(cnode);
ares_bool_t do_cleanup = ARES_FALSE;
cnode = next;
/* Has connections, not eligible */
if (ares__llist_len(conn->queries_to_conn)) {
continue;
}
/* If we are configured not to stay open, close it out */
if (!(channel->flags & ARES_FLAG_STAYOPEN)) {
do_cleanup = ARES_TRUE;
}
/* If the associated server has failures, close it out. Resetting the
* connection (and specifically the source port number) can help resolve
* situations where packets are being dropped.
*/
if (conn->server->consec_failures > 0) {
do_cleanup = ARES_TRUE;
}
/* If the udp connection hit its max queries, always close it */
if (!conn->is_tcp && channel->udp_max_queries > 0 &&
conn->total_queries >= channel->udp_max_queries) {
do_cleanup = ARES_TRUE;
}
if (!do_cleanup) {
continue;
}
/* Clean it up */
ares__close_connection(conn, ARES_SUCCESS);
}
}
if (!do_cleanup) {
return;
}
ares__close_connection(conn, ARES_SUCCESS);
}

@ -64,28 +64,26 @@ void ares_cancel(ares_channel_t *channel)
node = ares__llist_node_first(list_copy);
while (node != NULL) {
struct query *query;
struct server_connection *conn;
/* Cache next since this node is being deleted */
next = ares__llist_node_next(node);
query = ares__llist_node_claim(node);
conn = query->conn;
query->node_all_queries = NULL;
/* NOTE: its possible this may enqueue new queries */
query->callback(query->arg, ARES_ECANCELLED, 0, NULL, 0);
ares__free_query(query);
/* See if the connection should be cleaned up */
ares__check_cleanup_conn(channel, conn);
node = next;
}
ares__llist_destroy(list_copy);
}
/* See if the connections should be cleaned up */
ares__check_cleanup_conns(channel);
ares_queue_notify_empty(channel);
done:

@ -351,8 +351,7 @@ ares_status_t ares_send_ex(ares_channel_t *channel, const unsigned char *qbuf,
void ares__close_connection(struct server_connection *conn,
ares_status_t requeue_status);
void ares__close_sockets(struct server_state *server);
void ares__check_cleanup_conn(const ares_channel_t *channel,
struct server_connection *conn);
void ares__check_cleanup_conns(const ares_channel_t *channel);
ares_status_t ares__read_line(FILE *fp, char **buf, size_t *bufsize);
void ares__free_query(struct query *query);

@ -72,7 +72,17 @@ static void end_query(ares_channel_t *channel, struct query *query,
ares_status_t status, const unsigned char *abuf,
size_t alen);
static void server_increment_failures(struct server_state *server)
static void ares__query_disassociate_from_conn(struct query *query)
{
/* If its not part of a connection, it can't be tracked for timeouts either */
ares__slist_node_destroy(query->node_queries_by_timeout);
ares__llist_node_destroy(query->node_queries_to_conn);
query->node_queries_by_timeout = NULL;
query->node_queries_to_conn = NULL;
query->conn = NULL;
}
static void server_increment_failures(struct server_state *server)
{
ares__slist_node_t *node;
const ares_channel_t *channel = server->channel;
@ -155,6 +165,8 @@ static void processfds(ares_channel_t *channel, fd_set *read_fds,
/* Write last as the other 2 operations might have triggered writes */
write_tcp_data(channel, write_fds, write_fd);
/* See if any connections should be cleaned up */
ares__check_cleanup_conns(channel);
ares__channel_unlock(channel);
}
@ -347,8 +359,6 @@ static void read_tcp_data(ares_channel_t *channel,
/* Since we processed the answer, clear the tag so space can be reclaimed */
ares__buf_tag_clear(server->tcp_parser);
}
ares__check_cleanup_conn(channel, conn);
}
static int socket_list_append(ares_socket_t **socketlist, ares_socket_t fd,
@ -472,8 +482,6 @@ static void read_udp_packets_fd(ares_channel_t *channel,
/* Try to read again only if *we* set up the socket, otherwise it may be
* a blocking socket and would cause recvfrom to hang. */
} while (read_len >= 0 && channel->sock_funcs == NULL);
ares__check_cleanup_conn(channel, conn);
}
static void read_packets(ares_channel_t *channel, fd_set *read_fds,
@ -546,13 +554,15 @@ static void read_packets(ares_channel_t *channel, fd_set *read_fds,
/* If any queries have timed out, note the timeout and move them on. */
static void process_timeouts(ares_channel_t *channel, struct timeval *now)
{
ares__slist_node_t *node =
ares__slist_node_first(channel->queries_by_timeout);
while (node != NULL) {
ares__slist_node_t *node;
/* Just keep popping off the first as this list will re-sort as things come
* and go. We don't want to try to rely on 'next' as some operation might
* cause a cleanup of that pointer and would become invalid */
while ((node = ares__slist_node_first(channel->queries_by_timeout)) != NULL) {
struct query *query = ares__slist_node_val(node);
/* Node might be removed, cache next */
ares__slist_node_t *next = ares__slist_node_next(node);
struct server_connection *conn;
/* Since this is sorted, as soon as we hit a query that isn't timed out,
* break */
if (!ares__timedout(now, &query->timeout)) {
@ -564,9 +574,6 @@ static void process_timeouts(ares_channel_t *channel, struct timeval *now)
conn = query->conn;
server_increment_failures(conn->server);
ares__requeue_query(query, now, ARES_ETIMEOUT);
ares__check_cleanup_conn(channel, conn);
node = next;
}
}
@ -765,6 +772,8 @@ ares_status_t ares__requeue_query(struct query *query, struct timeval *now,
ares_channel_t *channel = query->channel;
size_t max_tries = ares__slist_len(channel->servers) * channel->tries;
ares__query_disassociate_from_conn(query);
if (status != ARES_SUCCESS) {
query->error_status = status;
}
@ -887,8 +896,6 @@ ares_status_t ares__send_query(struct query *query, struct timeval *now)
ares_status_t status;
ares_bool_t new_connection = ARES_FALSE;
query->conn = NULL;
/* Choose the server to send the query to */
if (channel->rotate) {
server = ares__random_server(channel);
@ -1118,12 +1125,9 @@ static ares_bool_t same_address(const struct sockaddr *sa,
static void ares_detach_query(struct query *query)
{
/* Remove the query from all the lists in which it is linked */
ares__query_disassociate_from_conn(query);
ares__htable_szvp_remove(query->channel->queries_by_qid, query->qid);
ares__slist_node_destroy(query->node_queries_by_timeout);
ares__llist_node_destroy(query->node_queries_to_conn);
ares__llist_node_destroy(query->node_all_queries);
query->node_queries_by_timeout = NULL;
query->node_queries_to_conn = NULL;
query->node_all_queries = NULL;
}

@ -133,7 +133,7 @@ TEST_P(MockUDPEventThreadTest, BadLoopbackServerNoTimeouts) {
EXPECT_EQ(ARES_ECONNREFUSED, result[i].status_);
EXPECT_EQ(0, result[i].timeouts_);
#else
EXPECT_TRUE(result[i].status_ == ARES_ECONNREFUSED || result[i].status_ == ARES_ETIMEOUT);
EXPECT_TRUE(result[i].status_ == ARES_ECONNREFUSED || result[i].status_ == ARES_ETIMEOUT || result[i].status_ == ARES_ESERVFAIL);
#endif
}
}

Loading…
Cancel
Save