diff --git a/FEATURES.md b/FEATURES.md index dc86fb63..5ec1975e 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -50,19 +50,23 @@ application. Each server is tracked for failures relating to consecutive connectivity issues or unrecoverable response codes. Servers are sorted in priority order based on this metric. Downed servers will be brought back online either when the -current highest priority has failed, or has been determined to be online when -a query is randomly selected to probe a downed server. +current highest priority server has failed, or has been determined to be online +when a query is randomly selected to probe a downed server. By default a downed server won't be retried for 5 seconds, and queries will have a 10% chance of being chosen after this timeframe to test a downed server. -Administrators may customize these settings via `ARES_OPT_SERVER_FAILOVER`. +When a downed server is selected to be probed, the query will be duplicated +and sent to the downed server independently of the original query itself. This +means that probing a downed server will always use a legitimate query, without +the negative impact of a delayed response in case that server is still down. -In the future we may use independent queries to probe downed servers to not -impact latency of any queries when a server is known to be down. +Administrators may customize these settings via `ARES_OPT_SERVER_FAILOVER`. -`ARES_OPT_ROTATE` or a system configuration option of `rotate` will disable -this feature as servers will be chosen at random. In the future we may -enhance this capability to only randomly choose online servers. +Additionally, when using `ARES_OPT_ROTATE` or a system configuration option of +`rotate`, c-ares will randomly select a server from the list of highest +priority servers, i.e. those with the fewest consecutive failures. Any servers +in a lower priority bracket will be omitted from the random selection. This feature requires the c-ares channel to persist for the lifetime of the application. diff --git a/docs/ares_init_options.3 b/docs/ares_init_options.3 index 694beb5e..9b3b4815 100644 --- a/docs/ares_init_options.3 +++ b/docs/ares_init_options.3 @@ -345,7 +345,8 @@ Configure server failover retry behavior. When a DNS server fails to respond to a query, c-ares will deprioritize the server. On subsequent queries, servers with fewer consecutive failures will be selected in preference. However, in order to detect when such a server has recovered, -c-ares will occasionally retry failed servers. The +c-ares will occasionally retry failed servers by probing with a copy of +the query, without affecting the latency of the actual requested query. The \fIares_server_failover_options\fP structure contains options to control this behavior. The \fIretry_chance\fP field gives the probability (1/N) of retrying a @@ -367,7 +368,9 @@ for each resolution. .TP 23 .B ARES_OPT_NOROTATE Do not perform round-robin nameserver selection; always use the list of -nameservers in the same order. +nameservers in the same order. The default is to not rotate servers; however, +the system configuration can request rotation, and this option can be used to +override such a system configuration.
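[Editor's note: for illustration, a minimal sketch (not part of the patch) of how an application might set the failover options described above through the public c-ares API. The values mirror the documented defaults of a 5 second retry delay and a 1-in-10 probe chance; error handling and the rest of channel setup are elided.]

    #include <string.h>
    #include <ares.h>

    static ares_channel_t *channel_with_failover(void)
    {
      ares_channel_t     *channel = NULL;
      struct ares_options opts;

      memset(&opts, 0, sizeof(opts));
      /* 1/N chance of probing an eligible downed server per query */
      opts.server_failover_opts.retry_chance = 10;
      /* Minimum milliseconds before a downed server may be probed again */
      opts.server_failover_opts.retry_delay  = 5000;

      if (ares_init_options(&channel, &opts,
                            ARES_OPT_SERVER_FAILOVER) != ARES_SUCCESS) {
        return NULL;
      }
      return channel;
    }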
.PP .SH RETURN VALUES diff --git a/src/lib/ares_conn.h b/src/lib/ares_conn.h index 2a798d1f..7af11ea9 100644 --- a/src/lib/ares_conn.h +++ b/src/lib/ares_conn.h @@ -146,6 +146,8 @@ struct ares_server { size_t consec_failures; /* Consecutive query failure count * can be hard errors or timeouts */ + ares_bool_t probe_pending; /* Whether a probe is pending for this + * server due to prior failures */ ares_llist_t *connections; ares_conn_t *tcp_conn; diff --git a/src/lib/ares_private.h b/src/lib/ares_private.h index c3b9bdb8..00b25ae4 100644 --- a/src/lib/ares_private.h +++ b/src/lib/ares_private.h @@ -312,7 +312,7 @@ struct ares_channeldata { ares_bool_t ares_is_onion_domain(const char *name); /* Returns one of the normal ares status codes like ARES_SUCCESS */ -ares_status_t ares_send_query(ares_query_t *query, const ares_timeval_t *now); +ares_status_t ares_send_query(ares_server_t *requested_server /* Optional */, ares_query_t *query, const ares_timeval_t *now); ares_status_t ares_requeue_query(ares_query_t *query, const ares_timeval_t *now, ares_status_t status, ares_bool_t inc_try_count, @@ -486,9 +486,18 @@ ares_status_t ares_query_nolock(ares_channel_t *channel, const char *name, ares_callback_dnsrec callback, void *arg, unsigned short *qid); -/* Same as ares_send_dnsrec() except does not take a channel lock. Use this - * if a channel lock is already held */ +/*! Flags controlling behavior for ares_send_nolock() */ +typedef enum { + ARES_SEND_FLAG_NOCACHE = 1 << 0, /*!< Do not query the cache */ + ARES_SEND_FLAG_NORETRY = 1 << 1 /*!< Do not retry this query on error */ +} ares_send_flags_t; + +/* Similar to ares_send_dnsrec() except does not take a channel lock, allows + * specifying a particular server to use, and also flags controlling behavior. + */ ares_status_t ares_send_nolock(ares_channel_t *channel, + ares_server_t *server, + ares_send_flags_t flags, const ares_dns_record_t *dnsrec, ares_callback_dnsrec callback, void *arg, unsigned short *qid); diff --git a/src/lib/ares_process.c b/src/lib/ares_process.c index 2735a1cb..e6ddbd99 100644 --- a/src/lib/ares_process.c +++ b/src/lib/ares_process.c @@ -728,7 +728,8 @@ static ares_status_t process_answer(ares_channel_t *channel, goto cleanup; } - ares_send_query(query, now); + /* Send to same server */ + ares_send_query(server, query, now); status = ARES_SUCCESS; goto cleanup; } @@ -741,7 +742,7 @@ static ares_status_t process_answer(ares_channel_t *channel, !(conn->flags & ARES_CONN_FLAG_TCP) && !(channel->flags & ARES_FLAG_IGNTC)) { query->using_tcp = ARES_TRUE; - ares_send_query(query, now); + ares_send_query(NULL, query, now); status = ARES_SUCCESS; /* Switched to TCP is ok */ goto cleanup; } @@ -832,7 +833,7 @@ ares_status_t ares_requeue_query(ares_query_t *query, const ares_timeval_t *now, } if (query->try_count < max_tries && !query->no_retries) { - return ares_send_query(query, now); + return ares_send_query(NULL, query, now); } /* If we are here, all attempts to perform query failed. */ @@ -844,16 +845,42 @@ ares_status_t ares_requeue_query(ares_query_t *query, const ares_timeval_t *now, return ARES_ETIMEOUT; } -/* Pick a random server from the list, we first get a random number in the - * range of the number of servers, then scan until we find that server in - * the list */ +/*! Count the number of servers that share the same highest priority (lowest + * consecutive failures). Since they are sorted in priority order, we just + * stop when the consecutive failure count changes. Used for random selection + * of good servers. 
*/ +static size_t count_highest_prio_servers(ares_channel_t *channel) +{ + ares_slist_node_t *node; + size_t cnt = 0; + size_t last_consec_failures = SIZE_MAX; + + for (node = ares_slist_node_first(channel->servers); node != NULL; + node = ares_slist_node_next(node)) { + const ares_server_t *server = ares_slist_node_val(node); + + if (last_consec_failures != SIZE_MAX && + last_consec_failures < server->consec_failures) { + break; + } + + last_consec_failures = server->consec_failures; + cnt++; + } + + return cnt; +} + +/* Pick a random *best* server from the list: we first get a random number in + * the range of the number of *best* servers, then scan until we find that + * server in the list */ static ares_server_t *ares_random_server(ares_channel_t *channel) { unsigned char c; size_t cnt; size_t idx; ares_slist_node_t *node; - size_t num_servers = ares_slist_len(channel->servers); + size_t num_servers = count_highest_prio_servers(channel); /* Silence coverity, not possible */ if (num_servers == 0) { @@ -878,40 +905,32 @@ static ares_server_t *ares_random_server(ares_channel_t *channel) return NULL; } -/* Pick a server from the list with failover behavior. - * - * We default to using the first server in the sorted list of servers. That is - * the server with the lowest number of consecutive failures and then the - * highest priority server (by idx) if there is a draw. - * - * However, if a server temporarily goes down and hits some failures, then that - * server will never be retried until all other servers hit the same number of - * failures. This may prevent the server from being retried for a long time. - * - * To resolve this, with some probability we select a failed server to retry - * instead. - */ -static ares_server_t *ares_failover_server(ares_channel_t *channel) +static void server_probe_cb(void *arg, ares_status_t status, size_t timeouts, + const ares_dns_record_t *dnsrec) +{ + (void)arg; + (void)status; + (void)timeouts; + (void)dnsrec; + /* Nothing to do; the internal logic will handle success/failure of this */ +} + +/* Determine if we should probe a downed server */ +static void ares_probe_failed_server(ares_channel_t *channel, + const ares_server_t *server, + const ares_query_t *query) { - ares_server_t *first_server = ares_slist_first_val(channel->servers); const ares_server_t *last_server = ares_slist_last_val(channel->servers); unsigned short r; - - /* Defensive code against no servers being available on the channel. */ - if (first_server == NULL) { - return NULL; /* LCOV_EXCL_LINE: DefensiveCoding */ - } - - /* If no servers have failures, then prefer the first server in the list. */ - if (last_server != NULL && last_server->consec_failures == 0) { - return first_server; - } - - /* If we are not configured with a server retry chance then return the first - * server. - */ - if (channel->server_retry_chance == 0) { - return first_server; + ares_timeval_t now; + ares_slist_node_t *node; + ares_server_t *probe_server = NULL; + + /* If no servers have failures, or we're not configured with a server retry + * chance, then nothing to probe */ + if ((last_server != NULL && last_server->consec_failures == 0) || + channel->server_retry_chance == 0) { + return; } /* Generate a random value to decide whether to retry a failed server. The @@ -920,24 +939,38 @@ static ares_server_t *ares_failover_server(ares_channel_t *channel) * We use an unsigned short for the random value for increased precision.
*/ ares_rand_bytes(channel->rand_state, (unsigned char *)&r, sizeof(r)); - if (r % channel->server_retry_chance == 0) { - /* Select a suitable failed server to retry. */ - ares_timeval_t now; - ares_slist_node_t *node; - - ares_tvnow(&now); - for (node = ares_slist_node_first(channel->servers); node != NULL; - node = ares_slist_node_next(node)) { - ares_server_t *node_val = ares_slist_node_val(node); - if (node_val != NULL && node_val->consec_failures > 0 && - ares_timedout(&now, &node_val->next_retry_time)) { - return node_val; - } + if (r % channel->server_retry_chance != 0) { + return; + } + + /* Select the first server with failures to retry that has passed the retry + * timeout and doesn't already have a pending probe */ + ares_tvnow(&now); + for (node = ares_slist_node_first(channel->servers); node != NULL; + node = ares_slist_node_next(node)) { + ares_server_t *node_val = ares_slist_node_val(node); + if (node_val != NULL && node_val->consec_failures > 0 && + !node_val->probe_pending && + ares_timedout(&now, &node_val->next_retry_time)) { + probe_server = node_val; + break; + } } - /* If we have not returned yet, then return the first server. */ - return first_server; + /* Either there is nothing to probe, or the query was enqueued to the same + * server we were going to probe. Do nothing. */ + if (probe_server == NULL || server == probe_server) { + return; + } + + /* Enqueue an identical query onto the specified server without honoring + * the cache or allowing retries. We want to make sure it only attempts to + * use the server in question */ + probe_server->probe_pending = ARES_TRUE; + ares_send_nolock(channel, probe_server, + ARES_SEND_FLAG_NOCACHE|ARES_SEND_FLAG_NORETRY, + query->query, server_probe_cb, NULL, NULL); + } static size_t ares_calc_query_timeout(const ares_query_t *query, @@ -1066,21 +1099,29 @@ static ares_status_t ares_conn_query_write(ares_conn_t *conn, return ares_conn_flush(conn); } -ares_status_t ares_send_query(ares_query_t *query, const ares_timeval_t *now) +ares_status_t ares_send_query(ares_server_t *requested_server, + ares_query_t *query, + const ares_timeval_t *now) { ares_channel_t *channel = query->channel; ares_server_t *server; ares_conn_t *conn; size_t timeplus; ares_status_t status; + ares_bool_t probe_downed_server = ARES_TRUE; + /* Choose the server to send the query to */ - if (channel->rotate) { - /* Pull random server */ - server = ares_random_server(channel); + if (requested_server != NULL) { + server = requested_server; } else { - /* Pull server with failover behavior */ - server = ares_failover_server(channel); + /* If rotate is turned on, do a random selection */ + if (channel->rotate) { + server = ares_random_server(channel); + } else { + /* First server in list */ + server = ares_slist_first_val(channel->servers); + } } if (server == NULL) { @@ -1088,6 +1129,13 @@ ares_status_t ares_send_query(ares_query_t *query, const ares_timeval_t *now) return ARES_ENOSERVER; } + /* If a query is directed to a specific server, or the server chosen has + * failures, or the query is being retried, don't probe for downed servers */ + if (requested_server != NULL || server->consec_failures > 0 || + query->try_count != 0) { + probe_downed_server = ARES_FALSE; + } + conn = ares_fetch_connection(channel, server, query); if (conn == NULL) { status = ares_open_connection(&conn, channel, server, query->using_tcp); @@ -1172,6 +1220,12 @@ ares_status_t ares_send_query(ares_query_t *query, const ares_timeval_t *now) query->conn = conn; conn->total_queries++; + /* We just
successfully enqueued a query, see if we should probe downed + * servers. */ + if (probe_downed_server) { + ares_probe_failed_server(channel, server, query); + } + return ARES_SUCCESS; } @@ -1248,6 +1302,12 @@ static void end_query(ares_channel_t *channel, ares_server_t *server, ares_query_t *query, ares_status_t status, const ares_dns_record_t *dnsrec) { + /* If we were probing for the server to come back online, let's mark it as + * no longer being probed */ + if (server != NULL) { + server->probe_pending = ARES_FALSE; + } + ares_metrics_record(query, server, status, dnsrec); /* Invoke the callback. */ diff --git a/src/lib/ares_query.c b/src/lib/ares_query.c index cd42091c..b6ca7b51 100644 --- a/src/lib/ares_query.c +++ b/src/lib/ares_query.c @@ -105,7 +105,7 @@ ares_status_t ares_query_nolock(ares_channel_t *channel, const char *name, qquery->arg = arg; /* Send it off. qcallback will be called when we get an answer. */ - status = ares_send_nolock(channel, dnsrec, ares_query_dnsrec_cb, qquery, qid); + status = ares_send_nolock(channel, NULL, 0, dnsrec, ares_query_dnsrec_cb, qquery, qid); ares_dns_record_destroy(dnsrec); return status; diff --git a/src/lib/ares_search.c b/src/lib/ares_search.c index 4cfbdaea..dda2a317 100644 --- a/src/lib/ares_search.c +++ b/src/lib/ares_search.c @@ -93,7 +93,7 @@ static ares_status_t ares_search_next(ares_channel_t *channel, } status = - ares_send_nolock(channel, squery->dnsrec, search_callback, squery, NULL); + ares_send_nolock(channel, NULL, 0, squery->dnsrec, search_callback, squery, NULL); if (status != ARES_EFORMERR) { *skip_cleanup = ARES_TRUE; diff --git a/src/lib/ares_send.c b/src/lib/ares_send.c index 40094b3a..2ae46ed0 100644 --- a/src/lib/ares_send.c +++ b/src/lib/ares_send.c @@ -106,6 +106,8 @@ done: } ares_status_t ares_send_nolock(ares_channel_t *channel, + ares_server_t *server, + ares_send_flags_t flags, const ares_dns_record_t *dnsrec, ares_callback_dnsrec callback, void *arg, unsigned short *qid) @@ -123,13 +125,15 @@ ares_status_t ares_send_nolock(ares_channel_t *channel, return ARES_ENOSERVER; } - /* Check query cache */ - status = ares_qcache_fetch(channel, &now, dnsrec, &dnsrec_resp); - if (status != ARES_ENOTFOUND) { - /* ARES_SUCCESS means we retrieved the cache, anything else is a critical - * failure, all result in termination */ - callback(arg, status, 0, dnsrec_resp); - return status; + if (!(flags & ARES_SEND_FLAG_NOCACHE)) { + /* Check query cache */ + status = ares_qcache_fetch(channel, &now, dnsrec, &dnsrec_resp); + if (status != ARES_ENOTFOUND) { + /* ARES_SUCCESS means we retrieved the answer from the cache, anything + * else is a critical failure, all result in termination */ + callback(arg, status, 0, dnsrec_resp); + return status; + } } /* Allocate space for query and allocated fields. */ @@ -175,6 +179,9 @@ ares_status_t ares_send_nolock(ares_channel_t *channel, /* Initialize query status. */ query->try_count = 0; + if (flags & ARES_SEND_FLAG_NORETRY) { + query->no_retries = ARES_TRUE; + } query->error_status = ARES_SUCCESS; query->timeouts = 0; @@ -206,7 +213,7 @@ ares_status_t ares_send_nolock(ares_channel_t *channel, /* Perform the first query action.
*/ - status = ares_send_query(query, &now); + status = ares_send_query(server, query, &now); if (status == ARES_SUCCESS && qid) { *qid = id; } @@ -226,7 +233,7 @@ ares_status_t ares_send_dnsrec(ares_channel_t *channel, ares_channel_lock(channel); - status = ares_send_nolock(channel, dnsrec, callback, arg, qid); + status = ares_send_nolock(channel, NULL, 0, dnsrec, callback, arg, qid); ares_channel_unlock(channel); diff --git a/test/ares-test-mock-et.cc b/test/ares-test-mock-et.cc index 65fb41b5..22f80e89 100644 --- a/test/ares-test-mock-et.cc +++ b/test/ares-test-mock-et.cc @@ -1274,6 +1274,7 @@ TEST_P(MockEventThreadTest, HostAliasUnreadable) { } #endif + class MockMultiServerEventThreadTest : public MockEventThreadOptsTest, public ::testing::WithParamInterface< std::tuple > { @@ -1421,11 +1422,26 @@ TEST_P(NoRotateMultiMockEventThreadTest, ServerNoResponseFailover) { #else # define SERVER_FAILOVER_RETRY_DELAY 330 #endif - class ServerFailoverOptsMockEventThreadTest : public MockMultiServerEventThreadTest { + + +class ServerFailoverOptsMockEventThreadTest + : public MockEventThreadOptsTest, + public ::testing::WithParamInterface > { public: ServerFailoverOptsMockEventThreadTest() - : MockMultiServerEventThreadTest(FillOptions(&opts_), - ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE) {} + : MockEventThreadOptsTest(4, std::get<0>(GetParam()), std::get<1>(GetParam()), std::get<2>(GetParam()), + FillOptions(&opts_), + ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE) {} + void CheckExample() { + HostResult result; + ares_gethostbyname(channel_, "www.example.com.", AF_INET, HostCallback, &result); + Process(); + EXPECT_TRUE(result.done_); + std::stringstream ss; + ss << result.host_; + EXPECT_EQ("{'www.example.com' aliases=[] addrs=[2.3.4.5]}", ss.str()); + } + static struct ares_options* FillOptions(struct ares_options *opts) { memset(opts, 0, sizeof(struct ares_options)); opts->server_failover_opts.retry_chance = 1; @@ -1451,15 +1467,15 @@ TEST_P(ServerFailoverOptsMockEventThreadTest, ServerFailoverOpts) { auto tv_now = std::chrono::high_resolution_clock::now(); unsigned int delay_ms; - // 1. If all servers are healthy, then the first server should be selected. + // At the start all servers are healthy, so the first server should be selected if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: First server should be selected" << std::endl; EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) .WillOnce(SetReply(servers_[0].get(), &okrsp)); CheckExample(); - // 2. Failed servers should be retried after the retry delay. - // - // Fail server #0 but leave server #1 as healthy. + // Fail server #0 but leave server #1 as healthy. This results in server + // order: + // #1 (failures: 0), #2 (failures: 0), #3 (failures: 0), #0 (failures: 1) tv_now = std::chrono::high_resolution_clock::now(); if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Server0 will fail but leave Server1 as healthy" << std::endl; EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) @@ -1469,25 +1485,32 @@ TEST_P(ServerFailoverOptsMockEventThreadTest, ServerFailoverOpts) { CheckExample(); // Sleep for the retry delay (actually a little more than the retry delay to account - // for unreliable timing, e.g. NTP slew) and send in another query. Server #0 - // should be retried. + // for unreliable timing, e.g. NTP slew) and send in another query.
The real + // query will be sent to Server #1 (which will succeed) and Server #0 will + // be probed and return a successful result. This leaves the server order + // of: + // #0 (failures: 0), #1 (failures: 0), #2 (failures: 0), #3 (failures: 0) tv_now = std::chrono::high_resolution_clock::now(); delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10); if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl; ares_sleep_time(delay_ms); tv_now = std::chrono::high_resolution_clock::now(); - if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Server0 should be past retry delay and should be tried again successfully" << std::endl; + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Server0 should be past retry delay and should be probed (successful), server 1 will respond successful for real query" << std::endl; EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) .WillOnce(SetReply(servers_[0].get(), &okrsp)); + EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[1].get(), &okrsp)); CheckExample(); - // 3. If there are multiple failed servers, then the servers should be - // retried in sorted order. - // - // Fail all servers for the first round of tries. On the second round server - // #1 responds successfully. + + // Fail all servers for the first round of tries. On the second round, #1 + // responds successfully. This should leave server order of: + // #1 (failures: 0), #2 (failures: 1), #3 (failures: 1), #0 (failures: 2) + // NOTE: A single query being retried won't spawn probes to downed servers, + // only an initial query attempt is eligible to spawn probes. So + // no probes are sent at this step. tv_now = std::chrono::high_resolution_clock::now(); - if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: All 3 servers will fail on the first attempt. On second attempt, Server0 will fail, but Server1 will answer correctly." << std::endl; + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: All 4 servers will fail on the first attempt, server 0 will fail on second. Server 1 will succeed on second." << std::endl; EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) .WillOnce(SetReply(servers_[0].get(), &servfailrsp)) .WillOnce(SetReply(servers_[0].get(), &servfailrsp)); @@ -1496,51 +1519,69 @@ TEST_P(ServerFailoverOptsMockEventThreadTest, ServerFailoverOpts) { .WillOnce(SetReply(servers_[1].get(), &okrsp)); EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) .WillOnce(SetReply(servers_[2].get(), &servfailrsp)); + EXPECT_CALL(*servers_[3], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[3].get(), &servfailrsp)); CheckExample(); - // At this point the sorted servers look like [1] (f0) [2] (f1) [0] (f2). - // Sleep for the retry delay and send in another query. Server #2 should be - // retried first, and then server #0. + + // Sleep for the retry delay and send in another query.
Server #1 is the + // highest priority server and will respond with success; however, a probe + // will be sent for Server #2 which will succeed: + // #1 (failures: 0), #2 (failures: 0), #3 (failures: 1 - expired), #0 (failures: 2 - expired) tv_now = std::chrono::high_resolution_clock::now(); delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10); if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl; ares_sleep_time(delay_ms); tv_now = std::chrono::high_resolution_clock::now(); - if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Past retry delay, so will choose Server2 and Server0 that are down. Server2 will fail but Server0 will succeed." << std::endl; + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Past retry delay, will query Server 1 and probe Server 2, both will succeed." << std::endl; + EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[1].get(), &okrsp)); EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) - .WillOnce(SetReply(servers_[2].get(), &servfailrsp)); - EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) - .WillOnce(SetReply(servers_[0].get(), &okrsp)); + .WillOnce(SetReply(servers_[2].get(), &okrsp)); CheckExample(); - // Test might take a while to run and the sleep may not be accurate, so we - // want to track this interval otherwise we may not pass the last test case - // on slow systems. - auto elapse_start = tv_now; + // Cause another server to fail so we have at least one non-expired failed + // server and one expired failed server. #1 is highest priority, which we + // will fail, #2 will succeed, and #3 will be probed and succeed: + // #2 (failures: 0), #3 (failures: 0), #1 (failures: 1 not expired), #0 (failures: 2 expired) + tv_now = std::chrono::high_resolution_clock::now(); + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Will query Server 1 and fail, Server 2 will answer successfully. Server 3 will be probed and succeed." << std::endl; + EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[1].get(), &servfailrsp)); + EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[2].get(), &okrsp)); + EXPECT_CALL(*servers_[3], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[3].get(), &okrsp)); + CheckExample(); - // 4. If there are multiple failed servers, then servers which have not yet - // met the retry delay should be skipped. - // - // The sorted servers currently look like [0] (f0) [1] (f0) [2] (f2) and - // server #2 has just been retried. - // Sleep for 1/2 the retry delay and trigger a failure on server #0. + // We need to make sure that if a failed server is higher priority but not + // yet expired, the probe will go to the next failed server instead. + // In this case #2 is the server that the query will go to and succeed, and + // then a probe will be sent for #0 (since #1 is not expired) and succeed. We + // will sleep for 1/4 the retry duration before spawning the queries so we can + // then sleep for the remainder for the follow-up test.
This will leave the servers + // in this state: + // #0 (failures: 0), #2 (failures: 0), #3 (failures: 0), #1 (failures: 1 not expired) tv_now = std::chrono::high_resolution_clock::now(); - delay_ms = (SERVER_FAILOVER_RETRY_DELAY/2); + + // Track elapsed time so we know when each server's retry delay expires. + auto elapse_start = tv_now; + + delay_ms = (SERVER_FAILOVER_RETRY_DELAY/4); if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl; ares_sleep_time(delay_ms); tv_now = std::chrono::high_resolution_clock::now(); - if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Retry delay has not been hit yet. Server0 was last successful, so should be tried first (and will fail), Server1 is also healthy so will respond." << std::endl; + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Retry delay has not been hit yet. Server2 will be queried and succeed. Server 0 (not server 1 due to non-expired retry delay) will be probed and succeed." << std::endl; + EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[2].get(), &okrsp)); EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) - .WillOnce(SetReply(servers_[0].get(), &servfailrsp)); - EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) - .WillOnce(SetReply(servers_[1].get(), &okrsp)); + .WillOnce(SetReply(servers_[0].get(), &okrsp)); CheckExample(); - // The sorted servers now look like [1] (f0) [0] (f1) [2] (f2). Server #0 - // has just failed whilst server #2 is somewhere in its retry delay. - // Sleep until we know server #2s retry delay has elapsed but Server #0 has - // not. + // Finally we sleep for the remainder of the retry delay, send another + // query, which should succeed on Server #0, and also probe Server #1 which + // will also succeed. tv_now = std::chrono::high_resolution_clock::now(); unsigned int elapsed_time = (unsigned int)std::chrono::duration_cast(tv_now - elapse_start).count(); @@ -1553,9 +1594,9 @@ TEST_P(ServerFailoverOptsMockEventThreadTest, ServerFailoverOpts) { ares_sleep_time(delay_ms); } tv_now = std::chrono::high_resolution_clock::now(); - if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Retry delay has expired on Server2 but not Server0, will try on Server2 and fail, then Server1 will answer" << std::endl; - EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) - .WillOnce(SetReply(servers_[2].get(), &servfailrsp)); + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Retry delay has expired on Server1, Server 0 will be queried and succeed, Server 1 will be probed and succeed."
<< std::endl; + EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[0].get(), &okrsp)); EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) .WillOnce(SetReply(servers_[1].get(), &okrsp)); CheckExample(); diff --git a/test/ares-test-mock.cc b/test/ares-test-mock.cc index 6d66b9bb..f094101c 100644 --- a/test/ares-test-mock.cc +++ b/test/ares-test-mock.cc @@ -2136,11 +2136,25 @@ TEST_P(NoRotateMultiMockTest, ServerNoResponseFailover) { #else # define SERVER_FAILOVER_RETRY_DELAY 330 #endif + class ServerFailoverOptsMultiMockTest + : public MockChannelOptsTest, + public ::testing::WithParamInterface< std::pair > { public: ServerFailoverOptsMultiMockTest() - : MockMultiServerChannelTest(FillOptions(&opts_), - ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE) {} + : MockChannelOptsTest(4, GetParam().first, GetParam().second, false, + FillOptions(&opts_), + ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE) {} + void CheckExample() { + HostResult result; + ares_gethostbyname(channel_, "www.example.com.", AF_INET, HostCallback, &result); + Process(); + EXPECT_TRUE(result.done_); + std::stringstream ss; + ss << result.host_; + EXPECT_EQ("{'www.example.com' aliases=[] addrs=[2.3.4.5]}", ss.str()); + } + static struct ares_options* FillOptions(struct ares_options *opts) { memset(opts, 0, sizeof(struct ares_options)); opts->server_failover_opts.retry_chance = 1; @@ -2151,6 +2165,7 @@ class ServerFailoverOptsMultiMockTest : public MockMultiServerChannelTest { struct ares_options opts_; }; + // Test case to trigger server failover behavior. We use a retry chance of // 100% and a retry delay so that we can test behavior reliably. TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) { @@ -2166,15 +2181,15 @@ TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) { auto tv_now = std::chrono::high_resolution_clock::now(); unsigned int delay_ms; - // 1. If all servers are healthy, then the first server should be selected. + // At the start all servers are healthy, so the first server should be selected if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: First server should be selected" << std::endl; EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) .WillOnce(SetReply(servers_[0].get(), &okrsp)); CheckExample(); - // 2. Failed servers should be retried after the retry delay. - // - // Fail server #0 but leave server #1 as healthy. + // Fail server #0 but leave server #1 as healthy. This results in server + // order: + // #1 (failures: 0), #2 (failures: 0), #3 (failures: 0), #0 (failures: 1) tv_now = std::chrono::high_resolution_clock::now(); if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Server0 will fail but leave Server1 as healthy" << std::endl; EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) @@ -2184,25 +2199,32 @@ TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) { CheckExample(); // Sleep for the retry delay (actually a little more than the retry delay to account - // for unreliable timing, e.g. NTP slew) and send in another query. Server #0 - // should be retried. + // for unreliable timing, e.g. NTP slew) and send in another query. The real + // query will be sent to Server #1 (which will succeed) and Server #0 will + // be probed and return a successful result.
This leaves the server order + // of: + // #0 (failures: 0), #1 (failures: 0), #2 (failures: 0), #3 (failures: 0) tv_now = std::chrono::high_resolution_clock::now(); delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10); if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl; ares_sleep_time(delay_ms); tv_now = std::chrono::high_resolution_clock::now(); - if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Server0 should be past retry delay and should be tried again successfully" << std::endl; + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Server0 should be past retry delay and should be probed (successful), server 1 will respond successful for real query" << std::endl; EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) .WillOnce(SetReply(servers_[0].get(), &okrsp)); + EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[1].get(), &okrsp)); CheckExample(); - // 3. If there are multiple failed servers, then the servers should be - // retried in sorted order. - // - // Fail all servers for the first round of tries. On the second round server - // #1 responds successfully. + + // Fail all servers for the first round of tries. On the second round, #1 + // responds successfully. This should leave server order of: + // #1 (failures: 0), #2 (failures: 1), #3 (failures: 1), #0 (failures: 2) + // NOTE: A single query being retried won't spawn probes to downed servers, + // only an initial query attempt is eligible to spawn probes. So + // no probes are sent at this step. tv_now = std::chrono::high_resolution_clock::now(); - if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: All 3 servers will fail on the first attempt. On second attempt, Server0 will fail, but Server1 will answer correctly." << std::endl; + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: All 4 servers will fail on the first attempt, server 0 will fail on second. Server 1 will succeed on second." << std::endl; EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) .WillOnce(SetReply(servers_[0].get(), &servfailrsp)) .WillOnce(SetReply(servers_[0].get(), &servfailrsp)); @@ -2211,51 +2233,69 @@ TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) { .WillOnce(SetReply(servers_[1].get(), &okrsp)); EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) .WillOnce(SetReply(servers_[2].get(), &servfailrsp)); + EXPECT_CALL(*servers_[3], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[3].get(), &servfailrsp)); CheckExample(); - // At this point the sorted servers look like [1] (f0) [2] (f1) [0] (f2). - // Sleep for the retry delay and send in another query. Server #2 should be - // retried first, and then server #0. + + // Sleep for the retry delay and send in another query.
Server #1 is the + // highest priority server and will respond with success; however, a probe + // will be sent for Server #2 which will succeed: + // #1 (failures: 0), #2 (failures: 0), #3 (failures: 1 - expired), #0 (failures: 2 - expired) tv_now = std::chrono::high_resolution_clock::now(); delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10); if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl; ares_sleep_time(delay_ms); tv_now = std::chrono::high_resolution_clock::now(); - if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Past retry delay, so will choose Server2 and Server0 that are down. Server2 will fail but Server0 will succeed." << std::endl; + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Past retry delay, will query Server 1 and probe Server 2, both will succeed." << std::endl; + EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[1].get(), &okrsp)); EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) - .WillOnce(SetReply(servers_[2].get(), &servfailrsp)); - EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) - .WillOnce(SetReply(servers_[0].get(), &okrsp)); + .WillOnce(SetReply(servers_[2].get(), &okrsp)); CheckExample(); - // Test might take a while to run and the sleep may not be accurate, so we - // want to track this interval otherwise we may not pass the last test case - // on slow systems. - auto elapse_start = tv_now; + // Cause another server to fail so we have at least one non-expired failed + // server and one expired failed server. #1 is highest priority, which we + // will fail, #2 will succeed, and #3 will be probed and succeed: + // #2 (failures: 0), #3 (failures: 0), #1 (failures: 1 not expired), #0 (failures: 2 expired) + tv_now = std::chrono::high_resolution_clock::now(); + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Will query Server 1 and fail, Server 2 will answer successfully. Server 3 will be probed and succeed." << std::endl; + EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[1].get(), &servfailrsp)); + EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[2].get(), &okrsp)); + EXPECT_CALL(*servers_[3], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[3].get(), &okrsp)); + CheckExample(); - // 4. If there are multiple failed servers, then servers which have not yet - // met the retry delay should be skipped. - // - // The sorted servers currently look like [0] (f0) [1] (f0) [2] (f2) and - // server #2 has just been retried. - // Sleep for 1/2 the retry delay and trigger a failure on server #0. + // We need to make sure that if a failed server is higher priority but not + // yet expired, the probe will go to the next failed server instead. + // In this case #2 is the server that the query will go to and succeed, and + // then a probe will be sent for #0 (since #1 is not expired) and succeed. We + // will sleep for 1/4 the retry duration before spawning the queries so we can + // then sleep for the remainder for the follow-up test.
This will leave the servers + // in this state: + // #0 (failures: 0), #2 (failures: 0), #3 (failures: 0), #1 (failures: 1 not expired) tv_now = std::chrono::high_resolution_clock::now(); - delay_ms = (SERVER_FAILOVER_RETRY_DELAY/2); + + // Track elapsed time so we know when each server's retry delay expires. + auto elapse_start = tv_now; + + delay_ms = (SERVER_FAILOVER_RETRY_DELAY/4); if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl; ares_sleep_time(delay_ms); tv_now = std::chrono::high_resolution_clock::now(); - if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Retry delay has not been hit yet. Server0 was last successful, so should be tried first (and will fail), Server1 is also healthy so will respond." << std::endl; + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Retry delay has not been hit yet. Server2 will be queried and succeed. Server 0 (not server 1 due to non-expired retry delay) will be probed and succeed." << std::endl; + EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[2].get(), &okrsp)); EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) - .WillOnce(SetReply(servers_[0].get(), &servfailrsp)); - EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) - .WillOnce(SetReply(servers_[1].get(), &okrsp)); + .WillOnce(SetReply(servers_[0].get(), &okrsp)); CheckExample(); - // The sorted servers now look like [1] (f0) [0] (f1) [2] (f2). Server #0 - // has just failed whilst server #2 is somewhere in its retry delay. - // Sleep until we know server #2s retry delay has elapsed but Server #0 has - // not. + // Finally we sleep for the remainder of the retry delay, send another + // query, which should succeed on Server #0, and also probe Server #1 which + // will also succeed. tv_now = std::chrono::high_resolution_clock::now(); unsigned int elapsed_time = (unsigned int)std::chrono::duration_cast(tv_now - elapse_start).count(); @@ -2268,9 +2308,9 @@ TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) { ares_sleep_time(delay_ms); } tv_now = std::chrono::high_resolution_clock::now(); - if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Retry delay has expired on Server2 but not Server0, will try on Server2 and fail, then Server1 will answer" << std::endl; - EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) - .WillOnce(SetReply(servers_[2].get(), &servfailrsp)); + if (verbose) std::cerr << std::chrono::duration_cast(tv_now - tv_begin).count() << "ms: Retry delay has expired on Server1, Server 0 will be queried and succeed, Server 1 will be probed and succeed." << std::endl; + EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) + .WillOnce(SetReply(servers_[0].get(), &okrsp)); EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) .WillOnce(SetReply(servers_[1].get(), &okrsp)); CheckExample();
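[Editor's note: as a usage note on the fixtures above, both test classes pin retry_chance to 1 via their FillOptions helpers so that every eligible query triggers a probe. A minimal sketch of the equivalent channel setup follows; the retry_delay value is illustrative (the tests use the platform-dependent SERVER_FAILOVER_RETRY_DELAY, defined as 330 ms in the excerpt), and the remaining test scaffolding is elided.]

    #include <string.h>
    #include <ares.h>

    static int init_failover_test_channel(ares_channel_t **channel)
    {
      struct ares_options opts;

      memset(&opts, 0, sizeof(opts));
      /* retry_chance = 1 means a 1/1 probability: probe on every eligible query */
      opts.server_failover_opts.retry_chance = 1;
      /* Milliseconds a downed server stays unprobed; mirrors
       * SERVER_FAILOVER_RETRY_DELAY from the tests */
      opts.server_failover_opts.retry_delay  = 330;

      /* ARES_OPT_NOROTATE keeps server selection deterministic for the test */
      return ares_init_options(channel, &opts,
                               ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE);
    }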