test: ServerFailoverOpts can fail on heavily loaded systems due to its reliance on sleep and time. Try to harden it a little bit

v1.31
Brad House 4 months ago
parent 493a2a57f0
commit 1e8f056b2c
  1. 94
      test/ares-test-mock-ai.cc
  2. 92
      test/ares-test-mock-et.cc
  3. 94
      test/ares-test-mock.cc
  4. 10
      test/ares-test.cc
  5. 2
      test/ares-test.h

@ -723,20 +723,6 @@ class NoRotateMultiMockTestAI : public MockMultiServerChannelTestAI {
NoRotateMultiMockTestAI() : MockMultiServerChannelTestAI(nullptr, ARES_OPT_NOROTATE) {} NoRotateMultiMockTestAI() : MockMultiServerChannelTestAI(nullptr, ARES_OPT_NOROTATE) {}
}; };
class ServerFailoverOptsMockTestAI : public MockMultiServerChannelTestAI {
public:
ServerFailoverOptsMockTestAI()
: MockMultiServerChannelTestAI(FillOptions(&opts_),
ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE) {}
static struct ares_options* FillOptions(struct ares_options *opts) {
memset(opts, 0, sizeof(struct ares_options));
opts->server_failover_opts.retry_chance = 1;
opts->server_failover_opts.retry_delay = 250;
return opts;
}
private:
struct ares_options opts_;
};
TEST_P(NoRotateMultiMockTestAI, ThirdServer) { TEST_P(NoRotateMultiMockTestAI, ThirdServer) {
struct ares_options opts; struct ares_options opts;
@ -789,10 +775,26 @@ TEST_P(NoRotateMultiMockTestAI, ThirdServer) {
CheckExample(); CheckExample();
} }
#define SERVER_FAILOVER_RETRY_DELAY 750
class ServerFailoverOptsMockTestAI : public MockMultiServerChannelTestAI {
public:
ServerFailoverOptsMockTestAI()
: MockMultiServerChannelTestAI(FillOptions(&opts_),
ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE) {}
static struct ares_options* FillOptions(struct ares_options *opts) {
memset(opts, 0, sizeof(struct ares_options));
opts->server_failover_opts.retry_chance = 1;
opts->server_failover_opts.retry_delay = SERVER_FAILOVER_RETRY_DELAY;
return opts;
}
private:
struct ares_options opts_;
};
// Test case to trigger server failover behavior. We use a retry chance of // Test case to trigger server failover behavior. We use a retry chance of
// 100% and a retry delay of 250ms so that we can test behavior reliably. // 100% and a retry delay so that we can test behavior reliably.
TEST_P(ServerFailoverOptsMockTestAI, ServerFailoverOpts) { TEST_P(ServerFailoverOptsMockTestAI, ServerFailoverOpts) {
DNSPacket servfailrsp; DNSPacket servfailrsp;
servfailrsp.set_response().set_aa().set_rcode(SERVFAIL) servfailrsp.set_response().set_aa().set_rcode(SERVFAIL)
.add_question(new DNSQuestion("www.example.com", T_A)); .add_question(new DNSQuestion("www.example.com", T_A));
DNSPacket okrsp; DNSPacket okrsp;
@ -800,7 +802,12 @@ TEST_P(ServerFailoverOptsMockTestAI, ServerFailoverOpts) {
.add_question(new DNSQuestion("www.example.com", T_A)) .add_question(new DNSQuestion("www.example.com", T_A))
.add_answer(new DNSARR("www.example.com", 100, {2,3,4,5})); .add_answer(new DNSARR("www.example.com", 100, {2,3,4,5}));
auto tv_begin = std::chrono::high_resolution_clock::now();
auto tv_now = std::chrono::high_resolution_clock::now();
unsigned int delay_ms;
// 1. If all servers are healthy, then the first server should be selected. // 1. If all servers are healthy, then the first server should be selected.
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: First server should be selected" << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &okrsp)); .WillOnce(SetReply(servers_[0].get(), &okrsp));
CheckExample(); CheckExample();
@ -808,16 +815,23 @@ TEST_P(ServerFailoverOptsMockTestAI, ServerFailoverOpts) {
// 2. Failed servers should be retried after the retry delay. // 2. Failed servers should be retried after the retry delay.
// //
// Fail server #0 but leave server #1 as healthy. // Fail server #0 but leave server #1 as healthy.
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Server0 will fail but leave Server1 as healthy" << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)); .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[1].get(), &okrsp)); .WillOnce(SetReply(servers_[1].get(), &okrsp));
CheckExample(); CheckExample();
// Sleep for the retry delay (actually a little more than 250ms to account // Sleep for the retry delay (actually a little more than the retry delay to account
// for unreliable timing, e.g. NTP slew) and send in another query. Server #0 // for unreliable timing, e.g. NTP slew) and send in another query. Server #0
// should be retried. // should be retried.
std::this_thread::sleep_for(std::chrono::milliseconds(260)); tv_now = std::chrono::high_resolution_clock::now();
delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10);
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Server0 should be past retry delay and should be tried again successfully" << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &okrsp)); .WillOnce(SetReply(servers_[0].get(), &okrsp));
CheckExample(); CheckExample();
@ -827,6 +841,8 @@ TEST_P(ServerFailoverOptsMockTestAI, ServerFailoverOpts) {
// //
// Fail all servers for the first round of tries. On the second round server // Fail all servers for the first round of tries. On the second round server
// #1 responds successfully. // #1 responds successfully.
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: All 3 servers will fail on the first attempt. On second attempt, Server0 will fail, but Server1 will answer correctly." << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)) .WillOnce(SetReply(servers_[0].get(), &servfailrsp))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)); .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
@ -840,20 +856,36 @@ TEST_P(ServerFailoverOptsMockTestAI, ServerFailoverOpts) {
// At this point the sorted servers look like [1] (f0) [2] (f1) [0] (f2). // At this point the sorted servers look like [1] (f0) [2] (f1) [0] (f2).
// Sleep for the retry delay and send in another query. Server #2 should be // Sleep for the retry delay and send in another query. Server #2 should be
// retried first, and then server #0. // retried first, and then server #0.
std::this_thread::sleep_for(std::chrono::milliseconds(260)); tv_now = std::chrono::high_resolution_clock::now();
delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10);
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Past retry delay, so will choose Server2 and Server0 that are down. Server2 will fail but Server0 will succeed." << std::endl;
EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[2].get(), &servfailrsp)); .WillOnce(SetReply(servers_[2].get(), &servfailrsp));
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &okrsp)); .WillOnce(SetReply(servers_[0].get(), &okrsp));
CheckExample(); CheckExample();
// Test might take a while to run and the sleep may not be accurate, so we
// want to track this interval otherwise we may not pass the last test case
// on slow systems.
auto elapse_start = tv_now;
// 4. If there are multiple failed servers, then servers which have not yet // 4. If there are multiple failed servers, then servers which have not yet
// met the retry delay should be skipped. // met the retry delay should be skipped.
// //
// The sorted servers currently look like [0] (f0) [1] (f0) [2] (f2) and // The sorted servers currently look like [0] (f0) [1] (f0) [2] (f2) and
// server #2 has just been retried. // server #2 has just been retried.
// Sleep for half the retry delay and trigger a failure on server #0. // Sleep for 1/2 the retry delay and trigger a failure on server #0.
std::this_thread::sleep_for(std::chrono::milliseconds(130)); tv_now = std::chrono::high_resolution_clock::now();
delay_ms = (SERVER_FAILOVER_RETRY_DELAY/2);
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Retry delay has not been hit yet. Server0 was last successful, so should be tried first (and will fail), Server1 is also healthy so will respond." << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)); .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))
@ -861,10 +893,22 @@ TEST_P(ServerFailoverOptsMockTestAI, ServerFailoverOpts) {
CheckExample(); CheckExample();
// The sorted servers now look like [1] (f0) [0] (f1) [2] (f2). Server #0 // The sorted servers now look like [1] (f0) [0] (f1) [2] (f2). Server #0
// has just failed whilst server #2 is halfway through the retry delay. // has just failed whilst server #2 is somewhere in its retry delay.
// Sleep for another half the retry delay and check that server #2 is retried // Sleep until we know server #2s retry delay has elapsed but Server #0 has
// whilst server #0 is not. // not.
std::this_thread::sleep_for(std::chrono::milliseconds(130)); tv_now = std::chrono::high_resolution_clock::now();
unsigned int elapsed_time = (unsigned int)std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - elapse_start).count();
delay_ms = (SERVER_FAILOVER_RETRY_DELAY) + (SERVER_FAILOVER_RETRY_DELAY / 10);
if (elapsed_time > delay_ms) {
if (verbose) std::cerr << "elapsed duration " << elapsed_time << "ms greater than desired delay of " << delay_ms << "ms, not sleeping" << std::endl;
} else {
delay_ms -= elapsed_time; // subtract already elapsed time
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
}
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Retry delay has expired on Server2 but not Server0, will try on Server2 and fail, then Server1 will answer" << std::endl;
EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[2].get(), &servfailrsp)); .WillOnce(SetReply(servers_[2].get(), &servfailrsp));
EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))

@ -1250,20 +1250,6 @@ class NoRotateMultiMockEventThreadTest : public MockMultiServerEventThreadTest {
NoRotateMultiMockEventThreadTest() : MockMultiServerEventThreadTest(nullptr, ARES_OPT_NOROTATE) {} NoRotateMultiMockEventThreadTest() : MockMultiServerEventThreadTest(nullptr, ARES_OPT_NOROTATE) {}
}; };
class ServerFailoverOptsMockEventThreadTest : public MockMultiServerEventThreadTest {
public:
ServerFailoverOptsMockEventThreadTest()
: MockMultiServerEventThreadTest(FillOptions(&opts_),
ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE) {}
static struct ares_options* FillOptions(struct ares_options *opts) {
memset(opts, 0, sizeof(struct ares_options));
opts->server_failover_opts.retry_chance = 1;
opts->server_failover_opts.retry_delay = 250;
return opts;
}
private:
struct ares_options opts_;
};
TEST_P(NoRotateMultiMockEventThreadTest, ThirdServer) { TEST_P(NoRotateMultiMockEventThreadTest, ThirdServer) {
struct ares_options opts; struct ares_options opts;
@ -1384,8 +1370,24 @@ TEST_P(NoRotateMultiMockEventThreadTest, ServerNoResponseFailover) {
EXPECT_EQ("{'www.example.com' aliases=[] addrs=[2.3.4.5]}", ss4.str()); EXPECT_EQ("{'www.example.com' aliases=[] addrs=[2.3.4.5]}", ss4.str());
} }
#define SERVER_FAILOVER_RETRY_DELAY 750
class ServerFailoverOptsMockEventThreadTest : public MockMultiServerEventThreadTest {
public:
ServerFailoverOptsMockEventThreadTest()
: MockMultiServerEventThreadTest(FillOptions(&opts_),
ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE) {}
static struct ares_options* FillOptions(struct ares_options *opts) {
memset(opts, 0, sizeof(struct ares_options));
opts->server_failover_opts.retry_chance = 1;
opts->server_failover_opts.retry_delay = SERVER_FAILOVER_RETRY_DELAY;
return opts;
}
private:
struct ares_options opts_;
};
// Test case to trigger server failover behavior. We use a retry chance of // Test case to trigger server failover behavior. We use a retry chance of
// 100% and a retry delay of 250ms so that we can test behavior reliably. // 100% and a retry delay so that we can test behavior reliably.
TEST_P(ServerFailoverOptsMockEventThreadTest, ServerFailoverOpts) { TEST_P(ServerFailoverOptsMockEventThreadTest, ServerFailoverOpts) {
DNSPacket servfailrsp; DNSPacket servfailrsp;
servfailrsp.set_response().set_aa().set_rcode(SERVFAIL) servfailrsp.set_response().set_aa().set_rcode(SERVFAIL)
@ -1395,7 +1397,12 @@ TEST_P(ServerFailoverOptsMockEventThreadTest, ServerFailoverOpts) {
.add_question(new DNSQuestion("www.example.com", T_A)) .add_question(new DNSQuestion("www.example.com", T_A))
.add_answer(new DNSARR("www.example.com", 100, {2,3,4,5})); .add_answer(new DNSARR("www.example.com", 100, {2,3,4,5}));
auto tv_begin = std::chrono::high_resolution_clock::now();
auto tv_now = std::chrono::high_resolution_clock::now();
unsigned int delay_ms;
// 1. If all servers are healthy, then the first server should be selected. // 1. If all servers are healthy, then the first server should be selected.
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: First server should be selected" << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &okrsp)); .WillOnce(SetReply(servers_[0].get(), &okrsp));
CheckExample(); CheckExample();
@ -1403,16 +1410,23 @@ TEST_P(ServerFailoverOptsMockEventThreadTest, ServerFailoverOpts) {
// 2. Failed servers should be retried after the retry delay. // 2. Failed servers should be retried after the retry delay.
// //
// Fail server #0 but leave server #1 as healthy. // Fail server #0 but leave server #1 as healthy.
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Server0 will fail but leave Server1 as healthy" << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)); .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[1].get(), &okrsp)); .WillOnce(SetReply(servers_[1].get(), &okrsp));
CheckExample(); CheckExample();
// Sleep for the retry delay (actually a little more than 250ms to account // Sleep for the retry delay (actually a little more than the retry delay to account
// for unreliable timing, e.g. NTP slew) and send in another query. Server #0 // for unreliable timing, e.g. NTP slew) and send in another query. Server #0
// should be retried. // should be retried.
std::this_thread::sleep_for(std::chrono::milliseconds(260)); tv_now = std::chrono::high_resolution_clock::now();
delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10);
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Server0 should be past retry delay and should be tried again successfully" << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &okrsp)); .WillOnce(SetReply(servers_[0].get(), &okrsp));
CheckExample(); CheckExample();
@ -1422,6 +1436,8 @@ TEST_P(ServerFailoverOptsMockEventThreadTest, ServerFailoverOpts) {
// //
// Fail all servers for the first round of tries. On the second round server // Fail all servers for the first round of tries. On the second round server
// #1 responds successfully. // #1 responds successfully.
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: All 3 servers will fail on the first attempt. On second attempt, Server0 will fail, but Server1 will answer correctly." << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)) .WillOnce(SetReply(servers_[0].get(), &servfailrsp))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)); .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
@ -1435,20 +1451,36 @@ TEST_P(ServerFailoverOptsMockEventThreadTest, ServerFailoverOpts) {
// At this point the sorted servers look like [1] (f0) [2] (f1) [0] (f2). // At this point the sorted servers look like [1] (f0) [2] (f1) [0] (f2).
// Sleep for the retry delay and send in another query. Server #2 should be // Sleep for the retry delay and send in another query. Server #2 should be
// retried first, and then server #0. // retried first, and then server #0.
std::this_thread::sleep_for(std::chrono::milliseconds(260)); tv_now = std::chrono::high_resolution_clock::now();
delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10);
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Past retry delay, so will choose Server2 and Server0 that are down. Server2 will fail but Server0 will succeed." << std::endl;
EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[2].get(), &servfailrsp)); .WillOnce(SetReply(servers_[2].get(), &servfailrsp));
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &okrsp)); .WillOnce(SetReply(servers_[0].get(), &okrsp));
CheckExample(); CheckExample();
// Test might take a while to run and the sleep may not be accurate, so we
// want to track this interval otherwise we may not pass the last test case
// on slow systems.
auto elapse_start = tv_now;
// 4. If there are multiple failed servers, then servers which have not yet // 4. If there are multiple failed servers, then servers which have not yet
// met the retry delay should be skipped. // met the retry delay should be skipped.
// //
// The sorted servers currently look like [0] (f0) [1] (f0) [2] (f2) and // The sorted servers currently look like [0] (f0) [1] (f0) [2] (f2) and
// server #2 has just been retried. // server #2 has just been retried.
// Sleep for half the retry delay and trigger a failure on server #0. // Sleep for 1/2 the retry delay and trigger a failure on server #0.
std::this_thread::sleep_for(std::chrono::milliseconds(130)); tv_now = std::chrono::high_resolution_clock::now();
delay_ms = (SERVER_FAILOVER_RETRY_DELAY/2);
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Retry delay has not been hit yet. Server0 was last successful, so should be tried first (and will fail), Server1 is also healthy so will respond." << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)); .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))
@ -1456,10 +1488,22 @@ TEST_P(ServerFailoverOptsMockEventThreadTest, ServerFailoverOpts) {
CheckExample(); CheckExample();
// The sorted servers now look like [1] (f0) [0] (f1) [2] (f2). Server #0 // The sorted servers now look like [1] (f0) [0] (f1) [2] (f2). Server #0
// has just failed whilst server #2 is halfway through the retry delay. // has just failed whilst server #2 is somewhere in its retry delay.
// Sleep for another half the retry delay and check that server #2 is retried // Sleep until we know server #2s retry delay has elapsed but Server #0 has
// whilst server #0 is not. // not.
std::this_thread::sleep_for(std::chrono::milliseconds(130)); tv_now = std::chrono::high_resolution_clock::now();
unsigned int elapsed_time = (unsigned int)std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - elapse_start).count();
delay_ms = (SERVER_FAILOVER_RETRY_DELAY) + (SERVER_FAILOVER_RETRY_DELAY / 10);
if (elapsed_time > delay_ms) {
if (verbose) std::cerr << "elapsed duration " << elapsed_time << "ms greater than desired delay of " << delay_ms << "ms, not sleeping" << std::endl;
} else {
delay_ms -= elapsed_time; // subtract already elapsed time
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
}
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Retry delay has expired on Server2 but not Server0, will try on Server2 and fail, then Server1 will answer" << std::endl;
EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[2].get(), &servfailrsp)); .WillOnce(SetReply(servers_[2].get(), &servfailrsp));
EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))

@ -1559,20 +1559,6 @@ class NoRotateMultiMockTest : public MockMultiServerChannelTest {
NoRotateMultiMockTest() : MockMultiServerChannelTest(nullptr, ARES_OPT_NOROTATE) {} NoRotateMultiMockTest() : MockMultiServerChannelTest(nullptr, ARES_OPT_NOROTATE) {}
}; };
class ServerFailoverOptsMultiMockTest : public MockMultiServerChannelTest {
public:
ServerFailoverOptsMultiMockTest()
: MockMultiServerChannelTest(FillOptions(&opts_),
ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE) {}
static struct ares_options* FillOptions(struct ares_options *opts) {
memset(opts, 0, sizeof(struct ares_options));
opts->server_failover_opts.retry_chance = 1;
opts->server_failover_opts.retry_delay = 250;
return opts;
}
private:
struct ares_options opts_;
};
TEST_P(NoRotateMultiMockTest, ThirdServer) { TEST_P(NoRotateMultiMockTest, ThirdServer) {
struct ares_options opts; struct ares_options opts;
@ -1693,10 +1679,26 @@ TEST_P(NoRotateMultiMockTest, ServerNoResponseFailover) {
EXPECT_EQ("{'www.example.com' aliases=[] addrs=[2.3.4.5]}", ss4.str()); EXPECT_EQ("{'www.example.com' aliases=[] addrs=[2.3.4.5]}", ss4.str());
} }
#define SERVER_FAILOVER_RETRY_DELAY 750
class ServerFailoverOptsMultiMockTest : public MockMultiServerChannelTest {
public:
ServerFailoverOptsMultiMockTest()
: MockMultiServerChannelTest(FillOptions(&opts_),
ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE) {}
static struct ares_options* FillOptions(struct ares_options *opts) {
memset(opts, 0, sizeof(struct ares_options));
opts->server_failover_opts.retry_chance = 1;
opts->server_failover_opts.retry_delay = SERVER_FAILOVER_RETRY_DELAY;
return opts;
}
private:
struct ares_options opts_;
};
// Test case to trigger server failover behavior. We use a retry chance of // Test case to trigger server failover behavior. We use a retry chance of
// 100% and a retry delay of 250ms so that we can test behavior reliably. // 100% and a retry delay so that we can test behavior reliably.
TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) { TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) {
DNSPacket servfailrsp; DNSPacket servfailrsp;
servfailrsp.set_response().set_aa().set_rcode(SERVFAIL) servfailrsp.set_response().set_aa().set_rcode(SERVFAIL)
.add_question(new DNSQuestion("www.example.com", T_A)); .add_question(new DNSQuestion("www.example.com", T_A));
DNSPacket okrsp; DNSPacket okrsp;
@ -1704,7 +1706,12 @@ TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) {
.add_question(new DNSQuestion("www.example.com", T_A)) .add_question(new DNSQuestion("www.example.com", T_A))
.add_answer(new DNSARR("www.example.com", 100, {2,3,4,5})); .add_answer(new DNSARR("www.example.com", 100, {2,3,4,5}));
auto tv_begin = std::chrono::high_resolution_clock::now();
auto tv_now = std::chrono::high_resolution_clock::now();
unsigned int delay_ms;
// 1. If all servers are healthy, then the first server should be selected. // 1. If all servers are healthy, then the first server should be selected.
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: First server should be selected" << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &okrsp)); .WillOnce(SetReply(servers_[0].get(), &okrsp));
CheckExample(); CheckExample();
@ -1712,16 +1719,23 @@ TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) {
// 2. Failed servers should be retried after the retry delay. // 2. Failed servers should be retried after the retry delay.
// //
// Fail server #0 but leave server #1 as healthy. // Fail server #0 but leave server #1 as healthy.
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Server0 will fail but leave Server1 as healthy" << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)); .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[1].get(), &okrsp)); .WillOnce(SetReply(servers_[1].get(), &okrsp));
CheckExample(); CheckExample();
// Sleep for the retry delay (actually a little more than 250ms to account // Sleep for the retry delay (actually a little more than the retry delay to account
// for unreliable timing, e.g. NTP slew) and send in another query. Server #0 // for unreliable timing, e.g. NTP slew) and send in another query. Server #0
// should be retried. // should be retried.
std::this_thread::sleep_for(std::chrono::milliseconds(260)); tv_now = std::chrono::high_resolution_clock::now();
delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10);
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Server0 should be past retry delay and should be tried again successfully" << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &okrsp)); .WillOnce(SetReply(servers_[0].get(), &okrsp));
CheckExample(); CheckExample();
@ -1731,6 +1745,8 @@ TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) {
// //
// Fail all servers for the first round of tries. On the second round server // Fail all servers for the first round of tries. On the second round server
// #1 responds successfully. // #1 responds successfully.
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: All 3 servers will fail on the first attempt. On second attempt, Server0 will fail, but Server1 will answer correctly." << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)) .WillOnce(SetReply(servers_[0].get(), &servfailrsp))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)); .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
@ -1744,20 +1760,36 @@ TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) {
// At this point the sorted servers look like [1] (f0) [2] (f1) [0] (f2). // At this point the sorted servers look like [1] (f0) [2] (f1) [0] (f2).
// Sleep for the retry delay and send in another query. Server #2 should be // Sleep for the retry delay and send in another query. Server #2 should be
// retried first, and then server #0. // retried first, and then server #0.
std::this_thread::sleep_for(std::chrono::milliseconds(260)); tv_now = std::chrono::high_resolution_clock::now();
delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10);
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Past retry delay, so will choose Server2 and Server0 that are down. Server2 will fail but Server0 will succeed." << std::endl;
EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[2].get(), &servfailrsp)); .WillOnce(SetReply(servers_[2].get(), &servfailrsp));
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &okrsp)); .WillOnce(SetReply(servers_[0].get(), &okrsp));
CheckExample(); CheckExample();
// Test might take a while to run and the sleep may not be accurate, so we
// want to track this interval otherwise we may not pass the last test case
// on slow systems.
auto elapse_start = tv_now;
// 4. If there are multiple failed servers, then servers which have not yet // 4. If there are multiple failed servers, then servers which have not yet
// met the retry delay should be skipped. // met the retry delay should be skipped.
// //
// The sorted servers currently look like [0] (f0) [1] (f0) [2] (f2) and // The sorted servers currently look like [0] (f0) [1] (f0) [2] (f2) and
// server #2 has just been retried. // server #2 has just been retried.
// Sleep for half the retry delay and trigger a failure on server #0. // Sleep for 1/2 the retry delay and trigger a failure on server #0.
std::this_thread::sleep_for(std::chrono::milliseconds(130)); tv_now = std::chrono::high_resolution_clock::now();
delay_ms = (SERVER_FAILOVER_RETRY_DELAY/2);
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Retry delay has not been hit yet. Server0 was last successful, so should be tried first (and will fail), Server1 is also healthy so will respond." << std::endl;
EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[0].get(), &servfailrsp)); .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))
@ -1765,10 +1797,22 @@ TEST_P(ServerFailoverOptsMultiMockTest, ServerFailoverOpts) {
CheckExample(); CheckExample();
// The sorted servers now look like [1] (f0) [0] (f1) [2] (f2). Server #0 // The sorted servers now look like [1] (f0) [0] (f1) [2] (f2). Server #0
// has just failed whilst server #2 is halfway through the retry delay. // has just failed whilst server #2 is somewhere in its retry delay.
// Sleep for another half the retry delay and check that server #2 is retried // Sleep until we know server #2s retry delay has elapsed but Server #0 has
// whilst server #0 is not. // not.
std::this_thread::sleep_for(std::chrono::milliseconds(130)); tv_now = std::chrono::high_resolution_clock::now();
unsigned int elapsed_time = (unsigned int)std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - elapse_start).count();
delay_ms = (SERVER_FAILOVER_RETRY_DELAY) + (SERVER_FAILOVER_RETRY_DELAY / 10);
if (elapsed_time > delay_ms) {
if (verbose) std::cerr << "elapsed duration " << elapsed_time << "ms greater than desired delay of " << delay_ms << "ms, not sleeping" << std::endl;
} else {
delay_ms -= elapsed_time; // subtract already elapsed time
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
ares_sleep_time(delay_ms);
}
tv_now = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Retry delay has expired on Server2 but not Server0, will try on Server2 and fail, then Server1 will answer" << std::endl;
EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A))
.WillOnce(SetReply(servers_[2].get(), &servfailrsp)); .WillOnce(SetReply(servers_[2].get(), &servfailrsp));
EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A)) EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))

10
test/ares-test.cc vendored

@ -249,6 +249,16 @@ unsigned long long LibraryTest::fails_ = 0;
std::map<size_t, int> LibraryTest::size_fails_; std::map<size_t, int> LibraryTest::size_fails_;
std::mutex LibraryTest::lock_; std::mutex LibraryTest::lock_;
void ares_sleep_time(unsigned int ms)
{
auto duration = std::chrono::milliseconds(ms);
auto start_time = std::chrono::high_resolution_clock::now();
auto wake_time = start_time + duration;
std::this_thread::sleep_until(wake_time);
auto end_time = std::chrono::high_resolution_clock::now();
if (verbose) std::cerr << "sleep requested " << ms << "ms, slept for " << std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count() << "ms" << std::endl;
}
void ProcessWork(ares_channel_t *channel, void ProcessWork(ares_channel_t *channel,
std::function<std::set<ares_socket_t>()> get_extrafds, std::function<std::set<ares_socket_t>()> get_extrafds,
std::function<void(ares_socket_t)> process_extra, std::function<void(ares_socket_t)> process_extra,

2
test/ares-test.h vendored

@ -88,6 +88,8 @@ extern std::vector<std::tuple<ares_evsys_t, int>> evsys_families;
extern std::vector<std::pair<int, bool>> families_modes; extern std::vector<std::pair<int, bool>> families_modes;
extern std::vector<std::tuple<ares_evsys_t, int, bool>> evsys_families_modes; extern std::vector<std::tuple<ares_evsys_t, int, bool>> evsys_families_modes;
// Hopefully a more accurate sleep than sleep_for()
void ares_sleep_time(unsigned int ms);
// Process all pending work on ares-owned file descriptors, plus // Process all pending work on ares-owned file descriptors, plus
// optionally the given set-of-FDs + work function. // optionally the given set-of-FDs + work function.

Loading…
Cancel
Save