tests: reduce required testing time for ServerFailoverOpts on most platforms

7 months ago · c752915ced
parent fffe652eab
commit c752915ced
3 changed files with 20 additions and 146 deletions
--- a/test/ares-test-mock-ai.cc
+++ b/test/ares-test-mock-ai.cc
@ -773,147 +773,6 @@ TEST_P(NoRotateMultiMockTestAI, ThirdServer) {
  CheckExample();
 }

-#define SERVER_FAILOVER_RETRY_DELAY 750
-class ServerFailoverOptsMockTestAI : public MockMultiServerChannelTestAI {
- public:
-  ServerFailoverOptsMockTestAI()
-    : MockMultiServerChannelTestAI(FillOptions(&opts_),
-                                   ARES_OPT_SERVER_FAILOVER | ARES_OPT_NOROTATE) {}
-  static struct ares_options* FillOptions(struct ares_options *opts) {
-    memset(opts, 0, sizeof(struct ares_options));
-    opts->server_failover_opts.retry_chance = 1;
-    opts->server_failover_opts.retry_delay = SERVER_FAILOVER_RETRY_DELAY;
-    return opts;
-  }
- private:
-  struct ares_options opts_;
-};
-
-// Test case to trigger server failover behavior. We use a retry chance of
-// 100% and a retry delay so that we can test behavior reliably.
-TEST_P(ServerFailoverOptsMockTestAI, ServerFailoverOpts) {
- DNSPacket servfailrsp;
-  servfailrsp.set_response().set_aa().set_rcode(SERVFAIL)
-    .add_question(new DNSQuestion("www.example.com", T_A));
-  DNSPacket okrsp;
-  okrsp.set_response().set_aa()
-    .add_question(new DNSQuestion("www.example.com", T_A))
-    .add_answer(new DNSARR("www.example.com", 100, {2,3,4,5}));
-
-  auto tv_begin = std::chrono::high_resolution_clock::now();
-  auto tv_now   = std::chrono::high_resolution_clock::now();
-  unsigned int delay_ms;
-
-  // 1. If all servers are healthy, then the first server should be selected.
-  if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: First server should be selected" << std::endl;
-  EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[0].get(), &okrsp));
-  CheckExample();
-
-  // 2. Failed servers should be retried after the retry delay.
-  //
-  // Fail server #0 but leave server #1 as healthy.
-  tv_now = std::chrono::high_resolution_clock::now();
-  if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Server0 will fail but leave Server1 as healthy" << std::endl;
-  EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
-  EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[1].get(), &okrsp));
-  CheckExample();
-
-  // Sleep for the retry delay (actually a little more than the retry delay to account
-  // for unreliable timing, e.g. NTP slew) and send in another query. Server #0
-  // should be retried.
-  tv_now = std::chrono::high_resolution_clock::now();
-  delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10);
-  if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
-  ares_sleep_time(delay_ms);
-  tv_now = std::chrono::high_resolution_clock::now();
-  if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Server0 should be past retry delay and should be tried again successfully" << std::endl;
-  EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[0].get(), &okrsp));
-  CheckExample();
-
-  // 3. If there are multiple failed servers, then the servers should be
-  //    retried in sorted order.
-  //
-  // Fail all servers for the first round of tries. On the second round server
-  // #1 responds successfully.
-  tv_now = std::chrono::high_resolution_clock::now();
-  if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: All 3 servers will fail on the first attempt. On second attempt, Server0 will fail, but Server1 will answer correctly." << std::endl;
-  EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[0].get(), &servfailrsp))
-    .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
-  EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[1].get(), &servfailrsp))
-    .WillOnce(SetReply(servers_[1].get(), &okrsp));
-  EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[2].get(), &servfailrsp));
-  CheckExample();
-
-  // At this point the sorted servers look like [1] (f0) [2] (f1) [0] (f2).
-  // Sleep for the retry delay and send in another query. Server #2 should be
-  // retried first, and then server #0.
-  tv_now = std::chrono::high_resolution_clock::now();
-  delay_ms = SERVER_FAILOVER_RETRY_DELAY + (SERVER_FAILOVER_RETRY_DELAY / 10);
-  if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
-  ares_sleep_time(delay_ms);
-  tv_now = std::chrono::high_resolution_clock::now();
-  if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Past retry delay, so will choose Server2 and Server0 that are down. Server2 will fail but Server0 will succeed." << std::endl;
-  EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[2].get(), &servfailrsp));
-  EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[0].get(), &okrsp));
-  CheckExample();
-
-  // Test might take a while to run and the sleep may not be accurate, so we
-  // want to track this interval otherwise we may not pass the last test case
-  // on slow systems.
-  auto elapse_start = tv_now;
-
-  // 4. If there are multiple failed servers, then servers which have not yet
-  //    met the retry delay should be skipped.
-  //
-  // The sorted servers currently look like [0] (f0) [1] (f0) [2] (f2) and
-  // server #2 has just been retried.
-  // Sleep for 1/2 the retry delay and trigger a failure on server #0.
-  tv_now = std::chrono::high_resolution_clock::now();
-  delay_ms = (SERVER_FAILOVER_RETRY_DELAY/2);
-  if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
-  ares_sleep_time(delay_ms);
-  tv_now = std::chrono::high_resolution_clock::now();
-
-  if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Retry delay has not been hit yet. Server0 was last successful, so should be tried first (and will fail), Server1 is also healthy so will respond." << std::endl;
-  EXPECT_CALL(*servers_[0], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[0].get(), &servfailrsp));
-  EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[1].get(), &okrsp));
-  CheckExample();
-
-  // The sorted servers now look like [1] (f0) [0] (f1) [2] (f2). Server #0
-  // has just failed whilst server #2 is somewhere in its retry delay.
-  // Sleep until we know server #2s retry delay has elapsed but Server #0 has
-  // not.
-  tv_now = std::chrono::high_resolution_clock::now();
-
-  unsigned int elapsed_time = (unsigned int)std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - elapse_start).count();
-  delay_ms = (SERVER_FAILOVER_RETRY_DELAY) + (SERVER_FAILOVER_RETRY_DELAY / 10);
-  if (elapsed_time > delay_ms) {
-    if (verbose) std::cerr << "elapsed duration " << elapsed_time << "ms greater than desired delay of " << delay_ms << "ms, not sleeping" << std::endl;
-  } else {
-    delay_ms -= elapsed_time; // subtract already elapsed time
-    if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: sleep " << delay_ms << "ms" << std::endl;
-    ares_sleep_time(delay_ms);
-  }
-  tv_now = std::chrono::high_resolution_clock::now();
-  if (verbose) std::cerr << std::chrono::duration_cast<std::chrono::milliseconds>(tv_now - tv_begin).count() << "ms: Retry delay has expired on Server2 but not Server0, will try on Server2 and fail, then Server1 will answer" << std::endl;
-  EXPECT_CALL(*servers_[2], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[2].get(), &servfailrsp));
-  EXPECT_CALL(*servers_[1], OnRequest("www.example.com", T_A))
-    .WillOnce(SetReply(servers_[1].get(), &okrsp));
-  CheckExample();
-}
-
 TEST_P(MockChannelTestAI, FamilyV4ServiceName) {
  DNSPacket rsp4;
  rsp4.set_response().set_aa()
@ -961,8 +820,5 @@ INSTANTIATE_TEST_SUITE_P(AddressFamiliesAI, MockEDNSChannelTestAI,
 INSTANTIATE_TEST_SUITE_P(TransportModesAI, NoRotateMultiMockTestAI,
 			::testing::ValuesIn(ares::test::families_modes), PrintFamilyMode);

-INSTANTIATE_TEST_SUITE_P(TransportModesAI, ServerFailoverOptsMockTestAI,
-			::testing::ValuesIn(ares::test::families_modes), PrintFamilyMode);
-
 }  // namespace test
 }  // namespace ares
--- a/test/ares-test-mock-et.cc
+++ b/test/ares-test-mock-et.cc
@ -1368,7 +1368,16 @@ TEST_P(NoRotateMultiMockEventThreadTest, ServerNoResponseFailover) {
  EXPECT_EQ("{'www.example.com' aliases=[] addrs=[2.3.4.5]}", ss4.str());
 }

-#define SERVER_FAILOVER_RETRY_DELAY 750
+#ifdef __APPLE__
+/* Apple's sleep is nowhere near accurate, especially on a heavily loaded
+ * system.  It's possible we could set a realtime priority on a thread to
+ * help with this, but for now, just increase the delay */
+#  define SERVER_FAILOVER_RETRY_DELAY 750
+#elif defined(_WIN32)
+#  define SERVER_FAILOVER_RETRY_DELAY 500
+#else
+#  define SERVER_FAILOVER_RETRY_DELAY 250
+#endif
 class ServerFailoverOptsMockEventThreadTest : public MockMultiServerEventThreadTest {
 public:
  ServerFailoverOptsMockEventThreadTest()
--- a/test/ares-test-mock.cc
+++ b/test/ares-test-mock.cc
@ -1662,7 +1662,16 @@ TEST_P(NoRotateMultiMockTest, ServerNoResponseFailover) {
  EXPECT_EQ("{'www.example.com' aliases=[] addrs=[2.3.4.5]}", ss4.str());
 }

-#define SERVER_FAILOVER_RETRY_DELAY 750
+#ifdef __APPLE__
+/* Apple's sleep is nowhere near accurate, especially on a heavily loaded
+ * system.  It's possible we could set a realtime priority on a thread to
+ * help with this, but for now, just increase the delay */
+#  define SERVER_FAILOVER_RETRY_DELAY 750
+#elif defined(_WIN32)
+#  define SERVER_FAILOVER_RETRY_DELAY 500
+#else
+#  define SERVER_FAILOVER_RETRY_DELAY 250
+#endif
 class ServerFailoverOptsMultiMockTest : public MockMultiServerChannelTest {
 public:
  ServerFailoverOptsMultiMockTest()