From 08b7692703f0c6892ebfaf9c051dc263a3dd39bd Mon Sep 17 00:00:00 2001 From: Brad House Date: Sat, 3 Aug 2024 11:39:28 -0400 Subject: [PATCH] tests: MacOS needs higher priority on CI systems (#849) On CI systems that can be overloaded, things like usleep() and select() may not closely honor their timeouts, and can often be a multiple of the requested timeout. Some tests, out of necessity need to rely on accurate timing in order to test timeout conditions so this means test failures when the skew is too large. Short of increasing timeouts to a point that would make tests take an unreasonable amount of time, the alternative is to make the OS honor the requested timeout more accurately. On MacOS this means to set a realtime thread priority for the tests. Other projects like libuv do this same thing. The code is taken from: https://developer.apple.com/library/archive/technotes/tn2169/_index.html Authored-By: Brad House (@bradh352) --- test/ares-test-main.cc | 42 +++++++++++++++++++++++++++++++++++++++ test/ares-test-mock-et.cc | 9 +++------ test/ares-test-mock.cc | 9 ++------- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/test/ares-test-main.cc b/test/ares-test-main.cc index f0dfeb5e..d16f34b1 100644 --- a/test/ares-test-main.cc +++ b/test/ares-test-main.cc @@ -28,6 +28,39 @@ #include "ares-test.h" +#ifdef __APPLE__ +# include <mach/mach.h> +# include <mach/mach_time.h> +# include <pthread.h> + +static void thread_set_realtime(pthread_t pthread) +{ + mach_timebase_info_data_t timebase_info; + const uint64_t NANOS_PER_MSEC = 1000000ULL; + double clock2abs; + int rv; + thread_time_constraint_policy_data_t policy; + + mach_timebase_info(&timebase_info); + clock2abs = ((double)timebase_info.denom / (double)timebase_info.numer) + * NANOS_PER_MSEC; + + policy.period = 0; + policy.computation = (uint32_t)(5 * clock2abs); // 5 ms of work + policy.constraint = (uint32_t)(10 * clock2abs); + policy.preemptible = FALSE; + + rv = thread_policy_set(pthread_mach_thread_np(pthread), 
THREAD_TIME_CONSTRAINT_POLICY, + (thread_policy_t)&policy, + THREAD_TIME_CONSTRAINT_POLICY_COUNT); + if (rv != KERN_SUCCESS) { + mach_error("thread_policy_set:", rv); + exit(1); + } +} +#endif + int main(int argc, char* argv[]) { std::vector<char*> gtest_argv = {argv[0]}; for (int ii = 1; ii < argc; ii++) { @@ -62,6 +95,15 @@ int main(int argc, char* argv[]) { signal(SIGPIPE, SIG_IGN); #endif +#ifdef __APPLE__ + /* We need to increase the priority in order for some timing-sensitive tests + * to succeed reliably. On CI systems, the host can be overloaded and things + * like sleep timers can wait many multiples of the time specified otherwise. + * This is sort of a necessary hack for test reliability. Not something that + * would generally be used */ + thread_set_realtime(pthread_self()); +#endif + int rc = RUN_ALL_TESTS(); #ifdef WIN32 diff --git a/test/ares-test-mock-et.cc b/test/ares-test-mock-et.cc index 1ee15c43..65fb41b5 100644 --- a/test/ares-test-mock-et.cc +++ b/test/ares-test-mock-et.cc @@ -1416,13 +1416,10 @@ TEST_P(NoRotateMultiMockEventThreadTest, ServerNoResponseFailover) { EXPECT_EQ("{'www.example.com' aliases=[] addrs=[2.3.4.5]}", ss4.str()); } -#ifdef __APPLE__ -/* Apple's sleep is no where near accurate, especially on a heavily loaded - * system. 
Its possible we could set a realtime priority on a thread to - * help with this, but for now, just increase the delay */ -# define SERVER_FAILOVER_RETRY_DELAY 750 -#else +#if defined(_WIN32) # define SERVER_FAILOVER_RETRY_DELAY 500 +#else +# define SERVER_FAILOVER_RETRY_DELAY 330 #endif class ServerFailoverOptsMockEventThreadTest : public MockMultiServerEventThreadTest { public: diff --git a/test/ares-test-mock.cc b/test/ares-test-mock.cc index dfa51794..dbebaa20 100644 --- a/test/ares-test-mock.cc +++ b/test/ares-test-mock.cc @@ -2008,15 +2008,10 @@ TEST_P(NoRotateMultiMockTest, ServerNoResponseFailover) { EXPECT_EQ("{'www.example.com' aliases=[] addrs=[2.3.4.5]}", ss4.str()); } -#ifdef __APPLE__ -/* Apple's sleep is no where near accurate, especially on a heavily loaded - * system. Its possible we could set a realtime priority on a thread to - * help with this, but for now, just increase the delay */ -# define SERVER_FAILOVER_RETRY_DELAY 750 -#elif defined(_WIN32) +#if defined(_WIN32) # define SERVER_FAILOVER_RETRY_DELAY 500 #else -# define SERVER_FAILOVER_RETRY_DELAY 250 +# define SERVER_FAILOVER_RETRY_DELAY 330 #endif class ServerFailoverOptsMultiMockTest : public MockMultiServerChannelTest { public: