From 7c206579ab4cd3fd02dce320520d902a6042f308 Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Fri, 9 Dec 2022 08:54:24 -0800 Subject: [PATCH] [time] Fix multiple second stall at startup of mac/ios apps (#31844) Quick explainer (this is gross): - `grpc_core::Timestamp` measures millis since an arbitrary epoch, and on 32-bit platforms is 32-bit precise - `gpr_now(GPR_CLOCK_MONOTONIC)` measures nanos since a different arbitrary epoch - since Timestamp has limited range, we choose its epoch to roughly correlate with process startup to avoid overflow - since we have static initialization problems on some platforms we choose 0 as an uninitialized designator for that epoch - since we have bugs elsewhere, we insist that time values are >1 second from Timestamp - to deal with all that, if we get `gpr_now(GPR_CLOCK_MONOTONIC)` values that are close to 0 or 1 seconds we wait for a little while to get values that will work better later - on mac and windows gpr_now(GPR_CLOCK_MONOTONIC) was setting its epoch to be process startup exactly, and so we hit this all the time This PR fixes some bugs in initialization of the Timestamp epoch and clarifies the code somewhat, and (more importantly) shifts the Windows and Mac epochs five seconds prior to process startup, guaranteeing that the logic in `grpc_core::Timestamp`'s epoch sees values that it can immediately use in all cases and eliminating the need for the pause. --- src/core/lib/gpr/time_posix.cc | 5 ++++- src/core/lib/gpr/time_windows.cc | 4 +++- src/core/lib/gprpp/time.cc | 18 ++++++++++++------ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/core/lib/gpr/time_posix.cc b/src/core/lib/gpr/time_posix.cc index 260d32721e5..93421c8155b 100644 --- a/src/core/lib/gpr/time_posix.cc +++ b/src/core/lib/gpr/time_posix.cc @@ -113,7 +113,10 @@ static gpr_timespec now_impl(gpr_clock_type clock) { now.tv_nsec = now_tv.tv_usec * 1000; break; case GPR_CLOCK_MONOTONIC: - now_dbl = ((double)(mach_absolute_time() - g_time_start)) * g_time_scale; + // Add 5 seconds arbitrarily: avoids weird conditions in gprpp/time.cc + // when there's a small number of seconds returned. + now_dbl = 5.0e9 + + ((double)(mach_absolute_time() - g_time_start)) * g_time_scale; now.tv_sec = (int64_t)(now_dbl * 1e-9); now.tv_nsec = (int32_t)(now_dbl - ((double)now.tv_sec) * 1e9); break; diff --git a/src/core/lib/gpr/time_windows.cc b/src/core/lib/gpr/time_windows.cc index 57323fb8a07..3058b48a87b 100644 --- a/src/core/lib/gpr/time_windows.cc +++ b/src/core/lib/gpr/time_windows.cc @@ -61,7 +61,9 @@ static gpr_timespec now_impl(gpr_clock_type clock) { case GPR_CLOCK_PRECISE: QueryPerformanceCounter(×tamp); diff = timestamp.QuadPart - g_start_time.QuadPart; - now_dbl = (double)diff * g_time_scale; + // Add an arbitrary 5 seconds to the monotonic clock so we don't + // immediately return close to zero. + now_dbl = 5.0 + (double)diff * g_time_scale; now_tv.tv_sec = (int64_t)now_dbl; now_tv.tv_nsec = (int32_t)((now_dbl - (double)now_tv.tv_sec) * 1e9); break; diff --git a/src/core/lib/gprpp/time.cc b/src/core/lib/gprpp/time.cc index 054ce89094d..6520ce963fd 100644 --- a/src/core/lib/gprpp/time.cc +++ b/src/core/lib/gprpp/time.cc @@ -50,20 +50,26 @@ GPR_ATTRIBUTE_NOINLINE std::pair InitTime() { int64_t process_epoch_seconds = 0; // Check the current time... if we end up with zero, try again after 100ms. - // If it doesn't advance after sleeping for 1100ms, crash the process. - for (int i = 0; i < 11; i++) { + // If it doesn't advance after sleeping for 2100ms, crash the process. + for (int i = 0; i < 21; i++) { cycles_start = gpr_get_cycle_counter(); gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC); cycles_end = gpr_get_cycle_counter(); - process_epoch_seconds = now.tv_sec - 1; - if (process_epoch_seconds != 0) { + process_epoch_seconds = now.tv_sec; + if (process_epoch_seconds > 1) { break; } + gpr_log(GPR_INFO, + "gpr_now(GPR_CLOCK_MONOTONIC) returns a very small number: " + "sleeping for 100ms"); gpr_sleep_until(gpr_time_add(now, gpr_time_from_millis(100, GPR_TIMESPAN))); } - // Time does not seem to be increasing from zero... - GPR_ASSERT(process_epoch_seconds != 0); + // Check time has increased past 1 second. + GPR_ASSERT(process_epoch_seconds > 1); + // Fake the epoch to always return >=1 second from our monotonic clock (to + // avoid bugs elsewhere) + process_epoch_seconds -= 1; int64_t expected = 0; gpr_cycle_counter process_epoch_cycles = (cycles_start + cycles_end) / 2; GPR_ASSERT(process_epoch_cycles != 0);