@@ -15,6 +15,7 @@
#include "absl/time/clock.h"

#include "absl/base/attributes.h"
#include "absl/base/optimization.h"
#ifdef _WIN32
#include <windows.h>
@@ -85,13 +86,6 @@ ABSL_NAMESPACE_END
  ::absl::time_internal::UnscaledCycleClockWrapperForGetCurrentTime::Now()
#endif

// The following counters are used only by the test code.
static int64_t stats_initializations;
static int64_t stats_reinitializations;
static int64_t stats_calibrations;
static int64_t stats_slow_paths;
static int64_t stats_fast_slow_paths;

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace time_internal {
@@ -105,72 +99,6 @@ class UnscaledCycleClockWrapperForGetCurrentTime {
// uint64_t is used in this module to provide an extra bit in multiplications

// Return the time in ns as told by the kernel interface.  Place in *cycleclock
// the value of the cycleclock at about the time of the syscall.
// This call represents the time base that this module synchronizes to.
// Ensures that *cycleclock does not step back by up to (1 << 16) from
// last_cycleclock, to discard small backward counter steps.  (Larger steps are
// assumed to be complete resyncs, which shouldn't happen.  If they do, a full
// reinitialization of the outer algorithm should occur.)
static int64_t GetCurrentTimeNanosFromKernel(uint64_t last_cycleclock,
                                             uint64_t *cycleclock) {
  // We try to read clock values at about the same time as the kernel clock.
  // This value gets adjusted up or down as estimate of how long that should
  // take, so we can reject attempts that take unusually long.
  static std::atomic<uint64_t> approx_syscall_time_in_cycles{10 * 1000};

  uint64_t local_approx_syscall_time_in_cycles =  // local copy
      approx_syscall_time_in_cycles.load(std::memory_order_relaxed);

  int64_t current_time_nanos_from_system;
  uint64_t before_cycles;
  uint64_t after_cycles;
  uint64_t elapsed_cycles;
  int loops = 0;
  do {
    before_cycles = GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW();
    current_time_nanos_from_system = GET_CURRENT_TIME_NANOS_FROM_SYSTEM();
    after_cycles = GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW();
    // elapsed_cycles is unsigned, so is large on overflow
    elapsed_cycles = after_cycles - before_cycles;
    if (elapsed_cycles >= local_approx_syscall_time_in_cycles &&
        ++loops == 20) {  // clock changed frequencies?  Back off.
      loops = 0;
      if (local_approx_syscall_time_in_cycles < 1000 * 1000) {
        local_approx_syscall_time_in_cycles =
            (local_approx_syscall_time_in_cycles + 1) << 1;
      }
      approx_syscall_time_in_cycles.store(
          local_approx_syscall_time_in_cycles,
          std::memory_order_relaxed);
    }
  } while (elapsed_cycles >= local_approx_syscall_time_in_cycles ||
           last_cycleclock - after_cycles < (static_cast<uint64_t>(1) << 16));

  // Number of times in a row we've seen a kernel time call take substantially
  // less than approx_syscall_time_in_cycles.
  static std::atomic<uint32_t> seen_smaller{0};

  // Adjust approx_syscall_time_in_cycles to be within a factor of 2
  // of the typical time to execute one iteration of the loop above.
  if ((local_approx_syscall_time_in_cycles >> 1) < elapsed_cycles) {
    // measured time is no smaller than half current approximation
    seen_smaller.store(0, std::memory_order_relaxed);
  } else if (seen_smaller.fetch_add(1, std::memory_order_relaxed) >= 3) {
    // smaller delays several times in a row; reduce approximation by 12.5%
    const uint64_t new_approximation =
        local_approx_syscall_time_in_cycles -
        (local_approx_syscall_time_in_cycles >> 3);
    approx_syscall_time_in_cycles.store(new_approximation,
                                        std::memory_order_relaxed);
    seen_smaller.store(0, std::memory_order_relaxed);
  }

  *cycleclock = after_cycles;
  return current_time_nanos_from_system;
}

// ---------------------------------------------------------------------
// An implementation of reader-write locks that use no atomic ops in the read
// case.  This is a generalization of Lamport's method for reading a multiword
@@ -222,32 +150,110 @@ static_assert(((kMinNSBetweenSamples << (kScale + 1)) >> (kScale + 1)) ==
                  kMinNSBetweenSamples,
              "cannot represent kMaxBetweenSamplesNSScaled");

// A reader-writer lock protecting the static locations below.
// See SeqAcquire() and SeqRelease() above.
ABSL_CONST_INIT static absl::base_internal::SpinLock lock(
    absl::kConstInit, base_internal::SCHEDULE_KERNEL_ONLY);
ABSL_CONST_INIT static std::atomic<uint64_t> seq(0);

// data from a sample of the kernel's time value
struct TimeSampleAtomic {
  std::atomic<uint64_t> raw_ns;              // raw kernel time
  std::atomic<uint64_t> base_ns;             // our estimate of time
  std::atomic<uint64_t> base_cycles;         // cycle counter reading
  std::atomic<uint64_t> nsscaled_per_cycle;  // cycle period
  std::atomic<uint64_t> raw_ns{0};              // raw kernel time
  std::atomic<uint64_t> base_ns{0};             // our estimate of time
  std::atomic<uint64_t> base_cycles{0};         // cycle counter reading
  std::atomic<uint64_t> nsscaled_per_cycle{0};  // cycle period
  // cycles before we'll sample again (a scaled reciprocal of the period,
  // to avoid a division on the fast path).
  std::atomic<uint64_t> min_cycles_per_sample;
  std::atomic<uint64_t> min_cycles_per_sample{0};
};

// Same again, but with non-atomic types
struct TimeSample {
  uint64_t raw_ns;                 // raw kernel time
  uint64_t base_ns;                // our estimate of time
  uint64_t base_cycles;            // cycle counter reading
  uint64_t nsscaled_per_cycle;     // cycle period
  uint64_t min_cycles_per_sample;  // approx cycles before next sample
  uint64_t raw_ns = 0;                 // raw kernel time
  uint64_t base_ns = 0;                // our estimate of time
  uint64_t base_cycles = 0;            // cycle counter reading
  uint64_t nsscaled_per_cycle = 0;     // cycle period
  uint64_t min_cycles_per_sample = 0;  // approx cycles before next sample
};
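
// Illustrative sketch (an assumption; the real definition lives elsewhere in
// this file and is not part of this change): ReadTimeSampleAtomic(), called by
// the slow path below, presumably copies a TimeSampleAtomic into a plain
// TimeSample with relaxed loads; consistency comes from the seqlock, not from
// the loads themselves.
//
//   static void ReadTimeSampleAtomic(const struct TimeSampleAtomic *atomic_sample,
//                                    struct TimeSample *sample) {
//     sample->base_ns = atomic_sample->base_ns.load(std::memory_order_relaxed);
//     sample->base_cycles =
//         atomic_sample->base_cycles.load(std::memory_order_relaxed);
//     sample->nsscaled_per_cycle =
//         atomic_sample->nsscaled_per_cycle.load(std::memory_order_relaxed);
//     sample->min_cycles_per_sample =
//         atomic_sample->min_cycles_per_sample.load(std::memory_order_relaxed);
//     sample->raw_ns = atomic_sample->raw_ns.load(std::memory_order_relaxed);
//   }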
static struct TimeSampleAtomic last_sample;  // the last sample; under seq

struct ABSL_CACHELINE_ALIGNED TimeState {
  std::atomic<uint64_t> seq{0};
  TimeSampleAtomic last_sample;  // the last sample; under seq

  // The following counters are used only by the test code.
  int64_t stats_initializations{0};
  int64_t stats_reinitializations{0};
  int64_t stats_calibrations{0};
  int64_t stats_slow_paths{0};
  int64_t stats_fast_slow_paths{0};

  uint64_t last_now_cycles ABSL_GUARDED_BY(lock){0};

  // Used by GetCurrentTimeNanosFromKernel().
  // We try to read clock values at about the same time as the kernel clock.
  // This value gets adjusted up or down as estimate of how long that should
  // take, so we can reject attempts that take unusually long.
  std::atomic<uint64_t> approx_syscall_time_in_cycles{10 * 1000};

  // Number of times in a row we've seen a kernel time call take substantially
  // less than approx_syscall_time_in_cycles.
  std::atomic<uint32_t> kernel_time_seen_smaller{0};

  // A reader-writer lock protecting the static locations below.
  // See SeqAcquire() and SeqRelease() above.
  absl::base_internal::SpinLock lock{absl::kConstInit,
                                     base_internal::SCHEDULE_KERNEL_ONLY};
};
ABSL_CONST_INIT static TimeState time_state{};
// Return the time in ns as told by the kernel interface.  Place in *cycleclock
// the value of the cycleclock at about the time of the syscall.
// This call represents the time base that this module synchronizes to.
// Ensures that *cycleclock does not step back by up to (1 << 16) from
// last_cycleclock, to discard small backward counter steps.  (Larger steps are
// assumed to be complete resyncs, which shouldn't happen.  If they do, a full
// reinitialization of the outer algorithm should occur.)
static int64_t GetCurrentTimeNanosFromKernel(uint64_t last_cycleclock,
                                             uint64_t *cycleclock)
    ABSL_EXCLUSIVE_LOCKS_REQUIRED(time_state.lock) {
  uint64_t local_approx_syscall_time_in_cycles =  // local copy
      time_state.approx_syscall_time_in_cycles.load(std::memory_order_relaxed);

  int64_t current_time_nanos_from_system;
  uint64_t before_cycles;
  uint64_t after_cycles;
  uint64_t elapsed_cycles;
  int loops = 0;
  do {
    before_cycles = GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW();
    current_time_nanos_from_system = GET_CURRENT_TIME_NANOS_FROM_SYSTEM();
    after_cycles = GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW();
    // elapsed_cycles is unsigned, so is large on overflow
    elapsed_cycles = after_cycles - before_cycles;
    if (elapsed_cycles >= local_approx_syscall_time_in_cycles &&
        ++loops == 20) {  // clock changed frequencies?  Back off.
      loops = 0;
      if (local_approx_syscall_time_in_cycles < 1000 * 1000) {
        local_approx_syscall_time_in_cycles =
            (local_approx_syscall_time_in_cycles + 1) << 1;
      }
      time_state.approx_syscall_time_in_cycles.store(
          local_approx_syscall_time_in_cycles, std::memory_order_relaxed);
    }
  } while (elapsed_cycles >= local_approx_syscall_time_in_cycles ||
           last_cycleclock - after_cycles < (static_cast<uint64_t>(1) << 16));

  // Adjust approx_syscall_time_in_cycles to be within a factor of 2
  // of the typical time to execute one iteration of the loop above.
  if ((local_approx_syscall_time_in_cycles >> 1) < elapsed_cycles) {
    // measured time is no smaller than half current approximation
    time_state.kernel_time_seen_smaller.store(0, std::memory_order_relaxed);
  } else if (time_state.kernel_time_seen_smaller.fetch_add(
                 1, std::memory_order_relaxed) >= 3) {
    // smaller delays several times in a row; reduce approximation by 12.5%
    const uint64_t new_approximation =
        local_approx_syscall_time_in_cycles -
        (local_approx_syscall_time_in_cycles >> 3);
    time_state.approx_syscall_time_in_cycles.store(new_approximation,
                                                   std::memory_order_relaxed);
    time_state.kernel_time_seen_smaller.store(0, std::memory_order_relaxed);
  }

  *cycleclock = after_cycles;
  return current_time_nanos_from_system;
}
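
// Worked example of the loop's backward-step check above, with illustrative
// values: if last_cycleclock == 1000000 and after_cycles == 999900 (a backward
// step of 100, which is less than (1 << 16) == 65536), the unsigned difference
// is 100, the condition holds, and the loop retries.  If instead
// after_cycles == 1000050 (the counter moved forward), the unsigned difference
// wraps to roughly 2^64, the condition fails, and the sample is accepted
// (assuming elapsed_cycles was also small enough).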
static int64_t GetCurrentTimeNanosSlowPath() ABSL_ATTRIBUTE_COLD;

@@ -315,14 +321,15 @@ int64_t GetCurrentTimeNanos() {
  // Acquire pairs with the barrier in SeqRelease - if this load sees that
  // store, the shared-data reads necessarily see that SeqRelease's updates
  // to the same shared data.
  seq_read0 = seq.load(std::memory_order_acquire);
  seq_read0 = time_state.seq.load(std::memory_order_acquire);

  base_ns = last_sample.base_ns.load(std::memory_order_relaxed);
  base_cycles = last_sample.base_cycles.load(std::memory_order_relaxed);
  base_ns = time_state.last_sample.base_ns.load(std::memory_order_relaxed);
  base_cycles =
      time_state.last_sample.base_cycles.load(std::memory_order_relaxed);
  nsscaled_per_cycle =
      last_sample.nsscaled_per_cycle.load(std::memory_order_relaxed);
  min_cycles_per_sample =
      last_sample.min_cycles_per_sample.load(std::memory_order_relaxed);
      time_state.last_sample.nsscaled_per_cycle.load(std::memory_order_relaxed);
  min_cycles_per_sample = time_state.last_sample.min_cycles_per_sample.load(
      std::memory_order_relaxed);

  // This acquire fence pairs with the release fence in SeqAcquire.  Since it
  // is sequenced between reads of shared data and seq_read1, the reads of
@@ -333,7 +340,7 @@ int64_t GetCurrentTimeNanos() {
  // shared-data writes are effectively release ordered.  Therefore if our
  // shared-data reads see any of a particular update's shared-data writes,
  // seq_read1 is guaranteed to see that update's SeqAcquire.
  seq_read1 = seq.load(std::memory_order_relaxed);
  seq_read1 = time_state.seq.load(std::memory_order_relaxed);

  // Fast path.  Return if min_cycles_per_sample has not yet elapsed since the
  // last sample, and we read a consistent sample.  The fast path activates
@@ -346,9 +353,9 @@ int64_t GetCurrentTimeNanos() {
  // last_sample was updated).  This is harmless, because delta_cycles will wrap
  // and report a time much much bigger than min_cycles_per_sample.  In that case
  // we will take the slow path.
  uint64_t delta_cycles = now_cycles - base_cycles;
  uint64_t delta_cycles;
  if (seq_read0 == seq_read1 && (seq_read0 & 1) == 0 &&
      delta_cycles < min_cycles_per_sample) {
      (delta_cycles = now_cycles - base_cycles) < min_cycles_per_sample) {
    return base_ns + ((delta_cycles * nsscaled_per_cycle) >> kScale);
  }
  return GetCurrentTimeNanosSlowPath();
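
  // Worked example of the fast-path arithmetic above, with illustrative values
  // (kScale is defined earlier in this file; 30 is assumed here only for the
  // example): on a hypothetical 3 GHz cycle counter, nsscaled_per_cycle is
  // about (1/3) * 2^30, i.e. 357913941.  With delta_cycles == 3000000 (about
  // 1 ms), (delta_cycles * nsscaled_per_cycle) >> kScale is about
  // 1073741823000000 >> 30, i.e. roughly 1000000 ns, so the fast path yields
  // base_ns plus about 1 ms using only a multiply and a shift, no division.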
@@ -388,24 +395,25 @@ static uint64_t UpdateLastSample(
// TODO(absl-team): Remove this attribute when our compiler is smart enough
// to do the right thing.
ABSL_ATTRIBUTE_NOINLINE
static int64_t GetCurrentTimeNanosSlowPath() ABSL_LOCKS_EXCLUDED(lock) {
static int64_t GetCurrentTimeNanosSlowPath()
    ABSL_LOCKS_EXCLUDED(time_state.lock) {
  // Serialize access to slow-path.  Fast-path readers are not blocked yet, and
  // code below must not modify last_sample until the seqlock is acquired.
  lock.Lock();
  time_state.lock.Lock();

  // Sample the kernel time base.  This is the definition of
  // "now" if we take the slow path.
  static uint64_t last_now_cycles;  // protected by lock
  uint64_t now_cycles;
  uint64_t now_ns = GetCurrentTimeNanosFromKernel(last_now_cycles, &now_cycles);
  last_now_cycles = now_cycles;
  uint64_t now_ns =
      GetCurrentTimeNanosFromKernel(time_state.last_now_cycles, &now_cycles);
  time_state.last_now_cycles = now_cycles;

  uint64_t estimated_base_ns;

  // ----------
  // Read the "last_sample" values again; this time holding the write lock.
  struct TimeSample sample;
  ReadTimeSampleAtomic(&last_sample, &sample);
  ReadTimeSampleAtomic(&time_state.last_sample, &sample);

  // ----------
  // Try running the fast path again; another thread may have updated the
@@ -416,13 +424,13 @@ static int64_t GetCurrentTimeNanosSlowPath() ABSL_LOCKS_EXCLUDED(lock) {
    // so that blocked readers can make progress without blocking new readers.
    estimated_base_ns = sample.base_ns +
        ((delta_cycles * sample.nsscaled_per_cycle) >> kScale);
    stats_fast_slow_paths++;
    time_state.stats_fast_slow_paths++;
  } else {
    estimated_base_ns =
        UpdateLastSample(now_cycles, now_ns, delta_cycles, &sample);
  }

  lock.Unlock();
  time_state.lock.Unlock();

  return estimated_base_ns;
}
@@ -433,9 +441,10 @@ static int64_t GetCurrentTimeNanosSlowPath() ABSL_LOCKS_EXCLUDED(lock) {
static uint64_t UpdateLastSample(uint64_t now_cycles, uint64_t now_ns,
                                 uint64_t delta_cycles,
                                 const struct TimeSample *sample)
    ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock) {
    ABSL_EXCLUSIVE_LOCKS_REQUIRED(time_state.lock) {
  uint64_t estimated_base_ns = now_ns;
  uint64_t lock_value = SeqAcquire(&seq);  // acquire seqlock to block readers
  uint64_t lock_value =
      SeqAcquire(&time_state.seq);  // acquire seqlock to block readers

  // The 5s in the next if-statement limits the time for which we will trust
  // the cycle counter and our last sample to give a reasonable result.
@@ -445,12 +454,16 @@ static uint64_t UpdateLastSample(uint64_t now_cycles, uint64_t now_ns,
      sample->raw_ns + static_cast<uint64_t>(5) * 1000 * 1000 * 1000 < now_ns ||
      now_ns < sample->raw_ns || now_cycles < sample->base_cycles) {
    // record this sample, and forget any previously known slope.
    last_sample.raw_ns.store(now_ns, std::memory_order_relaxed);
    last_sample.base_ns.store(estimated_base_ns, std::memory_order_relaxed);
    last_sample.base_cycles.store(now_cycles, std::memory_order_relaxed);
    last_sample.nsscaled_per_cycle.store(0, std::memory_order_relaxed);
    last_sample.min_cycles_per_sample.store(0, std::memory_order_relaxed);
    stats_initializations++;
    time_state.last_sample.raw_ns.store(now_ns, std::memory_order_relaxed);
    time_state.last_sample.base_ns.store(estimated_base_ns,
                                         std::memory_order_relaxed);
    time_state.last_sample.base_cycles.store(now_cycles,
                                             std::memory_order_relaxed);
    time_state.last_sample.nsscaled_per_cycle.store(0,
                                                    std::memory_order_relaxed);
    time_state.last_sample.min_cycles_per_sample.store(
        0, std::memory_order_relaxed);
    time_state.stats_initializations++;
  } else if (sample->raw_ns + 500 * 1000 * 1000 < now_ns &&
             sample->base_cycles + 50 < now_cycles) {
    // Enough time has passed to compute the cycle time.
@@ -493,28 +506,32 @@ static uint64_t UpdateLastSample(uint64_t now_cycles, uint64_t now_ns,
    if (new_nsscaled_per_cycle != 0 &&
        diff_ns < 100 * 1000 * 1000 && -diff_ns < 100 * 1000 * 1000) {
      // record the cycle time measurement
      last_sample.nsscaled_per_cycle.store(
      time_state.last_sample.nsscaled_per_cycle.store(
          new_nsscaled_per_cycle, std::memory_order_relaxed);
      uint64_t new_min_cycles_per_sample =
          SafeDivideAndScale(kMinNSBetweenSamples, new_nsscaled_per_cycle);
      last_sample.min_cycles_per_sample.store(
      time_state.last_sample.min_cycles_per_sample.store(
          new_min_cycles_per_sample, std::memory_order_relaxed);
      stats_calibrations++;
      time_state.stats_calibrations++;
    } else {  // something went wrong; forget the slope
      last_sample.nsscaled_per_cycle.store(0, std::memory_order_relaxed);
      last_sample.min_cycles_per_sample.store(0, std::memory_order_relaxed);
      time_state.last_sample.nsscaled_per_cycle.store(
          0, std::memory_order_relaxed);
      time_state.last_sample.min_cycles_per_sample.store(
          0, std::memory_order_relaxed);
      estimated_base_ns = now_ns;
      stats_reinitializations++;
      time_state.stats_reinitializations++;
    }
    last_sample.raw_ns.store(now_ns, std::memory_order_relaxed);
    last_sample.base_ns.store(estimated_base_ns, std::memory_order_relaxed);
    last_sample.base_cycles.store(now_cycles, std::memory_order_relaxed);
    time_state.last_sample.raw_ns.store(now_ns, std::memory_order_relaxed);
    time_state.last_sample.base_ns.store(estimated_base_ns,
                                         std::memory_order_relaxed);
    time_state.last_sample.base_cycles.store(now_cycles,
                                             std::memory_order_relaxed);
  } else {
    // have a sample, but no slope; waiting for enough time for a calibration
    stats_slow_paths++;
    time_state.stats_slow_paths++;
  }

  SeqRelease(&seq, lock_value);  // release the readers
  SeqRelease(&time_state.seq, lock_value);  // release the readers

  return estimated_base_ns;
}
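
// Illustrative sketch of the seqlock protocol used above (an assumption about
// SeqAcquire()/SeqRelease(), whose definitions appear earlier in this file and
// may differ in detail): a writer makes time_state.seq odd while it mutates
// last_sample, and a reader accepts its snapshot only if seq was even and
// unchanged across its reads.
//
//   // Writer, called with time_state.lock held:
//   uint64_t lock_value = SeqAcquire(&time_state.seq);  // seq becomes odd
//   ...relaxed stores to time_state.last_sample fields...
//   SeqRelease(&time_state.seq, lock_value);            // seq becomes even
//
//   // Reader, lock-free:
//   uint64_t s0 = time_state.seq.load(std::memory_order_acquire);
//   ...relaxed loads of time_state.last_sample fields...
//   std::atomic_thread_fence(std::memory_order_acquire);
//   uint64_t s1 = time_state.seq.load(std::memory_order_relaxed);
//   bool consistent = (s0 == s1) && ((s0 & 1) == 0);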