-- 465461299a9814aca325fee599cefbfe462f12fe by Abseil Team <absl-team@google.com>:

Optimize trivially copyable flags with a sequence lock

PiperOrigin-RevId: 349602779

-- 73f39f959e21121684a51887243abad0814a335e by Abseil Team <absl-team@google.com>:

Internal change

PiperOrigin-RevId: 349590869

-- 6b3106fa66b8f075a39a1a8f3265ae132b7e2c84 by Abseil Team <absl-team@google.com>:

Remove ABSL_DLL from `log_prefix_hook` and `abort_hook`.

PiperOrigin-RevId: 349560499

-- bb0d295e699a509f3284145e025d00036b70dbb2 by Abseil Team <absl-team@google.com>:

Tiny docstring fix

A small edit to make "use of this is useful" a little less redundant. :)

PiperOrigin-RevId: 349445689

GitOrigin-RevId: 465461299a9814aca325fee599cefbfe462f12fe
Change-Id: I08cc4091b8b95b68188cb9168ac622dacc5fa688
parent e7ca23acac
commit 384af0e914
12 changed files with 518 additions and 132 deletions
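For context, the first change in this import is the interesting one: trivially copyable flags can now read their value through the new SequenceLock added below, falling back to the flag's Mutex only when the lock-free read fails (a write is in progress, or the value is not yet initialized). A minimal sketch of that read pattern, assuming only the header below; `FlagStorage`, `words`, `slow_value`, and `ReadFlag` are illustrative names, not the actual absl::flags_internal flag representation:

    #include <atomic>
    #include <cstdint>

    #include "absl/flags/internal/sequence_lock.h"
    #include "absl/synchronization/mutex.h"

    // Illustrative storage for one trivially copyable flag value: an atomic-word
    // buffer for lock-free reads plus a Mutex-guarded copy for the slow path.
    struct FlagStorage {
      absl::flags_internal::SequenceLock seq_lock;
      std::atomic<uint64_t> words[1];  // large enough for one int64_t payload
      absl::Mutex mu;
      int64_t slow_value = 0;  // guarded by mu
    };

    int64_t ReadFlag(FlagStorage& s) {
      int64_t v;
      // Fast path: lock-free copy. TryRead() fails while a Write() is in
      // progress or before MarkInitialized() has been called.
      if (s.seq_lock.TryRead(&v, s.words, sizeof(v))) return v;
      // Slow path: fall back to the Mutex-protected value.
      absl::MutexLock lock(&s.mu);
      return s.slow_value;
    }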
absl/flags/internal/sequence_lock.h
@@ -0,0 +1,187 @@
//
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_FLAGS_INTERNAL_SEQUENCE_LOCK_H_
#define ABSL_FLAGS_INTERNAL_SEQUENCE_LOCK_H_

#include <stddef.h>
#include <stdint.h>

#include <atomic>
#include <cassert>
#include <cstring>

#include "absl/base/optimization.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace flags_internal {

// Align 'x' up to the nearest 'align' bytes.
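// For example, AlignUp(13, 8) == 16 and AlignUp(16, 8) == 16.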
inline constexpr size_t AlignUp(size_t x, size_t align) {
  return align * ((x + align - 1) / align);
}

// A SequenceLock implements lock-free reads. A sequence counter is incremented
// before and after each write, and readers access the counter before and after
// accessing the protected data. If the counter is verified to not change during
// the access, and the sequence counter value was even, then the reader knows
// that the read was race-free and valid. Otherwise, the reader must fall back
// to a Mutex-based code path.
//
// This particular SequenceLock starts in an "uninitialized" state in which
// TryRead() returns false. It must be enabled by calling MarkInitialized().
// This serves as a marker that the associated flag value has not yet been
// initialized and a slow path needs to be taken.
//
// The memory reads and writes protected by this lock must use the provided
// `TryRead()` and `Write()` functions. These functions behave similarly to
// `memcpy()`, with one oddity: the protected data must be an array of
// `std::atomic<uint64_t>`. This is to comply with the C++ standard, which
// considers data races on non-atomic objects to be undefined behavior. See "Can
// Seqlocks Get Along With Programming Language Memory Models?"[1] by Hans J.
// Boehm for more details.
//
// [1] https://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
class SequenceLock {
 public:
  constexpr SequenceLock() : lock_(kUninitialized) {}

  // Mark that this lock is ready for use.
  void MarkInitialized() {
    assert(lock_.load(std::memory_order_relaxed) == kUninitialized);
    lock_.store(0, std::memory_order_release);
  }

  // Copy "size" bytes of data from "src" to "dst", protected as a read-side
  // critical section of the sequence lock.
  //
  // Unlike traditional sequence lock implementations, which loop until getting
  // a clean read, this implementation returns false in the case of concurrent
  // calls to `Write`. In such a case, the caller should fall back to a
  // locking-based slow path.
  //
  // Returns false if the sequence lock was not yet marked as initialized.
  //
  // NOTE: If this returns false, "dst" may be overwritten with undefined
  // (potentially uninitialized) data.
  bool TryRead(void* dst, const std::atomic<uint64_t>* src, size_t size) const {
    // Acquire barrier ensures that no loads done by RelaxedCopyFromAtomic()
    // are reordered above the first load of the sequence counter.
    int64_t seq_before = lock_.load(std::memory_order_acquire);
    if (ABSL_PREDICT_FALSE(seq_before & 1) == 1) return false;
    RelaxedCopyFromAtomic(dst, src, size);
    // Another acquire fence ensures that the load of 'lock_' below is
    // strictly ordered after the RelaxedCopyFromAtomic() call above.
    std::atomic_thread_fence(std::memory_order_acquire);
    int64_t seq_after = lock_.load(std::memory_order_relaxed);
    return ABSL_PREDICT_TRUE(seq_before == seq_after);
  }

  // Copy "size" bytes from "src" to "dst" as a write-side critical section
  // of the sequence lock. Any concurrent readers will be forced to retry
  // until they get a read that does not conflict with this write.
  //
  // This call must be externally synchronized against other calls to Write,
  // but may proceed concurrently with reads.
  void Write(std::atomic<uint64_t>* dst, const void* src, size_t size) {
    // We can use relaxed instructions to increment the counter since we
    // are externally synchronized. The std::atomic_thread_fence below
    // ensures that the counter updates don't get interleaved with the
    // copy to the data.
    int64_t orig_seq = lock_.load(std::memory_order_relaxed);
    assert((orig_seq & 1) == 0);  // Must be initially unlocked.
    lock_.store(orig_seq + 1, std::memory_order_relaxed);

    // We put a release fence between the update to lock_ and the writes to
    // shared data. Thus all stores to the shared data are effectively release
    // operations, and the update to lock_ above cannot be re-ordered past any
    // of them. Note that this barrier is not for the counter store above; a
    // release barrier for that store would have to come before it, not after.
    std::atomic_thread_fence(std::memory_order_release);
    RelaxedCopyToAtomic(dst, src, size);
    // "Release" semantics ensure that none of the writes done by
    // RelaxedCopyToAtomic() can be reordered after the following modification.
    lock_.store(orig_seq + 2, std::memory_order_release);
  }

  // Return the number of times that Write() has been called.
  //
  // REQUIRES: This must be externally synchronized against concurrent calls to
  // `Write()` or `IncrementModificationCount()`.
  // REQUIRES: `MarkInitialized()` must have been previously called.
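  // NOTE: Each Write() advances the counter by two (the value is odd only
  // while a write is in progress), so the count is the counter value / 2.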
  int64_t ModificationCount() const {
    int64_t val = lock_.load(std::memory_order_relaxed);
    assert(val != kUninitialized && (val & 1) == 0);
    return val / 2;
  }

  // REQUIRES: This must be externally synchronized against concurrent calls to
  // `Write()` or `ModificationCount()`.
  // REQUIRES: `MarkInitialized()` must have been previously called.
  void IncrementModificationCount() {
    int64_t val = lock_.load(std::memory_order_relaxed);
    assert(val != kUninitialized);
    lock_.store(val + 2, std::memory_order_relaxed);
  }

 private:
  // Perform the equivalent of "memcpy(dst, src, size)", but using relaxed
  // atomics.
  static void RelaxedCopyFromAtomic(void* dst, const std::atomic<uint64_t>* src,
                                    size_t size) {
    char* dst_byte = static_cast<char*>(dst);
    while (size >= sizeof(uint64_t)) {
      uint64_t word = src->load(std::memory_order_relaxed);
      std::memcpy(dst_byte, &word, sizeof(word));
      dst_byte += sizeof(word);
      src++;
      size -= sizeof(word);
    }
    if (size > 0) {
      uint64_t word = src->load(std::memory_order_relaxed);
      std::memcpy(dst_byte, &word, size);
    }
  }

  // Perform the equivalent of "memcpy(dst, src, size)", but using relaxed
  // atomics.
  static void RelaxedCopyToAtomic(std::atomic<uint64_t>* dst, const void* src,
                                  size_t size) {
    const char* src_byte = static_cast<const char*>(src);
    while (size >= sizeof(uint64_t)) {
      uint64_t word;
      std::memcpy(&word, src_byte, sizeof(word));
      dst->store(word, std::memory_order_relaxed);
      src_byte += sizeof(word);
      dst++;
      size -= sizeof(word);
    }
    if (size > 0) {
      uint64_t word = 0;
      std::memcpy(&word, src_byte, size);
      dst->store(word, std::memory_order_relaxed);
    }
  }

  static constexpr int64_t kUninitialized = -1;
  std::atomic<int64_t> lock_;
};

}  // namespace flags_internal
ABSL_NAMESPACE_END
}  // namespace absl

#endif // ABSL_FLAGS_INTERNAL_SEQUENCE_LOCK_H_

absl/flags/internal/sequence_lock_test.cc
@@ -0,0 +1,146 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/flags/internal/sequence_lock.h"

#include <atomic>
#include <thread>  // NOLINT(build/c++11)
#include <tuple>
#include <vector>

#include "gtest/gtest.h"
#include "absl/base/internal/sysinfo.h"
#include "absl/container/fixed_array.h"
#include "absl/time/clock.h"

namespace {

namespace flags = absl::flags_internal;

class ConcurrentSequenceLockTest
    : public testing::TestWithParam<std::tuple<int, int>> {
 public:
  ConcurrentSequenceLockTest()
      : buf_bytes_(std::get<0>(GetParam())),
        num_threads_(std::get<1>(GetParam())) {}

 protected:
  const int buf_bytes_;
  const int num_threads_;
};

TEST_P(ConcurrentSequenceLockTest, ReadAndWrite) {
  const int buf_words =
      flags::AlignUp(buf_bytes_, sizeof(uint64_t)) / sizeof(uint64_t);

  // The buffer that will be protected by the SequenceLock.
  absl::FixedArray<std::atomic<uint64_t>> protected_buf(buf_words);
  for (auto& v : protected_buf) v = -1;

  flags::SequenceLock seq_lock;
  std::atomic<bool> stop{false};
  std::atomic<int64_t> bad_reads{0};
  std::atomic<int64_t> good_reads{0};
  std::atomic<int64_t> unsuccessful_reads{0};

  // Start a bunch of threads which read 'protected_buf' under the sequence
  // lock. The main thread will concurrently update 'protected_buf'. The
  // updates always consist of an array of identical integers. The reader
  // ensures that any data it reads matches that pattern (i.e. the reads are
  // not "torn").
  std::vector<std::thread> threads;
  for (int i = 0; i < num_threads_; i++) {
    threads.emplace_back([&]() {
      absl::FixedArray<char> local_buf(buf_bytes_);
      while (!stop.load(std::memory_order_relaxed)) {
        if (seq_lock.TryRead(local_buf.data(), protected_buf.data(),
                             buf_bytes_)) {
          bool good = true;
          for (const auto& v : local_buf) {
            if (v != local_buf[0]) good = false;
          }
          if (good) {
            good_reads.fetch_add(1, std::memory_order_relaxed);
          } else {
            bad_reads.fetch_add(1, std::memory_order_relaxed);
          }
        } else {
          unsuccessful_reads.fetch_add(1, std::memory_order_relaxed);
        }
      }
    });
  }
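  // Before marking the lock initialized, wait until every reader thread has
  // observed at least one unsuccessful TryRead(); this exercises the
  // uninitialized fast path, which must return false.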
  while (unsuccessful_reads.load(std::memory_order_relaxed) < num_threads_) {
    absl::SleepFor(absl::Milliseconds(1));
  }
  seq_lock.MarkInitialized();

  // Run for a maximum of 5 seconds. On Windows, the scheduler behavior seems
  // somewhat unfair and, without an explicit timeout for this loop, the tests
  // can run for a long time.
  absl::Time deadline = absl::Now() + absl::Seconds(5);
  for (int i = 0; i < 100 && absl::Now() < deadline; i++) {
    absl::FixedArray<char> writer_buf(buf_bytes_);
    for (auto& v : writer_buf) v = i;
    seq_lock.Write(protected_buf.data(), writer_buf.data(), buf_bytes_);
    absl::SleepFor(absl::Microseconds(10));
  }
  stop.store(true, std::memory_order_relaxed);
  for (auto& t : threads) t.join();
  ASSERT_GE(good_reads, 0);
  ASSERT_EQ(bad_reads, 0);
}

// Simple helper for generating a range of thread counts.
// Generates [low, low*scale, low*scale^2, ..., high]
// (even if high is between low*scale^k and low*scale^(k+1)).
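// For example, MultiplicativeRange(1, 12, 2) yields {1, 2, 4, 8, 12}.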
std::vector<int> MultiplicativeRange(int low, int high, int scale) {
  std::vector<int> result;
  for (int current = low; current < high; current *= scale) {
    result.push_back(current);
  }
  result.push_back(high);
  return result;
}

INSTANTIATE_TEST_SUITE_P(TestManyByteSizes, ConcurrentSequenceLockTest,
                         testing::Combine(
                             // Buffer size (bytes).
                             testing::Range(1, 128),
                             // Number of reader threads.
                             testing::ValuesIn(MultiplicativeRange(
                                 1, absl::base_internal::NumCPUs(), 2))));

// Simple single-threaded test, parameterized by the size of the buffer to be
// protected.
class SequenceLockTest : public testing::TestWithParam<int> {};

TEST_P(SequenceLockTest, SingleThreaded) {
  const int size = GetParam();
  absl::FixedArray<std::atomic<uint64_t>> protected_buf(
      flags::AlignUp(size, sizeof(uint64_t)) / sizeof(uint64_t));

  flags::SequenceLock seq_lock;
  seq_lock.MarkInitialized();

  std::vector<char> src_buf(size, 'x');
  seq_lock.Write(protected_buf.data(), src_buf.data(), size);

  std::vector<char> dst_buf(size, '0');
  ASSERT_TRUE(seq_lock.TryRead(dst_buf.data(), protected_buf.data(), size));
  ASSERT_EQ(src_buf, dst_buf);
}

INSTANTIATE_TEST_SUITE_P(TestManyByteSizes, SequenceLockTest,
                         // Buffer size (bytes).
                         testing::Range(1, 128));

} // namespace