Export of internal Abseil changes

--
93c607726d663800b4bfa472cba043fd3f5d0e97 by Derek Mauro <dmauro@google.com>:

Internal change

PiperOrigin-RevId: 389158822

--
55b3bb50bbc168567c6ba25d07df2c2c39e864af by Martijn Vels <mvels@google.com>:

Change the CordRepRing alternative implementation to the CordRepBtree alternative.

This change makes CordRepBtree (BTREE) the alternative to CordRepConcat (CONCAT) trees, enabled through the internal / experimental 'cord_btree_enabled' latch.
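
The latch is flipped through the internal helper declared in cord_internal.h (shown later in this diff); a minimal, hedged sketch of opting in:

    // Internal/experimental API: route new Cord trees through CordRepBtree
    // (BTREE) instead of the legacy CordRepConcat (CONCAT).
    absl::cord_internal::enable_cord_btree(true);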

PiperOrigin-RevId: 389030571

--
d6fc346143606c096bca8eb5029e4c429ac6e305 by Todd Lipcon <tlipcon@google.com>:

Fix a small typo in SequenceLock doc comment

PiperOrigin-RevId: 388972936

--
e46f9245dce8b4150e3ca2664e0cf42b75f90a83 by Martijn Vels <mvels@google.com>:

Add 'shallow' validation mode to CordRepBtree, which will be the default for internal assertions.
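
A short usage sketch of the resulting API (per the cord_rep_btree.h hunk below), assuming a valid `CordRepBtree* tree`:

    // Shallow: checks only `tree` itself plus the cumulative length, type and
    // height of its direct edges. This is the new default for assertions.
    CordRepBtree::AssertValid(tree);
    // Full: recursively validates every node below `tree`, which is O(n).
    CordRepBtree::AssertValid(tree, /*shallow=*/false);
    // IsValid() defaults to a full check unless `shallow` is passed as true.
    bool ok = CordRepBtree::IsValid(tree, /*shallow=*/true);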

PiperOrigin-RevId: 388753606

--
b5e74f163b490beb006f848ace67bb650433fe13 by Martijn Vels <mvels@google.com>:

Add btree statistics to CordzInfo, and reduce rounding errors
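
The rounding fix accumulates the fair share as a double and casts to size_t once at the end, so many small shared children no longer each truncate their pro-rated share; a worked sketch mirroring `MemoryUsage::Add()` from the cordz_info.cc hunk below:

    double fair_share = 0.0;
    // A 1000-byte node with a cumulative refcount of 3 contributes ~333.33
    // bytes, not a truncated 333, to this cord's fair share.
    fair_share += static_cast<double>(1000) / 3;
    fair_share += static_cast<double>(512) / 2;  // 256.0
    // Rounded down only once, when reported.
    size_t reported = static_cast<size_t>(fair_share);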

PiperOrigin-RevId: 388715878

--
105bcbf80de649937e693b29b18220f9e6841a51 by Evan Brown <ezb@google.com>:

Skip length checking when constructing absl::string_view from `const char*`.
The length check causes unnecessary code bloat.
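
The change is visible in the string_view.h hunk at the end of this diff: the computed length no longer funnels through `CheckLengthInternal()`. A hedged before/after sketch:

    // Before: length_(str ? CheckLengthInternal(StrlenInternal(str)) : 0)
    // After:  length_(str ? StrlenInternal(str) : 0)
    constexpr absl::string_view sv("abc");  // still yields size() == 3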

PiperOrigin-RevId: 388271741

--
bed595158f24839efe49c65ae483f797d79fe0ae by Derek Mauro <dmauro@google.com>:

Internal change

PiperOrigin-RevId: 387713428
GitOrigin-RevId: 93c607726d663800b4bfa472cba043fd3f5d0e97
Change-Id: I2a4840f5ffcd7f70b7d7d45cce66f23c42cf565f
Branch: pull/998/head
Author: Abseil Team, committed by dinord
Commit: bf31a10b65 (parent: ab01e0403a)
 absl/flags/internal/sequence_lock.h                 |   2
 absl/strings/BUILD.bazel                            |   1
 absl/strings/CMakeLists.txt                         |   1
 absl/strings/cord.cc                                | 180
 absl/strings/cord.h                                 |  37
 absl/strings/cord_test.cc                           |   2
 absl/strings/internal/cord_internal.cc              |   1
 absl/strings/internal/cord_internal.h               |   6
 absl/strings/internal/cord_rep_btree.cc             |  21
 absl/strings/internal/cord_rep_btree.h              |  32
 absl/strings/internal/cord_rep_btree_test.cc        |  43
 absl/strings/internal/cordz_info.cc                 |  51
 absl/strings/internal/cordz_info_statistics_test.cc | 133
 absl/strings/internal/cordz_statistics.h            |   3
 absl/strings/string_view.h                          |   4
 15 files changed

@@ -49,7 +49,7 @@ inline constexpr size_t AlignUp(size_t x, size_t align) {
// The memory reads and writes protected by this lock must use the provided
// `TryRead()` and `Write()` functions. These functions behave similarly to
// `memcpy()`, with one oddity: the protected data must be an array of
// `std::atomic<int64>`. This is to comply with the C++ standard, which
// `std::atomic<uint64>`. This is to comply with the C++ standard, which
// considers data races on non-atomic objects to be undefined behavior. See "Can
// Seqlocks Get Along With Programming Language Memory Models?"[1] by Hans J.
// Boehm for more details.
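
For context, a hypothetical reader loop against this API; the `bool TryRead(void* dst, const std::atomic<uint64_t>* src, size_t size)` signature is an assumption for illustration and is not part of this diff:

    // Copies `size` bytes from the protected atomic array into `dst`,
    // retrying while a concurrent writer is active (TryRead() fails on races).
    void ReadProtected(const absl::flags_internal::SequenceLock& lock,
                       const std::atomic<uint64_t>* src, void* dst, size_t size) {
      while (!lock.TryRead(dst, src, size)) {
      }
    }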

@@ -318,6 +318,7 @@ cc_test(
":strings",
"//absl/base:config",
"//absl/base:raw_logging_internal",
"//absl/cleanup",
"@com_google_googletest//:gtest_main",
],
)

@@ -952,6 +952,7 @@ absl_cc_test(
${ABSL_TEST_COPTS}
DEPS
absl::base
absl::cleanup
absl::config
absl::cord_internal
absl::cord_rep_test_util

@@ -36,8 +36,8 @@
#include "absl/container/inlined_vector.h"
#include "absl/strings/escaping.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cord_rep_ring.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_scope.h"
#include "absl/strings/internal/cordz_update_tracker.h"
@@ -51,20 +51,20 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
using ::absl::cord_internal::CordRep;
using ::absl::cord_internal::CordRepBtree;
using ::absl::cord_internal::CordRepConcat;
using ::absl::cord_internal::CordRepExternal;
using ::absl::cord_internal::CordRepFlat;
using ::absl::cord_internal::CordRepRing;
using ::absl::cord_internal::CordRepSubstring;
using ::absl::cord_internal::CordzUpdateTracker;
using ::absl::cord_internal::InlineData;
using ::absl::cord_internal::kMaxFlatLength;
using ::absl::cord_internal::kMinFlatLength;
using ::absl::cord_internal::BTREE;
using ::absl::cord_internal::CONCAT;
using ::absl::cord_internal::EXTERNAL;
using ::absl::cord_internal::FLAT;
using ::absl::cord_internal::RING;
using ::absl::cord_internal::SUBSTRING;
using ::absl::cord_internal::kInlinedVectorSize;
@@ -100,8 +100,8 @@ static constexpr uint64_t min_length[] = {
static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length);
static inline bool cord_ring_enabled() {
return cord_internal::cord_ring_buffer_enabled.load(
static inline bool btree_enabled() {
return cord_internal::cord_btree_enabled.load(
std::memory_order_relaxed);
}
@@ -218,27 +218,25 @@ static CordRepFlat* CreateFlat(const char* data, size_t length,
return flat;
}
// Creates a new flat or ringbuffer out of the specified array.
// Creates a new flat or Btree out of the specified array.
// The returned node has a refcount of 1.
static CordRep* RingNewTree(const char* data, size_t length,
size_t alloc_hint) {
static CordRep* NewBtree(const char* data, size_t length, size_t alloc_hint) {
if (length <= kMaxFlatLength) {
return CreateFlat(data, length, alloc_hint);
}
CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0);
data += kMaxFlatLength;
length -= kMaxFlatLength;
size_t extra = (length - 1) / kMaxFlatLength + 1;
auto* root = CordRepRing::Create(flat, extra);
return CordRepRing::Append(root, {data, length}, alloc_hint);
auto* root = CordRepBtree::Create(flat);
return CordRepBtree::Append(root, {data, length}, alloc_hint);
}
// Create a new tree out of the specified array.
// The returned node has a refcount of 1.
static CordRep* NewTree(const char* data, size_t length, size_t alloc_hint) {
if (length == 0) return nullptr;
if (cord_ring_enabled()) {
return RingNewTree(data, length, alloc_hint);
if (btree_enabled()) {
return NewBtree(data, length, alloc_hint);
}
absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1);
size_t n = 0;
@@ -346,10 +344,10 @@ inline void Cord::InlineRep::remove_prefix(size_t n) {
reduce_size(n);
}
// Returns `rep` converted into a CordRepRing.
// Directly returns `rep` if `rep` is already a CordRepRing.
static CordRepRing* ForceRing(CordRep* rep, size_t extra) {
return (rep->tag == RING) ? rep->ring() : CordRepRing::Create(rep, extra);
// Returns `rep` converted into a CordRepBtree.
// Directly returns `rep` if `rep` is already a CordRepBtree.
static CordRepBtree* ForceBtree(CordRep* rep) {
return rep->tag == BTREE ? rep->btree() : CordRepBtree::Create(rep);
}
void Cord::InlineRep::AppendTreeToInlined(CordRep* tree,
@@ -357,8 +355,8 @@ void Cord::InlineRep::AppendTreeToInlined(CordRep* tree,
assert(!is_tree());
if (!data_.is_empty()) {
CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
if (cord_ring_enabled()) {
tree = CordRepRing::Append(CordRepRing::Create(flat, 1), tree);
if (btree_enabled()) {
tree = CordRepBtree::Append(CordRepBtree::Create(flat), tree);
} else {
tree = Concat(flat, tree);
}
@@ -369,8 +367,8 @@ void Cord::InlineRep::AppendTreeToInlined(CordRep* tree,
void Cord::InlineRep::AppendTreeToTree(CordRep* tree, MethodIdentifier method) {
assert(is_tree());
const CordzUpdateScope scope(data_.cordz_info(), method);
if (cord_ring_enabled()) {
tree = CordRepRing::Append(ForceRing(data_.as_tree(), 1), tree);
if (btree_enabled()) {
tree = CordRepBtree::Append(ForceBtree(data_.as_tree()), tree);
} else {
tree = Concat(data_.as_tree(), tree);
}
@@ -391,8 +389,8 @@ void Cord::InlineRep::PrependTreeToInlined(CordRep* tree,
assert(!is_tree());
if (!data_.is_empty()) {
CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
if (cord_ring_enabled()) {
tree = CordRepRing::Prepend(CordRepRing::Create(flat, 1), tree);
if (btree_enabled()) {
tree = CordRepBtree::Prepend(CordRepBtree::Create(flat), tree);
} else {
tree = Concat(tree, flat);
}
@@ -404,8 +402,8 @@ void Cord::InlineRep::PrependTreeToTree(CordRep* tree,
MethodIdentifier method) {
assert(is_tree());
const CordzUpdateScope scope(data_.cordz_info(), method);
if (cord_ring_enabled()) {
tree = CordRepRing::Prepend(ForceRing(data_.as_tree(), 1), tree);
if (btree_enabled()) {
tree = CordRepBtree::Prepend(ForceBtree(data_.as_tree()), tree);
} else {
tree = Concat(tree, data_.as_tree());
}
@@ -427,8 +425,8 @@ void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) {
// written to region and the actual size increase will be written to size.
static inline bool PrepareAppendRegion(CordRep* root, char** region,
size_t* size, size_t max_length) {
if (root->tag == RING && root->refcount.IsOne()) {
Span<char> span = root->ring()->GetAppendBuffer(max_length);
if (root->tag == BTREE && root->refcount.IsOne()) {
Span<char> span = root->btree()->GetAppendBuffer(max_length);
if (!span.empty()) {
*region = span.data();
*size = span.size();
@@ -500,8 +498,8 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size,
*region = new_node->Data();
*size = new_node->length;
if (cord_ring_enabled()) {
rep = CordRepRing::Append(ForceRing(rep, 1), new_node);
if (btree_enabled()) {
rep = CordRepBtree::Append(ForceBtree(rep), new_node);
} else {
rep = Concat(rep, new_node);
}
@@ -511,8 +509,13 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size,
// If the rep is a leaf, this will increment the value at total_mem_usage and
// will return true.
static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
size_t maybe_sub_size = 0;
if (rep->tag == SUBSTRING) {
maybe_sub_size = sizeof(cord_internal::CordRepSubstring);
rep = rep->substring()->child;
}
if (rep->tag >= FLAT) {
*total_mem_usage += rep->flat()->AllocatedSize();
*total_mem_usage += maybe_sub_size + rep->flat()->AllocatedSize();
return true;
}
if (rep->tag == EXTERNAL) {
@@ -520,8 +523,9 @@ static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) {
// assume it is 'at least' a word / pointer to data. In the future we may
// choose to use the 'data' byte as a tag to identify the types of some
// well-known externals, such as a std::string instance.
*total_mem_usage +=
sizeof(cord_internal::CordRepExternalImpl<intptr_t>) + rep->length;
*total_mem_usage += maybe_sub_size +
sizeof(cord_internal::CordRepExternalImpl<intptr_t>) +
rep->length;
return true;
}
return false;
@@ -690,9 +694,11 @@ void Cord::InlineRep::AppendArray(absl::string_view src,
return;
}
if (cord_ring_enabled()) {
rep = ForceRing(rep, (src.size() - 1) / kMaxFlatLength + 1);
rep = CordRepRing::Append(rep->ring(), src);
if (btree_enabled()) {
// TODO(b/192061034): keep legacy 10% growth rate: consider other rates.
rep = ForceBtree(rep);
const size_t alloc_hint = (std::min)(kMaxFlatLength, rep->length / 10);
rep = CordRepBtree::Append(rep->btree(), src, alloc_hint);
} else {
// Use new block(s) for any remaining bytes that were not handled above.
// Alloc extra memory only if the right child of the root of the new tree
@@ -769,9 +775,13 @@ inline void Cord::AppendImpl(C&& src) {
contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord);
}
void Cord::Append(const Cord& src) { AppendImpl(src); }
void Cord::Append(const Cord& src) {
AppendImpl(src);
}
void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); }
void Cord::Append(Cord&& src) {
AppendImpl(std::move(src));
}
template <typename T, Cord::EnableIfString<T>>
void Cord::Append(T&& src) {
@@ -923,8 +933,10 @@ void Cord::RemovePrefix(size_t n) {
} else {
auto constexpr method = CordzUpdateTracker::kRemovePrefix;
CordzUpdateScope scope(contents_.cordz_info(), method);
if (tree->tag == RING) {
tree = CordRepRing::RemovePrefix(tree->ring(), n);
if (tree->tag == BTREE) {
CordRep* old = tree;
tree = tree->btree()->SubTree(n, tree->length - n);
CordRep::Unref(old);
} else {
CordRep* newrep = RemovePrefixFrom(tree, n);
CordRep::Unref(tree);
@@ -944,8 +956,10 @@ void Cord::RemoveSuffix(size_t n) {
} else {
auto constexpr method = CordzUpdateTracker::kRemoveSuffix;
CordzUpdateScope scope(contents_.cordz_info(), method);
if (tree->tag == RING) {
tree = CordRepRing::RemoveSuffix(tree->ring(), n);
if (tree->tag == BTREE) {
CordRep* old = tree;
tree = tree->btree()->SubTree(0, tree->length - n);
CordRep::Unref(old);
} else {
CordRep* newrep = RemoveSuffixFrom(tree, n);
CordRep::Unref(tree);
@@ -1037,9 +1051,8 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const {
return sub_cord;
}
if (tree->tag == RING) {
CordRepRing* ring = CordRep::Ref(tree)->ring();
tree = CordRepRing::SubRing(ring, pos, new_size);
if (tree->tag == BTREE) {
tree = tree->btree()->SubTree(pos, new_size);
} else {
tree = NewSubRange(tree, pos, new_size);
}
@@ -1227,8 +1240,8 @@ bool ComputeCompareResult<bool>(int memcmp_res) {
} // namespace
// Helper routine. Locates the first flat chunk of the Cord without
// initializing the iterator.
// Helper routine. Locates the first flat or external chunk of the Cord without
// initializing the iterator, and returns a string_view referencing the data.
inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
if (!is_tree()) {
return absl::string_view(data_.as_chars(), data_.inline_size());
@@ -1243,8 +1256,13 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
return absl::string_view(node->external()->base, node->length);
}
if (node->tag == RING) {
return node->ring()->entry_data(node->ring()->head());
if (node->tag == BTREE) {
CordRepBtree* tree = node->btree();
int height = tree->height();
while (--height >= 0) {
tree = tree->Edge(CordRepBtree::kFront)->btree();
}
return tree->Data(tree->begin());
}
// Walk down the left branches until we hit a non-CONCAT node.
@@ -1506,22 +1524,21 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
return subcord;
}
if (ring_reader_) {
if (btree_reader_) {
size_t chunk_size = current_chunk_.size();
if (n <= chunk_size && n <= kMaxBytesToCopy) {
subcord = Cord(current_chunk_.substr(0, n), method);
if (n < chunk_size) {
current_chunk_.remove_prefix(n);
} else {
current_chunk_ = btree_reader_.Next();
}
} else {
auto* ring = CordRep::Ref(ring_reader_.ring())->ring();
size_t offset = ring_reader_.length() - bytes_remaining_;
CordRep* rep = CordRepRing::SubRing(ring, offset, n);
CordRep* rep;
current_chunk_ = btree_reader_.Read(n, chunk_size, rep);
subcord.contents_.EmplaceTree(rep, method);
}
if (n < chunk_size) {
bytes_remaining_ -= n;
current_chunk_.remove_prefix(n);
} else {
AdvanceBytesRing(n);
}
bytes_remaining_ -= n;
return subcord;
}
@@ -1698,8 +1715,8 @@ char Cord::operator[](size_t i) const {
if (rep->tag >= FLAT) {
// Get the "i"th character directly from the flat array.
return rep->flat()->Data()[offset];
} else if (rep->tag == RING) {
return rep->ring()->GetCharacter(offset);
} else if (rep->tag == BTREE) {
return rep->btree()->GetCharacter(offset);
} else if (rep->tag == EXTERNAL) {
// Get the "i"th character from the external array.
return rep->external()->base[offset];
@@ -1758,8 +1775,8 @@ absl::string_view Cord::FlattenSlowPath() {
} else if (rep->tag == EXTERNAL) {
*fragment = absl::string_view(rep->external()->base, rep->length);
return true;
} else if (rep->tag == RING) {
return rep->ring()->IsFlat(fragment);
} else if (rep->tag == BTREE) {
return rep->btree()->IsFlat(fragment);
} else if (rep->tag == SUBSTRING) {
CordRep* child = rep->substring()->child;
if (child->tag >= FLAT) {
@@ -1770,9 +1787,9 @@ absl::string_view Cord::FlattenSlowPath() {
*fragment = absl::string_view(
child->external()->base + rep->substring()->start, rep->length);
return true;
} else if (child->tag == RING) {
return child->ring()->IsFlat(rep->substring()->start, rep->length,
fragment);
} else if (child->tag == BTREE) {
return child->btree()->IsFlat(rep->substring()->start, rep->length,
fragment);
}
}
return false;
@@ -1781,7 +1798,7 @@ absl::string_view Cord::FlattenSlowPath() {
/* static */ void Cord::ForEachChunkAux(
absl::cord_internal::CordRep* rep,
absl::FunctionRef<void(absl::string_view)> callback) {
if (rep->tag == RING) {
if (rep->tag == BTREE) {
ChunkIterator it(rep), end;
while (it != end) {
callback(*it);
@@ -1866,15 +1883,7 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
*os << absl::CEscape(std::string(rep->flat()->Data(), rep->length));
*os << "]\n";
} else {
assert(rep->tag == RING);
auto* ring = rep->ring();
*os << "RING, entries = " << ring->entries() << "\n";
CordRepRing::index_type head = ring->head();
do {
DumpNode(ring->entry_child(head), include_data, os,
indent + kIndentStep);
head = ring->advance(head);
} while (head != ring->tail());
CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os);
}
if (stack.empty()) break;
rep = stack.back();
@@ -1967,15 +1976,18 @@ static bool VerifyNode(CordRep* root, CordRep* start_node,
}
next_node = right;
}
} else if (cur_node->tag == RING) {
total_mem_usage += CordRepRing::AllocSize(cur_node->ring()->capacity());
const CordRepRing* ring = cur_node->ring();
CordRepRing::index_type pos = ring->head(), tail = ring->tail();
do {
CordRep* node = ring->entry_child(pos);
assert(node->tag >= FLAT || node->tag == EXTERNAL);
RepMemoryUsageLeaf(node, &total_mem_usage);
} while ((pos = ring->advance(pos)) != tail);
} else if (cur_node->tag == BTREE) {
total_mem_usage += sizeof(CordRepBtree);
const CordRepBtree* node = cur_node->btree();
if (node->height() == 0) {
for (const CordRep* edge : node->Edges()) {
RepMemoryUsageLeaf(edge, &total_mem_usage);
}
} else {
for (const CordRep* edge : node->Edges()) {
tree_stack.push_back(edge);
}
}
} else {
// Since cur_node is not a leaf or a concat node it must be a substring.
assert(cur_node->tag == SUBSTRING);

@@ -79,8 +79,9 @@
#include "absl/functional/function_ref.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_reader.h"
#include "absl/strings/internal/cord_rep_ring.h"
#include "absl/strings/internal/cord_rep_ring_reader.h"
#include "absl/strings/internal/cordz_functions.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/strings/internal/cordz_statistics.h"
@@ -370,8 +371,8 @@ class Cord {
private:
using CordRep = absl::cord_internal::CordRep;
using CordRepRing = absl::cord_internal::CordRepRing;
using CordRepRingReader = absl::cord_internal::CordRepRingReader;
using CordRepBtree = absl::cord_internal::CordRepBtree;
using CordRepBtreeReader = absl::cord_internal::CordRepBtreeReader;
// Stack of right children of concat nodes that we have to visit.
// Keep this at the end of the structure to avoid cache-thrashing.
@@ -397,9 +398,9 @@ class Cord {
// Stack specific operator++
ChunkIterator& AdvanceStack();
// Ring buffer specific operator++
ChunkIterator& AdvanceRing();
void AdvanceBytesRing(size_t n);
// Btree specific operator++
ChunkIterator& AdvanceBtree();
void AdvanceBytesBtree(size_t n);
// Iterates `n` bytes, where `n` is expected to be greater than or equal to
// `current_chunk_.size()`.
@@ -415,8 +416,8 @@ class Cord {
// The number of bytes left in the `Cord` over which we are iterating.
size_t bytes_remaining_ = 0;
// Cord reader for ring buffers. Empty if not traversing a ring buffer.
CordRepRingReader ring_reader_;
// Cord reader for cord btrees. Empty if not traversing a btree.
CordRepBtreeReader btree_reader_;
// See 'Stack' alias definition.
Stack stack_of_right_children_;
@@ -1247,8 +1248,8 @@ inline bool Cord::StartsWith(absl::string_view rhs) const {
}
inline void Cord::ChunkIterator::InitTree(cord_internal::CordRep* tree) {
if (tree->tag == cord_internal::RING) {
current_chunk_ = ring_reader_.Reset(tree->ring());
if (tree->tag == cord_internal::BTREE) {
current_chunk_ = btree_reader_.Init(tree->btree());
return;
}
@@ -1271,20 +1272,20 @@ inline Cord::ChunkIterator::ChunkIterator(const Cord* cord)
}
}
inline Cord::ChunkIterator& Cord::ChunkIterator::AdvanceRing() {
current_chunk_ = ring_reader_.Next();
inline Cord::ChunkIterator& Cord::ChunkIterator::AdvanceBtree() {
current_chunk_ = btree_reader_.Next();
return *this;
}
inline void Cord::ChunkIterator::AdvanceBytesRing(size_t n) {
inline void Cord::ChunkIterator::AdvanceBytesBtree(size_t n) {
assert(n >= current_chunk_.size());
bytes_remaining_ -= n;
if (bytes_remaining_) {
if (n == current_chunk_.size()) {
current_chunk_ = ring_reader_.Next();
current_chunk_ = btree_reader_.Next();
} else {
size_t offset = ring_reader_.length() - bytes_remaining_;
current_chunk_ = ring_reader_.Seek(offset);
size_t offset = btree_reader_.length() - bytes_remaining_;
current_chunk_ = btree_reader_.Seek(offset);
}
} else {
current_chunk_ = {};
@@ -1297,7 +1298,7 @@ inline Cord::ChunkIterator& Cord::ChunkIterator::operator++() {
assert(bytes_remaining_ >= current_chunk_.size());
bytes_remaining_ -= current_chunk_.size();
if (bytes_remaining_ > 0) {
return ring_reader_ ? AdvanceRing() : AdvanceStack();
return btree_reader_ ? AdvanceBtree() : AdvanceStack();
} else {
current_chunk_ = {};
}
@@ -1339,7 +1340,7 @@ inline void Cord::ChunkIterator::AdvanceBytes(size_t n) {
if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) {
RemoveChunkPrefix(n);
} else if (n != 0) {
ring_reader_ ? AdvanceBytesRing(n) : AdvanceBytesSlowPath(n);
btree_reader_ ? AdvanceBytesBtree(n) : AdvanceBytesSlowPath(n);
}
}

@@ -366,7 +366,7 @@ TEST(Cord, Subcord) {
absl::Cord a;
AppendWithFragments(s, &rng, &a);
ASSERT_EQ(s.size(), a.size());
ASSERT_EQ(s, std::string(a));
// Check subcords of a, from a variety of interesting points.
std::set<size_t> positions;

@@ -31,6 +31,7 @@ ABSL_CONST_INIT std::atomic<bool> cord_ring_buffer_enabled(
kCordEnableRingBufferDefault);
ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled(
kCordShallowSubcordsDefault);
ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false);
void CordRep::Destroy(CordRep* rep) {
assert(rep != nullptr);

@@ -46,6 +46,12 @@ extern std::atomic<bool> cord_btree_enabled;
extern std::atomic<bool> cord_ring_buffer_enabled;
extern std::atomic<bool> shallow_subcords_enabled;
// `cord_btree_exhaustive_validation` can be set to force exhaustive validation
// in debug assertions, and code that calls `IsValid()` explicitly. By default,
// assertions should be relatively cheap and AssertValid() can easily lead to
// O(n^2) complexity as recursive / full tree validation is O(n).
extern std::atomic<bool> cord_btree_exhaustive_validation;
inline void enable_cord_btree(bool enable) {
cord_btree_enabled.store(enable, std::memory_order_relaxed);
}
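
Tests in this change flip this knob and restore it on exit (see the cord_rep_btree_test.cc hunk further down); a minimal sketch of that pattern, assuming "absl/cleanup/cleanup.h" is included:

    // Force exhaustive (deep) btree validation, restoring the prior value
    // when the enclosing scope exits.
    const bool saved = cord_btree_exhaustive_validation.load();
    cord_btree_exhaustive_validation.store(true);
    auto restore = absl::MakeCleanup(
        [saved] { cord_btree_exhaustive_validation.store(saved); });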

@@ -32,6 +32,8 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
constexpr size_t CordRepBtree::kMaxCapacity; // NOLINT: needed for c++ < c++17
namespace {
using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth];
@@ -42,6 +44,10 @@ using CopyResult = CordRepBtree::CopyResult;
constexpr auto kFront = CordRepBtree::kFront;
constexpr auto kBack = CordRepBtree::kBack;
inline bool exhaustive_validation() {
return cord_btree_exhaustive_validation.load(std::memory_order_relaxed);
}
// Implementation of the various 'Dump' functions.
// Prints the entire tree structure or 'rep'. External callers should
// not specify 'depth' and leave it to its default (0) value.
@@ -357,7 +363,7 @@ void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin,
Delete(tree);
}
bool CordRepBtree::IsValid(const CordRepBtree* tree) {
bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) {
#define NODE_CHECK_VALID(x) \
if (!(x)) { \
ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
@@ -389,9 +395,9 @@ bool CordRepBtree::IsValid(const CordRepBtree* tree) {
child_length += edge->length;
}
NODE_CHECK_EQ(child_length, tree->length);
if (tree->height() > 0) {
if ((!shallow || exhaustive_validation()) && tree->height() > 0) {
for (CordRep* edge : tree->Edges()) {
if (!IsValid(edge->btree())) return false;
if (!IsValid(edge->btree(), shallow)) return false;
}
}
return true;
@@ -402,16 +408,17 @@ bool CordRepBtree::IsValid(const CordRepBtree* tree) {
#ifndef NDEBUG
CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) {
if (!IsValid(tree)) {
CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) {
if (!IsValid(tree, shallow)) {
Dump(tree, "CordRepBtree validation failed:", false, std::cout);
ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
}
return tree;
}
const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) {
if (!IsValid(tree)) {
const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
bool shallow) {
if (!IsValid(tree, shallow)) {
Dump(tree, "CordRepBtree validation failed:", false, std::cout);
ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
}

@@ -266,10 +266,28 @@ class CordRepBtree : public CordRep {
// holding a FLAT or EXTERNAL child rep.
static bool IsDataEdge(const CordRep* rep);
// Diagnostics
static bool IsValid(const CordRepBtree* tree);
static CordRepBtree* AssertValid(CordRepBtree* tree);
static const CordRepBtree* AssertValid(const CordRepBtree* tree);
// Diagnostics: returns true if `tree` is valid and internally consistent.
// If `shallow` is false, then the provided top level node and all child nodes
// below it are recursively checked. If `shallow` is true, only the provided
// node in `tree` and the cumulative length, type and height of the direct
// child nodes of `tree` are checked. The value of `shallow` is ignored if the
// internal `cord_btree_exhaustive_validation` diagnostics variable is true,
// in which case the performed validation works as if `shallow` were false.
// This function is intended for debugging and testing purposes only.
static bool IsValid(const CordRepBtree* tree, bool shallow = false);
// Diagnostics: asserts that the provided tree is valid.
// `AssertValid()` performs a shallow validation by default. `shallow` can be
// set to false in which case an exhaustive validation is performed. This
// function is implemented in terms of calling `IsValid()` and asserting the
// return value to be true. See `IsValid()` for more information.
// This function is intended for debugging and testing purposes only.
static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true);
static const CordRepBtree* AssertValid(const CordRepBtree* tree,
bool shallow = true);
// Diagnostics: dump the contents of this tree to `stream`.
// This function is intended for debugging and testing purposes only.
static void Dump(const CordRep* rep, std::ostream& stream);
static void Dump(const CordRep* rep, absl::string_view label,
std::ostream& stream);
@@ -834,11 +852,13 @@ inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
#ifdef NDEBUG
inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) {
inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree,
bool /* shallow */) {
return tree;
}
inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) {
inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
bool /* shallow */) {
return tree;
}

@@ -24,6 +24,7 @@
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/cleanup/cleanup.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_test_util.h"
#include "absl/strings/str_cat.h"
@@ -1346,6 +1347,48 @@ TEST(CordRepBtreeTest, AssertValid) {
CordRep::Unref(tree);
}
TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) {
// Restore exhaustive validation on any exit.
const bool exhaustive_validation = cord_btree_exhaustive_validation.load();
auto cleanup = absl::MakeCleanup([exhaustive_validation] {
cord_btree_exhaustive_validation.store(exhaustive_validation);
});
// Create a tree of at least 2 levels, and mess with the original flat, which
// should go undetected in shallow mode as the flat is too far away, but
// should be detected in forced non-shallow mode.
CordRep* flat = MakeFlat("abc");
CordRepBtree* tree = CordRepBtree::Create(flat);
constexpr size_t max_cap = CordRepBtree::kMaxCapacity;
const size_t n = max_cap * max_cap * 2;
for (size_t i = 0; i < n; ++i) {
tree = CordRepBtree::Append(tree, MakeFlat("Hello world"));
}
flat->length = 100;
cord_btree_exhaustive_validation.store(false);
EXPECT_FALSE(CordRepBtree::IsValid(tree));
EXPECT_TRUE(CordRepBtree::IsValid(tree, true));
EXPECT_FALSE(CordRepBtree::IsValid(tree, false));
CordRepBtree::AssertValid(tree);
CordRepBtree::AssertValid(tree, true);
#if defined(GTEST_HAS_DEATH_TEST)
EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, false), ".*");
#endif
cord_btree_exhaustive_validation.store(true);
EXPECT_FALSE(CordRepBtree::IsValid(tree));
EXPECT_FALSE(CordRepBtree::IsValid(tree, true));
EXPECT_FALSE(CordRepBtree::IsValid(tree, false));
#if defined(GTEST_HAS_DEATH_TEST)
EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree), ".*");
EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, true), ".*");
#endif
flat->length = 3;
CordRep::Unref(tree);
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END

@@ -19,6 +19,7 @@
#include "absl/container/inlined_vector.h"
#include "absl/debugging/stacktrace.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_ring.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_statistics.h"
@@ -83,19 +84,23 @@ class CordRepAnalyzer {
// Process all top level linear nodes (substrings and flats).
repref = CountLinearReps(repref, memory_usage_);
// We should have have either a concat or ring node node if not null.
if (repref.rep != nullptr) {
assert(repref.rep->tag == RING || repref.rep->tag == CONCAT);
if (repref.rep->tag == RING) {
AnalyzeRing(repref);
} else if (repref.rep->tag == BTREE) {
AnalyzeBtree(repref);
} else if (repref.rep->tag == CONCAT) {
AnalyzeConcat(repref);
} else {
// We should have either a concat, btree, or ring node if not null.
assert(false);
}
}
// Adds values to output
statistics_.estimated_memory_usage += memory_usage_.total;
statistics_.estimated_fair_share_memory_usage += memory_usage_.fair_share;
statistics_.estimated_fair_share_memory_usage +=
static_cast<size_t>(memory_usage_.fair_share);
}
private:
@@ -117,13 +122,13 @@ class CordRepAnalyzer {
// Memory usage values
struct MemoryUsage {
size_t total = 0;
size_t fair_share = 0;
double fair_share = 0.0;
// Adds `size` memory usage to this class, with a cumulative (recursive)
// reference count of `refcount`
void Add(size_t size, size_t refcount) {
total += size;
fair_share += size / refcount;
fair_share += static_cast<double>(size) / refcount;
}
};
@@ -215,28 +220,32 @@ class CordRepAnalyzer {
}
}
// Counts the provided ring buffer child into `child_usage`.
void CountRingChild(const CordRep* child, MemoryUsage& child_usage) {
RepRef rep{child, static_cast<size_t>(child->refcount.Get())};
rep = CountLinearReps(rep, child_usage);
assert(rep.rep == nullptr);
}
// Analyzes the provided ring. As ring buffers can have many child nodes, the
// effect of rounding errors can become non trivial, so we compute the totals
// first at the ring level, and then divide the fair share of the total
// including children fair share totals.
// Analyzes the provided ring.
void AnalyzeRing(RepRef rep) {
statistics_.node_count++;
statistics_.node_counts.ring++;
MemoryUsage ring_usage;
const CordRepRing* ring = rep.rep->ring();
ring_usage.Add(CordRepRing::AllocSize(ring->capacity()), 1);
memory_usage_.Add(CordRepRing::AllocSize(ring->capacity()), rep.refcount);
ring->ForEach([&](CordRepRing::index_type pos) {
CountRingChild(ring->entry_child(pos), ring_usage);
CountLinearReps(rep.Child(ring->entry_child(pos)), memory_usage_);
});
memory_usage_.total += ring_usage.total;
memory_usage_.fair_share += ring_usage.fair_share / rep.refcount;
}
// Analyzes the provided btree.
void AnalyzeBtree(RepRef rep) {
statistics_.node_count++;
statistics_.node_counts.btree++;
memory_usage_.Add(sizeof(CordRepBtree), rep.refcount);
const CordRepBtree* tree = rep.rep->btree();
if (tree->height() > 0) {
for (CordRep* edge : tree->Edges()) {
AnalyzeBtree(rep.Child(edge));
}
} else {
for (CordRep* edge : tree->Edges()) {
CountLinearReps(rep.Child(edge), memory_usage_);
}
}
}
CordzStatistics& statistics_;

@@ -21,6 +21,7 @@
#include "absl/base/config.h"
#include "absl/strings/cord.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cord_rep_ring.h"
#include "absl/strings/internal/cordz_info.h"
@@ -42,6 +43,8 @@ inline void PrintTo(const CordzStatistics& stats, std::ostream* s) {
namespace {
using ::testing::Ge;
// Creates a flat of the specified allocated size
CordRepFlat* Flat(size_t size) {
// Round up to a tag size, as we are going to poke an exact tag size back into
@@ -134,8 +137,8 @@ size_t SizeOf(const CordRepRing* rep) {
}
// Computes fair share memory used in a naive 'we dare to recurse' way.
size_t FairShare(CordRep* rep, size_t ref = 1) {
size_t self = 0, children = 0;
double FairShareImpl(CordRep* rep, size_t ref) {
double self = 0.0, children = 0.0;
ref *= rep->refcount.Get();
if (rep->tag >= FLAT) {
self = SizeOf(rep->flat());
@@ -143,22 +146,32 @@ size_t FairShare(CordRep* rep, size_t ref = 1) {
self = SizeOf(rep->external());
} else if (rep->tag == SUBSTRING) {
self = SizeOf(rep->substring());
children = FairShare(rep->substring()->child, ref);
children = FairShareImpl(rep->substring()->child, ref);
} else if (rep->tag == BTREE) {
self = SizeOf(rep->btree());
for (CordRep* edge : rep->btree()->Edges()) {
children += FairShareImpl(edge, ref);
}
} else if (rep->tag == RING) {
self = SizeOf(rep->ring());
rep->ring()->ForEach([&](CordRepRing::index_type i) {
self += FairShare(rep->ring()->entry_child(i));
self += FairShareImpl(rep->ring()->entry_child(i), 1);
});
} else if (rep->tag == CONCAT) {
self = SizeOf(rep->concat());
children = FairShare(rep->concat()->left, ref) +
FairShare(rep->concat()->right, ref);
children = FairShareImpl(rep->concat()->left, ref) +
FairShareImpl(rep->concat()->right, ref);
} else {
assert(false);
}
return self / ref + children;
}
// Returns the fair share memory size from `FairShareImpl()` as a size_t.
size_t FairShare(CordRep* rep, size_t ref = 1) {
return static_cast<size_t>(FairShareImpl(rep, ref));
}
// Samples the cord and returns CordzInfo::GetStatistics()
CordzStatistics SampleCord(CordRep* rep) {
InlineData cord(rep);
@@ -191,6 +204,7 @@ MATCHER_P(EqStatistics, stats, "Statistics equal expected values") {
STATS_MATCHER_EXPECT_EQ(node_counts.concat);
STATS_MATCHER_EXPECT_EQ(node_counts.substring);
STATS_MATCHER_EXPECT_EQ(node_counts.ring);
STATS_MATCHER_EXPECT_EQ(node_counts.btree);
STATS_MATCHER_EXPECT_EQ(estimated_memory_usage);
STATS_MATCHER_EXPECT_EQ(estimated_fair_share_memory_usage);
@@ -424,6 +438,103 @@ TEST(CordzInfoStatisticsTest, SharedSubstringRing) {
EXPECT_THAT(SampleCord(substring), EqStatistics(expected));
}
TEST(CordzInfoStatisticsTest, BtreeLeaf) {
ASSERT_THAT(CordRepBtree::kMaxCapacity, Ge(3));
RefHelper ref;
auto* flat1 = Flat(2000);
auto* flat2 = Flat(200);
auto* substr = Substring(flat2);
auto* external = External(3000);
CordRepBtree* tree = CordRepBtree::Create(flat1);
tree = CordRepBtree::Append(tree, substr);
tree = CordRepBtree::Append(tree, external);
size_t flat3_count = CordRepBtree::kMaxCapacity - 3;
size_t flat3_size = 0;
for (size_t i = 0; i < flat3_count; ++i) {
auto* flat3 = Flat(70);
flat3_size += SizeOf(flat3);
tree = CordRepBtree::Append(tree, flat3);
}
ref.NeedsUnref(tree);
CordzStatistics expected;
expected.size = tree->length;
expected.estimated_memory_usage = SizeOf(tree) + SizeOf(flat1) +
SizeOf(flat2) + SizeOf(substr) +
flat3_size + SizeOf(external);
expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage;
expected.node_count = 1 + 3 + 1 + flat3_count;
expected.node_counts.flat = 2 + flat3_count;
expected.node_counts.flat_128 = flat3_count;
expected.node_counts.flat_256 = 1;
expected.node_counts.external = 1;
expected.node_counts.substring = 1;
expected.node_counts.btree = 1;
EXPECT_THAT(SampleCord(tree), EqStatistics(expected));
}
TEST(CordzInfoStatisticsTest, BtreeNodeShared) {
RefHelper ref;
static constexpr int leaf_count = 3;
const size_t flat3_count = CordRepBtree::kMaxCapacity - 3;
ASSERT_THAT(flat3_count, Ge(0));
CordRepBtree* tree = nullptr;
size_t mem_size = 0;
for (int i = 0; i < leaf_count; ++i) {
auto* flat1 = ref.Ref(Flat(2000), 9);
mem_size += SizeOf(flat1);
if (i == 0) {
tree = CordRepBtree::Create(flat1);
} else {
tree = CordRepBtree::Append(tree, flat1);
}
auto* flat2 = Flat(200);
auto* substr = Substring(flat2);
mem_size += SizeOf(flat2) + SizeOf(substr);
tree = CordRepBtree::Append(tree, substr);
auto* external = External(30);
mem_size += SizeOf(external);
tree = CordRepBtree::Append(tree, external);
for (size_t i = 0; i < flat3_count; ++i) {
auto* flat3 = Flat(70);
mem_size += SizeOf(flat3);
tree = CordRepBtree::Append(tree, flat3);
}
if (i == 0) {
mem_size += SizeOf(tree);
} else {
mem_size += SizeOf(tree->Edges().back()->btree());
}
}
ref.NeedsUnref(tree);
// Ref count: 2 for top (add 1), 5 for leaf 0 (add 4).
ref.Ref(tree, 1);
ref.Ref(tree->Edges().front(), 4);
CordzStatistics expected;
expected.size = tree->length;
expected.estimated_memory_usage = SizeOf(tree) + mem_size;
expected.estimated_fair_share_memory_usage = FairShare(tree);
expected.node_count = 1 + leaf_count * (1 + 3 + 1 + flat3_count);
expected.node_counts.flat = leaf_count * (2 + flat3_count);
expected.node_counts.flat_128 = leaf_count * flat3_count;
expected.node_counts.flat_256 = leaf_count;
expected.node_counts.external = leaf_count;
expected.node_counts.substring = leaf_count;
expected.node_counts.btree = 1 + leaf_count;
EXPECT_THAT(SampleCord(tree), EqStatistics(expected));
}
TEST(CordzInfoStatisticsTest, ThreadSafety) {
Notification stop;
static constexpr int kNumThreads = 8;
@@ -471,9 +582,15 @@ TEST(CordzInfoStatisticsTest, ThreadSafety) {
CordRep::Unref(cord.as_tree());
cord.set_inline_size(0);
} else {
// 50/50 Ring or Flat coin toss
// Coin toss to 25% ring, 25% btree, and 50% flat.
CordRep* rep = Flat(256);
rep = (coin_toss(gen) != 0) ? CordRepRing::Create(rep) : rep;
if (coin_toss(gen) != 0) {
if (coin_toss(gen) != 0) {
rep = CordRepRing::Create(rep);
} else {
rep = CordRepBtree::Create(rep);
}
}
cord.make_tree(rep);
// 50/50 sample

@@ -40,6 +40,7 @@ struct CordzStatistics {
size_t substring = 0; // #substring reps
size_t concat = 0; // #concat reps
size_t ring = 0; // #ring buffer reps
size_t btree = 0; // #btree reps
};
// The size of the cord in bytes. This matches the result of Cord::size().
@@ -61,6 +62,8 @@ struct CordzStatistics {
// The total number of nodes referenced by this cord.
// For ring buffer Cords, this includes the 'ring buffer' node.
// For btree Cords, this includes all 'CordRepBtree' tree nodes as well as all
// the substring, flat and external nodes referenced by the tree.
// A value of 0 implies the property has not been recorded.
int64_t node_count = 0;

@@ -198,9 +198,9 @@ class string_view {
// Implicit constructor of a `string_view` from NUL-terminated `str`. When
// accepting possibly null strings, use `absl::NullSafeStringView(str)`
// instead (see below).
// The length check is skipped since it is unnecessary and causes code bloat.
constexpr string_view(const char* str) // NOLINT(runtime/explicit)
: ptr_(str),
length_(str ? CheckLengthInternal(StrlenInternal(str)) : 0) {}
: ptr_(str), length_(str ? StrlenInternal(str) : 0) {}
// Implicit constructor of a `string_view` from a `const char*` and length.
constexpr string_view(const char* data, size_type len)
