Export of internal Abseil changes

--
b20c539b8e21fee7d4d908a8a26a317a3de9d993 by Martijn Vels <mvels@google.com>:

Add CordRepBtree implementation

PiperOrigin-RevId: 385679196

--
96f7753b7af5fd964537d5794dd597bb6e698071 by Derek Mauro <dmauro@google.com>:

Update Abseil dependencies

PiperOrigin-RevId: 385643956

--
67bdae4c686f0df09cc7155633c03218bf23d177 by Abseil Team <absl-team@google.com>:

Fix up some small typos in error messages.

PiperOrigin-RevId: 385625107
GitOrigin-RevId: b20c539b8e21fee7d4d908a8a26a317a3de9d993
Change-Id: I8f602cfe9f7878b0558359ab15efb048caefb3a5
Changed files:
     2  CMake/AbseilDll.cmake
    25  WORKSPACE
     8  absl/base/internal/thread_identity.h
    32  absl/strings/BUILD.bazel
    35  absl/strings/CMakeLists.txt
     4  absl/strings/internal/cord_internal.cc
   947  absl/strings/internal/cord_rep_btree.cc
   851  absl/strings/internal/cord_rep_btree.h
  1352  absl/strings/internal/cord_rep_btree_test.cc
   185  absl/strings/internal/cord_rep_test_util.h
     2  ci/cmake_common.sh
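
For orientation, a minimal usage sketch of the new btree API, based on the declarations in absl/strings/internal/cord_rep_btree.h added below (illustrative only; the Example() wrapper is hypothetical):

#include <cstring>

#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"

void Example() {
  using absl::cord_internal::CordRep;
  using absl::cord_internal::CordRepBtree;
  using absl::cord_internal::CordRepFlat;

  // Build a FLAT data edge holding "Hello".
  CordRepFlat* flat = CordRepFlat::New(5);
  memcpy(flat->Data(), "Hello", 5);
  flat->length = 5;

  // Create() adopts the reference on `flat`; Append() consumes `tree` and
  // returns the (possibly new) tree, keeping each call reference-neutral.
  CordRepBtree* tree = CordRepBtree::Create(flat);
  tree = CordRepBtree::Append(tree, " world");

  char c = tree->GetCharacter(6);  // 'w'
  (void)c;
  CordRep::Unref(tree);
}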

@@ -205,6 +205,8 @@ set(ABSL_INTERNAL_DLL_FILES
"strings/internal/cord_internal.h"
"strings/internal/cord_rep_consume.h"
"strings/internal/cord_rep_consume.cc"
"strings/internal/cord_rep_btree.cc"
"strings/internal/cord_rep_btree.h"
"strings/internal/cord_rep_flat.h"
"strings/internal/cord_rep_ring.cc"
"strings/internal/cord_rep_ring.h"

@@ -15,29 +15,30 @@
#
workspace(name = "com_google_absl")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
# GoogleTest/GoogleMock framework. Used by most unit-tests.
http_archive(
name = "com_google_googletest",
name = "com_google_googletest", # 2021-07-09T13:28:13Z
sha256 = "12ef65654dc01ab40f6f33f9d02c04f2097d2cd9fbe48dc6001b29543583b0ad",
strip_prefix = "googletest-8d51ffdfab10b3fba636ae69bc03da4b54f8c235",
# Keep this URL in sync with ABSL_GOOGLETEST_COMMIT in ci/cmake_common.sh.
urls = ["https://github.com/google/googletest/archive/5bcd8e3bb929714e031a542d303f818e5a5af45d.zip"], # 2021-06-08T22:36:38Z
strip_prefix = "googletest-5bcd8e3bb929714e031a542d303f818e5a5af45d",
sha256 = "3adecb6686ac7367452561dca518fad5a990fb09c5a961bfa1836f15eb774348",
urls = ["https://github.com/google/googletest/archive/8d51ffdfab10b3fba636ae69bc03da4b54f8c235.zip"],
)
# Google benchmark.
http_archive(
name = "com_github_google_benchmark",
urls = ["https://github.com/google/benchmark/archive/7d0d9061d83b663ce05d9de5da3d5865a3845b79.zip"], # 2021-05-11T11:56:00Z
strip_prefix = "benchmark-7d0d9061d83b663ce05d9de5da3d5865a3845b79",
sha256 = "a07789754963e3ea3a1e13fed3a4d48fac0c5f7f749c5065f6c30cd2c1529661",
name = "com_github_google_benchmark", # 2021-07-01T09:02:54Z
sha256 = "1cb4b97a90aa1fd9c8e412a6bc29fc13fc140162a4a0db3811af40befd8c9ea5",
strip_prefix = "benchmark-e451e50e9b8af453f076dec10bd6890847f1624e",
urls = ["https://github.com/google/benchmark/archive/e451e50e9b8af453f076dec10bd6890847f1624e.zip"],
)
# C++ rules for Bazel.
http_archive(
name = "rules_cc",
urls = ["https://github.com/bazelbuild/rules_cc/archive/ab5395627c80e025e824bd005d41f96b20618b9d.zip"], # 2021-05-10T15:35:04Z
strip_prefix = "rules_cc-ab5395627c80e025e824bd005d41f96b20618b9d",
sha256 = "f3908cb40a6577ab0d1ef9e00052739bf69b4313fa7d20b4d61d4521ea67abf3",
name = "rules_cc", # 2021-06-07T16:41:49Z
sha256 = "b295cad8c5899e371dde175079c0a2cdc0151f5127acc92366a8c986beb95c76",
strip_prefix = "rules_cc-daf6ace7cfeacd6a83e9ff2ed659f416537b6c74",
urls = ["https://github.com/bazelbuild/rules_cc/archive/daf6ace7cfeacd6a83e9ff2ed659f416537b6c74.zip"],
)

@@ -188,25 +188,25 @@ void ClearCurrentThreadIdentity();
// May be chosen at compile time via: -DABSL_FORCE_THREAD_IDENTITY_MODE=<mode
// index>
#ifdef ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC
#error ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC cannot be direcly set
#error ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC cannot be directly set
#else
#define ABSL_THREAD_IDENTITY_MODE_USE_POSIX_SETSPECIFIC 0
#endif
#ifdef ABSL_THREAD_IDENTITY_MODE_USE_TLS
#error ABSL_THREAD_IDENTITY_MODE_USE_TLS cannot be direcly set
#error ABSL_THREAD_IDENTITY_MODE_USE_TLS cannot be directly set
#else
#define ABSL_THREAD_IDENTITY_MODE_USE_TLS 1
#endif
#ifdef ABSL_THREAD_IDENTITY_MODE_USE_CPP11
#error ABSL_THREAD_IDENTITY_MODE_USE_CPP11 cannot be direcly set
#error ABSL_THREAD_IDENTITY_MODE_USE_CPP11 cannot be directly set
#else
#define ABSL_THREAD_IDENTITY_MODE_USE_CPP11 2
#endif
#ifdef ABSL_THREAD_IDENTITY_MODE
#error ABSL_THREAD_IDENTITY_MODE cannot be direcly set
#error ABSL_THREAD_IDENTITY_MODE cannot be directly set
#elif defined(ABSL_FORCE_THREAD_IDENTITY_MODE)
#define ABSL_THREAD_IDENTITY_MODE ABSL_FORCE_THREAD_IDENTITY_MODE
#elif defined(_WIN32) && !defined(__MINGW32__)

@@ -269,11 +269,13 @@ cc_library(
name = "cord_internal",
srcs = [
"internal/cord_internal.cc",
"internal/cord_rep_btree.cc",
"internal/cord_rep_consume.cc",
"internal/cord_rep_ring.cc",
],
hdrs = [
"internal/cord_internal.h",
"internal/cord_rep_btree.h",
"internal/cord_rep_consume.h",
"internal/cord_rep_flat.h",
"internal/cord_rep_ring.h",
@@ -296,6 +298,23 @@ cc_library(
"//absl/container:layout",
"//absl/functional:function_ref",
"//absl/meta:type_traits",
"//absl/types:span",
],
)
cc_test(
name = "cord_rep_btree_test",
size = "medium",
srcs = ["internal/cord_rep_btree_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":cord_internal",
":cord_rep_test_util",
":strings",
"//absl/base:config",
"//absl/base:raw_logging_internal",
"@com_google_googletest//:gtest_main",
],
)
@@ -577,6 +596,19 @@ cc_library(
],
)
cc_library(
name = "cord_rep_test_util",
testonly = 1,
hdrs = ["internal/cord_rep_test_util.h"],
copts = ABSL_DEFAULT_COPTS,
deps = [
":cord_internal",
":strings",
"//absl/base:config",
"//absl/base:raw_logging_internal",
],
)
cc_library(
name = "cordz_test_helpers",
testonly = 1,

@@ -555,12 +555,14 @@ absl_cc_library(
cord_internal
HDRS
"internal/cord_internal.h"
"internal/cord_rep_btree.h"
"internal/cord_rep_consume.h"
"internal/cord_rep_flat.h"
"internal/cord_rep_ring.h"
"internal/cord_rep_ring_reader.h"
SRCS
"internal/cord_internal.cc"
"internal/cord_rep_btree.cc"
"internal/cord_rep_consume.cc"
"internal/cord_rep_ring.cc"
COPTS
@@ -847,6 +849,21 @@ absl_cc_library(
PUBLIC
)
absl_cc_library(
NAME
cord_rep_test_util
HDRS
"internal/cord_rep_test_util.h"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::config
absl::cord_internal
absl::raw_logging_internal
absl::strings
TESTONLY
)
absl_cc_library(
NAME
cord_test_helpers
@@ -922,6 +939,24 @@ absl_cc_test(
GTest::gmock_main
)
absl_cc_test(
NAME
cord_rep_btree_test
SRCS
"internal/cord_rep_btree_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::base
absl::config
absl::cord_internal
absl::cord_rep_test_util
absl::core_headers
absl::raw_logging_internal
absl::strings
GTest::gmock_main
)
absl_cc_test(
NAME
cord_ring_test

@@ -18,6 +18,7 @@
#include <memory>
#include "absl/container/inlined_vector.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cord_rep_ring.h"
@@ -50,6 +51,9 @@ void CordRep::Destroy(CordRep* rep) {
rep = left;
continue;
}
} else if (rep->tag == BTREE) {
CordRepBtree::Destroy(rep->btree());
rep = nullptr;
} else if (rep->tag == RING) {
CordRepRing::Destroy(rep->ring());
rep = nullptr;

@@ -0,0 +1,947 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree.h"
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_consume.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth];
using EdgeType = CordRepBtree::EdgeType;
using OpResult = CordRepBtree::OpResult;
using CopyResult = CordRepBtree::CopyResult;
constexpr auto kFront = CordRepBtree::kFront;
constexpr auto kBack = CordRepBtree::kBack;
// Implementation of the various 'Dump' functions.
// Prints the entire tree structure of 'rep'. External callers should
// not specify 'depth' and leave it to its default (0) value.
// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node.
void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream,
int depth = 0) {
// Allow for full height trees + substring -> flat / external nodes.
assert(depth <= CordRepBtree::kMaxDepth + 2);
std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne()
? std::string("Private")
: absl::StrCat("Shared(", rep->refcount.Get(), ")");
std::string sptr = absl::StrCat("0x", absl::Hex(rep));
// Dumps the data contents of `rep` if `include_contents` is true.
// Always emits a new line character.
auto maybe_dump_data = [&stream, include_contents](const CordRep* r) {
if (include_contents) {
// Allow for up to 60 wide display of content data, which with some
// indentation and prefix / labels keeps us within roughly 80-100 wide.
constexpr size_t kMaxDataLength = 60;
stream << ", data = \""
<< CordRepBtree::EdgeData(r).substr(0, kMaxDataLength)
<< (r->length > kMaxDataLength ? "\"..." : "\"");
}
stream << '\n';
};
// For each level, we print the 'shared/private' state and the rep pointer,
// indented by two spaces per recursive depth.
stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") ";
if (rep->tag == BTREE) {
const CordRepBtree* node = rep->btree();
std::string label =
node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf";
stream << label << ", len = " << node->length
<< ", begin = " << node->begin() << ", end = " << node->end()
<< "\n";
for (CordRep* edge : node->Edges()) {
DumpAll(edge, include_contents, stream, depth + 1);
}
} else if (rep->tag == SUBSTRING) {
const CordRepSubstring* substring = rep->substring();
stream << "Substring, len = " << rep->length
<< ", start = " << substring->start;
maybe_dump_data(rep);
DumpAll(substring->child, include_contents, stream, depth + 1);
} else if (rep->tag >= FLAT) {
stream << "Flat, len = " << rep->length;
maybe_dump_data(rep);
} else if (rep->tag == EXTERNAL) {
stream << "Extn, len = " << rep->length;
maybe_dump_data(rep);
}
}
// TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring
// small data out of large reps, and general efficiency of 'always copy small
// data'. Consider making this a cord rep internal library function.
CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) {
assert(n != 0);
assert(offset + n <= rep->length);
assert(offset != 0 || n != rep->length);
if (rep->tag == SUBSTRING) {
CordRepSubstring* substring = rep->substring();
offset += substring->start;
rep = CordRep::Ref(substring->child);
CordRep::Unref(substring);
}
CordRepSubstring* substring = new CordRepSubstring();
substring->length = n;
substring->tag = SUBSTRING;
substring->start = offset;
substring->child = rep;
return substring;
}
// TODO(b/192061034): consider making this a cord rep library function.
inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) {
if (n == rep->length) return rep;
if (n == 0) return CordRep::Unref(rep), nullptr;
return CreateSubstring(rep, offset, n);
}
// TODO(b/192061034): consider making this a cord rep library function.
inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
if (offset == 0) return rep;
return CreateSubstring(rep, offset, rep->length - offset);
}
template <EdgeType edge_type>
inline absl::string_view Consume(absl::string_view s, size_t n) {
return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n);
}
template <EdgeType edge_type>
inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) {
if (edge_type == kBack) {
memcpy(dst, s.data(), n);
return s.substr(n);
} else {
const size_t offset = s.size() - n;
memcpy(dst, s.data() + offset, n);
return s.substr(0, offset);
}
}
// Known issue / optimization weirdness: the store associated with the
// decrement introduces traffic between cpus (even if the result of that
// traffic does nothing), making this faster than a single call to
// refcount.Decrement() checking the zero refcount condition.
template <typename R, typename Fn>
inline void FastUnref(R* r, Fn&& fn) {
if (r->refcount.IsOne()) {
fn(r);
} else if (!r->refcount.DecrementExpectHighRefcount()) {
fn(r);
}
}
// Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT
// node, or a SUBSTRING of an EXTERNAL or FLAT node.
void DeleteLeafEdge(CordRep* rep) {
for (;;) {
if (rep->tag >= FLAT) {
CordRepFlat::Delete(rep->flat());
return;
}
if (rep->tag == EXTERNAL) {
CordRepExternal::Delete(rep->external());
return;
}
assert(rep->tag == SUBSTRING);
CordRepSubstring* substring = rep->substring();
rep = substring->child;
assert(rep->tag == EXTERNAL || rep->tag >= FLAT);
delete substring;
if (rep->refcount.Decrement()) return;
}
}
// StackOperations contains the logic to build a left-most or right-most stack
// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to
// propagate node changes up the stack.
template <EdgeType edge_type>
struct StackOperations {
// Returns true if the node at 'depth' is not shared, i.e. has a refcount
// of one and all of its parent nodes have a refcount of one.
inline bool owned(int depth) const { return depth < share_depth; }
// Returns the node at 'depth'.
inline CordRepBtree* node(int depth) const { return stack[depth]; }
// Builds a `depth` levels deep stack starting at `tree` recording which nodes
// are private in the form of the 'share depth' where nodes are shared.
inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) {
assert(depth <= tree->height());
int current_depth = 0;
while (current_depth < depth && tree->refcount.IsOne()) {
stack[current_depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0);
while (current_depth < depth) {
stack[current_depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
return tree;
}
// Builds a stack with the invariant that all nodes are privately owned / not
// shared. This is used in iterative updates where a previous propagation
// guaranteed all nodes are owned / private.
inline void BuildOwnedStack(CordRepBtree* tree, int height) {
assert(height <= CordRepBtree::kMaxHeight);
int depth = 0;
while (depth < height) {
assert(tree->refcount.IsOne());
stack[depth++] = tree;
tree = tree->Edge(edge_type)->btree();
}
assert(tree->refcount.IsOne());
share_depth = depth + 1;
}
// Processes the final 'top level' result action for the tree.
// See the 'Action' enum for the various action implications.
static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) {
switch (result.action) {
case CordRepBtree::kPopped:
if (ABSL_PREDICT_FALSE(tree->height() >= CordRepBtree::kMaxHeight)) {
ABSL_RAW_LOG(FATAL, "Max height exceeded");
}
return edge_type == kBack ? CordRepBtree::New(tree, result.tree)
: CordRepBtree::New(result.tree, tree);
case CordRepBtree::kCopied:
CordRep::Unref(tree);
ABSL_FALLTHROUGH_INTENDED;
case CordRepBtree::kSelf:
return result.tree;
}
ABSL_INTERNAL_UNREACHABLE;
return result.tree;
}
// Propagate the action result in 'result' up into all nodes of the stack
// starting at depth 'depth'. 'length' contains the extra length of data that
// was added at the lowest level, and is updated into all nodes of the stack.
// See the 'Action' enum for the various action implications.
// If 'propagate' is true, then any copied node values are updated into the
// stack, which is used for iterative processing on the same stack.
template <bool propagate = false>
inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length,
OpResult result) {
// TODO(mvels): revisit the below code to check if 3 loops with 3
// (incremental) conditions is faster than 1 loop with a switch.
// Benchmarking and perf recordings indicate the loop with switch is
// fastest, likely because of indirect jumps on the tight case values and
// dense branches. But it's worth considering 3 loops, as the `action`
// transitions are mono directional. E.g.:
// while (action == kPopped) {
// ...
// }
// while (action == kCopied) {
// ...
// }
// ...
// We also found that an "if () do {}" loop here seems faster, possibly
// because it allows the branch predictor more granular heuristics on
// 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases
// which appear to be the most common use cases.
if (depth != 0) {
do {
CordRepBtree* node = stack[--depth];
const bool owned = depth < share_depth;
switch (result.action) {
case CordRepBtree::kPopped:
assert(!propagate);
result = node->AddEdge<edge_type>(owned, result.tree, length);
break;
case CordRepBtree::kCopied:
result = node->SetEdge<edge_type>(owned, result.tree, length);
if (propagate) stack[depth] = result.tree;
break;
case CordRepBtree::kSelf:
node->length += length;
while (depth > 0) {
node = stack[--depth];
node->length += length;
}
return node;
}
} while (depth > 0);
}
return Finalize(tree, result);
}
// Invokes `Unwind` with `propagate=true` to update the stack node values.
inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length,
OpResult result) {
return Unwind</*propagate=*/true>(tree, depth, length, result);
}
// `share_depth` contains the depth at which the nodes in the stack become
// shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`),
// then `share_depth` is 0. If the 2nd node is shared (and implicitly all
// nodes below that) then `share_depth` is 1, etc. A `share_depth` greater
// than the depth of the stack indicates that none of the nodes in the stack
// are shared.
int share_depth;
NodeStack stack;
};
} // namespace
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
bool include_contents, std::ostream& stream) {
stream << "===================================\n";
if (!label.empty()) {
stream << label << '\n';
stream << "-----------------------------------\n";
}
if (rep) {
DumpAll(rep, include_contents, stream);
} else {
stream << "NULL\n";
}
}
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
std::ostream& stream) {
Dump(rep, label, false, stream);
}
void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) {
Dump(rep, absl::string_view(), false, stream);
}
void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) {
for (CordRep* edge : tree->Edges(begin, end)) {
FastUnref(edge, DeleteLeafEdge);
}
Delete(tree);
}
void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin,
size_t end) {
for (CordRep* edge : tree->Edges(begin, end)) {
FastUnref(edge->btree(), Destroy);
}
Delete(tree);
}
bool CordRepBtree::IsValid(const CordRepBtree* tree) {
#define NODE_CHECK_VALID(x) \
if (!(x)) { \
ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
return false; \
}
#define NODE_CHECK_EQ(x, y) \
if ((x) != (y)) { \
ABSL_RAW_LOG(ERROR, \
"CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \
#y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str()); \
return false; \
}
NODE_CHECK_VALID(tree != nullptr);
NODE_CHECK_EQ(tree->tag, BTREE);
NODE_CHECK_VALID(tree->height() <= kMaxHeight);
NODE_CHECK_VALID(tree->begin() < tree->capacity());
NODE_CHECK_VALID(tree->end() <= tree->capacity());
NODE_CHECK_VALID(tree->begin() <= tree->end());
size_t child_length = 0;
for (CordRep* edge : tree->Edges()) {
NODE_CHECK_VALID(edge != nullptr);
if (tree->height() > 0) {
NODE_CHECK_VALID(edge->tag == BTREE);
NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1);
} else {
NODE_CHECK_VALID(IsDataEdge(edge));
}
child_length += edge->length;
}
NODE_CHECK_EQ(child_length, tree->length);
if (tree->height() > 0) {
for (CordRep* edge : tree->Edges()) {
if (!IsValid(edge->btree())) return false;
}
}
return true;
#undef NODE_CHECK_VALID
#undef NODE_CHECK_EQ
}
#ifndef NDEBUG
CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) {
if (!IsValid(tree)) {
Dump(tree, "CordRepBtree validation failed:", false, std::cout);
ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
}
return tree;
}
const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) {
if (!IsValid(tree)) {
Dump(tree, "CordRepBtree validation failed:", false, std::cout);
ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
}
return tree;
}
#endif // NDEBUG
template <EdgeType edge_type>
inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) {
if (size() >= kMaxCapacity) return {New(edge), kPopped};
OpResult result = ToOpResult(owned);
result.tree->Add<edge_type>(edge);
result.tree->length += delta;
return result;
}
template <EdgeType edge_type>
OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
OpResult result;
const size_t idx = index(edge_type);
if (owned) {
result = {this, kSelf};
CordRep::Unref(edges_[idx]);
} else {
// Create a copy containing all unchanged edges. Unchanged edges are the
// open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
// We conveniently cover both cases using a constexpr `shift` being 0 or 1
// as `end :== back + 1`.
result = {CopyRaw(), kCopied};
constexpr int shift = edge_type == kFront ? 1 : 0;
for (CordRep* r : Edges(begin() + shift, back() + shift)) {
CordRep::Ref(r);
}
}
result.tree->edges_[idx] = edge;
result.tree->length += delta;
return result;
}
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) {
const int depth = tree->height();
const size_t length = rep->length;
StackOperations<edge_type> ops;
CordRepBtree* leaf = ops.BuildStack(tree, depth);
const OpResult result =
leaf->AddEdge<edge_type>(ops.owned(depth), rep, length);
return ops.Unwind(tree, depth, length, result);
}
template <>
CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data,
size_t extra) {
CordRepBtree* leaf = CordRepBtree::New(0);
size_t length = 0;
size_t end = 0;
const size_t cap = leaf->capacity();
while (!data.empty() && end != cap) {
auto* flat = CordRepFlat::New(data.length() + extra);
flat->length = (std::min)(data.length(), flat->Capacity());
length += flat->length;
leaf->edges_[end++] = flat;
data = Consume<kBack>(flat->Data(), data, flat->length);
}
leaf->length = length;
leaf->set_end(end);
return leaf;
}
template <>
CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data,
size_t extra) {
CordRepBtree* leaf = CordRepBtree::New(0);
size_t length = 0;
size_t begin = leaf->capacity();
leaf->set_end(leaf->capacity());
while (!data.empty() && begin != 0) {
auto* flat = CordRepFlat::New(data.length() + extra);
flat->length = (std::min)(data.length(), flat->Capacity());
length += flat->length;
leaf->edges_[--begin] = flat;
data = Consume<kFront>(flat->Data(), data, flat->length);
}
leaf->length = length;
leaf->set_begin(begin);
return leaf;
}
template <>
absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data,
size_t extra) {
assert(!data.empty());
assert(size() < capacity());
AlignBegin();
const size_t cap = capacity();
do {
CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
const size_t n = (std::min)(data.length(), flat->Capacity());
flat->length = n;
edges_[fetch_add_end(1)] = flat;
data = Consume<kBack>(flat->Data(), data, n);
} while (!data.empty() && end() != cap);
return data;
}
template <>
absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data,
size_t extra) {
assert(!data.empty());
assert(size() < capacity());
AlignEnd();
do {
CordRepFlat* flat = CordRepFlat::New(data.length() + extra);
const size_t n = (std::min)(data.length(), flat->Capacity());
flat->length = n;
edges_[sub_fetch_begin(1)] = flat;
data = Consume<kFront>(flat->Data(), data, n);
} while (!data.empty() && begin() != 0);
return data;
}
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data,
size_t extra) {
if (ABSL_PREDICT_FALSE(data.empty())) return tree;
const size_t original_data_size = data.size();
int depth = tree->height();
StackOperations<edge_type> ops;
CordRepBtree* leaf = ops.BuildStack(tree, depth);
// If there is capacity in the last edge, append as much data
// as possible into this last edge.
if (leaf->size() < leaf->capacity()) {
OpResult result = leaf->ToOpResult(ops.owned(depth));
data = result.tree->AddData<edge_type>(data, extra);
if (data.empty()) {
result.tree->length += original_data_size;
return ops.Unwind(tree, depth, original_data_size, result);
}
// We added some data into this leaf, but not all. Propagate the added
// length to the top most node, and rebuild the stack with any newly copied
// or updated nodes. From this point on, the path (leg) from the top most
// node to the right-most node towards the leaf node is privately owned.
size_t delta = original_data_size - data.size();
assert(delta > 0);
result.tree->length += delta;
tree = ops.Propagate(tree, depth, delta, result);
ops.share_depth = depth + 1;
}
// We were unable to append all data into the existing right-most leaf node.
// This means all remaining data must be put into (a) new leaf node(s) which
// we append to the tree. To make this efficient, we iteratively build full
// leaf nodes from `data` until the created leaf contains all remaining data.
// We utilize the `Unwind` method to merge the created leaf into the first
// level towards root that has capacity. On each iteration with remaining
// data, we rebuild the stack in the knowledge that right-most nodes are
// privately owned after the first `Unwind` completes.
for (;;) {
OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped};
if (result.tree->length == data.size()) {
return ops.Unwind(tree, depth, result.tree->length, result);
}
data = Consume<edge_type>(data, result.tree->length);
tree = ops.Unwind(tree, depth, result.tree->length, result);
depth = tree->height();
ops.BuildOwnedStack(tree, depth);
}
}
template <EdgeType edge_type>
CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) {
assert(dst->height() >= src->height());
// Capture source length as we may consume / destroy `src`.
const size_t length = src->length;
// We attempt to merge `src` at its corresponding height in `dst`.
const int depth = dst->height() - src->height();
StackOperations<edge_type> ops;
CordRepBtree* merge_node = ops.BuildStack(dst, depth);
// If there is enough space in `merge_node` for all edges from `src`, add all
// edges to this node, making a fresh copy as needed if not privately owned.
// If `merge_node` does not have capacity for `src`, we rely on `Unwind` and
// `Finalize` to merge `src` into the first level towards `root` where there
// is capacity for another edge, or create a new top level node.
OpResult result;
if (merge_node->size() + src->size() <= kMaxCapacity) {
result = merge_node->ToOpResult(ops.owned(depth));
result.tree->Add<edge_type>(src->Edges());
result.tree->length += src->length;
if (src->refcount.IsOne()) {
Delete(src);
} else {
for (CordRep* edge : src->Edges()) CordRep::Ref(edge);
CordRepBtree::Unref(src);
}
} else {
result = {src, kPopped};
}
// Unless we merged at the top level (i.e.: src and dst are equal height),
// unwind the result towards the top level, and finalize the result.
if (depth) {
return ops.Unwind(dst, depth, length, result);
}
return ops.Finalize(dst, result);
}
CopyResult CordRepBtree::CopySuffix(size_t offset) {
assert(offset < this->length);
// As long as `offset` starts inside the last edge, we can 'drop' the current
// depth. For the most extreme example: if offset references the last data
// edge in the tree, there is only a single edge / path from the top of the
// tree to that last edge, so we can drop all the nodes except that edge.
// The fast path check for this is `back->length >= length - offset`.
int height = this->height();
CordRepBtree* node = this;
size_t len = node->length - offset;
CordRep* back = node->Edge(kBack);
while (back->length >= len) {
offset = back->length - len;
if (--height < 0) {
return {MakeSubstring(CordRep::Ref(back), offset), height};
}
node = back->btree();
back = node->Edge(kBack);
}
if (offset == 0) return {CordRep::Ref(node), height};
// Offset does not point into the last edge, so we span at least two edges.
// Find the index of offset with `IndexBeyond` which provides us the edge
// 'beyond' the offset if offset is not a clean starting point of an edge.
Position pos = node->IndexBeyond(offset);
CordRepBtree* sub = node->CopyToEndFrom(pos.index, len);
const CopyResult result = {sub, height};
// `pos.n` contains a non zero value if the offset is not an exact starting
// point of an edge. In this case, `pos.n` contains the 'trailing' amount of
// bytes of the edge preceding that in `pos.index`. We need to iteratively
// adjust the preceding edge with the 'broken' offset until we have a perfect
// start of the edge.
while (pos.n != 0) {
assert(pos.index >= 1);
const size_t begin = pos.index - 1;
sub->set_begin(begin);
CordRep* const edge = node->Edge(begin);
len = pos.n;
offset = edge->length - len;
if (--height < 0) {
sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len);
return result;
}
node = edge->btree();
pos = node->IndexBeyond(offset);
CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len);
sub->edges_[begin] = nsub;
sub = nsub;
}
sub->set_begin(pos.index);
return result;
}
CopyResult CordRepBtree::CopyPrefix(size_t n) {
assert(n > 0);
assert(n <= this->length);
// As long as `n` does not exceed the length of the first edge, we can 'drop'
// the current depth. For the most extreme example: if we'd copy a 1 byte
// prefix from a tree, there is only a single edge / path from the top of the
// tree to the single data edge containing this byte, so we can drop all the
// nodes except the data node.
int height = this->height();
CordRepBtree* node = this;
CordRep* front = node->Edge(kFront);
while (front->length >= n) {
if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1};
node = front->btree();
front = node->Edge(kFront);
}
if (node->length == n) return {CordRep::Ref(node), height};
// `n` spans at least two nodes, find the end point of the span.
Position pos = node->IndexOf(n);
// Create a partial copy of the node up to `pos.index`, with a defined length
// of `n`. Any 'partial last edge' is added further below as needed.
CordRepBtree* sub = node->CopyBeginTo(pos.index, n);
const CopyResult result = {sub, height};
// `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as
// this is not zero, we don't have a 'clean cut', so we need to make a
// (partial) copy of that last edge, and repeat this until pos.n is zero.
while (pos.n != 0) {
size_t end = pos.index;
n = pos.n;
CordRep* edge = node->Edge(pos.index);
if (--height < 0) {
sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n);
sub->set_end(end);
AssertValid(result.edge->btree());
return result;
}
node = edge->btree();
pos = node->IndexOf(n);
CordRepBtree* nsub = node->CopyBeginTo(pos.index, n);
sub->edges_[end++] = nsub;
sub->set_end(end);
sub = nsub;
}
sub->set_end(pos.index);
AssertValid(result.edge->btree());
return result;
}
CordRep* CordRepBtree::SubTree(size_t offset, size_t n) {
assert(n <= this->length);
assert(offset <= this->length - n);
if (ABSL_PREDICT_FALSE(n == 0)) return nullptr;
CordRepBtree* node = this;
int height = node->height();
Position front = node->IndexOf(offset);
CordRep* left = node->edges_[front.index];
while (front.n + n <= left->length) {
if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n);
node = left->btree();
front = node->IndexOf(front.n);
left = node->edges_[front.index];
}
const Position back = node->IndexBefore(front, n);
CordRep* const right = node->edges_[back.index];
assert(back.index > front.index);
// Get partial suffix and prefix entries.
CopyResult prefix;
CopyResult suffix;
if (height > 0) {
// Copy prefix and suffix of the boundary nodes.
prefix = left->btree()->CopySuffix(front.n);
suffix = right->btree()->CopyPrefix(back.n);
// If there is an edge between the prefix and suffix edges, then the tree
// must remain at its previous (full) height. If we have no edges between
// prefix and suffix edges, then the tree must be as high as either the
// suffix or prefix edges (which are collapsed to their minimum heights).
if (front.index + 1 == back.index) {
height = (std::max)(prefix.height, suffix.height) + 1;
}
// Raise prefix and suffixes to the new tree height.
for (int h = prefix.height + 1; h < height; ++h) {
prefix.edge = CordRepBtree::New(prefix.edge);
}
for (int h = suffix.height + 1; h < height; ++h) {
suffix.edge = CordRepBtree::New(suffix.edge);
}
} else {
// Leaf node, simply take substrings for prefix and suffix.
prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1};
suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1};
}
// Compose resulting tree.
CordRepBtree* sub = CordRepBtree::New(height);
size_t end = 0;
sub->edges_[end++] = prefix.edge;
for (CordRep* r : node->Edges(front.index + 1, back.index)) {
sub->edges_[end++] = CordRep::Ref(r);
}
sub->edges_[end++] = suffix.edge;
sub->set_end(end);
sub->length = n;
return AssertValid(sub);
}
CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left,
CordRepBtree* right) {
return left->height() >= right->height() ? Merge<kBack>(left, right)
: Merge<kFront>(right, left);
}
bool CordRepBtree::IsFlat(absl::string_view* fragment) const {
if (height() == 0 && size() == 1) {
if (fragment) *fragment = Data(begin());
return true;
}
return false;
}
bool CordRepBtree::IsFlat(size_t offset, const size_t n,
absl::string_view* fragment) const {
assert(n <= this->length);
assert(offset <= this->length - n);
if (ABSL_PREDICT_FALSE(n == 0)) return false;
int height = this->height();
const CordRepBtree* node = this;
for (;;) {
const Position front = node->IndexOf(offset);
const CordRep* edge = node->Edge(front.index);
if (edge->length < front.n + n) return false;
if (--height < 0) {
if (fragment) *fragment = EdgeData(edge).substr(front.n, n);
return true;
}
offset = front.n;
node = node->Edge(front.index)->btree();
}
}
char CordRepBtree::GetCharacter(size_t offset) const {
assert(offset < length);
const CordRepBtree* node = this;
int height = node->height();
for (;;) {
Position front = node->IndexOf(offset);
if (--height < 0) return node->Data(front.index)[front.n];
offset = front.n;
node = node->Edge(front.index)->btree();
}
}
Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) {
// The inlined version in `GetAppendBuffer()` deals with all heights <= 3.
assert(height() >= 4);
assert(refcount.IsOne());
// Build a stack of nodes we may potentially need to update if we find a
// non-shared FLAT with capacity at the leaf level.
const int depth = height();
CordRepBtree* node = this;
CordRepBtree* stack[kMaxDepth];
for (int i = 0; i < depth; ++i) {
node = node->Edge(kBack)->btree();
if (!node->refcount.IsOne()) return {};
stack[i] = node;
}
// Must be a privately owned flat.
CordRep* const edge = node->Edge(kBack);
if (!edge->refcount.IsOne() || edge->tag < FLAT) return {};
// Must have capacity.
const size_t avail = edge->flat()->Capacity() - edge->length;
if (avail == 0) return {};
// Build span on remaining capacity.
size_t delta = (std::min)(size, avail);
Span<char> span = {edge->flat()->Data() + edge->length, delta};
edge->length += delta;
this->length += delta;
for (int i = 0; i < depth; ++i) {
stack[i]->length += delta;
}
return span;
}
CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) {
if (rep->tag == BTREE) return rep->btree();
CordRepBtree* node = nullptr;
auto consume = [&node](CordRep* r, size_t offset, size_t length) {
r = MakeSubstring(r, offset, length);
if (node == nullptr) {
node = New(r);
} else {
node = CordRepBtree::AddCordRep<kBack>(node, r);
}
};
Consume(rep, consume);
return node;
}
CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(rep->tag == BTREE)) {
return MergeTrees(tree, rep->btree());
}
auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
r = MakeSubstring(r, offset, length);
tree = CordRepBtree::AddCordRep<kBack>(tree, r);
};
Consume(rep, consume);
return tree;
}
CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(rep->tag == BTREE)) {
return MergeTrees(rep->btree(), tree);
}
auto consume = [&tree](CordRep* r, size_t offset, size_t length) {
r = MakeSubstring(r, offset, length);
tree = CordRepBtree::AddCordRep<kFront>(tree, r);
};
ReverseConsume(rep, consume);
return tree;
}
CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data,
size_t extra) {
return CordRepBtree::AddData<kBack>(tree, data, extra);
}
CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data,
size_t extra) {
return CordRepBtree::AddData<kFront>(tree, data, extra);
}
template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree,
CordRep* rep);
template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree,
CordRep* rep);
template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree,
absl::string_view data,
size_t extra);
template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree,
absl::string_view data,
size_t extra);
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

@@ -0,0 +1,851 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
#include <cassert>
#include <cstdint>
#include <iosfwd>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/optimization.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
class CordRepBtreeNavigator;
// CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
// Data is stored at the leaf level only, non leaf nodes contain down pointers
// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT
// or EXTERNAL nodes. The implementation allows for data to be added to either
// end of the tree only; it does not provide any 'insert' logic. This has the
// benefit that we can expect good fill ratios: all nodes except the outer
// 'legs' will have 100% fill ratios for trees built using Append/Prepend
// methods. Merged trees will typically have a fill ratio well above 50% as in a
// similar fashion, one side of the merged tree will typically have a 100% fill
// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or
// better, and the tree never needs balancing.
//
// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
// input unless explicitly stated otherwise. All functions returning a CordRep*
// or CordRepBtree* instance transfer a reference back to the caller.
// Simplified, callers both 'donate' and 'consume' a reference count on each
// call, simplifying the API. An example of building a tree:
//
// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
// tree = CordRepBtree::Append(tree, MakeFlat("world"));
//
// In the above example, all inputs are consumed, making each call affecting
// `tree` reference count neutral. The returned `tree` value can be different
// from the input if the input is shared with other threads, or if the tree
// grows in height, but callers typically never have to concern themselves with
// that and trust that all methods DTRT at all times.
class CordRepBtree : public CordRep {
public:
// EdgeType identifies `front` and `back` enum values.
// Various implementations in CordRepBtree such as `Add` and `Edge` are
// generic and templated on operating on either of the boundary edges.
// For more information on the possible edges contained in a CordRepBtree
// instance see the documentation for `edges_`.
enum class EdgeType { kFront, kBack };
// Convenience constants into `EdgeType`
static constexpr EdgeType kFront = EdgeType::kFront;
static constexpr EdgeType kBack = EdgeType::kBack;
// Maximum number of edges: based on experiments and performance data, we can
// pick suitable values resulting in optimum cacheline aligned values. The
// preferred values are based on 64-bit systems where we aim to align this
// class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc.
// TODO(b/192061034): experiment with alternative sizes.
static constexpr size_t kMaxCapacity = 6;
// Reasonable maximum height of the btree. We can expect a fill ratio of at
// least 50%: trees are always expanded at the front or back. Concatenating
// trees will then typically fold at the top most node, where the lower nodes
// are at least at capacity on one side of joined inputs. At a lower fill
// rate of 4 edges per node, we have capacity for ~16 million leaf nodes.
// We will fail / abort if an application ever exceeds this height, which
// should be extremely rare (near impossible) and be an indication of an
// application error: we do not assume it reasonable for any application to
// operate correctly with such monster trees.
// Another compelling reason for the number `12` is that any contextual stack
// required for navigation or insertion requires 12 words and 12 bytes, which
// fits inside 2 cache lines with some room to spare, and is reasonable as a
// local stack variable compared to Cord's current near 400 bytes stack use.
// The maximum `height` value of a node is then `kMaxDepth - 1` as node height
// values start with a value of 0 for leaf nodes.
static constexpr int kMaxDepth = 12;
static constexpr int kMaxHeight = kMaxDepth - 1;
// `Action` defines the action for unwinding changes done at the btree's leaf
// level that need to be propagated up to the parent node(s). Each operation
// on a node has an effect / action defined as follows:
// - kSelf
// The operation (add / update, etc) was performed directly on the node as
// the node is private to the current thread (i.e.: not shared directly or
// indirectly through a refcount > 1). Changes can be propagated directly to
// all parent nodes as all parent nodes are also then private to the current
// thread.
// - kCopied
// The operation (add / update, etc) was performed on a copy of the original
// node, as the node is (potentially) directly or indirectly shared with
// other threads. Changes need to be propagated into the parent nodes where
// the old down pointer must be unreffed and replaced with this new copy.
// Such changes to parent nodes may themselves require a copy if the parent
// node is also shared. A kCopied action can propagate all the way to the
// top node where we then must unref the `tree` input provided by the
// caller, and return the new copy.
// - kPopped
// The operation (typically add) could not be satisfied due to insufficient
// capacity in the targeted node, and a new 'leg' was created that needs to
// be added into the parent node. For example, adding a FLAT inside a leaf
// node that is at capacity will create a new leaf node containing that
// FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
// cascade up the tree if parent nodes are also at capacity. A 'Popped'
// action propagating all the way to the top of the tree will result in
// the tree becoming one level higher than the current tree through a final
// `CordRepBtree::New(tree, popped)` call, resulting in a new top node
// referencing the old tree and the new (fully popped upwards) 'leg'.
enum Action { kSelf, kCopied, kPopped };
// Result of an operation on a node. See the `Action` enum for details.
struct OpResult {
CordRepBtree* tree;
Action action;
};
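// For example, `AddEdge` (below) returns `OpResult{New(edge), kPopped}` when
// this node is at capacity, and the caller then merges that new leg into the
// parent node (or into a new root if the pop reaches the top of the tree).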
// Return value of the CopyPrefix and CopySuffix methods which can
// return a node or data edge at any height inside the tree.
// A height of 0 defines the lowest (leaf) node, a height of -1 identifies
// `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
struct CopyResult {
CordRep* edge;
int height;
};
// Logical position inside a node:
// - index: index of the edge.
// - n: size or offset value depending on context.
struct Position {
size_t index;
size_t n;
};
// Creates a btree from the given input. Adopts a ref of `rep`.
// If the input `rep` is itself a btree, i.e., `tag == BTREE`, then this
// function immediately returns `rep->btree()`. If the input is a valid data
// edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
// as the sole data edge. Else, the input is assumed to be a (legacy) concat
// tree, and the input is consumed and transformed into a btree().
static CordRepBtree* Create(CordRep* rep);
// Destroys the provided tree. Should only be called by cord internal API's,
// typically after a ref_count.Decrement() on the last reference count.
static void Destroy(CordRepBtree* tree);
// Appends / Prepends an existing CordRep instance to this tree.
// The below methods accept three types of input:
// 1) `rep` is a data node (See `IsDataEdge()` for valid data edges).
// `rep` is appended or prepended to this tree 'as is'.
// 2) `rep` is a BTREE.
// `rep` is merged into `tree` respecting the Append/Prepend order.
// 3) `rep` is some other (legacy) type.
// `rep` is converted in place and added to `tree`
// Requires `tree` and `rep` to be not null.
static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep);
static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep);
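// Illustrative sketch: merging an existing btree rep into `tree` (the call
// adopts a reference on `other`, a hypothetical CordRep* held by the caller):
//   tree = CordRepBtree::Append(tree, other);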
// Append/Prepend the data in `data` to this tree.
// The `extra` parameter defines how much extra capacity should be allocated
// for any additional FLAT being allocated. This is an optimization hint from
// the caller. For example, a caller may need to add 2 string_views of data
// "abc" and "defghi" which are not consecutive. The caller can in this case
// invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated
// where possible with at least 6 bytes of extra capacity beyond `length`.
// This helps avoid data getting fragmented over multiple flats.
// There is no limit on the size of `data`. If `data` cannot be stored inside
// a single flat, then the function will iteratively add flats until all data
// has been consumed and appended or prepended to the tree.
static CordRepBtree* Append(CordRepBtree* tree, string_view data,
size_t extra = 0);
static CordRepBtree* Prepend(CordRepBtree* tree, string_view data,
size_t extra = 0);
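// Illustrative sketch of the `extra` hint described above:
//   tree = CordRepBtree::Append(tree, "abc", /*extra=*/6);
//   // The flat created for "abc" now has at least 6 bytes of spare capacity,
//   // which a follow-up append can fill (e.g. via GetAppendBuffer below).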
// Returns a new tree, containing `n` bytes of data from this instance
// starting at offset `offset`. Where possible, the returned tree shares
// (re-uses) data edges and nodes with this instance to minimize the
// combined memory footprint of both trees.
// Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
CordRep* SubTree(size_t offset, size_t n);
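// Illustrative sketch: share 100 bytes starting at offset 10 without copying
// the underlying data edges:
//   CordRep* middle = tree->SubTree(/*offset=*/10, /*n=*/100);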
// Returns the character at the given offset.
char GetCharacter(size_t offset) const;
// Returns true if this node holds a single data edge, and if so, sets
// `fragment` to reference the contained data. `fragment` is an optional
// output parameter and allowed to be null.
bool IsFlat(absl::string_view* fragment) const;
// Returns true if the data of `n` bytes starting at offset `offset`
// is contained in a single data edge, and if so, sets fragment to reference
// the contained data. `fragment` is an optional output parameter and allowed
// to be null.
bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const;
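// Illustrative sketch: test whether bytes [10, 20) live in a single data edge:
//   absl::string_view fragment;
//   if (tree->IsFlat(/*offset=*/10, /*n=*/10, &fragment)) { /* use fragment */ }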
// Returns a span (mutable range of bytes) of up to `size` bytes into the
// last FLAT data edge inside this tree under the following conditions:
// - none of the nodes down into the FLAT node are shared.
// - the last data edge in this tree is a non-shared FLAT.
// - the referenced FLAT has additional capacity available.
// If all these conditions are met, a non-empty span is returned, and the
// length of the flat node and involved tree nodes have been increased by
// `span.length()`. The caller is responsible for immediately assigning values
// to all uninitialized data referenced by the returned span.
// Requires `this->refcount.IsOne()`: this function forces the caller to do
// this fast path check on the top level node, as this is the most commonly
// shared node of a cord tree.
Span<char> GetAppendBuffer(size_t size);
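// Illustrative sketch (requires `tree->refcount.IsOne()`; `pending_data` is a
// hypothetical buffer of bytes waiting to be appended):
//   absl::Span<char> span = tree->GetAppendBuffer(10);
//   if (!span.empty()) {
//     memcpy(span.data(), pending_data, span.size());  // span.size() <= 10
//   }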
// Returns the `height` of the tree. The height of a tree is limited to
// kMaxHeight. `height` is implemented as an `int` as in some places we
// use negative (-1) values for 'data edges'.
int height() const { return static_cast<int>(storage[0]); }
// Properties: begin, back, end, front/back boundary indexes.
size_t begin() const { return static_cast<size_t>(storage[1]); }
size_t back() const { return static_cast<size_t>(storage[2]) - 1; }
size_t end() const { return static_cast<size_t>(storage[2]); }
size_t index(EdgeType edge) const {
return edge == kFront ? begin() : back();
}
// Properties: size and capacity.
// `capacity` contains the current capacity of this instance, where
// `kMaxCapacity` contains the maximum capacity of a btree node.
// For now, `capacity` and `kMaxCapacity` return the same value, but this may
// change in the future if we see benefit in dynamically sizing 'small' nodes
// to 'large' nodes for large data trees.
size_t size() const { return end() - begin(); }
size_t capacity() const { return kMaxCapacity; }
// Edge access
inline CordRep* Edge(size_t index) const;
inline CordRep* Edge(EdgeType edge_type) const;
inline absl::Span<CordRep* const> Edges() const;
inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const;
// Returns reference to the data edge at `index`.
// Requires this instance to be a leaf node, and `index` to be a valid index.
inline absl::string_view Data(size_t index) const;
static const char* EdgeDataPtr(const CordRep* r);
static absl::string_view EdgeData(const CordRep* r);
// Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
// holding a FLAT or EXTERNAL child rep.
static bool IsDataEdge(const CordRep* rep);
// Diagnostics
static bool IsValid(const CordRepBtree* tree);
static CordRepBtree* AssertValid(CordRepBtree* tree);
static const CordRepBtree* AssertValid(const CordRepBtree* tree);
static void Dump(const CordRep* rep, std::ostream& stream);
static void Dump(const CordRep* rep, absl::string_view label,
std::ostream& stream);
static void Dump(const CordRep* rep, absl::string_view label,
bool include_contents, std::ostream& stream);
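// Illustrative sketch: print the tree structure, including edge contents:
//   CordRepBtree::Dump(tree, "after append", /*include_contents=*/true,
//                      std::cout);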
// Adds the edge `edge` to this node if possible. `owned` indicates if the
// current node is potentially shared or not with other threads. Returns:
// - {kSelf, <this>}
// The edge was directly added to this node.
// - {kCopied, <node>}
// The edge was added to a copy of this node.
// - {kPopped, New(edge, height())}
// A new leg with the edge was created as this node has no extra capacity.
template <EdgeType edge_type>
inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta);
// Replaces the front or back edge with the provided new edge. Returns:
// - {kSelf, <this>}
// The edge was directly set in this node. The old edge is unreffed.
// - {kCopied, <node>}
// A copy of this node was created with the new edge value.
// In both cases, the function adopts a reference on `edge`.
template <EdgeType edge_type>
OpResult SetEdge(bool owned, CordRep* edge, size_t delta);
// Creates a new empty node at the specified height.
static CordRepBtree* New(int height = 0);
// Creates a new node containing `rep`, with the height being computed
// automatically based on the type of `rep`.
static CordRepBtree* New(CordRep* rep);
// Creates a new node containing both `front` and `back` at height
// `front.height() + 1`. Requires `back.height() == front.height()`.
static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back);
private:
CordRepBtree() = default;
~CordRepBtree() = default;
// Initializes the main properties `tag`, `begin`, `end`, `height`.
inline void InitInstance(int height, size_t begin = 0, size_t end = 0);
// Direct property access begin / end
void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); }
void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); }
// Decreases the value of `begin` by `n`, and returns the new value. Notice
// how this returns the new value unlike atomic::fetch_add which returns the
// old value. This is because this is used to prepend edges at 'begin - 1'.
size_t sub_fetch_begin(size_t n) {
storage[1] -= static_cast<uint8_t>(n);
return storage[1];
}
// Increases the value of `end` by `n`, and returns the previous value. This
// function is typically used to append edges at 'end'.
size_t fetch_add_end(size_t n) {
const uint8_t current = storage[2];
storage[2] = static_cast<uint8_t>(current + n);
return current;
}
// Returns the index of the last edge starting on, or before `offset`, with
// `n` containing the relative offset of `offset` inside that edge.
// Requires `offset` < length.
Position IndexOf(size_t offset) const;
// Returns the index of the last edge starting before `offset`, with `n`
// containing the relative offset of `offset` inside that edge.
// This function is useful to find the edges for some span of bytes ending at
// `offset` (i.e., `n` bytes). For example:
//
// Position pos = IndexBefore(n)
// edges = Edges(begin(), pos.index) // All full edges (may be empty)
// last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty)
//
// Requires 0 < `offset` <= length.
Position IndexBefore(size_t offset) const;
// Identical to the above function except starting from the position `front`.
// This function is equivalent to `IndexBefore(front.n + offset)`, with
// the difference that this function is optimized to start at `front.index`.
Position IndexBefore(Position front, size_t offset) const;
// Returns the index of the edge directly beyond the edge containing offset
// `offset`, with `n` containing the distance of that edge from `offset`.
// This function is useful for iteratively finding suffix nodes and remaining
// partial bytes in left-most suffix nodes as for example in CopySuffix.
// Requires `offset` < length.
Position IndexBeyond(size_t offset) const;
// Destruction
static void DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end);
static void DestroyNonLeaf(CordRepBtree* tree, size_t begin, size_t end);
static void DestroyTree(CordRepBtree* tree, size_t begin, size_t end);
static void Delete(CordRepBtree* tree) { delete tree; }
// Creates a new leaf node containing as much data as possible from `data`.
// The data is added either forwards or reversed depending on `edge_type`.
// Callers must check the length of the returned node to determine if all data
// was copied or not.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
// Creates a raw copy of this Btree node, copying all properties, but
// without adding any references to existing edges.
CordRepBtree* CopyRaw() const;
// Creates a full copy of this Btree node, adding a reference on all edges.
CordRepBtree* Copy() const;
// Creates a partial copy of this Btree node, copying all edges up to `end`,
// adding a reference on each copied edge, and sets the length of the newly
// created copy to `new_length`.
CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const;
// Creates a partial copy of this Btree node, copying all edges starting at
// `begin`, adding a reference on each copied edge, and setting the length of
// the newly created copy to `new_length`.
CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const;
// Returns a tree containing the result of appending `right` to `left`.
static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right);
// Fallback functions for `Create()`, `Append()` and `Prepend()` which
// deal with legacy / non-conforming input, i.e., CONCAT trees.
static CordRepBtree* CreateSlow(CordRep* rep);
static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep);
static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep);
// Aligns existing edges to start at index 0, to allow for a new edge to be
// added to the back of the current edges.
inline void AlignBegin();
// Aligns existing edges to end at `capacity`, to allow for a new edge to be
// added in front of the current edges.
inline void AlignEnd();
// Adds the provided edge to this node.
// Requires this node to have capacity for the edge. Realigns / moves
// existing edges as needed to prepend or append the new edge.
template <EdgeType edge_type>
inline void Add(CordRep* rep);
// Adds the provided edges to this node.
// Requires this node to have capacity for the edges. Realigns / moves
// existing edges as needed to prepend or append the new edges.
template <EdgeType edge_type>
inline void Add(absl::Span<CordRep* const>);
// Adds data from `data` to this node until either all data has been consumed,
// or there is no more capacity for additional flat nodes inside this node.
// Requires the current node to be a leaf node, data to be non-empty, and the
// current node to have capacity for at least one more data edge.
// Returns any remaining data from `data` that was not added, which is,
// depending on the edge type (front / back), either the remaining prefix or
// suffix of the input.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
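// For instance (an illustrative sketch; actual flat sizes vary): appending
// (kBack) 1000 bytes into a leaf that can only absorb roughly 600 more bytes
// consumes the leading ~600 bytes and returns the remaining ~400-byte suffix,
// while prepending (kFront) consumes the trailing ~600 bytes and returns the
// leading ~400-byte prefix.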
template <EdgeType edge_type>
absl::string_view AddData(absl::string_view data, size_t extra);
// Replace the front or back edge with the provided value.
// Adopts a reference on `edge` and unrefs the old edge.
template <EdgeType edge_type>
inline void SetEdge(CordRep* edge);
// Returns a partial copy of the current tree containing the first `n` bytes
// of data. `CopyResult` contains both the resulting edge and its height. The
// resulting tree may have a smaller height than the current tree, or may even
// be a single matching data edge. For example, if `n == 1`, then the result
// will be the single data edge, and height will be set to -1 (one below the
// owning leaf node). If `n == 0`, this function returns null.
// Requires `n <= length`.
CopyResult CopyPrefix(size_t n);
// Returns a partial copy of the current tree containing all data starting
// after `offset`. `CopyResult` contains both the resulting edge and its
// height. The resulting tree may have a smaller height than the current tree,
// or may even be a single matching data edge. For example, if
// `offset == length - 1`, then the result will be a single data edge, and
// height will be set to -1 (one below the owning leaf node).
// Requires `offset < length`.
CopyResult CopySuffix(size_t offset);
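//
// Illustrative sketch for CopyPrefix / CopySuffix above: for a tree whose
// leaf holds a single 10-byte flat "abcdefghij", CopyPrefix(4) yields a data
// edge holding "abcd" and CopySuffix(9) yields a data edge holding "j"; in
// both cases the returned `CopyResult::height` is -1 since the result is a
// single data edge.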
// Returns an OpResult value of {this, kSelf} or {Copy(), kCopied}
// depending on the value of `owned`.
inline OpResult ToOpResult(bool owned);
// Adds `rep` to the specified tree, returning the modified tree.
template <EdgeType edge_type>
static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep);
// Adds `data` to the specified tree, returning the modified tree.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data,
size_t extra = 0);
// Merges `src` into `dst` with `src` being added either before (kFront) or
// after (kBack) `dst`. Requires the height of `dst` to be greater than or
// equal to the height of `src`.
template <EdgeType edge_type>
static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src);
// Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
// implements an inlined version for trees of limited height (3 levels);
// GetAppendBufferSlow implements the logic for larger trees.
Span<char> GetAppendBufferSlow(size_t size);
// `edges_` contains all edges starting from this instance.
// These are explicitly `child` edges only; a cord btree (or any cord tree,
// for that matter) does not store `parent` pointers anywhere: multiple trees /
// parents can reference the same shared child edge. The type of these edges
// depends on the height of the node. `Leaf nodes` (height == 0) contain `data
// edges` (external or flat nodes, or substrings thereof). All other nodes
// (height > 0) contain pointers to BTREE nodes with a height of `height - 1`.
CordRep* edges_[kMaxCapacity];
friend class CordRepBtreeTestPeer;
friend class CordRepBtreeNavigator;
};
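// Minimal usage sketch of the class above (illustrative only; `MakeFlatRep()`
// stands in for a hypothetical helper returning a refcounted FLAT node):
//
//   CordRepBtree* tree = CordRepBtree::Create(MakeFlatRep("abc"));
//   tree = CordRepBtree::Append(tree, MakeFlatRep("def"));
//   assert(tree->length == 6);
//   CordRep::Unref(tree);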
inline CordRepBtree* CordRep::btree() {
assert(tag == BTREE);
return static_cast<CordRepBtree*>(this);
}
inline const CordRepBtree* CordRep::btree() const {
assert(tag == BTREE);
return static_cast<const CordRepBtree*>(this);
}
inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
tag = BTREE;
storage[0] = height;
storage[1] = begin;
storage[2] = end;
}
inline CordRep* CordRepBtree::Edge(size_t index) const {
assert(index >= begin());
assert(index < end());
return edges_[index];
}
inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
return edges_[edge_type == kFront ? begin() : back()];
}
inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
return {edges_ + begin(), size()};
}
inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
size_t end) const {
assert(begin <= end);
assert(begin >= this->begin());
assert(end <= this->end());
return {edges_ + begin, static_cast<size_t>(end - begin)};
}
inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) {
assert(IsDataEdge(r));
size_t offset = 0;
if (r->tag == SUBSTRING) {
offset = r->substring()->start;
r = r->substring()->child;
}
return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset;
}
inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) {
return absl::string_view(EdgeDataPtr(r), r->length);
}
inline absl::string_view CordRepBtree::Data(size_t index) const {
assert(height() == 0);
return EdgeData(Edge(index));
}
inline bool CordRepBtree::IsDataEdge(const CordRep* rep) {
// The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
// if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
// check in the slow path, after the SUBSTRING check, to optimize for the hot
// path.
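// For example, FLAT and EXTERNAL nodes, as well as SUBSTRING nodes wrapping
// either, are data edges; BTREE (and legacy CONCAT / RING) nodes are not.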
if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true;
if (rep->tag == SUBSTRING) rep = rep->substring()->child;
return rep->tag == EXTERNAL || rep->tag >= FLAT;
}
inline CordRepBtree* CordRepBtree::New(int height) {
CordRepBtree* tree = new CordRepBtree;
tree->length = 0;
tree->InitInstance(height);
return tree;
}
inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
CordRepBtree* tree = new CordRepBtree;
int height = rep->tag == BTREE ? rep->btree()->height() + 1 : 0;
tree->length = rep->length;
tree->InitInstance(height, /*begin=*/0, /*end=*/1);
tree->edges_[0] = rep;
return tree;
}
inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
CordRepBtree* back) {
assert(front->height() == back->height());
CordRepBtree* tree = new CordRepBtree;
tree->length = front->length + back->length;
tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2);
tree->edges_[0] = front;
tree->edges_[1] = back;
return tree;
}
inline void CordRepBtree::DestroyTree(CordRepBtree* tree, size_t begin,
size_t end) {
if (tree->height() == 0) {
DestroyLeaf(tree, begin, end);
} else {
DestroyNonLeaf(tree, begin, end);
}
}
inline void CordRepBtree::Destroy(CordRepBtree* tree) {
DestroyTree(tree, tree->begin(), tree->end());
}
inline CordRepBtree* CordRepBtree::CopyRaw() const {
auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree)));
memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree));
new (&tree->refcount) Refcount;
return tree;
}
inline CordRepBtree* CordRepBtree::Copy() const {
CordRepBtree* tree = CopyRaw();
for (CordRep* rep : Edges()) CordRep::Ref(rep);
return tree;
}
inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
size_t new_length) const {
assert(begin >= this->begin());
assert(begin <= this->end());
CordRepBtree* tree = CopyRaw();
tree->length = new_length;
tree->set_begin(begin);
for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
return tree;
}
inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
size_t new_length) const {
assert(end <= capacity());
assert(end >= this->begin());
CordRepBtree* tree = CopyRaw();
tree->length = new_length;
tree->set_end(end);
for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
return tree;
}
inline void CordRepBtree::AlignBegin() {
// The code below does not itself need to be fast, as typically we have
// mono-directional append/prepend calls, and `begin` / `end` are typically
// adjusted no more than once. But we want to avoid potential register-clobber
// effects that make the compiler emit register save/store/spills, and to
// minimize the size of the generated code.
const size_t delta = begin();
if (ABSL_PREDICT_FALSE(delta != 0)) {
const size_t new_end = end() - delta;
set_begin(0);
set_end(new_end);
// TODO(mvels): we can write this using 2 loads / 2 stores depending on
// total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on
// size, and then do overlapping load/store of up to 4 pointers (inlined as
// XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a)
// compact and b) not clobbering any registers.
ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
for (size_t i = 0; i < new_end; ++i) {
edges_[i] = edges_[i + delta];
}
}
}
inline void CordRepBtree::AlignEnd() {
// See comments in `AlignBegin` for motivation on the hand-rolled for loops.
const size_t delta = capacity() - end();
if (delta != 0) {
const size_t new_begin = begin() + delta;
const size_t new_end = end() + delta;
set_begin(new_begin);
set_end(new_end);
ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
for (size_t i = new_end - 1; i >= new_begin; --i) {
edges_[i] = edges_[i - delta];
}
}
}
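// Illustrative sketch of the two alignment operations above, assuming
// capacity() == 6 with begin() == 2 and end() == 5 (three occupied slots):
// AlignBegin() shifts the three edges into slots [0, 3), while AlignEnd()
// shifts them into slots [3, 6); the relative order of the edges is kept.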
template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) {
AlignBegin();
edges_[fetch_add_end(1)] = rep;
}
template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(
absl::Span<CordRep* const> edges) {
AlignBegin();
size_t new_end = end();
for (CordRep* edge : edges) edges_[new_end++] = edge;
set_end(new_end);
}
template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) {
AlignEnd();
edges_[sub_fetch_begin(1)] = rep;
}
template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(
absl::Span<CordRep* const> edges) {
AlignEnd();
size_t new_begin = begin() - edges.size();
set_begin(new_begin);
for (CordRep* edge : edges) edges_[new_begin++] = edge;
}
template <CordRepBtree::EdgeType edge_type>
inline void CordRepBtree::SetEdge(CordRep* edge) {
const int idx = edge_type == kFront ? begin() : back();
CordRep::Unref(edges_[idx]);
edges_[idx] = edge;
}
inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied};
}
inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
assert(offset < length);
size_t index = begin();
while (offset >= edges_[index]->length) offset -= edges_[index++]->length;
return {index, offset};
}
inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
assert(offset > 0);
assert(offset <= length);
size_t index = begin();
while (offset > edges_[index]->length) offset -= edges_[index++]->length;
return {index, offset};
}
inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
size_t offset) const {
size_t index = front.index;
offset = offset + front.n;
while (offset > edges_[index]->length) offset -= edges_[index++]->length;
return {index, offset};
}
inline CordRepBtree::Position CordRepBtree::IndexBeyond(
const size_t offset) const {
// We need to find the edge whose starting offset is beyond (>=) `offset`.
// For this we can't use the `offset -= length` logic of IndexOf. Instead, we
// track the offset of the `current edge` in `off`, which we increase as we
// iterate over the edges until we find the matching edge.
size_t off = 0;
size_t index = begin();
while (offset > off) off += edges_[index++]->length;
return {index, off - offset};
}
inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
if (IsDataEdge(rep)) return New(rep);
return CreateSlow(rep);
}
inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
assert(refcount.IsOne());
CordRepBtree* tree = this;
const int height = this->height();
CordRepBtree* n1 = tree;
CordRepBtree* n2 = tree;
CordRepBtree* n3 = tree;
switch (height) {
case 3:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
n2 = tree;
ABSL_FALLTHROUGH_INTENDED;
case 2:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
n1 = tree;
ABSL_FALLTHROUGH_INTENDED;
case 1:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
ABSL_FALLTHROUGH_INTENDED;
case 0:
CordRep* edge = tree->Edge(kBack);
if (!edge->refcount.IsOne()) return {};
if (edge->tag < FLAT) return {};
size_t avail = edge->flat()->Capacity() - edge->length;
if (avail == 0) return {};
size_t delta = (std::min)(size, avail);
Span<char> span = {edge->flat()->Data() + edge->length, delta};
edge->length += delta;
switch (height) {
case 3:
n3->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 2:
n2->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 1:
n1->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 0:
tree->length += delta;
return span;
}
break;
}
return GetAppendBufferSlow(size);
}
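// Illustrative use of GetAppendBuffer above (assumes this tree and its back
// edges are privately owned, and `src` is an absl::string_view that still
// needs to be appended):
//
//   absl::Span<char> span = tree->GetAppendBuffer(src.size());
//   memcpy(span.data(), src.data(), span.size());
//   src.remove_prefix(span.size());  // remainder, if any, needs a new edge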
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>(
CordRepBtree* tree, CordRep* rep);
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>(
CordRepBtree* tree, CordRep* rep);
inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
return CordRepBtree::AddCordRep<kBack>(tree, rep);
}
return AppendSlow(tree, rep);
}
inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) {
return CordRepBtree::AddCordRep<kFront>(tree, rep);
}
return PrependSlow(tree, rep);
}
#ifdef NDEBUG
inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) {
return tree;
}
inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) {
return tree;
}
#endif
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_

File diff suppressed because it is too large

@ -0,0 +1,185 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
#include <algorithm>
#include <cassert>
#include <cstring>
#include <memory>
#include <random>
#include <string>
#include <vector>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cordrep_testing {
inline cord_internal::CordRepSubstring* MakeSubstring(
size_t start, size_t len, cord_internal::CordRep* rep) {
auto* sub = new cord_internal::CordRepSubstring;
sub->tag = cord_internal::SUBSTRING;
sub->start = start;
sub->length = len <= 0 ? rep->length - start + len : len;
sub->child = rep;
return sub;
}
inline cord_internal::CordRepConcat* MakeConcat(cord_internal::CordRep* left,
cord_internal::CordRep* right,
int depth = 0) {
auto* concat = new cord_internal::CordRepConcat;
concat->tag = cord_internal::CONCAT;
concat->length = left->length + right->length;
concat->left = left;
concat->right = right;
concat->set_depth(depth);
return concat;
}
inline cord_internal::CordRepFlat* MakeFlat(absl::string_view value) {
assert(value.length() <= cord_internal::kMaxFlatLength);
auto* flat = cord_internal::CordRepFlat::New(value.length());
flat->length = value.length();
memcpy(flat->Data(), value.data(), value.length());
return flat;
}
// Creates an external node for testing
inline cord_internal::CordRepExternal* MakeExternal(absl::string_view s) {
struct Rep : public cord_internal::CordRepExternal {
std::string s;
explicit Rep(absl::string_view sv) : s(sv) {
this->tag = cord_internal::EXTERNAL;
this->base = s.data();
this->length = s.length();
this->releaser_invoker = [](cord_internal::CordRepExternal* self) {
delete static_cast<Rep*>(self);
};
}
};
return new Rep(s);
}
inline std::string CreateRandomString(size_t n) {
absl::string_view data =
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"0123456789~!@#$%^&*()_+=-<>?:\"{}[]|";
std::minstd_rand rnd;
std::uniform_int_distribution<size_t> dist(0, data.size() - 1);
std::string s(n, ' ');
for (size_t i = 0; i < n; ++i) {
s[i] = data[dist(rnd)];
}
return s;
}
// Creates an array of flats from the provided string, chopping
// the provided string up into flats of `chunk_size` characters each,
// resulting in roughly `data.size() / chunk_size` total flats.
inline std::vector<cord_internal::CordRep*> CreateFlatsFromString(
absl::string_view data, size_t chunk_size) {
assert(chunk_size > 0);
std::vector<cord_internal::CordRep*> flats;
// Clamp the step so a final chunk shorter than `chunk_size` does not advance
// the view past its end.
for (absl::string_view s = data; !s.empty();
s.remove_prefix(std::min(chunk_size, s.size()))) {
flats.push_back(MakeFlat(s.substr(0, chunk_size)));
}
return flats;
}
inline cord_internal::CordRepBtree* CordRepBtreeFromFlats(
absl::Span<cord_internal::CordRep* const> flats) {
assert(!flats.empty());
auto* node = cord_internal::CordRepBtree::Create(flats[0]);
for (size_t i = 1; i < flats.size(); ++i) {
node = cord_internal::CordRepBtree::Append(node, flats[i]);
}
return node;
}
inline void CordToString(cord_internal::CordRep* rep, std::string& s) {
size_t offset = 0;
size_t length = rep->length;
while (rep->tag == cord_internal::SUBSTRING) {
offset += rep->substring()->start;
rep = rep->substring()->child;
}
if (rep->tag == cord_internal::BTREE) {
for (cord_internal::CordRep* edge : rep->btree()->Edges()) {
CordToString(edge, s);
}
} else if (rep->tag >= cord_internal::FLAT) {
s.append(rep->flat()->Data() + offset, length);
} else if (rep->tag == cord_internal::EXTERNAL) {
s.append(rep->external()->base + offset, length);
} else {
ABSL_RAW_LOG(FATAL, "Unsupported tag %d", rep->tag);
}
}
inline std::string CordToString(cord_internal::CordRep* rep) {
std::string s;
s.reserve(rep->length);
CordToString(rep, s);
return s;
}
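// Example wiring of the helpers above (illustrative sketch):
//
//   std::string data = CreateRandomString(400);
//   std::vector<cord_internal::CordRep*> flats =
//       CreateFlatsFromString(data, 100);
//   cord_internal::CordRepBtree* tree = CordRepBtreeFromFlats(flats);
//   assert(CordToString(tree) == data);
//   cord_internal::CordRep::Unref(tree);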
// RAII Helper class to automatically unref reps on destruction.
class AutoUnref {
public:
~AutoUnref() {
for (CordRep* rep : unrefs_) CordRep::Unref(rep);
}
// Adds `rep` to the list of reps to be unreffed at destruction.
template <typename CordRepType>
CordRepType* Add(CordRepType* rep) {
unrefs_.push_back(rep);
return rep;
}
// Increments the reference count of `rep` by one, and adds it to
// the list of reps to be unreffed at destruction.
template <typename CordRepType>
CordRepType* Ref(CordRepType* rep) {
unrefs_.push_back(CordRep::Ref(rep));
return rep;
}
// Increments the reference count of `rep` by one if `condition` is true,
// and adds it to the list of reps to be unreffed at destruction.
template <typename CordRepType>
CordRepType* RefIf(bool condition, CordRepType* rep) {
if (condition) unrefs_.push_back(CordRep::Ref(rep));
return rep;
}
private:
using CordRep = absl::cord_internal::CordRep;
std::vector<CordRep*> unrefs_;
};
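// Typical use of AutoUnref in a test body (illustrative):
//
//   AutoUnref refs;
//   cord_internal::CordRepBtree* tree =
//       refs.Add(cord_internal::CordRepBtree::Create(MakeFlat("abc")));
//   // ... exercise `tree`; it is unreffed when `refs` goes out of scope.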
} // namespace cordrep_testing
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_

@ -14,7 +14,7 @@
# The commit of GoogleTest to be used in the CMake tests in this directory.
# Keep this in sync with the commit in the WORKSPACE file.
readonly ABSL_GOOGLETEST_COMMIT="5bcd8e3bb929714e031a542d303f818e5a5af45d"
readonly ABSL_GOOGLETEST_COMMIT="8d51ffdfab10b3fba636ae69bc03da4b54f8c235"
# Avoid depending on GitHub by looking for a cached copy of the commit first.
if [[ -r "${KOKORO_GFILE_DIR:-}/distdir/${ABSL_GOOGLETEST_COMMIT}.zip" ]]; then
