-- b20c539b8e21fee7d4d908a8a26a317a3de9d993 by Martijn Vels <mvels@google.com>: Add CordRepBtree implementation PiperOrigin-RevId: 385679196 -- 96f7753b7af5fd964537d5794dd597bb6e698071 by Derek Mauro <dmauro@google.com>: Update Abseil dependencies PiperOrigin-RevId: 385643956 -- 67bdae4c686f0df09cc7155633c03218bf23d177 by Abseil Team <absl-team@google.com>: Fix up some small typos in error messages. PiperOrigin-RevId: 385625107 GitOrigin-RevId: b20c539b8e21fee7d4d908a8a26a317a3de9d993 Change-Id: I8f602cfe9f7878b0558359ab15efb048caefb3a5pull/994/head
parent
33541e7510
commit
d61843e531
11 changed files with 3426 additions and 17 deletions
@ -0,0 +1,947 @@ |
||||
// Copyright 2021 The Abseil Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "absl/strings/internal/cord_rep_btree.h"

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_consume.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
||||
|
||||
namespace absl { |
||||
ABSL_NAMESPACE_BEGIN |
||||
namespace cord_internal { |
||||
|
||||
namespace { |
||||
|
||||
using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth]; |
||||
using EdgeType = CordRepBtree::EdgeType; |
||||
using OpResult = CordRepBtree::OpResult; |
||||
using CopyResult = CordRepBtree::CopyResult; |
||||
|
||||
constexpr auto kFront = CordRepBtree::kFront; |
||||
constexpr auto kBack = CordRepBtree::kBack; |
||||
|
||||
// Implementation of the various 'Dump' functions.
|
||||
// Prints the entire tree structure or 'rep'. External callers should
|
||||
// not specify 'depth' and leave it to its default (0) value.
|
||||
// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node.
|
||||
void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream, |
||||
int depth = 0) { |
||||
// Allow for full height trees + substring -> flat / external nodes.
|
||||
assert(depth <= CordRepBtree::kMaxDepth + 2); |
||||
std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne() |
||||
? std::string("Private") |
||||
: absl::StrCat("Shared(", rep->refcount.Get(), ")"); |
||||
std::string sptr = absl::StrCat("0x", absl::Hex(rep)); |
||||
|
||||
// Dumps the data contents of `rep` if `include_contents` is true.
|
||||
// Always emits a new line character.
|
||||
auto maybe_dump_data = [&stream, include_contents](const CordRep* r) { |
||||
if (include_contents) { |
||||
// Allow for up to 60 wide display of content data, which with some
|
||||
// indentation and prefix / labels keeps us within roughly 80-100 wide.
|
||||
constexpr size_t kMaxDataLength = 60; |
||||
stream << ", data = \"" |
||||
<< CordRepBtree::EdgeData(r).substr(0, kMaxDataLength) |
||||
<< (r->length > kMaxDataLength ? "\"..." : "\""); |
||||
} |
||||
stream << '\n'; |
||||
}; |
||||
|
||||
// For each level, we print the 'shared/private' state and the rep pointer,
|
||||
// indented by two spaces per recursive depth.
|
||||
stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") "; |
||||
|
||||
if (rep->tag == BTREE) { |
||||
const CordRepBtree* node = rep->btree(); |
||||
std::string label = |
||||
node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf"; |
||||
stream << label << ", len = " << node->length |
||||
<< ", begin = " << node->begin() << ", end = " << node->end() |
||||
<< "\n"; |
||||
for (CordRep* edge : node->Edges()) { |
||||
DumpAll(edge, include_contents, stream, depth + 1); |
||||
} |
||||
} else if (rep->tag == SUBSTRING) { |
||||
const CordRepSubstring* substring = rep->substring(); |
||||
stream << "Substring, len = " << rep->length |
||||
<< ", start = " << substring->start; |
||||
maybe_dump_data(rep); |
||||
DumpAll(substring->child, include_contents, stream, depth + 1); |
||||
} else if (rep->tag >= FLAT) { |
||||
stream << "Flat, len = " << rep->length; |
||||
maybe_dump_data(rep); |
||||
} else if (rep->tag == EXTERNAL) { |
||||
stream << "Extn, len = " << rep->length; |
||||
maybe_dump_data(rep); |
||||
} |
||||
} |
||||
|
||||
// TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring
|
||||
// small data out of large reps, and general efficiency of 'always copy small
|
||||
// data'. Consider making this a cord rep internal library function.
|
||||
CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) { |
||||
assert(n != 0); |
||||
assert(offset + n <= rep->length); |
||||
assert(offset != 0 || n != rep->length); |
||||
|
||||
if (rep->tag == SUBSTRING) { |
||||
CordRepSubstring* substring = rep->substring(); |
||||
offset += substring->start; |
||||
rep = CordRep::Ref(substring->child); |
||||
CordRep::Unref(substring); |
||||
} |
||||
CordRepSubstring* substring = new CordRepSubstring(); |
||||
substring->length = n; |
||||
substring->tag = SUBSTRING; |
||||
substring->start = offset; |
||||
substring->child = rep; |
||||
return substring; |
||||
} |
||||
|
||||
// TODO(b/192061034): consider making this a cord rep library function.
|
||||
inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) { |
||||
if (n == rep->length) return rep; |
||||
if (n == 0) return CordRep::Unref(rep), nullptr; |
||||
return CreateSubstring(rep, offset, n); |
||||
} |
||||
|
||||
// TODO(b/192061034): consider making this a cord rep library function.
|
||||
inline CordRep* MakeSubstring(CordRep* rep, size_t offset) { |
||||
if (offset == 0) return rep; |
||||
return CreateSubstring(rep, offset, rep->length - offset); |
||||
} |
||||
|
||||
template <EdgeType edge_type> |
||||
inline absl::string_view Consume(absl::string_view s, size_t n) { |
||||
return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n); |
||||
} |
||||
|
||||
template <EdgeType edge_type> |
||||
inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) { |
||||
if (edge_type == kBack) { |
||||
memcpy(dst, s.data(), n); |
||||
return s.substr(n); |
||||
} else { |
||||
const size_t offset = s.size() - n; |
||||
memcpy(dst, s.data() + offset, n); |
||||
return s.substr(0, offset); |
||||
} |
||||
} |
||||
|
||||
// Drops one reference on `r`, invoking `fn(r)` if that was the last one.
//
// The refcount is first tested with IsOne() so that a sole owner never
// performs the decrement. Known issue / optimization weirdness noted by the
// original author: the store associated with the decrement introduces traffic
// between cpus (even if the result of that traffic does nothing), which makes
// this two step form faster than a single call to refcount.Decrement()
// checking the zero refcount condition.
template <typename R, typename Fn>
inline void FastUnref(R* r, Fn&& fn) {
  // Short-circuit: the decrement only runs when the refcount is not one.
  if (r->refcount.IsOne() || !r->refcount.DecrementExpectHighRefcount()) {
    fn(r);
  }
}
||||
|
||||
// Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT
|
||||
// node, or a SUBSTRING of an EXTERNAL or FLAT node.
|
||||
void DeleteLeafEdge(CordRep* rep) { |
||||
for (;;) { |
||||
if (rep->tag >= FLAT) { |
||||
CordRepFlat::Delete(rep->flat()); |
||||
return; |
||||
} |
||||
if (rep->tag == EXTERNAL) { |
||||
CordRepExternal::Delete(rep->external()); |
||||
return; |
||||
} |
||||
assert(rep->tag == SUBSTRING); |
||||
CordRepSubstring* substring = rep->substring(); |
||||
rep = substring->child; |
||||
assert(rep->tag == EXTERNAL || rep->tag >= FLAT); |
||||
delete substring; |
||||
if (rep->refcount.Decrement()) return; |
||||
} |
||||
} |
||||
|
||||
// StackOperations contains the logic to build a left-most or right-most stack
|
||||
// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to
|
||||
// propagate node changes up the stack.
|
||||
template <EdgeType edge_type> |
||||
struct StackOperations { |
||||
// Returns true if the node at 'depth' is not shared, i.e. has a refcount
|
||||
// of one and all of its parent nodes have a refcount of one.
|
||||
inline bool owned(int depth) const { return depth < share_depth; } |
||||
|
||||
// Returns the node at 'depth'.
|
||||
inline CordRepBtree* node(int depth) const { return stack[depth]; } |
||||
|
||||
// Builds a `depth` levels deep stack starting at `tree` recording which nodes
|
||||
// are private in the form of the 'share depth' where nodes are shared.
|
||||
inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) { |
||||
assert(depth <= tree->height()); |
||||
int current_depth = 0; |
||||
while (current_depth < depth && tree->refcount.IsOne()) { |
||||
stack[current_depth++] = tree; |
||||
tree = tree->Edge(edge_type)->btree(); |
||||
} |
||||
share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0); |
||||
while (current_depth < depth) { |
||||
stack[current_depth++] = tree; |
||||
tree = tree->Edge(edge_type)->btree(); |
||||
} |
||||
return tree; |
||||
} |
||||
|
||||
// Builds a stack with the invariant that all nodes are private owned / not
|
||||
// shared. This is used in iterative updates where a previous propagation
|
||||
// guaranteed all nodes are owned / private.
|
||||
inline void BuildOwnedStack(CordRepBtree* tree, int height) { |
||||
assert(height <= CordRepBtree::kMaxHeight); |
||||
int depth = 0; |
||||
while (depth < height) { |
||||
assert(tree->refcount.IsOne()); |
||||
stack[depth++] = tree; |
||||
tree = tree->Edge(edge_type)->btree(); |
||||
} |
||||
assert(tree->refcount.IsOne()); |
||||
share_depth = depth + 1; |
||||
} |
||||
|
||||
// Processes the final 'top level' result action for the tree.
|
||||
// See the 'Action' enum for the various action implications.
|
||||
static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) { |
||||
switch (result.action) { |
||||
case CordRepBtree::kPopped: |
||||
if (ABSL_PREDICT_FALSE(tree->height() >= CordRepBtree::kMaxHeight)) { |
||||
ABSL_RAW_LOG(FATAL, "Max height exceeded"); |
||||
} |
||||
return edge_type == kBack ? CordRepBtree::New(tree, result.tree) |
||||
: CordRepBtree::New(result.tree, tree); |
||||
case CordRepBtree::kCopied: |
||||
CordRep::Unref(tree); |
||||
ABSL_FALLTHROUGH_INTENDED; |
||||
case CordRepBtree::kSelf: |
||||
return result.tree; |
||||
} |
||||
ABSL_INTERNAL_UNREACHABLE; |
||||
return result.tree; |
||||
} |
||||
|
||||
// Propagate the action result in 'result' up into all nodes of the stack
|
||||
// starting at depth 'depth'. 'length' contains the extra length of data that
|
||||
// was added at the lowest level, and is updated into all nodes of the stack.
|
||||
// See the 'Action' enum for the various action implications.
|
||||
// If 'propagate' is true, then any copied node values are updated into the
|
||||
// stack, which is used for iterative processing on the same stack.
|
||||
template <bool propagate = false> |
||||
inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length, |
||||
OpResult result) { |
||||
// TODO(mvels): revisit the below code to check if 3 loops with 3
|
||||
// (incremental) conditions is faster than 1 loop with a switch.
|
||||
// Benchmarking and perf recordings indicate the loop with switch is
|
||||
// fastest, likely because of indirect jumps on the tight case values and
|
||||
// dense branches. But it's worth considering 3 loops, as the `action`
|
||||
// transitions are mono directional. E.g.:
|
||||
// while (action == kPopped) {
|
||||
// ...
|
||||
// }
|
||||
// while (action == kCopied) {
|
||||
// ...
|
||||
// }
|
||||
// ...
|
||||
// We also found that an "if () do {}" loop here seems faster, possibly
|
||||
// because it allows the branch predictor more granular heuristics on
|
||||
// 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases
|
||||
// which appear to be the most common use cases.
|
||||
if (depth != 0) { |
||||
do { |
||||
CordRepBtree* node = stack[--depth]; |
||||
const bool owned = depth < share_depth; |
||||
switch (result.action) { |
||||
case CordRepBtree::kPopped: |
||||
assert(!propagate); |
||||
result = node->AddEdge<edge_type>(owned, result.tree, length); |
||||
break; |
||||
case CordRepBtree::kCopied: |
||||
result = node->SetEdge<edge_type>(owned, result.tree, length); |
||||
if (propagate) stack[depth] = result.tree; |
||||
break; |
||||
case CordRepBtree::kSelf: |
||||
node->length += length; |
||||
while (depth > 0) { |
||||
node = stack[--depth]; |
||||
node->length += length; |
||||
} |
||||
return node; |
||||
} |
||||
} while (depth > 0); |
||||
} |
||||
return Finalize(tree, result); |
||||
} |
||||
|
||||
// Invokes `Unwind` with `propagate=true` to update the stack node values.
|
||||
inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length, |
||||
OpResult result) { |
||||
return Unwind</*propagate=*/true>(tree, depth, length, result); |
||||
} |
||||
|
||||
// `share_depth` contains the depth at which the nodes in the stack become
|
||||
// shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`),
|
||||
// then `share_depth` is 0. If the 2nd node is shared (and implicitly all
|
||||
// nodes below that) then `share_depth` is 1, etc. A `share_depth` greater
|
||||
// than the depth of the stack indicates that none of the nodes in the stack
|
||||
// are shared.
|
||||
int share_depth; |
||||
|
||||
NodeStack stack; |
||||
}; |
||||
|
||||
} // namespace
|
||||
|
||||
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label, |
||||
bool include_contents, std::ostream& stream) { |
||||
stream << "===================================\n"; |
||||
if (!label.empty()) { |
||||
stream << label << '\n'; |
||||
stream << "-----------------------------------\n"; |
||||
} |
||||
if (rep) { |
||||
DumpAll(rep, include_contents, stream); |
||||
} else { |
||||
stream << "NULL\n"; |
||||
} |
||||
} |
||||
|
||||
// Labelled dump of `rep` to `stream` without node contents.
void CordRepBtree::Dump(const CordRep* rep, absl::string_view label,
                        std::ostream& stream) {
  constexpr bool kIncludeContents = false;
  Dump(rep, label, kIncludeContents, stream);
}
||||
|
||||
void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) { |
||||
Dump(rep, absl::string_view(), false, stream); |
||||
} |
||||
|
||||
// Releases one reference on each data edge in `tree->Edges(begin, end)`,
// deleting edges whose last reference this was, and then deletes `tree`.
void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) {
  auto&& data_edges = tree->Edges(begin, end);
  for (CordRep* data_edge : data_edges) {
    FastUnref(data_edge, DeleteLeafEdge);
  }
  Delete(tree);
}
||||
|
||||
// Releases one reference on each child btree in `tree->Edges(begin, end)`,
// destroying children whose last reference this was, and then deletes `tree`.
void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin,
                                  size_t end) {
  auto&& child_edges = tree->Edges(begin, end);
  for (CordRep* child_edge : child_edges) {
    FastUnref(child_edge->btree(), Destroy);
  }
  Delete(tree);
}
||||
|
||||
bool CordRepBtree::IsValid(const CordRepBtree* tree) { |
||||
#define NODE_CHECK_VALID(x) \ |
||||
if (!(x)) { \
|
||||
ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \
|
||||
return false; \
|
||||
} |
||||
#define NODE_CHECK_EQ(x, y) \ |
||||
if ((x) != (y)) { \
|
||||
ABSL_RAW_LOG(ERROR, \
|
||||
"CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \
|
||||
#y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str()); \ |
||||
return false; \
|
||||
} |
||||
|
||||
NODE_CHECK_VALID(tree != nullptr); |
||||
NODE_CHECK_EQ(tree->tag, BTREE); |
||||
NODE_CHECK_VALID(tree->height() <= kMaxHeight); |
||||
NODE_CHECK_VALID(tree->begin() < tree->capacity()); |
||||
NODE_CHECK_VALID(tree->end() <= tree->capacity()); |
||||
NODE_CHECK_VALID(tree->begin() <= tree->end()); |
||||
size_t child_length = 0; |
||||
for (CordRep* edge : tree->Edges()) { |
||||
NODE_CHECK_VALID(edge != nullptr); |
||||
if (tree->height() > 0) { |
||||
NODE_CHECK_VALID(edge->tag == BTREE); |
||||
NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1); |
||||
} else { |
||||
NODE_CHECK_VALID(IsDataEdge(edge)); |
||||
} |
||||
child_length += edge->length; |
||||
} |
||||
NODE_CHECK_EQ(child_length, tree->length); |
||||
if (tree->height() > 0) { |
||||
for (CordRep* edge : tree->Edges()) { |
||||
if (!IsValid(edge->btree())) return false; |
||||
} |
||||
} |
||||
return true; |
||||
|
||||
#undef NODE_CHECK_VALID |
||||
#undef NODE_CHECK_EQ |
||||
} |
||||
|
||||
#ifndef NDEBUG |
||||
|
||||
// Returns `tree` if it passes IsValid(). Otherwise dumps the tree (without
// contents) to std::cout and logs a FATAL error.
CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) {
  if (IsValid(tree)) {
    return tree;
  }
  Dump(tree, "CordRepBtree validation failed:", false, std::cout);
  ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
  return tree;
}
||||
|
||||
// Const overload: returns `tree` if it passes IsValid(). Otherwise dumps the
// tree (without contents) to std::cout and logs a FATAL error.
const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) {
  if (IsValid(tree)) {
    return tree;
  }
  Dump(tree, "CordRepBtree validation failed:", false, std::cout);
  ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED");
  return tree;
}
||||
|
||||
#endif // NDEBUG
|
||||
|
||||
template <EdgeType edge_type> |
||||
inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) { |
||||
if (size() >= kMaxCapacity) return {New(edge), kPopped}; |
||||
OpResult result = ToOpResult(owned); |
||||
result.tree->Add<edge_type>(edge); |
||||
result.tree->length += delta; |
||||
return result; |
||||
} |
||||
|
||||
template <EdgeType edge_type> |
||||
OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) { |
||||
OpResult result; |
||||
const size_t idx = index(edge_type); |
||||
if (owned) { |
||||
result = {this, kSelf}; |
||||
CordRep::Unref(edges_[idx]); |
||||
} else { |
||||
// Create a copy containing all unchanged edges. Unchanged edges are the
|
||||
// open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
|
||||
// We conveniently cover both case using a constexpr `shift` being 0 or 1
|
||||
// as `end :== back + 1`.
|
||||
result = {CopyRaw(), kCopied}; |
||||
constexpr int shift = edge_type == kFront ? 1 : 0; |
||||
for (CordRep* r : Edges(begin() + shift, back() + shift)) { |
||||
CordRep::Ref(r); |
||||
} |
||||
} |
||||
result.tree->edges_[idx] = edge; |
||||
result.tree->length += delta; |
||||
return result; |
||||
} |
||||
|
||||
template <EdgeType edge_type> |
||||
CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) { |
||||
const int depth = tree->height(); |
||||
const size_t length = rep->length; |
||||
StackOperations<edge_type> ops; |
||||
CordRepBtree* leaf = ops.BuildStack(tree, depth); |
||||
const OpResult result = |
||||
leaf->AddEdge<edge_type>(ops.owned(depth), rep, length); |
||||
return ops.Unwind(tree, depth, length, result); |
||||
} |
||||
|
||||
template <> |
||||
CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data, |
||||
size_t extra) { |
||||
CordRepBtree* leaf = CordRepBtree::New(0); |
||||
size_t length = 0; |
||||
size_t end = 0; |
||||
const size_t cap = leaf->capacity(); |
||||
while (!data.empty() && end != cap) { |
||||
auto* flat = CordRepFlat::New(data.length() + extra); |
||||
flat->length = (std::min)(data.length(), flat->Capacity()); |
||||
length += flat->length; |
||||
leaf->edges_[end++] = flat; |
||||
data = Consume<kBack>(flat->Data(), data, flat->length); |
||||
} |
||||
leaf->length = length; |
||||
leaf->set_end(end); |
||||
return leaf; |
||||
} |
||||
|
||||
template <> |
||||
CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data, |
||||
size_t extra) { |
||||
CordRepBtree* leaf = CordRepBtree::New(0); |
||||
size_t length = 0; |
||||
size_t begin = leaf->capacity(); |
||||
leaf->set_end(leaf->capacity()); |
||||
while (!data.empty() && begin != 0) { |
||||
auto* flat = CordRepFlat::New(data.length() + extra); |
||||
flat->length = (std::min)(data.length(), flat->Capacity()); |
||||
length += flat->length; |
||||
leaf->edges_[--begin] = flat; |
||||
data = Consume<kFront>(flat->Data(), data, flat->length); |
||||
} |
||||
leaf->length = length; |
||||
leaf->set_begin(begin); |
||||
return leaf; |
||||
} |
||||
|
||||
template <> |
||||
absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data, |
||||
size_t extra) { |
||||
assert(!data.empty()); |
||||
assert(size() < capacity()); |
||||
AlignBegin(); |
||||
const size_t cap = capacity(); |
||||
do { |
||||
CordRepFlat* flat = CordRepFlat::New(data.length() + extra); |
||||
const size_t n = (std::min)(data.length(), flat->Capacity()); |
||||
flat->length = n; |
||||
edges_[fetch_add_end(1)] = flat; |
||||
data = Consume<kBack>(flat->Data(), data, n); |
||||
} while (!data.empty() && end() != cap); |
||||
return data; |
||||
} |
||||
|
||||
template <> |
||||
absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data, |
||||
size_t extra) { |
||||
assert(!data.empty()); |
||||
assert(size() < capacity()); |
||||
AlignEnd(); |
||||
do { |
||||
CordRepFlat* flat = CordRepFlat::New(data.length() + extra); |
||||
const size_t n = (std::min)(data.length(), flat->Capacity()); |
||||
flat->length = n; |
||||
edges_[sub_fetch_begin(1)] = flat; |
||||
data = Consume<kFront>(flat->Data(), data, n); |
||||
} while (!data.empty() && begin() != 0); |
||||
return data; |
||||
} |
||||
|
||||
template <EdgeType edge_type> |
||||
CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data, |
||||
size_t extra) { |
||||
if (ABSL_PREDICT_FALSE(data.empty())) return tree; |
||||
|
||||
const size_t original_data_size = data.size(); |
||||
int depth = tree->height(); |
||||
StackOperations<edge_type> ops; |
||||
CordRepBtree* leaf = ops.BuildStack(tree, depth); |
||||
|
||||
// If there is capacity in the last edge, append as much data
|
||||
// as possible into this last edge.
|
||||
if (leaf->size() < leaf->capacity()) { |
||||
OpResult result = leaf->ToOpResult(ops.owned(depth)); |
||||
data = result.tree->AddData<edge_type>(data, extra); |
||||
if (data.empty()) { |
||||
result.tree->length += original_data_size; |
||||
return ops.Unwind(tree, depth, original_data_size, result); |
||||
} |
||||
|
||||
// We added some data into this leaf, but not all. Propagate the added
|
||||
// length to the top most node, and rebuild the stack with any newly copied
|
||||
// or updated nodes. From this point on, the path (leg) from the top most
|
||||
// node to the right-most node towards the leaf node is privately owned.
|
||||
size_t delta = original_data_size - data.size(); |
||||
assert(delta > 0); |
||||
result.tree->length += delta; |
||||
tree = ops.Propagate(tree, depth, delta, result); |
||||
ops.share_depth = depth + 1; |
||||
} |
||||
|
||||
// We were unable to append all data into the existing right-most leaf node.
|
||||
// This means all remaining data must be put into (a) new leaf node(s) which
|
||||
// we append to the tree. To make this efficient, we iteratively build full
|
||||
// leaf nodes from `data` until the created leaf contains all remaining data.
|
||||
// We utilize the `Unwind` method to merge the created leaf into the first
|
||||
// level towards root that has capacity. On each iteration with remaining
|
||||
// data, we rebuild the stack in the knowledge that right-most nodes are
|
||||
// privately owned after the first `Unwind` completes.
|
||||
for (;;) { |
||||
OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped}; |
||||
if (result.tree->length == data.size()) { |
||||
return ops.Unwind(tree, depth, result.tree->length, result); |
||||
} |
||||
data = Consume<edge_type>(data, result.tree->length); |
||||
tree = ops.Unwind(tree, depth, result.tree->length, result); |
||||
depth = tree->height(); |
||||
ops.BuildOwnedStack(tree, depth); |
||||
} |
||||
} |
||||
|
||||
template <EdgeType edge_type> |
||||
CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) { |
||||
assert(dst->height() >= src->height()); |
||||
|
||||
// Capture source length as we may consume / destroy `src`.
|
||||
const size_t length = src->length; |
||||
|
||||
// We attempt to merge `src` at its corresponding height in `dst`.
|
||||
const int depth = dst->height() - src->height(); |
||||
StackOperations<edge_type> ops; |
||||
CordRepBtree* merge_node = ops.BuildStack(dst, depth); |
||||
|
||||
// If there is enough space in `merge_node` for all edges from `src`, add all
|
||||
// edges to this node, making a fresh copy as needed if not privately owned.
|
||||
// If `merge_node` does not have capacity for `src`, we rely on `Unwind` and
|
||||
// `Finalize` to merge `src` into the first level towards `root` where there
|
||||
// is capacity for another edge, or create a new top level node.
|
||||
OpResult result; |
||||
if (merge_node->size() + src->size() <= kMaxCapacity) { |
||||
result = merge_node->ToOpResult(ops.owned(depth)); |
||||
result.tree->Add<edge_type>(src->Edges()); |
||||
result.tree->length += src->length; |
||||
if (src->refcount.IsOne()) { |
||||
Delete(src); |
||||
} else { |
||||
for (CordRep* edge : src->Edges()) CordRep::Ref(edge); |
||||
CordRepBtree::Unref(src); |
||||
} |
||||
} else { |
||||
result = {src, kPopped}; |
||||
} |
||||
|
||||
// Unless we merged at the top level (i.e.: src and dst are equal height),
|
||||
// unwind the result towards the top level, and finalize the result.
|
||||
if (depth) { |
||||
return ops.Unwind(dst, depth, length, result); |
||||
} |
||||
return ops.Finalize(dst, result); |
||||
} |
||||
|
||||
// Returns a (newly referenced or newly created) rep holding the data of this
// tree from `offset` up to the end, together with the height of that rep
// (-1 if the result is a plain data edge). Requires `offset < length`.
CopyResult CordRepBtree::CopySuffix(size_t offset) {
  assert(offset < this->length);

  // For as long as the requested suffix lies entirely inside the last edge,
  // the current level can be 'dropped'. In the most extreme case, `offset`
  // points into the very last data edge of the tree: there is then a single
  // path from the top of the tree to that edge, and every node on it can be
  // dropped. The check for this is `last->length >= tail_len`.
  int level = this->height();
  CordRepBtree* cur = this;
  size_t tail_len = cur->length - offset;
  for (CordRep* last = cur->Edge(kBack); last->length >= tail_len;
       last = cur->Edge(kBack)) {
    offset = last->length - tail_len;
    if (--level < 0) {
      return {MakeSubstring(CordRep::Ref(last), offset), level};
    }
    cur = last->btree();
  }
  if (offset == 0) return {CordRep::Ref(cur), level};

  // The suffix spans at least two edges of `cur`. `IndexBeyond` yields the
  // edge 'beyond' the offset if the offset is not a clean edge boundary.
  Position split = cur->IndexBeyond(offset);
  CordRepBtree* copy = cur->CopyToEndFrom(split.index, tail_len);
  const CopyResult result = {copy, level};

  // A non zero `split.n` means the offset falls inside the edge preceding
  // `split.index`; `split.n` is then the number of trailing bytes needed from
  // that preceding edge. Keep descending into that 'broken' edge until the
  // cut lands exactly on the start of an edge.
  while (split.n != 0) {
    assert(split.index >= 1);
    const size_t slot = split.index - 1;
    copy->set_begin(slot);
    CordRep* const partial = cur->Edge(slot);

    tail_len = split.n;
    offset = partial->length - tail_len;

    if (--level < 0) {
      // Reached the data edges: a substring provides the partial edge.
      copy->edges_[slot] = MakeSubstring(CordRep::Ref(partial), offset, tail_len);
      return result;
    }

    cur = partial->btree();
    split = cur->IndexBeyond(offset);

    CordRepBtree* const child_copy = cur->CopyToEndFrom(split.index, tail_len);
    copy->edges_[slot] = child_copy;
    copy = child_copy;
  }
  copy->set_begin(split.index);
  return result;
}
||||
|
||||
// Returns a (newly referenced or newly created) rep holding the first `n`
// bytes of this tree, together with the height of that rep (-1 if the result
// is a plain data edge). Requires `0 < n <= length`.
CopyResult CordRepBtree::CopyPrefix(size_t n) {
  assert(n > 0);
  assert(n <= this->length);

  // For as long as `n` does not exceed the length of the first edge, the
  // current level can be 'dropped'. In the most extreme case, a 1 byte prefix
  // is copied: there is then a single path from the top of the tree to the
  // data edge holding that byte, and every node on it can be dropped.
  int level = this->height();
  CordRepBtree* cur = this;
  for (CordRep* first = cur->Edge(kFront); first->length >= n;
       first = cur->Edge(kFront)) {
    if (--level < 0) return {MakeSubstring(CordRep::Ref(first), 0, n), -1};
    cur = first->btree();
  }
  if (cur->length == n) return {CordRep::Ref(cur), level};

  // `n` spans at least two edges of `cur`: locate where the span ends.
  Position split = cur->IndexOf(n);

  // Partial copy of `cur` holding the edges before `split.index`, with its
  // length set to `n`. A 'partial last edge' is added below where needed.
  CordRepBtree* copy = cur->CopyBeginTo(split.index, n);
  const CopyResult result = {copy, level};

  // `split.n` is the offset of the cut inside the edge at `split.index`. A
  // non zero value means the cut is not clean: a (partial) copy of that edge
  // is needed, which repeats one level down until `split.n` is zero.
  while (split.n != 0) {
    size_t end = split.index;
    n = split.n;

    CordRep* const partial = cur->Edge(split.index);
    if (--level < 0) {
      // Reached the data edges: a substring provides the partial edge.
      copy->edges_[end++] = MakeSubstring(CordRep::Ref(partial), 0, n);
      copy->set_end(end);
      AssertValid(result.edge->btree());
      return result;
    }

    cur = partial->btree();
    split = cur->IndexOf(n);
    CordRepBtree* const child_copy = cur->CopyBeginTo(split.index, n);
    copy->edges_[end++] = child_copy;
    copy->set_end(end);
    copy = child_copy;
  }
  copy->set_end(split.index);
  AssertValid(result.edge->btree());
  return result;
}
||||
|
||||
// Returns a rep holding the `n` bytes of this tree that start at `offset`, or
// nullptr if `n` is zero. Existing edges are shared (referenced) wherever the
// requested range covers them entirely. Requires `offset + n <= length`.
CordRep* CordRepBtree::SubTree(size_t offset, size_t n) {
  assert(n <= this->length);
  assert(offset <= this->length - n);
  if (ABSL_PREDICT_FALSE(n == 0)) return nullptr;

  // Descend for as long as the whole range lies within one single edge.
  CordRepBtree* node = this;
  int height = node->height();
  Position front = node->IndexOf(offset);
  CordRep* left = node->edges_[front.index];
  while (front.n + n <= left->length) {
    if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n);
    node = left->btree();
    front = node->IndexOf(front.n);
    left = node->edges_[front.index];
  }

  // The range now spans more than one edge of `node`: `left` holds the start
  // of the range and `right` holds the end of it.
  const Position back = node->IndexBefore(front, n);
  CordRep* const right = node->edges_[back.index];
  assert(back.index > front.index);

  // Obtain the partial first (`head`) and last (`tail`) entries.
  CopyResult head;
  CopyResult tail;
  if (height <= 0) {
    // Leaf node: plain substrings of the boundary data edges suffice.
    head = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1};
    tail = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1};
  } else {
    // Copy the suffix of the left and the prefix of the right boundary node.
    head = left->btree()->CopySuffix(front.n);
    tail = right->btree()->CopyPrefix(back.n);

    // If at least one edge sits between the two boundary edges, the tree must
    // keep its current (full) height. If not, the tree only needs to be one
    // level higher than the highest of the two boundary results (which are
    // collapsed to their minimum heights).
    const bool adjacent = front.index + 1 == back.index;
    if (adjacent) {
      height = (std::max)(head.height, tail.height) + 1;
    }

    // Wrap both boundary results until they are one level below `height`.
    for (int h = head.height + 1; h < height; ++h) {
      head.edge = CordRepBtree::New(head.edge);
    }
    for (int h = tail.height + 1; h < height; ++h) {
      tail.edge = CordRepBtree::New(tail.edge);
    }
  }

  // Assemble the result: head, all edges strictly in between, tail.
  CordRepBtree* const sub = CordRepBtree::New(height);
  size_t end = 0;
  sub->edges_[end++] = head.edge;
  for (CordRep* middle : node->Edges(front.index + 1, back.index)) {
    sub->edges_[end++] = CordRep::Ref(middle);
  }
  sub->edges_[end++] = tail.edge;
  sub->set_end(end);
  sub->length = n;
  return AssertValid(sub);
}
||||
|
||||
// Returns the tree resulting from appending `right` to `left`. The higher of
// the two trees is used as the merge destination; on equal heights `left` is
// the destination.
CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left,
                                       CordRepBtree* right) {
  if (left->height() < right->height()) {
    // `right` is higher: add `left` to the front of `right`.
    return Merge<kFront>(right, left);
  }
  // `left` is at least as high: add `right` to the back of `left`.
  return Merge<kBack>(left, right);
}
||||
|
||||
// Returns true if this node is a leaf holding exactly one data edge. If so,
// and if `fragment` is not null, `*fragment` is set to the data of that edge.
bool CordRepBtree::IsFlat(absl::string_view* fragment) const {
  if (height() != 0 || size() != 1) return false;
  if (fragment != nullptr) *fragment = Data(begin());
  return true;
}
||||
|
||||
bool CordRepBtree::IsFlat(size_t offset, const size_t n, |
||||
absl::string_view* fragment) const { |
||||
assert(n <= this->length); |
||||
assert(offset <= this->length - n); |
||||
if (ABSL_PREDICT_FALSE(n == 0)) return false; |
||||
int height = this->height(); |
||||
const CordRepBtree* node = this; |
||||
for (;;) { |
||||
const Position front = node->IndexOf(offset); |
||||
const CordRep* edge = node->Edge(front.index); |
||||
if (edge->length < front.n + n) return false; |
||||
if (--height < 0) { |
||||
if (fragment) *fragment = EdgeData(edge).substr(front.n, n); |
||||
return true; |
||||
} |
||||
offset = front.n; |
||||
node = node->Edge(front.index)->btree(); |
||||
} |
||||
} |
||||
|
||||
char CordRepBtree::GetCharacter(size_t offset) const { |
||||
assert(offset < length); |
||||
const CordRepBtree* node = this; |
||||
int height = node->height(); |
||||
for (;;) { |
||||
Position front = node->IndexOf(offset); |
||||
if (--height < 0) return node->Data(front.index)[front.n]; |
||||
offset = front.n; |
||||
node = node->Edge(front.index)->btree(); |
||||
} |
||||
} |
||||
|
||||
// Fallback of `GetAppendBuffer()` for trees with a height of 4 or more.
// Returns a span of up to `size` bytes of spare capacity in the last data
// edge of this tree, and grows the length of that edge and of all nodes on
// the path to it by the size of the returned span. Returns an empty span if
// any node on that path or the edge itself is shared, if the edge is not a
// FLAT, or if the edge has no spare capacity.
Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) {
  // The inlined version in `GetAppendBuffer()` deals with all heights <= 3.
  assert(height() >= 4);
  assert(refcount.IsOne());

  // Record all nodes below this node on the path to the last leaf: their
  // lengths must be updated if we end up handing out capacity.
  const int depth = height();
  CordRepBtree* path[kMaxDepth];
  CordRepBtree* leaf = this;
  int recorded = 0;
  while (recorded < depth) {
    leaf = leaf->Edge(kBack)->btree();
    if (!leaf->refcount.IsOne()) return Span<char>();
    path[recorded++] = leaf;
  }

  // The last data edge must be a privately owned flat.
  CordRep* const edge = leaf->Edge(kBack);
  if (!edge->refcount.IsOne()) return Span<char>();
  if (edge->tag < FLAT) return Span<char>();

  // The flat must have spare capacity.
  const size_t avail = edge->flat()->Capacity() - edge->length;
  if (avail == 0) return Span<char>();

  // Hand out as much of the spare capacity as requested, and account for it
  // in the flat, in this node, and in all recorded nodes.
  const size_t delta = size < avail ? size : avail;
  Span<char> span(edge->flat()->Data() + edge->length, delta);
  edge->length += delta;
  this->length += delta;
  for (int i = 0; i != depth; ++i) {
    path[i]->length += delta;
  }
  return span;
}
||||
|
||||
// Fallback for `Create()`. Returns `rep` itself if it already is a btree.
// Otherwise consumes `rep` and appends each (offset, length) piece reported
// by `Consume()` as a substring edge to a newly built tree.
CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) {
  if (rep->tag == BTREE) return rep->btree();

  CordRepBtree* tree = nullptr;
  auto add_piece = [&tree](CordRep* r, size_t offset, size_t length) {
    CordRep* const piece = MakeSubstring(r, offset, length);
    // The first piece creates the tree, all later pieces are appended.
    tree = (tree == nullptr) ? New(piece)
                             : CordRepBtree::AddCordRep<kBack>(tree, piece);
  };
  Consume(rep, add_piece);
  return tree;
}
||||
|
||||
// Fallback for `Append()`. A btree `rep` is merged after `tree`. Any other
// `rep` is consumed, and each piece reported by `Consume()` is added to the
// back of `tree` as a substring edge.
CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) {
  if (ABSL_PREDICT_TRUE(rep->tag == BTREE)) {
    return MergeTrees(tree, rep->btree());
  }

  auto add_back = [&tree](CordRep* r, size_t offset, size_t length) {
    CordRep* const piece = MakeSubstring(r, offset, length);
    tree = CordRepBtree::AddCordRep<kBack>(tree, piece);
  };
  Consume(rep, add_back);
  return tree;
}
||||
|
||||
// Fallback for `Prepend()`. A btree `rep` is merged before `tree`. Any other
// `rep` is consumed through `ReverseConsume()`, and each reported piece is
// added to the front of `tree` as a substring edge.
CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) {
  if (ABSL_PREDICT_TRUE(rep->tag == BTREE)) {
    return MergeTrees(rep->btree(), tree);
  }

  auto add_front = [&tree](CordRep* r, size_t offset, size_t length) {
    CordRep* const piece = MakeSubstring(r, offset, length);
    tree = CordRepBtree::AddCordRep<kFront>(tree, piece);
  };
  ReverseConsume(rep, add_front);
  return tree;
}
||||
|
||||
// Adds the bytes in `data` to the back of `tree` and returns the resulting
// tree. `extra` is forwarded unchanged to `AddData<kBack>()`.
CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data,
                                   size_t extra) {
  CordRepBtree* const appended =
      CordRepBtree::AddData<kBack>(tree, data, extra);
  return appended;
}
||||
|
||||
// Adds the bytes in `data` to the front of `tree` and returns the resulting
// tree. `extra` is forwarded unchanged to `AddData<kFront>()`.
CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data,
                                    size_t extra) {
  CordRepBtree* const prepended =
      CordRepBtree::AddData<kFront>(tree, data, extra);
  return prepended;
}
||||
|
||||
// Explicit instantiations of the static `AddCordRep` and `AddData` member
// templates for both edge types.
template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree,
                                                       CordRep* rep);
template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree,
                                                        CordRep* rep);
template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree,
                                                    absl::string_view data,
                                                    size_t extra);
template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree,
                                                     absl::string_view data,
                                                     size_t extra);
||||
|
||||
} // namespace cord_internal
|
||||
ABSL_NAMESPACE_END |
||||
} // namespace absl
|
@ -0,0 +1,851 @@ |
||||
// Copyright 2021 The Abseil Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ |
||||
#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ |
||||
|
||||
#include <cassert> |
||||
#include <cstdint> |
||||
#include <iosfwd> |
||||
|
||||
#include "absl/base/config.h" |
||||
#include "absl/base/internal/raw_logging.h" |
||||
#include "absl/base/optimization.h" |
||||
#include "absl/strings/internal/cord_internal.h" |
||||
#include "absl/strings/internal/cord_rep_btree.h" |
||||
#include "absl/strings/internal/cord_rep_flat.h" |
||||
#include "absl/strings/string_view.h" |
||||
#include "absl/types/span.h" |
||||
|
||||
namespace absl { |
||||
ABSL_NAMESPACE_BEGIN |
||||
namespace cord_internal { |
||||
|
||||
class CordRepBtreeNavigator; |
||||
|
||||
// CordRepBtree is as the name implies a btree implementation of a Cordrep tree.
|
||||
// Data is stored at the leaf level only, non leaf nodes contain down pointers
|
||||
// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT
|
||||
// or EXTERNAL nodes. The implementation allows for data to be added to either
|
||||
// end of the tree only, it does not provide any 'insert' logic. This has the
|
||||
// benefit that we can expect good fill ratios: all nodes except the outer
|
||||
// 'legs' will have 100% fill ratios for trees built using Append/Prepend
|
||||
// methods. Merged trees will typically have a fill ratio well above 50% as in a
|
||||
// similar fashion, one side of the merged tree will typically have a 100% fill
|
||||
// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or
|
||||
// better, and the tree never needs balancing.
|
||||
//
|
||||
// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
|
||||
// input unless explicitly stated otherwise. All functions returning a CordRep*
|
||||
// or CordRepBtree* instance transfer a reference back to the caller.
|
||||
// Simplified, callers both 'donate' and 'consume' a reference count on each
|
||||
// call, simplifying the API. An example of building a tree:
|
||||
//
|
||||
// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
|
||||
// tree = CordRepBtree::Append(tree, MakeFlat("world"));
|
||||
//
|
||||
// In the above example, all inputs are consumed, making each call affecting
|
||||
// `tree` reference count neutral. The returned `tree` value can be different
|
||||
// from the input if the input is shared with other threads, or if the tree
|
||||
// grows in height, but callers typically never have to concern themselves with
|
||||
// that and trust that all methods DTRT at all times.
|
||||
class CordRepBtree : public CordRep { |
||||
public: |
||||
// EdgeType identifies `front` and `back` enum values.
|
||||
// Various implementations in CordRepBtree such as `Add` and `Edge` are
|
||||
// generic and templated on operating on either of the boundary edges.
|
||||
// For more information on the possible edges contained in a CordRepBtree
|
||||
// instance see the documentation for `edges_`.
|
||||
enum class EdgeType { kFront, kBack }; |
||||
|
||||
// Convenience constants into `EdgeType`
|
||||
static constexpr EdgeType kFront = EdgeType::kFront; |
||||
static constexpr EdgeType kBack = EdgeType::kBack; |
||||
|
||||
// Maximum number of edges: based on experiments and performance data, we can
|
||||
// pick suitable values resulting in optimum cacheline aligned values. The
|
||||
// preferred values are based on 64-bit systems where we aim to align this
|
||||
// class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc.
|
||||
// TODO(b/192061034): experiment with alternative sizes.
|
||||
static constexpr size_t kMaxCapacity = 6; |
||||
|
||||
// Reasonable maximum height of the btree. We can expect a fill ratio of at
|
||||
// least 50%: trees are always expanded at the front or back. Concatenating
|
||||
// trees will then typically fold at the top most node, where the lower nodes
|
||||
// are at least at capacity on one side of joined inputs. At a lower fill
|
||||
// rate of 4 edges per node, we have capacity for ~16 million leaf nodes.
|
||||
// We will fail / abort if an application ever exceeds this height, which
|
||||
// should be extremely rare (near impossible) and be an indication of an
|
||||
// application error: we do not assume it reasonable for any application to
|
||||
// operate correctly with such monster trees.
|
||||
// Another compelling reason for the number `12` is that any contextual stack
|
||||
// required for navigation or insertion requires 12 words and 12 bytes, which
|
||||
// fits inside 2 cache lines with some room to spare, and is reasonable as a
|
||||
// local stack variable compared to Cord's current near 400 bytes stack use.
|
||||
// The maximum `height` value of a node is then `kMaxDepth - 1` as node height
|
||||
// values start with a value of 0 for leaf nodes.
|
||||
static constexpr int kMaxDepth = 12; |
||||
static constexpr int kMaxHeight = kMaxDepth - 1; |
||||
|
||||
// `Action` defines the action for unwinding changes done at the btree's leaf
|
||||
// level that need to be propagated up to the parent node(s). Each operation
|
||||
// on a node has an effect / action defined as follows:
|
||||
// - kSelf
|
||||
// The operation (add / update, etc) was performed directly on the node as
|
||||
// the node is private to the current thread (i.e.: not shared directly or
|
||||
// indirectly through a refcount > 1). Changes can be propagated directly to
|
||||
// all parent nodes as all parent nodes are also then private to the current
|
||||
// thread.
|
||||
// - kCopied
|
||||
// The operation (add / update, etc) was performed on a copy of the original
|
||||
// node, as the node is (potentially) directly or indirectly shared with
|
||||
// other threads. Changes need to be propagated into the parent nodes where
|
||||
// the old down pointer must be unreffed and replaced with this new copy.
|
||||
// Such changes to parent nodes may themselves require a copy if the parent
|
||||
// node is also shared. A kCopied action can propagate all the way to the
|
||||
// top node where we then must unref the `tree` input provided by the
|
||||
// caller, and return the new copy.
|
||||
// - kPopped
|
||||
// The operation (typically add) could not be satisfied due to insufficient
|
||||
// capacity in the targeted node, and a new 'leg' was created that needs to
|
||||
// be added into the parent node. For example, adding a FLAT inside a leaf
|
||||
// node that is at capacity will create a new leaf node containing that
|
||||
// FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
|
||||
// cascade up the tree if parent nodes are also at capacity. A 'Popped'
|
||||
// action propagating all the way to the top of the tree will result in
|
||||
// the tree becoming one level higher than the current tree through a final
|
||||
// `CordRepBtree::New(tree, popped)` call, resulting in a new top node
|
||||
// referencing the old tree and the new (fully popped upwards) 'leg'.
|
||||
enum Action { kSelf, kCopied, kPopped }; |
||||
|
||||
// Result of an operation on a node. See the `Action` enum for details.
|
||||
struct OpResult { |
||||
CordRepBtree* tree; |
||||
Action action; |
||||
}; |
||||
|
||||
// Return value of the CopyPrefix and CopySuffix methods which can
|
||||
// return a node or data edge at any height inside the tree.
|
||||
// A height of 0 defines the lowest (leaf) node, a height of -1 identifies
|
||||
// `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
|
||||
struct CopyResult { |
||||
CordRep* edge; |
||||
int height; |
||||
}; |
||||
|
||||
// Logical position inside a node:
|
||||
// - index: index of the edge.
|
||||
// - n: size or offset value depending on context.
|
||||
struct Position { |
||||
size_t index; |
||||
size_t n; |
||||
}; |
||||
|
||||
// Creates a btree from the given input. Adopts a ref of `rep`.
|
||||
// If the input `rep` is itself a btree, i.e., `tag == BTREE`, then this
|
||||
// function immediately returns `rep->btree()`. If the input is a valid data
|
||||
// edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
|
||||
// as the sole data edge. Else, the input is assumed to be a (legacy) concat
|
||||
// tree, and the input is consumed and transformed into a btree().
|
||||
static CordRepBtree* Create(CordRep* rep); |
||||
|
||||
// Destroys the provided tree. Should only be called by cord internal API's,
|
||||
// typically after a ref_count.Decrement() on the last reference count.
|
||||
static void Destroy(CordRepBtree* tree); |
||||
|
||||
// Appends / Prepends an existing CordRep instance to this tree.
|
||||
// The below methods accept three types of input:
|
||||
// 1) `rep` is a data node (See `IsDataNode` for valid data edges).
|
||||
// `rep` is appended or prepended to this tree 'as is'.
|
||||
// 2) `rep` is a BTREE.
|
||||
// `rep` is merged into `tree` respecting the Append/Prepend order.
|
||||
// 3) `rep` is some other (legacy) type.
|
||||
// `rep` is converted in place and added to `tree`
|
||||
// Requires `tree` and `rep` to be not null.
|
||||
static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep); |
||||
static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep); |
||||
|
||||
// Append/Prepend the data in `data` to this tree.
|
||||
// The `extra` parameter defines how much extra capacity should be allocated
|
||||
// for any additional FLAT being allocated. This is an optimization hint from
|
||||
// the caller. For example, a caller may need to add 2 string_views of data
|
||||
// "abc" and "defghi" which are not consecutive. The caller can in this case
|
||||
// invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated
|
||||
// where possible with at least 6 bytes of extra capacity beyond `length`.
|
||||
// This helps avoiding data getting fragmented over multiple flats.
|
||||
// There is no limit on the size of `data`. If `data` can not be stored inside
|
||||
// a single flat, then the function will iteratively add flats until all data
|
||||
// has been consumed and appended or prepended to the tree.
|
||||
static CordRepBtree* Append(CordRepBtree* tree, string_view data, |
||||
size_t extra = 0); |
||||
static CordRepBtree* Prepend(CordRepBtree* tree, string_view data, |
||||
size_t extra = 0); |
||||
|
||||
// Returns a new tree, containing `n` bytes of data from this instance
|
||||
// starting at offset `offset`. Where possible, the returned tree shares
|
||||
// (re-uses) data edges and nodes with this instance to minimize the
|
||||
// combined memory footprint of both trees.
|
||||
// Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
|
||||
CordRep* SubTree(size_t offset, size_t n); |
||||
|
||||
// Returns the character at the given offset.
|
||||
char GetCharacter(size_t offset) const; |
||||
|
||||
// Returns true if this node holds a single data edge, and if so, sets
|
||||
// `fragment` to reference the contained data. `fragment` is an optional
|
||||
// output parameter and allowed to be null.
|
||||
bool IsFlat(absl::string_view* fragment) const; |
||||
|
||||
// Returns true if the data of `n` bytes starting at offset `offset`
|
||||
// is contained in a single data edge, and if so, sets fragment to reference
|
||||
// the contained data. `fragment` is an optional output parameter and allowed
|
||||
// to be null.
|
||||
bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const; |
||||
|
||||
// Returns a span (mutable range of bytes) of up to `size` bytes into the
|
||||
// last FLAT data edge inside this tree under the following conditions:
|
||||
// - none of the nodes down into the FLAT node are shared.
|
||||
// - the last data edge in this tree is a non-shared FLAT.
|
||||
// - the referenced FLAT has additional capacity available.
|
||||
// If all these conditions are met, a non-empty span is returned, and the
|
||||
// length of the flat node and involved tree nodes have been increased by
|
||||
// `span.length()`. The caller is responsible for immediately assigning values
|
||||
// to all uninitialized data reference by the returned span.
|
||||
// Requires `this->refcount.IsOne()`: this function forces the caller to do
|
||||
// this fast path check on the top level node, as this is the most commonly
|
||||
// shared node of a cord tree.
|
||||
Span<char> GetAppendBuffer(size_t size); |
||||
|
||||
// Returns the `height` of the tree. The height of a tree is limited to
|
||||
// kMaxHeight. `height` is implemented as an `int` as in some places we
|
||||
// use negative (-1) values for 'data edges'.
|
||||
int height() const { return static_cast<int>(storage[0]); } |
||||
|
||||
// Properties: begin, back, end, front/back boundary indexes.
|
||||
size_t begin() const { return static_cast<size_t>(storage[1]); } |
||||
size_t back() const { return static_cast<size_t>(storage[2]) - 1; } |
||||
size_t end() const { return static_cast<size_t>(storage[2]); } |
||||
size_t index(EdgeType edge) const { |
||||
return edge == kFront ? begin() : back(); |
||||
} |
||||
|
||||
// Properties: size and capacity.
|
||||
// `capacity` contains the current capacity of this instance, where
|
||||
// `kMaxCapacity` contains the maximum capacity of a btree node.
|
||||
// For now, `capacity` and `kMaxCapacity` return the same value, but this may
|
||||
// change in the future if we see benefit in dynamically sizing 'small' nodes
|
||||
// to 'large' nodes for large data trees.
|
||||
size_t size() const { return end() - begin(); } |
||||
size_t capacity() const { return kMaxCapacity; } |
||||
|
||||
// Edge access
|
||||
inline CordRep* Edge(size_t index) const; |
||||
inline CordRep* Edge(EdgeType edge_type) const; |
||||
inline absl::Span<CordRep* const> Edges() const; |
||||
inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const; |
||||
|
||||
// Returns reference to the data edge at `index`.
|
||||
// Requires this instance to be a leaf node, and `index` to be valid index.
|
||||
inline absl::string_view Data(size_t index) const; |
||||
|
||||
static const char* EdgeDataPtr(const CordRep* r); |
||||
static absl::string_view EdgeData(const CordRep* r); |
||||
|
||||
// Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
|
||||
// holding a FLAT or EXTERNAL child rep.
|
||||
static bool IsDataEdge(const CordRep* rep); |
||||
|
||||
// Diagnostics
|
||||
static bool IsValid(const CordRepBtree* tree); |
||||
static CordRepBtree* AssertValid(CordRepBtree* tree); |
||||
static const CordRepBtree* AssertValid(const CordRepBtree* tree); |
||||
static void Dump(const CordRep* rep, std::ostream& stream); |
||||
static void Dump(const CordRep* rep, absl::string_view label, |
||||
std::ostream& stream); |
||||
static void Dump(const CordRep* rep, absl::string_view label, |
||||
bool include_contents, std::ostream& stream); |
||||
|
||||
// Adds the edge `edge` to this node if possible. `owned` indicates if the
|
||||
// current node is potentially shared or not with other threads. Returns:
|
||||
// - {kSelf, <this>}
|
||||
// The edge was directly added to this node.
|
||||
// - {kCopied, <node>}
|
||||
// The edge was added to a copy of this node.
|
||||
// - {kPopped, New(edge, height())}
|
||||
// A new leg with the edge was created as this node has no extra capacity.
|
||||
template <EdgeType edge_type> |
||||
inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta); |
||||
|
||||
// Replaces the front or back edge with the provided new edge. Returns:
|
||||
// - {kSelf, <this>}
|
||||
// The edge was directly set in this node. The old edge is unreffed.
|
||||
// - {kCopied, <node>}
|
||||
// A copy of this node was created with the new edge value.
|
||||
// In both cases, the function adopts a reference on `edge`.
|
||||
template <EdgeType edge_type> |
||||
OpResult SetEdge(bool owned, CordRep* edge, size_t delta); |
||||
|
||||
// Creates a new empty node at the specified height.
|
||||
static CordRepBtree* New(int height = 0); |
||||
|
||||
// Creates a new node containing `rep`, with the height being computed
|
||||
// automatically based on the type of `rep`.
|
||||
static CordRepBtree* New(CordRep* rep); |
||||
|
||||
// Creates a new node containing both `front` and `back` at height
|
||||
// `front.height() + 1`. Requires `back.height() == front.height()`.
|
||||
static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back); |
||||
|
||||
private: |
||||
CordRepBtree() = default; |
||||
~CordRepBtree() = default; |
||||
|
||||
// Initializes the main properties `tag`, `begin`, `end`, `height`.
|
||||
inline void InitInstance(int height, size_t begin = 0, size_t end = 0); |
||||
|
||||
// Direct property access begin / end
|
||||
void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); } |
||||
void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); } |
||||
|
||||
// Decreases the value of `begin` by `n`, and returns the new value. Notice
|
||||
// how this returns the new value unlike atomic::fetch_add which returns the
|
||||
// old value. This is because this is used to prepend edges at 'begin - 1'.
|
||||
size_t sub_fetch_begin(size_t n) { |
||||
storage[1] -= static_cast<uint8_t>(n); |
||||
return storage[1]; |
||||
} |
||||
|
||||
// Increases the value of `end` by `n`, and returns the previous value. This
|
||||
// function is typically used to append edges at 'end'.
|
||||
size_t fetch_add_end(size_t n) { |
||||
const uint8_t current = storage[2]; |
||||
storage[2] = static_cast<uint8_t>(current + n); |
||||
return current; |
||||
} |
||||
|
||||
// Returns the index of the last edge starting on, or before `offset`, with
|
||||
// `n` containing the relative offset of `offset` inside that edge.
|
||||
// Requires `offset` < length.
|
||||
Position IndexOf(size_t offset) const; |
||||
|
||||
// Returns the index of the last edge starting before `offset`, with `n`
|
||||
// containing the relative offset of `offset` inside that edge.
|
||||
// This function is useful to find the edges for some span of bytes ending at
|
||||
// `offset` (i.e., `n` bytes). For example:
|
||||
//
|
||||
// Position pos = IndexBefore(n)
|
||||
// edges = Edges(begin(), pos.index) // All full edges (may be empty)
|
||||
// last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty)
|
||||
//
|
||||
// Requires 0 < `offset` <= length.
|
||||
Position IndexBefore(size_t offset) const; |
||||
|
||||
// Identical to the above function except starting from the position `front`.
|
||||
// This function is equivalent to `IndexBefore(front.n + offset)`, with
|
||||
// the difference that this function is optimized to start at `front.index`.
|
||||
Position IndexBefore(Position front, size_t offset) const; |
||||
|
||||
// Returns the index of the edge directly beyond the edge containing offset
|
||||
// `offset`, with `n` containing the distance of that edge from `offset`.
|
||||
// This function is useful for iteratively finding suffix nodes and remaining
|
||||
// partial bytes in left-most suffix nodes as for example in CopySuffix.
|
||||
// Requires `offset` < length.
|
||||
Position IndexBeyond(size_t offset) const; |
||||
|
||||
// Destruction
|
||||
static void DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end); |
||||
static void DestroyNonLeaf(CordRepBtree* tree, size_t begin, size_t end); |
||||
static void DestroyTree(CordRepBtree* tree, size_t begin, size_t end); |
||||
static void Delete(CordRepBtree* tree) { delete tree; } |
||||
|
||||
// Creates a new leaf node containing as much data as possible from `data`.
|
||||
// The data is added either forwards or reversed depending on `edge_type`.
|
||||
// Callers must check the length of the returned node to determine if all data
|
||||
// was copied or not.
|
||||
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
|
||||
template <EdgeType edge_type> |
||||
static CordRepBtree* NewLeaf(absl::string_view data, size_t extra); |
||||
|
||||
// Creates a raw copy of this Btree node, copying all properties, but
|
||||
// without adding any references to existing edges.
|
||||
CordRepBtree* CopyRaw() const; |
||||
|
||||
// Creates a full copy of this Btree node, adding a reference on all edges.
|
||||
CordRepBtree* Copy() const; |
||||
|
||||
// Creates a partial copy of this Btree node, copying all edges up to `end`,
|
||||
// adding a reference on each copied edge, and sets the length of the newly
|
||||
// created copy to `new_length`.
|
||||
CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const; |
||||
|
||||
// Creates a partial copy of this Btree node, copying all edges starting at
|
||||
// `begin`, adding a reference on each copied edge, and sets the length of
|
||||
// the newly created copy to `new_length`.
|
||||
CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const; |
||||
|
||||
// Returns a tree containing the result of appending `right` to `left`.
|
||||
static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right); |
||||
|
||||
// Fallback functions for `Create()`, `Append()` and `Prepend()` which
|
||||
// deal with legacy / non conforming input, i.e.: CONCAT trees.
|
||||
static CordRepBtree* CreateSlow(CordRep* rep); |
||||
static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep); |
||||
static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep); |
||||
|
||||
// Aligns existing edges to start at index 0, to allow for a new edge to be
|
||||
// added to the back of the current edges.
|
||||
inline void AlignBegin(); |
||||
|
||||
// Aligns existing edges to end at `capacity`, to allow for a new edge to be
|
||||
// added in front of the current edges.
|
||||
inline void AlignEnd(); |
||||
|
||||
// Adds the provided edge to this node.
|
||||
// Requires this node to have capacity for the edge. Realigns / moves
|
||||
// existing edges as needed to prepend or append the new edge.
|
||||
template <EdgeType edge_type> |
||||
inline void Add(CordRep* rep); |
||||
|
||||
// Adds the provided edges to this node.
|
||||
// Requires this node to have capacity for the edges. Realigns / moves
|
||||
// existing edges as needed to prepend or append the new edges.
|
||||
template <EdgeType edge_type> |
||||
inline void Add(absl::Span<CordRep* const>); |
||||
|
||||
// Adds data from `data` to this node until either all data has been consumed,
|
||||
// or there is no more capacity for additional flat nodes inside this node.
|
||||
// Requires the current node to be a leaf node, data to be non empty, and the
|
||||
// current node to have capacity for at least one more data edge.
|
||||
// Returns any remaining data from `data` that was not added, which is
|
||||
// depending on the edge type (front / back) either the remaining prefix of
|
||||
// suffix of the input.
|
||||
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
|
||||
template <EdgeType edge_type> |
||||
absl::string_view AddData(absl::string_view data, size_t extra); |
||||
|
||||
// Replace the front or back edge with the provided value.
|
||||
// Adopts a reference on `edge` and unrefs the old edge.
|
||||
template <EdgeType edge_type> |
||||
inline void SetEdge(CordRep* edge); |
||||
|
||||
// Returns a partial copy of the current tree containing the first `n` bytes
|
||||
// of data. `CopyResult` contains both the resulting edge and its height. The
|
||||
// resulting tree may be less high than the current tree, or even be a single
|
||||
// matching data edge. For example, if `n == 1`, then the result will be the
|
||||
// single data edge, and height will be set to -1 (one below the owning leaf
|
||||
// node). If n == 0, this function returns null.
|
||||
// Requires `n <= length`
|
||||
CopyResult CopyPrefix(size_t n); |
||||
|
||||
// Returns a partial copy of the current tree containing all data starting
|
||||
// after `offset`. `CopyResult` contains both the resulting edge and its
|
||||
// height. The resulting tree may be less high than the current tree, or even
|
||||
// be a single matching data edge. For example, if `n == length - 1`, then the
|
||||
// result will be a single data edge, and height will be set to -1 (one below
|
||||
// the owning leaf node).
|
||||
// Requires `offset < length`
|
||||
CopyResult CopySuffix(size_t offset); |
||||
|
||||
// Returns a OpResult value of {this, kSelf} or {Copy(), kCopied}
|
||||
// depending on the value of `owned`.
|
||||
inline OpResult ToOpResult(bool owned); |
||||
|
||||
// Adds `rep` to the specified tree, returning the modified tree.
|
||||
template <EdgeType edge_type> |
||||
static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep); |
||||
|
||||
// Adds `data` to the specified tree, returning the modified tree.
|
||||
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
|
||||
template <EdgeType edge_type> |
||||
static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data, |
||||
size_t extra = 0); |
||||
|
||||
// Merges `src` into `dst` with `src` being added either before (kFront) or
|
||||
// after (kBack) `dst`. Requires the height of `dst` to be greater than or
|
||||
// equal to the height of `src`.
|
||||
template <EdgeType edge_type> |
||||
static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src); |
||||
|
||||
// Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
|
||||
// implements an inlined version for trees of limited height (3 levels),
|
||||
// GetAppendBufferSlow implements the logic for large trees.
|
||||
Span<char> GetAppendBufferSlow(size_t size); |
||||
|
||||
// `edges_` contains all edges starting from this instance.
|
||||
// These are explicitly `child` edges only, a cord btree (or any cord tree in
|
||||
// that respect) does not store `parent` pointers anywhere: multiple trees /
|
||||
// parents can reference the same shared child edge. The type of these edges
|
||||
// depends on the height of the node. `Leaf nodes` (height == 0) contain `data
|
||||
// edges` (external or flat nodes, or sub-strings thereof). All other nodes
|
||||
// (height > 0) contain pointers to BTREE nodes with a height of `height - 1`.
|
||||
CordRep* edges_[kMaxCapacity]; |
||||
|
||||
friend class CordRepBtreeTestPeer; |
||||
friend class CordRepBtreeNavigator; |
||||
}; |
||||
|
||||
// Downcasts this node to a `CordRepBtree`. The node must carry the BTREE tag,
// which is only verified through `assert`.
inline CordRepBtree* CordRep::btree() {
  assert(tag == BTREE);
  CordRepBtree* const self = static_cast<CordRepBtree*>(this);
  return self;
}
||||
|
||||
// Const overload: downcasts this node to a `const CordRepBtree`. The node must
// carry the BTREE tag, which is only verified through `assert`.
inline const CordRepBtree* CordRep::btree() const {
  assert(tag == BTREE);
  const CordRepBtree* const self = static_cast<const CordRepBtree*>(this);
  return self;
}
||||
|
||||
// Marks this node as a BTREE node and records `height`, `begin` and `end` in
// the first three `storage` slots (in that order).
inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
  storage[2] = end;
  storage[1] = begin;
  storage[0] = height;
  tag = BTREE;
}
||||
|
||||
// Returns the edge stored at `index`.
// Requires `begin() <= index < end()` (checked with `assert` only).
inline CordRep* CordRepBtree::Edge(size_t index) const {
  assert(begin() <= index && index < end());
  CordRep* const edge = edges_[index];
  return edge;
}
||||
|
||||
// Returns the first edge for `kFront`, and the last edge for any other
// `edge_type` value.
inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
  if (edge_type == kFront) return edges_[begin()];
  return edges_[back()];
}
||||
|
||||
// Returns a read-only view of `size()` edges starting at index `begin()`.
inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
  CordRep* const* const first = edges_ + begin();
  return absl::Span<CordRep* const>(first, size());
}
||||
|
||||
// Returns a read-only view of the edges in the index range [begin, end).
// Requires `this->begin() <= begin <= end <= this->end()` (asserted only).
inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
                                                      size_t end) const {
  assert(begin <= end);
  assert(begin >= this->begin());
  assert(end <= this->end());
  const size_t count = static_cast<size_t>(end - begin);
  return absl::Span<CordRep* const>(edges_ + begin, count);
}
||||
|
||||
inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) { |
||||
assert(IsDataEdge(r)); |
||||
size_t offset = 0; |
||||
if (r->tag == SUBSTRING) { |
||||
offset = r->substring()->start; |
||||
r = r->substring()->child; |
||||
} |
||||
return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset; |
||||
} |
||||
|
||||
// Returns the `r->length` bytes of data referenced by data edge `r`.
inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) {
  const char* const data = EdgeDataPtr(r);
  return absl::string_view(data, r->length);
}
||||
|
||||
// Returns the data of the edge at `index`.
// Requires this node to be a leaf, i.e. `height() == 0` (asserted only).
inline absl::string_view CordRepBtree::Data(size_t index) const {
  assert(height() == 0);
  const CordRep* const edge = Edge(index);
  return EdgeData(edge);
}
||||
|
||||
inline bool CordRepBtree::IsDataEdge(const CordRep* rep) { |
||||
// The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
|
||||
// if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
|
||||
// check in the slow path the SUBSTRING check to optimize for the hot path.
|
||||
if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true; |
||||
if (rep->tag == SUBSTRING) rep = rep->substring()->child; |
||||
return rep->tag == EXTERNAL || rep->tag >= FLAT; |
||||
} |
||||
|
||||
// Allocates a new node of the given `height` with a length of zero. The edge
// range is whatever `InitInstance(height)` selects by default.
inline CordRepBtree* CordRepBtree::New(int height) {
  auto* node = new CordRepBtree;
  node->InitInstance(height);
  node->length = 0;
  return node;
}
||||
|
||||
// Allocates a new node holding `rep` as its single edge (index 0). The new
// node sits one level above `rep` if `rep` is a BTREE node, and is a leaf
// (height 0) otherwise. No reference is added on `rep`.
inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
  auto* node = new CordRepBtree;
  int height = 0;
  if (rep->tag == BTREE) height = rep->btree()->height() + 1;
  node->InitInstance(height, /*begin=*/0, /*end=*/1);
  node->length = rep->length;
  node->edges_[0] = rep;
  return node;
}
||||
|
||||
// Allocates a new node one level above `front` and `back`, holding them as
// edges 0 and 1. The length is the sum of both lengths.
// Requires both inputs to have the same height (asserted only). No references
// are added on `front` or `back`.
inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
                                       CordRepBtree* back) {
  assert(front->height() == back->height());
  auto* parent = new CordRepBtree;
  parent->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2);
  parent->length = front->length + back->length;
  parent->edges_[0] = front;
  parent->edges_[1] = back;
  return parent;
}
||||
|
||||
// Dispatches destruction of `tree` over the edge range [begin, end) to
// `DestroyLeaf` for leaf nodes (height 0) and `DestroyNonLeaf` otherwise.
inline void CordRepBtree::DestroyTree(CordRepBtree* tree, size_t begin,
                                      size_t end) {
  if (tree->height() != 0) {
    DestroyNonLeaf(tree, begin, end);
    return;
  }
  DestroyLeaf(tree, begin, end);
}
||||
|
||||
// Destroys `tree` over its full edge range [tree->begin(), tree->end()).
inline void CordRepBtree::Destroy(CordRepBtree* tree) {
  const size_t first = tree->begin();
  const size_t last = tree->end();
  DestroyTree(tree, first, last);
}
||||
|
||||
// Returns a byte-wise copy of this node in freshly allocated storage, with a
// newly constructed `Refcount`. Edge pointers are copied verbatim: no
// references are taken on the edges here.
inline CordRepBtree* CordRepBtree::CopyRaw() const {
  void* const raw = ::operator new(sizeof(CordRepBtree));
  memcpy(raw, this, sizeof(CordRepBtree));
  auto* copy = static_cast<CordRepBtree*>(raw);
  // The memcpy also copied the source refcount; replace it with a fresh one.
  new (&copy->refcount) Refcount;
  return copy;
}
||||
|
||||
// Returns a copy of this node (see `CopyRaw`) after adding one reference to
// each edge in `Edges()`.
inline CordRepBtree* CordRepBtree::Copy() const {
  CordRepBtree* const copy = CopyRaw();
  const absl::Span<CordRep* const> edges = Edges();
  for (auto it = edges.begin(); it != edges.end(); ++it) {
    CordRep::Ref(*it);
  }
  return copy;
}
||||
|
||||
// Returns a copy of this node whose edge range starts at `begin` and whose
// length is `new_length`. One reference is added to each edge remaining in the
// copy. Requires `this->begin() <= begin <= this->end()` (asserted only).
inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
                                                 size_t new_length) const {
  assert(begin >= this->begin());
  assert(begin <= this->end());
  CordRepBtree* const copy = CopyRaw();
  copy->set_begin(begin);
  copy->length = new_length;
  // Only reference the edges that are still part of the copy.
  for (CordRep* kept : copy->Edges()) {
    CordRep::Ref(kept);
  }
  return copy;
}
||||
|
||||
// Returns a copy of this node whose edge range stops at `end` and whose length
// is `new_length`. One reference is added to each edge remaining in the copy.
// Requires `this->begin() <= end <= capacity()` (asserted only).
inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
                                               size_t new_length) const {
  assert(end <= capacity());
  assert(end >= this->begin());
  CordRepBtree* const copy = CopyRaw();
  copy->set_end(end);
  copy->length = new_length;
  // Only reference the edges that are still part of the copy.
  for (CordRep* kept : copy->Edges()) {
    CordRep::Ref(kept);
  }
  return copy;
}
||||
|
||||
inline void CordRepBtree::AlignBegin() { |
||||
// The below code itself does not need to be fast as typically we have
|
||||
// mono-directional append/prepend calls, and `begin` / `end` are typically
|
||||
// adjusted no more than once. But we want to avoid potential register clobber
|
||||
// effects, making the compiler emit register save/store/spills, and minimize
|
||||
// the size of code.
|
||||
const size_t delta = begin(); |
||||
if (ABSL_PREDICT_FALSE(delta != 0)) { |
||||
const size_t new_end = end() - delta; |
||||
set_begin(0); |
||||
set_end(new_end); |
||||
// TODO(mvels): we can write this using 2 loads / 2 stores depending on
|
||||
// total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on
|
||||
// size, and then do overlapping load/store of up to 4 pointers (inlined as
|
||||
// XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a)
|
||||
// compact and b) not clobbering any registers.
|
||||
ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity); |
||||
#ifdef __clang__ |
||||
#pragma unroll 1 |
||||
#endif |
||||
for (size_t i = 0; i < new_end; ++i) { |
||||
edges_[i] = edges_[i + delta]; |
||||
} |
||||
} |
||||
} |
||||
|
||||
inline void CordRepBtree::AlignEnd() { |
||||
// See comments in `AlignBegin` for motivation on the hand-rolled for loops.
|
||||
const size_t delta = capacity() - end(); |
||||
if (delta != 0) { |
||||
const size_t new_begin = begin() + delta; |
||||
const size_t new_end = end() + delta; |
||||
set_begin(new_begin); |
||||
set_end(new_end); |
||||
ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity); |
||||
#ifdef __clang__ |
||||
#pragma unroll 1 |
||||
#endif |
||||
for (size_t i = new_end - 1; i >= new_begin; --i) { |
||||
edges_[i] = edges_[i - delta]; |
||||
} |
||||
} |
||||
} |
||||
|
||||
template <> |
||||
inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) { |
||||
AlignBegin(); |
||||
edges_[fetch_add_end(1)] = rep; |
||||
} |
||||
|
||||
template <> |
||||
inline void CordRepBtree::Add<CordRepBtree::kBack>( |
||||
absl::Span<CordRep* const> edges) { |
||||
AlignBegin(); |
||||
size_t new_end = end(); |
||||
for (CordRep* edge : edges) edges_[new_end++] = edge; |
||||
set_end(new_end); |
||||
} |
||||
|
||||
template <> |
||||
inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) { |
||||
AlignEnd(); |
||||
edges_[sub_fetch_begin(1)] = rep; |
||||
} |
||||
|
||||
template <> |
||||
inline void CordRepBtree::Add<CordRepBtree::kFront>( |
||||
absl::Span<CordRep* const> edges) { |
||||
AlignEnd(); |
||||
size_t new_begin = begin() - edges.size(); |
||||
set_begin(new_begin); |
||||
for (CordRep* edge : edges) edges_[new_begin++] = edge; |
||||
} |
||||
|
||||
template <CordRepBtree::EdgeType edge_type> |
||||
inline void CordRepBtree::SetEdge(CordRep* edge) { |
||||
const int idx = edge_type == kFront ? begin() : back(); |
||||
CordRep::Unref(edges_[idx]); |
||||
edges_[idx] = edge; |
||||
} |
||||
|
||||
// Returns `{this, kSelf}` if `owned` is true, and `{Copy(), kCopied}`
// otherwise.
inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
  if (owned) return OpResult{this, kSelf};
  return OpResult{Copy(), kCopied};
}
||||
|
||||
// Returns the index of the edge containing the byte at `offset`, together with
// the offset of that byte inside the edge.
// Requires `offset < length` (asserted only).
inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
  assert(offset < length);
  size_t index = begin();
  for (;;) {
    const size_t edge_length = edges_[index]->length;
    if (offset < edge_length) break;
    // `offset` lies beyond this edge: skip the edge entirely.
    offset -= edge_length;
    ++index;
  }
  return {index, offset};
}
||||
|
||||
// Returns the index of the edge that ends at or after `offset`, together with
// the number of bytes of that edge located before `offset`. Unlike `IndexOf`,
// an `offset` that falls exactly on an edge boundary resolves to the edge
// ending there. Requires `0 < offset <= length` (asserted only).
inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
  assert(offset > 0);
  assert(offset <= length);
  size_t index = begin();
  for (;;) {
    const size_t edge_length = edges_[index]->length;
    if (offset <= edge_length) break;
    offset -= edge_length;
    ++index;
  }
  return {index, offset};
}
||||
|
||||
// Same search as `IndexBefore(offset)`, but starting from the edge at
// `front.index` with `front.n` added to `offset`, instead of starting from
// `begin()`.
inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
                                                        size_t offset) const {
  size_t index = front.index;
  offset += front.n;
  for (;;) {
    const size_t edge_length = edges_[index]->length;
    if (offset <= edge_length) break;
    offset -= edge_length;
    ++index;
  }
  return {index, offset};
}
||||
|
||||
// Returns the index of the first edge whose starting offset is at or beyond
// `offset`, together with the distance from `offset` to that starting offset.
inline CordRepBtree::Position CordRepBtree::IndexBeyond(
    const size_t offset) const {
  // We need to find the edge whose starting offset is beyond (>=) `offset`.
  // For this we can't use the `offset -= length` logic of IndexOf. Instead, we
  // track the starting offset of the current edge in `edge_start`, which we
  // increase as we iterate over the edges until we find the matching edge.
  size_t edge_start = 0;
  size_t index = begin();
  for (; edge_start < offset; ++index) {
    edge_start += edges_[index]->length;
  }
  return {index, edge_start - offset};
}
||||
|
||||
// Creates a tree from `rep`: data edges are wrapped directly through
// `New(rep)`, any other input is handed to `CreateSlow`.
inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
  if (!IsDataEdge(rep)) return CreateSlow(rep);
  return New(rep);
}
||||
|
||||
// Returns a span of up to `size` bytes of unused capacity at the end of the
// last flat in this tree, and grows the length of that flat and of every node
// on the path to it by the size of the returned span.
//
// Returns an empty span if any node on the right-most path or the last data
// edge has a refcount other than one, if the last data edge is not a flat, or
// if that flat has no capacity left. Trees with a height outside of [0, 3]
// are handled by `GetAppendBufferSlow`.
//
// Requires this node itself to have a refcount of one (asserted only).
inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
  assert(refcount.IsOne());
  const int height = this->height();

  // Nodes on the right-most path. Levels that do not exist for this height
  // keep pointing at `this` and are not touched when lengths are updated.
  CordRepBtree* const root = this;
  CordRepBtree* level2 = this;
  CordRepBtree* level1 = this;
  CordRepBtree* leaf = this;
  switch (height) {
    case 3:
      leaf = leaf->Edge(kBack)->btree();
      if (!leaf->refcount.IsOne()) return {};
      level2 = leaf;
      ABSL_FALLTHROUGH_INTENDED;
    case 2:
      leaf = leaf->Edge(kBack)->btree();
      if (!leaf->refcount.IsOne()) return {};
      level1 = leaf;
      ABSL_FALLTHROUGH_INTENDED;
    case 1:
      leaf = leaf->Edge(kBack)->btree();
      if (!leaf->refcount.IsOne()) return {};
      ABSL_FALLTHROUGH_INTENDED;
    case 0:
      break;
    default:
      return GetAppendBufferSlow(size);
  }

  // The last data edge must be a privately owned flat with spare capacity.
  CordRep* const edge = leaf->Edge(kBack);
  if (!edge->refcount.IsOne() || edge->tag < FLAT) return {};
  const size_t avail = edge->flat()->Capacity() - edge->length;
  if (avail == 0) return {};

  const size_t delta = (std::min)(size, avail);
  Span<char> span = {edge->flat()->Data() + edge->length, delta};
  edge->length += delta;

  // Account for the added bytes in every node from the root down to the leaf.
  switch (height) {
    case 3:
      root->length += delta;
      ABSL_FALLTHROUGH_INTENDED;
    case 2:
      level2->length += delta;
      ABSL_FALLTHROUGH_INTENDED;
    case 1:
      level1->length += delta;
      ABSL_FALLTHROUGH_INTENDED;
    case 0:
      leaf->length += delta;
      break;
  }
  return span;
}
||||
|
||||
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>( |
||||
CordRepBtree* tree, CordRep* rep); |
||||
|
||||
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>( |
||||
CordRepBtree* tree, CordRep* rep); |
||||
|
||||
// Appends `rep` to `tree` and returns the resulting tree. Data edges (the
// expected common case) are added directly at the back; any other input is
// handed to `AppendSlow`.
inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
  if (ABSL_PREDICT_FALSE(!IsDataEdge(rep))) {
    return AppendSlow(tree, rep);
  }
  return CordRepBtree::AddCordRep<kBack>(tree, rep);
}
||||
|
||||
// Prepends `rep` to `tree` and returns the resulting tree. Data edges (the
// expected common case) are added directly at the front; any other input is
// handed to `PrependSlow`.
inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
  if (ABSL_PREDICT_FALSE(!IsDataEdge(rep))) {
    return PrependSlow(tree, rep);
  }
  return CordRepBtree::AddCordRep<kFront>(tree, rep);
}
||||
|
||||
#ifdef NDEBUG

// With NDEBUG defined, `AssertValid` performs no checks and simply returns
// its argument.
inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree) {
  CordRepBtree* const unchecked = tree;
  return unchecked;
}

// Const overload of the no-op `AssertValid` above.
inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree) {
  const CordRepBtree* const unchecked = tree;
  return unchecked;
}

#endif
||||
|
||||
} // namespace cord_internal
|
||||
ABSL_NAMESPACE_END |
||||
} // namespace absl
|
||||
|
||||
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,185 @@ |
||||
// Copyright 2021 The Abseil Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ |
||||
#define ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ |
||||
|
||||
#include <cassert> |
||||
#include <memory> |
||||
#include <random> |
||||
#include <string> |
||||
#include <vector> |
||||
|
||||
#include "absl/base/config.h" |
||||
#include "absl/base/internal/raw_logging.h" |
||||
#include "absl/strings/internal/cord_internal.h" |
||||
#include "absl/strings/internal/cord_rep_btree.h" |
||||
#include "absl/strings/internal/cord_rep_flat.h" |
||||
#include "absl/strings/string_view.h" |
||||
|
||||
namespace absl { |
||||
ABSL_NAMESPACE_BEGIN |
||||
namespace cordrep_testing { |
||||
|
||||
// Creates a SUBSTRING node for testing that references `rep` starting at
// `start`. A `len` of 0 selects everything from `start` up to the end of
// `rep`. No reference is added on `rep`.
inline cord_internal::CordRepSubstring* MakeSubstring(
    size_t start, size_t len, cord_internal::CordRep* rep) {
  auto* node = new cord_internal::CordRepSubstring;
  node->tag = cord_internal::SUBSTRING;
  node->child = rep;
  node->start = start;
  // `len` is unsigned, so "no explicit length" can only be expressed as 0.
  node->length = (len == 0) ? rep->length - start : len;
  return node;
}
||||
|
||||
// Creates a CONCAT node for testing with the given children and `depth`. The
// length is the sum of both child lengths. No references are added on `left`
// or `right`.
inline cord_internal::CordRepConcat* MakeConcat(cord_internal::CordRep* left,
                                                cord_internal::CordRep* right,
                                                int depth = 0) {
  auto* node = new cord_internal::CordRepConcat;
  node->tag = cord_internal::CONCAT;
  node->left = left;
  node->right = right;
  node->length = left->length + right->length;
  node->set_depth(depth);
  return node;
}
||||
|
||||
// Creates a flat node for testing that holds a copy of `value`.
// Requires `value.length() <= cord_internal::kMaxFlatLength` (asserted only).
inline cord_internal::CordRepFlat* MakeFlat(absl::string_view value) {
  const size_t size = value.length();
  assert(size <= cord_internal::kMaxFlatLength);
  cord_internal::CordRepFlat* const flat =
      cord_internal::CordRepFlat::New(size);
  flat->length = size;
  memcpy(flat->Data(), value.data(), size);
  return flat;
}
||||
|
||||
// Creates an external node for testing
|
||||
inline cord_internal::CordRepExternal* MakeExternal(absl::string_view s) { |
||||
struct Rep : public cord_internal::CordRepExternal { |
||||
std::string s; |
||||
explicit Rep(absl::string_view sv) : s(sv) { |
||||
this->tag = cord_internal::EXTERNAL; |
||||
this->base = s.data(); |
||||
this->length = s.length(); |
||||
this->releaser_invoker = [](cord_internal::CordRepExternal* self) { |
||||
delete static_cast<Rep*>(self); |
||||
}; |
||||
} |
||||
}; |
||||
return new Rep(s); |
||||
} |
||||
|
||||
inline std::string CreateRandomString(size_t n) { |
||||
absl::string_view data = |
||||
"abcdefghijklmnopqrstuvwxyz" |
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
||||
"0123456789~!@#$%^&*()_+=-<>?:\"{}[]|"; |
||||
std::minstd_rand rnd; |
||||
std::uniform_int_distribution<size_t> dist(0, data.size() - 1); |
||||
std::string s(n, ' '); |
||||
for (size_t i = 0; i < n; ++i) { |
||||
s[i] = data[dist(rnd)]; |
||||
} |
||||
return s; |
||||
} |
||||
|
||||
// Creates an array of flats from the provided string, chopping
|
||||
// the provided string up into flats of size `chunk_size` characters
|
||||
// resulting in roughly `data.size() / chunk_size` total flats.
|
||||
inline std::vector<cord_internal::CordRep*> CreateFlatsFromString( |
||||
absl::string_view data, size_t chunk_size) { |
||||
assert(chunk_size > 0); |
||||
std::vector<cord_internal::CordRep*> flats; |
||||
for (absl::string_view s = data; !s.empty(); s.remove_prefix(chunk_size)) { |
||||
flats.push_back(MakeFlat(s.substr(0, chunk_size))); |
||||
} |
||||
return flats; |
||||
} |
||||
|
||||
// Builds a btree from `flats`: the tree is created from the first element and
// all remaining elements are appended in order.
// Requires `flats` to be non-empty (asserted only).
inline cord_internal::CordRepBtree* CordRepBtreeFromFlats(
    absl::Span<cord_internal::CordRep* const> flats) {
  assert(!flats.empty());
  auto it = flats.begin();
  cord_internal::CordRepBtree* tree = cord_internal::CordRepBtree::Create(*it);
  for (++it; it != flats.end(); ++it) {
    tree = cord_internal::CordRepBtree::Append(tree, *it);
  }
  return tree;
}
||||
|
||||
// Appends the contents of `rep` to `s`. Supports FLAT, EXTERNAL and BTREE
// nodes, optionally wrapped in SUBSTRING nodes; any other tag is a fatal
// error.
// NOTE: when the node below the SUBSTRING wrappers is a BTREE, the substring
// offset and length are not applied: all edges are appended in full.
inline void CordToString(cord_internal::CordRep* rep, std::string& s) {
  // The length of the outermost node determines how many bytes are appended.
  const size_t length = rep->length;

  // Strip SUBSTRING wrappers, accumulating their start offsets.
  size_t offset = 0;
  for (; rep->tag == cord_internal::SUBSTRING;
       rep = rep->substring()->child) {
    offset += rep->substring()->start;
  }

  if (rep->tag == cord_internal::BTREE) {
    for (cord_internal::CordRep* edge : rep->btree()->Edges()) {
      CordToString(edge, s);
    }
    return;
  }
  if (rep->tag >= cord_internal::FLAT) {
    s.append(rep->flat()->Data() + offset, length);
    return;
  }
  if (rep->tag == cord_internal::EXTERNAL) {
    s.append(rep->external()->base + offset, length);
    return;
  }
  ABSL_RAW_LOG(FATAL, "Unsupported tag %d", rep->tag);
}
||||
|
||||
// Returns the contents of `rep` as a string. Capacity for `rep->length` bytes
// is reserved up front.
inline std::string CordToString(cord_internal::CordRep* rep) {
  std::string result;
  result.reserve(rep->length);
  CordToString(rep, result);
  return result;
}
||||
|
||||
// RAII Helper class to automatically unref reps on destruction.
|
||||
class AutoUnref { |
||||
public: |
||||
~AutoUnref() { |
||||
for (CordRep* rep : unrefs_) CordRep::Unref(rep); |
||||
} |
||||
|
||||
// Adds `rep` to the list of reps to be unreffed at destruction.
|
||||
template <typename CordRepType> |
||||
CordRepType* Add(CordRepType* rep) { |
||||
unrefs_.push_back(rep); |
||||
return rep; |
||||
} |
||||
|
||||
// Increments the reference count of `rep` by one, and adds it to
|
||||
// the list of reps to be unreffed at destruction.
|
||||
template <typename CordRepType> |
||||
CordRepType* Ref(CordRepType* rep) { |
||||
unrefs_.push_back(CordRep::Ref(rep)); |
||||
return rep; |
||||
} |
||||
|
||||
// Increments the reference count of `rep` by one if `condition` is true,
|
||||
// and adds it to the list of reps to be unreffed at destruction.
|
||||
template <typename CordRepType> |
||||
CordRepType* RefIf(bool condition, CordRepType* rep) { |
||||
if (condition) unrefs_.push_back(CordRep::Ref(rep)); |
||||
return rep; |
||||
} |
||||
|
||||
private: |
||||
using CordRep = absl::cord_internal::CordRep; |
||||
|
||||
std::vector<CordRep*> unrefs_; |
||||
}; |
||||
|
||||
} // namespace cordrep_testing
|
||||
ABSL_NAMESPACE_END |
||||
} // namespace absl
|
||||
|
||||
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
|
Loading…
Reference in new issue