Export of internal Abseil changes

--
ed829ac612f090375427c3488827c6e74deb2e3f by Derek Mauro <dmauro@google.com>:

Update latest GCC/Clang Linux tests to Bazel 5.0.0 and CMake 3.22.2

PiperOrigin-RevId: 429369775

--
76952303c4d942288c4e7657ffb5893cec54a132 by Martijn Vels <mvels@google.com>:

Optimize Cord::ChunkIterator now that CordRepConcat is removed

PiperOrigin-RevId: 429321455

--
dcd0d287793649aba9b98268c5783e449a34749f by Martijn Vels <mvels@google.com>:

Add IsDataEdge() and DataEdgeValue() helper functions.

This moves repetitive logic accessing data edges into its own header, and more strongly defines the notion of what a data edge is, enforcing the internal invariants. This will also be incorporated in optimized Cord iteration logic once CordRepConcat is totally removed from the Cord code.

PiperOrigin-RevId: 429307248

--
6a0903962155988085bf8656743fda9c4cdcba6c by Abseil Team <absl-team@google.com>:

Make it clear that the probability function given for the zipf distribution is unnormalized, i.e., sum(p(x) for x = 0..k) != 100%.

Quoting Section 7 of the paper cited in the comments, where this formula comes from (emphasis mine): "We will consider the two parameter generalization as defined in Dagpunar [1988] with the *unnormalized* probability function ..."

PiperOrigin-RevId: 429068258

--
3899ff6d444ba755148bc521a6ee031d9e9d4485 by Abseil Team <absl-team@google.com>:

Internal Changes

PiperOrigin-RevId: 428644856

--
319de702d2b537cbb76c4c71277ae89b349b162e by Benjamin Barenblat <bbaren@google.com>:

Support symbolization on PA-RISC

Null out supervisor bits in PA-RISC addresses before symbolizing, and
handle function descriptor tables correctly.

Change symbolize_test.cc to use 32-bit aligned addresses, allowing that
test to pass on PA-RISC.

PiperOrigin-RevId: 428590564
GitOrigin-RevId: ed829ac612f090375427c3488827c6e74deb2e3f
Change-Id: Ie01ff3b9365fd45e5a55f858038552679f3180d3
pull/1110/head
Abseil Team 3 years ago committed by vslashg
parent c2ef703338
commit 7f850b3167
  1. 1
      CMake/AbseilDll.cmake
  2. 45
      absl/debugging/symbolize_elf.inc
  3. 8
      absl/debugging/symbolize_test.cc
  4. 2
      absl/random/zipf_distribution.h
  5. 16
      absl/strings/BUILD.bazel
  6. 18
      absl/strings/CMakeLists.txt
  7. 171
      absl/strings/cord.cc
  8. 54
      absl/strings/cord.h
  9. 14
      absl/strings/cord_analysis.cc
  10. 72
      absl/strings/cord_test.cc
  11. 63
      absl/strings/internal/cord_data_edge.h
  12. 130
      absl/strings/internal/cord_data_edge_test.cc
  13. 7
      absl/strings/internal/cord_rep_btree.cc
  14. 31
      absl/strings/internal/cord_rep_btree.h
  15. 3
      absl/strings/internal/cord_rep_btree_navigator.cc
  16. 5
      absl/strings/internal/cord_rep_btree_reader.cc
  17. 9
      absl/strings/internal/cord_rep_btree_reader.h
  18. 33
      absl/strings/internal/cord_rep_btree_test.cc
  19. 4
      ci/linux_docker_containers.sh

@ -204,6 +204,7 @@ set(ABSL_INTERNAL_DLL_FILES
"strings/internal/charconv_bigint.h"
"strings/internal/charconv_parse.cc"
"strings/internal/charconv_parse.h"
"strings/internal/cord_data_edge.h"
"strings/internal/cord_internal.cc"
"strings/internal/cord_internal.h"
"strings/internal/cord_rep_btree.cc"

@ -323,6 +323,7 @@ class Symbolizer {
const ptrdiff_t relocation,
char *out, int out_size,
char *tmp_buf, int tmp_buf_size);
const char *GetUncachedSymbol(const void *pc);
enum {
SYMBOL_BUF_SIZE = 3072,
@ -1333,13 +1334,7 @@ static bool MaybeInitializeObjFile(ObjFile *obj) {
// they are called here as well.
// To keep stack consumption low, we would like this function to not
// get inlined.
const char *Symbolizer::GetSymbol(const void *const pc) {
const char *entry = FindSymbolInCache(pc);
if (entry != nullptr) {
return entry;
}
symbol_buf_[0] = '\0';
const char *Symbolizer::GetUncachedSymbol(const void *pc) {
ObjFile *const obj = FindObjFile(pc, 1);
ptrdiff_t relocation = 0;
int fd = -1;
@ -1427,6 +1422,42 @@ const char *Symbolizer::GetSymbol(const void *const pc) {
return InsertSymbolInCache(pc, symbol_buf_);
}
const char *Symbolizer::GetSymbol(const void *pc) {
const char *entry = FindSymbolInCache(pc);
if (entry != nullptr) {
return entry;
}
symbol_buf_[0] = '\0';
#ifdef __hppa__
{
// In some contexts (e.g., return addresses), PA-RISC uses the lowest two
// bits of the address to indicate the privilege level. Clear those bits
// before trying to symbolize.
const auto pc_bits = reinterpret_cast<uintptr_t>(pc);
const auto address = pc_bits & ~0x3;
entry = GetUncachedSymbol(reinterpret_cast<const void *>(address));
if (entry != nullptr) {
return entry;
}
// In some contexts, PA-RISC also uses bit 1 of the address to indicate that
// this is a cross-DSO function pointer. Such function pointers actually
// point to a procedure label, a struct whose first 32-bit (pointer) element
// actually points to the function text. With no symbol found for this
// address so far, try interpreting it as a cross-DSO function pointer and
// see how that goes.
if (pc_bits & 0x2) {
return GetUncachedSymbol(*reinterpret_cast<const void *const *>(address));
}
return nullptr;
}
#else
return GetUncachedSymbol(pc);
#endif
}
bool RemoveAllSymbolDecorators(void) {
if (!g_decorators_mu.TryLock()) {
// Someone else is using decorators. Get out.

@ -392,12 +392,14 @@ TEST(Symbolize, InstallAndRemoveSymbolDecorators) {
DummySymbolDecorator, &c_message),
0);
char *address = reinterpret_cast<char *>(1);
EXPECT_STREQ("abc", TrySymbolize(address++));
// Use addresses 4 and 8 here to ensure that we always use valid addresses
// even on systems that require instructions to be 32-bit aligned.
char *address = reinterpret_cast<char *>(4);
EXPECT_STREQ("abc", TrySymbolize(address));
EXPECT_TRUE(absl::debugging_internal::RemoveSymbolDecorator(ticket_b));
EXPECT_STREQ("ac", TrySymbolize(address++));
EXPECT_STREQ("ac", TrySymbolize(address + 4));
// Cleanup: remove all remaining decorators so other stack traces don't
// get mystery "ac" decoration.

@ -30,7 +30,7 @@ namespace absl {
ABSL_NAMESPACE_BEGIN
// absl::zipf_distribution produces random integer-values in the range [0, k],
// distributed according to the discrete probability function:
// distributed according to the unnormalized discrete probability function:
//
// P(x) = (v + x) ^ -q
//

@ -276,6 +276,7 @@ cc_library(
"internal/cord_rep_ring.cc",
],
hdrs = [
"internal/cord_data_edge.h",
"internal/cord_internal.h",
"internal/cord_rep_btree.h",
"internal/cord_rep_btree_navigator.h",
@ -307,6 +308,21 @@ cc_library(
],
)
cc_test(
name = "cord_data_edge_test",
size = "small",
srcs = ["internal/cord_data_edge_test.cc"],
copts = ABSL_TEST_COPTS,
visibility = ["//visibility:private"],
deps = [
":cord_internal",
":cord_rep_test_util",
":strings",
"//absl/base:config",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "cord_rep_btree_test",
size = "medium",

@ -554,6 +554,7 @@ absl_cc_library(
NAME
cord_internal
HDRS
"internal/cord_data_edge.h"
"internal/cord_internal.h"
"internal/cord_rep_btree.h"
"internal/cord_rep_btree_navigator.h"
@ -932,6 +933,23 @@ absl_cc_test(
GTest::gmock_main
)
absl_cc_test(
NAME
cord_data_edge_test
SRCS
"internal/cord_data_edge_test.cc"
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::base
absl::config
absl::cord_internal
absl::cord_rep_test_util
absl::core_headers
absl::strings
GTest::gmock_main
)
absl_cc_test(
NAME
cord_rep_btree_test

@ -35,6 +35,7 @@
#include "absl/container/fixed_array.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/escaping.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_crc.h"
@ -1094,35 +1095,6 @@ void Cord::CopyToArraySlowPath(char* dst) const {
}
}
Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() {
auto& stack_of_right_children = stack_of_right_children_;
if (stack_of_right_children.empty()) {
assert(!current_chunk_.empty()); // Called on invalid iterator.
// We have reached the end of the Cord.
return *this;
}
// Process the next node on the stack.
CordRep* node = stack_of_right_children.back();
stack_of_right_children.pop_back();
// Get the child node if we encounter a SUBSTRING.
size_t offset = 0;
size_t length = node->length;
if (node->IsSubstring()) {
offset = node->substring()->start;
node = node->substring()->child;
}
assert(node->IsExternal() || node->IsFlat());
assert(length != 0);
const char* data =
node->IsExternal() ? node->external()->base : node->flat()->Data();
current_chunk_ = absl::string_view(data + offset, length);
current_leaf_ = node;
return *this;
}
Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
"Attempted to iterate past `end()`");
@ -1165,135 +1137,38 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
return subcord;
}
auto& stack_of_right_children = stack_of_right_children_;
if (n < current_chunk_.size()) {
// Range to read is a proper subrange of the current chunk.
// Short circuit if reading the entire data edge.
assert(current_leaf_ != nullptr);
CordRep* subnode = CordRep::Ref(current_leaf_);
const char* data = subnode->IsExternal() ? subnode->external()->base
: subnode->flat()->Data();
subnode = NewSubstring(subnode, current_chunk_.data() - data, n);
subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
RemoveChunkPrefix(n);
if (n == current_leaf_->length) {
bytes_remaining_ = 0;
current_chunk_ = {};
CordRep* tree = CordRep::Ref(current_leaf_);
subcord.contents_.EmplaceTree(VerifyTree(tree), method);
return subcord;
}
// Range to read begins with a proper subrange of the current chunk.
assert(!current_chunk_.empty());
assert(current_leaf_ != nullptr);
CordRep* subnode = CordRep::Ref(current_leaf_);
if (current_chunk_.size() < subnode->length) {
const char* data = subnode->IsExternal() ? subnode->external()->base
: subnode->flat()->Data();
subnode = NewSubstring(subnode, current_chunk_.data() - data,
current_chunk_.size());
}
n -= current_chunk_.size();
bytes_remaining_ -= current_chunk_.size();
// Process the next node(s) on the stack, reading whole subtrees depending on
// their length and how many bytes we are advancing.
CordRep* node = nullptr;
while (!stack_of_right_children.empty()) {
node = stack_of_right_children.back();
stack_of_right_children.pop_back();
if (node->length > n) break;
// TODO(qrczak): This might unnecessarily recreate existing concat nodes.
// Avoiding that would need pretty complicated logic (instead of
// current_leaf, keep current_subtree_ which points to the highest node
// such that the current leaf can be found on the path of left children
// starting from current_subtree_; delay creating subnode while node is
// below current_subtree_; find the proper node along the path of left
// children starting from current_subtree_ if this loop exits while staying
// below current_subtree_; etc.; alternatively, push parents instead of
// right children on the stack).
subnode = Concat(subnode, CordRep::Ref(node));
n -= node->length;
bytes_remaining_ -= node->length;
node = nullptr;
}
if (node == nullptr) {
// We have reached the end of the Cord.
assert(bytes_remaining_ == 0);
subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
return subcord;
}
// From this point on, we need a partial substring node.
// Get pointer to the underlying flat or external data payload and
// compute data pointer and offset into current flat or external.
CordRep* payload = current_leaf_->IsSubstring()
? current_leaf_->substring()->child
: current_leaf_;
const char* data = payload->IsExternal() ? payload->external()->base
: payload->flat()->Data();
const size_t offset = current_chunk_.data() - data;
// Get the child node if we encounter a SUBSTRING.
size_t offset = 0;
size_t length = node->length;
if (node->IsSubstring()) {
offset = node->substring()->start;
node = node->substring()->child;
}
CordRepSubstring* tree = new CordRepSubstring();
tree->tag = cord_internal::SUBSTRING;
tree->length = n;
tree->start = offset;
tree->child = CordRep::Ref(payload);
// Range to read ends with a proper (possibly empty) subrange of the current
// chunk.
assert(node->IsExternal() || node->IsFlat());
assert(length > n);
if (n > 0) {
subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n));
}
const char* data =
node->IsExternal() ? node->external()->base : node->flat()->Data();
current_chunk_ = absl::string_view(data + offset + n, length - n);
current_leaf_ = node;
subcord.contents_.EmplaceTree(VerifyTree(tree), method);
bytes_remaining_ -= n;
subcord.contents_.EmplaceTree(VerifyTree(subnode), method);
current_chunk_.remove_prefix(n);
return subcord;
}
void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) {
assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`");
assert(n >= current_chunk_.size()); // This should only be called when
// iterating to a new node.
n -= current_chunk_.size();
bytes_remaining_ -= current_chunk_.size();
if (stack_of_right_children_.empty()) {
// We have reached the end of the Cord.
assert(bytes_remaining_ == 0);
return;
}
// Process the next node(s) on the stack, skipping whole subtrees depending on
// their length and how many bytes we are advancing.
CordRep* node = nullptr;
auto& stack_of_right_children = stack_of_right_children_;
while (!stack_of_right_children.empty()) {
node = stack_of_right_children.back();
stack_of_right_children.pop_back();
if (node->length > n) break;
n -= node->length;
bytes_remaining_ -= node->length;
node = nullptr;
}
if (node == nullptr) {
// We have reached the end of the Cord.
assert(bytes_remaining_ == 0);
return;
}
// Get the child node if we encounter a SUBSTRING.
size_t offset = 0;
size_t length = node->length;
if (node->IsSubstring()) {
offset = node->substring()->start;
node = node->substring()->child;
}
assert(node->IsExternal() || node->IsFlat());
assert(length > n);
const char* data =
node->IsExternal() ? node->external()->base : node->flat()->Data();
current_chunk_ = absl::string_view(data + offset + n, length - n);
current_leaf_ = node;
bytes_remaining_ -= n;
}
char Cord::operator[](size_t i) const {
ABSL_HARDENING_ASSERT(i < size());
size_t offset = i;

@ -80,6 +80,7 @@
#include "absl/functional/function_ref.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/cord_analysis.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_reader.h"
@ -388,12 +389,6 @@ class Cord {
using CordRepBtree = absl::cord_internal::CordRepBtree;
using CordRepBtreeReader = absl::cord_internal::CordRepBtreeReader;
// Stack of right children of concat nodes that we have to visit.
// Keep this at the end of the structure to avoid cache-thrashing.
// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
// the inlined vector size (47 exists for backward compatibility).
using Stack = absl::InlinedVector<absl::cord_internal::CordRep*, 47>;
// Constructs a `begin()` iterator from `tree`. `tree` must not be null.
explicit ChunkIterator(cord_internal::CordRep* tree);
@ -409,17 +404,10 @@ class Cord {
Cord AdvanceAndReadBytes(size_t n);
void AdvanceBytes(size_t n);
// Stack specific operator++
ChunkIterator& AdvanceStack();
// Btree specific operator++
ChunkIterator& AdvanceBtree();
void AdvanceBytesBtree(size_t n);
// Iterates `n` bytes, where `n` is expected to be greater than or equal to
// `current_chunk_.size()`.
void AdvanceBytesSlowPath(size_t n);
// A view into bytes of the current `CordRep`. It may only be a view to a
// suffix of bytes if this is being used by `CharIterator`.
absl::string_view current_chunk_;
@ -432,9 +420,6 @@ class Cord {
// Cord reader for cord btrees. Empty if not traversing a btree.
CordRepBtreeReader btree_reader_;
// See 'Stack' alias definition.
Stack stack_of_right_children_;
};
// Cord::ChunkIterator::chunk_begin()
@ -725,7 +710,8 @@ class Cord {
// be used by spelling absl::strings_internal::MakeStringConstant, which is
// also an internal API.
template <typename T>
explicit constexpr Cord(strings_internal::StringConstant<T>);
// NOLINTNEXTLINE(google-explicit-constructor)
constexpr Cord(strings_internal::StringConstant<T>);
private:
using CordRep = absl::cord_internal::CordRep;
@ -1321,25 +1307,24 @@ inline void Cord::ChunkIterator::InitTree(cord_internal::CordRep* tree) {
tree = cord_internal::SkipCrcNode(tree);
if (tree->tag == cord_internal::BTREE) {
current_chunk_ = btree_reader_.Init(tree->btree());
return;
} else {
current_leaf_ = tree;
current_chunk_ = cord_internal::EdgeData(tree);
}
stack_of_right_children_.push_back(tree);
operator++();
}
inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree)
: bytes_remaining_(tree->length) {
inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) {
bytes_remaining_ = tree->length;
InitTree(tree);
}
inline Cord::ChunkIterator::ChunkIterator(const Cord* cord)
: bytes_remaining_(cord->size()) {
if (cord->contents_.is_tree()) {
InitTree(cord->contents_.as_tree());
inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) {
if (CordRep* tree = cord->contents_.tree()) {
bytes_remaining_ = tree->length;
InitTree(tree);
} else {
current_chunk_ =
absl::string_view(cord->contents_.data(), bytes_remaining_);
bytes_remaining_ = cord->contents_.inline_size();
current_chunk_ = {cord->contents_.data(), bytes_remaining_};
}
}
@ -1369,8 +1354,11 @@ inline Cord::ChunkIterator& Cord::ChunkIterator::operator++() {
assert(bytes_remaining_ >= current_chunk_.size());
bytes_remaining_ -= current_chunk_.size();
if (bytes_remaining_ > 0) {
return btree_reader_ ? AdvanceBtree() : AdvanceStack();
if (btree_reader_) {
return AdvanceBtree();
} else {
assert(!current_chunk_.empty()); // Called on invalid iterator.
}
current_chunk_ = {};
}
return *this;
@ -1411,7 +1399,11 @@ inline void Cord::ChunkIterator::AdvanceBytes(size_t n) {
if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) {
RemoveChunkPrefix(n);
} else if (n != 0) {
btree_reader_ ? AdvanceBytesBtree(n) : AdvanceBytesSlowPath(n);
if (btree_reader_) {
AdvanceBytesBtree(n);
} else {
bytes_remaining_ = 0;
}
}
}

@ -12,13 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/cord_analysis.h"
#include <cstddef>
#include <cstdint>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/cord_analysis.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_crc.h"
@ -98,16 +100,6 @@ struct RawUsage<Mode::kFairShare> {
}
};
// Returns true if the provided rep is a valid data edge.
bool IsDataEdge(const CordRep* rep) {
// The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
// if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
// check in the slow path the SUBSTRING check to optimize for the hot path.
if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true;
if (rep->tag == SUBSTRING) rep = rep->substring()->child;
return rep->tag == EXTERNAL || rep->tag >= FLAT;
}
// Computes the estimated memory size of the provided data edge.
// External reps are assumed 'heap allocated at their exact size'.
template <Mode mode>

@ -1866,6 +1866,78 @@ TEST_P(CordTest, CordChunkIteratorOperations) {
VerifyChunkIterator(subcords, 128);
}
TEST_P(CordTest, AdvanceAndReadOnDataEdge) {
RandomEngine rng(GTEST_FLAG_GET(random_seed));
const std::string data = RandomLowercaseString(&rng, 2000);
for (bool as_flat : {true, false}) {
SCOPED_TRACE(as_flat ? "Flat" : "External");
absl::Cord cord =
as_flat ? absl::Cord(data)
: absl::MakeCordFromExternal(data, [](absl::string_view) {});
auto it = cord.Chars().begin();
#if !defined(NDEBUG) || ABSL_OPTION_HARDENED
EXPECT_DEATH_IF_SUPPORTED(cord.AdvanceAndRead(&it, 2001), ".*");
#endif
it = cord.Chars().begin();
absl::Cord frag = cord.AdvanceAndRead(&it, 2000);
EXPECT_EQ(frag, data);
EXPECT_TRUE(it == cord.Chars().end());
it = cord.Chars().begin();
frag = cord.AdvanceAndRead(&it, 200);
EXPECT_EQ(frag, data.substr(0, 200));
EXPECT_FALSE(it == cord.Chars().end());
frag = cord.AdvanceAndRead(&it, 1500);
EXPECT_EQ(frag, data.substr(200, 1500));
EXPECT_FALSE(it == cord.Chars().end());
frag = cord.AdvanceAndRead(&it, 300);
EXPECT_EQ(frag, data.substr(1700, 300));
EXPECT_TRUE(it == cord.Chars().end());
}
}
TEST_P(CordTest, AdvanceAndReadOnSubstringDataEdge) {
RandomEngine rng(GTEST_FLAG_GET(random_seed));
const std::string data = RandomLowercaseString(&rng, 2500);
for (bool as_flat : {true, false}) {
SCOPED_TRACE(as_flat ? "Flat" : "External");
absl::Cord cord =
as_flat ? absl::Cord(data)
: absl::MakeCordFromExternal(data, [](absl::string_view) {});
cord = cord.Subcord(200, 2000);
const std::string substr = data.substr(200, 2000);
auto it = cord.Chars().begin();
#if !defined(NDEBUG) || ABSL_OPTION_HARDENED
EXPECT_DEATH_IF_SUPPORTED(cord.AdvanceAndRead(&it, 2001), ".*");
#endif
it = cord.Chars().begin();
absl::Cord frag = cord.AdvanceAndRead(&it, 2000);
EXPECT_EQ(frag, substr);
EXPECT_TRUE(it == cord.Chars().end());
it = cord.Chars().begin();
frag = cord.AdvanceAndRead(&it, 200);
EXPECT_EQ(frag, substr.substr(0, 200));
EXPECT_FALSE(it == cord.Chars().end());
frag = cord.AdvanceAndRead(&it, 1500);
EXPECT_EQ(frag, substr.substr(200, 1500));
EXPECT_FALSE(it == cord.Chars().end());
frag = cord.AdvanceAndRead(&it, 300);
EXPECT_EQ(frag, substr.substr(1700, 300));
EXPECT_TRUE(it == cord.Chars().end());
}
}
TEST_P(CordTest, CharIteratorTraits) {
static_assert(std::is_copy_constructible<absl::Cord::CharIterator>::value,
"");

@ -0,0 +1,63 @@
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_
#define ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_
#include <cassert>
#include <cstddef>
#include "absl/base/config.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
// holding a FLAT or EXTERNAL child rep. Requires `rep != nullptr`.
inline bool IsDataEdge(const CordRep* edge) {
assert(edge != nullptr);
// The fast path is that `edge` is an EXTERNAL or FLAT node, making the below
// if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
// check in the slow path of the SUBSTRING check to optimize for the hot path.
if (edge->tag == EXTERNAL || edge->tag >= FLAT) return true;
if (edge->tag == SUBSTRING) edge = edge->substring()->child;
return edge->tag == EXTERNAL || edge->tag >= FLAT;
}
// Returns the `absl::string_view` data reference for the provided data edge.
// Requires 'IsDataEdge(edge) == true`.
inline absl::string_view EdgeData(const CordRep* edge) {
assert(IsDataEdge(edge));
size_t offset = 0;
const size_t length = edge->length;
if (edge->IsSubstring()) {
offset = edge->substring()->start;
edge = edge->substring()->child;
}
return edge->tag >= FLAT
? absl::string_view{edge->flat()->Data() + offset, length}
: absl::string_view{edge->external()->base + offset, length};
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_

@ -0,0 +1,130 @@
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_data_edge.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_test_util.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::absl::cordrep_testing::MakeExternal;
using ::absl::cordrep_testing::MakeFlat;
using ::absl::cordrep_testing::MakeSubstring;
TEST(CordDataEdgeTest, IsDataEdgeOnFlat) {
CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
EXPECT_TRUE(IsDataEdge(rep));
CordRep::Unref(rep);
}
TEST(CordDataEdgeTest, IsDataEdgeOnExternal) {
CordRep* rep = MakeExternal("Lorem ipsum dolor sit amet, consectetur ...");
EXPECT_TRUE(IsDataEdge(rep));
CordRep::Unref(rep);
}
TEST(CordDataEdgeTest, IsDataEdgeOnSubstringOfFlat) {
CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
CordRep* substr = MakeSubstring(1, 20, rep);
EXPECT_TRUE(IsDataEdge(substr));
CordRep::Unref(substr);
}
TEST(CordDataEdgeTest, IsDataEdgeOnSubstringOfExternal) {
CordRep* rep = MakeExternal("Lorem ipsum dolor sit amet, consectetur ...");
CordRep* substr = MakeSubstring(1, 20, rep);
EXPECT_TRUE(IsDataEdge(substr));
CordRep::Unref(substr);
}
TEST(CordDataEdgeTest, IsDataEdgeOnBtree) {
CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
CordRepBtree* tree = CordRepBtree::New(rep);
EXPECT_FALSE(IsDataEdge(tree));
CordRep::Unref(tree);
}
TEST(CordDataEdgeTest, IsDataEdgeOnBadSubstr) {
CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
CordRep* substr = MakeSubstring(1, 18, MakeSubstring(1, 20, rep));
EXPECT_FALSE(IsDataEdge(substr));
CordRep::Unref(substr);
}
TEST(CordDataEdgeTest, EdgeDataOnFlat) {
absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ...";
CordRep* rep = MakeFlat(value);
EXPECT_EQ(EdgeData(rep), value);
CordRep::Unref(rep);
}
TEST(CordDataEdgeTest, EdgeDataOnExternal) {
absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ...";
CordRep* rep = MakeExternal(value);
EXPECT_EQ(EdgeData(rep), value);
CordRep::Unref(rep);
}
TEST(CordDataEdgeTest, EdgeDataOnSubstringOfFlat) {
absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ...";
CordRep* rep = MakeFlat(value);
CordRep* substr = MakeSubstring(1, 20, rep);
EXPECT_EQ(EdgeData(substr), value.substr(1, 20));
CordRep::Unref(substr);
}
TEST(CordDataEdgeTest, EdgeDataOnSubstringOfExternal) {
absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ...";
CordRep* rep = MakeExternal(value);
CordRep* substr = MakeSubstring(1, 20, rep);
EXPECT_EQ(EdgeData(substr), value.substr(1, 20));
CordRep::Unref(substr);
}
#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
TEST(CordDataEdgeTest, IsDataEdgeOnNullPtr) {
EXPECT_DEATH(IsDataEdge(nullptr), ".*");
}
TEST(CordDataEdgeTest, EdgeDataOnNullPtr) {
EXPECT_DEATH(EdgeData(nullptr), ".*");
}
TEST(CordDataEdgeTest, EdgeDataOnBtree) {
CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
CordRepBtree* tree = CordRepBtree::New(rep);
EXPECT_DEATH(EdgeData(tree), ".*");
CordRep::Unref(tree);
}
TEST(CordDataEdgeTest, EdgeDataOnBadSubstr) {
CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
CordRep* substr = MakeSubstring(1, 18, MakeSubstring(1, 20, rep));
EXPECT_DEATH(EdgeData(substr), ".*");
CordRep::Unref(substr);
}
#endif // GTEST_HAS_DEATH_TEST && !NDEBUG
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

@ -22,6 +22,7 @@
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_consume.h"
#include "absl/strings/internal/cord_rep_flat.h"
@ -69,7 +70,7 @@ void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream,
// indentation and prefix / labels keeps us within roughly 80-100 wide.
constexpr size_t kMaxDataLength = 60;
stream << ", data = \""
<< CordRepBtree::EdgeData(r).substr(0, kMaxDataLength)
<< EdgeData(r).substr(0, kMaxDataLength)
<< (r->length > kMaxDataLength ? "\"..." : "\"");
}
stream << '\n';
@ -150,7 +151,7 @@ inline CordRep* MakeSubstring(CordRep* rep, size_t offset) {
CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) {
assert(length > 0);
assert(length <= edge->length);
assert(CordRepBtree::IsDataEdge(edge));
assert(IsDataEdge(edge));
if (length >= edge->length) return edge;
if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) {
@ -207,7 +208,7 @@ void DeleteSubstring(CordRepSubstring* substring) {
// Deletes a leaf node data edge. Requires `IsDataEdge(rep)`.
void DeleteLeafEdge(CordRep* rep) {
assert(CordRepBtree::IsDataEdge(rep));
assert(IsDataEdge(rep));
if (rep->tag >= FLAT) {
CordRepFlat::Delete(rep->flat());
} else if (rep->tag == EXTERNAL) {

@ -22,6 +22,7 @@
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/optimization.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
@ -310,13 +311,6 @@ class CordRepBtree : public CordRep {
// Requires this instance to be a leaf node, and `index` to be valid index.
inline absl::string_view Data(size_t index) const;
static const char* EdgeDataPtr(const CordRep* r);
static absl::string_view EdgeData(const CordRep* r);
// Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
// holding a FLAT or EXTERNAL child rep.
static bool IsDataEdge(const CordRep* rep);
// Diagnostics: returns true if `tree` is valid and internally consistent.
// If `shallow` is false, then the provided top level node and all child nodes
// below it are recursively checked. If `shallow` is true, only the provided
@ -631,34 +625,11 @@ inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
return {edges_ + begin, static_cast<size_t>(end - begin)};
}
inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) {
assert(IsDataEdge(r));
size_t offset = 0;
if (r->tag == SUBSTRING) {
offset = r->substring()->start;
r = r->substring()->child;
}
return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset;
}
inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) {
return absl::string_view(EdgeDataPtr(r), r->length);
}
inline absl::string_view CordRepBtree::Data(size_t index) const {
assert(height() == 0);
return EdgeData(Edge(index));
}
inline bool CordRepBtree::IsDataEdge(const CordRep* rep) {
// The fast path is that `rep` is an EXTERNAL or FLAT node, making the below
// if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
// check in the slow path the SUBSTRING check to optimize for the hot path.
if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true;
if (rep->tag == SUBSTRING) rep = rep->substring()->child;
return rep->tag == EXTERNAL || rep->tag >= FLAT;
}
inline CordRepBtree* CordRepBtree::New(int height) {
CordRepBtree* tree = new CordRepBtree;
tree->length = 0;

@ -16,6 +16,7 @@
#include <cassert>
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
@ -39,7 +40,7 @@ inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) {
assert(n <= rep->length);
assert(offset < rep->length);
assert(offset <= rep->length - n);
assert(CordRepBtree::IsDataEdge(rep));
assert(IsDataEdge(rep));
if (n == 0) return nullptr;
if (n == rep->length) return CordRep::Ref(rep);

@ -17,6 +17,7 @@
#include <cassert>
#include "absl/base/config.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_navigator.h"
@ -44,7 +45,7 @@ absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size,
// can directly return the substring into the current data edge as the next
// chunk. We can easily establish from the above code that `navigator_.Next()`
// has not been called as that requires `chunk_size` to be zero.
if (n < chunk_size) return CordRepBtree::EdgeData(edge).substr(result.n);
if (n < chunk_size) return EdgeData(edge).substr(result.n);
// The amount of data taken from the last edge is `chunk_size` and `result.n`
// contains the offset into the current edge trailing the read data (which can
@ -60,7 +61,7 @@ absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size,
// We did not read all data, return remaining data from current edge.
edge = navigator_.Current();
remaining_ -= consumed_by_read + edge->length;
return CordRepBtree::EdgeData(edge).substr(result.n);
return EdgeData(edge).substr(result.n);
}
} // namespace cord_internal

@ -18,6 +18,7 @@
#include <cassert>
#include "absl/base/config.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_navigator.h"
@ -167,7 +168,7 @@ inline absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) {
assert(tree != nullptr);
const CordRep* edge = navigator_.InitFirst(tree);
remaining_ = tree->length - edge->length;
return CordRepBtree::EdgeData(edge);
return EdgeData(edge);
}
inline absl::string_view CordRepBtreeReader::Next() {
@ -175,7 +176,7 @@ inline absl::string_view CordRepBtreeReader::Next() {
const CordRep* edge = navigator_.Next();
assert(edge != nullptr);
remaining_ -= edge->length;
return CordRepBtree::EdgeData(edge);
return EdgeData(edge);
}
inline absl::string_view CordRepBtreeReader::Skip(size_t skip) {
@ -190,7 +191,7 @@ inline absl::string_view CordRepBtreeReader::Skip(size_t skip) {
// The combined length of all edges skipped before `pos.edge` is `skip -
// pos.offset`, all of which are 'consumed', as well as the current edge.
remaining_ -= skip - pos.offset + pos.edge->length;
return CordRepBtree::EdgeData(pos.edge).substr(pos.offset);
return EdgeData(pos.edge).substr(pos.offset);
}
inline absl::string_view CordRepBtreeReader::Seek(size_t offset) {
@ -199,7 +200,7 @@ inline absl::string_view CordRepBtreeReader::Seek(size_t offset) {
remaining_ = 0;
return {};
}
absl::string_view chunk = CordRepBtree::EdgeData(pos.edge).substr(pos.offset);
absl::string_view chunk = EdgeData(pos.edge).substr(pos.offset);
remaining_ = length() - offset - chunk.length();
return chunk;
}

@ -25,6 +25,7 @@
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/cleanup/cleanup.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_test_util.h"
#include "absl/strings/str_cat.h"
@ -323,30 +324,26 @@ TEST(CordRepBtreeTest, EdgeData) {
CordRep* substr2 = MakeSubstring(1, 6, CordRep::Ref(external));
CordRep* bad_substr = MakeSubstring(1, 2, CordRep::Ref(substr1));
EXPECT_TRUE(CordRepBtree::IsDataEdge(flat));
EXPECT_THAT(CordRepBtree::EdgeDataPtr(flat),
TypedEq<const void*>(flat->Data()));
EXPECT_THAT(CordRepBtree::EdgeData(flat), Eq("Hello world"));
EXPECT_TRUE(IsDataEdge(flat));
EXPECT_THAT(EdgeData(flat).data(), TypedEq<const void*>(flat->Data()));
EXPECT_THAT(EdgeData(flat), Eq("Hello world"));
EXPECT_TRUE(CordRepBtree::IsDataEdge(external));
EXPECT_THAT(CordRepBtree::EdgeDataPtr(external),
TypedEq<const void*>(external->base));
EXPECT_THAT(CordRepBtree::EdgeData(external), Eq("Hello external"));
EXPECT_TRUE(IsDataEdge(external));
EXPECT_THAT(EdgeData(external).data(), TypedEq<const void*>(external->base));
EXPECT_THAT(EdgeData(external), Eq("Hello external"));
EXPECT_TRUE(CordRepBtree::IsDataEdge(substr1));
EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr1),
TypedEq<const void*>(flat->Data() + 1));
EXPECT_THAT(CordRepBtree::EdgeData(substr1), Eq("ello w"));
EXPECT_TRUE(IsDataEdge(substr1));
EXPECT_THAT(EdgeData(substr1).data(), TypedEq<const void*>(flat->Data() + 1));
EXPECT_THAT(EdgeData(substr1), Eq("ello w"));
EXPECT_TRUE(CordRepBtree::IsDataEdge(substr2));
EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr2),
EXPECT_TRUE(IsDataEdge(substr2));
EXPECT_THAT(EdgeData(substr2).data(),
TypedEq<const void*>(external->base + 1));
EXPECT_THAT(CordRepBtree::EdgeData(substr2), Eq("ello e"));
EXPECT_THAT(EdgeData(substr2), Eq("ello e"));
EXPECT_FALSE(CordRepBtree::IsDataEdge(bad_substr));
EXPECT_FALSE(IsDataEdge(bad_substr));
#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
EXPECT_DEATH(CordRepBtree::EdgeData(bad_substr), ".*");
EXPECT_DEATH(CordRepBtree::EdgeDataPtr(bad_substr), ".*");
EXPECT_DEATH(EdgeData(bad_substr), ".*");
#endif
CordRep::Unref(bad_substr);

@ -16,6 +16,6 @@
# Test scripts should source this file to get the identifiers.
readonly LINUX_ALPINE_CONTAINER="gcr.io/google.com/absl-177019/alpine:20201026"
readonly LINUX_CLANG_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20220113"
readonly LINUX_GCC_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20220113"
readonly LINUX_CLANG_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20220217"
readonly LINUX_GCC_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20220217"
readonly LINUX_GCC_FLOOR_CONTAINER="gcr.io/google.com/absl-177019/linux_gcc-floor:20210617"

Loading…
Cancel
Save