Support empty Cords with an expected checksum

PiperOrigin-RevId: 484578104
Change-Id: Ie4be3e4de27dc28d88395e16fd075fb10ab7a302
pull/1305/head
Derek Mauro 2 years ago committed by Copybara-Service
parent fd9fbe7451
commit 1db72eb03e
  1. 2
      absl/copts/GENERATED_AbseilCopts.cmake
  2. 2
      absl/copts/GENERATED_copts.bzl
  3. 1
      absl/copts/copts.py
  4. 47
      absl/strings/cord.cc
  5. 29
      absl/strings/cord.h
  6. 148
      absl/strings/cord_test.cc
  7. 9
      absl/strings/internal/cord_rep_crc.cc
  8. 2
      absl/strings/internal/cord_rep_crc.h
  9. 15
      absl/strings/internal/cord_rep_crc_test.cc

@ -25,6 +25,7 @@ list(APPEND ABSL_CLANG_CL_TEST_FLAGS
"-Wno-shorten-64-to-32"
"-Wno-sign-compare"
"-Wno-sign-conversion"
"-Wno-unreachable-code-loop-increment"
"-Wno-unused-function"
"-Wno-unused-member-function"
"-Wno-unused-parameter"
@ -112,6 +113,7 @@ list(APPEND ABSL_LLVM_TEST_FLAGS
"-Wno-shorten-64-to-32"
"-Wno-sign-compare"
"-Wno-sign-conversion"
"-Wno-unreachable-code-loop-increment"
"-Wno-unused-function"
"-Wno-unused-member-function"
"-Wno-unused-parameter"

@ -26,6 +26,7 @@ ABSL_CLANG_CL_TEST_FLAGS = [
"-Wno-shorten-64-to-32",
"-Wno-sign-compare",
"-Wno-sign-conversion",
"-Wno-unreachable-code-loop-increment",
"-Wno-unused-function",
"-Wno-unused-member-function",
"-Wno-unused-parameter",
@ -113,6 +114,7 @@ ABSL_LLVM_TEST_FLAGS = [
"-Wno-shorten-64-to-32",
"-Wno-sign-compare",
"-Wno-sign-conversion",
"-Wno-unreachable-code-loop-increment",
"-Wno-unused-function",
"-Wno-unused-member-function",
"-Wno-unused-parameter",

@ -29,6 +29,7 @@ LLVM_TEST_DISABLE_WARNINGS_FLAGS = [
"-Wno-shorten-64-to-32",
"-Wno-sign-compare",
"-Wno-sign-conversion",
"-Wno-unreachable-code-loop-increment",
"-Wno-unused-function",
"-Wno-unused-member-function",
"-Wno-unused-parameter",

@ -420,6 +420,7 @@ Cord& Cord::operator=(absl::string_view src) {
// we keep it here to make diffs easier.
void Cord::InlineRep::AppendArray(absl::string_view src,
MethodIdentifier method) {
MaybeRemoveEmptyCrcNode();
if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined.
size_t appended = 0;
@ -479,6 +480,10 @@ inline CordRep* Cord::TakeRep() && {
template <typename C>
inline void Cord::AppendImpl(C&& src) {
auto constexpr method = CordzUpdateTracker::kAppendCord;
contents_.MaybeRemoveEmptyCrcNode();
if (src.empty()) return;
if (empty()) {
// Since destination is empty, we can avoid allocating a node,
if (src.contents_.is_tree()) {
@ -591,6 +596,9 @@ void Cord::Append(T&& src) {
template void Cord::Append(std::string&& src);
void Cord::Prepend(const Cord& src) {
contents_.MaybeRemoveEmptyCrcNode();
if (src.empty()) return;
CordRep* src_tree = src.contents_.tree();
if (src_tree != nullptr) {
CordRep::Ref(src_tree);
@ -605,7 +613,9 @@ void Cord::Prepend(const Cord& src) {
}
void Cord::PrependArray(absl::string_view src, MethodIdentifier method) {
contents_.MaybeRemoveEmptyCrcNode();
if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined.
if (!contents_.is_tree()) {
size_t cur_size = contents_.inline_size();
if (cur_size + src.size() <= InlineRep::kMaxInline) {
@ -665,6 +675,7 @@ void Cord::RemovePrefix(size_t n) {
ABSL_INTERNAL_CHECK(n <= size(),
absl::StrCat("Requested prefix size ", n,
" exceeds Cord's size ", size()));
contents_.MaybeRemoveEmptyCrcNode();
CordRep* tree = contents_.tree();
if (tree == nullptr) {
contents_.remove_prefix(n);
@ -695,6 +706,7 @@ void Cord::RemoveSuffix(size_t n) {
ABSL_INTERNAL_CHECK(n <= size(),
absl::StrCat("Requested suffix size ", n,
" exceeds Cord's size ", size()));
contents_.MaybeRemoveEmptyCrcNode();
CordRep* tree = contents_.tree();
if (tree == nullptr) {
contents_.reduce_size(n);
@ -844,9 +856,10 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
void Cord::SetExpectedChecksum(uint32_t crc) {
auto constexpr method = CordzUpdateTracker::kSetExpectedChecksum;
if (empty()) return;
if (!contents_.is_tree()) {
if (empty()) {
CordRep* rep = CordRepCrc::New(nullptr, crc);
contents_.EmplaceTree(rep, method);
} else if (!contents_.is_tree()) {
CordRep* rep = contents_.MakeFlatWithExtraCapacity(0);
rep = CordRepCrc::New(rep, crc);
contents_.EmplaceTree(rep, method);
@ -929,6 +942,7 @@ inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
}
inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
if (c.empty()) return {};
return c.contents_.FindFlatStartPiece();
}
inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
@ -1166,6 +1180,10 @@ absl::string_view Cord::FlattenSlowPath() {
/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) {
assert(rep != nullptr);
if (rep->length == 0) {
*fragment = absl::string_view();
return true;
}
rep = cord_internal::SkipCrcNode(rep);
if (rep->IsFlat()) {
*fragment = absl::string_view(rep->flat()->Data(), rep->length);
@ -1197,6 +1215,7 @@ absl::string_view Cord::FlattenSlowPath() {
absl::cord_internal::CordRep* rep,
absl::FunctionRef<void(absl::string_view)> callback) {
assert(rep != nullptr);
if (rep->length == 0) return;
rep = cord_internal::SkipCrcNode(rep);
if (rep->IsBtree()) {
@ -1230,7 +1249,11 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
if (include_data) *os << static_cast<void*>(rep);
*os << "]";
*os << " " << std::setw(indent) << "";
if (rep->IsCrc()) {
bool leaf = false;
if (rep == nullptr) {
*os << "NULL\n";
leaf = true;
} else if (rep->IsCrc()) {
*os << "CRC crc=" << rep->crc()->crc << "\n";
indent += kIndentStep;
rep = rep->crc()->child;
@ -1239,6 +1262,7 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
indent += kIndentStep;
rep = rep->substring()->child;
} else { // Leaf or ring
leaf = true;
if (rep->IsExternal()) {
*os << "EXTERNAL [";
if (include_data)
@ -1252,6 +1276,8 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os,
} else {
CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os);
}
}
if (leaf) {
if (stack.empty()) break;
rep = stack.back();
stack.pop_back();
@ -1297,11 +1323,14 @@ static bool VerifyNode(CordRep* root, CordRep* start_node,
node->substring()->child->length,
ReportError(root, node));
} else if (node->IsCrc()) {
ABSL_INTERNAL_CHECK(node->crc()->child != nullptr,
ReportError(root, node));
ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length,
ReportError(root, node));
worklist.push_back(node->crc()->child);
ABSL_INTERNAL_CHECK(
node->crc()->child != nullptr || node->crc()->length == 0,
ReportError(root, node));
if (node->crc()->child != nullptr) {
ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length,
ReportError(root, node));
worklist.push_back(node->crc()->child);
}
}
} while (!worklist.empty());
return true;

@ -926,6 +926,13 @@ class Cord {
void set_inline_size(size_t size) { data_.set_inline_size(size); }
size_t inline_size() const { return data_.inline_size(); }
// Empty cords that carry a checksum have a CordRepCrc node with a null
// child node. The code can avoid lots of special cases where it would
// otherwise transition from tree to inline storage if we just remove the
// CordRepCrc node before mutations. Must never be called inside a
// CordzUpdateScope since it untracks the cordz info.
void MaybeRemoveEmptyCrcNode();
cord_internal::InlineData data_;
};
InlineRep contents_;
@ -1236,6 +1243,18 @@ inline void Cord::InlineRep::CopyToArray(char* dst) const {
cord_internal::SmallMemmove(dst, data_.as_chars(), n);
}
inline void Cord::InlineRep::MaybeRemoveEmptyCrcNode() {
CordRep* rep = tree();
if (rep == nullptr || ABSL_PREDICT_TRUE(rep->length > 0)) {
return;
}
assert(rep->IsCrc());
assert(rep->crc()->child == nullptr);
CordzInfo::MaybeUntrackCord(cordz_info());
CordRep::Unref(rep);
ResetToEmpty();
}
constexpr inline Cord::Cord() noexcept {}
inline Cord::Cord(absl::string_view src)
@ -1285,7 +1304,7 @@ inline size_t Cord::size() const {
return contents_.size();
}
inline bool Cord::empty() const { return contents_.empty(); }
inline bool Cord::empty() const { return size() == 0; }
inline size_t Cord::EstimatedMemoryUsage(
CordMemoryAccounting accounting_method) const {
@ -1411,7 +1430,11 @@ inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) {
inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) {
if (CordRep* tree = cord->contents_.tree()) {
bytes_remaining_ = tree->length;
InitTree(tree);
if (ABSL_PREDICT_TRUE(bytes_remaining_ != 0)) {
InitTree(tree);
} else {
current_chunk_ = {};
}
} else {
bytes_remaining_ = cord->contents_.inline_size();
current_chunk_ = {cord->contents_.data(), bytes_remaining_};
@ -1580,7 +1603,7 @@ inline void Cord::ForEachChunk(
if (rep == nullptr) {
callback(absl::string_view(contents_.data(), contents_.size()));
} else {
return ForEachChunkAux(rep, callback);
ForEachChunkAux(rep, callback);
}
}

@ -1988,6 +1988,12 @@ TEST_P(CordTest, HugeCord) {
// Tests that Append() works ok when handed a self reference
TEST_P(CordTest, AppendSelf) {
// Test the empty case.
absl::Cord empty;
MaybeHarden(empty);
empty.Append(empty);
ASSERT_EQ(empty, "");
// We run the test until data is ~16K
// This guarantees it covers small, medium and large data.
std::string control_data = "Abc";
@ -2712,7 +2718,7 @@ class CordMutator {
// clang-format off
// This array is constant-initialized in conformant compilers.
CordMutator cord_mutators[] ={
CordMutator cord_mutators[] = {
{"clear", [](absl::Cord& c) { c.Clear(); }},
{"overwrite", [](absl::Cord& c) { c = "overwritten"; }},
{
@ -2741,6 +2747,25 @@ CordMutator cord_mutators[] ={
[](absl::Cord& c) { c.Append(c); },
[](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); }
},
{
"append empty string",
[](absl::Cord& c) { c.Append(""); },
[](absl::Cord& c) { }
},
{
"append empty cord",
[](absl::Cord& c) { c.Append(absl::Cord()); },
[](absl::Cord& c) { }
},
{
"append empty checksummed cord",
[](absl::Cord& c) {
absl::Cord to_append;
to_append.SetExpectedChecksum(999);
c.Append(to_append);
},
[](absl::Cord& c) { }
},
{
"prepend string",
[](absl::Cord& c) { c.Prepend("9876543210"); },
@ -2762,13 +2787,34 @@ CordMutator cord_mutators[] ={
},
[](absl::Cord& c) { c.RemovePrefix(10); }
},
{
"prepend empty string",
[](absl::Cord& c) { c.Prepend(""); },
[](absl::Cord& c) { }
},
{
"prepend empty cord",
[](absl::Cord& c) { c.Prepend(absl::Cord()); },
[](absl::Cord& c) { }
},
{
"prepend empty checksummed cord",
[](absl::Cord& c) {
absl::Cord to_prepend;
to_prepend.SetExpectedChecksum(999);
c.Prepend(to_prepend);
},
[](absl::Cord& c) { }
},
{
"prepend self",
[](absl::Cord& c) { c.Prepend(c); },
[](absl::Cord& c) { c.RemovePrefix(c.size() / 2); }
},
{"remove prefix", [](absl::Cord& c) { c.RemovePrefix(2); }},
{"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(2); }},
{"remove prefix", [](absl::Cord& c) { c.RemovePrefix(c.size() / 2); }},
{"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); }},
{"remove 0-prefix", [](absl::Cord& c) { c.RemovePrefix(0); }},
{"remove 0-suffix", [](absl::Cord& c) { c.RemoveSuffix(0); }},
{"subcord", [](absl::Cord& c) { c = c.Subcord(1, c.size() - 2); }},
{
"swap inline",
@ -2834,6 +2880,13 @@ TEST_P(CordTest, ExpectedChecksum) {
c2.SetExpectedChecksum(24680);
mutator.Mutate(c2);
if (c1 == c2) {
// Not a mutation (for example, appending the empty string).
// Whether the checksum is removed is not defined.
continue;
}
EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt);
if (mutator.CanUndo()) {
@ -2903,3 +2956,92 @@ TEST_P(CordTest, ExpectedChecksum) {
}
}
}
// Test the special cases encountered with an empty checksummed cord.
TEST_P(CordTest, ChecksummedEmptyCord) {
absl::Cord c1;
EXPECT_FALSE(c1.ExpectedChecksum().has_value());
// Setting an expected checksum works.
c1.SetExpectedChecksum(12345);
EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345);
EXPECT_EQ(c1, "");
EXPECT_TRUE(c1.empty());
// CRC persists through copies, assignments, and moves:
absl::Cord c1_copy_construct = c1;
EXPECT_EQ(c1_copy_construct.ExpectedChecksum().value_or(0), 12345);
absl::Cord c1_copy_assign;
c1_copy_assign = c1;
EXPECT_EQ(c1_copy_assign.ExpectedChecksum().value_or(0), 12345);
absl::Cord c1_move(std::move(c1_copy_assign));
EXPECT_EQ(c1_move.ExpectedChecksum().value_or(0), 12345);
EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345);
// A CRC Cord compares equal to its non-CRC value.
EXPECT_EQ(c1, absl::Cord());
for (const CordMutator& mutator : cord_mutators) {
SCOPED_TRACE(mutator.Name());
// Exercise mutating an empty checksummed cord to catch crashes and exercise
// memory sanitizers.
absl::Cord c2;
c2.SetExpectedChecksum(24680);
mutator.Mutate(c2);
if (c2.empty()) {
// Not a mutation
continue;
}
EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt);
if (mutator.CanUndo()) {
mutator.Undo(c2);
}
}
absl::Cord c3;
c3.SetExpectedChecksum(999);
const absl::Cord& cc3 = c3;
// Test that all cord reading operations function in the face of an
// expected checksum.
EXPECT_TRUE(cc3.StartsWith(""));
EXPECT_TRUE(cc3.EndsWith(""));
EXPECT_TRUE(cc3.empty());
EXPECT_EQ(cc3, "");
EXPECT_EQ(cc3, absl::Cord());
EXPECT_EQ(cc3.size(), 0);
EXPECT_EQ(cc3.Compare(absl::Cord()), 0);
EXPECT_EQ(cc3.Compare(c1), 0);
EXPECT_EQ(cc3.Compare(cc3), 0);
EXPECT_EQ(cc3.Compare(""), 0);
EXPECT_EQ(cc3.Compare("wxyz"), -1);
EXPECT_EQ(cc3.Compare(absl::Cord("wxyz")), -1);
EXPECT_EQ(absl::Cord("wxyz").Compare(cc3), 1);
EXPECT_EQ(std::string(cc3), "");
std::string dest;
absl::CopyCordToString(cc3, &dest);
EXPECT_EQ(dest, "");
for (absl::string_view chunk : cc3.Chunks()) { // NOLINT(unreachable loop)
static_cast<void>(chunk);
GTEST_FAIL() << "no chunks expected";
}
EXPECT_TRUE(cc3.chunk_begin() == cc3.chunk_end());
for (char ch : cc3.Chars()) { // NOLINT(unreachable loop)
static_cast<void>(ch);
GTEST_FAIL() << "no chars expected";
}
EXPECT_TRUE(cc3.char_begin() == cc3.char_end());
EXPECT_EQ(cc3.TryFlat(), "");
EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::Cord()));
EXPECT_EQ(absl::HashOf(c3), absl::HashOf(absl::string_view()));
}

@ -25,8 +25,7 @@ ABSL_NAMESPACE_BEGIN
namespace cord_internal {
CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) {
assert(child != nullptr);
if (child->IsCrc()) {
if (child != nullptr && child->IsCrc()) {
if (child->refcount.IsOne()) {
child->crc()->crc = crc;
return child->crc();
@ -37,7 +36,7 @@ CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) {
CordRep::Unref(old);
}
auto* new_cordrep = new CordRepCrc;
new_cordrep->length = child->length;
new_cordrep->length = child != nullptr ? child->length : 0;
new_cordrep->tag = cord_internal::CRC;
new_cordrep->child = child;
new_cordrep->crc = crc;
@ -45,7 +44,9 @@ CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) {
}
void CordRepCrc::Destroy(CordRepCrc* node) {
CordRep::Unref(node->child);
if (node->child != nullptr) {
CordRep::Unref(node->child);
}
delete node;
}

@ -40,7 +40,7 @@ struct CordRepCrc : public CordRep {
// If the specified `child` is itself a CordRepCrc node, then this method
// either replaces the existing node, or directly updates the crc value in it
// depending on the node being shared or not, i.e.: refcount.IsOne().
// `child` must not be null. Never returns null.
// `child` must only be null if the Cord is empty. Never returns null.
static CordRepCrc* New(CordRep* child, uint32_t crc);
// Destroys (deletes) the provided node. `node` must not be null.

@ -27,14 +27,11 @@ namespace {
using ::absl::cordrep_testing::MakeFlat;
using ::testing::Eq;
using ::testing::IsNull;
using ::testing::Ne;
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
TEST(CordRepCrc, NewWithNullPtr) {
EXPECT_DEATH(CordRepCrc::New(nullptr, 0), "");
}
TEST(CordRepCrc, RemoveCrcWithNullptr) {
EXPECT_DEATH(RemoveCrcNode(nullptr), "");
}
@ -82,6 +79,16 @@ TEST(CordRepCrc, NewExistingCrcShared) {
CordRep::Unref(new_crc);
}
TEST(CordRepCrc, NewEmpty) {
CordRepCrc* crc = CordRepCrc::New(nullptr, 12345);
EXPECT_TRUE(crc->refcount.IsOne());
EXPECT_THAT(crc->child, IsNull());
EXPECT_THAT(crc->length, Eq(0u));
EXPECT_THAT(crc->crc, Eq(12345u));
EXPECT_TRUE(crc->refcount.IsOne());
CordRepCrc::Destroy(crc);
}
TEST(CordRepCrc, RemoveCrcNotCrc) {
CordRep* rep = cordrep_testing::MakeFlat("Hello world");
CordRep* nocrc = RemoveCrcNode(rep);

Loading…
Cancel
Save