From bbf2ed7890615a587f0209c6d7e9b2c08781ee09 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Mon, 17 Oct 2022 12:56:53 -0700 Subject: [PATCH] Implement btree_iterator::operator-, which is faster than std::distance for btree iterators. Note: btree_iterator::operator- is still O(N) because in the worst case (end()-begin()), we will have at least one operation per node in the tree, and there are at least N/M nodes, where M (a constant) is the maximum number of values per node. PiperOrigin-RevId: 481716874 Change-Id: Ic0225b7509208ed96b75a2dc626d2aa4a24f4946 --- absl/container/BUILD.bazel | 4 ++ absl/container/CMakeLists.txt | 2 + absl/container/btree_benchmark.cc | 25 +++++++++++ absl/container/btree_map.h | 3 ++ absl/container/btree_set.h | 3 ++ absl/container/btree_test.cc | 20 +++++++++ absl/container/internal/btree.h | 73 +++++++++++++++++++++++++++++++ 7 files changed, 130 insertions(+) diff --git a/absl/container/BUILD.bazel b/absl/container/BUILD.bazel index d8dad8e8..70febdda 100644 --- a/absl/container/BUILD.bazel +++ b/absl/container/BUILD.bazel @@ -984,11 +984,13 @@ cc_test( ":btree_test_common", ":counting_allocator", ":test_instance_tracker", + "//absl/algorithm:container", "//absl/base:core_headers", "//absl/base:raw_logging_internal", "//absl/flags:flag", "//absl/hash:hash_testing", "//absl/memory", + "//absl/random", "//absl/strings", "//absl/types:compare", "@com_google_googletest//:gtest_main", @@ -1011,10 +1013,12 @@ cc_binary( ":flat_hash_map", ":flat_hash_set", ":hashtable_debug", + "//absl/algorithm:container", "//absl/base:raw_logging_internal", "//absl/hash", "//absl/log", "//absl/memory", + "//absl/random", "//absl/strings:cord", "//absl/strings:str_format", "//absl/time", diff --git a/absl/container/CMakeLists.txt b/absl/container/CMakeLists.txt index 25216466..b3776aed 100644 --- a/absl/container/CMakeLists.txt +++ b/absl/container/CMakeLists.txt @@ -72,6 +72,7 @@ absl_cc_test( LINKOPTS ${ABSL_DEFAULT_LINKOPTS} DEPS + absl::algorithm_container absl::btree absl::btree_test_common absl::compare @@ -79,6 +80,7 @@ absl_cc_test( absl::counting_allocator absl::flags absl::hash_testing + absl::random_random absl::raw_logging_internal absl::strings absl::test_instance_tracker diff --git a/absl/container/btree_benchmark.cc b/absl/container/btree_benchmark.cc index cc9a106d..0d26fd42 100644 --- a/absl/container/btree_benchmark.cc +++ b/absl/container/btree_benchmark.cc @@ -27,6 +27,7 @@ #include #include "benchmark/benchmark.h" +#include "absl/algorithm/container.h" #include "absl/base/internal/raw_logging.h" #include "absl/container/btree_map.h" #include "absl/container/btree_set.h" @@ -37,6 +38,7 @@ #include "absl/hash/hash.h" #include "absl/log/log.h" #include "absl/memory/memory.h" +#include "absl/random/random.h" #include "absl/strings/cord.h" #include "absl/strings/str_format.h" #include "absl/time/time.h" @@ -733,6 +735,29 @@ double ContainerInfo(const btree_map>& b) { BIG_TYPE_PTR_BENCHMARKS(32); +void BM_BtreeSet_IteratorSubtraction(benchmark::State& state) { + absl::InsecureBitGen bitgen; + std::vector vec; + // Randomize the set's insertion order so the nodes aren't all full. + vec.reserve(state.range(0)); + for (int i = 0; i < state.range(0); ++i) vec.push_back(i); + absl::c_shuffle(vec, bitgen); + + absl::btree_set set; + for (int i : vec) set.insert(i); + + size_t distance = absl::Uniform(bitgen, 0u, set.size()); + while (state.KeepRunningBatch(distance)) { + size_t end = absl::Uniform(bitgen, distance, set.size()); + size_t begin = end - distance; + benchmark::DoNotOptimize(set.find(static_cast(end)) - + set.find(static_cast(begin))); + distance = absl::Uniform(bitgen, 0u, set.size()); + } +} + +BENCHMARK(BM_BtreeSet_IteratorSubtraction)->Range(1 << 10, 1 << 20); + } // namespace } // namespace container_internal ABSL_NAMESPACE_END diff --git a/absl/container/btree_map.h b/absl/container/btree_map.h index 286817f1..479db8b7 100644 --- a/absl/container/btree_map.h +++ b/absl/container/btree_map.h @@ -46,6 +46,9 @@ // reason, `insert()` and `erase()` return a valid iterator at the current // position. Another important difference is that key-types must be // copy-constructible. +// +// Another API difference is that btree iterators can be subtracted, and this +// is faster than using std::distance. #ifndef ABSL_CONTAINER_BTREE_MAP_H_ #define ABSL_CONTAINER_BTREE_MAP_H_ diff --git a/absl/container/btree_set.h b/absl/container/btree_set.h index e823a2a0..bbff65d1 100644 --- a/absl/container/btree_set.h +++ b/absl/container/btree_set.h @@ -45,6 +45,9 @@ // more than one iterator, pointer, or reference simultaneously. For this // reason, `insert()` and `erase()` return a valid iterator at the current // position. +// +// Another API difference is that btree iterators can be subtracted, and this +// is faster than using std::distance. #ifndef ABSL_CONTAINER_BTREE_SET_H_ #define ABSL_CONTAINER_BTREE_SET_H_ diff --git a/absl/container/btree_test.cc b/absl/container/btree_test.cc index 9386a6b1..6a5351fe 100644 --- a/absl/container/btree_test.cc +++ b/absl/container/btree_test.cc @@ -31,6 +31,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/algorithm/container.h" #include "absl/base/internal/raw_logging.h" #include "absl/base/macros.h" #include "absl/container/btree_map.h" @@ -40,6 +41,7 @@ #include "absl/flags/flag.h" #include "absl/hash/hash_testing.h" #include "absl/memory/memory.h" +#include "absl/random/random.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" @@ -3320,6 +3322,24 @@ TEST(Btree, ReusePoisonMemory) { set.insert(0); } +TEST(Btree, IteratorSubtraction) { + absl::BitGen bitgen; + std::vector vec; + // Randomize the set's insertion order so the nodes aren't all full. + for (int i = 0; i < 1000000; ++i) vec.push_back(i); + absl::c_shuffle(vec, bitgen); + + absl::btree_set set; + for (int i : vec) set.insert(i); + + for (int i = 0; i < 1000; ++i) { + size_t begin = absl::Uniform(bitgen, 0u, set.size()); + size_t end = absl::Uniform(bitgen, begin, set.size()); + ASSERT_EQ(end - begin, set.find(end) - set.find(begin)) + << begin << " " << end; + } +} + } // namespace } // namespace container_internal ABSL_NAMESPACE_END diff --git a/absl/container/internal/btree.h b/absl/container/internal/btree.h index ecf31bea..9c509073 100644 --- a/absl/container/internal/btree.h +++ b/absl/container/internal/btree.h @@ -1085,6 +1085,16 @@ class btree_iterator { return node_ != other.node_ || position_ != other.position_; } + // Returns n such that n calls to ++other yields *this. + // Precondition: n exists. + difference_type operator-(const_iterator other) const { + if (node_ == other.node_) { + if (node_->is_leaf()) return position_ - other.position_; + if (position_ == other.position_) return 0; + } + return distance_slow(other); + } + // Accessors for the key/value the iterator is pointing at. reference operator*() const { ABSL_HARDENING_ASSERT(node_ != nullptr); @@ -1148,6 +1158,11 @@ class btree_iterator { #endif } + // Returns n such that n calls to ++other yields *this. + // Precondition: n exists && (this->node_ != other.node_ || + // !this->node_->is_leaf() || this->position_ != other.position_). + difference_type distance_slow(const_iterator other) const; + // Increment/decrement the iterator. void increment() { assert_valid_generation(); @@ -1975,6 +1990,64 @@ void btree_node

::clear_and_delete(btree_node *node, allocator_type *alloc) { //// // btree_iterator methods + +// Note: the implementation here is based on btree_node::clear_and_delete. +template +auto btree_iterator::distance_slow(const_iterator other) const + -> difference_type { + const_iterator begin = other; + const_iterator end = *this; + assert(begin.node_ != end.node_ || !begin.node_->is_leaf() || + begin.position_ != end.position_); + + const node_type *node = begin.node_; + // We need to compensate for double counting if begin.node_ is a leaf node. + difference_type count = node->is_leaf() ? -begin.position_ : 0; + + // First navigate to the leftmost leaf node past begin. + if (node->is_internal()) { + ++count; + node = node->child(begin.position_ + 1); + } + while (node->is_internal()) node = node->start_child(); + + // Use `size_type` because `pos` needs to be able to hold `kNodeSlots+1`, + // which isn't guaranteed to be a valid `field_type`. + size_type pos = node->position(); + const node_type *parent = node->parent(); + for (;;) { + // In each iteration of the next loop, we count one leaf node and go right. + assert(pos <= parent->finish()); + do { + node = parent->child(static_cast(pos)); + if (node->is_internal()) { + // Navigate to the leftmost leaf under node. + while (node->is_internal()) node = node->start_child(); + pos = node->position(); + parent = node->parent(); + } + if (node == end.node_) return count + end.position_; + if (parent == end.node_ && pos == static_cast(end.position_)) + return count + node->count(); + // +1 is for the next internal node value. + count += node->count() + 1; + ++pos; + } while (pos <= parent->finish()); + + // Once we've counted all children of parent, go up/right. + assert(pos > parent->finish()); + do { + node = parent; + pos = node->position(); + parent = node->parent(); + // -1 because we counted the value at end and shouldn't. + if (parent == end.node_ && pos == static_cast(end.position_)) + return count - 1; + ++pos; + } while (pos > parent->finish()); + } +} + template void btree_iterator::increment_slow() { if (node_->is_leaf()) {