Implement btree_iterator::operator-, which is faster than std::distance for btree iterators.

Note: btree_iterator::operator- is still O(N) because in the worst case (end()-begin()), we will have at least one operation per node in the tree, and there are at least N/M nodes, where M (a constant) is the maximum number of values per node.
PiperOrigin-RevId: 481716874
Change-Id: Ic0225b7509208ed96b75a2dc626d2aa4a24f4946
Ref: pull/1115/head
Author: Evan Brown (committed by Copybara-Service)
Parent: 7ab917ec21
Commit: bbf2ed7890
Changed files:
  1. absl/container/BUILD.bazel (4)
  2. absl/container/CMakeLists.txt (2)
  3. absl/container/btree_benchmark.cc (25)
  4. absl/container/btree_map.h (3)
  5. absl/container/btree_set.h (3)
  6. absl/container/btree_test.cc (20)
  7. absl/container/internal/btree.h (73)

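A minimal usage sketch of the new operation described above, assuming the diff below is applied; the comparison with std::distance is illustrative only and is not code from this change:

#include <cassert>
#include <iterator>

#include "absl/container/btree_set.h"

int main() {
  absl::btree_set<int> set = {1, 2, 3, 4, 5};

  // New in this change: btree iterators can be subtracted directly.
  const auto d1 = set.find(4) - set.find(2);  // 2

  // std::distance gives the same answer but advances one element at a time;
  // operator- walks the tree node by node instead, so the worst case touches
  // roughly N/M nodes rather than N individual values.
  const auto d2 = std::distance(set.find(2), set.find(4));

  assert(d1 == d2 && d1 == 2);
  return 0;
}
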
@@ -984,11 +984,13 @@ cc_test(
        ":btree_test_common",
        ":counting_allocator",
        ":test_instance_tracker",
        "//absl/algorithm:container",
        "//absl/base:core_headers",
        "//absl/base:raw_logging_internal",
        "//absl/flags:flag",
        "//absl/hash:hash_testing",
        "//absl/memory",
        "//absl/random",
        "//absl/strings",
        "//absl/types:compare",
        "@com_google_googletest//:gtest_main",
@@ -1011,10 +1013,12 @@ cc_binary(
        ":flat_hash_map",
        ":flat_hash_set",
        ":hashtable_debug",
        "//absl/algorithm:container",
        "//absl/base:raw_logging_internal",
        "//absl/hash",
        "//absl/log",
        "//absl/memory",
        "//absl/random",
        "//absl/strings:cord",
        "//absl/strings:str_format",
        "//absl/time",

@@ -72,6 +72,7 @@ absl_cc_test(
  LINKOPTS
    ${ABSL_DEFAULT_LINKOPTS}
  DEPS
    absl::algorithm_container
    absl::btree
    absl::btree_test_common
    absl::compare
@@ -79,6 +80,7 @@ absl_cc_test(
    absl::counting_allocator
    absl::flags
    absl::hash_testing
    absl::random_random
    absl::raw_logging_internal
    absl::strings
    absl::test_instance_tracker

@@ -27,6 +27,7 @@
#include <vector>
#include "benchmark/benchmark.h"
#include "absl/algorithm/container.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/container/btree_map.h"
#include "absl/container/btree_set.h"
@@ -37,6 +38,7 @@
#include "absl/hash/hash.h"
#include "absl/log/log.h"
#include "absl/memory/memory.h"
#include "absl/random/random.h"
#include "absl/strings/cord.h"
#include "absl/strings/str_format.h"
#include "absl/time/time.h"
@@ -733,6 +735,29 @@ double ContainerInfo(const btree_map<int, BigTypePtr<Size>>& b) {
BIG_TYPE_PTR_BENCHMARKS(32);

void BM_BtreeSet_IteratorSubtraction(benchmark::State& state) {
  absl::InsecureBitGen bitgen;
  std::vector<int> vec;
  // Randomize the set's insertion order so the nodes aren't all full.
  vec.reserve(state.range(0));
  for (int i = 0; i < state.range(0); ++i) vec.push_back(i);
  absl::c_shuffle(vec, bitgen);
  absl::btree_set<int> set;
  for (int i : vec) set.insert(i);
  size_t distance = absl::Uniform(bitgen, 0u, set.size());
  while (state.KeepRunningBatch(distance)) {
    size_t end = absl::Uniform(bitgen, distance, set.size());
    size_t begin = end - distance;
    benchmark::DoNotOptimize(set.find(static_cast<int>(end)) -
                             set.find(static_cast<int>(begin)));
    distance = absl::Uniform(bitgen, 0u, set.size());
  }
}

BENCHMARK(BM_BtreeSet_IteratorSubtraction)->Range(1 << 10, 1 << 20);

}  // namespace
}  // namespace container_internal
ABSL_NAMESPACE_END

@@ -46,6 +46,9 @@
// reason, `insert()` and `erase()` return a valid iterator at the current
// position. Another important difference is that key-types must be
// copy-constructible.
//
// Another API difference is that btree iterators can be subtracted, and this
// is faster than using std::distance.
#ifndef ABSL_CONTAINER_BTREE_MAP_H_
#define ABSL_CONTAINER_BTREE_MAP_H_

@@ -45,6 +45,9 @@
// more than one iterator, pointer, or reference simultaneously. For this
// reason, `insert()` and `erase()` return a valid iterator at the current
// position.
//
// Another API difference is that btree iterators can be subtracted, and this
// is faster than using std::distance.
#ifndef ABSL_CONTAINER_BTREE_SET_H_
#define ABSL_CONTAINER_BTREE_SET_H_

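One way the documented difference can be used, sketched under the assumption that this change is applied (RankOf is a hypothetical helper, not part of Abseil):

#include "absl/container/btree_set.h"

// Hypothetical helper: returns the number of elements that precede `key`,
// i.e. its rank. std::set iterators are only bidirectional and cannot be
// subtracted like this.
int RankOf(const absl::btree_set<int>& set, int key) {
  return static_cast<int>(set.lower_bound(key) - set.begin());
}
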
@@ -31,6 +31,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/algorithm/container.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/macros.h"
#include "absl/container/btree_map.h"
@@ -40,6 +41,7 @@
#include "absl/flags/flag.h"
#include "absl/hash/hash_testing.h"
#include "absl/memory/memory.h"
#include "absl/random/random.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
@@ -3320,6 +3322,24 @@ TEST(Btree, ReusePoisonMemory) {
  set.insert(0);
}

TEST(Btree, IteratorSubtraction) {
  absl::BitGen bitgen;
  std::vector<int> vec;
  // Randomize the set's insertion order so the nodes aren't all full.
  for (int i = 0; i < 1000000; ++i) vec.push_back(i);
  absl::c_shuffle(vec, bitgen);
  absl::btree_set<int> set;
  for (int i : vec) set.insert(i);
  for (int i = 0; i < 1000; ++i) {
    size_t begin = absl::Uniform(bitgen, 0u, set.size());
    size_t end = absl::Uniform(bitgen, begin, set.size());
    ASSERT_EQ(end - begin, set.find(end) - set.find(begin))
        << begin << " " << end;
  }
}

}  // namespace
}  // namespace container_internal
ABSL_NAMESPACE_END

@@ -1085,6 +1085,16 @@ class btree_iterator {
    return node_ != other.node_ || position_ != other.position_;
  }

  // Returns n such that n calls to ++other yields *this.
  // Precondition: n exists.
  difference_type operator-(const_iterator other) const {
    if (node_ == other.node_) {
      if (node_->is_leaf()) return position_ - other.position_;
      if (position_ == other.position_) return 0;
    }
    return distance_slow(other);
  }

  // Accessors for the key/value the iterator is pointing at.
  reference operator*() const {
    ABSL_HARDENING_ASSERT(node_ != nullptr);
@@ -1148,6 +1158,11 @@
#endif
  }

  // Returns n such that n calls to ++other yields *this.
  // Precondition: n exists && (this->node_ != other.node_ ||
  // !this->node_->is_leaf() || this->position_ != other.position_).
  difference_type distance_slow(const_iterator other) const;

  // Increment/decrement the iterator.
  void increment() {
    assert_valid_generation();
@@ -1975,6 +1990,64 @@ void btree_node<P>::clear_and_delete(btree_node *node, allocator_type *alloc) {
////
// btree_iterator methods

// Note: the implementation here is based on btree_node::clear_and_delete.
template <typename N, typename R, typename P>
auto btree_iterator<N, R, P>::distance_slow(const_iterator other) const
    -> difference_type {
  const_iterator begin = other;
  const_iterator end = *this;
  assert(begin.node_ != end.node_ || !begin.node_->is_leaf() ||
         begin.position_ != end.position_);

  const node_type *node = begin.node_;
  // We need to compensate for double counting if begin.node_ is a leaf node.
  difference_type count = node->is_leaf() ? -begin.position_ : 0;

  // First navigate to the leftmost leaf node past begin.
  if (node->is_internal()) {
    ++count;
    node = node->child(begin.position_ + 1);
  }
  while (node->is_internal()) node = node->start_child();

  // Use `size_type` because `pos` needs to be able to hold `kNodeSlots+1`,
  // which isn't guaranteed to be a valid `field_type`.
  size_type pos = node->position();
  const node_type *parent = node->parent();
  for (;;) {
    // In each iteration of the next loop, we count one leaf node and go right.
    assert(pos <= parent->finish());
    do {
      node = parent->child(static_cast<field_type>(pos));
      if (node->is_internal()) {
        // Navigate to the leftmost leaf under node.
        while (node->is_internal()) node = node->start_child();
        pos = node->position();
        parent = node->parent();
      }
      if (node == end.node_) return count + end.position_;
      if (parent == end.node_ && pos == static_cast<size_type>(end.position_))
        return count + node->count();
      // +1 is for the next internal node value.
      count += node->count() + 1;
      ++pos;
    } while (pos <= parent->finish());

    // Once we've counted all children of parent, go up/right.
    assert(pos > parent->finish());
    do {
      node = parent;
      pos = node->position();
      parent = node->parent();
      // -1 because we counted the value at end and shouldn't.
      if (parent == end.node_ && pos == static_cast<size_type>(end.position_))
        return count - 1;
      ++pos;
    } while (pos > parent->finish());
  }
}

template <typename N, typename R, typename P>
void btree_iterator<N, R, P>::increment_slow() {
  if (node_->is_leaf()) {
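The comments above state the contract: operator- returns n such that n calls to ++other yield *this. A minimal reference sketch of that contract, useful for checking distance_slow against straightforward incrementing (naive_distance is a hypothetical helper, equivalent to std::distance over forward iterators, and not part of this change):

#include <cstddef>

// Counts how many increments take `from` to `to`.
// Precondition: `to` is reachable from `from` by repeated ++.
template <typename Iter>
std::ptrdiff_t naive_distance(Iter from, Iter to) {
  std::ptrdiff_t n = 0;
  for (; from != to; ++from) ++n;
  return n;
}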
