Remove unnecessary tree fallback code.

This reduces code complexity.

PiperOrigin-RevId: 697711659
Branch: pull/19271/head
Author: Protobuf Team Bot (committed by Copybara-Service)
Parent: 43f07dc5b5
Commit: 5e2db471ae
Files changed:
  1. csharp/src/Google.Protobuf/Reflection/FeatureSetDescriptor.g.cs (17 changed lines)
  2. src/google/protobuf/dynamic_message.cc (33 changed lines)
  3. src/google/protobuf/map.cc (118 changed lines)
  4. src/google/protobuf/map.h (433 changed lines)
  5. src/google/protobuf/map_field.cc (20 changed lines)
  6. src/google/protobuf/map_field.h (32 changed lines)
  7. src/google/protobuf/map_probe_benchmark.cc (35 changed lines)
  8. src/google/protobuf/map_test.inc (24 changed lines)
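
Background for the change, with a small illustrative sketch (simplified names; the real types appear in the src/google/protobuf/map.h diff below): before this commit, each bucket of the map's hash table was a tagged pointer that held either a linked list of nodes or, once a chain grew past a threshold, a btree used as an ordered fallback. After the commit, a bucket is just the head of a node list.

    #include <cassert>
    #include <cstdint>

    struct Node { Node* next; };
    struct Tree;  // stand-in for the removed btree fallback

    // Before: a bucket was a tagged pointer with three states
    // (empty, list, tree), discriminated by the low bit.
    enum class TableEntryPtr : uintptr_t {};
    inline bool IsTree(TableEntryPtr e) {
      return (static_cast<uintptr_t>(e) & 1) == 1;
    }
    inline Node* ToNode(TableEntryPtr e) {
      assert(!IsTree(e));
      return reinterpret_cast<Node*>(static_cast<uintptr_t>(e));
    }
    inline Tree* ToTree(TableEntryPtr e) {
      assert(IsTree(e));
      return reinterpret_cast<Tree*>(static_cast<uintptr_t>(e) - 1);
    }

    // After: a bucket is simply the head of a (possibly empty) chain.
    using Bucket = Node*;

Every map operation previously had to branch on that tag; removing the fallback deletes the tree-handling half of each branch, which is the complexity reduction the description refers to.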

csharp/src/Google.Protobuf/Reflection/FeatureSetDescriptor.g.cs
@@ -1,17 +0,0 @@
-#region Copyright notice and license
-// Protocol Buffers - Google's data interchange format
-// Copyright 2008 Google Inc.  All rights reserved.
-//
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file or at
-// https://developers.google.com/open-source/licenses/bsd
-#endregion
-
-namespace Google.Protobuf.Reflection;
-
-internal sealed partial class FeatureSetDescriptor
-{
-    // Canonical serialized form of the edition defaults, generated by embed_edition_defaults.
-    private const string DefaultsBase64 =
-        "ChMYhAciACoMCAEQAhgCIAMoATACChMY5wciACoMCAIQARgBIAIoATABChMY6AciDAgBEAEYASACKAEwASoAIOYHKOgH";
-}

src/google/protobuf/dynamic_message.cc
@@ -105,8 +105,6 @@ class DynamicMapKey {
   google::protobuf::MapKey ToMapKey() const ABSL_ATTRIBUTE_LIFETIME_BOUND;
-  VariantKey ToVariantKey() const ABSL_ATTRIBUTE_LIFETIME_BOUND;
-
   bool IsString() const {
     return absl::holds_alternative<std::string>(variant_);
   }
@@ -133,35 +131,8 @@ inline void SetMapKey(MapKey* map_key, const DynamicMapKey& value) {
 template <>
 struct is_internal_map_key_type<DynamicMapKey> : std::true_type {};

-template <>
-struct RealKeyToVariantKey<DynamicMapKey> : public RealKeyToVariantKey<MapKey> {
-  // Bring in for heterogeneous lookups.
-  using RealKeyToVariantKey<MapKey>::operator();
-
-  VariantKey operator()(const DynamicMapKey& value) const {
-    return value.ToVariantKey();
-  }
-};
-
-template <>
-struct RealKeyToVariantKeyAlternative<DynamicMapKey>
-    : public RealKeyToVariantKeyAlternative<MapKey> {
-  using RealKeyToVariantKeyAlternative<MapKey>::operator();
-
-  VariantKey operator()(const DynamicMapKey& value) const {
-    return RealKeyToVariantKey<DynamicMapKey>{}(value);
-  }
-};
-
 template <>
 struct TransparentSupport<DynamicMapKey> {
-  using hash = absl::Hash<DynamicMapKey>;
-
-  template <typename T, typename U>
-  static bool Equals(T&& t, U&& u) {
-    return ToView(t) == ToView(u);
-  }
-
   template <typename K>
   using key_arg = K;
@@ -222,10 +193,6 @@ google::protobuf::MapKey DynamicMapKey::ToMapKey() const {
   return result;
 }

-VariantKey DynamicMapKey::ToVariantKey() const {
-  return absl::visit([](const auto& alt) { return VariantKey(alt); }, variant_);
-}
-
 class DynamicMapField final
     : public TypeDefinedMapFieldBase<DynamicMapKey, MapValueRef> {
  public:
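
With the VariantKey plumbing gone, a key type participates in the map by being hashable with absl::Hash and equality-comparable; no RealKeyToVariantKey specializations are needed. A minimal sketch of the post-change requirements, using a hypothetical key type that is not part of the patch:

    #include <string>
    #include <utility>

    #include "absl/hash/hash.h"

    // Hypothetical key type. AbslHashValue plus operator== is now the whole
    // hashing contract; compare with the removed specializations above.
    class MyDynamicKey {
     public:
      explicit MyDynamicKey(std::string s) : value_(std::move(s)) {}

      template <typename H>
      friend H AbslHashValue(H state, const MyDynamicKey& k) {
        return H::combine(std::move(state), k.value_);
      }
      friend bool operator==(const MyDynamicKey& a, const MyDynamicKey& b) {
        return a.value_ == b.value_;
      }

     private:
      std::string value_;
    };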

src/google/protobuf/map.cc
@@ -27,108 +27,19 @@ namespace google {
 namespace protobuf {
 namespace internal {

-const TableEntryPtr kGlobalEmptyTable[kGlobalEmptyTableSize] = {};
+NodeBase* const kGlobalEmptyTable[kGlobalEmptyTableSize] = {};

-NodeBase* UntypedMapBase::DestroyTree(Tree* tree) {
-  NodeBase* head = tree->empty() ? nullptr : tree->begin()->second;
-  if (alloc_.arena() == nullptr) {
-    delete tree;
-  }
-  return head;
-}
-
-void UntypedMapBase::EraseFromTree(map_index_t b,
-                                   typename Tree::iterator tree_it) {
-  ABSL_DCHECK(TableEntryIsTree(b));
-  Tree* tree = TableEntryToTree(table_[b]);
-  if (tree_it != tree->begin()) {
-    NodeBase* prev = std::prev(tree_it)->second;
-    prev->next = prev->next->next;
-  }
-  tree->erase(tree_it);
-  if (tree->empty()) {
-    DestroyTree(tree);
-    table_[b] = TableEntryPtr{};
-  }
-}
-
-void UntypedMapBase::InsertUniqueInTree(map_index_t b, GetKey get_key,
-                                        NodeBase* node) {
-  if (TableEntryIsNonEmptyList(b)) {
-    // To save in binary size, we delegate to an out-of-line function to do
-    // the conversion.
-    table_[b] = ConvertToTree(TableEntryToNode(table_[b]), get_key);
-  }
-  ABSL_DCHECK(TableEntryIsTree(b))
-      << (void*)table_[b] << " " << (uintptr_t)table_[b];
-  Tree* tree = TableEntryToTree(table_[b]);
-  auto it = tree->try_emplace(get_key(node), node).first;
-  // Maintain the linked list of the nodes in the tree.
-  // For simplicity, they are in the same order as the tree iteration.
-  if (it != tree->begin()) {
-    NodeBase* prev = std::prev(it)->second;
-    prev->next = node;
-  }
-  auto next = std::next(it);
-  node->next = next != tree->end() ? next->second : nullptr;
-}
-
-void UntypedMapBase::TransferTree(Tree* tree, GetKey get_key) {
-  NodeBase* node = DestroyTree(tree);
-  do {
-    NodeBase* next = node->next;
-
-    map_index_t b = VariantBucketNumber(get_key(node));
-    // This is similar to InsertUnique, but with erasure.
-    if (TableEntryIsEmpty(b)) {
-      InsertUniqueInList(b, node);
-      index_of_first_non_null_ = (std::min)(index_of_first_non_null_, b);
-    } else if (TableEntryIsNonEmptyList(b) && !TableEntryIsTooLong(b)) {
-      InsertUniqueInList(b, node);
-    } else {
-      InsertUniqueInTree(b, get_key, node);
-    }
-
-    node = next;
-  } while (node != nullptr);
-}
-
-TableEntryPtr UntypedMapBase::ConvertToTree(NodeBase* node, GetKey get_key) {
-  auto* tree = Arena::Create<Tree>(alloc_.arena(), typename Tree::key_compare(),
-                                   typename Tree::allocator_type(alloc_));
-  for (; node != nullptr; node = node->next) {
-    tree->try_emplace(get_key(node), node);
-  }
-  ABSL_DCHECK_EQ(MapTreeLengthThreshold(), tree->size());
-
-  // Relink the nodes.
-  NodeBase* next = nullptr;
-  auto it = tree->end();
-  do {
-    node = (--it)->second;
-    node->next = next;
-    next = node;
-  } while (it != tree->begin());
-
-  return TreeToTableEntry(tree);
-}
-
 void UntypedMapBase::ClearTable(const ClearInput input) {
   ABSL_DCHECK_NE(num_buckets_, kGlobalEmptyTableSize);

   if (alloc_.arena() == nullptr) {
     const auto loop = [&, this](auto destroy_node) {
-      const TableEntryPtr* table = table_;
+      NodeBase** table = table_;
       for (map_index_t b = index_of_first_non_null_, end = num_buckets_;
            b < end; ++b) {
-        NodeBase* node =
-            ABSL_PREDICT_FALSE(internal::TableEntryIsTree(table[b]))
-                ? DestroyTree(TableEntryToTree(table[b]))
-                : TableEntryToNode(table[b]);
-        while (node != nullptr) {
+        for (NodeBase* node = table[b]; node != nullptr;) {
           NodeBase* next = node->next;
-          absl::PrefetchToLocalCacheNta(next);
           destroy_node(node);
           SizedDelete(node, SizeFromInfo(input.size_info));
           node = next;
@@ -177,7 +88,7 @@ void UntypedMapBase::ClearTable(const ClearInput input) {
   }

   if (input.reset_table) {
-    std::fill(table_, table_ + num_buckets_, TableEntryPtr{});
+    std::fill(table_, table_ + num_buckets_, nullptr);
     num_elements_ = 0;
     index_of_first_non_null_ = num_buckets_;
   } else {
@@ -185,31 +96,12 @@ void UntypedMapBase::ClearTable(const ClearInput input) {
   }
 }

-auto UntypedMapBase::FindFromTree(map_index_t b, VariantKey key,
-                                  Tree::iterator* it) const -> NodeAndBucket {
-  Tree* tree = TableEntryToTree(table_[b]);
-  auto tree_it = tree->find(key);
-  if (it != nullptr) *it = tree_it;
-  if (tree_it != tree->end()) {
-    return {tree_it->second, b};
-  }
-  return {nullptr, b};
-}
-
 size_t UntypedMapBase::SpaceUsedInTable(size_t sizeof_node) const {
   size_t size = 0;
   // The size of the table.
   size += sizeof(void*) * num_buckets_;
   // All the nodes.
   size += sizeof_node * num_elements_;
-  // For each tree, count the overhead of those nodes.
-  // Two buckets at a time because we only care about trees.
-  for (map_index_t b = 0; b < num_buckets_; ++b) {
-    if (TableEntryIsTree(b)) {
-      size += sizeof(Tree);
-      size += sizeof(Tree::value_type) * TableEntryToTree(table_[b])->size();
-    }
-  }
   return size;
 }
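
The new ClearTable above reduces to "walk each bucket's chain and free the nodes". A standalone sketch of that loop, with the arena and destroy_node details omitted and a plain delete standing in for SizedDelete:

    #include <cstddef>

    struct NodeBase { NodeBase* next; };

    // Simplified teardown of a chained hash table: every bucket is a plain
    // singly linked list, so no tree case needs to be handled.
    void ClearChainedTable(NodeBase** table, size_t num_buckets,
                           size_t first_non_null) {
      for (size_t b = first_non_null; b < num_buckets; ++b) {
        for (NodeBase* node = table[b]; node != nullptr;) {
          NodeBase* next = node->next;  // save before freeing the node
          delete node;                  // real code: SizedDelete(node, size)
          node = next;
        }
        table[b] = nullptr;
      }
    }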

src/google/protobuf/map.h
@@ -221,13 +221,6 @@ struct TransparentSupport {
   static_assert(std::is_scalar<key_type>::value,
                 "Should only be used for ints.");

-  // We hash all the scalars as uint64_t so that we can implement the same hash
-  // function for VariantKey. This way we can have MapKey provide the same hash
-  // as the underlying value would have.
-  using hash = absl::Hash<uint64_t>;
-
-  static bool Equals(key_type a, key_type b) { return a == b; }
-
   template <typename K>
   using key_arg = key_type;
@@ -255,22 +248,6 @@ struct TransparentSupport<std::string> {
     }
   }

-  struct hash : public absl::Hash<absl::string_view> {
-    using is_transparent = void;
-
-    template <typename T>
-    size_t operator()(T&& str) const {
-      return absl::Hash<absl::string_view>::operator()(
-          ImplicitConvert(std::forward<T>(str)));
-    }
-  };
-
-  template <typename T, typename U>
-  static bool Equals(T&& t, U&& u) {
-    return ImplicitConvert(std::forward<T>(t)) ==
-           ImplicitConvert(std::forward<U>(u));
-  }
-
   template <typename K>
   using key_arg = K;
@@ -307,133 +284,6 @@ struct NodeBase {
   }
 };

-inline NodeBase* EraseFromLinkedList(NodeBase* item, NodeBase* head) {
-  if (head == item) {
-    return head->next;
-  } else {
-    head->next = EraseFromLinkedList(item, head->next);
-    return head;
-  }
-}
-
-constexpr size_t MapTreeLengthThreshold() { return 8; }
-inline bool TableEntryIsTooLong(NodeBase* node) {
-  const size_t kMaxLength = MapTreeLengthThreshold();
-  size_t count = 0;
-  do {
-    ++count;
-    node = node->next;
-  } while (node != nullptr);
-  // Invariant: no linked list ever is more than kMaxLength in length.
-  ABSL_DCHECK_LE(count, kMaxLength);
-  return count >= kMaxLength;
-}
-
-// Similar to the public MapKey, but specialized for the internal
-// implementation.
-struct VariantKey {
-  // We make this value 16 bytes to make it cheaper to pass in the ABI.
-  // Can't overload string_view this way, so we unpack the fields.
-  // data==nullptr means this is a number and `integral` is the value.
-  // data!=nullptr means this is a string and `integral` is the size.
-  const char* data;
-  uint64_t integral;
-
-  explicit VariantKey(uint64_t v) : data(nullptr), integral(v) {}
-  explicit VariantKey(absl::string_view v)
-      : data(v.data()), integral(v.size()) {
-    // We use `data` to discriminate between the types, so make sure it is never
-    // null here.
-    if (data == nullptr) data = "";
-  }
-
-  friend bool operator<(const VariantKey& left, const VariantKey& right) {
-    ABSL_DCHECK_EQ(left.data == nullptr, right.data == nullptr);
-    if (left.integral != right.integral) {
-      // If they are numbers with different value, or strings with different
-      // size, check the number only.
-      return left.integral < right.integral;
-    }
-    if (left.data == nullptr) {
-      // If they are numbers they have the same value, so return.
-      return false;
-    }
-    // They are strings of the same size, so check the bytes.
-    return memcmp(left.data, right.data, left.integral) < 0;
-  }
-};
-
-// This is to be specialized by MapKey.
-template <typename T>
-struct RealKeyToVariantKey {
-  VariantKey operator()(T value) const { return VariantKey(value); }
-};
-
-template <typename T, typename = void>
-struct RealKeyToVariantKeyAlternative;
-
-template <typename T>
-struct RealKeyToVariantKeyAlternative<
-    T, typename std::enable_if<std::is_integral<T>::value>::type> {
-  uint64_t operator()(uint64_t value) const { return value; }
-};
-
-template <>
-struct RealKeyToVariantKey<std::string> {
-  template <typename T>
-  VariantKey operator()(const T& value) const {
-    return VariantKey(TransparentSupport<std::string>::ImplicitConvert(value));
-  }
-};
-
-template <>
-struct RealKeyToVariantKeyAlternative<std::string> {
-  absl::string_view operator()(absl::string_view value) const { return value; }
-};
-
-// We use a single kind of tree for all maps. This reduces code duplication.
-using TreeForMap =
-    absl::btree_map<VariantKey, NodeBase*, std::less<VariantKey>,
-                    MapAllocator<std::pair<const VariantKey, NodeBase*>>>;
-
-// Type safe tagged pointer.
-// We convert to/from nodes and trees using the operations below.
-// They ensure that the tags are used correctly.
-// There are three states:
-// - x == 0: the entry is empty
-// - x != 0 && (x&1) == 0: the entry is a node list
-// - x != 0 && (x&1) == 1: the entry is a tree
-enum class TableEntryPtr : uintptr_t;
-
-inline bool TableEntryIsEmpty(TableEntryPtr entry) {
-  return entry == TableEntryPtr{};
-}
-inline bool TableEntryIsTree(TableEntryPtr entry) {
-  return (static_cast<uintptr_t>(entry) & 1) == 1;
-}
-inline bool TableEntryIsList(TableEntryPtr entry) {
-  return !TableEntryIsTree(entry);
-}
-inline bool TableEntryIsNonEmptyList(TableEntryPtr entry) {
-  return !TableEntryIsEmpty(entry) && TableEntryIsList(entry);
-}
-inline NodeBase* TableEntryToNode(TableEntryPtr entry) {
-  ABSL_DCHECK(TableEntryIsList(entry));
-  return reinterpret_cast<NodeBase*>(static_cast<uintptr_t>(entry));
-}
-inline TableEntryPtr NodeToTableEntry(NodeBase* node) {
-  ABSL_DCHECK((reinterpret_cast<uintptr_t>(node) & 1) == 0);
-  return static_cast<TableEntryPtr>(reinterpret_cast<uintptr_t>(node));
-}
-inline TreeForMap* TableEntryToTree(TableEntryPtr entry) {
-  ABSL_DCHECK(TableEntryIsTree(entry));
-  return reinterpret_cast<TreeForMap*>(static_cast<uintptr_t>(entry) - 1);
-}
-inline TableEntryPtr TreeToTableEntry(TreeForMap* node) {
-  ABSL_DCHECK((reinterpret_cast<uintptr_t>(node) & 1) == 0);
-  return static_cast<TableEntryPtr>(reinterpret_cast<uintptr_t>(node) | 1);
-}
-
 // This captures all numeric types.
 inline size_t MapValueSpaceUsedExcludingSelfLong(bool) { return 0; }
 inline size_t MapValueSpaceUsedExcludingSelfLong(const std::string& str) {
@@ -446,8 +296,7 @@ size_t MapValueSpaceUsedExcludingSelfLong(const T& message) {
 }

 constexpr size_t kGlobalEmptyTableSize = 1;
-PROTOBUF_EXPORT extern const TableEntryPtr
-    kGlobalEmptyTable[kGlobalEmptyTableSize];
+PROTOBUF_EXPORT extern NodeBase* const kGlobalEmptyTable[kGlobalEmptyTableSize];

 template <typename Map,
           typename = typename std::enable_if<
@@ -479,10 +328,6 @@ class UntypedMapIterator {
   // We do not provide any constructors for this type. We need it to be a
   // trivial type to ensure that we can safely share it with Rust.

-  // Advance through buckets, looking for the first that isn't empty.
-  // If nothing non-empty is found then leave node_ == nullptr.
-  void SearchFrom(map_index_t start_bucket);
-
   // The definition of operator== is handled by the derived type. If we were
   // to do it in this class it would allow comparing iterators of different
   // map types.
@@ -492,13 +337,7 @@
   // The definition of operator++ is handled in the derived type. We would not
   // be able to return the right type from here.
-  void PlusPlus() {
-    if (node_->next == nullptr) {
-      SearchFrom(bucket_index_ + 1);
-    } else {
-      node_ = node_->next;
-    }
-  }
+  void PlusPlus();

   // Conversion to and from a typed iterator child class is used by FFI.
   template <class Iter>
@@ -550,7 +389,6 @@ static_assert(
 // parser) by having non-template code that can handle all instantiations.
 class PROTOBUF_EXPORT UntypedMapBase {
   using Allocator = internal::MapAllocator<void*>;
-  using Tree = internal::TreeForMap;

  public:
   using size_type = size_t;
@@ -559,7 +397,7 @@ class PROTOBUF_EXPORT UntypedMapBase {
       : num_elements_(0),
         num_buckets_(internal::kGlobalEmptyTableSize),
         index_of_first_non_null_(internal::kGlobalEmptyTableSize),
-        table_(const_cast<TableEntryPtr*>(internal::kGlobalEmptyTable)),
+        table_(const_cast<NodeBase**>(internal::kGlobalEmptyTable)),
         alloc_(arena) {}

   UntypedMapBase(const UntypedMapBase&) = delete;
@@ -618,38 +456,6 @@ class PROTOBUF_EXPORT UntypedMapBase {
 #endif
   }

-  // Helper for InsertUnique.  Handles the case where bucket b is a
-  // not-too-long linked list.
-  void InsertUniqueInList(map_index_t b, NodeBase* node) {
-    if (!TableEntryIsEmpty(b) && ShouldInsertAfterHead(node)) {
-      auto* first = TableEntryToNode(table_[b]);
-      node->next = first->next;
-      first->next = node;
-    } else {
-      node->next = TableEntryToNode(table_[b]);
-      table_[b] = NodeToTableEntry(node);
-    }
-  }
-
-  bool TableEntryIsEmpty(map_index_t b) const {
-    return internal::TableEntryIsEmpty(table_[b]);
-  }
-  bool TableEntryIsNonEmptyList(map_index_t b) const {
-    return internal::TableEntryIsNonEmptyList(table_[b]);
-  }
-  bool TableEntryIsTree(map_index_t b) const {
-    return internal::TableEntryIsTree(table_[b]);
-  }
-  bool TableEntryIsList(map_index_t b) const {
-    return internal::TableEntryIsList(table_[b]);
-  }
-
-  // Return whether table_[b] is a linked list that seems awfully long.
-  // Requires table_[b] to point to a non-empty linked list.
-  bool TableEntryIsTooLong(map_index_t b) {
-    return internal::TableEntryIsTooLong(TableEntryToNode(table_[b]));
-  }
-
   // Return a power of two no less than max(kMinTableSize, n).
   // Assumes either n < kMinTableSize or n is a power of two.
   map_index_t TableSize(map_index_t n) {
@@ -678,42 +484,18 @@ class PROTOBUF_EXPORT UntypedMapBase {
     AllocFor<NodeBase>(alloc_).deallocate(node, node_size / sizeof(NodeBase));
   }

-  void DeleteTable(TableEntryPtr* table, map_index_t n) {
+  void DeleteTable(NodeBase** table, map_index_t n) {
     if (auto* a = arena()) {
-      a->ReturnArrayMemory(table, n * sizeof(TableEntryPtr));
+      a->ReturnArrayMemory(table, n * sizeof(NodeBase*));
     } else {
-      internal::SizedDelete(table, n * sizeof(TableEntryPtr));
+      internal::SizedDelete(table, n * sizeof(NodeBase*));
     }
   }

-  NodeBase* DestroyTree(Tree* tree);
-  using GetKey = VariantKey (*)(NodeBase*);
-  void InsertUniqueInTree(map_index_t b, GetKey get_key, NodeBase* node);
-  void TransferTree(Tree* tree, GetKey get_key);
-  TableEntryPtr ConvertToTree(NodeBase* node, GetKey get_key);
-  void EraseFromTree(map_index_t b, typename Tree::iterator tree_it);
-
-  map_index_t VariantBucketNumber(VariantKey key) const {
-    return key.data == nullptr
-               ? VariantBucketNumber(key.integral)
-               : VariantBucketNumber(absl::string_view(
-                     key.data, static_cast<size_t>(key.integral)));
-  }
-
-  map_index_t VariantBucketNumber(absl::string_view key) const {
-    return static_cast<map_index_t>(absl::HashOf(key, table_) &
-                                    (num_buckets_ - 1));
-  }
-
-  map_index_t VariantBucketNumber(uint64_t key) const {
-    return static_cast<map_index_t>(absl::HashOf(key, table_) &
-                                    (num_buckets_ - 1));
-  }
-
-  TableEntryPtr* CreateEmptyTable(map_index_t n) {
+  NodeBase** CreateEmptyTable(map_index_t n) {
     ABSL_DCHECK_GE(n, kMinTableSize);
     ABSL_DCHECK_EQ(n & (n - 1), 0u);
-    TableEntryPtr* result = AllocFor<TableEntryPtr>(alloc_).allocate(n);
+    NodeBase** result = AllocFor<NodeBase*>(alloc_).allocate(n);
     memset(result, 0, n * sizeof(result[0]));
     return result;
   }
@@ -768,9 +550,6 @@ class PROTOBUF_EXPORT UntypedMapBase {
   void ClearTable(ClearInput input);

-  NodeAndBucket FindFromTree(map_index_t b, VariantKey key,
-                             Tree::iterator* it) const;
-
   // Space used for the table, trees, and nodes.
   // Does not include the indirect space used. Eg the data of a std::string.
   size_t SpaceUsedInTable(size_t sizeof_node) const;
@@ -778,7 +557,7 @@ class PROTOBUF_EXPORT UntypedMapBase {
   map_index_t num_elements_;
   map_index_t num_buckets_;
   map_index_t index_of_first_non_null_;
-  TableEntryPtr* table_;  // an array with num_buckets_ entries
+  NodeBase** table_;  // an array with num_buckets_ entries
   Allocator alloc_;
 };
@@ -790,31 +569,26 @@ inline UntypedMapIterator UntypedMapBase::begin() const {
     node = nullptr;
   } else {
     bucket_index = index_of_first_non_null_;
-    TableEntryPtr entry = table_[bucket_index];
-    node = ABSL_PREDICT_TRUE(internal::TableEntryIsList(entry))
-               ? TableEntryToNode(entry)
-               : TableEntryToTree(entry)->begin()->second;
+    node = table_[bucket_index];
     PROTOBUF_ASSUME(node != nullptr);
   }
   return UntypedMapIterator{node, this, bucket_index};
 }

-inline void UntypedMapIterator::SearchFrom(map_index_t start_bucket) {
-  ABSL_DCHECK(m_->index_of_first_non_null_ == m_->num_buckets_ ||
-              !m_->TableEntryIsEmpty(m_->index_of_first_non_null_));
-  for (map_index_t i = start_bucket; i < m_->num_buckets_; ++i) {
-    TableEntryPtr entry = m_->table_[i];
-    if (entry == TableEntryPtr{}) continue;
+inline void UntypedMapIterator::PlusPlus() {
+  if (node_->next != nullptr) {
+    node_ = node_->next;
+    return;
+  }
+
+  for (map_index_t i = bucket_index_ + 1; i < m_->num_buckets_; ++i) {
+    NodeBase* node = m_->table_[i];
+    if (node == nullptr) continue;
+    node_ = node;
     bucket_index_ = i;
-    if (ABSL_PREDICT_TRUE(TableEntryIsList(entry))) {
-      node_ = TableEntryToNode(entry);
-    } else {
-      TreeForMap* tree = TableEntryToTree(entry);
-      ABSL_DCHECK(!tree->empty());
-      node_ = tree->begin()->second;
-    }
     return;
   }
+
   node_ = nullptr;
   bucket_index_ = 0;
 }
@@ -862,11 +636,7 @@ struct KeyNode : NodeBase {
   decltype(auto) key() const { return ReadKey<Key>(GetVoidKey()); }
 };

-// KeyMapBase is a chaining hash map with the additional feature that some
-// buckets can be converted to use an ordered container.  This ensures O(lg n)
-// bounds on find, insert, and erase, while avoiding the overheads of ordered
-// containers most of the time.
-//
+// KeyMapBase is a chaining hash map.
 // The implementation doesn't need the full generality of unordered_map,
 // and it doesn't have it. More bells and whistles can be added as needed.
 // Some implementation details:
@@ -874,16 +644,9 @@ struct KeyNode : NodeBase {
 // 2. As is typical for hash_map and such, the Keys and Values are always
 //    stored in linked list nodes. Pointers to elements are never invalidated
 //    until the element is deleted.
-// 3. The trees' payload type is pointer to linked-list node.  Tree-converting
-//    a bucket doesn't copy Key-Value pairs.
-// 4. Once we've tree-converted a bucket, it is never converted back unless the
-//    bucket is completely emptied out.  Note that the items a tree contains may
-//    wind up assigned to trees or lists upon a rehash.
-// 5. Mutations to a map do not invalidate the map's iterators, pointers to
+// 3. Mutations to a map do not invalidate the map's iterators, pointers to
 //    elements, or references to elements.
-// 6. Except for erase(iterator), any non-const method can reorder iterators.
-// 7. Uses VariantKey when using the Tree representation, which holds all
-//    possible key types as a variant value.
+// 4. Except for erase(iterator), any non-const method can reorder iterators.

 template <typename Key>
 class KeyMapBase : public UntypedMapBase {
@@ -893,24 +656,11 @@ class KeyMapBase : public UntypedMapBase {
   using TS = TransparentSupport<Key>;

  public:
-  using hasher = typename TS::hash;
-
   using UntypedMapBase::UntypedMapBase;

  protected:
   using KeyNode = internal::KeyNode<Key>;

-  // Trees. The payload type is a copy of Key, so that we can query the tree
-  // with Keys that are not in any particular data structure.
-  // The value is a void* pointing to Node. We use void* instead of Node* to
-  // avoid code bloat. That way there is only one instantiation of the tree
-  // class per key type.
-  using Tree = internal::TreeForMap;
-  using TreeIterator = typename Tree::iterator;
-
- public:
-  hasher hash_function() const { return {}; }
-
- protected:
   friend class TcParser;
   friend struct MapTestPeer;
@@ -919,39 +669,41 @@ class KeyMapBase : public UntypedMapBase {
   friend class v2::TableDriven;

   PROTOBUF_NOINLINE void erase_no_destroy(map_index_t b, KeyNode* node) {
-    TreeIterator tree_it;
-    const bool is_list = revalidate_if_necessary(b, node, &tree_it);
-    if (is_list) {
-      ABSL_DCHECK(TableEntryIsNonEmptyList(b));
-      auto* head = TableEntryToNode(table_[b]);
-      head = EraseFromLinkedList(node, head);
-      table_[b] = NodeToTableEntry(head);
-    } else {
-      EraseFromTree(b, tree_it);
+    // Force bucket_index to be in range.
+    b &= (num_buckets_ - 1);
+
+    const auto find_prev = [&] {
+      NodeBase** prev = table_ + b;
+      for (; *prev != nullptr && *prev != node; prev = &(*prev)->next) {
+      }
+      return prev;
+    };
+
+    NodeBase** prev = find_prev();
+    if (*prev == nullptr) {
+      // The bucket index is wrong. The table was modified since the iterator
+      // was made, so let's find the new bucket.
+      b = FindHelper(TS::ToView(node->key())).bucket;
+      prev = find_prev();
     }
+    ABSL_DCHECK_EQ(*prev, node);
+    *prev = (*prev)->next;

     --num_elements_;
     if (ABSL_PREDICT_FALSE(b == index_of_first_non_null_)) {
       while (index_of_first_non_null_ < num_buckets_ &&
-             TableEntryIsEmpty(index_of_first_non_null_)) {
+             table_[index_of_first_non_null_] == nullptr) {
         ++index_of_first_non_null_;
       }
     }
   }

-  NodeAndBucket FindHelper(typename TS::ViewType k,
-                           TreeIterator* it = nullptr) const {
+  NodeAndBucket FindHelper(typename TS::ViewType k) const {
     map_index_t b = BucketNumber(k);
-    if (TableEntryIsNonEmptyList(b)) {
-      auto* node = internal::TableEntryToNode(table_[b]);
-      do {
-        if (TS::Equals(static_cast<KeyNode*>(node)->key(), k)) {
-          return {node, b};
-        } else {
-          node = node->next;
-        }
-      } while (node != nullptr);
-    } else if (TableEntryIsTree(b)) {
-      return FindFromTree(b, internal::RealKeyToVariantKey<Key>{}(k), it);
+    for (auto* node = table_[b]; node != nullptr; node = node->next) {
+      if (TS::ToView(static_cast<KeyNode*>(node)->key()) == k) {
+        return {node, b};
+      }
     }
     return {nullptr, b};
   }
@@ -981,27 +733,26 @@ class KeyMapBase : public UntypedMapBase {
   // bucket.  num_elements_ is not modified.
   void InsertUnique(map_index_t b, KeyNode* node) {
     ABSL_DCHECK(index_of_first_non_null_ == num_buckets_ ||
-                !TableEntryIsEmpty(index_of_first_non_null_));
+                table_[index_of_first_non_null_] != nullptr);
     // In practice, the code that led to this point may have already
     // determined whether we are inserting into an empty list, a short list,
     // or whatever.  But it's probably cheap enough to recompute that here;
     // it's likely that we're inserting into an empty or short list.
     ABSL_DCHECK(FindHelper(TS::ToView(node->key())).node == nullptr);
-    if (TableEntryIsEmpty(b)) {
-      InsertUniqueInList(b, node);
+    auto*& head = table_[b];
+    if (head == nullptr) {
+      head = node;
+      node->next = nullptr;
       index_of_first_non_null_ = (std::min)(index_of_first_non_null_, b);
-    } else if (TableEntryIsNonEmptyList(b) && !TableEntryIsTooLong(b)) {
-      InsertUniqueInList(b, node);
+    } else if (ShouldInsertAfterHead(node)) {
+      node->next = head->next;
+      head->next = node;
     } else {
-      InsertUniqueInTree(b, NodeToVariantKey, node);
+      node->next = head;
+      head = node;
     }
   }

-  static VariantKey NodeToVariantKey(NodeBase* node) {
-    return internal::RealKeyToVariantKey<Key>{}(
-        static_cast<KeyNode*>(node)->key());
-  }
-
   // Have it a separate function for testing.
   static size_type CalculateHiCutoff(size_type num_buckets) {
     // We want the high cutoff to follow this rules:
@@ -1071,58 +822,19 @@ class KeyMapBase : public UntypedMapBase {
     const map_index_t start = index_of_first_non_null_;
     index_of_first_non_null_ = num_buckets_;
     for (map_index_t i = start; i < old_table_size; ++i) {
-      if (internal::TableEntryIsNonEmptyList(old_table[i])) {
-        TransferList(static_cast<KeyNode*>(TableEntryToNode(old_table[i])));
-      } else if (internal::TableEntryIsTree(old_table[i])) {
-        this->TransferTree(TableEntryToTree(old_table[i]), NodeToVariantKey);
+      for (KeyNode* node = static_cast<KeyNode*>(old_table[i]);
+           node != nullptr;) {
+        auto* next = static_cast<KeyNode*>(node->next);
+        InsertUnique(BucketNumber(TS::ToView(node->key())), node);
+        node = next;
       }
     }
     DeleteTable(old_table, old_table_size);
   }

-  // Transfer all nodes in the list `node` into `this`.
-  void TransferList(KeyNode* node) {
-    do {
-      auto* next = static_cast<KeyNode*>(node->next);
-      InsertUnique(BucketNumber(TS::ToView(node->key())), node);
-      node = next;
-    } while (node != nullptr);
-  }
-
   map_index_t BucketNumber(typename TS::ViewType k) const {
-    ABSL_DCHECK_EQ(
-        VariantBucketNumber(RealKeyToVariantKeyAlternative<Key>{}(k)),
-        VariantBucketNumber(RealKeyToVariantKey<Key>{}(k)));
-    return VariantBucketNumber(RealKeyToVariantKeyAlternative<Key>{}(k));
-  }
-
-  // Assumes node_ and m_ are correct and non-null, but other fields may be
-  // stale.  Fix them as needed.  Then return true iff node_ points to a
-  // Node in a list.  If false is returned then *it is modified to be
-  // a valid iterator for node_.
-  bool revalidate_if_necessary(map_index_t& bucket_index, KeyNode* node,
-                               TreeIterator* it) const {
-    // Force bucket_index to be in range.
-    bucket_index &= (num_buckets_ - 1);
-    // Common case: the bucket we think is relevant points to `node`.
-    if (table_[bucket_index] == NodeToTableEntry(node)) return true;
-    // Less common: the bucket is a linked list with node_ somewhere in it,
-    // but not at the head.
-    if (TableEntryIsNonEmptyList(bucket_index)) {
-      auto* l = TableEntryToNode(table_[bucket_index]);
-      while ((l = l->next) != nullptr) {
-        if (l == node) {
-          return true;
-        }
-      }
-    }
-    // Well, bucket_index_ still might be correct, but probably
-    // not. Revalidate just to be sure. This case is rare enough that we
-    // don't worry about potential optimizations, such as having a custom
-    // find-like method that compares Node* instead of the key.
-    auto res = FindHelper(TS::ToView(node->key()), it);
-    bucket_index = res.bucket;
-    return TableEntryIsList(bucket_index);
+    return static_cast<map_index_t>(absl::HashOf(k, table_) &
+                                    (num_buckets_ - 1));
   }
 };
@@ -1238,7 +950,7 @@ class Map : private internal::KeyMapBase<internal::KeyForBase<Key>> {
   using const_reference = const value_type&;
   using size_type = size_t;
-  using hasher = typename TS::hash;
+  using hasher = absl::Hash<typename TS::ViewType>;

   constexpr Map() : Base(nullptr) { StaticValidityCheck(); }
   Map(const Map& other) : Map(nullptr, other) {}
@@ -1674,17 +1386,6 @@ class Map : private internal::KeyMapBase<internal::KeyForBase<Key>> {
     value_type kv;
   };

-  using Tree = internal::TreeForMap;
-  using TreeIterator = typename Tree::iterator;
-  using TableEntryPtr = internal::TableEntryPtr;
-
-  static Node* NodeFromTreeIterator(TreeIterator it) {
-    static_assert(
-        PROTOBUF_FIELD_OFFSET(Node, kv.first) == Base::KeyNode::kOffset, "");
-    static_assert(alignof(Node) == alignof(internal::NodeBase), "");
-    return static_cast<Node*>(it->second);
-  }
-
   void DestroyNode(Node* node) {
     if (this->alloc_.arena() == nullptr) {
       node->kv.first.~key_type();
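
One detail of the new map.h worth calling out: BucketNumber now feeds both the key and the table pointer to absl::HashOf and masks the result with the power-of-two bucket count, so each map instance gets its own bucket assignment. A small self-contained illustration of the pattern (hypothetical free function, same idea as the member above):

    #include <cstdint>

    #include "absl/hash/hash.h"
    #include "absl/strings/string_view.h"

    // Hash the key together with a per-table seed (here the table pointer)
    // and mask with the bucket count, as the new BucketNumber does.
    uint32_t BucketNumber(absl::string_view key, const void* table,
                          uint32_t num_buckets) {
      // num_buckets must be a power of two for the mask to act as a modulo.
      return static_cast<uint32_t>(absl::HashOf(key, table) &
                                   (num_buckets - 1));
    }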

src/google/protobuf/map_field.cc
@@ -29,26 +29,6 @@ namespace google {
 namespace protobuf {
 namespace internal {

-VariantKey RealKeyToVariantKey<MapKey>::operator()(const MapKey& value) const {
-  switch (value.type()) {
-    case FieldDescriptor::CPPTYPE_STRING:
-      return VariantKey(value.GetStringValue());
-    case FieldDescriptor::CPPTYPE_INT64:
-      return VariantKey(value.GetInt64Value());
-    case FieldDescriptor::CPPTYPE_INT32:
-      return VariantKey(value.GetInt32Value());
-    case FieldDescriptor::CPPTYPE_UINT64:
-      return VariantKey(value.GetUInt64Value());
-    case FieldDescriptor::CPPTYPE_UINT32:
-      return VariantKey(value.GetUInt32Value());
-    case FieldDescriptor::CPPTYPE_BOOL:
-      return VariantKey(static_cast<uint64_t>(value.GetBoolValue()));
-    default:
-      Unreachable();
-      return VariantKey(uint64_t{});
-  }
-}
-
 MapFieldBase::~MapFieldBase() {
   ABSL_DCHECK_EQ(arena(), nullptr);
   delete maybe_payload();

src/google/protobuf/map_field.h
@@ -235,6 +235,26 @@ class PROTOBUF_EXPORT MapKey {
   friend class MapIterator;
   friend class internal::DynamicMapField;

+  template <typename H>
+  friend auto AbslHashValue(H state, const MapKey& key) {
+    switch (key.type()) {
+      case FieldDescriptor::CPPTYPE_STRING:
+        return H::combine(std::move(state), key.GetStringValue());
+      case FieldDescriptor::CPPTYPE_INT64:
+        return H::combine(std::move(state), key.GetInt64Value());
+      case FieldDescriptor::CPPTYPE_INT32:
+        return H::combine(std::move(state), key.GetInt32Value());
+      case FieldDescriptor::CPPTYPE_UINT64:
+        return H::combine(std::move(state), key.GetUInt64Value());
+      case FieldDescriptor::CPPTYPE_UINT32:
+        return H::combine(std::move(state), key.GetUInt32Value());
+      case FieldDescriptor::CPPTYPE_BOOL:
+        return H::combine(std::move(state), key.GetBoolValue());
+      default:
+        internal::Unreachable();
+    }
+  }
+
   union KeyValue {
     KeyValue() {}
     absl::string_view string_value;
@@ -257,18 +277,6 @@ namespace internal {
 template <>
 struct is_internal_map_key_type<MapKey> : std::true_type {};

-template <>
-struct RealKeyToVariantKey<MapKey> {
-  VariantKey operator()(const MapKey& value) const;
-};
-
-template <>
-struct RealKeyToVariantKeyAlternative<MapKey> {
-  VariantKey operator()(const MapKey& value) const {
-    return RealKeyToVariantKey<MapKey>{}(value);
-  }
-};
-
 }  // namespace internal

 namespace internal {
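
The AbslHashValue friend added to MapKey replaces the removed RealKeyToVariantKey conversion: instead of mapping every key to an ordered VariantKey, the active alternative of the union is hashed directly. The same switch-on-a-tag pattern works for any tagged union; a hedged sketch with a hypothetical two-alternative key:

    #include <cstdint>
    #include <string>
    #include <utility>

    #include "absl/hash/hash.h"

    // Hypothetical tagged union, hashed the same way as the MapKey change:
    // dispatch on the tag and hash only the active alternative.
    struct TaggedKey {
      enum class Kind { kInt, kString };
      Kind kind = Kind::kInt;
      int64_t int_value = 0;
      std::string string_value;

      template <typename H>
      friend H AbslHashValue(H state, const TaggedKey& k) {
        return k.kind == Kind::kInt
                   ? H::combine(std::move(state), k.int_value)
                   : H::combine(std::move(state), k.string_value);
      }
    };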

src/google/protobuf/map_probe_benchmark.cc
@@ -30,36 +30,16 @@ struct MapBenchmarkPeer {
   static double GetMeanProbeLength(const T& map) {
     double total_probe_cost = 0;
     for (map_index_t b = 0; b < map.num_buckets_; ++b) {
-      if (map.TableEntryIsList(b)) {
-        auto* node = internal::TableEntryToNode(map.table_[b]);
-        size_t cost = 0;
-        while (node != nullptr) {
-          total_probe_cost += static_cast<double>(cost);
-          cost++;
-          node = node->next;
-        }
-      } else if (map.TableEntryIsTree(b)) {
-        // Overhead factor to account for more costly binary search.
-        constexpr double kTreeOverhead = 2.0;
-        size_t tree_size = TableEntryToTree(map.table_[b])->size();
-        total_probe_cost += kTreeOverhead * static_cast<double>(tree_size) *
-                            std::log2(tree_size);
+      auto* node = map.table_[b];
+      size_t cost = 0;
+      while (node != nullptr) {
+        total_probe_cost += static_cast<double>(cost);
+        cost++;
+        node = node->next;
       }
     }
     return total_probe_cost / map.size();
   }
-
-  template <typename T>
-  static double GetPercentTree(const T& map) {
-    size_t total_tree_size = 0;
-    for (map_index_t b = 0; b < map.num_buckets_; ++b) {
-      if (map.TableEntryIsTree(b)) {
-        total_tree_size += TableEntryToTree(map.table_[b])->size();
-      }
-    }
-    return static_cast<double>(total_tree_size) /
-           static_cast<double>(map.size());
-  }
 };
 }  // namespace protobuf
 }  // namespace google::internal
@@ -111,7 +91,6 @@ struct Ratios {
   double min_load;
   double avg_load;
   double max_load;
-  double percent_tree;
 };

 template <class ElemFn>
@@ -132,7 +111,6 @@ Ratios CollectMeanProbeLengths() {
   while (t.size() < min_max_sizes.max_load) t[elem()];
   result.max_load = Peer::GetMeanProbeLength(t);
-  result.percent_tree = Peer::GetPercentTree(t);

   return result;
 }
@@ -297,7 +275,6 @@ int main(int argc, char** argv) {
     print("min", &Ratios::min_load);
     print("avg", &Ratios::avg_load);
     print("max", &Ratios::max_load);
-    print("tree_percent", &Ratios::percent_tree);
   }
   absl::PrintF(" ],\n");
   absl::PrintF(" \"context\": {\n");
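
For reference, the probe-cost accounting in GetMeanProbeLength is unchanged for lists; only the tree term disappears. A chain of length L contributes 0 + 1 + ... + (L-1) = L(L-1)/2 hops to the total. In code:

    #include <cstddef>

    // Probe cost that a single chain of length `len` adds to the total:
    // reaching the i-th node takes i hops, so the sum is len * (len - 1) / 2.
    double ChainProbeCost(size_t len) {
      return static_cast<double>(len * (len - 1)) / 2.0;
    }
    // Example: a 4-node chain costs 0 + 1 + 2 + 3 = 6 probes in total,
    // i.e. a mean probe length of 1.5 over those four elements.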

src/google/protobuf/map_test.inc
@@ -87,6 +87,11 @@ struct MoveTestKey {
   MoveTestKey(MoveTestKey&& other) noexcept
       : data(other.data), copies(other.copies) {}

+  template <typename H>
+  friend auto AbslHashValue(H state, const MoveTestKey& m) {
+    return H::combine(std::move(state), m.data);
+  }
+
   friend bool operator==(const MoveTestKey& lhs, const MoveTestKey& rhs) {
     return lhs.data == rhs.data;
   }
@@ -313,28 +318,10 @@ namespace google {
 namespace protobuf {
 namespace internal {

-template <>
-struct RealKeyToVariantKey<MoveTestKey> {
-  VariantKey operator()(const MoveTestKey& value) const {
-    return VariantKey(value.data);
-  }
-};
-
-template <>
-struct RealKeyToVariantKeyAlternative<MoveTestKey> {
-  VariantKey operator()(const MoveTestKey& value) const {
-    return VariantKey(value.data);
-  }
-};
-
 template <>
 struct TransparentSupport<MoveTestKey> {
   using hash = absl::Hash<MoveTestKey>;

-  static bool Equals(const MoveTestKey& a, const MoveTestKey& b) {
-    return a == b;
-  }
-
   template <typename K>
   using key_arg = MoveTestKey;
@@ -505,7 +492,6 @@ static int k0 = 812398771;
 static int k1 = 1312938717;
 static int k2 = 1321555333;

-
 TEST_F(MapImplTest, CopyIteratorStressTest) {
   std::vector<Map<int32_t, int32_t>::iterator> v;
   const int kIters = 1e5;
