Optimize memory usage of Map:

- Reduce the minimum capacity to avoid waste on small tables.
- Increase the max load factor of small tables to 1.0.
- Return the memory to the arena during growth. This avoids the 2x memory
  usage of the backing array. The same optimization was already done for
  RepeatedField.

We also update the bucket calculation to do more bit mixing, ensuring proper
non-determinism on small tables now that the minimum capacity is lower.
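
Where the 2x comes from: each doubling of the bucket array strands the previous
array in the arena, and the retired generations sum to roughly one extra copy
of the live array. A standalone back-of-the-envelope sketch (illustrative
sizes only, not protobuf code):

#include <cstddef>
#include <cstdio>

// Bytes pinned by a doubling bucket array with 8-byte slots when retired
// arrays are never handed back to the arena.
int main() {
  std::size_t live = 0, pinned = 0;
  for (std::size_t buckets = 8; buckets <= 1024; buckets *= 2) {
    live = buckets * 8;     // the array currently in use
    pinned += buckets * 8;  // the arena still holds every retired array too
  }
  std::printf("live=%zu pinned=%zu ratio=%.2f\n", live, pinned,
              static_cast<double>(pinned) / live);  // ratio is ~2.0
}

Returning each retired array to the arena (as the map.h hunk below does with
Arena::ReturnArrayMemory) lets the next doubling reuse those bytes instead.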

PiperOrigin-RevId: 582765057
Branch: pull/14690/head
Author: Protobuf Team Bot (committed by Copybara-Service)
Parent: b8c1a2743b
Commit: 668dbd2507
6 changed files:

  src/google/protobuf/BUILD.bazel       |  1
  src/google/protobuf/arena.h           |  3
  src/google/protobuf/arena_test_util.h | 10
  src/google/protobuf/map.h             | 51
  src/google/protobuf/map_test.cc       | 56
  src/google/protobuf/map_test.inc      | 24

--- a/src/google/protobuf/BUILD.bazel
+++ b/src/google/protobuf/BUILD.bazel
@@ -887,6 +887,7 @@ cc_library(
     visibility = ["//:__subpackages__"],
     deps = [
         ":cc_lite_test_protos",
+        ":port_def",
         ":test_util2",
         "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/log:absl_check",

--- a/src/google/protobuf/arena.h
+++ b/src/google/protobuf/arena.h
@@ -65,6 +65,7 @@ struct ArenaTestPeer;        // defined in arena_test_util.h
 class InternalMetadata;      // defined in metadata_lite.h
 class LazyField;             // defined in lazy_field.h
 class EpsCopyInputStream;    // defined in parse_context.h
+class UntypedMapBase;        // defined in map.h
 class RepeatedPtrFieldBase;  // defined in repeated_ptr_field.h
 class TcParser;              // defined in generated_message_tctable_impl.h
@@ -659,6 +660,8 @@ class PROTOBUF_EXPORT PROTOBUF_ALIGNAS(8) Arena final {
   template <typename>
   friend class RepeatedField;                   // For ReturnArrayMemory
   friend class internal::RepeatedPtrFieldBase;  // For ReturnArrayMemory
+  friend class internal::UntypedMapBase;        // For ReturnArenaMemory
+
   friend struct internal::ArenaTestPeer;
 };
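
The friend declarations are needed because Arena::ReturnArrayMemory is not
public. The underlying idea is a small per-size-class cache of returned blocks
inside the arena; a toy sketch of that pattern (types and names here are
hypothetical, not protobuf's actual arena):

#include <cstddef>
#include <cstdlib>

// Returned arrays go into a free list bucketed by power-of-two size class.
// The first word of a cached block doubles as the free-list link, which is
// why blocks below 16 bytes are not worth caching.
class ToyArena {
 public:
  void* AllocateArray(std::size_t bytes) {
    void*& head = cache_[SizeClass(bytes)];
    if (head != nullptr) {
      void* p = head;
      head = *static_cast<void**>(p);  // pop a previously returned block
      return p;
    }
    return std::malloc(bytes);  // stand-in for real bump allocation
  }
  void ReturnArrayMemory(void* p, std::size_t bytes) {
    *static_cast<void**>(p) = cache_[SizeClass(bytes)];  // push onto the list
    cache_[SizeClass(bytes)] = p;
  }

 private:
  static int SizeClass(std::size_t bytes) {
    int sc = 0;
    for (std::size_t n = 16; n < bytes; n *= 2) ++sc;  // 16, 32, 64, ...
    return sc;
  }
  void* cache_[32] = {};  // one free list per size class
};

This is also why kMinTableSize changes to 16 / sizeof(void*) in map.h below:
the comment there notes that 16 bytes is the smallest array the arena cache
can usefully hold.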

--- a/src/google/protobuf/arena_test_util.h
+++ b/src/google/protobuf/arena_test_util.h
@@ -16,6 +16,9 @@
 #include "google/protobuf/io/coded_stream.h"
 #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
 
+// Must be included last.
+#include "google/protobuf/port_def.inc"
+
 namespace google {
 namespace protobuf {
@@ -31,7 +34,12 @@ void TestParseCorruptedString(const T& message) {
     out.SetSerializationDeterministic(true);
     message.SerializePartialToCodedStream(&out);
   }
+#if defined(PROTOBUF_ASAN) || defined(PROTOBUF_TSAN) || defined(PROTOBUF_MSAN)
+  // Make the test smaller in sanitizer mode.
+  const int kMaxIters = 200;
+#else
   const int kMaxIters = 900;
+#endif
   const int stride = s.size() <= kMaxIters ? 1 : s.size() / kMaxIters;
   const int start = stride == 1 || use_arena ? 0 : (stride + 1) / 2;
   for (int i = start; i < s.size(); i += stride) {
@@ -135,4 +143,6 @@ class ArenaHolder {
 }  // namespace protobuf
 }  // namespace google
 
+#include "google/protobuf/port_undef.inc"
+
 #endif  // GOOGLE_PROTOBUF_ARENA_TEST_UTIL_H__
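
PROTOBUF_ASAN, PROTOBUF_TSAN, and PROTOBUF_MSAN come from port_def.inc, which
is why the header now includes it (balanced by port_undef.inc at the end).
Sanitizer detection generally follows this shape; the snippet below is the
usual compiler idiom, not the literal port_def.inc contents:

// Clang exposes __has_feature; GCC defines __SANITIZE_ADDRESS__ instead.
// TSAN and MSAN detection is analogous.
#if defined(__has_feature)
#if __has_feature(address_sanitizer)
#define PROTOBUF_ASAN 1
#endif
#elif defined(__SANITIZE_ADDRESS__)
#define PROTOBUF_ASAN 1
#endif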

--- a/src/google/protobuf/map.h
+++ b/src/google/protobuf/map.h
@@ -536,7 +536,8 @@ class PROTOBUF_EXPORT UntypedMapBase {
   UntypedMapBase& operator=(const UntypedMapBase&) = delete;
 
  protected:
-  enum { kMinTableSize = 8 };
+  // 16 bytes is the minimum useful size for the array cache in the arena.
+  enum { kMinTableSize = 16 / sizeof(void*) };
 
  public:
   Arena* arena() const { return this->alloc_.arena(); }
@@ -649,7 +650,11 @@ class PROTOBUF_EXPORT UntypedMapBase {
   }
 
   void DeleteTable(TableEntryPtr* table, map_index_t n) {
-    AllocFor<TableEntryPtr>(alloc_).deallocate(table, n);
+    if (auto* a = arena()) {
+      a->ReturnArrayMemory(table, n * sizeof(TableEntryPtr));
+    } else {
+      internal::SizedDelete(table, n * sizeof(TableEntryPtr));
+    }
   }
 
   NodeBase* DestroyTree(Tree* tree);
@@ -664,13 +669,8 @@ class PROTOBUF_EXPORT UntypedMapBase {
   map_index_t BucketNumberFromHash(uint64_t h) const {
     // We xor the hash value against the random seed so that we effectively
     // have a random hash function.
-    h ^= seed_;
-
-    // We use the multiplication method to determine the bucket number from
-    // the hash value. The constant kPhi (suggested by Knuth) is roughly
-    // (sqrt(5) - 1) / 2 * 2^64.
-    constexpr uint64_t kPhi = uint64_t{0x9e3779b97f4a7c15};
-    return (MultiplyWithOverflow(kPhi, h) >> 32) & (num_buckets_ - 1);
+    // We use absl::Hash to do bit mixing for uniform bucket selection.
+    return absl::HashOf(h ^ seed_) & (num_buckets_ - 1);
   }
 
   TableEntryPtr* CreateEmptyTable(map_index_t n) {
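
Why the stronger mixing matters: the bucket index keeps only the low bits of
the hash (num_buckets_ - 1 is a power-of-two mask), and with the minimum table
size dropping to 2 there can be a single usable bit. The old multiplicative
step drew on high product bits, but a plain XOR with the seed leaves
sequential keys trivially patterned. A toy comparison (arbitrary seed; uses
absl::HashOf as the new code does):

#include <cstdint>
#include <cstdio>
#include "absl/hash/hash.h"

int main() {
  const std::uint64_t seed = 0xdeadbeef;
  const std::uint64_t mask = 1;  // a 2-bucket table, the new minimum
  for (std::uint64_t h = 0; h < 8; ++h) {
    std::uint64_t naive = (h ^ seed) & mask;  // alternates 1, 0, 1, 0, ...
    std::uint64_t mixed = absl::HashOf(h ^ seed) & mask;  // no visible pattern
    std::printf("%llu %llu\n", static_cast<unsigned long long>(naive),
                static_cast<unsigned long long>(mixed));
  }
}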
@@ -683,29 +683,29 @@ class PROTOBUF_EXPORT UntypedMapBase {
   // Return a randomish value.
   map_index_t Seed() const {
-    // We get a little bit of randomness from the address of the map. The
-    // lower bits are not very random, due to alignment, so we discard them
-    // and shift the higher bits into their place.
-    map_index_t s = reinterpret_cast<uintptr_t>(this) >> 4;
+    uint64_t s = 0;
 #if !defined(GOOGLE_PROTOBUF_NO_RDTSC)
 #if defined(__APPLE__)
     // Use a commpage-based fast time function on Apple environments (MacOS,
     // iOS, tvOS, watchOS, etc).
-    s += mach_absolute_time();
+    s = mach_absolute_time();
 #elif defined(__x86_64__) && defined(__GNUC__)
     uint32_t hi, lo;
     asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
-    s += ((static_cast<uint64_t>(hi) << 32) | lo);
+    s = ((static_cast<uint64_t>(hi) << 32) | lo);
 #elif defined(__aarch64__) && defined(__GNUC__)
     // There is no rdtsc on ARMv8. CNTVCT_EL0 is the virtual counter of the
     // system timer. It runs at a different frequency than the CPU's, but is
     // the best source of time-based entropy we get.
     uint64_t virtual_timer_value;
     asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
-    s += virtual_timer_value;
+    s = virtual_timer_value;
 #endif
 #endif  // !defined(GOOGLE_PROTOBUF_NO_RDTSC)
-    return s;
+    // Add entropy from the address of the map and the address of the table
+    // array.
+    return static_cast<map_index_t>(
+        absl::HashOf(s, table_, static_cast<const void*>(this)));
   }
 
   enum {
@@ -988,6 +988,18 @@ class KeyMapBase : public UntypedMapBase {
                      static_cast<KeyNode*>(node)->key());
   }
 
+  // Kept as a separate function for testing.
+  static size_type CalculateHiCutoff(size_type num_buckets) {
+    // We want the high cutoff to follow these rules:
+    // - When num_buckets_ == kGlobalEmptyTableSize, make it 0 to force an
+    //   allocation.
+    // - When num_buckets_ < 8, make it num_buckets_ to avoid a reallocation.
+    //   A large load factor is not that important on small tables and saves
+    //   memory.
+    // - Otherwise, make it 75% of num_buckets_.
+    return num_buckets - num_buckets / 16 * 4 - num_buckets % 2;
+  }
+
   // Returns whether it did resize.  Currently this is only used when
   // num_elements_ increases, though it could be used in other situations.
   // It checks for load too low as well as load too high: because any number
@@ -997,13 +1009,12 @@ class KeyMapBase : public UntypedMapBase {
   // policy that sometimes we resize down as well as up, clients can easily
   // keep O(size()) = O(number of buckets) if they want that.
   bool ResizeIfLoadIsOutOfRange(size_type new_size) {
-    const size_type kMaxMapLoadTimes16 = 12;  // controls RAM vs CPU tradeoff
-    const size_type hi_cutoff = num_buckets_ * kMaxMapLoadTimes16 / 16;
+    const size_type hi_cutoff = CalculateHiCutoff(num_buckets_);
     const size_type lo_cutoff = hi_cutoff / 4;
     // We don't care how many elements are in trees.  If a lot are,
     // we may resize even though there are many empty buckets.  In
     // practice, this seems fine.
-    if (PROTOBUF_PREDICT_FALSE(new_size >= hi_cutoff)) {
+    if (PROTOBUF_PREDICT_FALSE(new_size > hi_cutoff)) {
       if (num_buckets_ <= max_size() / 2) {
         Resize(num_buckets_ * 2);
         return true;
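
Two details of the new cutoff worth spelling out. First, the branch-free
expression really does encode all three rules; checking the integer
arithmetic directly (assuming kGlobalEmptyTableSize is 1, which is what the
num_buckets % 2 term singles out, since all real table sizes are even):

// num_buckets - num_buckets / 16 * 4 - num_buckets % 2, all integer math:
static_assert(1 - 1 / 16 * 4 - 1 % 2 == 0, "empty sentinel forces allocation");
static_assert(2 - 2 / 16 * 4 - 2 % 2 == 2, "small: load factor 1.0");
static_assert(4 - 4 / 16 * 4 - 4 % 2 == 4, "small: load factor 1.0");
static_assert(8 - 8 / 16 * 4 - 8 % 2 == 8, "small: load factor 1.0");
static_assert(16 - 16 / 16 * 4 - 16 % 2 == 12, "large: 75% of 16");
static_assert(32 - 32 / 16 * 4 - 32 % 2 == 24, "large: 75% of 32");

Second, the comparison flips from >= to >: with hi_cutoff equal to
num_buckets_ on small tables, strictly-greater is what lets them fill to a
load factor of exactly 1.0 before doubling, where the old >= would have
resized one insertion early.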

--- a/src/google/protobuf/map_test.cc
+++ b/src/google/protobuf/map_test.cc
@@ -5,13 +5,20 @@
 // license that can be found in the LICENSE file or at
 // https://developers.google.com/open-source/licenses/bsd
 
+#include "google/protobuf/map.h"
+
 #include <array>
+#include <cstddef>
+#include <cstdint>
 #include <string>
 #include <type_traits>
 #include <utility>
 #include <vector>
 
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
 #include "absl/container/flat_hash_set.h"
+#include "absl/strings/str_cat.h"
 #include "google/protobuf/arena_test_util.h"
 #include "google/protobuf/internal_visibility_for_testing.h"
 #include "google/protobuf/map_proto2_unittest.pb.h"
@@ -54,7 +61,10 @@ struct is_internal_map_value_type<AlignedAs8> : std::true_type {};
 
 namespace {
 
+using ::testing::AllOf;
 using ::testing::FieldsAre;
+using ::testing::Ge;
+using ::testing::Le;
 using ::testing::UnorderedElementsAre;
@@ -160,6 +170,52 @@ TEST(MapTest, CopyConstructMessagesWithArena) {
   EXPECT_EQ(map1["2"].GetArena(), &arena);
 }
 
+TEST(MapTest, LoadFactorCalculationWorks) {
+  // Three stages:
+  // - empty
+  // - small
+  // - large
+  const auto calculate = MapTestPeer::CalculateHiCutoff;
+  // empty
+  EXPECT_EQ(calculate(kGlobalEmptyTableSize), 0);
+  // small
+  EXPECT_EQ(calculate(2), 2);
+  EXPECT_EQ(calculate(4), 4);
+  EXPECT_EQ(calculate(8), 8);
+  // large
+  for (int i = 16; i < 10000; i *= 2) {
+    EXPECT_EQ(calculate(i), .75 * i) << "i=" << i;
+  }
+}
+
+TEST(MapTest, NaturalGrowthOnArenasReuseBlocks) {
+  Arena arena;
+  std::vector<Map<int, int>*> values;
+  static constexpr int kNumFields = 100;
+  static constexpr int kNumElems = 1000;
+  for (int i = 0; i < kNumFields; ++i) {
+    values.push_back(Arena::CreateMessage<Map<int, int>>(&arena));
+    auto& field = *values.back();
+    for (int j = 0; j < kNumElems; ++j) {
+      field[j] = j;
+    }
+  }
+
+  struct MockNode : internal::NodeBase {
+    std::pair<int, int> v;
+  };
+  size_t expected =
+      values.size() * (MapTestPeer::NumBuckets(*values[0]) * sizeof(void*) +
+                       values[0]->size() * sizeof(MockNode));
+  // Use a 2% slack for other overhead. If we were not reusing the blocks, the
+  // actual value would be ~2x the cost of the bucket array.
+  EXPECT_THAT(arena.SpaceUsed(), AllOf(Ge(expected), Le(1.02 * expected)));
+}
+
 // We changed the internal implementation to use a smaller size type, but the
 // public API will still use size_t to avoid changing the API. Test that.
 TEST(MapTest, SizeTypeIsSizeT) {
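
For a sense of scale in NaturalGrowthOnArenasReuseBlocks (assuming a 64-bit
build, where NodeBase is a single next pointer and MockNode is therefore 16
bytes): 1000 inserts grow each table to 2048 buckets, since 1000 exceeds
CalculateHiCutoff(1024) = 768 but not CalculateHiCutoff(2048) = 1536, so
expected comes to 100 * (2048 * 8 + 1000 * 16) bytes, about 3.2 MB. Without
the block reuse in DeleteTable, the retired 2- through 1024-bucket arrays
would stay live in the arena and add nearly a full extra copy of the
bucket-array cost, well outside the 2% slack.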

--- a/src/google/protobuf/map_test.inc
+++ b/src/google/protobuf/map_test.inc
@@ -182,6 +182,11 @@ struct MapTestPeer {
     return true;
   }
 
+  template <typename T>
+  static size_t NumBuckets(T& map) {
+    return map.num_buckets_;
+  }
+
   template <typename T>
   static size_t BucketNumber(T& map, typename T::key_type key) {
     return map.BucketNumber(key);
@@ -199,6 +204,10 @@ struct MapTestPeer {
     }
     return false;
   }
+
+  static int CalculateHiCutoff(int num_buckets) {
+    return Map<int, int>::CalculateHiCutoff(num_buckets);
+  }
 };
 
 namespace {
@@ -1276,7 +1285,7 @@ TEST_F(MapImplTest, CopyAssignMapIterator) {
 }
 
 TEST_F(MapImplTest, SpaceUsed) {
-  constexpr size_t kMinCap = 8;
+  constexpr size_t kMinCap = 16 / sizeof(void*);
 
   Map<int32_t, int32_t> m;
   // A newly constructed map should have no space used.
@@ -1286,15 +1295,11 @@ TEST_F(MapImplTest, SpaceUsed) {
     std::pair<int32_t, int32_t> kv;
   };
 
-  size_t capacity = kMinCap;
   for (int i = 0; i < 100; ++i) {
     m[i];
-    static constexpr double kMaxLoadFactor = .75;
-    if (m.size() >= capacity * kMaxLoadFactor) {
-      capacity *= 2;
-    }
     EXPECT_EQ(m.SpaceUsedExcludingSelfLong(),
-              sizeof(void*) * capacity + m.size() * sizeof(IntIntNode));
+              sizeof(void*) * MapTestPeer::NumBuckets(m) +
+                  m.size() * sizeof(IntIntNode));
   }
 
   // Test string, and non-scalar keys.
@@ -1360,11 +1365,10 @@ void TestTransparent(const Key& key, const Key& miss_key) {
   const auto& cm = m;
 
   m.insert({"ABC", 1});
-  const auto abc_it = m.begin();
-
   m.insert({"DEF", 2});
+  const auto abc_it = m.find("ABC");
 
   using testing::Pair;
   using testing::UnorderedElementsAre;
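
A note on the TestTransparent change: abc_it used to be captured from begin()
between the two inserts. With the minimum capacity now as low as 2 buckets
and small tables allowed to fill completely, an insert is much more likely to
trigger a rehash that invalidates outstanding iterators, so the test now looks
"ABC" up only after all inserts are done.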
