Internal change

PiperOrigin-RevId: 572664792
pull/14348/head
Protobuf Team Bot 1 year ago committed by Copybara-Service
parent b823f7f826
commit 0401c70b92
  1. 4
      src/google/protobuf/map.h
  2. 308
      src/google/protobuf/map_probe_benchmark.cc

@ -70,6 +70,7 @@ template <typename Derived, typename Key, typename T,
class MapField;
struct MapTestPeer;
struct MapBenchmarkPeer;
template <typename Key, typename T>
class TypeDefinedMapFieldBase;
@ -563,6 +564,7 @@ class PROTOBUF_EXPORT UntypedMapBase {
protected:
friend class TcParser;
friend struct MapTestPeer;
friend struct MapBenchmarkPeer;
friend class UntypedMapIterator;
struct NodeAndBucket {
@ -900,6 +902,7 @@ class KeyMapBase : public UntypedMapBase {
protected:
friend class TcParser;
friend struct MapTestPeer;
friend struct MapBenchmarkPeer;
PROTOBUF_NOINLINE void erase_no_destroy(map_index_t b, KeyNode* node) {
TreeIterator tree_it;
@ -1665,6 +1668,7 @@ class Map : private internal::KeyMapBase<internal::KeyForBase<Key>> {
friend class internal::MapFieldLite;
friend class internal::TcParser;
friend struct internal::MapTestPeer;
friend struct internal::MapBenchmarkPeer;
};
namespace internal {

@ -0,0 +1,308 @@
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>
#include "absl/random/random.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "google/protobuf/map.h"
namespace google::protobuf::internal {
struct MapBenchmarkPeer {
template <typename T>
static double LoadFactor(const T& map) {
return static_cast<double>(map.size()) /
static_cast<double>(map.num_buckets_);
}
template <typename T>
static double GetMeanProbeLength(const T& map) {
double total_probe_cost = 0;
for (map_index_t b = 0; b < map.num_buckets_; ++b) {
if (map.TableEntryIsList(b)) {
auto* node = internal::TableEntryToNode(map.table_[b]);
size_t cost = 0;
while (node != nullptr) {
total_probe_cost += static_cast<double>(cost);
cost++;
node = node->next;
}
} else if (map.TableEntryIsTree(b)) {
// Overhead factor to account for more costly binary search.
constexpr double kTreeOverhead = 2.0;
size_t tree_size = TableEntryToTree(map.table_[b])->size();
total_probe_cost += kTreeOverhead * static_cast<double>(tree_size) *
std::log2(tree_size);
}
}
return total_probe_cost / map.size();
}
template <typename T>
static double GetPercentTree(const T& map) {
size_t total_tree_size = 0;
for (map_index_t b = 0; b < map.num_buckets_; ++b) {
if (map.TableEntryIsTree(b)) {
total_tree_size += TableEntryToTree(map.table_[b])->size();
}
}
return static_cast<double>(total_tree_size) /
static_cast<double>(map.size());
}
};
} // namespace protobuf
} // namespace google::internal
namespace {
using Peer = google::protobuf::internal::MapBenchmarkPeer;
absl::BitGen& GlobalBitGen() {
static auto* value = new absl::BitGen;
return *value;
}
template <class T>
using Table = google::protobuf::Map<T, int>;
struct LoadSizes {
size_t min_load;
size_t max_load;
};
LoadSizes GetMinMaxLoadSizes() {
static const auto sizes = [] {
Table<int> t;
// First, fill enough to have a good distribution.
constexpr size_t kMinSize = 10000;
while (t.size() < kMinSize) t[static_cast<int>(t.size())];
const auto reach_min_load_factor = [&] {
const double lf = Peer::LoadFactor(t);
while (lf <= Peer::LoadFactor(t)) t[static_cast<int>(t.size())];
};
// Then, insert until we reach min load factor.
reach_min_load_factor();
const size_t min_load_size = t.size();
// Keep going until we hit min load factor again, then go back one.
t[static_cast<int>(t.size())];
reach_min_load_factor();
return LoadSizes{min_load_size, t.size() - 1};
}();
return sizes;
}
struct Ratios {
double min_load;
double avg_load;
double max_load;
double percent_tree;
};
template <class ElemFn>
Ratios CollectMeanProbeLengths() {
const auto min_max_sizes = GetMinMaxLoadSizes();
ElemFn elem;
using Key = decltype(elem());
Table<Key> t;
Ratios result;
while (t.size() < min_max_sizes.min_load) t[elem()];
result.min_load = Peer::GetMeanProbeLength(t);
while (t.size() < (min_max_sizes.min_load + min_max_sizes.max_load) / 2)
t[elem()];
result.avg_load = Peer::GetMeanProbeLength(t);
while (t.size() < min_max_sizes.max_load) t[elem()];
result.max_load = Peer::GetMeanProbeLength(t);
result.percent_tree = Peer::GetPercentTree(t);
return result;
}
constexpr char kStringFormat[] = "/path/to/file/name-%07d-of-9999999.txt";
template <bool small>
struct String {
std::string value;
static std::string Make(uint32_t v) {
return {small ? absl::StrCat(v) : absl::StrFormat(kStringFormat, v)};
}
};
template <class T>
struct Sequential {
T operator()() const { return current++; }
mutable T current{};
};
template <bool small>
struct Sequential<String<small>> {
std::string operator()() const { return String<small>::Make(current++); }
mutable uint32_t current = 0;
};
template <class T, int percent_skip>
struct AlmostSequential {
mutable Sequential<T> current;
auto operator()() const -> decltype(current()) {
while (absl::Uniform(GlobalBitGen(), 0.0, 1.0) <= percent_skip / 100.)
current();
return current();
}
};
struct Uniform {
template <typename T>
T operator()(T) const {
return absl::Uniform<T>(absl::IntervalClosed, GlobalBitGen(), T{0}, ~T{0});
}
};
struct Gaussian {
template <typename T>
T operator()(T) const {
double d;
do {
d = absl::Gaussian<double>(GlobalBitGen(), 1e6, 1e4);
} while (d <= 0 || d > std::numeric_limits<T>::max() / 2);
return static_cast<T>(d);
}
};
struct Zipf {
template <typename T>
T operator()(T) const {
return absl::Zipf<T>(GlobalBitGen(), std::numeric_limits<T>::max(), 1.6);
}
};
template <class T, class Dist>
struct Random {
T operator()() const { return Dist{}(T{}); }
};
template <class Dist, bool small>
struct Random<String<small>, Dist> {
std::string operator()() const {
return String<small>::Make(Random<uint32_t, Dist>{}());
}
};
template <typename>
std::string Name();
std::string Name(uint64_t*) { return "u64"; }
template <bool small>
std::string Name(String<small>*) {
return small ? "StrS" : "StrL";
}
template <class T>
std::string Name(Sequential<T>*) {
return "Sequential";
}
template <class T, int P>
std::string Name(AlmostSequential<T, P>*) {
return absl::StrCat("AlmostSeq_", P);
}
template <class T>
std::string Name(Random<T, Uniform>*) {
return "UnifRand";
}
template <class T>
std::string Name(Random<T, Gaussian>*) {
return "GausRand";
}
template <class T>
std::string Name(Random<T, Zipf>*) {
return "ZipfRand";
}
template <typename T>
std::string Name() {
return Name(static_cast<T*>(nullptr));
}
struct Result {
std::string name;
std::string dist_name;
Ratios ratios;
};
template <typename T, typename Dist>
void RunForTypeAndDistribution(std::vector<Result>& results) {
results.push_back({Name<T>(), Name<Dist>(), CollectMeanProbeLengths<Dist>()});
}
template <class T>
void RunForType(std::vector<Result>& results) {
RunForTypeAndDistribution<T, Sequential<T>>(results);
RunForTypeAndDistribution<T, AlmostSequential<T, 20>>(results);
RunForTypeAndDistribution<T, AlmostSequential<T, 50>>(results);
RunForTypeAndDistribution<T, Random<T, Uniform>>(results);
RunForTypeAndDistribution<T, Random<T, Gaussian>>(results);
RunForTypeAndDistribution<T, Random<T, Zipf>>(results);
}
} // namespace
int main(int argc, char** argv) {
std::vector<Result> results;
RunForType<uint64_t>(results);
RunForType<String<true>>(results);
RunForType<String<false>>(results);
absl::PrintF("{\n");
absl::PrintF(" \"benchmarks\": [\n");
absl::string_view comma;
for (const auto& result : results) {
auto print = [&](absl::string_view stat, double Ratios::*val) {
std::string name =
absl::StrCat(result.name, "/", result.dist_name, "/", stat);
absl::PrintF(" %s{\n", comma);
absl::PrintF(" \"cpu_time\": 0,\n");
absl::PrintF(" \"real_time\": 0,\n");
absl::PrintF(" \"allocs_per_iter\": %f,\n", result.ratios.*val);
absl::PrintF(" \"iterations\": 1,\n");
absl::PrintF(" \"name\": \"%s\",\n", name);
absl::PrintF(" \"time_unit\": \"ns\"\n");
absl::PrintF(" }\n");
comma = ",";
};
print("min", &Ratios::min_load);
print("avg", &Ratios::avg_load);
print("max", &Ratios::max_load);
print("tree_percent", &Ratios::percent_tree);
}
absl::PrintF(" ],\n");
absl::PrintF(" \"context\": {\n");
absl::PrintF(" }\n");
absl::PrintF("}\n");
return 0;
}
Loading…
Cancel
Save