parent
b823f7f826
commit
0401c70b92
2 changed files with 312 additions and 0 deletions
@ -0,0 +1,308 @@ |
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2023 Google Inc. All rights reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file or at
|
||||
// https://developers.google.com/open-source/licenses/bsd
|
||||
|
||||
#include <cmath> |
||||
#include <cstddef> |
||||
#include <cstdint> |
||||
#include <limits> |
||||
#include <string> |
||||
#include <vector> |
||||
|
||||
#include "absl/random/random.h" |
||||
#include "absl/strings/str_cat.h" |
||||
#include "absl/strings/str_format.h" |
||||
#include "absl/strings/string_view.h" |
||||
#include "google/protobuf/map.h" |
||||
|
||||
namespace google::protobuf::internal { |
||||
struct MapBenchmarkPeer { |
||||
template <typename T> |
||||
static double LoadFactor(const T& map) { |
||||
return static_cast<double>(map.size()) / |
||||
static_cast<double>(map.num_buckets_); |
||||
} |
||||
|
||||
template <typename T> |
||||
static double GetMeanProbeLength(const T& map) { |
||||
double total_probe_cost = 0; |
||||
for (map_index_t b = 0; b < map.num_buckets_; ++b) { |
||||
if (map.TableEntryIsList(b)) { |
||||
auto* node = internal::TableEntryToNode(map.table_[b]); |
||||
size_t cost = 0; |
||||
while (node != nullptr) { |
||||
total_probe_cost += static_cast<double>(cost); |
||||
cost++; |
||||
node = node->next; |
||||
} |
||||
} else if (map.TableEntryIsTree(b)) { |
||||
// Overhead factor to account for more costly binary search.
|
||||
constexpr double kTreeOverhead = 2.0; |
||||
size_t tree_size = TableEntryToTree(map.table_[b])->size(); |
||||
total_probe_cost += kTreeOverhead * static_cast<double>(tree_size) * |
||||
std::log2(tree_size); |
||||
} |
||||
} |
||||
return total_probe_cost / map.size(); |
||||
} |
||||
|
||||
template <typename T> |
||||
static double GetPercentTree(const T& map) { |
||||
size_t total_tree_size = 0; |
||||
for (map_index_t b = 0; b < map.num_buckets_; ++b) { |
||||
if (map.TableEntryIsTree(b)) { |
||||
total_tree_size += TableEntryToTree(map.table_[b])->size(); |
||||
} |
||||
} |
||||
return static_cast<double>(total_tree_size) / |
||||
static_cast<double>(map.size()); |
||||
} |
||||
}; |
||||
} // namespace protobuf
|
||||
} // namespace google::internal
|
||||
|
||||
namespace { |
||||
|
||||
using Peer = google::protobuf::internal::MapBenchmarkPeer; |
||||
|
||||
absl::BitGen& GlobalBitGen() { |
||||
static auto* value = new absl::BitGen; |
||||
return *value; |
||||
} |
||||
|
||||
template <class T> |
||||
using Table = google::protobuf::Map<T, int>; |
||||
|
||||
struct LoadSizes { |
||||
size_t min_load; |
||||
size_t max_load; |
||||
}; |
||||
|
||||
LoadSizes GetMinMaxLoadSizes() { |
||||
static const auto sizes = [] { |
||||
Table<int> t; |
||||
|
||||
// First, fill enough to have a good distribution.
|
||||
constexpr size_t kMinSize = 10000; |
||||
while (t.size() < kMinSize) t[static_cast<int>(t.size())]; |
||||
|
||||
const auto reach_min_load_factor = [&] { |
||||
const double lf = Peer::LoadFactor(t); |
||||
while (lf <= Peer::LoadFactor(t)) t[static_cast<int>(t.size())]; |
||||
}; |
||||
|
||||
// Then, insert until we reach min load factor.
|
||||
reach_min_load_factor(); |
||||
const size_t min_load_size = t.size(); |
||||
|
||||
// Keep going until we hit min load factor again, then go back one.
|
||||
t[static_cast<int>(t.size())]; |
||||
reach_min_load_factor(); |
||||
|
||||
return LoadSizes{min_load_size, t.size() - 1}; |
||||
}(); |
||||
return sizes; |
||||
} |
||||
|
||||
struct Ratios { |
||||
double min_load; |
||||
double avg_load; |
||||
double max_load; |
||||
double percent_tree; |
||||
}; |
||||
|
||||
template <class ElemFn> |
||||
Ratios CollectMeanProbeLengths() { |
||||
const auto min_max_sizes = GetMinMaxLoadSizes(); |
||||
|
||||
ElemFn elem; |
||||
using Key = decltype(elem()); |
||||
Table<Key> t; |
||||
|
||||
Ratios result; |
||||
while (t.size() < min_max_sizes.min_load) t[elem()]; |
||||
result.min_load = Peer::GetMeanProbeLength(t); |
||||
|
||||
while (t.size() < (min_max_sizes.min_load + min_max_sizes.max_load) / 2) |
||||
t[elem()]; |
||||
result.avg_load = Peer::GetMeanProbeLength(t); |
||||
|
||||
while (t.size() < min_max_sizes.max_load) t[elem()]; |
||||
result.max_load = Peer::GetMeanProbeLength(t); |
||||
result.percent_tree = Peer::GetPercentTree(t); |
||||
|
||||
return result; |
||||
} |
||||
|
||||
constexpr char kStringFormat[] = "/path/to/file/name-%07d-of-9999999.txt"; |
||||
|
||||
template <bool small> |
||||
struct String { |
||||
std::string value; |
||||
static std::string Make(uint32_t v) { |
||||
return {small ? absl::StrCat(v) : absl::StrFormat(kStringFormat, v)}; |
||||
} |
||||
}; |
||||
|
||||
template <class T> |
||||
struct Sequential { |
||||
T operator()() const { return current++; } |
||||
mutable T current{}; |
||||
}; |
||||
|
||||
template <bool small> |
||||
struct Sequential<String<small>> { |
||||
std::string operator()() const { return String<small>::Make(current++); } |
||||
mutable uint32_t current = 0; |
||||
}; |
||||
|
||||
template <class T, int percent_skip> |
||||
struct AlmostSequential { |
||||
mutable Sequential<T> current; |
||||
|
||||
auto operator()() const -> decltype(current()) { |
||||
while (absl::Uniform(GlobalBitGen(), 0.0, 1.0) <= percent_skip / 100.) |
||||
current(); |
||||
return current(); |
||||
} |
||||
}; |
||||
|
||||
struct Uniform { |
||||
template <typename T> |
||||
T operator()(T) const { |
||||
return absl::Uniform<T>(absl::IntervalClosed, GlobalBitGen(), T{0}, ~T{0}); |
||||
} |
||||
}; |
||||
|
||||
struct Gaussian { |
||||
template <typename T> |
||||
T operator()(T) const { |
||||
double d; |
||||
do { |
||||
d = absl::Gaussian<double>(GlobalBitGen(), 1e6, 1e4); |
||||
} while (d <= 0 || d > std::numeric_limits<T>::max() / 2); |
||||
return static_cast<T>(d); |
||||
} |
||||
}; |
||||
|
||||
struct Zipf { |
||||
template <typename T> |
||||
T operator()(T) const { |
||||
return absl::Zipf<T>(GlobalBitGen(), std::numeric_limits<T>::max(), 1.6); |
||||
} |
||||
}; |
||||
|
||||
template <class T, class Dist> |
||||
struct Random { |
||||
T operator()() const { return Dist{}(T{}); } |
||||
}; |
||||
|
||||
template <class Dist, bool small> |
||||
struct Random<String<small>, Dist> { |
||||
std::string operator()() const { |
||||
return String<small>::Make(Random<uint32_t, Dist>{}()); |
||||
} |
||||
}; |
||||
|
||||
template <typename> |
||||
std::string Name(); |
||||
|
||||
std::string Name(uint64_t*) { return "u64"; } |
||||
|
||||
template <bool small> |
||||
std::string Name(String<small>*) { |
||||
return small ? "StrS" : "StrL"; |
||||
} |
||||
|
||||
template <class T> |
||||
std::string Name(Sequential<T>*) { |
||||
return "Sequential"; |
||||
} |
||||
|
||||
template <class T, int P> |
||||
std::string Name(AlmostSequential<T, P>*) { |
||||
return absl::StrCat("AlmostSeq_", P); |
||||
} |
||||
|
||||
template <class T> |
||||
std::string Name(Random<T, Uniform>*) { |
||||
return "UnifRand"; |
||||
} |
||||
|
||||
template <class T> |
||||
std::string Name(Random<T, Gaussian>*) { |
||||
return "GausRand"; |
||||
} |
||||
|
||||
template <class T> |
||||
std::string Name(Random<T, Zipf>*) { |
||||
return "ZipfRand"; |
||||
} |
||||
|
||||
template <typename T> |
||||
std::string Name() { |
||||
return Name(static_cast<T*>(nullptr)); |
||||
} |
||||
|
||||
struct Result { |
||||
std::string name; |
||||
std::string dist_name; |
||||
Ratios ratios; |
||||
}; |
||||
|
||||
template <typename T, typename Dist> |
||||
void RunForTypeAndDistribution(std::vector<Result>& results) { |
||||
results.push_back({Name<T>(), Name<Dist>(), CollectMeanProbeLengths<Dist>()}); |
||||
} |
||||
|
||||
template <class T> |
||||
void RunForType(std::vector<Result>& results) { |
||||
RunForTypeAndDistribution<T, Sequential<T>>(results); |
||||
RunForTypeAndDistribution<T, AlmostSequential<T, 20>>(results); |
||||
RunForTypeAndDistribution<T, AlmostSequential<T, 50>>(results); |
||||
RunForTypeAndDistribution<T, Random<T, Uniform>>(results); |
||||
RunForTypeAndDistribution<T, Random<T, Gaussian>>(results); |
||||
RunForTypeAndDistribution<T, Random<T, Zipf>>(results); |
||||
} |
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char** argv) { |
||||
std::vector<Result> results; |
||||
RunForType<uint64_t>(results); |
||||
RunForType<String<true>>(results); |
||||
RunForType<String<false>>(results); |
||||
|
||||
absl::PrintF("{\n"); |
||||
absl::PrintF(" \"benchmarks\": [\n"); |
||||
absl::string_view comma; |
||||
for (const auto& result : results) { |
||||
auto print = [&](absl::string_view stat, double Ratios::*val) { |
||||
std::string name = |
||||
absl::StrCat(result.name, "/", result.dist_name, "/", stat); |
||||
absl::PrintF(" %s{\n", comma); |
||||
absl::PrintF(" \"cpu_time\": 0,\n"); |
||||
absl::PrintF(" \"real_time\": 0,\n"); |
||||
absl::PrintF(" \"allocs_per_iter\": %f,\n", result.ratios.*val); |
||||
|
||||
absl::PrintF(" \"iterations\": 1,\n"); |
||||
absl::PrintF(" \"name\": \"%s\",\n", name); |
||||
absl::PrintF(" \"time_unit\": \"ns\"\n"); |
||||
absl::PrintF(" }\n"); |
||||
comma = ","; |
||||
}; |
||||
print("min", &Ratios::min_load); |
||||
print("avg", &Ratios::avg_load); |
||||
print("max", &Ratios::max_load); |
||||
print("tree_percent", &Ratios::percent_tree); |
||||
} |
||||
absl::PrintF(" ],\n"); |
||||
absl::PrintF(" \"context\": {\n"); |
||||
absl::PrintF(" }\n"); |
||||
absl::PrintF("}\n"); |
||||
|
||||
return 0; |
||||
} |
Loading…
Reference in new issue