-- ff793052bd01e1e4fcf639f94d7c30c4855a9372 by Evan Brown <ezb@google.com>:

Roll forward of btree_iterator refactoring.

PiperOrigin-RevId: 346116047

-- 17984679f16e3e2139b0f14fa76f4a6ca16a3ef9 by Chris Kennelly <ckennelly@google.com>:

Extend absl::StrContains to accept single-character needles.

Single characters are more efficient to search for. Extending this API
allows the abseil-string-find-str-contains Clang-Tidy check to include
this pattern.

The C++ committee has adopted http://wg21.link/P1679 for inclusion in C++23.

PiperOrigin-RevId: 346095060

-- ef20b31c501b1dcaa25e244fd8f8aa43dec09bd6 by Jorg Brown <jorg@google.com>:

Internal change for cord ring.

PiperOrigin-RevId: 346087545

-- b70f2c1cb77fc9e733a126e790967d45c5fd1dc7 by Derek Mauro <dmauro@google.com>:

Release layout_benchmark.

PiperOrigin-RevId: 345968909

-- 3a0eda337ee43622f92cfe14c2aa06f72dc71ee5 by Derek Mauro <dmauro@google.com>:

Release raw_hash_set_probe_benchmark.

PiperOrigin-RevId: 345965969

-- abffdb4bb241a2264cb4e73a6262b660bb10447d by Derek Mauro <dmauro@google.com>:

Internal change.

PiperOrigin-RevId: 345733599

-- 7c9e24a71188df945be17fe98f700bdb51f81b16 by Derek Mauro <dmauro@google.com>:

Release hash_benchmark.

PiperOrigin-RevId: 345721635

-- d68f33f17f9a8cd3f6da8eee3870bdb46402cdc8 by Derek Mauro <dmauro@google.com>:

Release raw_hash_set_benchmark.

PiperOrigin-RevId: 345708384

-- 6e6c547d4d1327b226c0ffe8ff34d0aa103ce24b by Abseil Team <absl-team@google.com>:

Updates the implementation of InlinedVector to accurately express the
value-initialization semantics of the default constructor.

PiperOrigin-RevId: 345548260

-- 1532424deda97d468444c217cc0fa4614099c7c1 by Evan Brown <ezb@google.com>:

Rollback btree_iterator refactoring.

PiperOrigin-RevId: 345543900

GitOrigin-RevId: ff793052bd01e1e4fcf639f94d7c30c4855a9372
Change-Id: I719831981fd056de41939f9addfee3d85e3b49b2
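For illustration of the StrContains change above, a minimal caller (editor's sketch, not part of this commit):

#include "absl/strings/match.h"
#include "absl/strings/string_view.h"

// After this change, absl::StrContains accepts a char needle in addition to
// a string_view needle; single characters are more efficient to search for.
bool HasSlash(absl::string_view path) {
  return absl::StrContains(path, '/');  // equivalent to StrContains(path, "/")
}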
parent acf3390ca2 commit fbdff6f3ae
13 changed files with 1481 additions and 19 deletions
absl/container/internal/layout_benchmark.cc
@@ -0,0 +1,122 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Every benchmark should have the same performance as the corresponding
// headroom benchmark.
#include <cstddef>
#include <cstdint>

#include "absl/base/internal/raw_logging.h"
#include "absl/container/internal/layout.h"
#include "benchmark/benchmark.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace container_internal {
namespace {

using ::benchmark::DoNotOptimize;

using Int128 = int64_t[2];

// This benchmark provides the upper bound on performance for BM_OffsetConstant.
template <size_t Offset, class... Ts>
void BM_OffsetConstantHeadroom(benchmark::State& state) {
  for (auto _ : state) {
    DoNotOptimize(Offset);
  }
}

template <size_t Offset, class... Ts>
void BM_OffsetConstant(benchmark::State& state) {
  using L = Layout<Ts...>;
  ABSL_RAW_CHECK(L::Partial(3, 5, 7).template Offset<3>() == Offset,
                 "Invalid offset");
  for (auto _ : state) {
    DoNotOptimize(L::Partial(3, 5, 7).template Offset<3>());
  }
}

template <class... Ts>
size_t VariableOffset(size_t n, size_t m, size_t k);

template <>
size_t VariableOffset<int8_t, int16_t, int32_t, Int128>(size_t n, size_t m,
                                                        size_t k) {
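  // Editor's note: Align rounds n up to the next multiple of m, where m is a
  // power of two; e.g. Align(13, 8) == 16 and Align(16, 8) == 16.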
  auto Align = [](size_t n, size_t m) { return (n + m - 1) & ~(m - 1); };
  return Align(Align(Align(n * 1, 2) + m * 2, 4) + k * 4, 8);
}

template <>
size_t VariableOffset<Int128, int32_t, int16_t, int8_t>(size_t n, size_t m,
                                                        size_t k) {
  // No alignment is necessary.
  return n * 16 + m * 4 + k * 2;
}

// This benchmark provides the upper bound on performance for BM_OffsetVariable.
template <size_t Offset, class... Ts>
void BM_OffsetVariableHeadroom(benchmark::State& state) {
  size_t n = 3;
  size_t m = 5;
  size_t k = 7;
  ABSL_RAW_CHECK(VariableOffset<Ts...>(n, m, k) == Offset, "Invalid offset");
  for (auto _ : state) {
    DoNotOptimize(n);
    DoNotOptimize(m);
    DoNotOptimize(k);
    DoNotOptimize(VariableOffset<Ts...>(n, m, k));
  }
}

template <size_t Offset, class... Ts>
void BM_OffsetVariable(benchmark::State& state) {
  using L = Layout<Ts...>;
  size_t n = 3;
  size_t m = 5;
  size_t k = 7;
  ABSL_RAW_CHECK(L::Partial(n, m, k).template Offset<3>() == Offset,
                 "Invalid offset");
  for (auto _ : state) {
    DoNotOptimize(n);
    DoNotOptimize(m);
    DoNotOptimize(k);
    DoNotOptimize(L::Partial(n, m, k).template Offset<3>());
  }
}

// Run all benchmarks in two modes:
//
//   Layout with padding:    int8_t[3], int16_t[5], int32_t[7], Int128[?].
//   Layout without padding: Int128[3], int32_t[5], int16_t[7], int8_t[?].

#define OFFSET_BENCHMARK(NAME, OFFSET, T1, T2, T3, T4) \
  auto& NAME##_##OFFSET##_##T1##_##T2##_##T3##_##T4 =  \
      NAME<OFFSET, T1, T2, T3, T4>;                    \
  BENCHMARK(NAME##_##OFFSET##_##T1##_##T2##_##T3##_##T4)

OFFSET_BENCHMARK(BM_OffsetConstantHeadroom, 48, int8_t, int16_t, int32_t,
                 Int128);
OFFSET_BENCHMARK(BM_OffsetConstant, 48, int8_t, int16_t, int32_t, Int128);
OFFSET_BENCHMARK(BM_OffsetConstantHeadroom, 82, Int128, int32_t, int16_t,
                 int8_t);
OFFSET_BENCHMARK(BM_OffsetConstant, 82, Int128, int32_t, int16_t, int8_t);
OFFSET_BENCHMARK(BM_OffsetVariableHeadroom, 48, int8_t, int16_t, int32_t,
                 Int128);
OFFSET_BENCHMARK(BM_OffsetVariable, 48, int8_t, int16_t, int32_t, Int128);
OFFSET_BENCHMARK(BM_OffsetVariableHeadroom, 82, Int128, int32_t, int16_t,
                 int8_t);
OFFSET_BENCHMARK(BM_OffsetVariable, 82, Int128, int32_t, int16_t, int8_t);
}  // namespace
}  // namespace container_internal
ABSL_NAMESPACE_END
}  // namespace absl
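The expected offsets above (48 and 82) follow directly from the element sizes and alignments; a minimal standalone check of the arithmetic (editor's sketch, not part of this commit):

#include <cstddef>

// Round n up to the next multiple of the power-of-two m.
constexpr size_t Align(size_t n, size_t m) { return (n + m - 1) & ~(m - 1); }

// Padded layout: 3 x int8_t, 5 x int16_t, 7 x int32_t, then Int128 at 48.
static_assert(Align(Align(Align(3 * 1, 2) + 5 * 2, 4) + 7 * 4, 8) == 48, "");
// Unpadded layout: 3 x Int128, 5 x int32_t, 7 x int16_t, then int8_t at 82.
static_assert(3 * 16 + 5 * 4 + 7 * 2 == 82, "");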
absl/container/internal/raw_hash_set_benchmark.cc
@@ -0,0 +1,396 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/container/internal/raw_hash_set.h"

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <deque>
#include <numeric>
#include <random>
#include <vector>

#include "absl/base/internal/raw_logging.h"
#include "absl/container/internal/hash_function_defaults.h"
#include "absl/strings/str_format.h"
#include "benchmark/benchmark.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace container_internal {

struct RawHashSetTestOnlyAccess {
  template <typename C>
  static auto GetSlots(const C& c) -> decltype(c.slots_) {
    return c.slots_;
  }
};

namespace {

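// Editor's note: a minimal raw_hash_set policy. A policy supplies the slot
// representation plus the construct/destroy/transfer/element/apply hooks that
// raw_hash_set uses to manage elements; here a slot is just an int64_t.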
struct IntPolicy {
  using slot_type = int64_t;
  using key_type = int64_t;
  using init_type = int64_t;

  static void construct(void*, int64_t* slot, int64_t v) { *slot = v; }
  static void destroy(void*, int64_t*) {}
  static void transfer(void*, int64_t* new_slot, int64_t* old_slot) {
    *new_slot = *old_slot;
  }

  static int64_t& element(slot_type* slot) { return *slot; }

  template <class F>
  static auto apply(F&& f, int64_t x) -> decltype(std::forward<F>(f)(x, x)) {
    return std::forward<F>(f)(x, x);
  }
};

class StringPolicy {
  template <class F, class K, class V,
            class = typename std::enable_if<
                std::is_convertible<const K&, absl::string_view>::value>::type>
  decltype(std::declval<F>()(
      std::declval<const absl::string_view&>(), std::piecewise_construct,
      std::declval<std::tuple<K>>(),
      std::declval<V>())) static apply_impl(F&& f,
                                            std::pair<std::tuple<K>, V> p) {
    const absl::string_view& key = std::get<0>(p.first);
    return std::forward<F>(f)(key, std::piecewise_construct, std::move(p.first),
                              std::move(p.second));
  }

 public:
  struct slot_type {
    struct ctor {};

    template <class... Ts>
    slot_type(ctor, Ts&&... ts) : pair(std::forward<Ts>(ts)...) {}

    std::pair<std::string, std::string> pair;
  };

  using key_type = std::string;
  using init_type = std::pair<std::string, std::string>;

  template <class allocator_type, class... Args>
  static void construct(allocator_type* alloc, slot_type* slot, Args... args) {
    std::allocator_traits<allocator_type>::construct(
        *alloc, slot, typename slot_type::ctor(), std::forward<Args>(args)...);
  }

  template <class allocator_type>
  static void destroy(allocator_type* alloc, slot_type* slot) {
    std::allocator_traits<allocator_type>::destroy(*alloc, slot);
  }

  template <class allocator_type>
  static void transfer(allocator_type* alloc, slot_type* new_slot,
                       slot_type* old_slot) {
    construct(alloc, new_slot, std::move(old_slot->pair));
    destroy(alloc, old_slot);
  }

  static std::pair<std::string, std::string>& element(slot_type* slot) {
    return slot->pair;
  }

  template <class F, class... Args>
  static auto apply(F&& f, Args&&... args)
      -> decltype(apply_impl(std::forward<F>(f),
                             PairArgs(std::forward<Args>(args)...))) {
    return apply_impl(std::forward<F>(f),
                      PairArgs(std::forward<Args>(args)...));
  }
};

struct StringHash : container_internal::hash_default_hash<absl::string_view> {
  using is_transparent = void;
};
struct StringEq : std::equal_to<absl::string_view> {
  using is_transparent = void;
};

struct StringTable
    : raw_hash_set<StringPolicy, StringHash, StringEq, std::allocator<int>> {
  using Base = typename StringTable::raw_hash_set;
  StringTable() {}
  using Base::Base;
};

struct IntTable
    : raw_hash_set<IntPolicy, container_internal::hash_default_hash<int64_t>,
                   std::equal_to<int64_t>, std::allocator<int64_t>> {
  using Base = typename IntTable::raw_hash_set;
  IntTable() {}
  using Base::Base;
};

struct string_generator {
  template <class RNG>
  std::string operator()(RNG& rng) const {
    std::string res;
    res.resize(size);
    std::uniform_int_distribution<uint32_t> printable_ascii(0x20, 0x7E);
    std::generate(res.begin(), res.end(), [&] { return printable_ascii(rng); });
    return res;
  }

  size_t size;
};

// Model a cache in steady state.
//
// On a table of size N, keep deleting the LRU entry and add a random one.
void BM_CacheInSteadyState(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;
  std::deque<std::string> keys;
  while (t.size() < state.range(0)) {
    auto x = t.emplace(gen(rng), gen(rng));
    if (x.second) keys.push_back(x.first->first);
  }
  ABSL_RAW_CHECK(state.range(0) >= 10, "");
  while (state.KeepRunning()) {
    // Some cache hits.
    std::deque<std::string>::const_iterator it;
    for (int i = 0; i != 90; ++i) {
      if (i % 10 == 0) it = keys.end();
      ::benchmark::DoNotOptimize(t.find(*--it));
    }
    // Some cache misses.
    for (int i = 0; i != 10; ++i) ::benchmark::DoNotOptimize(t.find(gen(rng)));
    ABSL_RAW_CHECK(t.erase(keys.front()), keys.front().c_str());
    keys.pop_front();
    while (true) {
      auto x = t.emplace(gen(rng), gen(rng));
      if (x.second) {
        keys.push_back(x.first->first);
        break;
      }
    }
  }
  state.SetItemsProcessed(state.iterations());
  state.SetLabel(absl::StrFormat("load_factor=%.2f", t.load_factor()));
}

template <typename Benchmark>
void CacheInSteadyStateArgs(Benchmark* bm) {
  // The default.
  const float max_load_factor = 0.875;
  // When the cache is at the steady state, the probe sequence will equal
  // capacity if there is no reclamation of deleted slots. Pick a number large
  // enough to make the benchmark slow for that case.
  const size_t capacity = 1 << 10;

  // Check N data points to cover load factors in [0.4, 0.8).
  const size_t kNumPoints = 10;
  for (size_t i = 0; i != kNumPoints; ++i)
    bm->Arg(std::ceil(
        capacity * (max_load_factor + i * max_load_factor / kNumPoints) / 2));
}
BENCHMARK(BM_CacheInSteadyState)->Apply(CacheInSteadyStateArgs);

void BM_EndComparison(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  string_generator gen{12};
  StringTable t;
  while (t.size() < state.range(0)) {
    t.emplace(gen(rng), gen(rng));
  }

  for (auto _ : state) {
    for (auto it = t.begin(); it != t.end(); ++it) {
      benchmark::DoNotOptimize(it);
      benchmark::DoNotOptimize(t);
      benchmark::DoNotOptimize(it != t.end());
    }
  }
}
BENCHMARK(BM_EndComparison)->Arg(400);

void BM_CopyCtor(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});

  while (t.size() < state.range(0)) {
    t.emplace(dist(rng));
  }

  for (auto _ : state) {
    IntTable t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyCtor)->Range(128, 4096);

void BM_CopyAssign(benchmark::State& state) {
  std::random_device rd;
  std::mt19937 rng(rd());
  IntTable t;
  std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
  while (t.size() < state.range(0)) {
    t.emplace(dist(rng));
  }

  IntTable t2;
  for (auto _ : state) {
    t2 = t;
    benchmark::DoNotOptimize(t2);
  }
}
BENCHMARK(BM_CopyAssign)->Range(128, 4096);

void BM_NoOpReserveIntTable(benchmark::State& state) {
  IntTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveIntTable);

void BM_NoOpReserveStringTable(benchmark::State& state) {
  StringTable t;
  t.reserve(100000);
  for (auto _ : state) {
    benchmark::DoNotOptimize(t);
    t.reserve(100000);
  }
}
BENCHMARK(BM_NoOpReserveStringTable);

void BM_ReserveIntTable(benchmark::State& state) {
  int reserve_size = state.range(0);
  for (auto _ : state) {
    state.PauseTiming();
    IntTable t;
    state.ResumeTiming();
    benchmark::DoNotOptimize(t);
    t.reserve(reserve_size);
  }
}
BENCHMARK(BM_ReserveIntTable)->Range(128, 4096);

void BM_ReserveStringTable(benchmark::State& state) {
  int reserve_size = state.range(0);
  for (auto _ : state) {
    state.PauseTiming();
    StringTable t;
    state.ResumeTiming();
    benchmark::DoNotOptimize(t);
    t.reserve(reserve_size);
  }
}
BENCHMARK(BM_ReserveStringTable)->Range(128, 4096);

void BM_Group_Match(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  std::iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  h2_t h = 1;
  for (auto _ : state) {
    ::benchmark::DoNotOptimize(h);
    ::benchmark::DoNotOptimize(g.Match(h));
  }
}
BENCHMARK(BM_Group_Match);

void BM_Group_MatchEmpty(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  std::iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) ::benchmark::DoNotOptimize(g.MatchEmpty());
}
BENCHMARK(BM_Group_MatchEmpty);

void BM_Group_MatchEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  std::iota(group.begin(), group.end(), -4);
  Group g{group.data()};
  for (auto _ : state) ::benchmark::DoNotOptimize(g.MatchEmptyOrDeleted());
}
BENCHMARK(BM_Group_MatchEmptyOrDeleted);

void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  std::iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state)
    ::benchmark::DoNotOptimize(g.CountLeadingEmptyOrDeleted());
}
BENCHMARK(BM_Group_CountLeadingEmptyOrDeleted);

void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) {
  std::array<ctrl_t, Group::kWidth> group;
  std::iota(group.begin(), group.end(), -2);
  Group g{group.data()};
  for (auto _ : state) ::benchmark::DoNotOptimize(*g.MatchEmptyOrDeleted());
}
BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted);

void BM_DropDeletes(benchmark::State& state) {
  constexpr size_t capacity = (1 << 20) - 1;
  std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth);
  ctrl[capacity] = kSentinel;
  std::vector<ctrl_t> pattern = {kEmpty, 2, kDeleted, 2, kEmpty, 1, kDeleted};
  for (size_t i = 0; i != capacity; ++i) {
    ctrl[i] = pattern[i % pattern.size()];
  }
  while (state.KeepRunning()) {
    state.PauseTiming();
    std::vector<ctrl_t> ctrl_copy = ctrl;
    state.ResumeTiming();
    ConvertDeletedToEmptyAndFullToDeleted(ctrl_copy.data(), capacity);
    ::benchmark::DoNotOptimize(ctrl_copy[capacity]);
  }
}
BENCHMARK(BM_DropDeletes);

}  // namespace
}  // namespace container_internal
ABSL_NAMESPACE_END
}  // namespace absl

// These methods are here to make it easy to examine the assembly for targeted
// parts of the API.
auto CodegenAbslRawHashSetInt64Find(absl::container_internal::IntTable* table,
                                    int64_t key) -> decltype(table->find(key)) {
  return table->find(key);
}

bool CodegenAbslRawHashSetInt64FindNeEnd(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->find(key) != table->end();
}

bool CodegenAbslRawHashSetInt64Contains(
    absl::container_internal::IntTable* table, int64_t key) {
  return table->contains(key);
}

void CodegenAbslRawHashSetInt64Iterate(
    absl::container_internal::IntTable* table) {
  for (auto x : *table) benchmark::DoNotOptimize(x);
}

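// Editor's note: odr-using the functions above keeps them (and everything they
// call) from being discarded by the compiler and linker, so their assembly
// remains available for inspection.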
int odr =
    (::benchmark::DoNotOptimize(std::make_tuple(
         &CodegenAbslRawHashSetInt64Find, &CodegenAbslRawHashSetInt64FindNeEnd,
         &CodegenAbslRawHashSetInt64Contains,
         &CodegenAbslRawHashSetInt64Iterate)),
     1);
absl/container/internal/raw_hash_set_probe_benchmark.cc
@@ -0,0 +1,590 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Generates probe length statistics for many combinations of key types and key
// distributions, all using the default hash function for swisstable.

#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <regex>  // NOLINT
#include <string>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/container/internal/hash_function_defaults.h"
#include "absl/container/internal/hashtable_debug.h"
#include "absl/container/internal/raw_hash_set.h"
#include "absl/random/distributions.h"
#include "absl/random/random.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"

namespace {

enum class OutputStyle { kRegular, kBenchmark };

// The --benchmark command line flag.
// This is populated from main().
// When run in "benchmark" mode, we have different output. This allows
// A/B comparisons with tools like `benchy`.
absl::string_view benchmarks;

OutputStyle output() {
  return !benchmarks.empty() ? OutputStyle::kBenchmark : OutputStyle::kRegular;
}

template <class T>
struct Policy {
  using slot_type = T;
  using key_type = T;
  using init_type = T;

  template <class allocator_type, class Arg>
  static void construct(allocator_type* alloc, slot_type* slot,
                        const Arg& arg) {
    std::allocator_traits<allocator_type>::construct(*alloc, slot, arg);
  }

  template <class allocator_type>
  static void destroy(allocator_type* alloc, slot_type* slot) {
    std::allocator_traits<allocator_type>::destroy(*alloc, slot);
  }

  static slot_type& element(slot_type* slot) { return *slot; }

  template <class F, class... Args>
  static auto apply(F&& f, const slot_type& arg)
      -> decltype(std::forward<F>(f)(arg, arg)) {
    return std::forward<F>(f)(arg, arg);
  }
};

absl::BitGen& GlobalBitGen() {
  static auto* value = new absl::BitGen;
  return *value;
}

// Keeps a pool of allocations and randomly gives one out.
// This introduces more randomization to the addresses given to swisstable and
// should help smooth out this factor from probe length calculation.
template <class T>
class RandomizedAllocator {
 public:
  using value_type = T;

  RandomizedAllocator() = default;
  template <typename U>
  RandomizedAllocator(RandomizedAllocator<U>) {}  // NOLINT

  static T* allocate(size_t n) {
    auto& pointers = GetPointers(n);
    // Fill the pool.
    while (pointers.size() < kRandomPool) {
      pointers.push_back(std::allocator<T>{}.allocate(n));
    }

    // Choose a random one.
    size_t i = absl::Uniform<size_t>(GlobalBitGen(), 0, pointers.size());
    T* result = pointers[i];
    pointers[i] = pointers.back();
    pointers.pop_back();
    return result;
  }

  static void deallocate(T* p, size_t n) {
    // Just put it back on the pool. No need to release the memory.
    GetPointers(n).push_back(p);
  }

 private:
  // We keep at least kRandomPool allocations for each size.
  static constexpr size_t kRandomPool = 20;

  static std::vector<T*>& GetPointers(size_t n) {
    static auto* m = new absl::flat_hash_map<size_t, std::vector<T*>>();
    return (*m)[n];
  }
};

template <class T>
struct DefaultHash {
  using type = absl::container_internal::hash_default_hash<T>;
};

template <class T>
using DefaultHashT = typename DefaultHash<T>::type;

template <class T>
struct Table : absl::container_internal::raw_hash_set<
                   Policy<T>, DefaultHashT<T>,
                   absl::container_internal::hash_default_eq<T>,
                   RandomizedAllocator<T>> {};

struct LoadSizes {
  size_t min_load;
  size_t max_load;
};

LoadSizes GetMinMaxLoadSizes() {
  static const auto sizes = [] {
    Table<int> t;

    // First, fill enough to have a good distribution.
    constexpr size_t kMinSize = 10000;
    while (t.size() < kMinSize) t.insert(t.size());

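    // Editor's note: load_factor() falls when the table grows, so the loop
    // below inserts until a rehash occurs, i.e. until the load factor drops
    // below the value it had on entry.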
    const auto reach_min_load_factor = [&] {
      const double lf = t.load_factor();
      while (lf <= t.load_factor()) t.insert(t.size());
    };

    // Then, insert until we reach min load factor.
    reach_min_load_factor();
    const size_t min_load_size = t.size();

    // Keep going until we hit min load factor again, then go back one.
    t.insert(t.size());
    reach_min_load_factor();

    return LoadSizes{min_load_size, t.size() - 1};
  }();
  return sizes;
}

struct Ratios {
  double min_load;
  double avg_load;
  double max_load;
};

// See absl/container/internal/hashtable_debug.h for details on
// probe length calculation.
template <class ElemFn>
Ratios CollectMeanProbeLengths() {
  const auto min_max_sizes = GetMinMaxLoadSizes();

  ElemFn elem;
  using Key = decltype(elem());
  Table<Key> t;

  Ratios result;
  while (t.size() < min_max_sizes.min_load) t.insert(elem());
  result.min_load =
      absl::container_internal::GetHashtableDebugProbeSummary(t).mean;

  while (t.size() < (min_max_sizes.min_load + min_max_sizes.max_load) / 2)
    t.insert(elem());
  result.avg_load =
      absl::container_internal::GetHashtableDebugProbeSummary(t).mean;

  while (t.size() < min_max_sizes.max_load) t.insert(elem());
  result.max_load =
      absl::container_internal::GetHashtableDebugProbeSummary(t).mean;

  return result;
}

template <int Align>
uintptr_t PointerForAlignment() {
  alignas(Align) static constexpr uintptr_t kInitPointer = 0;
  return reinterpret_cast<uintptr_t>(&kInitPointer);
}

// This incomplete type is used for testing hash of pointers of different
// alignments.
// NOTE: We are generating invalid pointer values on the fly with
// reinterpret_cast. These are not "safely derived" pointers so using them is
// technically UB. It is unlikely to be a problem, though.
template <int Align>
struct Ptr;

template <int Align>
Ptr<Align>* MakePtr(uintptr_t v) {
  if (sizeof(v) == 8) {
    constexpr int kCopyBits = 16;
    // Ensure high bits are all the same.
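    // Editor's note: the shift pair below sign-extends bit 47 through the top
    // 16 bits, mimicking the canonical form of 64-bit user-space addresses.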
    v = static_cast<uintptr_t>(static_cast<intptr_t>(v << kCopyBits) >>
                               kCopyBits);
  }
  return reinterpret_cast<Ptr<Align>*>(v);
}

struct IntIdentity {
  uint64_t i;
  friend bool operator==(IntIdentity a, IntIdentity b) { return a.i == b.i; }
  IntIdentity operator++(int) { return IntIdentity{i++}; }
};

template <int Align>
struct PtrIdentity {
  explicit PtrIdentity(uintptr_t val = PointerForAlignment<Align>()) : i(val) {}
  uintptr_t i;
  friend bool operator==(PtrIdentity a, PtrIdentity b) { return a.i == b.i; }
  PtrIdentity operator++(int) {
    PtrIdentity p(i);
    i += Align;
    return p;
  }
};

constexpr char kStringFormat[] = "/path/to/file/name-%07d-of-9999999.txt";

template <bool small>
struct String {
  std::string value;
  static std::string Make(uint32_t v) {
    return {small ? absl::StrCat(v) : absl::StrFormat(kStringFormat, v)};
  }
};

template <>
struct DefaultHash<IntIdentity> {
  struct type {
    size_t operator()(IntIdentity t) const { return t.i; }
  };
};

template <int Align>
struct DefaultHash<PtrIdentity<Align>> {
  struct type {
    size_t operator()(PtrIdentity<Align> t) const { return t.i; }
  };
};

template <class T>
struct Sequential {
  T operator()() const { return current++; }
  mutable T current{};
};

template <int Align>
struct Sequential<Ptr<Align>*> {
  Ptr<Align>* operator()() const {
    auto* result = MakePtr<Align>(current);
    current += Align;
    return result;
  }
  mutable uintptr_t current = PointerForAlignment<Align>();
};

template <bool small>
struct Sequential<String<small>> {
  std::string operator()() const { return String<small>::Make(current++); }
  mutable uint32_t current = 0;
};

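// Editor's note: enumerates pairs along anti-diagonals, i.e. (t0,u0), (t1,u0),
// (t0,u1), (t2,u0), (t1,u1), (t0,u2), ..., so both components keep growing
// without ever repeating a pair.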
template <class T, class U>
struct Sequential<std::pair<T, U>> {
  mutable Sequential<T> tseq;
  mutable Sequential<U> useq;

  using RealT = decltype(tseq());
  using RealU = decltype(useq());

  mutable std::vector<RealT> ts;
  mutable std::vector<RealU> us;
  mutable size_t ti = 0, ui = 0;

  std::pair<RealT, RealU> operator()() const {
    std::pair<RealT, RealU> value{get_t(), get_u()};
    if (ti == 0) {
      ti = ui + 1;
      ui = 0;
    } else {
      --ti;
      ++ui;
    }
    return value;
  }

  RealT get_t() const {
    while (ti >= ts.size()) ts.push_back(tseq());
    return ts[ti];
  }

  RealU get_u() const {
    while (ui >= us.size()) us.push_back(useq());
    return us[ui];
  }
};

template <class T, int percent_skip>
struct AlmostSequential {
  mutable Sequential<T> current;

  auto operator()() const -> decltype(current()) {
    while (absl::Uniform(GlobalBitGen(), 0.0, 1.0) <= percent_skip / 100.)
      current();
    return current();
  }
};

struct Uniform {
  template <typename T>
  T operator()(T) const {
    return absl::Uniform<T>(absl::IntervalClosed, GlobalBitGen(), T{0}, ~T{0});
  }
};

struct Gaussian {
  template <typename T>
  T operator()(T) const {
    double d;
    do {
      d = absl::Gaussian<double>(GlobalBitGen(), 1e6, 1e4);
    } while (d <= 0 || d > std::numeric_limits<T>::max() / 2);
    return static_cast<T>(d);
  }
};

struct Zipf {
  template <typename T>
  T operator()(T) const {
    return absl::Zipf<T>(GlobalBitGen(), std::numeric_limits<T>::max(), 1.6);
  }
};

template <class T, class Dist>
struct Random {
  T operator()() const { return Dist{}(T{}); }
};

template <class Dist, int Align>
struct Random<Ptr<Align>*, Dist> {
  Ptr<Align>* operator()() const {
    return MakePtr<Align>(Random<uintptr_t, Dist>{}() * Align);
  }
};

template <class Dist>
struct Random<IntIdentity, Dist> {
  IntIdentity operator()() const {
    return IntIdentity{Random<uint64_t, Dist>{}()};
  }
};

template <class Dist, int Align>
struct Random<PtrIdentity<Align>, Dist> {
  PtrIdentity<Align> operator()() const {
    return PtrIdentity<Align>{Random<uintptr_t, Dist>{}() * Align};
  }
};

template <class Dist, bool small>
struct Random<String<small>, Dist> {
  std::string operator()() const {
    return String<small>::Make(Random<uint32_t, Dist>{}());
  }
};

template <class T, class U, class Dist>
struct Random<std::pair<T, U>, Dist> {
  auto operator()() const
      -> decltype(std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}())) {
    return std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}());
  }
};

template <typename>
std::string Name();

std::string Name(uint32_t*) { return "u32"; }
std::string Name(uint64_t*) { return "u64"; }
std::string Name(IntIdentity*) { return "IntIdentity"; }

template <int Align>
std::string Name(Ptr<Align>**) {
  return absl::StrCat("Ptr", Align);
}

template <int Align>
std::string Name(PtrIdentity<Align>*) {
  return absl::StrCat("PtrIdentity", Align);
}

template <bool small>
std::string Name(String<small>*) {
  return small ? "StrS" : "StrL";
}

template <class T, class U>
std::string Name(std::pair<T, U>*) {
  if (output() == OutputStyle::kBenchmark)
    return absl::StrCat("P_", Name<T>(), "_", Name<U>());
  return absl::StrCat("P<", Name<T>(), ",", Name<U>(), ">");
}

template <class T>
std::string Name(Sequential<T>*) {
  return "Sequential";
}

template <class T, int P>
std::string Name(AlmostSequential<T, P>*) {
  return absl::StrCat("AlmostSeq_", P);
}

template <class T>
std::string Name(Random<T, Uniform>*) {
  return "UnifRand";
}

template <class T>
std::string Name(Random<T, Gaussian>*) {
  return "GausRand";
}

template <class T>
std::string Name(Random<T, Zipf>*) {
  return "ZipfRand";
}

template <typename T>
std::string Name() {
  return Name(static_cast<T*>(nullptr));
}

constexpr int kNameWidth = 15;
constexpr int kDistWidth = 16;

bool CanRunBenchmark(absl::string_view name) {
  static std::regex* const filter = []() -> std::regex* {
    return benchmarks.empty() || benchmarks == "all"
               ? nullptr
               : new std::regex(std::string(benchmarks));
  }();
  return filter == nullptr || std::regex_search(std::string(name), *filter);
}

struct Result {
  std::string name;
  std::string dist_name;
  Ratios ratios;
};

template <typename T, typename Dist>
void RunForTypeAndDistribution(std::vector<Result>& results) {
  std::string name = absl::StrCat(Name<T>(), "/", Name<Dist>());
  // We have to check against all three names (min/avg/max) before we run it.
  // If any of them is enabled, we run it.
  if (!CanRunBenchmark(absl::StrCat(name, "/min")) &&
      !CanRunBenchmark(absl::StrCat(name, "/avg")) &&
      !CanRunBenchmark(absl::StrCat(name, "/max"))) {
    return;
  }
  results.push_back({Name<T>(), Name<Dist>(), CollectMeanProbeLengths<Dist>()});
}

template <class T>
void RunForType(std::vector<Result>& results) {
  RunForTypeAndDistribution<T, Sequential<T>>(results);
  RunForTypeAndDistribution<T, AlmostSequential<T, 20>>(results);
  RunForTypeAndDistribution<T, AlmostSequential<T, 50>>(results);
  RunForTypeAndDistribution<T, Random<T, Uniform>>(results);
#ifdef NDEBUG
  // Disable these in non-opt mode because they take too long.
  RunForTypeAndDistribution<T, Random<T, Gaussian>>(results);
  RunForTypeAndDistribution<T, Random<T, Zipf>>(results);
#endif  // NDEBUG
}

}  // namespace

int main(int argc, char** argv) {
  // Parse the benchmark flags. Ignore all of them except the regex pattern.
  for (int i = 1; i < argc; ++i) {
    absl::string_view arg = argv[i];
    const auto next = [&] { return argv[std::min(i + 1, argc - 1)]; };

    if (absl::ConsumePrefix(&arg, "--benchmark_filter")) {
      if (arg == "") {
        // --benchmark_filter X
        benchmarks = next();
      } else if (absl::ConsumePrefix(&arg, "=")) {
        // --benchmark_filter=X
        benchmarks = arg;
      }
    }

    // Any --benchmark flag turns on the mode.
    if (absl::ConsumePrefix(&arg, "--benchmark")) {
      if (benchmarks.empty()) benchmarks = "all";
    }
  }

  std::vector<Result> results;
  RunForType<uint64_t>(results);
  RunForType<IntIdentity>(results);
  RunForType<Ptr<8>*>(results);
  RunForType<Ptr<16>*>(results);
  RunForType<Ptr<32>*>(results);
  RunForType<Ptr<64>*>(results);
  RunForType<PtrIdentity<8>>(results);
  RunForType<PtrIdentity<16>>(results);
  RunForType<PtrIdentity<32>>(results);
  RunForType<PtrIdentity<64>>(results);
  RunForType<std::pair<uint32_t, uint32_t>>(results);
  RunForType<String<true>>(results);
  RunForType<String<false>>(results);
  RunForType<std::pair<uint64_t, String<true>>>(results);
  RunForType<std::pair<String<true>, uint64_t>>(results);
  RunForType<std::pair<uint64_t, String<false>>>(results);
  RunForType<std::pair<String<false>, uint64_t>>(results);

  switch (output()) {
    case OutputStyle::kRegular:
      absl::PrintF("%-*s%-*s       Min       Avg       Max\n%s\n", kNameWidth,
                   "Type", kDistWidth, "Distribution",
                   std::string(kNameWidth + kDistWidth + 10 * 3, '-'));
      for (const auto& result : results) {
        absl::PrintF("%-*s%-*s  %8.4f  %8.4f  %8.4f\n", kNameWidth, result.name,
                     kDistWidth, result.dist_name, result.ratios.min_load,
                     result.ratios.avg_load, result.ratios.max_load);
      }
      break;
    case OutputStyle::kBenchmark: {
      absl::PrintF("{\n");
      absl::PrintF("  \"benchmarks\": [\n");
      absl::string_view comma;
      for (const auto& result : results) {
        auto print = [&](absl::string_view stat, double Ratios::*val) {
          std::string name =
              absl::StrCat(result.name, "/", result.dist_name, "/", stat);
          // Check the regex again. We might have enabled only one of the
          // stats for the benchmark.
          if (!CanRunBenchmark(name)) return;
          absl::PrintF("    %s{\n", comma);
          absl::PrintF("      \"cpu_time\": %f,\n", 1e9 * result.ratios.*val);
          absl::PrintF("      \"real_time\": %f,\n", 1e9 * result.ratios.*val);
          absl::PrintF("      \"iterations\": 1,\n");
          absl::PrintF("      \"name\": \"%s\",\n", name);
          absl::PrintF("      \"time_unit\": \"ns\"\n");
          absl::PrintF("    }\n");
          comma = ",";
        };
        print("min", &Ratios::min_load);
        print("avg", &Ratios::avg_load);
        print("max", &Ratios::max_load);
      }
      absl::PrintF("  ],\n");
      absl::PrintF("  \"context\": {\n");
      absl::PrintF("  }\n");
      absl::PrintF("}\n");
      break;
    }
  }

  return 0;
}
absl/hash/hash_benchmark.cc
@@ -0,0 +1,249 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <tuple>
#include <type_traits>
#include <typeindex>
#include <utility>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/hash/hash.h"
#include "absl/random/random.h"
#include "absl/strings/cord.h"
#include "absl/strings/cord_test_helpers.h"
#include "absl/strings/string_view.h"
#include "benchmark/benchmark.h"

namespace {

using absl::Hash;

template <template <typename> class H, typename T>
void RunBenchmark(benchmark::State& state, T value) {
  H<T> h;
  for (auto _ : state) {
    benchmark::DoNotOptimize(value);
    benchmark::DoNotOptimize(h(value));
  }
}

}  // namespace

template <typename T>
using AbslHash = absl::Hash<T>;

class TypeErasedInterface {
 public:
  virtual ~TypeErasedInterface() = default;

  template <typename H>
  friend H AbslHashValue(H state, const TypeErasedInterface& wrapper) {
    state = H::combine(std::move(state), std::type_index(typeid(wrapper)));
    wrapper.HashValue(absl::HashState::Create(&state));
    return state;
  }

 private:
  virtual void HashValue(absl::HashState state) const = 0;
};

template <typename T>
struct TypeErasedAbslHash {
  class Wrapper : public TypeErasedInterface {
   public:
    explicit Wrapper(const T& value) : value_(value) {}

   private:
    void HashValue(absl::HashState state) const override {
      absl::HashState::combine(std::move(state), value_);
    }

    const T& value_;
  };

  size_t operator()(const T& value) {
    return absl::Hash<Wrapper>{}(Wrapper(value));
  }
};

template <typename FuncType>
inline FuncType* ODRUseFunction(FuncType* ptr) {
  volatile FuncType* dummy = ptr;
  return dummy;
}

absl::Cord FlatCord(size_t size) {
  absl::Cord result(std::string(size, 'a'));
  result.Flatten();
  return result;
}

absl::Cord FragmentedCord(size_t size) {
  const size_t orig_size = size;
  std::vector<std::string> chunks;
  size_t chunk_size = std::max<size_t>(1, size / 10);
  while (size > chunk_size) {
    chunks.push_back(std::string(chunk_size, 'a'));
    size -= chunk_size;
  }
  if (size > 0) {
    chunks.push_back(std::string(size, 'a'));
  }
  absl::Cord result = absl::MakeFragmentedCord(chunks);
  (void)orig_size;
  assert(result.size() == orig_size);
  return result;
}

// Generates a benchmark and a codegen method for the provided types. The
// codegen method provides a well known entrypoint for dumping assembly.
#define MAKE_BENCHMARK(hash, name, ...)                          \
  namespace {                                                    \
  void BM_##hash##_##name(benchmark::State& state) {             \
    RunBenchmark<hash>(state, __VA_ARGS__);                      \
  }                                                              \
  BENCHMARK(BM_##hash##_##name);                                 \
  }                                                              \
  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg);  \
  size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg) { \
    return hash<decltype(__VA_ARGS__)>{}(arg);                   \
  }                                                              \
  bool absl_hash_test_odr_use##hash##name =                      \
      ODRUseFunction(&Codegen##hash##name);
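// Editor's note: for illustration, MAKE_BENCHMARK(AbslHash, Int32, int32_t{})
// expands to roughly:
//
//   namespace {
//   void BM_AbslHash_Int32(benchmark::State& state) {
//     RunBenchmark<AbslHash>(state, int32_t{});
//   }
//   BENCHMARK(BM_AbslHash_Int32);
//   }
//   size_t CodegenAbslHashInt32(const int32_t& arg);
//   size_t CodegenAbslHashInt32(const int32_t& arg) {
//     return AbslHash<int32_t>{}(arg);
//   }
//   bool absl_hash_test_odr_useAbslHashInt32 =
//       ODRUseFunction(&CodegenAbslHashInt32);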

MAKE_BENCHMARK(AbslHash, Int32, int32_t{});
MAKE_BENCHMARK(AbslHash, Int64, int64_t{});
MAKE_BENCHMARK(AbslHash, Double, 1.2);
MAKE_BENCHMARK(AbslHash, DoubleZero, 0.0);
MAKE_BENCHMARK(AbslHash, PairInt32Int32, std::pair<int32_t, int32_t>{});
MAKE_BENCHMARK(AbslHash, PairInt64Int64, std::pair<int64_t, int64_t>{});
MAKE_BENCHMARK(AbslHash, TupleInt32BoolInt64,
               std::tuple<int32_t, bool, int64_t>{});
MAKE_BENCHMARK(AbslHash, String_0, std::string());
MAKE_BENCHMARK(AbslHash, String_10, std::string(10, 'a'));
MAKE_BENCHMARK(AbslHash, String_30, std::string(30, 'a'));
MAKE_BENCHMARK(AbslHash, String_90, std::string(90, 'a'));
MAKE_BENCHMARK(AbslHash, String_200, std::string(200, 'a'));
MAKE_BENCHMARK(AbslHash, String_5000, std::string(5000, 'a'));
MAKE_BENCHMARK(AbslHash, Cord_Flat_0, absl::Cord());
MAKE_BENCHMARK(AbslHash, Cord_Flat_10, FlatCord(10));
MAKE_BENCHMARK(AbslHash, Cord_Flat_30, FlatCord(30));
MAKE_BENCHMARK(AbslHash, Cord_Flat_90, FlatCord(90));
MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200));
MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000));
MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200));
MAKE_BENCHMARK(AbslHash, Cord_Fragmented_5000, FragmentedCord(5000));
MAKE_BENCHMARK(AbslHash, VectorInt64_10, std::vector<int64_t>(10));
MAKE_BENCHMARK(AbslHash, VectorInt64_100, std::vector<int64_t>(100));
MAKE_BENCHMARK(AbslHash, VectorDouble_10, std::vector<double>(10, 1.1));
MAKE_BENCHMARK(AbslHash, VectorDouble_100, std::vector<double>(100, 1.1));
MAKE_BENCHMARK(AbslHash, PairStringString_0,
               std::make_pair(std::string(), std::string()));
MAKE_BENCHMARK(AbslHash, PairStringString_10,
               std::make_pair(std::string(10, 'a'), std::string(10, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_30,
               std::make_pair(std::string(30, 'a'), std::string(30, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_90,
               std::make_pair(std::string(90, 'a'), std::string(90, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_200,
               std::make_pair(std::string(200, 'a'), std::string(200, 'b')));
MAKE_BENCHMARK(AbslHash, PairStringString_5000,
               std::make_pair(std::string(5000, 'a'), std::string(5000, 'b')));

MAKE_BENCHMARK(TypeErasedAbslHash, Int32, int32_t{});
MAKE_BENCHMARK(TypeErasedAbslHash, Int64, int64_t{});
MAKE_BENCHMARK(TypeErasedAbslHash, PairInt32Int32,
               std::pair<int32_t, int32_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, PairInt64Int64,
               std::pair<int64_t, int64_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, TupleInt32BoolInt64,
               std::tuple<int32_t, bool, int64_t>{});
MAKE_BENCHMARK(TypeErasedAbslHash, String_0, std::string());
MAKE_BENCHMARK(TypeErasedAbslHash, String_10, std::string(10, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_30, std::string(30, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_90, std::string(90, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_200, std::string(200, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, String_5000, std::string(5000, 'a'));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10,
               std::vector<double>(10, 1.1));
MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100,
               std::vector<double>(100, 1.1));

// The latency benchmark attempts to model the speed of the hash function in
// production. When a hash function is used for hashtable lookups it is rarely
// used to hash N items in a tight loop nor on constant sized strings. Instead,
// after hashing there is a potential equality test plus a (usually) large
// amount of user code. To simulate this effectively we introduce a data
// dependency between elements we hash by using the hash of the Nth element as
// the selector of the N+1th element to hash. This isolates the hash function
// code much like in production. As a bonus we use the hash to generate strings
// of size [1,N] (instead of fixed N) to disable perfect branch predictions in
// hash function implementations.
namespace {
// 16kb fits in the L1 cache of most CPUs we care about. Keeping memory latency
// low will allow us to attribute most time to the CPU, which means more
// accurate measurements.
static constexpr size_t kEntropySize = 16 << 10;
static char entropy[kEntropySize + 1024];
ABSL_ATTRIBUTE_UNUSED static const bool kInitialized = [] {
  absl::BitGen gen;
  static_assert(sizeof(entropy) % sizeof(uint64_t) == 0, "");
  for (int i = 0; i != sizeof(entropy); i += sizeof(uint64_t)) {
    auto rand = absl::Uniform<uint64_t>(gen);
    memcpy(&entropy[i], &rand, sizeof(uint64_t));
  }
  return true;
}();
}  // namespace

template <class T>
struct PodRand {
  static_assert(std::is_pod<T>::value, "");
  static_assert(kEntropySize + sizeof(T) < sizeof(entropy), "");

  T Get(size_t i) const {
    T v;
    memcpy(&v, &entropy[i % kEntropySize], sizeof(T));
    return v;
  }
};

template <size_t N>
struct StringRand {
  static_assert(kEntropySize + N < sizeof(entropy), "");

  absl::string_view Get(size_t i) const {
    // This has a small bias towards small numbers. Because max N is ~200 the
    // bias is very small, and we prefer to be very fast instead of absolutely
    // accurate. Also, we pass N = 2^K+1 so that the mod reduces to a bitand.
    size_t s = (i % (N - 1)) + 1;
    return {&entropy[i % kEntropySize], s};
  }
};

#define MAKE_LATENCY_BENCHMARK(hash, name, ...)              \
  namespace {                                                \
  void BM_latency_##hash##_##name(benchmark::State& state) { \
    __VA_ARGS__ r;                                           \
    hash<decltype(r.Get(0))> h;                              \
    size_t i = 871401241;                                    \
    for (auto _ : state) {                                   \
      benchmark::DoNotOptimize(i = h(r.Get(i)));             \
    }                                                        \
  }                                                          \
  BENCHMARK(BM_latency_##hash##_##name);                     \
  }  // namespace

MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand<int32_t>);
MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand<int64_t>);
MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>);
MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>);
MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>);
MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>);