|
|
|
// Copyright 2018 The Abseil Authors.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
#include <array>
|
|
|
|
#include <cmath>
|
|
|
|
#include <numeric>
|
|
|
|
#include <random>
|
|
|
|
#include <utility>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "absl/base/internal/raw_logging.h"
|
|
|
|
#include "absl/container/internal/hash_function_defaults.h"
|
|
|
|
#include "absl/container/internal/raw_hash_set.h"
|
|
|
|
#include "absl/strings/str_format.h"
|
|
|
|
#include "benchmark/benchmark.h"
|
|
|
|
|
|
|
|
namespace absl {
|
|
|
|
ABSL_NAMESPACE_BEGIN
|
|
|
|
namespace container_internal {
|
|
|
|
|
|
|
|
struct RawHashSetTestOnlyAccess {
|
|
|
|
template <typename C>
|
|
|
|
static auto GetSlots(const C& c) -> decltype(c.slots_) {
|
|
|
|
return c.slots_;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
struct IntPolicy {
|
|
|
|
using slot_type = int64_t;
|
|
|
|
using key_type = int64_t;
|
|
|
|
using init_type = int64_t;
|
|
|
|
|
|
|
|
static void construct(void*, int64_t* slot, int64_t v) { *slot = v; }
|
|
|
|
static void destroy(void*, int64_t*) {}
|
|
|
|
static void transfer(void*, int64_t* new_slot, int64_t* old_slot) {
|
|
|
|
*new_slot = *old_slot;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64_t& element(slot_type* slot) { return *slot; }
|
|
|
|
|
|
|
|
template <class F>
|
|
|
|
static auto apply(F&& f, int64_t x) -> decltype(std::forward<F>(f)(x, x)) {
|
|
|
|
return std::forward<F>(f)(x, x);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
class StringPolicy {
|
|
|
|
template <class F, class K, class V,
|
|
|
|
class = typename std::enable_if<
|
|
|
|
std::is_convertible<const K&, absl::string_view>::value>::type>
|
|
|
|
decltype(std::declval<F>()(
|
|
|
|
std::declval<const absl::string_view&>(), std::piecewise_construct,
|
|
|
|
std::declval<std::tuple<K>>(),
|
|
|
|
std::declval<V>())) static apply_impl(F&& f,
|
|
|
|
std::pair<std::tuple<K>, V> p) {
|
|
|
|
const absl::string_view& key = std::get<0>(p.first);
|
|
|
|
return std::forward<F>(f)(key, std::piecewise_construct, std::move(p.first),
|
|
|
|
std::move(p.second));
|
|
|
|
}
|
|
|
|
|
|
|
|
public:
|
|
|
|
struct slot_type {
|
|
|
|
struct ctor {};
|
|
|
|
|
|
|
|
template <class... Ts>
|
|
|
|
slot_type(ctor, Ts&&... ts) : pair(std::forward<Ts>(ts)...) {}
|
|
|
|
|
|
|
|
std::pair<std::string, std::string> pair;
|
|
|
|
};
|
|
|
|
|
|
|
|
using key_type = std::string;
|
|
|
|
using init_type = std::pair<std::string, std::string>;
|
|
|
|
|
|
|
|
template <class allocator_type, class... Args>
|
|
|
|
static void construct(allocator_type* alloc, slot_type* slot, Args... args) {
|
|
|
|
std::allocator_traits<allocator_type>::construct(
|
|
|
|
*alloc, slot, typename slot_type::ctor(), std::forward<Args>(args)...);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class allocator_type>
|
|
|
|
static void destroy(allocator_type* alloc, slot_type* slot) {
|
|
|
|
std::allocator_traits<allocator_type>::destroy(*alloc, slot);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class allocator_type>
|
|
|
|
static void transfer(allocator_type* alloc, slot_type* new_slot,
|
|
|
|
slot_type* old_slot) {
|
|
|
|
construct(alloc, new_slot, std::move(old_slot->pair));
|
|
|
|
destroy(alloc, old_slot);
|
|
|
|
}
|
|
|
|
|
|
|
|
static std::pair<std::string, std::string>& element(slot_type* slot) {
|
|
|
|
return slot->pair;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class F, class... Args>
|
|
|
|
static auto apply(F&& f, Args&&... args)
|
|
|
|
-> decltype(apply_impl(std::forward<F>(f),
|
|
|
|
PairArgs(std::forward<Args>(args)...))) {
|
|
|
|
return apply_impl(std::forward<F>(f),
|
|
|
|
PairArgs(std::forward<Args>(args)...));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct StringHash : container_internal::hash_default_hash<absl::string_view> {
|
|
|
|
using is_transparent = void;
|
|
|
|
};
|
|
|
|
struct StringEq : std::equal_to<absl::string_view> {
|
|
|
|
using is_transparent = void;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct StringTable
|
|
|
|
: raw_hash_set<StringPolicy, StringHash, StringEq, std::allocator<int>> {
|
|
|
|
using Base = typename StringTable::raw_hash_set;
|
|
|
|
StringTable() {}
|
|
|
|
using Base::Base;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct IntTable
|
|
|
|
: raw_hash_set<IntPolicy, container_internal::hash_default_hash<int64_t>,
|
|
|
|
std::equal_to<int64_t>, std::allocator<int64_t>> {
|
|
|
|
using Base = typename IntTable::raw_hash_set;
|
|
|
|
IntTable() {}
|
|
|
|
using Base::Base;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct string_generator {
|
|
|
|
template <class RNG>
|
|
|
|
std::string operator()(RNG& rng) const {
|
|
|
|
std::string res;
|
|
|
|
res.resize(12);
|
|
|
|
std::uniform_int_distribution<uint32_t> printable_ascii(0x20, 0x7E);
|
|
|
|
std::generate(res.begin(), res.end(), [&] { return printable_ascii(rng); });
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t size;
|
|
|
|
};
|
|
|
|
|
|
|
|
// Model a cache in steady state.
|
|
|
|
//
|
|
|
|
// On a table of size N, keep deleting the LRU entry and add a random one.
|
|
|
|
void BM_CacheInSteadyState(benchmark::State& state) {
|
|
|
|
std::random_device rd;
|
|
|
|
std::mt19937 rng(rd());
|
|
|
|
string_generator gen{12};
|
|
|
|
StringTable t;
|
|
|
|
std::deque<std::string> keys;
|
|
|
|
while (t.size() < state.range(0)) {
|
|
|
|
auto x = t.emplace(gen(rng), gen(rng));
|
|
|
|
if (x.second) keys.push_back(x.first->first);
|
|
|
|
}
|
|
|
|
ABSL_RAW_CHECK(state.range(0) >= 10, "");
|
|
|
|
while (state.KeepRunning()) {
|
|
|
|
// Some cache hits.
|
|
|
|
std::deque<std::string>::const_iterator it;
|
|
|
|
for (int i = 0; i != 90; ++i) {
|
|
|
|
if (i % 10 == 0) it = keys.end();
|
|
|
|
::benchmark::DoNotOptimize(t.find(*--it));
|
|
|
|
}
|
|
|
|
// Some cache misses.
|
|
|
|
for (int i = 0; i != 10; ++i) ::benchmark::DoNotOptimize(t.find(gen(rng)));
|
|
|
|
ABSL_RAW_CHECK(t.erase(keys.front()), keys.front().c_str());
|
|
|
|
keys.pop_front();
|
|
|
|
while (true) {
|
|
|
|
auto x = t.emplace(gen(rng), gen(rng));
|
|
|
|
if (x.second) {
|
|
|
|
keys.push_back(x.first->first);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
state.SetItemsProcessed(state.iterations());
|
|
|
|
state.SetLabel(absl::StrFormat("load_factor=%.2f", t.load_factor()));
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename Benchmark>
|
|
|
|
void CacheInSteadyStateArgs(Benchmark* bm) {
|
|
|
|
// The default.
|
|
|
|
const float max_load_factor = 0.875;
|
|
|
|
// When the cache is at the steady state, the probe sequence will equal
|
|
|
|
// capacity if there is no reclamation of deleted slots. Pick a number large
|
|
|
|
// enough to make the benchmark slow for that case.
|
|
|
|
const size_t capacity = 1 << 10;
|
|
|
|
|
|
|
|
// Check N data points to cover load factors in [0.4, 0.8).
|
|
|
|
const size_t kNumPoints = 10;
|
|
|
|
for (size_t i = 0; i != kNumPoints; ++i)
|
|
|
|
bm->Arg(std::ceil(
|
|
|
|
capacity * (max_load_factor + i * max_load_factor / kNumPoints) / 2));
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_CacheInSteadyState)->Apply(CacheInSteadyStateArgs);
|
|
|
|
|
|
|
|
void BM_EndComparison(benchmark::State& state) {
|
|
|
|
StringTable t = {{"a", "a"}, {"b", "b"}};
|
|
|
|
auto it = t.begin();
|
|
|
|
for (auto i : state) {
|
|
|
|
benchmark::DoNotOptimize(t);
|
|
|
|
benchmark::DoNotOptimize(it);
|
|
|
|
benchmark::DoNotOptimize(it != t.end());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_EndComparison);
|
|
|
|
|
|
|
|
void BM_Iteration(benchmark::State& state) {
|
|
|
|
std::random_device rd;
|
|
|
|
std::mt19937 rng(rd());
|
|
|
|
string_generator gen{12};
|
|
|
|
StringTable t;
|
|
|
|
|
|
|
|
size_t capacity = state.range(0);
|
|
|
|
size_t size = state.range(1);
|
|
|
|
t.reserve(capacity);
|
|
|
|
|
|
|
|
while (t.size() < size) {
|
|
|
|
t.emplace(gen(rng), gen(rng));
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto i : state) {
|
|
|
|
benchmark::DoNotOptimize(t);
|
|
|
|
for (auto it = t.begin(); it != t.end(); ++it) {
|
|
|
|
benchmark::DoNotOptimize(*it);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
BENCHMARK(BM_Iteration)
|
|
|
|
->ArgPair(1, 1)
|
|
|
|
->ArgPair(2, 2)
|
|
|
|
->ArgPair(4, 4)
|
|
|
|
->ArgPair(7, 7)
|
|
|
|
->ArgPair(10, 10)
|
|
|
|
->ArgPair(15, 15)
|
|
|
|
->ArgPair(16, 16)
|
|
|
|
->ArgPair(54, 54)
|
|
|
|
->ArgPair(100, 100)
|
|
|
|
->ArgPair(400, 400)
|
|
|
|
// empty
|
|
|
|
->ArgPair(0, 0)
|
|
|
|
->ArgPair(10, 0)
|
|
|
|
->ArgPair(100, 0)
|
|
|
|
->ArgPair(1000, 0)
|
|
|
|
->ArgPair(10000, 0)
|
|
|
|
// sparse
|
|
|
|
->ArgPair(100, 1)
|
|
|
|
->ArgPair(1000, 10);
|
|
|
|
|
|
|
|
void BM_CopyCtorSparseInt(benchmark::State& state) {
|
|
|
|
std::random_device rd;
|
|
|
|
std::mt19937 rng(rd());
|
|
|
|
IntTable t;
|
|
|
|
std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
|
|
|
|
|
|
|
|
size_t size = state.range(0);
|
|
|
|
t.reserve(size * 10);
|
|
|
|
while (t.size() < size) {
|
|
|
|
t.emplace(dist(rng));
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto i : state) {
|
|
|
|
IntTable t2 = t;
|
|
|
|
benchmark::DoNotOptimize(t2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_CopyCtorSparseInt)->Range(128, 4096);
|
|
|
|
|
|
|
|
void BM_CopyCtorInt(benchmark::State& state) {
|
|
|
|
std::random_device rd;
|
|
|
|
std::mt19937 rng(rd());
|
|
|
|
IntTable t;
|
|
|
|
std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
|
|
|
|
|
|
|
|
size_t size = state.range(0);
|
|
|
|
while (t.size() < size) {
|
|
|
|
t.emplace(dist(rng));
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto i : state) {
|
|
|
|
IntTable t2 = t;
|
|
|
|
benchmark::DoNotOptimize(t2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_CopyCtorInt)->Range(128, 4096);
|
|
|
|
|
|
|
|
void BM_CopyCtorString(benchmark::State& state) {
|
|
|
|
std::random_device rd;
|
|
|
|
std::mt19937 rng(rd());
|
|
|
|
StringTable t;
|
|
|
|
std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
|
|
|
|
|
|
|
|
size_t size = state.range(0);
|
|
|
|
while (t.size() < size) {
|
|
|
|
t.emplace(std::to_string(dist(rng)), std::to_string(dist(rng)));
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto i : state) {
|
|
|
|
StringTable t2 = t;
|
|
|
|
benchmark::DoNotOptimize(t2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_CopyCtorString)->Range(128, 4096);
|
|
|
|
|
|
|
|
void BM_CopyAssign(benchmark::State& state) {
|
|
|
|
std::random_device rd;
|
|
|
|
std::mt19937 rng(rd());
|
|
|
|
IntTable t;
|
|
|
|
std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
|
|
|
|
while (t.size() < state.range(0)) {
|
|
|
|
t.emplace(dist(rng));
|
|
|
|
}
|
|
|
|
|
|
|
|
IntTable t2;
|
|
|
|
for (auto _ : state) {
|
|
|
|
t2 = t;
|
|
|
|
benchmark::DoNotOptimize(t2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_CopyAssign)->Range(128, 4096);
|
|
|
|
|
|
|
|
void BM_RangeCtor(benchmark::State& state) {
|
|
|
|
std::random_device rd;
|
|
|
|
std::mt19937 rng(rd());
|
|
|
|
std::uniform_int_distribution<uint64_t> dist(0, ~uint64_t{});
|
|
|
|
std::vector<int> values;
|
|
|
|
const size_t desired_size = state.range(0);
|
|
|
|
while (values.size() < desired_size) {
|
|
|
|
values.emplace_back(dist(rng));
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto unused : state) {
|
|
|
|
IntTable t{values.begin(), values.end()};
|
|
|
|
benchmark::DoNotOptimize(t);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_RangeCtor)->Range(128, 65536);
|
|
|
|
|
|
|
|
void BM_NoOpReserveIntTable(benchmark::State& state) {
|
|
|
|
IntTable t;
|
|
|
|
t.reserve(100000);
|
|
|
|
for (auto _ : state) {
|
|
|
|
benchmark::DoNotOptimize(t);
|
|
|
|
t.reserve(100000);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_NoOpReserveIntTable);
|
|
|
|
|
|
|
|
void BM_NoOpReserveStringTable(benchmark::State& state) {
|
|
|
|
StringTable t;
|
|
|
|
t.reserve(100000);
|
|
|
|
for (auto _ : state) {
|
|
|
|
benchmark::DoNotOptimize(t);
|
|
|
|
t.reserve(100000);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_NoOpReserveStringTable);
|
|
|
|
|
|
|
|
void BM_ReserveIntTable(benchmark::State& state) {
|
|
|
|
int reserve_size = state.range(0);
|
|
|
|
for (auto _ : state) {
|
|
|
|
state.PauseTiming();
|
|
|
|
IntTable t;
|
|
|
|
state.ResumeTiming();
|
|
|
|
benchmark::DoNotOptimize(t);
|
|
|
|
t.reserve(reserve_size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_ReserveIntTable)->Range(128, 4096);
|
|
|
|
|
|
|
|
void BM_ReserveStringTable(benchmark::State& state) {
|
|
|
|
int reserve_size = state.range(0);
|
|
|
|
for (auto _ : state) {
|
|
|
|
state.PauseTiming();
|
|
|
|
StringTable t;
|
|
|
|
state.ResumeTiming();
|
|
|
|
benchmark::DoNotOptimize(t);
|
|
|
|
t.reserve(reserve_size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_ReserveStringTable)->Range(128, 4096);
|
|
|
|
|
|
|
|
// Like std::iota, except that ctrl_t doesn't support operator++.
|
|
|
|
template <typename CtrlIter>
|
|
|
|
void Iota(CtrlIter begin, CtrlIter end, int value) {
|
|
|
|
for (; begin != end; ++begin, ++value) {
|
|
|
|
*begin = static_cast<ctrl_t>(value);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void BM_Group_Match(benchmark::State& state) {
|
|
|
|
std::array<ctrl_t, Group::kWidth> group;
|
|
|
|
Iota(group.begin(), group.end(), -4);
|
|
|
|
Group g{group.data()};
|
|
|
|
h2_t h = 1;
|
|
|
|
for (auto _ : state) {
|
|
|
|
::benchmark::DoNotOptimize(h);
|
Export of internal Abseil changes
--
07240ca7822d007cdcc79f2c40bd58b2c2010348 by Abseil Team <absl-team@google.com>:
Correct the comment from "AlphaNum" to "Arg".
PiperOrigin-RevId: 416139192
--
adcba4a6b3763626e1db7b1e8c108b3114903557 by Martijn Vels <mvels@google.com>:
Fix NewExternalRep() to require data being non-empty, and remove nullptr return.
PiperOrigin-RevId: 416135865
--
c0d14cd918fb16f15d1d84de9284b5c5ecc1f8f2 by Abseil Team <absl-team@google.com>:
Fix doc comment for absl::ascii_isprint().
The comment was incorrectly saying that it includes all whitespace.
It doesn't; the only whitespace char it includes is ' '.
PiperOrigin-RevId: 416112524
--
d83327800159c07002b6865e21232a12463e02dd by Abseil Team <absl-team@google.com>:
Internal change
PiperOrigin-RevId: 416099978
--
baf11e9ca42ca9140cdbf8075f971db8d65b1195 by Ilya Tokar <tokarip@google.com>:
Prevent compiler from optimizing Group_Match* benchmarks away.
Currently we benchmark single store of precomputed value.
Not all affected benchmarks show performance changes:
BM_Group_Match 0.53ns ± 1% 0.53ns ± 0% -0.42% (p=0.038 n=10+10)
BM_Group_MatchEmpty 0.26ns ± 1% 0.26ns ± 1% ~ (p=1.000 n=10+10)
BM_Group_MatchEmptyOrDeleted 0.26ns ± 1% 0.26ns ± 1% ~ (p=0.121 n=10+10)
BM_Group_CountLeadingEmptyOrDeleted 0.26ns ± 1% 0.45ns ± 0% +70.05% (p=0.000 n=10+8)
BM_Group_MatchFirstEmptyOrDeleted 0.26ns ± 0% 0.44ns ± 1% +65.91% (p=0.000 n=8+9)
But inspecting the generated code shows the difference,
e. g. BM_Group_MatchFirstEmptyOrDeleted
Before:
add $0xffffffffffffffff,%rbx
jne 30
After:
pcmpeqd %xmm0,%xmm0
pcmpgtb -0x30(%rbp),%xmm0
pmovmskb %xmm0,%eax
add: 0x23$0xffffffffffffffff,%rbx
jne 40
PiperOrigin-RevId: 416083515
--
122fbff893dc4571b3e75e4b241eb4495b925610 by Abseil Team <absl-team@google.com>:
Put namespace guard in ABSL_DECLARE_FLAG to make declaring a flag in a namespace a compiler error instead of a linker error.
PiperOrigin-RevId: 416036072
--
020fd8a20f5fa319e948846e003391fcb9e03868 by Ilya Tokar <tokarip@google.com>:
Make Cord::InlineRep::set_data unconditionally zero out memory.
Currently there is a single case where we don't zero out memory
as an optimization. Unconditional zeroing doesn't show any changes
in benchmarks, except for the unrelated improvement:
BM_CordPartialCopyToCord/1M/1 12.6ns ± 4% 12.6ns ± 4% ~ (p=0.857 n=16+19)
BM_CordPartialCopyToCord/1M/128 44.9ns ± 7% 45.0ns ± 3% ~ (p=0.468 n=18+17)
BM_CordPartialCopyToCord/1M/1k 64.5ns ± 4% 61.4ns ± 4% -4.82% (p=0.000 n=19+17)
BM_CordPartialCopyToCord/1M/8k 139ns ± 3% 128ns ±15% -7.76% (p=0.009 n=17+20)
BM_CordPartialCopyToCord/1M/16k 193ns ± 6% 168ns ± 6% -13.17% (p=0.000 n=17+17)
BM_CordPartialCopyToCord/4M/16k 199ns ± 4% 177ns ± 4% -11.36% (p=0.000 n=17+18)
BM_CordPartialCopyToCord/4M/32k 275ns ± 3% 250ns ± 4% -9.00% (p=0.000 n=18+18)
BM_CordPartialCopyToCord/4M/64k 291ns ± 4% 266ns ± 5% -8.53% (p=0.000 n=18+16)
BM_CordPartialCopyToCord/4M/128k 322ns ± 5% 291ns ± 4% -9.43% (p=0.000 n=20+18)
BM_CordPartialCopyToCord/8M/32k 281ns ± 5% 251ns ± 4% -10.38% (p=0.000 n=20+16)
BM_CordPartialCopyToCord/8M/64k 293ns ± 6% 267ns ± 4% -8.87% (p=0.000 n=16+19)
BM_CordPartialCopyToCord/8M/128k 334ns ± 3% 305ns ± 2% -8.56% (p=0.000 n=17+16)
This is clearly an alignmnet effect since number of the executed instructions is the same:
M_CordPartialCopyToCord/1M/1 155 ± 0% 155 ± 0% ~ (all samples are equal)
BM_CordPartialCopyToCord/1M/128 446 ± 0% 446 ± 0% ~ (p=0.332 n=36+39)
BM_CordPartialCopyToCord/1M/1k 473 ± 0% 473 ± 0% ~ (p=0.969 n=40+40)
BM_CordPartialCopyToCord/1M/8k 808 ± 0% 808 ± 0% ~ (p=0.127 n=40+39)
BM_CordPartialCopyToCord/1M/16k 957 ± 0% 957 ± 0% ~ (p=0.532 n=40+40)
BM_CordPartialCopyToCord/4M/16k 952 ± 0% 952 ± 0% ~ (p=0.686 n=39+39)
BM_CordPartialCopyToCord/4M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.690 n=40+40)
BM_CordPartialCopyToCord/4M/64k 1.23k ± 0% 1.23k ± 0% ~ (p=0.182 n=40+39)
BM_CordPartialCopyToCord/4M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.711 n=40+40)
BM_CordPartialCopyToCord/8M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.697 n=40+40)
BM_CordPartialCopyToCord/8M/64k 1.23k ± 0% 1.23k ± 0% +0.00% (p=0.049 n=40+40)
BM_CordPartialCopyToCord/8M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.507 n=40+40)
This makes code simpler and doesn't regress performance.
PiperOrigin-RevId: 415560574
--
37305b2690b31682088749e4d62f40d7095bdc54 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 415558737
--
86aaed569b9e743c1eb813a5f48def978a793db3 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415515201
--
6cdb8786cdcb4fa0b8a4b72fc98940877d1fdeff by Abseil Team <absl-team@google.com>:
Update SubmitMutexProfileData to accept wait_cycles instead of wait_timestamp
PiperOrigin-RevId: 415360871
--
9f979d307aa16ad09f214e04876cbe84395c0901 by Abseil Team <absl-team@google.com>:
absl::flat_hash_set compiles with -Wconversion -Wsign-compare
PiperOrigin-RevId: 415357498
--
9eceb14174708f15e61259d449b214a8a4c7f9e7 by Abseil Team <absl-team@google.com>:
Fix AddressIsReadable for the corner case of (aligned) addr == NULL.
PiperOrigin-RevId: 415307792
--
1a39ffe55898375e2d7f88c17c99db5a1b95b313 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415162872
--
64378549b110d5f5762185a5906c520fba70f0e7 by Abseil Team <absl-team@google.com>:
Fix a typo in the comments
PiperOrigin-RevId: 415088461
--
41aae8322e913b82710153c22b97c611fdb6e1fb by Abseil Team <absl-team@google.com>:
Switch from `connect` to `rt_sigreturn` -- the latter is much less problematic
for system call sandboxes.
PiperOrigin-RevId: 415073965
--
870c5e3388b6a35611bff538626fe7a1c8c87171 by Abseil Team <absl-team@google.com>:
Add ABSL_HAVE_HWADDRESS_SANITIZER and ABSL_HAVE_LEAK_SANITIZER
PiperOrigin-RevId: 414871189
--
f213ed60a66b58da7ac40555adfb1d529ff0a4db by Derek Mauro <dmauro@google.com>:
Remove reference to __SANITIZE_MEMORY__, which does not exist
It appears to have been copied by pattern matching from the ASAN/TSAN
code blocks.
https://github.com/gcc-mirror/gcc/blob/f47662204de27f7685699eeef89aa173ccf32d85/gcc/cppbuiltin.c#L79-L126
PiperOrigin-RevId: 414806587
--
b152891e73ab515f397ceb53f66c8ee2f33863ea by Abseil Team <absl-team@google.com>:
Rollback previous commit: SYS_open is not defined in certain environments.
PiperOrigin-RevId: 414521820
--
5a1cbb282331023902e1374dd0d920c4effbe47f by Abseil Team <absl-team@google.com>:
Use syscall(SYS_open, ...) instead of open() to avoid possible symbol
interposition.
Also add some warning notes.
PiperOrigin-RevId: 414508186
--
1824d6593612710aafdc599a89b0adced7d787f6 by Abseil Team <absl-team@google.com>:
Correct aarch64 macro check
The macro is __aarch64__, not __arch64__.
PiperOrigin-RevId: 414446225
--
a1536a57b64dfd53945d33a01cfc08b18c99c97b by Abseil Team <absl-team@google.com>:
Fix backwards comment in the last commit.
PiperOrigin-RevId: 414281214
--
11ac021ba779513667a31cf2563ddafc57d6d913 by Abseil Team <absl-team@google.com>:
AddressIsReadable() didn't work correctly on ARM when the given pointer was
misaligned at the end of the page.
Fix that by aligning the pointer on an 8-byte boundary before checking it.
PiperOrigin-RevId: 414203863
GitOrigin-RevId: 07240ca7822d007cdcc79f2c40bd58b2c2010348
Change-Id: If5f129194d59f5c9e5d84efd8cd9e17a70e072ab
3 years ago
|
|
|
::benchmark::DoNotOptimize(g);
|
|
|
|
::benchmark::DoNotOptimize(g.Match(h));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_Group_Match);
|
|
|
|
|
|
|
|
void BM_Group_MaskEmpty(benchmark::State& state) {
|
|
|
|
std::array<ctrl_t, Group::kWidth> group;
|
|
|
|
Iota(group.begin(), group.end(), -4);
|
|
|
|
Group g{group.data()};
|
Export of internal Abseil changes
--
07240ca7822d007cdcc79f2c40bd58b2c2010348 by Abseil Team <absl-team@google.com>:
Correct the comment from "AlphaNum" to "Arg".
PiperOrigin-RevId: 416139192
--
adcba4a6b3763626e1db7b1e8c108b3114903557 by Martijn Vels <mvels@google.com>:
Fix NewExternalRep() to require data being non-empty, and remove nullptr return.
PiperOrigin-RevId: 416135865
--
c0d14cd918fb16f15d1d84de9284b5c5ecc1f8f2 by Abseil Team <absl-team@google.com>:
Fix doc comment for absl::ascii_isprint().
The comment was incorrectly saying that it includes all whitespace.
It doesn't; the only whitespace char it includes is ' '.
PiperOrigin-RevId: 416112524
--
d83327800159c07002b6865e21232a12463e02dd by Abseil Team <absl-team@google.com>:
Internal change
PiperOrigin-RevId: 416099978
--
baf11e9ca42ca9140cdbf8075f971db8d65b1195 by Ilya Tokar <tokarip@google.com>:
Prevent compiler from optimizing Group_Match* benchmarks away.
Currently we benchmark single store of precomputed value.
Not all affected benchmarks show performance changes:
BM_Group_Match 0.53ns ± 1% 0.53ns ± 0% -0.42% (p=0.038 n=10+10)
BM_Group_MatchEmpty 0.26ns ± 1% 0.26ns ± 1% ~ (p=1.000 n=10+10)
BM_Group_MatchEmptyOrDeleted 0.26ns ± 1% 0.26ns ± 1% ~ (p=0.121 n=10+10)
BM_Group_CountLeadingEmptyOrDeleted 0.26ns ± 1% 0.45ns ± 0% +70.05% (p=0.000 n=10+8)
BM_Group_MatchFirstEmptyOrDeleted 0.26ns ± 0% 0.44ns ± 1% +65.91% (p=0.000 n=8+9)
But inspecting the generated code shows the difference,
e. g. BM_Group_MatchFirstEmptyOrDeleted
Before:
add $0xffffffffffffffff,%rbx
jne 30
After:
pcmpeqd %xmm0,%xmm0
pcmpgtb -0x30(%rbp),%xmm0
pmovmskb %xmm0,%eax
add: 0x23$0xffffffffffffffff,%rbx
jne 40
PiperOrigin-RevId: 416083515
--
122fbff893dc4571b3e75e4b241eb4495b925610 by Abseil Team <absl-team@google.com>:
Put namespace guard in ABSL_DECLARE_FLAG to make declaring a flag in a namespace a compiler error instead of a linker error.
PiperOrigin-RevId: 416036072
--
020fd8a20f5fa319e948846e003391fcb9e03868 by Ilya Tokar <tokarip@google.com>:
Make Cord::InlineRep::set_data unconditionally zero out memory.
Currently there is a single case where we don't zero out memory
as an optimization. Unconditional zeroing doesn't show any changes
in benchmarks, except for the unrelated improvement:
BM_CordPartialCopyToCord/1M/1 12.6ns ± 4% 12.6ns ± 4% ~ (p=0.857 n=16+19)
BM_CordPartialCopyToCord/1M/128 44.9ns ± 7% 45.0ns ± 3% ~ (p=0.468 n=18+17)
BM_CordPartialCopyToCord/1M/1k 64.5ns ± 4% 61.4ns ± 4% -4.82% (p=0.000 n=19+17)
BM_CordPartialCopyToCord/1M/8k 139ns ± 3% 128ns ±15% -7.76% (p=0.009 n=17+20)
BM_CordPartialCopyToCord/1M/16k 193ns ± 6% 168ns ± 6% -13.17% (p=0.000 n=17+17)
BM_CordPartialCopyToCord/4M/16k 199ns ± 4% 177ns ± 4% -11.36% (p=0.000 n=17+18)
BM_CordPartialCopyToCord/4M/32k 275ns ± 3% 250ns ± 4% -9.00% (p=0.000 n=18+18)
BM_CordPartialCopyToCord/4M/64k 291ns ± 4% 266ns ± 5% -8.53% (p=0.000 n=18+16)
BM_CordPartialCopyToCord/4M/128k 322ns ± 5% 291ns ± 4% -9.43% (p=0.000 n=20+18)
BM_CordPartialCopyToCord/8M/32k 281ns ± 5% 251ns ± 4% -10.38% (p=0.000 n=20+16)
BM_CordPartialCopyToCord/8M/64k 293ns ± 6% 267ns ± 4% -8.87% (p=0.000 n=16+19)
BM_CordPartialCopyToCord/8M/128k 334ns ± 3% 305ns ± 2% -8.56% (p=0.000 n=17+16)
This is clearly an alignmnet effect since number of the executed instructions is the same:
M_CordPartialCopyToCord/1M/1 155 ± 0% 155 ± 0% ~ (all samples are equal)
BM_CordPartialCopyToCord/1M/128 446 ± 0% 446 ± 0% ~ (p=0.332 n=36+39)
BM_CordPartialCopyToCord/1M/1k 473 ± 0% 473 ± 0% ~ (p=0.969 n=40+40)
BM_CordPartialCopyToCord/1M/8k 808 ± 0% 808 ± 0% ~ (p=0.127 n=40+39)
BM_CordPartialCopyToCord/1M/16k 957 ± 0% 957 ± 0% ~ (p=0.532 n=40+40)
BM_CordPartialCopyToCord/4M/16k 952 ± 0% 952 ± 0% ~ (p=0.686 n=39+39)
BM_CordPartialCopyToCord/4M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.690 n=40+40)
BM_CordPartialCopyToCord/4M/64k 1.23k ± 0% 1.23k ± 0% ~ (p=0.182 n=40+39)
BM_CordPartialCopyToCord/4M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.711 n=40+40)
BM_CordPartialCopyToCord/8M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.697 n=40+40)
BM_CordPartialCopyToCord/8M/64k 1.23k ± 0% 1.23k ± 0% +0.00% (p=0.049 n=40+40)
BM_CordPartialCopyToCord/8M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.507 n=40+40)
This makes code simpler and doesn't regress performance.
PiperOrigin-RevId: 415560574
--
37305b2690b31682088749e4d62f40d7095bdc54 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 415558737
--
86aaed569b9e743c1eb813a5f48def978a793db3 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415515201
--
6cdb8786cdcb4fa0b8a4b72fc98940877d1fdeff by Abseil Team <absl-team@google.com>:
Update SubmitMutexProfileData to accept wait_cycles instead of wait_timestamp
PiperOrigin-RevId: 415360871
--
9f979d307aa16ad09f214e04876cbe84395c0901 by Abseil Team <absl-team@google.com>:
absl::flat_hash_set compiles with -Wconversion -Wsign-compare
PiperOrigin-RevId: 415357498
--
9eceb14174708f15e61259d449b214a8a4c7f9e7 by Abseil Team <absl-team@google.com>:
Fix AddressIsReadable for the corner case of (aligned) addr == NULL.
PiperOrigin-RevId: 415307792
--
1a39ffe55898375e2d7f88c17c99db5a1b95b313 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415162872
--
64378549b110d5f5762185a5906c520fba70f0e7 by Abseil Team <absl-team@google.com>:
Fix a typo in the comments
PiperOrigin-RevId: 415088461
--
41aae8322e913b82710153c22b97c611fdb6e1fb by Abseil Team <absl-team@google.com>:
Switch from `connect` to `rt_sigreturn` -- the latter is much less problematic
for system call sandboxes.
PiperOrigin-RevId: 415073965
--
870c5e3388b6a35611bff538626fe7a1c8c87171 by Abseil Team <absl-team@google.com>:
Add ABSL_HAVE_HWADDRESS_SANITIZER and ABSL_HAVE_LEAK_SANITIZER
PiperOrigin-RevId: 414871189
--
f213ed60a66b58da7ac40555adfb1d529ff0a4db by Derek Mauro <dmauro@google.com>:
Remove reference to __SANITIZE_MEMORY__, which does not exist
It appears to have been copied by pattern matching from the ASAN/TSAN
code blocks.
https://github.com/gcc-mirror/gcc/blob/f47662204de27f7685699eeef89aa173ccf32d85/gcc/cppbuiltin.c#L79-L126
PiperOrigin-RevId: 414806587
--
b152891e73ab515f397ceb53f66c8ee2f33863ea by Abseil Team <absl-team@google.com>:
Rollback previous commit: SYS_open is not defined in certain environments.
PiperOrigin-RevId: 414521820
--
5a1cbb282331023902e1374dd0d920c4effbe47f by Abseil Team <absl-team@google.com>:
Use syscall(SYS_open, ...) instead of open() to avoid possible symbol
interposition.
Also add some warning notes.
PiperOrigin-RevId: 414508186
--
1824d6593612710aafdc599a89b0adced7d787f6 by Abseil Team <absl-team@google.com>:
Correct aarch64 macro check
The macro is __aarch64__, not __arch64__.
PiperOrigin-RevId: 414446225
--
a1536a57b64dfd53945d33a01cfc08b18c99c97b by Abseil Team <absl-team@google.com>:
Fix backwards comment in the last commit.
PiperOrigin-RevId: 414281214
--
11ac021ba779513667a31cf2563ddafc57d6d913 by Abseil Team <absl-team@google.com>:
AddressIsReadable() didn't work correctly on ARM when the given pointer was
misaligned at the end of the page.
Fix that by aligning the pointer on an 8-byte boundary before checking it.
PiperOrigin-RevId: 414203863
GitOrigin-RevId: 07240ca7822d007cdcc79f2c40bd58b2c2010348
Change-Id: If5f129194d59f5c9e5d84efd8cd9e17a70e072ab
3 years ago
|
|
|
for (auto _ : state) {
|
|
|
|
::benchmark::DoNotOptimize(g);
|
|
|
|
::benchmark::DoNotOptimize(g.MaskEmpty());
|
Export of internal Abseil changes
--
07240ca7822d007cdcc79f2c40bd58b2c2010348 by Abseil Team <absl-team@google.com>:
Correct the comment from "AlphaNum" to "Arg".
PiperOrigin-RevId: 416139192
--
adcba4a6b3763626e1db7b1e8c108b3114903557 by Martijn Vels <mvels@google.com>:
Fix NewExternalRep() to require data being non-empty, and remove nullptr return.
PiperOrigin-RevId: 416135865
--
c0d14cd918fb16f15d1d84de9284b5c5ecc1f8f2 by Abseil Team <absl-team@google.com>:
Fix doc comment for absl::ascii_isprint().
The comment was incorrectly saying that it includes all whitespace.
It doesn't; the only whitespace char it includes is ' '.
PiperOrigin-RevId: 416112524
--
d83327800159c07002b6865e21232a12463e02dd by Abseil Team <absl-team@google.com>:
Internal change
PiperOrigin-RevId: 416099978
--
baf11e9ca42ca9140cdbf8075f971db8d65b1195 by Ilya Tokar <tokarip@google.com>:
Prevent compiler from optimizing Group_Match* benchmarks away.
Currently we benchmark single store of precomputed value.
Not all affected benchmarks show performance changes:
BM_Group_Match 0.53ns ± 1% 0.53ns ± 0% -0.42% (p=0.038 n=10+10)
BM_Group_MatchEmpty 0.26ns ± 1% 0.26ns ± 1% ~ (p=1.000 n=10+10)
BM_Group_MatchEmptyOrDeleted 0.26ns ± 1% 0.26ns ± 1% ~ (p=0.121 n=10+10)
BM_Group_CountLeadingEmptyOrDeleted 0.26ns ± 1% 0.45ns ± 0% +70.05% (p=0.000 n=10+8)
BM_Group_MatchFirstEmptyOrDeleted 0.26ns ± 0% 0.44ns ± 1% +65.91% (p=0.000 n=8+9)
But inspecting the generated code shows the difference,
e. g. BM_Group_MatchFirstEmptyOrDeleted
Before:
add $0xffffffffffffffff,%rbx
jne 30
After:
pcmpeqd %xmm0,%xmm0
pcmpgtb -0x30(%rbp),%xmm0
pmovmskb %xmm0,%eax
add: 0x23$0xffffffffffffffff,%rbx
jne 40
PiperOrigin-RevId: 416083515
--
122fbff893dc4571b3e75e4b241eb4495b925610 by Abseil Team <absl-team@google.com>:
Put namespace guard in ABSL_DECLARE_FLAG to make declaring a flag in a namespace a compiler error instead of a linker error.
PiperOrigin-RevId: 416036072
--
020fd8a20f5fa319e948846e003391fcb9e03868 by Ilya Tokar <tokarip@google.com>:
Make Cord::InlineRep::set_data unconditionally zero out memory.
Currently there is a single case where we don't zero out memory
as an optimization. Unconditional zeroing doesn't show any changes
in benchmarks, except for the unrelated improvement:
BM_CordPartialCopyToCord/1M/1 12.6ns ± 4% 12.6ns ± 4% ~ (p=0.857 n=16+19)
BM_CordPartialCopyToCord/1M/128 44.9ns ± 7% 45.0ns ± 3% ~ (p=0.468 n=18+17)
BM_CordPartialCopyToCord/1M/1k 64.5ns ± 4% 61.4ns ± 4% -4.82% (p=0.000 n=19+17)
BM_CordPartialCopyToCord/1M/8k 139ns ± 3% 128ns ±15% -7.76% (p=0.009 n=17+20)
BM_CordPartialCopyToCord/1M/16k 193ns ± 6% 168ns ± 6% -13.17% (p=0.000 n=17+17)
BM_CordPartialCopyToCord/4M/16k 199ns ± 4% 177ns ± 4% -11.36% (p=0.000 n=17+18)
BM_CordPartialCopyToCord/4M/32k 275ns ± 3% 250ns ± 4% -9.00% (p=0.000 n=18+18)
BM_CordPartialCopyToCord/4M/64k 291ns ± 4% 266ns ± 5% -8.53% (p=0.000 n=18+16)
BM_CordPartialCopyToCord/4M/128k 322ns ± 5% 291ns ± 4% -9.43% (p=0.000 n=20+18)
BM_CordPartialCopyToCord/8M/32k 281ns ± 5% 251ns ± 4% -10.38% (p=0.000 n=20+16)
BM_CordPartialCopyToCord/8M/64k 293ns ± 6% 267ns ± 4% -8.87% (p=0.000 n=16+19)
BM_CordPartialCopyToCord/8M/128k 334ns ± 3% 305ns ± 2% -8.56% (p=0.000 n=17+16)
This is clearly an alignmnet effect since number of the executed instructions is the same:
M_CordPartialCopyToCord/1M/1 155 ± 0% 155 ± 0% ~ (all samples are equal)
BM_CordPartialCopyToCord/1M/128 446 ± 0% 446 ± 0% ~ (p=0.332 n=36+39)
BM_CordPartialCopyToCord/1M/1k 473 ± 0% 473 ± 0% ~ (p=0.969 n=40+40)
BM_CordPartialCopyToCord/1M/8k 808 ± 0% 808 ± 0% ~ (p=0.127 n=40+39)
BM_CordPartialCopyToCord/1M/16k 957 ± 0% 957 ± 0% ~ (p=0.532 n=40+40)
BM_CordPartialCopyToCord/4M/16k 952 ± 0% 952 ± 0% ~ (p=0.686 n=39+39)
BM_CordPartialCopyToCord/4M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.690 n=40+40)
BM_CordPartialCopyToCord/4M/64k 1.23k ± 0% 1.23k ± 0% ~ (p=0.182 n=40+39)
BM_CordPartialCopyToCord/4M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.711 n=40+40)
BM_CordPartialCopyToCord/8M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.697 n=40+40)
BM_CordPartialCopyToCord/8M/64k 1.23k ± 0% 1.23k ± 0% +0.00% (p=0.049 n=40+40)
BM_CordPartialCopyToCord/8M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.507 n=40+40)
This makes code simpler and doesn't regress performance.
PiperOrigin-RevId: 415560574
--
37305b2690b31682088749e4d62f40d7095bdc54 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 415558737
--
86aaed569b9e743c1eb813a5f48def978a793db3 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415515201
--
6cdb8786cdcb4fa0b8a4b72fc98940877d1fdeff by Abseil Team <absl-team@google.com>:
Update SubmitMutexProfileData to accept wait_cycles instead of wait_timestamp
PiperOrigin-RevId: 415360871
--
9f979d307aa16ad09f214e04876cbe84395c0901 by Abseil Team <absl-team@google.com>:
absl::flat_hash_set compiles with -Wconversion -Wsign-compare
PiperOrigin-RevId: 415357498
--
9eceb14174708f15e61259d449b214a8a4c7f9e7 by Abseil Team <absl-team@google.com>:
Fix AddressIsReadable for the corner case of (aligned) addr == NULL.
PiperOrigin-RevId: 415307792
--
1a39ffe55898375e2d7f88c17c99db5a1b95b313 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415162872
--
64378549b110d5f5762185a5906c520fba70f0e7 by Abseil Team <absl-team@google.com>:
Fix a typo in the comments
PiperOrigin-RevId: 415088461
--
41aae8322e913b82710153c22b97c611fdb6e1fb by Abseil Team <absl-team@google.com>:
Switch from `connect` to `rt_sigreturn` -- the latter is much less problematic
for system call sandboxes.
PiperOrigin-RevId: 415073965
--
870c5e3388b6a35611bff538626fe7a1c8c87171 by Abseil Team <absl-team@google.com>:
Add ABSL_HAVE_HWADDRESS_SANITIZER and ABSL_HAVE_LEAK_SANITIZER
PiperOrigin-RevId: 414871189
--
f213ed60a66b58da7ac40555adfb1d529ff0a4db by Derek Mauro <dmauro@google.com>:
Remove reference to __SANITIZE_MEMORY__, which does not exist
It appears to have been copied by pattern matching from the ASAN/TSAN
code blocks.
https://github.com/gcc-mirror/gcc/blob/f47662204de27f7685699eeef89aa173ccf32d85/gcc/cppbuiltin.c#L79-L126
PiperOrigin-RevId: 414806587
--
b152891e73ab515f397ceb53f66c8ee2f33863ea by Abseil Team <absl-team@google.com>:
Rollback previous commit: SYS_open is not defined in certain environments.
PiperOrigin-RevId: 414521820
--
5a1cbb282331023902e1374dd0d920c4effbe47f by Abseil Team <absl-team@google.com>:
Use syscall(SYS_open, ...) instead of open() to avoid possible symbol
interposition.
Also add some warning notes.
PiperOrigin-RevId: 414508186
--
1824d6593612710aafdc599a89b0adced7d787f6 by Abseil Team <absl-team@google.com>:
Correct aarch64 macro check
The macro is __aarch64__, not __arch64__.
PiperOrigin-RevId: 414446225
--
a1536a57b64dfd53945d33a01cfc08b18c99c97b by Abseil Team <absl-team@google.com>:
Fix backwards comment in the last commit.
PiperOrigin-RevId: 414281214
--
11ac021ba779513667a31cf2563ddafc57d6d913 by Abseil Team <absl-team@google.com>:
AddressIsReadable() didn't work correctly on ARM when the given pointer was
misaligned at the end of the page.
Fix that by aligning the pointer on an 8-byte boundary before checking it.
PiperOrigin-RevId: 414203863
GitOrigin-RevId: 07240ca7822d007cdcc79f2c40bd58b2c2010348
Change-Id: If5f129194d59f5c9e5d84efd8cd9e17a70e072ab
3 years ago
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_Group_MaskEmpty);
|
|
|
|
|
|
|
|
void BM_Group_MaskEmptyOrDeleted(benchmark::State& state) {
|
|
|
|
std::array<ctrl_t, Group::kWidth> group;
|
|
|
|
Iota(group.begin(), group.end(), -4);
|
|
|
|
Group g{group.data()};
|
Export of internal Abseil changes
--
07240ca7822d007cdcc79f2c40bd58b2c2010348 by Abseil Team <absl-team@google.com>:
Correct the comment from "AlphaNum" to "Arg".
PiperOrigin-RevId: 416139192
--
adcba4a6b3763626e1db7b1e8c108b3114903557 by Martijn Vels <mvels@google.com>:
Fix NewExternalRep() to require data being non-empty, and remove nullptr return.
PiperOrigin-RevId: 416135865
--
c0d14cd918fb16f15d1d84de9284b5c5ecc1f8f2 by Abseil Team <absl-team@google.com>:
Fix doc comment for absl::ascii_isprint().
The comment was incorrectly saying that it includes all whitespace.
It doesn't; the only whitespace char it includes is ' '.
PiperOrigin-RevId: 416112524
--
d83327800159c07002b6865e21232a12463e02dd by Abseil Team <absl-team@google.com>:
Internal change
PiperOrigin-RevId: 416099978
--
baf11e9ca42ca9140cdbf8075f971db8d65b1195 by Ilya Tokar <tokarip@google.com>:
Prevent compiler from optimizing Group_Match* benchmarks away.
Currently we benchmark single store of precomputed value.
Not all affected benchmarks show performance changes:
BM_Group_Match 0.53ns ± 1% 0.53ns ± 0% -0.42% (p=0.038 n=10+10)
BM_Group_MatchEmpty 0.26ns ± 1% 0.26ns ± 1% ~ (p=1.000 n=10+10)
BM_Group_MatchEmptyOrDeleted 0.26ns ± 1% 0.26ns ± 1% ~ (p=0.121 n=10+10)
BM_Group_CountLeadingEmptyOrDeleted 0.26ns ± 1% 0.45ns ± 0% +70.05% (p=0.000 n=10+8)
BM_Group_MatchFirstEmptyOrDeleted 0.26ns ± 0% 0.44ns ± 1% +65.91% (p=0.000 n=8+9)
But inspecting the generated code shows the difference,
e. g. BM_Group_MatchFirstEmptyOrDeleted
Before:
add $0xffffffffffffffff,%rbx
jne 30
After:
pcmpeqd %xmm0,%xmm0
pcmpgtb -0x30(%rbp),%xmm0
pmovmskb %xmm0,%eax
add: 0x23$0xffffffffffffffff,%rbx
jne 40
PiperOrigin-RevId: 416083515
--
122fbff893dc4571b3e75e4b241eb4495b925610 by Abseil Team <absl-team@google.com>:
Put namespace guard in ABSL_DECLARE_FLAG to make declaring a flag in a namespace a compiler error instead of a linker error.
PiperOrigin-RevId: 416036072
--
020fd8a20f5fa319e948846e003391fcb9e03868 by Ilya Tokar <tokarip@google.com>:
Make Cord::InlineRep::set_data unconditionally zero out memory.
Currently there is a single case where we don't zero out memory
as an optimization. Unconditional zeroing doesn't show any changes
in benchmarks, except for the unrelated improvement:
BM_CordPartialCopyToCord/1M/1 12.6ns ± 4% 12.6ns ± 4% ~ (p=0.857 n=16+19)
BM_CordPartialCopyToCord/1M/128 44.9ns ± 7% 45.0ns ± 3% ~ (p=0.468 n=18+17)
BM_CordPartialCopyToCord/1M/1k 64.5ns ± 4% 61.4ns ± 4% -4.82% (p=0.000 n=19+17)
BM_CordPartialCopyToCord/1M/8k 139ns ± 3% 128ns ±15% -7.76% (p=0.009 n=17+20)
BM_CordPartialCopyToCord/1M/16k 193ns ± 6% 168ns ± 6% -13.17% (p=0.000 n=17+17)
BM_CordPartialCopyToCord/4M/16k 199ns ± 4% 177ns ± 4% -11.36% (p=0.000 n=17+18)
BM_CordPartialCopyToCord/4M/32k 275ns ± 3% 250ns ± 4% -9.00% (p=0.000 n=18+18)
BM_CordPartialCopyToCord/4M/64k 291ns ± 4% 266ns ± 5% -8.53% (p=0.000 n=18+16)
BM_CordPartialCopyToCord/4M/128k 322ns ± 5% 291ns ± 4% -9.43% (p=0.000 n=20+18)
BM_CordPartialCopyToCord/8M/32k 281ns ± 5% 251ns ± 4% -10.38% (p=0.000 n=20+16)
BM_CordPartialCopyToCord/8M/64k 293ns ± 6% 267ns ± 4% -8.87% (p=0.000 n=16+19)
BM_CordPartialCopyToCord/8M/128k 334ns ± 3% 305ns ± 2% -8.56% (p=0.000 n=17+16)
This is clearly an alignmnet effect since number of the executed instructions is the same:
M_CordPartialCopyToCord/1M/1 155 ± 0% 155 ± 0% ~ (all samples are equal)
BM_CordPartialCopyToCord/1M/128 446 ± 0% 446 ± 0% ~ (p=0.332 n=36+39)
BM_CordPartialCopyToCord/1M/1k 473 ± 0% 473 ± 0% ~ (p=0.969 n=40+40)
BM_CordPartialCopyToCord/1M/8k 808 ± 0% 808 ± 0% ~ (p=0.127 n=40+39)
BM_CordPartialCopyToCord/1M/16k 957 ± 0% 957 ± 0% ~ (p=0.532 n=40+40)
BM_CordPartialCopyToCord/4M/16k 952 ± 0% 952 ± 0% ~ (p=0.686 n=39+39)
BM_CordPartialCopyToCord/4M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.690 n=40+40)
BM_CordPartialCopyToCord/4M/64k 1.23k ± 0% 1.23k ± 0% ~ (p=0.182 n=40+39)
BM_CordPartialCopyToCord/4M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.711 n=40+40)
BM_CordPartialCopyToCord/8M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.697 n=40+40)
BM_CordPartialCopyToCord/8M/64k 1.23k ± 0% 1.23k ± 0% +0.00% (p=0.049 n=40+40)
BM_CordPartialCopyToCord/8M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.507 n=40+40)
This makes code simpler and doesn't regress performance.
PiperOrigin-RevId: 415560574
--
37305b2690b31682088749e4d62f40d7095bdc54 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 415558737
--
86aaed569b9e743c1eb813a5f48def978a793db3 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415515201
--
6cdb8786cdcb4fa0b8a4b72fc98940877d1fdeff by Abseil Team <absl-team@google.com>:
Update SubmitMutexProfileData to accept wait_cycles instead of wait_timestamp
PiperOrigin-RevId: 415360871
--
9f979d307aa16ad09f214e04876cbe84395c0901 by Abseil Team <absl-team@google.com>:
absl::flat_hash_set compiles with -Wconversion -Wsign-compare
PiperOrigin-RevId: 415357498
--
9eceb14174708f15e61259d449b214a8a4c7f9e7 by Abseil Team <absl-team@google.com>:
Fix AddressIsReadable for the corner case of (aligned) addr == NULL.
PiperOrigin-RevId: 415307792
--
1a39ffe55898375e2d7f88c17c99db5a1b95b313 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415162872
--
64378549b110d5f5762185a5906c520fba70f0e7 by Abseil Team <absl-team@google.com>:
Fix a typo in the comments
PiperOrigin-RevId: 415088461
--
41aae8322e913b82710153c22b97c611fdb6e1fb by Abseil Team <absl-team@google.com>:
Switch from `connect` to `rt_sigreturn` -- the latter is much less problematic
for system call sandboxes.
PiperOrigin-RevId: 415073965
--
870c5e3388b6a35611bff538626fe7a1c8c87171 by Abseil Team <absl-team@google.com>:
Add ABSL_HAVE_HWADDRESS_SANITIZER and ABSL_HAVE_LEAK_SANITIZER
PiperOrigin-RevId: 414871189
--
f213ed60a66b58da7ac40555adfb1d529ff0a4db by Derek Mauro <dmauro@google.com>:
Remove reference to __SANITIZE_MEMORY__, which does not exist
It appears to have been copied by pattern matching from the ASAN/TSAN
code blocks.
https://github.com/gcc-mirror/gcc/blob/f47662204de27f7685699eeef89aa173ccf32d85/gcc/cppbuiltin.c#L79-L126
PiperOrigin-RevId: 414806587
--
b152891e73ab515f397ceb53f66c8ee2f33863ea by Abseil Team <absl-team@google.com>:
Rollback previous commit: SYS_open is not defined in certain environments.
PiperOrigin-RevId: 414521820
--
5a1cbb282331023902e1374dd0d920c4effbe47f by Abseil Team <absl-team@google.com>:
Use syscall(SYS_open, ...) instead of open() to avoid possible symbol
interposition.
Also add some warning notes.
PiperOrigin-RevId: 414508186
--
1824d6593612710aafdc599a89b0adced7d787f6 by Abseil Team <absl-team@google.com>:
Correct aarch64 macro check
The macro is __aarch64__, not __arch64__.
PiperOrigin-RevId: 414446225
--
a1536a57b64dfd53945d33a01cfc08b18c99c97b by Abseil Team <absl-team@google.com>:
Fix backwards comment in the last commit.
PiperOrigin-RevId: 414281214
--
11ac021ba779513667a31cf2563ddafc57d6d913 by Abseil Team <absl-team@google.com>:
AddressIsReadable() didn't work correctly on ARM when the given pointer was
misaligned at the end of the page.
Fix that by aligning the pointer on an 8-byte boundary before checking it.
PiperOrigin-RevId: 414203863
GitOrigin-RevId: 07240ca7822d007cdcc79f2c40bd58b2c2010348
Change-Id: If5f129194d59f5c9e5d84efd8cd9e17a70e072ab
3 years ago
|
|
|
for (auto _ : state) {
|
|
|
|
::benchmark::DoNotOptimize(g);
|
|
|
|
::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted());
|
Export of internal Abseil changes
--
07240ca7822d007cdcc79f2c40bd58b2c2010348 by Abseil Team <absl-team@google.com>:
Correct the comment from "AlphaNum" to "Arg".
PiperOrigin-RevId: 416139192
--
adcba4a6b3763626e1db7b1e8c108b3114903557 by Martijn Vels <mvels@google.com>:
Fix NewExternalRep() to require data being non-empty, and remove nullptr return.
PiperOrigin-RevId: 416135865
--
c0d14cd918fb16f15d1d84de9284b5c5ecc1f8f2 by Abseil Team <absl-team@google.com>:
Fix doc comment for absl::ascii_isprint().
The comment was incorrectly saying that it includes all whitespace.
It doesn't; the only whitespace char it includes is ' '.
PiperOrigin-RevId: 416112524
--
d83327800159c07002b6865e21232a12463e02dd by Abseil Team <absl-team@google.com>:
Internal change
PiperOrigin-RevId: 416099978
--
baf11e9ca42ca9140cdbf8075f971db8d65b1195 by Ilya Tokar <tokarip@google.com>:
Prevent compiler from optimizing Group_Match* benchmarks away.
Currently we benchmark single store of precomputed value.
Not all affected benchmarks show performance changes:
BM_Group_Match 0.53ns ± 1% 0.53ns ± 0% -0.42% (p=0.038 n=10+10)
BM_Group_MatchEmpty 0.26ns ± 1% 0.26ns ± 1% ~ (p=1.000 n=10+10)
BM_Group_MatchEmptyOrDeleted 0.26ns ± 1% 0.26ns ± 1% ~ (p=0.121 n=10+10)
BM_Group_CountLeadingEmptyOrDeleted 0.26ns ± 1% 0.45ns ± 0% +70.05% (p=0.000 n=10+8)
BM_Group_MatchFirstEmptyOrDeleted 0.26ns ± 0% 0.44ns ± 1% +65.91% (p=0.000 n=8+9)
But inspecting the generated code shows the difference,
e. g. BM_Group_MatchFirstEmptyOrDeleted
Before:
add $0xffffffffffffffff,%rbx
jne 30
After:
pcmpeqd %xmm0,%xmm0
pcmpgtb -0x30(%rbp),%xmm0
pmovmskb %xmm0,%eax
add: 0x23$0xffffffffffffffff,%rbx
jne 40
PiperOrigin-RevId: 416083515
--
122fbff893dc4571b3e75e4b241eb4495b925610 by Abseil Team <absl-team@google.com>:
Put namespace guard in ABSL_DECLARE_FLAG to make declaring a flag in a namespace a compiler error instead of a linker error.
PiperOrigin-RevId: 416036072
--
020fd8a20f5fa319e948846e003391fcb9e03868 by Ilya Tokar <tokarip@google.com>:
Make Cord::InlineRep::set_data unconditionally zero out memory.
Currently there is a single case where we don't zero out memory
as an optimization. Unconditional zeroing doesn't show any changes
in benchmarks, except for the unrelated improvement:
BM_CordPartialCopyToCord/1M/1 12.6ns ± 4% 12.6ns ± 4% ~ (p=0.857 n=16+19)
BM_CordPartialCopyToCord/1M/128 44.9ns ± 7% 45.0ns ± 3% ~ (p=0.468 n=18+17)
BM_CordPartialCopyToCord/1M/1k 64.5ns ± 4% 61.4ns ± 4% -4.82% (p=0.000 n=19+17)
BM_CordPartialCopyToCord/1M/8k 139ns ± 3% 128ns ±15% -7.76% (p=0.009 n=17+20)
BM_CordPartialCopyToCord/1M/16k 193ns ± 6% 168ns ± 6% -13.17% (p=0.000 n=17+17)
BM_CordPartialCopyToCord/4M/16k 199ns ± 4% 177ns ± 4% -11.36% (p=0.000 n=17+18)
BM_CordPartialCopyToCord/4M/32k 275ns ± 3% 250ns ± 4% -9.00% (p=0.000 n=18+18)
BM_CordPartialCopyToCord/4M/64k 291ns ± 4% 266ns ± 5% -8.53% (p=0.000 n=18+16)
BM_CordPartialCopyToCord/4M/128k 322ns ± 5% 291ns ± 4% -9.43% (p=0.000 n=20+18)
BM_CordPartialCopyToCord/8M/32k 281ns ± 5% 251ns ± 4% -10.38% (p=0.000 n=20+16)
BM_CordPartialCopyToCord/8M/64k 293ns ± 6% 267ns ± 4% -8.87% (p=0.000 n=16+19)
BM_CordPartialCopyToCord/8M/128k 334ns ± 3% 305ns ± 2% -8.56% (p=0.000 n=17+16)
This is clearly an alignmnet effect since number of the executed instructions is the same:
M_CordPartialCopyToCord/1M/1 155 ± 0% 155 ± 0% ~ (all samples are equal)
BM_CordPartialCopyToCord/1M/128 446 ± 0% 446 ± 0% ~ (p=0.332 n=36+39)
BM_CordPartialCopyToCord/1M/1k 473 ± 0% 473 ± 0% ~ (p=0.969 n=40+40)
BM_CordPartialCopyToCord/1M/8k 808 ± 0% 808 ± 0% ~ (p=0.127 n=40+39)
BM_CordPartialCopyToCord/1M/16k 957 ± 0% 957 ± 0% ~ (p=0.532 n=40+40)
BM_CordPartialCopyToCord/4M/16k 952 ± 0% 952 ± 0% ~ (p=0.686 n=39+39)
BM_CordPartialCopyToCord/4M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.690 n=40+40)
BM_CordPartialCopyToCord/4M/64k 1.23k ± 0% 1.23k ± 0% ~ (p=0.182 n=40+39)
BM_CordPartialCopyToCord/4M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.711 n=40+40)
BM_CordPartialCopyToCord/8M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.697 n=40+40)
BM_CordPartialCopyToCord/8M/64k 1.23k ± 0% 1.23k ± 0% +0.00% (p=0.049 n=40+40)
BM_CordPartialCopyToCord/8M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.507 n=40+40)
This makes code simpler and doesn't regress performance.
PiperOrigin-RevId: 415560574
--
37305b2690b31682088749e4d62f40d7095bdc54 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 415558737
--
86aaed569b9e743c1eb813a5f48def978a793db3 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415515201
--
6cdb8786cdcb4fa0b8a4b72fc98940877d1fdeff by Abseil Team <absl-team@google.com>:
Update SubmitMutexProfileData to accept wait_cycles instead of wait_timestamp
PiperOrigin-RevId: 415360871
--
9f979d307aa16ad09f214e04876cbe84395c0901 by Abseil Team <absl-team@google.com>:
absl::flat_hash_set compiles with -Wconversion -Wsign-compare
PiperOrigin-RevId: 415357498
--
9eceb14174708f15e61259d449b214a8a4c7f9e7 by Abseil Team <absl-team@google.com>:
Fix AddressIsReadable for the corner case of (aligned) addr == NULL.
PiperOrigin-RevId: 415307792
--
1a39ffe55898375e2d7f88c17c99db5a1b95b313 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415162872
--
64378549b110d5f5762185a5906c520fba70f0e7 by Abseil Team <absl-team@google.com>:
Fix a typo in the comments
PiperOrigin-RevId: 415088461
--
41aae8322e913b82710153c22b97c611fdb6e1fb by Abseil Team <absl-team@google.com>:
Switch from `connect` to `rt_sigreturn` -- the latter is much less problematic
for system call sandboxes.
PiperOrigin-RevId: 415073965
--
870c5e3388b6a35611bff538626fe7a1c8c87171 by Abseil Team <absl-team@google.com>:
Add ABSL_HAVE_HWADDRESS_SANITIZER and ABSL_HAVE_LEAK_SANITIZER
PiperOrigin-RevId: 414871189
--
f213ed60a66b58da7ac40555adfb1d529ff0a4db by Derek Mauro <dmauro@google.com>:
Remove reference to __SANITIZE_MEMORY__, which does not exist
It appears to have been copied by pattern matching from the ASAN/TSAN
code blocks.
https://github.com/gcc-mirror/gcc/blob/f47662204de27f7685699eeef89aa173ccf32d85/gcc/cppbuiltin.c#L79-L126
PiperOrigin-RevId: 414806587
--
b152891e73ab515f397ceb53f66c8ee2f33863ea by Abseil Team <absl-team@google.com>:
Rollback previous commit: SYS_open is not defined in certain environments.
PiperOrigin-RevId: 414521820
--
5a1cbb282331023902e1374dd0d920c4effbe47f by Abseil Team <absl-team@google.com>:
Use syscall(SYS_open, ...) instead of open() to avoid possible symbol
interposition.
Also add some warning notes.
PiperOrigin-RevId: 414508186
--
1824d6593612710aafdc599a89b0adced7d787f6 by Abseil Team <absl-team@google.com>:
Correct aarch64 macro check
The macro is __aarch64__, not __arch64__.
PiperOrigin-RevId: 414446225
--
a1536a57b64dfd53945d33a01cfc08b18c99c97b by Abseil Team <absl-team@google.com>:
Fix backwards comment in the last commit.
PiperOrigin-RevId: 414281214
--
11ac021ba779513667a31cf2563ddafc57d6d913 by Abseil Team <absl-team@google.com>:
AddressIsReadable() didn't work correctly on ARM when the given pointer was
misaligned at the end of the page.
Fix that by aligning the pointer on an 8-byte boundary before checking it.
PiperOrigin-RevId: 414203863
GitOrigin-RevId: 07240ca7822d007cdcc79f2c40bd58b2c2010348
Change-Id: If5f129194d59f5c9e5d84efd8cd9e17a70e072ab
3 years ago
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_Group_MaskEmptyOrDeleted);
|
|
|
|
|
|
|
|
void BM_Group_CountLeadingEmptyOrDeleted(benchmark::State& state) {
|
|
|
|
std::array<ctrl_t, Group::kWidth> group;
|
|
|
|
Iota(group.begin(), group.end(), -2);
|
|
|
|
Group g{group.data()};
|
Export of internal Abseil changes
--
07240ca7822d007cdcc79f2c40bd58b2c2010348 by Abseil Team <absl-team@google.com>:
Correct the comment from "AlphaNum" to "Arg".
PiperOrigin-RevId: 416139192
--
adcba4a6b3763626e1db7b1e8c108b3114903557 by Martijn Vels <mvels@google.com>:
Fix NewExternalRep() to require data being non-empty, and remove nullptr return.
PiperOrigin-RevId: 416135865
--
c0d14cd918fb16f15d1d84de9284b5c5ecc1f8f2 by Abseil Team <absl-team@google.com>:
Fix doc comment for absl::ascii_isprint().
The comment was incorrectly saying that it includes all whitespace.
It doesn't; the only whitespace char it includes is ' '.
PiperOrigin-RevId: 416112524
--
d83327800159c07002b6865e21232a12463e02dd by Abseil Team <absl-team@google.com>:
Internal change
PiperOrigin-RevId: 416099978
--
baf11e9ca42ca9140cdbf8075f971db8d65b1195 by Ilya Tokar <tokarip@google.com>:
Prevent compiler from optimizing Group_Match* benchmarks away.
Currently we benchmark single store of precomputed value.
Not all affected benchmarks show performance changes:
BM_Group_Match 0.53ns ± 1% 0.53ns ± 0% -0.42% (p=0.038 n=10+10)
BM_Group_MatchEmpty 0.26ns ± 1% 0.26ns ± 1% ~ (p=1.000 n=10+10)
BM_Group_MatchEmptyOrDeleted 0.26ns ± 1% 0.26ns ± 1% ~ (p=0.121 n=10+10)
BM_Group_CountLeadingEmptyOrDeleted 0.26ns ± 1% 0.45ns ± 0% +70.05% (p=0.000 n=10+8)
BM_Group_MatchFirstEmptyOrDeleted 0.26ns ± 0% 0.44ns ± 1% +65.91% (p=0.000 n=8+9)
But inspecting the generated code shows the difference,
e. g. BM_Group_MatchFirstEmptyOrDeleted
Before:
add $0xffffffffffffffff,%rbx
jne 30
After:
pcmpeqd %xmm0,%xmm0
pcmpgtb -0x30(%rbp),%xmm0
pmovmskb %xmm0,%eax
add: 0x23$0xffffffffffffffff,%rbx
jne 40
PiperOrigin-RevId: 416083515
--
122fbff893dc4571b3e75e4b241eb4495b925610 by Abseil Team <absl-team@google.com>:
Put namespace guard in ABSL_DECLARE_FLAG to make declaring a flag in a namespace a compiler error instead of a linker error.
PiperOrigin-RevId: 416036072
--
020fd8a20f5fa319e948846e003391fcb9e03868 by Ilya Tokar <tokarip@google.com>:
Make Cord::InlineRep::set_data unconditionally zero out memory.
Currently there is a single case where we don't zero out memory
as an optimization. Unconditional zeroing doesn't show any changes
in benchmarks, except for the unrelated improvement:
BM_CordPartialCopyToCord/1M/1 12.6ns ± 4% 12.6ns ± 4% ~ (p=0.857 n=16+19)
BM_CordPartialCopyToCord/1M/128 44.9ns ± 7% 45.0ns ± 3% ~ (p=0.468 n=18+17)
BM_CordPartialCopyToCord/1M/1k 64.5ns ± 4% 61.4ns ± 4% -4.82% (p=0.000 n=19+17)
BM_CordPartialCopyToCord/1M/8k 139ns ± 3% 128ns ±15% -7.76% (p=0.009 n=17+20)
BM_CordPartialCopyToCord/1M/16k 193ns ± 6% 168ns ± 6% -13.17% (p=0.000 n=17+17)
BM_CordPartialCopyToCord/4M/16k 199ns ± 4% 177ns ± 4% -11.36% (p=0.000 n=17+18)
BM_CordPartialCopyToCord/4M/32k 275ns ± 3% 250ns ± 4% -9.00% (p=0.000 n=18+18)
BM_CordPartialCopyToCord/4M/64k 291ns ± 4% 266ns ± 5% -8.53% (p=0.000 n=18+16)
BM_CordPartialCopyToCord/4M/128k 322ns ± 5% 291ns ± 4% -9.43% (p=0.000 n=20+18)
BM_CordPartialCopyToCord/8M/32k 281ns ± 5% 251ns ± 4% -10.38% (p=0.000 n=20+16)
BM_CordPartialCopyToCord/8M/64k 293ns ± 6% 267ns ± 4% -8.87% (p=0.000 n=16+19)
BM_CordPartialCopyToCord/8M/128k 334ns ± 3% 305ns ± 2% -8.56% (p=0.000 n=17+16)
This is clearly an alignmnet effect since number of the executed instructions is the same:
M_CordPartialCopyToCord/1M/1 155 ± 0% 155 ± 0% ~ (all samples are equal)
BM_CordPartialCopyToCord/1M/128 446 ± 0% 446 ± 0% ~ (p=0.332 n=36+39)
BM_CordPartialCopyToCord/1M/1k 473 ± 0% 473 ± 0% ~ (p=0.969 n=40+40)
BM_CordPartialCopyToCord/1M/8k 808 ± 0% 808 ± 0% ~ (p=0.127 n=40+39)
BM_CordPartialCopyToCord/1M/16k 957 ± 0% 957 ± 0% ~ (p=0.532 n=40+40)
BM_CordPartialCopyToCord/4M/16k 952 ± 0% 952 ± 0% ~ (p=0.686 n=39+39)
BM_CordPartialCopyToCord/4M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.690 n=40+40)
BM_CordPartialCopyToCord/4M/64k 1.23k ± 0% 1.23k ± 0% ~ (p=0.182 n=40+39)
BM_CordPartialCopyToCord/4M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.711 n=40+40)
BM_CordPartialCopyToCord/8M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.697 n=40+40)
BM_CordPartialCopyToCord/8M/64k 1.23k ± 0% 1.23k ± 0% +0.00% (p=0.049 n=40+40)
BM_CordPartialCopyToCord/8M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.507 n=40+40)
This makes code simpler and doesn't regress performance.
PiperOrigin-RevId: 415560574
--
37305b2690b31682088749e4d62f40d7095bdc54 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 415558737
--
86aaed569b9e743c1eb813a5f48def978a793db3 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415515201
--
6cdb8786cdcb4fa0b8a4b72fc98940877d1fdeff by Abseil Team <absl-team@google.com>:
Update SubmitMutexProfileData to accept wait_cycles instead of wait_timestamp
PiperOrigin-RevId: 415360871
--
9f979d307aa16ad09f214e04876cbe84395c0901 by Abseil Team <absl-team@google.com>:
absl::flat_hash_set compiles with -Wconversion -Wsign-compare
PiperOrigin-RevId: 415357498
--
9eceb14174708f15e61259d449b214a8a4c7f9e7 by Abseil Team <absl-team@google.com>:
Fix AddressIsReadable for the corner case of (aligned) addr == NULL.
PiperOrigin-RevId: 415307792
--
1a39ffe55898375e2d7f88c17c99db5a1b95b313 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415162872
--
64378549b110d5f5762185a5906c520fba70f0e7 by Abseil Team <absl-team@google.com>:
Fix a typo in the comments
PiperOrigin-RevId: 415088461
--
41aae8322e913b82710153c22b97c611fdb6e1fb by Abseil Team <absl-team@google.com>:
Switch from `connect` to `rt_sigreturn` -- the latter is much less problematic
for system call sandboxes.
PiperOrigin-RevId: 415073965
--
870c5e3388b6a35611bff538626fe7a1c8c87171 by Abseil Team <absl-team@google.com>:
Add ABSL_HAVE_HWADDRESS_SANITIZER and ABSL_HAVE_LEAK_SANITIZER
PiperOrigin-RevId: 414871189
--
f213ed60a66b58da7ac40555adfb1d529ff0a4db by Derek Mauro <dmauro@google.com>:
Remove reference to __SANITIZE_MEMORY__, which does not exist
It appears to have been copied by pattern matching from the ASAN/TSAN
code blocks.
https://github.com/gcc-mirror/gcc/blob/f47662204de27f7685699eeef89aa173ccf32d85/gcc/cppbuiltin.c#L79-L126
PiperOrigin-RevId: 414806587
--
b152891e73ab515f397ceb53f66c8ee2f33863ea by Abseil Team <absl-team@google.com>:
Rollback previous commit: SYS_open is not defined in certain environments.
PiperOrigin-RevId: 414521820
--
5a1cbb282331023902e1374dd0d920c4effbe47f by Abseil Team <absl-team@google.com>:
Use syscall(SYS_open, ...) instead of open() to avoid possible symbol
interposition.
Also add some warning notes.
PiperOrigin-RevId: 414508186
--
1824d6593612710aafdc599a89b0adced7d787f6 by Abseil Team <absl-team@google.com>:
Correct aarch64 macro check
The macro is __aarch64__, not __arch64__.
PiperOrigin-RevId: 414446225
--
a1536a57b64dfd53945d33a01cfc08b18c99c97b by Abseil Team <absl-team@google.com>:
Fix backwards comment in the last commit.
PiperOrigin-RevId: 414281214
--
11ac021ba779513667a31cf2563ddafc57d6d913 by Abseil Team <absl-team@google.com>:
AddressIsReadable() didn't work correctly on ARM when the given pointer was
misaligned at the end of the page.
Fix that by aligning the pointer on an 8-byte boundary before checking it.
PiperOrigin-RevId: 414203863
GitOrigin-RevId: 07240ca7822d007cdcc79f2c40bd58b2c2010348
Change-Id: If5f129194d59f5c9e5d84efd8cd9e17a70e072ab
3 years ago
|
|
|
for (auto _ : state) {
|
|
|
|
::benchmark::DoNotOptimize(g);
|
|
|
|
::benchmark::DoNotOptimize(g.CountLeadingEmptyOrDeleted());
|
Export of internal Abseil changes
--
07240ca7822d007cdcc79f2c40bd58b2c2010348 by Abseil Team <absl-team@google.com>:
Correct the comment from "AlphaNum" to "Arg".
PiperOrigin-RevId: 416139192
--
adcba4a6b3763626e1db7b1e8c108b3114903557 by Martijn Vels <mvels@google.com>:
Fix NewExternalRep() to require data being non-empty, and remove nullptr return.
PiperOrigin-RevId: 416135865
--
c0d14cd918fb16f15d1d84de9284b5c5ecc1f8f2 by Abseil Team <absl-team@google.com>:
Fix doc comment for absl::ascii_isprint().
The comment was incorrectly saying that it includes all whitespace.
It doesn't; the only whitespace char it includes is ' '.
PiperOrigin-RevId: 416112524
--
d83327800159c07002b6865e21232a12463e02dd by Abseil Team <absl-team@google.com>:
Internal change
PiperOrigin-RevId: 416099978
--
baf11e9ca42ca9140cdbf8075f971db8d65b1195 by Ilya Tokar <tokarip@google.com>:
Prevent compiler from optimizing Group_Match* benchmarks away.
Currently we benchmark single store of precomputed value.
Not all affected benchmarks show performance changes:
BM_Group_Match 0.53ns ± 1% 0.53ns ± 0% -0.42% (p=0.038 n=10+10)
BM_Group_MatchEmpty 0.26ns ± 1% 0.26ns ± 1% ~ (p=1.000 n=10+10)
BM_Group_MatchEmptyOrDeleted 0.26ns ± 1% 0.26ns ± 1% ~ (p=0.121 n=10+10)
BM_Group_CountLeadingEmptyOrDeleted 0.26ns ± 1% 0.45ns ± 0% +70.05% (p=0.000 n=10+8)
BM_Group_MatchFirstEmptyOrDeleted 0.26ns ± 0% 0.44ns ± 1% +65.91% (p=0.000 n=8+9)
But inspecting the generated code shows the difference,
e. g. BM_Group_MatchFirstEmptyOrDeleted
Before:
add $0xffffffffffffffff,%rbx
jne 30
After:
pcmpeqd %xmm0,%xmm0
pcmpgtb -0x30(%rbp),%xmm0
pmovmskb %xmm0,%eax
add: 0x23$0xffffffffffffffff,%rbx
jne 40
PiperOrigin-RevId: 416083515
--
122fbff893dc4571b3e75e4b241eb4495b925610 by Abseil Team <absl-team@google.com>:
Put namespace guard in ABSL_DECLARE_FLAG to make declaring a flag in a namespace a compiler error instead of a linker error.
PiperOrigin-RevId: 416036072
--
020fd8a20f5fa319e948846e003391fcb9e03868 by Ilya Tokar <tokarip@google.com>:
Make Cord::InlineRep::set_data unconditionally zero out memory.
Currently there is a single case where we don't zero out memory
as an optimization. Unconditional zeroing doesn't show any changes
in benchmarks, except for the unrelated improvement:
BM_CordPartialCopyToCord/1M/1 12.6ns ± 4% 12.6ns ± 4% ~ (p=0.857 n=16+19)
BM_CordPartialCopyToCord/1M/128 44.9ns ± 7% 45.0ns ± 3% ~ (p=0.468 n=18+17)
BM_CordPartialCopyToCord/1M/1k 64.5ns ± 4% 61.4ns ± 4% -4.82% (p=0.000 n=19+17)
BM_CordPartialCopyToCord/1M/8k 139ns ± 3% 128ns ±15% -7.76% (p=0.009 n=17+20)
BM_CordPartialCopyToCord/1M/16k 193ns ± 6% 168ns ± 6% -13.17% (p=0.000 n=17+17)
BM_CordPartialCopyToCord/4M/16k 199ns ± 4% 177ns ± 4% -11.36% (p=0.000 n=17+18)
BM_CordPartialCopyToCord/4M/32k 275ns ± 3% 250ns ± 4% -9.00% (p=0.000 n=18+18)
BM_CordPartialCopyToCord/4M/64k 291ns ± 4% 266ns ± 5% -8.53% (p=0.000 n=18+16)
BM_CordPartialCopyToCord/4M/128k 322ns ± 5% 291ns ± 4% -9.43% (p=0.000 n=20+18)
BM_CordPartialCopyToCord/8M/32k 281ns ± 5% 251ns ± 4% -10.38% (p=0.000 n=20+16)
BM_CordPartialCopyToCord/8M/64k 293ns ± 6% 267ns ± 4% -8.87% (p=0.000 n=16+19)
BM_CordPartialCopyToCord/8M/128k 334ns ± 3% 305ns ± 2% -8.56% (p=0.000 n=17+16)
This is clearly an alignmnet effect since number of the executed instructions is the same:
M_CordPartialCopyToCord/1M/1 155 ± 0% 155 ± 0% ~ (all samples are equal)
BM_CordPartialCopyToCord/1M/128 446 ± 0% 446 ± 0% ~ (p=0.332 n=36+39)
BM_CordPartialCopyToCord/1M/1k 473 ± 0% 473 ± 0% ~ (p=0.969 n=40+40)
BM_CordPartialCopyToCord/1M/8k 808 ± 0% 808 ± 0% ~ (p=0.127 n=40+39)
BM_CordPartialCopyToCord/1M/16k 957 ± 0% 957 ± 0% ~ (p=0.532 n=40+40)
BM_CordPartialCopyToCord/4M/16k 952 ± 0% 952 ± 0% ~ (p=0.686 n=39+39)
BM_CordPartialCopyToCord/4M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.690 n=40+40)
BM_CordPartialCopyToCord/4M/64k 1.23k ± 0% 1.23k ± 0% ~ (p=0.182 n=40+39)
BM_CordPartialCopyToCord/4M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.711 n=40+40)
BM_CordPartialCopyToCord/8M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.697 n=40+40)
BM_CordPartialCopyToCord/8M/64k 1.23k ± 0% 1.23k ± 0% +0.00% (p=0.049 n=40+40)
BM_CordPartialCopyToCord/8M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.507 n=40+40)
This makes code simpler and doesn't regress performance.
PiperOrigin-RevId: 415560574
--
37305b2690b31682088749e4d62f40d7095bdc54 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 415558737
--
86aaed569b9e743c1eb813a5f48def978a793db3 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415515201
--
6cdb8786cdcb4fa0b8a4b72fc98940877d1fdeff by Abseil Team <absl-team@google.com>:
Update SubmitMutexProfileData to accept wait_cycles instead of wait_timestamp
PiperOrigin-RevId: 415360871
--
9f979d307aa16ad09f214e04876cbe84395c0901 by Abseil Team <absl-team@google.com>:
absl::flat_hash_set compiles with -Wconversion -Wsign-compare
PiperOrigin-RevId: 415357498
--
9eceb14174708f15e61259d449b214a8a4c7f9e7 by Abseil Team <absl-team@google.com>:
Fix AddressIsReadable for the corner case of (aligned) addr == NULL.
PiperOrigin-RevId: 415307792
--
1a39ffe55898375e2d7f88c17c99db5a1b95b313 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415162872
--
64378549b110d5f5762185a5906c520fba70f0e7 by Abseil Team <absl-team@google.com>:
Fix a typo in the comments
PiperOrigin-RevId: 415088461
--
41aae8322e913b82710153c22b97c611fdb6e1fb by Abseil Team <absl-team@google.com>:
Switch from `connect` to `rt_sigreturn` -- the latter is much less problematic
for system call sandboxes.
PiperOrigin-RevId: 415073965
--
870c5e3388b6a35611bff538626fe7a1c8c87171 by Abseil Team <absl-team@google.com>:
Add ABSL_HAVE_HWADDRESS_SANITIZER and ABSL_HAVE_LEAK_SANITIZER
PiperOrigin-RevId: 414871189
--
f213ed60a66b58da7ac40555adfb1d529ff0a4db by Derek Mauro <dmauro@google.com>:
Remove reference to __SANITIZE_MEMORY__, which does not exist
It appears to have been copied by pattern matching from the ASAN/TSAN
code blocks.
https://github.com/gcc-mirror/gcc/blob/f47662204de27f7685699eeef89aa173ccf32d85/gcc/cppbuiltin.c#L79-L126
PiperOrigin-RevId: 414806587
--
b152891e73ab515f397ceb53f66c8ee2f33863ea by Abseil Team <absl-team@google.com>:
Rollback previous commit: SYS_open is not defined in certain environments.
PiperOrigin-RevId: 414521820
--
5a1cbb282331023902e1374dd0d920c4effbe47f by Abseil Team <absl-team@google.com>:
Use syscall(SYS_open, ...) instead of open() to avoid possible symbol
interposition.
Also add some warning notes.
PiperOrigin-RevId: 414508186
--
1824d6593612710aafdc599a89b0adced7d787f6 by Abseil Team <absl-team@google.com>:
Correct aarch64 macro check
The macro is __aarch64__, not __arch64__.
PiperOrigin-RevId: 414446225
--
a1536a57b64dfd53945d33a01cfc08b18c99c97b by Abseil Team <absl-team@google.com>:
Fix backwards comment in the last commit.
PiperOrigin-RevId: 414281214
--
11ac021ba779513667a31cf2563ddafc57d6d913 by Abseil Team <absl-team@google.com>:
AddressIsReadable() didn't work correctly on ARM when the given pointer was
misaligned at the end of the page.
Fix that by aligning the pointer on an 8-byte boundary before checking it.
PiperOrigin-RevId: 414203863
GitOrigin-RevId: 07240ca7822d007cdcc79f2c40bd58b2c2010348
Change-Id: If5f129194d59f5c9e5d84efd8cd9e17a70e072ab
3 years ago
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_Group_CountLeadingEmptyOrDeleted);
|
|
|
|
|
|
|
|
void BM_Group_MatchFirstEmptyOrDeleted(benchmark::State& state) {
|
|
|
|
std::array<ctrl_t, Group::kWidth> group;
|
|
|
|
Iota(group.begin(), group.end(), -2);
|
|
|
|
Group g{group.data()};
|
Export of internal Abseil changes
--
07240ca7822d007cdcc79f2c40bd58b2c2010348 by Abseil Team <absl-team@google.com>:
Correct the comment from "AlphaNum" to "Arg".
PiperOrigin-RevId: 416139192
--
adcba4a6b3763626e1db7b1e8c108b3114903557 by Martijn Vels <mvels@google.com>:
Fix NewExternalRep() to require data being non-empty, and remove nullptr return.
PiperOrigin-RevId: 416135865
--
c0d14cd918fb16f15d1d84de9284b5c5ecc1f8f2 by Abseil Team <absl-team@google.com>:
Fix doc comment for absl::ascii_isprint().
The comment was incorrectly saying that it includes all whitespace.
It doesn't; the only whitespace char it includes is ' '.
PiperOrigin-RevId: 416112524
--
d83327800159c07002b6865e21232a12463e02dd by Abseil Team <absl-team@google.com>:
Internal change
PiperOrigin-RevId: 416099978
--
baf11e9ca42ca9140cdbf8075f971db8d65b1195 by Ilya Tokar <tokarip@google.com>:
Prevent compiler from optimizing Group_Match* benchmarks away.
Currently we benchmark single store of precomputed value.
Not all affected benchmarks show performance changes:
BM_Group_Match 0.53ns ± 1% 0.53ns ± 0% -0.42% (p=0.038 n=10+10)
BM_Group_MatchEmpty 0.26ns ± 1% 0.26ns ± 1% ~ (p=1.000 n=10+10)
BM_Group_MatchEmptyOrDeleted 0.26ns ± 1% 0.26ns ± 1% ~ (p=0.121 n=10+10)
BM_Group_CountLeadingEmptyOrDeleted 0.26ns ± 1% 0.45ns ± 0% +70.05% (p=0.000 n=10+8)
BM_Group_MatchFirstEmptyOrDeleted 0.26ns ± 0% 0.44ns ± 1% +65.91% (p=0.000 n=8+9)
But inspecting the generated code shows the difference,
e. g. BM_Group_MatchFirstEmptyOrDeleted
Before:
add $0xffffffffffffffff,%rbx
jne 30
After:
pcmpeqd %xmm0,%xmm0
pcmpgtb -0x30(%rbp),%xmm0
pmovmskb %xmm0,%eax
add: 0x23$0xffffffffffffffff,%rbx
jne 40
PiperOrigin-RevId: 416083515
--
122fbff893dc4571b3e75e4b241eb4495b925610 by Abseil Team <absl-team@google.com>:
Put namespace guard in ABSL_DECLARE_FLAG to make declaring a flag in a namespace a compiler error instead of a linker error.
PiperOrigin-RevId: 416036072
--
020fd8a20f5fa319e948846e003391fcb9e03868 by Ilya Tokar <tokarip@google.com>:
Make Cord::InlineRep::set_data unconditionally zero out memory.
Currently there is a single case where we don't zero out memory
as an optimization. Unconditional zeroing doesn't show any changes
in benchmarks, except for the unrelated improvement:
BM_CordPartialCopyToCord/1M/1 12.6ns ± 4% 12.6ns ± 4% ~ (p=0.857 n=16+19)
BM_CordPartialCopyToCord/1M/128 44.9ns ± 7% 45.0ns ± 3% ~ (p=0.468 n=18+17)
BM_CordPartialCopyToCord/1M/1k 64.5ns ± 4% 61.4ns ± 4% -4.82% (p=0.000 n=19+17)
BM_CordPartialCopyToCord/1M/8k 139ns ± 3% 128ns ±15% -7.76% (p=0.009 n=17+20)
BM_CordPartialCopyToCord/1M/16k 193ns ± 6% 168ns ± 6% -13.17% (p=0.000 n=17+17)
BM_CordPartialCopyToCord/4M/16k 199ns ± 4% 177ns ± 4% -11.36% (p=0.000 n=17+18)
BM_CordPartialCopyToCord/4M/32k 275ns ± 3% 250ns ± 4% -9.00% (p=0.000 n=18+18)
BM_CordPartialCopyToCord/4M/64k 291ns ± 4% 266ns ± 5% -8.53% (p=0.000 n=18+16)
BM_CordPartialCopyToCord/4M/128k 322ns ± 5% 291ns ± 4% -9.43% (p=0.000 n=20+18)
BM_CordPartialCopyToCord/8M/32k 281ns ± 5% 251ns ± 4% -10.38% (p=0.000 n=20+16)
BM_CordPartialCopyToCord/8M/64k 293ns ± 6% 267ns ± 4% -8.87% (p=0.000 n=16+19)
BM_CordPartialCopyToCord/8M/128k 334ns ± 3% 305ns ± 2% -8.56% (p=0.000 n=17+16)
This is clearly an alignmnet effect since number of the executed instructions is the same:
M_CordPartialCopyToCord/1M/1 155 ± 0% 155 ± 0% ~ (all samples are equal)
BM_CordPartialCopyToCord/1M/128 446 ± 0% 446 ± 0% ~ (p=0.332 n=36+39)
BM_CordPartialCopyToCord/1M/1k 473 ± 0% 473 ± 0% ~ (p=0.969 n=40+40)
BM_CordPartialCopyToCord/1M/8k 808 ± 0% 808 ± 0% ~ (p=0.127 n=40+39)
BM_CordPartialCopyToCord/1M/16k 957 ± 0% 957 ± 0% ~ (p=0.532 n=40+40)
BM_CordPartialCopyToCord/4M/16k 952 ± 0% 952 ± 0% ~ (p=0.686 n=39+39)
BM_CordPartialCopyToCord/4M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.690 n=40+40)
BM_CordPartialCopyToCord/4M/64k 1.23k ± 0% 1.23k ± 0% ~ (p=0.182 n=40+39)
BM_CordPartialCopyToCord/4M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.711 n=40+40)
BM_CordPartialCopyToCord/8M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.697 n=40+40)
BM_CordPartialCopyToCord/8M/64k 1.23k ± 0% 1.23k ± 0% +0.00% (p=0.049 n=40+40)
BM_CordPartialCopyToCord/8M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.507 n=40+40)
This makes code simpler and doesn't regress performance.
PiperOrigin-RevId: 415560574
--
37305b2690b31682088749e4d62f40d7095bdc54 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 415558737
--
86aaed569b9e743c1eb813a5f48def978a793db3 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415515201
--
6cdb8786cdcb4fa0b8a4b72fc98940877d1fdeff by Abseil Team <absl-team@google.com>:
Update SubmitMutexProfileData to accept wait_cycles instead of wait_timestamp
PiperOrigin-RevId: 415360871
--
9f979d307aa16ad09f214e04876cbe84395c0901 by Abseil Team <absl-team@google.com>:
absl::flat_hash_set compiles with -Wconversion -Wsign-compare
PiperOrigin-RevId: 415357498
--
9eceb14174708f15e61259d449b214a8a4c7f9e7 by Abseil Team <absl-team@google.com>:
Fix AddressIsReadable for the corner case of (aligned) addr == NULL.
PiperOrigin-RevId: 415307792
--
1a39ffe55898375e2d7f88c17c99db5a1b95b313 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415162872
--
64378549b110d5f5762185a5906c520fba70f0e7 by Abseil Team <absl-team@google.com>:
Fix a typo in the comments
PiperOrigin-RevId: 415088461
--
41aae8322e913b82710153c22b97c611fdb6e1fb by Abseil Team <absl-team@google.com>:
Switch from `connect` to `rt_sigreturn` -- the latter is much less problematic
for system call sandboxes.
PiperOrigin-RevId: 415073965
--
870c5e3388b6a35611bff538626fe7a1c8c87171 by Abseil Team <absl-team@google.com>:
Add ABSL_HAVE_HWADDRESS_SANITIZER and ABSL_HAVE_LEAK_SANITIZER
PiperOrigin-RevId: 414871189
--
f213ed60a66b58da7ac40555adfb1d529ff0a4db by Derek Mauro <dmauro@google.com>:
Remove reference to __SANITIZE_MEMORY__, which does not exist
It appears to have been copied by pattern matching from the ASAN/TSAN
code blocks.
https://github.com/gcc-mirror/gcc/blob/f47662204de27f7685699eeef89aa173ccf32d85/gcc/cppbuiltin.c#L79-L126
PiperOrigin-RevId: 414806587
--
b152891e73ab515f397ceb53f66c8ee2f33863ea by Abseil Team <absl-team@google.com>:
Rollback previous commit: SYS_open is not defined in certain environments.
PiperOrigin-RevId: 414521820
--
5a1cbb282331023902e1374dd0d920c4effbe47f by Abseil Team <absl-team@google.com>:
Use syscall(SYS_open, ...) instead of open() to avoid possible symbol
interposition.
Also add some warning notes.
PiperOrigin-RevId: 414508186
--
1824d6593612710aafdc599a89b0adced7d787f6 by Abseil Team <absl-team@google.com>:
Correct aarch64 macro check
The macro is __aarch64__, not __arch64__.
PiperOrigin-RevId: 414446225
--
a1536a57b64dfd53945d33a01cfc08b18c99c97b by Abseil Team <absl-team@google.com>:
Fix backwards comment in the last commit.
PiperOrigin-RevId: 414281214
--
11ac021ba779513667a31cf2563ddafc57d6d913 by Abseil Team <absl-team@google.com>:
AddressIsReadable() didn't work correctly on ARM when the given pointer was
misaligned at the end of the page.
Fix that by aligning the pointer on an 8-byte boundary before checking it.
PiperOrigin-RevId: 414203863
GitOrigin-RevId: 07240ca7822d007cdcc79f2c40bd58b2c2010348
Change-Id: If5f129194d59f5c9e5d84efd8cd9e17a70e072ab
3 years ago
|
|
|
for (auto _ : state) {
|
|
|
|
::benchmark::DoNotOptimize(g);
|
|
|
|
::benchmark::DoNotOptimize(g.MaskEmptyOrDeleted().LowestBitSet());
|
Export of internal Abseil changes
--
07240ca7822d007cdcc79f2c40bd58b2c2010348 by Abseil Team <absl-team@google.com>:
Correct the comment from "AlphaNum" to "Arg".
PiperOrigin-RevId: 416139192
--
adcba4a6b3763626e1db7b1e8c108b3114903557 by Martijn Vels <mvels@google.com>:
Fix NewExternalRep() to require data being non-empty, and remove nullptr return.
PiperOrigin-RevId: 416135865
--
c0d14cd918fb16f15d1d84de9284b5c5ecc1f8f2 by Abseil Team <absl-team@google.com>:
Fix doc comment for absl::ascii_isprint().
The comment was incorrectly saying that it includes all whitespace.
It doesn't; the only whitespace char it includes is ' '.
PiperOrigin-RevId: 416112524
--
d83327800159c07002b6865e21232a12463e02dd by Abseil Team <absl-team@google.com>:
Internal change
PiperOrigin-RevId: 416099978
--
baf11e9ca42ca9140cdbf8075f971db8d65b1195 by Ilya Tokar <tokarip@google.com>:
Prevent compiler from optimizing Group_Match* benchmarks away.
Currently we benchmark single store of precomputed value.
Not all affected benchmarks show performance changes:
BM_Group_Match 0.53ns ± 1% 0.53ns ± 0% -0.42% (p=0.038 n=10+10)
BM_Group_MatchEmpty 0.26ns ± 1% 0.26ns ± 1% ~ (p=1.000 n=10+10)
BM_Group_MatchEmptyOrDeleted 0.26ns ± 1% 0.26ns ± 1% ~ (p=0.121 n=10+10)
BM_Group_CountLeadingEmptyOrDeleted 0.26ns ± 1% 0.45ns ± 0% +70.05% (p=0.000 n=10+8)
BM_Group_MatchFirstEmptyOrDeleted 0.26ns ± 0% 0.44ns ± 1% +65.91% (p=0.000 n=8+9)
But inspecting the generated code shows the difference,
e. g. BM_Group_MatchFirstEmptyOrDeleted
Before:
add $0xffffffffffffffff,%rbx
jne 30
After:
pcmpeqd %xmm0,%xmm0
pcmpgtb -0x30(%rbp),%xmm0
pmovmskb %xmm0,%eax
add: 0x23$0xffffffffffffffff,%rbx
jne 40
PiperOrigin-RevId: 416083515
--
122fbff893dc4571b3e75e4b241eb4495b925610 by Abseil Team <absl-team@google.com>:
Put namespace guard in ABSL_DECLARE_FLAG to make declaring a flag in a namespace a compiler error instead of a linker error.
PiperOrigin-RevId: 416036072
--
020fd8a20f5fa319e948846e003391fcb9e03868 by Ilya Tokar <tokarip@google.com>:
Make Cord::InlineRep::set_data unconditionally zero out memory.
Currently there is a single case where we don't zero out memory
as an optimization. Unconditional zeroing doesn't show any changes
in benchmarks, except for the unrelated improvement:
BM_CordPartialCopyToCord/1M/1 12.6ns ± 4% 12.6ns ± 4% ~ (p=0.857 n=16+19)
BM_CordPartialCopyToCord/1M/128 44.9ns ± 7% 45.0ns ± 3% ~ (p=0.468 n=18+17)
BM_CordPartialCopyToCord/1M/1k 64.5ns ± 4% 61.4ns ± 4% -4.82% (p=0.000 n=19+17)
BM_CordPartialCopyToCord/1M/8k 139ns ± 3% 128ns ±15% -7.76% (p=0.009 n=17+20)
BM_CordPartialCopyToCord/1M/16k 193ns ± 6% 168ns ± 6% -13.17% (p=0.000 n=17+17)
BM_CordPartialCopyToCord/4M/16k 199ns ± 4% 177ns ± 4% -11.36% (p=0.000 n=17+18)
BM_CordPartialCopyToCord/4M/32k 275ns ± 3% 250ns ± 4% -9.00% (p=0.000 n=18+18)
BM_CordPartialCopyToCord/4M/64k 291ns ± 4% 266ns ± 5% -8.53% (p=0.000 n=18+16)
BM_CordPartialCopyToCord/4M/128k 322ns ± 5% 291ns ± 4% -9.43% (p=0.000 n=20+18)
BM_CordPartialCopyToCord/8M/32k 281ns ± 5% 251ns ± 4% -10.38% (p=0.000 n=20+16)
BM_CordPartialCopyToCord/8M/64k 293ns ± 6% 267ns ± 4% -8.87% (p=0.000 n=16+19)
BM_CordPartialCopyToCord/8M/128k 334ns ± 3% 305ns ± 2% -8.56% (p=0.000 n=17+16)
This is clearly an alignmnet effect since number of the executed instructions is the same:
M_CordPartialCopyToCord/1M/1 155 ± 0% 155 ± 0% ~ (all samples are equal)
BM_CordPartialCopyToCord/1M/128 446 ± 0% 446 ± 0% ~ (p=0.332 n=36+39)
BM_CordPartialCopyToCord/1M/1k 473 ± 0% 473 ± 0% ~ (p=0.969 n=40+40)
BM_CordPartialCopyToCord/1M/8k 808 ± 0% 808 ± 0% ~ (p=0.127 n=40+39)
BM_CordPartialCopyToCord/1M/16k 957 ± 0% 957 ± 0% ~ (p=0.532 n=40+40)
BM_CordPartialCopyToCord/4M/16k 952 ± 0% 952 ± 0% ~ (p=0.686 n=39+39)
BM_CordPartialCopyToCord/4M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.690 n=40+40)
BM_CordPartialCopyToCord/4M/64k 1.23k ± 0% 1.23k ± 0% ~ (p=0.182 n=40+39)
BM_CordPartialCopyToCord/4M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.711 n=40+40)
BM_CordPartialCopyToCord/8M/32k 1.12k ± 0% 1.12k ± 0% ~ (p=0.697 n=40+40)
BM_CordPartialCopyToCord/8M/64k 1.23k ± 0% 1.23k ± 0% +0.00% (p=0.049 n=40+40)
BM_CordPartialCopyToCord/8M/128k 1.44k ± 0% 1.44k ± 0% ~ (p=0.507 n=40+40)
This makes code simpler and doesn't regress performance.
PiperOrigin-RevId: 415560574
--
37305b2690b31682088749e4d62f40d7095bdc54 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 415558737
--
86aaed569b9e743c1eb813a5f48def978a793db3 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415515201
--
6cdb8786cdcb4fa0b8a4b72fc98940877d1fdeff by Abseil Team <absl-team@google.com>:
Update SubmitMutexProfileData to accept wait_cycles instead of wait_timestamp
PiperOrigin-RevId: 415360871
--
9f979d307aa16ad09f214e04876cbe84395c0901 by Abseil Team <absl-team@google.com>:
absl::flat_hash_set compiles with -Wconversion -Wsign-compare
PiperOrigin-RevId: 415357498
--
9eceb14174708f15e61259d449b214a8a4c7f9e7 by Abseil Team <absl-team@google.com>:
Fix AddressIsReadable for the corner case of (aligned) addr == NULL.
PiperOrigin-RevId: 415307792
--
1a39ffe55898375e2d7f88c17c99db5a1b95b313 by Martijn Vels <mvels@google.com>:
Internal change
PiperOrigin-RevId: 415162872
--
64378549b110d5f5762185a5906c520fba70f0e7 by Abseil Team <absl-team@google.com>:
Fix a typo in the comments
PiperOrigin-RevId: 415088461
--
41aae8322e913b82710153c22b97c611fdb6e1fb by Abseil Team <absl-team@google.com>:
Switch from `connect` to `rt_sigreturn` -- the latter is much less problematic
for system call sandboxes.
PiperOrigin-RevId: 415073965
--
870c5e3388b6a35611bff538626fe7a1c8c87171 by Abseil Team <absl-team@google.com>:
Add ABSL_HAVE_HWADDRESS_SANITIZER and ABSL_HAVE_LEAK_SANITIZER
PiperOrigin-RevId: 414871189
--
f213ed60a66b58da7ac40555adfb1d529ff0a4db by Derek Mauro <dmauro@google.com>:
Remove reference to __SANITIZE_MEMORY__, which does not exist
It appears to have been copied by pattern matching from the ASAN/TSAN
code blocks.
https://github.com/gcc-mirror/gcc/blob/f47662204de27f7685699eeef89aa173ccf32d85/gcc/cppbuiltin.c#L79-L126
PiperOrigin-RevId: 414806587
--
b152891e73ab515f397ceb53f66c8ee2f33863ea by Abseil Team <absl-team@google.com>:
Rollback previous commit: SYS_open is not defined in certain environments.
PiperOrigin-RevId: 414521820
--
5a1cbb282331023902e1374dd0d920c4effbe47f by Abseil Team <absl-team@google.com>:
Use syscall(SYS_open, ...) instead of open() to avoid possible symbol
interposition.
Also add some warning notes.
PiperOrigin-RevId: 414508186
--
1824d6593612710aafdc599a89b0adced7d787f6 by Abseil Team <absl-team@google.com>:
Correct aarch64 macro check
The macro is __aarch64__, not __arch64__.
PiperOrigin-RevId: 414446225
--
a1536a57b64dfd53945d33a01cfc08b18c99c97b by Abseil Team <absl-team@google.com>:
Fix backwards comment in the last commit.
PiperOrigin-RevId: 414281214
--
11ac021ba779513667a31cf2563ddafc57d6d913 by Abseil Team <absl-team@google.com>:
AddressIsReadable() didn't work correctly on ARM when the given pointer was
misaligned at the end of the page.
Fix that by aligning the pointer on an 8-byte boundary before checking it.
PiperOrigin-RevId: 414203863
GitOrigin-RevId: 07240ca7822d007cdcc79f2c40bd58b2c2010348
Change-Id: If5f129194d59f5c9e5d84efd8cd9e17a70e072ab
3 years ago
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_Group_MatchFirstEmptyOrDeleted);
|
|
|
|
|
|
|
|
void BM_DropDeletes(benchmark::State& state) {
|
|
|
|
constexpr size_t capacity = (1 << 20) - 1;
|
|
|
|
std::vector<ctrl_t> ctrl(capacity + 1 + Group::kWidth);
|
|
|
|
ctrl[capacity] = ctrl_t::kSentinel;
|
|
|
|
std::vector<ctrl_t> pattern = {ctrl_t::kEmpty, static_cast<ctrl_t>(2),
|
|
|
|
ctrl_t::kDeleted, static_cast<ctrl_t>(2),
|
|
|
|
ctrl_t::kEmpty, static_cast<ctrl_t>(1),
|
|
|
|
ctrl_t::kDeleted};
|
|
|
|
for (size_t i = 0; i != capacity; ++i) {
|
|
|
|
ctrl[i] = pattern[i % pattern.size()];
|
|
|
|
}
|
|
|
|
while (state.KeepRunning()) {
|
|
|
|
state.PauseTiming();
|
|
|
|
std::vector<ctrl_t> ctrl_copy = ctrl;
|
|
|
|
state.ResumeTiming();
|
|
|
|
ConvertDeletedToEmptyAndFullToDeleted(ctrl_copy.data(), capacity);
|
|
|
|
::benchmark::DoNotOptimize(ctrl_copy[capacity]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BENCHMARK(BM_DropDeletes);
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
} // namespace container_internal
|
|
|
|
ABSL_NAMESPACE_END
|
|
|
|
} // namespace absl
|
|
|
|
|
|
|
|
// These methods are here to make it easy to examine the assembly for targeted
|
|
|
|
// parts of the API.
|
|
|
|
auto CodegenAbslRawHashSetInt64Find(absl::container_internal::IntTable* table,
|
|
|
|
int64_t key) -> decltype(table->find(key)) {
|
|
|
|
return table->find(key);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool CodegenAbslRawHashSetInt64FindNeEnd(
|
|
|
|
absl::container_internal::IntTable* table, int64_t key) {
|
|
|
|
return table->find(key) != table->end();
|
|
|
|
}
|
|
|
|
|
|
|
|
auto CodegenAbslRawHashSetInt64Insert(absl::container_internal::IntTable* table,
|
|
|
|
int64_t key)
|
|
|
|
-> decltype(table->insert(key)) {
|
|
|
|
return table->insert(key);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool CodegenAbslRawHashSetInt64Contains(
|
|
|
|
absl::container_internal::IntTable* table, int64_t key) {
|
|
|
|
return table->contains(key);
|
|
|
|
}
|
|
|
|
|
|
|
|
void CodegenAbslRawHashSetInt64Iterate(
|
|
|
|
absl::container_internal::IntTable* table) {
|
|
|
|
for (auto x : *table) benchmark::DoNotOptimize(x);
|
|
|
|
}
|
|
|
|
|
|
|
|
int odr =
|
|
|
|
(::benchmark::DoNotOptimize(std::make_tuple(
|
|
|
|
&CodegenAbslRawHashSetInt64Find, &CodegenAbslRawHashSetInt64FindNeEnd,
|
|
|
|
&CodegenAbslRawHashSetInt64Insert, &CodegenAbslRawHashSetInt64Contains,
|
|
|
|
&CodegenAbslRawHashSetInt64Iterate)),
|
|
|
|
1);
|