Export of internal Abseil changes

--
d3b99682554d339c42556680f4d65f83226005e2 by Martijn Vels <mvels@google.com>:

Inline CycleClock code and remove branch for x86 CycleClockSource function

This CL removes the relaxed load for x86 as there is no acquire price to pay on x86. It inlines UnscaledCycleClock::Now(), which is a single RDTSC op on x86, and likewise inlines CycleClock::Now() for x86. The inlining should mostly have secondary benefits such as reducing spills on outlined calls.

LTO may eventually hoist these functions inline for the hotspots, but it doesn't hurt to default inline this for all builds and let the compiler decide on the first pass.

The Perflab benchmark is noisy for the plain BM_Now, but the other benchmarks and the run on my local machine are clear.

------------- Local Benchy Benchmark
name                       old cpu/op  new cpu/op  delta
BM_Now                     3.41ns ± 1%  2.30ns ± 2%  -32.52%  (p=0.000 n=50+50)
BM_NowWithRegisterPresure  4.96ns ± 2%  4.19ns ± 2%  -15.57%  (p=0.000 n=56+55)
BM_NowWithCallback         3.30ns ± 2%  1.91ns ± 2%  -42.00%  (p=0.000 n=47+60)

------------- Perflab Benchy Benchmark
name                       old cpu/op  new cpu/op  delta
BM_Now                     8.20ns ±13%  4.32ns ±83%     ~     (p=0.413 n=4+5)
BM_NowWithRegisterPresure  7.91ns ± 1%  3.68ns ± 2%  -53.45%  (p=0.029 n=4+4)
BM_NowWithCallback         2.66ns ±13%  1.58ns ± 0%  -40.51%  (p=0.008 n=5+5)

PiperOrigin-RevId: 434474766
Change-Id: I991d987ae9233e50f09606c874055cf4c5a56300

--
b38330686a0af176a2679163e4d2fa1b90e2f667 by Laramie Leavitt <lar@google.com>:

Style, comment, and test updates

* Remove a redundant assert in uniform_real_distribution.
* Update comment in internal/generate_real.h
* Style updates to uniform_real_distribution_test
   mainly replacing TypeParam with real_type, using aliases for some limits, etc.
* Add a few more minor tests.

PiperOrigin-RevId: 433902174
Change-Id: Id75be8e24be2fb8f6aea05feec13e3ef320a7254

--
ab2da6047ff7f5dae3add3779fcddf73b03feabf by Abseil Team <absl-team@google.com>:

Remove declaration of method whose definition was previously removed.

PiperOrigin-RevId: 433507828
Change-Id: I0130b689813125250f7de2664e767e181f676c89

--
df0c87f4ec2c010691931c1bef9d26470a6e63a2 by Derek Mauro <dmauro@google.com>:

Internal change

PiperOrigin-RevId: 433289136
Change-Id: Iba157dc83ed99dafd17a2223d2504e49f8afbb9e

--
7445fa312f2995772900eda82467325b3401a17d by Martijn Vels <mvels@google.com>:

Optimize CordReader logic now that CONCAT is removed

This CL cleans up various helper functions and logic remaining from previous complex CONCAT logic that is no longer needed, simplifying the CordReader logic.

PiperOrigin-RevId: 433208748
Change-Id: I5f7b1883573c44e7c6f8af12c3cddbd197cb134d
GitOrigin-RevId: d3b99682554d339c42556680f4d65f83226005e2
pull/1134/head
Abseil Team 3 years ago committed by Andy Getz
parent c5a424a2a2
commit 5ed77665c4
  1. 53
      absl/base/internal/cycleclock.cc
  2. 69
      absl/base/internal/cycleclock.h
  3. 6
      absl/base/internal/unscaledcycleclock.cc
  4. 10
      absl/base/internal/unscaledcycleclock.h
  5. 4
      absl/random/internal/generate_real.h
  6. 2
      absl/random/uniform_real_distribution.h
  7. 121
      absl/random/uniform_real_distribution_test.cc
  8. 4
      absl/status/status.h
  9. 6
      absl/strings/cord.h

@ -25,6 +25,7 @@
#include <atomic>
#include <chrono> // NOLINT(build/c++11)
#include "absl/base/attributes.h"
#include "absl/base/internal/unscaledcycleclock.h"
namespace absl {
@ -33,44 +34,18 @@ namespace base_internal {
#if ABSL_USE_UNSCALED_CYCLECLOCK
namespace {
constexpr int32_t CycleClock::kShift;
constexpr double CycleClock::kFrequencyScale;
#ifdef NDEBUG
#ifdef ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
// Not debug mode and the UnscaledCycleClock frequency is the CPU
// frequency. Scale the CycleClock to prevent overflow if someone
// tries to represent the time as cycles since the Unix epoch.
static constexpr int32_t kShift = 1;
#else
// Not debug mode and the UnscaledCycleClock isn't operating at the
// raw CPU frequency. There is no need to do any scaling, so don't
// needlessly sacrifice precision.
static constexpr int32_t kShift = 0;
#endif
#else
// In debug mode use a different shift to discourage depending on a
// particular shift value.
static constexpr int32_t kShift = 2;
#endif
static constexpr double kFrequencyScale = 1.0 / (1 << kShift);
static std::atomic<CycleClockSourceFunc> cycle_clock_source;
ABSL_CONST_INIT std::atomic<CycleClockSourceFunc>
CycleClock::cycle_clock_source_{nullptr};
CycleClockSourceFunc LoadCycleClockSource() {
// Optimize for the common case (no callback) by first doing a relaxed load;
// this is significantly faster on non-x86 platforms.
if (cycle_clock_source.load(std::memory_order_relaxed) == nullptr) {
return nullptr;
}
// This corresponds to the store(std::memory_order_release) in
// CycleClockSource::Register, and makes sure that any updates made prior to
// registering the callback are visible to this thread before the callback is
// invoked.
return cycle_clock_source.load(std::memory_order_acquire);
void CycleClockSource::Register(CycleClockSourceFunc source) {
// Corresponds to the load(std::memory_order_acquire) in LoadCycleClockSource.
CycleClock::cycle_clock_source_.store(source, std::memory_order_release);
}
} // namespace
#ifdef _WIN32
int64_t CycleClock::Now() {
auto fn = LoadCycleClockSource();
if (fn == nullptr) {
@ -78,15 +53,7 @@ int64_t CycleClock::Now() {
}
return fn() >> kShift;
}
double CycleClock::Frequency() {
return kFrequencyScale * base_internal::UnscaledCycleClock::Frequency();
}
void CycleClockSource::Register(CycleClockSourceFunc source) {
// Corresponds to the load(std::memory_order_acquire) in LoadCycleClockSource.
cycle_clock_source.store(source, std::memory_order_release);
}
#endif
#else

@ -42,14 +42,19 @@
#ifndef ABSL_BASE_INTERNAL_CYCLECLOCK_H_
#define ABSL_BASE_INTERNAL_CYCLECLOCK_H_
#include <atomic>
#include <cstdint>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/unscaledcycleclock.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace base_internal {
using CycleClockSourceFunc = int64_t (*)();
// -----------------------------------------------------------------------------
// CycleClock
// -----------------------------------------------------------------------------
@ -68,12 +73,37 @@ class CycleClock {
static double Frequency();
private:
#if ABSL_USE_UNSCALED_CYCLECLOCK
static CycleClockSourceFunc LoadCycleClockSource();
#ifdef NDEBUG
#ifdef ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
// Not debug mode and the UnscaledCycleClock frequency is the CPU
// frequency. Scale the CycleClock to prevent overflow if someone
// tries to represent the time as cycles since the Unix epoch.
static constexpr int32_t kShift = 1;
#else
// Not debug mode and the UnscaledCycleClock isn't operating at the
// raw CPU frequency. There is no need to do any scaling, so don't
// needlessly sacrifice precision.
static constexpr int32_t kShift = 0;
#endif
#else // NDEBUG
// In debug mode use a different shift to discourage depending on a
// particular shift value.
static constexpr int32_t kShift = 2;
#endif // NDEBUG
static constexpr double kFrequencyScale = 1.0 / (1 << kShift);
ABSL_CONST_INIT static std::atomic<CycleClockSourceFunc> cycle_clock_source_;
#endif // ABSL_USE_UNSCALED_CYCLECLOCK
CycleClock() = delete; // no instances
CycleClock(const CycleClock&) = delete;
CycleClock& operator=(const CycleClock&) = delete;
};
using CycleClockSourceFunc = int64_t (*)();
friend class CycleClockSource;
};
class CycleClockSource {
private:
@ -87,6 +117,41 @@ class CycleClockSource {
static void Register(CycleClockSourceFunc source);
};
#if ABSL_USE_UNSCALED_CYCLECLOCK
// Returns the current value of cycle_clock_source_, i.e. the registered
// callback, or nullptr when the slot holds no callback.
//
// On targets other than x86-64 a relaxed load is tried first so that the
// no-callback case can return without performing the acquire load. On x86-64
// that pre-check is compiled out and only the acquire load is performed.
inline CycleClockSourceFunc CycleClock::LoadCycleClockSource() {
#if !defined(__x86_64__)
// Optimize for the common case (no callback) by first doing a relaxed load;
// this is significantly faster on non-x86 platforms.
if (cycle_clock_source_.load(std::memory_order_relaxed) == nullptr) {
return nullptr;
}
#endif // !defined(__x86_64__)
// This corresponds to the store(std::memory_order_release) in
// CycleClockSource::Register, and makes sure that any updates made prior to
// registering the callback are visible to this thread before the callback
// is invoked.
return cycle_clock_source_.load(std::memory_order_acquire);
}
// Accessing globals in inlined code in Windows DLLs is problematic.
#ifndef _WIN32
inline int64_t CycleClock::Now() {
auto fn = LoadCycleClockSource();
if (fn == nullptr) {
return base_internal::UnscaledCycleClock::Now() >> kShift;
}
return fn() >> kShift;
}
#endif
// Returns the CycleClock frequency: the UnscaledCycleClock frequency
// multiplied by kFrequencyScale, which mirrors the kShift scaling that Now()
// applies to the raw count.
inline double CycleClock::Frequency() {
  const double unscaled_frequency =
      base_internal::UnscaledCycleClock::Frequency();
  return kFrequencyScale * unscaled_frequency;
}
#endif // ABSL_USE_UNSCALED_CYCLECLOCK
} // namespace base_internal
ABSL_NAMESPACE_END
} // namespace absl

@ -49,12 +49,6 @@ double UnscaledCycleClock::Frequency() {
#elif defined(__x86_64__)
// x86-64 implementation: reads the timestamp counter with the `rdtsc`
// instruction and returns it as one 64-bit value.
int64_t UnscaledCycleClock::Now() {
uint64_t low, high;
// The "=a" and "=d" output constraints bind `low` to the A register and
// `high` to the D register, where `rdtsc` leaves the two halves of the count.
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
// Recombine the halves; the unsigned value is converted to int64_t on return.
return (high << 32) | low;
}
// Reports the frequency of the unscaled cycle clock for this platform, which
// is whatever base_internal::NominalCPUFrequency() returns.
double UnscaledCycleClock::Frequency() {
  const double nominal_frequency = base_internal::NominalCPUFrequency();
  return nominal_frequency;
}

@ -115,6 +115,16 @@ class UnscaledCycleClock {
friend class base_internal::UnscaledCycleClockWrapperForInitializeFrequency;
};
#if defined(__x86_64__)
// Inline x86-64 implementation: reads the timestamp counter with the `rdtsc`
// instruction and returns it as one 64-bit value.
inline int64_t UnscaledCycleClock::Now() {
uint64_t low, high;
// The "=a" and "=d" output constraints bind `low` to the A register and
// `high` to the D register, where `rdtsc` leaves the two halves of the count.
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
// Recombine the halves; the unsigned value is converted to int64_t on return.
return (high << 32) | low;
}
#endif
} // namespace base_internal
ABSL_NAMESPACE_END
} // namespace absl

@ -50,10 +50,10 @@ struct GenerateSignedTag {};
// inputs, otherwise it never returns 0.
//
// When a value in U(0,1) is required, use:
// Uniform64ToReal<double, PositiveValueT, true>;
// GenerateRealFromBits<double, PositiveValueT, true>;
//
// When a value in U(-1,1) is required, use:
// Uniform64ToReal<double, SignedValueT, false>;
// GenerateRealFromBits<double, SignedValueT, false>;
//
// This generates more distinct values than the mathematical equivalent
// `U(0, 1) * 2.0 - 1.0`.

@ -73,12 +73,12 @@ class uniform_real_distribution {
: lo_(lo), hi_(hi), range_(hi - lo) {
// [rand.dist.uni.real] preconditions 2 & 3
assert(lo <= hi);
// NOTE: For integral types, we can promote the range to an unsigned type,
// which gives full width of the range. However for real (fp) types, this
// is not possible, so value generation cannot use the full range of the
// real type.
assert(range_ <= (std::numeric_limits<result_type>::max)());
assert(std::isfinite(range_));
}
result_type a() const { return lo_; }

@ -78,62 +78,74 @@ TYPED_TEST(UniformRealDistributionTest, ParamSerializeTest) {
GTEST_SKIP()
<< "Skipping the test because we detected x87 floating-point semantics";
#endif
using DistributionType = absl::uniform_real_distribution<TypeParam>;
using real_type = TypeParam;
using param_type = typename DistributionType::param_type;
using param_type =
typename absl::uniform_real_distribution<TypeParam>::param_type;
constexpr const real_type kMax = std::numeric_limits<real_type>::max();
constexpr const real_type kMin = std::numeric_limits<real_type>::min();
constexpr const real_type kEpsilon =
std::numeric_limits<real_type>::epsilon();
constexpr const real_type kLowest =
std::numeric_limits<real_type>::lowest(); // -max
constexpr const TypeParam a{1152921504606846976};
const real_type kDenormMax = std::nextafter(kMin, real_type{0});
const real_type kOneMinusE =
std::nextafter(real_type{1}, real_type{0}); // 1 - epsilon
constexpr const real_type kTwo60{1152921504606846976}; // 2^60
constexpr int kCount = 1000;
absl::InsecureBitGen gen;
for (const auto& param : {
param_type(),
param_type(TypeParam(2.0), TypeParam(2.0)), // Same
param_type(TypeParam(-0.1), TypeParam(0.1)),
param_type(TypeParam(0.05), TypeParam(0.12)),
param_type(TypeParam(-0.05), TypeParam(0.13)),
param_type(TypeParam(-0.05), TypeParam(-0.02)),
param_type(real_type{0}, real_type{1}),
param_type(real_type(-0.1), real_type(0.1)),
param_type(real_type(0.05), real_type(0.12)),
param_type(real_type(-0.05), real_type(0.13)),
param_type(real_type(-0.05), real_type(-0.02)),
// range = 0
param_type(real_type(2.0), real_type(2.0)), // Same
// double range = 0
// 2^60 , 2^60 + 2^6
param_type(a, TypeParam(1152921504606847040)),
param_type(kTwo60, real_type(1152921504606847040)),
// 2^60 , 2^60 + 2^7
param_type(a, TypeParam(1152921504606847104)),
param_type(kTwo60, real_type(1152921504606847104)),
// double range = 2^8
// 2^60 , 2^60 + 2^8
param_type(a, TypeParam(1152921504606847232)),
param_type(kTwo60, real_type(1152921504606847232)),
// float range = 0
// 2^60 , 2^60 + 2^36
param_type(a, TypeParam(1152921573326323712)),
param_type(kTwo60, real_type(1152921573326323712)),
// 2^60 , 2^60 + 2^37
param_type(a, TypeParam(1152921642045800448)),
param_type(kTwo60, real_type(1152921642045800448)),
// float range = 2^38
// 2^60 , 2^60 + 2^38
param_type(a, TypeParam(1152921779484753920)),
param_type(kTwo60, real_type(1152921779484753920)),
// Limits
param_type(0, std::numeric_limits<TypeParam>::max()),
param_type(std::numeric_limits<TypeParam>::lowest(), 0),
param_type(0, std::numeric_limits<TypeParam>::epsilon()),
param_type(-std::numeric_limits<TypeParam>::epsilon(),
std::numeric_limits<TypeParam>::epsilon()),
param_type(std::numeric_limits<TypeParam>::epsilon(),
2 * std::numeric_limits<TypeParam>::epsilon()),
param_type(0, kMax),
param_type(kLowest, 0),
param_type(0, kMin),
param_type(0, kEpsilon),
param_type(-kEpsilon, kEpsilon),
param_type(0, kOneMinusE),
param_type(0, kDenormMax),
}) {
// Validate parameters.
const auto a = param.a();
const auto b = param.b();
absl::uniform_real_distribution<TypeParam> before(a, b);
DistributionType before(a, b);
EXPECT_EQ(before.a(), param.a());
EXPECT_EQ(before.b(), param.b());
{
absl::uniform_real_distribution<TypeParam> via_param(param);
DistributionType via_param(param);
EXPECT_EQ(via_param, before);
}
std::stringstream ss;
ss << before;
absl::uniform_real_distribution<TypeParam> after(TypeParam(1.0),
TypeParam(3.1));
DistributionType after(real_type(1.0), real_type(3.1));
EXPECT_NE(before.a(), after.a());
EXPECT_NE(before.b(), after.b());
@ -168,7 +180,7 @@ TYPED_TEST(UniformRealDistributionTest, ParamSerializeTest) {
}
}
if (!std::is_same<TypeParam, long double>::value) {
if (!std::is_same<real_type, long double>::value) {
// static_cast<double>(long double) can overflow.
std::string msg = absl::StrCat("Range: ", static_cast<double>(sample_min),
", ", static_cast<double>(sample_max));
@ -182,33 +194,52 @@ TYPED_TEST(UniformRealDistributionTest, ParamSerializeTest) {
#pragma warning(disable:4756) // Constant arithmetic overflow.
#endif
TYPED_TEST(UniformRealDistributionTest, ViolatesPreconditionsDeathTest) {
using DistributionType = absl::uniform_real_distribution<TypeParam>;
using real_type = TypeParam;
#if GTEST_HAS_DEATH_TEST
// Hi < Lo
EXPECT_DEBUG_DEATH(
{ absl::uniform_real_distribution<TypeParam> dist(10.0, 1.0); }, "");
EXPECT_DEBUG_DEATH({ DistributionType dist(10.0, 1.0); }, "");
// Hi - Lo > numeric_limits<>::max()
EXPECT_DEBUG_DEATH(
{
absl::uniform_real_distribution<TypeParam> dist(
std::numeric_limits<TypeParam>::lowest(),
std::numeric_limits<TypeParam>::max());
DistributionType dist(std::numeric_limits<real_type>::lowest(),
std::numeric_limits<real_type>::max());
},
"");
// kEpsilon guarantees that max + kEpsilon = inf.
const auto kEpsilon = std::nexttoward(
(std::numeric_limits<real_type>::max() -
std::nexttoward(std::numeric_limits<real_type>::max(), 0.0)) /
2,
std::numeric_limits<real_type>::max());
EXPECT_DEBUG_DEATH(
{
DistributionType dist(-kEpsilon, std::numeric_limits<real_type>::max());
},
"");
EXPECT_DEBUG_DEATH(
{
DistributionType dist(std::numeric_limits<real_type>::lowest(),
kEpsilon);
},
"");
#endif // GTEST_HAS_DEATH_TEST
#if defined(NDEBUG)
// opt-mode, for invalid parameters, will generate a garbage value,
// but should not enter an infinite loop.
absl::InsecureBitGen gen;
{
absl::uniform_real_distribution<TypeParam> dist(10.0, 1.0);
DistributionType dist(10.0, 1.0);
auto x = dist(gen);
EXPECT_FALSE(std::isnan(x)) << x;
}
{
absl::uniform_real_distribution<TypeParam> dist(
std::numeric_limits<TypeParam>::lowest(),
std::numeric_limits<TypeParam>::max());
DistributionType dist(std::numeric_limits<real_type>::lowest(),
std::numeric_limits<real_type>::max());
auto x = dist(gen);
// Infinite result.
EXPECT_FALSE(std::isfinite(x)) << x;
@ -220,6 +251,8 @@ TYPED_TEST(UniformRealDistributionTest, ViolatesPreconditionsDeathTest) {
#endif
TYPED_TEST(UniformRealDistributionTest, TestMoments) {
using DistributionType = absl::uniform_real_distribution<TypeParam>;
constexpr int kSize = 1000000;
std::vector<double> values(kSize);
@ -228,7 +261,7 @@ TYPED_TEST(UniformRealDistributionTest, TestMoments) {
// implementation.
absl::random_internal::pcg64_2018_engine rng{0x2B7E151628AED2A6};
absl::uniform_real_distribution<TypeParam> dist;
DistributionType dist;
for (int i = 0; i < kSize; i++) {
values[i] = dist(rng);
}
@ -242,9 +275,10 @@ TYPED_TEST(UniformRealDistributionTest, TestMoments) {
}
TYPED_TEST(UniformRealDistributionTest, ChiSquaredTest50) {
using DistributionType = absl::uniform_real_distribution<TypeParam>;
using param_type = typename DistributionType::param_type;
using absl::random_internal::kChiSquared;
using param_type =
typename absl::uniform_real_distribution<TypeParam>::param_type;
constexpr size_t kTrials = 100000;
constexpr int kBuckets = 50;
@ -269,7 +303,7 @@ TYPED_TEST(UniformRealDistributionTest, ChiSquaredTest50) {
const double factor = kBuckets / (max_val - min_val);
std::vector<int32_t> counts(kBuckets, 0);
absl::uniform_real_distribution<TypeParam> dist(param);
DistributionType dist(param);
for (size_t i = 0; i < kTrials; i++) {
auto x = dist(rng);
auto bucket = static_cast<size_t>((x - min_val) * factor);
@ -297,8 +331,11 @@ TYPED_TEST(UniformRealDistributionTest, ChiSquaredTest50) {
}
TYPED_TEST(UniformRealDistributionTest, StabilityTest) {
using DistributionType = absl::uniform_real_distribution<TypeParam>;
using real_type = TypeParam;
// absl::uniform_real_distribution stability relies only on
// random_internal::RandU64ToDouble and random_internal::RandU64ToFloat.
// random_internal::GenerateRealFromBits.
absl::random_internal::sequence_urbg urbg(
{0x0003eb76f6f7f755ull, 0xFFCEA50FDB2F953Bull, 0xC332DDEFBE6C5AA5ull,
0x6558218568AB9702ull, 0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull,
@ -307,9 +344,9 @@ TYPED_TEST(UniformRealDistributionTest, StabilityTest) {
std::vector<int> output(12);
absl::uniform_real_distribution<TypeParam> dist;
DistributionType dist;
std::generate(std::begin(output), std::end(output), [&] {
return static_cast<int>(TypeParam(1000000) * dist(urbg));
return static_cast<int>(real_type(1000000) * dist(urbg));
});
EXPECT_THAT(

@ -613,10 +613,6 @@ class Status final {
const status_internal::Payloads* GetPayloads() const;
status_internal::Payloads* GetPayloads();
// Takes ownership of payload.
static uintptr_t NewRep(
absl::StatusCode code, absl::string_view msg,
std::unique_ptr<status_internal::Payloads> payload);
static bool EqualsSlow(const absl::Status& a, const absl::Status& b);
// MSVC 14.0 limitation requires the const.

@ -768,6 +768,7 @@ class Cord {
// Returns nullptr if holding bytes
absl::cord_internal::CordRep* tree() const;
absl::cord_internal::CordRep* as_tree() const;
const char* as_chars() const;
// Returns non-null iff was holding a pointer
absl::cord_internal::CordRep* clear();
// Converts to pointer if necessary.
@ -1094,6 +1095,11 @@ inline const char* Cord::InlineRep::data() const {
return is_tree() ? nullptr : data_.as_chars();
}
// Returns the character pointer exposed by data_. Calling this while data_
// holds a tree is a precondition violation, checked by the assert below.
inline const char* Cord::InlineRep::as_chars() const {
  assert(!data_.is_tree());
  const char* const chars = data_.as_chars();
  return chars;
}
inline absl::cord_internal::CordRep* Cord::InlineRep::as_tree() const {
assert(data_.is_tree());
return data_.as_tree();

Loading…
Cancel
Save