From fbdeb5af0b0f89dec79efc371ba75f838ffad2e8 Mon Sep 17 00:00:00 2001
From: Protobuf Team Bot
Date: Wed, 13 Sep 2023 08:41:31 -0700
Subject: [PATCH] Add prefetching to arena allocations.

PiperOrigin-RevId: 565061139
---
Review notes (text between the "---" line above and the first "diff --git"
line is ignored when the patch is applied):

* MaybePrefetchForwards() now sizes its prefetch window with
  kPrefetchForwardsDegree instead of repeating `ABSL_CACHELINE_SIZE * 16`.
  The early-return threshold and the window size therefore cannot drift
  apart if the constant is tuned. The value is the same today, and
  MaybePrefetchBackwards() already uses its named constant for both.
* PROTOBUF_ALWAYS_INLINE on MaybePrefetchBackwards() is moved below the doc
  comment so the comment sits directly on the declaration, matching
  MaybePrefetchForwards().
* prefetch_ptr_ / prefetch_limit_ in the SerialArena(ArenaBlock*,
  ThreadSafeArena&) constructor use brace initializers like the
  neighbouring members of the same initializer list.
* These edits touch added lines only. Hunk line counts and the diffstat are
  unchanged, but the content no longer matches the commit id on the first
  "From" line.
* NOTE(review): this copy of the patch arrived with its line breaks
  collapsed and with angle-bracketed text missing (sender address, bare
  `#include`, `static_cast(...)`, `reinterpret_cast(...)`, `std::atomic`,
  `std::vector`). Line breaks, blank context lines and indentation below
  were reconstructed from the +/- markers and the hunk line counts; the
  missing text is left exactly as found. Regenerate the patch from the
  upstream commit before applying it.

 protobuf_deps.bzl                  |  6 +--
 src/google/protobuf/BUILD.bazel    |  1 +
 src/google/protobuf/arena.cc       | 13 +++---
 src/google/protobuf/serial_arena.h | 65 +++++++++++++++++++++++++++++-
 4 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/protobuf_deps.bzl b/protobuf_deps.bzl
index 690bfe097d..5a0f5d6cbd 100644
--- a/protobuf_deps.bzl
+++ b/protobuf_deps.bzl
@@ -149,7 +149,7 @@ def protobuf_deps():
     if not native.existing_rule("upb"):
         http_archive(
             name = "upb",
-            url = "https://github.com/protocolbuffers/protobuf/archive/f85a338d79f05938d1725fba3b2c603a8d06462e.zip",
-            strip_prefix = "protobuf-f85a338d79f05938d1725fba3b2c603a8d06462e/upb",
-            sha256 = "cd28ae63e40a146ec1a2d41e96f53e637aaa5d6c746e7120d013aafc65092882",
+            url = "https://github.com/protocolbuffers/protobuf/archive/7242c3619c6db9843614b2c865681bf397261be8.zip",
+            strip_prefix = "protobuf-7242c3619c6db9843614b2c865681bf397261be8/upb",
+            sha256 = "0fc581f5e5caaf30c7119a73f2cff5d45424e4a4f23a52ebba73e3df031ad1c6",
         )
diff --git a/src/google/protobuf/BUILD.bazel b/src/google/protobuf/BUILD.bazel
index 6e70f79336..dc954a4d98 100644
--- a/src/google/protobuf/BUILD.bazel
+++ b/src/google/protobuf/BUILD.bazel
@@ -331,6 +331,7 @@ cc_library(
         ":arena_cleanup",
         ":string_block",
         "//src/google/protobuf/stubs:lite",
+        "@com_google_absl//absl/base:prefetch",
         "@com_google_absl//absl/container:layout",
         "@com_google_absl//absl/log:absl_check",
         "@com_google_absl//absl/log:absl_log",
diff --git a/src/google/protobuf/arena.cc b/src/google/protobuf/arena.cc
index b34e33ebf3..33cabb1aa7 100644
--- a/src/google/protobuf/arena.cc
+++ b/src/google/protobuf/arena.cc
@@ -114,6 +114,9 @@ class GetDeallocator {
 SerialArena::SerialArena(ArenaBlock* b, ThreadSafeArena& parent)
     : ptr_{b->Pointer(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize)},
       limit_{b->Limit()},
+      prefetch_ptr_{
+          b->Pointer(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize)},
+      prefetch_limit_{b->Limit()},
       head_{b},
       space_allocated_{b->size},
       parent_{parent} {
@@ -130,9 +133,7 @@ SerialArena::SerialArena(FirstSerialArena, ArenaBlock* b,
                          ThreadSafeArena& parent)
     : head_{b}, space_allocated_{b->size}, parent_{parent} {
   if (b->IsSentry()) return;
-
-  set_ptr(b->Pointer(kBlockHeaderSize));
-  limit_ = b->Limit();
+  set_range(b->Pointer(kBlockHeaderSize), b->Limit());
 }
 
 std::vector SerialArena::PeekCleanupListForTesting() {
@@ -159,8 +160,7 @@ std::vector ThreadSafeArena::PeekCleanupListForTesting() {
 }
 
 void SerialArena::Init(ArenaBlock* b, size_t offset) {
-  set_ptr(b->Pointer(offset));
-  limit_ = b->Limit();
+  set_range(b->Pointer(offset), b->Limit());
   head_.store(b, std::memory_order_relaxed);
   space_used_.store(0, std::memory_order_relaxed);
   space_allocated_.store(b->size, std::memory_order_relaxed);
@@ -268,8 +268,7 @@ void SerialArena::AllocateNewBlock(size_t n) {
                                             /*used=*/used,
                                             /*allocated=*/mem.n, wasted);
   auto* new_head = new (mem.p) ArenaBlock{old_head, mem.n};
-  set_ptr(new_head->Pointer(kBlockHeaderSize));
-  limit_ = new_head->Limit();
+  set_range(new_head->Pointer(kBlockHeaderSize), new_head->Limit());
   // Previous writes must take effect before writing new head.
   head_.store(new_head, std::memory_order_release);
 
diff --git a/src/google/protobuf/serial_arena.h b/src/google/protobuf/serial_arena.h
index dd67a70b8e..349f941a42 100644
--- a/src/google/protobuf/serial_arena.h
+++ b/src/google/protobuf/serial_arena.h
@@ -12,6 +12,7 @@
 
 #include
 #include
+#include
 #include
 #include
 #include
@@ -21,6 +22,8 @@
 
 #include "google/protobuf/stubs/common.h"
 #include "absl/base/attributes.h"
+#include "absl/base/optimization.h"
+#include "absl/base/prefetch.h"
 #include "absl/log/absl_check.h"
 #include "absl/numeric/bits.h"
 #include "google/protobuf/arena_align.h"
@@ -29,7 +32,6 @@
 #include "google/protobuf/port.h"
 #include "google/protobuf/string_block.h"
 
-
 // Must be included last.
 #include "google/protobuf/port_def.inc"
 
@@ -225,6 +227,7 @@ class PROTOBUF_EXPORT SerialArena {
     PROTOBUF_UNPOISON_MEMORY_REGION(ret, n);
     *out = ret;
     set_ptr(reinterpret_cast(next));
+    MaybePrefetchForwards(reinterpret_cast(next));
     return true;
   }
 
@@ -251,6 +254,7 @@ class PROTOBUF_EXPORT SerialArena {
     set_ptr(reinterpret_cast(next));
     AddCleanupFromExisting(ret, destructor);
     ABSL_DCHECK_GE(limit_, ptr());
+    MaybePrefetchForwards(reinterpret_cast(next));
     return ret;
   }
 
@@ -279,10 +283,58 @@ class PROTOBUF_EXPORT SerialArena {
 
     PROTOBUF_UNPOISON_MEMORY_REGION(limit_ - n, n);
     limit_ -= n;
+    MaybePrefetchBackwards(limit_);
     ABSL_DCHECK_GE(limit_, ptr());
     cleanup::CreateNode(tag, limit_, elem, destructor);
   }
 
+  static constexpr ptrdiff_t kPrefetchForwardsDegree = ABSL_CACHELINE_SIZE * 16;
+  static constexpr ptrdiff_t kPrefetchBackwardsDegree = ABSL_CACHELINE_SIZE * 6;
+
+  // Prefetch the next kPrefetchForwardsDegree bytes after `prefetch_ptr_` and
+  // up to `prefetch_limit_`, if `next` is within kPrefetchForwardsDegree bytes
+  // of `prefetch_ptr_`.
+  PROTOBUF_ALWAYS_INLINE
+  void MaybePrefetchForwards(const char* next) {
+    ABSL_DCHECK(static_cast(prefetch_ptr_) == nullptr ||
+                static_cast(prefetch_ptr_) >= head());
+    if (PROTOBUF_PREDICT_TRUE(prefetch_ptr_ - next > kPrefetchForwardsDegree))
+      return;
+    if (PROTOBUF_PREDICT_TRUE(prefetch_ptr_ < prefetch_limit_)) {
+      const char* prefetch_ptr = std::max(next, prefetch_ptr_);
+      ABSL_DCHECK(prefetch_ptr != nullptr);
+      const char* end =
+          std::min(prefetch_limit_, prefetch_ptr + kPrefetchForwardsDegree);
+      for (; prefetch_ptr < end; prefetch_ptr += ABSL_CACHELINE_SIZE) {
+        absl::PrefetchToLocalCacheForWrite(prefetch_ptr);
+      }
+      prefetch_ptr_ = prefetch_ptr;
+    }
+  }
+
+  // Prefetch up to kPrefetchBackwardsDegree before `prefetch_limit_` and after
+  // `prefetch_ptr_`, if `limit` is within kPrefetchBackwardsDegree of
+  // `prefetch_limit_`.
+  PROTOBUF_ALWAYS_INLINE
+  void MaybePrefetchBackwards(const char* limit) {
+    ABSL_DCHECK(prefetch_limit_ == nullptr ||
+                static_cast(prefetch_limit_) <=
+                    static_cast(head()->Limit()));
+    if (PROTOBUF_PREDICT_TRUE(limit - prefetch_limit_ >
+                              kPrefetchBackwardsDegree))
+      return;
+    if (PROTOBUF_PREDICT_TRUE(prefetch_limit_ > prefetch_ptr_)) {
+      const char* prefetch_limit = std::min(limit, prefetch_limit_);
+      ABSL_DCHECK_NE(prefetch_limit, nullptr);
+      const char* end =
+          std::max(prefetch_ptr_, prefetch_limit - kPrefetchBackwardsDegree);
+      for (; prefetch_limit > end; prefetch_limit -= ABSL_CACHELINE_SIZE) {
+        absl::PrefetchToLocalCacheForWrite(prefetch_limit);
+      }
+      prefetch_limit_ = prefetch_limit;
+    }
+  }
+
  private:
   friend class ThreadSafeArena;
 
@@ -319,6 +371,11 @@ class PROTOBUF_EXPORT SerialArena {
   std::atomic ptr_{nullptr};
   // Limiting address up to which memory can be allocated from the head block.
   char* limit_ = nullptr;
+  // Current prefetch positions. Data from `ptr_` up to but not including
+  // `prefetch_ptr_` is software prefetched. Similarly, data from `limit_` down
+  // to but not including `prefetch_limit_` is software prefetched.
+  const char* prefetch_ptr_ = nullptr;
+  const char* prefetch_limit_ = nullptr;
 
   // The active string block.
   std::atomic string_block_{nullptr};
@@ -356,6 +413,12 @@ class PROTOBUF_EXPORT SerialArena {
   char* ptr() { return ptr_.load(std::memory_order_relaxed); }
   const char* ptr() const { return ptr_.load(std::memory_order_relaxed); }
   void set_ptr(char* ptr) { return ptr_.store(ptr, std::memory_order_relaxed); }
+  PROTOBUF_ALWAYS_INLINE void set_range(char* ptr, char* limit) {
+    set_ptr(ptr);
+    prefetch_ptr_ = ptr;
+    limit_ = limit;
+    prefetch_limit_ = limit;
+  }
 
   // Constructor is private as only New() should be used.
   inline SerialArena(ArenaBlock* b, ThreadSafeArena& parent);