Insert software prefetches into merge functions. This improves performance when hardware prefetchers are disabled on AMD machines.

PiperOrigin-RevId: 671468969
pull/18075/head
Protobuf Team Bot 3 months ago committed by Copybara-Service
parent 8aa0add7d3
commit d99336559e
  1. 1
      src/google/protobuf/extension_set.cc
  2. 9
      src/google/protobuf/port.h
  3. 4
      src/google/protobuf/repeated_ptr_field.cc

@ -971,6 +971,7 @@ size_t SizeOfUnion(ItX it_dest, ItX end_dest, ItY it_source, ItY end_source) {
void ExtensionSet::MergeFrom(const MessageLite* extendee,
const ExtensionSet& other) {
Prefetch5LinesFrom1Line(&other);
if (PROTOBUF_PREDICT_TRUE(!is_large())) {
if (PROTOBUF_PREDICT_TRUE(!other.is_large())) {
GrowCapacity(SizeOfUnion(flat_begin(), flat_end(), other.flat_begin(),

@ -306,6 +306,15 @@ inline PROTOBUF_ALWAYS_INLINE void Prefetch5LinesFrom7Lines(const void* ptr) {
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 704);
}
// Prefetch 5 64-byte cache lines starting from 1 cache-line ahead.
//
// Issues one PROTOBUF_PREFETCH_WITH_OFFSET request for each of the byte
// offsets 64, 128, 192, 256 and 320 relative to `ptr`. No request is issued
// for offset 0, i.e. the line that contains `ptr` itself.
// NOTE(review): presumably offset 0 is skipped because the caller reads that
// line immediately anyway -- confirm against the call sites.
//
// `ptr` is only handed to the prefetch macro; this function does not
// dereference it directly.
inline PROTOBUF_ALWAYS_INLINE void Prefetch5LinesFrom1Line(const void* ptr) {
// NOTE(review): the offsets are deliberately spelled out as literals, one
// request per line. The macro definition is not visible here; verify that it
// accepts non-literal offsets before folding these calls into a loop.
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 64);
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 128);
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 192);
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 256);
PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 320);
}
#if defined(NDEBUG) && ABSL_HAVE_BUILTIN(__builtin_unreachable)
[[noreturn]] ABSL_ATTRIBUTE_COLD PROTOBUF_ALWAYS_INLINE inline void
Unreachable() {

@ -132,6 +132,7 @@ memswap<ArenaOffsetHelper<RepeatedPtrFieldBase>::value>(
template <>
void RepeatedPtrFieldBase::MergeFrom<std::string>(
const RepeatedPtrFieldBase& from) {
Prefetch5LinesFrom1Line(&from);
ABSL_DCHECK_NE(&from, this);
int new_size = current_size_ + from.current_size_;
auto dst = reinterpret_cast<std::string**>(InternalReserve(new_size));
@ -159,6 +160,7 @@ void RepeatedPtrFieldBase::MergeFrom<std::string>(
int RepeatedPtrFieldBase::MergeIntoClearedMessages(
const RepeatedPtrFieldBase& from) {
Prefetch5LinesFrom1Line(&from);
auto dst = reinterpret_cast<MessageLite**>(elements() + current_size_);
auto src = reinterpret_cast<MessageLite* const*>(from.elements());
int count = std::min(ClearedCount(), from.current_size_);
@ -173,6 +175,7 @@ int RepeatedPtrFieldBase::MergeIntoClearedMessages(
void RepeatedPtrFieldBase::MergeFromConcreteMessage(
const RepeatedPtrFieldBase& from, CopyFn copy_fn) {
Prefetch5LinesFrom1Line(&from);
ABSL_DCHECK_NE(&from, this);
int new_size = current_size_ + from.current_size_;
void** dst = InternalReserve(new_size);
@ -196,6 +199,7 @@ void RepeatedPtrFieldBase::MergeFromConcreteMessage(
template <>
void RepeatedPtrFieldBase::MergeFrom<MessageLite>(
const RepeatedPtrFieldBase& from) {
Prefetch5LinesFrom1Line(&from);
ABSL_DCHECK_NE(&from, this);
ABSL_DCHECK(from.current_size_ > 0);
int new_size = current_size_ + from.current_size_;

Loading…
Cancel
Save