`RepeatedPtrField::Add` is ~5% faster.

Moving the logic into one place allows removing a branch on `using_sso()`.

Side effect: the `.strtab` section increases in size (to store function names), and the `.text` section decreases in size (less inlined code).

```
name                                   old time/op             new time/op             delta
BM_RepeatedPtrField_Add_Sso            11.0ns ± 4%             10.4ns ± 7%   -5.71%        (p=0.000 n=83+85)
BM_RepeatedPtrField_Add_FallbackToRep  21.8ns ± 4%             20.8ns ± 5%   -4.55%        (p=0.000 n=80+83)
BM_RepeatedPtrField_Add_Small          17.3ns ± 4%             15.8ns ± 5%   -8.86%        (p=0.000 n=78+85)
BM_RepeatedPtrField_Add_Large          22.6ns ±12%             21.1ns ±10%   -6.62%       (p=0.000 n=100+92)
BM_RepeatedPtrField_Add_HasCleared     4.15ns ±11%             4.43ns ± 6%   +6.67%        (p=0.000 n=82+87)
BM_RepeatedPtrField_AddAllocated       12.0ns ±19%             12.0ns ±14%     ~           (p=0.678 n=92+96)

name                                   old INSTRUCTIONS/op     new INSTRUCTIONS/op     delta
BM_RepeatedPtrField_Add_Sso               101 ± 1%                 88 ± 2%  -13.05%      (p=0.000 n=100+100)
BM_RepeatedPtrField_Add_FallbackToRep     222 ± 1%                206 ± 1%   -7.35%      (p=0.000 n=100+100)
BM_RepeatedPtrField_Add_Small             180 ± 0%                164 ± 1%   -8.69%      (p=0.000 n=100+100)
BM_RepeatedPtrField_Add_Large             110 ± 0%                100 ± 0%   -9.07%       (p=0.000 n=100+99)
BM_RepeatedPtrField_Add_HasCleared       27.6 ± 1%               36.6 ± 1%  +32.44%      (p=0.000 n=100+100)
BM_RepeatedPtrField_AddAllocated          114 ± 7%                115 ±11%   +0.95%      (p=0.007 n=100+100)
```

PiperOrigin-RevId: 572634616
pull/14363/head
Protobuf Team Bot 1 year ago committed by Copybara-Service
parent fdcb5d17b9
commit 1df8ea4988
  1. 27
      src/google/protobuf/repeated_ptr_field.cc
  2. 17
      src/google/protobuf/repeated_ptr_field.h

@ -16,6 +16,7 @@
#include <limits>
#include <string>
#include "absl/base/prefetch.h"
#include "absl/log/absl_check.h"
#include "google/protobuf/arena.h"
#include "google/protobuf/implicit_weak_message.h"
@ -122,6 +123,32 @@ void* RepeatedPtrFieldBase::AddOutOfLineHelper(void* obj) {
return r->elements[ExchangeCurrentSize(current_size_ + 1)] = obj;
}
// Appends one element to the field and returns a pointer to it.
//
// An element that is already allocated but lies past the current size is
// handed back as-is; `factory` is only invoked (with this field's arena) when
// a brand-new element has to be created.
void* RepeatedPtrFieldBase::AddOutOfLineHelper(ElementFactory factory) {
  // Nothing is stored at all yet: create the first element directly in
  // `tagged_rep_or_elem_` and set the size to 1.
  if (tagged_rep_or_elem_ == nullptr) {
    ExchangeCurrentSize(1);
    tagged_rep_or_elem_ = factory(GetArena());
    return tagged_rep_or_elem_;
  }

  if (!using_sso()) {
    // Start loading the Rep early; it is dereferenced below.
    absl::PrefetchToLocalCache(rep());
  } else if (ExchangeCurrentSize(1) == 0) {
    // SSO mode with a previous size of 0: the pointer already held in
    // `tagged_rep_or_elem_` becomes the single live element.
    return tagged_rep_or_elem_;
  }

  if (PROTOBUF_PREDICT_FALSE(current_size_ == total_size_)) {
    // No spare capacity: grow by one slot before appending.
    InternalExtend(1);
  } else {
    Rep* const existing = rep();
    if (current_size_ != existing->allocated_size) {
      // An allocated element sits right past the current size; expose it
      // instead of creating a new one.
      const auto reused_index = ExchangeCurrentSize(current_size_ + 1);
      return existing->elements[reused_index];
    }
  }

  // Every allocated element is in use: create a new one in the next slot.
  Rep* const storage = rep();
  ++storage->allocated_size;
  const auto new_index = ExchangeCurrentSize(current_size_ + 1);
  void* const created = factory(GetArena());
  storage->elements[new_index] = created;
  return created;
}
void RepeatedPtrFieldBase::CloseGap(int start, int num) {
if (using_sso()) {
if (start == 0 && num == 1) {

@ -72,11 +72,16 @@ class RepeatedPtrOverPtrsIterator;
namespace internal {
// Creates a new `Element` by calling `GenericTypeHandler<Element>::New(a)` and
// returns it as an untyped `void*`.
template <typename Element>
inline void* NewT(Arena* a) {
  using Handler = GenericTypeHandler<Element>;
  return Handler::New(a);
}
// Exchanges the contents of two memory blocks of `N` bytes each. The two
// blocks must not overlap.
template <size_t N>
inline void memswap(char* PROTOBUF_RESTRICT a, char* PROTOBUF_RESTRICT b) {
  // `PROTOBUF_RESTRICT` tells the compiler that the blocks do not overlap,
  // which allows it to generate optimized code for swap_ranges.
  char* const a_end = a + N;
  std::swap_ranges(a, a_end, b);
}
@ -159,6 +164,8 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
static constexpr int kSSOCapacity = 1;
// Pointer to a function that takes an `Arena*` and returns an element as an
// untyped `void*` (for example `NewT<Element>`).
using ElementFactory = void* (*)(Arena*);
protected:
// We use the same Handler for all Message types to deduplicate generated
// code.
@ -214,8 +221,13 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
return cast<TypeHandler>(element_at(index));
}
// Appends an element and returns a typed pointer to it. The work is delegated
// to the out-of-line `AddOutOfLineHelper`, which receives
// `NewT<Value<Handler>>` as the factory for creating a new element.
template <typename Handler>
Value<Handler>* Add() {
  void* const added = AddOutOfLineHelper(NewT<Value<Handler>>);
  return cast<Handler>(added);
}
template <typename TypeHandler>
Value<TypeHandler>* Add(const Value<TypeHandler>* prototype = nullptr) {
Value<TypeHandler>* Add(const Value<TypeHandler>* prototype) {
if (current_size_ < allocated_size()) {
return cast<TypeHandler>(
element_at(ExchangeCurrentSize(current_size_ + 1)));
@ -816,6 +828,7 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
// array, including potentially resizing the array with Reserve if
// needed
void* AddOutOfLineHelper(void* obj);
// Appends one element and returns a pointer to it. An already-allocated
// element past the current size is reused when available; otherwise a new
// element is created by calling `factory` with this field's arena.
void* AddOutOfLineHelper(ElementFactory factory);
// A few notes on internal representation:
//

Loading…
Cancel
Save