Optimize RepeatedPtrField constructors.

Writing zeros is faster: zero values are often already available in the cache, and zero-initialization takes fewer instructions on common platforms.

```
BM_RepeatedPtrField_Ctor                125ns ± 3%   118ns ± 3%  -5.34%  (p=0.000 n=217+219)
```
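
The win comes from the representation change in the header diff below: instead of storing the capacity directly (`total_size_`, initialized to `kSSOCapacity`), the class now stores `capacity_proxy_ = capacity - kSSOCapacity`, so a default-constructed field has every member equal to zero and the constructor degenerates into a zero-fill. A minimal sketch of the idea, not the real class (member names mirror the diff; the class name is hypothetical and `kSSOCapacity == 1` is an assumption based on the single inline SSO slot):

```
// Sketch only (hypothetical class name): shows why storing the capacity as
// "capacity - kSSOCapacity" lets the constructor write nothing but zeros.
class Arena;  // opaque placeholder

class RepeatedPtrFieldSketch {
 public:
  static constexpr int kSSOCapacity = 1;  // one inline element (assumption)

  // All members are zero/null, so the object can be initialized with plain
  // zero stores (or a memset) instead of loading the constant kSSOCapacity.
  constexpr RepeatedPtrFieldSketch()
      : tagged_rep_or_elem_(nullptr),
        current_size_(0),
        capacity_proxy_(0),  // Capacity() == 0 + kSSOCapacity
        arena_(nullptr) {}

  // The real capacity is reconstructed on read.
  int Capacity() const { return capacity_proxy_ + kSSOCapacity; }

 private:
  void* tagged_rep_or_elem_;
  int current_size_;
  int capacity_proxy_;  // stores capacity - kSSOCapacity
  Arena* arena_;
};
```

With all members zero, the compiler is free to emit plain zero stores for the constructor instead of materializing the constant, which is plausibly where the ~5% improvement in `BM_RepeatedPtrField_Ctor` comes from.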

PiperOrigin-RevId: 576126835
Branch: pull/14497/head
Author: Protobuf Team Bot (committed by Copybara-Service)
Parent: 65cdac4ac5
Commit: c68554cc24
Changed files:
  src/google/protobuf/repeated_ptr_field.cc (31 lines changed)
  src/google/protobuf/repeated_ptr_field.h (102 lines changed)

--- a/src/google/protobuf/repeated_ptr_field.cc
+++ b/src/google/protobuf/repeated_ptr_field.cc
@@ -20,6 +20,7 @@
 #include "absl/log/absl_check.h"
 #include "google/protobuf/arena.h"
 #include "google/protobuf/implicit_weak_message.h"
+#include "google/protobuf/message_lite.h"
 #include "google/protobuf/port.h"
 #include "google/protobuf/repeated_field.h"
@@ -34,10 +35,11 @@ namespace internal {
 void** RepeatedPtrFieldBase::InternalExtend(int extend_amount) {
   ABSL_DCHECK(extend_amount > 0);
   constexpr size_t ptr_size = sizeof(rep()->elements[0]);
-  int new_capacity = total_size_ + extend_amount;
+  int capacity = Capacity();
+  int new_capacity = capacity + extend_amount;
   Arena* arena = GetArena();
   new_capacity = internal::CalculateReserveSize<void*, kRepHeaderSize>(
-      total_size_, new_capacity);
+      capacity, new_capacity);
   ABSL_CHECK_LE(
       static_cast<int64_t>(new_capacity),
       static_cast<int64_t>(
@@ -45,7 +47,6 @@ void** RepeatedPtrFieldBase::InternalExtend(int extend_amount) {
       << "Requested size is too large to fit into size_t.";
   size_t bytes = kRepHeaderSize + ptr_size * new_capacity;
   Rep* new_rep;
-  void* old_tagged_ptr = tagged_rep_or_elem_;
   if (arena == nullptr) {
     internal::SizedPtr res = internal::AllocateAtLeast(bytes);
     new_capacity = static_cast<int>((res.n - kRepHeaderSize) / ptr_size);
@@ -55,18 +56,17 @@ void** RepeatedPtrFieldBase::InternalExtend(int extend_amount) {
   }
   if (using_sso()) {
-    new_rep->allocated_size = old_tagged_ptr != nullptr ? 1 : 0;
-    new_rep->elements[0] = old_tagged_ptr;
+    new_rep->allocated_size = tagged_rep_or_elem_ != nullptr ? 1 : 0;
+    new_rep->elements[0] = tagged_rep_or_elem_;
   } else {
-    Rep* old_rep =
-        reinterpret_cast<Rep*>(reinterpret_cast<uintptr_t>(old_tagged_ptr) - 1);
+    Rep* old_rep = rep();
     if (old_rep->allocated_size > 0) {
       memcpy(new_rep->elements, old_rep->elements,
              old_rep->allocated_size * ptr_size);
     }
     new_rep->allocated_size = old_rep->allocated_size;
-    size_t old_size = total_size_ * ptr_size + kRepHeaderSize;
+    size_t old_size = capacity * ptr_size + kRepHeaderSize;
     if (arena == nullptr) {
       internal::SizedDelete(old_rep, old_size);
     } else {
@@ -76,13 +76,14 @@ void** RepeatedPtrFieldBase::InternalExtend(int extend_amount) {
   tagged_rep_or_elem_ =
       reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(new_rep) + 1);
-  total_size_ = new_capacity;
+  capacity_proxy_ = new_capacity - kSSOCapacity;
   return &new_rep->elements[current_size_];
 }
 
 void RepeatedPtrFieldBase::Reserve(int capacity) {
-  if (capacity > total_size_) {
-    InternalExtend(capacity - total_size_);
+  int delta = capacity - Capacity();
+  if (delta > 0) {
+    InternalExtend(delta);
   }
 }
@@ -98,7 +99,7 @@ void RepeatedPtrFieldBase::DestroyProtos() {
   for (int i = 0; i < n; i++) {
     delete static_cast<MessageLite*>(elements[i]);
   }
-  const size_t size = total_size_ * sizeof(elements[0]) + kRepHeaderSize;
+  const size_t size = Capacity() * sizeof(elements[0]) + kRepHeaderSize;
   internal::SizedDelete(r, size);
 }
@@ -115,7 +116,9 @@ void* RepeatedPtrFieldBase::AddOutOfLineHelper(void* obj) {
     ExchangeCurrentSize(1);
     return tagged_rep_or_elem_ = obj;
   }
-  if (using_sso() || rep()->allocated_size == total_size_) {
+  // Not using `AllocatedSizeAtCapacity` because it's already known that
+  // `tagged_rep_or_elem_ != nullptr`.
+  if (using_sso() || rep()->allocated_size >= Capacity()) {
     InternalExtend(1);  // Equivalent to "Reserve(total_size_ + 1)"
   }
   Rep* r = rep();
@@ -134,7 +137,7 @@ void* RepeatedPtrFieldBase::AddOutOfLineHelper(ElementFactory factory) {
   } else {
     absl::PrefetchToLocalCache(rep());
   }
-  if (PROTOBUF_PREDICT_FALSE(current_size_ == total_size_)) {
+  if (PROTOBUF_PREDICT_FALSE(SizeAtCapacity())) {
     InternalExtend(1);
   } else {
     Rep* r = rep();

--- a/src/google/protobuf/repeated_ptr_field.h
+++ b/src/google/protobuf/repeated_ptr_field.h
@@ -177,12 +177,12 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
   constexpr RepeatedPtrFieldBase()
       : tagged_rep_or_elem_(nullptr),
         current_size_(0),
-        total_size_(kSSOCapacity),
+        capacity_proxy_(0),
         arena_(nullptr) {}
   explicit RepeatedPtrFieldBase(Arena* arena)
       : tagged_rep_or_elem_(nullptr),
         current_size_(0),
-        total_size_(kSSOCapacity),
+        capacity_proxy_(0),
         arena_(arena) {}
 
   RepeatedPtrFieldBase(const RepeatedPtrFieldBase&) = delete;
@@ -198,7 +198,13 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
   bool empty() const { return current_size_ == 0; }
   int size() const { return current_size_; }
-  int Capacity() const { return total_size_; }
+  // Returns the size of the buffer with pointers to elements.
+  //
+  // Note:
+  //
+  //   * prefer `SizeAtCapacity()` to `size() == Capacity()`;
+  //   * prefer `AllocatedSizeAtCapacity()` to `allocated_size() == Capacity()`.
+  int Capacity() const { return capacity_proxy_ + kSSOCapacity; }
 
   template <typename TypeHandler>
   const Value<TypeHandler>& at(int index) const {
@@ -271,7 +277,7 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
     }
     if (!using_sso()) {
       internal::SizedDelete(rep(),
-                            total_size_ * sizeof(elems[0]) + kRepHeaderSize);
+                            Capacity() * sizeof(elems[0]) + kRepHeaderSize);
     }
   }
@@ -417,7 +423,7 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
     size_t allocated_bytes =
         using_sso()
             ? 0
-            : static_cast<size_t>(total_size_) * sizeof(void*) + kRepHeaderSize;
+            : static_cast<size_t>(Capacity()) * sizeof(void*) + kRepHeaderSize;
     const int n = allocated_size();
     void* const* elems = elements();
     for (int i = 0; i < n; ++i) {
@@ -451,11 +457,11 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
   void UnsafeArenaAddAllocated(Value<TypeHandler>* value) {
     ABSL_DCHECK_NE(value, nullptr);
     // Make room for the new pointer.
-    if (current_size_ == total_size_) {
+    if (SizeAtCapacity()) {
       // The array is completely full with no cleared objects, so grow it.
-      Reserve(total_size_ + 1);
+      InternalExtend(1);
       ++rep()->allocated_size;
-    } else if (allocated_size() == total_size_) {
+    } else if (AllocatedSizeAtCapacity()) {
       // There is no more space in the pointer array because it contains some
       // cleared objects awaiting reuse. We don't want to grow the array in
       // this case because otherwise a loop calling AddAllocated() followed by
@@ -539,41 +545,41 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
   void AddAllocatedInternal(Value<TypeHandler>* value, std::true_type) {
     Arena* element_arena = TypeHandler::GetArena(value);
     Arena* arena = GetArena();
-    if (arena == element_arena && allocated_size() < total_size_) {
-      // Fast path: underlying arena representation (tagged pointer) is equal to
-      // our arena pointer, and we can add to array without resizing it (at
-      // least one slot that is not allocated).
-      void** elems = elements();
-      if (current_size_ < allocated_size()) {
-        // Make space at [current] by moving first allocated element to end of
-        // allocated list.
-        elems[allocated_size()] = elems[current_size_];
-      }
-      elems[ExchangeCurrentSize(current_size_ + 1)] = value;
-      if (!using_sso()) ++rep()->allocated_size;
-    } else {
+    if (arena != element_arena || AllocatedSizeAtCapacity()) {
       AddAllocatedSlowWithCopy<TypeHandler>(value, element_arena, arena);
+      return;
+    }
+    // Fast path: underlying arena representation (tagged pointer) is equal to
+    // our arena pointer, and we can add to array without resizing it (at
+    // least one slot that is not allocated).
+    void** elems = elements();
+    if (current_size_ < allocated_size()) {
+      // Make space at [current] by moving first allocated element to end of
+      // allocated list.
+      elems[allocated_size()] = elems[current_size_];
     }
+    elems[ExchangeCurrentSize(current_size_ + 1)] = value;
+    if (!using_sso()) ++rep()->allocated_size;
   }
 
   // AddAllocated version that does not implement arena-safe copying behavior.
   template <typename TypeHandler>
   void AddAllocatedInternal(Value<TypeHandler>* value, std::false_type) {
-    if (allocated_size() < total_size_) {
-      // Fast path: underlying arena representation (tagged pointer) is equal to
-      // our arena pointer, and we can add to array without resizing it (at
-      // least one slot that is not allocated).
-      void** elems = elements();
-      if (current_size_ < allocated_size()) {
-        // Make space at [current] by moving first allocated element to end of
-        // allocated list.
-        elems[allocated_size()] = elems[current_size_];
-      }
-      elems[ExchangeCurrentSize(current_size_ + 1)] = value;
-      if (!using_sso()) ++rep()->allocated_size;
-    } else {
+    if (AllocatedSizeAtCapacity()) {
       UnsafeArenaAddAllocated<TypeHandler>(value);
+      return;
+    }
+    // Fast path: underlying arena representation (tagged pointer) is equal to
+    // our arena pointer, and we can add to array without resizing it (at
+    // least one slot that is not allocated).
+    void** elems = elements();
+    if (current_size_ < allocated_size()) {
+      // Make space at [current] by moving first allocated element to end of
+      // allocated list.
+      elems[allocated_size()] = elems[current_size_];
     }
+    elems[ExchangeCurrentSize(current_size_ + 1)] = value;
+    if (!using_sso()) ++rep()->allocated_size;
   }
 
   // Slowpath handles all cases, copying if necessary.
@@ -715,6 +721,25 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
   inline int ExchangeCurrentSize(int new_size) {
     return std::exchange(current_size_, new_size);
   }
+  inline bool SizeAtCapacity() const {
+    // Harden invariant size() <= allocated_size() <= Capacity().
+    ABSL_DCHECK_LE(size(), allocated_size());
+    ABSL_DCHECK_LE(allocated_size(), Capacity());
+    // This is equivalent to `current_size_ == Capacity()`.
+    // Assuming `Capacity()` function is inlined, compiler is likely to optimize
+    // away "+ kSSOCapacity" and reduce it to "current_size_ > capacity_proxy_"
+    // which is an instruction less than "current_size_ == capacity_proxy_ + 1".
+    return current_size_ >= Capacity();
+  }
+  inline bool AllocatedSizeAtCapacity() const {
+    // Harden invariant size() <= allocated_size() <= Capacity().
+    ABSL_DCHECK_LE(size(), allocated_size());
+    ABSL_DCHECK_LE(allocated_size(), Capacity());
+    // This combines optimization mentioned in `SizeAtCapacity()` and simplifies
+    // `allocated_size()` in sso case.
+    return using_sso() ? (tagged_rep_or_elem_ != nullptr)
+                       : rep()->allocated_size >= Capacity();
+  }
 
   void* const* elements() const {
     return using_sso() ? &tagged_rep_or_elem_ : +rep()->elements;
@@ -800,8 +825,7 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
   // Ensures that capacity is big enough to store one more allocated element.
   inline void MaybeExtend() {
-    if (using_sso() ? (tagged_rep_or_elem_ != nullptr)
-                    : (rep()->allocated_size == total_size_)) {
+    if (AllocatedSizeAtCapacity()) {
       ABSL_DCHECK_EQ(allocated_size(), Capacity());
       InternalExtend(1);
     } else {
@@ -812,11 +836,11 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
   // Ensures that capacity is at least `n` elements.
   // Returns a pointer to the element directly beyond the last element.
   inline void** InternalReserve(int n) {
-    if (n <= total_size_) {
+    if (n <= Capacity()) {
       void** elements = using_sso() ? &tagged_rep_or_elem_ : rep()->elements;
       return elements + current_size_;
     }
-    return InternalExtend(n - total_size_);
+    return InternalExtend(n - Capacity());
   }
 
   // Internal helper for Add: adds "obj" as the next element in the
@@ -838,7 +862,7 @@ class PROTOBUF_EXPORT RepeatedPtrFieldBase {
   // significant performance for memory-sensitive workloads.
   void* tagged_rep_or_elem_;
   int current_size_;
-  int total_size_;
+  int capacity_proxy_;  // we store `capacity - kSSOCapacity` as an optimization
   Arena* arena_;
 };
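
A side note on the new `SizeAtCapacity()` comment above: since `Capacity()` is `capacity_proxy_ + kSSOCapacity` and the SSO slot implies `kSSOCapacity == 1`, the `>=` form lets the compiler fold the addition away. A standalone sketch with hypothetical helper names, showing the two equivalent shapes of the check:

```
// Sketch only: the at-capacity check from the diff, written both ways.
// Assumes kSSOCapacity == 1, as the SizeAtCapacity() comment implies.
constexpr int kSSOCapacity = 1;

// As written in the patch: compare against the reconstructed Capacity().
bool SizeAtCapacityAsWritten(int current_size, int capacity_proxy) {
  int capacity = capacity_proxy + kSSOCapacity;  // inlined Capacity()
  return current_size >= capacity;
}

// What the optimizer can reduce it to:
// current_size >= capacity_proxy + 1  <=>  current_size > capacity_proxy,
// i.e. a single compare against the stored field, with no extra add.
bool SizeAtCapacityFolded(int current_size, int capacity_proxy) {
  return current_size > capacity_proxy;
}
```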
