Change RepeatedField to create only 'current' elements, not 'total' elements.

Currently RepeatedField allocates and constructs all elements on 'Reserve', i.e., it constructs the full 'total' capacity rather than just the 'current' size. This is inefficient and, more problematically, leads to sanitizer issues once we have element types that actively poison their private contents, such as absl::Cord. This change fixes RepeatedField to construct only the elements that are actually 'live'.

PiperOrigin-RevId: 504121827
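
For illustration only (this snippet is not part of the commit; it mirrors the VerifyDeathOnWriteAndReadAccessBeyondEnd test added to repeated_field_unittest.cc below): because capacity beyond size() is no longer constructed, an AddressSanitizer build now reports a container-overflow when code touches a reserved but unused slot, and the slot only becomes usable once it is made live.

    #include <cstdint>
    #include "google/protobuf/repeated_field.h"

    void BeyondEndSketch() {
      google::protobuf::RepeatedField<int64_t> field;
      field.Add(1);                          // one live element
      field.Reserve(field.Capacity() + 1);   // grow; the unused tail is poisoned
      int64_t* end = field.Mutable(field.size() - 1) + 1;  // first non-live slot
      // *end = 1;                 // would be flagged as container-overflow by ASAN
      field.AddAlreadyReserved();  // makes that slot live (and unpoisons it)...
      *end = 1;                    // ...so this store is now fine
    }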
Branch: pull/11593/head
Author: Martijn Vels (committed by Copybara-Service)
parent ffa9b503bf
commit 9ca411a46b
6 changed files (changed lines per file):
   16  src/google/protobuf/generated_message_tctable_lite.cc
    9  src/google/protobuf/port_def.inc
    1  src/google/protobuf/port_undef.inc
   48  src/google/protobuf/repeated_field.cc
  512  src/google/protobuf/repeated_field.h
  162  src/google/protobuf/repeated_field_unittest.cc

src/google/protobuf/generated_message_tctable_lite.cc

@@ -602,19 +602,11 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedFixed(
     }
   }
   auto& field = RefAt<RepeatedField<LayoutType>>(msg, data.offset());
-  int idx = field.size();
-  auto elem = field.Add();
-  int space = field.Capacity() - idx;
-  idx = 0;
-  const auto expected_tag = UnalignedLoad<TagType>(ptr);
+  const auto tag = UnalignedLoad<TagType>(ptr);
   do {
-    ptr += sizeof(TagType);
-    elem[idx++] = UnalignedLoad<LayoutType>(ptr);
-    ptr += sizeof(LayoutType);
-    if (idx >= space) break;
-    if (!ctx->DataAvailable(ptr)) break;
-  } while (UnalignedLoad<TagType>(ptr) == expected_tag);
-  field.AddNAlreadyReserved(idx - 1);
+    field.Add(UnalignedLoad<LayoutType>(ptr + sizeof(TagType)));
+    ptr += sizeof(TagType) + sizeof(LayoutType);
+  } while (ctx->DataAvailable(ptr) && UnalignedLoad<TagType>(ptr) == tag);
   return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
 }

src/google/protobuf/port_def.inc

@@ -547,6 +547,15 @@ static_assert(PROTOBUF_CPLUSPLUS_MIN(201402L), "Protobuf only supports C++14 and
 #define PROTOBUF_NODISCARD
 #endif
 
+#ifdef PROTOBUF_RESTRICT
+#error PROTOBUF_RESTRICT was previously defined
+#endif
+#if defined(__clang__) || defined(__GNUC__)
+#define PROTOBUF_RESTRICT __restrict
+#else
+#define PROTOBUF_RESTRICT
+#endif
+
 #ifdef PROTOBUF_FORCE_COPY_IN_RELEASE
 #error PROTOBUF_FORCE_COPY_IN_RELEASE was previously defined
 #endif

src/google/protobuf/port_undef.inc

@@ -73,6 +73,7 @@
 #undef PROTOBUF_EXPORT
 #undef PROTOC_EXPORT
 #undef PROTOBUF_NODISCARD
+#undef PROTOBUF_RESTRICT
 #undef PROTOBUF_FORCE_COPY_IN_RELEASE
 #undef PROTOBUF_FORCE_COPY_IN_SWAP
 #undef PROTOBUF_FORCE_COPY_IN_MOVE

src/google/protobuf/repeated_field.cc

@@ -48,14 +48,6 @@ namespace google {
 namespace protobuf {
 
-template <>
-PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::Clear() {
-  for (int i = 0; i < current_size_; i++) {
-    Mutable(i)->Clear();
-  }
-  ExchangeCurrentSize(0);
-}
-
 template <>
 PROTOBUF_EXPORT_TEMPLATE_DEFINE size_t
 RepeatedField<absl::Cord>::SpaceUsedExcludingSelfLong() const {
@@ -67,46 +59,6 @@ RepeatedField<absl::Cord>::SpaceUsedExcludingSelfLong() const {
   return result;
 }
 
-template <>
-PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::Truncate(
-    int new_size) {
-  GOOGLE_ABSL_DCHECK_LE(new_size, current_size_);
-  while (current_size_ > new_size) {
-    RemoveLast();
-  }
-}
-
-template <>
-PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::Resize(
-    int new_size, const absl::Cord& value) {
-  GOOGLE_ABSL_DCHECK_GE(new_size, 0);
-  if (new_size > current_size_) {
-    Reserve(new_size);
-    std::fill(&rep()->elements()[ExchangeCurrentSize(new_size)],
-              &rep()->elements()[new_size], value);
-  } else {
-    while (current_size_ > new_size) {
-      RemoveLast();
-    }
-  }
-}
-
-template <>
-PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::MoveArray(
-    absl::Cord* to, absl::Cord* from, int size) {
-  for (int i = 0; i < size; i++) {
-    swap(to[i], from[i]);
-  }
-}
-
-template <>
-PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::CopyArray(
-    absl::Cord* to, const absl::Cord* from, int size) {
-  for (int i = 0; i < size; i++) {
-    to[i] = from[i];
-  }
-}
-
 }  // namespace protobuf
 }  // namespace google

src/google/protobuf/repeated_field.h

@@ -47,13 +47,16 @@
 #include <algorithm>
 #include <iterator>
 #include <limits>
+#include <memory>
 #include <string>
 #include <type_traits>
 #include <utility>
 
 #include "google/protobuf/arena.h"
 #include "google/protobuf/port.h"
+#include "absl/base/dynamic_annotations.h"
 #include "google/protobuf/stubs/logging.h"
+#include "absl/meta/type_traits.h"
 #include "absl/strings/cord.h"
 #include "google/protobuf/generated_enum_util.h"
 #include "google/protobuf/message_lite.h"
@@ -170,7 +173,7 @@ class RepeatedField final {
   constexpr RepeatedField();
   explicit RepeatedField(Arena* arena);
 
-  RepeatedField(const RepeatedField& other);
+  RepeatedField(const RepeatedField& rhs);
 
   template <typename Iter,
             typename = typename std::enable_if<std::is_constructible<
@@ -197,7 +200,8 @@ class RepeatedField final {
   Element& at(int index);
 
   void Set(int index, const Element& value);
-  void Add(const Element& value);
+  void Add(Element value);
+
   // Appends a new element and returns a pointer to it.
   // The new element is uninitialized if |Element| is a POD type.
   Element* Add();
@@ -231,14 +235,14 @@ class RepeatedField final {
   // Except for RepeatedField<Cord>, for which it is O(size-new_size).
   void Truncate(int new_size);
 
-  void AddAlreadyReserved(const Element& value);
-  // Appends a new element and return a pointer to it.
-  // The new element is uninitialized if |Element| is a POD type.
-  // Should be called only if Capacity() > Size().
-  Element* AddAlreadyReserved();
-  Element* AddNAlreadyReserved(int elements);
+  void AddAlreadyReserved(Element value);
   int Capacity() const;
 
+  // Adds `n` elements to this instance asserting there is enough capacity.
+  // The added elements are uninitialized if `Element` is trivial.
+  Element* AddAlreadyReserved();
+  Element* AddNAlreadyReserved(int n);
+
   // Like STL resize.  Uses value to fill appended elements.
   // Like Truncate() if new_size <= size(), otherwise this is
   // O(new_size - size()).
@@ -319,7 +323,6 @@ class RepeatedField final {
   // This is public due to it being called by generated code.
   inline void InternalSwap(RepeatedField* other);
 
  private:
   template <typename T> friend class Arena::InternalHelper;
@@ -335,6 +338,38 @@ class RepeatedField final {
   // GOOGLE_ABSL_DCHECK (see API docs for details).
   void UnsafeArenaSwap(RepeatedField* other);
 
+  // Copy constructs `n` instances in place into the array `dst`.
+  // This function is identical to `std::uninitialized_copy_n(src, n, dst)`
+  // except that we explicit declare the memory to not be aliased, which will
+  // result in `memcpy` code generation instead of `memmove` for trivial types.
+  static inline void UninitializedCopyN(const Element* PROTOBUF_RESTRICT src,
+                                        int n, Element* PROTOBUF_RESTRICT dst) {
+    std::uninitialized_copy_n(src, n, dst);
+  }
+
+  // Copy constructs `[begin, end)` instances in place into the array `dst`.
+  // See above `UninitializedCopyN()` function comments for more information.
+  template <typename Iter>
+  static inline void UninitializedCopy(Iter begin, Iter end,
+                                       Element* PROTOBUF_RESTRICT dst) {
+    std::uninitialized_copy(begin, end, dst);
+  }
+
+  template <typename Iter>
+  void AddForwardIterator(Iter begin, Iter end);
+
+  template <typename Iter>
+  void AddInputIterator(Iter begin, Iter end);
+
+  // Reserves space to expand the field to at least the given size.
+  // If the array is grown, it will always be at least doubled in size.
+  // If `annotate_size` is true (the default), then this function will annotate
+  // the old container from `current_size` to `total_size_` (unpoison memory)
+  // directly before it is being released, and annotate the new container from
+  // `total_size_` to `current_size` (poison unused memory).
+  void Grow(int current_size, int new_size);
+  void GrowNoAnnotate(int current_size, int new_size);
+
   static constexpr int kInitialSize = 0;
   // A note on the representation here (see also comment below for
   // RepeatedPtrFieldBase's struct Rep):
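
The `UninitializedCopyN()` comment above is the reason the new PROTOBUF_RESTRICT macro was added to port_def.inc: promising the compiler that source and destination do not alias lets it lower the copy to memcpy rather than memmove for trivial types. A standalone sketch of the same idea, using an illustrative macro name rather than the real one:

    #include <cstring>

    // Hypothetical stand-in for PROTOBUF_RESTRICT from port_def.inc.
    #if defined(__clang__) || defined(__GNUC__)
    #define SKETCH_RESTRICT __restrict
    #else
    #define SKETCH_RESTRICT
    #endif

    // Because `src` and `dst` are declared non-aliasing, the compiler may lower
    // this loop to a plain memcpy; without the qualifier it must assume the
    // ranges could overlap and generate memmove-style code instead.
    inline void CopyInts(const int* SKETCH_RESTRICT src, int n,
                         int* SKETCH_RESTRICT dst) {
      for (int i = 0; i < n; ++i) dst[i] = src[i];
    }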
@@ -349,11 +384,30 @@ class RepeatedField final {
   int current_size_;
   int total_size_;
 
+  // Annotates a change in size of this instance. This function should be called
+  // with (total_size, current_size) after new memory has been allocated and
+  // filled from previous memory), and called with (current_size, total_size)
+  // right before (previously annotated) memory is released.
+  void AnnotateSize(int old_size, int new_size) const {
+    if (old_size != new_size) {
+      ABSL_ANNOTATE_CONTIGUOUS_CONTAINER(
+          unsafe_elements(), unsafe_elements() + total_size_,
+          unsafe_elements() + old_size, unsafe_elements() + new_size);
+      if (new_size < old_size) {
+        ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(
+            unsafe_elements() + new_size,
+            (old_size - new_size) * sizeof(Element));
+      }
+    }
+  }
+
   // Replaces current_size_ with new_size and returns the previous value of
   // current_size_. This function is intended to be the only place where
-  // current_size_ is modified.
+  // current_size_ is modified, with the exception of `AddInputIterator()`
+  // where the size of added items is not known in advance.
   inline int ExchangeCurrentSize(int new_size) {
     const int prev_size = current_size_;
+    AnnotateSize(prev_size, new_size);
     current_size_ = new_size;
     return prev_size;
   }
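
AnnotateSize() is the piece that actually poisons and unpoisons the unused capacity. A minimal sketch of the underlying annotation macro on a plain heap buffer (illustrative only; it has an effect only in sanitizer builds):

    #include <cstdlib>
    #include "absl/base/dynamic_annotations.h"

    void AnnotationSketch() {
      constexpr int kCapacity = 16;
      int* buf = static_cast<int*>(malloc(kCapacity * sizeof(int)));
      // Declare [buf, buf + kCapacity) a container whose live part is empty:
      // under AddressSanitizer everything past `buf` is now poisoned.
      ABSL_ANNOTATE_CONTIGUOUS_CONTAINER(buf, buf + kCapacity, buf + kCapacity, buf);
      // Grow the live region to 4 elements before writing them.
      ABSL_ANNOTATE_CONTIGUOUS_CONTAINER(buf, buf + kCapacity, buf, buf + 4);
      for (int i = 0; i < 4; ++i) buf[i] = i;
      // buf[4] = 42;  // would be reported as container-overflow under ASAN
      // Unpoison the tail again before handing the memory back to the allocator.
      ABSL_ANNOTATE_CONTIGUOUS_CONTAINER(buf, buf + kCapacity, buf + 4, buf + kCapacity);
      free(buf);
    }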
@@ -370,7 +424,6 @@ class RepeatedField final {
     }
   };
 
   // If total_size_ == 0 this points to an Arena otherwise it points to the
   // elements member of a Rep struct. Using this invariant allows the storage of
   // the arena pointer without an extra allocation in the constructor.
@@ -401,144 +454,28 @@ class RepeatedField final {
   friend class Arena;
   typedef void InternalArenaConstructable_;
 
-  // Moves the contents of |from| into |to|, possibly clobbering |from| in the
-  // process. For primitive types this is just a memcpy(), but it could be
-  // specialized for non-primitive types to, say, swap each element instead.
-  // In fact, we do exactly that for Cords.
-  void MoveArray(Element* to, Element* from, int size);
-
-  // Copies the elements of |from| into |to|.
-  void CopyArray(Element* to, const Element* from, int size);
-
-  // Internal helper to delete all elements and deallocate the storage.
-  void InternalDeallocate(Rep* rep, int size, bool in_destructor) {
-    if (rep != nullptr) {
-      Element* e = &rep->elements()[0];
-      if (!std::is_trivial<Element>::value) {
-        Element* limit = &rep->elements()[size];
-        for (; e < limit; e++) {
-          e->~Element();
-        }
-      }
-      const size_t bytes = size * sizeof(*e) + kRepHeaderSize;
-      if (rep->arena == nullptr) {
-        internal::SizedDelete(rep, bytes);
-      } else if (!in_destructor) {
-        // If we are in the destructor, we might be being destroyed as part of
-        // the arena teardown. We can't try and return blocks to the arena then.
-        rep->arena->ReturnArrayMemory(rep, bytes);
-      }
-    }
-  }
-
-  // This class is a performance wrapper around RepeatedField::Add(const T&)
-  // function. In general unless a RepeatedField is a local stack variable LLVM
-  // has a hard time optimizing Add. The machine code tends to be
-  //
-  // loop:
-  //    mov %size, dword ptr [%repeated_field]       // load
-  //    cmp %size, dword ptr [%repeated_field + 4]
-  //    jae fallback
-  //    mov %buffer, qword ptr [%repeated_field + 8]
-  //    mov dword [%buffer + %size * 4], %value
-  //    inc %size                                    // increment
-  //    mov dword ptr [%repeated_field], %size       // store
-  //    jmp loop
-  //
-  // This puts a load/store in each iteration of the important loop variable
-  // size. It's a pretty bad compile that happens even in simple cases, but
-  // largely the presence of the fallback path disturbs the compilers mem-to-reg
-  // analysis.
-  //
-  // This class takes ownership of a repeated field for the duration of its
-  // lifetime. The repeated field should not be accessed during this time, ie.
-  // only access through this class is allowed. This class should always be a
-  // function local stack variable. Intended use
-  //
-  // void AddSequence(const int* begin, const int* end, RepeatedField<int>* out)
-  // {
-  //   RepeatedFieldAdder<int> adder(out); // Take ownership of out
-  //   for (auto it = begin; it != end; ++it) {
-  //     adder.Add(*it);
-  //   }
-  // }
-  //
-  // Typically, due to the fact that adder is a local stack variable, the
-  // compiler will be successful in mem-to-reg transformation and the machine
-  // code will be
-  //
-  // loop:
-  //    cmp %size, %capacity
-  //    jae fallback
-  //    mov dword ptr [%buffer + %size * 4], %val
-  //    inc %size
-  //    jmp loop
-  //
-  // The first version executes at 7 cycles per iteration while the second
-  // version executes at only 1 or 2 cycles.
-  template <int = 0, bool = std::is_trivial<Element>::value>
-  class FastAdderImpl {
-   public:
-    explicit FastAdderImpl(RepeatedField* rf) : repeated_field_(rf) {
-      index_ = repeated_field_->current_size_;
-      capacity_ = repeated_field_->total_size_;
-      buffer_ = repeated_field_->unsafe_elements();
-    }
-    FastAdderImpl(const FastAdderImpl&) = delete;
-    FastAdderImpl& operator=(const FastAdderImpl&) = delete;
-    ~FastAdderImpl() {
-      repeated_field_->current_size_ = index_;
-    }
-
-    void Add(Element val) {
-      if (index_ == capacity_) {
-        repeated_field_->current_size_ = index_;
-        repeated_field_->Reserve(index_ + 1);
-        capacity_ = repeated_field_->total_size_;
-        buffer_ = repeated_field_->unsafe_elements();
-      }
-      buffer_[index_++] = val;
-    }
-
-   private:
-    RepeatedField* repeated_field_;
-    int index_;
-    int capacity_;
-    Element* buffer_;
-  };
-
-  // FastAdder is a wrapper for adding fields. The specialization above handles
-  // POD types more efficiently than RepeatedField.
-  template <int I>
-  class FastAdderImpl<I, false> {
-   public:
-    explicit FastAdderImpl(RepeatedField* rf) : repeated_field_(rf) {}
-    FastAdderImpl(const FastAdderImpl&) = delete;
-    FastAdderImpl& operator=(const FastAdderImpl&) = delete;
-    void Add(const Element& val) { repeated_field_->Add(val); }
-
-   private:
-    RepeatedField* repeated_field_;
-  };
-
-  using FastAdder = FastAdderImpl<>;
+  // Destroys all elements in [begin, end).
+  // This function does nothing if `Element` is trivial.
+  static void Destroy(const Element* begin, const Element* end) {
+    if (!std::is_trivial<Element>::value) {
+      std::for_each(begin, end, [&](const Element& e) { e.~Element(); });
+    }
+  }
+
+  // Internal helper to delete all elements and deallocate the storage.
+  template <bool in_destructor = false>
+  void InternalDeallocate() {
+    const size_t bytes = total_size_ * sizeof(Element) + kRepHeaderSize;
+    if (rep()->arena == nullptr) {
+      internal::SizedDelete(rep(), bytes);
+    } else if (!in_destructor) {
+      // If we are in the destructor, we might be being destroyed as part of
+      // the arena teardown. We can't try and return blocks to the arena then.
+      rep()->arena->ReturnArrayMemory(rep(), bytes);
+    }
+  }
 
   friend class TestRepeatedFieldHelper;
   friend class ::google::protobuf::internal::ParseContext;
 };
 
-namespace internal {
-
-// This is a helper template to copy an array of elements efficiently when they
-// have a trivial copy constructor, and correctly otherwise. This really
-// shouldn't be necessary, but our compiler doesn't optimize std::copy very
-// effectively.
-template <typename Element,
-          bool HasTrivialCopy = std::is_trivial<Element>::value>
-struct ElementCopier {
-  void operator()(Element* to, const Element* from, int array_size);
-};
-
-}  // namespace internal
-
 // implementation ====================================================
 
 template <typename Element>
@@ -554,13 +491,13 @@ inline RepeatedField<Element>::RepeatedField(Arena* arena)
 }
 
 template <typename Element>
-inline RepeatedField<Element>::RepeatedField(const RepeatedField& other)
+inline RepeatedField<Element>::RepeatedField(const RepeatedField& rhs)
     : current_size_(0), total_size_(0), arena_or_elements_(nullptr) {
   StaticValidityCheck();
-  if (other.current_size_ != 0) {
-    Reserve(other.size());
-    AddNAlreadyReserved(other.size());
-    CopyArray(Mutable(0), &other.Get(0), other.size());
+  if (size_t size = rhs.current_size_) {
+    Grow(0, size);
+    ExchangeCurrentSize(size);
+    UninitializedCopyN(rhs.elements(), size, unsafe_elements());
   }
 }
@@ -574,8 +511,6 @@ RepeatedField<Element>::RepeatedField(Iter begin, Iter end)
 
 template <typename Element>
 RepeatedField<Element>::~RepeatedField() {
-  // Fail-safe in case we miss calling this in a constructor. Note: this one
-  // won't trigger for leaked maps that never get destructed.
   StaticValidityCheck();
 #ifndef NDEBUG
   // Try to trigger segfault / asan failure in non-opt builds if arena_
@@ -584,7 +519,8 @@ RepeatedField<Element>::~RepeatedField() {
   if (arena) (void)arena->SpaceAllocated();
 #endif
   if (total_size_ > 0) {
-    InternalDeallocate(rep(), total_size_, true);
+    Destroy(unsafe_elements(), unsafe_elements() + current_size_);
+    InternalDeallocate<true>();
   }
 }
@@ -647,36 +583,41 @@ inline int RepeatedField<Element>::Capacity() const {
 }
 
 template <typename Element>
-inline void RepeatedField<Element>::AddAlreadyReserved(const Element& value) {
+inline void RepeatedField<Element>::AddAlreadyReserved(Element value) {
   GOOGLE_ABSL_DCHECK_LT(current_size_, total_size_);
-  elements()[ExchangeCurrentSize(current_size_ + 1)] = value;
+  void* p = elements() + ExchangeCurrentSize(current_size_ + 1);
+  ::new (p) Element(std::move(value));
 }
 
 template <typename Element>
 inline Element* RepeatedField<Element>::AddAlreadyReserved() {
   GOOGLE_ABSL_DCHECK_LT(current_size_, total_size_);
-  return &elements()[ExchangeCurrentSize(current_size_ + 1)];
+  // new (p) <TrivialType> compiles into nothing: this is intentional as this
+  // function is documented to return uninitialized data for trivial types.
+  void* p = elements() + ExchangeCurrentSize(current_size_ + 1);
+  return ::new (p) Element;
 }
 
 template <typename Element>
-inline Element* RepeatedField<Element>::AddNAlreadyReserved(int elements) {
-  GOOGLE_ABSL_DCHECK_GE(total_size_ - current_size_, elements)
+inline Element* RepeatedField<Element>::AddNAlreadyReserved(int n) {
+  GOOGLE_ABSL_DCHECK_GE(total_size_ - current_size_, n)
       << total_size_ << ", " << current_size_;
-  // Warning: sometimes people call this when elements == 0 and
-  // total_size_ == 0. In this case the return pointer points to a zero size
-  // array (n == 0). Hence we can just use unsafe_elements(), because the user
-  // cannot dereference the pointer anyway.
-  return unsafe_elements() + ExchangeCurrentSize(current_size_ + elements);
+  Element* p = unsafe_elements() + ExchangeCurrentSize(current_size_ + n);
+  for (Element *begin = p, *end = p + n; begin != end; ++begin) {
+    new (static_cast<void*>(begin)) Element;
+  }
+  return p;
 }
 
 template <typename Element>
 inline void RepeatedField<Element>::Resize(int new_size, const Element& value) {
   GOOGLE_ABSL_DCHECK_GE(new_size, 0);
   if (new_size > current_size_) {
-    Reserve(new_size);
-    std::fill(&elements()[ExchangeCurrentSize(new_size)], &elements()[new_size],
-              value);
-  } else {
+    if (new_size > total_size_) Grow(current_size_, new_size);
+    Element* first = elements() + ExchangeCurrentSize(new_size);
+    std::uninitialized_fill(first, elements() + current_size_, value);
+  } else if (new_size < current_size_) {
+    Destroy(unsafe_elements() + new_size, unsafe_elements() + current_size_);
     ExchangeCurrentSize(new_size);
   }
 }
@@ -717,22 +658,73 @@ inline void RepeatedField<Element>::Set(int index, const Element& value) {
 }
 
 template <typename Element>
-inline void RepeatedField<Element>::Add(const Element& value) {
-  if (current_size_ == total_size_) {
-    // value could reference an element of the array. Reserving new space will
-    // invalidate the reference. So we must make a copy first.
-    auto tmp = value;
-    Reserve(total_size_ + 1);
-    elements()[ExchangeCurrentSize(current_size_ + 1)] = std::move(tmp);
-  } else {
-    elements()[ExchangeCurrentSize(current_size_ + 1)] = value;
+inline void RepeatedField<Element>::Add(Element value) {
+  int total_size = total_size_;
+  Element* elem = unsafe_elements();
+  if (ABSL_PREDICT_FALSE(current_size_ == total_size)) {
+    Grow(current_size_, current_size_ + 1);
+    total_size = total_size_;
+    elem = unsafe_elements();
   }
+  int new_size = current_size_ + 1;
+  void* p = elem + ExchangeCurrentSize(new_size);
+  ::new (p) Element(std::move(value));
+
+  // The below helps the compiler optimize dense loops.
+  ABSL_ASSUME(new_size == current_size_);
+  ABSL_ASSUME(elem == arena_or_elements_);
+  ABSL_ASSUME(total_size == total_size_);
 }
 
 template <typename Element>
 inline Element* RepeatedField<Element>::Add() {
-  if (current_size_ == total_size_) Reserve(total_size_ + 1);
-  return &elements()[ExchangeCurrentSize(current_size_ + 1)];
+  if (ABSL_PREDICT_FALSE(current_size_ == total_size_)) {
+    Grow(current_size_, current_size_ + 1);
+  }
+  void* p = unsafe_elements() + ExchangeCurrentSize(current_size_ + 1);
+  return ::new (p) Element;
+}
+
+template <typename Element>
+template <typename Iter>
+inline void RepeatedField<Element>::AddForwardIterator(Iter begin, Iter end) {
+  int total_size = total_size_;
+  Element* elem = unsafe_elements();
+  int new_size = current_size_ + static_cast<int>(std::distance(begin, end));
+  if (ABSL_PREDICT_FALSE(new_size > total_size)) {
+    Grow(current_size_, new_size);
+    elem = unsafe_elements();
+    total_size = total_size_;
+  }
+  UninitializedCopy(begin, end, elem + ExchangeCurrentSize(new_size));
+
+  // The below helps the compiler optimize dense loops.
+  ABSL_ASSUME(new_size == current_size_);
+  ABSL_ASSUME(elem == arena_or_elements_);
+  ABSL_ASSUME(total_size == total_size_);
+}
+
+template <typename Element>
+template <typename Iter>
+inline void RepeatedField<Element>::AddInputIterator(Iter begin, Iter end) {
+  Element* first = unsafe_elements() + current_size_;
+  Element* last = unsafe_elements() + total_size_;
+  AnnotateSize(current_size_, total_size_);
+
+  while (begin != end) {
+    if (ABSL_PREDICT_FALSE(first == last)) {
+      int current_size = first - unsafe_elements();
+      GrowNoAnnotate(current_size, current_size + 1);
+      first = unsafe_elements() + current_size;
+      last = unsafe_elements() + total_size_;
+    }
+    ::new (static_cast<void*>(first)) Element(*begin);
+    ++begin;
+    ++first;
+  }
+
+  current_size_ = first - unsafe_elements();
+  AnnotateSize(total_size_, current_size_);
 }
 
 template <typename Element>
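
The ABSL_ASSUME lines above take over the job of the deleted FastAdder helper: by asserting that the locally cached copies still equal the members after the store, the optimizer can keep the size and buffer in registers across iterations of a tight Add() loop. A standalone sketch of the pattern on a hypothetical container (not protobuf code):

    #include "absl/base/optimization.h"

    // Minimal vector-ish container used only to illustrate the ABSL_ASSUME pattern.
    struct TinyBuf {
      int size_ = 0;
      int capacity_ = 0;
      int* data_ = nullptr;

      // Out-of-line-style slow path that reallocates data_.
      void Grow() {
        int new_cap = capacity_ < 4 ? 4 : capacity_ * 2;
        int* new_data = new int[new_cap];
        for (int i = 0; i < size_; ++i) new_data[i] = data_[i];
        delete[] data_;
        data_ = new_data;
        capacity_ = new_cap;
      }

      void Add(int v) {
        int capacity = capacity_;
        int* data = data_;
        if (ABSL_PREDICT_FALSE(size_ == capacity)) {
          Grow();
          capacity = capacity_;
          data = data_;
        }
        int new_size = size_ + 1;
        data[size_] = v;
        size_ = new_size;
        // Tell the optimizer the locals still mirror the members, so a caller's
        // tight `for (...) buf.Add(x);` loop can keep them in registers.
        ABSL_ASSUME(new_size == size_);
        ABSL_ASSUME(data == data_);
        ABSL_ASSUME(capacity == capacity_);
      }
    };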
@@ -741,27 +733,16 @@ inline void RepeatedField<Element>::Add(Iter begin, Iter end) {
   if (std::is_base_of<
           std::forward_iterator_tag,
           typename std::iterator_traits<Iter>::iterator_category>::value) {
-    int additional = static_cast<int>(std::distance(begin, end));
-    if (additional == 0) return;
-    int new_size = current_size_ + additional;
-    Reserve(new_size);
-    // TODO(ckennelly): The compiler loses track of the buffer freshly
-    // allocated by Reserve() by the time we call elements, so it cannot
-    // guarantee that elements does not alias [begin(), end()).
-    //
-    // If restrict is available, annotating the pointer obtained from elements()
-    // causes this to lower to memcpy instead of memmove.
-    std::copy(begin, end, elements() + ExchangeCurrentSize(new_size));
+    AddForwardIterator(begin, end);
   } else {
-    FastAdder fast_adder(this);
-    for (; begin != end; ++begin) fast_adder.Add(*begin);
+    AddInputIterator(begin, end);
   }
 }
 
 template <typename Element>
 inline void RepeatedField<Element>::RemoveLast() {
   GOOGLE_ABSL_DCHECK_GT(current_size_, 0);
+  elements()[current_size_ - 1].~Element();
   ExchangeCurrentSize(current_size_ - 1);
 }
@@ -787,17 +768,17 @@ void RepeatedField<Element>::ExtractSubrange(int start, int num,
 
 template <typename Element>
 inline void RepeatedField<Element>::Clear() {
+  Destroy(unsafe_elements(), unsafe_elements() + current_size_);
   ExchangeCurrentSize(0);
 }
 
 template <typename Element>
-inline void RepeatedField<Element>::MergeFrom(const RepeatedField& other) {
-  GOOGLE_ABSL_DCHECK_NE(&other, this);
-  if (other.current_size_ != 0) {
-    int existing_size = size();
-    Reserve(existing_size + other.size());
-    AddNAlreadyReserved(other.size());
-    CopyArray(Mutable(existing_size), &other.Get(0), other.size());
+inline void RepeatedField<Element>::MergeFrom(const RepeatedField& rhs) {
+  GOOGLE_ABSL_DCHECK_NE(&rhs, this);
+  if (size_t size = rhs.current_size_) {
+    Reserve(current_size_ + size);
+    Element* dst = elements() + ExchangeCurrentSize(current_size_ + size);
+    UninitializedCopyN(rhs.elements(), size, dst);
   }
 }
@@ -950,12 +931,19 @@ inline int CalculateReserveSize(int total_size, int new_size) {
 }
 }  // namespace internal
 
+template <typename Element>
+void RepeatedField<Element>::Reserve(int new_size) {
+  if (ABSL_PREDICT_FALSE(new_size > total_size_)) {
+    Grow(current_size_, new_size);
+  }
+}
+
 // Avoid inlining of Reserve(): new, copy, and delete[] lead to a significant
 // amount of code bloat.
 template <typename Element>
-PROTOBUF_NOINLINE void RepeatedField<Element>::Reserve(int new_size) {
-  if (total_size_ >= new_size) return;
-  Rep* old_rep = total_size_ > 0 ? rep() : nullptr;
+PROTOBUF_NOINLINE void RepeatedField<Element>::GrowNoAnnotate(int current_size,
+                                                              int new_size) {
+  GOOGLE_ABSL_DCHECK_GT(new_size, total_size_);
   Rep* new_rep;
   Arena* arena = GetOwningArena();
@@ -974,116 +962,52 @@ PROTOBUF_NOINLINE void RepeatedField<Element>::Reserve(int new_size) {
     new_rep = reinterpret_cast<Rep*>(Arena::CreateArray<char>(arena, bytes));
   }
   new_rep->arena = arena;
-  int old_total_size = total_size_;
-  // Already known: new_size >= internal::kMinRepeatedFieldAllocationSize
-  // Maintain invariant:
-  //     total_size_ == 0 ||
-  //     total_size_ >= internal::kMinRepeatedFieldAllocationSize
-  total_size_ = new_size;
-  arena_or_elements_ = new_rep->elements();
-  // Invoke placement-new on newly allocated elements. We shouldn't have to do
-  // this, since Element is supposed to be POD, but a previous version of this
-  // code allocated storage with "new Element[size]" and some code uses
-  // RepeatedField with non-POD types, relying on constructor invocation. If
-  // Element has a trivial constructor (e.g., int32_t), gcc (tested with -O2)
-  // completely removes this loop because the loop body is empty, so this has no
-  // effect unless its side-effects are required for correctness.
-  // Note that we do this before MoveArray() below because Element's copy
-  // assignment implementation will want an initialized instance first.
-  Element* e = &elements()[0];
-  Element* limit = e + total_size_;
-  for (; e < limit; e++) {
-    new (e) Element;
-  }
-  if (current_size_ > 0) {
-    MoveArray(&elements()[0], old_rep->elements(), current_size_);
-  }
-  // Likewise, we need to invoke destructors on the old array.
-  InternalDeallocate(old_rep, old_total_size, false);
-  // Note that in the case of Cords, MoveArray() will have conveniently replaced
-  // all the Cords in the original array with empty values, which means that
-  // even if the old array was initial_space_, we don't have to worry about
-  // the old cords sticking around and holding on to memory.
-}
 
-template <typename Element>
-inline void RepeatedField<Element>::Truncate(int new_size) {
-  GOOGLE_ABSL_DCHECK_LE(new_size, current_size_);
-  if (current_size_ > 0) {
-    ExchangeCurrentSize(new_size);
+  if (total_size_ > 0) {
+    if (current_size > 0) {
+      Element* pnew = new_rep->elements();
+      Element* pold = elements();
+      // TODO(b/263791665): add absl::is_trivially_relocatable<Element>
+      if (std::is_trivial<Element>::value) {
+        memcpy(pnew, pold, current_size * sizeof(Element));
+      } else {
+        for (Element* end = pnew + current_size; pnew != end; ++pnew, ++pold) {
+          ::new (static_cast<void*>(pnew)) Element(std::move(*pold));
+          pold->~Element();
+        }
+      }
+    }
+    InternalDeallocate();
   }
-}
 
-template <typename Element>
-inline void RepeatedField<Element>::MoveArray(Element* to, Element* from,
-                                              int array_size) {
-  CopyArray(to, from, array_size);
+  total_size_ = new_size;
+  arena_or_elements_ = new_rep->elements();
 }
 
+// TODO(b/266411038): we should really be able to make this:
+// template <bool annotate_size = true>
+// void Grow();
 template <typename Element>
-inline void RepeatedField<Element>::CopyArray(Element* to, const Element* from,
-                                              int array_size) {
-  internal::ElementCopier<Element>()(to, from, array_size);
-}
-
-namespace internal {
-
-template <typename Element, bool HasTrivialCopy>
-void ElementCopier<Element, HasTrivialCopy>::operator()(Element* to,
-                                                        const Element* from,
-                                                        int array_size) {
-  std::copy(from, from + array_size, to);
+PROTOBUF_NOINLINE void RepeatedField<Element>::Grow(int current_size,
+                                                    int new_size) {
+  AnnotateSize(current_size, total_size_);
+  GrowNoAnnotate(current_size, new_size);
+  AnnotateSize(total_size_, current_size);
 }
 
 template <typename Element>
-struct ElementCopier<Element, true> {
-  void operator()(Element* to, const Element* from, int array_size) {
-    memcpy(to, from, static_cast<size_t>(array_size) * sizeof(Element));
+inline void RepeatedField<Element>::Truncate(int new_size) {
+  GOOGLE_ABSL_DCHECK_LE(new_size, current_size_);
+  if (new_size < current_size_) {
+    Destroy(unsafe_elements() + new_size, unsafe_elements() + current_size_);
+    ExchangeCurrentSize(new_size);
   }
-};
-
-}  // namespace internal
-
-// Cords should be swapped when possible and need explicit clearing, so provide
-// some specializations for them. Some definitions are in the .cc file.
-template <>
-PROTOBUF_EXPORT inline void RepeatedField<absl::Cord>::RemoveLast() {
-  GOOGLE_ABSL_DCHECK_GT(current_size_, 0);
-  Mutable(size() - 1)->Clear();
-  ExchangeCurrentSize(current_size_ - 1);
-}
-template <>
-PROTOBUF_EXPORT void RepeatedField<absl::Cord>::Clear();
-template <>
-PROTOBUF_EXPORT inline void RepeatedField<absl::Cord>::SwapElements(
-    int index1, int index2) {
-  Mutable(index1)->swap(*Mutable(index2));
 }
 
 template <>
 PROTOBUF_EXPORT size_t
 RepeatedField<absl::Cord>::SpaceUsedExcludingSelfLong() const;
-template <>
-PROTOBUF_EXPORT void RepeatedField<absl::Cord>::Truncate(int new_size);
-template <>
-PROTOBUF_EXPORT void RepeatedField<absl::Cord>::Resize(int new_size,
-                                                        const absl::Cord& value);
-template <>
-PROTOBUF_EXPORT void RepeatedField<absl::Cord>::MoveArray(absl::Cord* to,
-                                                           absl::Cord* from,
-                                                           int size);
-template <>
-PROTOBUF_EXPORT void RepeatedField<absl::Cord>::CopyArray(
-    absl::Cord* to, const absl::Cord* from, int size);
 
 // -------------------------------------------------------------------
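
Because the generic Truncate(), RemoveLast() and Clear() now destroy the trailing elements themselves, the hand-written absl::Cord specializations above could be deleted. A small usage sketch (illustrative only) of the behavior the new RepeatedCordField tests below exercise:

    #include "absl/strings/cord.h"
    #include "google/protobuf/repeated_field.h"

    void CordSketch() {
      google::protobuf::RepeatedField<absl::Cord> field;
      field.Add(absl::Cord("foo"));
      field.Add(absl::Cord("bar"));
      field.Truncate(1);   // destroys the "bar" Cord; one live element remains
      field.RemoveLast();  // destroys the "foo" Cord; the field is now empty
      field.Resize(3, absl::Cord("baz"));  // constructs three live "baz" elements
    }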

src/google/protobuf/repeated_field_unittest.cc

@@ -470,6 +470,9 @@ TEST(RepeatedField, ReserveLarge) {
 }
 
 TEST(RepeatedField, ReserveHuge) {
+#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || defined(ABSL_HAVE_MEMORY_SANITIZER)
+  GTEST_SKIP() << "Disabled because sanitizer is active";
+#endif
   // Largest value that does not clamp to the large limit:
   constexpr int non_clamping_limit =
       (std::numeric_limits<int>::max() - sizeof(Arena*)) / 2;
@@ -494,7 +497,6 @@ TEST(RepeatedField, ReserveHuge) {
   EXPECT_GE(huge_field.Capacity(), min_clamping_size);
   ASSERT_LT(huge_field.Capacity(), std::numeric_limits<int>::max() - 1);
 
-#ifndef PROTOBUF_ASAN
   // The array containing all the fields is, in theory, up to MAXINT-1 in size.
   // However, some compilers can't handle a struct whose size is larger
   // than 2GB, and the protocol buffer format doesn't handle more than 2GB of
@@ -505,7 +507,6 @@ TEST(RepeatedField, ReserveHuge) {
   // size must still be clamped to a valid range.
   huge_field.Reserve(huge_field.Capacity() + 1);
   EXPECT_EQ(huge_field.Capacity(), std::numeric_limits<int>::max());
-#endif  // PROTOBUF_ASAN
 #endif  // PROTOBUF_TEST_ALLOW_LARGE_ALLOC
 }
@@ -651,6 +652,41 @@ TEST(RepeatedField, AddRange5) {
   ASSERT_EQ(me.Get(2), 2);
 }
 
+// Add contents of container with a quirky iterator like std::vector<bool>
+TEST(RepeatedField, AddRange6) {
+  RepeatedField<bool> me;
+  me.Add(true);
+  me.Add(false);
+
+  std::vector<bool> values;
+  values.push_back(true);
+  values.push_back(true);
+  values.push_back(false);
+
+  me.Add(values.begin(), values.end());
+  ASSERT_EQ(me.size(), 5);
+  ASSERT_EQ(me.Get(0), true);
+  ASSERT_EQ(me.Get(1), false);
+  ASSERT_EQ(me.Get(2), true);
+  ASSERT_EQ(me.Get(3), true);
+  ASSERT_EQ(me.Get(4), false);
+}
+
+// Add contents of absl::Span which evaluates to const T on access.
+TEST(RepeatedField, AddRange7) {
+  int ints[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  absl::Span<const int> span(ints);
+  auto p = span.begin();
+  static_assert(std::is_convertible<decltype(p), const int*>::value, "");
+  RepeatedField<int> me;
+  me.Add(span.begin(), span.end());
+
+  ASSERT_EQ(me.size(), 10);
+  for (int i = 0; i < 10; ++i) {
+    ASSERT_EQ(me.Get(i), i);
+  }
+}
+
 TEST(RepeatedField, AddAndAssignRanges) {
   RepeatedField<int> field;
@@ -953,6 +989,23 @@ TEST(RepeatedField, Truncate) {
 #endif
 }
 
+TEST(RepeatedCordField, AddRemoveLast) {
+  RepeatedField<absl::Cord> field;
+  field.Add(absl::Cord("foo"));
+  field.RemoveLast();
+}
+
+TEST(RepeatedCordField, AddClear) {
+  RepeatedField<absl::Cord> field;
+  field.Add(absl::Cord("foo"));
+  field.Clear();
+}
+
+TEST(RepeatedCordField, Resize) {
+  RepeatedField<absl::Cord> field;
+  field.Resize(10, absl::Cord("foo"));
+}
+
 TEST(RepeatedField, Cords) {
   RepeatedField<absl::Cord> field;
@@ -1007,8 +1060,8 @@ TEST(RepeatedField, TruncateCords) {
   // Truncating to the current size should be fine (no-op), but truncating
   // to a larger size should crash.
   field.Truncate(field.size());
-#if PROTOBUF_HAS_DEATH_TEST
-  EXPECT_DEBUG_DEATH(field.Truncate(field.size() + 1), "new_size");
+#if defined(PROTOBUF_HAS_DEATH_TEST) && !defined(NDEBUG)
+  EXPECT_DEATH(field.Truncate(field.size() + 1), "new_size");
 #endif
 }
@@ -1067,6 +1120,107 @@ TEST(RepeatedField, TestSAddFromSelf) {
   }
 }
 
+// We have, or at least had bad callers that never triggered our DCHECKS
+// Here we check we DO fail on bad Truncate calls under debug, and do nothing
+// under opt compiles.
+TEST(RepeatedField, HardenAgainstBadTruncate) {
+  RepeatedField<int> field;
+  for (int size = 0; size < 10; ++size) {
+    field.Truncate(size);
+#if PROTOBUF_HAS_DEATH_TEST
+    EXPECT_DEBUG_DEATH(field.Truncate(size + 1), "new_size <= current_size_");
+    EXPECT_DEBUG_DEATH(field.Truncate(size + 2), "new_size <= current_size_");
+#elif defined(NDEBUG)
+    field.Truncate(size + 1);
+    field.Truncate(size + 1);
+#endif
+    EXPECT_EQ(field.size(), size);
+    field.Add(1);
+  }
+}
+
+#if defined(PROTOBUF_HAS_DEATH_TEST) && (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
+                                         defined(ABSL_HAVE_MEMORY_SANITIZER))
+
+// This function verifies that the code dies under ASAN or MSAN trying to both
+// read and write the reserved element directly beyond the last element.
+void VerifyDeathOnWriteAndReadAccessBeyondEnd(RepeatedField<int64_t>& field) {
+  auto* end = field.Mutable(field.size() - 1) + 1;
+#if defined(ABSL_HAVE_ADDRESS_SANITIZER)
+  EXPECT_DEATH(*end = 1, "container-overflow");
+  EXPECT_DEATH(EXPECT_NE(*end, 1), "container-overflow");
+#elif defined(ABSL_HAVE_MEMORY_SANITIZER)
+  EXPECT_DEATH(EXPECT_NE(*end, 1), "use-of-uninitialized-value");
+#endif
+
+  // Confirm we died a death of *SAN
+  EXPECT_EQ(field.AddAlreadyReserved(), end);
+  *end = 1;
+  EXPECT_EQ(*end, 1);
+}
+
+TEST(RepeatedField, PoisonsMemoryOnAdd) {
+  RepeatedField<int64_t> field;
+  do {
+    field.Add(0);
+  } while (field.size() == field.Capacity());
+  VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
+}
+
+TEST(RepeatedField, PoisonsMemoryOnAddAlreadyReserved) {
+  RepeatedField<int64_t> field;
+  field.Reserve(2);
+  field.AddAlreadyReserved();
+  VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
+}
+
+TEST(RepeatedField, PoisonsMemoryOnAddNAlreadyReserved) {
+  RepeatedField<int64_t> field;
+  field.Reserve(10);
+  field.AddNAlreadyReserved(8);
+  VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
+}
+
+TEST(RepeatedField, PoisonsMemoryOnResize) {
+  RepeatedField<int64_t> field;
+  field.Add(0);
+  do {
+    field.Resize(field.size() + 1, 1);
+  } while (field.size() == field.Capacity());
+  VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
+
+  // Shrink size
+  field.Resize(field.size() - 1, 1);
+  VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
+}
+
+TEST(RepeatedField, PoisonsMemoryOnTruncate) {
+  RepeatedField<int64_t> field;
+  field.Add(0);
+  field.Add(1);
+  field.Truncate(1);
+  VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
+}
+
+TEST(RepeatedField, PoisonsMemoryOnReserve) {
+  RepeatedField<int64_t> field;
+  field.Add(1);
+  field.Reserve(field.Capacity() + 1);
+  VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
+}
+
+TEST(RepeatedField, PoisonsMemoryOnAssign) {
+  RepeatedField<int64_t> src;
+  RepeatedField<int64_t> field;
+  src.Add(1);
+  src.Add(2);
+  field.Reserve(3);
+  field = src;
+  VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
+}
+
+#endif
+
 // ===================================================================
 // RepeatedPtrField tests. These pretty much just mirror the RepeatedField
 // tests above.
