Change RepeatedField to create only 'current' elements, not 'total' elements.

Currently, RepeatedField allocates and constructs all elements on 'Reserve'. This is inefficient and, more problematically, leads to sanitizer issues once we have element types such as Cord that actively poison their private contents. This change fixes RepeatedField to construct only the elements that are actually 'live'.
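As background, here is a minimal, self-contained sketch of the mechanism this change relies on: keep the capacity allocated, construct elements only up to the current size, and use sanitizer container annotations so that touching the reserved-but-unused tail is reported. `TinyRepeated` and its members are hypothetical names used only for illustration; `ABSL_ANNOTATE_CONTIGUOUS_CONTAINER` is the same absl macro used in repeated_field.h below.

#include <new>
#include <utility>

#include "absl/base/dynamic_annotations.h"

// Hypothetical, simplified container: only [0, size_) is constructed; the
// tail [size_, capacity_) stays raw storage and is poisoned under ASAN.
// (Simplified: ignores growth, copies, arenas, and over-aligned types.)
template <typename T>
class TinyRepeated {
 public:
  explicit TinyRepeated(int capacity)
      : data_(static_cast<T*>(::operator new(capacity * sizeof(T)))),
        size_(0),
        capacity_(capacity) {
    // Mark everything past the (empty) live range as off-limits.
    ABSL_ANNOTATE_CONTIGUOUS_CONTAINER(data_, data_ + capacity_,
                                       data_ + capacity_, data_ + size_);
  }

  void Add(T value) {
    // Assumes size_ < capacity_; real code would grow here.
    // Unpoison exactly one more slot, then construct the element in place.
    ABSL_ANNOTATE_CONTIGUOUS_CONTAINER(data_, data_ + capacity_,
                                       data_ + size_, data_ + size_ + 1);
    ::new (static_cast<void*>(data_ + size_)) T(std::move(value));
    ++size_;
  }

  ~TinyRepeated() {
    // Destroy only the live elements, then unpoison the tail before freeing.
    for (int i = 0; i < size_; ++i) data_[i].~T();
    ABSL_ANNOTATE_CONTIGUOUS_CONTAINER(data_, data_ + capacity_,
                                       data_ + size_, data_ + capacity_);
    ::operator delete(data_);
  }

 private:
  T* data_;
  int size_;
  int capacity_;
};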

PiperOrigin-RevId: 504121827
pull/11593/head
Martijn Vels 2 years ago committed by Copybara-Service
parent ffa9b503bf
commit 9ca411a46b
  1. src/google/protobuf/generated_message_tctable_lite.cc (16)
  2. src/google/protobuf/port_def.inc (9)
  3. src/google/protobuf/port_undef.inc (1)
  4. src/google/protobuf/repeated_field.cc (48)
  5. src/google/protobuf/repeated_field.h (512)
  6. src/google/protobuf/repeated_field_unittest.cc (162)

@@ -602,19 +602,11 @@ PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedFixed(
}
}
auto& field = RefAt<RepeatedField<LayoutType>>(msg, data.offset());
int idx = field.size();
auto elem = field.Add();
int space = field.Capacity() - idx;
idx = 0;
const auto expected_tag = UnalignedLoad<TagType>(ptr);
const auto tag = UnalignedLoad<TagType>(ptr);
do {
ptr += sizeof(TagType);
elem[idx++] = UnalignedLoad<LayoutType>(ptr);
ptr += sizeof(LayoutType);
if (idx >= space) break;
if (!ctx->DataAvailable(ptr)) break;
} while (UnalignedLoad<TagType>(ptr) == expected_tag);
field.AddNAlreadyReserved(idx - 1);
field.Add(UnalignedLoad<LayoutType>(ptr + sizeof(TagType)));
ptr += sizeof(TagType) + sizeof(LayoutType);
} while (ctx->DataAvailable(ptr) && UnalignedLoad<TagType>(ptr) == tag);
return ToParseLoop(PROTOBUF_TC_PARAM_PASS);
}
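The hunk above interleaves removed and added lines. As a reading aid, here is the rewritten loop reassembled from the added lines (a reconstruction for illustration, not verbatim source):

auto& field = RefAt<RepeatedField<LayoutType>>(msg, data.offset());
const auto tag = UnalignedLoad<TagType>(ptr);
do {
  // Each wire value appends one live, fully constructed element; no elements
  // are created ahead of time out of reserved capacity anymore.
  field.Add(UnalignedLoad<LayoutType>(ptr + sizeof(TagType)));
  ptr += sizeof(TagType) + sizeof(LayoutType);
} while (ctx->DataAvailable(ptr) && UnalignedLoad<TagType>(ptr) == tag);
return ToParseLoop(PROTOBUF_TC_PARAM_PASS);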

@@ -547,6 +547,15 @@ static_assert(PROTOBUF_CPLUSPLUS_MIN(201402L), "Protobuf only supports C++14 and
#define PROTOBUF_NODISCARD
#endif
#ifdef PROTOBUF_RESTRICT
#error PROTOBUF_RESTRICT was previously defined
#endif
#if defined(__clang__) || defined(__GNUC__)
#define PROTOBUF_RESTRICT __restrict
#else
#define PROTOBUF_RESTRICT
#endif
#ifdef PROTOBUF_FORCE_COPY_IN_RELEASE
#error PROTOBUF_FORCE_COPY_IN_RELEASE was previously defined
#endif

@@ -73,6 +73,7 @@
#undef PROTOBUF_EXPORT
#undef PROTOC_EXPORT
#undef PROTOBUF_NODISCARD
#undef PROTOBUF_RESTRICT
#undef PROTOBUF_FORCE_COPY_IN_RELEASE
#undef PROTOBUF_FORCE_COPY_IN_SWAP
#undef PROTOBUF_FORCE_COPY_IN_MOVE

@@ -48,14 +48,6 @@ namespace google {
namespace protobuf {
template <>
PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::Clear() {
for (int i = 0; i < current_size_; i++) {
Mutable(i)->Clear();
}
ExchangeCurrentSize(0);
}
template <>
PROTOBUF_EXPORT_TEMPLATE_DEFINE size_t
RepeatedField<absl::Cord>::SpaceUsedExcludingSelfLong() const {
@@ -67,46 +59,6 @@ RepeatedField<absl::Cord>::SpaceUsedExcludingSelfLong() const {
return result;
}
template <>
PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::Truncate(
int new_size) {
GOOGLE_ABSL_DCHECK_LE(new_size, current_size_);
while (current_size_ > new_size) {
RemoveLast();
}
}
template <>
PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::Resize(
int new_size, const absl::Cord& value) {
GOOGLE_ABSL_DCHECK_GE(new_size, 0);
if (new_size > current_size_) {
Reserve(new_size);
std::fill(&rep()->elements()[ExchangeCurrentSize(new_size)],
&rep()->elements()[new_size], value);
} else {
while (current_size_ > new_size) {
RemoveLast();
}
}
}
template <>
PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::MoveArray(
absl::Cord* to, absl::Cord* from, int size) {
for (int i = 0; i < size; i++) {
swap(to[i], from[i]);
}
}
template <>
PROTOBUF_EXPORT_TEMPLATE_DEFINE void RepeatedField<absl::Cord>::CopyArray(
absl::Cord* to, const absl::Cord* from, int size) {
for (int i = 0; i < size; i++) {
to[i] = from[i];
}
}
} // namespace protobuf
} // namespace google

@@ -47,13 +47,16 @@
#include <algorithm>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include "google/protobuf/arena.h"
#include "google/protobuf/port.h"
#include "absl/base/dynamic_annotations.h"
#include "google/protobuf/stubs/logging.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/cord.h"
#include "google/protobuf/generated_enum_util.h"
#include "google/protobuf/message_lite.h"
@@ -170,7 +173,7 @@ class RepeatedField final {
constexpr RepeatedField();
explicit RepeatedField(Arena* arena);
RepeatedField(const RepeatedField& other);
RepeatedField(const RepeatedField& rhs);
template <typename Iter,
typename = typename std::enable_if<std::is_constructible<
@@ -197,7 +200,8 @@ class RepeatedField final {
Element& at(int index);
void Set(int index, const Element& value);
void Add(const Element& value);
void Add(Element value);
// Appends a new element and returns a pointer to it.
// The new element is uninitialized if |Element| is a POD type.
Element* Add();
@@ -231,14 +235,14 @@ class RepeatedField final {
// Except for RepeatedField<Cord>, for which it is O(size-new_size).
void Truncate(int new_size);
void AddAlreadyReserved(const Element& value);
// Appends a new element and return a pointer to it.
// The new element is uninitialized if |Element| is a POD type.
// Should be called only if Capacity() > Size().
Element* AddAlreadyReserved();
Element* AddNAlreadyReserved(int elements);
void AddAlreadyReserved(Element value);
int Capacity() const;
// Adds `n` elements to this instance asserting there is enough capacity.
// The added elements are uninitialized if `Element` is trivial.
Element* AddAlreadyReserved();
Element* AddNAlreadyReserved(int n);
// Like STL resize. Uses value to fill appended elements.
// Like Truncate() if new_size <= size(), otherwise this is
// O(new_size - size()).
@@ -319,7 +323,6 @@ class RepeatedField final {
// This is public due to it being called by generated code.
inline void InternalSwap(RepeatedField* other);
private:
template <typename T> friend class Arena::InternalHelper;
@@ -335,6 +338,38 @@ class RepeatedField final {
// GOOGLE_ABSL_DCHECK (see API docs for details).
void UnsafeArenaSwap(RepeatedField* other);
// Copy constructs `n` instances in place into the array `dst`.
// This function is identical to `std::uninitialized_copy_n(src, n, dst)`
// except that we explicitly declare the memory to not be aliased, which will
// result in `memcpy` code generation instead of `memmove` for trivial types.
static inline void UninitializedCopyN(const Element* PROTOBUF_RESTRICT src,
int n, Element* PROTOBUF_RESTRICT dst) {
std::uninitialized_copy_n(src, n, dst);
}
// Copy constructs `[begin, end)` instances in place into the array `dst`.
// See above `UninitializedCopyN()` function comments for more information.
template <typename Iter>
static inline void UninitializedCopy(Iter begin, Iter end,
Element* PROTOBUF_RESTRICT dst) {
std::uninitialized_copy(begin, end, dst);
}
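// Illustrative sketch, not part of this commit: the non-aliasing qualifier is
// what enables the `memcpy` lowering mentioned above. For a hypothetical free
// function
//
//   void CopyTrivial(const int* PROTOBUF_RESTRICT src, int n,
//                    int* PROTOBUF_RESTRICT dst) {
//     std::uninitialized_copy_n(src, n, dst);
//   }
//
// the compiler may assume `src` and `dst` do not overlap and emit `memcpy`,
// whereas without the qualifier it has to emit an overlap-safe `memmove` for
// trivial element types.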
template <typename Iter>
void AddForwardIterator(Iter begin, Iter end);
template <typename Iter>
void AddInputIterator(Iter begin, Iter end);
// Reserves space to expand the field to at least the given size.
// If the array is grown, it will always be at least doubled in size.
// If `annotate_size` is true (the default), then this function will annotate
// the old container from `current_size` to `total_size_` (unpoison memory)
// directly before it is being released, and annotate the new container from
// `total_size_` to `current_size` (poison unused memory).
void Grow(int current_size, int new_size);
void GrowNoAnnotate(int current_size, int new_size);
static constexpr int kInitialSize = 0;
// A note on the representation here (see also comment below for
// RepeatedPtrFieldBase's struct Rep):
@@ -349,11 +384,30 @@ class RepeatedField final {
int current_size_;
int total_size_;
// Annotates a change in size of this instance. This function should be called
// with (total_size, current_size) after new memory has been allocated and
// filled from previous memory, and called with (current_size, total_size)
// right before (previously annotated) memory is released.
void AnnotateSize(int old_size, int new_size) const {
if (old_size != new_size) {
ABSL_ANNOTATE_CONTIGUOUS_CONTAINER(
unsafe_elements(), unsafe_elements() + total_size_,
unsafe_elements() + old_size, unsafe_elements() + new_size);
if (new_size < old_size) {
ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(
unsafe_elements() + new_size,
(old_size - new_size) * sizeof(Element));
}
}
}
// Replaces current_size_ with new_size and returns the previous value of
// current_size_. This function is intended to be the only place where
// current_size_ is modified.
// current_size_ is modified, with the exception of `AddInputIterator()`
// where the size of added items is not known in advance.
inline int ExchangeCurrentSize(int new_size) {
const int prev_size = current_size_;
AnnotateSize(prev_size, new_size);
current_size_ = new_size;
return prev_size;
}
@@ -370,7 +424,6 @@ class RepeatedField final {
}
};
// If total_size_ == 0 this points to an Arena otherwise it points to the
// elements member of a Rep struct. Using this invariant allows the storage of
// the arena pointer without an extra allocation in the constructor.
@@ -401,144 +454,28 @@ class RepeatedField final {
friend class Arena;
typedef void InternalArenaConstructable_;
// Moves the contents of |from| into |to|, possibly clobbering |from| in the
// process. For primitive types this is just a memcpy(), but it could be
// specialized for non-primitive types to, say, swap each element instead.
// In fact, we do exactly that for Cords.
void MoveArray(Element* to, Element* from, int size);
// Copies the elements of |from| into |to|.
void CopyArray(Element* to, const Element* from, int size);
// Internal helper to delete all elements and deallocate the storage.
void InternalDeallocate(Rep* rep, int size, bool in_destructor) {
if (rep != nullptr) {
Element* e = &rep->elements()[0];
if (!std::is_trivial<Element>::value) {
Element* limit = &rep->elements()[size];
for (; e < limit; e++) {
e->~Element();
}
}
const size_t bytes = size * sizeof(*e) + kRepHeaderSize;
if (rep->arena == nullptr) {
internal::SizedDelete(rep, bytes);
} else if (!in_destructor) {
// If we are in the destructor, we might be being destroyed as part of
// the arena teardown. We can't try and return blocks to the arena then.
rep->arena->ReturnArrayMemory(rep, bytes);
}
// Destroys all elements in [begin, end).
// This function does nothing if `Element` is trivial.
static void Destroy(const Element* begin, const Element* end) {
if (!std::is_trivial<Element>::value) {
std::for_each(begin, end, [&](const Element& e) { e.~Element(); });
}
}
// This class is a performance wrapper around RepeatedField::Add(const T&)
// function. In general unless a RepeatedField is a local stack variable LLVM
// has a hard time optimizing Add. The machine code tends to be
// loop:
// mov %size, dword ptr [%repeated_field] // load
// cmp %size, dword ptr [%repeated_field + 4]
// jae fallback
// mov %buffer, qword ptr [%repeated_field + 8]
// mov dword [%buffer + %size * 4], %value
// inc %size // increment
// mov dword ptr [%repeated_field], %size // store
// jmp loop
//
// This puts a load/store in each iteration of the important loop variable
// size. It's a pretty bad compile that happens even in simple cases, but
// largely the presence of the fallback path disturbs the compilers mem-to-reg
// analysis.
//
// This class takes ownership of a repeated field for the duration of its
// lifetime. The repeated field should not be accessed during this time, ie.
// only access through this class is allowed. This class should always be a
// function local stack variable. Intended use
//
// void AddSequence(const int* begin, const int* end, RepeatedField<int>* out)
// {
// RepeatedFieldAdder<int> adder(out); // Take ownership of out
// for (auto it = begin; it != end; ++it) {
// adder.Add(*it);
// }
// }
//
// Typically, due to the fact that adder is a local stack variable, the
// compiler will be successful in mem-to-reg transformation and the machine
// code will be
// loop:
// cmp %size, %capacity
// jae fallback
// mov dword ptr [%buffer + %size * 4], %val
// inc %size
// jmp loop
//
// The first version executes at 7 cycles per iteration while the second
// version executes at only 1 or 2 cycles.
template <int = 0, bool = std::is_trivial<Element>::value>
class FastAdderImpl {
public:
explicit FastAdderImpl(RepeatedField* rf) : repeated_field_(rf) {
index_ = repeated_field_->current_size_;
capacity_ = repeated_field_->total_size_;
buffer_ = repeated_field_->unsafe_elements();
}
FastAdderImpl(const FastAdderImpl&) = delete;
FastAdderImpl& operator=(const FastAdderImpl&) = delete;
~FastAdderImpl() {
repeated_field_->current_size_ = index_;
}
void Add(Element val) {
if (index_ == capacity_) {
repeated_field_->current_size_ = index_;
repeated_field_->Reserve(index_ + 1);
capacity_ = repeated_field_->total_size_;
buffer_ = repeated_field_->unsafe_elements();
}
buffer_[index_++] = val;
// Internal helper to delete all elements and deallocate the storage.
template <bool in_destructor = false>
void InternalDeallocate() {
const size_t bytes = total_size_ * sizeof(Element) + kRepHeaderSize;
if (rep()->arena == nullptr) {
internal::SizedDelete(rep(), bytes);
} else if (!in_destructor) {
// If we are in the destructor, we might be being destroyed as part of
// the arena teardown. We can't try and return blocks to the arena then.
rep()->arena->ReturnArrayMemory(rep(), bytes);
}
private:
RepeatedField* repeated_field_;
int index_;
int capacity_;
Element* buffer_;
};
// FastAdder is a wrapper for adding fields. The specialization above handles
// POD types more efficiently than RepeatedField.
template <int I>
class FastAdderImpl<I, false> {
public:
explicit FastAdderImpl(RepeatedField* rf) : repeated_field_(rf) {}
FastAdderImpl(const FastAdderImpl&) = delete;
FastAdderImpl& operator=(const FastAdderImpl&) = delete;
void Add(const Element& val) { repeated_field_->Add(val); }
private:
RepeatedField* repeated_field_;
};
using FastAdder = FastAdderImpl<>;
friend class TestRepeatedFieldHelper;
friend class ::google::protobuf::internal::ParseContext;
};
namespace internal {
// This is a helper template to copy an array of elements efficiently when they
// have a trivial copy constructor, and correctly otherwise. This really
// shouldn't be necessary, but our compiler doesn't optimize std::copy very
// effectively.
template <typename Element,
bool HasTrivialCopy = std::is_trivial<Element>::value>
struct ElementCopier {
void operator()(Element* to, const Element* from, int array_size);
}
};
} // namespace internal
// implementation ====================================================
template <typename Element>
@@ -554,13 +491,13 @@ inline RepeatedField<Element>::RepeatedField(Arena* arena)
}
template <typename Element>
inline RepeatedField<Element>::RepeatedField(const RepeatedField& other)
inline RepeatedField<Element>::RepeatedField(const RepeatedField& rhs)
: current_size_(0), total_size_(0), arena_or_elements_(nullptr) {
StaticValidityCheck();
if (other.current_size_ != 0) {
Reserve(other.size());
AddNAlreadyReserved(other.size());
CopyArray(Mutable(0), &other.Get(0), other.size());
if (size_t size = rhs.current_size_) {
Grow(0, size);
ExchangeCurrentSize(size);
UninitializedCopyN(rhs.elements(), size, unsafe_elements());
}
}
@@ -574,8 +511,6 @@ RepeatedField<Element>::RepeatedField(Iter begin, Iter end)
template <typename Element>
RepeatedField<Element>::~RepeatedField() {
// Fail-safe in case we miss calling this in a constructor. Note: this one
// won't trigger for leaked maps that never get destructed.
StaticValidityCheck();
#ifndef NDEBUG
// Try to trigger segfault / asan failure in non-opt builds if arena_
@@ -584,7 +519,8 @@ RepeatedField<Element>::~RepeatedField() {
if (arena) (void)arena->SpaceAllocated();
#endif
if (total_size_ > 0) {
InternalDeallocate(rep(), total_size_, true);
Destroy(unsafe_elements(), unsafe_elements() + current_size_);
InternalDeallocate<true>();
}
}
@@ -647,36 +583,41 @@ inline int RepeatedField<Element>::Capacity() const {
}
template <typename Element>
inline void RepeatedField<Element>::AddAlreadyReserved(const Element& value) {
inline void RepeatedField<Element>::AddAlreadyReserved(Element value) {
GOOGLE_ABSL_DCHECK_LT(current_size_, total_size_);
elements()[ExchangeCurrentSize(current_size_ + 1)] = value;
void* p = elements() + ExchangeCurrentSize(current_size_ + 1);
::new (p) Element(std::move(value));
}
template <typename Element>
inline Element* RepeatedField<Element>::AddAlreadyReserved() {
GOOGLE_ABSL_DCHECK_LT(current_size_, total_size_);
return &elements()[ExchangeCurrentSize(current_size_ + 1)];
// new (p) <TrivialType> compiles into nothing: this is intentional as this
// function is documented to return uninitialized data for trivial types.
void* p = elements() + ExchangeCurrentSize(current_size_ + 1);
return ::new (p) Element;
}
template <typename Element>
inline Element* RepeatedField<Element>::AddNAlreadyReserved(int elements) {
GOOGLE_ABSL_DCHECK_GE(total_size_ - current_size_, elements)
inline Element* RepeatedField<Element>::AddNAlreadyReserved(int n) {
GOOGLE_ABSL_DCHECK_GE(total_size_ - current_size_, n)
<< total_size_ << ", " << current_size_;
// Warning: sometimes people call this when elements == 0 and
// total_size_ == 0. In this case the return pointer points to a zero size
// array (n == 0). Hence we can just use unsafe_elements(), because the user
// cannot dereference the pointer anyway.
return unsafe_elements() + ExchangeCurrentSize(current_size_ + elements);
Element* p = unsafe_elements() + ExchangeCurrentSize(current_size_ + n);
for (Element *begin = p, *end = p + n; begin != end; ++begin) {
new (static_cast<void*>(begin)) Element;
}
return p;
}
template <typename Element>
inline void RepeatedField<Element>::Resize(int new_size, const Element& value) {
GOOGLE_ABSL_DCHECK_GE(new_size, 0);
if (new_size > current_size_) {
Reserve(new_size);
std::fill(&elements()[ExchangeCurrentSize(new_size)], &elements()[new_size],
value);
} else {
if (new_size > total_size_) Grow(current_size_, new_size);
Element* first = elements() + ExchangeCurrentSize(new_size);
std::uninitialized_fill(first, elements() + current_size_, value);
} else if (new_size < current_size_) {
Destroy(unsafe_elements() + new_size, unsafe_elements() + current_size_);
ExchangeCurrentSize(new_size);
}
}
@@ -717,22 +658,73 @@ inline void RepeatedField<Element>::Set(int index, const Element& value) {
}
template <typename Element>
inline void RepeatedField<Element>::Add(const Element& value) {
if (current_size_ == total_size_) {
// value could reference an element of the array. Reserving new space will
// invalidate the reference. So we must make a copy first.
auto tmp = value;
Reserve(total_size_ + 1);
elements()[ExchangeCurrentSize(current_size_ + 1)] = std::move(tmp);
} else {
elements()[ExchangeCurrentSize(current_size_ + 1)] = value;
inline void RepeatedField<Element>::Add(Element value) {
int total_size = total_size_;
Element* elem = unsafe_elements();
if (ABSL_PREDICT_FALSE(current_size_ == total_size)) {
Grow(current_size_, current_size_ + 1);
total_size = total_size_;
elem = unsafe_elements();
}
int new_size = current_size_ + 1;
void* p = elem + ExchangeCurrentSize(new_size);
::new (p) Element(std::move(value));
// The below helps the compiler optimize dense loops.
ABSL_ASSUME(new_size == current_size_);
ABSL_ASSUME(elem == arena_or_elements_);
ABSL_ASSUME(total_size == total_size_);
}
template <typename Element>
inline Element* RepeatedField<Element>::Add() {
if (current_size_ == total_size_) Reserve(total_size_ + 1);
return &elements()[ExchangeCurrentSize(current_size_ + 1)];
if (ABSL_PREDICT_FALSE(current_size_ == total_size_)) {
Grow(current_size_, current_size_ + 1);
}
void* p = unsafe_elements() + ExchangeCurrentSize(current_size_ + 1);
return ::new (p) Element;
}
template <typename Element>
template <typename Iter>
inline void RepeatedField<Element>::AddForwardIterator(Iter begin, Iter end) {
int total_size = total_size_;
Element* elem = unsafe_elements();
int new_size = current_size_ + static_cast<int>(std::distance(begin, end));
if (ABSL_PREDICT_FALSE(new_size > total_size)) {
Grow(current_size_, new_size);
elem = unsafe_elements();
total_size = total_size_;
}
UninitializedCopy(begin, end, elem + ExchangeCurrentSize(new_size));
// The below helps the compiler optimize dense loops.
ABSL_ASSUME(new_size == current_size_);
ABSL_ASSUME(elem == arena_or_elements_);
ABSL_ASSUME(total_size == total_size_);
}
template <typename Element>
template <typename Iter>
inline void RepeatedField<Element>::AddInputIterator(Iter begin, Iter end) {
Element* first = unsafe_elements() + current_size_;
Element* last = unsafe_elements() + total_size_;
AnnotateSize(current_size_, total_size_);
while (begin != end) {
if (ABSL_PREDICT_FALSE(first == last)) {
int current_size = first - unsafe_elements();
GrowNoAnnotate(current_size, current_size + 1);
first = unsafe_elements() + current_size;
last = unsafe_elements() + total_size_;
}
::new (static_cast<void*>(first)) Element(*begin);
++begin;
++first;
}
current_size_ = first - unsafe_elements();
AnnotateSize(total_size_, current_size_);
}
template <typename Element>
@@ -741,27 +733,16 @@ inline void RepeatedField<Element>::Add(Iter begin, Iter end) {
if (std::is_base_of<
std::forward_iterator_tag,
typename std::iterator_traits<Iter>::iterator_category>::value) {
int additional = static_cast<int>(std::distance(begin, end));
if (additional == 0) return;
int new_size = current_size_ + additional;
Reserve(new_size);
// TODO(ckennelly): The compiler loses track of the buffer freshly
// allocated by Reserve() by the time we call elements, so it cannot
// guarantee that elements does not alias [begin(), end()).
//
// If restrict is available, annotating the pointer obtained from elements()
// causes this to lower to memcpy instead of memmove.
std::copy(begin, end, elements() + ExchangeCurrentSize(new_size));
AddForwardIterator(begin, end);
} else {
FastAdder fast_adder(this);
for (; begin != end; ++begin) fast_adder.Add(*begin);
AddInputIterator(begin, end);
}
}
template <typename Element>
inline void RepeatedField<Element>::RemoveLast() {
GOOGLE_ABSL_DCHECK_GT(current_size_, 0);
elements()[current_size_ - 1].~Element();
ExchangeCurrentSize(current_size_ - 1);
}
@@ -787,17 +768,17 @@ void RepeatedField<Element>::ExtractSubrange(int start, int num,
template <typename Element>
inline void RepeatedField<Element>::Clear() {
Destroy(unsafe_elements(), unsafe_elements() + current_size_);
ExchangeCurrentSize(0);
}
template <typename Element>
inline void RepeatedField<Element>::MergeFrom(const RepeatedField& other) {
GOOGLE_ABSL_DCHECK_NE(&other, this);
if (other.current_size_ != 0) {
int existing_size = size();
Reserve(existing_size + other.size());
AddNAlreadyReserved(other.size());
CopyArray(Mutable(existing_size), &other.Get(0), other.size());
inline void RepeatedField<Element>::MergeFrom(const RepeatedField& rhs) {
GOOGLE_ABSL_DCHECK_NE(&rhs, this);
if (size_t size = rhs.current_size_) {
Reserve(current_size_ + size);
Element* dst = elements() + ExchangeCurrentSize(current_size_ + size);
UninitializedCopyN(rhs.elements(), size, dst);
}
}
@@ -950,12 +931,19 @@ inline int CalculateReserveSize(int total_size, int new_size) {
}
} // namespace internal
template <typename Element>
void RepeatedField<Element>::Reserve(int new_size) {
if (ABSL_PREDICT_FALSE(new_size > total_size_)) {
Grow(current_size_, new_size);
}
}
// Avoid inlining of Reserve(): new, copy, and delete[] lead to a significant
// amount of code bloat.
template <typename Element>
PROTOBUF_NOINLINE void RepeatedField<Element>::Reserve(int new_size) {
if (total_size_ >= new_size) return;
Rep* old_rep = total_size_ > 0 ? rep() : nullptr;
PROTOBUF_NOINLINE void RepeatedField<Element>::GrowNoAnnotate(int current_size,
int new_size) {
GOOGLE_ABSL_DCHECK_GT(new_size, total_size_);
Rep* new_rep;
Arena* arena = GetOwningArena();
@@ -974,116 +962,52 @@ PROTOBUF_NOINLINE void RepeatedField<Element>::Reserve(int new_size) {
new_rep = reinterpret_cast<Rep*>(Arena::CreateArray<char>(arena, bytes));
}
new_rep->arena = arena;
int old_total_size = total_size_;
// Already known: new_size >= internal::kMinRepeatedFieldAllocationSize
// Maintain invariant:
// total_size_ == 0 ||
// total_size_ >= internal::kMinRepeatedFieldAllocationSize
total_size_ = new_size;
arena_or_elements_ = new_rep->elements();
// Invoke placement-new on newly allocated elements. We shouldn't have to do
// this, since Element is supposed to be POD, but a previous version of this
// code allocated storage with "new Element[size]" and some code uses
// RepeatedField with non-POD types, relying on constructor invocation. If
// Element has a trivial constructor (e.g., int32_t), gcc (tested with -O2)
// completely removes this loop because the loop body is empty, so this has no
// effect unless its side-effects are required for correctness.
// Note that we do this before MoveArray() below because Element's copy
// assignment implementation will want an initialized instance first.
Element* e = &elements()[0];
Element* limit = e + total_size_;
for (; e < limit; e++) {
new (e) Element;
}
if (current_size_ > 0) {
MoveArray(&elements()[0], old_rep->elements(), current_size_);
}
// Likewise, we need to invoke destructors on the old array.
InternalDeallocate(old_rep, old_total_size, false);
// Note that in the case of Cords, MoveArray() will have conveniently replaced
// all the Cords in the original array with empty values, which means that
// even if the old array was initial_space_, we don't have to worry about
// the old cords sticking around and holding on to memory.
}
template <typename Element>
inline void RepeatedField<Element>::Truncate(int new_size) {
GOOGLE_ABSL_DCHECK_LE(new_size, current_size_);
if (current_size_ > 0) {
ExchangeCurrentSize(new_size);
if (total_size_ > 0) {
if (current_size > 0) {
Element* pnew = new_rep->elements();
Element* pold = elements();
// TODO(b/263791665): add absl::is_trivially_relocatable<Element>
if (std::is_trivial<Element>::value) {
memcpy(pnew, pold, current_size * sizeof(Element));
} else {
for (Element* end = pnew + current_size; pnew != end; ++pnew, ++pold) {
::new (static_cast<void*>(pnew)) Element(std::move(*pold));
pold->~Element();
}
}
}
InternalDeallocate();
}
}
template <typename Element>
inline void RepeatedField<Element>::MoveArray(Element* to, Element* from,
int array_size) {
CopyArray(to, from, array_size);
total_size_ = new_size;
arena_or_elements_ = new_rep->elements();
}
// TODO(b/266411038): we should really be able to make this:
// template <bool annotate_size = true>
// void Grow();
template <typename Element>
inline void RepeatedField<Element>::CopyArray(Element* to, const Element* from,
int array_size) {
internal::ElementCopier<Element>()(to, from, array_size);
}
namespace internal {
template <typename Element, bool HasTrivialCopy>
void ElementCopier<Element, HasTrivialCopy>::operator()(Element* to,
const Element* from,
int array_size) {
std::copy(from, from + array_size, to);
PROTOBUF_NOINLINE void RepeatedField<Element>::Grow(int current_size,
int new_size) {
AnnotateSize(current_size, total_size_);
GrowNoAnnotate(current_size, new_size);
AnnotateSize(total_size_, current_size);
}
template <typename Element>
struct ElementCopier<Element, true> {
void operator()(Element* to, const Element* from, int array_size) {
memcpy(to, from, static_cast<size_t>(array_size) * sizeof(Element));
inline void RepeatedField<Element>::Truncate(int new_size) {
GOOGLE_ABSL_DCHECK_LE(new_size, current_size_);
if (new_size < current_size_) {
Destroy(unsafe_elements() + new_size, unsafe_elements() + current_size_);
ExchangeCurrentSize(new_size);
}
};
} // namespace internal
// Cords should be swapped when possible and need explicit clearing, so provide
// some specializations for them. Some definitions are in the .cc file.
template <>
PROTOBUF_EXPORT inline void RepeatedField<absl::Cord>::RemoveLast() {
GOOGLE_ABSL_DCHECK_GT(current_size_, 0);
Mutable(size() - 1)->Clear();
ExchangeCurrentSize(current_size_ - 1);
}
template <>
PROTOBUF_EXPORT void RepeatedField<absl::Cord>::Clear();
template <>
PROTOBUF_EXPORT inline void RepeatedField<absl::Cord>::SwapElements(
int index1, int index2) {
Mutable(index1)->swap(*Mutable(index2));
}
template <>
PROTOBUF_EXPORT size_t
RepeatedField<absl::Cord>::SpaceUsedExcludingSelfLong() const;
template <>
PROTOBUF_EXPORT void RepeatedField<absl::Cord>::Truncate(int new_size);
template <>
PROTOBUF_EXPORT void RepeatedField<absl::Cord>::Resize(int new_size,
const absl::Cord& value);
template <>
PROTOBUF_EXPORT void RepeatedField<absl::Cord>::MoveArray(absl::Cord* to,
absl::Cord* from,
int size);
template <>
PROTOBUF_EXPORT void RepeatedField<absl::Cord>::CopyArray(
absl::Cord* to, const absl::Cord* from, int size);
// -------------------------------------------------------------------

@@ -470,6 +470,9 @@ TEST(RepeatedField, ReserveLarge) {
}
TEST(RepeatedField, ReserveHuge) {
#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || defined(ABSL_HAVE_MEMORY_SANITIZER)
GTEST_SKIP() << "Disabled because sanitizer is active";
#endif
// Largest value that does not clamp to the large limit:
constexpr int non_clamping_limit =
(std::numeric_limits<int>::max() - sizeof(Arena*)) / 2;
@@ -494,7 +497,6 @@ TEST(RepeatedField, ReserveHuge) {
EXPECT_GE(huge_field.Capacity(), min_clamping_size);
ASSERT_LT(huge_field.Capacity(), std::numeric_limits<int>::max() - 1);
#ifndef PROTOBUF_ASAN
// The array containing all the fields is, in theory, up to MAXINT-1 in size.
// However, some compilers can't handle a struct whose size is larger
// than 2GB, and the protocol buffer format doesn't handle more than 2GB of
@@ -505,7 +507,6 @@ TEST(RepeatedField, ReserveHuge) {
// size must still be clamped to a valid range.
huge_field.Reserve(huge_field.Capacity() + 1);
EXPECT_EQ(huge_field.Capacity(), std::numeric_limits<int>::max());
#endif // PROTOBUF_ASAN
#endif // PROTOBUF_TEST_ALLOW_LARGE_ALLOC
}
@@ -651,6 +652,41 @@ TEST(RepeatedField, AddRange5) {
ASSERT_EQ(me.Get(2), 2);
}
// Add contents of container with a quirky iterator like std::vector<bool>
TEST(RepeatedField, AddRange6) {
RepeatedField<bool> me;
me.Add(true);
me.Add(false);
std::vector<bool> values;
values.push_back(true);
values.push_back(true);
values.push_back(false);
me.Add(values.begin(), values.end());
ASSERT_EQ(me.size(), 5);
ASSERT_EQ(me.Get(0), true);
ASSERT_EQ(me.Get(1), false);
ASSERT_EQ(me.Get(2), true);
ASSERT_EQ(me.Get(3), true);
ASSERT_EQ(me.Get(4), false);
}
// Add contents of absl::Span which evaluates to const T on access.
TEST(RepeatedField, AddRange7) {
int ints[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
absl::Span<const int> span(ints);
auto p = span.begin();
static_assert(std::is_convertible<decltype(p), const int*>::value, "");
RepeatedField<int> me;
me.Add(span.begin(), span.end());
ASSERT_EQ(me.size(), 10);
for (int i = 0; i < 10; ++i) {
ASSERT_EQ(me.Get(i), i);
}
}
TEST(RepeatedField, AddAndAssignRanges) {
RepeatedField<int> field;
@@ -953,6 +989,23 @@ TEST(RepeatedField, Truncate) {
#endif
}
TEST(RepeatedCordField, AddRemoveLast) {
RepeatedField<absl::Cord> field;
field.Add(absl::Cord("foo"));
field.RemoveLast();
}
TEST(RepeatedCordField, AddClear) {
RepeatedField<absl::Cord> field;
field.Add(absl::Cord("foo"));
field.Clear();
}
TEST(RepeatedCordField, Resize) {
RepeatedField<absl::Cord> field;
field.Resize(10, absl::Cord("foo"));
}
TEST(RepeatedField, Cords) {
RepeatedField<absl::Cord> field;
@@ -1007,8 +1060,8 @@ TEST(RepeatedField, TruncateCords) {
// Truncating to the current size should be fine (no-op), but truncating
// to a larger size should crash.
field.Truncate(field.size());
#if PROTOBUF_HAS_DEATH_TEST
EXPECT_DEBUG_DEATH(field.Truncate(field.size() + 1), "new_size");
#if defined(PROTOBUF_HAS_DEATH_TEST) && !defined(NDEBUG)
EXPECT_DEATH(field.Truncate(field.size() + 1), "new_size");
#endif
}
@@ -1067,6 +1120,107 @@ TEST(RepeatedField, TestSAddFromSelf) {
}
}
// We have, or at least had, bad callers that never triggered our DCHECKs.
// Here we check that we DO fail on bad Truncate calls under debug, and do
// nothing under opt compiles.
TEST(RepeatedField, HardenAgainstBadTruncate) {
RepeatedField<int> field;
for (int size = 0; size < 10; ++size) {
field.Truncate(size);
#if PROTOBUF_HAS_DEATH_TEST
EXPECT_DEBUG_DEATH(field.Truncate(size + 1), "new_size <= current_size_");
EXPECT_DEBUG_DEATH(field.Truncate(size + 2), "new_size <= current_size_");
#elif defined(NDEBUG)
field.Truncate(size + 1);
field.Truncate(size + 1);
#endif
EXPECT_EQ(field.size(), size);
field.Add(1);
}
}
#if defined(PROTOBUF_HAS_DEATH_TEST) && (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
defined(ABSL_HAVE_MEMORY_SANITIZER))
// This function verifies that the code dies under ASAN or MSAN trying to both
// read and write the reserved element directly beyond the last element.
void VerifyDeathOnWriteAndReadAccessBeyondEnd(RepeatedField<int64_t>& field) {
auto* end = field.Mutable(field.size() - 1) + 1;
#if defined(ABSL_HAVE_ADDRESS_SANITIZER)
EXPECT_DEATH(*end = 1, "container-overflow");
EXPECT_DEATH(EXPECT_NE(*end, 1), "container-overflow");
#elif defined(ABSL_HAVE_MEMORY_SANITIZER)
EXPECT_DEATH(EXPECT_NE(*end, 1), "use-of-uninitialized-value");
#endif
// Confirm we died a death of *SAN
EXPECT_EQ(field.AddAlreadyReserved(), end);
*end = 1;
EXPECT_EQ(*end, 1);
}
TEST(RepeatedField, PoisonsMemoryOnAdd) {
RepeatedField<int64_t> field;
do {
field.Add(0);
} while (field.size() == field.Capacity());
VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
}
TEST(RepeatedField, PoisonsMemoryOnAddAlreadyReserved) {
RepeatedField<int64_t> field;
field.Reserve(2);
field.AddAlreadyReserved();
VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
}
TEST(RepeatedField, PoisonsMemoryOnAddNAlreadyReserved) {
RepeatedField<int64_t> field;
field.Reserve(10);
field.AddNAlreadyReserved(8);
VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
}
TEST(RepeatedField, PoisonsMemoryOnResize) {
RepeatedField<int64_t> field;
field.Add(0);
do {
field.Resize(field.size() + 1, 1);
} while (field.size() == field.Capacity());
VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
// Shrink size
field.Resize(field.size() - 1, 1);
VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
}
TEST(RepeatedField, PoisonsMemoryOnTruncate) {
RepeatedField<int64_t> field;
field.Add(0);
field.Add(1);
field.Truncate(1);
VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
}
TEST(RepeatedField, PoisonsMemoryOnReserve) {
RepeatedField<int64_t> field;
field.Add(1);
field.Reserve(field.Capacity() + 1);
VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
}
TEST(RepeatedField, PoisonsMemoryOnAssign) {
RepeatedField<int64_t> src;
RepeatedField<int64_t> field;
src.Add(1);
src.Add(2);
field.Reserve(3);
field = src;
VerifyDeathOnWriteAndReadAccessBeyondEnd(field);
}
#endif
// ===================================================================
// RepeatedPtrField tests. These pretty much just mirror the RepeatedField
// tests above.
