Add arena support to UnknownFieldSet:

- It is now arena-constructible: the field array, strings, and inner sets are allocated in the arena.
- It is now arena-destructible: destructors are skipped when the set lives in an arena.

Also, optimize `default_instance()` now that we can provide a `constinit` version of it.

PiperOrigin-RevId: 654743861
pull/17535/head
Protobuf Team Bot 4 months ago committed by Copybara-Service
parent d0395408aa
commit 935783570e
  1. 2
      src/google/protobuf/internal_visibility.h
  2. 2
      src/google/protobuf/repeated_field.h
  3. 116
      src/google/protobuf/unknown_field_set.cc
  4. 179
      src/google/protobuf/unknown_field_set.h
  5. 108
      src/google/protobuf/unknown_field_set_unittest.cc

@ -17,6 +17,7 @@ class MessageLite;
namespace internal {
class InternalVisibilityForTesting;
class InternalMetadata;
// Empty class to use as a mandatory 'internal token' for functions that have to
// be public, such as arena constructors, but that are for internal use only.
@ -29,6 +30,7 @@ class InternalVisibility {
friend class ::google::protobuf::Arena;
friend class ::google::protobuf::Message;
friend class ::google::protobuf::MessageLite;
friend class ::google::protobuf::internal::InternalMetadata;
friend class InternalVisibilityForTesting;
};

@ -56,6 +56,7 @@ namespace google {
namespace protobuf {
class Message;
class UnknownField; // For the allowlist
namespace internal {
@ -141,6 +142,7 @@ class RepeatedField final
absl::disjunction<internal::is_supported_integral_type<Element>,
internal::is_supported_floating_point_type<Element>,
std::is_same<absl::Cord, Element>,
std::is_same<UnknownField, Element>,
is_proto_enum<Element>>::value,
"We only support non-string scalars in RepeatedField.");
}

@ -16,7 +16,6 @@
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/string_view.h"
#include "google/protobuf/extension_set.h"
#include "google/protobuf/generated_message_tctable_decl.h"
#include "google/protobuf/generated_message_tctable_impl.h"
#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/io/zero_copy_stream.h"
@ -32,38 +31,23 @@
namespace google {
namespace protobuf {
// Returns a reference to a single shared UnknownFieldSet that is allocated
// with `new` the first time this function is called.
const UnknownFieldSet& UnknownFieldSet::default_instance() {
// Function-local static, so the allocation happens only once. The pointer is
// routed through internal::OnShutdownDelete -- presumably to have it freed at
// library shutdown; confirm against that helper's definition.
static auto instance = internal::OnShutdownDelete(new UnknownFieldSet());
return *instance;
}
// Releases whatever each stored field points to and then empties the
// container. Precondition (checked by ABSL_DCHECK): the set holds at least
// one field.
void UnknownFieldSet::ClearFallback() {
  ABSL_DCHECK(!fields_.empty());
  // Visit the fields from last to first, freeing any owned payload.
  for (int idx = static_cast<int>(fields_.size()) - 1; idx >= 0; --idx) {
    fields_[idx].Delete();
  }
  fields_.clear();
}
void UnknownFieldSet::InternalMergeFrom(const UnknownFieldSet& other) {
int other_field_count = other.field_count();
if (other_field_count > 0) {
fields_.reserve(fields_.size() + other_field_count);
for (int i = 0; i < other_field_count; i++) {
fields_.push_back((other.fields_)[i]);
fields_.back().DeepCopy((other.fields_)[i]);
}
if (arena() == nullptr) {
int n = fields_.size();
do {
(fields_)[--n].Delete();
} while (n > 0);
}
fields_.Clear();
}
void UnknownFieldSet::MergeFrom(const UnknownFieldSet& other) {
int other_field_count = other.field_count();
if (other_field_count > 0) {
fields_.reserve(fields_.size() + other_field_count);
for (int i = 0; i < other_field_count; i++) {
fields_.push_back((other.fields_)[i]);
fields_.back().DeepCopy((other.fields_)[i]);
fields_.Reserve(fields_.size() + other_field_count);
for (auto elem : other.fields_) {
fields_.Add(elem.DeepCopy(arena()));
}
}
}
@ -71,14 +55,14 @@ void UnknownFieldSet::MergeFrom(const UnknownFieldSet& other) {
// A specialized MergeFrom for performance when we are merging from an UFS that
// is temporary and can be destroyed in the process.
void UnknownFieldSet::MergeFromAndDestroy(UnknownFieldSet* other) {
if (fields_.empty()) {
fields_ = std::move(other->fields_);
if (arena() != other->arena()) {
MergeFrom(*other);
} else if (fields_.empty()) {
fields_.Swap(&other->fields_);
} else {
fields_.insert(fields_.end(),
std::make_move_iterator(other->fields_.begin()),
std::make_move_iterator(other->fields_.end()));
fields_.MergeFrom(other->fields_);
other->fields_.Clear();
}
other->fields_.clear();
}
void UnknownFieldSet::MergeToInternalMetadata(
@ -89,7 +73,7 @@ void UnknownFieldSet::MergeToInternalMetadata(
size_t UnknownFieldSet::SpaceUsedExcludingSelfLong() const {
if (fields_.empty()) return 0;
size_t total_size = sizeof(UnknownField) * fields_.capacity();
size_t total_size = fields_.SpaceUsedExcludingSelfLong();
for (const UnknownField& field : fields_) {
switch (field.type()) {
@ -113,65 +97,54 @@ size_t UnknownFieldSet::SpaceUsedLong() const {
}
void UnknownFieldSet::AddVarint(int number, uint64_t value) {
fields_.emplace_back();
auto& field = fields_.back();
auto& field = *fields_.Add();
field.number_ = number;
field.SetType(UnknownField::TYPE_VARINT);
field.data_.varint_ = value;
}
void UnknownFieldSet::AddFixed32(int number, uint32_t value) {
fields_.emplace_back();
auto& field = fields_.back();
auto& field = *fields_.Add();
field.number_ = number;
field.SetType(UnknownField::TYPE_FIXED32);
field.data_.fixed32_ = value;
}
void UnknownFieldSet::AddFixed64(int number, uint64_t value) {
fields_.emplace_back();
auto& field = fields_.back();
auto& field = *fields_.Add();
field.number_ = number;
field.SetType(UnknownField::TYPE_FIXED64);
field.data_.fixed64_ = value;
}
std::string* UnknownFieldSet::AddLengthDelimited(int number) {
fields_.emplace_back();
auto& field = fields_.back();
auto& field = *fields_.Add();
field.number_ = number;
field.SetType(UnknownField::TYPE_LENGTH_DELIMITED);
field.data_.string_value = new std::string;
field.data_.string_value = Arena::Create<std::string>(arena());
return field.data_.string_value;
}
UnknownFieldSet* UnknownFieldSet::AddGroup(int number) {
fields_.emplace_back();
auto& field = fields_.back();
auto& field = *fields_.Add();
field.number_ = number;
field.SetType(UnknownField::TYPE_GROUP);
field.data_.group_ = new UnknownFieldSet;
field.data_.group_ = Arena::Create<UnknownFieldSet>(arena());
return field.data_.group_;
}
void UnknownFieldSet::AddField(const UnknownField& field) {
fields_.push_back(field);
fields_.back().DeepCopy(field);
fields_.Add(field.DeepCopy(arena()));
}
void UnknownFieldSet::DeleteSubrange(int start, int num) {
// Delete the specified fields.
for (int i = 0; i < num; ++i) {
(fields_)[i + start].Delete();
}
// Slide down the remaining fields.
for (size_t i = start + num; i < fields_.size(); ++i) {
(fields_)[i - num] = (fields_)[i];
}
// Pop off the # of deleted fields.
for (int i = 0; i < num; ++i) {
fields_.pop_back();
if (arena() == nullptr) {
// Delete the specified fields.
for (int i = 0; i < num; ++i) {
(fields_)[i + start].Delete();
}
}
fields_.ExtractSubrange(start, num, nullptr);
}
void UnknownFieldSet::DeleteByNumber(int number) {
@ -179,7 +152,9 @@ void UnknownFieldSet::DeleteByNumber(int number) {
for (size_t i = 0; i < fields_.size(); ++i) {
UnknownField* field = &(fields_)[i];
if (field->number() == number) {
field->Delete();
if (arena() == nullptr) {
field->Delete();
}
} else {
if (i != left) {
(fields_)[left] = (fields_)[i];
@ -187,7 +162,7 @@ void UnknownFieldSet::DeleteByNumber(int number) {
++left;
}
}
fields_.resize(left);
fields_.Truncate(left);
}
bool UnknownFieldSet::MergeFromCodedStream(io::CodedInputStream* input) {
@ -257,21 +232,32 @@ void UnknownField::Delete() {
}
}
void UnknownField::DeepCopy(const UnknownField& other) {
(void)other; // Parameter is used by Google-internal code.
UnknownField UnknownField::DeepCopy(Arena* arena) const {
UnknownField copy = *this;
switch (type()) {
case UnknownField::TYPE_LENGTH_DELIMITED:
data_.string_value = new std::string(*data_.string_value);
copy.data_.string_value =
Arena::Create<std::string>(arena, *data_.string_value);
break;
case UnknownField::TYPE_GROUP: {
UnknownFieldSet* group = new UnknownFieldSet();
group->InternalMergeFrom(*data_.group_);
data_.group_ = group;
UnknownFieldSet* group = Arena::Create<UnknownFieldSet>(arena);
group->MergeFrom(*data_.group_);
copy.data_.group_ = group;
break;
}
default:
break;
}
return copy;
}
// Exchanges the contents of this set and `other` by copying through a
// temporary set instead of swapping the underlying storage. Swap() takes this
// path when the two sets report different arenas.
void UnknownFieldSet::SwapSlow(UnknownFieldSet* other) {
  // Step 1: stash a copy of our own fields in a default-constructed set.
  UnknownFieldSet stash;
  stash.MergeFrom(*this);

  // Step 2: replace our contents with a copy of other's.
  Clear();
  MergeFrom(*other);

  // Step 3: replace other's contents with the stashed copy.
  other->Clear();
  other->MergeFrom(stash);
}
uint8_t* UnknownField::InternalSerializeLengthDelimitedNoTag(

@ -19,17 +19,19 @@
#include <atomic>
#include <string>
#include <vector>
#include "google/protobuf/stubs/common.h"
#include "absl/log/absl_check.h"
#include "absl/strings/cord.h"
#include "absl/strings/string_view.h"
#include "google/protobuf/arena.h"
#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/io/zero_copy_stream_impl_lite.h"
#include "google/protobuf/message_lite.h"
#include "google/protobuf/metadata_lite.h"
#include "google/protobuf/parse_context.h"
#include "google/protobuf/port.h"
#include "google/protobuf/repeated_field.h"
// Must be included last.
#include "google/protobuf/port_def.inc"
@ -54,7 +56,67 @@ using UFSStringView = const std::string&;
} // namespace internal
class Message; // message.h
class UnknownField; // below
// Represents one field in an UnknownFieldSet: a field number, a type tag, and
// a payload stored in a union whose meaningful member depends on that tag.
class PROTOBUF_EXPORT UnknownField {
public:
// Kind of payload the field carries. UnknownFieldSet's Add* methods set this
// together with the matching member of data_.
enum Type {
TYPE_VARINT,
TYPE_FIXED32,
TYPE_FIXED64,
TYPE_LENGTH_DELIMITED,
TYPE_GROUP
};
// The field's field number, as seen on the wire.
inline int number() const;
// The field type.
inline Type type() const;
// Accessors -------------------------------------------------------
// Each method works only for UnknownFields of the corresponding type.
inline uint64_t varint() const;
inline uint32_t fixed32() const;
inline uint64_t fixed64() const;
inline internal::UFSStringView length_delimited() const;
inline const UnknownFieldSet& group() const;
inline void set_varint(uint64_t value);
inline void set_fixed32(uint32_t value);
inline void set_fixed64(uint64_t value);
inline void set_length_delimited(absl::string_view value);
inline std::string* mutable_length_delimited();
inline UnknownFieldSet* mutable_group();
// NOTE(review): presumably the serialized size of the length-delimited
// payload; the definition is not visible here -- confirm before relying on it.
inline size_t GetLengthDelimitedSize() const;
uint8_t* InternalSerializeLengthDelimitedNoTag(
uint8_t* target, io::EpsCopyOutputStream* stream) const;
private:
// UnknownFieldSet writes number_, type_ and data_ directly when adding fields.
friend class UnknownFieldSet;
// If this UnknownField contains a pointer, delete it.
// The visible call sites in UnknownFieldSet only call this when the owning
// set's arena() is nullptr.
void Delete();
// Make a deep copy of any pointers in this UnknownField.
// Returns a copy of this field. For TYPE_LENGTH_DELIMITED and TYPE_GROUP the
// pointed-to string / nested set is duplicated through Arena::Create using
// `arena`; for the other types the copy is a plain value copy.
UnknownField DeepCopy(Arena* arena) const;
// Set the wire type of this UnknownField. Should only be used when this
// UnknownField is being created.
inline void SetType(Type type);
// Field number; exposed as int through number().
uint32_t number_;
// Holds a Type value; written by SetType().
uint32_t type_;
// Payload storage. Scalars are stored inline; strings and groups are held
// by pointer.
union {
// Used with TYPE_VARINT.
uint64_t varint_;
// Used with TYPE_FIXED32.
uint32_t fixed32_;
// Used with TYPE_FIXED64.
uint64_t fixed64_;
// Used with TYPE_LENGTH_DELIMITED.
std::string* string_value;
// Used with TYPE_GROUP.
UnknownFieldSet* group_;
} data_;
};
// An UnknownFieldSet contains fields that were encountered while parsing a
// message but were not defined by its type. Keeping track of these can be
@ -70,7 +132,7 @@ class UnknownField; // below
// the Reflection interface which is independent of any serialization scheme.
class PROTOBUF_EXPORT UnknownFieldSet {
public:
UnknownFieldSet();
constexpr UnknownFieldSet();
UnknownFieldSet(const UnknownFieldSet&) = delete;
UnknownFieldSet& operator=(const UnknownFieldSet&) = delete;
~UnknownFieldSet();
@ -167,13 +229,20 @@ class PROTOBUF_EXPORT UnknownFieldSet {
bool SerializeToCodedStream(io::CodedOutputStream* output) const;
static const UnknownFieldSet& default_instance();
UnknownFieldSet(internal::InternalVisibility, Arena* arena)
: UnknownFieldSet(arena) {}
private:
// For InternalMergeFrom
friend class UnknownField;
// Merges from other UnknownFieldSet. This method assumes, that this object
// is newly created and has no fields.
void InternalMergeFrom(const UnknownFieldSet& other);
using InternalArenaConstructable_ = void;
using DestructorSkippable_ = void;
friend class google::protobuf::Arena;
explicit UnknownFieldSet(Arena* arena) : fields_(arena) {}
Arena* arena() { return fields_.GetArena(); }
void ClearFallback();
void SwapSlow(UnknownFieldSet* other);
template <typename MessageType,
typename std::enable_if<
@ -196,7 +265,7 @@ class PROTOBUF_EXPORT UnknownFieldSet {
return MergeFromCodedStream(&coded_stream);
}
std::vector<UnknownField> fields_;
RepeatedField<UnknownField> fields_;
};
namespace internal {
@ -218,74 +287,19 @@ const char* UnknownFieldParse(uint64_t tag, UnknownFieldSet* unknown,
} // namespace internal
// Represents one field in an UnknownFieldSet: a field number, a type tag, and
// a payload stored in a union whose meaningful member depends on that tag.
class PROTOBUF_EXPORT UnknownField {
public:
// Kind of payload the field carries. UnknownFieldSet's Add* methods set this
// together with the matching member of data_.
enum Type {
TYPE_VARINT,
TYPE_FIXED32,
TYPE_FIXED64,
TYPE_LENGTH_DELIMITED,
TYPE_GROUP
};
// The field's field number, as seen on the wire.
inline int number() const;
// The field type.
inline Type type() const;
// Accessors -------------------------------------------------------
// Each method works only for UnknownFields of the corresponding type.
inline uint64_t varint() const;
inline uint32_t fixed32() const;
inline uint64_t fixed64() const;
inline internal::UFSStringView length_delimited() const;
inline const UnknownFieldSet& group() const;
inline void set_varint(uint64_t value);
inline void set_fixed32(uint32_t value);
inline void set_fixed64(uint64_t value);
inline void set_length_delimited(absl::string_view value);
inline std::string* mutable_length_delimited();
inline UnknownFieldSet* mutable_group();
// NOTE(review): presumably the serialized size of the length-delimited
// payload; the definition is not visible here -- confirm before relying on it.
inline size_t GetLengthDelimitedSize() const;
uint8_t* InternalSerializeLengthDelimitedNoTag(
uint8_t* target, io::EpsCopyOutputStream* stream) const;
private:
// UnknownFieldSet writes number_, type_ and data_ directly when adding fields.
friend class UnknownFieldSet;
// If this UnknownField contains a pointer, delete it.
void Delete();
// Make a deep copy of any pointers in this UnknownField.
// Works in place: for TYPE_LENGTH_DELIMITED and TYPE_GROUP the pointer held
// by this field is replaced with a newly allocated copy of what it pointed
// to. Callers first copy `other` into this field and then call
// DeepCopy(other); the visible implementation does not read `other`.
void DeepCopy(const UnknownField& other);
// Set the wire type of this UnknownField. Should only be used when this
// UnknownField is being created.
inline void SetType(Type type);
// Field number; exposed as int through number().
uint32_t number_;
// Holds a Type value; written by SetType().
uint32_t type_;
// Payload storage. Scalars are stored inline; strings and groups are held
// by pointer.
union {
// Used with TYPE_VARINT.
uint64_t varint_;
// Used with TYPE_FIXED32.
uint32_t fixed32_;
// Used with TYPE_FIXED64.
uint64_t fixed64_;
// Used with TYPE_LENGTH_DELIMITED.
std::string* string_value;
// Used with TYPE_GROUP.
UnknownFieldSet* group_;
} data_;
};
// ===================================================================
// inline implementations
inline UnknownFieldSet::UnknownFieldSet() {}
constexpr UnknownFieldSet::UnknownFieldSet() = default;
// Destructor; all cleanup is delegated to Clear().
inline UnknownFieldSet::~UnknownFieldSet() { Clear(); }
// Returns a reference to one shared, const, default-constructed
// UnknownFieldSet.
inline const UnknownFieldSet& UnknownFieldSet::default_instance() {
// Function-local static object rather than a heap allocation. It is marked
// PROTOBUF_CONSTINIT, which relies on the constructor being constexpr.
// NOTE(review): PROTOBUF_ATTRIBUTE_NO_DESTROY presumably suppresses the
// exit-time destructor for this object -- confirm against port_def.inc.
PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT static const UnknownFieldSet
instance;
return instance;
}
// Currently identical to Clear(): it simply forwards to it.
inline void UnknownFieldSet::ClearAndFreeMemory() { Clear(); }
inline void UnknownFieldSet::Clear() {
@ -297,7 +311,12 @@ inline void UnknownFieldSet::Clear() {
// Returns true when the set holds no fields.
inline bool UnknownFieldSet::empty() const { return fields_.empty(); }
inline void UnknownFieldSet::Swap(UnknownFieldSet* x) {
fields_.swap(x->fields_);
if (arena() == x->arena()) {
fields_.Swap(&x->fields_);
} else {
// We might need to do a deep copy, so use Merge instead
SwapSlow(x);
}
}
inline int UnknownFieldSet::field_count() const {
@ -378,6 +397,22 @@ inline size_t UnknownField::GetLengthDelimitedSize() const {
// Stores `type` in type_. It does not touch data_, so the caller is
// responsible for setting the matching payload member.
inline void UnknownField::SetType(Type type) { type_ = type; }
namespace internal {
// Add specialization of InternalMetadata::Container to provide arena support.
// The container's UnknownFieldSet member is constructed with the arena passed
// to the container, instead of being default-constructed.
template <>
struct InternalMetadata::Container<UnknownFieldSet>
: public InternalMetadata::ContainerBase {
UnknownFieldSet unknown_fields;
// Forwards `input_arena` to UnknownFieldSet's (InternalVisibility, Arena*)
// constructor.
explicit Container(Arena* input_arena)
: unknown_fields(InternalVisibility{}, input_arena) {}
// Same marker aliases that UnknownFieldSet itself declares.
// NOTE(review): presumably these opt the type into Arena's arena-aware
// construction and destructor skipping -- confirm against arena.h.
using InternalArenaConstructable_ = void;
using DestructorSkippable_ = void;
};
} // namespace internal
} // namespace protobuf
} // namespace google

@ -175,6 +175,94 @@ TEST_F(UnknownFieldSetTest, Group) {
EXPECT_EQ(all_fields_.optionalgroup().a(), nested_field.varint());
}
// Test helper: adds a varint (field 1), two length-delimited fields (2 and 3)
// and a group (field 4) to `set`, then repeats the same pattern inside the
// newly added group, three levels in total.
static void PopulateUFS(UnknownFieldSet& set) {
  // Chosen to be too long for the small-string optimization.
  const char* const kLongStr = "This is a very long string, not sso";
  constexpr int kLevels = 3;

  UnknownFieldSet* current = &set;
  int level = 0;
  while (level < kLevels) {
    current->AddVarint(1, 100);
    current->AddLengthDelimited(2, kLongStr);
    *current->AddLengthDelimited(3) = kLongStr;
    // Descend into a fresh group so nested sets are exercised as well.
    current = current->AddGroup(4);
    ++level;
  }
}
// Runs MergeFrom() for every heap/arena combination of destination (lhs) and
// source (rhs). Besides relying on sanitizers to flag bad frees, it checks
// that the destination received every top-level field and that the source
// was left untouched.
TEST_F(UnknownFieldSetTest, ArenaSupportWorksWithMergeFrom) {
  Arena arena;
  for (bool lhs_arena : {false, true}) {
    for (bool rhs_arena : {false, true}) {
      // The stack instances are only used when the matching flag is false.
      UnknownFieldSet lhs_stack, rhs_stack;
      auto& lhs =
          lhs_arena ? *Arena::Create<UnknownFieldSet>(&arena) : lhs_stack;
      auto& rhs =
          rhs_arena ? *Arena::Create<UnknownFieldSet>(&arena) : rhs_stack;
      PopulateUFS(rhs);
      const int rhs_count = rhs.field_count();

      lhs.MergeFrom(rhs);

      // lhs started out empty, so it must now mirror rhs's top-level count.
      EXPECT_EQ(lhs.field_count(), rhs_count)
          << "lhs_arena=" << lhs_arena << " rhs_arena=" << rhs_arena;
      // MergeFrom takes its argument by const reference.
      EXPECT_EQ(rhs.field_count(), rhs_count)
          << "lhs_arena=" << lhs_arena << " rhs_arena=" << rhs_arena;
    }
  }
}
// Runs MergeFromAndDestroy() for every combination of heap/arena placement
// and empty/populated state of both operands. Besides relying on sanitizers,
// it checks that the destination ends up with the fields of both sets.
TEST_F(UnknownFieldSetTest, ArenaSupportWorksWithMergeAndDestroy) {
  Arena arena;
  for (bool lhs_arena : {false, true}) {
    for (bool populate_lhs : {false, true}) {
      for (bool rhs_arena : {false, true}) {
        for (bool populate_rhs : {false, true}) {
          // The stack instances are only used when the matching flag is false.
          UnknownFieldSet lhs_stack, rhs_stack;
          auto& lhs =
              lhs_arena ? *Arena::Create<UnknownFieldSet>(&arena) : lhs_stack;
          auto& rhs =
              rhs_arena ? *Arena::Create<UnknownFieldSet>(&arena) : rhs_stack;
          if (populate_lhs) PopulateUFS(lhs);
          if (populate_rhs) PopulateUFS(rhs);
          const int lhs_before = lhs.field_count();
          const int rhs_before = rhs.field_count();

          lhs.MergeFromAndDestroy(&rhs);

          // Only the destination is checked: the state rhs is left in
          // depends on which internal path was taken.
          EXPECT_EQ(lhs.field_count(), lhs_before + rhs_before)
              << "lhs_arena=" << lhs_arena << " populate_lhs=" << populate_lhs
              << " rhs_arena=" << rhs_arena
              << " populate_rhs=" << populate_rhs;
        }
      }
    }
  }
}
// Runs Swap() for every heap/arena combination of the two operands, covering
// both the same-arena and the different-arena branch. Besides relying on
// sanitizers, it checks that the contents really changed sides.
TEST_F(UnknownFieldSetTest, ArenaSupportWorksWithSwap) {
  Arena arena;
  for (bool lhs_arena : {false, true}) {
    for (bool rhs_arena : {false, true}) {
      // The stack instances are only used when the matching flag is false.
      UnknownFieldSet lhs_stack, rhs_stack;
      auto& lhs =
          lhs_arena ? *Arena::Create<UnknownFieldSet>(&arena) : lhs_stack;
      auto& rhs =
          rhs_arena ? *Arena::Create<UnknownFieldSet>(&arena) : rhs_stack;
      PopulateUFS(lhs);
      const int populated_count = lhs.field_count();

      lhs.Swap(&rhs);

      // rhs was empty before the swap, so lhs must be empty now and rhs must
      // hold everything lhs used to hold.
      EXPECT_TRUE(lhs.empty())
          << "lhs_arena=" << lhs_arena << " rhs_arena=" << rhs_arena;
      EXPECT_EQ(rhs.field_count(), populated_count)
          << "lhs_arena=" << lhs_arena << " rhs_arena=" << rhs_arena;
    }
  }
}
// Clears an arena-allocated, populated set. Sanitizers flag any attempt to
// free arena memory; the assertions additionally check that the set is empty
// afterwards.
TEST_F(UnknownFieldSetTest, ArenaSupportWorksWithClear) {
  Arena arena;
  auto* ufs = Arena::Create<UnknownFieldSet>(&arena);
  PopulateUFS(*ufs);
  ASSERT_FALSE(ufs->empty());
  // Clear should not try to delete memory from the arena.
  ufs->Clear();
  EXPECT_TRUE(ufs->empty());
  EXPECT_EQ(ufs->field_count(), 0);
}
// Exercises DeleteByNumber() and DeleteSubrange() on arena-allocated sets.
// Sanitizers flag any attempt to free arena memory; the assertions check that
// the fields are actually removed.
TEST_F(UnknownFieldSetTest, ArenaSupportWorksDelete) {
  Arena arena;

  // Remove the fields one number at a time.
  auto* ufs = Arena::Create<UnknownFieldSet>(&arena);
  PopulateUFS(*ufs);
  while (ufs->field_count() != 0) {
    const int before = ufs->field_count();
    ufs->DeleteByNumber(ufs->field(0).number());
    // Every call must remove at least field(0). ASSERT leaves the test on
    // failure, so a regression cannot turn this loop into a hang.
    ASSERT_LT(ufs->field_count(), before);
  }

  // Remove all fields in a single range deletion.
  ufs = Arena::Create<UnknownFieldSet>(&arena);
  PopulateUFS(*ufs);
  ASSERT_GT(ufs->field_count(), 0);
  ufs->DeleteSubrange(0, ufs->field_count());
  EXPECT_EQ(ufs->field_count(), 0);
}
TEST_F(UnknownFieldSetTest, SerializeFastAndSlowAreEquivalent) {
int size =
WireFormat::ComputeUnknownFieldsSize(empty_message_.unknown_fields());
@ -516,13 +604,15 @@ TEST_F(UnknownFieldSetTest, UnknownEnumValue) {
TEST_F(UnknownFieldSetTest, SpaceUsedExcludingSelf) {
UnknownFieldSet empty;
empty.AddVarint(1, 0);
EXPECT_EQ(sizeof(UnknownField), empty.SpaceUsedExcludingSelf());
RepeatedField<UnknownField> rep;
rep.Add();
EXPECT_EQ(rep.SpaceUsedExcludingSelf(), empty.SpaceUsedExcludingSelf());
}
TEST_F(UnknownFieldSetTest, SpaceUsed) {
// Keep shadow vectors to avoid making assumptions about its capacity growth.
// We imitate the push back calls here to determine the expected capacity.
std::vector<UnknownField> shadow_vector, shadow_vector_group;
RepeatedField<UnknownField> shadow_vector, shadow_vector_group;
unittest::TestEmptyMessage empty_message;
// Make sure an unknown field set has zero space used until a field is
@ -532,8 +622,8 @@ TEST_F(UnknownFieldSetTest, SpaceUsed) {
UnknownFieldSet* group = nullptr;
const auto total = [&] {
size_t result = base;
result += shadow_vector.capacity() * sizeof(UnknownField);
result += shadow_vector_group.capacity() * sizeof(UnknownField);
result += shadow_vector.SpaceUsedExcludingSelfLong();
result += shadow_vector_group.SpaceUsedExcludingSelfLong();
if (str != nullptr) {
result += sizeof(std::string);
static const size_t sso_capacity = std::string().capacity();
@ -550,26 +640,26 @@ TEST_F(UnknownFieldSetTest, SpaceUsed) {
// Make sure each thing we add to the set increases the SpaceUsedLong().
unknown_fields->AddVarint(1, 0);
shadow_vector.emplace_back();
shadow_vector.Add();
EXPECT_EQ(total(), empty_message.SpaceUsedLong()) << "Var";
str = unknown_fields->AddLengthDelimited(1);
shadow_vector.emplace_back();
shadow_vector.Add();
EXPECT_EQ(total(), empty_message.SpaceUsedLong()) << "Str";
str->assign(sizeof(std::string) + 1, 'x');
EXPECT_EQ(total(), empty_message.SpaceUsedLong()) << "Str2";
group = unknown_fields->AddGroup(1);
shadow_vector.emplace_back();
shadow_vector.Add();
EXPECT_EQ(total(), empty_message.SpaceUsedLong()) << "Group";
group->AddVarint(1, 0);
shadow_vector_group.emplace_back();
shadow_vector_group.Add();
EXPECT_EQ(total(), empty_message.SpaceUsedLong()) << "Group2";
unknown_fields->AddVarint(1, 0);
shadow_vector.emplace_back();
shadow_vector.Add();
EXPECT_EQ(total(), empty_message.SpaceUsedLong()) << "Var2";
}

Loading…
Cancel
Save