From 783f555ad1e2e45e076b5c47c8ef4d407bbd5354 Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Wed, 9 Aug 2023 12:19:25 -0700 Subject: [PATCH] Do eager parsing when building descriptors. Lazy parsing can use reflection to verify consistency, and reflection while building descriptors causes deadlock. PiperOrigin-RevId: 555238461 --- src/google/protobuf/descriptor.cc | 19 ++++++++++++++----- src/google/protobuf/descriptor.h | 10 ++++++++++ src/google/protobuf/descriptor_database.cc | 4 +++- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/google/protobuf/descriptor.cc b/src/google/protobuf/descriptor.cc index d41b38a79a..f187bd372a 100644 --- a/src/google/protobuf/descriptor.cc +++ b/src/google/protobuf/descriptor.cc @@ -81,6 +81,7 @@ #include "google/protobuf/generated_message_util.h" #include "google/protobuf/io/strtod.h" #include "google/protobuf/io/tokenizer.h" +#include "google/protobuf/parse_context.h" #include "google/protobuf/port.h" #include "google/protobuf/repeated_ptr_field.h" #include "google/protobuf/text_format.h" @@ -5249,11 +5250,9 @@ typename DescriptorT::OptionsType* DescriptorBuilder::AllocateOptionsImpl( return nullptr; } - // Avoid using MergeFrom()/CopyFrom() in this class to make it -fno-rtti - // friendly. Without RTTI, MergeFrom() and CopyFrom() will fallback to the - // reflection based method, which requires the Descriptor. However, we are in - // the middle of building the descriptors, thus the deadlock. - options->ParseFromString(orig_options.SerializeAsString()); + const bool parse_success = + internal::ParseNoReflection(orig_options.SerializeAsString(), *options); + ABSL_DCHECK(parse_success); // Don't add to options_to_interpret_ unless there were uninterpreted // options. This not only avoids unnecessary work, but prevents a @@ -9424,6 +9423,16 @@ void LazyDescriptor::Once(const ServiceDescriptor* service) { } } +bool ParseNoReflection(absl::string_view from, google::protobuf::MessageLite& to) { + to.Clear(); + const char* ptr; + internal::ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(), + false, &ptr, from); + ptr = to._InternalParse(ptr, &ctx); + if (ptr == nullptr || !ctx.EndedAtLimit()) return false; + return to.IsInitializedWithErrors(); +} + namespace cpp { bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) { if (field->legacy_enum_field_treated_as_closed()) { diff --git a/src/google/protobuf/descriptor.h b/src/google/protobuf/descriptor.h index 75fd4781f0..2e659d9bd2 100644 --- a/src/google/protobuf/descriptor.h +++ b/src/google/protobuf/descriptor.h @@ -123,6 +123,9 @@ class UninterpretedOption; class FeatureSet; class SourceCodeInfo; +// Defined in message_lite.h +class MessageLite; + // Defined in message.h class Message; class Reflection; @@ -2830,6 +2833,13 @@ struct FieldRangeImpl { const T* descriptor; }; +// While building descriptors, we need to avoid using MergeFrom()/CopyFrom() to +// be -fno-rtti friendly. Without RTTI, MergeFrom() and CopyFrom() will fallback +// to the reflection based method, which requires the Descriptor. However, while +// building the descriptors, this causes deadlock. We also must disable lazy +// parsing because that uses reflection to verify consistency. +bool ParseNoReflection(absl::string_view from, google::protobuf::MessageLite& to); + // The context for these functions under `cpp` is "for the C++ implementation". // In particular, questions like "does this field have a has bit?" have a // different answer depending on the language. diff --git a/src/google/protobuf/descriptor_database.cc b/src/google/protobuf/descriptor_database.cc index 4616c04668..0ce378b21e 100644 --- a/src/google/protobuf/descriptor_database.cc +++ b/src/google/protobuf/descriptor_database.cc @@ -888,7 +888,9 @@ bool EncodedDescriptorDatabase::FindAllFileNames( bool EncodedDescriptorDatabase::MaybeParse( std::pair encoded_file, FileDescriptorProto* output) { if (encoded_file.first == nullptr) return false; - return output->ParseFromArray(encoded_file.first, encoded_file.second); + absl::string_view source(static_cast(encoded_file.first), + encoded_file.second); + return internal::ParseNoReflection(source, *output); } EncodedDescriptorDatabase::EncodedDescriptorDatabase()