From 6b22fd125ce20f56e89d0f207e36383d19a103fd Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Tue, 17 Sep 2024 10:56:59 -0700 Subject: [PATCH] Add on-demand construction of enum validation data in EnumDescriptor. This can be used by dynamic messages to validate enums instead of going through reflection. This removes one of the few cases where dynamic messages have to fall back to reflection during table-driven parsing. PiperOrigin-RevId: 675630939 --- src/google/protobuf/descriptor.cc | 31 +++++++++++++++++ src/google/protobuf/descriptor.h | 6 +++- .../protobuf/generated_message_reflection.cc | 34 +++++++------------ 3 files changed, 48 insertions(+), 23 deletions(-) diff --git a/src/google/protobuf/descriptor.cc b/src/google/protobuf/descriptor.cc index 97d938997a..d3c604e096 100644 --- a/src/google/protobuf/descriptor.cc +++ b/src/google/protobuf/descriptor.cc @@ -33,6 +33,7 @@ #include #include +#include "absl/algorithm/container.h" #include "absl/base/attributes.h" #include "absl/base/call_once.h" #include "absl/base/casts.h" @@ -72,6 +73,7 @@ #include "google/protobuf/descriptor_visitor.h" #include "google/protobuf/dynamic_message.h" #include "google/protobuf/feature_resolver.h" +#include "google/protobuf/generated_enum_util.h" #include "google/protobuf/generated_message_util.h" #include "google/protobuf/io/strtod.h" #include "google/protobuf/io/tokenizer.h" @@ -2585,6 +2587,33 @@ const FieldDescriptor* Descriptor::map_value() const { return field(1); } +const uint32_t* EnumDescriptor::GetValidatorData() const { + // We generate the data on demand to delay the memory usage until we actually + // need it. + if (auto* p = validator_data_.load(std::memory_order_acquire)) return p; + + std::vector values; + for (const auto& v : absl::MakeSpan(values_, value_count_)) { + values.push_back(v.number()); + } + absl::c_sort(values); + values.erase(std::unique(values.begin(), values.end()), values.end()); + auto data = internal::GenerateEnumData(values); + + absl::MutexLockMaybe lock(file_->pool_->mutex_); + + // Do the check again under the mutex to avoid wasting memory in case someone + // raced us. The memory is held by the pool, so "leaking" it can add up. + // But we do the preparation work above to not put all of that under the lock. + if (auto* p = validator_data_.load(std::memory_order_acquire)) return p; + + uint32_t* owned_data = static_cast( + file_->pool_->tables_->AllocateBytes(data.size() * sizeof(uint32_t))); + memcpy(owned_data, data.data(), data.size() * sizeof(uint32_t)); + validator_data_.store(owned_data, std::memory_order_release); + return owned_data; +} + const EnumValueDescriptor* EnumDescriptor::FindValueByName( absl::string_view name) const { return file()->tables_->FindNestedSymbol(this, name).enum_value_descriptor(); @@ -5166,6 +5195,7 @@ Symbol DescriptorPool::NewPlaceholderWithMutexHeld( placeholder_enum->merged_features_ = &FeatureSet::default_instance(); placeholder_enum->is_placeholder_ = true; placeholder_enum->is_unqualified_placeholder_ = (name[0] != '.'); + placeholder_enum->validator_data_.store(nullptr, std::memory_order_relaxed); // Enums must have at least one value. placeholder_enum->value_count_ = 1; @@ -7024,6 +7054,7 @@ void DescriptorBuilder::BuildEnum(const EnumDescriptorProto& proto, result->containing_type_ = parent; result->is_placeholder_ = false; result->is_unqualified_placeholder_ = false; + result->validator_data_.store(nullptr, std::memory_order_relaxed); if (proto.value_size() == 0) { // We cannot allow enums with no values because this would mean there diff --git a/src/google/protobuf/descriptor.h b/src/google/protobuf/descriptor.h index d65028a962..532b51d03b 100644 --- a/src/google/protobuf/descriptor.h +++ b/src/google/protobuf/descriptor.h @@ -1455,6 +1455,8 @@ class PROTOBUF_EXPORT EnumDescriptor : private internal::SymbolBase { // to this descriptor from the file root. void GetLocationPath(std::vector* output) const; + const uint32_t* GetValidatorData() const; + // True if this is a placeholder for an unknown type. bool is_placeholder_ : 1; // True if this is a placeholder and the type name wasn't fully-qualified. @@ -1485,6 +1487,8 @@ class PROTOBUF_EXPORT EnumDescriptor : private internal::SymbolBase { EnumDescriptor::ReservedRange* reserved_ranges_; const std::string** reserved_names_; + mutable std::atomic validator_data_; + // IMPORTANT: If you add a new field, make sure to search for all instances // of Allocate() and AllocateArray() in // descriptor.cc and update them to initialize the field. @@ -1501,7 +1505,7 @@ class PROTOBUF_EXPORT EnumDescriptor : private internal::SymbolBase { friend class Reflection; }; -PROTOBUF_INTERNAL_CHECK_CLASS_SIZE(EnumDescriptor, 88); +PROTOBUF_INTERNAL_CHECK_CLASS_SIZE(EnumDescriptor, 96); // Describes an individual enum constant of a particular type. To get the // EnumValueDescriptor for a given enum value, first get the EnumDescriptor diff --git a/src/google/protobuf/generated_message_reflection.cc b/src/google/protobuf/generated_message_reflection.cc index ea5132129b..f61ceb3056 100644 --- a/src/google/protobuf/generated_message_reflection.cc +++ b/src/google/protobuf/generated_message_reflection.cc @@ -3306,29 +3306,18 @@ void Reflection::PopulateTcParseEntries( TcParseTableBase::FieldEntry* entries) const { for (const auto& entry : table_info.field_entries) { const FieldDescriptor* field = entry.field; - if (field->type() == field->TYPE_ENUM && - (entry.type_card & internal::field_layout::kTvMask) == - internal::field_layout::kTvEnum && - table_info.aux_entries[entry.aux_idx].type == - internal::TailCallTableInfo::kEnumValidator) { - // Mini parse can't handle it. Fallback to reflection. - *entries = {}; - table_info.aux_entries[entry.aux_idx] = {}; + const OneofDescriptor* oneof = field->real_containing_oneof(); + entries->offset = schema_.GetFieldOffset(field); + if (oneof != nullptr) { + entries->has_idx = schema_.oneof_case_offset_ + 4 * oneof->index(); + } else if (schema_.HasHasbits()) { + entries->has_idx = + static_cast(8 * schema_.HasBitsOffset() + entry.hasbit_idx); } else { - const OneofDescriptor* oneof = field->real_containing_oneof(); - entries->offset = schema_.GetFieldOffset(field); - if (oneof != nullptr) { - entries->has_idx = schema_.oneof_case_offset_ + 4 * oneof->index(); - } else if (schema_.HasHasbits()) { - entries->has_idx = - static_cast(8 * schema_.HasBitsOffset() + entry.hasbit_idx); - } else { - entries->has_idx = 0; - } - entries->aux_idx = entry.aux_idx; - entries->type_card = entry.type_card; + entries->has_idx = 0; } - + entries->aux_idx = entry.aux_idx; + entries->type_card = entry.type_card; ++entries; } } @@ -3373,7 +3362,8 @@ void Reflection::PopulateTcParseFieldAux( aux_entry.enum_range.size}; break; case internal::TailCallTableInfo::kEnumValidator: - ABSL_LOG(FATAL) << "Not supported."; + field_aux++->enum_data = + aux_entry.field->enum_type()->GetValidatorData(); break; case internal::TailCallTableInfo::kNumericOffset: field_aux++->offset = aux_entry.offset;