From 2dd03a570a385dbee141c657e5be0ac33061dfc7 Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Thu, 9 Mar 2023 10:11:54 -0800 Subject: [PATCH] Internal change PiperOrigin-RevId: 515369480 --- .../compiler/cpp/parse_function_generator.cc | 39 ++++++--- .../protobuf/generated_message_reflection.cc | 12 ++- .../protobuf/generated_message_tctable_decl.h | 5 ++ .../protobuf/generated_message_tctable_gen.cc | 82 +++++++++++++------ .../protobuf/generated_message_tctable_gen.h | 8 +- .../protobuf/generated_message_tctable_impl.h | 13 ++- .../generated_message_tctable_lite.cc | 19 ++++- 7 files changed, 132 insertions(+), 46 deletions(-) diff --git a/src/google/protobuf/compiler/cpp/parse_function_generator.cc b/src/google/protobuf/compiler/cpp/parse_function_generator.cc index 864ea45b5b..5b365cb0ae 100644 --- a/src/google/protobuf/compiler/cpp/parse_function_generator.cc +++ b/src/google/protobuf/compiler/cpp/parse_function_generator.cc @@ -90,14 +90,23 @@ class ParseFunctionGenerator::GeneratedOptionProvider final explicit GeneratedOptionProvider(ParseFunctionGenerator* gen) : gen_(gen) {} TailCallTableInfo::PerFieldOptions GetForField( const FieldDescriptor* field) const final { - return {IsLazy(field, gen_->options_, gen_->scc_analyzer_), - IsStringInlined(field, gen_->options_), - IsImplicitWeakField(field, gen_->options_, gen_->scc_analyzer_), - UseDirectTcParserTable(field, gen_->options_), - GetOptimizeFor(field->file(), gen_->options_) == - FileOptions::LITE_RUNTIME, - ShouldSplit(field, gen_->options_), - true}; + const auto verify_flag = [&] { + if (IsEagerlyVerifiedLazy(field, gen_->options_, gen_->scc_analyzer_)) + return internal::field_layout::kTvEager; + if (IsLazilyVerifiedLazy(field, gen_->options_)) + return internal::field_layout::kTvLazy; + return internal::field_layout::TransformValidation{}; + }; + return { + verify_flag(), + IsStringInlined(field, gen_->options_), + IsImplicitWeakField(field, gen_->options_, gen_->scc_analyzer_), + UseDirectTcParserTable(field, gen_->options_), + GetOptimizeFor(field->file(), gen_->options_) == + FileOptions::LITE_RUNTIME, + ShouldSplit(field, gen_->options_), + /* uses_codegen */ true, + }; } private: @@ -605,6 +614,11 @@ void ParseFunctionGenerator::GenerateTailCallTable(Formatter& format) { QualifiedDefaultInstancePtr( aux_entry.field->message_type(), options_)); break; + case TailCallTableInfo::kMessageVerifyFunc: + format("{$1$::InternalVerify},\n", + QualifiedClassName(aux_entry.field->message_type(), + options_)); + break; case TailCallTableInfo::kEnumRange: format("{$1$, $2$},\n", aux_entry.enum_range.start, aux_entry.enum_range.size); @@ -763,13 +777,16 @@ static void FormatFieldKind(Formatter& format, format(" | ::_fl::kRep$1$", rep); } - static constexpr const char* kXFormNames[] = {nullptr, "Default", "Table", - "WeakPtr"}; + static constexpr const char* kXFormNames[2][4] = { + {nullptr, "Default", "Table", "WeakPtr"}, {nullptr, "Eager", "Lazy"}}; + static_assert((fl::kTvDefault >> fl::kTvShift) == 1, ""); static_assert((fl::kTvTable >> fl::kTvShift) == 2, ""); static_assert((fl::kTvWeakPtr >> fl::kTvShift) == 3, ""); + static_assert((fl::kTvEager >> fl::kTvShift) == 1, ""); + static_assert((fl::kTvLazy >> fl::kTvShift) == 2, ""); - if (auto* xform = kXFormNames[tv_index]) { + if (auto* xform = kXFormNames[rep_index == 2][tv_index]) { format(" | ::_fl::kTv$1$", xform); } break; diff --git a/src/google/protobuf/generated_message_reflection.cc b/src/google/protobuf/generated_message_reflection.cc index 6fab21493d..74ab33ec5b 100644 --- a/src/google/protobuf/generated_message_reflection.cc +++ b/src/google/protobuf/generated_message_reflection.cc @@ -3087,6 +3087,7 @@ void Reflection::PopulateTcParseFieldAux( case internal::TailCallTableInfo::kSubTable: case internal::TailCallTableInfo::kSubMessageWeak: case internal::TailCallTableInfo::kCreateInArena: + case internal::TailCallTableInfo::kMessageVerifyFunc: ABSL_LOG(FATAL) << "Not supported"; break; case internal::TailCallTableInfo::kMapAuxInfo: @@ -3148,9 +3149,16 @@ const internal::TcParseTableBase* Reflection::CreateTcParseTable() const { explicit ReflectionOptionProvider(const Reflection& ref) : ref_(ref) {} internal::TailCallTableInfo::PerFieldOptions GetForField( const FieldDescriptor* field) const final { + const auto verify_flag = [&] { + if (ref_.IsEagerlyVerifiedLazyField(field)) + return internal::field_layout::kTvEager; + if (ref_.IsLazilyVerifiedLazyField(field)) + return internal::field_layout::kTvLazy; + return internal::field_layout::TransformValidation{}; + }; return { - ref_.IsLazyField(field), // - ref_.IsInlined(field), // + verify_flag(), // + ref_.IsInlined(field), // // Only LITE can be implicitly weak. /* is_implicitly_weak */ false, diff --git a/src/google/protobuf/generated_message_tctable_decl.h b/src/google/protobuf/generated_message_tctable_decl.h index f9affbe1b3..257a99d55a 100644 --- a/src/google/protobuf/generated_message_tctable_decl.h +++ b/src/google/protobuf/generated_message_tctable_decl.h @@ -375,6 +375,8 @@ struct alignas(uint64_t) TcParseTableBase { int32_t has_idx; // has-bit index, relative to the message object uint16_t aux_idx; // index for `field_aux`. uint16_t type_card; // `FieldType` and `Cardinality` (see _impl.h) + + static constexpr uint16_t kNoAuxIdx = 0xFFFF; }; // Returns a begin iterator (pointer) to the start of the field entries array. @@ -402,6 +404,8 @@ struct alignas(uint64_t) TcParseTableBase { constexpr FieldAux(MapAuxInfo map_info) : map_info(map_info) {} constexpr FieldAux(void (*create_in_arena)(Arena*, void*)) : create_in_arena(create_in_arena) {} + constexpr FieldAux(LazyEagerVerifyFnType verify_func) + : verify_func(verify_func) {} bool (*enum_validator)(int); struct { int16_t start; // minimum enum number (if it fits) @@ -412,6 +416,7 @@ struct alignas(uint64_t) TcParseTableBase { const TcParseTableBase* table; MapAuxInfo map_info; void (*create_in_arena)(Arena*, void*); + LazyEagerVerifyFnType verify_func; const MessageLite* message_default() const { return static_cast(message_default_p); diff --git a/src/google/protobuf/generated_message_tctable_gen.cc b/src/google/protobuf/generated_message_tctable_gen.cc index 987b669336..97b010ce09 100644 --- a/src/google/protobuf/generated_message_tctable_gen.cc +++ b/src/google/protobuf/generated_message_tctable_gen.cc @@ -112,6 +112,15 @@ EnumRangeInfo GetEnumRangeInfo(const FieldDescriptor* field, return EnumRangeInfo::kContiguous; } +// options.lazy_opt might be on for fields that don't really support lazy, so we +// make sure we only use lazy rep for singular TYPE_MESSAGE fields. +// We can't trust the `lazy=true` annotation. +bool HasLazyRep(const FieldDescriptor* field, + const TailCallTableInfo::PerFieldOptions options) { + return field->type() == field->TYPE_MESSAGE && !field->is_repeated() && + options.lazy_opt != 0; +} + void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry, const TailCallTableInfo::PerFieldOptions& options, TailCallTableInfo::FastFieldInfo& info) { @@ -135,11 +144,6 @@ void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry, : options.is_string_inlined ? PROTOBUF_PICK_FUNCTION(fn##iS) \ : PROTOBUF_PICK_REPEATABLE_FUNCTION(fn)) -#define PROTOBUF_PICK_MESSAGE_FUNCTION(fn) \ - (options.use_direct_tcparser_table \ - ? PROTOBUF_PICK_REPEATABLE_FUNCTION(fn##t) \ - : PROTOBUF_PICK_REPEATABLE_FUNCTION(fn##d)) - const FieldDescriptor* field = entry.field; info.aux_idx = static_cast(entry.aux_idx); if (field->type() == FieldDescriptor::TYPE_BYTES || @@ -216,10 +220,16 @@ void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry, } break; case FieldDescriptor::TYPE_MESSAGE: - picked = PROTOBUF_PICK_MESSAGE_FUNCTION(kFastM); + picked = + (HasLazyRep(field, options) ? PROTOBUF_PICK_SINGLE_FUNCTION(kFastMl) + : options.use_direct_tcparser_table + ? PROTOBUF_PICK_REPEATABLE_FUNCTION(kFastMt) + : PROTOBUF_PICK_REPEATABLE_FUNCTION(kFastMd)); break; case FieldDescriptor::TYPE_GROUP: - picked = PROTOBUF_PICK_MESSAGE_FUNCTION(kFastG); + picked = (options.use_direct_tcparser_table + ? PROTOBUF_PICK_REPEATABLE_FUNCTION(kFastGt) + : PROTOBUF_PICK_REPEATABLE_FUNCTION(kFastGd)); break; } @@ -232,7 +242,6 @@ void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry, #undef PROTOBUF_PICK_REPEATABLE_FUNCTION #undef PROTOBUF_PICK_PACKABLE_FUNCTION #undef PROTOBUF_PICK_STRING_FUNCTION -#undef PROTOBUF_PICK_MESSAGE_FUNCTION } bool IsFieldEligibleForFastParsing( @@ -243,7 +252,17 @@ bool IsFieldEligibleForFastParsing( // Map, oneof, weak, and lazy fields are not handled on the fast path. if (field->is_map() || field->real_containing_oneof() || field->options().weak() || options.is_implicitly_weak || - options.is_lazy || options.should_split) { + options.should_split) { + return false; + } + + if (HasLazyRep(field, options) && !options.uses_codegen) { + // Can't use TDP on lazy fields if we can't do codegen. + return false; + } + + if (HasLazyRep(field, options) && options.lazy_opt == field_layout::kTvLazy) { + // We only support eagerly verified lazy fields in the fast path. return false; } @@ -391,13 +410,10 @@ std::vector SplitFastFieldsForSize( // Filter out fields that will be handled by mini parsing. std::vector FilterMiniParsedFields( const std::vector& fields, - const TailCallTableInfo::OptionProvider& option_provider -) { + const TailCallTableInfo::OptionProvider& option_provider) { std::vector generated_fallback_fields; for (const auto* field : fields) { - auto options = option_provider.GetForField(field); - bool handled = false; switch (field->type()) { case FieldDescriptor::TYPE_DOUBLE: @@ -425,7 +441,7 @@ std::vector FilterMiniParsedFields( case FieldDescriptor::TYPE_MESSAGE: case FieldDescriptor::TYPE_GROUP: // TODO(b/210762816): support remaining field types. - if (field->options().weak() || options.is_lazy) { + if (field->options().weak()) { handled = false; } else { handled = true; @@ -709,16 +725,18 @@ uint16_t MakeTypeCardForField( type_card |= fl::kMap; } else { type_card |= fl::kMessage; - if (options.is_lazy) { - type_card |= fl::kRepLazy; - } - - if (options.is_implicitly_weak) { - type_card |= fl::kTvWeakPtr; - } else if (options.use_direct_tcparser_table) { - type_card |= fl::kTvTable; + if (HasLazyRep(field, options)) { + ABSL_CHECK(options.lazy_opt == field_layout::kTvEager || + options.lazy_opt == field_layout::kTvLazy); + type_card |= +fl::kRepLazy | options.lazy_opt; } else { - type_card |= fl::kTvDefault; + if (options.is_implicitly_weak) { + type_card |= fl::kTvWeakPtr; + } else if (options.use_direct_tcparser_table) { + type_card |= fl::kTvTable; + } else { + type_card |= fl::kTvDefault; + } } } break; @@ -799,8 +817,19 @@ TailCallTableInfo::TailCallTableInfo( } else if (field->options().weak()) { // Don't generate anything for weak fields. They are handled by the // generated fallback. - } else if (options.is_lazy) { - // Lazy fields are handled by the generated fallback function. + } else if (HasLazyRep(field, options)) { + if (options.uses_codegen) { + field_entries.back().aux_idx = aux_entries.size(); + aux_entries.push_back({kSubMessage, {field}}); + if (options.lazy_opt == field_layout::kTvEager) { + aux_entries.push_back({kMessageVerifyFunc, {field}}); + } else { + aux_entries.push_back({kNothing}); + } + } else { + field_entries.back().aux_idx = + TcParseTableBase::FieldEntry::kNoAuxIdx; + } } else { field_entries.back().aux_idx = aux_entries.size(); aux_entries.push_back({options.is_implicitly_weak ? kSubMessageWeak @@ -891,8 +920,7 @@ TailCallTableInfo::TailCallTableInfo( // Filter out fields that are handled by MiniParse. We don't need to generate // a fallback for these, which saves code size. - fallback_fields = FilterMiniParsedFields(ordered_fields, option_provider - ); + fallback_fields = FilterMiniParsedFields(ordered_fields, option_provider); num_to_entry_table = MakeNumToEntryTable(ordered_fields); ABSL_CHECK_EQ(field_entries.size(), ordered_fields.size()); diff --git a/src/google/protobuf/generated_message_tctable_gen.h b/src/google/protobuf/generated_message_tctable_gen.h index 3f5f7ec13a..2ef7c58536 100644 --- a/src/google/protobuf/generated_message_tctable_gen.h +++ b/src/google/protobuf/generated_message_tctable_gen.h @@ -50,10 +50,15 @@ namespace google { namespace protobuf { namespace internal { +namespace field_layout { +enum TransformValidation : uint16_t; +} // namespace field_layout + // Helper class for generating tailcall parsing functions. struct PROTOBUF_EXPORT TailCallTableInfo { struct PerFieldOptions { - bool is_lazy; + // kTvEager, kTvLazy, or 0 + field_layout::TransformValidation lazy_opt; bool is_string_inlined; bool is_implicitly_weak; bool use_direct_tcparser_table; @@ -104,6 +109,7 @@ struct PROTOBUF_EXPORT TailCallTableInfo { kSubMessage, kSubTable, kSubMessageWeak, + kMessageVerifyFunc, kEnumRange, kEnumValidator, kNumericOffset, diff --git a/src/google/protobuf/generated_message_tctable_impl.h b/src/google/protobuf/generated_message_tctable_impl.h index 2527fd0819..2551b7a73a 100644 --- a/src/google/protobuf/generated_message_tctable_impl.h +++ b/src/google/protobuf/generated_message_tctable_impl.h @@ -173,6 +173,10 @@ enum TransformValidation : uint16_t { kTvDefault = 1 << kTvShift, // Aux has default_instance* kTvTable = 2 << kTvShift, // Aux has TcParseTableBase* kTvWeakPtr = 3 << kTvShift, // Aux has default_instance** (for weak) + + // Lazy message fields: + kTvEager = 1 << kTvShift, + kTvLazy = 2 << kTvShift, }; static_assert((kTvEnum & kTvRange) != 0, @@ -367,6 +371,7 @@ inline void AlignFail(std::integral_constant, PROTOBUF_TC_PARSE_FUNCTION_LIST_REPEATED(FastGt) \ PROTOBUF_TC_PARSE_FUNCTION_LIST_REPEATED(FastMd) \ PROTOBUF_TC_PARSE_FUNCTION_LIST_REPEATED(FastMt) \ + PROTOBUF_TC_PARSE_FUNCTION_LIST_SINGLE(FastMl) \ PROTOBUF_TC_PARSE_FUNCTION_LIST_END_GROUP() #define PROTOBUF_TC_PARSE_FUNCTION_X(value) k##value, @@ -547,7 +552,7 @@ class PROTOBUF_EXPORT TcParser final { // Functions referenced by generated fast tables (message types): // M: message G: group - // d: default* t: TcParseTable* (the contents of aux) + // d: default* t: TcParseTable* (the contents of aux) l: lazy // S: singular R: repeated // 1/2: tag length (bytes) static const char* FastMdS1(PROTOBUF_TC_PARAM_DECL); @@ -568,6 +573,9 @@ class PROTOBUF_EXPORT TcParser final { static const char* FastGtR1(PROTOBUF_TC_PARAM_DECL); static const char* FastGtR2(PROTOBUF_TC_PARAM_DECL); + static const char* FastMlS1(PROTOBUF_TC_PARAM_DECL); + static const char* FastMlS2(PROTOBUF_TC_PARAM_DECL); + template static inline T& RefAt(void* x, size_t offset) { T* target = reinterpret_cast(static_cast(x) + offset); @@ -672,6 +680,8 @@ class PROTOBUF_EXPORT TcParser final { static inline const char* SingularParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL); template static inline const char* RepeatedParseMessageAuxImpl(PROTOBUF_TC_PARAM_DECL); + template + static inline const char* LazyMessage(PROTOBUF_TC_PARAM_DECL); template static const char* FastEndGroupImpl(PROTOBUF_TC_PARAM_DECL); @@ -859,6 +869,7 @@ class PROTOBUF_EXPORT TcParser final { template static const char* MpMessage(PROTOBUF_TC_PARAM_DECL); static const char* MpRepeatedMessage(PROTOBUF_TC_PARAM_DECL); + static const char* MpLazyMessage(PROTOBUF_TC_PARAM_DECL); static const char* MpFallback(PROTOBUF_TC_PARAM_DECL); static const char* MpMap(PROTOBUF_TC_PARAM_DECL); }; diff --git a/src/google/protobuf/generated_message_tctable_lite.cc b/src/google/protobuf/generated_message_tctable_lite.cc index 95869ff11b..39b80ce411 100644 --- a/src/google/protobuf/generated_message_tctable_lite.cc +++ b/src/google/protobuf/generated_message_tctable_lite.cc @@ -477,6 +477,20 @@ PROTOBUF_NOINLINE const char* TcParser::FastGtS2(PROTOBUF_TC_PARAM_DECL) { PROTOBUF_TC_PARAM_PASS); } +template +const char* TcParser::LazyMessage(PROTOBUF_TC_PARAM_DECL) { + ABSL_LOG(FATAL) << "Unimplemented"; + return nullptr; +} + +PROTOBUF_NOINLINE const char* TcParser::FastMlS1(PROTOBUF_TC_PARAM_DECL) { + PROTOBUF_MUSTTAIL return LazyMessage(PROTOBUF_TC_PARAM_PASS); +} + +PROTOBUF_NOINLINE const char* TcParser::FastMlS2(PROTOBUF_TC_PARAM_DECL) { + PROTOBUF_MUSTTAIL return LazyMessage(PROTOBUF_TC_PARAM_PASS); +} + template inline PROTOBUF_ALWAYS_INLINE const char* TcParser::RepeatedParseMessageAuxImpl( PROTOBUF_TC_PARAM_DECL) { @@ -2361,6 +2375,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedString( PROTOBUF_MUSTTAIL return ToParseLoop(PROTOBUF_TC_PARAM_NO_DATA_PASS); } + template PROTOBUF_NOINLINE const char* TcParser::MpMessage(PROTOBUF_TC_PARAM_DECL) { const auto& entry = RefAt(table, data.entry_offset()); @@ -2391,8 +2406,6 @@ PROTOBUF_NOINLINE const char* TcParser::MpMessage(PROTOBUF_TC_PARAM_DECL) { break; default: { fallback: - // Lazy and implicit weak fields are handled by generated code: - // TODO(b/210762816): support these. PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS); } } @@ -2460,8 +2473,6 @@ const char* TcParser::MpRepeatedMessage(PROTOBUF_TC_PARAM_DECL) { break; default: { fallback: - // Lazy and implicit weak fields are handled by generated code: - // TODO(b/210762816): support these. PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS); } }