diff --git a/src/google/protobuf/compiler/cpp/parse_function_generator.cc b/src/google/protobuf/compiler/cpp/parse_function_generator.cc index c14388716d..14d0491396 100644 --- a/src/google/protobuf/compiler/cpp/parse_function_generator.cc +++ b/src/google/protobuf/compiler/cpp/parse_function_generator.cc @@ -39,15 +39,6 @@ using internal::cpp::Utf8CheckMode; using google::protobuf::internal::WireFormat; using google::protobuf::internal::WireFormatLite; -bool HasWeakFields(const Descriptor* descriptor) { - for (int i = 0; i < descriptor->field_count(); i++) { - if (descriptor->field(i)->options().weak()) { - return true; - } - } - return false; -} - std::vector GetOrderedFields( const Descriptor* descriptor, const Options& options) { std::vector ordered_fields; @@ -280,20 +271,20 @@ static NumToEntryTable MakeNumToEntryTable( return num_to_entry_table; } +static std::string TcParseFunctionName(internal::TcParseFunction func) { +#define PROTOBUF_TC_PARSE_FUNCTION_X(value) #value, + static constexpr absl::string_view kNames[] = { + {}, PROTOBUF_TC_PARSE_FUNCTION_LIST}; +#undef PROTOBUF_TC_PARSE_FUNCTION_X + const int func_index = static_cast(func); + ABSL_CHECK_GE(func_index, 0); + ABSL_CHECK_LT(func_index, std::end(kNames) - std::begin(kNames)); + static constexpr absl::string_view ns = "::_pbi::TcParser::"; + return absl::StrCat(ns, kNames[func_index]); +} + void ParseFunctionGenerator::GenerateTailCallTable(io::Printer* printer) { Formatter format(printer, variables_); - // All entries without a fast-path parsing function need a fallback. - std::string fallback = "::_pbi::TcParser::GenericFallback"; - if (GetOptimizeFor(descriptor_->file(), options_) == - FileOptions::LITE_RUNTIME) { - absl::StrAppend(&fallback, "Lite"); - } else if (HasWeakFields(descriptor_)) { - // weak=true fields are handled with the reflection fallback, but we don't - // want to install that for normal messages because it has more overhead. - ABSL_CHECK(HasDescriptorMethods(descriptor_->file(), options_)); - fallback = "::_pbi::TcParser::ReflectionFallback"; - } - // For simplicity and speed, the table is not covering all proto // configurations. This model uses a fallback to cover all situations that // the table can't accommodate, together with unknown fields or extensions. @@ -366,7 +357,8 @@ void ParseFunctionGenerator::GenerateTailCallTable(io::Printer* printer) { nullptr, // post_loop_handler )cc"); } - format("$1$, // fallback\n", fallback); + format("$1$, // fallback\n", + TcParseFunctionName(tc_table_info_->fallback_function)); std::vector subtable_fields; for (const auto& aux : tc_table_info_->aux_entries) { if (aux.type == internal::TailCallTableInfo::kSubTable) { @@ -555,18 +547,6 @@ void ParseFunctionGenerator::GenerateTailCallTable(io::Printer* printer) { format("};\n\n"); // _table_ } -static std::string TcParseFunctionName(internal::TcParseFunction func) { -#define PROTOBUF_TC_PARSE_FUNCTION_X(value) #value, - static constexpr absl::string_view kNames[] = { - {}, PROTOBUF_TC_PARSE_FUNCTION_LIST}; -#undef PROTOBUF_TC_PARSE_FUNCTION_X - const int func_index = static_cast(func); - ABSL_CHECK_GE(func_index, 0); - ABSL_CHECK_LT(func_index, std::end(kNames) - std::begin(kNames)); - static constexpr absl::string_view ns = "::_pbi::TcParser::"; - return absl::StrCat(ns, kNames[func_index]); -} - void ParseFunctionGenerator::GenerateFastFieldEntries(Formatter& format) { for (const auto& info : tc_table_info_->fast_path_fields) { if (auto* nonfield = info.AsNonField()) { diff --git a/src/google/protobuf/generated_message_reflection.cc b/src/google/protobuf/generated_message_reflection.cc index 9356f592f2..436856f57c 100644 --- a/src/google/protobuf/generated_message_reflection.cc +++ b/src/google/protobuf/generated_message_reflection.cc @@ -3460,9 +3460,9 @@ const internal::TcParseTableBase* Reflection::CreateTcParseTable() const { aux_offset, schema_.default_instance_, nullptr, - &internal::TcParser::ReflectionFallback + GetFastParseFunction(table_info.fallback_function) #ifdef PROTOBUF_PREFETCH_PARSE_TABLE - , + , nullptr #endif // PROTOBUF_PREFETCH_PARSE_TABLE }; diff --git a/src/google/protobuf/generated_message_tctable_gen.cc b/src/google/protobuf/generated_message_tctable_gen.cc index d02a352046..55bca3016b 100644 --- a/src/google/protobuf/generated_message_tctable_gen.cc +++ b/src/google/protobuf/generated_message_tctable_gen.cc @@ -724,6 +724,15 @@ uint16_t MakeTypeCardForField( return type_card; } +bool HasWeakFields(const Descriptor* descriptor) { + for (int i = 0; i < descriptor->field_count(); i++) { + if (descriptor->field(i)->options().weak()) { + return true; + } + } + return false; +} + } // namespace TailCallTableInfo::TailCallTableInfo( @@ -733,6 +742,17 @@ TailCallTableInfo::TailCallTableInfo( const OptionProvider& option_provider, const std::vector& has_bit_indices, const std::vector& inlined_string_indices) { + fallback_function = + // Map entries discard unknown data + descriptor->options().map_entry() + ? TcParseFunction::kDiscardEverythingFallback + // Reflection and weak messages have the reflection fallback + : !message_options.uses_codegen || HasWeakFields(descriptor) + ? TcParseFunction::kReflectionFallback + // Codegen messages have lite and non-lite version + : message_options.is_lite ? TcParseFunction::kGenericFallbackLite + : TcParseFunction::kGenericFallback; + if (descriptor->options().message_set_wire_format()) { ABSL_DCHECK(ordered_fields.empty()); ABSL_DCHECK(inlined_string_indices.empty()); diff --git a/src/google/protobuf/generated_message_tctable_gen.h b/src/google/protobuf/generated_message_tctable_gen.h index b323247b8f..667758519b 100644 --- a/src/google/protobuf/generated_message_tctable_gen.h +++ b/src/google/protobuf/generated_message_tctable_gen.h @@ -66,6 +66,8 @@ struct PROTOBUF_EXPORT TailCallTableInfo { const std::vector& has_bit_indices, const std::vector& inlined_string_indices); + TcParseFunction fallback_function; + // Fields parsed by the table fast-path. struct FastFieldInfo { struct Empty {}; diff --git a/src/google/protobuf/generated_message_tctable_impl.h b/src/google/protobuf/generated_message_tctable_impl.h index 6143afaded..260556d7f9 100644 --- a/src/google/protobuf/generated_message_tctable_impl.h +++ b/src/google/protobuf/generated_message_tctable_impl.h @@ -329,6 +329,7 @@ inline void AlignFail(std::integral_constant, // FastV8S1, FastZ64S2, FastEr1P2, FastBcS1, FastMtR2, FastEndG1 // #define PROTOBUF_TC_PARSE_FUNCTION_LIST \ + /* These functions have the Fast entry ABI */ \ PROTOBUF_TC_PARSE_FUNCTION_LIST_PACKED(FastV8) \ PROTOBUF_TC_PARSE_FUNCTION_LIST_PACKED(FastV32) \ PROTOBUF_TC_PARSE_FUNCTION_LIST_PACKED(FastV64) \ @@ -357,7 +358,12 @@ inline void AlignFail(std::integral_constant, PROTOBUF_TC_PARSE_FUNCTION_LIST_END_GROUP() \ PROTOBUF_TC_PARSE_FUNCTION_X(MessageSetWireFormatParseLoopLite) \ PROTOBUF_TC_PARSE_FUNCTION_X(MessageSetWireFormatParseLoop) \ - PROTOBUF_TC_PARSE_FUNCTION_X(ReflectionParseLoop) + PROTOBUF_TC_PARSE_FUNCTION_X(ReflectionParseLoop) \ + /* These functions have the fallback ABI */ \ + PROTOBUF_TC_PARSE_FUNCTION_X(GenericFallback) \ + PROTOBUF_TC_PARSE_FUNCTION_X(GenericFallbackLite) \ + PROTOBUF_TC_PARSE_FUNCTION_X(ReflectionFallback) \ + PROTOBUF_TC_PARSE_FUNCTION_X(DiscardEverythingFallback) #define PROTOBUF_TC_PARSE_FUNCTION_X(value) k##value, enum class TcParseFunction : uint8_t { kNone, PROTOBUF_TC_PARSE_FUNCTION_LIST }; @@ -393,6 +399,11 @@ class PROTOBUF_EXPORT TcParser final { static const char* ReflectionFallback(PROTOBUF_TC_PARAM_DECL); static const char* ReflectionParseLoop(PROTOBUF_TC_PARAM_DECL); + // This fallback will discard any field that reaches there. + // Note that fields parsed via fast/MiniParse are not going to be discarded + // even when this is enabled. + static const char* DiscardEverythingFallback(PROTOBUF_TC_PARAM_DECL); + // These follow the "fast" function ABI but implement the whole loop for // message_set_wire_format types. static const char* MessageSetWireFormatParseLoop( diff --git a/src/google/protobuf/generated_message_tctable_lite.cc b/src/google/protobuf/generated_message_tctable_lite.cc index 53aabd01b8..9d1ec992fe 100644 --- a/src/google/protobuf/generated_message_tctable_lite.cc +++ b/src/google/protobuf/generated_message_tctable_lite.cc @@ -2954,6 +2954,16 @@ std::string TypeCardToString(uint16_t type_card) { return out; } +const char* TcParser::DiscardEverythingFallback(PROTOBUF_TC_PARAM_DECL) { + SyncHasbits(msg, hasbits, table); + uint32_t tag = data.tag(); + if ((tag & 7) == WireFormatLite::WIRETYPE_END_GROUP || tag == 0) { + ctx->SetLastTag(tag); + return ptr; + } + return UnknownFieldParse(tag, nullptr, ptr, ctx); +} + } // namespace internal } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/map_test.inc b/src/google/protobuf/map_test.inc index 2165c66539..cb396b79e0 100644 --- a/src/google/protobuf/map_test.inc +++ b/src/google/protobuf/map_test.inc @@ -2731,6 +2731,30 @@ TEST(GeneratedMapFieldTest, Proto2UnknownEnum) { EXPECT_EQ(UNITTEST::PROTO2_MAP_ENUM_FOO, to.known_map_field().at(0)); } +TEST(GeneratedMapFieldTest, Proto2UknownEnumThrowsAwayUnknownData) { + UNITTEST::TestEnumMap to; + // 101: { + // 1: 2 + // 2: 3 # this is an unknown enum. It should be kept. + // 3: 120 # this is an extra field that will be discarded. + // } + constexpr absl::string_view data = "\252\006\006\010\002\020\003\030x"; + // Same as above, but without the extra field. + constexpr absl::string_view expected = "\252\006\004\010\002\020\003"; + ASSERT_TRUE(to.ParseFromString(data)); + EXPECT_EQ(expected, to.SerializeAsString()); + + // Test the same behavior with the reflection based parser. + to.Clear(); + const char* ptr; + internal::ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(), + false, &ptr, data); + ptr = WireFormat::_InternalParse(&to, ptr, &ctx); + ASSERT_TRUE(ptr); + ASSERT_TRUE(ctx.EndedAtLimit()); + EXPECT_EQ(expected, to.SerializeAsString()); +} + TEST(GeneratedMapFieldTest, Proto2UnknownEnumAllKeyTypesWork) { #define PROTOBUF_INTERNAL_TEST_MAP_KEY_TYPE(Type, ...) \ ABSL_LOG(INFO) << "Testing " << #Type; \