From 01cccd392c2f4f8ca116e2d92e721d619c60ace1 Mon Sep 17 00:00:00 2001 From: Kevin O'Connor Date: Thu, 6 Feb 2025 08:32:15 -0800 Subject: [PATCH] Add binary conformance tests for message set encoding 1. straight forward message set group encoding 2. group encoding but with the fields out of order 3. message set encoded as a typical extension submessage 4. A variant of the former, but I "abuse" a oneof to detect when implementations put the message set fields into the unknown field set. Go seems to be the only implementation that fails (3) as it will drop these fields entirely. A couple of implementations fail (4). PiperOrigin-RevId: 723946259 --- conformance/binary_json_conformance_suite.cc | 101 ++++++++++++++++++ conformance/binary_json_conformance_suite.h | 7 ++ conformance/failure_list_jruby_ffi.txt | 1 + conformance/failure_list_python_upb.txt | 2 + conformance/failure_list_ruby.txt | 1 + .../test_messages_proto2_editions.proto | 13 +++ .../protobuf/test_messages_proto2.proto | 12 +++ 7 files changed, 137 insertions(+) diff --git a/conformance/binary_json_conformance_suite.cc b/conformance/binary_json_conformance_suite.cc index 1229aabc43..41f52b5a0a 100644 --- a/conformance/binary_json_conformance_suite.cc +++ b/conformance/binary_json_conformance_suite.cc @@ -363,6 +363,9 @@ void BinaryAndJsonConformanceSuite::RunSuiteImpl() { this, /*run_proto3_tests=*/true); BinaryAndJsonConformanceSuiteImpl( this, /*run_proto3_tests=*/false); + if (!this->performance_) { + RunMessageSetTests(); + } if (maximum_edition_ >= Edition::EDITION_2023) { BinaryAndJsonConformanceSuiteImpl( this, /*run_proto3_tests=*/true); @@ -426,6 +429,92 @@ void BinaryAndJsonConformanceSuite::RunDelimitedFieldTests() { R"pb([protobuf_test_messages.editions.delimited_ext] { c: 99 })pb"); } +void BinaryAndJsonConformanceSuite::RunMessageSetTests() { + RunValidBinaryProtobufTest( + absl::StrCat("ValidMessageSetEncoding"), REQUIRED, + len(500, + group(1, absl::StrCat(field(2, WireFormatLite::WIRETYPE_VARINT, + varint(4135312)), + len(3, field(9, WireFormatLite::WIRETYPE_VARINT, + varint(99)))))), + // clang-format off + R"pb(message_set_correct: { + [protobuf_test_messages.proto2 + .TestAllTypesProto2.MessageSetCorrectExtension2]: { i: 99 } + })pb" + // clang-format on + ); + RunValidBinaryProtobufTest( + absl::StrCat("ValidMessageSetEncoding.OutOfOrderGroupsEntries"), REQUIRED, + len(500, + group(1, absl::StrCat(len(3, field(9, WireFormatLite::WIRETYPE_VARINT, + varint(99))), + field(2, WireFormatLite::WIRETYPE_VARINT, + varint(4135312))))), + // clang-format off + R"pb(message_set_correct: { + [protobuf_test_messages.proto2 + .TestAllTypesProto2.MessageSetCorrectExtension2]: { i: 99 } + })pb" + // clang-format on + ); + + // Test that an unknown message set extension always goes to unknown fields. + // This is done by poisoning the extension payload with an entry for field 0. + RunValidRoundtripProtobufTest( + "MessageSetEncoding.UnknownExtension", REQUIRED, + len(500, + group(1, absl::StrCat(field(2, WireFormatLite::WIRETYPE_VARINT, + varint(4135300)), + len(3, field(0, WireFormatLite::WIRETYPE_VARINT, + varint(99))))))); + + // If an encoder is unaware of the message_set_wire_format option it will be + // encoded like any other extension submessage. Decoders should be able to + // tolerate this format as well. + RunValidBinaryProtobufTest( + absl::StrCat("ValidMessageSetEncoding.SubmessageEncoding"), RECOMMENDED, + len(500, + len(4135312, field(9, WireFormatLite::WIRETYPE_VARINT, varint(99)))), + // clang-format off + R"pb(message_set_correct: { + [protobuf_test_messages.proto2 + .TestAllTypesProto2.MessageSetCorrectExtension2]: { i: 99 } + })pb" + // clang-format on + ); + + // Test again, but this time we'll try to detect if the implementation put the + // submessage encoded entry into the unknown field set. We'll do this by using + // conflicting oneof entries where order matters when the messages are merged. + // + // In a non-compliant implementation submessage encoded messageset entry will + // be moved to unknown fields and then tacked onto the end of the payload. + // Thus we'll see field b set first, and then field a. + // + // In a compliant implementation we expect the submessage encoded messageset + // to be read first with field a set, and then the normal message set entry + // will be read with field b will be set -- thus field b will win. + RunValidBinaryProtobufTest( + absl::StrCat("ValidMessageSetEncoding.SubmessageEncoding.NotUnknown"), + RECOMMENDED, + len(500, absl::StrCat( + len(123456789, + field(1, WireFormatLite::WIRETYPE_VARINT, varint(42))), + group(1, absl::StrCat( + field(2, WireFormatLite::WIRETYPE_VARINT, + varint(123456789)), + len(3, field(2, WireFormatLite::WIRETYPE_VARINT, + varint(99))))))), + // clang-format off + R"pb(message_set_correct: { + [protobuf_test_messages.proto2 + .TestAllTypesProto2.ExtensionWithOneof]: { b: 99 } + })pb" + // clang-format on + ); +} + template void BinaryAndJsonConformanceSuiteImpl:: ExpectParseFailureForProtoWithProtoVersion(const std::string& proto, @@ -545,6 +634,18 @@ void BinaryAndJsonConformanceSuite::RunValidBinaryProtobufTest( RunValidInputTest(binary_to_binary, equivalent_text_format); } +template +void BinaryAndJsonConformanceSuite::RunValidRoundtripProtobufTest( + const std::string& test_name, ConformanceLevel level, + const std::string& input_protobuf) { + MessageType prototype; + + ConformanceRequestSetting binary_to_binary( + level, conformance::PROTOBUF, conformance::PROTOBUF, + conformance::BINARY_TEST, prototype, test_name, input_protobuf); + RunValidBinaryInputTest(binary_to_binary, input_protobuf); +} + template void BinaryAndJsonConformanceSuite::RunValidProtobufTest( const std::string& test_name, ConformanceLevel level, diff --git a/conformance/binary_json_conformance_suite.h b/conformance/binary_json_conformance_suite.h index 9c990557d7..bd1357d017 100644 --- a/conformance/binary_json_conformance_suite.h +++ b/conformance/binary_json_conformance_suite.h @@ -44,6 +44,11 @@ class BinaryAndJsonConformanceSuite : public ConformanceTestSuite { const std::string& input_protobuf, const std::string& equivalent_text_format); + template + void RunValidRoundtripProtobufTest(const std::string& test_name, + ConformanceLevel level, + const std::string& input_protobuf); + template void RunValidProtobufTest(const std::string& test_name, ConformanceLevel level, @@ -52,6 +57,8 @@ class BinaryAndJsonConformanceSuite : public ConformanceTestSuite { void RunDelimitedFieldTests(); + void RunMessageSetTests(); + template friend class BinaryAndJsonConformanceSuiteImpl; diff --git a/conformance/failure_list_jruby_ffi.txt b/conformance/failure_list_jruby_ffi.txt index d22be3bf1d..15298bfc88 100644 --- a/conformance/failure_list_jruby_ffi.txt +++ b/conformance/failure_list_jruby_ffi.txt @@ -1,2 +1,3 @@ +Recommended.Proto2.ProtobufInput.ValidMessageSetEncoding.SubmessageEncoding.NotUnknown.ProtobufOutput # Output was not equivalent to reference message: added: message_set_correct.(protobuf_test_messages.proto2.TestAllTypesProto2.Ext Required.*.JsonInput.Int32FieldQuotedExponentialValue.* # Failed to parse input or produce output. Required.*.JsonInput.AnyWithNoType.* # Failed to parse input or produce output. diff --git a/conformance/failure_list_python_upb.txt b/conformance/failure_list_python_upb.txt index e69de29bb2..15e24e9355 100644 --- a/conformance/failure_list_python_upb.txt +++ b/conformance/failure_list_python_upb.txt @@ -0,0 +1,2 @@ +Recommended.Proto2.ProtobufInput.ValidMessageSetEncoding.SubmessageEncoding.NotUnknown.ProtobufOutput # Output was not equivalent to reference message: added: message_set_correct.(protobuf_test_messages.proto2.TestAllTypesProto2.Ext + diff --git a/conformance/failure_list_ruby.txt b/conformance/failure_list_ruby.txt index 882ab1dc74..8aacaa6871 100644 --- a/conformance/failure_list_ruby.txt +++ b/conformance/failure_list_ruby.txt @@ -1,2 +1,3 @@ +Recommended.Proto2.ProtobufInput.ValidMessageSetEncoding.SubmessageEncoding.NotUnknown.ProtobufOutput # Output was not equivalent to reference message: added: message_set_correct.(protobuf_test_messages.proto2.TestAllTypesProto2.Ext Required.*.JsonInput.Int32FieldQuotedExponentialValue.* # Failed to parse input or produce output. Required.*.JsonInput.AnyWithNoType.* \ No newline at end of file diff --git a/editions/golden/test_messages_proto2_editions.proto b/editions/golden/test_messages_proto2_editions.proto index ba1c79177d..4cfcd2327e 100644 --- a/editions/golden/test_messages_proto2_editions.proto +++ b/editions/golden/test_messages_proto2_editions.proto @@ -316,6 +316,8 @@ message TestAllTypesProto2 { // Reserved for unknown fields test. reserved 1000 to 9999; + MessageSetCorrect message_set_correct = 500; + // message_set test case. message MessageSetCorrect { option message_set_wire_format = true; @@ -338,6 +340,17 @@ message TestAllTypesProto2 { int32 i = 9; } + + message ExtensionWithOneof { + oneof oneof_field { + int32 a = 1; + int32 b = 2; + } + + extend MessageSetCorrect { + ExtensionWithOneof extension_with_oneof = 123456789; + } + } } message ForeignMessageProto2 { diff --git a/src/google/protobuf/test_messages_proto2.proto b/src/google/protobuf/test_messages_proto2.proto index 932717144b..a5d2614034 100644 --- a/src/google/protobuf/test_messages_proto2.proto +++ b/src/google/protobuf/test_messages_proto2.proto @@ -219,6 +219,8 @@ message TestAllTypesProto2 { // Reserved for unknown fields test. reserved 1000 to 9999; + optional MessageSetCorrect message_set_correct = 500; + // message_set test case. message MessageSetCorrect { option message_set_wire_format = true; @@ -239,6 +241,16 @@ message TestAllTypesProto2 { } optional int32 i = 9; } + + message ExtensionWithOneof { + oneof oneof_field { + int32 a = 1; + int32 b = 2; + } + extend MessageSetCorrect { + optional ExtensionWithOneof extension_with_oneof = 123456789; + } + } } message ForeignMessageProto2 {