From a0f27fcd96c5bf2509ca88cca54f00b78f7b8bc5 Mon Sep 17 00:00:00 2001 From: temporal Date: Wed, 6 Aug 2008 01:12:21 +0000 Subject: [PATCH] Heuristically detect sub-messages when printing unknown fields. Patch mostly written by Dilip Joseph . --- CONTRIBUTORS.txt | 8 ++++ src/google/protobuf/text_format.cc | 41 +++++++++++++++++-- src/google/protobuf/text_format.h | 14 ++++++- src/google/protobuf/text_format_unittest.cc | 44 +++++++++++++++++++++ src/google/protobuf/unknown_field_set.cc | 32 +++++++++++++++ src/google/protobuf/unknown_field_set.h | 11 ++++++ 6 files changed, 146 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 74403530b5..a22cda45d7 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -36,6 +36,14 @@ Maven packaging: Non-Google patch contributors: Kevin Ko + * Small patch to handle trailing slashes in --proto_path flag. Johan Euphrosine + * Small patch to fix Python CallMethod(). Ulrich Kunitz + * Small optimizations to Python serialization. Leandro Lucarella + * VI syntax highlighting tweaks. + * Fix compiler to not make output executable. + Dilip Joseph + * Heuristic detection of sub-messages when printing unknown fields in + text format. 
diff --git a/src/google/protobuf/text_format.cc b/src/google/protobuf/text_format.cc index 63a64db17f..d698681c23 100644 --- a/src/google/protobuf/text_format.cc +++ b/src/google/protobuf/text_format.cc @@ -728,6 +728,16 @@ bool TextFormat::Parser::MergeFromString(const string& input, return result; } +/* static */ bool TextFormat::PrintUnknownFieldsToString( + const UnknownFieldSet& unknown_fields, + string* output) { + GOOGLE_DCHECK(output) << "output specified is NULL"; + + output->clear(); + io::StringOutputStream output_stream(output); + return PrintUnknownFields(unknown_fields, &output_stream); +} + /* static */ bool TextFormat::Print(const Message& message, io::ZeroCopyOutputStream* output) { TextGenerator generator(output); @@ -738,6 +748,17 @@ bool TextFormat::Parser::MergeFromString(const string& input, return !generator.failed(); } +/* static */ bool TextFormat::PrintUnknownFields( + const UnknownFieldSet& unknown_fields, + io::ZeroCopyOutputStream* output) { + TextGenerator generator(output); + + PrintUnknownFields(unknown_fields, generator); + + // Output false if the generator failed internally. + return !generator.failed(); +} + /* static */ void TextFormat::Print(const Descriptor* descriptor, const Message::Reflection* message, TextGenerator& generator) { @@ -922,9 +943,23 @@ static string PaddedHex(IntType value) { } for (int j = 0; j < field.length_delimited_size(); j++) { generator.Print(field_number); - generator.Print(": \""); - generator.Print(CEscape(field.length_delimited(j))); - generator.Print("\"\n"); + const string& value = field.length_delimited(j); + UnknownFieldSet embedded_unknown_fields; + if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) { + // This field is parseable as a Message. + // So it is probably an embedded message. 
+ generator.Print(" {\n"); + generator.Indent(); + PrintUnknownFields(embedded_unknown_fields, generator); + generator.Outdent(); + generator.Print("}\n"); + } else { + // This field is not parseable as a Message. + // So it is probably just a plain string. + generator.Print(": \""); + generator.Print(CEscape(value)); + generator.Print("\"\n"); + } } for (int j = 0; j < field.group_size(); j++) { generator.Print(field_number); diff --git a/src/google/protobuf/text_format.h b/src/google/protobuf/text_format.h index df27710d1f..00d50ad156 100644 --- a/src/google/protobuf/text_format.h +++ b/src/google/protobuf/text_format.h @@ -45,9 +45,20 @@ class LIBPROTOBUF_EXPORT TextFormat { // Outputs a textual representation of the given message to the given // output stream. static bool Print(const Message& message, io::ZeroCopyOutputStream* output); + + // Print the fields in an UnknownFieldSet. They are printed by tag number + // only. Embedded messages are heuristically identified by attempting to + // parse them. + static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields, + io::ZeroCopyOutputStream* output); + // Like Print(), but outputs directly to a string. static bool PrintToString(const Message& message, string* output); + // Like PrintUnknownFields(), but outputs directly to a string. + static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields, + string* output); + // Outputs a textual representation of the value of the field supplied on // the message supplied. For non-repeated fields, an index of -1 must // be supplied. Note that this method will print the default value for a @@ -130,7 +141,8 @@ class LIBPROTOBUF_EXPORT TextFormat { TextGenerator& generator); // Print the fields in an UnknownFieldSet. They are printed by tag number - // only. + // only. Embedded messages are heuristically identified by attempting to + // parse them. 
static void PrintUnknownFields(const UnknownFieldSet& unknown_fields, TextGenerator& generator); diff --git a/src/google/protobuf/text_format_unittest.cc b/src/google/protobuf/text_format_unittest.cc index 48c7076344..ac4efc941a 100644 --- a/src/google/protobuf/text_format_unittest.cc +++ b/src/google/protobuf/text_format_unittest.cc @@ -158,6 +158,50 @@ TEST_F(TextFormatTest, PrintUnknownFields) { message.DebugString()); } +TEST_F(TextFormatTest, PrintUnknownMessage) { + // Test heuristic printing of messages in an UnknownFieldSet. + + protobuf_unittest::TestAllTypes message; + + // Cases which should not be interpreted as sub-messages. + + // 'a' is a valid FIXED64 tag, so for the string to be parseable as a message + // it should be followed by 8 bytes. Since this string only has two + // subsequent bytes, it should be treated as a string. + message.add_repeated_string("abc"); + + // 'd' happens to be a valid ENDGROUP tag. So, + // UnknownFieldSet::MergeFromCodedStream() will successfully parse "def", but + // the ConsumedEntireMessage() check should fail. + message.add_repeated_string("def"); + + // A zero-length string should never be interpreted as a message even though + // it is technically valid as one. + message.add_repeated_string(""); + + // Case which should be interpreted as a sub-message. + + // An actual nested message with content should always be interpreted as a + // nested message. 
+ message.add_repeated_nested_message()->set_bb(123); + + string data; + message.SerializeToString(&data); + + string text; + UnknownFieldSet unknown_fields; + EXPECT_TRUE(unknown_fields.ParseFromString(data)); + EXPECT_TRUE(TextFormat::PrintUnknownFieldsToString(unknown_fields, &text)); + EXPECT_EQ( + "44: \"abc\"\n" + "44: \"def\"\n" + "44: \"\"\n" + "48 {\n" + " 1: 123\n" + "}\n", + text); +} + TEST_F(TextFormatTest, ParseBasic) { io::ArrayInputStream input_stream(proto_debug_string_.data(), proto_debug_string_.size()); diff --git a/src/google/protobuf/unknown_field_set.cc b/src/google/protobuf/unknown_field_set.cc index 2f44901e35..3d45002ece 100644 --- a/src/google/protobuf/unknown_field_set.cc +++ b/src/google/protobuf/unknown_field_set.cc @@ -20,6 +20,10 @@ #include #include +#include +#include +#include +#include namespace google { namespace protobuf { @@ -57,6 +61,34 @@ void UnknownFieldSet::MergeFrom(const UnknownFieldSet& other) { } } +bool UnknownFieldSet::MergeFromCodedStream(io::CodedInputStream* input) { + + UnknownFieldSet other; + if (internal::WireFormat::SkipMessage(input, &other) && + input->ConsumedEntireMessage()) { + MergeFrom(other); + return true; + } else { + return false; + } +} + +bool UnknownFieldSet::ParseFromCodedStream(io::CodedInputStream* input) { + Clear(); + return MergeFromCodedStream(input); +} + +bool UnknownFieldSet::ParseFromZeroCopyStream(io::ZeroCopyInputStream* input) { + io::CodedInputStream coded_input(input); + return ParseFromCodedStream(&coded_input) && + coded_input.ConsumedEntireMessage(); +} + +bool UnknownFieldSet::ParseFromArray(const void* data, int size) { + io::ArrayInputStream input(data, size); + return ParseFromZeroCopyStream(&input); +} + const UnknownField* UnknownFieldSet::FindFieldByNumber(int number) const { if (internal_ == NULL) return NULL; diff --git a/src/google/protobuf/unknown_field_set.h b/src/google/protobuf/unknown_field_set.h index 421846215f..2e908c3369 100644 --- 
a/src/google/protobuf/unknown_field_set.h +++ b/src/google/protobuf/unknown_field_set.h @@ -77,6 +77,17 @@ class LIBPROTOBUF_EXPORT UnknownFieldSet { // the existing UnknownField. UnknownField* AddField(int number); + // Parsing helpers ------------------------------------------------- + // These work exactly like the similarly-named methods of Message. + + bool MergeFromCodedStream(io::CodedInputStream* input); + bool ParseFromCodedStream(io::CodedInputStream* input); + bool ParseFromZeroCopyStream(io::ZeroCopyInputStream* input); + bool ParseFromArray(const void* data, int size); + inline bool ParseFromString(const string& data) { + return ParseFromArray(data.data(), data.size()); + } + private: // "Active" fields are ones which have been added since the last time Clear() // was called. Inactive fields are objects we are keeping around incase