Heuristically detect sub-messages when printing unknown fields.

Patch mostly written by Dilip Joseph <dilip.antony.joseph@gmail.com>.
pull/3335/head
temporal 17 years ago
parent 8ccb79057e
commit a0f27fcd96
  1. 8
      CONTRIBUTORS.txt
  2. 37
      src/google/protobuf/text_format.cc
  3. 14
      src/google/protobuf/text_format.h
  4. 44
      src/google/protobuf/text_format_unittest.cc
  5. 32
      src/google/protobuf/unknown_field_set.cc
  6. 11
      src/google/protobuf/unknown_field_set.h

@ -36,6 +36,14 @@ Maven packaging:
Non-Google patch contributors:
Kevin Ko <kevin.s.ko@gmail.com>
* Small patch to handle trailing slashes in --proto_path flag.
Johan Euphrosine <proppy@aminche.com>
* Small patch to fix Python CallMethod().
Ulrich Kunitz <kune@deine-taler.de>
* Small optimizations to Python serialization.
Leandro Lucarella <llucax@gmail.com>
* VI syntax highlighting tweaks.
* Fix compiler to not make output executable.
Dilip Joseph <dilip.antony.joseph@gmail.com>
* Heuristic detection of sub-messages when printing unknown fields in
text format.

@ -728,6 +728,16 @@ bool TextFormat::Parser::MergeFromString(const string& input,
return result;
}
/* static */ bool TextFormat::PrintUnknownFieldsToString(
const UnknownFieldSet& unknown_fields,
string* output) {
GOOGLE_DCHECK(output) << "output specified is NULL";
output->clear();
io::StringOutputStream output_stream(output);
return PrintUnknownFields(unknown_fields, &output_stream);
}
/* static */ bool TextFormat::Print(const Message& message,
io::ZeroCopyOutputStream* output) {
TextGenerator generator(output);
@ -738,6 +748,17 @@ bool TextFormat::Parser::MergeFromString(const string& input,
return !generator.failed();
}
/* static */ bool TextFormat::PrintUnknownFields(
const UnknownFieldSet& unknown_fields,
io::ZeroCopyOutputStream* output) {
TextGenerator generator(output);
PrintUnknownFields(unknown_fields, generator);
// Output false if the generator failed internally.
return !generator.failed();
}
/* static */ void TextFormat::Print(const Descriptor* descriptor,
const Message::Reflection* message,
TextGenerator& generator) {
@ -922,10 +943,24 @@ static string PaddedHex(IntType value) {
}
// Print every length-delimited value of this field. The wire format does not
// distinguish strings from embedded messages, so a heuristic is used: a
// non-empty value that parses cleanly as an UnknownFieldSet is printed as a
// nested message; anything else is printed as an escaped string.
for (int j = 0; j < field.length_delimited_size(); j++) {
  generator.Print(field_number);
  const string& value = field.length_delimited(j);
  UnknownFieldSet embedded_unknown_fields;
  if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
    // This field is parseable as a Message.
    // So it is probably an embedded message.
    generator.Print(" {\n");
    generator.Indent();
    PrintUnknownFields(embedded_unknown_fields, generator);
    generator.Outdent();
    generator.Print("}\n");
  } else {
    // This field is not parseable as a Message.
    // So it is probably just a plain string.
    // The escaped value is emitted exactly once between the quotes.
    generator.Print(": \"");
    generator.Print(CEscape(value));
    generator.Print("\"\n");
  }
}
for (int j = 0; j < field.group_size(); j++) {
generator.Print(field_number);
generator.Print(" {\n");

@ -45,9 +45,20 @@ class LIBPROTOBUF_EXPORT TextFormat {
// Outputs a textual representation of the given message to the given
// output stream.
static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
// Print the fields in an UnknownFieldSet. They are printed by tag number
// only. Embedded messages are heuristically identified by attempting to
// parse them.
// Returns false if the internal text generator reported a failure.
static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
io::ZeroCopyOutputStream* output);
// Like Print(), but outputs directly to a string.
static bool PrintToString(const Message& message, string* output);
// Like PrintUnknownFields(), but outputs directly to a string.
// Any existing contents of *output are cleared before printing. output must
// not be NULL.
static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
string* output);
// Outputs a textual representation of the value of the field supplied on
// the message supplied. For non-repeated fields, an index of -1 must
// be supplied. Note that this method will print the default value for a
@ -130,7 +141,8 @@ class LIBPROTOBUF_EXPORT TextFormat {
TextGenerator& generator);
// Print the fields in an UnknownFieldSet. They are printed by tag number
// only. Embedded messages are heuristically identified by attempting to
// parse them.
static void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
TextGenerator& generator);

@ -158,6 +158,50 @@ TEST_F(TextFormatTest, PrintUnknownFields) {
message.DebugString());
}
// Verifies the sub-message heuristic used when printing an UnknownFieldSet:
// length-delimited values that do not parse as a complete message (or are
// empty) must be printed as strings, while a real nested message must be
// printed as a nested block.
TEST_F(TextFormatTest, PrintUnknownMessage) {
  // Test heuristic printing of messages in an UnknownFieldSet.
  protobuf_unittest::TestAllTypes message;

  // Cases which should not be interpreted as sub-messages.

  // 'a' is a valid FIXED64 tag, so for the string to be parseable as a message
  // it should be followed by 8 bytes.  Since this string only has two
  // subsequent bytes, it should be treated as a string.
  message.add_repeated_string("abc");

  // 'd' happens to be a valid ENDGROUP tag.  So,
  // UnknownFieldSet::MergeFromCodedStream() will successfully parse "def", but
  // the ConsumedEntireMessage() check should fail.
  message.add_repeated_string("def");

  // A zero-length string should never be interpreted as a message even though
  // it is technically valid as one.
  message.add_repeated_string("");

  // Case which should be interpreted as a sub-message.

  // An actual nested message with content should always be interpreted as a
  // nested message.
  message.add_repeated_nested_message()->set_bb(123);

  // Serialization must succeed; otherwise the expectations below would be
  // checking output derived from incomplete data.
  string data;
  ASSERT_TRUE(message.SerializeToString(&data));

  // Re-read the bytes without a descriptor so every field is "unknown".
  string text;
  UnknownFieldSet unknown_fields;
  EXPECT_TRUE(unknown_fields.ParseFromString(data));
  EXPECT_TRUE(TextFormat::PrintUnknownFieldsToString(unknown_fields, &text));
  EXPECT_EQ(
    "44: \"abc\"\n"
    "44: \"def\"\n"
    "44: \"\"\n"
    "48 {\n"
    " 1: 123\n"
    "}\n",
    text);
}
TEST_F(TextFormatTest, ParseBasic) {
io::ArrayInputStream input_stream(proto_debug_string_.data(),
proto_debug_string_.size());

@ -20,6 +20,10 @@
#include <google/protobuf/unknown_field_set.h>
#include <google/protobuf/stubs/stl_util-inl.h>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/wire_format.h>
namespace google {
namespace protobuf {
@ -57,6 +61,34 @@ void UnknownFieldSet::MergeFrom(const UnknownFieldSet& other) {
}
}
// Reads fields from |input| and merges them into this set.
// The data is first collected in a scratch set and merged only when the
// parse succeeds and |input| reports that the entire message was consumed,
// so this set is not modified when false is returned.
bool UnknownFieldSet::MergeFromCodedStream(io::CodedInputStream* input) {
  UnknownFieldSet parsed;

  if (!internal::WireFormat::SkipMessage(input, &parsed)) {
    return false;
  }
  if (!input->ConsumedEntireMessage()) {
    return false;
  }

  MergeFrom(parsed);
  return true;
}
// Replaces the contents of this set with the fields read from |input|.
// Clear() runs before parsing, so fields held before the call are not
// restored when parsing fails.
bool UnknownFieldSet::ParseFromCodedStream(io::CodedInputStream* input) {
  Clear();
  const bool merged = MergeFromCodedStream(input);
  return merged;
}
// Parses this set from |input| by wrapping it in a CodedInputStream.
// Succeeds only if ParseFromCodedStream() succeeds and the coded stream
// reports that the entire message was consumed.
bool UnknownFieldSet::ParseFromZeroCopyStream(io::ZeroCopyInputStream* input) {
  io::CodedInputStream decoder(input);
  if (!ParseFromCodedStream(&decoder)) {
    return false;
  }
  return decoder.ConsumedEntireMessage();
}
bool UnknownFieldSet::ParseFromArray(const void* data, int size) {
io::ArrayInputStream input(data, size);
return ParseFromZeroCopyStream(&input);
}
const UnknownField* UnknownFieldSet::FindFieldByNumber(int number) const {
if (internal_ == NULL) return NULL;

@ -77,6 +77,17 @@ class LIBPROTOBUF_EXPORT UnknownFieldSet {
// the existing UnknownField.
UnknownField* AddField(int number);
// Parsing helpers -------------------------------------------------
// These work exactly like the similarly-named methods of Message.
// Reads fields from input and merges them into this set. Returns false,
// without merging anything, unless the parse succeeds and input reports
// ConsumedEntireMessage().
bool MergeFromCodedStream(io::CodedInputStream* input);
// Calls Clear() and then MergeFromCodedStream().
bool ParseFromCodedStream(io::CodedInputStream* input);
// Wraps input in a CodedInputStream and calls ParseFromCodedStream().
bool ParseFromZeroCopyStream(io::ZeroCopyInputStream* input);
// Parses from the size bytes starting at data.
bool ParseFromArray(const void* data, int size);
// Convenience wrapper: parses this set from the bytes held by |data| by
// forwarding its buffer and length to ParseFromArray().
inline bool ParseFromString(const string& data) {
  const int byte_count = static_cast<int>(data.size());
  return ParseFromArray(data.data(), byte_count);
}
private:
// "Active" fields are ones which have been added since the last time Clear()
// was called. Inactive fields are objects we are keeping around incase

Loading…
Cancel
Save