Added fuzzer for descriptor parsing/serialization, and fixed several bugs.

The initial motivation for this change was to fix a bug found by fuzzing.  The old fuzz test (built on `cc_fuzz_target()`) detected an infinite loop if a bytes field default has an unterminated `\x` escape.

To fix this bug while expanding fuzz coverage, I created a fuzz test that verifies that we can do a lossless round trip from descriptor -> DefPool -> descriptor.  We use C++ as the source of truth for whether a descriptor is valid or not, and what the canonical serialization back to protobuf form should be.

I wrote the new fuzz test using go/FuzzTest, which makes it easier and more readable to use an arbitrary `FileDescriptorSet` as input, while adding test cases for regressions.

The fuzz test highlighted a handful of errors that I subsequently fixed and added regression tests for:

1. The aforementioned unterminated `\x` bug.
2. We were not propagating the `edition` field.
3. We were missing the CheckIdent() check in a few places.
4. We were rejecting files with empty name, whereas C++ allows this.
5. There were a few bugs with escaping string defaults.

Since FuzzTest is Clang-only, I split the `FUZZ_TEST()` invocation from the regression tests, since the latter are portable and should be run on all platforms.  Only `FUZZ_TEST()` itself is in a google3/Clang-only file.

PiperOrigin-RevId: 506997362
pull/13171/head
Joshua Haberman 2 years ago committed by Copybara-Service
parent 0cc83d1adf
commit 6e1cbdfe09
  1. 2
      upb/hash/common.c
  2. 24
      upb/reflection/def_builder.c
  3. 17
      upb/reflection/def_builder_internal.h
  4. 4
      upb/reflection/def_builder_test.cc
  5. 6
      upb/reflection/def_pool.c
  6. 1
      upb/reflection/enum_def.c
  7. 17
      upb/reflection/field_def.c
  8. 26
      upb/reflection/file_def.c
  9. 1
      upb/reflection/file_def.h
  10. 1
      upb/reflection/message_def.c
  11. 1
      upb/reflection/service_def.c
  12. 11
      upb/test/BUILD
  13. 37
      upb/test/parse_text_proto.h
  14. 29
      upb/util/BUILD
  15. 171
      upb/util/def_to_proto.c
  16. 26
      upb/util/def_to_proto_fuzz_test.cc
  17. 134
      upb/util/def_to_proto_test.cc
  18. 117
      upb/util/def_to_proto_test.h
  19. 5
      upb/util/def_to_proto_test.proto

@ -80,7 +80,7 @@ char* upb_strdup2(const char* s, size_t len, upb_Arena* a) {
n = len + 1;
p = upb_Arena_Malloc(a, n);
if (p) {
memcpy(p, s, len);
if (len != 0) memcpy(p, s, len);
p[len] = 0;
}
return p;

@ -73,9 +73,27 @@ void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) {
_upb_DefBuilder_FailJmp(ctx);
}
// Verify a relative identifier string. The loop is branchless for speed.
static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx,
upb_StringView name) {
bool good = name.size > 0;
for (size_t i = 0; i < name.size; i++) {
const char c = name.data[i];
const char d = c | 0x20; // force lowercase
const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_');
const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0);
good &= is_alpha | is_numer;
}
if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false);
}
const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx,
const char* prefix,
upb_StringView name) {
_upb_DefBuilder_CheckIdentNotFull(ctx, name);
if (prefix) {
// ret = prefix + '.' + name;
size_t n = strlen(prefix);
@ -191,7 +209,7 @@ static bool TryGetChar(const char** src, const char* end, char* ch) {
return true;
}
static char TryGetHexDigit(const char** src, const char* end) {
static int TryGetHexDigit(const char** src, const char* end) {
char ch;
if (!TryGetChar(src, end, &ch)) return -1;
if ('0' <= ch && ch <= '9') {
@ -208,10 +226,10 @@ static char TryGetHexDigit(const char** src, const char* end) {
static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx,
const upb_FieldDef* f,
const char** src, const char* end) {
char hex_digit = TryGetHexDigit(src, end);
int hex_digit = TryGetHexDigit(src, end);
if (hex_digit < 0) {
_upb_DefBuilder_Errf(
ctx, "\\x cannot be followed by non-hex digit in field '%s' default",
ctx, "\\x must be followed by at least one hex digit (field='%s')",
upb_FieldDef_FullName(f));
return 0;
}

@ -127,23 +127,6 @@ UPB_INLINE upb_FileDef* _upb_DefBuilder_File(const upb_DefBuilder* ctx) {
void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name,
bool full);
// Verify a relative identifier string. The loop is branchless for speed.
UPB_INLINE void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx,
upb_StringView name) {
bool good = name.size > 0;
for (size_t i = 0; i < name.size; i++) {
const char c = name.data[i];
const char d = c | 0x20; // force lowercase
const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_');
const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0);
good &= is_alpha | is_numer;
}
if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false);
}
// Verify a full identifier string. This is slightly more complicated than
// verifying a relative identifier string because we must track '.' chars.
UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx,

@ -55,7 +55,9 @@ TEST(DefBuilder, TestIdents) {
upb_StringView sv;
upb_Status status;
upb_DefBuilder ctx;
upb::Arena arena;
ctx.status = &status;
ctx.arena = arena.ptr();
upb_Status_Clear(&status);
for (const auto& test : FullIdentTests) {
@ -77,7 +79,7 @@ TEST(DefBuilder, TestIdents) {
if (UPB_SETJMP(ctx.err)) {
EXPECT_FALSE(test.ok);
} else {
_upb_DefBuilder_CheckIdentNotFull(&ctx, sv);
_upb_DefBuilder_MakeFullName(&ctx, "abc", sv);
EXPECT_TRUE(test.ok);
}
}

@ -318,12 +318,6 @@ static const upb_FileDef* _upb_DefPool_AddFile(
const upb_MiniTableFile* layout, upb_Status* status) {
const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto);
if (name.size == 0) {
upb_Status_SetErrorFormat(status,
"missing name in google_protobuf_FileDescriptorProto");
return NULL;
}
// Determine whether we already know about this file.
{
upb_value v;

@ -238,7 +238,6 @@ static void create_enumdef(upb_DefBuilder* ctx, const char* prefix,
e->file = _upb_DefBuilder_File(ctx);
name = UPB_DESC(EnumDescriptorProto_name)(enum_proto);
_upb_DefBuilder_CheckIdentNotFull(ctx, name);
e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name);
_upb_DefBuilder_Add(ctx, e->full_name,

@ -549,7 +549,14 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix,
}
const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto);
_upb_DefBuilder_CheckIdentNotFull(ctx, name);
f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name);
f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto);
f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto);
f->is_proto3_optional =
UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto);
f->msgdef = m;
f->scope.oneof = NULL;
f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto);
if (f->has_json_name) {
@ -561,14 +568,6 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix,
}
if (!f->json_name) _upb_DefBuilder_OomErr(ctx);
f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name);
f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto);
f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto);
f->is_proto3_optional =
UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto);
f->msgdef = m;
f->scope.oneof = NULL;
const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto);
const bool has_type_name =
UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto);

@ -40,6 +40,7 @@ struct upb_FileDef {
const UPB_DESC(FileOptions) * opts;
const char* name;
const char* package;
const char* edition;
const upb_FileDef** deps;
const int32_t* public_deps;
@ -76,6 +77,10 @@ const char* upb_FileDef_Package(const upb_FileDef* f) {
return f->package ? f->package : "";
}
const char* upb_FileDef_Edition(const upb_FileDef* f) {
return f->edition ? f->edition : "";
}
const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; }
upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; }
@ -223,13 +228,14 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx,
}
}
if (!UPB_DESC(FileDescriptorProto_has_name)(file_proto)) {
_upb_DefBuilder_Errf(ctx, "File has no name");
upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto);
file->name = strviewdup(ctx, name);
if (strlen(file->name) != name.size) {
_upb_DefBuilder_Errf(ctx, "File name contained embedded NULL");
}
file->name = strviewdup(ctx, UPB_DESC(FileDescriptorProto_name)(file_proto));
upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto);
if (package.size) {
_upb_DefBuilder_CheckIdentFull(ctx, package);
file->package = strviewdup(ctx, package);
@ -237,6 +243,18 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx,
file->package = NULL;
}
upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto);
if (edition.size == 0) {
file->edition = NULL;
} else {
// TODO(b/267770604): How should we validate this?
file->edition = strviewdup(ctx, edition);
if (strlen(file->edition) != edition.size) {
_upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL");
}
}
if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) {
upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto);

@ -45,6 +45,7 @@ bool upb_FileDef_HasOptions(const upb_FileDef* f);
const char* upb_FileDef_Name(const upb_FileDef* f);
const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f);
const char* upb_FileDef_Package(const upb_FileDef* f);
const char* upb_FileDef_Edition(const upb_FileDef* f);
const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f);
const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i);

@ -610,7 +610,6 @@ static void create_msgdef(upb_DefBuilder* ctx, const char* prefix,
m->is_sorted = true;
name = UPB_DESC(DescriptorProto_name)(msg_proto);
_upb_DefBuilder_CheckIdentNotFull(ctx, name);
m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name);
_upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG));

@ -100,7 +100,6 @@ static void create_service(upb_DefBuilder* ctx,
s->file = _upb_DefBuilder_File(ctx);
name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto);
_upb_DefBuilder_CheckIdentNotFull(ctx, name);
const char* package = _upb_FileDef_RawPackage(s->file);
s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name);
_upb_DefBuilder_Add(ctx, s->full_name,

@ -33,6 +33,17 @@ load(
"upb_proto_reflection_library",
)
cc_library(
name = "parse_text_proto",
testonly = 1,
hdrs = ["parse_text_proto.h"],
visibility = ["//:__subpackages__"],
deps = [
"@com_google_googletest//:gtest",
"@com_google_protobuf//:protobuf",
],
)
proto_library(
name = "empty_proto",
srcs = ["empty.proto"],

@ -0,0 +1,37 @@
#ifndef UPB_UPB_TEST_PARSE_TEXT_PROTO_H_
#define UPB_UPB_TEST_PARSE_TEXT_PROTO_H_
#include <string>
#include "gtest/gtest.h"
#include "google/protobuf/message.h"
#include "google/protobuf/text_format.h"
namespace upb_test {
// Replacement for Google ParseTextProtoOrDie.
// Only to be used in unit tests.
// Usage: MyMessage msg = ParseTextProtoOrDie(my_text_proto);
class ParseTextProtoOrDie {
public:
explicit ParseTextProtoOrDie(absl::string_view text_proto)
: text_proto_(text_proto) {}
template <class T>
operator T() { // NOLINT: Needed to support parsing text proto as appropriate
// type.
T message;
if (!google::protobuf::TextFormat::ParseFromString(text_proto_, &message)) {
ADD_FAILURE() << "Failed to parse textproto: " << text_proto_;
abort();
}
return message;
}
private:
std::string text_proto_;
};
} // namespace upb_test
#endif // UPB_UPB_TEST_PARSE_TEXT_PROTO_H_

@ -43,22 +43,51 @@ upb_proto_reflection_library(
deps = ["def_to_proto_test_proto"],
)
cc_library(
name = "def_to_proto_test_lib",
testonly = 1,
hdrs = ["def_to_proto_test.h"],
deps = [
":def_to_proto",
"//:descriptor_upb_proto",
"//:reflection_internal",
"//:upb",
"@com_google_googletest//:gtest",
"@com_google_protobuf//:protobuf",
],
)
cc_test(
name = "def_to_proto_test",
srcs = ["def_to_proto_test.cc"],
deps = [
":def_to_proto",
":def_to_proto_test_lib",
":def_to_proto_test_upb_proto",
":def_to_proto_test_upb_proto_reflection",
"//:descriptor_upb_proto_reflection",
"//:reflection",
"//:upb",
"//upb/test:parse_text_proto",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
"@com_google_protobuf//:protobuf",
],
)
# begin:google_only
# cc_test(
# name = "def_to_proto_fuzz_test",
# srcs = ["def_to_proto_fuzz_test.cc"],
# tags = ["clang_only"],
# deps = [
# ":def_to_proto_test_lib",
# "//testing/fuzzing:fuzztest",
# "@com_google_googletest//:gtest_main",
# ],
# )
# end:google_only
# Required fields
cc_library(

@ -51,17 +51,16 @@ typedef struct {
// We want to copy the options verbatim into the destination options proto.
// We use serialize+parse as our deep copy.
#define SET_OPTIONS(proto, desc_type, options_type, src) \
{ \
size_t size; \
/* MEM: could use a temporary arena here instead. */ \
char* pb = \
google_protobuf_##options_type##_serialize(src, ctx->arena, &size); \
CHK_OOM(pb); \
google_protobuf_##options_type* dst = \
google_protobuf_##options_type##_parse(pb, size, ctx->arena); \
CHK_OOM(dst); \
google_protobuf_##desc_type##_set_options(proto, dst); \
#define SET_OPTIONS(proto, desc_type, options_type, src) \
{ \
size_t size; \
/* MEM: could use a temporary arena here instead. */ \
char* pb = google_protobuf_##options_type##_serialize(src, ctx->arena, &size); \
CHK_OOM(pb); \
google_protobuf_##options_type* dst = \
google_protobuf_##options_type##_parse(pb, size, ctx->arena); \
CHK_OOM(dst); \
google_protobuf_##desc_type##_set_options(proto, dst); \
}
static upb_StringView strviewdup2(upb_ToProto_Context* ctx,
@ -101,11 +100,46 @@ static upb_StringView printf_dup(upb_ToProto_Context* ctx, const char* fmt,
static bool upb_isprint(char ch) { return ch >= 0x20 && ch <= 0x7f; }
static int special_escape(char ch) {
switch (ch) {
case '\a':
return 'a';
case '\b':
return 'b';
case '\f':
return 'f';
case '\n':
return 'n';
case '\r':
return 'r';
case '\t':
return 't';
case '\v':
return 'v';
case '\\':
return '\\';
case '\'':
return '\'';
case '"':
return '"';
case '\?':
return '?';
default:
return -1;
}
}
static upb_StringView default_bytes(upb_ToProto_Context* ctx,
upb_StringView val) {
size_t n = 0;
for (size_t i = 0; i < val.size; i++) {
n += upb_isprint(val.data[i]) ? 1 : 4; // '\123'
char ch = val.data[i];
if (special_escape(ch) >= 0)
n += 2; // '\C'
else if (upb_isprint(ch))
n += 1;
else
n += 4; // '\123'
}
char* p = upb_Arena_Malloc(ctx->arena, n);
CHK_OOM(p);
@ -114,7 +148,10 @@ static upb_StringView default_bytes(upb_ToProto_Context* ctx,
const char* end = src + val.size;
while (src < end) {
unsigned char ch = *src++;
if (upb_isprint(ch)) {
if (special_escape(ch) >= 0) {
*dst++ = '\\';
*dst++ = (char)special_escape(ch);
} else if (upb_isprint(ch)) {
*dst++ = ch;
} else {
*dst++ = '\\';
@ -200,16 +237,15 @@ static google_protobuf_EnumDescriptorProto_EnumReservedRange* enumresrange_topro
return proto;
}
static google_protobuf_FieldDescriptorProto* fielddef_toproto(
upb_ToProto_Context* ctx, const upb_FieldDef* f) {
static google_protobuf_FieldDescriptorProto* fielddef_toproto(upb_ToProto_Context* ctx,
const upb_FieldDef* f) {
google_protobuf_FieldDescriptorProto* proto =
google_protobuf_FieldDescriptorProto_new(ctx->arena);
CHK_OOM(proto);
google_protobuf_FieldDescriptorProto_set_name(
proto, strviewdup(ctx, upb_FieldDef_Name(f)));
google_protobuf_FieldDescriptorProto_set_number(proto,
upb_FieldDef_Number(f));
google_protobuf_FieldDescriptorProto_set_name(proto,
strviewdup(ctx, upb_FieldDef_Name(f)));
google_protobuf_FieldDescriptorProto_set_number(proto, upb_FieldDef_Number(f));
google_protobuf_FieldDescriptorProto_set_label(proto, upb_FieldDef_Label(f));
google_protobuf_FieldDescriptorProto_set_type(proto, upb_FieldDef_Type(f));
@ -234,14 +270,13 @@ static google_protobuf_FieldDescriptorProto* fielddef_toproto(
}
if (upb_FieldDef_HasDefault(f)) {
google_protobuf_FieldDescriptorProto_set_default_value(
proto, default_string(ctx, f));
google_protobuf_FieldDescriptorProto_set_default_value(proto,
default_string(ctx, f));
}
const upb_OneofDef* o = upb_FieldDef_ContainingOneof(f);
if (o) {
google_protobuf_FieldDescriptorProto_set_oneof_index(proto,
upb_OneofDef_Index(o));
google_protobuf_FieldDescriptorProto_set_oneof_index(proto, upb_OneofDef_Index(o));
}
if (_upb_FieldDef_IsProto3Optional(f)) {
@ -256,14 +291,14 @@ static google_protobuf_FieldDescriptorProto* fielddef_toproto(
return proto;
}
static google_protobuf_OneofDescriptorProto* oneofdef_toproto(
upb_ToProto_Context* ctx, const upb_OneofDef* o) {
static google_protobuf_OneofDescriptorProto* oneofdef_toproto(upb_ToProto_Context* ctx,
const upb_OneofDef* o) {
google_protobuf_OneofDescriptorProto* proto =
google_protobuf_OneofDescriptorProto_new(ctx->arena);
CHK_OOM(proto);
google_protobuf_OneofDescriptorProto_set_name(
proto, strviewdup(ctx, upb_OneofDef_Name(o)));
google_protobuf_OneofDescriptorProto_set_name(proto,
strviewdup(ctx, upb_OneofDef_Name(o)));
if (upb_OneofDef_HasOptions(o)) {
SET_OPTIONS(proto, OneofDescriptorProto, OneofOptions,
@ -281,8 +316,7 @@ static google_protobuf_EnumValueDescriptorProto* enumvaldef_toproto(
google_protobuf_EnumValueDescriptorProto_set_name(
proto, strviewdup(ctx, upb_EnumValueDef_Name(e)));
google_protobuf_EnumValueDescriptorProto_set_number(
proto, upb_EnumValueDef_Number(e));
google_protobuf_EnumValueDescriptorProto_set_number(proto, upb_EnumValueDef_Number(e));
if (upb_EnumValueDef_HasOptions(e)) {
SET_OPTIONS(proto, EnumValueDescriptorProto, EnumValueOptions,
@ -292,14 +326,14 @@ static google_protobuf_EnumValueDescriptorProto* enumvaldef_toproto(
return proto;
}
static google_protobuf_EnumDescriptorProto* enumdef_toproto(
upb_ToProto_Context* ctx, const upb_EnumDef* e) {
static google_protobuf_EnumDescriptorProto* enumdef_toproto(upb_ToProto_Context* ctx,
const upb_EnumDef* e) {
google_protobuf_EnumDescriptorProto* proto =
google_protobuf_EnumDescriptorProto_new(ctx->arena);
CHK_OOM(proto);
google_protobuf_EnumDescriptorProto_set_name(
proto, strviewdup(ctx, upb_EnumDef_Name(e)));
google_protobuf_EnumDescriptorProto_set_name(proto,
strviewdup(ctx, upb_EnumDef_Name(e)));
int n = upb_EnumDef_ValueCount(e);
google_protobuf_EnumValueDescriptorProto** vals =
@ -337,10 +371,10 @@ static google_protobuf_DescriptorProto_ExtensionRange* extrange_toproto(
google_protobuf_DescriptorProto_ExtensionRange_new(ctx->arena);
CHK_OOM(proto);
google_protobuf_DescriptorProto_ExtensionRange_set_start(
proto, upb_ExtensionRange_Start(e));
google_protobuf_DescriptorProto_ExtensionRange_set_end(
proto, upb_ExtensionRange_End(e));
google_protobuf_DescriptorProto_ExtensionRange_set_start(proto,
upb_ExtensionRange_Start(e));
google_protobuf_DescriptorProto_ExtensionRange_set_end(proto,
upb_ExtensionRange_End(e));
if (upb_ExtensionRange_HasOptions(e)) {
SET_OPTIONS(proto, DescriptorProto_ExtensionRange, ExtensionRangeOptions,
@ -350,14 +384,13 @@ static google_protobuf_DescriptorProto_ExtensionRange* extrange_toproto(
return proto;
}
static google_protobuf_DescriptorProto* msgdef_toproto(
upb_ToProto_Context* ctx, const upb_MessageDef* m) {
google_protobuf_DescriptorProto* proto =
google_protobuf_DescriptorProto_new(ctx->arena);
static google_protobuf_DescriptorProto* msgdef_toproto(upb_ToProto_Context* ctx,
const upb_MessageDef* m) {
google_protobuf_DescriptorProto* proto = google_protobuf_DescriptorProto_new(ctx->arena);
CHK_OOM(proto);
google_protobuf_DescriptorProto_set_name(
proto, strviewdup(ctx, upb_MessageDef_Name(m)));
google_protobuf_DescriptorProto_set_name(proto,
strviewdup(ctx, upb_MessageDef_Name(m)));
int n;
@ -427,14 +460,14 @@ static google_protobuf_DescriptorProto* msgdef_toproto(
return proto;
}
static google_protobuf_MethodDescriptorProto* methoddef_toproto(
upb_ToProto_Context* ctx, const upb_MethodDef* m) {
static google_protobuf_MethodDescriptorProto* methoddef_toproto(upb_ToProto_Context* ctx,
const upb_MethodDef* m) {
google_protobuf_MethodDescriptorProto* proto =
google_protobuf_MethodDescriptorProto_new(ctx->arena);
CHK_OOM(proto);
google_protobuf_MethodDescriptorProto_set_name(
proto, strviewdup(ctx, upb_MethodDef_Name(m)));
google_protobuf_MethodDescriptorProto_set_name(proto,
strviewdup(ctx, upb_MethodDef_Name(m)));
google_protobuf_MethodDescriptorProto_set_input_type(
proto,
@ -470,8 +503,7 @@ static google_protobuf_ServiceDescriptorProto* servicedef_toproto(
size_t n = upb_ServiceDef_MethodCount(s);
google_protobuf_MethodDescriptorProto** methods =
google_protobuf_ServiceDescriptorProto_resize_method(proto, n,
ctx->arena);
google_protobuf_ServiceDescriptorProto_resize_method(proto, n, ctx->arena);
for (int i = 0; i < n; i++) {
methods[i] = methoddef_toproto(ctx, upb_ServiceDef_Method(s, i));
}
@ -484,63 +516,65 @@ static google_protobuf_ServiceDescriptorProto* servicedef_toproto(
return proto;
}
static google_protobuf_FileDescriptorProto* filedef_toproto(
upb_ToProto_Context* ctx, const upb_FileDef* f) {
static google_protobuf_FileDescriptorProto* filedef_toproto(upb_ToProto_Context* ctx,
const upb_FileDef* f) {
google_protobuf_FileDescriptorProto* proto =
google_protobuf_FileDescriptorProto_new(ctx->arena);
CHK_OOM(proto);
google_protobuf_FileDescriptorProto_set_name(
proto, strviewdup(ctx, upb_FileDef_Name(f)));
google_protobuf_FileDescriptorProto_set_name(proto,
strviewdup(ctx, upb_FileDef_Name(f)));
const char* package = upb_FileDef_Package(f);
if (package) {
size_t n = strlen(package);
if (n) {
google_protobuf_FileDescriptorProto_set_package(
proto, strviewdup(ctx, upb_FileDef_Package(f)));
google_protobuf_FileDescriptorProto_set_package(proto, strviewdup(ctx, package));
}
}
const char* edition = upb_FileDef_Edition(f);
if (edition != NULL) {
size_t n = strlen(edition);
if (n != 0) {
google_protobuf_FileDescriptorProto_set_edition(proto, strviewdup(ctx, edition));
}
}
if (upb_FileDef_Syntax(f) == kUpb_Syntax_Proto3) {
google_protobuf_FileDescriptorProto_set_syntax(proto,
strviewdup(ctx, "proto3"));
google_protobuf_FileDescriptorProto_set_syntax(proto, strviewdup(ctx, "proto3"));
}
size_t n;
n = upb_FileDef_DependencyCount(f);
upb_StringView* deps = google_protobuf_FileDescriptorProto_resize_dependency(
proto, n, ctx->arena);
upb_StringView* deps =
google_protobuf_FileDescriptorProto_resize_dependency(proto, n, ctx->arena);
for (int i = 0; i < n; i++) {
deps[i] = strviewdup(ctx, upb_FileDef_Name(upb_FileDef_Dependency(f, i)));
}
n = upb_FileDef_PublicDependencyCount(f);
int32_t* public_deps =
google_protobuf_FileDescriptorProto_resize_public_dependency(proto, n,
ctx->arena);
google_protobuf_FileDescriptorProto_resize_public_dependency(proto, n, ctx->arena);
const int32_t* public_dep_nums = _upb_FileDef_PublicDependencyIndexes(f);
if (n) memcpy(public_deps, public_dep_nums, n * sizeof(int32_t));
n = upb_FileDef_WeakDependencyCount(f);
int32_t* weak_deps =
google_protobuf_FileDescriptorProto_resize_weak_dependency(proto, n,
ctx->arena);
google_protobuf_FileDescriptorProto_resize_weak_dependency(proto, n, ctx->arena);
const int32_t* weak_dep_nums = _upb_FileDef_WeakDependencyIndexes(f);
if (n) memcpy(weak_deps, weak_dep_nums, n * sizeof(int32_t));
n = upb_FileDef_TopLevelMessageCount(f);
google_protobuf_DescriptorProto** msgs =
google_protobuf_FileDescriptorProto_resize_message_type(proto, n,
ctx->arena);
google_protobuf_FileDescriptorProto_resize_message_type(proto, n, ctx->arena);
for (int i = 0; i < n; i++) {
msgs[i] = msgdef_toproto(ctx, upb_FileDef_TopLevelMessage(f, i));
}
n = upb_FileDef_TopLevelEnumCount(f);
google_protobuf_EnumDescriptorProto** enums =
google_protobuf_FileDescriptorProto_resize_enum_type(proto, n,
ctx->arena);
google_protobuf_FileDescriptorProto_resize_enum_type(proto, n, ctx->arena);
for (int i = 0; i < n; i++) {
enums[i] = enumdef_toproto(ctx, upb_FileDef_TopLevelEnum(f, i));
}
@ -554,8 +588,7 @@ static google_protobuf_FileDescriptorProto* filedef_toproto(
n = upb_FileDef_TopLevelExtensionCount(f);
google_protobuf_FieldDescriptorProto** exts =
google_protobuf_FileDescriptorProto_resize_extension(proto, n,
ctx->arena);
google_protobuf_FileDescriptorProto_resize_extension(proto, n, ctx->arena);
for (int i = 0; i < n; i++) {
exts[i] = fielddef_toproto(ctx, upb_FileDef_TopLevelExtension(f, i));
}

@ -0,0 +1,26 @@
#include <string>
#include "google/protobuf/descriptor.proto.h"
#include "gtest/gtest.h"
#include "testing/fuzzing/fuzztest.h"
#include "upb/util/def_to_proto_test.h"
namespace upb_test {
FUZZ_TEST(FuzzTest, RoundTripDescriptor)
.WithDomains(
::fuzztest::Arbitrary<google::protobuf::FileDescriptorSet>().WithProtobufField(
"file",
::fuzztest::Arbitrary<google::protobuf::FileDescriptorProto>()
// upb_FileDef_ToProto() does not attempt to preserve
// source_code_info.
.WithFieldUnset("source_code_info")
.WithProtobufField(
"service",
::fuzztest::Arbitrary<google::protobuf::ServiceDescriptorProto>()
// streams are google3-only, and we do not currently
// attempt to preserve them.
.WithFieldUnset("stream"))));
} // namespace upb_test

@ -27,6 +27,9 @@
#include "upb/util/def_to_proto.h"
#include <memory>
#include <string>
#include "google/protobuf/descriptor.pb.h"
#include "google/protobuf/descriptor.upbdefs.h"
#include "gmock/gmock.h"
@ -34,9 +37,13 @@
#include "google/protobuf/dynamic_message.h"
#include "google/protobuf/util/message_differencer.h"
#include "upb/reflection/def.hpp"
#include "upb/test/parse_text_proto.h"
#include "upb/upb.hpp"
#include "upb/util/def_to_proto_test.h"
#include "upb/util/def_to_proto_test.upbdefs.h"
namespace upb_test {
// Loads and retrieves a descriptor for `msgdef` into the given `pool`.
const google::protobuf::Descriptor* AddMessageDescriptor(
upb::MessageDefPtr msgdef, google::protobuf::DescriptorPool* pool) {
@ -144,3 +151,130 @@ TEST(DefToProto, TestRuntimeReflection) {
upb_util_def_to_proto_test_proto_upbdefinit.filename);
CheckFile(file, file_desc);
}
// Fuzz test regressions.
TEST(FuzzTest, EmptyPackage) {
RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file { package: "" })pb"));
}
TEST(FuzzTest, EmptyName) {
RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file { name: "" })pb"));
}
TEST(FuzzTest, EmptyPackage2) {
RoundTripDescriptor(
ParseTextProtoOrDie(R"pb(file { name: "n" package: "" })pb"));
}
TEST(FuzzTest, FileNameEmbeddedNull) {
RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file { name: "\000" })pb"));
}
TEST(FuzzTest, EditionEmbeddedNull) {
RoundTripDescriptor(
ParseTextProtoOrDie(R"pb(file { name: "n" edition: "\000" })pb"));
}
TEST(FuzzTest, NanValue) {
RoundTripDescriptor(ParseTextProtoOrDie(
R"pb(file {
enum_type {
value {
number: 0
options { uninterpreted_option { double_value: nan } }
}
}
})pb"));
}
TEST(FuzzTest, EnumValueEmbeddedNull) {
RoundTripDescriptor(ParseTextProtoOrDie(
R"pb(file {
name: "\035"
enum_type {
name: "f"
value { name: "\000" number: 0 }
}
})pb"));
}
TEST(FuzzTest, EnumValueNoNumber) {
RoundTripDescriptor(ParseTextProtoOrDie(
R"pb(file {
name: "\035"
enum_type {
name: "f"
value { name: "abc" }
}
})pb"));
}
TEST(FuzzTest, DefaultWithUnterminatedHex) {
RoundTripDescriptor(ParseTextProtoOrDie(
R"pb(file {
name: "\035"
message_type {
name: "A"
field {
name: "f"
number: 1
label: LABEL_OPTIONAL
type: TYPE_BYTES
default_value: "\\x"
}
}
})pb"));
}
TEST(FuzzTest, DefaultWithValidHexEscape) {
RoundTripDescriptor(ParseTextProtoOrDie(
R"pb(file {
name: "\035"
message_type {
name: "A"
field {
name: "f"
number: 1
label: LABEL_OPTIONAL
type: TYPE_BYTES
default_value: "\\x03"
}
}
})pb"));
}
TEST(FuzzTest, DefaultWithValidHexEscapePrintable) {
RoundTripDescriptor(ParseTextProtoOrDie(
R"pb(file {
name: "\035"
message_type {
name: "A"
field {
name: "f"
number: 1
label: LABEL_OPTIONAL
type: TYPE_BYTES
default_value: "\\x23" # 0x32 = '#'
}
}
})pb"));
}
// begin:google_only
// TEST(FuzzTest, DependencyWithEmbeddedNull) {
// RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file {
// name: "a"
// dependency: "a\000"
// options { cc_api_version: 0 }
// weak_dependency: 0
// })pb"));
// }
// end:google_only
TEST(FuzzTest, PackageStartsWithNumber) {
RoundTripDescriptor(
ParseTextProtoOrDie(R"pb(file { name: "" package: "0" })pb"));
}
} // namespace upb_test

@ -0,0 +1,117 @@
#ifndef UPB_UTIL_DEF_TO_PROTO_TEST_H_
#define UPB_UTIL_DEF_TO_PROTO_TEST_H_
#include <string>
#include "google/protobuf/descriptor.pb.h"
#include "google/protobuf/descriptor.upb.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/dynamic_message.h"
#include "google/protobuf/util/field_comparator.h"
#include "upb/reflection/def.hpp"
#include "upb/upb.hpp"
#include "upb/util/def_to_proto.h"
namespace upb_test {
// A gtest matcher that verifies that a proto is equal to `proto`. Both `proto`
// and `arg` must be messages of type `msgdef_func` (a .upbdefs.h function that
// loads a known msgdef into the given defpool).
MATCHER_P(EqualsProtoTreatNansAsEqual, proto,
negation ? "are not equal" : "are equal") {
upb::DefPool defpool;
google::protobuf::DescriptorPool pool;
google::protobuf::DynamicMessageFactory factory;
std::string differences;
google::protobuf::util::DefaultFieldComparator comparator;
comparator.set_treat_nan_as_equal(true);
google::protobuf::util::MessageDifferencer differencer;
differencer.set_field_comparator(&comparator);
differencer.ReportDifferencesToString(&differences);
bool eq = differencer.Equals(proto, arg);
if (!eq) {
*result_listener << differences;
}
return eq;
}
class NullErrorCollector : public google::protobuf::DescriptorPool::ErrorCollector {
void AddError(const std::string& filename, const std::string& element_name,
const google::protobuf::Message* descriptor, ErrorLocation location,
const std::string& message) override {}
void RecordWarning(absl::string_view filename, absl::string_view element_name,
const google::protobuf::Message* descriptor, ErrorLocation location,
absl::string_view message) override {}
};
static void AddFile(google::protobuf::FileDescriptorProto& file, upb::DefPool* pool,
google::protobuf::DescriptorPool* desc_pool) {
NullErrorCollector collector;
const google::protobuf::FileDescriptor* file_desc =
desc_pool->BuildFileCollectingErrors(file, &collector);
if (file_desc != nullptr) {
// The file descriptor was valid according to proto2.
google::protobuf::FileDescriptorProto normalized_file;
file_desc->CopyTo(&normalized_file);
std::string serialized;
normalized_file.SerializeToString(&serialized);
upb::Arena arena;
upb::Status status;
google_protobuf_FileDescriptorProto* proto = google_protobuf_FileDescriptorProto_parse(
serialized.data(), serialized.size(), arena.ptr());
ASSERT_NE(proto, nullptr);
upb::FileDefPtr file_def = pool->AddFile(proto, &status);
// Ideally we could assert that file_def is present here. After all, any
// descriptor accepted by C++ should be by definition valid. However C++
// performs some of its validation at the .proto file parser level instead
// of when validating descriptors. As as result, C++ will accept some
// unreasonable descriptors like:
// file { name: "" package: "0" }
//
// There is no .proto file that will produce this descriptor, but
// BuildFile() accepts it. We should probably clean up these cases so C++
// will reject them too.
if (!file_def) return;
ASSERT_TRUE(status.ok()) << status.error_message();
google_protobuf_FileDescriptorProto* upb_proto =
upb_FileDef_ToProto(file_def.ptr(), arena.ptr());
size_t size;
const char* buf =
google_protobuf_FileDescriptorProto_serialize(upb_proto, arena.ptr(), &size);
google::protobuf::FileDescriptorProto google_proto;
bool ok = google_proto.ParseFromArray(buf, size);
ASSERT_TRUE(ok);
EXPECT_THAT(google_proto, EqualsProtoTreatNansAsEqual(normalized_file));
} else {
// This file was invalid according to proto2. When we parse it with upb,
// it may or may not be accepted, since upb does not perform as much
// validation as proto2. However it must not crash.
std::string serialized;
file.SerializeToString(&serialized);
upb::Arena arena;
upb::Status status;
google_protobuf_FileDescriptorProto* proto = google_protobuf_FileDescriptorProto_parse(
serialized.data(), serialized.size(), arena.ptr());
ASSERT_NE(proto, nullptr);
pool->AddFile(proto, &status);
}
}
inline void RoundTripDescriptor(const google::protobuf::FileDescriptorSet& set) {
upb::DefPool defpool;
google::protobuf::DescriptorPool desc_pool;
desc_pool.EnforceWeakDependencies(true);
for (const auto& file : set.file()) {
google::protobuf::FileDescriptorProto mutable_file(file);
AddFile(mutable_file, &defpool, &desc_pool);
}
}
} // namespace upb_test
#endif // UPB_UTIL_DEF_TO_PROTO_TEST_H_

@ -117,3 +117,8 @@ message MessageSetItem {
optional MessageSetItem message_set_extension = 2147483646;
}
}
message UnusualDefaults {
optional bytes foo = 1 [default = "\\X"];
optional string bar = 2 [default = "\\X"];
}

Loading…
Cancel
Save