diff --git a/upb/hash/common.c b/upb/hash/common.c index efcbf1398e..1e32f36f8a 100644 --- a/upb/hash/common.c +++ b/upb/hash/common.c @@ -80,7 +80,7 @@ char* upb_strdup2(const char* s, size_t len, upb_Arena* a) { n = len + 1; p = upb_Arena_Malloc(a, n); if (p) { - memcpy(p, s, len); + if (len != 0) memcpy(p, s, len); p[len] = 0; } return p; diff --git a/upb/reflection/def_builder.c b/upb/reflection/def_builder.c index c79f45e37d..a23a8721e0 100644 --- a/upb/reflection/def_builder.c +++ b/upb/reflection/def_builder.c @@ -73,9 +73,27 @@ void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { _upb_DefBuilder_FailJmp(ctx); } +// Verify a relative identifier string. The loop is branchless for speed. +static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; + + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); + + good &= is_alpha | is_numer; + } + + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); +} + const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, const char* prefix, upb_StringView name) { + _upb_DefBuilder_CheckIdentNotFull(ctx, name); if (prefix) { // ret = prefix + '.' + name; size_t n = strlen(prefix); @@ -191,7 +209,7 @@ static bool TryGetChar(const char** src, const char* end, char* ch) { return true; } -static char TryGetHexDigit(const char** src, const char* end) { +static int TryGetHexDigit(const char** src, const char* end) { char ch; if (!TryGetChar(src, end, &ch)) return -1; if ('0' <= ch && ch <= '9') { @@ -208,10 +226,10 @@ static char TryGetHexDigit(const char** src, const char* end) { static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, const char** src, const char* end) { - char hex_digit = TryGetHexDigit(src, end); + int hex_digit = TryGetHexDigit(src, end); if (hex_digit < 0) { _upb_DefBuilder_Errf( - ctx, "\\x cannot be followed by non-hex digit in field '%s' default", + ctx, "\\x must be followed by at least one hex digit (field='%s')", upb_FieldDef_FullName(f)); return 0; } diff --git a/upb/reflection/def_builder_internal.h b/upb/reflection/def_builder_internal.h index d8e2f950eb..a4725c38cd 100644 --- a/upb/reflection/def_builder_internal.h +++ b/upb/reflection/def_builder_internal.h @@ -127,23 +127,6 @@ UPB_INLINE upb_FileDef* _upb_DefBuilder_File(const upb_DefBuilder* ctx) { void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, bool full); -// Verify a relative identifier string. The loop is branchless for speed. -UPB_INLINE void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, - upb_StringView name) { - bool good = name.size > 0; - - for (size_t i = 0; i < name.size; i++) { - const char c = name.data[i]; - const char d = c | 0x20; // force lowercase - const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); - const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); - - good &= is_alpha | is_numer; - } - - if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); -} - // Verify a full identifier string. This is slightly more complicated than // verifying a relative identifier string because we must track '.' chars. UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, diff --git a/upb/reflection/def_builder_test.cc b/upb/reflection/def_builder_test.cc index e8cfcc468c..b57ae285e0 100644 --- a/upb/reflection/def_builder_test.cc +++ b/upb/reflection/def_builder_test.cc @@ -55,7 +55,9 @@ TEST(DefBuilder, TestIdents) { upb_StringView sv; upb_Status status; upb_DefBuilder ctx; + upb::Arena arena; ctx.status = &status; + ctx.arena = arena.ptr(); upb_Status_Clear(&status); for (const auto& test : FullIdentTests) { @@ -77,7 +79,7 @@ TEST(DefBuilder, TestIdents) { if (UPB_SETJMP(ctx.err)) { EXPECT_FALSE(test.ok); } else { - _upb_DefBuilder_CheckIdentNotFull(&ctx, sv); + _upb_DefBuilder_MakeFullName(&ctx, "abc", sv); EXPECT_TRUE(test.ok); } } diff --git a/upb/reflection/def_pool.c b/upb/reflection/def_pool.c index c4800609aa..a3250cd8fe 100644 --- a/upb/reflection/def_pool.c +++ b/upb/reflection/def_pool.c @@ -318,12 +318,6 @@ static const upb_FileDef* _upb_DefPool_AddFile( const upb_MiniTableFile* layout, upb_Status* status) { const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); - if (name.size == 0) { - upb_Status_SetErrorFormat(status, - "missing name in google_protobuf_FileDescriptorProto"); - return NULL; - } - // Determine whether we already know about this file. { upb_value v; diff --git a/upb/reflection/enum_def.c b/upb/reflection/enum_def.c index a6c100b22e..49f047c5be 100644 --- a/upb/reflection/enum_def.c +++ b/upb/reflection/enum_def.c @@ -238,7 +238,6 @@ static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, e->file = _upb_DefBuilder_File(ctx); name = UPB_DESC(EnumDescriptorProto_name)(enum_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); _upb_DefBuilder_Add(ctx, e->full_name, diff --git a/upb/reflection/field_def.c b/upb/reflection/field_def.c index f9560d500f..ec7878fe3f 100644 --- a/upb/reflection/field_def.c +++ b/upb/reflection/field_def.c @@ -549,7 +549,14 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, } const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); + + f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); + f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); + f->is_proto3_optional = + UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); + f->msgdef = m; + f->scope.oneof = NULL; f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto); if (f->has_json_name) { @@ -561,14 +568,6 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, } if (!f->json_name) _upb_DefBuilder_OomErr(ctx); - f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); - f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); - f->is_proto3_optional = - UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); - f->msgdef = m; - f->scope.oneof = NULL; - const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto); const bool has_type_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); diff --git a/upb/reflection/file_def.c b/upb/reflection/file_def.c index eaf18e97a8..37efd5f9e9 100644 --- a/upb/reflection/file_def.c +++ b/upb/reflection/file_def.c @@ -40,6 +40,7 @@ struct upb_FileDef { const UPB_DESC(FileOptions) * opts; const char* name; const char* package; + const char* edition; const upb_FileDef** deps; const int32_t* public_deps; @@ -76,6 +77,10 @@ const char* upb_FileDef_Package(const upb_FileDef* f) { return f->package ? f->package : ""; } +const char* upb_FileDef_Edition(const upb_FileDef* f) { + return f->edition ? f->edition : ""; +} + const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; } upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } @@ -223,13 +228,14 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, } } - if (!UPB_DESC(FileDescriptorProto_has_name)(file_proto)) { - _upb_DefBuilder_Errf(ctx, "File has no name"); + upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); + file->name = strviewdup(ctx, name); + if (strlen(file->name) != name.size) { + _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL"); } - file->name = strviewdup(ctx, UPB_DESC(FileDescriptorProto_name)(file_proto)); - upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto); + if (package.size) { _upb_DefBuilder_CheckIdentFull(ctx, package); file->package = strviewdup(ctx, package); @@ -237,6 +243,18 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, file->package = NULL; } + upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto); + + if (edition.size == 0) { + file->edition = NULL; + } else { + // TODO(b/267770604): How should we validate this? + file->edition = strviewdup(ctx, edition); + if (strlen(file->edition) != edition.size) { + _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL"); + } + } + if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) { upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto); diff --git a/upb/reflection/file_def.h b/upb/reflection/file_def.h index 3d2abc69cf..a8557cb902 100644 --- a/upb/reflection/file_def.h +++ b/upb/reflection/file_def.h @@ -45,6 +45,7 @@ bool upb_FileDef_HasOptions(const upb_FileDef* f); const char* upb_FileDef_Name(const upb_FileDef* f); const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f); const char* upb_FileDef_Package(const upb_FileDef* f); +const char* upb_FileDef_Edition(const upb_FileDef* f); const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f); const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i); diff --git a/upb/reflection/message_def.c b/upb/reflection/message_def.c index 63202451bc..39751b82d4 100644 --- a/upb/reflection/message_def.c +++ b/upb/reflection/message_def.c @@ -610,7 +610,6 @@ static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, m->is_sorted = true; name = UPB_DESC(DescriptorProto_name)(msg_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); diff --git a/upb/reflection/service_def.c b/upb/reflection/service_def.c index cdd3a889ef..453100f1b8 100644 --- a/upb/reflection/service_def.c +++ b/upb/reflection/service_def.c @@ -100,7 +100,6 @@ static void create_service(upb_DefBuilder* ctx, s->file = _upb_DefBuilder_File(ctx); name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); const char* package = _upb_FileDef_RawPackage(s->file); s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); _upb_DefBuilder_Add(ctx, s->full_name, diff --git a/upb/test/BUILD b/upb/test/BUILD index 62b23c3dbb..7e41faecff 100644 --- a/upb/test/BUILD +++ b/upb/test/BUILD @@ -33,6 +33,17 @@ load( "upb_proto_reflection_library", ) +cc_library( + name = "parse_text_proto", + testonly = 1, + hdrs = ["parse_text_proto.h"], + visibility = ["//:__subpackages__"], + deps = [ + "@com_google_googletest//:gtest", + "@com_google_protobuf//:protobuf", + ], +) + proto_library( name = "empty_proto", srcs = ["empty.proto"], diff --git a/upb/test/parse_text_proto.h b/upb/test/parse_text_proto.h new file mode 100644 index 0000000000..96137c13eb --- /dev/null +++ b/upb/test/parse_text_proto.h @@ -0,0 +1,37 @@ +#ifndef UPB_UPB_TEST_PARSE_TEXT_PROTO_H_ +#define UPB_UPB_TEST_PARSE_TEXT_PROTO_H_ + +#include + +#include "gtest/gtest.h" +#include "google/protobuf/message.h" +#include "google/protobuf/text_format.h" + +namespace upb_test { + +// Replacement for Google ParseTextProtoOrDie. +// Only to be used in unit tests. +// Usage: MyMessage msg = ParseTextProtoOrDie(my_text_proto); +class ParseTextProtoOrDie { + public: + explicit ParseTextProtoOrDie(absl::string_view text_proto) + : text_proto_(text_proto) {} + + template + operator T() { // NOLINT: Needed to support parsing text proto as appropriate + // type. + T message; + if (!google::protobuf::TextFormat::ParseFromString(text_proto_, &message)) { + ADD_FAILURE() << "Failed to parse textproto: " << text_proto_; + abort(); + } + return message; + } + + private: + std::string text_proto_; +}; + +} // namespace upb_test + +#endif // UPB_UPB_TEST_PARSE_TEXT_PROTO_H_ diff --git a/upb/util/BUILD b/upb/util/BUILD index 2643959038..13945e8795 100644 --- a/upb/util/BUILD +++ b/upb/util/BUILD @@ -43,22 +43,51 @@ upb_proto_reflection_library( deps = ["def_to_proto_test_proto"], ) +cc_library( + name = "def_to_proto_test_lib", + testonly = 1, + hdrs = ["def_to_proto_test.h"], + deps = [ + ":def_to_proto", + "//:descriptor_upb_proto", + "//:reflection_internal", + "//:upb", + "@com_google_googletest//:gtest", + "@com_google_protobuf//:protobuf", + ], +) + cc_test( name = "def_to_proto_test", srcs = ["def_to_proto_test.cc"], deps = [ ":def_to_proto", + ":def_to_proto_test_lib", ":def_to_proto_test_upb_proto", ":def_to_proto_test_upb_proto_reflection", "//:descriptor_upb_proto_reflection", "//:reflection", "//:upb", + "//upb/test:parse_text_proto", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", "@com_google_protobuf//:protobuf", ], ) +# begin:google_only +# cc_test( +# name = "def_to_proto_fuzz_test", +# srcs = ["def_to_proto_fuzz_test.cc"], +# tags = ["clang_only"], +# deps = [ +# ":def_to_proto_test_lib", +# "//testing/fuzzing:fuzztest", +# "@com_google_googletest//:gtest_main", +# ], +# ) +# end:google_only + # Required fields cc_library( diff --git a/upb/util/def_to_proto.c b/upb/util/def_to_proto.c index 26d11ecd2b..8513206edf 100644 --- a/upb/util/def_to_proto.c +++ b/upb/util/def_to_proto.c @@ -51,17 +51,16 @@ typedef struct { // We want to copy the options verbatim into the destination options proto. // We use serialize+parse as our deep copy. -#define SET_OPTIONS(proto, desc_type, options_type, src) \ - { \ - size_t size; \ - /* MEM: could use a temporary arena here instead. */ \ - char* pb = \ - google_protobuf_##options_type##_serialize(src, ctx->arena, &size); \ - CHK_OOM(pb); \ - google_protobuf_##options_type* dst = \ - google_protobuf_##options_type##_parse(pb, size, ctx->arena); \ - CHK_OOM(dst); \ - google_protobuf_##desc_type##_set_options(proto, dst); \ +#define SET_OPTIONS(proto, desc_type, options_type, src) \ + { \ + size_t size; \ + /* MEM: could use a temporary arena here instead. */ \ + char* pb = google_protobuf_##options_type##_serialize(src, ctx->arena, &size); \ + CHK_OOM(pb); \ + google_protobuf_##options_type* dst = \ + google_protobuf_##options_type##_parse(pb, size, ctx->arena); \ + CHK_OOM(dst); \ + google_protobuf_##desc_type##_set_options(proto, dst); \ } static upb_StringView strviewdup2(upb_ToProto_Context* ctx, @@ -101,11 +100,46 @@ static upb_StringView printf_dup(upb_ToProto_Context* ctx, const char* fmt, static bool upb_isprint(char ch) { return ch >= 0x20 && ch <= 0x7f; } +static int special_escape(char ch) { + switch (ch) { + case '\a': + return 'a'; + case '\b': + return 'b'; + case '\f': + return 'f'; + case '\n': + return 'n'; + case '\r': + return 'r'; + case '\t': + return 't'; + case '\v': + return 'v'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '"': + return '"'; + case '\?': + return '?'; + default: + return -1; + } +} + static upb_StringView default_bytes(upb_ToProto_Context* ctx, upb_StringView val) { size_t n = 0; for (size_t i = 0; i < val.size; i++) { - n += upb_isprint(val.data[i]) ? 1 : 4; // '\123' + char ch = val.data[i]; + if (special_escape(ch) >= 0) + n += 2; // '\C' + else if (upb_isprint(ch)) + n += 1; + else + n += 4; // '\123' } char* p = upb_Arena_Malloc(ctx->arena, n); CHK_OOM(p); @@ -114,7 +148,10 @@ static upb_StringView default_bytes(upb_ToProto_Context* ctx, const char* end = src + val.size; while (src < end) { unsigned char ch = *src++; - if (upb_isprint(ch)) { + if (special_escape(ch) >= 0) { + *dst++ = '\\'; + *dst++ = (char)special_escape(ch); + } else if (upb_isprint(ch)) { *dst++ = ch; } else { *dst++ = '\\'; @@ -200,16 +237,15 @@ static google_protobuf_EnumDescriptorProto_EnumReservedRange* enumresrange_topro return proto; } -static google_protobuf_FieldDescriptorProto* fielddef_toproto( - upb_ToProto_Context* ctx, const upb_FieldDef* f) { +static google_protobuf_FieldDescriptorProto* fielddef_toproto(upb_ToProto_Context* ctx, + const upb_FieldDef* f) { google_protobuf_FieldDescriptorProto* proto = google_protobuf_FieldDescriptorProto_new(ctx->arena); CHK_OOM(proto); - google_protobuf_FieldDescriptorProto_set_name( - proto, strviewdup(ctx, upb_FieldDef_Name(f))); - google_protobuf_FieldDescriptorProto_set_number(proto, - upb_FieldDef_Number(f)); + google_protobuf_FieldDescriptorProto_set_name(proto, + strviewdup(ctx, upb_FieldDef_Name(f))); + google_protobuf_FieldDescriptorProto_set_number(proto, upb_FieldDef_Number(f)); google_protobuf_FieldDescriptorProto_set_label(proto, upb_FieldDef_Label(f)); google_protobuf_FieldDescriptorProto_set_type(proto, upb_FieldDef_Type(f)); @@ -234,14 +270,13 @@ static google_protobuf_FieldDescriptorProto* fielddef_toproto( } if (upb_FieldDef_HasDefault(f)) { - google_protobuf_FieldDescriptorProto_set_default_value( - proto, default_string(ctx, f)); + google_protobuf_FieldDescriptorProto_set_default_value(proto, + default_string(ctx, f)); } const upb_OneofDef* o = upb_FieldDef_ContainingOneof(f); if (o) { - google_protobuf_FieldDescriptorProto_set_oneof_index(proto, - upb_OneofDef_Index(o)); + google_protobuf_FieldDescriptorProto_set_oneof_index(proto, upb_OneofDef_Index(o)); } if (_upb_FieldDef_IsProto3Optional(f)) { @@ -256,14 +291,14 @@ static google_protobuf_FieldDescriptorProto* fielddef_toproto( return proto; } -static google_protobuf_OneofDescriptorProto* oneofdef_toproto( - upb_ToProto_Context* ctx, const upb_OneofDef* o) { +static google_protobuf_OneofDescriptorProto* oneofdef_toproto(upb_ToProto_Context* ctx, + const upb_OneofDef* o) { google_protobuf_OneofDescriptorProto* proto = google_protobuf_OneofDescriptorProto_new(ctx->arena); CHK_OOM(proto); - google_protobuf_OneofDescriptorProto_set_name( - proto, strviewdup(ctx, upb_OneofDef_Name(o))); + google_protobuf_OneofDescriptorProto_set_name(proto, + strviewdup(ctx, upb_OneofDef_Name(o))); if (upb_OneofDef_HasOptions(o)) { SET_OPTIONS(proto, OneofDescriptorProto, OneofOptions, @@ -281,8 +316,7 @@ static google_protobuf_EnumValueDescriptorProto* enumvaldef_toproto( google_protobuf_EnumValueDescriptorProto_set_name( proto, strviewdup(ctx, upb_EnumValueDef_Name(e))); - google_protobuf_EnumValueDescriptorProto_set_number( - proto, upb_EnumValueDef_Number(e)); + google_protobuf_EnumValueDescriptorProto_set_number(proto, upb_EnumValueDef_Number(e)); if (upb_EnumValueDef_HasOptions(e)) { SET_OPTIONS(proto, EnumValueDescriptorProto, EnumValueOptions, @@ -292,14 +326,14 @@ static google_protobuf_EnumValueDescriptorProto* enumvaldef_toproto( return proto; } -static google_protobuf_EnumDescriptorProto* enumdef_toproto( - upb_ToProto_Context* ctx, const upb_EnumDef* e) { +static google_protobuf_EnumDescriptorProto* enumdef_toproto(upb_ToProto_Context* ctx, + const upb_EnumDef* e) { google_protobuf_EnumDescriptorProto* proto = google_protobuf_EnumDescriptorProto_new(ctx->arena); CHK_OOM(proto); - google_protobuf_EnumDescriptorProto_set_name( - proto, strviewdup(ctx, upb_EnumDef_Name(e))); + google_protobuf_EnumDescriptorProto_set_name(proto, + strviewdup(ctx, upb_EnumDef_Name(e))); int n = upb_EnumDef_ValueCount(e); google_protobuf_EnumValueDescriptorProto** vals = @@ -337,10 +371,10 @@ static google_protobuf_DescriptorProto_ExtensionRange* extrange_toproto( google_protobuf_DescriptorProto_ExtensionRange_new(ctx->arena); CHK_OOM(proto); - google_protobuf_DescriptorProto_ExtensionRange_set_start( - proto, upb_ExtensionRange_Start(e)); - google_protobuf_DescriptorProto_ExtensionRange_set_end( - proto, upb_ExtensionRange_End(e)); + google_protobuf_DescriptorProto_ExtensionRange_set_start(proto, + upb_ExtensionRange_Start(e)); + google_protobuf_DescriptorProto_ExtensionRange_set_end(proto, + upb_ExtensionRange_End(e)); if (upb_ExtensionRange_HasOptions(e)) { SET_OPTIONS(proto, DescriptorProto_ExtensionRange, ExtensionRangeOptions, @@ -350,14 +384,13 @@ static google_protobuf_DescriptorProto_ExtensionRange* extrange_toproto( return proto; } -static google_protobuf_DescriptorProto* msgdef_toproto( - upb_ToProto_Context* ctx, const upb_MessageDef* m) { - google_protobuf_DescriptorProto* proto = - google_protobuf_DescriptorProto_new(ctx->arena); +static google_protobuf_DescriptorProto* msgdef_toproto(upb_ToProto_Context* ctx, + const upb_MessageDef* m) { + google_protobuf_DescriptorProto* proto = google_protobuf_DescriptorProto_new(ctx->arena); CHK_OOM(proto); - google_protobuf_DescriptorProto_set_name( - proto, strviewdup(ctx, upb_MessageDef_Name(m))); + google_protobuf_DescriptorProto_set_name(proto, + strviewdup(ctx, upb_MessageDef_Name(m))); int n; @@ -427,14 +460,14 @@ static google_protobuf_DescriptorProto* msgdef_toproto( return proto; } -static google_protobuf_MethodDescriptorProto* methoddef_toproto( - upb_ToProto_Context* ctx, const upb_MethodDef* m) { +static google_protobuf_MethodDescriptorProto* methoddef_toproto(upb_ToProto_Context* ctx, + const upb_MethodDef* m) { google_protobuf_MethodDescriptorProto* proto = google_protobuf_MethodDescriptorProto_new(ctx->arena); CHK_OOM(proto); - google_protobuf_MethodDescriptorProto_set_name( - proto, strviewdup(ctx, upb_MethodDef_Name(m))); + google_protobuf_MethodDescriptorProto_set_name(proto, + strviewdup(ctx, upb_MethodDef_Name(m))); google_protobuf_MethodDescriptorProto_set_input_type( proto, @@ -470,8 +503,7 @@ static google_protobuf_ServiceDescriptorProto* servicedef_toproto( size_t n = upb_ServiceDef_MethodCount(s); google_protobuf_MethodDescriptorProto** methods = - google_protobuf_ServiceDescriptorProto_resize_method(proto, n, - ctx->arena); + google_protobuf_ServiceDescriptorProto_resize_method(proto, n, ctx->arena); for (int i = 0; i < n; i++) { methods[i] = methoddef_toproto(ctx, upb_ServiceDef_Method(s, i)); } @@ -484,63 +516,65 @@ static google_protobuf_ServiceDescriptorProto* servicedef_toproto( return proto; } -static google_protobuf_FileDescriptorProto* filedef_toproto( - upb_ToProto_Context* ctx, const upb_FileDef* f) { +static google_protobuf_FileDescriptorProto* filedef_toproto(upb_ToProto_Context* ctx, + const upb_FileDef* f) { google_protobuf_FileDescriptorProto* proto = google_protobuf_FileDescriptorProto_new(ctx->arena); CHK_OOM(proto); - google_protobuf_FileDescriptorProto_set_name( - proto, strviewdup(ctx, upb_FileDef_Name(f))); + google_protobuf_FileDescriptorProto_set_name(proto, + strviewdup(ctx, upb_FileDef_Name(f))); const char* package = upb_FileDef_Package(f); if (package) { size_t n = strlen(package); if (n) { - google_protobuf_FileDescriptorProto_set_package( - proto, strviewdup(ctx, upb_FileDef_Package(f))); + google_protobuf_FileDescriptorProto_set_package(proto, strviewdup(ctx, package)); + } + } + + const char* edition = upb_FileDef_Edition(f); + if (edition != NULL) { + size_t n = strlen(edition); + if (n != 0) { + google_protobuf_FileDescriptorProto_set_edition(proto, strviewdup(ctx, edition)); } } if (upb_FileDef_Syntax(f) == kUpb_Syntax_Proto3) { - google_protobuf_FileDescriptorProto_set_syntax(proto, - strviewdup(ctx, "proto3")); + google_protobuf_FileDescriptorProto_set_syntax(proto, strviewdup(ctx, "proto3")); } size_t n; n = upb_FileDef_DependencyCount(f); - upb_StringView* deps = google_protobuf_FileDescriptorProto_resize_dependency( - proto, n, ctx->arena); + upb_StringView* deps = + google_protobuf_FileDescriptorProto_resize_dependency(proto, n, ctx->arena); for (int i = 0; i < n; i++) { deps[i] = strviewdup(ctx, upb_FileDef_Name(upb_FileDef_Dependency(f, i))); } n = upb_FileDef_PublicDependencyCount(f); int32_t* public_deps = - google_protobuf_FileDescriptorProto_resize_public_dependency(proto, n, - ctx->arena); + google_protobuf_FileDescriptorProto_resize_public_dependency(proto, n, ctx->arena); const int32_t* public_dep_nums = _upb_FileDef_PublicDependencyIndexes(f); if (n) memcpy(public_deps, public_dep_nums, n * sizeof(int32_t)); n = upb_FileDef_WeakDependencyCount(f); int32_t* weak_deps = - google_protobuf_FileDescriptorProto_resize_weak_dependency(proto, n, - ctx->arena); + google_protobuf_FileDescriptorProto_resize_weak_dependency(proto, n, ctx->arena); const int32_t* weak_dep_nums = _upb_FileDef_WeakDependencyIndexes(f); if (n) memcpy(weak_deps, weak_dep_nums, n * sizeof(int32_t)); n = upb_FileDef_TopLevelMessageCount(f); google_protobuf_DescriptorProto** msgs = - google_protobuf_FileDescriptorProto_resize_message_type(proto, n, - ctx->arena); + google_protobuf_FileDescriptorProto_resize_message_type(proto, n, ctx->arena); for (int i = 0; i < n; i++) { msgs[i] = msgdef_toproto(ctx, upb_FileDef_TopLevelMessage(f, i)); } n = upb_FileDef_TopLevelEnumCount(f); google_protobuf_EnumDescriptorProto** enums = - google_protobuf_FileDescriptorProto_resize_enum_type(proto, n, - ctx->arena); + google_protobuf_FileDescriptorProto_resize_enum_type(proto, n, ctx->arena); for (int i = 0; i < n; i++) { enums[i] = enumdef_toproto(ctx, upb_FileDef_TopLevelEnum(f, i)); } @@ -554,8 +588,7 @@ static google_protobuf_FileDescriptorProto* filedef_toproto( n = upb_FileDef_TopLevelExtensionCount(f); google_protobuf_FieldDescriptorProto** exts = - google_protobuf_FileDescriptorProto_resize_extension(proto, n, - ctx->arena); + google_protobuf_FileDescriptorProto_resize_extension(proto, n, ctx->arena); for (int i = 0; i < n; i++) { exts[i] = fielddef_toproto(ctx, upb_FileDef_TopLevelExtension(f, i)); } diff --git a/upb/util/def_to_proto_fuzz_test.cc b/upb/util/def_to_proto_fuzz_test.cc new file mode 100644 index 0000000000..a2177c8cd6 --- /dev/null +++ b/upb/util/def_to_proto_fuzz_test.cc @@ -0,0 +1,26 @@ + +#include + +#include "google/protobuf/descriptor.proto.h" +#include "gtest/gtest.h" +#include "testing/fuzzing/fuzztest.h" +#include "upb/util/def_to_proto_test.h" + +namespace upb_test { + +FUZZ_TEST(FuzzTest, RoundTripDescriptor) + .WithDomains( + ::fuzztest::Arbitrary().WithProtobufField( + "file", + ::fuzztest::Arbitrary() + // upb_FileDef_ToProto() does not attempt to preserve + // source_code_info. + .WithFieldUnset("source_code_info") + .WithProtobufField( + "service", + ::fuzztest::Arbitrary() + // streams are google3-only, and we do not currently + // attempt to preserve them. + .WithFieldUnset("stream")))); + +} // namespace upb_test diff --git a/upb/util/def_to_proto_test.cc b/upb/util/def_to_proto_test.cc index a304d9fec8..18d2304401 100644 --- a/upb/util/def_to_proto_test.cc +++ b/upb/util/def_to_proto_test.cc @@ -27,6 +27,9 @@ #include "upb/util/def_to_proto.h" +#include +#include + #include "google/protobuf/descriptor.pb.h" #include "google/protobuf/descriptor.upbdefs.h" #include "gmock/gmock.h" @@ -34,9 +37,13 @@ #include "google/protobuf/dynamic_message.h" #include "google/protobuf/util/message_differencer.h" #include "upb/reflection/def.hpp" +#include "upb/test/parse_text_proto.h" #include "upb/upb.hpp" +#include "upb/util/def_to_proto_test.h" #include "upb/util/def_to_proto_test.upbdefs.h" +namespace upb_test { + // Loads and retrieves a descriptor for `msgdef` into the given `pool`. const google::protobuf::Descriptor* AddMessageDescriptor( upb::MessageDefPtr msgdef, google::protobuf::DescriptorPool* pool) { @@ -144,3 +151,130 @@ TEST(DefToProto, TestRuntimeReflection) { upb_util_def_to_proto_test_proto_upbdefinit.filename); CheckFile(file, file_desc); } + +// Fuzz test regressions. + +TEST(FuzzTest, EmptyPackage) { + RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file { package: "" })pb")); +} + +TEST(FuzzTest, EmptyName) { + RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file { name: "" })pb")); +} + +TEST(FuzzTest, EmptyPackage2) { + RoundTripDescriptor( + ParseTextProtoOrDie(R"pb(file { name: "n" package: "" })pb")); +} + +TEST(FuzzTest, FileNameEmbeddedNull) { + RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file { name: "\000" })pb")); +} + +TEST(FuzzTest, EditionEmbeddedNull) { + RoundTripDescriptor( + ParseTextProtoOrDie(R"pb(file { name: "n" edition: "\000" })pb")); +} + +TEST(FuzzTest, NanValue) { + RoundTripDescriptor(ParseTextProtoOrDie( + R"pb(file { + enum_type { + value { + number: 0 + options { uninterpreted_option { double_value: nan } } + } + } + })pb")); +} + +TEST(FuzzTest, EnumValueEmbeddedNull) { + RoundTripDescriptor(ParseTextProtoOrDie( + R"pb(file { + name: "\035" + enum_type { + name: "f" + value { name: "\000" number: 0 } + } + })pb")); +} + +TEST(FuzzTest, EnumValueNoNumber) { + RoundTripDescriptor(ParseTextProtoOrDie( + R"pb(file { + name: "\035" + enum_type { + name: "f" + value { name: "abc" } + } + })pb")); +} + +TEST(FuzzTest, DefaultWithUnterminatedHex) { + RoundTripDescriptor(ParseTextProtoOrDie( + R"pb(file { + name: "\035" + message_type { + name: "A" + field { + name: "f" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_BYTES + default_value: "\\x" + } + } + })pb")); +} + +TEST(FuzzTest, DefaultWithValidHexEscape) { + RoundTripDescriptor(ParseTextProtoOrDie( + R"pb(file { + name: "\035" + message_type { + name: "A" + field { + name: "f" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_BYTES + default_value: "\\x03" + } + } + })pb")); +} + +TEST(FuzzTest, DefaultWithValidHexEscapePrintable) { + RoundTripDescriptor(ParseTextProtoOrDie( + R"pb(file { + name: "\035" + message_type { + name: "A" + field { + name: "f" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_BYTES + default_value: "\\x23" # 0x32 = '#' + } + } + })pb")); +} + +// begin:google_only +// TEST(FuzzTest, DependencyWithEmbeddedNull) { +// RoundTripDescriptor(ParseTextProtoOrDie(R"pb(file { +// name: "a" +// dependency: "a\000" +// options { cc_api_version: 0 } +// weak_dependency: 0 +// })pb")); +// } +// end:google_only + +TEST(FuzzTest, PackageStartsWithNumber) { + RoundTripDescriptor( + ParseTextProtoOrDie(R"pb(file { name: "" package: "0" })pb")); +} + +} // namespace upb_test diff --git a/upb/util/def_to_proto_test.h b/upb/util/def_to_proto_test.h new file mode 100644 index 0000000000..809a7cab46 --- /dev/null +++ b/upb/util/def_to_proto_test.h @@ -0,0 +1,117 @@ +#ifndef UPB_UTIL_DEF_TO_PROTO_TEST_H_ +#define UPB_UTIL_DEF_TO_PROTO_TEST_H_ + +#include + +#include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/descriptor.upb.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "google/protobuf/descriptor.h" +#include "google/protobuf/dynamic_message.h" +#include "google/protobuf/util/field_comparator.h" +#include "upb/reflection/def.hpp" +#include "upb/upb.hpp" +#include "upb/util/def_to_proto.h" + +namespace upb_test { + +// A gtest matcher that verifies that a proto is equal to `proto`. Both `proto` +// and `arg` must be messages of type `msgdef_func` (a .upbdefs.h function that +// loads a known msgdef into the given defpool). +MATCHER_P(EqualsProtoTreatNansAsEqual, proto, + negation ? "are not equal" : "are equal") { + upb::DefPool defpool; + google::protobuf::DescriptorPool pool; + google::protobuf::DynamicMessageFactory factory; + std::string differences; + google::protobuf::util::DefaultFieldComparator comparator; + comparator.set_treat_nan_as_equal(true); + google::protobuf::util::MessageDifferencer differencer; + differencer.set_field_comparator(&comparator); + differencer.ReportDifferencesToString(&differences); + bool eq = differencer.Equals(proto, arg); + if (!eq) { + *result_listener << differences; + } + return eq; +} + +class NullErrorCollector : public google::protobuf::DescriptorPool::ErrorCollector { + void AddError(const std::string& filename, const std::string& element_name, + const google::protobuf::Message* descriptor, ErrorLocation location, + const std::string& message) override {} + void RecordWarning(absl::string_view filename, absl::string_view element_name, + const google::protobuf::Message* descriptor, ErrorLocation location, + absl::string_view message) override {} +}; + +static void AddFile(google::protobuf::FileDescriptorProto& file, upb::DefPool* pool, + google::protobuf::DescriptorPool* desc_pool) { + NullErrorCollector collector; + const google::protobuf::FileDescriptor* file_desc = + desc_pool->BuildFileCollectingErrors(file, &collector); + + if (file_desc != nullptr) { + // The file descriptor was valid according to proto2. + google::protobuf::FileDescriptorProto normalized_file; + file_desc->CopyTo(&normalized_file); + std::string serialized; + normalized_file.SerializeToString(&serialized); + upb::Arena arena; + upb::Status status; + google_protobuf_FileDescriptorProto* proto = google_protobuf_FileDescriptorProto_parse( + serialized.data(), serialized.size(), arena.ptr()); + ASSERT_NE(proto, nullptr); + upb::FileDefPtr file_def = pool->AddFile(proto, &status); + + // Ideally we could assert that file_def is present here. After all, any + // descriptor accepted by C++ should be by definition valid. However C++ + // performs some of its validation at the .proto file parser level instead + // of when validating descriptors. As as result, C++ will accept some + // unreasonable descriptors like: + // file { name: "" package: "0" } + // + // There is no .proto file that will produce this descriptor, but + // BuildFile() accepts it. We should probably clean up these cases so C++ + // will reject them too. + if (!file_def) return; + + ASSERT_TRUE(status.ok()) << status.error_message(); + google_protobuf_FileDescriptorProto* upb_proto = + upb_FileDef_ToProto(file_def.ptr(), arena.ptr()); + size_t size; + const char* buf = + google_protobuf_FileDescriptorProto_serialize(upb_proto, arena.ptr(), &size); + google::protobuf::FileDescriptorProto google_proto; + bool ok = google_proto.ParseFromArray(buf, size); + ASSERT_TRUE(ok); + EXPECT_THAT(google_proto, EqualsProtoTreatNansAsEqual(normalized_file)); + } else { + // This file was invalid according to proto2. When we parse it with upb, + // it may or may not be accepted, since upb does not perform as much + // validation as proto2. However it must not crash. + std::string serialized; + file.SerializeToString(&serialized); + upb::Arena arena; + upb::Status status; + google_protobuf_FileDescriptorProto* proto = google_protobuf_FileDescriptorProto_parse( + serialized.data(), serialized.size(), arena.ptr()); + ASSERT_NE(proto, nullptr); + pool->AddFile(proto, &status); + } +} + +inline void RoundTripDescriptor(const google::protobuf::FileDescriptorSet& set) { + upb::DefPool defpool; + google::protobuf::DescriptorPool desc_pool; + desc_pool.EnforceWeakDependencies(true); + for (const auto& file : set.file()) { + google::protobuf::FileDescriptorProto mutable_file(file); + AddFile(mutable_file, &defpool, &desc_pool); + } +} + +} // namespace upb_test + +#endif // UPB_UTIL_DEF_TO_PROTO_TEST_H_ diff --git a/upb/util/def_to_proto_test.proto b/upb/util/def_to_proto_test.proto index 5d154106c7..a28c1c55d7 100644 --- a/upb/util/def_to_proto_test.proto +++ b/upb/util/def_to_proto_test.proto @@ -117,3 +117,8 @@ message MessageSetItem { optional MessageSetItem message_set_extension = 2147483646; } } + +message UnusualDefaults { + optional bytes foo = 1 [default = "\\X"]; + optional string bar = 2 [default = "\\X"]; +}