diff --git a/BUILD b/BUILD index 1158e48d0e..296bda23f2 100644 --- a/BUILD +++ b/BUILD @@ -109,6 +109,16 @@ cc_library( ], ) +cc_library( + name = "mini_table_internal", + hdrs = ["upb/msg_internal.h"], + deps = [ + ":port", + ":table", + ":upb", + ], +) + cc_library( name = "mini_table", srcs = ["upb/mini_table.c"], @@ -118,7 +128,11 @@ cc_library( ], copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], - deps = [":upb"], + deps = [ + ":mini_table_internal", + ":port", + ":upb", + ], ) cc_test( @@ -126,8 +140,10 @@ cc_test( srcs = ["upb/mini_table_test.cc"], deps = [ ":mini_table", - "@com_google_googletest//:gtest_main", + ":mini_table_internal", + ":upb", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_googletest//:gtest_main", ], ) @@ -458,6 +474,47 @@ sh_test( deps = ["@bazel_tools//tools/bash/runfiles"], ) +cc_binary( + name = "conformance_upb_dynamic_minitable", + testonly = 1, + srcs = ["upb/conformance_upb.c"], + copts = UPB_DEFAULT_COPTS + [ + "-DREBUILD_MINITABLES", + ], + data = ["upb/conformance_upb_failures.txt"], + deps = [ + ":conformance_proto_upb", + ":conformance_proto_upbdefs", + ":test_messages_proto2_upbdefs", + ":test_messages_proto3_upbdefs", + "//:json", + "//:port", + "//:reflection", + "//:textformat", + "//:upb", + ], +) + +make_shell_script( + name = "gen_test_conformance_upb_dynamic_minitable", + out = "test_conformance_upb_dynamic_minitable.sh", + contents = "external/com_google_protobuf/conformance_test_runner " + + " --enforce_recommended " + + " --failure_list ./upb/conformance_upb_failures.txt" + + " ./conformance_upb_dynamic_minitable", +) + +sh_test( + name = "test_conformance_upb_dynamic_minitable", + srcs = ["test_conformance_upb_dynamic_minitable.sh"], + data = [ + "upb/conformance_upb_failures.txt", + ":conformance_upb_dynamic_minitable", + "@com_google_protobuf//:conformance_test_runner", + ], + deps = ["@bazel_tools//tools/bash/runfiles"], +) + # Internal C/C++ libraries ##################################################### cc_library( diff --git a/README.md b/README.md index 92a081bbef..9b74b969ce 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,22 @@ For PHP, use [PECL](https://pecl.php.net/package/protobuf): $ sudo pecl install protobuf ``` +Alternatively, you can build and install upb using +[vcpkg](https://github.com/microsoft/vcpkg/) dependency manager: + + git clone https://github.com/Microsoft/vcpkg.git + cd vcpkg + ./bootstrap-vcpkg.sh + ./vcpkg integrate install + ./vcpkg install upb + +The upb port in vcpkg is kept up to date by microsoft team members and community +contributors. + +If the version is out of date, please +[create an issue or pull request](https://github.com/Microsoft/vcpkg) on the +vcpkg repository. + ## Contributing Please see [CONTRIBUTING.md](CONTRIBUTING.md). diff --git a/bazel/amalgamate.py b/bazel/amalgamate.py index 103b11c351..70399085ca 100755 --- a/bazel/amalgamate.py +++ b/bazel/amalgamate.py @@ -44,6 +44,9 @@ class Amalgamator: self.output_c.write("/* Amalgamated source file */\n") self.output_c.write('#include "%supb.h"\n' % (prefix)) + if prefix == "ruby-": + self.output_h.write("// Ruby is still using proto3 enum semantics for proto2\n") + self.output_h.write("#define UPB_DISABLE_PROTO2_ENUM_CHECKING\n") self.output_c.write(open("upb/port_def.inc").read()) self.output_h.write("/* Amalgamated source file */\n") diff --git a/bazel/py_extension.bzl b/bazel/py_extension.bzl index beee32148f..eacd39b856 100644 --- a/bazel/py_extension.bzl +++ b/bazel/py_extension.bzl @@ -1,10 +1,9 @@ - load( "//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS", ) -def py_extension(name, srcs, deps=[]): +def py_extension(name, srcs, deps = []): version_script = name + "_version_script.lds" symbol = "PyInit_" + name native.genrule( diff --git a/bazel/pyproto_test_wrapper.bzl b/bazel/pyproto_test_wrapper.bzl index fb909a1db8..e0d01ca225 100644 --- a/bazel/pyproto_test_wrapper.bzl +++ b/bazel/pyproto_test_wrapper.bzl @@ -1,4 +1,3 @@ - # copybara:strip_for_google3_begin def pyproto_test_wrapper(name): diff --git a/benchmarks/build_defs.bzl b/benchmarks/build_defs.bzl index f95a09ed1b..93da39f086 100644 --- a/benchmarks/build_defs.bzl +++ b/benchmarks/build_defs.bzl @@ -24,7 +24,7 @@ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # copybara:insert_for_google3_begin -# load("//tools/build_defs/proto/cpp:cc_proto_library.bzl", _cc_proto_library="cc_proto_library") +# load("//tools/build_defs/proto/cpp:cc_proto_library.bzl", _cc_proto_library = "cc_proto_library") # copybara:insert_end # copybara:strip_for_google3_begin @@ -36,7 +36,7 @@ def proto_library(**kwargs): # copybara:insert_for_google3_begin # cc_api_version = 2, # copybara:insert_end - **kwargs, + **kwargs ) def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs): @@ -55,7 +55,7 @@ def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs): # copybara:insert_end name = name, srcs = srcs, - **kwargs, + **kwargs ) def cc_optimizefor_proto_library(name, srcs, outs, optimize_for): diff --git a/benchmarks/compare.py b/benchmarks/compare.py index 8c27ea72b4..445e61554d 100755 --- a/benchmarks/compare.py +++ b/benchmarks/compare.py @@ -81,7 +81,8 @@ def Benchmark(outbase, bench_cpu=True, runs=12, fasttable=False): print("{} {} {} ns/op".format(*values), file=f) Run("sort {} -o {} ".format(txt_filename, txt_filename)) - Run("CC=clang bazel build -c opt --copt=-g --copt=-march=native :conformance_upb" + extra_args) + Run("CC=clang bazel build -c opt --copt=-g --copt=-march=native :conformance_upb" + + extra_args) Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase)) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index a06bd604ac..8c08321eaa 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -81,11 +81,18 @@ target_link_libraries(upb fastdecode port /third_party/utf8_range) +add_library(mini_table_internal INTERFACE) +target_link_libraries(mini_table_internal INTERFACE + port + table + upb) add_library(mini_table ../upb/mini_table.c ../upb/mini_table.h ../upb/mini_table.hpp) target_link_libraries(mini_table + mini_table_internal + port upb) add_library(fastdecode ../upb/decode.h diff --git a/upb/conformance_upb.c b/upb/conformance_upb.c index 5f912f0c40..b64c112c86 100644 --- a/upb/conformance_upb.c +++ b/upb/conformance_upb.c @@ -323,8 +323,15 @@ bool DoTestIo(upb_DefPool* symtab) { int main(void) { upb_DefPool* symtab = upb_DefPool_New(); +#ifdef REBUILD_MINITABLES + _upb_DefPool_LoadDefInitEx( + symtab, &src_google_protobuf_test_messages_proto2_proto_upbdefinit, true); + _upb_DefPool_LoadDefInitEx( + symtab, &src_google_protobuf_test_messages_proto3_proto_upbdefinit, true); +#else protobuf_test_messages_proto2_TestAllTypesProto2_getmsgdef(symtab); protobuf_test_messages_proto3_TestAllTypesProto3_getmsgdef(symtab); +#endif while (1) { if (!DoTestIo(symtab)) { diff --git a/upb/decode.c b/upb/decode.c index 683e4ec32b..2d5bb49525 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -385,6 +385,18 @@ static char* encode_varint32(uint32_t val, char* ptr) { return ptr; } +static void upb_Decode_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, + uint32_t val1, uint32_t val2) { + char buf[20]; + char* end = buf; + end = encode_varint32(val1, end); + end = encode_varint32(val2, end); + + if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { + decode_err(d, kUpb_DecodeStatus_OutOfMemory); + } +} + UPB_NOINLINE static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr, upb_Message* msg, const upb_MiniTable_Enum* e, @@ -398,17 +410,9 @@ static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr, // Unrecognized enum goes into unknown fields. // For packed fields the tag could be arbitrarily far in the past, so we - // just re-encode the tag here. - char buf[20]; - char* end = buf; + // just re-encode the tag and value here. uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint; - end = encode_varint32(tag, end); - end = encode_varint32(v, end); - - if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { - decode_err(d, kUpb_DecodeStatus_OutOfMemory); - } - + upb_Decode_AddUnknownVarints(d, msg, tag, v); return false; } @@ -627,8 +631,20 @@ static const char* decode_tomap(upb_Decoder* d, const char* ptr, upb_value_ptr(_upb_Message_New(entry->subs[0].submsg, &d->arena)); } + const char* start = ptr; ptr = decode_tosubmsg(d, ptr, &ent.k, subs, field, val->size); - _upb_Map_Set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena); + // check if ent had any unknown fields + size_t size; + upb_Message_GetUnknown(&ent.k, &size); + if (size != 0) { + uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; + upb_Decode_AddUnknownVarints(d, msg, tag, (uint32_t)(ptr - start)); + if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { + decode_err(d, kUpb_DecodeStatus_OutOfMemory); + } + } else { + _upb_Map_Set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena); + } return ptr; } diff --git a/upb/def.c b/upb/def.c index 712d68d638..140465aab5 100644 --- a/upb/def.c +++ b/upb/def.c @@ -1413,7 +1413,12 @@ static uint8_t map_descriptortype(const upb_FieldDef* f) { if (type == kUpb_FieldType_String && f->file->syntax == kUpb_Syntax_Proto2) { return kUpb_FieldType_Bytes; } else if (type == kUpb_FieldType_Enum && - f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3) { + (f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3 || + UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 || + // TODO(https://github.com/protocolbuffers/upb/issues/541): + // fix map enum values to check for unknown enum values and put + // them in the unknown field set. + upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f)))) { return kUpb_FieldType_Int32; } return type; @@ -1580,11 +1585,11 @@ static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) { fill_fieldlayout(field, f); - if (upb_FieldDef_IsSubMessage(f)) { + if (field->descriptortype == kUpb_FieldType_Message || + field->descriptortype == kUpb_FieldType_Group) { field->submsg_index = sublayout_count++; subs[field->submsg_index].submsg = upb_FieldDef_MessageSubDef(f)->layout; - } else if (upb_FieldDef_CType(f) == kUpb_CType_Enum && - f->sub.enumdef->file->syntax == kUpb_Syntax_Proto2) { + } else if (field->descriptortype == kUpb_FieldType_Enum) { field->submsg_index = sublayout_count++; subs[field->submsg_index].subenum = upb_FieldDef_EnumSubDef(f)->layout; UPB_ASSERT(subs[field->submsg_index].subenum); diff --git a/upb/mini_table.c b/upb/mini_table.c index 59d2c91a8c..471501fb1a 100644 --- a/upb/mini_table.c +++ b/upb/mini_table.c @@ -173,7 +173,7 @@ char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, upb_FieldType type, uint32_t field_num, - uint64_t modifiers) { + uint64_t field_mod) { static const char kUpb_TypeToEncoded[] = { [kUpb_FieldType_Double] = kUpb_EncodedType_Double, [kUpb_FieldType_Float] = kUpb_EncodedType_Float, @@ -209,7 +209,7 @@ char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, // Put field type. int encoded_type = kUpb_TypeToEncoded[type]; - if (modifiers & kUpb_FieldModifier_IsRepeated) { + if (field_mod & kUpb_FieldModifier_IsRepeated) { // Repeated fields shift the type number up (unlike other modifiers which // are bit flags). encoded_type += kUpb_EncodedType_RepeatedBase; @@ -218,13 +218,13 @@ char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, if (!ptr) return NULL; uint32_t encoded_modifiers = 0; - if (modifiers & kUpb_FieldModifier_IsProto3Singular) { + if (field_mod & kUpb_FieldModifier_IsProto3Singular) { encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular; } - if (modifiers & kUpb_FieldModifier_IsRequired) { + if (field_mod & kUpb_FieldModifier_IsRequired) { encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired; } - if ((modifiers & kUpb_FieldModifier_IsPacked) != + if ((field_mod & kUpb_FieldModifier_IsPacked) != (in->msg_mod & kUpb_MessageModifier_DefaultIsPacked)) { encoded_modifiers |= kUpb_EncodedFieldModifier_IsUnpacked; } diff --git a/upb/mini_table.h b/upb/mini_table.h index e6cc886921..319c580843 100644 --- a/upb/mini_table.h +++ b/upb/mini_table.h @@ -1,9 +1,5 @@ /* -<<<<<<< HEAD - * Copyright (c) 2009-2021, Google LLC -======= * Copyright (c) 2009-2022, Google LLC ->>>>>>> mini-table-1 * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -96,13 +92,13 @@ typedef struct { // ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); // // Oneofs must be encoded after all regular fields. -char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* buf, +char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, uint64_t msg_mod); -char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* buf, +char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, upb_FieldType type, uint32_t field_num, uint64_t field_mod); -char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* buf); -char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* buf, +char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr); +char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, uint32_t field_num); /** upb_MiniTable *************************************************************/ diff --git a/upb/msg_test.cc b/upb/msg_test.cc index cd6fdf67fb..2d1f8e991b 100644 --- a/upb/msg_test.cc +++ b/upb/msg_test.cc @@ -398,3 +398,32 @@ TEST(MessageTest, MaxRequiredFields) { test_msg, kUpb_Encode_CheckRequired, arena.ptr(), &size); ASSERT_TRUE(serialized != nullptr); } + +TEST(MessageTest, MapField) { + upb::Arena arena; + upb_test_TestMapFieldExtra* test_msg_extra = + upb_test_TestMapFieldExtra_new(arena.ptr()); + + ASSERT_TRUE(upb_test_TestMapFieldExtra_map_field_set( + test_msg_extra, 0, upb_test_TestMapFieldExtra_THREE, arena.ptr())); + + size_t size; + char* serialized = upb_test_TestMapFieldExtra_serialize_ex( + test_msg_extra, 0, arena.ptr(), &size); + ASSERT_NE(nullptr, serialized); + ASSERT_NE(0, size); + + upb_test_TestMapField* test_msg = + upb_test_TestMapField_parse(serialized, size, arena.ptr()); + ASSERT_NE(nullptr, test_msg); + + ASSERT_FALSE(upb_test_TestMapField_map_field_get(test_msg, 0, nullptr)); + serialized = + upb_test_TestMapField_serialize_ex(test_msg, 0, arena.ptr(), &size); + ASSERT_NE(0, size); + // parse into second instance + upb_test_TestMapFieldExtra* test_msg_extra2 = + upb_test_TestMapFieldExtra_parse(serialized, size, arena.ptr()); + ASSERT_TRUE( + upb_test_TestMapFieldExtra_map_field_get(test_msg_extra2, 0, nullptr)); +} diff --git a/upb/msg_test.proto b/upb/msg_test.proto index 81c7dee079..1f80f6918c 100644 --- a/upb/msg_test.proto +++ b/upb/msg_test.proto @@ -158,3 +158,22 @@ message TestMaxRequiredFields { required int32 required_int32_61 = 61; required int32 required_int32_62 = 62; } + +message TestMapField { + enum EnumMap { + ZERO = 0; + ONE = 1; + TWO = 2; + } + map map_field = 1; +} + +message TestMapFieldExtra { + enum EnumMap { + ZERO = 0; + ONE = 1; + TWO = 2; + THREE = 3; + } + map map_field = 1; +} diff --git a/upb/port_def.inc b/upb/port_def.inc index 75d416d1ce..2b240ff6e8 100644 --- a/upb/port_def.inc +++ b/upb/port_def.inc @@ -251,3 +251,11 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); #define UPB_UNPOISON_MEMORY_REGION(addr, size) \ ((void)(addr), (void)(size)) #endif + +/* Disable proto2 arena behavior (TEMPORARY) **********************************/ + +#ifdef UPB_DISABLE_PROTO2_ENUM_CHECKING +#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 1 +#else +#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 0 +#endif diff --git a/upb/port_undef.inc b/upb/port_undef.inc index 70956df25d..3ef2d0dbcd 100644 --- a/upb/port_undef.inc +++ b/upb/port_undef.inc @@ -59,3 +59,4 @@ #undef UPB_POISON_MEMORY_REGION #undef UPB_UNPOISON_MEMORY_REGION #undef UPB_ASAN +#undef UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 diff --git a/upb/table.c b/upb/table.c index 33b3a9dc6e..63fe3cc960 100644 --- a/upb/table.c +++ b/upb/table.c @@ -433,14 +433,14 @@ const uint64_t kWyhashSalt[5] = { 0x082EFA98EC4E6C89ULL, 0x452821E638D01377ULL, }; -static uint32_t table_hash(const char* p, size_t n) { +uint32_t _upb_Hash(const char* p, size_t n) { return Wyhash(p, n, 0, kWyhashSalt); } static uint32_t strhash(upb_tabkey key) { uint32_t len; char* str = upb_tabstr(key, &len); - return table_hash(str, len); + return _upb_Hash(str, len); } static bool streql(upb_tabkey k1, lookupkey_t k2) { @@ -496,20 +496,20 @@ bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len, tabkey = strcopy(key, a); if (tabkey == 0) return false; - hash = table_hash(key.str.str, key.str.len); + hash = _upb_Hash(key.str.str, key.str.len); insert(&t->t, key, tabkey, v, hash, &strhash, &streql); return true; } bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len, upb_value* v) { - uint32_t hash = table_hash(key, len); + uint32_t hash = _upb_Hash(key, len); return lookup(&t->t, strkey2(key, len), v, hash, &streql); } bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len, upb_value* val) { - uint32_t hash = table_hash(key, len); + uint32_t hash = _upb_Hash(key, len); upb_tabkey tabkey; return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql); } diff --git a/upb/table_internal.h b/upb/table_internal.h index 9fb2d238cd..74076551de 100644 --- a/upb/table_internal.h +++ b/upb/table_internal.h @@ -374,6 +374,8 @@ void upb_inttable_iter_setdone(upb_inttable_iter* i); bool upb_inttable_iter_isequal(const upb_inttable_iter* i1, const upb_inttable_iter* i2); +uint32_t _upb_Hash(const char* p, size_t n); + #ifdef __cplusplus } /* extern "C" */ #endif