From 5e7d86f388ba5d3acda09fdb909074e9d4d8cc39 Mon Sep 17 00:00:00 2001 From: PhoebeHui <20694052+PhoebeHui@users.noreply.github.com> Date: Tue, 31 Mar 2020 00:30:36 -0700 Subject: [PATCH 01/11] Add vcpkg installation instructions --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index a08aa0e909..4287e76ba1 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,17 @@ Then in your `.c` file you can #include the generated header: /* Insert code that uses generated types. */ ``` +Alternatively, you can build and install upb using [vcpkg](https://github.com/microsoft/vcpkg/) dependency manager: + + git clone https://github.com/Microsoft/vcpkg.git + cd vcpkg + ./bootstrap-vcpkg.sh + ./vcpkg integrate install + ./vcpkg install upb + +The upb port in vcpkg is kept up to date by microsoft team members and community contributors. +If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. + ## Old "handlers" interfaces This library contains several semi-deprecated interfaces (see BUILD From 50c1298f3261872e48ae09dc96ab177c41291b6b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 8 Mar 2022 15:52:37 -0800 Subject: [PATCH 02/11] Added conformance test variant to exercise dynamic minitable building. --- BUILD | 41 +++++++++++++++++++++++++++++++++++++++++ upb/conformance_upb.c | 5 +++++ upb/def.c | 6 +++++- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/BUILD b/BUILD index 31e93b798b..6e7d35f55c 100644 --- a/BUILD +++ b/BUILD @@ -436,6 +436,47 @@ sh_test( deps = ["@bazel_tools//tools/bash/runfiles"], ) +cc_binary( + name = "conformance_upb_dynamic_minitable", + testonly = 1, + srcs = ["upb/conformance_upb.c"], + copts = UPB_DEFAULT_COPTS + [ + "-DREBUILD_MINITABLES" + ], + data = ["upb/conformance_upb_failures.txt"], + deps = [ + ":conformance_proto_upb", + ":conformance_proto_upbdefs", + ":test_messages_proto2_upbdefs", + ":test_messages_proto3_upbdefs", + "//:json", + "//:port", + "//:reflection", + "//:textformat", + "//:upb", + ], +) + +make_shell_script( + name = "gen_test_conformance_upb_dynamic_minitable", + out = "test_conformance_upb_dynamic_minitable.sh", + contents = "external/com_google_protobuf/conformance_test_runner " + + " --enforce_recommended " + + " --failure_list ./upb/conformance_upb_failures.txt" + + " ./conformance_upb_dynamic_minitable", +) + +sh_test( + name = "test_conformance_upb_dynamic_minitable", + srcs = ["test_conformance_upb_dynamic_minitable.sh"], + data = [ + "upb/conformance_upb_failures.txt", + ":conformance_upb_dynamic_minitable", + "@com_google_protobuf//:conformance_test_runner", + ], + deps = ["@bazel_tools//tools/bash/runfiles"], +) + # Internal C/C++ libraries ##################################################### cc_library( diff --git a/upb/conformance_upb.c b/upb/conformance_upb.c index 5f912f0c40..d696e961df 100644 --- a/upb/conformance_upb.c +++ b/upb/conformance_upb.c @@ -323,8 +323,13 @@ bool DoTestIo(upb_DefPool* symtab) { int main(void) { upb_DefPool* symtab = upb_DefPool_New(); +#ifdef REBUILD_MINITABLES + _upb_DefPool_LoadDefInitEx(symtab, &src_google_protobuf_test_messages_proto2_proto_upbdefinit, true); + _upb_DefPool_LoadDefInitEx(symtab, &src_google_protobuf_test_messages_proto3_proto_upbdefinit, true); +#else protobuf_test_messages_proto2_TestAllTypesProto2_getmsgdef(symtab); protobuf_test_messages_proto3_TestAllTypesProto3_getmsgdef(symtab); +#endif while (1) { if (!DoTestIo(symtab)) { diff --git a/upb/def.c b/upb/def.c index 712d68d638..4528030d0d 100644 --- a/upb/def.c +++ b/upb/def.c @@ -1413,7 +1413,11 @@ static uint8_t map_descriptortype(const upb_FieldDef* f) { if (type == kUpb_FieldType_String && f->file->syntax == kUpb_Syntax_Proto2) { return kUpb_FieldType_Bytes; } else if (type == kUpb_FieldType_Enum && - f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3) { + (f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3 || + // TODO(https://github.com/protocolbuffers/upb/issues/541): + // fix map enum values to check for unknown enum values and put + // them in the unknown field set. + upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f)))) { return kUpb_FieldType_Int32; } return type; From 91713481fdd3de2a09101c15939483d1113b1a2a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 8 Mar 2022 16:08:24 -0800 Subject: [PATCH 03/11] Clang format. --- upb/conformance_upb.c | 6 ++++-- upb/def.c | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/upb/conformance_upb.c b/upb/conformance_upb.c index d696e961df..b64c112c86 100644 --- a/upb/conformance_upb.c +++ b/upb/conformance_upb.c @@ -324,8 +324,10 @@ int main(void) { upb_DefPool* symtab = upb_DefPool_New(); #ifdef REBUILD_MINITABLES - _upb_DefPool_LoadDefInitEx(symtab, &src_google_protobuf_test_messages_proto2_proto_upbdefinit, true); - _upb_DefPool_LoadDefInitEx(symtab, &src_google_protobuf_test_messages_proto3_proto_upbdefinit, true); + _upb_DefPool_LoadDefInitEx( + symtab, &src_google_protobuf_test_messages_proto2_proto_upbdefinit, true); + _upb_DefPool_LoadDefInitEx( + symtab, &src_google_protobuf_test_messages_proto3_proto_upbdefinit, true); #else protobuf_test_messages_proto2_TestAllTypesProto2_getmsgdef(symtab); protobuf_test_messages_proto3_TestAllTypesProto3_getmsgdef(symtab); diff --git a/upb/def.c b/upb/def.c index 4528030d0d..451aecdaf3 100644 --- a/upb/def.c +++ b/upb/def.c @@ -1414,10 +1414,10 @@ static uint8_t map_descriptortype(const upb_FieldDef* f) { return kUpb_FieldType_Bytes; } else if (type == kUpb_FieldType_Enum && (f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3 || - // TODO(https://github.com/protocolbuffers/upb/issues/541): - // fix map enum values to check for unknown enum values and put - // them in the unknown field set. - upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f)))) { + // TODO(https://github.com/protocolbuffers/upb/issues/541): + // fix map enum values to check for unknown enum values and put + // them in the unknown field set. + upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f)))) { return kUpb_FieldType_Int32; } return type; From 970c6451404ab30d5de4a641b3818c1969e2740f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 8 Mar 2022 17:44:06 -0800 Subject: [PATCH 04/11] Fixes for google3 (layering check and formatting). --- BUILD | 22 +++++++++++++++++++--- bazel/py_extension.bzl | 3 +-- bazel/pyproto_test_wrapper.bzl | 1 - benchmarks/build_defs.bzl | 6 +++--- benchmarks/compare.py | 3 ++- upb/mini_table.c | 10 +++++----- upb/mini_table.h | 8 ++++---- 7 files changed, 34 insertions(+), 19 deletions(-) diff --git a/BUILD b/BUILD index a65fd92732..296bda23f2 100644 --- a/BUILD +++ b/BUILD @@ -109,6 +109,16 @@ cc_library( ], ) +cc_library( + name = "mini_table_internal", + hdrs = ["upb/msg_internal.h"], + deps = [ + ":port", + ":table", + ":upb", + ], +) + cc_library( name = "mini_table", srcs = ["upb/mini_table.c"], @@ -118,7 +128,11 @@ cc_library( ], copts = UPB_DEFAULT_COPTS, visibility = ["//visibility:public"], - deps = [":upb"], + deps = [ + ":mini_table_internal", + ":port", + ":upb", + ], ) cc_test( @@ -126,8 +140,10 @@ cc_test( srcs = ["upb/mini_table_test.cc"], deps = [ ":mini_table", - "@com_google_googletest//:gtest_main", + ":mini_table_internal", + ":upb", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_googletest//:gtest_main", ], ) @@ -463,7 +479,7 @@ cc_binary( testonly = 1, srcs = ["upb/conformance_upb.c"], copts = UPB_DEFAULT_COPTS + [ - "-DREBUILD_MINITABLES" + "-DREBUILD_MINITABLES", ], data = ["upb/conformance_upb_failures.txt"], deps = [ diff --git a/bazel/py_extension.bzl b/bazel/py_extension.bzl index beee32148f..eacd39b856 100644 --- a/bazel/py_extension.bzl +++ b/bazel/py_extension.bzl @@ -1,10 +1,9 @@ - load( "//bazel:build_defs.bzl", "UPB_DEFAULT_COPTS", ) -def py_extension(name, srcs, deps=[]): +def py_extension(name, srcs, deps = []): version_script = name + "_version_script.lds" symbol = "PyInit_" + name native.genrule( diff --git a/bazel/pyproto_test_wrapper.bzl b/bazel/pyproto_test_wrapper.bzl index fb909a1db8..e0d01ca225 100644 --- a/bazel/pyproto_test_wrapper.bzl +++ b/bazel/pyproto_test_wrapper.bzl @@ -1,4 +1,3 @@ - # copybara:strip_for_google3_begin def pyproto_test_wrapper(name): diff --git a/benchmarks/build_defs.bzl b/benchmarks/build_defs.bzl index f95a09ed1b..93da39f086 100644 --- a/benchmarks/build_defs.bzl +++ b/benchmarks/build_defs.bzl @@ -24,7 +24,7 @@ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # copybara:insert_for_google3_begin -# load("//tools/build_defs/proto/cpp:cc_proto_library.bzl", _cc_proto_library="cc_proto_library") +# load("//tools/build_defs/proto/cpp:cc_proto_library.bzl", _cc_proto_library = "cc_proto_library") # copybara:insert_end # copybara:strip_for_google3_begin @@ -36,7 +36,7 @@ def proto_library(**kwargs): # copybara:insert_for_google3_begin # cc_api_version = 2, # copybara:insert_end - **kwargs, + **kwargs ) def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs): @@ -55,7 +55,7 @@ def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs): # copybara:insert_end name = name, srcs = srcs, - **kwargs, + **kwargs ) def cc_optimizefor_proto_library(name, srcs, outs, optimize_for): diff --git a/benchmarks/compare.py b/benchmarks/compare.py index 8c27ea72b4..445e61554d 100755 --- a/benchmarks/compare.py +++ b/benchmarks/compare.py @@ -81,7 +81,8 @@ def Benchmark(outbase, bench_cpu=True, runs=12, fasttable=False): print("{} {} {} ns/op".format(*values), file=f) Run("sort {} -o {} ".format(txt_filename, txt_filename)) - Run("CC=clang bazel build -c opt --copt=-g --copt=-march=native :conformance_upb" + extra_args) + Run("CC=clang bazel build -c opt --copt=-g --copt=-march=native :conformance_upb" + + extra_args) Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase)) diff --git a/upb/mini_table.c b/upb/mini_table.c index 354e7e5ac3..2a70cd42bf 100644 --- a/upb/mini_table.c +++ b/upb/mini_table.c @@ -163,7 +163,7 @@ char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, upb_FieldType type, uint32_t field_num, - uint64_t modifiers) { + uint64_t field_mod) { static const char kUpb_TypeToEncoded[] = { [kUpb_FieldType_Double] = kUpb_EncodedType_Double, [kUpb_FieldType_Float] = kUpb_EncodedType_Float, @@ -199,7 +199,7 @@ char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, // Put field type. int encoded_type = kUpb_TypeToEncoded[type]; - if (modifiers & kUpb_FieldModifier_IsRepeated) { + if (field_mod & kUpb_FieldModifier_IsRepeated) { // Repeated fields shift the type number up (unlike other modifiers which // are bit flags). encoded_type += kUpb_EncodedType_RepeatedBase; @@ -208,13 +208,13 @@ char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, if (!ptr) return NULL; uint32_t encoded_modifiers = 0; - if (modifiers & kUpb_FieldModifier_IsProto3Singular) { + if (field_mod & kUpb_FieldModifier_IsProto3Singular) { encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular; } - if (modifiers & kUpb_FieldModifier_IsRequired) { + if (field_mod & kUpb_FieldModifier_IsRequired) { encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired; } - if ((modifiers & kUpb_FieldModifier_IsPacked) != + if ((field_mod & kUpb_FieldModifier_IsPacked) != (in->msg_mod & kUpb_MessageModifier_DefaultIsPacked)) { encoded_modifiers |= kUpb_EncodedFieldModifier_IsUnpacked; } diff --git a/upb/mini_table.h b/upb/mini_table.h index ace5ea9411..319c580843 100644 --- a/upb/mini_table.h +++ b/upb/mini_table.h @@ -92,13 +92,13 @@ typedef struct { // ptr = upb_MiniTable_PutOneofField(&e, ptr, ...); // // Oneofs must be encoded after all regular fields. -char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* buf, +char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, uint64_t msg_mod); -char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* buf, +char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, upb_FieldType type, uint32_t field_num, uint64_t field_mod); -char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* buf); -char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* buf, +char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr); +char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, uint32_t field_num); /** upb_MiniTable *************************************************************/ From a1e6f29de91a47e3acd2028f86ff1899c1906817 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 8 Mar 2022 18:18:04 -0800 Subject: [PATCH 05/11] Fixed CMakeLists.txt. --- cmake/CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index a06bd604ac..8c08321eaa 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -81,11 +81,18 @@ target_link_libraries(upb fastdecode port /third_party/utf8_range) +add_library(mini_table_internal INTERFACE) +target_link_libraries(mini_table_internal INTERFACE + port + table + upb) add_library(mini_table ../upb/mini_table.c ../upb/mini_table.h ../upb/mini_table.hpp) target_link_libraries(mini_table + mini_table_internal + port upb) add_library(fastdecode ../upb/decode.h From 97273a363867623b6a5d06b199978734c34dc22f Mon Sep 17 00:00:00 2001 From: theodorerose Date: Wed, 9 Mar 2022 19:02:01 +0000 Subject: [PATCH 06/11] WIP --- upb/decode.c | 17 +++++++++++++++-- upb/msg_test.cc | 26 ++++++++++++++++++++++++++ upb/msg_test.proto | 19 +++++++++++++++++++ 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index 683e4ec32b..02aa351643 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -412,15 +412,20 @@ static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr, return false; } +#include UPB_FORCEINLINE static bool decode_checkenum(upb_Decoder* d, const char* ptr, upb_Message* msg, const upb_MiniTable_Enum* e, const upb_MiniTable_Field* field, wireval* val) { uint32_t v = val->uint32_val; + printf("@test:we are here\n"); if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true; - return decode_checkenum_slow(d, ptr, msg, e, field, v); + printf("@test:we are here 2\n"); + bool ans = decode_checkenum_slow(d, ptr, msg, e, field, v); + printf("@test:%d\n", (int)ans); + return ans; } UPB_NOINLINE @@ -628,7 +633,15 @@ static const char* decode_tomap(upb_Decoder* d, const char* ptr, } ptr = decode_tosubmsg(d, ptr, &ent.k, subs, field, val->size); - _upb_Map_Set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena); + // check if ent had any unknown fields + size_t size; + const char* unknown = upb_Message_GetUnknown(&ent.k, &size); + printf("@test:size = %d %p\n", (int)size, unknown); + if(size != 0) { + + } else { + _upb_Map_Set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena); + } return ptr; } diff --git a/upb/msg_test.cc b/upb/msg_test.cc index cd6fdf67fb..3e9f1ee19e 100644 --- a/upb/msg_test.cc +++ b/upb/msg_test.cc @@ -398,3 +398,29 @@ TEST(MessageTest, MaxRequiredFields) { test_msg, kUpb_Encode_CheckRequired, arena.ptr(), &size); ASSERT_TRUE(serialized != nullptr); } + +TEST(MessageTest, MapField) { + upb::Arena arena; + upb_test_TestMapFieldExtra* test_msg_extra = + upb_test_TestMapFieldExtra_new(arena.ptr()); + + ASSERT_TRUE(upb_test_TestMapFieldExtra_map_field_set(test_msg_extra, 0, upb_test_TestMapFieldExtra_THREE, arena.ptr())); + + size_t size; + char* serialized = upb_test_TestMapFieldExtra_serialize_ex( + test_msg_extra, 0, arena.ptr(), &size); + ASSERT_NE(nullptr, serialized); + ASSERT_NE(0, size); + + upb_test_TestMapField* test_msg = upb_test_TestMapField_parse(serialized, size, arena.ptr()); + ASSERT_NE(nullptr, test_msg); + + ASSERT_FALSE(upb_test_TestMapField_map_field_get(test_msg, 0, nullptr)); + serialized = upb_test_TestMapField_serialize_ex( + test_msg, 0, arena.ptr(), &size); + ASSERT_NE(0, size); + // parse into second instance + upb_test_TestMapFieldExtra* test_msg_extra2 = + upb_test_TestMapFieldExtra_parse(serialized, size, arena.ptr()); + ASSERT_TRUE(upb_test_TestMapFieldExtra_map_field_get(test_msg_extra2, 0, nullptr)); +} diff --git a/upb/msg_test.proto b/upb/msg_test.proto index 81c7dee079..1f80f6918c 100644 --- a/upb/msg_test.proto +++ b/upb/msg_test.proto @@ -158,3 +158,22 @@ message TestMaxRequiredFields { required int32 required_int32_61 = 61; required int32 required_int32_62 = 62; } + +message TestMapField { + enum EnumMap { + ZERO = 0; + ONE = 1; + TWO = 2; + } + map map_field = 1; +} + +message TestMapFieldExtra { + enum EnumMap { + ZERO = 0; + ONE = 1; + TWO = 2; + THREE = 3; + } + map map_field = 1; +} From 7d5f4cd9b6a2c314b30566e6a2e96f2a275c4186 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 9 Mar 2022 16:33:18 -0800 Subject: [PATCH 07/11] Implemented the functionality to make the test pass. --- upb/decode.c | 40 ++++++++++++++++++++++++---------------- upb/msg_test.cc | 13 ++++++++----- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index 02aa351643..67074f5cba 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -385,6 +385,18 @@ static char* encode_varint32(uint32_t val, char* ptr) { return ptr; } +static void upb_Decode_AddUnknownVarints(upb_Decoder* d, upb_Message* msg, + uint32_t val1, uint32_t val2) { + char buf[20]; + char* end = buf; + end = encode_varint32(val1, end); + end = encode_varint32(val2, end); + + if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { + decode_err(d, kUpb_DecodeStatus_OutOfMemory); + } +} + UPB_NOINLINE static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr, upb_Message* msg, const upb_MiniTable_Enum* e, @@ -398,17 +410,9 @@ static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr, // Unrecognized enum goes into unknown fields. // For packed fields the tag could be arbitrarily far in the past, so we - // just re-encode the tag here. - char buf[20]; - char* end = buf; + // just re-encode the tag and value here. uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint; - end = encode_varint32(tag, end); - end = encode_varint32(v, end); - - if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) { - decode_err(d, kUpb_DecodeStatus_OutOfMemory); - } - + upb_Decode_AddUnknownVarints(d, msg, tag, v); return false; } @@ -419,12 +423,12 @@ static bool decode_checkenum(upb_Decoder* d, const char* ptr, upb_Message* msg, const upb_MiniTable_Field* field, wireval* val) { uint32_t v = val->uint32_val; - printf("@test:we are here\n"); + // printf("@test:we are here\n"); if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true; - printf("@test:we are here 2\n"); + // printf("@test:we are here 2\n"); bool ans = decode_checkenum_slow(d, ptr, msg, e, field, v); - printf("@test:%d\n", (int)ans); + // printf("@test:%d\n", (int)ans); return ans; } @@ -632,13 +636,17 @@ static const char* decode_tomap(upb_Decoder* d, const char* ptr, upb_value_ptr(_upb_Message_New(entry->subs[0].submsg, &d->arena)); } + const char* start = ptr; ptr = decode_tosubmsg(d, ptr, &ent.k, subs, field, val->size); // check if ent had any unknown fields size_t size; - const char* unknown = upb_Message_GetUnknown(&ent.k, &size); - printf("@test:size = %d %p\n", (int)size, unknown); + upb_Message_GetUnknown(&ent.k, &size); if(size != 0) { - + uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; + upb_Decode_AddUnknownVarints(d, msg, tag, (uint32_t)(ptr - start)); + if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { + decode_err(d, kUpb_DecodeStatus_OutOfMemory); + } } else { _upb_Map_Set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena); } diff --git a/upb/msg_test.cc b/upb/msg_test.cc index 3e9f1ee19e..2d1f8e991b 100644 --- a/upb/msg_test.cc +++ b/upb/msg_test.cc @@ -404,7 +404,8 @@ TEST(MessageTest, MapField) { upb_test_TestMapFieldExtra* test_msg_extra = upb_test_TestMapFieldExtra_new(arena.ptr()); - ASSERT_TRUE(upb_test_TestMapFieldExtra_map_field_set(test_msg_extra, 0, upb_test_TestMapFieldExtra_THREE, arena.ptr())); + ASSERT_TRUE(upb_test_TestMapFieldExtra_map_field_set( + test_msg_extra, 0, upb_test_TestMapFieldExtra_THREE, arena.ptr())); size_t size; char* serialized = upb_test_TestMapFieldExtra_serialize_ex( @@ -412,15 +413,17 @@ TEST(MessageTest, MapField) { ASSERT_NE(nullptr, serialized); ASSERT_NE(0, size); - upb_test_TestMapField* test_msg = upb_test_TestMapField_parse(serialized, size, arena.ptr()); + upb_test_TestMapField* test_msg = + upb_test_TestMapField_parse(serialized, size, arena.ptr()); ASSERT_NE(nullptr, test_msg); ASSERT_FALSE(upb_test_TestMapField_map_field_get(test_msg, 0, nullptr)); - serialized = upb_test_TestMapField_serialize_ex( - test_msg, 0, arena.ptr(), &size); + serialized = + upb_test_TestMapField_serialize_ex(test_msg, 0, arena.ptr(), &size); ASSERT_NE(0, size); // parse into second instance upb_test_TestMapFieldExtra* test_msg_extra2 = upb_test_TestMapFieldExtra_parse(serialized, size, arena.ptr()); - ASSERT_TRUE(upb_test_TestMapFieldExtra_map_field_get(test_msg_extra2, 0, nullptr)); + ASSERT_TRUE( + upb_test_TestMapFieldExtra_map_field_get(test_msg_extra2, 0, nullptr)); } From 1046d778a2a975836e2bfaf6bad3d5fde6dfaf2a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 9 Mar 2022 16:35:13 -0800 Subject: [PATCH 08/11] Removed debug print statements. --- upb/decode.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index 67074f5cba..cf476a4ad3 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -416,19 +416,15 @@ static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr, return false; } -#include UPB_FORCEINLINE static bool decode_checkenum(upb_Decoder* d, const char* ptr, upb_Message* msg, const upb_MiniTable_Enum* e, const upb_MiniTable_Field* field, wireval* val) { uint32_t v = val->uint32_val; - // printf("@test:we are here\n"); if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true; - // printf("@test:we are here 2\n"); bool ans = decode_checkenum_slow(d, ptr, msg, e, field, v); - // printf("@test:%d\n", (int)ans); return ans; } From 6509f1356802a626da85b14c3ed984a4aae93976 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 9 Mar 2022 16:36:11 -0800 Subject: [PATCH 09/11] Reverted extra debug assignment. --- upb/decode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/upb/decode.c b/upb/decode.c index cf476a4ad3..0e1dfd14cb 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -424,8 +424,7 @@ static bool decode_checkenum(upb_Decoder* d, const char* ptr, upb_Message* msg, if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true; - bool ans = decode_checkenum_slow(d, ptr, msg, e, field, v); - return ans; + return decode_checkenum_slow(d, ptr, msg, e, field, v); } UPB_NOINLINE From bcb08bf9f0e3860e87ba408c6c59339fb7ecb77d Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 9 Mar 2022 17:05:37 -0800 Subject: [PATCH 10/11] Clang-format. --- upb/decode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/upb/decode.c b/upb/decode.c index 0e1dfd14cb..2d5bb49525 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -636,7 +636,7 @@ static const char* decode_tomap(upb_Decoder* d, const char* ptr, // check if ent had any unknown fields size_t size; upb_Message_GetUnknown(&ent.k, &size); - if(size != 0) { + if (size != 0) { uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited; upb_Decode_AddUnknownVarints(d, msg, tag, (uint32_t)(ptr - start)); if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) { From afffa9eaeb58efedbe695b6f985ea7c810871af2 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 10 Mar 2022 15:21:55 -0800 Subject: [PATCH 11/11] Two Ruby changes to unblock the release --- bazel/amalgamate.py | 3 +++ upb/def.c | 7 ++++--- upb/port_def.inc | 8 ++++++++ upb/port_undef.inc | 1 + upb/table.c | 10 +++++----- upb/table_internal.h | 2 ++ 6 files changed, 23 insertions(+), 8 deletions(-) diff --git a/bazel/amalgamate.py b/bazel/amalgamate.py index 103b11c351..70399085ca 100755 --- a/bazel/amalgamate.py +++ b/bazel/amalgamate.py @@ -44,6 +44,9 @@ class Amalgamator: self.output_c.write("/* Amalgamated source file */\n") self.output_c.write('#include "%supb.h"\n' % (prefix)) + if prefix == "ruby-": + self.output_h.write("// Ruby is still using proto3 enum semantics for proto2\n") + self.output_h.write("#define UPB_DISABLE_PROTO2_ENUM_CHECKING\n") self.output_c.write(open("upb/port_def.inc").read()) self.output_h.write("/* Amalgamated source file */\n") diff --git a/upb/def.c b/upb/def.c index 451aecdaf3..140465aab5 100644 --- a/upb/def.c +++ b/upb/def.c @@ -1414,6 +1414,7 @@ static uint8_t map_descriptortype(const upb_FieldDef* f) { return kUpb_FieldType_Bytes; } else if (type == kUpb_FieldType_Enum && (f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3 || + UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 || // TODO(https://github.com/protocolbuffers/upb/issues/541): // fix map enum values to check for unknown enum values and put // them in the unknown field set. @@ -1584,11 +1585,11 @@ static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) { fill_fieldlayout(field, f); - if (upb_FieldDef_IsSubMessage(f)) { + if (field->descriptortype == kUpb_FieldType_Message || + field->descriptortype == kUpb_FieldType_Group) { field->submsg_index = sublayout_count++; subs[field->submsg_index].submsg = upb_FieldDef_MessageSubDef(f)->layout; - } else if (upb_FieldDef_CType(f) == kUpb_CType_Enum && - f->sub.enumdef->file->syntax == kUpb_Syntax_Proto2) { + } else if (field->descriptortype == kUpb_FieldType_Enum) { field->submsg_index = sublayout_count++; subs[field->submsg_index].subenum = upb_FieldDef_EnumSubDef(f)->layout; UPB_ASSERT(subs[field->submsg_index].subenum); diff --git a/upb/port_def.inc b/upb/port_def.inc index 75d416d1ce..2b240ff6e8 100644 --- a/upb/port_def.inc +++ b/upb/port_def.inc @@ -251,3 +251,11 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); #define UPB_UNPOISON_MEMORY_REGION(addr, size) \ ((void)(addr), (void)(size)) #endif + +/* Disable proto2 arena behavior (TEMPORARY) **********************************/ + +#ifdef UPB_DISABLE_PROTO2_ENUM_CHECKING +#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 1 +#else +#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 0 +#endif diff --git a/upb/port_undef.inc b/upb/port_undef.inc index 70956df25d..3ef2d0dbcd 100644 --- a/upb/port_undef.inc +++ b/upb/port_undef.inc @@ -59,3 +59,4 @@ #undef UPB_POISON_MEMORY_REGION #undef UPB_UNPOISON_MEMORY_REGION #undef UPB_ASAN +#undef UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 diff --git a/upb/table.c b/upb/table.c index 33b3a9dc6e..63fe3cc960 100644 --- a/upb/table.c +++ b/upb/table.c @@ -433,14 +433,14 @@ const uint64_t kWyhashSalt[5] = { 0x082EFA98EC4E6C89ULL, 0x452821E638D01377ULL, }; -static uint32_t table_hash(const char* p, size_t n) { +uint32_t _upb_Hash(const char* p, size_t n) { return Wyhash(p, n, 0, kWyhashSalt); } static uint32_t strhash(upb_tabkey key) { uint32_t len; char* str = upb_tabstr(key, &len); - return table_hash(str, len); + return _upb_Hash(str, len); } static bool streql(upb_tabkey k1, lookupkey_t k2) { @@ -496,20 +496,20 @@ bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len, tabkey = strcopy(key, a); if (tabkey == 0) return false; - hash = table_hash(key.str.str, key.str.len); + hash = _upb_Hash(key.str.str, key.str.len); insert(&t->t, key, tabkey, v, hash, &strhash, &streql); return true; } bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len, upb_value* v) { - uint32_t hash = table_hash(key, len); + uint32_t hash = _upb_Hash(key, len); return lookup(&t->t, strkey2(key, len), v, hash, &streql); } bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len, upb_value* val) { - uint32_t hash = table_hash(key, len); + uint32_t hash = _upb_Hash(key, len); upb_tabkey tabkey; return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql); } diff --git a/upb/table_internal.h b/upb/table_internal.h index 9fb2d238cd..74076551de 100644 --- a/upb/table_internal.h +++ b/upb/table_internal.h @@ -374,6 +374,8 @@ void upb_inttable_iter_setdone(upb_inttable_iter* i); bool upb_inttable_iter_isequal(const upb_inttable_iter* i1, const upb_inttable_iter* i2); +uint32_t _upb_Hash(const char* p, size_t n); + #ifdef __cplusplus } /* extern "C" */ #endif