Merge branch 'main' into minitable-api

pull/13171/head
Joshua Haberman 3 years ago
commit f2aab6ed3c
  1. 61
      BUILD
  2. 16
      README.md
  3. 3
      bazel/amalgamate.py
  4. 3
      bazel/py_extension.bzl
  5. 1
      bazel/pyproto_test_wrapper.bzl
  6. 6
      benchmarks/build_defs.bzl
  7. 3
      benchmarks/compare.py
  8. 7
      cmake/CMakeLists.txt
  9. 7
      upb/conformance_upb.c
  10. 38
      upb/decode.c
  11. 13
      upb/def.c
  12. 10
      upb/mini_table.c
  13. 12
      upb/mini_table.h
  14. 29
      upb/msg_test.cc
  15. 19
      upb/msg_test.proto
  16. 8
      upb/port_def.inc
  17. 1
      upb/port_undef.inc
  18. 10
      upb/table.c
  19. 2
      upb/table_internal.h

61
BUILD

@ -109,6 +109,16 @@ cc_library(
],
)
# Header-only target: exports upb/msg_internal.h (no srcs) together with the
# :port, :table and :upb dependencies.
cc_library(
name = "mini_table_internal",
hdrs = ["upb/msg_internal.h"],
deps = [
":port",
":table",
":upb",
],
)
cc_library(
name = "mini_table",
srcs = ["upb/mini_table.c"],
@ -118,7 +128,11 @@ cc_library(
],
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
deps = [":upb"],
deps = [
":mini_table_internal",
":port",
":upb",
],
)
cc_test(
@ -126,8 +140,10 @@ cc_test(
srcs = ["upb/mini_table_test.cc"],
deps = [
":mini_table",
"@com_google_googletest//:gtest_main",
":mini_table_internal",
":upb",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_googletest//:gtest_main",
],
)
@ -458,6 +474,47 @@ sh_test(
deps = ["@bazel_tools//tools/bash/runfiles"],
)
# Test-only conformance binary built from upb/conformance_upb.c with the
# REBUILD_MINITABLES preprocessor symbol defined on the command line.
cc_binary(
name = "conformance_upb_dynamic_minitable",
testonly = 1,
srcs = ["upb/conformance_upb.c"],
copts = UPB_DEFAULT_COPTS + [
"-DREBUILD_MINITABLES",
],
data = ["upb/conformance_upb_failures.txt"],
deps = [
":conformance_proto_upb",
":conformance_proto_upbdefs",
":test_messages_proto2_upbdefs",
":test_messages_proto3_upbdefs",
"//:json",
"//:port",
"//:reflection",
"//:textformat",
"//:upb",
],
)
# Generates test_conformance_upb_dynamic_minitable.sh, which invokes the
# protobuf conformance_test_runner with --enforce_recommended and the upb
# failure list against the ./conformance_upb_dynamic_minitable binary.
make_shell_script(
name = "gen_test_conformance_upb_dynamic_minitable",
out = "test_conformance_upb_dynamic_minitable.sh",
contents = "external/com_google_protobuf/conformance_test_runner " +
" --enforce_recommended " +
" --failure_list ./upb/conformance_upb_failures.txt" +
" ./conformance_upb_dynamic_minitable",
)
# Runs the test_conformance_upb_dynamic_minitable.sh script; the failure list,
# the conformance binary and the conformance_test_runner are supplied as
# runtime data.
sh_test(
name = "test_conformance_upb_dynamic_minitable",
srcs = ["test_conformance_upb_dynamic_minitable.sh"],
data = [
"upb/conformance_upb_failures.txt",
":conformance_upb_dynamic_minitable",
"@com_google_protobuf//:conformance_test_runner",
],
deps = ["@bazel_tools//tools/bash/runfiles"],
)
# Internal C/C++ libraries #####################################################
cc_library(

@ -57,6 +57,22 @@ For PHP, use [PECL](https://pecl.php.net/package/protobuf):
$ sudo pecl install protobuf
```
Alternatively, you can build and install upb using the
[vcpkg](https://github.com/microsoft/vcpkg/) dependency manager:
git clone https://github.com/Microsoft/vcpkg.git
cd vcpkg
./bootstrap-vcpkg.sh
./vcpkg integrate install
./vcpkg install upb
The upb port in vcpkg is kept up to date by Microsoft team members and community
contributors.
If the version is out of date, please
[create an issue or pull request](https://github.com/Microsoft/vcpkg) on the
vcpkg repository.
## Contributing
Please see [CONTRIBUTING.md](CONTRIBUTING.md).

@ -44,6 +44,9 @@ class Amalgamator:
self.output_c.write("/* Amalgamated source file */\n")
self.output_c.write('#include "%supb.h"\n' % (prefix))
if prefix == "ruby-":
self.output_h.write("// Ruby is still using proto3 enum semantics for proto2\n")
self.output_h.write("#define UPB_DISABLE_PROTO2_ENUM_CHECKING\n")
self.output_c.write(open("upb/port_def.inc").read())
self.output_h.write("/* Amalgamated source file */\n")

@ -1,10 +1,9 @@
load(
"//bazel:build_defs.bzl",
"UPB_DEFAULT_COPTS",
)
def py_extension(name, srcs, deps=[]):
def py_extension(name, srcs, deps = []):
version_script = name + "_version_script.lds"
symbol = "PyInit_" + name
native.genrule(

@ -1,4 +1,3 @@
# copybara:strip_for_google3_begin
def pyproto_test_wrapper(name):

@ -24,7 +24,7 @@
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# copybara:insert_for_google3_begin
# load("//tools/build_defs/proto/cpp:cc_proto_library.bzl", _cc_proto_library="cc_proto_library")
# load("//tools/build_defs/proto/cpp:cc_proto_library.bzl", _cc_proto_library = "cc_proto_library")
# copybara:insert_end
# copybara:strip_for_google3_begin
@ -36,7 +36,7 @@ def proto_library(**kwargs):
# copybara:insert_for_google3_begin
# cc_api_version = 2,
# copybara:insert_end
**kwargs,
**kwargs
)
def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs):
@ -55,7 +55,7 @@ def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs):
# copybara:insert_end
name = name,
srcs = srcs,
**kwargs,
**kwargs
)
def cc_optimizefor_proto_library(name, srcs, outs, optimize_for):

@ -81,7 +81,8 @@ def Benchmark(outbase, bench_cpu=True, runs=12, fasttable=False):
print("{} {} {} ns/op".format(*values), file=f)
Run("sort {} -o {} ".format(txt_filename, txt_filename))
Run("CC=clang bazel build -c opt --copt=-g --copt=-march=native :conformance_upb" + extra_args)
Run("CC=clang bazel build -c opt --copt=-g --copt=-march=native :conformance_upb"
+ extra_args)
Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase))

@ -81,11 +81,18 @@ target_link_libraries(upb
fastdecode
port
/third_party/utf8_range)
# INTERFACE library (no sources of its own): consumers that link against it
# pick up the port, table and upb libraries.
add_library(mini_table_internal INTERFACE)
target_link_libraries(mini_table_internal INTERFACE
port
table
upb)
add_library(mini_table
../upb/mini_table.c
../upb/mini_table.h
../upb/mini_table.hpp)
target_link_libraries(mini_table
mini_table_internal
port
upb)
add_library(fastdecode
../upb/decode.h

@ -323,8 +323,15 @@ bool DoTestIo(upb_DefPool* symtab) {
int main(void) {
upb_DefPool* symtab = upb_DefPool_New();
#ifdef REBUILD_MINITABLES
_upb_DefPool_LoadDefInitEx(
symtab, &src_google_protobuf_test_messages_proto2_proto_upbdefinit, true);
_upb_DefPool_LoadDefInitEx(
symtab, &src_google_protobuf_test_messages_proto3_proto_upbdefinit, true);
#else
protobuf_test_messages_proto2_TestAllTypesProto2_getmsgdef(symtab);
protobuf_test_messages_proto3_TestAllTypesProto3_getmsgdef(symtab);
#endif
while (1) {
if (!DoTestIo(symtab)) {

@ -385,6 +385,18 @@ static char* encode_varint32(uint32_t val, char* ptr) {
return ptr;
}
// Writes val1 followed by val2 into a scratch buffer using encode_varint32()
// and appends the resulting bytes to msg's unknown-field data, allocating from
// the decoder's arena. If the append fails, the failure is reported through
// decode_err() with kUpb_DecodeStatus_OutOfMemory.
static void upb_Decode_AddUnknownVarints(upb_Decoder* d, upb_Message* msg,
                                         uint32_t val1, uint32_t val2) {
  char scratch[20];
  char* cursor = encode_varint32(val1, scratch);
  cursor = encode_varint32(val2, cursor);
  if (_upb_Message_AddUnknown(msg, scratch, cursor - scratch, &d->arena)) {
    return;
  }
  decode_err(d, kUpb_DecodeStatus_OutOfMemory);
}
UPB_NOINLINE
static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr,
upb_Message* msg, const upb_MiniTable_Enum* e,
@ -398,17 +410,9 @@ static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr,
// Unrecognized enum goes into unknown fields.
// For packed fields the tag could be arbitrarily far in the past, so we
// just re-encode the tag here.
char buf[20];
char* end = buf;
// just re-encode the tag and value here.
uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint;
end = encode_varint32(tag, end);
end = encode_varint32(v, end);
if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) {
decode_err(d, kUpb_DecodeStatus_OutOfMemory);
}
upb_Decode_AddUnknownVarints(d, msg, tag, v);
return false;
}
@ -627,8 +631,20 @@ static const char* decode_tomap(upb_Decoder* d, const char* ptr,
upb_value_ptr(_upb_Message_New(entry->subs[0].submsg, &d->arena));
}
const char* start = ptr;
ptr = decode_tosubmsg(d, ptr, &ent.k, subs, field, val->size);
_upb_Map_Set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena);
// check if ent had any unknown fields
size_t size;
upb_Message_GetUnknown(&ent.k, &size);
if (size != 0) {
uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited;
upb_Decode_AddUnknownVarints(d, msg, tag, (uint32_t)(ptr - start));
if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
decode_err(d, kUpb_DecodeStatus_OutOfMemory);
}
} else {
_upb_Map_Set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena);
}
return ptr;
}

@ -1413,7 +1413,12 @@ static uint8_t map_descriptortype(const upb_FieldDef* f) {
if (type == kUpb_FieldType_String && f->file->syntax == kUpb_Syntax_Proto2) {
return kUpb_FieldType_Bytes;
} else if (type == kUpb_FieldType_Enum &&
f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3) {
(f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3 ||
UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 ||
// TODO(https://github.com/protocolbuffers/upb/issues/541):
// fix map enum values to check for unknown enum values and put
// them in the unknown field set.
upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f)))) {
return kUpb_FieldType_Int32;
}
return type;
@ -1580,11 +1585,11 @@ static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) {
fill_fieldlayout(field, f);
if (upb_FieldDef_IsSubMessage(f)) {
if (field->descriptortype == kUpb_FieldType_Message ||
field->descriptortype == kUpb_FieldType_Group) {
field->submsg_index = sublayout_count++;
subs[field->submsg_index].submsg = upb_FieldDef_MessageSubDef(f)->layout;
} else if (upb_FieldDef_CType(f) == kUpb_CType_Enum &&
f->sub.enumdef->file->syntax == kUpb_Syntax_Proto2) {
} else if (field->descriptortype == kUpb_FieldType_Enum) {
field->submsg_index = sublayout_count++;
subs[field->submsg_index].subenum = upb_FieldDef_EnumSubDef(f)->layout;
UPB_ASSERT(subs[field->submsg_index].subenum);

@ -173,7 +173,7 @@ char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
upb_FieldType type, uint32_t field_num,
uint64_t modifiers) {
uint64_t field_mod) {
static const char kUpb_TypeToEncoded[] = {
[kUpb_FieldType_Double] = kUpb_EncodedType_Double,
[kUpb_FieldType_Float] = kUpb_EncodedType_Float,
@ -209,7 +209,7 @@ char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
// Put field type.
int encoded_type = kUpb_TypeToEncoded[type];
if (modifiers & kUpb_FieldModifier_IsRepeated) {
if (field_mod & kUpb_FieldModifier_IsRepeated) {
// Repeated fields shift the type number up (unlike other modifiers which
// are bit flags).
encoded_type += kUpb_EncodedType_RepeatedBase;
@ -218,13 +218,13 @@ char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
if (!ptr) return NULL;
uint32_t encoded_modifiers = 0;
if (modifiers & kUpb_FieldModifier_IsProto3Singular) {
if (field_mod & kUpb_FieldModifier_IsProto3Singular) {
encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular;
}
if (modifiers & kUpb_FieldModifier_IsRequired) {
if (field_mod & kUpb_FieldModifier_IsRequired) {
encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired;
}
if ((modifiers & kUpb_FieldModifier_IsPacked) !=
if ((field_mod & kUpb_FieldModifier_IsPacked) !=
(in->msg_mod & kUpb_MessageModifier_DefaultIsPacked)) {
encoded_modifiers |= kUpb_EncodedFieldModifier_IsUnpacked;
}

@ -1,9 +1,5 @@
/*
<<<<<<< HEAD
* Copyright (c) 2009-2021, Google LLC
=======
* Copyright (c) 2009-2022, Google LLC
>>>>>>> mini-table-1
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -96,13 +92,13 @@ typedef struct {
// ptr = upb_MiniTable_PutOneofField(&e, ptr, ...);
//
// Oneofs must be encoded after all regular fields.
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* buf,
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
uint64_t msg_mod);
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* buf,
char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr,
upb_FieldType type, uint32_t field_num,
uint64_t field_mod);
char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* buf);
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* buf,
char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr);
char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr,
uint32_t field_num);
/** upb_MiniTable *************************************************************/

@ -398,3 +398,32 @@ TEST(MessageTest, MaxRequiredFields) {
test_msg, kUpb_Encode_CheckRequired, arena.ptr(), &size);
ASSERT_TRUE(serialized != nullptr);
}
// Round-trips a map entry whose enum value (THREE) exists in
// TestMapFieldExtra.EnumMap but not in TestMapField.EnumMap:
//   1. serialize a TestMapFieldExtra holding {0: THREE};
//   2. parse it as TestMapField and check that key 0 is not visible in the map;
//   3. re-serialize the TestMapField and parse it as TestMapFieldExtra again,
//      checking that key 0 is visible once more.
TEST(MessageTest, MapField) {
  upb::Arena arena;
  upb_test_TestMapFieldExtra* test_msg_extra =
      upb_test_TestMapFieldExtra_new(arena.ptr());
  ASSERT_TRUE(upb_test_TestMapFieldExtra_map_field_set(
      test_msg_extra, 0, upb_test_TestMapFieldExtra_THREE, arena.ptr()));

  size_t size;
  char* serialized = upb_test_TestMapFieldExtra_serialize_ex(
      test_msg_extra, 0, arena.ptr(), &size);
  ASSERT_NE(nullptr, serialized);
  ASSERT_NE(0, size);

  upb_test_TestMapField* test_msg =
      upb_test_TestMapField_parse(serialized, size, arena.ptr());
  ASSERT_NE(nullptr, test_msg);
  ASSERT_FALSE(upb_test_TestMapField_map_field_get(test_msg, 0, nullptr));

  serialized =
      upb_test_TestMapField_serialize_ex(test_msg, 0, arena.ptr(), &size);
  // Fail cleanly instead of feeding a null buffer to the parser below.
  ASSERT_NE(nullptr, serialized);
  ASSERT_NE(0, size);

  // parse into second instance
  upb_test_TestMapFieldExtra* test_msg_extra2 =
      upb_test_TestMapFieldExtra_parse(serialized, size, arena.ptr());
  // Fail cleanly instead of passing a null message to the map getter.
  ASSERT_NE(nullptr, test_msg_extra2);
  ASSERT_TRUE(
      upb_test_TestMapFieldExtra_map_field_get(test_msg_extra2, 0, nullptr));
}

@ -158,3 +158,22 @@ message TestMaxRequiredFields {
required int32 required_int32_61 = 61;
required int32 required_int32_62 = 62;
}
// Message with a single map<int32, EnumMap> field (number 1); EnumMap defines
// the values ZERO, ONE and TWO only.
message TestMapField {
enum EnumMap {
ZERO = 0;
ONE = 1;
TWO = 2;
}
map<int32, EnumMap> map_field = 1;
}
// Message with a single map<int32, EnumMap> field (number 1); here EnumMap
// defines ZERO, ONE and TWO plus the additional value THREE = 3.
message TestMapFieldExtra {
enum EnumMap {
ZERO = 0;
ONE = 1;
TWO = 2;
THREE = 3;
}
map<int32, EnumMap> map_field = 1;
}

@ -251,3 +251,11 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
((void)(addr), (void)(size))
#endif
/* Disable proto2 enum checking (TEMPORARY) ***********************************/
#ifdef UPB_DISABLE_PROTO2_ENUM_CHECKING
#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 1
#else
#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 0
#endif

@ -59,3 +59,4 @@
#undef UPB_POISON_MEMORY_REGION
#undef UPB_UNPOISON_MEMORY_REGION
#undef UPB_ASAN
#undef UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3

@ -433,14 +433,14 @@ const uint64_t kWyhashSalt[5] = {
0x082EFA98EC4E6C89ULL, 0x452821E638D01377ULL,
};
static uint32_t table_hash(const char* p, size_t n) {
uint32_t _upb_Hash(const char* p, size_t n) {
return Wyhash(p, n, 0, kWyhashSalt);
}
static uint32_t strhash(upb_tabkey key) {
uint32_t len;
char* str = upb_tabstr(key, &len);
return table_hash(str, len);
return _upb_Hash(str, len);
}
static bool streql(upb_tabkey k1, lookupkey_t k2) {
@ -496,20 +496,20 @@ bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len,
tabkey = strcopy(key, a);
if (tabkey == 0) return false;
hash = table_hash(key.str.str, key.str.len);
hash = _upb_Hash(key.str.str, key.str.len);
insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
return true;
}
bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len,
upb_value* v) {
uint32_t hash = table_hash(key, len);
uint32_t hash = _upb_Hash(key, len);
return lookup(&t->t, strkey2(key, len), v, hash, &streql);
}
bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len,
upb_value* val) {
uint32_t hash = table_hash(key, len);
uint32_t hash = _upb_Hash(key, len);
upb_tabkey tabkey;
return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql);
}

@ -374,6 +374,8 @@ void upb_inttable_iter_setdone(upb_inttable_iter* i);
bool upb_inttable_iter_isequal(const upb_inttable_iter* i1,
const upb_inttable_iter* i2);
uint32_t _upb_Hash(const char* p, size_t n);
#ifdef __cplusplus
} /* extern "C" */
#endif

Loading…
Cancel
Save