From 8a3470c543d3432ca375a5b94653e1abb91230ab Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 18 Oct 2020 10:35:39 -0700 Subject: [PATCH 01/18] WIP. --- benchmarks/BUILD | 32 ++++++++++++++++++++++++++++++++ benchmarks/protobuf_binary.cc | 10 ++++++++++ benchmarks/small.proto | 35 +++++++++++++++++++++++++++++++++++ benchmarks/upb_binary.c | 12 ++++++++++++ 4 files changed, 89 insertions(+) create mode 100644 benchmarks/protobuf_binary.cc create mode 100644 benchmarks/small.proto create mode 100644 benchmarks/upb_binary.c diff --git a/benchmarks/BUILD b/benchmarks/BUILD index 87315a34e9..20d28dd121 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -51,3 +51,35 @@ cc_binary( "@com_google_protobuf//:protobuf", ], ) + +# Size benchmarks. + +upb_proto_library( + name = "empty_upb_proto", + deps = ["@com_google_protobuf//:empty_proto"], +) + +cc_proto_library( + name = "empty_cc_proto", + deps = ["@com_google_protobuf//:empty_proto"], +) + +cc_binary( + name = "upb_binary", + testonly = 1, + srcs = ["upb_binary.c"], + deps = [ + ":empty_upb_proto", + ], + #features = ["fully_static_link"], +) + +cc_binary( + name = "protobuf_binary", + testonly = 1, + srcs = ["protobuf_binary.cc"], + deps = [ + ":empty_cc_proto", + ], + #features = ["fully_static_link"], +) diff --git a/benchmarks/protobuf_binary.cc b/benchmarks/protobuf_binary.cc new file mode 100644 index 0000000000..b585aa6454 --- /dev/null +++ b/benchmarks/protobuf_binary.cc @@ -0,0 +1,10 @@ + +#include "google/protobuf/empty.pb.h" + +char buf[1]; + +int main() { + google::protobuf::Empty proto; + proto.ParseFromArray(buf, 1); + proto.SerializeToArray(buf, 1); +} diff --git a/benchmarks/small.proto b/benchmarks/small.proto new file mode 100644 index 0000000000..cfc59c2c2c --- /dev/null +++ b/benchmarks/small.proto @@ -0,0 +1,35 @@ +// A small proto, for measuring the overhead of a minimal use of +// protocol buffers. 
+ +syntax = "proto3"; + +option optimize_for = LITE_RUNTIME; +option cc_enable_arenas = true; + +package upb_benchmark; + +message Person { + string name = 1; + int32 id = 2; // Unique ID number for this person. + string email = 3; + + enum PhoneType { + MOBILE = 0; + HOME = 1; + WORK = 2; + } + + message PhoneNumber { + string number = 1; + PhoneType type = 2; + } + + repeated PhoneNumber phones = 4; + + int64 last_updated = 5; +} + +// Our address book file is just one of these. +message AddressBook { + repeated Person people = 1; +} diff --git a/benchmarks/upb_binary.c b/benchmarks/upb_binary.c new file mode 100644 index 0000000000..8e2d86b904 --- /dev/null +++ b/benchmarks/upb_binary.c @@ -0,0 +1,12 @@ + +#include "google/protobuf/empty.upb.h" + +char buf[1]; + +int main() { + upb_arena *arena = upb_arena_new(); + size_t size; + google_protobuf_Empty *proto = google_protobuf_Empty_parse(buf, 1, arena); + google_protobuf_Empty_serialize(proto, arena, &size); + return 0; +} From 4bd34da105b5390489b2df750858dd31cb5d3328 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 4 Nov 2020 20:16:47 -0800 Subject: [PATCH 02/18] WIP. 
--- benchmarks/BUILD | 87 +++++++++++++++---- benchmarks/build_defs.bzl | 44 ++++++++++ benchmarks/empty.proto | 6 ++ ...obuf_binary.cc => protobuf_binary.cc.tmpl} | 4 +- benchmarks/upb_binary.c | 12 --- benchmarks/upb_binary.c.tmpl | 12 +++ 6 files changed, 132 insertions(+), 33 deletions(-) create mode 100644 benchmarks/build_defs.bzl create mode 100644 benchmarks/empty.proto rename benchmarks/{protobuf_binary.cc => protobuf_binary.cc.tmpl} (57%) delete mode 100644 benchmarks/upb_binary.c create mode 100644 benchmarks/upb_binary.c.tmpl diff --git a/benchmarks/BUILD b/benchmarks/BUILD index 449c2b5e7f..b0e598c6c0 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -3,6 +3,11 @@ load( "upb_proto_library", "upb_proto_reflection_library", ) +load( + ":build_defs.bzl", + "tmpl_cc_binary", + "cc_lite_proto_library", +) licenses(["notice"]) @@ -60,32 +65,76 @@ cc_binary( # Size benchmarks. -upb_proto_library( - name = "empty_upb_proto", - deps = ["@com_google_protobuf//:empty_proto"], +SIZE_BENCHMARKS = { + "empty": "Empty", + "descriptor": "FileDescriptorSet", + "100_msgs": "Message99", +} + +py_binary( + name = "gen_benchmark_proto", + srcs = ["gen_benchmark_proto.py"], ) -cc_proto_library( - name = "empty_cc_proto", - deps = ["@com_google_protobuf//:empty_proto"], +genrule( + name = "gen_100_msgs", + tools = [":gen_benchmark_proto"], + outs = ["100_msgs.proto"], + cmd = "$(execpath :gen_benchmark_proto) $@", ) -cc_binary( - name = "upb_binary", +[( +proto_library( + name = k + "_proto", + srcs = [k + ".proto"], +), +upb_proto_library( + name = k + "_upb_proto", + deps = [":" + k + "_proto"], +), +cc_proto_library( + name = k + "_cc_proto", + deps = [":" + k + "_proto"], +), +tmpl_cc_binary( + name = k + "_upb_binary", testonly = 1, - srcs = ["upb_binary.c"], + srcs = ["upb_binary.c.tmpl"], + replacements = { + "PROTO": "upb_benchmark_" + v, + "INCLUDE": "benchmarks/" + k + ".upb.h", + }, deps = [ - ":empty_upb_proto", + ":" + k + "_upb_proto", ], - #features = 
["fully_static_link"], -) - -cc_binary( - name = "protobuf_binary", +), +tmpl_cc_binary( + name = k + "_protobuf_binary", testonly = 1, - srcs = ["protobuf_binary.cc"], + srcs = ["protobuf_binary.cc.tmpl"], + replacements = { + "PROTO": "upb_benchmark::" + v, + "INCLUDE": "benchmarks/" + k + ".pb.h", + }, deps = [ - ":empty_cc_proto", + ":" + k + "_cc_proto", ], - #features = ["fully_static_link"], -) +), +cc_lite_proto_library( + srcs = [k + ".proto"], + outs = [k + "_lite.proto"], + name = k + "_cc_lite_proto", +), +tmpl_cc_binary( + name = k + "_lite_protobuf_binary", + testonly = 1, + srcs = ["protobuf_binary.cc.tmpl"], + replacements = { + "PROTO": "upb_benchmark::" + v, + "INCLUDE": "benchmarks/" + k + "_lite.pb.h", + }, + deps = [ + ":" + k + "_cc_lite_proto", + ], +)) for k, v in SIZE_BENCHMARKS.items()] + diff --git a/benchmarks/build_defs.bzl b/benchmarks/build_defs.bzl new file mode 100644 index 0000000000..402a523db2 --- /dev/null +++ b/benchmarks/build_defs.bzl @@ -0,0 +1,44 @@ + +def tmpl_cc_binary(name, srcs, replacements = [], **kwargs): + if len(srcs) != 1: + fail("Currently srcs must have exactly 1 element") + src = srcs[0] + if not src.endswith(".tmpl"): + fail("srcs of tmpl_cc_binary must end with .tmpl") + outs = [name + "_" + src[:-5]] + sed_cmds = ["s,{},{},g".format(k, v) for k, v in replacements.items()] + cmd = "sed -e '{}' $< > $@".format("; ".join(sed_cmds)) + + native.genrule( + name = name + "_gen_srcs", + srcs = [src], + outs = outs, + cmd = cmd, + ) + + native.cc_binary( + name = name, + srcs = outs, + **kwargs, + ) + +def cc_lite_proto_library(name, srcs, outs): + if len(srcs) != 1: + fail("Currently srcs must have exactly 1 element") + + native.genrule( + name = name + "_gen_proto", + srcs = srcs, + outs = outs, + cmd = "cp $< $@ && chmod a+w $@ && echo 'option optimize_for = LITE_RUNTIME;' >> $@", + ) + + native.proto_library( + name = name + "_proto", + srcs = outs, + ) + + native.cc_proto_library( + name = name, + deps = [":" + 
name + "_proto"], + ) diff --git a/benchmarks/empty.proto b/benchmarks/empty.proto new file mode 100644 index 0000000000..bcccaf9abe --- /dev/null +++ b/benchmarks/empty.proto @@ -0,0 +1,6 @@ + +syntax = "proto3"; + +package upb_benchmark; + +message Empty {} diff --git a/benchmarks/protobuf_binary.cc b/benchmarks/protobuf_binary.cc.tmpl similarity index 57% rename from benchmarks/protobuf_binary.cc rename to benchmarks/protobuf_binary.cc.tmpl index b585aa6454..139ad7433c 100644 --- a/benchmarks/protobuf_binary.cc +++ b/benchmarks/protobuf_binary.cc.tmpl @@ -1,10 +1,10 @@ -#include "google/protobuf/empty.pb.h" +#include "INCLUDE" char buf[1]; int main() { - google::protobuf::Empty proto; + PROTO proto; proto.ParseFromArray(buf, 1); proto.SerializeToArray(buf, 1); } diff --git a/benchmarks/upb_binary.c b/benchmarks/upb_binary.c deleted file mode 100644 index 8e2d86b904..0000000000 --- a/benchmarks/upb_binary.c +++ /dev/null @@ -1,12 +0,0 @@ - -#include "google/protobuf/empty.upb.h" - -char buf[1]; - -int main() { - upb_arena *arena = upb_arena_new(); - size_t size; - google_protobuf_Empty *proto = google_protobuf_Empty_parse(buf, 1, arena); - google_protobuf_Empty_serialize(proto, arena, &size); - return 0; -} diff --git a/benchmarks/upb_binary.c.tmpl b/benchmarks/upb_binary.c.tmpl new file mode 100644 index 0000000000..6f2904bbd6 --- /dev/null +++ b/benchmarks/upb_binary.c.tmpl @@ -0,0 +1,12 @@ + +#include "INCLUDE" + +char buf[1]; + +int main() { + upb_arena *arena = upb_arena_new(); + size_t size; + PROTO *proto = PROTO_parse(buf, 1, arena); + PROTO_serialize(proto, arena, &size); + return 0; +} From e5bdfba92c0ac47659f49dd978c99a18920b18c5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 5 Nov 2020 17:30:04 -0800 Subject: [PATCH 03/18] Removed accidentally-added .orig file. 
--- benchmarks/compare.py | 14 +- upb/decode.c.orig | 718 ------------------------------------------ 2 files changed, 10 insertions(+), 722 deletions(-) delete mode 100644 upb/decode.c.orig diff --git a/benchmarks/compare.py b/benchmarks/compare.py index ad8a1901e8..9824ca0100 100755 --- a/benchmarks/compare.py +++ b/benchmarks/compare.py @@ -27,13 +27,16 @@ def GitWorktree(commit): def Run(cmd): subprocess.check_call(cmd, shell=True) -def Benchmark(outbase, bench_cpu=True, runs=12): +def Benchmark(outbase, bench_cpu=True, runs=12, new=False): tmpfile = "/tmp/bench-output.json" Run("rm -rf {}".format(tmpfile)) Run("CC=clang bazel test ...") if bench_cpu: - Run("CC=clang bazel build -c opt --copt=-march=native benchmarks:benchmark") + if new: + Run("CC=clang bazel build -c opt --copt=-march=native --//:fasttable_enabled=true benchmarks:benchmark") + else: + Run("CC=clang bazel build -c opt --copt=-march=native benchmarks:benchmark") Run("./bazel-bin/benchmarks/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={}".format(tmpfile, runs)) with open(tmpfile) as f: @@ -48,7 +51,10 @@ def Benchmark(outbase, bench_cpu=True, runs=12): values = (name, run["iterations"], run["cpu_time"]) print("{} {} {} ns/op".format(*values), file=f) - Run("CC=clang bazel build -c opt --copt=-g tests:conformance_upb") + if new: + Run("CC=clang bazel build -c opt --copt=-g --//:fasttable_enabled=true tests:conformance_upb") + else: + Run("CC=clang bazel build -c opt --copt=-g tests:conformance_upb") Run("cp -f bazel-bin/tests/conformance_upb {}.bin".format(outbase)) @@ -63,7 +69,7 @@ if len(sys.argv) > 1: pass # Benchmark our current directory first, since it's more likely to be broken. -Benchmark("/tmp/new", bench_cpu) +Benchmark("/tmp/new", bench_cpu, new=True) # Benchmark the baseline. 
with GitWorktree(baseline): diff --git a/upb/decode.c.orig b/upb/decode.c.orig deleted file mode 100644 index e938ec9f38..0000000000 --- a/upb/decode.c.orig +++ /dev/null @@ -1,718 +0,0 @@ - -#include "upb/decode.h" - -#include -#include - -#include "upb/decode.int.h" -#include "upb/upb.h" -#include "upb/upb.int.h" - -/* Must be last. */ -#include "upb/port_def.inc" - -/* Maps descriptor type -> elem_size_lg2. */ -static const uint8_t desctype_to_elem_size_lg2[] = { - -1, /* invalid descriptor type */ - 3, /* DOUBLE */ - 2, /* FLOAT */ - 3, /* INT64 */ - 3, /* UINT64 */ - 2, /* INT32 */ - 3, /* FIXED64 */ - 2, /* FIXED32 */ - 0, /* BOOL */ - UPB_SIZE(3, 4), /* STRING */ - UPB_SIZE(2, 3), /* GROUP */ - UPB_SIZE(2, 3), /* MESSAGE */ - UPB_SIZE(3, 4), /* BYTES */ - 2, /* UINT32 */ - 2, /* ENUM */ - 2, /* SFIXED32 */ - 3, /* SFIXED64 */ - 2, /* SINT32 */ - 3, /* SINT64 */ -}; - -/* Maps descriptor type -> upb map size. */ -static const uint8_t desctype_to_mapsize[] = { - -1, /* invalid descriptor type */ - 8, /* DOUBLE */ - 4, /* FLOAT */ - 8, /* INT64 */ - 8, /* UINT64 */ - 4, /* INT32 */ - 8, /* FIXED64 */ - 4, /* FIXED32 */ - 1, /* BOOL */ - UPB_MAPTYPE_STRING, /* STRING */ - sizeof(void *), /* GROUP */ - sizeof(void *), /* MESSAGE */ - UPB_MAPTYPE_STRING, /* BYTES */ - 4, /* UINT32 */ - 4, /* ENUM */ - 4, /* SFIXED32 */ - 8, /* SFIXED64 */ - 4, /* SINT32 */ - 8, /* SINT64 */ -}; - -static const unsigned fixed32_ok = (1 << UPB_DTYPE_FLOAT) | - (1 << UPB_DTYPE_FIXED32) | - (1 << UPB_DTYPE_SFIXED32); - -static const unsigned fixed64_ok = (1 << UPB_DTYPE_DOUBLE) | - (1 << UPB_DTYPE_FIXED64) | - (1 << UPB_DTYPE_SFIXED64); - -/* Op: an action to be performed for a wire-type/field-type combination. */ -#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */ -#define OP_STRING 4 -#define OP_BYTES 5 -#define OP_SUBMSG 6 -/* Ops above are scalar-only. Repeated fields can use any op. 
*/ -#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ -#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ - -static const int8_t varint_ops[19] = { - -1, /* field not found */ - -1, /* DOUBLE */ - -1, /* FLOAT */ - OP_SCALAR_LG2(3), /* INT64 */ - OP_SCALAR_LG2(3), /* UINT64 */ - OP_SCALAR_LG2(2), /* INT32 */ - -1, /* FIXED64 */ - -1, /* FIXED32 */ - OP_SCALAR_LG2(0), /* BOOL */ - -1, /* STRING */ - -1, /* GROUP */ - -1, /* MESSAGE */ - -1, /* BYTES */ - OP_SCALAR_LG2(2), /* UINT32 */ - OP_SCALAR_LG2(2), /* ENUM */ - -1, /* SFIXED32 */ - -1, /* SFIXED64 */ - OP_SCALAR_LG2(2), /* SINT32 */ - OP_SCALAR_LG2(3), /* SINT64 */ -}; - -static const int8_t delim_ops[37] = { - /* For non-repeated field type. */ - -1, /* field not found */ - -1, /* DOUBLE */ - -1, /* FLOAT */ - -1, /* INT64 */ - -1, /* UINT64 */ - -1, /* INT32 */ - -1, /* FIXED64 */ - -1, /* FIXED32 */ - -1, /* BOOL */ - OP_STRING, /* STRING */ - -1, /* GROUP */ - OP_SUBMSG, /* MESSAGE */ - OP_BYTES, /* BYTES */ - -1, /* UINT32 */ - -1, /* ENUM */ - -1, /* SFIXED32 */ - -1, /* SFIXED64 */ - -1, /* SINT32 */ - -1, /* SINT64 */ - /* For repeated field type. 
*/ - OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */ - OP_FIXPCK_LG2(2), /* REPEATED FLOAT */ - OP_VARPCK_LG2(3), /* REPEATED INT64 */ - OP_VARPCK_LG2(3), /* REPEATED UINT64 */ - OP_VARPCK_LG2(2), /* REPEATED INT32 */ - OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */ - OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */ - OP_VARPCK_LG2(0), /* REPEATED BOOL */ - OP_STRING, /* REPEATED STRING */ - OP_SUBMSG, /* REPEATED GROUP */ - OP_SUBMSG, /* REPEATED MESSAGE */ - OP_BYTES, /* REPEATED BYTES */ - OP_VARPCK_LG2(2), /* REPEATED UINT32 */ - OP_VARPCK_LG2(2), /* REPEATED ENUM */ - OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */ - OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */ - OP_VARPCK_LG2(2), /* REPEATED SINT32 */ - OP_VARPCK_LG2(3), /* REPEATED SINT64 */ -}; - -typedef union { - bool bool_val; - uint32_t uint32_val; - uint64_t uint64_val; - uint32_t size; -} wireval; - -static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, - const upb_msglayout *layout); - -UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); } - -const char *fastdecode_err(upb_decstate *d) { - longjmp(d->err, 1); - return NULL; -} - -void decode_verifyutf8(upb_decstate *d, const char *buf, int len) { - static const uint8_t utf8_offset[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 
4, 0, 0, 0, 0, 0, 0, 0, 0, - }; - - int i, j; - uint8_t offset; - - i = 0; - while (i < len) { - offset = utf8_offset[(uint8_t)buf[i]]; - if (offset == 0 || i + offset > len) { - decode_err(d); - } - for (j = i + 1; j < i + offset; j++) { - if ((buf[j] & 0xc0) != 0x80) { - decode_err(d); - } - } - i += offset; - } - if (i != len) decode_err(d); -} - -static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) { - bool need_realloc = arr->size - arr->len < elem; - if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, &d->arena)) { - decode_err(d); - } - return need_realloc; -} - -typedef struct { - const char *ptr; - uint64_t val; -} decode_vret; - -UPB_NOINLINE -static decode_vret decode_longvarint64(const char *ptr, uint64_t val) { - decode_vret ret = {NULL, 0}; - uint64_t byte; - int i; - for (i = 1; i < 10; i++) { - byte = (uint8_t)ptr[i]; - val += (byte - 1) << (i * 7); - if (!(byte & 0x80)) { - ret.ptr = ptr + i + 1; - ret.val = val; - return ret; - } - } - return ret; -} - -UPB_FORCEINLINE -static const char *decode_varint64(upb_decstate *d, const char *ptr, - uint64_t *val) { - uint64_t byte = (uint8_t)*ptr; - if (UPB_LIKELY((byte & 0x80) == 0)) { - *val = byte; - return ptr + 1; - } else { - decode_vret res = decode_longvarint64(ptr, byte); - if (!res.ptr) decode_err(d); - *val = res.val; - return res.ptr; - } -} - -UPB_FORCEINLINE -static const char *decode_varint32(upb_decstate *d, const char *ptr, - uint32_t *val) { - uint64_t u64; - ptr = decode_varint64(d, ptr, &u64); - if (u64 > UINT32_MAX) decode_err(d); - *val = (uint32_t)u64; - return ptr; -} - -static void decode_munge(int type, wireval *val) { - switch (type) { - case UPB_DESCRIPTOR_TYPE_BOOL: - val->bool_val = val->uint64_val != 0; - break; - case UPB_DESCRIPTOR_TYPE_SINT32: { - uint32_t n = val->uint32_val; - val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1); - break; - } - case UPB_DESCRIPTOR_TYPE_SINT64: { - uint64_t n = val->uint64_val; - val->uint64_val = (n >> 1) ^ 
-(int64_t)(n & 1); - break; - } - case UPB_DESCRIPTOR_TYPE_INT32: - case UPB_DESCRIPTOR_TYPE_UINT32: - if (!_upb_isle()) { - /* The next stage will memcpy(dst, &val, 4) */ - val->uint32_val = val->uint64_val; - } - break; - } -} - -static const upb_msglayout_field *upb_find_field(const upb_msglayout *l, - uint32_t field_number) { - static upb_msglayout_field none = {0, 0, 0, 0, 0, 0}; - - /* Lots of optimization opportunities here. */ - int i; - if (l == NULL) return &none; - for (i = 0; i < l->field_count; i++) { - if (l->fields[i].number == field_number) { - return &l->fields[i]; - } - } - - return &none; /* Unknown field. */ -} - -static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout *layout, - const upb_msglayout_field *field) { - const upb_msglayout *subl = layout->submsgs[field->submsg_index]; - return _upb_msg_new_inl(subl, &d->arena); -} - -typedef struct { - bool ok; - const char *ptr; -} decode_doneret; - -UPB_NOINLINE -const char *decode_isdonefallback(upb_decstate *d, const char *ptr, - int overrun) { - ptr = decode_isdonefallback_inl(d, ptr, overrun); - if (ptr == NULL) { - decode_err(d); - } - return ptr; -} - -static const char *decode_readstr(upb_decstate *d, const char *ptr, int size, - upb_strview *str) { - if (d->alias) { - str->data = ptr; - } else { - char *data = upb_arena_malloc(&d->arena, size); - if (!data) decode_err(d); - memcpy(data, ptr, size); - str->data = data; - } - str->size = size; - return ptr + size; -} - -static const char *decode_tosubmsg(upb_decstate *d, const char *ptr, - upb_msg *submsg, const upb_msglayout *layout, - const upb_msglayout_field *field, int size) { - const upb_msglayout *subl = layout->submsgs[field->submsg_index]; - int saved_delta = decode_pushlimit(d, ptr, size); - if (--d->depth < 0) decode_err(d); - ptr = decode_msg(d, ptr, submsg, subl); - decode_poplimit(d, saved_delta); - if (d->end_group != 0) decode_err(d); - d->depth++; - return ptr; -} - -static const char 
*decode_group(upb_decstate *d, const char *ptr, - upb_msg *submsg, const upb_msglayout *subl, - uint32_t number) { - if (--d->depth < 0) decode_err(d); - ptr = decode_msg(d, ptr, submsg, subl); - if (d->end_group != number) decode_err(d); - d->end_group = 0; - d->depth++; - return ptr; -} - -static const char *decode_togroup(upb_decstate *d, const char *ptr, - upb_msg *submsg, const upb_msglayout *layout, - const upb_msglayout_field *field) { - const upb_msglayout *subl = layout->submsgs[field->submsg_index]; - return decode_group(d, ptr, submsg, subl, field->number); -} - -UPB_FORCEINLINE -static const char *decode_toarray(upb_decstate *d, const char *ptr, - upb_msg *msg, const upb_msglayout *layout, - const upb_msglayout_field *field, wireval val, - int op) { - upb_array **arrp = UPB_PTR_AT(msg, field->offset, void); - upb_array *arr = *arrp; - void *mem; - - if (arr) { - decode_reserve(d, arr, 1); - } else { - size_t lg2 = desctype_to_elem_size_lg2[field->descriptortype]; - arr = _upb_array_new(&d->arena, 4, lg2); - if (!arr) decode_err(d); - *arrp = arr; - } - - switch (op) { - case OP_SCALAR_LG2(0): - case OP_SCALAR_LG2(2): - case OP_SCALAR_LG2(3): - /* Append scalar value. */ - mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void); - arr->len++; - memcpy(mem, &val, 1 << op); - return ptr; - case OP_STRING: - decode_verifyutf8(d, ptr, val.size); - /* Fallthrough. */ - case OP_BYTES: { - /* Append bytes. */ - upb_strview *str = (upb_strview*)_upb_array_ptr(arr) + arr->len; - arr->len++; - return decode_readstr(d, ptr, val.size, str); - } - case OP_SUBMSG: { - /* Append submessage / group. 
*/ - upb_msg *submsg = decode_newsubmsg(d, layout, field); - *UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void *), upb_msg *) = - submsg; - arr->len++; - if (UPB_UNLIKELY(field->descriptortype == UPB_DTYPE_GROUP)) { - return decode_togroup(d, ptr, submsg, layout, field); - } else { - return decode_tosubmsg(d, ptr, submsg, layout, field, val.size); - } - } - case OP_FIXPCK_LG2(2): - case OP_FIXPCK_LG2(3): { - /* Fixed packed. */ - int lg2 = op - OP_FIXPCK_LG2(0); - int mask = (1 << lg2) - 1; - size_t count = val.size >> lg2; - if ((val.size & mask) != 0) { - decode_err(d); /* Length isn't a round multiple of elem size. */ - } - decode_reserve(d, arr, count); - mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); - arr->len += count; - memcpy(mem, ptr, val.size); /* XXX: ptr boundary. */ - return ptr + val.size; - } - case OP_VARPCK_LG2(0): - case OP_VARPCK_LG2(2): - case OP_VARPCK_LG2(3): { - /* Varint packed. */ - int lg2 = op - OP_VARPCK_LG2(0); - int scale = 1 << lg2; - int saved_limit = decode_pushlimit(d, ptr, val.size); - char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); - while (!decode_isdone(d, &ptr)) { - wireval elem; - ptr = decode_varint64(d, ptr, &elem.uint64_val); - decode_munge(field->descriptortype, &elem); - if (decode_reserve(d, arr, 1)) { - out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); - } - arr->len++; - memcpy(out, &elem, scale); - out += scale; - } - decode_poplimit(d, saved_limit); - return ptr; - } - default: - UPB_UNREACHABLE(); - } -} - -static const char *decode_tomap(upb_decstate *d, const char *ptr, upb_msg *msg, - const upb_msglayout *layout, - const upb_msglayout_field *field, wireval val) { - upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *); - upb_map *map = *map_p; - upb_map_entry ent; - const upb_msglayout *entry = layout->submsgs[field->submsg_index]; - - if (!map) { - /* Lazily create map. 
*/ - const upb_msglayout *entry = layout->submsgs[field->submsg_index]; - const upb_msglayout_field *key_field = &entry->fields[0]; - const upb_msglayout_field *val_field = &entry->fields[1]; - char key_size = desctype_to_mapsize[key_field->descriptortype]; - char val_size = desctype_to_mapsize[val_field->descriptortype]; - UPB_ASSERT(key_field->offset == 0); - UPB_ASSERT(val_field->offset == sizeof(upb_strview)); - map = _upb_map_new(&d->arena, key_size, val_size); - *map_p = map; - } - - /* Parse map entry. */ - memset(&ent, 0, sizeof(ent)); - - if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE || - entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) { - /* Create proactively to handle the case where it doesn't appear. */ - ent.v.val = upb_value_ptr(_upb_msg_new(entry->submsgs[0], &d->arena)); - } - - ptr = decode_tosubmsg(d, ptr, &ent.k, layout, field, val.size); - _upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena); - return ptr; -} - -UPB_FORCEINLINE -static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg, - const upb_msglayout *layout, - const upb_msglayout_field *field, wireval val, - int op) { - void *mem = UPB_PTR_AT(msg, field->offset, void); - int type = field->descriptortype; - - /* Set presence if necessary. */ - if (field->presence < 0) { - /* Oneof case */ - uint32_t *oneof_case = _upb_oneofcase_field(msg, field); - if (op == OP_SUBMSG && *oneof_case != field->number) { - memset(mem, 0, sizeof(void*)); - } - *oneof_case = field->number; - } else if (field->presence > 0) { - _upb_sethas_field(msg, field); - } - - /* Store into message. 
*/ - switch (op) { - case OP_SUBMSG: { - upb_msg **submsgp = mem; - upb_msg *submsg = *submsgp; - if (!submsg) { - submsg = decode_newsubmsg(d, layout, field); - *submsgp = submsg; - } - if (UPB_UNLIKELY(type == UPB_DTYPE_GROUP)) { - ptr = decode_togroup(d, ptr, submsg, layout, field); - } else { - ptr = decode_tosubmsg(d, ptr, submsg, layout, field, val.size); - } - break; - } - case OP_STRING: - decode_verifyutf8(d, ptr, val.size); - /* Fallthrough. */ - case OP_BYTES: - return decode_readstr(d, ptr, val.size, mem); - case OP_SCALAR_LG2(3): - memcpy(mem, &val, 8); - break; - case OP_SCALAR_LG2(2): - memcpy(mem, &val, 4); - break; - case OP_SCALAR_LG2(0): - memcpy(mem, &val, 1); - break; - default: - UPB_UNREACHABLE(); - } - - return ptr; -} - -typedef struct { - const char *ptr; - bool group_end; -} decode_parseret; - -UPB_FORCEINLINE -static decode_parseret decode_field(upb_decstate *d, const char *ptr, - upb_msg *msg, const upb_msglayout *layout) { - uint32_t tag; - const upb_msglayout_field *field; - int field_number; - int wire_type; - const char *field_start = ptr; - wireval val; - int op; - decode_parseret ret; - - ptr = decode_varint32(d, ptr, &tag); - field_number = tag >> 3; - wire_type = tag & 7; - - field = upb_find_field(layout, field_number); - - switch (wire_type) { - case UPB_WIRE_TYPE_VARINT: - ptr = decode_varint64(d, ptr, &val.uint64_val); - op = varint_ops[field->descriptortype]; - decode_munge(field->descriptortype, &val); - break; - case UPB_WIRE_TYPE_32BIT: - memcpy(&val.uint32_val, ptr, 4); - val.uint32_val = _upb_be_swap32(val.uint32_val); - ptr += 4; - op = OP_SCALAR_LG2(2); - if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown; - break; - case UPB_WIRE_TYPE_64BIT: - memcpy(&val.uint64_val, ptr, 8); - val.uint64_val = _upb_be_swap64(val.uint64_val); - ptr += 8; - op = OP_SCALAR_LG2(3); - if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown; - break; - case UPB_WIRE_TYPE_DELIMITED: { - int ndx = 
field->descriptortype; - if (_upb_isrepeated(field)) ndx += 18; - ptr = decode_varint32(d, ptr, &val.size); - if (val.size >= INT32_MAX || ptr - d->end + val.size > d->limit) { - decode_err(d); /* Length overflow. */ - } - op = delim_ops[ndx]; - break; - } - case UPB_WIRE_TYPE_START_GROUP: - val.uint32_val = field_number; - op = OP_SUBMSG; - if (field->descriptortype != UPB_DTYPE_GROUP) goto unknown; - break; - case UPB_WIRE_TYPE_END_GROUP: - d->end_group = field_number; - ret.ptr = ptr; - ret.group_end = true; - return ret; - default: - decode_err(d); - } - - if (op >= 0) { - /* Parse, using op for dispatch. */ - switch (field->label) { - case UPB_LABEL_REPEATED: - case _UPB_LABEL_PACKED: - ptr = decode_toarray(d, ptr, msg, layout, field, val, op); - break; - case _UPB_LABEL_MAP: - ptr = decode_tomap(d, ptr, msg, layout, field, val); - break; - default: - ptr = decode_tomsg(d, ptr, msg, layout, field, val, op); - break; - } - } else { - unknown: - /* Skip unknown field. */ - if (field_number == 0) decode_err(d); - if (wire_type == UPB_WIRE_TYPE_START_GROUP) { - ptr = decode_group(d, ptr, NULL, NULL, field_number); - } - if (msg) { - if (wire_type == UPB_WIRE_TYPE_DELIMITED) ptr += val.size; - if (!_upb_msg_addunknown(msg, field_start, ptr - field_start, - &d->arena)) { - decode_err(d); - } - } - } - - ret.ptr = ptr; - ret.group_end = false; - return ret; -} - -UPB_NOINLINE -const char *fastdecode_generic(upb_decstate *d, const char *ptr, upb_msg *msg, - intptr_t table, uint64_t hasbits, - uint64_t data) { - decode_parseret ret; - *(uint32_t*)msg |= hasbits; /* Sync hasbits. 
*/ - (void)data; -<<<<<<< HEAD - if (ptr == d->limit) return ptr; - ret = decode_field(d, ptr, msg, decode_totablep(table)); -======= - if (decode_isdone(d, &ptr)) return ptr; - ret = decode_field(d, ptr, msg, table); ->>>>>>> fastest-table - if (ret.group_end) return ptr; - return fastdecode_dispatch(d, ret.ptr, msg, table, hasbits); -} - -UPB_NOINLINE -static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, - const upb_msglayout *layout) { - if (msg && layout->table_mask != (unsigned char)-1) { - ptr = fastdecode_dispatch(d, ptr, msg, decode_totable(layout), 0); - } else { - while (!decode_isdone(d, &ptr)) { - decode_parseret ret = decode_field(d, ptr, msg, layout); - ptr = ret.ptr; - if (ret.group_end) return ptr; - } - } - return ptr; -} - -bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l, - upb_arena *arena) { - bool ok; - upb_decstate state; - - if (size == 0) { - return true; - } else if (size < 16) { - memset(&state.patch, 0, 32); - memcpy(&state.patch, buf, size); - buf = state.patch; - state.end = buf + size; - state.limit = 0; - state.alias = false; - } else { - state.end = buf + size - 16; - state.limit = 16; - state.alias = true; - } - - state.limit_ptr = state.end; - state.depth = 64; - state.end_group = 0; - state.arena.head = arena->head; - state.arena.last_size = arena->last_size; - state.arena.parent = arena; - -#ifdef __APPLE__ - if (UPB_UNLIKELY(_setjmp(state.err))) { -#else - if (UPB_UNLIKELY(setjmp(state.err))) { -#endif - ok = false; - } else { - decode_msg(&state, buf, msg, l); - ok = state.end_group == 0; - } - - arena->head.ptr = state.arena.head.ptr; - arena->head.end = state.arena.head.end; - return ok; -} - -#undef OP_SCALAR_LG2 -#undef OP_FIXPCK_LG2 -#undef OP_VARPCK_LG2 -#undef OP_STRING -#undef OP_SUBMSG From 555fbbc0bcace40c19ab2b9e8ad0269180c93f89 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 10 Nov 2020 20:20:06 -0800 Subject: [PATCH 04/18] Size benchmarks are 
working pretty well. --- bazel/upb_proto_library.bzl | 14 +--- benchmarks/BUILD | 110 +++++++++++++++++++-------- benchmarks/build_defs.bzl | 27 ++++--- benchmarks/descriptor.proto | 4 - benchmarks/descriptor_sv.proto | 4 - benchmarks/gen_protobuf_binary_cc.py | 38 +++++++++ benchmarks/gen_synthetic_protos.py | 85 +++++++++++++++++++++ benchmarks/gen_upb_binary_c.py | 39 ++++++++++ benchmarks/protobuf_binary.cc.tmpl | 10 --- benchmarks/upb_binary.c.tmpl | 12 --- 10 files changed, 259 insertions(+), 84 deletions(-) create mode 100644 benchmarks/gen_protobuf_binary_cc.py create mode 100644 benchmarks/gen_synthetic_protos.py create mode 100644 benchmarks/gen_upb_binary_c.py delete mode 100644 benchmarks/protobuf_binary.cc.tmpl delete mode 100644 benchmarks/upb_binary.c.tmpl diff --git a/bazel/upb_proto_library.bzl b/bazel/upb_proto_library.bzl index d4d51604f4..bec5d9cbd1 100644 --- a/bazel/upb_proto_library.bzl +++ b/bazel/upb_proto_library.bzl @@ -21,23 +21,17 @@ def _get_real_short_path(file): # Sometimes it has another few prefixes like: # _virtual_imports/any_proto/google/protobuf/any.proto + # benchmarks/_virtual_imports/100_msgs_proto/benchmarks/100_msgs.proto # We want just google/protobuf/any.proto. 
- if short_path.startswith("_virtual_imports"): - short_path = short_path.split("/", 2)[-1] + virtual_imports = "_virtual_imports/" + if virtual_imports in short_path: + short_path = short_path.split(virtual_imports)[1].split("/", 1)[1] return short_path def _get_real_root(file): real_short_path = _get_real_short_path(file) return file.path[:-len(real_short_path) - 1] -def _get_real_roots(files): - roots = {} - for file in files: - real_root = _get_real_root(file) - if real_root: - roots[real_root] = True - return roots.keys() - def _generate_output_file(ctx, src, extension): real_short_path = _get_real_short_path(src) real_short_path = paths.relativize(real_short_path, ctx.label.package) diff --git a/benchmarks/BUILD b/benchmarks/BUILD index a937accc9a..5c32e10444 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -7,23 +7,24 @@ load( ":build_defs.bzl", "tmpl_cc_binary", "cc_lite_proto_library", + "expand_suffixes", ) licenses(["notice"]) proto_library( - name = "benchmark_descriptor_proto", + name = "descriptor_proto", srcs = ["descriptor.proto"], ) upb_proto_library( name = "benchmark_descriptor_upb_proto", - deps = [":benchmark_descriptor_proto"], + deps = [":descriptor_proto"], ) upb_proto_reflection_library( name = "benchmark_descriptor_upb_proto_reflection", - deps = [":benchmark_descriptor_proto"], + deps = [":descriptor_proto"], ) upb_proto_reflection_library( @@ -33,7 +34,7 @@ upb_proto_reflection_library( cc_proto_library( name = "benchmark_descriptor_cc_proto", - deps = [":benchmark_descriptor_proto"], + deps = [":descriptor_proto"], ) proto_library( @@ -69,26 +70,65 @@ cc_binary( SIZE_BENCHMARKS = { "empty": "Empty", "descriptor": "FileDescriptorSet", - "100_msgs": "Message99", + "100_msgs": "Message100", + "200_msgs": "Message200", + "100_fields": "Message", + "200_fields": "Message", } py_binary( - name = "gen_benchmark_proto", - srcs = ["gen_benchmark_proto.py"], + name = "gen_synthetic_protos", + srcs = ["gen_synthetic_protos.py"], +) + 
+py_binary( + name = "gen_upb_binary_c", + srcs = ["gen_upb_binary_c.py"], +) + +py_binary( + name = "gen_protobuf_binary_cc", + srcs = ["gen_protobuf_binary_cc.py"], ) genrule( - name = "gen_100_msgs", - tools = [":gen_benchmark_proto"], - outs = ["100_msgs.proto"], - cmd = "$(execpath :gen_benchmark_proto) $@", + name = "do_gen_synthetic_protos", + tools = [":gen_synthetic_protos"], + outs = [ + "100_msgs.proto", + "200_msgs.proto", + "100_fields.proto", + "200_fields.proto", + ], + cmd = "$(execpath :gen_synthetic_protos) $(GENDIR)", ) -[( proto_library( - name = k + "_proto", - srcs = [k + ".proto"], -), + name = "100_msgs_proto", + srcs = ["100_msgs.proto"], +) + +proto_library( + name = "200_msgs_proto", + srcs = ["200_msgs.proto"], +) + +proto_library( + name = "100_fields_proto", + srcs = ["100_fields.proto"], +) + +proto_library( + name = "200_fields_proto", + srcs = ["200_fields.proto"], +) + +proto_library( + name = "empty_proto", + srcs = ["empty.proto"], +) + +[( upb_proto_library( name = k + "_upb_proto", deps = [":" + k + "_proto"], @@ -100,11 +140,11 @@ cc_proto_library( tmpl_cc_binary( name = k + "_upb_binary", testonly = 1, - srcs = ["upb_binary.c.tmpl"], - replacements = { - "PROTO": "upb_benchmark_" + v, - "INCLUDE": "benchmarks/" + k + ".upb.h", - }, + gen = ":gen_upb_binary_c", + args = [ + "benchmarks/" + k + ".upb.h", + "upb_benchmark_" + v, + ], deps = [ ":" + k + "_upb_proto", ], @@ -112,11 +152,11 @@ tmpl_cc_binary( tmpl_cc_binary( name = k + "_protobuf_binary", testonly = 1, - srcs = ["protobuf_binary.cc.tmpl"], - replacements = { - "PROTO": "upb_benchmark::" + v, - "INCLUDE": "benchmarks/" + k + ".pb.h", - }, + gen = ":gen_protobuf_binary_cc", + args = [ + "benchmarks/" + k + ".pb.h", + "upb_benchmark::" + v, + ], deps = [ ":" + k + "_cc_proto", ], @@ -129,13 +169,23 @@ cc_lite_proto_library( tmpl_cc_binary( name = k + "_lite_protobuf_binary", testonly = 1, - srcs = ["protobuf_binary.cc.tmpl"], - replacements = { - "PROTO": 
"upb_benchmark::" + v, - "INCLUDE": "benchmarks/" + k + "_lite.pb.h", - }, + gen = ":gen_protobuf_binary_cc", + args = [ + "benchmarks/" + k + "_lite.pb.h", + "upb_benchmark::" + v, + ], deps = [ ":" + k + "_cc_lite_proto", ], )) for k, v in SIZE_BENCHMARKS.items()] +genrule( + testonly = 1, + name = "size_data", + srcs = expand_suffixes( + SIZE_BENCHMARKS.keys(), + suffixes = ["_upb_binary", "_protobuf_binary"], + ), + outs = ["size_data.txt"], + cmd = "size --format=GNU -d $(SRCS) > $@", +) diff --git a/benchmarks/build_defs.bzl b/benchmarks/build_defs.bzl index 402a523db2..157be4d03b 100644 --- a/benchmarks/build_defs.bzl +++ b/benchmarks/build_defs.bzl @@ -1,24 +1,16 @@ -def tmpl_cc_binary(name, srcs, replacements = [], **kwargs): - if len(srcs) != 1: - fail("Currently srcs must have exactly 1 element") - src = srcs[0] - if not src.endswith(".tmpl"): - fail("srcs of tmpl_cc_binary must end with .tmpl") - outs = [name + "_" + src[:-5]] - sed_cmds = ["s,{},{},g".format(k, v) for k, v in replacements.items()] - cmd = "sed -e '{}' $< > $@".format("; ".join(sed_cmds)) - +def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs): + srcs = [name + ".cc"] native.genrule( name = name + "_gen_srcs", - srcs = [src], - outs = outs, - cmd = cmd, + tools = [gen], + outs = srcs, + cmd = "$(location " + gen + ") " + " ".join(args) + " > $@", ) native.cc_binary( name = name, - srcs = outs, + srcs = srcs, **kwargs, ) @@ -42,3 +34,10 @@ def cc_lite_proto_library(name, srcs, outs): name = name, deps = [":" + name + "_proto"], ) + +def expand_suffixes(vals, suffixes): + ret = [] + for val in vals: + for suffix in suffixes: + ret.append(val + suffix) + return ret diff --git a/benchmarks/descriptor.proto b/benchmarks/descriptor.proto index a95371d4da..b69b27f89e 100644 --- a/benchmarks/descriptor.proto +++ b/benchmarks/descriptor.proto @@ -48,10 +48,6 @@ option csharp_namespace = "Google.Protobuf.Reflection"; option objc_class_prefix = "GPB"; option cc_enable_arenas = true; 
-// descriptor.proto must be optimized for speed because reflection-based -// algorithms don't work during bootstrapping. -option optimize_for = SPEED; - // The protocol compiler can output a FileDescriptorSet containing the .proto // files it parses. message FileDescriptorSet { diff --git a/benchmarks/descriptor_sv.proto b/benchmarks/descriptor_sv.proto index c595a68649..8ca0888da7 100644 --- a/benchmarks/descriptor_sv.proto +++ b/benchmarks/descriptor_sv.proto @@ -47,10 +47,6 @@ option csharp_namespace = "Google.Protobuf.Reflection"; option objc_class_prefix = "GPB"; option cc_enable_arenas = true; -// descriptor.proto must be optimized for speed because reflection-based -// algorithms don't work during bootstrapping. -option optimize_for = SPEED; - // The protocol compiler can output a FileDescriptorSet containing the .proto // files it parses. message FileDescriptorSet { diff --git a/benchmarks/gen_protobuf_binary_cc.py b/benchmarks/gen_protobuf_binary_cc.py new file mode 100644 index 0000000000..181cafcde8 --- /dev/null +++ b/benchmarks/gen_protobuf_binary_cc.py @@ -0,0 +1,38 @@ + +import sys +import re + +include = sys.argv[1] +msg_basename = sys.argv[2] +count = 1 + +m = re.search(r'(.*\D)(\d+)$', sys.argv[2]) +if m: + msg_basename = m.group(1) + count = int(m.group(2)) + +print(f''' +#include "{include}" + +char buf[1]; + +int main() {{ +''') + +def RefMessage(name): + print(f''' + {{ + {name} proto; + proto.ParseFromArray(buf, 0); + proto.SerializePartialToArray(&buf[0], 0); + }} + ''') + +RefMessage(msg_basename) + +for i in range(2, count + 1): + RefMessage(msg_basename + str(i)) + +print(''' + return 0; +}''') diff --git a/benchmarks/gen_synthetic_protos.py b/benchmarks/gen_synthetic_protos.py new file mode 100644 index 0000000000..36d1f7d7a7 --- /dev/null +++ b/benchmarks/gen_synthetic_protos.py @@ -0,0 +1,85 @@ + +import sys +import random + +base = sys.argv[1] + +field_freqs = [ + (('bool', 'optional'), 8.321), + (('bool', 'repeated'), 0.033), + 
(('bytes', 'optional'), 0.809), + (('bytes', 'repeated'), 0.065), + (('double', 'optional'), 2.845), + (('double', 'repeated'), 0.143), + (('fixed32', 'optional'), 0.084), + (('fixed32', 'repeated'), 0.012), + (('fixed64', 'optional'), 0.204), + (('fixed64', 'repeated'), 0.027), + (('float', 'optional'), 2.355), + (('float', 'repeated'), 0.132), + (('int32', 'optional'), 6.717), + (('int32', 'repeated'), 0.366), + (('int64', 'optional'), 9.678), + (('int64', 'repeated'), 0.425), + (('sfixed32', 'optional'), 0.018), + (('sfixed32', 'repeated'), 0.005), + (('sfixed64', 'optional'), 0.022), + (('sfixed64', 'repeated'), 0.005), + (('sint32', 'optional'), 0.026), + (('sint32', 'repeated'), 0.009), + (('sint64', 'optional'), 0.018), + (('sint64', 'repeated'), 0.006), + (('string', 'optional'), 25.461), + (('string', 'repeated'), 2.606), + (('Enum', 'optional'), 6.16), + (('Enum', 'repeated'), 0.576), + (('Message', 'optional'), 22.472), + (('Message', 'repeated'), 7.766), + (('uint32', 'optional'), 1.289), + (('uint32', 'repeated'), 0.051), + (('uint64', 'optional'), 1.044), + (('uint64', 'repeated'), 0.079), +] + +population = [item[0] for item in field_freqs] +weights = [item[1] for item in field_freqs] + +with open(base + "/benchmarks/100_msgs.proto", "w") as f: + f.write('syntax = "proto3";\n') + f.write('package upb_benchmark;\n') + f.write('message Message {}\n') + for i in range(2, 101): + f.write(f'message Message{i} {{}}\n') + +with open(base + "/benchmarks/200_msgs.proto", "w") as f: + f.write('syntax = "proto3";\n') + f.write('package upb_benchmark;\n') + f.write('message Message {}\n') + for i in range(2, 501): + f.write(f'message Message{i} {{}}\n') + +with open(base + "/benchmarks/100_fields.proto", "w") as f: + f.write('syntax = "proto2";\n') + f.write('package upb_benchmark;\n') + f.write('enum Enum { ZERO = 0; }\n') + f.write('message Message {\n') + i = 1 + random.seed(a=0, version=2) + for field in random.choices(population=population, weights=weights, 
k=100): + field_type, label = field + f.write(f' {label} {field_type} field{i} = {i};\n') + i += 1 + f.write('}\n') + +with open(base + "/benchmarks/200_fields.proto", "w") as f: + f.write('syntax = "proto2";\n') + f.write('package upb_benchmark;\n') + f.write('enum Enum { ZERO = 0; }\n') + f.write('message Message {\n') + i = 1 + random.seed(a=0, version=2) + for field in random.choices(population=population, weights=weights, k=200): + field_type, label = field + f.write(f' {label} {field_type} field{i} = {i};\n') + i += 1 + f.write('}\n') diff --git a/benchmarks/gen_upb_binary_c.py b/benchmarks/gen_upb_binary_c.py new file mode 100644 index 0000000000..430d8c45c9 --- /dev/null +++ b/benchmarks/gen_upb_binary_c.py @@ -0,0 +1,39 @@ + +import sys +import re + +include = sys.argv[1] +msg_basename = sys.argv[2] +count = 1 + +m = re.search(r'(.*\D)(\d+)$', sys.argv[2]) +if m: + msg_basename = m.group(1) + count = int(m.group(2)) + +print(f''' +#include "{include}" + +char buf[1]; + +int main() {{ + upb_arena *arena = upb_arena_new(); + size_t size; +''') + +def RefMessage(name): + print(f''' + {{ + {name} *proto = {name}_parse(buf, 1, arena); + {name}_serialize(proto, arena, &size); + }} + ''') + +RefMessage(msg_basename) + +for i in range(2, count + 1): + RefMessage(msg_basename + str(i)) + +print(''' + return 0; +}''') diff --git a/benchmarks/protobuf_binary.cc.tmpl b/benchmarks/protobuf_binary.cc.tmpl deleted file mode 100644 index 139ad7433c..0000000000 --- a/benchmarks/protobuf_binary.cc.tmpl +++ /dev/null @@ -1,10 +0,0 @@ - -#include "INCLUDE" - -char buf[1]; - -int main() { - PROTO proto; - proto.ParseFromArray(buf, 1); - proto.SerializeToArray(buf, 1); -} diff --git a/benchmarks/upb_binary.c.tmpl b/benchmarks/upb_binary.c.tmpl deleted file mode 100644 index 6f2904bbd6..0000000000 --- a/benchmarks/upb_binary.c.tmpl +++ /dev/null @@ -1,12 +0,0 @@ - -#include "INCLUDE" - -char buf[1]; - -int main() { - upb_arena *arena = upb_arena_new(); - size_t size; - PROTO 
*proto = PROTO_parse(buf, 1, arena); - PROTO_serialize(proto, arena, &size); - return 0; -} From 0f79d47215e482b25b18b8a3462c701d4588b9f5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 10 Nov 2020 20:23:44 -0800 Subject: [PATCH 05/18] Added missing lite binaries to size_data.txt. --- benchmarks/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/BUILD b/benchmarks/BUILD index 5c32e10444..246e96764a 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -184,7 +184,7 @@ genrule( name = "size_data", srcs = expand_suffixes( SIZE_BENCHMARKS.keys(), - suffixes = ["_upb_binary", "_protobuf_binary"], + suffixes = ["_upb_binary", "_protobuf_binary", "_lite_protobuf_binary"], ), outs = ["size_data.txt"], cmd = "size --format=GNU -d $(SRCS) > $@", From 8e08282c3b6bb0d933b7d10d65c9b0e37449bb11 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 10 Nov 2020 20:29:05 -0800 Subject: [PATCH 06/18] Removed unused small.proto. --- benchmarks/small.proto | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 benchmarks/small.proto diff --git a/benchmarks/small.proto b/benchmarks/small.proto deleted file mode 100644 index cfc59c2c2c..0000000000 --- a/benchmarks/small.proto +++ /dev/null @@ -1,35 +0,0 @@ -// A small proto, for measuring the overhead of a minimal use of -// protocol buffers. - -syntax = "proto3"; - -option optimize_for = LITE_RUNTIME; -option cc_enable_arenas = true; - -package upb_benchmark; - -message Person { - string name = 1; - int32 id = 2; // Unique ID number for this person. - string email = 3; - - enum PhoneType { - MOBILE = 0; - HOME = 1; - WORK = 2; - } - - message PhoneNumber { - string number = 1; - PhoneType type = 2; - } - - repeated PhoneNumber phones = 4; - - int64 last_updated = 5; -} - -// Our address book file is just one of these. 
-message AddressBook { - repeated Person people = 1; -} From 8b7dabe1a25a4758e89b5dc23c17585ebda141c3 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 10 Nov 2020 20:44:27 -0800 Subject: [PATCH 07/18] Use format() instead of string interpolation, for old Python versions. --- benchmarks/BUILD | 2 ++ benchmarks/gen_protobuf_binary_cc.py | 8 ++++---- benchmarks/gen_upb_binary_c.py | 8 ++++---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/benchmarks/BUILD b/benchmarks/BUILD index 246e96764a..641e1389f1 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -84,11 +84,13 @@ py_binary( py_binary( name = "gen_upb_binary_c", srcs = ["gen_upb_binary_c.py"], + python_version = "PY3", ) py_binary( name = "gen_protobuf_binary_cc", srcs = ["gen_protobuf_binary_cc.py"], + python_version = "PY3", ) genrule( diff --git a/benchmarks/gen_protobuf_binary_cc.py b/benchmarks/gen_protobuf_binary_cc.py index 181cafcde8..787e391547 100644 --- a/benchmarks/gen_protobuf_binary_cc.py +++ b/benchmarks/gen_protobuf_binary_cc.py @@ -11,22 +11,22 @@ if m: msg_basename = m.group(1) count = int(m.group(2)) -print(f''' +print(''' #include "{include}" char buf[1]; int main() {{ -''') +'''.format(include=include)) def RefMessage(name): - print(f''' + print(''' {{ {name} proto; proto.ParseFromArray(buf, 0); proto.SerializePartialToArray(&buf[0], 0); }} - ''') + '''.format(name=name)) RefMessage(msg_basename) diff --git a/benchmarks/gen_upb_binary_c.py b/benchmarks/gen_upb_binary_c.py index 430d8c45c9..4df8fd7327 100644 --- a/benchmarks/gen_upb_binary_c.py +++ b/benchmarks/gen_upb_binary_c.py @@ -11,7 +11,7 @@ if m: msg_basename = m.group(1) count = int(m.group(2)) -print(f''' +print(''' #include "{include}" char buf[1]; @@ -19,15 +19,15 @@ char buf[1]; int main() {{ upb_arena *arena = upb_arena_new(); size_t size; -''') +'''.format(include=include)) def RefMessage(name): - print(f''' + print(''' {{ {name} *proto = {name}_parse(buf, 1, arena); {name}_serialize(proto, arena, 
&size); }} - ''') + '''.format(name=name)) RefMessage(msg_basename) From 881ddac7fe6f0c19a46e9f676b4e813d765cd391 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 10 Nov 2020 20:53:17 -0800 Subject: [PATCH 08/18] Also use .format() for gen_synthetic_protos.py. --- benchmarks/gen_synthetic_protos.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/gen_synthetic_protos.py b/benchmarks/gen_synthetic_protos.py index 36d1f7d7a7..c90e4e92bc 100644 --- a/benchmarks/gen_synthetic_protos.py +++ b/benchmarks/gen_synthetic_protos.py @@ -49,14 +49,14 @@ with open(base + "/benchmarks/100_msgs.proto", "w") as f: f.write('package upb_benchmark;\n') f.write('message Message {}\n') for i in range(2, 101): - f.write(f'message Message{i} {{}}\n') + f.write('message Message{i} {{}}\n'.format(i=i)) with open(base + "/benchmarks/200_msgs.proto", "w") as f: f.write('syntax = "proto3";\n') f.write('package upb_benchmark;\n') f.write('message Message {}\n') for i in range(2, 501): - f.write(f'message Message{i} {{}}\n') + f.write('message Message{i} {{}}\n'.format(i=i)) with open(base + "/benchmarks/100_fields.proto", "w") as f: f.write('syntax = "proto2";\n') @@ -67,7 +67,7 @@ with open(base + "/benchmarks/100_fields.proto", "w") as f: random.seed(a=0, version=2) for field in random.choices(population=population, weights=weights, k=100): field_type, label = field - f.write(f' {label} {field_type} field{i} = {i};\n') + f.write(' {label} {field_type} field{i} = {i};\n'.format(i=i, label=label, field_type=field_type)) i += 1 f.write('}\n') @@ -80,6 +80,6 @@ with open(base + "/benchmarks/200_fields.proto", "w") as f: random.seed(a=0, version=2) for field in random.choices(population=population, weights=weights, k=200): field_type, label = field - f.write(f' {label} {field_type} field{i} = {i};\n') + f.write(' {label} {field_type} field{i} = {i};\n'.format(i=i, label=label,field_type=field_type)) i += 1 f.write('}\n') From 
9df96874e995cf4fb74b57af0015ef2b2886214a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Tue, 10 Nov 2020 21:21:50 -0800 Subject: [PATCH 09/18] Start arena block doubling at initial block size. If an initial block is provided, we should start our block doubling at the size of the initial block, not 128. This saves us from unnecessary overhead when we overflow the initial block. --- upb/upb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/upb/upb.c b/upb/upb.c index 8e6ad77142..a12656973d 100644 --- a/upb/upb.c +++ b/upb/upb.c @@ -178,15 +178,14 @@ upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) { } a = UPB_PTR_AT(mem, n - sizeof(*a), upb_arena); - n -= sizeof(*a); a->head.alloc.func = &upb_arena_doalloc; a->block_alloc = alloc; a->parent = a; a->refcount = 1; - a->last_size = 128; + a->last_size = UPB_MAX(128, n); a->head.ptr = mem; - a->head.end = UPB_PTR_AT(mem, n, char); + a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char); a->freelist = NULL; a->cleanups = NULL; From 65d166a6ba7d4be32b3699277694bedd30db945c Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 11 Nov 2020 15:39:54 -0800 Subject: [PATCH 10/18] Added API for copy vs. alias and added benchmarks to test both. 
Benchmark output: $ bazel-bin/benchmarks/benchmark '--benchmark_filter=BM_Parse' 2020-11-11 15:39:04 Running bazel-bin/benchmarks/benchmark Run on (72 X 3700 MHz CPU s) CPU Caches: L1 Data 32K (x36) L1 Instruction 32K (x36) L2 Unified 1024K (x36) L3 Unified 25344K (x2) ------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------------------------- BM_Parse_Upb_FileDesc<UseArena, Copy> 4134 ns 4134 ns 168714 1.69152GB/s BM_Parse_Upb_FileDesc<UseArena, Alias> 3487 ns 3487 ns 199509 2.00526GB/s BM_Parse_Upb_FileDesc<InitBlock, Copy> 3727 ns 3726 ns 187581 1.87643GB/s BM_Parse_Upb_FileDesc<InitBlock, Alias> 3110 ns 3110 ns 224970 2.24866GB/s BM_Parse_Proto2<FileDesc, NoArena, Copy> 31132 ns 31132 ns 22437 229.995MB/s BM_Parse_Proto2<FileDesc, UseArena, Copy> 21011 ns 21009 ns 33922 340.812MB/s BM_Parse_Proto2<FileDesc, InitBlock, Copy> 17976 ns 17975 ns 38808 398.337MB/s BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 17357 ns 17356 ns 40244 412.539MB/s --- benchmarks/benchmark.cc | 87 ++++++++++++++++++++--------------------- upb/decode.c | 9 +++-- upb/decode.h | 19 ++++++++- upb/decode_fast.c | 2 +- upb/def.c | 4 +- upbc/generator.cc | 6 +++ 6 files changed, 75 insertions(+), 52 deletions(-) diff --git a/benchmarks/benchmark.cc b/benchmarks/benchmark.cc index cc510bdd6c..7f4765a88c 100644 --- a/benchmarks/benchmark.cc +++ b/benchmarks/benchmark.cc @@ -115,31 +115,31 @@ static void BM_LoadAdsDescriptor_Proto2(benchmark::State& state) { } BENCHMARK(BM_LoadAdsDescriptor_Proto2); -static void BM_Parse_Upb_FileDesc_WithArena(benchmark::State& state) { - size_t bytes = 0; - for (auto _ : state) { - upb_arena* arena = upb_arena_new(); - upb_benchmark_FileDescriptorProto* set = - upb_benchmark_FileDescriptorProto_parse(descriptor.data, - descriptor.size, arena); - if (!set) { - printf("Failed to parse.\n"); - exit(1); - } - bytes += descriptor.size; - upb_arena_free(arena); - } - state.SetBytesProcessed(state.iterations() * descriptor.size); -} -BENCHMARK(BM_Parse_Upb_FileDesc_WithArena); +enum CopyStrings { + Copy, + Alias, +};
+ +enum ArenaMode { + NoArena, + UseArena, + InitBlock, +}; -static void BM_Parse_Upb_FileDesc_WithInitialBlock(benchmark::State& state) { +template <ArenaMode AMode, CopyStrings Copy> +static void BM_Parse_Upb_FileDesc(benchmark::State& state) { size_t bytes = 0; for (auto _ : state) { - upb_arena* arena = upb_arena_init(buf, sizeof(buf), NULL); + upb_arena *arena; + if (AMode == InitBlock) { + arena = upb_arena_init(buf, sizeof(buf), NULL); + } else { + arena = upb_arena_new(); + } upb_benchmark_FileDescriptorProto* set = - upb_benchmark_FileDescriptorProto_parse(descriptor.data, - descriptor.size, arena); + upb_benchmark_FileDescriptorProto_parse_ex( + descriptor.data, descriptor.size, arena, + Copy == Alias ? UPB_DECODE_ALIAS : 0); if (!set) { printf("Failed to parse.\n"); exit(1); @@ -149,10 +149,16 @@ static void BM_Parse_Upb_FileDesc_WithInitialBlock(benchmark::State& state) { } state.SetBytesProcessed(state.iterations() * descriptor.size); } -BENCHMARK(BM_Parse_Upb_FileDesc_WithInitialBlock); +BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, UseArena, Copy); +BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, UseArena, Alias); +BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, InitBlock, Copy); +BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, InitBlock, Alias); -template <class P> -struct NoArena { +template <ArenaMode AMode, class P> +struct Proto2Factory; + +template <class P> +struct Proto2Factory<NoArena, P> { public: P* GetProto() { return &proto_; } @@ -161,7 +167,7 @@ struct NoArena { }; template <class P> -struct WithArena { +struct Proto2Factory<UseArena, P> { public: P* GetProto() { return protobuf::Arena::CreateMessage
<P>
(&arena_); } @@ -170,9 +176,9 @@ struct WithArena { }; template <class P> -struct WithInitialBlock { +struct Proto2Factory<InitBlock, P> { public: - WithInitialBlock() : arena_(GetOptions()) {} + Proto2Factory() : arena_(GetOptions()) {} P* GetProto() { return protobuf::Arena::CreateMessage
<P>
(&arena_); } private: @@ -189,17 +195,15 @@ struct WithInitialBlock { using FileDesc = ::upb_benchmark::FileDescriptorProto; using FileDescSV = ::upb_benchmark::sv::FileDescriptorProto; -const protobuf::MessageLite::ParseFlags kMergePartial = - protobuf::MessageLite::ParseFlags::kMergePartial; -const protobuf::MessageLite::ParseFlags kAlias = - protobuf::MessageLite::ParseFlags::kMergePartialWithAliasing; - -template <class P, template <class> class Factory, - protobuf::MessageLite::ParseFlags kParseFlags = kMergePartial> +template <class P, ArenaMode kArenaMode, CopyStrings kCopy> void BM_Parse_Proto2(benchmark::State& state) { size_t bytes = 0; + constexpr protobuf::MessageLite::ParseFlags kParseFlags = + kCopy == Copy + ? protobuf::MessageLite::ParseFlags::kMergePartial + : protobuf::MessageLite::ParseFlags::kMergePartialWithAliasing; for (auto _ : state) { - Factory
<P>
 proto_factory; + Proto2Factory<kArenaMode, P> proto_factory; auto proto = proto_factory.GetProto(); protobuf::StringPiece input(descriptor.data,descriptor.size); bool ok = proto->template ParseFrom<kParseFlags>(input); @@ -211,15 +215,10 @@ void BM_Parse_Proto2(benchmark::State& state) { } state.SetBytesProcessed(state.iterations() * descriptor.size); } -BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, NoArena); -BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, WithArena); -BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, WithInitialBlock); -//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, NoArena); -//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithArena); -BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithInitialBlock); -//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, NoArena, kAlias); -//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithArena, kAlias); -BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithInitialBlock, kAlias); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, NoArena, Copy); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, UseArena, Copy); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, InitBlock, Copy); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, InitBlock, Alias); static void BM_SerializeDescriptor_Proto2(benchmark::State& state) { size_t bytes = 0; diff --git a/upb/decode.c b/upb/decode.c index d38b46d7b8..f49918ecaf 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -643,10 +643,11 @@ const char *fastdecode_generic(struct upb_decstate *d, const char *ptr, return decode_msg(d, ptr, msg, decode_totablep(table)); } -bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l, - upb_arena *arena) { +bool _upb_decode(const char *buf, size_t size, void *msg, + const upb_msglayout *l, upb_arena *arena, int options) { bool ok; upb_decstate state; + unsigned depth = (unsigned)options >> 16; if (size == 0) { return true; @@ -660,12 +661,12 @@ bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l, } else { state.end = buf + size - 16; 
state.limit = 16; - state.alias = true; + state.alias = options & UPB_DECODE_ALIAS; } state.limit_ptr = state.end; state.unknown_msg = NULL; - state.depth = 64; + state.depth = depth ? depth : 64; state.end_group = DECODE_NOGROUP; state.arena.head = arena->head; state.arena.last_size = arena->last_size; diff --git a/upb/decode.h b/upb/decode.h index 9de8638de5..00419ab373 100644 --- a/upb/decode.h +++ b/upb/decode.h @@ -7,15 +7,32 @@ #include "upb/msg.h" +/* Must be last. */ +#include "upb/port_def.inc" + #ifdef __cplusplus extern "C" { #endif +enum { + UPB_DECODE_ALIAS = 1, +}; + +#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16) + +bool _upb_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msglayout *l, upb_arena *arena, int options); + +UPB_INLINE bool upb_decode(const char *buf, size_t size, upb_msg *msg, - const upb_msglayout *l, upb_arena *arena); + const upb_msglayout *l, upb_arena *arena) { + return _upb_decode(buf, size, msg, l, arena, 0); +} #ifdef __cplusplus } /* extern "C" */ #endif +#include "upb/port_undef.inc" + #endif /* UPB_DECODE_H_ */ diff --git a/upb/decode_fast.c b/upb/decode_fast.c index f58f70ebec..f628e6dbd4 100644 --- a/upb/decode_fast.c +++ b/upb/decode_fast.c @@ -763,7 +763,7 @@ again: } else if (UPB_LIKELY(size <= 64)) { if (UPB_UNLIKELY(common_has < 64)) goto longstr; fastdecode_docopy(d, ptr, size, 64, buf, dst); - } else if (UPB_LIKELY(size <= 128)) { + } else if (UPB_LIKELY(size < 128)) { if (UPB_UNLIKELY(common_has < 128)) goto longstr; fastdecode_docopy(d, ptr, size, 128, buf, dst); } else { diff --git a/upb/def.c b/upb/def.c index 19b30fe7ae..74c33ccb17 100644 --- a/upb/def.c +++ b/upb/def.c @@ -2140,8 +2140,8 @@ bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) { if (!_upb_symtab_loaddefinit(s, *deps)) goto err; } - file = google_protobuf_FileDescriptorProto_parse( - init->descriptor.data, init->descriptor.size, arena); + file = google_protobuf_FileDescriptorProto_parse_ex( + 
init->descriptor.data, init->descriptor.size, arena, UPB_DECODE_ALIAS); s->bytes_loaded += init->descriptor.size; if (!file) { diff --git a/upbc/generator.cc b/upbc/generator.cc index f4d1bb9b8d..4f9db9ad91 100644 --- a/upbc/generator.cc +++ b/upbc/generator.cc @@ -348,6 +348,12 @@ void GenerateMessageInHeader(const protobuf::Descriptor* message, Output& output " $0 *ret = $0_new(arena);\n" " return (ret && upb_decode(buf, size, ret, &$1, arena)) ? ret : NULL;\n" "}\n" + "UPB_INLINE $0 *$0_parse_ex(const char *buf, size_t size,\n" + " upb_arena *arena, int options) {\n" + " $0 *ret = $0_new(arena);\n" + " return (ret && _upb_decode(buf, size, ret, &$1, arena, options))\n" + " ? ret : NULL;\n" + "}\n" "UPB_INLINE char *$0_serialize(const $0 *msg, upb_arena *arena, size_t " "*len) {\n" " return upb_encode(msg, &$1, arena, len);\n" From 5f8bb5de1d03926f270c1fbc4eb9b6df97795a23 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 11 Nov 2020 15:46:30 -0800 Subject: [PATCH 11/18] Updated generated code. --- cmake/google/protobuf/descriptor.upb.h | 162 +++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/cmake/google/protobuf/descriptor.upb.h b/cmake/google/protobuf/descriptor.upb.h index ccb902ba12..a8ed71a688 100644 --- a/cmake/google/protobuf/descriptor.upb.h +++ b/cmake/google/protobuf/descriptor.upb.h @@ -164,6 +164,12 @@ UPB_INLINE google_protobuf_FileDescriptorSet *google_protobuf_FileDescriptorSet_ google_protobuf_FileDescriptorSet *ret = google_protobuf_FileDescriptorSet_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_FileDescriptorSet_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_FileDescriptorSet *google_protobuf_FileDescriptorSet_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_FileDescriptorSet *ret = google_protobuf_FileDescriptorSet_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_FileDescriptorSet_msginit, arena, options)) + ? 
ret : NULL; +} UPB_INLINE char *google_protobuf_FileDescriptorSet_serialize(const google_protobuf_FileDescriptorSet *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FileDescriptorSet_msginit, arena, len); } @@ -195,6 +201,12 @@ UPB_INLINE google_protobuf_FileDescriptorProto *google_protobuf_FileDescriptorPr google_protobuf_FileDescriptorProto *ret = google_protobuf_FileDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_FileDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_FileDescriptorProto *google_protobuf_FileDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_FileDescriptorProto *ret = google_protobuf_FileDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_FileDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_FileDescriptorProto_serialize(const google_protobuf_FileDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FileDescriptorProto_msginit, arena, len); } @@ -352,6 +364,12 @@ UPB_INLINE google_protobuf_DescriptorProto *google_protobuf_DescriptorProto_pars google_protobuf_DescriptorProto *ret = google_protobuf_DescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_DescriptorProto *google_protobuf_DescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_DescriptorProto *ret = google_protobuf_DescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_msginit, arena, options)) + ? 
ret : NULL; +} UPB_INLINE char *google_protobuf_DescriptorProto_serialize(const google_protobuf_DescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_DescriptorProto_msginit, arena, len); } @@ -505,6 +523,12 @@ UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange *google_protobuf_Descr google_protobuf_DescriptorProto_ExtensionRange *ret = google_protobuf_DescriptorProto_ExtensionRange_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ExtensionRange_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange *google_protobuf_DescriptorProto_ExtensionRange_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_DescriptorProto_ExtensionRange *ret = google_protobuf_DescriptorProto_ExtensionRange_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ExtensionRange_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_DescriptorProto_ExtensionRange_serialize(const google_protobuf_DescriptorProto_ExtensionRange *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_DescriptorProto_ExtensionRange_msginit, arena, len); } @@ -548,6 +572,12 @@ UPB_INLINE google_protobuf_DescriptorProto_ReservedRange *google_protobuf_Descri google_protobuf_DescriptorProto_ReservedRange *ret = google_protobuf_DescriptorProto_ReservedRange_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ReservedRange_msginit, arena)) ? 
ret : NULL; } +UPB_INLINE google_protobuf_DescriptorProto_ReservedRange *google_protobuf_DescriptorProto_ReservedRange_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_DescriptorProto_ReservedRange *ret = google_protobuf_DescriptorProto_ReservedRange_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ReservedRange_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_DescriptorProto_ReservedRange_serialize(const google_protobuf_DescriptorProto_ReservedRange *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_DescriptorProto_ReservedRange_msginit, arena, len); } @@ -576,6 +606,12 @@ UPB_INLINE google_protobuf_ExtensionRangeOptions *google_protobuf_ExtensionRange google_protobuf_ExtensionRangeOptions *ret = google_protobuf_ExtensionRangeOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_ExtensionRangeOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_ExtensionRangeOptions *google_protobuf_ExtensionRangeOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_ExtensionRangeOptions *ret = google_protobuf_ExtensionRangeOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_ExtensionRangeOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_ExtensionRangeOptions_serialize(const google_protobuf_ExtensionRangeOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_ExtensionRangeOptions_msginit, arena, len); } @@ -607,6 +643,12 @@ UPB_INLINE google_protobuf_FieldDescriptorProto *google_protobuf_FieldDescriptor google_protobuf_FieldDescriptorProto *ret = google_protobuf_FieldDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_FieldDescriptorProto_msginit, arena)) ? 
ret : NULL; } +UPB_INLINE google_protobuf_FieldDescriptorProto *google_protobuf_FieldDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_FieldDescriptorProto *ret = google_protobuf_FieldDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_FieldDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_FieldDescriptorProto_serialize(const google_protobuf_FieldDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FieldDescriptorProto_msginit, arena, len); } @@ -698,6 +740,12 @@ UPB_INLINE google_protobuf_OneofDescriptorProto *google_protobuf_OneofDescriptor google_protobuf_OneofDescriptorProto *ret = google_protobuf_OneofDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_OneofDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_OneofDescriptorProto *google_protobuf_OneofDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_OneofDescriptorProto *ret = google_protobuf_OneofDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_OneofDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_OneofDescriptorProto_serialize(const google_protobuf_OneofDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_OneofDescriptorProto_msginit, arena, len); } @@ -735,6 +783,12 @@ UPB_INLINE google_protobuf_EnumDescriptorProto *google_protobuf_EnumDescriptorPr google_protobuf_EnumDescriptorProto *ret = google_protobuf_EnumDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_msginit, arena)) ? 
ret : NULL; } +UPB_INLINE google_protobuf_EnumDescriptorProto *google_protobuf_EnumDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_EnumDescriptorProto *ret = google_protobuf_EnumDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_EnumDescriptorProto_serialize(const google_protobuf_EnumDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumDescriptorProto_msginit, arena, len); } @@ -813,6 +867,12 @@ UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange *google_protobu google_protobuf_EnumDescriptorProto_EnumReservedRange *ret = google_protobuf_EnumDescriptorProto_EnumReservedRange_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange *google_protobuf_EnumDescriptorProto_EnumReservedRange_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_EnumDescriptorProto_EnumReservedRange *ret = google_protobuf_EnumDescriptorProto_EnumReservedRange_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, arena, options)) + ? 
ret : NULL; +} UPB_INLINE char *google_protobuf_EnumDescriptorProto_EnumReservedRange_serialize(const google_protobuf_EnumDescriptorProto_EnumReservedRange *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, arena, len); } @@ -841,6 +901,12 @@ UPB_INLINE google_protobuf_EnumValueDescriptorProto *google_protobuf_EnumValueDe google_protobuf_EnumValueDescriptorProto *ret = google_protobuf_EnumValueDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumValueDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_EnumValueDescriptorProto *google_protobuf_EnumValueDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_EnumValueDescriptorProto *ret = google_protobuf_EnumValueDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_EnumValueDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_EnumValueDescriptorProto_serialize(const google_protobuf_EnumValueDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumValueDescriptorProto_msginit, arena, len); } @@ -884,6 +950,12 @@ UPB_INLINE google_protobuf_ServiceDescriptorProto *google_protobuf_ServiceDescri google_protobuf_ServiceDescriptorProto *ret = google_protobuf_ServiceDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_ServiceDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_ServiceDescriptorProto *google_protobuf_ServiceDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_ServiceDescriptorProto *ret = google_protobuf_ServiceDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_ServiceDescriptorProto_msginit, arena, options)) + ? 
ret : NULL; +} UPB_INLINE char *google_protobuf_ServiceDescriptorProto_serialize(const google_protobuf_ServiceDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_ServiceDescriptorProto_msginit, arena, len); } @@ -936,6 +1008,12 @@ UPB_INLINE google_protobuf_MethodDescriptorProto *google_protobuf_MethodDescript google_protobuf_MethodDescriptorProto *ret = google_protobuf_MethodDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_MethodDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_MethodDescriptorProto *google_protobuf_MethodDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_MethodDescriptorProto *ret = google_protobuf_MethodDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_MethodDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_MethodDescriptorProto_serialize(const google_protobuf_MethodDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_MethodDescriptorProto_msginit, arena, len); } @@ -997,6 +1075,12 @@ UPB_INLINE google_protobuf_FileOptions *google_protobuf_FileOptions_parse(const google_protobuf_FileOptions *ret = google_protobuf_FileOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_FileOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_FileOptions *google_protobuf_FileOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_FileOptions *ret = google_protobuf_FileOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_FileOptions_msginit, arena, options)) + ? 
ret : NULL; +} UPB_INLINE char *google_protobuf_FileOptions_serialize(const google_protobuf_FileOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FileOptions_msginit, arena, len); } @@ -1148,6 +1232,12 @@ UPB_INLINE google_protobuf_MessageOptions *google_protobuf_MessageOptions_parse( google_protobuf_MessageOptions *ret = google_protobuf_MessageOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_MessageOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_MessageOptions *google_protobuf_MessageOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_MessageOptions *ret = google_protobuf_MessageOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_MessageOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_MessageOptions_serialize(const google_protobuf_MessageOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_MessageOptions_msginit, arena, len); } @@ -1203,6 +1293,12 @@ UPB_INLINE google_protobuf_FieldOptions *google_protobuf_FieldOptions_parse(cons google_protobuf_FieldOptions *ret = google_protobuf_FieldOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_FieldOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_FieldOptions *google_protobuf_FieldOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_FieldOptions *ret = google_protobuf_FieldOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_FieldOptions_msginit, arena, options)) + ? 
ret : NULL; +} UPB_INLINE char *google_protobuf_FieldOptions_serialize(const google_protobuf_FieldOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FieldOptions_msginit, arena, len); } @@ -1270,6 +1366,12 @@ UPB_INLINE google_protobuf_OneofOptions *google_protobuf_OneofOptions_parse(cons google_protobuf_OneofOptions *ret = google_protobuf_OneofOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_OneofOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_OneofOptions *google_protobuf_OneofOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_OneofOptions *ret = google_protobuf_OneofOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_OneofOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_OneofOptions_serialize(const google_protobuf_OneofOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_OneofOptions_msginit, arena, len); } @@ -1301,6 +1403,12 @@ UPB_INLINE google_protobuf_EnumOptions *google_protobuf_EnumOptions_parse(const google_protobuf_EnumOptions *ret = google_protobuf_EnumOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_EnumOptions *google_protobuf_EnumOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_EnumOptions *ret = google_protobuf_EnumOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_EnumOptions_msginit, arena, options)) + ? 
ret : NULL; +} UPB_INLINE char *google_protobuf_EnumOptions_serialize(const google_protobuf_EnumOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumOptions_msginit, arena, len); } @@ -1344,6 +1452,12 @@ UPB_INLINE google_protobuf_EnumValueOptions *google_protobuf_EnumValueOptions_pa google_protobuf_EnumValueOptions *ret = google_protobuf_EnumValueOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumValueOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_EnumValueOptions *google_protobuf_EnumValueOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_EnumValueOptions *ret = google_protobuf_EnumValueOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_EnumValueOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_EnumValueOptions_serialize(const google_protobuf_EnumValueOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumValueOptions_msginit, arena, len); } @@ -1381,6 +1495,12 @@ UPB_INLINE google_protobuf_ServiceOptions *google_protobuf_ServiceOptions_parse( google_protobuf_ServiceOptions *ret = google_protobuf_ServiceOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_ServiceOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_ServiceOptions *google_protobuf_ServiceOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_ServiceOptions *ret = google_protobuf_ServiceOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_ServiceOptions_msginit, arena, options)) + ? 
ret : NULL; +} UPB_INLINE char *google_protobuf_ServiceOptions_serialize(const google_protobuf_ServiceOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_ServiceOptions_msginit, arena, len); } @@ -1418,6 +1538,12 @@ UPB_INLINE google_protobuf_MethodOptions *google_protobuf_MethodOptions_parse(co google_protobuf_MethodOptions *ret = google_protobuf_MethodOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_MethodOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_MethodOptions *google_protobuf_MethodOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_MethodOptions *ret = google_protobuf_MethodOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_MethodOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_MethodOptions_serialize(const google_protobuf_MethodOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_MethodOptions_msginit, arena, len); } @@ -1461,6 +1587,12 @@ UPB_INLINE google_protobuf_UninterpretedOption *google_protobuf_UninterpretedOpt google_protobuf_UninterpretedOption *ret = google_protobuf_UninterpretedOption_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_UninterpretedOption *google_protobuf_UninterpretedOption_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_UninterpretedOption *ret = google_protobuf_UninterpretedOption_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_msginit, arena, options)) + ? 
ret : NULL; +} UPB_INLINE char *google_protobuf_UninterpretedOption_serialize(const google_protobuf_UninterpretedOption *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_UninterpretedOption_msginit, arena, len); } @@ -1528,6 +1660,12 @@ UPB_INLINE google_protobuf_UninterpretedOption_NamePart *google_protobuf_Uninter google_protobuf_UninterpretedOption_NamePart *ret = google_protobuf_UninterpretedOption_NamePart_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_NamePart_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_UninterpretedOption_NamePart *google_protobuf_UninterpretedOption_NamePart_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_UninterpretedOption_NamePart *ret = google_protobuf_UninterpretedOption_NamePart_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_NamePart_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_UninterpretedOption_NamePart_serialize(const google_protobuf_UninterpretedOption_NamePart *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_UninterpretedOption_NamePart_msginit, arena, len); } @@ -1556,6 +1694,12 @@ UPB_INLINE google_protobuf_SourceCodeInfo *google_protobuf_SourceCodeInfo_parse( google_protobuf_SourceCodeInfo *ret = google_protobuf_SourceCodeInfo_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_SourceCodeInfo *google_protobuf_SourceCodeInfo_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_SourceCodeInfo *ret = google_protobuf_SourceCodeInfo_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_msginit, arena, options)) + ? 
ret : NULL; +} UPB_INLINE char *google_protobuf_SourceCodeInfo_serialize(const google_protobuf_SourceCodeInfo *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_SourceCodeInfo_msginit, arena, len); } @@ -1587,6 +1731,12 @@ UPB_INLINE google_protobuf_SourceCodeInfo_Location *google_protobuf_SourceCodeIn google_protobuf_SourceCodeInfo_Location *ret = google_protobuf_SourceCodeInfo_Location_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_Location_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_SourceCodeInfo_Location *google_protobuf_SourceCodeInfo_Location_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_SourceCodeInfo_Location *ret = google_protobuf_SourceCodeInfo_Location_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_Location_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_SourceCodeInfo_Location_serialize(const google_protobuf_SourceCodeInfo_Location *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_SourceCodeInfo_Location_msginit, arena, len); } @@ -1648,6 +1798,12 @@ UPB_INLINE google_protobuf_GeneratedCodeInfo *google_protobuf_GeneratedCodeInfo_ google_protobuf_GeneratedCodeInfo *ret = google_protobuf_GeneratedCodeInfo_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_GeneratedCodeInfo *google_protobuf_GeneratedCodeInfo_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_GeneratedCodeInfo *ret = google_protobuf_GeneratedCodeInfo_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_msginit, arena, options)) + ? 
ret : NULL; +} UPB_INLINE char *google_protobuf_GeneratedCodeInfo_serialize(const google_protobuf_GeneratedCodeInfo *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_GeneratedCodeInfo_msginit, arena, len); } @@ -1679,6 +1835,12 @@ UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation *google_protobuf_Generat google_protobuf_GeneratedCodeInfo_Annotation *ret = google_protobuf_GeneratedCodeInfo_Annotation_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_Annotation_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation *google_protobuf_GeneratedCodeInfo_Annotation_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_GeneratedCodeInfo_Annotation *ret = google_protobuf_GeneratedCodeInfo_Annotation_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_Annotation_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_GeneratedCodeInfo_Annotation_serialize(const google_protobuf_GeneratedCodeInfo_Annotation *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_GeneratedCodeInfo_Annotation_msginit, arena, len); } From 165e01ec6fe54788ef5dbed4f4ddf7e547c285d4 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 11 Nov 2020 17:08:25 -0800 Subject: [PATCH 12/18] Fix for old Python versions. 
--- benchmarks/gen_synthetic_protos.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/benchmarks/gen_synthetic_protos.py b/benchmarks/gen_synthetic_protos.py index c90e4e92bc..2d28aa885d 100644 --- a/benchmarks/gen_synthetic_protos.py +++ b/benchmarks/gen_synthetic_protos.py @@ -44,6 +44,13 @@ field_freqs = [ population = [item[0] for item in field_freqs] weights = [item[1] for item in field_freqs] +def choices(k): + if sys.version_info >= (3, 6): + return random.choices(population=population, weights=weights, k=k) + else: + print("WARNING: old Python version, field types are not properly weighted!") + return [random.choice(population) for _ in range(k)] + with open(base + "/benchmarks/100_msgs.proto", "w") as f: f.write('syntax = "proto3";\n') f.write('package upb_benchmark;\n') @@ -65,7 +72,7 @@ with open(base + "/benchmarks/100_fields.proto", "w") as f: f.write('message Message {\n') i = 1 random.seed(a=0, version=2) - for field in random.choices(population=population, weights=weights, k=100): + for field in choices(100): field_type, label = field f.write(' {label} {field_type} field{i} = {i};\n'.format(i=i, label=label, field_type=field_type)) i += 1 @@ -78,7 +85,7 @@ with open(base + "/benchmarks/200_fields.proto", "w") as f: f.write('message Message {\n') i = 1 random.seed(a=0, version=2) - for field in random.choices(population=population, weights=weights, k=200): + for field in choices(200): field_type, label = field f.write(' {label} {field_type} field{i} = {i};\n'.format(i=i, label=label,field_type=field_type)) i += 1 From 86f671d5fdd6b59ed74c0f50a06417e22b787ac7 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 11 Nov 2020 17:13:40 -0800 Subject: [PATCH 13/18] Fix for Darwin (output is different, but it won't error out). 
--- benchmarks/BUILD | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/BUILD b/benchmarks/BUILD index 641e1389f1..df08ca249e 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -189,5 +189,6 @@ genrule( suffixes = ["_upb_binary", "_protobuf_binary", "_lite_protobuf_binary"], ), outs = ["size_data.txt"], - cmd = "size --format=GNU -d $(SRCS) > $@", + # We want --format=GNU which counts rodata with data, not text. + cmd = "size $$([[ $$OSTYPE == linux-gnu ]] && echo '--format=GNU -d') $(SRCS) > $@", ) From 4a84390c89e9246053164a6f0ea715ff61dc3ff5 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 11 Nov 2020 17:56:55 -0800 Subject: [PATCH 14/18] Added cc_proto_library() tweaks for google3. --- benchmarks/build_defs.bzl | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/benchmarks/build_defs.bzl b/benchmarks/build_defs.bzl index 157be4d03b..827f8f8fea 100644 --- a/benchmarks/build_defs.bzl +++ b/benchmarks/build_defs.bzl @@ -1,4 +1,17 @@ +# copybara:insert_for_google3_begin +# load("//tools/build_defs/proto/cpp:cc_proto_library.bzl", cc_proto_library="cc_proto_library") +# def _cc_proto_library(**kwargs): +# cc_proto_library( +# cc_api_version = 2, +# **kwargs, +# ) +# copybara:insert_end + +# copybara:strip_for_google3_begin +_cc_proto_library = native.cc_proto_library +# copybara:strip_end + def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs): srcs = [name + ".cc"] native.genrule( @@ -30,7 +43,7 @@ def cc_lite_proto_library(name, srcs, outs): srcs = outs, ) - native.cc_proto_library( + _cc_proto_library( name = name, deps = [":" + name + "_proto"], ) From d2446fd2dbda3bf15033ad140ca20d540b96b5f6 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 11 Nov 2020 18:02:42 -0800 Subject: [PATCH 15/18] Moved cc_api_version attribute to proto_library(). 
--- benchmarks/BUILD | 1 + benchmarks/build_defs.bzl | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/benchmarks/BUILD b/benchmarks/BUILD index df08ca249e..1697e341a5 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -8,6 +8,7 @@ load( "tmpl_cc_binary", "cc_lite_proto_library", "expand_suffixes", + "proto_library", ) licenses(["notice"]) diff --git a/benchmarks/build_defs.bzl b/benchmarks/build_defs.bzl index 827f8f8fea..731ba04381 100644 --- a/benchmarks/build_defs.bzl +++ b/benchmarks/build_defs.bzl @@ -1,17 +1,20 @@ # copybara:insert_for_google3_begin -# load("//tools/build_defs/proto/cpp:cc_proto_library.bzl", cc_proto_library="cc_proto_library") -# def _cc_proto_library(**kwargs): -# cc_proto_library( -# cc_api_version = 2, -# **kwargs, -# ) +# load("//tools/build_defs/proto/cpp:cc_proto_library.bzl", _cc_proto_library="cc_proto_library") # copybara:insert_end # copybara:strip_for_google3_begin _cc_proto_library = native.cc_proto_library # copybara:strip_end +def proto_library(**kwargs): + native.proto_library( + # copybara:insert_for_google3_begin + # cc_api_version = 2, + # copybara:insert_end + **kwargs, + ) + def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs): srcs = [name + ".cc"] native.genrule( @@ -49,8 +52,8 @@ def cc_lite_proto_library(name, srcs, outs): ) def expand_suffixes(vals, suffixes): - ret = [] - for val in vals: - for suffix in suffixes: - ret.append(val + suffix) - return ret + ret = [] + for val in vals: + for suffix in suffixes: + ret.append(val + suffix) + return ret From da48e01f058a97835a9bbf2fad9c5b2db49998a8 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 11 Nov 2020 19:00:52 -0800 Subject: [PATCH 16/18] More google3 fixes. 
--- benchmarks/BUILD | 9 +++++---- benchmarks/build_defs.bzl | 6 +++++- benchmarks/gen_synthetic_protos.py | 8 ++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/benchmarks/BUILD b/benchmarks/BUILD index 1697e341a5..7d0523278c 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -80,6 +80,7 @@ SIZE_BENCHMARKS = { py_binary( name = "gen_synthetic_protos", srcs = ["gen_synthetic_protos.py"], + python_version = "PY3", ) py_binary( @@ -103,7 +104,7 @@ genrule( "100_fields.proto", "200_fields.proto", ], - cmd = "$(execpath :gen_synthetic_protos) $(GENDIR)", + cmd = "$(execpath :gen_synthetic_protos) $(RULEDIR)", ) proto_library( @@ -145,7 +146,7 @@ tmpl_cc_binary( testonly = 1, gen = ":gen_upb_binary_c", args = [ - "benchmarks/" + k + ".upb.h", + package_name() + "/" + k + ".upb.h", "upb_benchmark_" + v, ], deps = [ @@ -157,7 +158,7 @@ tmpl_cc_binary( testonly = 1, gen = ":gen_protobuf_binary_cc", args = [ - "benchmarks/" + k + ".pb.h", + package_name() + "/" + k + ".pb.h", "upb_benchmark::" + v, ], deps = [ @@ -174,7 +175,7 @@ tmpl_cc_binary( testonly = 1, gen = ":gen_protobuf_binary_cc", args = [ - "benchmarks/" + k + "_lite.pb.h", + package_name() + "/" + k + "_lite.pb.h", "upb_benchmark::" + v, ], deps = [ diff --git a/benchmarks/build_defs.bzl b/benchmarks/build_defs.bzl index 731ba04381..533c84454c 100644 --- a/benchmarks/build_defs.bzl +++ b/benchmarks/build_defs.bzl @@ -25,6 +25,10 @@ def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs): ) native.cc_binary( + # copybara:insert_for_google3_begin + # malloc="//base:system_malloc", + # features = ["-static_linking_mode"], + # copybara:insert_end name = name, srcs = srcs, **kwargs, @@ -41,7 +45,7 @@ def cc_lite_proto_library(name, srcs, outs): cmd = "cp $< $@ && chmod a+w $@ && echo 'option optimize_for = LITE_RUNTIME;' >> $@", ) - native.proto_library( + proto_library( name = name + "_proto", srcs = outs, ) diff --git a/benchmarks/gen_synthetic_protos.py 
b/benchmarks/gen_synthetic_protos.py index 2d28aa885d..a95deff080 100644 --- a/benchmarks/gen_synthetic_protos.py +++ b/benchmarks/gen_synthetic_protos.py @@ -51,21 +51,21 @@ def choices(k): print("WARNING: old Python version, field types are not properly weighted!") return [random.choice(population) for _ in range(k)] -with open(base + "/benchmarks/100_msgs.proto", "w") as f: +with open(base + "/100_msgs.proto", "w") as f: f.write('syntax = "proto3";\n') f.write('package upb_benchmark;\n') f.write('message Message {}\n') for i in range(2, 101): f.write('message Message{i} {{}}\n'.format(i=i)) -with open(base + "/benchmarks/200_msgs.proto", "w") as f: +with open(base + "/200_msgs.proto", "w") as f: f.write('syntax = "proto3";\n') f.write('package upb_benchmark;\n') f.write('message Message {}\n') for i in range(2, 501): f.write('message Message{i} {{}}\n'.format(i=i)) -with open(base + "/benchmarks/100_fields.proto", "w") as f: +with open(base + "/100_fields.proto", "w") as f: f.write('syntax = "proto2";\n') f.write('package upb_benchmark;\n') f.write('enum Enum { ZERO = 0; }\n') @@ -78,7 +78,7 @@ with open(base + "/benchmarks/100_fields.proto", "w") as f: i += 1 f.write('}\n') -with open(base + "/benchmarks/200_fields.proto", "w") as f: +with open(base + "/200_fields.proto", "w") as f: f.write('syntax = "proto2";\n') f.write('package upb_benchmark;\n') f.write('enum Enum { ZERO = 0; }\n') From 378a27b640e5f7ec4a25b57cb70839e4445c78ed Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Wed, 11 Nov 2020 19:06:14 -0800 Subject: [PATCH 17/18] Force "size" to run locally. --- benchmarks/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/BUILD b/benchmarks/BUILD index 7d0523278c..720f8c681b 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -193,4 +193,6 @@ genrule( outs = ["size_data.txt"], # We want --format=GNU which counts rodata with data, not text. 
cmd = "size $$([[ $$OSTYPE == linux-gnu ]] && echo 
'--format=GNU -d') $(SRCS) > $@", + # "size" sometimes isn't available remotely. + local = 1, ) From 4be07a2dcef75c32f5cd583a741cffef5b3e975f Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 12 Nov 2020 08:43:25 -0800 Subject: [PATCH 18/18] Increase build timeout until we can parallelize the build. --- kokoro/ubuntu/presubmit.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kokoro/ubuntu/presubmit.cfg b/kokoro/ubuntu/presubmit.cfg index fa97583b34..0a7f20c6d1 100644 --- a/kokoro/ubuntu/presubmit.cfg +++ b/kokoro/ubuntu/presubmit.cfg @@ -1,2 +1,2 @@ build_file: "upb/kokoro/ubuntu/build.sh" -timeout_mins: 15 +timeout_mins: 30