Size benchmarks are working pretty well.

pull/13171/head
Joshua Haberman 4 years ago
parent 85cbc41a89
commit 555fbbc0bc
  1. 14
      bazel/upb_proto_library.bzl
  2. 110
      benchmarks/BUILD
  3. 27
      benchmarks/build_defs.bzl
  4. 4
      benchmarks/descriptor.proto
  5. 4
      benchmarks/descriptor_sv.proto
  6. 38
      benchmarks/gen_protobuf_binary_cc.py
  7. 85
      benchmarks/gen_synthetic_protos.py
  8. 39
      benchmarks/gen_upb_binary_c.py
  9. 10
      benchmarks/protobuf_binary.cc.tmpl
  10. 12
      benchmarks/upb_binary.c.tmpl

@ -21,23 +21,17 @@ def _get_real_short_path(file):
# Sometimes it has another few prefixes like:
# _virtual_imports/any_proto/google/protobuf/any.proto
# benchmarks/_virtual_imports/100_msgs_proto/benchmarks/100_msgs.proto
# We want just google/protobuf/any.proto.
if short_path.startswith("_virtual_imports"):
short_path = short_path.split("/", 2)[-1]
virtual_imports = "_virtual_imports/"
if virtual_imports in short_path:
short_path = short_path.split(virtual_imports)[1].split("/", 1)[1]
return short_path
def _get_real_root(file):
real_short_path = _get_real_short_path(file)
return file.path[:-len(real_short_path) - 1]
def _get_real_roots(files):
roots = {}
for file in files:
real_root = _get_real_root(file)
if real_root:
roots[real_root] = True
return roots.keys()
def _generate_output_file(ctx, src, extension):
real_short_path = _get_real_short_path(src)
real_short_path = paths.relativize(real_short_path, ctx.label.package)

@ -7,23 +7,24 @@ load(
":build_defs.bzl",
"tmpl_cc_binary",
"cc_lite_proto_library",
"expand_suffixes",
)
licenses(["notice"])
proto_library(
name = "benchmark_descriptor_proto",
name = "descriptor_proto",
srcs = ["descriptor.proto"],
)
upb_proto_library(
name = "benchmark_descriptor_upb_proto",
deps = [":benchmark_descriptor_proto"],
deps = [":descriptor_proto"],
)
upb_proto_reflection_library(
name = "benchmark_descriptor_upb_proto_reflection",
deps = [":benchmark_descriptor_proto"],
deps = [":descriptor_proto"],
)
upb_proto_reflection_library(
@ -33,7 +34,7 @@ upb_proto_reflection_library(
cc_proto_library(
name = "benchmark_descriptor_cc_proto",
deps = [":benchmark_descriptor_proto"],
deps = [":descriptor_proto"],
)
proto_library(
@ -69,26 +70,65 @@ cc_binary(
SIZE_BENCHMARKS = {
"empty": "Empty",
"descriptor": "FileDescriptorSet",
"100_msgs": "Message99",
"100_msgs": "Message100",
"200_msgs": "Message200",
"100_fields": "Message",
"200_fields": "Message",
}
py_binary(
name = "gen_benchmark_proto",
srcs = ["gen_benchmark_proto.py"],
name = "gen_synthetic_protos",
srcs = ["gen_synthetic_protos.py"],
)
py_binary(
name = "gen_upb_binary_c",
srcs = ["gen_upb_binary_c.py"],
)
py_binary(
name = "gen_protobuf_binary_cc",
srcs = ["gen_protobuf_binary_cc.py"],
)
genrule(
name = "gen_100_msgs",
tools = [":gen_benchmark_proto"],
outs = ["100_msgs.proto"],
cmd = "$(execpath :gen_benchmark_proto) $@",
name = "do_gen_synthetic_protos",
tools = [":gen_synthetic_protos"],
outs = [
"100_msgs.proto",
"200_msgs.proto",
"100_fields.proto",
"200_fields.proto",
],
cmd = "$(execpath :gen_synthetic_protos) $(GENDIR)",
)
[(
proto_library(
name = k + "_proto",
srcs = [k + ".proto"],
),
name = "100_msgs_proto",
srcs = ["100_msgs.proto"],
)
proto_library(
name = "200_msgs_proto",
srcs = ["200_msgs.proto"],
)
proto_library(
name = "100_fields_proto",
srcs = ["100_fields.proto"],
)
proto_library(
name = "200_fields_proto",
srcs = ["200_fields.proto"],
)
proto_library(
name = "empty_proto",
srcs = ["empty.proto"],
)
[(
upb_proto_library(
name = k + "_upb_proto",
deps = [":" + k + "_proto"],
@ -100,11 +140,11 @@ cc_proto_library(
tmpl_cc_binary(
name = k + "_upb_binary",
testonly = 1,
srcs = ["upb_binary.c.tmpl"],
replacements = {
"PROTO": "upb_benchmark_" + v,
"INCLUDE": "benchmarks/" + k + ".upb.h",
},
gen = ":gen_upb_binary_c",
args = [
"benchmarks/" + k + ".upb.h",
"upb_benchmark_" + v,
],
deps = [
":" + k + "_upb_proto",
],
@ -112,11 +152,11 @@ tmpl_cc_binary(
tmpl_cc_binary(
name = k + "_protobuf_binary",
testonly = 1,
srcs = ["protobuf_binary.cc.tmpl"],
replacements = {
"PROTO": "upb_benchmark::" + v,
"INCLUDE": "benchmarks/" + k + ".pb.h",
},
gen = ":gen_protobuf_binary_cc",
args = [
"benchmarks/" + k + ".pb.h",
"upb_benchmark::" + v,
],
deps = [
":" + k + "_cc_proto",
],
@ -129,13 +169,23 @@ cc_lite_proto_library(
tmpl_cc_binary(
name = k + "_lite_protobuf_binary",
testonly = 1,
srcs = ["protobuf_binary.cc.tmpl"],
replacements = {
"PROTO": "upb_benchmark::" + v,
"INCLUDE": "benchmarks/" + k + "_lite.pb.h",
},
gen = ":gen_protobuf_binary_cc",
args = [
"benchmarks/" + k + "_lite.pb.h",
"upb_benchmark::" + v,
],
deps = [
":" + k + "_cc_lite_proto",
],
)) for k, v in SIZE_BENCHMARKS.items()]
genrule(
testonly = 1,
name = "size_data",
srcs = expand_suffixes(
SIZE_BENCHMARKS.keys(),
suffixes = ["_upb_binary", "_protobuf_binary"],
),
outs = ["size_data.txt"],
cmd = "size --format=GNU -d $(SRCS) > $@",
)

@ -1,24 +1,16 @@
def tmpl_cc_binary(name, srcs, replacements = [], **kwargs):
if len(srcs) != 1:
fail("Currently srcs must have exactly 1 element")
src = srcs[0]
if not src.endswith(".tmpl"):
fail("srcs of tmpl_cc_binary must end with .tmpl")
outs = [name + "_" + src[:-5]]
sed_cmds = ["s,{},{},g".format(k, v) for k, v in replacements.items()]
cmd = "sed -e '{}' $< > $@".format("; ".join(sed_cmds))
def tmpl_cc_binary(name, gen, args, replacements = [], **kwargs):
srcs = [name + ".cc"]
native.genrule(
name = name + "_gen_srcs",
srcs = [src],
outs = outs,
cmd = cmd,
tools = [gen],
outs = srcs,
cmd = "$(location " + gen + ") " + " ".join(args) + " > $@",
)
native.cc_binary(
name = name,
srcs = outs,
srcs = srcs,
**kwargs,
)
@ -42,3 +34,10 @@ def cc_lite_proto_library(name, srcs, outs):
name = name,
deps = [":" + name + "_proto"],
)
def expand_suffixes(vals, suffixes):
    """Returns `val + suffix` for every pairing of a value with a suffix.

    Results are grouped by value: all suffixes are applied to the first value
    (in the order given) before moving on to the next value.

    Args:
      vals: iterable of strings to extend.
      suffixes: iterable of strings appended to each value.

    Returns:
      A new list of the concatenated strings.
    """
    return [stem + tail for stem in vals for tail in suffixes]

@ -48,10 +48,6 @@ option csharp_namespace = "Google.Protobuf.Reflection";
option objc_class_prefix = "GPB";
option cc_enable_arenas = true;
// descriptor.proto must be optimized for speed because reflection-based
// algorithms don't work during bootstrapping.
option optimize_for = SPEED;
// The protocol compiler can output a FileDescriptorSet containing the .proto
// files it parses.
message FileDescriptorSet {

@ -47,10 +47,6 @@ option csharp_namespace = "Google.Protobuf.Reflection";
option objc_class_prefix = "GPB";
option cc_enable_arenas = true;
// descriptor.proto must be optimized for speed because reflection-based
// algorithms don't work during bootstrapping.
option optimize_for = SPEED;
// The protocol compiler can output a FileDescriptorSet containing the .proto
// files it parses.
message FileDescriptorSet {

@ -0,0 +1,38 @@
import sys
import re
def parse_message_arg(message):
    """Split a message-name argument into ``(basename, count)``.

    A trailing run of decimal digits is read as the number of messages to
    reference, e.g. ``"ns::Message100"`` -> ``("ns::Message", 100)``.  A name
    without trailing digits is returned unchanged with a count of 1.
    """
    m = re.search(r'(.*\D)(\d+)$', message)
    if m:
        return m.group(1), int(m.group(2))
    return message, 1


def message_names(basename, count):
    """Return the type names to reference for ``count`` messages.

    The first message is the bare ``basename``; the remaining ones are
    ``basename2`` .. ``basename<count>``.
    """
    return [basename] + [basename + str(i) for i in range(2, count + 1)]


def ref_message(name):
    """Return a C++ scope that parses and serializes one ``name`` message."""
    return f'''
{{
{name} proto;
proto.ParseFromArray(buf, 0);
proto.SerializePartialToArray(&buf[0], 0);
}}
'''


def render(include, message):
    """Return the full C++ source text for the generated binary.

    Args:
        include: path of the generated header to ``#include``.
        message: fully qualified message name, optionally ending in a count
            (see ``parse_message_arg``).
    """
    basename, count = parse_message_arg(message)
    chunks = [f'''
#include "{include}"
char buf[1];
int main() {{
''']
    chunks.extend(ref_message(name) for name in message_names(basename, count))
    chunks.append('''
return 0;
}''')
    # Each chunk is terminated by one newline, exactly as a print() per chunk
    # would emit it.
    return "".join(chunk + "\n" for chunk in chunks)


def main(argv):
    """Write the generated source to stdout; ``argv`` is ``sys.argv``-shaped."""
    if len(argv) < 3:
        sys.exit("usage: gen_protobuf_binary_cc.py INCLUDE MESSAGE_NAME")
    sys.stdout.write(render(argv[1], argv[2]))


if __name__ == "__main__":
    main(sys.argv)

@ -0,0 +1,85 @@
import sys
import random
# Relative frequency of each (field type, label) pair; used as sampling
# weights when choosing the fields of the synthetic "N_fields" messages.
field_freqs = [
    (('bool', 'optional'), 8.321),
    (('bool', 'repeated'), 0.033),
    (('bytes', 'optional'), 0.809),
    (('bytes', 'repeated'), 0.065),
    (('double', 'optional'), 2.845),
    (('double', 'repeated'), 0.143),
    (('fixed32', 'optional'), 0.084),
    (('fixed32', 'repeated'), 0.012),
    (('fixed64', 'optional'), 0.204),
    (('fixed64', 'repeated'), 0.027),
    (('float', 'optional'), 2.355),
    (('float', 'repeated'), 0.132),
    (('int32', 'optional'), 6.717),
    (('int32', 'repeated'), 0.366),
    (('int64', 'optional'), 9.678),
    (('int64', 'repeated'), 0.425),
    (('sfixed32', 'optional'), 0.018),
    (('sfixed32', 'repeated'), 0.005),
    (('sfixed64', 'optional'), 0.022),
    (('sfixed64', 'repeated'), 0.005),
    (('sint32', 'optional'), 0.026),
    (('sint32', 'repeated'), 0.009),
    (('sint64', 'optional'), 0.018),
    (('sint64', 'repeated'), 0.006),
    (('string', 'optional'), 25.461),
    (('string', 'repeated'), 2.606),
    (('Enum', 'optional'), 6.16),
    (('Enum', 'repeated'), 0.576),
    (('Message', 'optional'), 22.472),
    (('Message', 'repeated'), 7.766),
    (('uint32', 'optional'), 1.289),
    (('uint32', 'repeated'), 0.051),
    (('uint64', 'optional'), 1.044),
    (('uint64', 'repeated'), 0.079),
]

population = [item[0] for item in field_freqs]
weights = [item[1] for item in field_freqs]


def msgs_proto_text(count):
    """Return a proto3 file defining ``count`` empty messages.

    The messages are named ``Message``, ``Message2`` .. ``Message<count>``.
    """
    lines = [
        'syntax = "proto3";\n',
        'package upb_benchmark;\n',
        'message Message {}\n',
    ]
    lines.extend(f'message Message{i} {{}}\n' for i in range(2, count + 1))
    return "".join(lines)


def fields_proto_text(count):
    """Return a proto2 file with one ``Message`` holding ``count`` fields.

    Field types and labels are sampled from ``field_freqs``.  The generator is
    seeded with 0 on every call so the output is reproducible from run to run.
    """
    # A private Random(0) yields the same sequence as seeding the module-level
    # generator with 0, without disturbing global random state.
    rng = random.Random(0)
    chosen = rng.choices(population=population, weights=weights, k=count)
    lines = [
        'syntax = "proto2";\n',
        'package upb_benchmark;\n',
        'enum Enum { ZERO = 0; }\n',
        'message Message {\n',
    ]
    for i, (field_type, label) in enumerate(chosen, start=1):
        lines.append(f' {label} {field_type} field{i} = {i};\n')
    lines.append('}\n')
    return "".join(lines)


def main(argv):
    """Write the synthetic .proto files under ``<argv[1]>/benchmarks/``."""
    if len(argv) < 2:
        sys.exit("usage: gen_synthetic_protos.py OUTPUT_BASE_DIR")
    base = argv[1]
    outputs = {
        "100_msgs.proto": msgs_proto_text(100),
        # Previously written with range(2, 501), i.e. 500 messages, which did
        # not match the file name; it now holds exactly 200.
        "200_msgs.proto": msgs_proto_text(200),
        "100_fields.proto": fields_proto_text(100),
        "200_fields.proto": fields_proto_text(200),
    }
    for filename, text in outputs.items():
        with open(base + "/benchmarks/" + filename, "w") as f:
            f.write(text)


if __name__ == "__main__":
    main(sys.argv)

@ -0,0 +1,39 @@
import sys
import re
def parse_message_arg(message):
    """Split a message-name argument into ``(basename, count)``.

    A trailing run of decimal digits is read as the number of messages to
    reference, e.g. ``"pkg_Message100"`` -> ``("pkg_Message", 100)``.  A name
    without trailing digits is returned unchanged with a count of 1.
    """
    m = re.search(r'(.*\D)(\d+)$', message)
    if m:
        return m.group(1), int(m.group(2))
    return message, 1


def message_names(basename, count):
    """Return the type names to reference for ``count`` messages.

    The first message is the bare ``basename``; the remaining ones are
    ``basename2`` .. ``basename<count>``.
    """
    return [basename] + [basename + str(i) for i in range(2, count + 1)]


def ref_message(name):
    """Return a C scope that calls ``<name>_parse`` and ``<name>_serialize``."""
    return f'''
{{
{name} *proto = {name}_parse(buf, 1, arena);
{name}_serialize(proto, arena, &size);
}}
'''


def render(include, message):
    """Return the full C source text for the generated binary.

    Args:
        include: path of the generated header to ``#include``.
        message: C message type name, optionally ending in a count
            (see ``parse_message_arg``).
    """
    basename, count = parse_message_arg(message)
    chunks = [f'''
#include "{include}"
char buf[1];
int main() {{
upb_arena *arena = upb_arena_new();
size_t size;
''']
    chunks.extend(ref_message(name) for name in message_names(basename, count))
    chunks.append('''
return 0;
}''')
    # Each chunk is terminated by one newline, exactly as a print() per chunk
    # would emit it.
    return "".join(chunk + "\n" for chunk in chunks)


def main(argv):
    """Write the generated source to stdout; ``argv`` is ``sys.argv``-shaped."""
    if len(argv) < 3:
        sys.exit("usage: gen_upb_binary_c.py INCLUDE MESSAGE_NAME")
    sys.stdout.write(render(argv[1], argv[2]))


if __name__ == "__main__":
    main(sys.argv)

@ -1,10 +0,0 @@
#include "INCLUDE"
char buf[1];
int main() {
PROTO proto;
proto.ParseFromArray(buf, 1);
proto.SerializeToArray(buf, 1);
}

@ -1,12 +0,0 @@
#include "INCLUDE"
char buf[1];
int main() {
upb_arena *arena = upb_arena_new();
size_t size;
PROTO *proto = PROTO_parse(buf, 1, arena);
PROTO_serialize(proto, arena, &size);
return 0;
}
Loading…
Cancel
Save