diff --git a/bazel/upb_proto_library.bzl b/bazel/upb_proto_library.bzl index d4d51604f4..bec5d9cbd1 100644 --- a/bazel/upb_proto_library.bzl +++ b/bazel/upb_proto_library.bzl @@ -21,23 +21,17 @@ def _get_real_short_path(file): # Sometimes it has another few prefixes like: # _virtual_imports/any_proto/google/protobuf/any.proto + # benchmarks/_virtual_imports/100_msgs_proto/benchmarks/100_msgs.proto # We want just google/protobuf/any.proto. - if short_path.startswith("_virtual_imports"): - short_path = short_path.split("/", 2)[-1] + virtual_imports = "_virtual_imports/" + if virtual_imports in short_path: + short_path = short_path.split(virtual_imports)[1].split("/", 1)[1] return short_path def _get_real_root(file): real_short_path = _get_real_short_path(file) return file.path[:-len(real_short_path) - 1] -def _get_real_roots(files): - roots = {} - for file in files: - real_root = _get_real_root(file) - if real_root: - roots[real_root] = True - return roots.keys() - def _generate_output_file(ctx, src, extension): real_short_path = _get_real_short_path(src) real_short_path = paths.relativize(real_short_path, ctx.label.package) diff --git a/benchmarks/BUILD b/benchmarks/BUILD index a937accc9a..5c32e10444 100644 --- a/benchmarks/BUILD +++ b/benchmarks/BUILD @@ -7,23 +7,24 @@ load( ":build_defs.bzl", "tmpl_cc_binary", "cc_lite_proto_library", + "expand_suffixes", ) licenses(["notice"]) proto_library( - name = "benchmark_descriptor_proto", + name = "descriptor_proto", srcs = ["descriptor.proto"], ) upb_proto_library( name = "benchmark_descriptor_upb_proto", - deps = [":benchmark_descriptor_proto"], + deps = [":descriptor_proto"], ) upb_proto_reflection_library( name = "benchmark_descriptor_upb_proto_reflection", - deps = [":benchmark_descriptor_proto"], + deps = [":descriptor_proto"], ) upb_proto_reflection_library( @@ -33,7 +34,7 @@ upb_proto_reflection_library( cc_proto_library( name = "benchmark_descriptor_cc_proto", - deps = [":benchmark_descriptor_proto"], + deps = 
[":descriptor_proto"], ) proto_library( @@ -69,26 +70,65 @@ cc_binary( SIZE_BENCHMARKS = { "empty": "Empty", "descriptor": "FileDescriptorSet", - "100_msgs": "Message99", + "100_msgs": "Message100", + "200_msgs": "Message200", + "100_fields": "Message", + "200_fields": "Message", } py_binary( - name = "gen_benchmark_proto", - srcs = ["gen_benchmark_proto.py"], + name = "gen_synthetic_protos", + srcs = ["gen_synthetic_protos.py"], +) + +py_binary( + name = "gen_upb_binary_c", + srcs = ["gen_upb_binary_c.py"], +) + +py_binary( + name = "gen_protobuf_binary_cc", + srcs = ["gen_protobuf_binary_cc.py"], ) genrule( - name = "gen_100_msgs", - tools = [":gen_benchmark_proto"], - outs = ["100_msgs.proto"], - cmd = "$(execpath :gen_benchmark_proto) $@", + name = "do_gen_synthetic_protos", + tools = [":gen_synthetic_protos"], + outs = [ + "100_msgs.proto", + "200_msgs.proto", + "100_fields.proto", + "200_fields.proto", + ], + cmd = "$(execpath :gen_synthetic_protos) $(GENDIR)", ) -[( proto_library( - name = k + "_proto", - srcs = [k + ".proto"], -), + name = "100_msgs_proto", + srcs = ["100_msgs.proto"], +) + +proto_library( + name = "200_msgs_proto", + srcs = ["200_msgs.proto"], +) + +proto_library( + name = "100_fields_proto", + srcs = ["100_fields.proto"], +) + +proto_library( + name = "200_fields_proto", + srcs = ["200_fields.proto"], +) + +proto_library( + name = "empty_proto", + srcs = ["empty.proto"], +) + +[( upb_proto_library( name = k + "_upb_proto", deps = [":" + k + "_proto"], @@ -100,11 +140,11 @@ cc_proto_library( tmpl_cc_binary( name = k + "_upb_binary", testonly = 1, - srcs = ["upb_binary.c.tmpl"], - replacements = { - "PROTO": "upb_benchmark_" + v, - "INCLUDE": "benchmarks/" + k + ".upb.h", - }, + gen = ":gen_upb_binary_c", + args = [ + "benchmarks/" + k + ".upb.h", + "upb_benchmark_" + v, + ], deps = [ ":" + k + "_upb_proto", ], @@ -112,11 +152,11 @@ tmpl_cc_binary( tmpl_cc_binary( name = k + "_protobuf_binary", testonly = 1, - srcs = 
def tmpl_cc_binary(name, gen, args, replacements = None, **kwargs):
    """Builds a cc_binary whose single source file is emitted by a generator.

    Declares two targets: a genrule `<name>_gen_srcs` that runs `gen` with
    `args` and captures its stdout as `<name>.cc`, and a cc_binary `<name>`
    compiled from that file.

    Args:
      name: name of the resulting cc_binary; also the stem of the generated
        source file. The output always gets a `.cc` extension, regardless of
        which generator produced it.
      gen: label of an executable that prints the source text to stdout.
      args: list of strings passed to `gen` on its command line. They are
        joined with spaces and are NOT shell-quoted, so they must not contain
        whitespace or shell metacharacters.
      replacements: ignored. Kept only so call sites written for the old
        sed-template version of this macro keep loading.
      **kwargs: forwarded unchanged to native.cc_binary (e.g. deps, testonly).
    """
    srcs = [name + ".cc"]

    native.genrule(
        name = name + "_gen_srcs",
        tools = [gen],
        outs = srcs,
        cmd = "$(location {}) {} > $@".format(gen, " ".join(args)),
    )

    native.cc_binary(
        name = name,
        srcs = srcs,
        **kwargs
    )
def expand_suffixes(vals, suffixes):
    """Returns every `val + suffix` combination.

    The result is ordered by `vals` first and `suffixes` second, i.e. all
    suffixes of the first value come before any suffix of the second value.

    Args:
      vals: iterable of strings (e.g. the keys of SIZE_BENCHMARKS).
      suffixes: iterable of strings appended to each value.

    Returns:
      A new list with len(vals) * len(suffixes) strings.
    """
    return [val + suffix for val in vals for suffix in suffixes]
"""Emit a C++ source file that references generated protobuf messages.

Usage: gen_protobuf_binary_cc.py <include> <message>

<include> is the generated header to #include and <message> is the C++
message type to reference.  If <message> ends in digits (for example
"upb_benchmark::Message100"), the digits are read as a count N and the output
references the base name plus base2 .. baseN, matching the naming scheme of
the synthetic "N_msgs" protos.  The program text is written to stdout.
"""

import re
import sys

# Group 1: base name (must end in a non-digit).  Group 2: trailing count.
_NUMBERED_MESSAGE = re.compile(r'(.*\D)(\d+)$')

_HEADER = '''
#include "{include}"

char buf[1];

int main() {{
'''

_REF_MESSAGE = '''
  {{
    {name} proto;
    proto.ParseFromArray(buf, 0);
    proto.SerializePartialToArray(&buf[0], 0);
  }}
  '''

_FOOTER = '''
  return 0;
}'''


def split_message_name(message):
    """Split "Base123" into ("Base", 123); names without digits give count 1."""
    match = _NUMBERED_MESSAGE.search(message)
    if match is None:
        return message, 1
    return match.group(1), int(match.group(2))


def message_names(message):
    """Return the list of message types the generated program must reference.

    The first message carries no numeric suffix; the rest are numbered from 2.
    """
    base, count = split_message_name(message)
    return [base] + [base + str(i) for i in range(2, count + 1)]


def generate_source(include, message):
    """Return the full C++ program text for the given header and message."""
    chunks = [_HEADER.format(include=include)]
    chunks.extend(_REF_MESSAGE.format(name=name) for name in message_names(message))
    chunks.append(_FOOTER)
    # Each chunk used to be emitted by its own print(), hence the newline
    # after every chunk.
    return "".join(chunk + "\n" for chunk in chunks)


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 3:
        sys.stderr.write("usage: gen_protobuf_binary_cc.py <include> <message>\n")
        return 2
    sys.stdout.write(generate_source(argv[1], argv[2]))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
"""Generate the synthetic .proto inputs used by the size benchmarks.

Usage: gen_synthetic_protos.py <output_base>

Four files are written under <output_base>/benchmarks/:

  100_msgs.proto, 200_msgs.proto      N empty proto3 messages named
                                      Message, Message2 .. MessageN
  100_fields.proto, 200_fields.proto  one proto2 message with N fields whose
                                      types are drawn from `field_freqs`

Field selection uses a fixed seed, so the output is reproducible.
"""

import os
import random
import sys

# ((field type, label), relative weight).  The weights are used as-is by
# random.choices(); they look like percentages, but their provenance is not
# recorded here.
field_freqs = [
    (('bool', 'optional'), 8.321),
    (('bool', 'repeated'), 0.033),
    (('bytes', 'optional'), 0.809),
    (('bytes', 'repeated'), 0.065),
    (('double', 'optional'), 2.845),
    (('double', 'repeated'), 0.143),
    (('fixed32', 'optional'), 0.084),
    (('fixed32', 'repeated'), 0.012),
    (('fixed64', 'optional'), 0.204),
    (('fixed64', 'repeated'), 0.027),
    (('float', 'optional'), 2.355),
    (('float', 'repeated'), 0.132),
    (('int32', 'optional'), 6.717),
    (('int32', 'repeated'), 0.366),
    (('int64', 'optional'), 9.678),
    (('int64', 'repeated'), 0.425),
    (('sfixed32', 'optional'), 0.018),
    (('sfixed32', 'repeated'), 0.005),
    (('sfixed64', 'optional'), 0.022),
    (('sfixed64', 'repeated'), 0.005),
    (('sint32', 'optional'), 0.026),
    (('sint32', 'repeated'), 0.009),
    (('sint64', 'optional'), 0.018),
    (('sint64', 'repeated'), 0.006),
    (('string', 'optional'), 25.461),
    (('string', 'repeated'), 2.606),
    (('Enum', 'optional'), 6.16),
    (('Enum', 'repeated'), 0.576),
    (('Message', 'optional'), 22.472),
    (('Message', 'repeated'), 7.766),
    (('uint32', 'optional'), 1.289),
    (('uint32', 'repeated'), 0.051),
    (('uint64', 'optional'), 1.044),
    (('uint64', 'repeated'), 0.079),
]

population = [item[0] for item in field_freqs]
weights = [item[1] for item in field_freqs]


def write_msgs_proto(path, count):
    """Write a proto3 file declaring `count` empty messages to `path`.

    The first message is named "Message"; the others are "Message2" through
    "Message<count>", which is the naming the benchmark binaries expect.
    """
    lines = [
        'syntax = "proto3";\n',
        'package upb_benchmark;\n',
        'message Message {}\n',
    ]
    lines.extend(f'message Message{i} {{}}\n' for i in range(2, count + 1))
    with open(path, "w") as f:
        f.writelines(lines)


def write_fields_proto(path, count, seed=0):
    """Write a proto2 file with one message holding `count` fields to `path`.

    Field (type, label) pairs are sampled with replacement from `population`
    using `weights`.  A private generator seeded with `seed` is used, which
    yields the same sequence as seeding the module-level generator but leaves
    global random state untouched.
    """
    rng = random.Random()
    rng.seed(a=seed, version=2)
    picks = rng.choices(population=population, weights=weights, k=count)
    lines = [
        'syntax = "proto2";\n',
        'package upb_benchmark;\n',
        'enum Enum { ZERO = 0; }\n',
        'message Message {\n',
    ]
    # Field names and field numbers both start at 1.
    lines.extend(
        f'  {label} {field_type} field{i} = {i};\n'
        for i, (field_type, label) in enumerate(picks, start=1)
    )
    lines.append('}\n')
    with open(path, "w") as f:
        f.writelines(lines)


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 2:
        sys.stderr.write("usage: gen_synthetic_protos.py <output_base>\n")
        return 2
    out_dir = os.path.join(argv[1], "benchmarks")
    os.makedirs(out_dir, exist_ok=True)
    # The file name prefix is the number of messages/fields in the file.
    # (200_msgs.proto previously received 500 messages.)
    for count in (100, 200):
        write_msgs_proto(os.path.join(out_dir, f"{count}_msgs.proto"), count)
        write_fields_proto(os.path.join(out_dir, f"{count}_fields.proto"), count)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
"""Emit a C source file that references generated upb messages.

Usage: gen_upb_binary_c.py <include> <message>

<include> is the generated .upb.h header to #include and <message> is the C
prefix of the message type.  If <message> ends in digits (for example
"upb_benchmark_Message100"), the digits are read as a count N and the output
references the base name plus base2 .. baseN, matching the naming scheme of
the synthetic "N_msgs" protos.  The program text is written to stdout.
"""

import re
import sys

# Group 1: base name (must end in a non-digit).  Group 2: trailing count.
_NUMBERED_MESSAGE = re.compile(r'(.*\D)(\d+)$')

_HEADER = '''
#include "{include}"

char buf[1];

int main() {{
  upb_arena *arena = upb_arena_new();
  size_t size;
'''

_REF_MESSAGE = '''
  {{
    {name} *proto = {name}_parse(buf, 1, arena);
    {name}_serialize(proto, arena, &size);
  }}
  '''

_FOOTER = '''
  return 0;
}'''


def split_message_name(message):
    """Split "Base123" into ("Base", 123); names without digits give count 1."""
    match = _NUMBERED_MESSAGE.search(message)
    if match is None:
        return message, 1
    return match.group(1), int(match.group(2))


def message_names(message):
    """Return the list of message types the generated program must reference.

    The first message carries no numeric suffix; the rest are numbered from 2.
    """
    base, count = split_message_name(message)
    return [base] + [base + str(i) for i in range(2, count + 1)]


def generate_source(include, message):
    """Return the full C program text for the given header and message."""
    chunks = [_HEADER.format(include=include)]
    chunks.extend(_REF_MESSAGE.format(name=name) for name in message_names(message))
    chunks.append(_FOOTER)
    # Each chunk used to be emitted by its own print(), hence the newline
    # after every chunk.
    return "".join(chunk + "\n" for chunk in chunks)


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 3:
        sys.stderr.write("usage: gen_upb_binary_c.py <include> <message>\n")
        return 2
    sys.stdout.write(generate_source(argv[1], argv[2]))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))