From de1b6b0718ebe08e219577ccc8d82b3b229d125b Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 15 Oct 2020 17:16:03 -0700 Subject: [PATCH 1/4] Refactored proto2 benchmarks and added StringView benchmark. --- BUILD | 11 ++++++ tests/benchmark.cc | 98 ++++++++++++++++++++++++++-------------------- 2 files changed, 66 insertions(+), 43 deletions(-) diff --git a/BUILD b/BUILD index fb7b18a9a4..c336aee2a0 100644 --- a/BUILD +++ b/BUILD @@ -365,12 +365,23 @@ cc_proto_library( deps = [":benchmark_descriptor_proto"], ) +proto_library( + name = "benchmark_descriptor_sv_proto", + srcs = ["tests/descriptor_sv.proto"], +) + +cc_proto_library( + name = "benchmark_descriptor_sv_cc_proto", + deps = [":benchmark_descriptor_sv_proto"], +) + cc_binary( name = "benchmark", testonly = 1, srcs = ["tests/benchmark.cc"], deps = [ ":benchmark_descriptor_cc_proto", + ":benchmark_descriptor_sv_cc_proto", ":benchmark_descriptor_upb_proto", ":benchmark_descriptor_upb_proto_reflection", ":descriptor_upb_proto", diff --git a/tests/benchmark.cc b/tests/benchmark.cc index 84034768ad..5fb99e6147 100644 --- a/tests/benchmark.cc +++ b/tests/benchmark.cc @@ -6,6 +6,7 @@ #include "tests/descriptor.pb.h" #include "tests/descriptor.upb.h" #include "tests/descriptor.upbdefs.h" +#include "tests/descriptor_sv.pb.h" // For for benchmarks of building descriptors. #include "google/protobuf/descriptor.upb.h" @@ -108,55 +109,58 @@ static void BM_ParseDescriptor_Upb_LargeInitialBlock(benchmark::State& state) { } BENCHMARK(BM_ParseDescriptor_Upb_LargeInitialBlock); -static void BM_ParseDescriptor_Proto2_NoArena(benchmark::State& state) { - size_t bytes = 0; - for (auto _ : state) { - upb_benchmark::FileDescriptorProto proto; - protobuf::StringPiece input(descriptor.data,descriptor.size); - bool ok = proto.ParseFrom(input); - if (!ok) { - printf("Failed to parse.\n"); - exit(1); - } - bytes += descriptor.size; +template +struct NoArena { + public: + P* GetProto() { return &proto_; } + + private: + P proto_; +}; + +template +struct WithArena { + public: + P* GetProto() { return protobuf::Arena::CreateMessage

(&arena_); } + + private: + protobuf::Arena arena_; +}; + +template +struct WithArenaInitialBlock { + public: + WithArenaInitialBlock() : arena_(GetOptions()) {} + P* GetProto() { return protobuf::Arena::CreateMessage

(&arena_); } + + private: + protobuf::ArenaOptions GetOptions() { + protobuf::ArenaOptions opts; + opts.initial_block = buf; + opts.initial_block_size = sizeof(buf); + return opts; } - state.SetBytesProcessed(state.iterations() * descriptor.size); -} -BENCHMARK(BM_ParseDescriptor_Proto2_NoArena); -static void BM_ParseDescriptor_Proto2_Arena(benchmark::State& state) { - size_t bytes = 0; - for (auto _ : state) { - protobuf::Arena arena; - protobuf::StringPiece input(descriptor.data,descriptor.size); - auto proto = - protobuf::Arena::CreateMessage( - &arena); - bool ok = proto->ParseFrom(input); + protobuf::Arena arena_; +}; - if (!ok) { - printf("Failed to parse.\n"); - exit(1); - } - bytes += descriptor.size; - } - state.SetBytesProcessed(state.iterations() * descriptor.size); -} -BENCHMARK(BM_ParseDescriptor_Proto2_Arena); +using FileDescriptor = ::upb_benchmark::FileDescriptorProto; +using FileDescriptorSV = ::upb_benchmark::sv::FileDescriptorProto; + +const protobuf::MessageLite::ParseFlags kMergePartial = + protobuf::MessageLite::ParseFlags::kMergePartial; +const protobuf::MessageLite::ParseFlags kMergePartialWithAliasing = + protobuf::MessageLite::ParseFlags::kMergePartialWithAliasing; -static void BM_ParseDescriptor_Proto2_Arena_LargeInitialBlock(benchmark::State& state) { +template class Factory, + protobuf::MessageLite::ParseFlags kParseFlags = kMergePartial> +void BM_Parse_Proto2(benchmark::State& state) { size_t bytes = 0; - protobuf::ArenaOptions opts; - opts.initial_block = buf; - opts.initial_block_size = sizeof(buf); for (auto _ : state) { - protobuf::Arena arena(opts); + Factory

proto_factory; + auto proto = proto_factory.GetProto(); protobuf::StringPiece input(descriptor.data,descriptor.size); - auto proto = - protobuf::Arena::CreateMessage( - &arena); - bool ok = proto->ParseFrom(input); - + bool ok = proto->template ParseFrom(input); if (!ok) { printf("Failed to parse.\n"); exit(1); @@ -165,7 +169,15 @@ static void BM_ParseDescriptor_Proto2_Arena_LargeInitialBlock(benchmark::State& } state.SetBytesProcessed(state.iterations() * descriptor.size); } -BENCHMARK(BM_ParseDescriptor_Proto2_Arena_LargeInitialBlock); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptor, NoArena); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptor, WithArena); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptor, WithArenaInitialBlock); +//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, NoArena); +//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, WithArena); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, WithArenaInitialBlock); +//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, NoArena, kMergePartialWithAliasing); +//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, WithArena, kMergePartialWithAliasing); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, WithArenaInitialBlock, kMergePartialWithAliasing); static void BM_SerializeDescriptor_Proto2(benchmark::State& state) { size_t bytes = 0; From ee7da95367e9d18ad83ccf6c3f5214f56c8b02a2 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 15 Oct 2020 17:17:55 -0700 Subject: [PATCH 2/4] Bzl formatting fix per buildifier. --- bazel/workspace_deps.bzl | 1 - 1 file changed, 1 deletion(-) diff --git a/bazel/workspace_deps.bzl b/bazel/workspace_deps.bzl index 4251d55ee5..c724d7c2c2 100644 --- a/bazel/workspace_deps.bzl +++ b/bazel/workspace_deps.bzl @@ -1,4 +1,3 @@ - load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") From 9eb8414b3124fe785ea9265ff7d57c5a88e33898 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 15 Oct 2020 17:18:18 -0700 Subject: [PATCH 3/4] Added descriptor_sv.proto. --- tests/descriptor_sv.proto | 894 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 894 insertions(+) create mode 100644 tests/descriptor_sv.proto diff --git a/tests/descriptor_sv.proto b/tests/descriptor_sv.proto new file mode 100644 index 0000000000..c595a68649 --- /dev/null +++ b/tests/descriptor_sv.proto @@ -0,0 +1,894 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// The messages in this file describe the definitions found in .proto files. +// A valid .proto file can be translated directly to a FileDescriptorProto +// without any other information (e.g. without reading its imports). + +syntax = "proto2"; + +package upb_benchmark.sv; + +option go_package = "google.golang.org/protobuf/types/descriptorpb"; +option java_package = "com.google.protobuf"; +option java_outer_classname = "DescriptorProtos"; +option csharp_namespace = "Google.Protobuf.Reflection"; +option objc_class_prefix = "GPB"; +option cc_enable_arenas = true; + +// descriptor.proto must be optimized for speed because reflection-based +// algorithms don't work during bootstrapping. +option optimize_for = SPEED; + +// The protocol compiler can output a FileDescriptorSet containing the .proto +// files it parses. +message FileDescriptorSet { + repeated FileDescriptorProto file = 1; +} + +// Describes a complete .proto file. +message FileDescriptorProto { + optional string name = 1 + [ctype = STRING_PIECE]; // file name, relative to root of source tree + optional string package = 2 + [ctype = STRING_PIECE]; // e.g. "foo", "foo.bar", etc. + + // Names of files imported by this file. + repeated string dependency = 3 [ctype = STRING_PIECE]; + // Indexes of the public imported files in the dependency list above. + repeated int32 public_dependency = 10; + // Indexes of the weak imported files in the dependency list. + // For Google-internal migration only. Do not use. + repeated int32 weak_dependency = 11; + + // All top-level definitions in this file. + repeated DescriptorProto message_type = 4; + repeated EnumDescriptorProto enum_type = 5; + repeated ServiceDescriptorProto service = 6; + repeated FieldDescriptorProto extension = 7; + + optional FileOptions options = 8; + + // This field contains optional information about the original source code. + // You may safely remove this entire field without harming runtime + // functionality of the descriptors -- the information is needed only by + // development tools. + optional SourceCodeInfo source_code_info = 9; + + // The syntax of the proto file. + // The supported values are "proto2" and "proto3". + optional string syntax = 12 [ctype = STRING_PIECE]; +} + +// Describes a message type. +message DescriptorProto { + optional string name = 1 [ctype = STRING_PIECE]; + + repeated FieldDescriptorProto field = 2; + repeated FieldDescriptorProto extension = 6; + + repeated DescriptorProto nested_type = 3; + repeated EnumDescriptorProto enum_type = 4; + + message ExtensionRange { + optional int32 start = 1; // Inclusive. + optional int32 end = 2; // Exclusive. + + optional ExtensionRangeOptions options = 3; + } + repeated ExtensionRange extension_range = 5; + + repeated OneofDescriptorProto oneof_decl = 8; + + optional MessageOptions options = 7; + + // Range of reserved tag numbers. Reserved tag numbers may not be used by + // fields or extension ranges in the same message. Reserved ranges may + // not overlap. + message ReservedRange { + optional int32 start = 1; // Inclusive. + optional int32 end = 2; // Exclusive. + } + repeated ReservedRange reserved_range = 9; + // Reserved field names, which may not be used by fields in the same message. + // A given name may only be reserved once. + repeated string reserved_name = 10 [ctype = STRING_PIECE]; +} + +message ExtensionRangeOptions { + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +// Describes a field within a message. +message FieldDescriptorProto { + enum Type { + // 0 is reserved for errors. + // Order is weird for historical reasons. + TYPE_DOUBLE = 1; + TYPE_FLOAT = 2; + // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT64 if + // negative values are likely. + TYPE_INT64 = 3; + TYPE_UINT64 = 4; + // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT32 if + // negative values are likely. + TYPE_INT32 = 5; + TYPE_FIXED64 = 6; + TYPE_FIXED32 = 7; + TYPE_BOOL = 8; + TYPE_STRING = 9; + // Tag-delimited aggregate. + // Group type is deprecated and not supported in proto3. However, Proto3 + // implementations should still be able to parse the group wire format and + // treat group fields as unknown fields. + TYPE_GROUP = 10; + TYPE_MESSAGE = 11; // Length-delimited aggregate. + + // New in version 2. + TYPE_BYTES = 12; + TYPE_UINT32 = 13; + TYPE_ENUM = 14; + TYPE_SFIXED32 = 15; + TYPE_SFIXED64 = 16; + TYPE_SINT32 = 17; // Uses ZigZag encoding. + TYPE_SINT64 = 18; // Uses ZigZag encoding. + } + + enum Label { + // 0 is reserved for errors + LABEL_OPTIONAL = 1; + LABEL_REQUIRED = 2; + LABEL_REPEATED = 3; + } + + optional string name = 1 [ctype = STRING_PIECE]; + optional int32 number = 3; + optional Label label = 4; + + // If type_name is set, this need not be set. If both this and type_name + // are set, this must be one of TYPE_ENUM, TYPE_MESSAGE or TYPE_GROUP. + optional Type type = 5; + + // For message and enum types, this is the name of the type. If the name + // starts with a '.', it is fully-qualified. Otherwise, C++-like scoping + // rules are used to find the type (i.e. first the nested types within this + // message are searched, then within the parent, on up to the root + // namespace). + optional string type_name = 6 [ctype = STRING_PIECE]; + + // For extensions, this is the name of the type being extended. It is + // resolved in the same manner as type_name. + optional string extendee = 2 [ctype = STRING_PIECE]; + + // For numeric types, contains the original text representation of the value. + // For booleans, "true" or "false". + // For strings, contains the default text contents (not escaped in any way). + // For bytes, contains the C escaped value. All bytes >= 128 are escaped. + // TODO(kenton): Base-64 encode? + optional string default_value = 7 [ctype = STRING_PIECE]; + + // If set, gives the index of a oneof in the containing type's oneof_decl + // list. This field is a member of that oneof. + optional int32 oneof_index = 9; + + // JSON name of this field. The value is set by protocol compiler. If the + // user has set a "json_name" option on this field, that option's value + // will be used. Otherwise, it's deduced from the field's name by converting + // it to camelCase. + optional string json_name = 10 [ctype = STRING_PIECE]; + + optional FieldOptions options = 8; + + // If true, this is a proto3 "optional". When a proto3 field is optional, it + // tracks presence regardless of field type. + // + // When proto3_optional is true, this field must be belong to a oneof to + // signal to old proto3 clients that presence is tracked for this field. This + // oneof is known as a "synthetic" oneof, and this field must be its sole + // member (each proto3 optional field gets its own synthetic oneof). Synthetic + // oneofs exist in the descriptor only, and do not generate any API. Synthetic + // oneofs must be ordered after all "real" oneofs. + // + // For message fields, proto3_optional doesn't create any semantic change, + // since non-repeated message fields always track presence. However it still + // indicates the semantic detail of whether the user wrote "optional" or not. + // This can be useful for round-tripping the .proto file. For consistency we + // give message fields a synthetic oneof also, even though it is not required + // to track presence. This is especially important because the parser can't + // tell if a field is a message or an enum, so it must always create a + // synthetic oneof. + // + // Proto2 optional fields do not set this flag, because they already indicate + // optional with `LABEL_OPTIONAL`. + optional bool proto3_optional = 17; +} + +// Describes a oneof. +message OneofDescriptorProto { + optional string name = 1 [ctype = STRING_PIECE]; + optional OneofOptions options = 2; +} + +// Describes an enum type. +message EnumDescriptorProto { + optional string name = 1 [ctype = STRING_PIECE]; + + repeated EnumValueDescriptorProto value = 2; + + optional EnumOptions options = 3; + + // Range of reserved numeric values. Reserved values may not be used by + // entries in the same enum. Reserved ranges may not overlap. + // + // Note that this is distinct from DescriptorProto.ReservedRange in that it + // is inclusive such that it can appropriately represent the entire int32 + // domain. + message EnumReservedRange { + optional int32 start = 1; // Inclusive. + optional int32 end = 2; // Inclusive. + } + + // Range of reserved numeric values. Reserved numeric values may not be used + // by enum values in the same enum declaration. Reserved ranges may not + // overlap. + repeated EnumReservedRange reserved_range = 4; + + // Reserved enum value names, which may not be reused. A given name may only + // be reserved once. + repeated string reserved_name = 5 [ctype = STRING_PIECE]; +} + +// Describes a value within an enum. +message EnumValueDescriptorProto { + optional string name = 1 [ctype = STRING_PIECE]; + optional int32 number = 2; + + optional EnumValueOptions options = 3; +} + +// Describes a service. +message ServiceDescriptorProto { + optional string name = 1 [ctype = STRING_PIECE]; + repeated MethodDescriptorProto method = 2; + + optional ServiceOptions options = 3; +} + +// Describes a method of a service. +message MethodDescriptorProto { + optional string name = 1 [ctype = STRING_PIECE]; + + // Input and output type names. These are resolved in the same way as + // FieldDescriptorProto.type_name, but must refer to a message type. + optional string input_type = 2 [ctype = STRING_PIECE]; + optional string output_type = 3 [ctype = STRING_PIECE]; + + optional MethodOptions options = 4; + + // Identifies if client streams multiple client messages + optional bool client_streaming = 5 [default = false]; + // Identifies if server streams multiple server messages + optional bool server_streaming = 6 [default = false]; +} + +// =================================================================== +// Options + +// Each of the definitions above may have "options" attached. These are +// just annotations which may cause code to be generated slightly differently +// or may contain hints for code that manipulates protocol messages. +// +// Clients may define custom options as extensions of the *Options messages. +// These extensions may not yet be known at parsing time, so the parser cannot +// store the values in them. Instead it stores them in a field in the *Options +// message called uninterpreted_option. This field must have the same name +// across all *Options messages. We then use this field to populate the +// extensions when we build a descriptor, at which point all protos have been +// parsed and so all extensions are known. +// +// Extension numbers for custom options may be chosen as follows: +// * For options which will only be used within a single application or +// organization, or for experimental options, use field numbers 50000 +// through 99999. It is up to you to ensure that you do not use the +// same number for multiple options. +// * For options which will be published and used publicly by multiple +// independent entities, e-mail protobuf-global-extension-registry@google.com +// to reserve extension numbers. Simply provide your project name (e.g. +// Objective-C plugin) and your project website (if available) -- there's no +// need to explain how you intend to use them. Usually you only need one +// extension number. You can declare multiple options with only one extension +// number by putting them in a sub-message. See the Custom Options section of +// the docs for examples: +// https://developers.google.com/protocol-buffers/docs/proto#options +// If this turns out to be popular, a web service will be set up +// to automatically assign option numbers. + +message FileOptions { + // Sets the Java package where classes generated from this .proto will be + // placed. By default, the proto package is used, but this is often + // inappropriate because proto packages do not normally start with backwards + // domain names. + optional string java_package = 1 [ctype = STRING_PIECE]; + + // If set, all the classes from the .proto file are wrapped in a single + // outer class with the given name. This applies to both Proto1 + // (equivalent to the old "--one_java_file" option) and Proto2 (where + // a .proto always translates to a single class, but you may want to + // explicitly choose the class name). + optional string java_outer_classname = 8 [ctype = STRING_PIECE]; + + // If set true, then the Java code generator will generate a separate .java + // file for each top-level message, enum, and service defined in the .proto + // file. Thus, these types will *not* be nested inside the outer class + // named by java_outer_classname. However, the outer class will still be + // generated to contain the file's getDescriptor() method as well as any + // top-level extensions defined in the file. + optional bool java_multiple_files = 10 [default = false]; + + // This option does nothing. + optional bool java_generate_equals_and_hash = 20 [deprecated = true]; + + // If set true, then the Java2 code generator will generate code that + // throws an exception whenever an attempt is made to assign a non-UTF-8 + // byte sequence to a string field. + // Message reflection will do the same. + // However, an extension field still accepts non-UTF-8 byte sequences. + // This option has no effect on when used with the lite runtime. + optional bool java_string_check_utf8 = 27 [default = false]; + + // Generated classes can be optimized for speed or code size. + enum OptimizeMode { + SPEED = 1; // Generate complete code for parsing, serialization, + // etc. + CODE_SIZE = 2; // Use ReflectionOps to implement these methods. + LITE_RUNTIME = 3; // Generate code using MessageLite and the lite runtime. + } + optional OptimizeMode optimize_for = 9 [default = SPEED]; + + // Sets the Go package where structs generated from this .proto will be + // placed. If omitted, the Go package will be derived from the following: + // - The basename of the package import path, if provided. + // - Otherwise, the package statement in the .proto file, if present. + // - Otherwise, the basename of the .proto file, without extension. + optional string go_package = 11 [ctype = STRING_PIECE]; + + // Should generic services be generated in each language? "Generic" services + // are not specific to any particular RPC system. They are generated by the + // main code generators in each language (without additional plugins). + // Generic services were the only kind of service generation supported by + // early versions of google.protobuf. + // + // Generic services are now considered deprecated in favor of using plugins + // that generate code specific to your particular RPC system. Therefore, + // these default to false. Old code which depends on generic services should + // explicitly set them to true. + optional bool cc_generic_services = 16 [default = false]; + optional bool java_generic_services = 17 [default = false]; + optional bool py_generic_services = 18 [default = false]; + optional bool php_generic_services = 42 [default = false]; + + // Is this file deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for everything in the file, or it will be completely ignored; in the very + // least, this is a formalization for deprecating files. + optional bool deprecated = 23 [default = false]; + + // Enables the use of arenas for the proto messages in this file. This applies + // only to generated classes for C++. + optional bool cc_enable_arenas = 31 [default = true]; + + // Sets the objective c class prefix which is prepended to all objective c + // generated classes from this .proto. There is no default. + optional string objc_class_prefix = 36 [ctype = STRING_PIECE]; + + // Namespace for generated classes; defaults to the package. + optional string csharp_namespace = 37 [ctype = STRING_PIECE]; + + // By default Swift generators will take the proto package and CamelCase it + // replacing '.' with underscore and use that to prefix the types/symbols + // defined. When this options is provided, they will use this value instead + // to prefix the types/symbols defined. + optional string swift_prefix = 39 [ctype = STRING_PIECE]; + + // Sets the php class prefix which is prepended to all php generated classes + // from this .proto. Default is empty. + optional string php_class_prefix = 40 [ctype = STRING_PIECE]; + + // Use this option to change the namespace of php generated classes. Default + // is empty. When this option is empty, the package name will be used for + // determining the namespace. + optional string php_namespace = 41 [ctype = STRING_PIECE]; + + // Use this option to change the namespace of php generated metadata classes. + // Default is empty. When this option is empty, the proto file name will be + // used for determining the namespace. + optional string php_metadata_namespace = 44 [ctype = STRING_PIECE]; + + // Use this option to change the package of ruby generated classes. Default + // is empty. When this option is not set, the package name will be used for + // determining the ruby package. + optional string ruby_package = 45 [ctype = STRING_PIECE]; + + // The parser stores options it doesn't recognize here. + // See the documentation for the "Options" section above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. + // See the documentation for the "Options" section above. + extensions 1000 to max; + + reserved 38; +} + +message MessageOptions { + // Set true to use the old proto1 MessageSet wire format for extensions. + // This is provided for backwards-compatibility with the MessageSet wire + // format. You should not use this for any other reason: It's less + // efficient, has fewer features, and is more complicated. + // + // The message must be defined exactly as follows: + // message Foo { + // option message_set_wire_format = true; + // extensions 4 to max; + // } + // Note that the message cannot have any defined fields; MessageSets only + // have extensions. + // + // All extensions of your type must be singular messages; e.g. they cannot + // be int32s, enums, or repeated messages. + // + // Because this is an option, the above two restrictions are not enforced by + // the protocol compiler. + optional bool message_set_wire_format = 1 [default = false]; + + // Disables the generation of the standard "descriptor()" accessor, which can + // conflict with a field of the same name. This is meant to make migration + // from proto1 easier; new code should avoid fields named "descriptor". + optional bool no_standard_descriptor_accessor = 2 [default = false]; + + // Is this message deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the message, or it will be completely ignored; in the very least, + // this is a formalization for deprecating messages. + optional bool deprecated = 3 [default = false]; + + // Whether the message is an automatically generated map entry type for the + // maps field. + // + // For maps fields: + // map map_field = 1; + // The parsed descriptor looks like: + // message MapFieldEntry { + // option map_entry = true; + // optional KeyType key = 1; + // optional ValueType value = 2; + // } + // repeated MapFieldEntry map_field = 1; + // + // Implementations may choose not to generate the map_entry=true message, but + // use a native map in the target language to hold the keys and values. + // The reflection APIs in such implementations still need to work as + // if the field is a repeated message field. + // + // NOTE: Do not set the option in .proto files. Always use the maps syntax + // instead. The option should only be implicitly set by the proto compiler + // parser. + optional bool map_entry = 7; + + reserved 8; // javalite_serializable + reserved 9; // javanano_as_lite + + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +message FieldOptions { + // The ctype option instructs the C++ code generator to use a different + // representation of the field than it normally would. See the specific + // options below. This option is not yet implemented in the open source + // release -- sorry, we'll try to include it in a future version! + optional CType ctype = 1 [default = STRING]; + enum CType { + // Default mode. + STRING = 0; + + CORD = 1; + + STRING_PIECE = 2; + } + // The packed option can be enabled for repeated primitive fields to enable + // a more efficient representation on the wire. Rather than repeatedly + // writing the tag and type for each element, the entire array is encoded as + // a single length-delimited blob. In proto3, only explicit setting it to + // false will avoid using packed encoding. + optional bool packed = 2; + + // The jstype option determines the JavaScript type used for values of the + // field. The option is permitted only for 64 bit integral and fixed types + // (int64, uint64, sint64, fixed64, sfixed64). A field with jstype JS_STRING + // is represented as JavaScript string, which avoids loss of precision that + // can happen when a large value is converted to a floating point JavaScript. + // Specifying JS_NUMBER for the jstype causes the generated JavaScript code to + // use the JavaScript "number" type. The behavior of the default option + // JS_NORMAL is implementation dependent. + // + // This option is an enum to permit additional types to be added, e.g. + // goog.math.Integer. + optional JSType jstype = 6 [default = JS_NORMAL]; + enum JSType { + // Use the default type. + JS_NORMAL = 0; + + // Use JavaScript strings. + JS_STRING = 1; + + // Use JavaScript numbers. + JS_NUMBER = 2; + } + + // Should this field be parsed lazily? Lazy applies only to message-type + // fields. It means that when the outer message is initially parsed, the + // inner message's contents will not be parsed but instead stored in encoded + // form. The inner message will actually be parsed when it is first accessed. + // + // This is only a hint. Implementations are free to choose whether to use + // eager or lazy parsing regardless of the value of this option. However, + // setting this option true suggests that the protocol author believes that + // using lazy parsing on this field is worth the additional bookkeeping + // overhead typically needed to implement it. + // + // This option does not affect the public interface of any generated code; + // all method signatures remain the same. Furthermore, thread-safety of the + // interface is not affected by this option; const methods remain safe to + // call from multiple threads concurrently, while non-const methods continue + // to require exclusive access. + // + // + // Note that implementations may choose not to check required fields within + // a lazy sub-message. That is, calling IsInitialized() on the outer message + // may return true even if the inner message has missing required fields. + // This is necessary because otherwise the inner message would have to be + // parsed in order to perform the check, defeating the purpose of lazy + // parsing. An implementation which chooses not to check required fields + // must be consistent about it. That is, for any particular sub-message, the + // implementation must either *always* check its required fields, or *never* + // check its required fields, regardless of whether or not the message has + // been parsed. + optional bool lazy = 5 [default = false]; + + // Is this field deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for accessors, or it will be completely ignored; in the very least, this + // is a formalization for deprecating fields. + optional bool deprecated = 3 [default = false]; + + // For Google-internal migration only. Do not use. + optional bool weak = 10 [default = false]; + + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; + + reserved 4; // removed jtype +} + +message OneofOptions { + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +message EnumOptions { + // Set this option to true to allow mapping different tag names to the same + // value. + optional bool allow_alias = 2; + + // Is this enum deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the enum, or it will be completely ignored; in the very least, this + // is a formalization for deprecating enums. + optional bool deprecated = 3 [default = false]; + + reserved 5; // javanano_as_lite + + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +message EnumValueOptions { + // Is this enum value deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the enum value, or it will be completely ignored; in the very least, + // this is a formalization for deprecating enum values. + optional bool deprecated = 1 [default = false]; + + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +message ServiceOptions { + // Note: Field numbers 1 through 32 are reserved for Google's internal RPC + // framework. We apologize for hoarding these numbers to ourselves, but + // we were already using them long before we decided to release Protocol + // Buffers. + + // Is this service deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the service, or it will be completely ignored; in the very least, + // this is a formalization for deprecating services. + optional bool deprecated = 33 [default = false]; + + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +message MethodOptions { + // Note: Field numbers 1 through 32 are reserved for Google's internal RPC + // framework. We apologize for hoarding these numbers to ourselves, but + // we were already using them long before we decided to release Protocol + // Buffers. + + // Is this method deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the method, or it will be completely ignored; in the very least, + // this is a formalization for deprecating methods. + optional bool deprecated = 33 [default = false]; + + // Is this method side-effect-free (or safe in HTTP parlance), or idempotent, + // or neither? HTTP based RPC implementation may choose GET verb for safe + // methods, and PUT verb for idempotent methods instead of the default POST. + enum IdempotencyLevel { + IDEMPOTENCY_UNKNOWN = 0; + NO_SIDE_EFFECTS = 1; // implies idempotent + IDEMPOTENT = 2; // idempotent, but may have side effects + } + optional IdempotencyLevel idempotency_level = 34 + [default = IDEMPOTENCY_UNKNOWN]; + + // The parser stores options it doesn't recognize here. See above. + repeated UninterpretedOption uninterpreted_option = 999; + + // Clients can define custom options in extensions of this message. See above. + extensions 1000 to max; +} + +// A message representing a option the parser does not recognize. This only +// appears in options protos created by the compiler::Parser class. +// DescriptorPool resolves these when building Descriptor objects. Therefore, +// options protos in descriptor objects (e.g. returned by Descriptor::options(), +// or produced by Descriptor::CopyTo()) will never have UninterpretedOptions +// in them. +message UninterpretedOption { + // The name of the uninterpreted option. Each string represents a segment in + // a dot-separated name. is_extension is true iff a segment represents an + // extension (denoted with parentheses in options specs in .proto files). + // E.g.,{ ["foo", false], ["bar.baz", true], ["qux", false] } represents + // "foo.(bar.baz).qux". + message NamePart { + optional string name_part = 1 [ctype = STRING_PIECE]; + optional bool is_extension = 2; + } + repeated NamePart name = 2; + + // The value of the uninterpreted option, in whatever type the tokenizer + // identified it as during parsing. Exactly one of these should be set. + optional string identifier_value = 3 [ctype = STRING_PIECE]; + optional uint64 positive_int_value = 4; + optional int64 negative_int_value = 5; + optional double double_value = 6; + optional bytes string_value = 7; + optional string aggregate_value = 8 [ctype = STRING_PIECE]; +} + +// =================================================================== +// Optional source code info + +// Encapsulates information about the original source file from which a +// FileDescriptorProto was generated. +message SourceCodeInfo { + // A Location identifies a piece of source code in a .proto file which + // corresponds to a particular definition. This information is intended + // to be useful to IDEs, code indexers, documentation generators, and similar + // tools. + // + // For example, say we have a file like: + // message Foo { + // optional string foo = 1 [ctype = STRING_PIECE]; + // } + // Let's look at just the field definition: + // optional string foo = 1 [ctype = STRING_PIECE]; + // ^ ^^ ^^ ^ ^^^ + // a bc de f ghi + // We have the following locations: + // span path represents + // [a,i) [ 4, 0, 2, 0 ] The whole field definition. + // [a,b) [ 4, 0, 2, 0, 4 ] The label (optional). + // [c,d) [ 4, 0, 2, 0, 5 ] The type (string). + // [e,f) [ 4, 0, 2, 0, 1 ] The name (foo). + // [g,h) [ 4, 0, 2, 0, 3 ] The number (1). + // + // Notes: + // - A location may refer to a repeated field itself (i.e. not to any + // particular index within it). This is used whenever a set of elements are + // logically enclosed in a single code segment. For example, an entire + // extend block (possibly containing multiple extension definitions) will + // have an outer location whose path refers to the "extensions" repeated + // field without an index. + // - Multiple locations may have the same path. This happens when a single + // logical declaration is spread out across multiple places. The most + // obvious example is the "extend" block again -- there may be multiple + // extend blocks in the same scope, each of which will have the same path. + // - A location's span is not always a subset of its parent's span. For + // example, the "extendee" of an extension declaration appears at the + // beginning of the "extend" block and is shared by all extensions within + // the block. + // - Just because a location's span is a subset of some other location's span + // does not mean that it is a descendant. For example, a "group" defines + // both a type and a field in a single declaration. Thus, the locations + // corresponding to the type and field and their components will overlap. + // - Code which tries to interpret locations should probably be designed to + // ignore those that it doesn't understand, as more types of locations could + // be recorded in the future. + repeated Location location = 1; + message Location { + // Identifies which part of the FileDescriptorProto was defined at this + // location. + // + // Each element is a field number or an index. They form a path from + // the root FileDescriptorProto to the place where the definition. For + // example, this path: + // [ 4, 3, 2, 7, 1 ] + // refers to: + // file.message_type(3) // 4, 3 + // .field(7) // 2, 7 + // .name() // 1 + // This is because FileDescriptorProto.message_type has field number 4: + // repeated DescriptorProto message_type = 4; + // and DescriptorProto.field has field number 2: + // repeated FieldDescriptorProto field = 2; + // and FieldDescriptorProto.name has field number 1: + // optional string name = 1 [ctype = STRING_PIECE]; + // + // Thus, the above path gives the location of a field name. If we removed + // the last element: + // [ 4, 3, 2, 7 ] + // this path refers to the whole field declaration (from the beginning + // of the label to the terminating semicolon). + repeated int32 path = 1 [packed = true]; + + // Always has exactly three or four elements: start line, start column, + // end line (optional, otherwise assumed same as start line), end column. + // These are packed into a single field for efficiency. Note that line + // and column numbers are zero-based -- typically you will want to add + // 1 to each before displaying to a user. + repeated int32 span = 2 [packed = true]; + + // If this SourceCodeInfo represents a complete declaration, these are any + // comments appearing before and after the declaration which appear to be + // attached to the declaration. + // + // A series of line comments appearing on consecutive lines, with no other + // tokens appearing on those lines, will be treated as a single comment. + // + // leading_detached_comments will keep paragraphs of comments that appear + // before (but not connected to) the current element. Each paragraph, + // separated by empty lines, will be one comment element in the repeated + // field. + // + // Only the comment content is provided; comment markers (e.g. //) are + // stripped out. For block comments, leading whitespace and an asterisk + // will be stripped from the beginning of each line other than the first. + // Newlines are included in the output. + // + // Examples: + // + // optional int32 foo = 1; // Comment attached to foo. + // // Comment attached to bar. + // optional int32 bar = 2; + // + // optional string baz = 3 [ctype = STRING_PIECE]; + // // Comment attached to baz. + // // Another line attached to baz. + // + // // Comment attached to qux. + // // + // // Another line attached to qux. + // optional double qux = 4; + // + // // Detached comment for corge. This is not leading or trailing comments + // // to qux or corge because there are blank lines separating it from + // // both. + // + // // Detached comment for corge paragraph 2. + // + // optional string corge = 5 [ctype = STRING_PIECE]; + // /* Block comment attached + // * to corge. Leading asterisks + // * will be removed. */ + // /* Block comment attached to + // * grault. */ + // optional int32 grault = 6; + // + // // ignored detached comments. + optional string leading_comments = 3 [ctype = STRING_PIECE]; + optional string trailing_comments = 4 [ctype = STRING_PIECE]; + repeated string leading_detached_comments = 6 [ctype = STRING_PIECE]; + } +} + +// Describes the relationship between generated code and its original source +// file. A GeneratedCodeInfo message is associated with only one generated +// source file, but may contain references to different source .proto files. +message GeneratedCodeInfo { + // An Annotation connects some span of text in generated code to an element + // of its generating .proto file. + repeated Annotation annotation = 1; + message Annotation { + // Identifies the element in the original source .proto file. This field + // is formatted the same as SourceCodeInfo.Location.path. + repeated int32 path = 1 [packed = true]; + + // Identifies the filesystem path to the original source .proto. + optional string source_file = 2 [ctype = STRING_PIECE]; + + // Identifies the starting offset in bytes in the generated code + // that relates to the identified object. + optional int32 begin = 3; + + // Identifies the ending offset in bytes in the generated code that + // relates to the identified offset. The end offset should be one past + // the last relevant byte (so the length of the text = end - begin). + optional int32 end = 4; + } +} From bf393bf086017df1d8f7082514269f94d22e10a7 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 15 Oct 2020 17:34:55 -0700 Subject: [PATCH 4/4] Cleaned up benchmark names. --- tests/benchmark.cc | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/benchmark.cc b/tests/benchmark.cc index 5fb99e6147..c608572764 100644 --- a/tests/benchmark.cc +++ b/tests/benchmark.cc @@ -73,7 +73,7 @@ static void BM_LoadDescriptor_Proto2(benchmark::State& state) { } BENCHMARK(BM_LoadDescriptor_Proto2); -static void BM_ParseDescriptor_Upb(benchmark::State& state) { +static void BM_Parse_Upb_FileDesc_WithArena(benchmark::State& state) { size_t bytes = 0; for (auto _ : state) { upb_arena* arena = upb_arena_new(); @@ -89,9 +89,9 @@ static void BM_ParseDescriptor_Upb(benchmark::State& state) { } state.SetBytesProcessed(state.iterations() * descriptor.size); } -BENCHMARK(BM_ParseDescriptor_Upb); +BENCHMARK(BM_Parse_Upb_FileDesc_WithArena); -static void BM_ParseDescriptor_Upb_LargeInitialBlock(benchmark::State& state) { +static void BM_Parse_Upb_FileDesc_WithInitialBlock(benchmark::State& state) { size_t bytes = 0; for (auto _ : state) { upb_arena* arena = upb_arena_init(buf, sizeof(buf), NULL); @@ -107,7 +107,7 @@ static void BM_ParseDescriptor_Upb_LargeInitialBlock(benchmark::State& state) { } state.SetBytesProcessed(state.iterations() * descriptor.size); } -BENCHMARK(BM_ParseDescriptor_Upb_LargeInitialBlock); +BENCHMARK(BM_Parse_Upb_FileDesc_WithInitialBlock); template struct NoArena { @@ -128,9 +128,9 @@ struct WithArena { }; template -struct WithArenaInitialBlock { +struct WithInitialBlock { public: - WithArenaInitialBlock() : arena_(GetOptions()) {} + WithInitialBlock() : arena_(GetOptions()) {} P* GetProto() { return protobuf::Arena::CreateMessage

(&arena_); } private: @@ -144,12 +144,12 @@ struct WithArenaInitialBlock { protobuf::Arena arena_; }; -using FileDescriptor = ::upb_benchmark::FileDescriptorProto; -using FileDescriptorSV = ::upb_benchmark::sv::FileDescriptorProto; +using FileDesc = ::upb_benchmark::FileDescriptorProto; +using FileDescSV = ::upb_benchmark::sv::FileDescriptorProto; const protobuf::MessageLite::ParseFlags kMergePartial = protobuf::MessageLite::ParseFlags::kMergePartial; -const protobuf::MessageLite::ParseFlags kMergePartialWithAliasing = +const protobuf::MessageLite::ParseFlags kAliasStrings = protobuf::MessageLite::ParseFlags::kMergePartialWithAliasing; template class Factory, @@ -169,15 +169,15 @@ void BM_Parse_Proto2(benchmark::State& state) { } state.SetBytesProcessed(state.iterations() * descriptor.size); } -BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptor, NoArena); -BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptor, WithArena); -BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptor, WithArenaInitialBlock); -//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, NoArena); -//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, WithArena); -BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, WithArenaInitialBlock); -//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, NoArena, kMergePartialWithAliasing); -//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, WithArena, kMergePartialWithAliasing); -BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescriptorSV, WithArenaInitialBlock, kMergePartialWithAliasing); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, NoArena); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, WithArena); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, WithInitialBlock); +//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, NoArena); +//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithArena); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithInitialBlock); +//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, NoArena, kAliasStrings); +//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithArena, kAliasStrings); +BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithInitialBlock, kAliasStrings); static void BM_SerializeDescriptor_Proto2(benchmark::State& state) { size_t bytes = 0;