From 2e83110230b7e91b07835e9c718a1d6fbcb8b617 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Wed, 27 Apr 2016 18:22:22 -0700 Subject: [PATCH 1/7] Added framework for generating/consuming benchmarking data sets. This takes the code that was sitting in benchmarks/ already and makes it easier for language-specific benchmarks to consume. Future PRs will enhance this so that the language-specific benchmarks can report metrics back that will be tracked over time in PerfKit. --- Makefile.am | 6 +- benchmarks/Makefile.am | 75 ++++++++++++ ....proto => benchmark_messages_proto2.proto} | 19 +-- benchmarks/benchmark_messages_proto3.proto | 76 ++++++++++++ benchmarks/benchmarks.proto | 102 ++++++++++++++++ benchmarks/generate_datasets.cc | 114 ++++++++++++++++++ configure.ac | 2 +- 7 files changed, 384 insertions(+), 10 deletions(-) create mode 100644 benchmarks/Makefile.am rename benchmarks/{google_speed.proto => benchmark_messages_proto2.proto} (91%) create mode 100644 benchmarks/benchmark_messages_proto3.proto create mode 100644 benchmarks/benchmarks.proto create mode 100644 benchmarks/generate_datasets.cc diff --git a/Makefile.am b/Makefile.am index a7a1f413b2..3e9888166f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -9,7 +9,7 @@ AUTOMAKE_OPTIONS = foreign SUBDIRS = . src # Always include gmock in distributions. -DIST_SUBDIRS = $(subdirs) src conformance +DIST_SUBDIRS = $(subdirs) src conformance benchmarks # Build gmock before we build protobuf tests. 
We don't add gmock to SUBDIRS # because then "make check" would also build and run all of gmock's own tests, @@ -36,6 +36,10 @@ clean-local: echo "Making clean in conformance"; \ cd conformance && $(MAKE) $(AM_MAKEFLAGS) clean; \ fi; \ + if test -e benchmarks/Makefile; then \ + echo "Making clean in benchmarks"; \ + cd benchmarks && $(MAKE) $(AM_MAKEFLAGS) clean; \ + fi; \ if test -e objectivec/DevTools; then \ echo "Cleaning any ObjC pyc files"; \ rm -f objectivec/DevTools/*.pyc; \ diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am new file mode 100644 index 0000000000..79581ee983 --- /dev/null +++ b/benchmarks/Makefile.am @@ -0,0 +1,75 @@ + +benchmarks_protoc_inputs = \ + benchmarks.proto \ + benchmark_messages_proto3.proto + +benchmarks_protoc_inputs_proto2 = \ + benchmark_messages_proto2.proto + +benchmarks_protoc_outputs = \ + benchmarks.pb.cc \ + benchmarks.pb.h \ + benchmark_messages_proto3.pb.cc \ + benchmark_messages_proto3.pb.h + +benchmarks_protoc_outputs_proto2 = \ + benchmark_messages_proto2.pb.cc \ + benchmark_messages_proto2.pb.h + +bin_PROGRAMS = generate-datasets + +generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la +generate_datasets_SOURCES = generate_datasets.cc +generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) +nodist_generate_datasets_SOURCES = \ + google_message1.h \ + google_message2.h \ + $(benchmarks_protoc_outputs) \ + $(benchmarks_protoc_outputs_proto2) + +# Explicit deps beacuse BUILT_SOURCES are only done before a "make all/check" +# so a direct "make test_cpp" could fail if parallel enough. 
+generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h google_message1.h google_message2.h + +$(benchmarks_protoc_outputs): protoc_middleman +$(benchmarks_protoc_outputs_proto2): protoc_middleman2 + +google_message1.h: google_message1.dat + xxd -i $< $@ + +google_message2.h: google_message2.dat + xxd -i $< $@ + +CLEANFILES = \ + $(benchmarks_protoc_outputs) \ + $(benchmarks_protoc_outputs_proto2) \ + google_message1.h \ + google_message2.h \ + protoc_middleman \ + protoc_middleman2 \ + dataset.* + +if USE_EXTERNAL_PROTOC + +protoc_middleman: $(benchmarks_protoc_inputs) + $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs) + touch protoc_middleman + +protoc_middleman2: $(benchmarks_protoc_inputs_proto2) + $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs_proto2) + touch protoc_middleman2 + +else + +# We have to cd to $(srcdir) before executing protoc because $(protoc_inputs) is +# relative to srcdir, which may not be the same as the current directory when +# building out-of-tree. +protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs) ) + touch protoc_middleman + +protoc_middleman2: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs) + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. 
-I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs_proto2) ) + touch protoc_middleman2 + +endif diff --git a/benchmarks/google_speed.proto b/benchmarks/benchmark_messages_proto2.proto similarity index 91% rename from benchmarks/google_speed.proto rename to benchmarks/benchmark_messages_proto2.proto index 16f6d67804..c7103be5f6 100644 --- a/benchmarks/google_speed.proto +++ b/benchmarks/benchmark_messages_proto2.proto @@ -1,11 +1,14 @@ +// Benchmark messages for proto2. + syntax = "proto2"; -package benchmarks; +package benchmarks.p2; +option java_package = "com.google.protobuf.benchmarks"; -option java_outer_classname = "GoogleSpeed"; +// This is the default, but we specify it here explicitly. option optimize_for = SPEED; -message SpeedMessage1 { +message GoogleMessage1 { required string field1 = 1; optional string field9 = 9; optional string field18 = 18; @@ -40,7 +43,7 @@ message SpeedMessage1 { optional int32 field23 = 23 [default=0]; optional bool field24 = 24 [default=false]; optional int32 field25 = 25 [default=0]; - optional SpeedMessage1SubMessage field15 = 15; + optional GoogleMessage1SubMessage field15 = 15; optional bool field78 = 78; optional int32 field67 = 67 [default=0]; optional int32 field68 = 68; @@ -49,7 +52,7 @@ message SpeedMessage1 { optional int32 field131 = 131 [default=0]; } -message SpeedMessage1SubMessage { +message GoogleMessage1SubMessage { optional int32 field1 = 1 [default=0]; optional int32 field2 = 2 [default=0]; optional int32 field3 = 3 [default=0]; @@ -72,7 +75,7 @@ message SpeedMessage1SubMessage { optional uint64 field300 = 300; } -message SpeedMessage2 { +message GoogleMessage2 { optional string field1 = 1; optional int64 field3 = 3; optional int64 field4 = 4; @@ -112,7 +115,7 @@ message SpeedMessage2 { repeated int32 field73 = 73; optional int32 field20 = 20 [default=0]; optional string field24 = 24; - optional SpeedMessage2GroupedMessage field31 = 31; + optional GoogleMessage2GroupedMessage field31 = 31; }
repeated string field128 = 128; optional int64 field131 = 131; @@ -123,7 +126,7 @@ message SpeedMessage2 { optional bool field206 = 206 [default=false]; } -message SpeedMessage2GroupedMessage { +message GoogleMessage2GroupedMessage { optional float field1 = 1; optional float field2 = 2; optional float field3 = 3 [default=0.0]; diff --git a/benchmarks/benchmark_messages_proto3.proto b/benchmarks/benchmark_messages_proto3.proto new file mode 100644 index 0000000000..4ea39c22f2 --- /dev/null +++ b/benchmarks/benchmark_messages_proto3.proto @@ -0,0 +1,76 @@ +// Benchmark messages for proto3. + +syntax = "proto3"; + +package benchmarks.p3; +option java_package = "com.google.protobuf.benchmarks"; + +// This is the default, but we specify it here explicitly. +option optimize_for = SPEED; + +message GoogleMessage1 { + string field1 = 1; + string field9 = 9; + string field18 = 18; + bool field80 = 80; + bool field81 = 81; + int32 field2 = 2; + int32 field3 = 3; + int32 field280 = 280; + int32 field6 = 6; + int64 field22 = 22; + string field4 = 4; + repeated fixed64 field5 = 5; + bool field59 = 59; + string field7 = 7; + int32 field16 = 16; + int32 field130 = 130; + bool field12 = 12; + bool field17 = 17; + bool field13 = 13; + bool field14 = 14; + int32 field104 = 104; + int32 field100 = 100; + int32 field101 = 101; + string field102 = 102; + string field103 = 103; + int32 field29 = 29; + bool field30 = 30; + int32 field60 = 60; + int32 field271 = 271; + int32 field272 = 272; + int32 field150 = 150; + int32 field23 = 23; + bool field24 = 24; + int32 field25 = 25; + GoogleMessage1SubMessage field15 = 15; + bool field78 = 78; + int32 field67 = 67; + int32 field68 = 68; + int32 field128 = 128; + string field129 = 129; + int32 field131 = 131; +} + +message GoogleMessage1SubMessage { + int32 field1 = 1; + int32 field2 = 2; + int32 field3 = 3; + string field15 = 15; + bool field12 = 12; + int64 field13 = 13; + int64 field14 = 14; + int32 field16 = 16; + int32 field19 = 19; + bool 
field20 = 20; + bool field28 = 28; + fixed64 field21 = 21; + int32 field22 = 22; + bool field23 = 23; + bool field206 = 206; + fixed32 field203 = 203; + int32 field204 = 204; + string field205 = 205; + uint64 field207 = 207; + uint64 field300 = 300; +} diff --git a/benchmarks/benchmarks.proto b/benchmarks/benchmarks.proto new file mode 100644 index 0000000000..a891eb9ec7 --- /dev/null +++ b/benchmarks/benchmarks.proto @@ -0,0 +1,102 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto3"; +package benchmarks; +option java_package = "com.google.protobuf.benchmarks"; + +message BenchmarkDataset { + // Name of the benchmark dataset. This should be unique across all datasets. + // Should only contain word characters: [a-zA-Z0-9_] + string name = 1; + + // Fully-qualified name of the protobuf message for this dataset. + // It will be one of the messages defined benchmark_messages.proto. + // Implementations that do not support reflection can implement this with + // an explicit "if/else" chain that lists every possible message defined + // in this file. + string message_name = 2; + + // The payload(s) for this dataset. They should be parsed or serialized + // in sequence, in a loop, ie. + // + // while (!benchmarkDone) { // Benchmark runner decides when to exit. + // for (i = 0; i < benchmark.payload.length; i++) { + // parse(benchmark.payload[i]) + // } + // } + // + // This is intended to let datasets include a variety of data to provide + // potentially more realistic results than just parsing the same message + // over and over. A single message parsed repeatedly could yield unusually + // good branch prediction performance. + repeated bytes payload = 3; +} + +// A benchmark can write out metrics that we will then upload to our metrics +// database for tracking over time. +message Metric { + // A unique ID for these results. Used for de-duping. 
+ string guid = 1; + + // The tags specify exactly what benchmark was run against the dataset. + // The specific benchmark suite can decide what these mean, but here are + // some common tags that have a predefined meaning: + // + // - "dataset": for tests that pertain to a specific dataset. + // + // For example: + // + // # Tests parsing from binary proto string using arenas. + // tags={ + // dataset: "testalltypes", + // op: "parse", + // format: "binaryproto", + // input: "string" + // arena: "true" + // } + // + // # Tests serializing to JSON string. + // tags={ + // dataset: "testalltypes", + // op: "serialize", + // format: "json", + // input: "string" + // } + map<string, string> labels = 2; + + // Unit of measurement for the metric: + // - a speed test might be "mb_per_second" or "ops_per_second" + // - a size test might be "kb". + string unit = 3; + + // Metric value. + double value = 4; +} diff --git a/benchmarks/generate_datasets.cc b/benchmarks/generate_datasets.cc new file mode 100644 index 0000000000..f6f30cd85b --- /dev/null +++ b/benchmarks/generate_datasets.cc @@ -0,0 +1,114 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +const char *file_prefix = "dataset."; +const char *file_suffix = ".pb"; + +#include <fstream> +#include <iostream> +#include "benchmarks.pb.h" +#include "google_message1.h" +#include "google_message2.h" + +using benchmarks::BenchmarkDataset; +using google::protobuf::Descriptor; +using google::protobuf::DescriptorPool; +using google::protobuf::Message; +using google::protobuf::MessageFactory; + +#define ARRAY_TO_STRING(arr) std::string(arr, arr + sizeof(arr)) + +std::set<std::string> names; + +void WriteFileWithPayloads(const std::string& name, + const std::string& message_name, + const std::vector<std::string>& payload) { + if (!names.insert(name).second) { + std::cerr << "Duplicate test name: " << name << "\n"; + abort(); + } + + // First verify that this message name exists in our set of benchmark messages + // and that these payloads are valid for the given message.
+ const Descriptor* d = + DescriptorPool::generated_pool()->FindMessageTypeByName(message_name); + + if (!d) { + std::cerr << "For dataset " << name << ", no such message: " + << message_name << "\n"; + abort(); + } + + Message* m = MessageFactory::generated_factory()->GetPrototype(d)->New(); + + for (size_t i = 0; i < payload.size(); i++) { + if (!m->ParseFromString(payload[i])) { + std::cerr << "For dataset " << name << ", payload[" << i << "] fails " + << "to parse\n"; + abort(); + } + } + + BenchmarkDataset dataset; + dataset.set_name(name); + dataset.set_message_name(message_name); + for (size_t i = 0; i < payload.size(); i++) { + dataset.add_payload()->assign(payload[i]); + } + + std::string serialized; + dataset.SerializeToString(&serialized); + + std::ofstream writer; + std::string fname = file_prefix + name + file_suffix; + writer.open(fname); + writer << serialized; + writer.close(); + + std::cerr << "Wrote dataset: " << fname << "\n"; +} + +void WriteFile(const std::string& name, const std::string& message_name, + const std::string& payload) { + std::vector<std::string> payloads; + payloads.push_back(payload); + WriteFileWithPayloads(name, message_name, payloads); +} + +int main() { + WriteFile("google_message1_proto3", "benchmarks.p3.GoogleMessage1", + ARRAY_TO_STRING(google_message1_dat)); + WriteFile("google_message1_proto2", "benchmarks.p2.GoogleMessage1", + ARRAY_TO_STRING(google_message1_dat)); + + // Not in proto3 because it has a group, which is not supported.
+ WriteFile("google_message2", "benchmarks.p2.GoogleMessage2", + ARRAY_TO_STRING(google_message2_dat)); +} diff --git a/configure.ac b/configure.ac index 33a6c64d6f..d56a704705 100644 --- a/configure.ac +++ b/configure.ac @@ -180,5 +180,5 @@ export CFLAGS export CXXFLAGS AC_CONFIG_SUBDIRS([gmock]) -AC_CONFIG_FILES([Makefile src/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc]) +AC_CONFIG_FILES([Makefile src/Makefile benchmarks/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc]) AC_OUTPUT From 30a2f70eb33a216c53c56f765f09aea63c0cf53b Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Wed, 27 Apr 2016 18:34:33 -0700 Subject: [PATCH 2/7] Added README describing the directory. --- benchmarks/README.md | 28 ++++++++++++++++++++++++++++ benchmarks/benchmarks.proto | 8 +++++--- 2 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 benchmarks/README.md diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000000..c902780582 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,28 @@ + +# Protocol Buffers Benchmarks + +This directory contains benchmarking schemas and data sets that you +can use to test a variety of performance scenarios against your +protobuf language runtime. + +The schema for the datasets is described in `benchmarks.proto`. + +Generate the data sets like so: + +``` +$ make +$ ./generate-datasets +Wrote dataset: dataset.google_message1_proto3.pb +Wrote dataset: dataset.google_message1_proto2.pb +Wrote dataset: dataset.google_message2.pb +$ +``` + +Each data set will be written to its own file. Benchmarks will +likely want to run several benchmarks against each data set (parse, +serialize, possibly JSON, possibly using different APIs, etc). + +We would like to add more data sets. In general we will favor data sets +that make the overall suite diverse without being too large or having +too many similar tests. 
Ideally everyone can run through the entire +suite without the test run getting too long. diff --git a/benchmarks/benchmarks.proto b/benchmarks/benchmarks.proto index a891eb9ec7..5c2706df50 100644 --- a/benchmarks/benchmarks.proto +++ b/benchmarks/benchmarks.proto @@ -38,10 +38,12 @@ message BenchmarkDataset { string name = 1; // Fully-qualified name of the protobuf message for this dataset. - // It will be one of the messages defined benchmark_messages.proto. + // It will be one of the messages defined in benchmark_messages_proto2.proto + // or benchmark_messages_proto3.proto. + // // Implementations that do not support reflection can implement this with - // an explicit "if/else" chain that lists every possible message defined - // in this file. + // an explicit "if/else" chain that lists every known message defined + // in those files. string message_name = 2; // The payload(s) for this dataset. They should be parsed or serialized From 1ce5bd8e84085a3f89017d71442c75fd4a8dc9f6 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 29 Apr 2016 09:39:26 -0700 Subject: [PATCH 3/7] Updates for PR comments. --- benchmarks/Makefile.am | 3 ++- benchmarks/benchmarks.proto | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index 79581ee983..f0ed489937 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -27,8 +27,9 @@ nodist_generate_datasets_SOURCES = \ $(benchmarks_protoc_outputs) \ $(benchmarks_protoc_outputs_proto2) -# Explicit deps beacuse BUILT_SOURCES are only done before a "make all/check" +# Explicit deps because BUILT_SOURCES are only done before a "make all/check" # so a direct "make test_cpp" could fail if parallel enough.
+# See: https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Recording-Dependencies-manually generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h google_message1.h google_message2.h $(benchmarks_protoc_outputs): protoc_middleman diff --git a/benchmarks/benchmarks.proto b/benchmarks/benchmarks.proto index 5c2706df50..0ac3bf3383 100644 --- a/benchmarks/benchmarks.proto +++ b/benchmarks/benchmarks.proto @@ -68,16 +68,16 @@ message Metric { // A unique ID for these results. Used for de-duping. string guid = 1; - // The tags specify exactly what benchmark was run against the dataset. + // The labels specify exactly what benchmark was run against the dataset. // The specific benchmark suite can decide what these mean, but here are - // some common tags that have a predefined meaning: + // some common labels that have a predefined meaning: // // - "dataset": for tests that pertain to a specific dataset. // // For example: // // # Tests parsing from binary proto string using arenas. - // tags={ + // labels={ // dataset: "testalltypes", // op: "parse", // format: "binaryproto", @@ -86,7 +86,7 @@ message Metric { // } // // # Tests serializing to JSON string. - // tags={ + // labels={ // dataset: "testalltypes", // op: "serialize", // format: "json", From cb36bde04716436fc9560ac908ca4551bdc614fb Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 29 Apr 2016 09:52:20 -0700 Subject: [PATCH 4/7] Make the C++ tests build the benchmarking code. --- tests.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests.sh b/tests.sh index fd81b764dc..75bf9001c6 100755 --- a/tests.sh +++ b/tests.sh @@ -36,6 +36,9 @@ build_cpp() { internal_build_cpp make check -j2 cd conformance && make test_cpp && cd .. + + # Verify benchmarking code can build successfully. + cd benchmarks && make && cd .. 
} build_cpp_distcheck() { From 49a8918e9742d4bc9f577df9599061e342516b96 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 29 Apr 2016 10:19:03 -0700 Subject: [PATCH 5/7] Read files directly from filesystem since xxd isn't always available. --- benchmarks/Makefile.am | 12 +----------- benchmarks/generate_datasets.cc | 18 +++++++++++++----- tests.sh | 2 +- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index f0ed489937..1e162eb1ef 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -22,30 +22,20 @@ generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la generate_datasets_SOURCES = generate_datasets.cc generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) nodist_generate_datasets_SOURCES = \ - google_message1.h \ - google_message2.h \ $(benchmarks_protoc_outputs) \ $(benchmarks_protoc_outputs_proto2) # Explicit deps because BUILT_SOURCES are only done before a "make all/check" # so a direct "make test_cpp" could fail if parallel enough. 
# See: https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Recording-Dependencies-manually -generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h google_message1.h google_message2.h +generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h $(benchmarks_protoc_outputs): protoc_middleman $(benchmarks_protoc_outputs_proto2): protoc_middleman2 -google_message1.h: google_message1.dat - xxd -i $< $@ - -google_message2.h: google_message2.dat - xxd -i $< $@ - CLEANFILES = \ $(benchmarks_protoc_outputs) \ $(benchmarks_protoc_outputs_proto2) \ - google_message1.h \ - google_message2.h \ protoc_middleman \ protoc_middleman2 \ dataset.* diff --git a/benchmarks/generate_datasets.cc b/benchmarks/generate_datasets.cc index f6f30cd85b..8e9b441cf7 100644 --- a/benchmarks/generate_datasets.cc +++ b/benchmarks/generate_datasets.cc @@ -34,8 +34,6 @@ const char *file_suffix = ".pb"; #include <fstream> #include <iostream> #include "benchmarks.pb.h" -#include "google_message1.h" -#include "google_message2.h" using benchmarks::BenchmarkDataset; using google::protobuf::Descriptor; @@ -102,13 +100,23 @@ void WriteFile(const std::string& name, const std::string& message_name, WriteFileWithPayloads(name, message_name, payloads); } +std::string ReadFile(const std::string& name) { + std::ifstream file(name); + GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" << name << + "', please make sure you are running " + "this command from the benchmarks/ " + "directory.\n"; + return std::string((std::istreambuf_iterator<char>(file)), + std::istreambuf_iterator<char>()); +} + int main() { WriteFile("google_message1_proto3", "benchmarks.p3.GoogleMessage1", - ARRAY_TO_STRING(google_message1_dat)); + ReadFile("google_message1.dat")); WriteFile("google_message1_proto2", "benchmarks.p2.GoogleMessage1", - ARRAY_TO_STRING(google_message1_dat)); + ReadFile("google_message1.dat")); // Not in proto3 because it has a group, which is not supported.
WriteFile("google_message2", "benchmarks.p2.GoogleMessage2", - ARRAY_TO_STRING(google_message2_dat)); + ReadFile("google_message2.dat")); } diff --git a/tests.sh b/tests.sh index 75bf9001c6..6a9439a551 100755 --- a/tests.sh +++ b/tests.sh @@ -38,7 +38,7 @@ build_cpp() { cd conformance && make test_cpp && cd .. # Verify benchmarking code can build successfully. - cd benchmarks && make && cd .. + cd benchmarks && make && ./generate-datasets && cd .. } build_cpp_distcheck() { From b2d4b1a528a4b2e808ac2924e0552e4ea94c1d87 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 29 Apr 2016 10:22:56 -0700 Subject: [PATCH 6/7] Fixed for pre-C++11 ifstream which does not accept std::string. --- benchmarks/generate_datasets.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/generate_datasets.cc b/benchmarks/generate_datasets.cc index 8e9b441cf7..dab635e77d 100644 --- a/benchmarks/generate_datasets.cc +++ b/benchmarks/generate_datasets.cc @@ -86,7 +86,7 @@ void WriteFileWithPayloads(const std::string& name, std::ofstream writer; std::string fname = file_prefix + name + file_suffix; - writer.open(fname); + writer.open(fname.c_str()); writer << serialized; writer.close(); @@ -101,7 +101,7 @@ void WriteFile(const std::string& name, const std::string& message_name, } std::string ReadFile(const std::string& name) { - std::ifstream file(name); + std::ifstream file(name.c_str()); GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" << name << "', please make sure you are running " "this command from the benchmarks/ " From 247ef1f0df4ebb08a2bd8d47912a9e42b88abdc2 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Tue, 3 May 2016 12:53:49 -0700 Subject: [PATCH 7/7] Addressed PR comments. 
--- benchmarks/benchmark_messages_proto2.proto | 2 +- benchmarks/benchmark_messages_proto3.proto | 2 +- benchmarks/benchmarks.proto | 41 ---------------------- benchmarks/generate_datasets.cc | 19 ++++------ 4 files changed, 9 insertions(+), 55 deletions(-) diff --git a/benchmarks/benchmark_messages_proto2.proto b/benchmarks/benchmark_messages_proto2.proto index c7103be5f6..01f67a1af3 100644 --- a/benchmarks/benchmark_messages_proto2.proto +++ b/benchmarks/benchmark_messages_proto2.proto @@ -2,7 +2,7 @@ syntax = "proto2"; -package benchmarks.p2; +package benchmarks.proto2; option java_package = "com.google.protobuf.benchmarks"; // This is the default, but we specify it here explicitly. diff --git a/benchmarks/benchmark_messages_proto3.proto b/benchmarks/benchmark_messages_proto3.proto index 4ea39c22f2..32f586986b 100644 --- a/benchmarks/benchmark_messages_proto3.proto +++ b/benchmarks/benchmark_messages_proto3.proto @@ -2,7 +2,7 @@ syntax = "proto3"; -package benchmarks.p3; +package benchmarks.proto3; option java_package = "com.google.protobuf.benchmarks"; // This is the default, but we specify it here explicitly. diff --git a/benchmarks/benchmarks.proto b/benchmarks/benchmarks.proto index 0ac3bf3383..51c0b54877 100644 --- a/benchmarks/benchmarks.proto +++ b/benchmarks/benchmarks.proto @@ -61,44 +61,3 @@ message BenchmarkDataset { // good branch prediction performance. repeated bytes payload = 3; } - -// A benchmark can write out metrics that we will then upload to our metrics -// database for tracking over time. -message Metric { - // A unique ID for these results. Used for de-duping. - string guid = 1; - - // The labels specify exactly what benchmark was run against the dataset. - // The specific benchmark suite can decide what these mean, but here are - // some common labels that have a predefined meaning: - // - // - "dataset": for tests that pertain to a specific dataset. - // - // For example: - // - // # Tests parsing from binary proto string using arenas. 
- // labels={ - // dataset: "testalltypes", - // op: "parse", - // format: "binaryproto", - // input: "string" - // arena: "true" - // } - // - // # Tests serializing to JSON string. - // labels={ - // dataset: "testalltypes", - // op: "serialize", - // format: "json", - // input: "string" - // } - map<string, string> labels = 2; - - // Unit of measurement for the metric: - // - a speed test might be "mb_per_second" or "ops_per_second" - // - a size test might be "kb". - string unit = 3; - - // Metric value. - double value = 4; -} diff --git a/benchmarks/generate_datasets.cc b/benchmarks/generate_datasets.cc index dab635e77d..61e7adf1ba 100644 --- a/benchmarks/generate_datasets.cc +++ b/benchmarks/generate_datasets.cc @@ -28,9 +28,6 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -const char *file_prefix = "dataset."; -const char *file_suffix = ".pb"; - #include <fstream> #include <iostream> #include "benchmarks.pb.h" @@ -41,10 +38,11 @@ using google::protobuf::DescriptorPool; using google::protobuf::Message; using google::protobuf::MessageFactory; -#define ARRAY_TO_STRING(arr) std::string(arr, arr + sizeof(arr)) - std::set<std::string> names; +const char *file_prefix = "dataset."; +const char *file_suffix = ".pb"; + void WriteFileWithPayloads(const std::string& name, const std::string& message_name, const std::vector<std::string>& payload) { @@ -81,13 +79,10 @@ void WriteFileWithPayloads(const std::string& name, dataset.add_payload()->assign(payload[i]); } - std::string serialized; - dataset.SerializeToString(&serialized); - std::ofstream writer; std::string fname = file_prefix + name + file_suffix; writer.open(fname.c_str()); - writer << serialized; + dataset.SerializeToOstream(&writer); writer.close(); std::cerr << "Wrote dataset: " << fname << "\n"; @@ -111,12 +106,12 @@ std::string ReadFile(const std::string& name) { } int main() { - WriteFile("google_message1_proto3", "benchmarks.p3.GoogleMessage1", +
WriteFile("google_message1_proto3", "benchmarks.proto3.GoogleMessage1", ReadFile("google_message1.dat")); - WriteFile("google_message1_proto2", "benchmarks.p2.GoogleMessage1", + WriteFile("google_message1_proto2", "benchmarks.proto2.GoogleMessage1", ReadFile("google_message1.dat")); // Not in proto3 because it has a group, which is not supported. - WriteFile("google_message2", "benchmarks.p2.GoogleMessage2", + WriteFile("google_message2", "benchmarks.proto2.GoogleMessage2", ReadFile("google_message2.dat")); }