Merge pull request #1464 from google/benchmarks
Added framework for generating/consuming benchmarking data sets.pull/1482/head
commit
07bcf21a9c
9 changed files with 370 additions and 10 deletions
@ -0,0 +1,66 @@ |
|||||||
|
|
||||||
|
benchmarks_protoc_inputs = \
|
||||||
|
benchmarks.proto \
|
||||||
|
benchmark_messages_proto3.proto
|
||||||
|
|
||||||
|
benchmarks_protoc_inputs_proto2 = \
|
||||||
|
benchmark_messages_proto2.proto
|
||||||
|
|
||||||
|
benchmarks_protoc_outputs = \
|
||||||
|
benchmarks.pb.cc \
|
||||||
|
benchmarks.pb.h \
|
||||||
|
benchmark_messages_proto3.pb.cc \
|
||||||
|
benchmark_messages_proto3.pb.h
|
||||||
|
|
||||||
|
benchmarks_protoc_outputs_proto2 = \
|
||||||
|
benchmark_messages_proto2.pb.cc \
|
||||||
|
benchmark_messages_proto2.pb.h
|
||||||
|
|
||||||
|
bin_PROGRAMS = generate-datasets
|
||||||
|
|
||||||
|
generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la
|
||||||
|
generate_datasets_SOURCES = generate_datasets.cc
|
||||||
|
generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)
|
||||||
|
nodist_generate_datasets_SOURCES = \
|
||||||
|
$(benchmarks_protoc_outputs) \
|
||||||
|
$(benchmarks_protoc_outputs_proto2)
|
||||||
|
|
||||||
|
# Explicit deps because BUILT_SOURCES are only done before a "make all/check"
|
||||||
|
# so a direct "make generate-datasets" could fail if parallel enough.
|
||||||
|
# See: https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Recording-Dependencies-manually
|
||||||
|
generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h |
||||||
|
|
||||||
|
$(benchmarks_protoc_outputs): protoc_middleman |
||||||
|
$(benchmarks_protoc_outputs_proto2): protoc_middleman2 |
||||||
|
|
||||||
|
CLEANFILES = \
|
||||||
|
$(benchmarks_protoc_outputs) \
|
||||||
|
$(benchmarks_protoc_outputs_proto2) \
|
||||||
|
protoc_middleman \
|
||||||
|
protoc_middleman2 \
|
||||||
|
dataset.*
|
||||||
|
|
||||||
|
if USE_EXTERNAL_PROTOC |
||||||
|
|
||||||
|
protoc_middleman: $(benchmarks_protoc_inputs) |
||||||
|
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs)
|
||||||
|
touch protoc_middleman
|
||||||
|
|
||||||
|
protoc_middleman2: $(benchmarks_protoc_inputs_proto2) |
||||||
|
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs_proto2)
|
||||||
|
touch protoc_middleman2
|
||||||
|
|
||||||
|
else |
||||||
|
|
||||||
|
# We have to cd to $(srcdir) before executing protoc because the protoc inputs are
|
||||||
|
# relative to srcdir, which may not be the same as the current directory when
|
||||||
|
# building out-of-tree.
|
||||||
|
protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) |
||||||
|
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs) )
|
||||||
|
touch protoc_middleman
|
||||||
|
|
||||||
|
# Generates the proto2 benchmark message sources with the protoc binary from
# ../src. The stamp touched at the end must be this rule's own target,
# protoc_middleman2: touching protoc_middleman instead would leave
# protoc_middleman2 nonexistent (so this rule reruns on every build) and would
# wrongly refresh the stamp of the other protoc rule.
protoc_middleman2: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs_proto2) )
	touch protoc_middleman2
|
||||||
|
|
||||||
|
endif |
@ -0,0 +1,28 @@ |
|||||||
|
|
||||||
|
# Protocol Buffers Benchmarks |
||||||
|
|
||||||
|
This directory contains benchmarking schemas and data sets that you |
||||||
|
can use to test a variety of performance scenarios against your |
||||||
|
protobuf language runtime. |
||||||
|
|
||||||
|
The schema for the datasets is described in `benchmarks.proto`. |
||||||
|
|
||||||
|
Generate the data sets like so: |
||||||
|
|
||||||
|
``` |
||||||
|
$ make |
||||||
|
$ ./generate-datasets |
||||||
|
Wrote dataset: dataset.google_message1_proto3.pb |
||||||
|
Wrote dataset: dataset.google_message1_proto2.pb |
||||||
|
Wrote dataset: dataset.google_message2.pb |
||||||
|
$ |
||||||
|
``` |
||||||
|
|
||||||
|
Each data set will be written to its own file. Benchmarks will |
||||||
|
likely want to run several benchmarks against each data set (parse, |
||||||
|
serialize, possibly JSON, possibly using different APIs, etc). |
||||||
|
|
||||||
|
We would like to add more data sets. In general we will favor data sets |
||||||
|
that make the overall suite diverse without being too large or having |
||||||
|
too many similar tests. Ideally everyone can run through the entire |
||||||
|
suite without the test run getting too long. |
@ -0,0 +1,76 @@ |
|||||||
|
// Benchmark messages for proto3. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
|
||||||
|
package benchmarks.proto3; |
||||||
|
option java_package = "com.google.protobuf.benchmarks"; |
||||||
|
|
||||||
|
// This is the default, but we specify it here explicitly. |
||||||
|
option optimize_for = SPEED; |
||||||
|
|
||||||
|
message GoogleMessage1 { |
||||||
|
string field1 = 1; |
||||||
|
string field9 = 9; |
||||||
|
string field18 = 18; |
||||||
|
bool field80 = 80; |
||||||
|
bool field81 = 81; |
||||||
|
int32 field2 = 2; |
||||||
|
int32 field3 = 3; |
||||||
|
int32 field280 = 280; |
||||||
|
int32 field6 = 6; |
||||||
|
int64 field22 = 22; |
||||||
|
string field4 = 4; |
||||||
|
repeated fixed64 field5 = 5; |
||||||
|
bool field59 = 59; |
||||||
|
string field7 = 7; |
||||||
|
int32 field16 = 16; |
||||||
|
int32 field130 = 130; |
||||||
|
bool field12 = 12; |
||||||
|
bool field17 = 17; |
||||||
|
bool field13 = 13; |
||||||
|
bool field14 = 14; |
||||||
|
int32 field104 = 104; |
||||||
|
int32 field100 = 100; |
||||||
|
int32 field101 = 101; |
||||||
|
string field102 = 102; |
||||||
|
string field103 = 103; |
||||||
|
int32 field29 = 29; |
||||||
|
bool field30 = 30; |
||||||
|
int32 field60 = 60; |
||||||
|
int32 field271 = 271; |
||||||
|
int32 field272 = 272; |
||||||
|
int32 field150 = 150; |
||||||
|
int32 field23 = 23; |
||||||
|
bool field24 = 24; |
||||||
|
int32 field25 = 25; |
||||||
|
GoogleMessage1SubMessage field15 = 15; |
||||||
|
bool field78 = 78; |
||||||
|
int32 field67 = 67; |
||||||
|
int32 field68 = 68; |
||||||
|
int32 field128 = 128; |
||||||
|
string field129 = 129; |
||||||
|
int32 field131 = 131; |
||||||
|
} |
||||||
|
|
||||||
|
message GoogleMessage1SubMessage { |
||||||
|
int32 field1 = 1; |
||||||
|
int32 field2 = 2; |
||||||
|
int32 field3 = 3; |
||||||
|
string field15 = 15; |
||||||
|
bool field12 = 12; |
||||||
|
int64 field13 = 13; |
||||||
|
int64 field14 = 14; |
||||||
|
int32 field16 = 16; |
||||||
|
int32 field19 = 19; |
||||||
|
bool field20 = 20; |
||||||
|
bool field28 = 28; |
||||||
|
fixed64 field21 = 21; |
||||||
|
int32 field22 = 22; |
||||||
|
bool field23 = 23; |
||||||
|
bool field206 = 206; |
||||||
|
fixed32 field203 = 203; |
||||||
|
int32 field204 = 204; |
||||||
|
string field205 = 205; |
||||||
|
uint64 field207 = 207; |
||||||
|
uint64 field300 = 300; |
||||||
|
} |
@ -0,0 +1,63 @@ |
|||||||
|
// Protocol Buffers - Google's data interchange format |
||||||
|
// Copyright 2008 Google Inc. All rights reserved. |
||||||
|
// https://developers.google.com/protocol-buffers/ |
||||||
|
// |
||||||
|
// Redistribution and use in source and binary forms, with or without |
||||||
|
// modification, are permitted provided that the following conditions are |
||||||
|
// met: |
||||||
|
// |
||||||
|
// * Redistributions of source code must retain the above copyright |
||||||
|
// notice, this list of conditions and the following disclaimer. |
||||||
|
// * Redistributions in binary form must reproduce the above |
||||||
|
// copyright notice, this list of conditions and the following disclaimer |
||||||
|
// in the documentation and/or other materials provided with the |
||||||
|
// distribution. |
||||||
|
// * Neither the name of Google Inc. nor the names of its |
||||||
|
// contributors may be used to endorse or promote products derived from |
||||||
|
// this software without specific prior written permission. |
||||||
|
// |
||||||
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||||
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||||
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
||||||
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
||||||
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||||
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
||||||
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
||||||
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
||||||
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||||
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
||||||
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||||
|
|
||||||
|
syntax = "proto3"; |
||||||
|
package benchmarks; |
||||||
|
option java_package = "com.google.protobuf.benchmarks"; |
||||||
|
|
||||||
|
message BenchmarkDataset { |
||||||
|
// Name of the benchmark dataset. This should be unique across all datasets. |
||||||
|
// Should only contain word characters: [a-zA-Z0-9_] |
||||||
|
string name = 1; |
||||||
|
|
||||||
|
// Fully-qualified name of the protobuf message for this dataset. |
||||||
|
// It will be one of the messages defined in benchmark_messages_proto2.proto
||||||
|
// or benchmark_messages_proto3.proto. |
||||||
|
// |
||||||
|
// Implementations that do not support reflection can implement this with |
||||||
|
// an explicit "if/else" chain that lists every known message defined |
||||||
|
// in those files. |
||||||
|
string message_name = 2; |
||||||
|
|
||||||
|
// The payload(s) for this dataset. They should be parsed or serialized |
||||||
|
// in sequence, in a loop, i.e.:
||||||
|
// |
||||||
|
// while (!benchmarkDone) { // Benchmark runner decides when to exit. |
||||||
|
// for (i = 0; i < benchmark.payload.length; i++) { |
||||||
|
// parse(benchmark.payload[i]) |
||||||
|
// } |
||||||
|
// } |
||||||
|
// |
||||||
|
// This is intended to let datasets include a variety of data to provide |
||||||
|
// potentially more realistic results than just parsing the same message |
||||||
|
// over and over. A single message parsed repeatedly could yield unusually |
||||||
|
// good branch prediction performance. |
||||||
|
repeated bytes payload = 3; |
||||||
|
} |
@ -0,0 +1,117 @@ |
|||||||
|
// Protocol Buffers - Google's data interchange format
|
||||||
|
// Copyright 2008 Google Inc. All rights reserved.
|
||||||
|
// https://developers.google.com/protocol-buffers/
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// * Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
// * Redistributions in binary form must reproduce the above
|
||||||
|
// copyright notice, this list of conditions and the following disclaimer
|
||||||
|
// in the documentation and/or other materials provided with the
|
||||||
|
// distribution.
|
||||||
|
// * Neither the name of Google Inc. nor the names of its
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <set>
#include <string>
#include <vector>

#include "benchmarks.pb.h"
||||||
|
|
||||||
|
using benchmarks::BenchmarkDataset; |
||||||
|
using google::protobuf::Descriptor; |
||||||
|
using google::protobuf::DescriptorPool; |
||||||
|
using google::protobuf::Message; |
||||||
|
using google::protobuf::MessageFactory; |
||||||
|
|
||||||
|
// Dataset names that have already been written; used to reject duplicates.
std::set<std::string> names;

// Every output file is named file_prefix + <dataset name> + file_suffix.
// Both the characters and the pointers are constant: nothing may retarget them.
const char* const file_prefix = "dataset.";
const char* const file_suffix = ".pb";
||||||
|
|
||||||
|
void WriteFileWithPayloads(const std::string& name, |
||||||
|
const std::string& message_name, |
||||||
|
const std::vector<std::string>& payload) { |
||||||
|
if (!names.insert(name).second) { |
||||||
|
std::cerr << "Duplicate test name: " << name << "\n"; |
||||||
|
abort(); |
||||||
|
} |
||||||
|
|
||||||
|
// First verify that this message name exists in our set of benchmark messages
|
||||||
|
// and that these payloads are valid for the given message.
|
||||||
|
const Descriptor* d = |
||||||
|
DescriptorPool::generated_pool()->FindMessageTypeByName(message_name); |
||||||
|
|
||||||
|
if (!d) { |
||||||
|
std::cerr << "For dataset " << name << ", no such message: " |
||||||
|
<< message_name << "\n"; |
||||||
|
abort(); |
||||||
|
} |
||||||
|
|
||||||
|
Message* m = MessageFactory::generated_factory()->GetPrototype(d)->New(); |
||||||
|
|
||||||
|
for (size_t i = 0; i < payload.size(); i++) { |
||||||
|
if (!m->ParseFromString(payload[i])) { |
||||||
|
std::cerr << "For dataset " << name << ", payload[" << i << "] fails " |
||||||
|
<< "to parse\n"; |
||||||
|
abort(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
BenchmarkDataset dataset; |
||||||
|
dataset.set_name(name); |
||||||
|
dataset.set_message_name(message_name); |
||||||
|
for (size_t i = 0; i < payload.size(); i++) { |
||||||
|
dataset.add_payload()->assign(payload[i]); |
||||||
|
} |
||||||
|
|
||||||
|
std::ofstream writer; |
||||||
|
std::string fname = file_prefix + name + file_suffix; |
||||||
|
writer.open(fname.c_str()); |
||||||
|
dataset.SerializeToOstream(&writer); |
||||||
|
writer.close(); |
||||||
|
|
||||||
|
std::cerr << "Wrote dataset: " << fname << "\n"; |
||||||
|
} |
||||||
|
|
||||||
|
// Convenience wrapper for a dataset that consists of exactly one payload:
// forwards to WriteFileWithPayloads() with a one-element payload list.
void WriteFile(const std::string& name, const std::string& message_name,
               const std::string& payload) {
  const std::vector<std::string> single_payload(1, payload);
  WriteFileWithPayloads(name, message_name, single_payload);
}
||||||
|
|
||||||
|
// Returns the entire contents of the file `name` as a string of raw bytes.
// Fails a GOOGLE_CHECK if the file cannot be opened.
std::string ReadFile(const std::string& name) {
  // The callers feed these bytes to a protobuf parser, so read in binary mode:
  // text mode may translate newlines on some platforms and alter the payload.
  std::ifstream file(name.c_str(), std::ios::in | std::ios::binary);
  GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" << name <<
                                  "', please make sure you are running "
                                  "this command from the benchmarks/ "
                                  "directory.\n";
  return std::string((std::istreambuf_iterator<char>(file)),
                     std::istreambuf_iterator<char>());
}
||||||
|
|
||||||
|
// Generates every benchmark dataset file. Each raw payload is read from a
// .dat file (path relative to the working directory) and handed to WriteFile().
int main() {
  // The same serialized payload backs both the proto3 and the proto2 variant
  // of GoogleMessage1.
  const std::string message1_for_proto3 = ReadFile("google_message1.dat");
  WriteFile("google_message1_proto3", "benchmarks.proto3.GoogleMessage1",
            message1_for_proto3);

  const std::string message1_for_proto2 = ReadFile("google_message1.dat");
  WriteFile("google_message1_proto2", "benchmarks.proto2.GoogleMessage1",
            message1_for_proto2);

  // Not in proto3 because it has a group, which is not supported.
  const std::string message2 = ReadFile("google_message2.dat");
  WriteFile("google_message2", "benchmarks.proto2.GoogleMessage2", message2);

  return 0;
}
Loading…
Reference in new issue