Merge pull request #1464 from google/benchmarks
Added framework for generating/consuming benchmarking data sets.pull/1482/head
commit
07bcf21a9c
9 changed files with 370 additions and 10 deletions
@ -0,0 +1,66 @@ |
||||
|
||||
benchmarks_protoc_inputs = \
|
||||
benchmarks.proto \
|
||||
benchmark_messages_proto3.proto
|
||||
|
||||
benchmarks_protoc_inputs_proto2 = \
|
||||
benchmark_messages_proto2.proto
|
||||
|
||||
benchmarks_protoc_outputs = \
|
||||
benchmarks.pb.cc \
|
||||
benchmarks.pb.h \
|
||||
benchmark_messages_proto3.pb.cc \
|
||||
benchmark_messages_proto3.pb.h
|
||||
|
||||
benchmarks_protoc_outputs_proto2 = \
|
||||
benchmark_messages_proto2.pb.cc \
|
||||
benchmark_messages_proto2.pb.h
|
||||
|
||||
bin_PROGRAMS = generate-datasets
|
||||
|
||||
generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la
|
||||
generate_datasets_SOURCES = generate_datasets.cc
|
||||
generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)
|
||||
nodist_generate_datasets_SOURCES = \
|
||||
$(benchmarks_protoc_outputs) \
|
||||
$(benchmarks_protoc_outputs_proto2)
|
||||
|
||||
# Explicit deps because BUILT_SOURCES are only done before a "make all/check"
|
||||
# so a direct "make generate-datasets" could fail if parallel enough.
|
||||
# See: https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Recording-Dependencies-manually
|
||||
generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h |
||||
|
||||
$(benchmarks_protoc_outputs): protoc_middleman |
||||
$(benchmarks_protoc_outputs_proto2): protoc_middleman2 |
||||
|
||||
CLEANFILES = \
|
||||
$(benchmarks_protoc_outputs) \
|
||||
$(benchmarks_protoc_outputs_proto2) \
|
||||
protoc_middleman \
|
||||
protoc_middleman2 \
|
||||
dataset.*
|
||||
|
||||
if USE_EXTERNAL_PROTOC |
||||
|
||||
protoc_middleman: $(benchmarks_protoc_inputs) |
||||
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs)
|
||||
touch protoc_middleman
|
||||
|
||||
protoc_middleman2: $(benchmarks_protoc_inputs_proto2) |
||||
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs_proto2)
|
||||
touch protoc_middleman2
|
||||
|
||||
else |
||||
|
||||
# We have to cd to $(srcdir) before executing protoc because $(benchmarks_protoc_inputs) is
|
||||
# relative to srcdir, which may not be the same as the current directory when
|
||||
# building out-of-tree.
|
||||
protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) |
||||
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs) )
|
||||
touch protoc_middleman
|
||||
|
||||
# Same as protoc_middleman, but for the proto2 benchmark schemas.  The stamp
# file touched at the end must be this rule's own target (protoc_middleman2):
# if a different file is touched, the target never exists and make re-runs
# protoc on every invocation.
protoc_middleman2: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs_proto2) )
	touch protoc_middleman2
|
||||
|
||||
endif |
@ -0,0 +1,28 @@ |
||||
|
||||
# Protocol Buffers Benchmarks |
||||
|
||||
This directory contains benchmarking schemas and data sets that you |
||||
can use to test a variety of performance scenarios against your |
||||
protobuf language runtime. |
||||
|
||||
The schema for the datasets is described in `benchmarks.proto`. |
||||
|
||||
Generate the data sets like so: |
||||
|
||||
``` |
||||
$ make |
||||
$ ./generate-datasets |
||||
Wrote dataset: dataset.google_message1_proto3.pb |
||||
Wrote dataset: dataset.google_message1_proto2.pb |
||||
Wrote dataset: dataset.google_message2.pb |
||||
$ |
||||
``` |
||||
|
||||
Each data set will be written to its own file. Benchmark runners will
||||
likely want to run several benchmarks against each data set (parse, |
||||
serialize, possibly JSON, possibly using different APIs, etc). |
||||
|
||||
We would like to add more data sets. In general we will favor data sets |
||||
that make the overall suite diverse without being too large or having |
||||
too many similar tests. Ideally everyone can run through the entire |
||||
suite without the test run getting too long. |
@ -0,0 +1,76 @@ |
||||
// Benchmark messages for proto3. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package benchmarks.proto3; |
||||
option java_package = "com.google.protobuf.benchmarks"; |
||||
|
||||
// This is the default, but we specify it here explicitly. |
||||
option optimize_for = SPEED; |
||||
|
||||
message GoogleMessage1 { |
||||
string field1 = 1; |
||||
string field9 = 9; |
||||
string field18 = 18; |
||||
bool field80 = 80; |
||||
bool field81 = 81; |
||||
int32 field2 = 2; |
||||
int32 field3 = 3; |
||||
int32 field280 = 280; |
||||
int32 field6 = 6; |
||||
int64 field22 = 22; |
||||
string field4 = 4; |
||||
repeated fixed64 field5 = 5; |
||||
bool field59 = 59; |
||||
string field7 = 7; |
||||
int32 field16 = 16; |
||||
int32 field130 = 130; |
||||
bool field12 = 12; |
||||
bool field17 = 17; |
||||
bool field13 = 13; |
||||
bool field14 = 14; |
||||
int32 field104 = 104; |
||||
int32 field100 = 100; |
||||
int32 field101 = 101; |
||||
string field102 = 102; |
||||
string field103 = 103; |
||||
int32 field29 = 29; |
||||
bool field30 = 30; |
||||
int32 field60 = 60; |
||||
int32 field271 = 271; |
||||
int32 field272 = 272; |
||||
int32 field150 = 150; |
||||
int32 field23 = 23; |
||||
bool field24 = 24; |
||||
int32 field25 = 25; |
||||
GoogleMessage1SubMessage field15 = 15; |
||||
bool field78 = 78; |
||||
int32 field67 = 67; |
||||
int32 field68 = 68; |
||||
int32 field128 = 128; |
||||
string field129 = 129; |
||||
int32 field131 = 131; |
||||
} |
||||
|
||||
message GoogleMessage1SubMessage { |
||||
int32 field1 = 1; |
||||
int32 field2 = 2; |
||||
int32 field3 = 3; |
||||
string field15 = 15; |
||||
bool field12 = 12; |
||||
int64 field13 = 13; |
||||
int64 field14 = 14; |
||||
int32 field16 = 16; |
||||
int32 field19 = 19; |
||||
bool field20 = 20; |
||||
bool field28 = 28; |
||||
fixed64 field21 = 21; |
||||
int32 field22 = 22; |
||||
bool field23 = 23; |
||||
bool field206 = 206; |
||||
fixed32 field203 = 203; |
||||
int32 field204 = 204; |
||||
string field205 = 205; |
||||
uint64 field207 = 207; |
||||
uint64 field300 = 300; |
||||
} |
@ -0,0 +1,63 @@ |
||||
// Protocol Buffers - Google's data interchange format |
||||
// Copyright 2008 Google Inc. All rights reserved. |
||||
// https://developers.google.com/protocol-buffers/ |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without |
||||
// modification, are permitted provided that the following conditions are |
||||
// met: |
||||
// |
||||
// * Redistributions of source code must retain the above copyright |
||||
// notice, this list of conditions and the following disclaimer. |
||||
// * Redistributions in binary form must reproduce the above |
||||
// copyright notice, this list of conditions and the following disclaimer |
||||
// in the documentation and/or other materials provided with the |
||||
// distribution. |
||||
// * Neither the name of Google Inc. nor the names of its |
||||
// contributors may be used to endorse or promote products derived from |
||||
// this software without specific prior written permission. |
||||
// |
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
|
||||
syntax = "proto3"; |
||||
package benchmarks; |
||||
option java_package = "com.google.protobuf.benchmarks"; |
||||
|
||||
message BenchmarkDataset { |
||||
// Name of the benchmark dataset. This should be unique across all datasets. |
||||
// Should only contain word characters: [a-zA-Z0-9_] |
||||
string name = 1; |
||||
|
||||
// Fully-qualified name of the protobuf message for this dataset. |
||||
// It will be one of the messages defined in benchmark_messages_proto2.proto
||||
// or benchmark_messages_proto3.proto. |
||||
// |
||||
// Implementations that do not support reflection can implement this with |
||||
// an explicit "if/else" chain that lists every known message defined |
||||
// in those files. |
||||
string message_name = 2; |
||||
|
||||
// The payload(s) for this dataset. They should be parsed or serialized |
||||
// in sequence, in a loop, i.e.:
||||
// |
||||
// while (!benchmarkDone) { // Benchmark runner decides when to exit. |
||||
// for (i = 0; i < benchmark.payload.length; i++) { |
||||
// parse(benchmark.payload[i]) |
||||
// } |
||||
// } |
||||
// |
||||
// This is intended to let datasets include a variety of data to provide |
||||
// potentially more realistic results than just parsing the same message |
||||
// over and over. A single message parsed repeatedly could yield unusually |
||||
// good branch prediction performance. |
||||
repeated bytes payload = 3; |
||||
} |
@ -0,0 +1,117 @@ |
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <set>
#include <string>
#include <vector>

#include "benchmarks.pb.h"
||||
|
||||
using benchmarks::BenchmarkDataset; |
||||
using google::protobuf::Descriptor; |
||||
using google::protobuf::DescriptorPool; |
||||
using google::protobuf::Message; |
||||
using google::protobuf::MessageFactory; |
||||
|
||||
std::set<std::string> names; |
||||
|
||||
const char *file_prefix = "dataset."; |
||||
const char *file_suffix = ".pb"; |
||||
|
||||
void WriteFileWithPayloads(const std::string& name, |
||||
const std::string& message_name, |
||||
const std::vector<std::string>& payload) { |
||||
if (!names.insert(name).second) { |
||||
std::cerr << "Duplicate test name: " << name << "\n"; |
||||
abort(); |
||||
} |
||||
|
||||
// First verify that this message name exists in our set of benchmark messages
|
||||
// and that these payloads are valid for the given message.
|
||||
const Descriptor* d = |
||||
DescriptorPool::generated_pool()->FindMessageTypeByName(message_name); |
||||
|
||||
if (!d) { |
||||
std::cerr << "For dataset " << name << ", no such message: " |
||||
<< message_name << "\n"; |
||||
abort(); |
||||
} |
||||
|
||||
Message* m = MessageFactory::generated_factory()->GetPrototype(d)->New(); |
||||
|
||||
for (size_t i = 0; i < payload.size(); i++) { |
||||
if (!m->ParseFromString(payload[i])) { |
||||
std::cerr << "For dataset " << name << ", payload[" << i << "] fails " |
||||
<< "to parse\n"; |
||||
abort(); |
||||
} |
||||
} |
||||
|
||||
BenchmarkDataset dataset; |
||||
dataset.set_name(name); |
||||
dataset.set_message_name(message_name); |
||||
for (size_t i = 0; i < payload.size(); i++) { |
||||
dataset.add_payload()->assign(payload[i]); |
||||
} |
||||
|
||||
std::ofstream writer; |
||||
std::string fname = file_prefix + name + file_suffix; |
||||
writer.open(fname.c_str()); |
||||
dataset.SerializeToOstream(&writer); |
||||
writer.close(); |
||||
|
||||
std::cerr << "Wrote dataset: " << fname << "\n"; |
||||
} |
||||
|
||||
// Convenience wrapper for datasets that consist of exactly one payload: wraps
// |payload| in a one-element vector and forwards to WriteFileWithPayloads().
void WriteFile(const std::string& name, const std::string& message_name,
               const std::string& payload) {
  const std::vector<std::string> single_payload(1, payload);
  WriteFileWithPayloads(name, message_name, single_payload);
}
||||
|
||||
// Returns the entire contents of the file |name| as a string of raw bytes.
//
// Fails a GOOGLE_CHECK (with a hint about the expected working directory) if
// the file cannot be opened.
std::string ReadFile(const std::string& name) {
  // The inputs are serialized protobufs, i.e. binary data.  Open in binary
  // mode so platforms that translate line endings (or stop at a text-mode EOF
  // marker) return the bytes unmodified.
  std::ifstream file(name.c_str(), std::ios::in | std::ios::binary);
  GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" << name <<
                                  "', please make sure you are running "
                                  "this command from the benchmarks/ "
                                  "directory.\n";
  return std::string((std::istreambuf_iterator<char>(file)),
                     std::istreambuf_iterator<char>());
}
||||
|
||||
// Generates every benchmark dataset file.  The *.dat inputs are opened by
// relative name, so this is expected to run from the benchmarks/ directory.
int main() {
  const std::string message1_input = "google_message1.dat";
  const std::string message2_input = "google_message2.dat";

  // The same serialized payload backs both the proto3 and the proto2 flavor of
  // GoogleMessage1.
  WriteFile("google_message1_proto3", "benchmarks.proto3.GoogleMessage1",
            ReadFile(message1_input));
  WriteFile("google_message1_proto2", "benchmarks.proto2.GoogleMessage1",
            ReadFile(message1_input));

  // Not in proto3 because it has a group, which is not supported.
  WriteFile("google_message2", "benchmarks.proto2.GoogleMessage2",
            ReadFile(message2_input));

  return 0;
}
Loading…
Reference in new issue