This takes the code that was sitting in benchmarks/ already and makes it easier for language-specific benchmarks to consume. Future PRs will enhance this so that the language-specific benchmarks can report metrics back that will be tracked over time in PerfKit.
pull/1464/head
parent
f53f911793
commit
2e83110230
7 changed files with 384 additions and 10 deletions
@ -0,0 +1,75 @@ |
||||
|
||||
benchmarks_protoc_inputs = \
|
||||
benchmarks.proto \
|
||||
benchmark_messages_proto3.proto
|
||||
|
||||
benchmarks_protoc_inputs_proto2 = \
|
||||
benchmark_messages_proto2.proto
|
||||
|
||||
benchmarks_protoc_outputs = \
|
||||
benchmarks.pb.cc \
|
||||
benchmarks.pb.h \
|
||||
benchmark_messages_proto3.pb.cc \
|
||||
benchmark_messages_proto3.pb.h
|
||||
|
||||
benchmarks_protoc_outputs_proto2 = \
|
||||
benchmark_messages_proto2.pb.cc \
|
||||
benchmark_messages_proto2.pb.h
|
||||
|
||||
bin_PROGRAMS = generate-datasets
|
||||
|
||||
generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la
|
||||
generate_datasets_SOURCES = generate_datasets.cc
|
||||
generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)
|
||||
nodist_generate_datasets_SOURCES = \
|
||||
google_message1.h \
|
||||
google_message2.h \
|
||||
$(benchmarks_protoc_outputs) \
|
||||
$(benchmarks_protoc_outputs_proto2)
|
||||
|
||||
# Explicit deps because BUILT_SOURCES are only done before a "make all/check"
|
||||
# so a direct "make test_cpp" could fail if parallel enough.
|
||||
generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h google_message1.h google_message2.h |
||||
|
||||
$(benchmarks_protoc_outputs): protoc_middleman |
||||
$(benchmarks_protoc_outputs_proto2): protoc_middleman2 |
||||
|
||||
google_message1.h: google_message1.dat |
||||
xxd -i $< $@
|
||||
|
||||
google_message2.h: google_message2.dat |
||||
xxd -i $< $@
|
||||
|
||||
CLEANFILES = \
|
||||
$(benchmarks_protoc_outputs) \
|
||||
$(benchmarks_protoc_outputs_proto2) \
|
||||
google_message1.h \
|
||||
google_message2.h \
|
||||
protoc_middleman \
|
||||
protoc_middleman2 \
|
||||
dataset.*
|
||||
|
||||
if USE_EXTERNAL_PROTOC |
||||
|
||||
protoc_middleman: $(benchmarks_protoc_inputs) |
||||
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs)
|
||||
touch protoc_middleman
|
||||
|
||||
protoc_middleman2: $(benchmarks_protoc_inputs_proto2) |
||||
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs_proto2)
|
||||
touch protoc_middleman2
|
||||
|
||||
else |
||||
|
||||
# We have to cd to $(srcdir) before executing protoc because $(protoc_inputs) is
|
||||
# relative to srcdir, which may not be the same as the current directory when
|
||||
# building out-of-tree.
|
||||
protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) |
||||
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs) )
|
||||
touch protoc_middleman
|
||||
|
||||
# Generates the proto2 benchmark sources with the in-tree protoc. The stamp
# file touched at the end must be protoc_middleman2 (this rule's own target):
# touching protoc_middleman instead would leave this target permanently out of
# date and would refresh the proto3 rule's stamp without running that rule.
protoc_middleman2: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs_proto2) )
	touch protoc_middleman2
|
||||
|
||||
endif |
@ -0,0 +1,76 @@ |
||||
// Benchmark messages for proto3. |
||||
|
||||
syntax = "proto3"; |
||||
|
||||
package benchmarks.p3; |
||||
option java_package = "com.google.protobuf.benchmarks"; |
||||
|
||||
// This is the default, but we specify it here explicitly. |
||||
option optimize_for = SPEED; |
||||
|
||||
message GoogleMessage1 { |
||||
string field1 = 1; |
||||
string field9 = 9; |
||||
string field18 = 18; |
||||
bool field80 = 80; |
||||
bool field81 = 81; |
||||
int32 field2 = 2; |
||||
int32 field3 = 3; |
||||
int32 field280 = 280; |
||||
int32 field6 = 6; |
||||
int64 field22 = 22; |
||||
string field4 = 4; |
||||
repeated fixed64 field5 = 5; |
||||
bool field59 = 59; |
||||
string field7 = 7; |
||||
int32 field16 = 16; |
||||
int32 field130 = 130; |
||||
bool field12 = 12; |
||||
bool field17 = 17; |
||||
bool field13 = 13; |
||||
bool field14 = 14; |
||||
int32 field104 = 104; |
||||
int32 field100 = 100; |
||||
int32 field101 = 101; |
||||
string field102 = 102; |
||||
string field103 = 103; |
||||
int32 field29 = 29; |
||||
bool field30 = 30; |
||||
int32 field60 = 60; |
||||
int32 field271 = 271; |
||||
int32 field272 = 272; |
||||
int32 field150 = 150; |
||||
int32 field23 = 23; |
||||
bool field24 = 24; |
||||
int32 field25 = 25; |
||||
GoogleMessage1SubMessage field15 = 15; |
||||
bool field78 = 78; |
||||
int32 field67 = 67; |
||||
int32 field68 = 68; |
||||
int32 field128 = 128; |
||||
string field129 = 129; |
||||
int32 field131 = 131; |
||||
} |
||||
|
||||
message GoogleMessage1SubMessage { |
||||
int32 field1 = 1; |
||||
int32 field2 = 2; |
||||
int32 field3 = 3; |
||||
string field15 = 15; |
||||
bool field12 = 12; |
||||
int64 field13 = 13; |
||||
int64 field14 = 14; |
||||
int32 field16 = 16; |
||||
int32 field19 = 19; |
||||
bool field20 = 20; |
||||
bool field28 = 28; |
||||
fixed64 field21 = 21; |
||||
int32 field22 = 22; |
||||
bool field23 = 23; |
||||
bool field206 = 206; |
||||
fixed32 field203 = 203; |
||||
int32 field204 = 204; |
||||
string field205 = 205; |
||||
uint64 field207 = 207; |
||||
uint64 field300 = 300; |
||||
} |
@ -0,0 +1,102 @@ |
||||
// Protocol Buffers - Google's data interchange format |
||||
// Copyright 2008 Google Inc. All rights reserved. |
||||
// https://developers.google.com/protocol-buffers/ |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without |
||||
// modification, are permitted provided that the following conditions are |
||||
// met: |
||||
// |
||||
// * Redistributions of source code must retain the above copyright |
||||
// notice, this list of conditions and the following disclaimer. |
||||
// * Redistributions in binary form must reproduce the above |
||||
// copyright notice, this list of conditions and the following disclaimer |
||||
// in the documentation and/or other materials provided with the |
||||
// distribution. |
||||
// * Neither the name of Google Inc. nor the names of its |
||||
// contributors may be used to endorse or promote products derived from |
||||
// this software without specific prior written permission. |
||||
// |
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
|
||||
syntax = "proto3"; |
||||
package benchmarks; |
||||
option java_package = "com.google.protobuf.benchmarks"; |
||||
|
||||
message BenchmarkDataset { |
||||
// Name of the benchmark dataset. This should be unique across all datasets. |
||||
// Should only contain word characters: [a-zA-Z0-9_] |
||||
string name = 1; |
||||
|
||||
// Fully-qualified name of the protobuf message for this dataset. |
||||
// It will be one of the messages defined in the benchmark_messages_*.proto files. |
||||
// Implementations that do not support reflection can implement this with |
||||
// an explicit "if/else" chain that lists every possible message defined |
||||
// in this file. |
||||
string message_name = 2; |
||||
|
||||
// The payload(s) for this dataset. They should be parsed or serialized |
||||
// in sequence, in a loop, ie. |
||||
// |
||||
// while (!benchmarkDone) { // Benchmark runner decides when to exit. |
||||
// for (i = 0; i < benchmark.payload.length; i++) { |
||||
// parse(benchmark.payload[i]) |
||||
// } |
||||
// } |
||||
// |
||||
// This is intended to let datasets include a variety of data to provide |
||||
// potentially more realistic results than just parsing the same message |
||||
// over and over. A single message parsed repeatedly could yield unusually |
||||
// good branch prediction performance. |
||||
repeated bytes payload = 3; |
||||
} |
||||
|
||||
// A benchmark can write out metrics that we will then upload to our metrics |
||||
// database for tracking over time. |
||||
message Metric { |
||||
// A unique ID for these results. Used for de-duping. |
||||
string guid = 1; |
||||
|
||||
// The tags specify exactly what benchmark was run against the dataset. |
||||
// The specific benchmark suite can decide what these mean, but here are |
||||
// some common tags that have a predefined meaning: |
||||
// |
||||
// - "dataset": for tests that pertain to a specific dataset. |
||||
// |
||||
// For example: |
||||
// |
||||
// # Tests parsing from binary proto string using arenas. |
||||
// tags={ |
||||
// dataset: "testalltypes", |
||||
// op: "parse", |
||||
// format: "binaryproto", |
||||
// input: "string", |
||||
// arena: "true" |
||||
// } |
||||
// |
||||
// # Tests serializing to JSON string. |
||||
// tags={ |
||||
// dataset: "testalltypes", |
||||
// op: "serialize", |
||||
// format: "json", |
||||
// input: "string" |
||||
// } |
||||
map<string, string> labels = 2; |
||||
|
||||
// Unit of measurement for the metric: |
||||
// - a speed test might be "mb_per_second" or "ops_per_second" |
||||
// - a size test might be "kb". |
||||
string unit = 3; |
||||
|
||||
// Metric value. |
||||
double value = 4; |
||||
} |
@ -0,0 +1,114 @@ |
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Each dataset is written to a file named file_prefix + <dataset name> + file_suffix.
const char *file_prefix = "dataset."; |
||||
const char *file_suffix = ".pb"; |
||||
|
||||
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "benchmarks.pb.h"
#include "google_message1.h"
#include "google_message2.h"
||||
|
||||
using benchmarks::BenchmarkDataset; |
||||
using google::protobuf::Descriptor; |
||||
using google::protobuf::DescriptorPool; |
||||
using google::protobuf::Message; |
||||
using google::protobuf::MessageFactory; |
||||
|
||||
// Builds a std::string from every byte of an array (sizeof(arr) bytes), using
// the iterator-range constructor so the length comes from the array itself.
#define ARRAY_TO_STRING(arr) std::string(arr, arr + sizeof(arr)) |
||||
|
||||
// Dataset names written so far; WriteFileWithPayloads aborts on a duplicate.
std::set<std::string> names; |
||||
|
||||
void WriteFileWithPayloads(const std::string& name, |
||||
const std::string& message_name, |
||||
const std::vector<std::string>& payload) { |
||||
if (!names.insert(name).second) { |
||||
std::cerr << "Duplicate test name: " << name << "\n"; |
||||
abort(); |
||||
} |
||||
|
||||
// First verify that this message name exists in our set of benchmark messages
|
||||
// and that these payloads are valid for the given message.
|
||||
const Descriptor* d = |
||||
DescriptorPool::generated_pool()->FindMessageTypeByName(message_name); |
||||
|
||||
if (!d) { |
||||
std::cerr << "For dataset " << name << ", no such message: " |
||||
<< message_name << "\n"; |
||||
abort(); |
||||
} |
||||
|
||||
Message* m = MessageFactory::generated_factory()->GetPrototype(d)->New(); |
||||
|
||||
for (size_t i = 0; i < payload.size(); i++) { |
||||
if (!m->ParseFromString(payload[i])) { |
||||
std::cerr << "For dataset " << name << ", payload[" << i << "] fails " |
||||
<< "to parse\n"; |
||||
abort(); |
||||
} |
||||
} |
||||
|
||||
BenchmarkDataset dataset; |
||||
dataset.set_name(name); |
||||
dataset.set_message_name(message_name); |
||||
for (size_t i = 0; i < payload.size(); i++) { |
||||
dataset.add_payload()->assign(payload[i]); |
||||
} |
||||
|
||||
std::string serialized; |
||||
dataset.SerializeToString(&serialized); |
||||
|
||||
std::ofstream writer; |
||||
std::string fname = file_prefix + name + file_suffix; |
||||
writer.open(fname); |
||||
writer << serialized; |
||||
writer.close(); |
||||
|
||||
std::cerr << "Wrote dataset: " << fname << "\n"; |
||||
} |
||||
|
||||
// Convenience wrapper for a dataset that consists of exactly one payload:
// forwards to WriteFileWithPayloads with a single-element payload list.
void WriteFile(const std::string& name, const std::string& message_name,
               const std::string& payload) {
  const std::vector<std::string> single_payload(1, payload);
  WriteFileWithPayloads(name, message_name, single_payload);
}
||||
|
||||
// Writes one dataset file per built-in benchmark payload.
int main() {
  // The same GoogleMessage1 bytes are written twice, once under the proto3
  // message name and once under the proto2 message name.
  const std::string message1_bytes = ARRAY_TO_STRING(google_message1_dat);
  WriteFile("google_message1_proto3", "benchmarks.p3.GoogleMessage1",
            message1_bytes);
  WriteFile("google_message1_proto2", "benchmarks.p2.GoogleMessage1",
            message1_bytes);

  // Not in proto3 because it has a group, which is not supported.
  const std::string message2_bytes = ARRAY_TO_STRING(google_message2_dat);
  WriteFile("google_message2", "benchmarks.p2.GoogleMessage2", message2_bytes);
  return 0;
}
Loading…
Reference in new issue