Add python benchmark

pull/4065/head
Yilun Chong 7 years ago
parent 43caa38d6e
commit 2fc69b1561
1. 79 benchmarks/Makefile.am
2. 69 benchmarks/README.md
3. 0 benchmarks/__init__.py
4. 115 benchmarks/py_benchmark.py
5. 29 benchmarks/python_benchmark_messages.cc

@@ -39,11 +39,11 @@ else
# relative to srcdir, which may not be the same as the current directory when
# building out-of-tree.
protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs)
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java $(benchmarks_protoc_inputs) )
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) )
touch protoc_middleman
protoc_middleman2: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java $(benchmarks_protoc_inputs_proto2) )
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2) )
touch protoc_middleman2
endif
@@ -155,6 +155,75 @@ java: protoc_middleman protoc_middleman2 java-benchmark
############# JAVA RULES END ##############
############# PYTHON RULES ##############
python_add_init: protoc_middleman protoc_middleman2
all_file=`find tmp -type f -regex '.*\.py'` && \
for file in $${all_file[@]}; do \
path="$${file%/*}"; \
while true; do \
touch "$$path/__init__.py" && chmod +x "$$path/__init__.py"; \
if [[ $$path != *"/"* ]]; then break; fi; \
path=$${path%/*}; \
done \
done
python_cpp_pkg_flags = `pkg-config --cflags --libs python`
lib_LTLIBRARIES = libbenchmark_messages.la
libbenchmark_messages_la_SOURCES = python_benchmark_messages.cc
libbenchmark_messages_la_LIBADD = $(top_srcdir)/src/.libs/libprotobuf.la
libbenchmark_messages_la_LDFLAGS = -version-info 1:0:0 -export-dynamic
libbenchmark_messages_la_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) $(python_cpp_pkg_flags)
libbenchmark_messages_la-libbenchmark_messages_la.$(OBJEXT): $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2)
nodist_libbenchmark_messages_la_SOURCES = \
$(benchmarks_protoc_outputs) \
$(benchmarks_protoc_outputs_proto2) \
$(benchmarks_protoc_outputs_proto2_header) \
$(benchmarks_protoc_outputs_header)
python-pure-python-benchmark: python_add_init
@echo "Writing shortcut script python-pure-python-benchmark..."
@echo '#! /bin/sh' > python-pure-python-benchmark
@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark
@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark
@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'python\' >> python-pure-python-benchmark
@echo cp py_benchmark.py tmp >> python-pure-python-benchmark
@echo python tmp/py_benchmark.py false '$$@' >> python-pure-python-benchmark
@chmod +x python-pure-python-benchmark
python-cpp-reflection-benchmark: python_add_init
@echo "Writing shortcut script python-cpp-reflection-benchmark..."
@echo '#! /bin/sh' > python-cpp-reflection-benchmark
@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark
@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark
@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-reflection-benchmark
@echo cp py_benchmark.py tmp >> python-cpp-reflection-benchmark
@echo python tmp/py_benchmark.py false '$$@' >> python-cpp-reflection-benchmark
@chmod +x python-cpp-reflection-benchmark
python-cpp-generated-code-benchmark: python_add_init libbenchmark_messages.la
@echo "Writing shortcut script python-cpp-generated-code-benchmark..."
@echo '#! /bin/sh' > python-cpp-generated-code-benchmark
@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark
@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark
@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-generated-code-benchmark
@echo cp py_benchmark.py tmp >> python-cpp-generated-code-benchmark
@echo python tmp/py_benchmark.py true '$$@' >> python-cpp-generated-code-benchmark
@chmod +x python-cpp-generated-code-benchmark
python-pure-python: python-pure-python-benchmark
./python-pure-python-benchmark $(all_data)
python-cpp-reflection: python-cpp-reflection-benchmark
./python-cpp-reflection-benchmark $(all_data)
python-cpp-generated-code: python-cpp-generated-code-benchmark
./python-cpp-generated-code-benchmark $(all_data)
############# PYTHON RULES END ##############
MAINTAINERCLEANFILES = \
Makefile.in
@@ -168,7 +237,11 @@ CLEANFILES = \
protoc_middleman \
protoc_middleman2 \
javac_middleman \
java-benchmark
java-benchmark \
python_cpp_proto_library \
python-pure-python-benchmark \
python-cpp-reflection-benchmark \
python-cpp-generated-code-benchmark
clean-local:
-rm -rf tmp/*

@@ -17,12 +17,25 @@ We are using [google/benchmark](https://github.com/google/benchmark) as the
benchmark tool for testing cpp. It is built automatically when you build the
cpp benchmark.
### JAVA
### Java
We're using maven to build the java benchmarks, the same way we build the
Java protobuf. There are no other tools to install. We're using
[google/caliper](https://github.com/google/caliper) as the benchmark tool, which
maven pulls in automatically.
### Python
We're using the Python C++ API to test the generated CPP proto version of
Python protobuf, which is also a prerequisite for the Python protobuf cpp
implementation. You need to install the correct version of the Python C++
extension package before running the generated CPP proto version of the Python
protobuf benchmark, e.g. under Ubuntu you need to
```
$ sudo apt-get install python-dev
$ sudo apt-get install python3-dev
```
You also need to make sure `pkg-config` is installed.
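Once the extension package is installed, you can check which implementation the protobuf runtime actually picks up. The snippet below is only a diagnostic sketch and relies on protobuf's internal `api_implementation` helper, so treat it as informational rather than a supported API:
```
# Prints "cpp" when the C++ implementation is active, "python" otherwise.
# Run with PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp to verify the cpp path.
from google.protobuf.internal import api_implementation
print(api_implementation.Type())
```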
### Big data
There is some optional big testing data which is not included in the directory initially; you need to
@@ -38,34 +51,80 @@ After doing this the big data file will be automatically generated in the benchmark d
To run all the benchmark datasets:
For java:
### Java:
```
$ make java
```
For cpp:
### CPP:
```
$ make cpp
```
### Python:
We have three versions of the python protobuf implementation: pure python, cpp
reflection and cpp generated code. To run the benchmark for these versions, you need to:
#### Pure Python:
```
$ make python-pure-python
```
#### CPP reflection:
```
$ make python-cpp-reflection
```
#### CPP generated code:
```
$ make python-cpp-generated-code
```
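Under the hood these three targets only differ in two knobs: the `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION` environment variable exported by the generated shortcut script and the true/false flag passed to `py_benchmark.py`. A summary sketch in Python form (the mapping is taken from the Makefile rules in this commit):
```
# How the three make targets map onto the knobs used by the shortcut scripts:
#   target                       implementation env var, cpp generated code flag
MODES = {
    "python-pure-python":        ("python", "false"),
    "python-cpp-reflection":     ("cpp",    "false"),
    "python-cpp-generated-code": ("cpp",    "true"),
}
```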
To run a specific dataset:
For java:
### Java:
```
$ make java-benchmark
$ ./java-benchmark $(specific generated dataset file name) [-- $(caliper option)]
```
For cpp:
### CPP:
```
$ make cpp-benchmark
$ ./cpp-benchmark $(specific generated dataset file name)
```
### Python:
#### Pure Python:
```
$ make python-pure-python-benchmark
$ ./python-pure-python-benchmark $(specific generated dataset file name)
```
#### CPP reflection:
```
$ make python-cpp-reflection-benchmark
$ ./python-cpp-reflection-benchmark $(specific generated dataset file name)
```
#### CPP generated code:
```
$ make python-cpp-generated-code-benchmark
$ ./python-cpp-generated-code-benchmark $(specific generated dataset file name)
```
## Benchmark datasets
Each data set is in the format of benchmarks.proto:

@@ -0,0 +1,115 @@
import sys
import os
import timeit
import math
import fnmatch
# The CPP generated code must be linked (imported) before the generated Python
# code so that the descriptors can be found in the descriptor pool.
if len(sys.argv) < 2:
raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
if sys.argv[1] == "true":
sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/.libs" )
import libbenchmark_messages
sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/tmp" )
elif sys.argv[1] != "false":
raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
import datasets.google_message1.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2
import datasets.google_message1.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2
import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2
import datasets.google_message3.benchmark_message3_pb2 as benchmark_message3_pb2
import datasets.google_message4.benchmark_message4_pb2 as benchmark_message4_pb2
import benchmarks_pb2
def run_one_test(filename):
data = open(os.path.dirname(sys.argv[0]) + "/../" + filename).read()
benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
benchmark_dataset.ParseFromString(data)
benchmark_util = Benchmark(full_iteration=len(benchmark_dataset.payload),
module="py_benchmark",
setup_method="init")
print "Message %s of dataset file %s" % \
(benchmark_dataset.message_name, filename)
benchmark_util.set_test_method("parse_from_benchmark")
print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
benchmark_util.set_test_method("serialize_to_benchmark")
print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
print ""
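# init(filename): load the dataset, pick the matching generated message class,
# and pre-parse every payload into message_list so serialize_to_benchmark has
# ready-made messages to serialize.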
def init(filename):
global benchmark_dataset, message_class, message_list, counter
message_list=[]
counter = 0
data = open(os.path.dirname(sys.argv[0]) + "/../" + filename).read()
benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
benchmark_dataset.ParseFromString(data)
if benchmark_dataset.message_name == "benchmarks.proto3.GoogleMessage1":
message_class = benchmark_message1_proto3_pb2.GoogleMessage1
elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage1":
message_class = benchmark_message1_proto2_pb2.GoogleMessage1
elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage2":
message_class = benchmark_message2_pb2.GoogleMessage2
elif benchmark_dataset.message_name == "benchmarks.google_message3.GoogleMessage3":
message_class = benchmark_message3_pb2.GoogleMessage3
elif benchmark_dataset.message_name == "benchmarks.google_message4.GoogleMessage4":
message_class = benchmark_message4_pb2.GoogleMessage4
else:
raise IOError("Message %s not found!" % (benchmark_dataset.message_name))
for one_payload in benchmark_dataset.payload:
temp = message_class()
temp.ParseFromString(one_payload)
message_list.append(temp)
def parse_from_benchmark():
global counter, message_class, benchmark_dataset
m = message_class().ParseFromString(benchmark_dataset.payload[counter % len(benchmark_dataset.payload)])
counter = counter + 1
def serialize_to_benchmark():
global counter, message_list, message_class
s = message_list[counter % len(benchmark_dataset.payload)].SerializeToString()
counter = counter + 1
class Benchmark:
def __init__(self, module=None, test_method=None,
setup_method=None, full_iteration = 1):
self.full_iteration = full_iteration
self.module = module
self.test_method = test_method
self.setup_method = setup_method
def set_test_method(self, test_method):
self.test_method = test_method
def full_setup_code(self, setup_method_args=''):
setup_code = ""
setup_code += "from %s import %s\n" % (self.module, self.test_method)
setup_code += "from %s import %s\n" % (self.module, self.setup_method)
setup_code += "%s(%s)\n" % (self.setup_method, setup_method_args)
return setup_code
def dry_run(self, test_method_args='', setup_method_args=''):
return timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
setup=self.full_setup_code(setup_method_args),
number=self.full_iteration);
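# run_benchmark: time the test method with timeit; if a dry run with
# full_iteration repetitions takes less than ~3 seconds, scale the repetition
# count up so the timed run lasts at least ~3 seconds, then report the average
# time per call in nanoseconds.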
def run_benchmark(self, test_method_args='', setup_method_args=''):
reps = self.full_iteration;
t = self.dry_run(test_method_args, setup_method_args);
if t < 3 :
reps = int(math.ceil(3 / t)) * self.full_iteration
t = timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
setup=self.full_setup_code(setup_method_args),
number=reps);
return "Average time for %s: %.2f ns" % \
(self.test_method, 1.0 * t / reps * (10 ** 9))
if __name__ == "__main__":
for i in range(2, len(sys.argv)):
run_one_test(sys.argv[i])
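For reference, a hedged sketch of what one shortcut-script invocation boils down to, written in Python rather than sh; it assumes `py_benchmark.py` has already been copied into `tmp/` (the Makefile-generated shortcut scripts do this) and the dataset file name is a placeholder:
```
# Hypothetical equivalent of `./python-pure-python-benchmark <dataset>`:
# force the pure-python implementation and run py_benchmark.py with the
# "no cpp generated code" flag against a single dataset file.
import os
import subprocess

env = dict(os.environ, PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION="python")
subprocess.check_call(
    ["python", "tmp/py_benchmark.py", "false", "dataset.pb"],  # placeholder name
    env=env)
```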

@@ -0,0 +1,29 @@
#include <Python.h>
#include "benchmarks.pb.h"
#include "datasets/google_message1/benchmark_message1_proto2.pb.h"
#include "datasets/google_message1/benchmark_message1_proto3.pb.h"
#include "datasets/google_message2/benchmark_message2.pb.h"
#include "datasets/google_message3/benchmark_message3.pb.h"
#include "datasets/google_message4/benchmark_message4.pb.h"
static PyMethodDef python_benchmark_methods[] = {
{NULL, NULL, 0, NULL} /* Sentinel */
};
PyMODINIT_FUNC
initlibbenchmark_messages() {
benchmarks::BenchmarkDataset().descriptor();
benchmarks::proto3::GoogleMessage1().descriptor();
benchmarks::proto2::GoogleMessage1().descriptor();
benchmarks::proto2::GoogleMessage2().descriptor();
benchmarks::google_message3::GoogleMessage3().descriptor();
benchmarks::google_message4::GoogleMessage4().descriptor();
PyObject *m;
m = Py_InitModule("libbenchmark_messages", python_benchmark_methods);
if (m == NULL)
return;
}
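The extension module above only forces the generated C++ descriptors to register themselves at import time. A minimal sketch of the import order this implies on the Python side, using the module names from py_benchmark.py:
```
# Import the extension first so the C++ descriptors land in the pool, then
# import the generated Python modules, which look those descriptors up.
import libbenchmark_messages
import benchmarks_pb2
```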