commit 5fb73f80f2
14 changed files with 743 additions and 208 deletions
@@ -1,12 +1,12 @@
 package main
 
 import (
-	benchmarkWrapper "./tmp"
-	googleMessage1Proto2 "./tmp/datasets/google_message1/proto2"
-	googleMessage1Proto3 "./tmp/datasets/google_message1/proto3"
-	googleMessage2 "./tmp/datasets/google_message2"
-	googleMessage3 "./tmp/datasets/google_message3"
-	googleMessage4 "./tmp/datasets/google_message4"
+	benchmarkWrapper "../tmp"
+	googleMessage1Proto2 "../tmp/datasets/google_message1/proto2"
+	googleMessage1Proto3 "../tmp/datasets/google_message1/proto3"
+	googleMessage2 "../tmp/datasets/google_message2"
+	googleMessage3 "../tmp/datasets/google_message3"
+	googleMessage4 "../tmp/datasets/google_message4"
 	"flag"
 	"github.com/golang/protobuf/proto"
 	"io/ioutil"
@@ -1,8 +1,8 @@
 #include <Python.h>
 
 #include "benchmarks.pb.h"
-#include "datasets/google_message1/benchmark_message1_proto2.pb.h"
-#include "datasets/google_message1/benchmark_message1_proto3.pb.h"
+#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
+#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
 #include "datasets/google_message2/benchmark_message2.pb.h"
 #include "datasets/google_message3/benchmark_message3.pb.h"
 #include "datasets/google_message4/benchmark_message4.pb.h"
@@ -0,0 +1,188 @@
#!/usr/bin/env python2.7

import argparse
import json
import uuid
import httplib2

from apiclient import discovery
from apiclient.errors import HttpError
from oauth2client.client import GoogleCredentials

# 30 days in milliseconds
_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000
NUM_RETRIES = 3


def create_big_query():
    """Authenticates with cloud platform and gets a BigQuery service object
    """
    creds = GoogleCredentials.get_application_default()
    return discovery.build(
        'bigquery', 'v2', credentials=creds, cache_discovery=False)


def create_dataset(biq_query, project_id, dataset_id):
    is_success = True
    body = {
        'datasetReference': {
            'projectId': project_id,
            'datasetId': dataset_id
        }
    }

    try:
        dataset_req = biq_query.datasets().insert(
            projectId=project_id, body=body)
        dataset_req.execute(num_retries=NUM_RETRIES)
    except HttpError as http_error:
        if http_error.resp.status == 409:
            print 'Warning: The dataset %s already exists' % dataset_id
        else:
            # Note: For more debugging info, print "http_error.content"
            print 'Error in creating dataset: %s. Err: %s' % (dataset_id,
                                                              http_error)
            is_success = False
    return is_success


def create_table(big_query, project_id, dataset_id, table_id, table_schema,
                 description):
    fields = [{
        'name': field_name,
        'type': field_type,
        'description': field_description
    } for (field_name, field_type, field_description) in table_schema]
    return create_table2(big_query, project_id, dataset_id, table_id, fields,
                         description)


def create_partitioned_table(big_query,
                             project_id,
                             dataset_id,
                             table_id,
                             table_schema,
                             description,
                             partition_type='DAY',
                             expiration_ms=_EXPIRATION_MS):
    """Creates a partitioned table. By default, a date-partitioned table is created with
    each partition lasting 30 days after it was last modified.
    """
    fields = [{
        'name': field_name,
        'type': field_type,
        'description': field_description
    } for (field_name, field_type, field_description) in table_schema]
    return create_table2(big_query, project_id, dataset_id, table_id, fields,
                         description, partition_type, expiration_ms)


def create_table2(big_query,
                  project_id,
                  dataset_id,
                  table_id,
                  fields_schema,
                  description,
                  partition_type=None,
                  expiration_ms=None):
    is_success = True

    body = {
        'description': description,
        'schema': {
            'fields': fields_schema
        },
        'tableReference': {
            'datasetId': dataset_id,
            'projectId': project_id,
            'tableId': table_id
        }
    }

    if partition_type and expiration_ms:
        body["timePartitioning"] = {
            "type": partition_type,
            "expirationMs": expiration_ms
        }

    try:
        table_req = big_query.tables().insert(
            projectId=project_id, datasetId=dataset_id, body=body)
        res = table_req.execute(num_retries=NUM_RETRIES)
        print 'Successfully created %s "%s"' % (res['kind'], res['id'])
    except HttpError as http_error:
        if http_error.resp.status == 409:
            print 'Warning: Table %s already exists' % table_id
        else:
            print 'Error in creating table: %s. Err: %s' % (table_id,
                                                            http_error)
            is_success = False
    return is_success


def patch_table(big_query, project_id, dataset_id, table_id, fields_schema):
    is_success = True

    body = {
        'schema': {
            'fields': fields_schema
        },
        'tableReference': {
            'datasetId': dataset_id,
            'projectId': project_id,
            'tableId': table_id
        }
    }

    try:
        table_req = big_query.tables().patch(
            projectId=project_id,
            datasetId=dataset_id,
            tableId=table_id,
            body=body)
        res = table_req.execute(num_retries=NUM_RETRIES)
        print 'Successfully patched %s "%s"' % (res['kind'], res['id'])
    except HttpError as http_error:
        print 'Error in creating table: %s. Err: %s' % (table_id, http_error)
        is_success = False
    return is_success


def insert_rows(big_query, project_id, dataset_id, table_id, rows_list):
    is_success = True
    body = {'rows': rows_list}
    try:
        insert_req = big_query.tabledata().insertAll(
            projectId=project_id,
            datasetId=dataset_id,
            tableId=table_id,
            body=body)
        res = insert_req.execute(num_retries=NUM_RETRIES)
        if res.get('insertErrors', None):
            print 'Error inserting rows! Response: %s' % res
            is_success = False
    except HttpError as http_error:
        print 'Error inserting rows to the table %s' % table_id
        is_success = False

    return is_success


def sync_query_job(big_query, project_id, query, timeout=5000):
    query_data = {'query': query, 'timeoutMs': timeout}
    query_job = None
    try:
        query_job = big_query.jobs().query(
            projectId=project_id,
            body=query_data).execute(num_retries=NUM_RETRIES)
    except HttpError as http_error:
        print 'Query execute job failed with error: %s' % http_error
        print http_error.content
    return query_job


# List of (column name, column type, description) tuples
def make_row(unique_row_id, row_values_dict):
    """row_values_dict is a dictionary of column name and column value.
    """
    return {'insertId': unique_row_id, 'json': row_values_dict}
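
Taken together, these helpers are thin wrappers around the google-api-python-client BigQuery v2 service object. Below is a minimal sketch of how the upload script added later in this commit chains them; the project, dataset, and table IDs and the schema are made-up placeholders, only the helper names and signatures come from this file:

import uuid
import big_query_utils

# Hypothetical schema: (column name, column type, description) tuples,
# the layout create_table()/create_partitioned_table() expect.
_SCHEMA = [
    ('language', 'STRING', 'benchmark language'),
    ('throughput', 'FLOAT', 'throughput in MiB/s'),
]

bq = big_query_utils.create_big_query()
big_query_utils.create_dataset(bq, 'my-project', 'my_dataset')
big_query_utils.create_partitioned_table(bq, 'my-project', 'my_dataset',
                                         'my_table', _SCHEMA,
                                         'example benchmark result table')
row = big_query_utils.make_row(str(uuid.uuid4()),
                               {'language': 'cpp', 'throughput': 123.4})
big_query_utils.insert_rows(bq, 'my-project', 'my_dataset', 'my_table', [row])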
@@ -0,0 +1,290 @@
import argparse
import os
import re
import copy
import uuid
import calendar
import time
import big_query_utils
import datetime
import json
# This import depends on the automake rule protoc_middleman; please make sure
# protoc_middleman has been built before running this file.
import os.path, sys
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
import tmp.benchmarks_pb2 as benchmarks_pb2
from click.types import STRING

_PROJECT_ID = 'grpc-testing'
_DATASET = 'protobuf_benchmark_result'
_TABLE = 'opensource_result_v1'
_NOW = "%d%02d%02d" % (datetime.datetime.now().year,
                       datetime.datetime.now().month,
                       datetime.datetime.now().day)

file_size_map = {}

def get_data_size(file_name):
    if file_name in file_size_map:
        return file_size_map[file_name]
    benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
    benchmark_dataset.ParseFromString(
        open(os.path.dirname(os.path.abspath(__file__)) + "/../" + file_name).read())
    size = 0
    count = 0
    for payload in benchmark_dataset.payload:
        size += len(payload)
        count += 1
    file_size_map[file_name] = (size, 1.0 * size / count)
    return size, 1.0 * size / count


def extract_file_name(file_name):
    name_list = re.split("[/\.]", file_name)
    short_file_name = ""
    for name in name_list:
        if name[:14] == "google_message":
            short_file_name = name
    return short_file_name


cpp_result = []
python_result = []
java_result = []
go_result = []


# CPP results example:
# [
#   "benchmarks": [
#     {
#       "bytes_per_second": int,
#       "cpu_time": int,
#       "name": string,
#       "time_unit": string,
#       ...
#     },
#     ...
#   ],
#   ...
# ]
def parse_cpp_result(filename):
    global cpp_result
    if filename == "":
        return
    if filename[0] != '/':
        filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
    with open(filename) as f:
        results = json.loads(f.read())
        for benchmark in results["benchmarks"]:
            data_filename = "".join(
                re.split("(_parse_|_serialize)", benchmark["name"])[0])
            behavior = benchmark["name"][len(data_filename) + 1:]
            cpp_result.append({
                "language": "cpp",
                "dataFileName": data_filename,
                "behavior": behavior,
                "throughput": benchmark["bytes_per_second"] / 2.0 ** 20
            })


# Python results example:
# [
#   [
#     {
#       "filename": string,
#       "benchmarks": {
#         behavior: results,
#         ...
#       },
#       "message_name": STRING
#     },
#     ...
#   ], # pure-python
#   ...
# ]
def parse_python_result(filename):
    global python_result
    if filename == "":
        return
    if filename[0] != '/':
        filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
    with open(filename) as f:
        results_list = json.loads(f.read())
        for results in results_list:
            for result in results:
                _, avg_size = get_data_size(result["filename"])
                for behavior in result["benchmarks"]:
                    python_result.append({
                        "language": "python",
                        "dataFileName": extract_file_name(result["filename"]),
                        "behavior": behavior,
                        "throughput": avg_size /
                            result["benchmarks"][behavior] * 1e9 / 2 ** 20
                    })


# Java results example:
# [
#   {
#     "id": string,
#     "instrumentSpec": {...},
#     "measurements": [
#       {
#         "weight": float,
#         "value": {
#           "magnitude": float,
#           "unit": string
#         },
#         ...
#       },
#       ...
#     ],
#     "run": {...},
#     "scenario": {
#       "benchmarkSpec": {
#         "methodName": string,
#         "parameters": {
#           defined parameters in the benchmark: parameters value
#         },
#         ...
#       },
#       ...
#     }
#
#   },
#   ...
# ]
def parse_java_result(filename):
    global average_bytes_per_message, java_result
    if filename == "":
        return
    if filename[0] != '/':
        filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
    with open(filename) as f:
        results = json.loads(f.read())
        for result in results:
            total_weight = 0
            total_value = 0
            for measurement in result["measurements"]:
                total_weight += measurement["weight"]
                total_value += measurement["value"]["magnitude"]
            avg_time = total_value * 1.0 / total_weight
            total_size, _ = get_data_size(
                result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
            java_result.append({
                "language": "java",
                "throughput": total_size / avg_time * 1e9 / 2 ** 20,
                "behavior": result["scenario"]["benchmarkSpec"]["methodName"],
                "dataFileName": extract_file_name(
                    result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
            })


# Go benchmark results:
#
# goos: linux
# goarch: amd64
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12    3000    705784 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12      2000    634648 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12         5000    244174 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12         300   4120954 ns/op
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12         300   4108632 ns/op
# PASS
# ok _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s
def parse_go_result(filename):
    global go_result
    if filename == "":
        return
    if filename[0] != '/':
        filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
    with open(filename) as f:
        for line in f:
            result_list = re.split("[\ \t]+", line)
            if result_list[0][:9] != "Benchmark":
                continue
            first_slash_index = result_list[0].find('/')
            last_slash_index = result_list[0].rfind('/')
            full_filename = result_list[0][first_slash_index+4:last_slash_index]  # delete ../ prefix
            total_bytes, _ = get_data_size(full_filename)
            behavior_with_suffix = result_list[0][last_slash_index+1:]
            last_dash = behavior_with_suffix.rfind("-")
            if last_dash == -1:
                behavior = behavior_with_suffix
            else:
                behavior = behavior_with_suffix[:last_dash]
            go_result.append({
                "dataFilename": extract_file_name(full_filename),
                "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20,
                "behavior": behavior,
                "language": "go"
            })


def get_metadata():
    build_number = os.getenv('BUILD_NUMBER')
    build_url = os.getenv('BUILD_URL')
    job_name = os.getenv('JOB_NAME')
    git_commit = os.getenv('GIT_COMMIT')
    # actual commit is the actual head of PR that is getting tested
    git_actual_commit = os.getenv('ghprbActualCommit')

    utc_timestamp = str(calendar.timegm(time.gmtime()))
    metadata = {'created': utc_timestamp}

    if build_number:
        metadata['buildNumber'] = build_number
    if build_url:
        metadata['buildUrl'] = build_url
    if job_name:
        metadata['jobName'] = job_name
    if git_commit:
        metadata['gitCommit'] = git_commit
    if git_actual_commit:
        metadata['gitActualCommit'] = git_actual_commit

    return metadata


def upload_result(result_list, metadata):
    for result in result_list:
        new_result = copy.deepcopy(result)
        new_result['metadata'] = metadata
        bq = big_query_utils.create_big_query()
        row = big_query_utils.make_row(str(uuid.uuid4()), new_result)
        if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET,
                                           _TABLE + "$" + _NOW,
                                           [row]):
            print 'Error when uploading result', new_result


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-cpp", "--cpp_input_file",
                        help="The CPP benchmark result file's name",
                        default="")
    parser.add_argument("-java", "--java_input_file",
                        help="The Java benchmark result file's name",
                        default="")
    parser.add_argument("-python", "--python_input_file",
                        help="The Python benchmark result file's name",
                        default="")
    parser.add_argument("-go", "--go_input_file",
                        help="The golang benchmark result file's name",
                        default="")
    args = parser.parse_args()

    parse_cpp_result(args.cpp_input_file)
    parse_python_result(args.python_input_file)
    parse_java_result(args.java_input_file)
    parse_go_result(args.go_input_file)

    metadata = get_metadata()
    print "uploading cpp results..."
    upload_result(cpp_result, metadata)
    print "uploading java results..."
    upload_result(java_result, metadata)
    print "uploading python results..."
    upload_result(python_result, metadata)
    print "uploading go results..."
    upload_result(go_result, metadata)
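
All four parsers normalize results to the same unit, MiB/s: the C++ output already reports bytes_per_second and is only divided by 2 ** 20, while the Python, Java, and Go paths divide the payload size in bytes by the average time per operation in nanoseconds and scale by 1e9 / 2 ** 20. A rough sketch of the Go case, reusing the sample line quoted in the comment above parse_go_result (the payload size below is a made-up placeholder, not the real google_message2 dataset size):

# Sample 'go test -bench' output line, as in the comment block above.
line = ("Benchmark/.././datasets/google_message2/dataset.google_message2.pb/"
        "Unmarshal-12    3000    705784 ns/op")
fields = line.split()
ns_per_op = float(fields[2])   # 705784 ns per Unmarshal call
total_bytes = 84570.0          # placeholder payload size in bytes

# bytes/ns -> bytes/s -> MiB/s, the same arithmetic parse_go_result() uses
throughput_mib_s = total_bytes / ns_per_op * 1e9 / 2 ** 20
print(throughput_mib_s)        # ~114 MiB/s with these made-up numbers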
@@ -0,0 +1,86 @@
#!/bin/bash
#
# Change to repo root
cd $(dirname $0)/../../..

export OUTPUT_DIR=testoutput
oldpwd=`pwd`

# tcmalloc
if [ ! -f gperftools/.libs/libtcmalloc.so ]; then
  git clone https://github.com/gperftools/gperftools.git
  cd gperftools
  ./autogen.sh
  ./configure
  make -j8
  cd ..
fi

# download datasets for benchmark
cd benchmarks
./download_data.sh
datasets=`find . -type f -name "dataset.*.pb"`
cd $oldpwd

# build Python protobuf
./autogen.sh
./configure CXXFLAGS="-fPIC -O2 -fno-semantic-interposition"
make -j8
cd python
python setup.py build --cpp_implementation
pip install .

# build and run Python benchmark
cd ../benchmarks
make python-pure-python-benchmark
make python-cpp-reflection-benchmark
make -j8 python-cpp-generated-code-benchmark
echo "[" > tmp/python_result.json
echo "benchmarking pure python..."
./python-pure-python-benchmark --json --behavior_prefix="pure-python-benchmark" $datasets >> tmp/python_result.json
echo "," >> "tmp/python_result.json"
echo "benchmarking python cpp reflection..."
env LD_PRELOAD="$oldpwd/gperftools/.libs/libtcmalloc.so" ./python-cpp-reflection-benchmark --json --behavior_prefix="cpp-reflection-benchmark" $datasets >> tmp/python_result.json
echo "," >> "tmp/python_result.json"
echo "benchmarking python cpp generated code..."
env LD_PRELOAD="$oldpwd/gperftools/.libs/libtcmalloc.so" ./python-cpp-generated-code-benchmark --json --behavior_prefix="cpp-generated-code-benchmark" $datasets >> tmp/python_result.json
echo "]" >> "tmp/python_result.json"
cd $oldpwd

# build CPP protobuf
./configure
make clean && make -j8

# build CPP benchmark
cd benchmarks
mv tmp/python_result.json . && make clean && make -j8 cpp-benchmark && mv python_result.json tmp
echo "benchmarking cpp..."
env LD_PRELOAD="$oldpwd/gperftools/.libs/libtcmalloc.so" ./cpp-benchmark --benchmark_min_time=5.0 --benchmark_out_format=json --benchmark_out="tmp/cpp_result.json" $datasets
cd $oldpwd

# build go protobuf
export PATH="`pwd`/src:$PATH"
export GOPATH="$HOME/gocode"
mkdir -p "$GOPATH/src/github.com/google"
rm -f "$GOPATH/src/github.com/google/protobuf"
ln -s "`pwd`" "$GOPATH/src/github.com/google/protobuf"
export PATH="$GOPATH/bin:$PATH"
go get github.com/golang/protobuf/protoc-gen-go

# build go benchmark
cd benchmarks
make go-benchmark
echo "benchmarking go..."
./go-benchmark $datasets > tmp/go_result.txt

# build java benchmark
make java-benchmark
echo "benchmarking java..."
./java-benchmark -Cresults.file.options.file="tmp/java_result.json" $datasets

# upload result to bq
make python_add_init
python util/run_and_upload.py -cpp="../tmp/cpp_result.json" -java="../tmp/java_result.json" \
    -python="../tmp/python_result.json" -go="../tmp/go_result.txt"

cd $oldpwd
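
On CI, the metadata attached to every uploaded row comes from the environment variables that get_metadata() in run_and_upload.py reads (BUILD_NUMBER, BUILD_URL, JOB_NAME, GIT_COMMIT, ghprbActualCommit). A small sketch of roughly what one uploaded row looks like; the values are hypothetical, only the key names mirror get_metadata() and upload_result():

import copy

metadata = {
    'created': '1525132800',            # calendar.timegm(time.gmtime()) as a string
    'buildNumber': '123',               # from $BUILD_NUMBER
    'buildUrl': 'https://ci.example.org/job/123',  # from $BUILD_URL
    'jobName': 'protobuf/benchmark',    # from $JOB_NAME
    'gitCommit': '5fb73f80f2',          # from $GIT_COMMIT
}

result = {
    'language': 'go',
    'dataFilename': 'google_message2',
    'behavior': 'Unmarshal',
    'throughput': 114.3,                # MiB/s, made-up number
}

row_json = copy.deepcopy(result)
row_json['metadata'] = metadata         # upload_result() nests metadata this way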
@@ -0,0 +1,11 @@
# Config file for running tests in Kokoro

# Location of the build script in repository
build_file: "protobuf/kokoro/linux/benchmark/build.sh"
timeout_mins: 240

action {
  define_artifacts {
    regex: "**/sponge_log.xml"
  }
}