Merge pull request #9642 from ctiller/bm_stats

Allow uploading results to BigQuery
pull/9678/merge
Craig Tiller 8 years ago committed by GitHub
commit 2be48b6166
  1. 2
      tools/profiling/latency_profile/run_latency_profile.sh
  2. 162
      tools/profiling/microbenchmarks/bm2bq.py
  3. 62
      tools/run_tests/run_microbenchmark.py

@ -44,4 +44,4 @@ else
PYTHON=python2.7
fi
$PYTHON tools/run_tests/run_microbenchmark.py bm_fullstack
$PYTHON tools/run_tests/run_microbenchmark.py --collect summary perf latency --bigquery_upload

@ -0,0 +1,162 @@
#!/usr/bin/env python2.7
#
# Convert google-benchmark json output to something that can be uploaded to
# BigQuery
#
#
# Copyright 2017, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import json
import csv
import os
# Ordered (column name, BigQuery type) pairs.  The order here is the order
# of the fields handed to the CSV writer, and `--schema` prints the same
# pairs as "name:TYPE" entries.
columns = [
    # Where and when the run happened.
    ('jenkins_build', 'integer'),
    ('jenkins_job', 'string'),
    ('date', 'timestamp'),
    ('cpu_scaling_enabled', 'boolean'),
    ('num_cpus', 'integer'),
    ('mhz_per_cpu', 'integer'),
    ('library_build_type', 'string'),
    # Benchmark identity, as decomposed by parse_name().
    ('name', 'string'),
    ('fixture', 'string'),
    ('client_mutator', 'string'),
    ('server_mutator', 'string'),
    ('request_size', 'integer'),
    ('response_size', 'integer'),
    ('request_count', 'integer'),
    # Measurements reported for the benchmark.
    ('iterations', 'integer'),
    ('time_unit', 'string'),
    ('real_time', 'integer'),
    ('cpu_time', 'integer'),
    ('bytes_per_second', 'float'),
    # Counters taken from the benchmark's "label" string.
    ('allocs_per_iteration', 'float'),
    ('locks_per_iteration', 'float'),
    ('writes_per_iteration', 'float'),
]
# `bm2bq.py --schema` only prints the table schema, one "name:TYPE" entry
# per column joined by ",\n", and then exits successfully.
if sys.argv[1] == '--schema':
    schema_entries = ['%s:%s' % (field, kind.upper())
                      for field, kind in columns]
    sys.stdout.write(',\n'.join(schema_entries) + '\n')
    sys.exit(0)

# Otherwise the first argument is the path of a benchmark JSON output file.
with open(sys.argv[1]) as f:
    js = json.load(f)

# Rows go to stdout as CSV, with fields in the order declared in `columns`.
writer = csv.DictWriter(sys.stdout, [field for field, _ in columns])
# For every benchmark family that parse_name() understands:
#   'tpl' - column names for the template arguments (between '<' and '>'),
#           in order of appearance;
#   'dyn' - column names for the '/'-separated run-time arguments, in order.
bm_specs = {
    'BM_UnaryPingPong': {
        'tpl': ['fixture', 'client_mutator', 'server_mutator'],
        'dyn': ['request_size', 'response_size'],
    },
    'BM_PumpStreamClientToServer': {
        'tpl': ['fixture'],
        'dyn': ['request_size'],
    },
    'BM_PumpStreamServerToClient': {
        'tpl': ['fixture'],
        'dyn': ['request_size'],
    },
    'BM_StreamingPingPong': {
        'tpl': ['fixture', 'client_mutator', 'server_mutator'],
        'dyn': ['request_size', 'request_count'],
    },
    'BM_StreamingPingPongMsgs': {
        'tpl': ['fixture', 'client_mutator', 'server_mutator'],
        'dyn': ['request_size'],
    },
}
def numericalize(s):
    """Convert a benchmark argument string into an integer.

    Accepted forms:
      ''      -> '' (returned unchanged, so an absent value stays empty)
      '<n>k'  -> n * 1024
      '<n>M'  -> n * 1024 * 1024
      '<n>'   -> n

    Raises:
      ValueError: if `s` is not in one of the forms above.  int() already
        raises ValueError for a malformed numeric part, so every rejection
        path reports the same exception type.
    """
    if not s:
        return ''
    suffix = s[-1]
    if suffix == 'k':
        return int(s[:-1]) * 1024
    if suffix == 'M':
        return int(s[:-1]) * 1024 * 1024
    if '0' <= suffix <= '9':
        return int(s)
    # The original `assert 'not a number: %s' % s` tested a non-empty string,
    # which is always true, so bad input fell through and returned None.
    raise ValueError('not a number: %s' % s)
def parse_name(name):
    """Split a benchmark name into its base name and named arguments.

    A name has the shape `Base<T1, T2, ...>/d1/d2/...`, where both the
    template part and the '/'-separated part are optional in the syntax.
    The base name must be a key of `bm_specs`, and the number of template
    and dynamic arguments must match that spec exactly (checked with
    assertions).

    Returns a dict holding 'name' (the base name), each dynamic argument
    passed through numericalize() under its 'dyn' column name, and each
    template argument (whitespace-stripped string) under its 'tpl' column
    name.
    """
    template_values = []
    base = name

    open_at = base.find('<')
    if open_at != -1:
        close_at = base.rfind('>')
        depth = 0
        pending = []
        # Split on commas, but only those outside nested <...> groups.
        for ch in base[open_at + 1:close_at]:
            if ch == ',' and depth == 0:
                template_values.append(''.join(pending).strip())
                pending = []
                continue
            if ch == '<':
                depth += 1
            elif ch == '>':
                depth -= 1
            pending.append(ch)
        template_values.append(''.join(pending).strip())
        # Cut the whole <...> section out of the name.
        base = base[:open_at] + base[close_at + 1:]

    # Everything after the first '/' is a run-time argument.
    pieces = base.split('/')
    base = pieces[0]
    dynamic_values = pieces[1:]

    assert base in bm_specs
    assert len(dynamic_values) == len(bm_specs[base]['dyn'])
    assert len(template_values) == len(bm_specs[base]['tpl'])

    parsed = {'name': base}
    for column, raw in zip(bm_specs[base]['dyn'], dynamic_values):
        parsed[column] = numericalize(raw)
    for column, value in zip(bm_specs[base]['tpl'], template_values):
        parsed[column] = value
    return parsed
# Emit one CSV row per benchmark result.
for bm in js['benchmarks']:
    # The optional "label" is a space-separated list of key:value pairs.
    # Keys ending in "/iter" are renamed to the "*_per_iteration" columns.
    labels = {}
    for item in bm.get('label', '').split(' '):
        if not item:
            # No label at all, or repeated/trailing spaces.  Previously the
            # resulting empty entry made the dict() construction raise.
            continue
        key, value = item.split(':')
        labels[key.replace('/iter', '_per_iteration')] = value

    row = {
        'jenkins_build': os.environ.get('BUILD_NUMBER', ''),
        'jenkins_job': os.environ.get('JOB_NAME', ''),
    }
    # Later updates win: run context, then the raw benchmark fields, then
    # the fields decoded from the benchmark name, then the label counters.
    row.update(js['context'])
    row.update(bm)
    row.update(parse_name(row['name']))
    row.update(labels)
    # The raw label string is not a column; it may be absent altogether, so
    # pop() is used rather than del, which raised KeyError in that case.
    row.pop('label', None)
    writer.writerow(row)

@ -32,6 +32,7 @@ import multiprocessing
import os
import subprocess
import sys
import argparse
import python_utils.jobset as jobset
import python_utils.start_port_server as start_port_server
@ -72,12 +73,16 @@ def link(txt, tgt):
global index_html
index_html += "<p><a href=\"%s\">%s</a></p>\n" % (tgt, txt)
benchmarks = []
profile_analysis = []
cleanup = []
def text(txt):
global index_html
index_html += "<p><pre>%s</pre></p>\n" % txt
def collect_latency(bm_name, args):
"""generate latency profiles"""
benchmarks = []
profile_analysis = []
cleanup = []
for bm_name in sys.argv[1:]:
# generate latency profiles
heading('Latency Profiles: %s' % bm_name)
subprocess.check_call(
['make', bm_name,
@ -115,14 +120,16 @@ for bm_name in sys.argv[1:]:
jobset.run(profile_analysis, maxjobs=multiprocessing.cpu_count())
jobset.run(cleanup, maxjobs=multiprocessing.cpu_count())
# generate flamegraphs
def collect_perf(bm_name, args):
"""generate flamegraphs"""
heading('Flamegraphs: %s' % bm_name)
subprocess.check_call(
['make', bm_name,
'CONFIG=mutrace', '-j', '%d' % multiprocessing.cpu_count()])
for line in subprocess.check_output(['bins/mutrace/%s' % bm_name,
'--benchmark_list_tests']).splitlines():
subprocess.check_call(['sudo', 'perf', 'record', '-g', '-c', '1000',
subprocess.check_call(['sudo', 'perf', 'record', '-o', 'perf.data',
'-g', '-c', '1000',
'bins/mutrace/%s' % bm_name,
'--benchmark_filter=^%s$' % line,
'--benchmark_min_time=20'])
@ -136,6 +143,47 @@ for bm_name in sys.argv[1:]:
f.write(subprocess.check_output([
'%s/flamegraph.pl' % flamegraph_dir, 'bm.folded']))
def collect_summary(bm_name, args):
    """Build and run `bm_name` with CONFIG=counters and report its output.

    The benchmark's stdout is added to the HTML report via text(), and its
    JSON results are written to out.json.  When `args.bigquery_upload` is
    set, out.json is converted to CSV with bm2bq.py and loaded into the
    `microbenchmarks.microbenchmarks` table with the `bq` tool.

    Raises:
      subprocess.CalledProcessError: if any of the invoked commands fails.
    """
    heading('Summary: %s' % bm_name)
    subprocess.check_call(
        ['make', bm_name,
         'CONFIG=counters', '-j', '%d' % multiprocessing.cpu_count()])
    text(subprocess.check_output(['bins/counters/%s' % bm_name,
                                  '--benchmark_out=out.json',
                                  '--benchmark_out_format=json']))
    if args.bigquery_upload:
        # Write and upload the same file.  Previously the CSV was written
        # to /tmp/out.csv while `bq load` was pointed at ./out.csv.
        csv_path = 'out.csv'
        with open(csv_path, 'w') as f:
            f.write(subprocess.check_output(
                ['tools/profiling/microbenchmarks/bm2bq.py', 'out.json']))
        subprocess.check_call(
            ['bq', 'load', 'microbenchmarks.microbenchmarks', csv_path])
# Collector name (as accepted by --collect) -> function(bm_name, args).
collectors = {
    'latency': collect_latency,
    'perf': collect_perf,
    'summary': collect_summary,
}

argp = argparse.ArgumentParser(description='Collect data from microbenchmarks')
# By default every known collector runs.
argp.add_argument('-c', '--collect',
                  nargs='+',
                  choices=sorted(collectors),
                  default=sorted(collectors),
                  help='Which collectors should be run against each benchmark')
argp.add_argument('-b', '--benchmarks',
                  nargs='+',
                  type=str,
                  default=['bm_fullstack'],
                  help='Which microbenchmarks should be run')
# A plain flag: False unless given on the command line.
argp.add_argument('--bigquery_upload',
                  action='store_true',
                  help='Upload results from summary collection to bigquery')
args = argp.parse_args()

# Run every requested collector against every requested benchmark.
for bm_name in args.benchmarks:
    for collect in args.collect:
        run_collector = collectors[collect]
        run_collector(bm_name, args)

# Close the HTML document accumulated by the collectors and write it out.
index_html += "</body>\n</html>\n"
with open('reports/index.html', 'w') as f:
    f.write(index_html)

Loading…
Cancel
Save