From ab69ea3f9df983ac7a2d14b71342ca98c20f34cc Mon Sep 17 00:00:00 2001 From: ncteisen Date: Fri, 5 May 2017 16:39:07 -0700 Subject: [PATCH 01/19] Modulized the bm_*.py functions --- tools/profiling/microbenchmarks/bm_build.py | 68 ++++++ .../profiling/microbenchmarks/bm_constants.py | 56 +++++ tools/profiling/microbenchmarks/bm_diff.py | 200 ++++-------------- tools/profiling/microbenchmarks/bm_run.py | 77 +++++++ .../{speedup.py => bm_speedup.py} | 2 + 5 files changed, 250 insertions(+), 153 deletions(-) create mode 100755 tools/profiling/microbenchmarks/bm_build.py create mode 100644 tools/profiling/microbenchmarks/bm_constants.py create mode 100755 tools/profiling/microbenchmarks/bm_run.py rename tools/profiling/microbenchmarks/{speedup.py => bm_speedup.py} (98%) diff --git a/tools/profiling/microbenchmarks/bm_build.py b/tools/profiling/microbenchmarks/bm_build.py new file mode 100755 index 00000000000..13ca1dfd1b4 --- /dev/null +++ b/tools/profiling/microbenchmarks/bm_build.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python2.7 +# Copyright 2017, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +### Python utility to build opt and counters benchmarks """ + +import bm_constants + +import argparse +import subprocess +import multiprocessing +import os +import shutil + +def _args(): + argp = argparse.ArgumentParser(description='Builds microbenchmarks') + argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS) + argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count()) + argp.add_argument('-n', '--name', type=str, help='Unique name of this build') + return argp.parse_args() + +def _make_cmd(cfg, jobs, benchmarks): + return ['make'] + benchmarks + [ + 'CONFIG=%s' % cfg, '-j', '%d' % jobs] + +def build(name, jobs, benchmarks): + shutil.rmtree('bm_diff_%s' % name, ignore_errors=True) + subprocess.check_call(['git', 'submodule', 'update']) + try: + subprocess.check_call(_make_cmd('opt', jobs, benchmarks)) + subprocess.check_call(_make_cmd('counters', jobs, benchmarks)) + except subprocess.CalledProcessError, e: + subprocess.check_call(['make', 'clean']) + subprocess.check_call(_make_cmd('opt', jobs, benchmarks)) + subprocess.check_call(_make_cmd('counters', jobs, benchmarks)) + os.rename('bins', 'bm_diff_%s' % name, ) + +if __name__ == '__main__': + args = _args() + build(args.name, args.jobs, args.benchmarks) + + diff --git a/tools/profiling/microbenchmarks/bm_constants.py b/tools/profiling/microbenchmarks/bm_constants.py new file mode 100644 index 00000000000..ada1e32e72e --- /dev/null +++ b/tools/profiling/microbenchmarks/bm_constants.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python2.7 +# Copyright 2017, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +### Configurable constants for the bm_*.py family """ + +_AVAILABLE_BENCHMARK_TESTS = ['bm_fullstack_unary_ping_pong', + 'bm_fullstack_streaming_ping_pong', + 'bm_fullstack_streaming_pump', + 'bm_closure', + 'bm_cq', + 'bm_call_create', + 'bm_error', + 'bm_chttp2_hpack', + 'bm_chttp2_transport', + 'bm_pollset', + 'bm_metadata', + 'bm_fullstack_trickle'] + + +_INTERESTING = ( + 'cpu_time', + 'real_time', + 'locks_per_iteration', + 'allocs_per_iteration', + 'writes_per_iteration', + 'atm_cas_per_iteration', + 'atm_add_per_iteration', + 'nows_per_iteration', +) diff --git a/tools/profiling/microbenchmarks/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff.py index 299abb5fdb7..4de318fec7c 100755 --- a/tools/profiling/microbenchmarks/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff.py @@ -28,47 +28,18 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import sys -import json +""" Computes the diff between two bm runs and outputs significant results """ + import bm_json +import bm_constants +import bm_speedup + +import json import tabulate import argparse -from scipy import stats -import subprocess -import multiprocessing import collections -import pipes -import os -sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', 'run_tests', 'python_utils')) -import comment_on_pr -import jobset -import itertools -import speedup -import random -import shutil -import errno - -_INTERESTING = ( - 'cpu_time', - 'real_time', - 'locks_per_iteration', - 'allocs_per_iteration', - 'writes_per_iteration', - 'atm_cas_per_iteration', - 'atm_add_per_iteration', - 'cli_transport_stalls_per_iteration', - 'cli_stream_stalls_per_iteration', - 'svr_transport_stalls_per_iteration', - 'svr_stream_stalls_per_iteration' - 'nows_per_iteration', -) -def changed_ratio(n, o): - if float(o) <= .0001: o = 0 - if float(n) <= .0001: n = 0 - if o == 0 and n == 0: return 0 - if o == 0: return 100 - return (float(n)-float(o))/float(o) +verbose = False def median(ary): ary = sorted(ary) @@ -78,91 +49,27 @@ def median(ary): else: return ary[n/2] -def min_change(pct): - return lambda n, o: abs(changed_ratio(n,o)) > pct/100.0 - -_AVAILABLE_BENCHMARK_TESTS = ['bm_fullstack_unary_ping_pong', - 'bm_fullstack_streaming_ping_pong', - 'bm_fullstack_streaming_pump', - 'bm_closure', - 'bm_cq', - 'bm_call_create', - 'bm_error', - 'bm_chttp2_hpack', - 'bm_chttp2_transport', - 'bm_pollset', - 'bm_metadata', - 'bm_fullstack_trickle'] - -argp = argparse.ArgumentParser(description='Perform diff on microbenchmarks') -argp.add_argument('-t', '--track', - choices=sorted(_INTERESTING), - nargs='+', - default=sorted(_INTERESTING), - help='Which metrics to track') -argp.add_argument('-b', '--benchmarks', nargs='+', choices=_AVAILABLE_BENCHMARK_TESTS, default=['bm_cq']) -argp.add_argument('-d', '--diff_base', type=str) -argp.add_argument('-r', '--repetitions', type=int, default=1) -argp.add_argument('-l', '--loops', type=int, default=20) -argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count()) -args = argp.parse_args() - -assert args.diff_base - -def avg(lst): - sum = 0.0 - n = 0.0 - for el in lst: - sum += el - n += 1 - return sum / n - -def make_cmd(cfg): - return ['make'] + args.benchmarks + [ - 'CONFIG=%s' % cfg, '-j', '%d' % args.jobs] - -def build(dest): - shutil.rmtree('bm_diff_%s' % dest, ignore_errors=True) - subprocess.check_call(['git', 'submodule', 'update']) - try: - 
subprocess.check_call(make_cmd('opt')) - subprocess.check_call(make_cmd('counters')) - except subprocess.CalledProcessError, e: - subprocess.check_call(['make', 'clean']) - subprocess.check_call(make_cmd('opt')) - subprocess.check_call(make_cmd('counters')) - os.rename('bins', 'bm_diff_%s' % dest) - -def collect1(bm, cfg, ver, idx): - cmd = ['bm_diff_%s/%s/%s' % (ver, cfg, bm), - '--benchmark_out=%s.%s.%s.%d.json' % (bm, cfg, ver, idx), - '--benchmark_out_format=json', - '--benchmark_repetitions=%d' % (args.repetitions) - ] - return jobset.JobSpec(cmd, shortname='%s %s %s %d/%d' % (bm, cfg, ver, idx+1, args.loops), - verbose_success=True, timeout_seconds=None) - -build('new') - -where_am_i = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip() -subprocess.check_call(['git', 'checkout', args.diff_base]) -try: - build('old') -finally: - subprocess.check_call(['git', 'checkout', where_am_i]) - subprocess.check_call(['git', 'submodule', 'update']) - -jobs = [] -for loop in range(0, args.loops): - jobs.extend(x for x in itertools.chain( - (collect1(bm, 'opt', 'new', loop) for bm in args.benchmarks), - (collect1(bm, 'counters', 'new', loop) for bm in args.benchmarks), - (collect1(bm, 'opt', 'old', loop) for bm in args.benchmarks), - (collect1(bm, 'counters', 'old', loop) for bm in args.benchmarks), - )) -random.shuffle(jobs, random.SystemRandom().random) - -jobset.run(jobs, maxjobs=args.jobs) +def _args(): + argp = argparse.ArgumentParser(description='Perform diff on microbenchmarks') + argp.add_argument('-t', '--track', + choices=sorted(bm_constants._INTERESTING), + nargs='+', + default=sorted(bm_constants._INTERESTING), + help='Which metrics to track') + argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS) + argp.add_argument('-l', '--loops', type=int, default=20) + argp.add_argument('-n', '--new', type=str, help='New benchmark name') + argp.add_argument('-o', '--old', type=str, help='Old benchmark name') + argp.add_argument('-v', '--verbose', type=bool, help='print details of before/after') + args = argp.parse_args() + global verbose + if args.verbose: verbose = True + assert args.new + assert args.old + return args + +def maybe_print(str): + if verbose: print str class Benchmark: @@ -173,18 +80,18 @@ class Benchmark: } self.final = {} - def add_sample(self, data, new): - for f in args.track: + def add_sample(self, track, data, new): + for f in track: if f in data: self.samples[new][f].append(float(data[f])) - def process(self): - for f in sorted(args.track): + def process(self, track): + for f in sorted(track): new = self.samples[True][f] old = self.samples[False][f] if not new or not old: continue mdn_diff = abs(median(new) - median(old)) - print '%s: new=%r old=%r mdn_diff=%r' % (f, new, old, mdn_diff) + maybe_print('%s: new=%r old=%r mdn_diff=%r' % (f, new, old, mdn_diff)) s = speedup.speedup(new, old) if abs(s) > 3 and mdn_diff > 0.5: self.final[f] = '%+d%%' % s @@ -196,29 +103,17 @@ class Benchmark: def row(self, flds): return [self.final[f] if f in self.final else '' for f in flds] - -def eintr_be_gone(fn): - """Run fn until it doesn't stop because of EINTR""" - while True: - try: - return fn() - except IOError, e: - if e.errno != errno.EINTR: - raise - - def read_json(filename): try: with open(filename) as f: return json.loads(f.read()) except ValueError, e: return None - -def finalize(): +def finalize(bms, loops, track): benchmarks = 
collections.defaultdict(Benchmark) - for bm in args.benchmarks: - for loop in range(0, args.loops): + for bm in bms: + for loop in range(0, loops): js_new_ctr = read_json('%s.counters.new.%d.json' % (bm, loop)) js_new_opt = read_json('%s.opt.new.%d.json' % (bm, loop)) js_old_ctr = read_json('%s.counters.old.%d.json' % (bm, loop)) @@ -226,22 +121,20 @@ def finalize(): if js_new_ctr: for row in bm_json.expand_json(js_new_ctr, js_new_opt): - print row name = row['cpp_name'] if name.endswith('_mean') or name.endswith('_stddev'): continue - benchmarks[name].add_sample(row, True) + benchmarks[name].add_sample(track, row, True) if js_old_ctr: for row in bm_json.expand_json(js_old_ctr, js_old_opt): - print row name = row['cpp_name'] if name.endswith('_mean') or name.endswith('_stddev'): continue - benchmarks[name].add_sample(row, False) + benchmarks[name].add_sample(track, row, False) really_interesting = set() for name, bm in benchmarks.items(): - print name - really_interesting.update(bm.process()) - fields = [f for f in args.track if f in really_interesting] + maybe_print(name) + really_interesting.update(bm.process(track)) + fields = [f for f in track if f in really_interesting] headers = ['Benchmark'] + fields rows = [] @@ -249,11 +142,12 @@ def finalize(): if benchmarks[name].skip(): continue rows.append([name] + benchmarks[name].row(fields)) if rows: - text = 'Performance differences noted:\n' + tabulate.tabulate(rows, headers=headers, floatfmt='+.2f') + return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f') else: - text = 'No significant performance differences' - print text - comment_on_pr.comment_on_pr('```\n%s\n```' % text) + return None + +if __name__ == '__main__': + args = _args() + print finalize(args.benchmarks, args.loops, args.track) -eintr_be_gone(finalize) diff --git a/tools/profiling/microbenchmarks/bm_run.py b/tools/profiling/microbenchmarks/bm_run.py new file mode 100755 index 00000000000..458e3194403 --- /dev/null +++ b/tools/profiling/microbenchmarks/bm_run.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python2.7 +# Copyright 2017, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +### Python utility to run opt and counters benchmarks and save json output """ + +import bm_constants + +import argparse +import multiprocessing +import random +import itertools +import sys +import os + +sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', 'run_tests', 'python_utils')) +import jobset + +def _args(): + argp = argparse.ArgumentParser(description='Runs microbenchmarks') + argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS) + argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count()) + argp.add_argument('-n', '--name', type=str, help='Unique name of this build') + argp.add_argument('-r', '--repetitions', type=int, default=1) + argp.add_argument('-l', '--loops', type=int, default=20) + return argp.parse_args() + +def _collect_bm_data(bm, cfg, name, reps, idx): + cmd = ['bm_diff_%s/%s/%s' % (name, cfg, bm), + '--benchmark_out=%s.%s.%s.%d.json' % (bm, cfg, name, idx), + '--benchmark_out_format=json', + '--benchmark_repetitions=%d' % (reps) + ] + return jobset.JobSpec(cmd, shortname='%s %s %s %d/%d' % (bm, cfg, name, idx+1, args.loops), + verbose_success=True, timeout_seconds=None) + +def _run_bms(benchmarks, name, loops, reps): + jobs = [] + for loop in range(0, loops): + jobs.extend(x for x in itertools.chain( + (_collect_bm_data(bm, 'opt', name, reps, loop) for bm in benchmarks), + (_collect_bm_data(bm, 'counters', name, reps, loop) for bm in benchmarks), + )) + random.shuffle(jobs, random.SystemRandom().random) + + jobset.run(jobs, maxjobs=args.jobs) + +if __name__ == '__main__': + args = _args() + assert args.name + _run_bms(args.benchmarks, args.name, args.loops, args.repetitions) diff --git a/tools/profiling/microbenchmarks/speedup.py b/tools/profiling/microbenchmarks/bm_speedup.py similarity index 98% rename from tools/profiling/microbenchmarks/speedup.py rename to tools/profiling/microbenchmarks/bm_speedup.py index 8af0066c9df..9e395a782ed 100644 --- a/tools/profiling/microbenchmarks/speedup.py +++ b/tools/profiling/microbenchmarks/bm_speedup.py @@ -27,6 +27,8 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+""" The math behind the diff functionality """ + from scipy import stats import math From dc76c66521e5e26ba6634b4dedcfbb0a9766aed5 Mon Sep 17 00:00:00 2001 From: ncteisen Date: Fri, 5 May 2017 17:13:07 -0700 Subject: [PATCH 02/19] Add driver for bm_*.py pipeline --- tools/profiling/microbenchmarks/README.md | 0 tools/profiling/microbenchmarks/bm_build.py | 14 +-- tools/profiling/microbenchmarks/bm_diff.py | 31 +++--- tools/profiling/microbenchmarks/bm_main.py | 100 ++++++++++++++++++++ tools/profiling/microbenchmarks/bm_run.py | 20 ++-- 5 files changed, 133 insertions(+), 32 deletions(-) create mode 100644 tools/profiling/microbenchmarks/README.md create mode 100755 tools/profiling/microbenchmarks/bm_main.py diff --git a/tools/profiling/microbenchmarks/README.md b/tools/profiling/microbenchmarks/README.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tools/profiling/microbenchmarks/bm_build.py b/tools/profiling/microbenchmarks/bm_build.py index 13ca1dfd1b4..a5d1ec34475 100755 --- a/tools/profiling/microbenchmarks/bm_build.py +++ b/tools/profiling/microbenchmarks/bm_build.py @@ -45,24 +45,24 @@ def _args(): argp.add_argument('-n', '--name', type=str, help='Unique name of this build') return argp.parse_args() -def _make_cmd(cfg, jobs, benchmarks): +def _make_cmd(cfg, benchmarks, jobs): return ['make'] + benchmarks + [ 'CONFIG=%s' % cfg, '-j', '%d' % jobs] -def build(name, jobs, benchmarks): +def build(name, benchmarks, jobs): shutil.rmtree('bm_diff_%s' % name, ignore_errors=True) subprocess.check_call(['git', 'submodule', 'update']) try: - subprocess.check_call(_make_cmd('opt', jobs, benchmarks)) - subprocess.check_call(_make_cmd('counters', jobs, benchmarks)) + subprocess.check_call(_make_cmd('opt', benchmarks, jobs)) + subprocess.check_call(_make_cmd('counters', benchmarks, jobs)) except subprocess.CalledProcessError, e: subprocess.check_call(['make', 'clean']) - subprocess.check_call(_make_cmd('opt', jobs, benchmarks)) - subprocess.check_call(_make_cmd('counters', jobs, benchmarks)) + subprocess.check_call(_make_cmd('opt', benchmarks, jobs)) + subprocess.check_call(_make_cmd('counters', benchmarks, jobs)) os.rename('bins', 'bm_diff_%s' % name, ) if __name__ == '__main__': args = _args() - build(args.name, args.jobs, args.benchmarks) + build(args.name, args.benchmarks, args.jobs) diff --git a/tools/profiling/microbenchmarks/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff.py index 4de318fec7c..f68dff840ec 100755 --- a/tools/profiling/microbenchmarks/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff.py @@ -41,7 +41,7 @@ import collections verbose = False -def median(ary): +def _median(ary): ary = sorted(ary) n = len(ary) if n%2 == 0: @@ -68,7 +68,7 @@ def _args(): assert args.old return args -def maybe_print(str): +def _maybe_print(str): if verbose: print str class Benchmark: @@ -85,14 +85,15 @@ class Benchmark: if f in data: self.samples[new][f].append(float(data[f])) - def process(self, track): + def process(self, track, new_name, old_name): for f in sorted(track): new = self.samples[True][f] old = self.samples[False][f] if not new or not old: continue - mdn_diff = abs(median(new) - median(old)) - maybe_print('%s: new=%r old=%r mdn_diff=%r' % (f, new, old, mdn_diff)) - s = speedup.speedup(new, old) + mdn_diff = abs(_median(new) - _median(old)) + _maybe_print('%s: %s=%r %s=%r mdn_diff=%r' % + (f, new_name, new, old_name, old, mdn_diff)) + s = bm_speedup.speedup(new, old) if abs(s) > 3 and mdn_diff > 0.5: self.final[f] = '%+d%%' % s return self.final.keys() @@ 
-103,21 +104,21 @@ class Benchmark: def row(self, flds): return [self.final[f] if f in self.final else '' for f in flds] -def read_json(filename): +def _read_json(filename): try: with open(filename) as f: return json.loads(f.read()) except ValueError, e: return None -def finalize(bms, loops, track): +def diff(bms, loops, track, old, new): benchmarks = collections.defaultdict(Benchmark) for bm in bms: for loop in range(0, loops): - js_new_ctr = read_json('%s.counters.new.%d.json' % (bm, loop)) - js_new_opt = read_json('%s.opt.new.%d.json' % (bm, loop)) - js_old_ctr = read_json('%s.counters.old.%d.json' % (bm, loop)) - js_old_opt = read_json('%s.opt.old.%d.json' % (bm, loop)) + js_new_ctr = _read_json('%s.counters.%s.%d.json' % (bm, new, loop)) + js_new_opt = _read_json('%s.opt.%s.%d.json' % (bm, new, loop)) + js_old_ctr = _read_json('%s.counters.%s.%d.json' % (bm, old, loop)) + js_old_opt = _read_json('%s.opt.%s.%d.json' % (bm, old, loop)) if js_new_ctr: for row in bm_json.expand_json(js_new_ctr, js_new_opt): @@ -132,8 +133,8 @@ def finalize(bms, loops, track): really_interesting = set() for name, bm in benchmarks.items(): - maybe_print(name) - really_interesting.update(bm.process(track)) + _maybe_print(name) + really_interesting.update(bm.process(track, new, old)) fields = [f for f in track if f in really_interesting] headers = ['Benchmark'] + fields @@ -148,6 +149,6 @@ def finalize(bms, loops, track): if __name__ == '__main__': args = _args() - print finalize(args.benchmarks, args.loops, args.track) + print diff(args.benchmarks, args.loops, args.track, args.old, args.new) diff --git a/tools/profiling/microbenchmarks/bm_main.py b/tools/profiling/microbenchmarks/bm_main.py new file mode 100755 index 00000000000..1a46b170155 --- /dev/null +++ b/tools/profiling/microbenchmarks/bm_main.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python2.7 +# Copyright 2017, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +""" Runs the entire bm_*.py pipeline, and possible comments on the PR """ + +import bm_constants +import bm_build +import bm_run +import bm_diff + +import sys +import os +import argparse +import multiprocessing +import subprocess + +sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', 'run_tests', 'python_utils')) +import comment_on_pr + +def _args(): + argp = argparse.ArgumentParser(description='Perform diff on microbenchmarks') + argp.add_argument('-t', '--track', + choices=sorted(bm_constants._INTERESTING), + nargs='+', + default=sorted(bm_constants._INTERESTING), + help='Which metrics to track') + argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS) + argp.add_argument('-d', '--diff_base', type=str) + argp.add_argument('-r', '--repetitions', type=int, default=1) + argp.add_argument('-l', '--loops', type=int, default=20) + argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count()) + args = argp.parse_args() + assert args.diff_base + return args + + +def eintr_be_gone(fn): + """Run fn until it doesn't stop because of EINTR""" + def inner(*args): + while True: + try: + return fn(*args) + except IOError, e: + if e.errno != errno.EINTR: + raise + return inner + +def main(args): + + bm_build.build('new', args.benchmarks, args.jobs) + + where_am_i = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip() + subprocess.check_call(['git', 'checkout', args.diff_base]) + try: + bm_build.build('old', args.benchmarks, args.jobs) + finally: + subprocess.check_call(['git', 'checkout', where_am_i]) + subprocess.check_call(['git', 'submodule', 'update']) + + bm_run.run('new', args.benchmarks, args.jobs, args.loops, args.repetitions) + bm_run.run('old', args.benchmarks, args.jobs, args.loops, args.repetitions) + + diff = bm_diff.diff(args.benchmarks, args.loops, args.track, 'old', 'new') + if diff: + text = 'Performance differences noted:\n' + diff + else: + text = 'No significant performance differences' + print text + comment_on_pr.comment_on_pr('```\n%s\n```' % text) + +if __name__ == '__main__': + args = _args() + main(args) diff --git a/tools/profiling/microbenchmarks/bm_run.py b/tools/profiling/microbenchmarks/bm_run.py index 458e3194403..a476a7e19fb 100755 --- a/tools/profiling/microbenchmarks/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_run.py @@ -51,27 +51,27 @@ def _args(): argp.add_argument('-l', '--loops', type=int, default=20) return argp.parse_args() -def _collect_bm_data(bm, cfg, name, reps, idx): +def _collect_bm_data(bm, cfg, name, reps, idx, loops): cmd = ['bm_diff_%s/%s/%s' % (name, cfg, bm), '--benchmark_out=%s.%s.%s.%d.json' % (bm, cfg, name, idx), '--benchmark_out_format=json', '--benchmark_repetitions=%d' % (reps) ] - return jobset.JobSpec(cmd, shortname='%s %s %s %d/%d' % (bm, cfg, name, idx+1, args.loops), + return jobset.JobSpec(cmd, shortname='%s %s %s %d/%d' % (bm, cfg, name, idx+1, loops), verbose_success=True, timeout_seconds=None) -def _run_bms(benchmarks, name, loops, reps): - jobs = [] +def run(name, benchmarks, jobs, loops, reps): + jobs_list = [] for loop in range(0, loops): - jobs.extend(x for x in itertools.chain( - (_collect_bm_data(bm, 'opt', name, reps, loop) for bm in benchmarks), - (_collect_bm_data(bm, 'counters', name, reps, loop) for bm in benchmarks), + jobs_list.extend(x for x in itertools.chain( + (_collect_bm_data(bm, 'opt', name, reps, loop, loops) for bm in benchmarks), + 
(_collect_bm_data(bm, 'counters', name, reps, loop, loops) for bm in benchmarks), )) - random.shuffle(jobs, random.SystemRandom().random) + random.shuffle(jobs_list, random.SystemRandom().random) - jobset.run(jobs, maxjobs=args.jobs) + jobset.run(jobs_list, maxjobs=jobs) if __name__ == '__main__': args = _args() assert args.name - _run_bms(args.benchmarks, args.name, args.loops, args.repetitions) + run(args.name, args.benchmarks, args.jobs, args.loops, args.repetitions) From fe4b1ee9b0ab574a7abb9b255ca7b21affb1da23 Mon Sep 17 00:00:00 2001 From: ncteisen Date: Mon, 8 May 2017 17:51:16 -0700 Subject: [PATCH 03/19] Relink jenkins back into the picture --- tools/jenkins/run_performance.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/jenkins/run_performance.sh b/tools/jenkins/run_performance.sh index f530fb46b86..52bc2e5d634 100755 --- a/tools/jenkins/run_performance.sh +++ b/tools/jenkins/run_performance.sh @@ -38,4 +38,4 @@ BENCHMARKS_TO_RUN="bm_fullstack_unary_ping_pong bm_fullstack_streaming_ping_pong cd $(dirname $0)/../.. tools/run_tests/start_port_server.py -tools/profiling/microbenchmarks/bm_diff.py -d origin/$ghprbTargetBranch -b $BENCHMARKS_TO_RUN +tools/profiling/microbenchmarks/bm_main.py -d origin/$ghprbTargetBranch -b $BENCHMARKS_TO_RUN From 64637b7c8c040bfad7043b62b462612eb0b88a22 Mon Sep 17 00:00:00 2001 From: ncteisen Date: Tue, 9 May 2017 14:23:16 -0700 Subject: [PATCH 04/19] restructure --- tools/jenkins/run_performance.sh | 2 +- .../microbenchmarks/bm_diff/README.md | 0 .../microbenchmarks/{ => bm_diff}/bm_build.py | 0 .../{ => bm_diff}/bm_constants.py | 0 .../microbenchmarks/{ => bm_diff}/bm_diff.py | 7 ++++++- .../microbenchmarks/{ => bm_diff}/bm_main.py | 0 .../microbenchmarks/{ => bm_diff}/bm_run.py | 2 +- .../{ => bm_diff}/bm_speedup.py | 0 tools/run_tests/run_microbenchmark.py | 18 ++++-------------- 9 files changed, 12 insertions(+), 17 deletions(-) create mode 100644 tools/profiling/microbenchmarks/bm_diff/README.md rename tools/profiling/microbenchmarks/{ => bm_diff}/bm_build.py (100%) rename tools/profiling/microbenchmarks/{ => bm_diff}/bm_constants.py (100%) rename tools/profiling/microbenchmarks/{ => bm_diff}/bm_diff.py (98%) rename tools/profiling/microbenchmarks/{ => bm_diff}/bm_main.py (100%) rename tools/profiling/microbenchmarks/{ => bm_diff}/bm_run.py (98%) rename tools/profiling/microbenchmarks/{ => bm_diff}/bm_speedup.py (100%) diff --git a/tools/jenkins/run_performance.sh b/tools/jenkins/run_performance.sh index 52bc2e5d634..99214ab0b1f 100755 --- a/tools/jenkins/run_performance.sh +++ b/tools/jenkins/run_performance.sh @@ -38,4 +38,4 @@ BENCHMARKS_TO_RUN="bm_fullstack_unary_ping_pong bm_fullstack_streaming_ping_pong cd $(dirname $0)/../.. 
tools/run_tests/start_port_server.py -tools/profiling/microbenchmarks/bm_main.py -d origin/$ghprbTargetBranch -b $BENCHMARKS_TO_RUN +tools/profiling/microbenchmarks/bm_diff/bm_main.py -d origin/$ghprbTargetBranch -b $BENCHMARKS_TO_RUN diff --git a/tools/profiling/microbenchmarks/bm_diff/README.md b/tools/profiling/microbenchmarks/bm_diff/README.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tools/profiling/microbenchmarks/bm_build.py b/tools/profiling/microbenchmarks/bm_diff/bm_build.py similarity index 100% rename from tools/profiling/microbenchmarks/bm_build.py rename to tools/profiling/microbenchmarks/bm_diff/bm_build.py diff --git a/tools/profiling/microbenchmarks/bm_constants.py b/tools/profiling/microbenchmarks/bm_diff/bm_constants.py similarity index 100% rename from tools/profiling/microbenchmarks/bm_constants.py rename to tools/profiling/microbenchmarks/bm_diff/bm_constants.py diff --git a/tools/profiling/microbenchmarks/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py similarity index 98% rename from tools/profiling/microbenchmarks/bm_diff.py rename to tools/profiling/microbenchmarks/bm_diff/bm_diff.py index f68dff840ec..3c871c1743b 100755 --- a/tools/profiling/microbenchmarks/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -30,10 +30,15 @@ """ Computes the diff between two bm runs and outputs significant results """ -import bm_json import bm_constants import bm_speedup +import sys +import os + +sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..')) +import bm_json + import json import tabulate import argparse diff --git a/tools/profiling/microbenchmarks/bm_main.py b/tools/profiling/microbenchmarks/bm_diff/bm_main.py similarity index 100% rename from tools/profiling/microbenchmarks/bm_main.py rename to tools/profiling/microbenchmarks/bm_diff/bm_main.py diff --git a/tools/profiling/microbenchmarks/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py similarity index 98% rename from tools/profiling/microbenchmarks/bm_run.py rename to tools/profiling/microbenchmarks/bm_diff/bm_run.py index a476a7e19fb..14b3718ecb3 100755 --- a/tools/profiling/microbenchmarks/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -39,7 +39,7 @@ import itertools import sys import os -sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', 'run_tests', 'python_utils')) +sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', '..', 'run_tests', 'python_utils')) import jobset def _args(): diff --git a/tools/profiling/microbenchmarks/bm_speedup.py b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py similarity index 100% rename from tools/profiling/microbenchmarks/bm_speedup.py rename to tools/profiling/microbenchmarks/bm_diff/bm_speedup.py diff --git a/tools/run_tests/run_microbenchmark.py b/tools/run_tests/run_microbenchmark.py index 17b156c78f9..dadebb1b54b 100755 --- a/tools/run_tests/run_microbenchmark.py +++ b/tools/run_tests/run_microbenchmark.py @@ -38,18 +38,8 @@ import argparse import python_utils.jobset as jobset import python_utils.start_port_server as start_port_server -_AVAILABLE_BENCHMARK_TESTS = ['bm_fullstack_unary_ping_pong', - 'bm_fullstack_streaming_ping_pong', - 'bm_fullstack_streaming_pump', - 'bm_closure', - 'bm_cq', - 'bm_call_create', - 'bm_error', - 'bm_chttp2_hpack', - 'bm_chttp2_transport', - 'bm_pollset', - 'bm_metadata', - 'bm_fullstack_trickle'] +sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', 'profiling', 'microbenchmarks', 
'bm_diff')) +import bm_constants flamegraph_dir = os.path.join(os.path.expanduser('~'), 'FlameGraph') @@ -214,8 +204,8 @@ argp.add_argument('-c', '--collect', default=sorted(collectors.keys()), help='Which collectors should be run against each benchmark') argp.add_argument('-b', '--benchmarks', - choices=_AVAILABLE_BENCHMARK_TESTS, - default=_AVAILABLE_BENCHMARK_TESTS, + choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, + default=bm_constants._AVAILABLE_BENCHMARK_TESTS, nargs='+', type=str, help='Which microbenchmarks should be run') From 9aba7fef8d53ec9ddc540dcd81a679ed720b7e6b Mon Sep 17 00:00:00 2001 From: ncteisen Date: Tue, 9 May 2017 15:39:17 -0700 Subject: [PATCH 05/19] Fix ttest_ind crash --- tools/profiling/microbenchmarks/bm_diff/bm_speedup.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) mode change 100644 => 100755 tools/profiling/microbenchmarks/bm_diff/bm_speedup.py diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py old mode 100644 new mode 100755 index 9e395a782ed..fb6622760b9 --- a/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python2.7 # Copyright 2017, Google Inc. # All rights reserved. # @@ -41,7 +42,9 @@ def cmp(a, b): return stats.ttest_ind(a, b) def speedup(new, old): + if (len(set(new))) == 1 and new == old: return 0 s0, p0 = cmp(new, old) + print s0, p0 if math.isnan(p0): return 0 if s0 == 0: return 0 if p0 > _THRESHOLD: return 0 @@ -49,6 +52,7 @@ def speedup(new, old): pct = 1 while pct < 101: sp, pp = cmp(new, scale(old, 1 - pct/100.0)) + print sp, pp if sp > 0: break if pp > _THRESHOLD: break pct += 1 @@ -57,13 +61,14 @@ def speedup(new, old): pct = 1 while pct < 100000: sp, pp = cmp(new, scale(old, 1 + pct/100.0)) + print sp, pp if sp < 0: break if pp > _THRESHOLD: break pct += 1 return pct - 1 if __name__ == "__main__": - new=[66034560.0, 126765693.0, 99074674.0, 98588433.0, 96731372.0, 110179725.0, 103802110.0, 101139800.0, 102357205.0, 99016353.0, 98840824.0, 99585632.0, 98791720.0, 96171521.0, 95327098.0, 95629704.0, 98209772.0, 99779411.0, 100182488.0, 98354192.0, 99644781.0, 98546709.0, 99019176.0, 99543014.0, 99077269.0, 98046601.0, 99319039.0, 98542572.0, 98886614.0, 72560968.0] - old=[60423464.0, 71249570.0, 73213089.0, 73200055.0, 72911768.0, 72347798.0, 72494672.0, 72756976.0, 72116565.0, 71541342.0, 73442538.0, 74817383.0, 73007780.0, 72499062.0, 72404945.0, 71843504.0, 73245405.0, 72778304.0, 74004519.0, 73694464.0, 72919931.0, 72955481.0, 71583857.0, 71350467.0, 71836817.0, 70064115.0, 70355345.0, 72516202.0, 71716777.0, 71532266.0] + new=[1.0, 1.0, 1.0, 1.0] + old=[2.0, 2.0, 2.0, 2.0] print speedup(new, old) print speedup(old, new) From 48d973a27603c14db440336da25af1315d47b1e6 Mon Sep 17 00:00:00 2001 From: ncteisen Date: Tue, 9 May 2017 16:59:17 -0700 Subject: [PATCH 06/19] Add readme and flags --- tools/profiling/microbenchmarks/README.md | 4 + .../microbenchmarks/bm_diff/README.md | 101 ++++++++++++++++++ .../microbenchmarks/bm_diff/bm_build.py | 10 +- .../microbenchmarks/bm_diff/bm_diff.py | 4 +- .../microbenchmarks/bm_diff/bm_main.py | 36 ++++--- .../microbenchmarks/bm_diff/bm_run.py | 17 +-- .../microbenchmarks/bm_diff/bm_speedup.py | 3 - 7 files changed, 144 insertions(+), 31 deletions(-) diff --git a/tools/profiling/microbenchmarks/README.md b/tools/profiling/microbenchmarks/README.md index e69de29bb2d..035888ee188 100644 --- 
a/tools/profiling/microbenchmarks/README.md
+++ b/tools/profiling/microbenchmarks/README.md
@@ -0,0 +1,4 @@
+Microbenchmarks
+====
+
+This directory contains helper scripts for the microbenchmark suites.
diff --git a/tools/profiling/microbenchmarks/bm_diff/README.md b/tools/profiling/microbenchmarks/bm_diff/README.md
index e69de29bb2d..e1c728ffef3 100644
--- a/tools/profiling/microbenchmarks/bm_diff/README.md
+++ b/tools/profiling/microbenchmarks/bm_diff/README.md
@@ -0,0 +1,101 @@
+The bm_diff Family
+====
+
+This family of python scripts can be incredibly useful for fast iteration over
+different performance tweaks. The tools allow you to save performance data from
+a baseline commit, then quickly compare data from your working branch to that
+baseline data to see if you have made any performance wins.
+
+The tools operate in three concrete steps, which can be invoked separately or
+all together via the driver script, bm_main.py. This readme first describes
+the typical workflow for these scripts, then covers the details of every
+script for advanced usage.
+
+## Normal Workflow
+
+Let's say you are working on a performance optimization for grpc_error. You have
+made some significant changes and want to see some data. From your branch, run
+(ensure everything is committed first):
+
+`tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -d master`
+
+This will build the `bm_error` binary on your branch and on master. It will then
+run these benchmarks 5 times each. Lastly, it will compute the statistically
+significant performance differences between the two branches. This should show
+the nice performance wins your changes have made.
+
+If you have already invoked bm_main with `-d master`, you should instead use
+`-o old` for subsequent runs. This allows the script to skip re-building and
+re-running the unchanged master branch.
+
+## bm_build.py
+
+This script builds the benchmarks. It takes a name parameter and stores the
+binaries under a directory based on that name. Both `opt` and `counters`
+configurations are built. The `opt` build is used to get cpu_time and
+real_time, and the `counters` build is used to track other metrics such as
+allocs, atomic adds, etc.
+
+For example, if you were to invoke (we assume everything is run from the
+root of the repo):
+
+`tools/profiling/microbenchmarks/bm_diff/bm_build.py -b bm_error -n baseline`
+
+then the microbenchmark binaries will show up under
+`bm_diff_baseline/{opt,counters}/bm_error`
+
+## bm_run.py
+
+This script runs the benchmarks. It takes a name parameter that must match the
+name that was passed to `bm_build.py`. The script then runs each benchmark
+multiple times (default is 20, can be toggled via the loops parameter). The
+output of each run is saved as `<benchmark name>.<config>.<name>.<loop index>.json`
+
+For example, if you were to run:
+
+`tools/profiling/microbenchmarks/bm_diff/bm_run.py -b bm_error -n baseline -l 5`
+
+Then an example output file would be `bm_error.opt.baseline.1.json`
+
+## bm_diff.py
+
+This script takes in the output from two benchmark runs, computes the diff
+between them, and prints any significant improvements or regressions. It takes
+in two name parameters, old and new. These must have previously been built and
+run.
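+
+To make the comparison concrete, the sketch below shows the spirit of the
+per-metric significance check: an unpaired t-test over the samples from both
+runs plus a median-difference filter, as in `bm_speedup.py` and
+`Benchmark.process()` in `bm_diff.py`. This is a simplified illustration, not
+the actual implementation; the helper name `significant_change` and the 0.05
+p-value threshold are assumed here for clarity, and the real code additionally
+searches for the largest defensible speedup percentage to report.
+
+```python
+# Illustrative sketch only; mirrors the shape of the real check, not its details.
+from scipy import stats
+
+
+def _median(values):
+  values = sorted(values)
+  n = len(values)
+  if n % 2 == 0:
+    return (values[n // 2 - 1] + values[n // 2]) / 2.0
+  return values[n // 2]
+
+
+def significant_change(new, old, p_threshold=0.05):
+  """Return True if the new samples likely differ meaningfully from the old."""
+  _, p = stats.ttest_ind(new, old)  # compare the two sample sets directly
+  if p > p_threshold:
+    return False  # statistically indistinguishable
+  # Ignore differences whose absolute magnitude is tiny, as bm_diff.py does.
+  return abs(_median(new) - _median(old)) > 0.5
+```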
+ +For example, assuming you had already built and run a 'baseline' microbenchmark +from master, and then you also built and ran a 'current' microbenchmark from +the branch you were working on, you could invoke: + +`tools/profiling/microbenchmarks/bm_diff/bm_diff.py -b bm_error -o baseline -n current -l 5` + +This would output the percent difference between your branch and master. + +## bm_main.py + +This is the driver script. It uses the previous three modules and does +everything for you. You pass in the benchmarks to be run, the number of loops, +number of CPUs to use, and the commit to compare to. Then the script will: +* Build the benchmarks at head, then checkout the branch to compare to and + build the benchmarks there +* Run both sets of microbenchmarks +* Run bm_diff.py to compare the two, outputs the difference. + +For example, one might run: + +`tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -d master` + +This would compare the current branch's error benchmarks to master. + +This script is invoked by our infrastructure on every PR to protect against +regressions and demonstrate performance wins. + +However, if you are iterating over different performance tweaks quickly, it is +unnecessary to build and run the baseline commit every time. That is why we +provide a different flag in case you are sure that the baseline benchmark has +already been built and run. In that case use the --old flag to pass in the name +of the baseline. This will only build and run the current branch. For example: + +`tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -o old` + diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_build.py b/tools/profiling/microbenchmarks/bm_diff/bm_build.py index a5d1ec34475..83c3c695e77 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_build.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_build.py @@ -40,10 +40,12 @@ import shutil def _args(): argp = argparse.ArgumentParser(description='Builds microbenchmarks') - argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS) - argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count()) - argp.add_argument('-n', '--name', type=str, help='Unique name of this build') - return argp.parse_args() + argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Which benchmarks to build') + argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count(), help='How many CPUs to dedicate to this task') + argp.add_argument('-n', '--name', type=str, help='Unique name of this build. 
To be used as a handle to pass to the other bm* scripts') + args = argp.parse_args() + assert args.name + return args def _make_cmd(cfg, benchmarks, jobs): return ['make'] + benchmarks + [ diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index 3c871c1743b..7b1c7e28bf2 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -61,8 +61,8 @@ def _args(): nargs='+', default=sorted(bm_constants._INTERESTING), help='Which metrics to track') - argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS) - argp.add_argument('-l', '--loops', type=int, default=20) + argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Which benchmarks to run') + argp.add_argument('-l', '--loops', type=int, default=20, help='Number of times to loops the benchmarks. Must match what was passed to bm_run.py') argp.add_argument('-n', '--new', type=str, help='New benchmark name') argp.add_argument('-o', '--old', type=str, help='Old benchmark name') argp.add_argument('-v', '--verbose', type=bool, help='print details of before/after') diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_main.py b/tools/profiling/microbenchmarks/bm_diff/bm_main.py index 1a46b170155..82b0a10e07c 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_main.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_main.py @@ -51,13 +51,16 @@ def _args(): nargs='+', default=sorted(bm_constants._INTERESTING), help='Which metrics to track') - argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS) - argp.add_argument('-d', '--diff_base', type=str) - argp.add_argument('-r', '--repetitions', type=int, default=1) - argp.add_argument('-l', '--loops', type=int, default=20) - argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count()) + argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Which benchmarks to run') + argp.add_argument('-d', '--diff_base', type=str, help='Commit or branch to compare the current one to') + argp.add_argument('-o', '--old', type=str, help='Name of baseline run to compare to. Ususally just called "old"') + argp.add_argument('-r', '--repetitions', type=int, default=1, help='Number of repetitions to pass to the benchmarks') + argp.add_argument('-l', '--loops', type=int, default=20, help='Number of times to loops the benchmarks. More loops cuts down on noise') + argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count(), help='Number of CPUs to use') args = argp.parse_args() - assert args.diff_base + assert args.diff_base or args.old, "One of diff_base or old must be set!" + if args.loops < 3: + print "WARNING: This run will likely be noisy. Increase loops." 
return args @@ -76,18 +79,21 @@ def main(args): bm_build.build('new', args.benchmarks, args.jobs) - where_am_i = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip() - subprocess.check_call(['git', 'checkout', args.diff_base]) - try: - bm_build.build('old', args.benchmarks, args.jobs) - finally: - subprocess.check_call(['git', 'checkout', where_am_i]) - subprocess.check_call(['git', 'submodule', 'update']) + old = args.old + if args.diff_base: + old = 'old' + where_am_i = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip() + subprocess.check_call(['git', 'checkout', args.diff_base]) + try: + bm_build.build('old', args.benchmarks, args.jobs) + finally: + subprocess.check_call(['git', 'checkout', where_am_i]) + subprocess.check_call(['git', 'submodule', 'update']) bm_run.run('new', args.benchmarks, args.jobs, args.loops, args.repetitions) - bm_run.run('old', args.benchmarks, args.jobs, args.loops, args.repetitions) + bm_run.run(old, args.benchmarks, args.jobs, args.loops, args.repetitions) - diff = bm_diff.diff(args.benchmarks, args.loops, args.track, 'old', 'new') + diff = bm_diff.diff(args.benchmarks, args.loops, args.track, old, 'new') if diff: text = 'Performance differences noted:\n' + diff else: diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py index 14b3718ecb3..b36e660f29f 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -44,12 +44,16 @@ import jobset def _args(): argp = argparse.ArgumentParser(description='Runs microbenchmarks') - argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS) - argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count()) - argp.add_argument('-n', '--name', type=str, help='Unique name of this build') - argp.add_argument('-r', '--repetitions', type=int, default=1) - argp.add_argument('-l', '--loops', type=int, default=20) - return argp.parse_args() + argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Benchmarks to run') + argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count(), help='Number of CPUs to use') + argp.add_argument('-n', '--name', type=str, help='Unique name of the build to run. Needs to match the handle passed to bm_build.py') + argp.add_argument('-r', '--repetitions', type=int, default=1, help='Number of repetitions to pass to the benchmarks') + argp.add_argument('-l', '--loops', type=int, default=20, help='Number of times to loops the benchmarks. More loops cuts down on noise') + args = argp.parse_args() + assert args.name + if args.loops < 3: + print "WARNING: This run will likely be noisy. Increase loops." 
+ return args def _collect_bm_data(bm, cfg, name, reps, idx, loops): cmd = ['bm_diff_%s/%s/%s' % (name, cfg, bm), @@ -73,5 +77,4 @@ def run(name, benchmarks, jobs, loops, reps): if __name__ == '__main__': args = _args() - assert args.name run(args.name, args.benchmarks, args.jobs, args.loops, args.repetitions) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py index fb6622760b9..99f1a073f5d 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py @@ -44,7 +44,6 @@ def cmp(a, b): def speedup(new, old): if (len(set(new))) == 1 and new == old: return 0 s0, p0 = cmp(new, old) - print s0, p0 if math.isnan(p0): return 0 if s0 == 0: return 0 if p0 > _THRESHOLD: return 0 @@ -52,7 +51,6 @@ def speedup(new, old): pct = 1 while pct < 101: sp, pp = cmp(new, scale(old, 1 - pct/100.0)) - print sp, pp if sp > 0: break if pp > _THRESHOLD: break pct += 1 @@ -61,7 +59,6 @@ def speedup(new, old): pct = 1 while pct < 100000: sp, pp = cmp(new, scale(old, 1 + pct/100.0)) - print sp, pp if sp < 0: break if pp > _THRESHOLD: break pct += 1 From 738be24db4b1df10b90f40c8011a47ec78e4f1f5 Mon Sep 17 00:00:00 2001 From: ncteisen Date: Tue, 9 May 2017 17:36:11 -0700 Subject: [PATCH 07/19] Py fmt --- .../microbenchmarks/bm_diff/bm_build.py | 65 ++++-- .../microbenchmarks/bm_diff/bm_constants.py | 33 +-- .../microbenchmarks/bm_diff/bm_diff.py | 219 ++++++++++-------- .../microbenchmarks/bm_diff/bm_main.py | 146 +++++++----- .../microbenchmarks/bm_diff/bm_run.py | 98 +++++--- .../microbenchmarks/bm_diff/bm_speedup.py | 59 ++--- 6 files changed, 366 insertions(+), 254 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_build.py b/tools/profiling/microbenchmarks/bm_diff/bm_build.py index 83c3c695e77..3d1ccbae30b 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_build.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_build.py @@ -38,33 +38,52 @@ import multiprocessing import os import shutil + def _args(): - argp = argparse.ArgumentParser(description='Builds microbenchmarks') - argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Which benchmarks to build') - argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count(), help='How many CPUs to dedicate to this task') - argp.add_argument('-n', '--name', type=str, help='Unique name of this build. To be used as a handle to pass to the other bm* scripts') - args = argp.parse_args() - assert args.name - return args + argp = argparse.ArgumentParser(description='Builds microbenchmarks') + argp.add_argument( + '-b', + '--benchmarks', + nargs='+', + choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, + default=bm_constants._AVAILABLE_BENCHMARK_TESTS, + help='Which benchmarks to build') + argp.add_argument( + '-j', + '--jobs', + type=int, + default=multiprocessing.cpu_count(), + help='How many CPUs to dedicate to this task') + argp.add_argument( + '-n', + '--name', + type=str, + help='Unique name of this build. 
To be used as a handle to pass to the other bm* scripts' + ) + args = argp.parse_args() + assert args.name + return args + def _make_cmd(cfg, benchmarks, jobs): - return ['make'] + benchmarks + [ - 'CONFIG=%s' % cfg, '-j', '%d' % jobs] + return ['make'] + benchmarks + ['CONFIG=%s' % cfg, '-j', '%d' % jobs] -def build(name, benchmarks, jobs): - shutil.rmtree('bm_diff_%s' % name, ignore_errors=True) - subprocess.check_call(['git', 'submodule', 'update']) - try: - subprocess.check_call(_make_cmd('opt', benchmarks, jobs)) - subprocess.check_call(_make_cmd('counters', benchmarks, jobs)) - except subprocess.CalledProcessError, e: - subprocess.check_call(['make', 'clean']) - subprocess.check_call(_make_cmd('opt', benchmarks, jobs)) - subprocess.check_call(_make_cmd('counters', benchmarks, jobs)) - os.rename('bins', 'bm_diff_%s' % name, ) -if __name__ == '__main__': - args = _args() - build(args.name, args.benchmarks, args.jobs) +def build(name, benchmarks, jobs): + shutil.rmtree('bm_diff_%s' % name, ignore_errors=True) + subprocess.check_call(['git', 'submodule', 'update']) + try: + subprocess.check_call(_make_cmd('opt', benchmarks, jobs)) + subprocess.check_call(_make_cmd('counters', benchmarks, jobs)) + except subprocess.CalledProcessError, e: + subprocess.check_call(['make', 'clean']) + subprocess.check_call(_make_cmd('opt', benchmarks, jobs)) + subprocess.check_call(_make_cmd('counters', benchmarks, jobs)) + os.rename( + 'bins', + 'bm_diff_%s' % name,) +if __name__ == '__main__': + args = _args() + build(args.name, args.benchmarks, args.jobs) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_constants.py b/tools/profiling/microbenchmarks/bm_diff/bm_constants.py index ada1e32e72e..bcefdfb6fe6 100644 --- a/tools/profiling/microbenchmarks/bm_diff/bm_constants.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_constants.py @@ -30,27 +30,14 @@ ### Configurable constants for the bm_*.py family """ -_AVAILABLE_BENCHMARK_TESTS = ['bm_fullstack_unary_ping_pong', - 'bm_fullstack_streaming_ping_pong', - 'bm_fullstack_streaming_pump', - 'bm_closure', - 'bm_cq', - 'bm_call_create', - 'bm_error', - 'bm_chttp2_hpack', - 'bm_chttp2_transport', - 'bm_pollset', - 'bm_metadata', - 'bm_fullstack_trickle'] +_AVAILABLE_BENCHMARK_TESTS = [ + 'bm_fullstack_unary_ping_pong', 'bm_fullstack_streaming_ping_pong', + 'bm_fullstack_streaming_pump', 'bm_closure', 'bm_cq', 'bm_call_create', + 'bm_error', 'bm_chttp2_hpack', 'bm_chttp2_transport', 'bm_pollset', + 'bm_metadata', 'bm_fullstack_trickle' +] - -_INTERESTING = ( - 'cpu_time', - 'real_time', - 'locks_per_iteration', - 'allocs_per_iteration', - 'writes_per_iteration', - 'atm_cas_per_iteration', - 'atm_add_per_iteration', - 'nows_per_iteration', -) +_INTERESTING = ('cpu_time', 'real_time', 'locks_per_iteration', + 'allocs_per_iteration', 'writes_per_iteration', + 'atm_cas_per_iteration', 'atm_add_per_iteration', + 'nows_per_iteration',) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index 7b1c7e28bf2..bc02b42bf20 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -27,7 +27,6 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- """ Computes the diff between two bm runs and outputs significant results """ import bm_constants @@ -46,114 +45,138 @@ import collections verbose = False + def _median(ary): - ary = sorted(ary) - n = len(ary) - if n%2 == 0: - return (ary[n/2] + ary[n/2+1]) / 2.0 - else: - return ary[n/2] + ary = sorted(ary) + n = len(ary) + if n % 2 == 0: + return (ary[n / 2] + ary[n / 2 + 1]) / 2.0 + else: + return ary[n / 2] + def _args(): - argp = argparse.ArgumentParser(description='Perform diff on microbenchmarks') - argp.add_argument('-t', '--track', - choices=sorted(bm_constants._INTERESTING), - nargs='+', - default=sorted(bm_constants._INTERESTING), - help='Which metrics to track') - argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Which benchmarks to run') - argp.add_argument('-l', '--loops', type=int, default=20, help='Number of times to loops the benchmarks. Must match what was passed to bm_run.py') - argp.add_argument('-n', '--new', type=str, help='New benchmark name') - argp.add_argument('-o', '--old', type=str, help='Old benchmark name') - argp.add_argument('-v', '--verbose', type=bool, help='print details of before/after') - args = argp.parse_args() - global verbose - if args.verbose: verbose = True - assert args.new - assert args.old - return args + argp = argparse.ArgumentParser( + description='Perform diff on microbenchmarks') + argp.add_argument( + '-t', + '--track', + choices=sorted(bm_constants._INTERESTING), + nargs='+', + default=sorted(bm_constants._INTERESTING), + help='Which metrics to track') + argp.add_argument( + '-b', + '--benchmarks', + nargs='+', + choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, + default=bm_constants._AVAILABLE_BENCHMARK_TESTS, + help='Which benchmarks to run') + argp.add_argument( + '-l', + '--loops', + type=int, + default=20, + help='Number of times to loops the benchmarks. 
Must match what was passed to bm_run.py' + ) + argp.add_argument('-n', '--new', type=str, help='New benchmark name') + argp.add_argument('-o', '--old', type=str, help='Old benchmark name') + argp.add_argument( + '-v', '--verbose', type=bool, help='print details of before/after') + args = argp.parse_args() + global verbose + if args.verbose: verbose = True + assert args.new + assert args.old + return args + def _maybe_print(str): - if verbose: print str + if verbose: print str + class Benchmark: - def __init__(self): - self.samples = { - True: collections.defaultdict(list), - False: collections.defaultdict(list) - } - self.final = {} - - def add_sample(self, track, data, new): - for f in track: - if f in data: - self.samples[new][f].append(float(data[f])) - - def process(self, track, new_name, old_name): - for f in sorted(track): - new = self.samples[True][f] - old = self.samples[False][f] - if not new or not old: continue - mdn_diff = abs(_median(new) - _median(old)) - _maybe_print('%s: %s=%r %s=%r mdn_diff=%r' % - (f, new_name, new, old_name, old, mdn_diff)) - s = bm_speedup.speedup(new, old) - if abs(s) > 3 and mdn_diff > 0.5: - self.final[f] = '%+d%%' % s - return self.final.keys() - - def skip(self): - return not self.final - - def row(self, flds): - return [self.final[f] if f in self.final else '' for f in flds] + def __init__(self): + self.samples = { + True: collections.defaultdict(list), + False: collections.defaultdict(list) + } + self.final = {} + + def add_sample(self, track, data, new): + for f in track: + if f in data: + self.samples[new][f].append(float(data[f])) + + def process(self, track, new_name, old_name): + for f in sorted(track): + new = self.samples[True][f] + old = self.samples[False][f] + if not new or not old: continue + mdn_diff = abs(_median(new) - _median(old)) + _maybe_print('%s: %s=%r %s=%r mdn_diff=%r' % + (f, new_name, new, old_name, old, mdn_diff)) + s = bm_speedup.speedup(new, old) + if abs(s) > 3 and mdn_diff > 0.5: + self.final[f] = '%+d%%' % s + return self.final.keys() + + def skip(self): + return not self.final + + def row(self, flds): + return [self.final[f] if f in self.final else '' for f in flds] + def _read_json(filename): - try: - with open(filename) as f: return json.loads(f.read()) - except ValueError, e: - return None + try: + with open(filename) as f: + return json.loads(f.read()) + except ValueError, e: + return None -def diff(bms, loops, track, old, new): - benchmarks = collections.defaultdict(Benchmark) - - for bm in bms: - for loop in range(0, loops): - js_new_ctr = _read_json('%s.counters.%s.%d.json' % (bm, new, loop)) - js_new_opt = _read_json('%s.opt.%s.%d.json' % (bm, new, loop)) - js_old_ctr = _read_json('%s.counters.%s.%d.json' % (bm, old, loop)) - js_old_opt = _read_json('%s.opt.%s.%d.json' % (bm, old, loop)) - - if js_new_ctr: - for row in bm_json.expand_json(js_new_ctr, js_new_opt): - name = row['cpp_name'] - if name.endswith('_mean') or name.endswith('_stddev'): continue - benchmarks[name].add_sample(track, row, True) - if js_old_ctr: - for row in bm_json.expand_json(js_old_ctr, js_old_opt): - name = row['cpp_name'] - if name.endswith('_mean') or name.endswith('_stddev'): continue - benchmarks[name].add_sample(track, row, False) - - really_interesting = set() - for name, bm in benchmarks.items(): - _maybe_print(name) - really_interesting.update(bm.process(track, new, old)) - fields = [f for f in track if f in really_interesting] - - headers = ['Benchmark'] + fields - rows = [] - for name in sorted(benchmarks.keys()): - if 
benchmarks[name].skip(): continue - rows.append([name] + benchmarks[name].row(fields)) - if rows: - return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f') - else: - return None -if __name__ == '__main__': - args = _args() - print diff(args.benchmarks, args.loops, args.track, args.old, args.new) +def diff(bms, loops, track, old, new): + benchmarks = collections.defaultdict(Benchmark) + + for bm in bms: + for loop in range(0, loops): + js_new_ctr = _read_json('%s.counters.%s.%d.json' % (bm, new, loop)) + js_new_opt = _read_json('%s.opt.%s.%d.json' % (bm, new, loop)) + js_old_ctr = _read_json('%s.counters.%s.%d.json' % (bm, old, loop)) + js_old_opt = _read_json('%s.opt.%s.%d.json' % (bm, old, loop)) + + if js_new_ctr: + for row in bm_json.expand_json(js_new_ctr, js_new_opt): + name = row['cpp_name'] + if name.endswith('_mean') or name.endswith('_stddev'): + continue + benchmarks[name].add_sample(track, row, True) + if js_old_ctr: + for row in bm_json.expand_json(js_old_ctr, js_old_opt): + name = row['cpp_name'] + if name.endswith('_mean') or name.endswith('_stddev'): + continue + benchmarks[name].add_sample(track, row, False) + + really_interesting = set() + for name, bm in benchmarks.items(): + _maybe_print(name) + really_interesting.update(bm.process(track, new, old)) + fields = [f for f in track if f in really_interesting] + + headers = ['Benchmark'] + fields + rows = [] + for name in sorted(benchmarks.keys()): + if benchmarks[name].skip(): continue + rows.append([name] + benchmarks[name].row(fields)) + if rows: + return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f') + else: + return None +if __name__ == '__main__': + args = _args() + print diff(args.benchmarks, args.loops, args.track, args.old, args.new) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_main.py b/tools/profiling/microbenchmarks/bm_diff/bm_main.py index 82b0a10e07c..812c671873d 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_main.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_main.py @@ -27,7 +27,6 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - """ Runs the entire bm_*.py pipeline, and possible comments on the PR """ import bm_constants @@ -41,66 +40,107 @@ import argparse import multiprocessing import subprocess -sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', 'run_tests', 'python_utils')) +sys.path.append( + os.path.join( + os.path.dirname(sys.argv[0]), '..', '..', 'run_tests', 'python_utils')) import comment_on_pr + def _args(): - argp = argparse.ArgumentParser(description='Perform diff on microbenchmarks') - argp.add_argument('-t', '--track', - choices=sorted(bm_constants._INTERESTING), - nargs='+', - default=sorted(bm_constants._INTERESTING), - help='Which metrics to track') - argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Which benchmarks to run') - argp.add_argument('-d', '--diff_base', type=str, help='Commit or branch to compare the current one to') - argp.add_argument('-o', '--old', type=str, help='Name of baseline run to compare to. 
Ususally just called "old"') - argp.add_argument('-r', '--repetitions', type=int, default=1, help='Number of repetitions to pass to the benchmarks') - argp.add_argument('-l', '--loops', type=int, default=20, help='Number of times to loops the benchmarks. More loops cuts down on noise') - argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count(), help='Number of CPUs to use') - args = argp.parse_args() - assert args.diff_base or args.old, "One of diff_base or old must be set!" - if args.loops < 3: - print "WARNING: This run will likely be noisy. Increase loops." - return args + argp = argparse.ArgumentParser( + description='Perform diff on microbenchmarks') + argp.add_argument( + '-t', + '--track', + choices=sorted(bm_constants._INTERESTING), + nargs='+', + default=sorted(bm_constants._INTERESTING), + help='Which metrics to track') + argp.add_argument( + '-b', + '--benchmarks', + nargs='+', + choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, + default=bm_constants._AVAILABLE_BENCHMARK_TESTS, + help='Which benchmarks to run') + argp.add_argument( + '-d', + '--diff_base', + type=str, + help='Commit or branch to compare the current one to') + argp.add_argument( + '-o', + '--old', + type=str, + help='Name of baseline run to compare to. Ususally just called "old"') + argp.add_argument( + '-r', + '--repetitions', + type=int, + default=1, + help='Number of repetitions to pass to the benchmarks') + argp.add_argument( + '-l', + '--loops', + type=int, + default=20, + help='Number of times to loops the benchmarks. More loops cuts down on noise' + ) + argp.add_argument( + '-j', + '--jobs', + type=int, + default=multiprocessing.cpu_count(), + help='Number of CPUs to use') + args = argp.parse_args() + assert args.diff_base or args.old, "One of diff_base or old must be set!" + if args.loops < 3: + print "WARNING: This run will likely be noisy. Increase loops." 
+ return args def eintr_be_gone(fn): - """Run fn until it doesn't stop because of EINTR""" - def inner(*args): - while True: - try: - return fn(*args) - except IOError, e: - if e.errno != errno.EINTR: - raise - return inner + """Run fn until it doesn't stop because of EINTR""" + + def inner(*args): + while True: + try: + return fn(*args) + except IOError, e: + if e.errno != errno.EINTR: + raise + + return inner + def main(args): - bm_build.build('new', args.benchmarks, args.jobs) - - old = args.old - if args.diff_base: - old = 'old' - where_am_i = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip() - subprocess.check_call(['git', 'checkout', args.diff_base]) - try: - bm_build.build('old', args.benchmarks, args.jobs) - finally: - subprocess.check_call(['git', 'checkout', where_am_i]) - subprocess.check_call(['git', 'submodule', 'update']) - - bm_run.run('new', args.benchmarks, args.jobs, args.loops, args.repetitions) - bm_run.run(old, args.benchmarks, args.jobs, args.loops, args.repetitions) - - diff = bm_diff.diff(args.benchmarks, args.loops, args.track, old, 'new') - if diff: - text = 'Performance differences noted:\n' + diff - else: - text = 'No significant performance differences' - print text - comment_on_pr.comment_on_pr('```\n%s\n```' % text) + bm_build.build('new', args.benchmarks, args.jobs) + + old = args.old + if args.diff_base: + old = 'old' + where_am_i = subprocess.check_output( + ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip() + subprocess.check_call(['git', 'checkout', args.diff_base]) + try: + bm_build.build('old', args.benchmarks, args.jobs) + finally: + subprocess.check_call(['git', 'checkout', where_am_i]) + subprocess.check_call(['git', 'submodule', 'update']) + + bm_run.run('new', args.benchmarks, args.jobs, args.loops, args.repetitions) + bm_run.run(old, args.benchmarks, args.jobs, args.loops, args.repetitions) + + diff = bm_diff.diff(args.benchmarks, args.loops, args.track, old, 'new') + if diff: + text = 'Performance differences noted:\n' + diff + else: + text = 'No significant performance differences' + print text + comment_on_pr.comment_on_pr('```\n%s\n```' % text) + if __name__ == '__main__': - args = _args() - main(args) + args = _args() + main(args) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py index b36e660f29f..d52617ce2ff 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -39,42 +39,82 @@ import itertools import sys import os -sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', '..', 'run_tests', 'python_utils')) +sys.path.append( + os.path.join( + os.path.dirname(sys.argv[0]), '..', '..', '..', 'run_tests', + 'python_utils')) import jobset + def _args(): - argp = argparse.ArgumentParser(description='Runs microbenchmarks') - argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Benchmarks to run') - argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count(), help='Number of CPUs to use') - argp.add_argument('-n', '--name', type=str, help='Unique name of the build to run. Needs to match the handle passed to bm_build.py') - argp.add_argument('-r', '--repetitions', type=int, default=1, help='Number of repetitions to pass to the benchmarks') - argp.add_argument('-l', '--loops', type=int, default=20, help='Number of times to loops the benchmarks. 
More loops cuts down on noise') - args = argp.parse_args() - assert args.name - if args.loops < 3: - print "WARNING: This run will likely be noisy. Increase loops." - return args + argp = argparse.ArgumentParser(description='Runs microbenchmarks') + argp.add_argument( + '-b', + '--benchmarks', + nargs='+', + choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, + default=bm_constants._AVAILABLE_BENCHMARK_TESTS, + help='Benchmarks to run') + argp.add_argument( + '-j', + '--jobs', + type=int, + default=multiprocessing.cpu_count(), + help='Number of CPUs to use') + argp.add_argument( + '-n', + '--name', + type=str, + help='Unique name of the build to run. Needs to match the handle passed to bm_build.py' + ) + argp.add_argument( + '-r', + '--repetitions', + type=int, + default=1, + help='Number of repetitions to pass to the benchmarks') + argp.add_argument( + '-l', + '--loops', + type=int, + default=20, + help='Number of times to loops the benchmarks. More loops cuts down on noise' + ) + args = argp.parse_args() + assert args.name + if args.loops < 3: + print "WARNING: This run will likely be noisy. Increase loops." + return args + def _collect_bm_data(bm, cfg, name, reps, idx, loops): - cmd = ['bm_diff_%s/%s/%s' % (name, cfg, bm), - '--benchmark_out=%s.%s.%s.%d.json' % (bm, cfg, name, idx), - '--benchmark_out_format=json', - '--benchmark_repetitions=%d' % (reps) - ] - return jobset.JobSpec(cmd, shortname='%s %s %s %d/%d' % (bm, cfg, name, idx+1, loops), - verbose_success=True, timeout_seconds=None) + cmd = [ + 'bm_diff_%s/%s/%s' % (name, cfg, bm), + '--benchmark_out=%s.%s.%s.%d.json' % (bm, cfg, name, idx), + '--benchmark_out_format=json', '--benchmark_repetitions=%d' % (reps) + ] + return jobset.JobSpec( + cmd, + shortname='%s %s %s %d/%d' % (bm, cfg, name, idx + 1, loops), + verbose_success=True, + timeout_seconds=None) + def run(name, benchmarks, jobs, loops, reps): - jobs_list = [] - for loop in range(0, loops): - jobs_list.extend(x for x in itertools.chain( - (_collect_bm_data(bm, 'opt', name, reps, loop, loops) for bm in benchmarks), - (_collect_bm_data(bm, 'counters', name, reps, loop, loops) for bm in benchmarks), - )) - random.shuffle(jobs_list, random.SystemRandom().random) + jobs_list = [] + for loop in range(0, loops): + jobs_list.extend( + x + for x in itertools.chain( + (_collect_bm_data(bm, 'opt', name, reps, loop, loops) + for bm in benchmarks), + (_collect_bm_data(bm, 'counters', name, reps, loop, loops) + for bm in benchmarks),)) + random.shuffle(jobs_list, random.SystemRandom().random) + + jobset.run(jobs_list, maxjobs=jobs) - jobset.run(jobs_list, maxjobs=jobs) if __name__ == '__main__': - args = _args() - run(args.name, args.benchmarks, args.jobs, args.loops, args.repetitions) + args = _args() + run(args.name, args.benchmarks, args.jobs, args.loops, args.repetitions) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py index 99f1a073f5d..63f07aea38c 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py @@ -27,7 +27,6 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
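# A rough summary of what the reformatted speedup() in the hunk below computes
# (a reader's sketch, assuming scipy.stats.ttest_ind semantics): cmp(a, b) is a
# two-sample t-test, and _THRESHOLD = 1e-10 is the p-value below which two
# sample sets are treated as genuinely different. speedup(new, old) returns 0
# if the raw samples are not significantly different; otherwise it rescales
# `old` by 1 +/- pct/100, one percent at a time, until the scaled baseline is
# no longer distinguishable from `new` (or the sign of the t-statistic flips),
# and returns the last such percentage, signed: negative when the new samples
# are lower than the old ones, positive when they are higher. bm_diff.py then
# only reports a metric when abs(speedup) > 3 and the medians differ by more
# than 0.5.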
- """ The math behind the diff functionality """ from scipy import stats @@ -35,37 +34,41 @@ import math _THRESHOLD = 1e-10 + def scale(a, mul): - return [x*mul for x in a] + return [x * mul for x in a] + def cmp(a, b): - return stats.ttest_ind(a, b) + return stats.ttest_ind(a, b) + def speedup(new, old): - if (len(set(new))) == 1 and new == old: return 0 - s0, p0 = cmp(new, old) - if math.isnan(p0): return 0 - if s0 == 0: return 0 - if p0 > _THRESHOLD: return 0 - if s0 < 0: - pct = 1 - while pct < 101: - sp, pp = cmp(new, scale(old, 1 - pct/100.0)) - if sp > 0: break - if pp > _THRESHOLD: break - pct += 1 - return -(pct - 1) - else: - pct = 1 - while pct < 100000: - sp, pp = cmp(new, scale(old, 1 + pct/100.0)) - if sp < 0: break - if pp > _THRESHOLD: break - pct += 1 - return pct - 1 + if (len(set(new))) == 1 and new == old: return 0 + s0, p0 = cmp(new, old) + if math.isnan(p0): return 0 + if s0 == 0: return 0 + if p0 > _THRESHOLD: return 0 + if s0 < 0: + pct = 1 + while pct < 101: + sp, pp = cmp(new, scale(old, 1 - pct / 100.0)) + if sp > 0: break + if pp > _THRESHOLD: break + pct += 1 + return -(pct - 1) + else: + pct = 1 + while pct < 100000: + sp, pp = cmp(new, scale(old, 1 + pct / 100.0)) + if sp < 0: break + if pp > _THRESHOLD: break + pct += 1 + return pct - 1 + if __name__ == "__main__": - new=[1.0, 1.0, 1.0, 1.0] - old=[2.0, 2.0, 2.0, 2.0] - print speedup(new, old) - print speedup(old, new) + new = [1.0, 1.0, 1.0, 1.0] + old = [2.0, 2.0, 2.0, 2.0] + print speedup(new, old) + print speedup(old, new) From d21040bf1ef56a96808eebe4961039e726fbc80c Mon Sep 17 00:00:00 2001 From: ncteisen Date: Thu, 11 May 2017 17:54:52 -0700 Subject: [PATCH 08/19] Update readme --- .../microbenchmarks/bm_diff/README.md | 29 ++++++++++++++----- .../microbenchmarks/bm_diff/bm_main.py | 1 + 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/README.md b/tools/profiling/microbenchmarks/bm_diff/README.md index e1c728ffef3..3d01ea25ba9 100644 --- a/tools/profiling/microbenchmarks/bm_diff/README.md +++ b/tools/profiling/microbenchmarks/bm_diff/README.md @@ -19,14 +19,29 @@ made some significant changes and want to see some data. From your branch, run `tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -d master` -This will build the `bm_error` binary on your branch and master. It will then -run these benchmarks 5 times each. Lastly it will compute the statistically -significant performance differences between the two branches. This should show -the nice performance wins your changes have made. +This will build the `bm_error` binary on your branch, and then it will checkout +master and build it there too. It will then run these benchmarks 5 times each. +Lastly it will compute the statistically significant performance differences +between the two branches. This should show the nice performance wins your +changes have made. If you have already invoked bm_main with `-d master`, you should instead use -`-o old` for subsequent runs. This allows the script to skip re-building and -re-running the unchanged master branch. +`-o` for subsequent runs. This allows the script to skip re-building and +re-running the unchanged master branch. For example: + +`tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -o` + +This will only build and run `bm_error` on your branch. It will then compare +the output to the saved runs from master. 
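The same three steps that `bm_main.py` drives can also be invoked as modules.
A minimal sketch, assuming the `bm_diff/` scripts are importable from the
working directory and using the function signatures introduced earlier in this
series (later patches make `bm_diff.diff` also return a flakiness note, so the
final call yields a pair there):

import multiprocessing
import bm_build, bm_run, bm_diff, bm_constants

jobs = multiprocessing.cpu_count()
bms = ['bm_error']   # any subset of bm_constants._AVAILABLE_BENCHMARK_TESTS
loops, reps = 5, 1

# On your working branch: build and run, saving results under the handle 'new'.
bm_build.build('new', bms, jobs)
bm_run.run('new', bms, jobs, loops, reps)

# After `git checkout master` (plus a submodule update), do the same as 'old'.
bm_build.build('old', bms, jobs)
bm_run.run('old', bms, jobs, loops, reps)

# Compare the saved json output; loops must match what was passed to bm_run.
print bm_diff.diff(bms, loops, sorted(bm_constants._INTERESTING), 'old', 'new')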
+ +## Advanced Workflow + +If you have a deeper knowledge of these scripts, you can use them to do more +fine tuned benchmark comparisons. For example, you could build, run, and save +the benchmark output from two different base branches. Then you could diff both +of these baselines against you working branch to see how the different metrics +change. The rest of this doc goes over the details of what each of the +individual modules accomplishes. ## bm_build.py @@ -55,7 +70,7 @@ For example, if you were to run: `tools/profiling/microbenchmarks/bm_diff/bm_run.py -b bm_error -b baseline -l 5` -Then an example output file would be `bm_error.opt.baseline.1.json` +Then an example output file would be `bm_error.opt.baseline.0.json` ## bm_diff.py diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_main.py b/tools/profiling/microbenchmarks/bm_diff/bm_main.py index 812c671873d..5be9aca411f 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_main.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_main.py @@ -71,6 +71,7 @@ def _args(): argp.add_argument( '-o', '--old', + default='old', type=str, help='Name of baseline run to compare to. Ususally just called "old"') argp.add_argument( From bea7c1954c922387d405d8ffc9469ae7171753ec Mon Sep 17 00:00:00 2001 From: ncteisen Date: Thu, 1 Jun 2017 10:33:42 -0700 Subject: [PATCH 09/19] Split bm runs by individual bm --- .../microbenchmarks/bm_diff/bm_diff.py | 38 ++++++++++--------- .../microbenchmarks/bm_diff/bm_run.py | 38 ++++++++++--------- 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index bc02b42bf20..77c0015ba18 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -42,6 +42,7 @@ import json import tabulate import argparse import collections +import subprocess verbose = False @@ -142,23 +143,26 @@ def diff(bms, loops, track, old, new): for bm in bms: for loop in range(0, loops): - js_new_ctr = _read_json('%s.counters.%s.%d.json' % (bm, new, loop)) - js_new_opt = _read_json('%s.opt.%s.%d.json' % (bm, new, loop)) - js_old_ctr = _read_json('%s.counters.%s.%d.json' % (bm, old, loop)) - js_old_opt = _read_json('%s.opt.%s.%d.json' % (bm, old, loop)) - - if js_new_ctr: - for row in bm_json.expand_json(js_new_ctr, js_new_opt): - name = row['cpp_name'] - if name.endswith('_mean') or name.endswith('_stddev'): - continue - benchmarks[name].add_sample(track, row, True) - if js_old_ctr: - for row in bm_json.expand_json(js_old_ctr, js_old_opt): - name = row['cpp_name'] - if name.endswith('_mean') or name.endswith('_stddev'): - continue - benchmarks[name].add_sample(track, row, False) + for line in subprocess.check_output(['bm_diff_%s/opt/%s' % (old, bm), + '--benchmark_list_tests']).splitlines(): + stripped_line = line.strip().replace("/","_").replace("<","_").replace(">","_") + js_new_ctr = _read_json('%s.%s.counters.%s.%d.json' % (bm, stripped_line, new, loop)) + js_new_opt = _read_json('%s.%s.opt.%s.%d.json' % (bm, stripped_line, new, loop)) + js_old_ctr = _read_json('%s.%s.counters.%s.%d.json' % (bm, stripped_line, old, loop)) + js_old_opt = _read_json('%s.%s.opt.%s.%d.json' % (bm, stripped_line, old, loop)) + + if js_new_ctr: + for row in bm_json.expand_json(js_new_ctr, js_new_opt): + name = row['cpp_name'] + if name.endswith('_mean') or name.endswith('_stddev'): + continue + benchmarks[name].add_sample(track, row, True) + if js_old_ctr: + for row in 
bm_json.expand_json(js_old_ctr, js_old_opt): + name = row['cpp_name'] + if name.endswith('_mean') or name.endswith('_stddev'): + continue + benchmarks[name].add_sample(track, row, False) really_interesting = set() for name, bm in benchmarks.items(): diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py index d52617ce2ff..b382b7b3771 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -33,6 +33,7 @@ import bm_constants import argparse +import subprocess import multiprocessing import random import itertools @@ -88,30 +89,31 @@ def _args(): def _collect_bm_data(bm, cfg, name, reps, idx, loops): - cmd = [ - 'bm_diff_%s/%s/%s' % (name, cfg, bm), - '--benchmark_out=%s.%s.%s.%d.json' % (bm, cfg, name, idx), - '--benchmark_out_format=json', '--benchmark_repetitions=%d' % (reps) - ] - return jobset.JobSpec( - cmd, - shortname='%s %s %s %d/%d' % (bm, cfg, name, idx + 1, loops), - verbose_success=True, - timeout_seconds=None) + jobs_list = [] + for line in subprocess.check_output(['bm_diff_%s/%s/%s' % (name, cfg, bm), + '--benchmark_list_tests']).splitlines(): + stripped_line = line.strip().replace("/","_").replace("<","_").replace(">","_") + cmd = [ + 'bm_diff_%s/%s/%s' % (name, cfg, bm), + '--benchmark_filter=^%s$' % line, + '--benchmark_out=%s.%s.%s.%s.%d.json' % (bm, stripped_line, cfg, name, idx), + '--benchmark_out_format=json', '--benchmark_repetitions=%d' % (reps) + ] + jobs_list.append(jobset.JobSpec( + cmd, + shortname='%s %s %s %s %d/%d' % (bm, line, cfg, name, idx + 1, loops), + verbose_success=True, + timeout_seconds=None)) + return jobs_list def run(name, benchmarks, jobs, loops, reps): jobs_list = [] for loop in range(0, loops): - jobs_list.extend( - x - for x in itertools.chain( - (_collect_bm_data(bm, 'opt', name, reps, loop, loops) - for bm in benchmarks), - (_collect_bm_data(bm, 'counters', name, reps, loop, loops) - for bm in benchmarks),)) + for bm in benchmarks: + jobs_list += _collect_bm_data(bm, 'opt', name, reps, loop, loops) + jobs_list += _collect_bm_data(bm, 'counters', name, reps, loop, loops) random.shuffle(jobs_list, random.SystemRandom().random) - jobset.run(jobs_list, maxjobs=jobs) From 3992a3a4990be46cfaf1a6ace1c57d3e3b4fc211 Mon Sep 17 00:00:00 2001 From: ncteisen Date: Thu, 1 Jun 2017 10:36:05 -0700 Subject: [PATCH 10/19] Yapf fmt code --- .../microbenchmarks/bm_diff/bm_diff.py | 20 +++++++----- .../microbenchmarks/bm_diff/bm_run.py | 31 +++++++++++-------- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index 77c0015ba18..796ddac7147 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -143,13 +143,19 @@ def diff(bms, loops, track, old, new): for bm in bms: for loop in range(0, loops): - for line in subprocess.check_output(['bm_diff_%s/opt/%s' % (old, bm), - '--benchmark_list_tests']).splitlines(): - stripped_line = line.strip().replace("/","_").replace("<","_").replace(">","_") - js_new_ctr = _read_json('%s.%s.counters.%s.%d.json' % (bm, stripped_line, new, loop)) - js_new_opt = _read_json('%s.%s.opt.%s.%d.json' % (bm, stripped_line, new, loop)) - js_old_ctr = _read_json('%s.%s.counters.%s.%d.json' % (bm, stripped_line, old, loop)) - js_old_opt = _read_json('%s.%s.opt.%s.%d.json' % (bm, stripped_line, old, loop)) + for line in 
subprocess.check_output( + ['bm_diff_%s/opt/%s' % (old, bm), + '--benchmark_list_tests']).splitlines(): + stripped_line = line.strip().replace("/", "_").replace( + "<", "_").replace(">", "_") + js_new_ctr = _read_json('%s.%s.counters.%s.%d.json' % + (bm, stripped_line, new, loop)) + js_new_opt = _read_json('%s.%s.opt.%s.%d.json' % + (bm, stripped_line, new, loop)) + js_old_ctr = _read_json('%s.%s.counters.%s.%d.json' % + (bm, stripped_line, old, loop)) + js_old_opt = _read_json('%s.%s.opt.%s.%d.json' % + (bm, stripped_line, old, loop)) if js_new_ctr: for row in bm_json.expand_json(js_new_ctr, js_new_opt): diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py index b382b7b3771..9873df04121 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -90,20 +90,24 @@ def _args(): def _collect_bm_data(bm, cfg, name, reps, idx, loops): jobs_list = [] - for line in subprocess.check_output(['bm_diff_%s/%s/%s' % (name, cfg, bm), - '--benchmark_list_tests']).splitlines(): - stripped_line = line.strip().replace("/","_").replace("<","_").replace(">","_") + for line in subprocess.check_output( + ['bm_diff_%s/%s/%s' % (name, cfg, bm), + '--benchmark_list_tests']).splitlines(): + stripped_line = line.strip().replace("/", "_").replace( + "<", "_").replace(">", "_") cmd = [ - 'bm_diff_%s/%s/%s' % (name, cfg, bm), - '--benchmark_filter=^%s$' % line, - '--benchmark_out=%s.%s.%s.%s.%d.json' % (bm, stripped_line, cfg, name, idx), - '--benchmark_out_format=json', '--benchmark_repetitions=%d' % (reps) + 'bm_diff_%s/%s/%s' % (name, cfg, bm), '--benchmark_filter=^%s$' % + line, '--benchmark_out=%s.%s.%s.%s.%d.json' % + (bm, stripped_line, cfg, name, idx), '--benchmark_out_format=json', + '--benchmark_repetitions=%d' % (reps) ] - jobs_list.append(jobset.JobSpec( - cmd, - shortname='%s %s %s %s %d/%d' % (bm, line, cfg, name, idx + 1, loops), - verbose_success=True, - timeout_seconds=None)) + jobs_list.append( + jobset.JobSpec( + cmd, + shortname='%s %s %s %s %d/%d' % (bm, line, cfg, name, idx + 1, + loops), + verbose_success=True, + timeout_seconds=None)) return jobs_list @@ -112,7 +116,8 @@ def run(name, benchmarks, jobs, loops, reps): for loop in range(0, loops): for bm in benchmarks: jobs_list += _collect_bm_data(bm, 'opt', name, reps, loop, loops) - jobs_list += _collect_bm_data(bm, 'counters', name, reps, loop, loops) + jobs_list += _collect_bm_data(bm, 'counters', name, reps, loop, + loops) random.shuffle(jobs_list, random.SystemRandom().random) jobset.run(jobs_list, maxjobs=jobs) From 3f05c21f55aabaa347566aee7c0e091b36d26c9a Mon Sep 17 00:00:00 2001 From: ncteisen Date: Thu, 1 Jun 2017 12:43:58 -0700 Subject: [PATCH 11/19] Add timeout and retries to fix flakes --- tools/profiling/microbenchmarks/bm_diff/bm_diff.py | 2 +- tools/profiling/microbenchmarks/bm_diff/bm_run.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index 796ddac7147..c389d03adb4 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -129,9 +129,9 @@ class Benchmark: def row(self, flds): return [self.final[f] if f in self.final else '' for f in flds] - def _read_json(filename): try: + with open(filename) as f: return json.loads(f.read()) except ValueError, e: diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py 
b/tools/profiling/microbenchmarks/bm_diff/bm_run.py index 9873df04121..59429299853 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -107,7 +107,8 @@ def _collect_bm_data(bm, cfg, name, reps, idx, loops): shortname='%s %s %s %s %d/%d' % (bm, line, cfg, name, idx + 1, loops), verbose_success=True, - timeout_seconds=None)) + timeout_seconds=60*10, + timeout_retries=3)) return jobs_list From a278759ec9e9d913afa84b0e50544571c4e664ff Mon Sep 17 00:00:00 2001 From: ncteisen Date: Thu, 1 Jun 2017 15:32:08 -0700 Subject: [PATCH 12/19] Add flakiness data to PR comment --- .../microbenchmarks/bm_diff/bm_diff.py | 27 ++++++++++++------- .../microbenchmarks/bm_diff/bm_main.py | 4 +-- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index c389d03adb4..97fb6b9310c 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -129,18 +129,23 @@ class Benchmark: def row(self, flds): return [self.final[f] if f in self.final else '' for f in flds] -def _read_json(filename): +def _read_json(filename, badfiles): + stripped = ".".join(filename.split(".")[:-2]) try: - with open(filename) as f: return json.loads(f.read()) except ValueError, e: + if stripped in badfiles: + badfiles[stripped] += 1 + else: + badfiles[stripped] = 1 return None def diff(bms, loops, track, old, new): benchmarks = collections.defaultdict(Benchmark) + badfiles = {} for bm in bms: for loop in range(0, loops): for line in subprocess.check_output( @@ -149,13 +154,13 @@ def diff(bms, loops, track, old, new): stripped_line = line.strip().replace("/", "_").replace( "<", "_").replace(">", "_") js_new_ctr = _read_json('%s.%s.counters.%s.%d.json' % - (bm, stripped_line, new, loop)) + (bm, stripped_line, new, loop), badfiles) js_new_opt = _read_json('%s.%s.opt.%s.%d.json' % - (bm, stripped_line, new, loop)) + (bm, stripped_line, new, loop), badfiles) js_old_ctr = _read_json('%s.%s.counters.%s.%d.json' % - (bm, stripped_line, old, loop)) + (bm, stripped_line, old, loop), badfiles) js_old_opt = _read_json('%s.%s.opt.%s.%d.json' % - (bm, stripped_line, old, loop)) + (bm, stripped_line, old, loop), badfiles) if js_new_ctr: for row in bm_json.expand_json(js_new_ctr, js_new_opt): @@ -181,12 +186,16 @@ def diff(bms, loops, track, old, new): for name in sorted(benchmarks.keys()): if benchmarks[name].skip(): continue rows.append([name] + benchmarks[name].row(fields)) + note += 'flakiness data = %s' % str(badfiles) if rows: - return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f') + return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'), note else: - return None + return None, note if __name__ == '__main__': args = _args() - print diff(args.benchmarks, args.loops, args.track, args.old, args.new) + diff, note = diff(args.benchmarks, args.loops, args.track, args.old, args.new) + print note + print "" + print diff diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_main.py b/tools/profiling/microbenchmarks/bm_diff/bm_main.py index 5be9aca411f..f7ef700de14 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_main.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_main.py @@ -133,13 +133,13 @@ def main(args): bm_run.run('new', args.benchmarks, args.jobs, args.loops, args.repetitions) bm_run.run(old, args.benchmarks, args.jobs, args.loops, args.repetitions) - diff = 
bm_diff.diff(args.benchmarks, args.loops, args.track, old, 'new') + diff, note = bm_diff.diff(args.benchmarks, args.loops, args.track, old, 'new') if diff: text = 'Performance differences noted:\n' + diff else: text = 'No significant performance differences' print text - comment_on_pr.comment_on_pr('```\n%s\n```' % text) + comment_on_pr.comment_on_pr('```\n%s\n\n%s\n```' % (note, text)) if __name__ == '__main__': From bf1ee7b12f4a3517e00e1e813ceaafa1ce41c6de Mon Sep 17 00:00:00 2001 From: ncteisen Date: Thu, 1 Jun 2017 16:18:25 -0700 Subject: [PATCH 13/19] More filename stripping --- tools/profiling/microbenchmarks/bm_diff/bm_diff.py | 2 +- tools/profiling/microbenchmarks/bm_diff/bm_run.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index 97fb6b9310c..82b39874dde 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -152,7 +152,7 @@ def diff(bms, loops, track, old, new): ['bm_diff_%s/opt/%s' % (old, bm), '--benchmark_list_tests']).splitlines(): stripped_line = line.strip().replace("/", "_").replace( - "<", "_").replace(">", "_") + "<", "_").replace(">", "_").replace(", ", "_") js_new_ctr = _read_json('%s.%s.counters.%s.%d.json' % (bm, stripped_line, new, loop), badfiles) js_new_opt = _read_json('%s.%s.opt.%s.%d.json' % diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py index 59429299853..b9cce3ae5a3 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -94,7 +94,7 @@ def _collect_bm_data(bm, cfg, name, reps, idx, loops): ['bm_diff_%s/%s/%s' % (name, cfg, bm), '--benchmark_list_tests']).splitlines(): stripped_line = line.strip().replace("/", "_").replace( - "<", "_").replace(">", "_") + "<", "_").replace(">", "_").replace(", ", "_") cmd = [ 'bm_diff_%s/%s/%s' % (name, cfg, bm), '--benchmark_filter=^%s$' % line, '--benchmark_out=%s.%s.%s.%s.%d.json' % From 2b96949d3d07f5a3a4c3fb5be7f597da63a28c13 Mon Sep 17 00:00:00 2001 From: ncteisen Date: Thu, 1 Jun 2017 16:19:49 -0700 Subject: [PATCH 14/19] Yapf fmt --- .../microbenchmarks/bm_diff/bm_diff.py | 18 ++++++++++++------ .../microbenchmarks/bm_diff/bm_main.py | 3 ++- .../microbenchmarks/bm_diff/bm_run.py | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index 82b39874dde..b049f41ca0c 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -129,6 +129,7 @@ class Benchmark: def row(self, flds): return [self.final[f] if f in self.final else '' for f in flds] + def _read_json(filename, badfiles): stripped = ".".join(filename.split(".")[:-2]) try: @@ -154,13 +155,17 @@ def diff(bms, loops, track, old, new): stripped_line = line.strip().replace("/", "_").replace( "<", "_").replace(">", "_").replace(", ", "_") js_new_ctr = _read_json('%s.%s.counters.%s.%d.json' % - (bm, stripped_line, new, loop), badfiles) + (bm, stripped_line, new, loop), + badfiles) js_new_opt = _read_json('%s.%s.opt.%s.%d.json' % - (bm, stripped_line, new, loop), badfiles) + (bm, stripped_line, new, loop), + badfiles) js_old_ctr = _read_json('%s.%s.counters.%s.%d.json' % - (bm, stripped_line, old, loop), badfiles) + (bm, stripped_line, old, loop), + badfiles) 
js_old_opt = _read_json('%s.%s.opt.%s.%d.json' % - (bm, stripped_line, old, loop), badfiles) + (bm, stripped_line, old, loop), + badfiles) if js_new_ctr: for row in bm_json.expand_json(js_new_ctr, js_new_opt): @@ -186,7 +191,7 @@ def diff(bms, loops, track, old, new): for name in sorted(benchmarks.keys()): if benchmarks[name].skip(): continue rows.append([name] + benchmarks[name].row(fields)) - note += 'flakiness data = %s' % str(badfiles) + note = 'flakiness data = %s' % str(badfiles) if rows: return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'), note else: @@ -195,7 +200,8 @@ def diff(bms, loops, track, old, new): if __name__ == '__main__': args = _args() - diff, note = diff(args.benchmarks, args.loops, args.track, args.old, args.new) + diff, note = diff(args.benchmarks, args.loops, args.track, args.old, + args.new) print note print "" print diff diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_main.py b/tools/profiling/microbenchmarks/bm_diff/bm_main.py index f7ef700de14..4c6eb8b48c1 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_main.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_main.py @@ -133,7 +133,8 @@ def main(args): bm_run.run('new', args.benchmarks, args.jobs, args.loops, args.repetitions) bm_run.run(old, args.benchmarks, args.jobs, args.loops, args.repetitions) - diff, note = bm_diff.diff(args.benchmarks, args.loops, args.track, old, 'new') + diff, note = bm_diff.diff(args.benchmarks, args.loops, args.track, old, + 'new') if diff: text = 'Performance differences noted:\n' + diff else: diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py index b9cce3ae5a3..e281e9e61c0 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -107,7 +107,7 @@ def _collect_bm_data(bm, cfg, name, reps, idx, loops): shortname='%s %s %s %s %d/%d' % (bm, line, cfg, name, idx + 1, loops), verbose_success=True, - timeout_seconds=60*10, + timeout_seconds=60 * 10, timeout_retries=3)) return jobs_list From f1e19fdd3118d36316223b6720f08e3aadcc2b4a Mon Sep 17 00:00:00 2001 From: ncteisen Date: Wed, 7 Jun 2017 11:50:04 -0700 Subject: [PATCH 15/19] Fix typos in README --- tools/profiling/microbenchmarks/bm_diff/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/README.md b/tools/profiling/microbenchmarks/bm_diff/README.md index 3d01ea25ba9..caa47702299 100644 --- a/tools/profiling/microbenchmarks/bm_diff/README.md +++ b/tools/profiling/microbenchmarks/bm_diff/README.md @@ -6,7 +6,7 @@ different performance tweaks. The tools allow you to save performance data from a baseline commit, then quickly compare data from your working branch to that baseline data to see if you have made any performance wins. -The tools operates with three concrete steps, which can be invoked separately, +The tools operate with three concrete steps, which can be invoked separately, or all together via the driver script, bm_main.py. This readme will describe the typical workflow for these scripts, then it will include sections on the details of every script for advanced usage. @@ -39,7 +39,7 @@ the output to the saved runs from master. If you have a deeper knowledge of these scripts, you can use them to do more fine tuned benchmark comparisons. For example, you could build, run, and save the benchmark output from two different base branches. 
Then you could diff both -of these baselines against you working branch to see how the different metrics +of these baselines against your working branch to see how the different metrics change. The rest of this doc goes over the details of what each of the individual modules accomplishes. From 7cd7b7fc433186f6b1f831ca11d015d28b3f7fac Mon Sep 17 00:00:00 2001 From: ncteisen Date: Wed, 7 Jun 2017 15:01:56 -0700 Subject: [PATCH 16/19] Differentiate between timeouts and crashes --- .../microbenchmarks/bm_diff/bm_diff.py | 32 ++++++++++++------- .../microbenchmarks/bm_diff/bm_main.py | 2 ++ .../microbenchmarks/bm_diff/bm_run.py | 3 +- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index b049f41ca0c..72a8d11eea9 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -51,7 +51,7 @@ def _median(ary): ary = sorted(ary) n = len(ary) if n % 2 == 0: - return (ary[n / 2] + ary[n / 2 + 1]) / 2.0 + return (ary[(n - 1) / 2] + ary[(n - 1) / 2 + 1]) / 2.0 else: return ary[n / 2] @@ -130,23 +130,30 @@ class Benchmark: return [self.final[f] if f in self.final else '' for f in flds] -def _read_json(filename, badfiles): +def _read_json(filename, badjson_files, nonexistant_files): stripped = ".".join(filename.split(".")[:-2]) try: with open(filename) as f: return json.loads(f.read()) + except IOError, e: + if stripped in nonexistant_files: + nonexistant_files[stripped] += 1 + else: + nonexistant_files[stripped] = 1 + return None except ValueError, e: - if stripped in badfiles: - badfiles[stripped] += 1 + if stripped in badjson_files: + badjson_files[stripped] += 1 else: - badfiles[stripped] = 1 + badjson_files[stripped] = 1 return None def diff(bms, loops, track, old, new): benchmarks = collections.defaultdict(Benchmark) - badfiles = {} + badjson_files = {} + nonexistant_files = {} for bm in bms: for loop in range(0, loops): for line in subprocess.check_output( @@ -156,16 +163,16 @@ def diff(bms, loops, track, old, new): "<", "_").replace(">", "_").replace(", ", "_") js_new_ctr = _read_json('%s.%s.counters.%s.%d.json' % (bm, stripped_line, new, loop), - badfiles) + badjson_files, nonexistant_files) js_new_opt = _read_json('%s.%s.opt.%s.%d.json' % (bm, stripped_line, new, loop), - badfiles) + badjson_files, nonexistant_files) js_old_ctr = _read_json('%s.%s.counters.%s.%d.json' % (bm, stripped_line, old, loop), - badfiles) + badjson_files, nonexistant_files) js_old_opt = _read_json('%s.%s.opt.%s.%d.json' % (bm, stripped_line, old, loop), - badfiles) + badjson_files, nonexistant_files) if js_new_ctr: for row in bm_json.expand_json(js_new_ctr, js_new_opt): @@ -191,7 +198,8 @@ def diff(bms, loops, track, old, new): for name in sorted(benchmarks.keys()): if benchmarks[name].skip(): continue rows.append([name] + benchmarks[name].row(fields)) - note = 'flakiness data = %s' % str(badfiles) + note = 'Corrupt JSON data (indicates timeout or crash) = %s' % str(badjson_files) + note += '\n\nMissing files (new benchmark) = %s' % str(nonexistant_files) if rows: return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'), note else: @@ -204,4 +212,4 @@ if __name__ == '__main__': args.new) print note print "" - print diff + print diff if diff else "No performance differences" diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_main.py b/tools/profiling/microbenchmarks/bm_diff/bm_main.py index 4c6eb8b48c1..47381f4ec8d 100755 --- 
a/tools/profiling/microbenchmarks/bm_diff/bm_main.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_main.py @@ -139,6 +139,8 @@ def main(args): text = 'Performance differences noted:\n' + diff else: text = 'No significant performance differences' + print note + print "" print text comment_on_pr.comment_on_pr('```\n%s\n\n%s\n```' % (note, text)) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py index e281e9e61c0..6ad9f1a3b70 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -107,8 +107,7 @@ def _collect_bm_data(bm, cfg, name, reps, idx, loops): shortname='%s %s %s %s %d/%d' % (bm, line, cfg, name, idx + 1, loops), verbose_success=True, - timeout_seconds=60 * 10, - timeout_retries=3)) + timeout_seconds=60 * 2)) return jobs_list From 0ac47d28cdc597804e50ac83ae89308aab250f2f Mon Sep 17 00:00:00 2001 From: ncteisen Date: Wed, 7 Jun 2017 15:19:24 -0700 Subject: [PATCH 17/19] Github comments --- tools/profiling/microbenchmarks/bm_diff/bm_build.py | 2 +- tools/profiling/microbenchmarks/bm_diff/bm_constants.py | 2 +- tools/profiling/microbenchmarks/bm_diff/bm_diff.py | 7 +++---- tools/profiling/microbenchmarks/bm_diff/bm_main.py | 4 +--- tools/profiling/microbenchmarks/bm_diff/bm_run.py | 4 ++-- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_build.py b/tools/profiling/microbenchmarks/bm_diff/bm_build.py index 3d1ccbae30b..bc0310fbdd6 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_build.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_build.py @@ -28,7 +28,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -### Python utility to build opt and counters benchmarks """ +""" Python utility to build opt and counters benchmarks """ import bm_constants diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_constants.py b/tools/profiling/microbenchmarks/bm_diff/bm_constants.py index bcefdfb6fe6..7d2781ea159 100644 --- a/tools/profiling/microbenchmarks/bm_diff/bm_constants.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_constants.py @@ -28,7 +28,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-### Configurable constants for the bm_*.py family """ +""" Configurable constants for the bm_*.py family """ _AVAILABLE_BENCHMARK_TESTS = [ 'bm_fullstack_unary_ping_pong', 'bm_fullstack_streaming_ping_pong', diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index 72a8d11eea9..82791256972 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -48,6 +48,7 @@ verbose = False def _median(ary): + assert(len(ary)) ary = sorted(ary) n = len(ary) if n % 2 == 0: @@ -83,7 +84,7 @@ def _args(): argp.add_argument('-n', '--new', type=str, help='New benchmark name') argp.add_argument('-o', '--old', type=str, help='Old benchmark name') argp.add_argument( - '-v', '--verbose', type=bool, help='print details of before/after') + '-v', '--verbose', type=bool, help='Print details of before/after') args = argp.parse_args() global verbose if args.verbose: verbose = True @@ -210,6 +211,4 @@ if __name__ == '__main__': args = _args() diff, note = diff(args.benchmarks, args.loops, args.track, args.old, args.new) - print note - print "" - print diff if diff else "No performance differences" + print('%s\n%s' % (note, diff if diff else "No performance differences")) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_main.py b/tools/profiling/microbenchmarks/bm_diff/bm_main.py index 47381f4ec8d..3879848d10e 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_main.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_main.py @@ -139,9 +139,7 @@ def main(args): text = 'Performance differences noted:\n' + diff else: text = 'No significant performance differences' - print note - print "" - print text + print('%s\n%s' % (note, text)) comment_on_pr.comment_on_pr('```\n%s\n\n%s\n```' % (note, text)) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py index 6ad9f1a3b70..1b3d664d278 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -28,7 +28,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -### Python utility to run opt and counters benchmarks and save json output """ +""" Python utility to run opt and counters benchmarks and save json output """ import bm_constants @@ -84,7 +84,7 @@ def _args(): args = argp.parse_args() assert args.name if args.loops < 3: - print "WARNING: This run will likely be noisy. Increase loops." + print "WARNING: This run will likely be noisy. Increase loops to at least 3." 
return args From 251b025b89bab55fc8db992436494ce6914241ab Mon Sep 17 00:00:00 2001 From: ncteisen Date: Wed, 7 Jun 2017 15:21:15 -0700 Subject: [PATCH 18/19] Yapf code --- tools/profiling/microbenchmarks/bm_diff/bm_build.py | 1 - tools/profiling/microbenchmarks/bm_diff/bm_constants.py | 1 - tools/profiling/microbenchmarks/bm_diff/bm_diff.py | 7 ++++--- tools/profiling/microbenchmarks/bm_diff/bm_run.py | 1 - 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_build.py b/tools/profiling/microbenchmarks/bm_diff/bm_build.py index bc0310fbdd6..4edfe463bdc 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_build.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_build.py @@ -27,7 +27,6 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - """ Python utility to build opt and counters benchmarks """ import bm_constants diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_constants.py b/tools/profiling/microbenchmarks/bm_diff/bm_constants.py index 7d2781ea159..616dd6cdf7e 100644 --- a/tools/profiling/microbenchmarks/bm_diff/bm_constants.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_constants.py @@ -27,7 +27,6 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - """ Configurable constants for the bm_*.py family """ _AVAILABLE_BENCHMARK_TESTS = [ diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index 82791256972..47cd35c2188 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -48,7 +48,7 @@ verbose = False def _median(ary): - assert(len(ary)) + assert (len(ary)) ary = sorted(ary) n = len(ary) if n % 2 == 0: @@ -141,7 +141,7 @@ def _read_json(filename, badjson_files, nonexistant_files): nonexistant_files[stripped] += 1 else: nonexistant_files[stripped] = 1 - return None + return None except ValueError, e: if stripped in badjson_files: badjson_files[stripped] += 1 @@ -199,7 +199,8 @@ def diff(bms, loops, track, old, new): for name in sorted(benchmarks.keys()): if benchmarks[name].skip(): continue rows.append([name] + benchmarks[name].row(fields)) - note = 'Corrupt JSON data (indicates timeout or crash) = %s' % str(badjson_files) + note = 'Corrupt JSON data (indicates timeout or crash) = %s' % str( + badjson_files) note += '\n\nMissing files (new benchmark) = %s' % str(nonexistant_files) if rows: return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'), note diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py index 1b3d664d278..0c2e7e36f6a 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -27,7 +27,6 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- """ Python utility to run opt and counters benchmarks and save json output """ import bm_constants From 07639167fcdf78c5c8c6fce366a78e9eab509842 Mon Sep 17 00:00:00 2001 From: ncteisen Date: Thu, 8 Jun 2017 08:08:25 -0700 Subject: [PATCH 19/19] 2 space indentation --- .../microbenchmarks/bm_diff/bm_build.py | 76 ++--- .../microbenchmarks/bm_diff/bm_constants.py | 14 +- .../microbenchmarks/bm_diff/bm_diff.py | 292 +++++++++--------- .../microbenchmarks/bm_diff/bm_main.py | 178 +++++------ .../microbenchmarks/bm_diff/bm_run.py | 142 ++++----- .../microbenchmarks/bm_diff/bm_speedup.py | 54 ++-- 6 files changed, 378 insertions(+), 378 deletions(-) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_build.py b/tools/profiling/microbenchmarks/bm_diff/bm_build.py index 4edfe463bdc..644d12b83ec 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_build.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_build.py @@ -39,50 +39,50 @@ import shutil def _args(): - argp = argparse.ArgumentParser(description='Builds microbenchmarks') - argp.add_argument( - '-b', - '--benchmarks', - nargs='+', - choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, - default=bm_constants._AVAILABLE_BENCHMARK_TESTS, - help='Which benchmarks to build') - argp.add_argument( - '-j', - '--jobs', - type=int, - default=multiprocessing.cpu_count(), - help='How many CPUs to dedicate to this task') - argp.add_argument( - '-n', - '--name', - type=str, - help='Unique name of this build. To be used as a handle to pass to the other bm* scripts' - ) - args = argp.parse_args() - assert args.name - return args + argp = argparse.ArgumentParser(description='Builds microbenchmarks') + argp.add_argument( + '-b', + '--benchmarks', + nargs='+', + choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, + default=bm_constants._AVAILABLE_BENCHMARK_TESTS, + help='Which benchmarks to build') + argp.add_argument( + '-j', + '--jobs', + type=int, + default=multiprocessing.cpu_count(), + help='How many CPUs to dedicate to this task') + argp.add_argument( + '-n', + '--name', + type=str, + help='Unique name of this build. 
To be used as a handle to pass to the other bm* scripts' + ) + args = argp.parse_args() + assert args.name + return args def _make_cmd(cfg, benchmarks, jobs): - return ['make'] + benchmarks + ['CONFIG=%s' % cfg, '-j', '%d' % jobs] + return ['make'] + benchmarks + ['CONFIG=%s' % cfg, '-j', '%d' % jobs] def build(name, benchmarks, jobs): - shutil.rmtree('bm_diff_%s' % name, ignore_errors=True) - subprocess.check_call(['git', 'submodule', 'update']) - try: - subprocess.check_call(_make_cmd('opt', benchmarks, jobs)) - subprocess.check_call(_make_cmd('counters', benchmarks, jobs)) - except subprocess.CalledProcessError, e: - subprocess.check_call(['make', 'clean']) - subprocess.check_call(_make_cmd('opt', benchmarks, jobs)) - subprocess.check_call(_make_cmd('counters', benchmarks, jobs)) - os.rename( - 'bins', - 'bm_diff_%s' % name,) + shutil.rmtree('bm_diff_%s' % name, ignore_errors=True) + subprocess.check_call(['git', 'submodule', 'update']) + try: + subprocess.check_call(_make_cmd('opt', benchmarks, jobs)) + subprocess.check_call(_make_cmd('counters', benchmarks, jobs)) + except subprocess.CalledProcessError, e: + subprocess.check_call(['make', 'clean']) + subprocess.check_call(_make_cmd('opt', benchmarks, jobs)) + subprocess.check_call(_make_cmd('counters', benchmarks, jobs)) + os.rename( + 'bins', + 'bm_diff_%s' % name,) if __name__ == '__main__': - args = _args() - build(args.name, args.benchmarks, args.jobs) + args = _args() + build(args.name, args.benchmarks, args.jobs) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_constants.py b/tools/profiling/microbenchmarks/bm_diff/bm_constants.py index 616dd6cdf7e..83dcbecce49 100644 --- a/tools/profiling/microbenchmarks/bm_diff/bm_constants.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_constants.py @@ -30,13 +30,13 @@ """ Configurable constants for the bm_*.py family """ _AVAILABLE_BENCHMARK_TESTS = [ - 'bm_fullstack_unary_ping_pong', 'bm_fullstack_streaming_ping_pong', - 'bm_fullstack_streaming_pump', 'bm_closure', 'bm_cq', 'bm_call_create', - 'bm_error', 'bm_chttp2_hpack', 'bm_chttp2_transport', 'bm_pollset', - 'bm_metadata', 'bm_fullstack_trickle' + 'bm_fullstack_unary_ping_pong', 'bm_fullstack_streaming_ping_pong', + 'bm_fullstack_streaming_pump', 'bm_closure', 'bm_cq', 'bm_call_create', + 'bm_error', 'bm_chttp2_hpack', 'bm_chttp2_transport', 'bm_pollset', + 'bm_metadata', 'bm_fullstack_trickle' ] _INTERESTING = ('cpu_time', 'real_time', 'locks_per_iteration', - 'allocs_per_iteration', 'writes_per_iteration', - 'atm_cas_per_iteration', 'atm_add_per_iteration', - 'nows_per_iteration',) + 'allocs_per_iteration', 'writes_per_iteration', + 'atm_cas_per_iteration', 'atm_add_per_iteration', + 'nows_per_iteration',) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py index 47cd35c2188..2ee205b80c0 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py @@ -48,168 +48,168 @@ verbose = False def _median(ary): - assert (len(ary)) - ary = sorted(ary) - n = len(ary) - if n % 2 == 0: - return (ary[(n - 1) / 2] + ary[(n - 1) / 2 + 1]) / 2.0 - else: - return ary[n / 2] + assert (len(ary)) + ary = sorted(ary) + n = len(ary) + if n % 2 == 0: + return (ary[(n - 1) / 2] + ary[(n - 1) / 2 + 1]) / 2.0 + else: + return ary[n / 2] def _args(): - argp = argparse.ArgumentParser( - description='Perform diff on microbenchmarks') - argp.add_argument( - '-t', - '--track', - choices=sorted(bm_constants._INTERESTING), - 
nargs='+', - default=sorted(bm_constants._INTERESTING), - help='Which metrics to track') - argp.add_argument( - '-b', - '--benchmarks', - nargs='+', - choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, - default=bm_constants._AVAILABLE_BENCHMARK_TESTS, - help='Which benchmarks to run') - argp.add_argument( - '-l', - '--loops', - type=int, - default=20, - help='Number of times to loops the benchmarks. Must match what was passed to bm_run.py' - ) - argp.add_argument('-n', '--new', type=str, help='New benchmark name') - argp.add_argument('-o', '--old', type=str, help='Old benchmark name') - argp.add_argument( - '-v', '--verbose', type=bool, help='Print details of before/after') - args = argp.parse_args() - global verbose - if args.verbose: verbose = True - assert args.new - assert args.old - return args + argp = argparse.ArgumentParser( + description='Perform diff on microbenchmarks') + argp.add_argument( + '-t', + '--track', + choices=sorted(bm_constants._INTERESTING), + nargs='+', + default=sorted(bm_constants._INTERESTING), + help='Which metrics to track') + argp.add_argument( + '-b', + '--benchmarks', + nargs='+', + choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, + default=bm_constants._AVAILABLE_BENCHMARK_TESTS, + help='Which benchmarks to run') + argp.add_argument( + '-l', + '--loops', + type=int, + default=20, + help='Number of times to loops the benchmarks. Must match what was passed to bm_run.py' + ) + argp.add_argument('-n', '--new', type=str, help='New benchmark name') + argp.add_argument('-o', '--old', type=str, help='Old benchmark name') + argp.add_argument( + '-v', '--verbose', type=bool, help='Print details of before/after') + args = argp.parse_args() + global verbose + if args.verbose: verbose = True + assert args.new + assert args.old + return args def _maybe_print(str): - if verbose: print str + if verbose: print str class Benchmark: - def __init__(self): - self.samples = { - True: collections.defaultdict(list), - False: collections.defaultdict(list) - } - self.final = {} - - def add_sample(self, track, data, new): - for f in track: - if f in data: - self.samples[new][f].append(float(data[f])) - - def process(self, track, new_name, old_name): - for f in sorted(track): - new = self.samples[True][f] - old = self.samples[False][f] - if not new or not old: continue - mdn_diff = abs(_median(new) - _median(old)) - _maybe_print('%s: %s=%r %s=%r mdn_diff=%r' % - (f, new_name, new, old_name, old, mdn_diff)) - s = bm_speedup.speedup(new, old) - if abs(s) > 3 and mdn_diff > 0.5: - self.final[f] = '%+d%%' % s - return self.final.keys() - - def skip(self): - return not self.final - - def row(self, flds): - return [self.final[f] if f in self.final else '' for f in flds] + def __init__(self): + self.samples = { + True: collections.defaultdict(list), + False: collections.defaultdict(list) + } + self.final = {} + + def add_sample(self, track, data, new): + for f in track: + if f in data: + self.samples[new][f].append(float(data[f])) + + def process(self, track, new_name, old_name): + for f in sorted(track): + new = self.samples[True][f] + old = self.samples[False][f] + if not new or not old: continue + mdn_diff = abs(_median(new) - _median(old)) + _maybe_print('%s: %s=%r %s=%r mdn_diff=%r' % + (f, new_name, new, old_name, old, mdn_diff)) + s = bm_speedup.speedup(new, old) + if abs(s) > 3 and mdn_diff > 0.5: + self.final[f] = '%+d%%' % s + return self.final.keys() + + def skip(self): + return not self.final + + def row(self, flds): + return [self.final[f] if f in self.final else '' for f in 
flds] def _read_json(filename, badjson_files, nonexistant_files): - stripped = ".".join(filename.split(".")[:-2]) - try: - with open(filename) as f: - return json.loads(f.read()) - except IOError, e: - if stripped in nonexistant_files: - nonexistant_files[stripped] += 1 - else: - nonexistant_files[stripped] = 1 - return None - except ValueError, e: - if stripped in badjson_files: - badjson_files[stripped] += 1 - else: - badjson_files[stripped] = 1 - return None + stripped = ".".join(filename.split(".")[:-2]) + try: + with open(filename) as f: + return json.loads(f.read()) + except IOError, e: + if stripped in nonexistant_files: + nonexistant_files[stripped] += 1 + else: + nonexistant_files[stripped] = 1 + return None + except ValueError, e: + if stripped in badjson_files: + badjson_files[stripped] += 1 + else: + badjson_files[stripped] = 1 + return None def diff(bms, loops, track, old, new): - benchmarks = collections.defaultdict(Benchmark) - - badjson_files = {} - nonexistant_files = {} - for bm in bms: - for loop in range(0, loops): - for line in subprocess.check_output( - ['bm_diff_%s/opt/%s' % (old, bm), - '--benchmark_list_tests']).splitlines(): - stripped_line = line.strip().replace("/", "_").replace( - "<", "_").replace(">", "_").replace(", ", "_") - js_new_ctr = _read_json('%s.%s.counters.%s.%d.json' % - (bm, stripped_line, new, loop), - badjson_files, nonexistant_files) - js_new_opt = _read_json('%s.%s.opt.%s.%d.json' % - (bm, stripped_line, new, loop), - badjson_files, nonexistant_files) - js_old_ctr = _read_json('%s.%s.counters.%s.%d.json' % - (bm, stripped_line, old, loop), - badjson_files, nonexistant_files) - js_old_opt = _read_json('%s.%s.opt.%s.%d.json' % - (bm, stripped_line, old, loop), - badjson_files, nonexistant_files) - - if js_new_ctr: - for row in bm_json.expand_json(js_new_ctr, js_new_opt): - name = row['cpp_name'] - if name.endswith('_mean') or name.endswith('_stddev'): - continue - benchmarks[name].add_sample(track, row, True) - if js_old_ctr: - for row in bm_json.expand_json(js_old_ctr, js_old_opt): - name = row['cpp_name'] - if name.endswith('_mean') or name.endswith('_stddev'): - continue - benchmarks[name].add_sample(track, row, False) - - really_interesting = set() - for name, bm in benchmarks.items(): - _maybe_print(name) - really_interesting.update(bm.process(track, new, old)) - fields = [f for f in track if f in really_interesting] - - headers = ['Benchmark'] + fields - rows = [] - for name in sorted(benchmarks.keys()): - if benchmarks[name].skip(): continue - rows.append([name] + benchmarks[name].row(fields)) - note = 'Corrupt JSON data (indicates timeout or crash) = %s' % str( - badjson_files) - note += '\n\nMissing files (new benchmark) = %s' % str(nonexistant_files) - if rows: - return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'), note - else: - return None, note + benchmarks = collections.defaultdict(Benchmark) + + badjson_files = {} + nonexistant_files = {} + for bm in bms: + for loop in range(0, loops): + for line in subprocess.check_output( + ['bm_diff_%s/opt/%s' % (old, bm), + '--benchmark_list_tests']).splitlines(): + stripped_line = line.strip().replace("/", "_").replace( + "<", "_").replace(">", "_").replace(", ", "_") + js_new_ctr = _read_json('%s.%s.counters.%s.%d.json' % + (bm, stripped_line, new, loop), + badjson_files, nonexistant_files) + js_new_opt = _read_json('%s.%s.opt.%s.%d.json' % + (bm, stripped_line, new, loop), + badjson_files, nonexistant_files) + js_old_ctr = _read_json('%s.%s.counters.%s.%d.json' % + (bm, 
stripped_line, old, loop), + badjson_files, nonexistant_files) + js_old_opt = _read_json('%s.%s.opt.%s.%d.json' % + (bm, stripped_line, old, loop), + badjson_files, nonexistant_files) + + if js_new_ctr: + for row in bm_json.expand_json(js_new_ctr, js_new_opt): + name = row['cpp_name'] + if name.endswith('_mean') or name.endswith('_stddev'): + continue + benchmarks[name].add_sample(track, row, True) + if js_old_ctr: + for row in bm_json.expand_json(js_old_ctr, js_old_opt): + name = row['cpp_name'] + if name.endswith('_mean') or name.endswith('_stddev'): + continue + benchmarks[name].add_sample(track, row, False) + + really_interesting = set() + for name, bm in benchmarks.items(): + _maybe_print(name) + really_interesting.update(bm.process(track, new, old)) + fields = [f for f in track if f in really_interesting] + + headers = ['Benchmark'] + fields + rows = [] + for name in sorted(benchmarks.keys()): + if benchmarks[name].skip(): continue + rows.append([name] + benchmarks[name].row(fields)) + note = 'Corrupt JSON data (indicates timeout or crash) = %s' % str( + badjson_files) + note += '\n\nMissing files (new benchmark) = %s' % str(nonexistant_files) + if rows: + return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'), note + else: + return None, note if __name__ == '__main__': - args = _args() - diff, note = diff(args.benchmarks, args.loops, args.track, args.old, - args.new) - print('%s\n%s' % (note, diff if diff else "No performance differences")) + args = _args() + diff, note = diff(args.benchmarks, args.loops, args.track, args.old, + args.new) + print('%s\n%s' % (note, diff if diff else "No performance differences")) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_main.py b/tools/profiling/microbenchmarks/bm_diff/bm_main.py index 3879848d10e..3817522dffd 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_main.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_main.py @@ -41,108 +41,108 @@ import multiprocessing import subprocess sys.path.append( - os.path.join( - os.path.dirname(sys.argv[0]), '..', '..', 'run_tests', 'python_utils')) + os.path.join( + os.path.dirname(sys.argv[0]), '..', '..', 'run_tests', 'python_utils')) import comment_on_pr def _args(): - argp = argparse.ArgumentParser( - description='Perform diff on microbenchmarks') - argp.add_argument( - '-t', - '--track', - choices=sorted(bm_constants._INTERESTING), - nargs='+', - default=sorted(bm_constants._INTERESTING), - help='Which metrics to track') - argp.add_argument( - '-b', - '--benchmarks', - nargs='+', - choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, - default=bm_constants._AVAILABLE_BENCHMARK_TESTS, - help='Which benchmarks to run') - argp.add_argument( - '-d', - '--diff_base', - type=str, - help='Commit or branch to compare the current one to') - argp.add_argument( - '-o', - '--old', - default='old', - type=str, - help='Name of baseline run to compare to. Ususally just called "old"') - argp.add_argument( - '-r', - '--repetitions', - type=int, - default=1, - help='Number of repetitions to pass to the benchmarks') - argp.add_argument( - '-l', - '--loops', - type=int, - default=20, - help='Number of times to loops the benchmarks. More loops cuts down on noise' - ) - argp.add_argument( - '-j', - '--jobs', - type=int, - default=multiprocessing.cpu_count(), - help='Number of CPUs to use') - args = argp.parse_args() - assert args.diff_base or args.old, "One of diff_base or old must be set!" - if args.loops < 3: - print "WARNING: This run will likely be noisy. Increase loops." 
- return args + argp = argparse.ArgumentParser( + description='Perform diff on microbenchmarks') + argp.add_argument( + '-t', + '--track', + choices=sorted(bm_constants._INTERESTING), + nargs='+', + default=sorted(bm_constants._INTERESTING), + help='Which metrics to track') + argp.add_argument( + '-b', + '--benchmarks', + nargs='+', + choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, + default=bm_constants._AVAILABLE_BENCHMARK_TESTS, + help='Which benchmarks to run') + argp.add_argument( + '-d', + '--diff_base', + type=str, + help='Commit or branch to compare the current one to') + argp.add_argument( + '-o', + '--old', + default='old', + type=str, + help='Name of baseline run to compare to. Ususally just called "old"') + argp.add_argument( + '-r', + '--repetitions', + type=int, + default=1, + help='Number of repetitions to pass to the benchmarks') + argp.add_argument( + '-l', + '--loops', + type=int, + default=20, + help='Number of times to loops the benchmarks. More loops cuts down on noise' + ) + argp.add_argument( + '-j', + '--jobs', + type=int, + default=multiprocessing.cpu_count(), + help='Number of CPUs to use') + args = argp.parse_args() + assert args.diff_base or args.old, "One of diff_base or old must be set!" + if args.loops < 3: + print "WARNING: This run will likely be noisy. Increase loops." + return args def eintr_be_gone(fn): - """Run fn until it doesn't stop because of EINTR""" + """Run fn until it doesn't stop because of EINTR""" - def inner(*args): - while True: - try: - return fn(*args) - except IOError, e: - if e.errno != errno.EINTR: - raise + def inner(*args): + while True: + try: + return fn(*args) + except IOError, e: + if e.errno != errno.EINTR: + raise - return inner + return inner def main(args): - bm_build.build('new', args.benchmarks, args.jobs) - - old = args.old - if args.diff_base: - old = 'old' - where_am_i = subprocess.check_output( - ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip() - subprocess.check_call(['git', 'checkout', args.diff_base]) - try: - bm_build.build('old', args.benchmarks, args.jobs) - finally: - subprocess.check_call(['git', 'checkout', where_am_i]) - subprocess.check_call(['git', 'submodule', 'update']) - - bm_run.run('new', args.benchmarks, args.jobs, args.loops, args.repetitions) - bm_run.run(old, args.benchmarks, args.jobs, args.loops, args.repetitions) - - diff, note = bm_diff.diff(args.benchmarks, args.loops, args.track, old, - 'new') - if diff: - text = 'Performance differences noted:\n' + diff - else: - text = 'No significant performance differences' - print('%s\n%s' % (note, text)) - comment_on_pr.comment_on_pr('```\n%s\n\n%s\n```' % (note, text)) + bm_build.build('new', args.benchmarks, args.jobs) + + old = args.old + if args.diff_base: + old = 'old' + where_am_i = subprocess.check_output( + ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip() + subprocess.check_call(['git', 'checkout', args.diff_base]) + try: + bm_build.build('old', args.benchmarks, args.jobs) + finally: + subprocess.check_call(['git', 'checkout', where_am_i]) + subprocess.check_call(['git', 'submodule', 'update']) + + bm_run.run('new', args.benchmarks, args.jobs, args.loops, args.repetitions) + bm_run.run(old, args.benchmarks, args.jobs, args.loops, args.repetitions) + + diff, note = bm_diff.diff(args.benchmarks, args.loops, args.track, old, + 'new') + if diff: + text = 'Performance differences noted:\n' + diff + else: + text = 'No significant performance differences' + print('%s\n%s' % (note, text)) + comment_on_pr.comment_on_pr('```\n%s\n\n%s\n```' % 
(note, text)) if __name__ == '__main__': - args = _args() - main(args) + args = _args() + main(args) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py index 0c2e7e36f6a..ba04e879f70 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py @@ -40,87 +40,87 @@ import sys import os sys.path.append( - os.path.join( - os.path.dirname(sys.argv[0]), '..', '..', '..', 'run_tests', - 'python_utils')) + os.path.join( + os.path.dirname(sys.argv[0]), '..', '..', '..', 'run_tests', + 'python_utils')) import jobset def _args(): - argp = argparse.ArgumentParser(description='Runs microbenchmarks') - argp.add_argument( - '-b', - '--benchmarks', - nargs='+', - choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, - default=bm_constants._AVAILABLE_BENCHMARK_TESTS, - help='Benchmarks to run') - argp.add_argument( - '-j', - '--jobs', - type=int, - default=multiprocessing.cpu_count(), - help='Number of CPUs to use') - argp.add_argument( - '-n', - '--name', - type=str, - help='Unique name of the build to run. Needs to match the handle passed to bm_build.py' - ) - argp.add_argument( - '-r', - '--repetitions', - type=int, - default=1, - help='Number of repetitions to pass to the benchmarks') - argp.add_argument( - '-l', - '--loops', - type=int, - default=20, - help='Number of times to loops the benchmarks. More loops cuts down on noise' - ) - args = argp.parse_args() - assert args.name - if args.loops < 3: - print "WARNING: This run will likely be noisy. Increase loops to at least 3." - return args + argp = argparse.ArgumentParser(description='Runs microbenchmarks') + argp.add_argument( + '-b', + '--benchmarks', + nargs='+', + choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, + default=bm_constants._AVAILABLE_BENCHMARK_TESTS, + help='Benchmarks to run') + argp.add_argument( + '-j', + '--jobs', + type=int, + default=multiprocessing.cpu_count(), + help='Number of CPUs to use') + argp.add_argument( + '-n', + '--name', + type=str, + help='Unique name of the build to run. Needs to match the handle passed to bm_build.py' + ) + argp.add_argument( + '-r', + '--repetitions', + type=int, + default=1, + help='Number of repetitions to pass to the benchmarks') + argp.add_argument( + '-l', + '--loops', + type=int, + default=20, + help='Number of times to loops the benchmarks. More loops cuts down on noise' + ) + args = argp.parse_args() + assert args.name + if args.loops < 3: + print "WARNING: This run will likely be noisy. Increase loops to at least 3." 
+ return args def _collect_bm_data(bm, cfg, name, reps, idx, loops): - jobs_list = [] - for line in subprocess.check_output( - ['bm_diff_%s/%s/%s' % (name, cfg, bm), - '--benchmark_list_tests']).splitlines(): - stripped_line = line.strip().replace("/", "_").replace( - "<", "_").replace(">", "_").replace(", ", "_") - cmd = [ - 'bm_diff_%s/%s/%s' % (name, cfg, bm), '--benchmark_filter=^%s$' % - line, '--benchmark_out=%s.%s.%s.%s.%d.json' % - (bm, stripped_line, cfg, name, idx), '--benchmark_out_format=json', - '--benchmark_repetitions=%d' % (reps) - ] - jobs_list.append( - jobset.JobSpec( - cmd, - shortname='%s %s %s %s %d/%d' % (bm, line, cfg, name, idx + 1, - loops), - verbose_success=True, - timeout_seconds=60 * 2)) - return jobs_list + jobs_list = [] + for line in subprocess.check_output( + ['bm_diff_%s/%s/%s' % (name, cfg, bm), + '--benchmark_list_tests']).splitlines(): + stripped_line = line.strip().replace("/", "_").replace( + "<", "_").replace(">", "_").replace(", ", "_") + cmd = [ + 'bm_diff_%s/%s/%s' % (name, cfg, bm), '--benchmark_filter=^%s$' % + line, '--benchmark_out=%s.%s.%s.%s.%d.json' % + (bm, stripped_line, cfg, name, idx), '--benchmark_out_format=json', + '--benchmark_repetitions=%d' % (reps) + ] + jobs_list.append( + jobset.JobSpec( + cmd, + shortname='%s %s %s %s %d/%d' % (bm, line, cfg, name, idx + 1, + loops), + verbose_success=True, + timeout_seconds=60 * 2)) + return jobs_list def run(name, benchmarks, jobs, loops, reps): - jobs_list = [] - for loop in range(0, loops): - for bm in benchmarks: - jobs_list += _collect_bm_data(bm, 'opt', name, reps, loop, loops) - jobs_list += _collect_bm_data(bm, 'counters', name, reps, loop, - loops) - random.shuffle(jobs_list, random.SystemRandom().random) - jobset.run(jobs_list, maxjobs=jobs) + jobs_list = [] + for loop in range(0, loops): + for bm in benchmarks: + jobs_list += _collect_bm_data(bm, 'opt', name, reps, loop, loops) + jobs_list += _collect_bm_data(bm, 'counters', name, reps, loop, + loops) + random.shuffle(jobs_list, random.SystemRandom().random) + jobset.run(jobs_list, maxjobs=jobs) if __name__ == '__main__': - args = _args() - run(args.name, args.benchmarks, args.jobs, args.loops, args.repetitions) + args = _args() + run(args.name, args.benchmarks, args.jobs, args.loops, args.repetitions) diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py index 63f07aea38c..9c70b92d26e 100755 --- a/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py +++ b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py @@ -36,39 +36,39 @@ _THRESHOLD = 1e-10 def scale(a, mul): - return [x * mul for x in a] + return [x * mul for x in a] def cmp(a, b): - return stats.ttest_ind(a, b) + return stats.ttest_ind(a, b) def speedup(new, old): - if (len(set(new))) == 1 and new == old: return 0 - s0, p0 = cmp(new, old) - if math.isnan(p0): return 0 - if s0 == 0: return 0 - if p0 > _THRESHOLD: return 0 - if s0 < 0: - pct = 1 - while pct < 101: - sp, pp = cmp(new, scale(old, 1 - pct / 100.0)) - if sp > 0: break - if pp > _THRESHOLD: break - pct += 1 - return -(pct - 1) - else: - pct = 1 - while pct < 100000: - sp, pp = cmp(new, scale(old, 1 + pct / 100.0)) - if sp < 0: break - if pp > _THRESHOLD: break - pct += 1 - return pct - 1 + if (len(set(new))) == 1 and new == old: return 0 + s0, p0 = cmp(new, old) + if math.isnan(p0): return 0 + if s0 == 0: return 0 + if p0 > _THRESHOLD: return 0 + if s0 < 0: + pct = 1 + while pct < 101: + sp, pp = cmp(new, scale(old, 1 - pct / 100.0)) + if sp > 
0: break + if pp > _THRESHOLD: break + pct += 1 + return -(pct - 1) + else: + pct = 1 + while pct < 100000: + sp, pp = cmp(new, scale(old, 1 + pct / 100.0)) + if sp < 0: break + if pp > _THRESHOLD: break + pct += 1 + return pct - 1 if __name__ == "__main__": - new = [1.0, 1.0, 1.0, 1.0] - old = [2.0, 2.0, 2.0, 2.0] - print speedup(new, old) - print speedup(old, new) + new = [1.0, 1.0, 1.0, 1.0] + old = [2.0, 2.0, 2.0, 2.0] + print speedup(new, old) + print speedup(old, new)
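
Note (not part of the patch series): after this split, bm_run.py and bm_diff.py communicate only through the JSON files named in _collect_bm_data() and diff() above. A minimal sketch of that shared naming convention, using a hypothetical benchmark test name, a loop index of 0, and the 'new' build handle that bm_main.py passes:

# Sketch of the JSON naming convention shared by bm_run.py and bm_diff.py.
# 'bm_closure' is taken from _AVAILABLE_BENCHMARK_TESTS; the test name and
# loop index are illustrative, and 'new' is the handle used by bm_main.py.
bm = 'bm_closure'
line = 'BM_ClosureInitAgainstExecCtx'  # hypothetical --benchmark_list_tests entry
stripped_line = line.strip().replace("/", "_").replace(
    "<", "_").replace(">", "_").replace(", ", "_")
cfg, name, idx = 'counters', 'new', 0

# bm_run.py passes this path to --benchmark_out=...
out_file = '%s.%s.%s.%s.%d.json' % (bm, stripped_line, cfg, name, idx)
# ...and bm_diff.py rebuilds the identical path when it loads the results.
assert out_file == '%s.%s.counters.%s.%d.json' % (bm, stripped_line, name, idx)
print out_file  # bm_closure.BM_ClosureInitAgainstExecCtx.counters.new.0.json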
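
Also worth calling out (an editorial sketch, not code from the patches): Benchmark.process() in bm_diff.py only surfaces a metric when the change estimated by bm_speedup exceeds 3% and the medians differ by more than 0.5, so small or noisy deltas never reach the final table. The same gate, restated with hypothetical values:

def _significant(speedup_pct, mdn_diff):
  # Mirrors the condition in bm_diff.Benchmark.process():
  #   abs(s) > 3 and mdn_diff > 0.5
  return abs(speedup_pct) > 3 and mdn_diff > 0.5

print _significant(+2, 10.0)  # False: the estimated change is within 3%
print _significant(-8, 0.1)   # False: the medians are nearly identical
print _significant(-8, 4.2)   # True: reported as '-8%' in the output table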
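
Finally, a rough usage sketch (not part of the patches) of how the split modules chain together, mirroring the sequence in bm_main.main(). It assumes the code runs from the gRPC repository root with tools/profiling/microbenchmarks/bm_diff on the Python path; the benchmark subset, loop count, and job count are illustrative:

# Rough sketch of the build -> run -> diff sequence performed by bm_main.py,
# driving the modules directly. Function names and signatures come from the
# patch above. bm_main.py additionally wraps the 'old' build in a git
# checkout of --diff_base, which is omitted here.
import multiprocessing

import bm_build
import bm_constants
import bm_diff
import bm_run

benchmarks = ['bm_closure', 'bm_error']  # any subset of _AVAILABLE_BENCHMARK_TESTS
jobs = multiprocessing.cpu_count()
loops = 3  # bm_run.py and bm_main.py warn that fewer than 3 loops is noisy
reps = 1

bm_build.build('new', benchmarks, jobs)
bm_build.build('old', benchmarks, jobs)

bm_run.run('new', benchmarks, jobs, loops, reps)
bm_run.run('old', benchmarks, jobs, loops, reps)

diff, note = bm_diff.diff(benchmarks, loops,
                          sorted(bm_constants._INTERESTING), 'old', 'new')
print('%s\n%s' % (note, diff if diff else 'No performance differences'))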