Merge pull request #11367 from ncteisen/faster-bm-diff

Split Up Microbenchmark by Individual Benchmark
8 years ago · 2cc7e39d7d
parent 28e88b1e2e 0ff7384f6d
commit 2cc7e39d7d
11 changed files with 690 additions and 264 deletions
--- a/tools/jenkins/run_performance.sh
+++ b/tools/jenkins/run_performance.sh
@ -23,4 +23,4 @@ BENCHMARKS_TO_RUN="bm_fullstack_unary_ping_pong bm_fullstack_streaming_ping_pong
 cd $(dirname $0)/../..
 tools/run_tests/start_port_server.py
-tools/profiling/microbenchmarks/bm_diff.py -d origin/$ghprbTargetBranch -b $BENCHMARKS_TO_RUN
+tools/profiling/microbenchmarks/bm_diff/bm_main.py -d origin/$ghprbTargetBranch -b $BENCHMARKS_TO_RUN
--- a/tools/profiling/microbenchmarks/README.md
+++ b/tools/profiling/microbenchmarks/README.md
@ -0,0 +1,4 @@
 Microbenchmarks
 ====
 This directory contains helper scripts for the microbenchmark suites.
--- a/tools/profiling/microbenchmarks/bm_diff.py
+++ b/tools/profiling/microbenchmarks/bm_diff.py
@ -1,244 +0,0 @@
 #!/usr/bin/env python2.7
 # Copyright 2017 gRPC authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import sys
 import json
 import bm_json
 import tabulate
 import argparse
 from scipy import stats
 import subprocess
 import multiprocessing
 import collections
 import pipes
 import os
 sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', 'run_tests', 'python_utils'))
 import comment_on_pr
 import jobset
 import itertools
 import speedup
 import random
 import shutil
 import errno
 _INTERESTING = (
  'cpu_time',
  'real_time',
  'locks_per_iteration',
  'allocs_per_iteration',
  'writes_per_iteration',
  'atm_cas_per_iteration',
  'atm_add_per_iteration',
  'cli_transport_stalls_per_iteration',
  'cli_stream_stalls_per_iteration',
  'svr_transport_stalls_per_iteration',
  'svr_stream_stalls_per_iteration'
  'nows_per_iteration',
 )
 def changed_ratio(n, o):
  """Return the relative change going from o (old) to n (new).
  Values at or below .0001 are treated as zero. The result is 0 when both
  values are zero, 100 when only the old value is zero, and (n - o) / o
  otherwise.
  """
  old_val = 0 if float(o) <= .0001 else o
  new_val = 0 if float(n) <= .0001 else n
  if old_val == 0:
    return 0 if new_val == 0 else 100
  return (float(new_val) - float(old_val)) / float(old_val)
 def median(ary):
  """Return the median of the numbers in ary.
  For an even number of elements the two middle values are averaged.
  Raises IndexError when ary is empty.
  """
  ordered = sorted(ary)
  count = len(ordered)
  # Floor division keeps the index an int regardless of division semantics.
  mid = count // 2
  if count % 2 == 0:
    # The two middle elements of an even-length list sit at mid - 1 and mid
    # (not mid and mid + 1, which runs past the end for two elements).
    return (ordered[mid - 1] + ordered[mid]) / 2.0
  return ordered[mid]
 def min_change(pct):
  """Return a predicate(n, o) that is true when |changed_ratio| exceeds pct percent."""
  def exceeds(n, o):
    return abs(changed_ratio(n,o)) > pct/100.0
  return exceeds
 # Benchmark targets that may be passed to -b/--benchmarks.
 _AVAILABLE_BENCHMARK_TESTS = ['bm_fullstack_unary_ping_pong',
                              'bm_fullstack_streaming_ping_pong',
                              'bm_fullstack_streaming_pump',
                              'bm_closure',
                              'bm_cq',
                              'bm_call_create',
                              'bm_error',
                              'bm_chttp2_hpack',
                              'bm_chttp2_transport',
                              'bm_pollset',
                              'bm_metadata',
                              'bm_fullstack_trickle']
 # Command-line interface. The arguments are parsed right here at module
 # level, so the resulting `args` is a global read by the functions below.
 argp = argparse.ArgumentParser(description='Perform diff on microbenchmarks')
 argp.add_argument('-t', '--track',
                  choices=sorted(_INTERESTING),
                  nargs='+',
                  default=sorted(_INTERESTING),
                  help='Which metrics to track')
 argp.add_argument('-b', '--benchmarks', nargs='+', choices=_AVAILABLE_BENCHMARK_TESTS, default=['bm_cq'])
 argp.add_argument('-d', '--diff_base', type=str)
 argp.add_argument('-r', '--repetitions', type=int, default=1)
 argp.add_argument('-l', '--loops', type=int, default=20)
 argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count())
 args = argp.parse_args()
 # --diff_base has no default, so it has to be supplied on the command line.
 assert args.diff_base
 def avg(lst):
  """Return the arithmetic mean of the values in lst as a float.
  Raises ZeroDivisionError when lst is empty.
  """
  total = 0.0
  count = 0
  for count, value in enumerate(lst, 1):
    total += value
  return total / float(count)
 def make_cmd(cfg):
  """Return the make command that builds the selected benchmarks for config cfg."""
  config_flag = 'CONFIG=%s' % cfg
  jobs_flag = '%d' % args.jobs
  return ['make'] + args.benchmarks + [config_flag, '-j', jobs_flag]
 def build(dest):
  """Build the opt and counters benchmark binaries and store them as bm_diff_<dest>.
  Any previous bm_diff_<dest> directory is removed first. If the build
  fails, it is retried once after `make clean`.
  Raises subprocess.CalledProcessError if the retry fails as well.
  """
  def make_all():
    for cfg in ('opt', 'counters'):
      subprocess.check_call(make_cmd(cfg))
  shutil.rmtree('bm_diff_%s' % dest, ignore_errors=True)
  subprocess.check_call(['git', 'submodule', 'update'])
  try:
    make_all()
  except subprocess.CalledProcessError:
    # Retry both configurations once from a clean tree.
    subprocess.check_call(['make', 'clean'])
    make_all()
  os.rename('bins', 'bm_diff_%s' % dest)
 def collect1(bm, cfg, ver, idx):
  """Return a JobSpec that runs benchmark bm (config cfg, build ver) for loop idx.
  The JSON results are written to <bm>.<cfg>.<ver>.<idx>.json.
  """
  binary = 'bm_diff_%s/%s/%s' % (ver, cfg, bm)
  out_flag = '--benchmark_out=%s.%s.%s.%d.json' % (bm, cfg, ver, idx)
  reps_flag = '--benchmark_repetitions=%d' % (args.repetitions)
  label = '%s %s %s %d/%d' % (bm, cfg, ver, idx+1, args.loops)
  return jobset.JobSpec(
    [binary, out_flag, '--benchmark_out_format=json', reps_flag],
    shortname=label,
    verbose_success=True,
    timeout_seconds=None)
 # Build the current checkout first; its binaries end up in bm_diff_new.
 build('new')
 # Remember the current ref so it can be checked out again after the
 # baseline has been built.
 where_am_i = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
 subprocess.check_call(['git', 'checkout', args.diff_base])
 try:
  build('old')
 finally:
  # Runs whether or not the baseline build succeeded.
  subprocess.check_call(['git', 'checkout', where_am_i])
  subprocess.check_call(['git', 'submodule', 'update'])
 # Queue one job per benchmark for each of the four (config, version)
 # combinations, repeated args.loops times.
 jobs = []
 for loop in range(0, args.loops):
  jobs.extend(x for x in itertools.chain(
    (collect1(bm, 'opt', 'new', loop) for bm in args.benchmarks),
    (collect1(bm, 'counters', 'new', loop) for bm in args.benchmarks),
    (collect1(bm, 'opt', 'old', loop) for bm in args.benchmarks),
    (collect1(bm, 'counters', 'old', loop) for bm in args.benchmarks),
  ))
 # Randomize the execution order using the system random source
 # (presumably to spread ordering effects across versions; confirm).
 random.shuffle(jobs, random.SystemRandom().random)
 jobset.run(jobs, maxjobs=args.jobs)
 class Benchmark:
  """Accumulates metric samples for one benchmark and summarizes the new/old diff."""
  def __init__(self):
    # samples[is_new][metric] -> list of float samples
    self.samples = dict(
      (is_new, collections.defaultdict(list)) for is_new in (True, False))
    # metric -> formatted percentage, only for metrics judged significant
    self.final = {}
  def add_sample(self, data, new):
    """Record every tracked metric present in data for the new (True) or old (False) run."""
    for metric in (f for f in args.track if f in data):
      self.samples[new][metric].append(float(data[metric]))
  def process(self):
    """Fill self.final with significant metric changes and return its keys."""
    for metric in sorted(args.track):
      new_samples = self.samples[True][metric]
      old_samples = self.samples[False][metric]
      if not (new_samples and old_samples):
        continue
      mdn_diff = abs(median(new_samples) - median(old_samples))
      print('%s: new=%r old=%r mdn_diff=%r' % (metric, new_samples, old_samples, mdn_diff))
      pct = speedup.speedup(new_samples, old_samples)
      if abs(pct) > 3 and mdn_diff > 0.5:
        self.final[metric] = '%+d%%' % pct
    return self.final.keys()
  def skip(self):
    """Return True when no significant change was recorded."""
    return not self.final
  def row(self, flds):
    """Return one table cell per field in flds ('' where nothing was recorded)."""
    return [self.final.get(f, '') for f in flds]
 def eintr_be_gone(fn):
  """Run fn until it doesn't stop because of EINTR
  Returns whatever fn() returns. An IOError whose errno is not EINTR is
  re-raised unchanged.
  """
  while True:
    try:
      return fn()
    except IOError as e:
      # Only an interrupted call is retried; anything else propagates.
      if e.errno != errno.EINTR:
        raise
 def read_json(filename):
  """Return the parsed JSON content of filename, or None if it is not valid JSON.
  Errors from opening or reading the file are not handled here and propagate.
  """
  try:
    with open(filename) as f:
      return json.loads(f.read())
  except ValueError:
    return None
 def finalize():
  """Read all result files, compute the significant diffs, print and post them."""
  benchmarks = collections.defaultdict(Benchmark)
  for bm in args.benchmarks:
    for loop in range(0, args.loops):
      js_new_ctr = read_json('%s.counters.new.%d.json' % (bm, loop))
      js_new_opt = read_json('%s.opt.new.%d.json' % (bm, loop))
      js_old_ctr = read_json('%s.counters.old.%d.json' % (bm, loop))
      js_old_opt = read_json('%s.opt.old.%d.json' % (bm, loop))
      # New samples are ingested before old ones; a version is skipped when
      # its counters file produced no data.
      for js_ctr, js_opt, is_new in ((js_new_ctr, js_new_opt, True),
                                     (js_old_ctr, js_old_opt, False)):
        if not js_ctr:
          continue
        for row in bm_json.expand_json(js_ctr, js_opt):
          print(row)
          name = row['cpp_name']
          if name.endswith('_mean') or name.endswith('_stddev'):
            continue
          benchmarks[name].add_sample(row, is_new)
  really_interesting = set()
  for name, bm in benchmarks.items():
    print(name)
    really_interesting.update(bm.process())
  # Keep the column order of args.track, restricted to metrics that changed.
  fields = [f for f in args.track if f in really_interesting]
  headers = ['Benchmark'] + fields
  rows = [[name] + benchmarks[name].row(fields)
          for name in sorted(benchmarks.keys())
          if not benchmarks[name].skip()]
  if rows:
    text = 'Performance differences noted:\n' + tabulate.tabulate(rows, headers=headers, floatfmt='+.2f')
  else:
    text = 'No significant performance differences'
  print(text)
  comment_on_pr.comment_on_pr('```\n%s\n```' % text)
 eintr_be_gone(finalize)
--- a/tools/profiling/microbenchmarks/bm_diff/README.md
+++ b/tools/profiling/microbenchmarks/bm_diff/README.md
@ -0,0 +1,116 @@
 The bm_diff Family
 ====
 This family of python scripts can be incredibly useful for fast iteration over
 different performance tweaks. The tools allow you to save performance data from
 a baseline commit, then quickly compare data from your working branch to that
 baseline data to see if you have made any performance wins.
 The tools operate with three concrete steps, which can be invoked separately,
 or all together via the driver script, bm_main.py. This readme will describe 
 the typical workflow for these scripts, then it will include sections on the
 details of every script for advanced usage.
 ## Normal Workflow
 Let's say you are working on a performance optimization for grpc_error. You have
 made some significant changes and want to see some data. From your branch, run
 (ensure everything is committed first):
 `tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -d master`
 This will build the `bm_error` binary on your branch, and then it will checkout 
 master and build it there too. It will then run these benchmarks 5 times each. 
 Lastly it will compute the statistically significant performance differences 
 between the two branches. This should show the nice performance wins your 
 changes have made.
 If you have already invoked bm_main with `-d master`, you should instead use 
 `-o` for subsequent runs. This allows the script to skip re-building and 
 re-running the unchanged master branch. For example:
 `tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -o old`
 This will only build and run `bm_error` on your branch. It will then compare
 the output to the saved runs from master.
 ## Advanced Workflow
 If you have a deeper knowledge of these scripts, you can use them to do more
 fine tuned benchmark comparisons. For example, you could build, run, and save
 the benchmark output from two different base branches. Then you could diff both
 of these baselines against your working branch to see how the different metrics
 change. The rest of this doc goes over the details of what each of the
 individual modules accomplishes.
 ## bm_build.py
 This script builds the benchmarks. It takes in a name parameter, and will
 store the binaries based on that. Both `opt` and `counters` configurations
 will be used. The `opt` is used to get cpu_time and real_time, and the
 `counters` build is used to track other metrics like allocs, atomic adds,
 etc etc etc.
 For example, if you were to invoke (we assume everything is run from the 
 root of the repo):
 `tools/profiling/microbenchmarks/bm_diff/bm_build.py -b bm_error -n baseline`
 then the microbenchmark binaries will show up under 
 `bm_diff_baseline/{opt,counters}/bm_error`
 ## bm_run.py
 This script runs the benchmarks. It takes a name parameter that must match the
 name that was passed to `bm_build.py`. The script then runs the benchmark
 multiple times (default is 20, can be toggled via the loops parameter). The
 output is saved as `<benchmark name>.<config>.<name>.<loop idx>.json`
 For example, if you were to run:
 `tools/profiling/microbenchmarks/bm_diff/bm_run.py -b bm_error -n baseline -l 5`
 Then an example output file would be `bm_error.opt.baseline.0.json`
 ## bm_diff.py
 This script takes in the output from two benchmark runs, computes the diff
 between them, and prints any significant improvements or regressions. It takes
 in two name parameters, old and new. These must have previously been built and
 run.
 For example, assuming you had already built and run a 'baseline' microbenchmark
 from master, and then you also built and ran a 'current' microbenchmark from
 the branch you were working on, you could invoke:
 `tools/profiling/microbenchmarks/bm_diff/bm_diff.py -b bm_error -o baseline -n current -l 5`
 This would output the percent difference between your branch and master.
 ## bm_main.py
 This is the driver script. It uses the previous three modules and does
 everything for you. You pass in the benchmarks to be run, the number of loops,
 number of CPUs to use, and the commit to compare to. Then the script will:
 * Build the benchmarks at head, then checkout the branch to compare to and
  build the benchmarks there
 * Run both sets of microbenchmarks
 * Run bm_diff.py to compare the two and output the difference.
 For example, one might run:
 `tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -d master`
 This would compare the current branch's error benchmarks to master.
 This script is invoked by our infrastructure on every PR to protect against
 regressions and demonstrate performance wins.
 However, if you are iterating over different performance tweaks quickly, it is
 unnecessary to build and run the baseline commit every time. That is why we
 provide a different flag in case you are sure that the baseline benchmark has
 already been built and run. In that case use the --old flag to pass in the name
 of the baseline. This will only build and run the current branch. For example:
 `tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -o old`
--- a/tools/profiling/microbenchmarks/bm_diff/bm_build.py
+++ b/tools/profiling/microbenchmarks/bm_diff/bm_build.py
@ -0,0 +1,75 @@
 #!/usr/bin/env python2.7
 #
 # Copyright 2017 gRPC authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Python utility to build opt and counters benchmarks """
 import bm_constants
 import argparse
 import subprocess
 import multiprocessing
 import os
 import shutil
 def _args():
  """Parse the command-line arguments for bm_build.py.
  Returns the argparse namespace with `benchmarks`, `jobs` and `name`.
  Exits through argparse's error reporting when no build name is given.
  """
  argp = argparse.ArgumentParser(description='Builds microbenchmarks')
  argp.add_argument(
    '-b',
    '--benchmarks',
    nargs='+',
    choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
    default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
    help='Which benchmarks to build')
  argp.add_argument(
    '-j',
    '--jobs',
    type=int,
    default=multiprocessing.cpu_count(),
    help='How many CPUs to dedicate to this task')
  argp.add_argument(
    '-n',
    '--name',
    type=str,
    help='Unique name of this build. To be used as a handle to pass to the other bm* scripts'
  )
  args = argp.parse_args()
  # Validate explicitly instead of with assert, which is stripped when
  # Python runs with -O and only yields a bare traceback otherwise.
  if not args.name:
    argp.error('a build name is required (-n/--name)')
  return args
 def _make_cmd(cfg, benchmarks, jobs):
  return ['make'] + benchmarks + ['CONFIG=%s' % cfg, '-j', '%d' % jobs]
 def build(name, benchmarks, jobs):
  """Build the opt and counters binaries of `benchmarks` into bm_diff_<name>.
  Any existing bm_diff_<name> directory is removed first. If the build
  fails, it is retried once after `make clean`.
  Raises subprocess.CalledProcessError if the retry fails as well.
  """
  def _make_all():
    for cfg in ('opt', 'counters'):
      subprocess.check_call(_make_cmd(cfg, benchmarks, jobs))
  shutil.rmtree('bm_diff_%s' % name, ignore_errors=True)
  subprocess.check_call(['git', 'submodule', 'update'])
  try:
    _make_all()
  except subprocess.CalledProcessError:
    # Retry both configurations once from a clean tree.
    subprocess.check_call(['make', 'clean'])
    _make_all()
  os.rename(
    'bins',
    'bm_diff_%s' % name,)
 if __name__ == '__main__':
  args = _args()
  build(args.name, args.benchmarks, args.jobs)
--- a/tools/profiling/microbenchmarks/bm_diff/bm_constants.py
+++ b/tools/profiling/microbenchmarks/bm_diff/bm_constants.py
@ -0,0 +1,29 @@
 #!/usr/bin/env python2.7
 #
 # Copyright 2017 gRPC authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Configurable constants for the bm_*.py family """
 _AVAILABLE_BENCHMARK_TESTS = [
  'bm_fullstack_unary_ping_pong', 'bm_fullstack_streaming_ping_pong',
  'bm_fullstack_streaming_pump', 'bm_closure', 'bm_cq', 'bm_call_create',
  'bm_error', 'bm_chttp2_hpack', 'bm_chttp2_transport', 'bm_pollset',
  'bm_metadata', 'bm_fullstack_trickle'
 ]
 _INTERESTING = ('cpu_time', 'real_time', 'locks_per_iteration',
        'allocs_per_iteration', 'writes_per_iteration',
        'atm_cas_per_iteration', 'atm_add_per_iteration',
        'nows_per_iteration',)
--- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py
+++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py
@ -0,0 +1,201 @@
 #!/usr/bin/env python2.7
 #
 # Copyright 2017 gRPC authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Computes the diff between two bm runs and outputs significant results """
 import bm_constants
 import bm_speedup
 import sys
 import os
 sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..'))
 import bm_json
 import json
 import tabulate
 import argparse
 import collections
 import subprocess
 verbose = False
 def _median(ary):
  assert (len(ary))
  ary = sorted(ary)
  n = len(ary)
  if n % 2 == 0:
    return (ary[(n - 1) / 2] + ary[(n - 1) / 2 + 1]) / 2.0
  else:
    return ary[n / 2]
 def _args():
  """Parse the command-line arguments for bm_diff.py.
  Sets the module-level `verbose` flag when -v/--verbose is enabled and
  requires both --new and --old to be given.
  Returns the argparse namespace.
  """
  def _as_bool(text):
    # argparse's type=bool turns every non-empty string (even "False") into
    # True, so the usual false spellings are recognized explicitly.
    return text.strip().lower() not in ('', '0', 'false', 'no', 'off')
  argp = argparse.ArgumentParser(
    description='Perform diff on microbenchmarks')
  argp.add_argument(
    '-t',
    '--track',
    choices=sorted(bm_constants._INTERESTING),
    nargs='+',
    default=sorted(bm_constants._INTERESTING),
    help='Which metrics to track')
  argp.add_argument(
    '-b',
    '--benchmarks',
    nargs='+',
    choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
    default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
    help='Which benchmarks to run')
  argp.add_argument(
    '-l',
    '--loops',
    type=int,
    default=20,
    help='Number of times to loops the benchmarks. Must match what was passed to bm_run.py'
  )
  argp.add_argument('-n', '--new', type=str, help='New benchmark name')
  argp.add_argument('-o', '--old', type=str, help='Old benchmark name')
  # Accepts a bare `-v` as well as the previous `-v <value>` form.
  argp.add_argument(
    '-v',
    '--verbose',
    nargs='?',
    const=True,
    default=False,
    type=_as_bool,
    help='Print details of before/after')
  args = argp.parse_args()
  global verbose
  if args.verbose: verbose = True
  assert args.new
  assert args.old
  return args
 def _maybe_print(str):
  """Print str only when the module-level verbose flag is set."""
  # Written as a call so it parses the same way as the other print(...)
  # uses in this file.
  if verbose: print(str)
 class Benchmark:
  """Accumulates metric samples for one benchmark and summarizes the new/old diff."""
  def __init__(self):
    # samples[is_new][metric] -> list of float samples
    self.samples = dict(
      (is_new, collections.defaultdict(list)) for is_new in (True, False))
    # metric -> formatted percentage, only for metrics judged significant
    self.final = {}
  def add_sample(self, track, data, new):
    """Record each metric of `track` present in `data` for the new (True) or old (False) run."""
    for metric in (f for f in track if f in data):
      self.samples[new][metric].append(float(data[metric]))
  def process(self, track, new_name, old_name):
    """Fill self.final with the significant changes among `track` and return its keys."""
    for metric in sorted(track):
      new_samples = self.samples[True][metric]
      old_samples = self.samples[False][metric]
      if not (new_samples and old_samples):
        continue
      mdn_diff = abs(_median(new_samples) - _median(old_samples))
      _maybe_print('%s: %s=%r %s=%r mdn_diff=%r' %
             (metric, new_name, new_samples, old_name, old_samples, mdn_diff))
      pct = bm_speedup.speedup(new_samples, old_samples)
      if abs(pct) > 3 and mdn_diff > 0.5:
        self.final[metric] = '%+d%%' % pct
    return self.final.keys()
  def skip(self):
    """Return True when no significant change was recorded."""
    return not self.final
  def row(self, flds):
    """Return one table cell per field in flds ('' where nothing was recorded)."""
    return [self.final.get(f, '') for f in flds]
 def _read_json(filename, badjson_files, nonexistant_files):
  stripped = ".".join(filename.split(".")[:-2])
  try:
    with open(filename) as f:
      return json.loads(f.read())
  except IOError, e:
    if stripped in nonexistant_files:
      nonexistant_files[stripped] += 1
    else:
      nonexistant_files[stripped] = 1
    return None
  except ValueError, e:
    if stripped in badjson_files:
      badjson_files[stripped] += 1
    else:
      badjson_files[stripped] = 1
    return None
 def diff(bms, loops, track, old, new):
  """Compare the saved results of runs `new` and `old` for the benchmarks in bms.
  The individual test names are taken from the `old` opt binary
  (--benchmark_list_tests); for each test and loop index the counters and
  opt JSON files of both runs are read and their tracked metrics sampled.
  Returns a (table, note) tuple: table is the tabulated significant
  differences, or None when there are none; note summarizes the corrupt and
  missing result files.
  """
  benchmarks = collections.defaultdict(Benchmark)
  badjson_files = {}
  nonexistant_files = {}
  def _load(bm, test, cfg, name, loop):
    return _read_json('%s.%s.%s.%s.%d.json' % (bm, test, cfg, name, loop),
                      badjson_files, nonexistant_files)
  def _ingest(js_ctr, js_opt, is_new):
    # Without counters data nothing is sampled for this version.
    if not js_ctr:
      return
    for row in bm_json.expand_json(js_ctr, js_opt):
      name = row['cpp_name']
      if name.endswith('_mean') or name.endswith('_stddev'):
        continue
      benchmarks[name].add_sample(track, row, is_new)
  for bm in bms:
    # The test list does not depend on the loop index, so the binary is
    # queried once per benchmark instead of once per loop.
    test_names = []
    if loops > 0:
      listing = subprocess.check_output(
        ['bm_diff_%s/opt/%s' % (old, bm), '--benchmark_list_tests'])
      test_names = [
        line.strip().replace("/", "_").replace("<", "_").replace(
          ">", "_").replace(", ", "_") for line in listing.splitlines()
      ]
    for loop in range(0, loops):
      for stripped_line in test_names:
        js_new_ctr = _load(bm, stripped_line, 'counters', new, loop)
        js_new_opt = _load(bm, stripped_line, 'opt', new, loop)
        js_old_ctr = _load(bm, stripped_line, 'counters', old, loop)
        js_old_opt = _load(bm, stripped_line, 'opt', old, loop)
        _ingest(js_new_ctr, js_new_opt, True)
        _ingest(js_old_ctr, js_old_opt, False)
  really_interesting = set()
  for name, bm in benchmarks.items():
    _maybe_print(name)
    really_interesting.update(bm.process(track, new, old))
  # Keep the column order of track, restricted to metrics that changed.
  fields = [f for f in track if f in really_interesting]
  headers = ['Benchmark'] + fields
  rows = []
  for name in sorted(benchmarks.keys()):
    if benchmarks[name].skip(): continue
    rows.append([name] + benchmarks[name].row(fields))
  note = 'Corrupt JSON data (indicates timeout or crash) = %s' % str(
    badjson_files)
  note += '\n\nMissing files (new benchmark) = %s' % str(nonexistant_files)
  if rows:
    return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'), note
  else:
    return None, note
 if __name__ == '__main__':
  args = _args()
  # The result is bound to `table` so the module-level diff() function is
  # not overwritten by its own return value.
  table, note = diff(args.benchmarks, args.loops, args.track, args.old,
            args.new)
  print('%s\n%s' % (note, table if table else "No performance differences"))
--- a/tools/profiling/microbenchmarks/bm_diff/bm_main.py
+++ b/tools/profiling/microbenchmarks/bm_diff/bm_main.py
@ -0,0 +1,135 @@
 #!/usr/bin/env python2.7
 #
 # Copyright 2017 gRPC authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Runs the entire bm_*.py pipeline, and possible comments on the PR """
 import bm_constants
 import bm_build
 import bm_run
 import bm_diff
 import sys
 import os
 import argparse
 import multiprocessing
 import subprocess
 sys.path.append(
  os.path.join(
    os.path.dirname(sys.argv[0]), '..', '..', 'run_tests', 'python_utils'))
 import comment_on_pr
 def _args():
  """Parse the command-line arguments for bm_main.py.
  Prints a warning when fewer than 3 loops are requested.
  Returns the argparse namespace.
  """
  argp = argparse.ArgumentParser(
    description='Perform diff on microbenchmarks')
  argp.add_argument(
    '-t',
    '--track',
    choices=sorted(bm_constants._INTERESTING),
    nargs='+',
    default=sorted(bm_constants._INTERESTING),
    help='Which metrics to track')
  argp.add_argument(
    '-b',
    '--benchmarks',
    nargs='+',
    choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
    default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
    help='Which benchmarks to run')
  argp.add_argument(
    '-d',
    '--diff_base',
    type=str,
    help='Commit or branch to compare the current one to')
  argp.add_argument(
    '-o',
    '--old',
    default='old',
    type=str,
    help='Name of baseline run to compare to. Ususally just called "old"')
  argp.add_argument(
    '-r',
    '--repetitions',
    type=int,
    default=1,
    help='Number of repetitions to pass to the benchmarks')
  argp.add_argument(
    '-l',
    '--loops',
    type=int,
    default=20,
    help='Number of times to loops the benchmarks. More loops cuts down on noise'
  )
  argp.add_argument(
    '-j',
    '--jobs',
    type=int,
    default=multiprocessing.cpu_count(),
    help='Number of CPUs to use')
  args = argp.parse_args()
  # NOTE(review): --old defaults to 'old', so this only fails when an empty
  # string is passed for --old without --diff_base; confirm whether a
  # stricter check was intended.
  assert args.diff_base or args.old, "One of diff_base or old must be set!"
  if args.loops < 3:
    # Written as a call so it parses the same way as the print(...) in main().
    print("WARNING: This run will likely be noisy. Increase loops.")
  return args
 def eintr_be_gone(fn):
  """Run fn until it doesn't stop because of EINTR
  Returns a wrapper that calls fn with the given arguments and calls it
  again whenever it raises an IOError whose errno is EINTR. Any other
  IOError is re-raised unchanged.
  """
  # Imported here because the handler below needs errno and this module
  # does not import it at file level.
  import errno
  def inner(*args, **kwargs):
    while True:
      try:
        return fn(*args, **kwargs)
      except IOError as e:
        if e.errno != errno.EINTR:
          raise
  return inner
 def main(args):
  """Build, run and diff the microbenchmarks, then print and post the result.
  The current checkout is always built as 'new'. When args.diff_base is
  given, that ref is checked out and built as 'old' and the original
  checkout is restored afterwards; otherwise args.old names the baseline.
  Both runs are then executed and compared.
  """
  bm_build.build('new', args.benchmarks, args.jobs)
  old = args.old
  if args.diff_base:
    old = 'old'
    where_am_i = subprocess.check_output(
      ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
    if where_am_i == 'HEAD':
      # Detached HEAD: --abbrev-ref only yields the literal "HEAD", which
      # would resolve to diff_base after the checkout below. Remember the
      # commit hash instead so the original commit can be restored.
      where_am_i = subprocess.check_output(
        ['git', 'rev-parse', 'HEAD']).strip()
    subprocess.check_call(['git', 'checkout', args.diff_base])
    try:
      bm_build.build('old', args.benchmarks, args.jobs)
    finally:
      # Runs whether or not the baseline build succeeded.
      subprocess.check_call(['git', 'checkout', where_am_i])
      subprocess.check_call(['git', 'submodule', 'update'])
  bm_run.run('new', args.benchmarks, args.jobs, args.loops, args.repetitions)
  bm_run.run(old, args.benchmarks, args.jobs, args.loops, args.repetitions)
  diff, note = bm_diff.diff(args.benchmarks, args.loops, args.track, old,
                'new')
  if diff:
    text = 'Performance differences noted:\n' + diff
  else:
    text = 'No significant performance differences'
  print('%s\n%s' % (note, text))
  comment_on_pr.comment_on_pr('```\n%s\n\n%s\n```' % (note, text))
 if __name__ == '__main__':
  args = _args()
  main(args)
--- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py
+++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py
@ -0,0 +1,113 @@
 #!/usr/bin/env python2.7
 #
 # Copyright 2017 gRPC authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Python utility to run opt and counters benchmarks and save json output """
 import bm_constants
 import argparse
 import subprocess
 import multiprocessing
 import random
 import itertools
 import sys
 import os
 sys.path.append(
  os.path.join(
    os.path.dirname(sys.argv[0]), '..', '..', '..', 'run_tests',
    'python_utils'))
 import jobset
 def _args():
  """Parse the command-line arguments for bm_run.py.
  Prints a warning when fewer than 3 loops are requested.
  Returns the argparse namespace. Exits through argparse's error reporting
  when no build name is given.
  """
  argp = argparse.ArgumentParser(description='Runs microbenchmarks')
  argp.add_argument(
    '-b',
    '--benchmarks',
    nargs='+',
    choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
    default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
    help='Benchmarks to run')
  argp.add_argument(
    '-j',
    '--jobs',
    type=int,
    default=multiprocessing.cpu_count(),
    help='Number of CPUs to use')
  argp.add_argument(
    '-n',
    '--name',
    type=str,
    help='Unique name of the build to run. Needs to match the handle passed to bm_build.py'
  )
  argp.add_argument(
    '-r',
    '--repetitions',
    type=int,
    default=1,
    help='Number of repetitions to pass to the benchmarks')
  argp.add_argument(
    '-l',
    '--loops',
    type=int,
    default=20,
    help='Number of times to loops the benchmarks. More loops cuts down on noise'
  )
  args = argp.parse_args()
  # Validate explicitly instead of with assert, which is stripped when
  # Python runs with -O and only yields a bare traceback otherwise.
  if not args.name:
    argp.error('a build name is required (-n/--name)')
  if args.loops < 3:
    print("WARNING: This run will likely be noisy. Increase loops to at least 3.")
  return args
 def _collect_bm_data(bm, cfg, name, reps, idx, loops):
  """Return one JobSpec per individual test of benchmark bm (config cfg, build name).
  The test names come from the binary's --benchmark_list_tests output. Each
  job runs exactly one test and writes
  <bm>.<sanitized test>.<cfg>.<name>.<idx>.json.
  """
  binary = 'bm_diff_%s/%s/%s' % (name, cfg, bm)
  listing = subprocess.check_output([binary, '--benchmark_list_tests'])
  jobs_list = []
  for line in listing.splitlines():
    # Replace the characters that occur in test names with underscores for
    # use in the output file name.
    stripped_line = line.strip()
    for token in ("/", "<", ">", ", "):
      stripped_line = stripped_line.replace(token, "_")
    filter_flag = '--benchmark_filter=^%s$' % line
    out_flag = '--benchmark_out=%s.%s.%s.%s.%d.json' % (
      bm, stripped_line, cfg, name, idx)
    reps_flag = '--benchmark_repetitions=%d' % (reps)
    label = '%s %s %s %s %d/%d' % (bm, line, cfg, name, idx + 1, loops)
    spec = jobset.JobSpec(
      [binary, filter_flag, out_flag, '--benchmark_out_format=json',
       reps_flag],
      shortname=label,
      verbose_success=True,
      timeout_seconds=60 * 2)
    jobs_list.append(spec)
  return jobs_list
 def run(name, benchmarks, jobs, loops, reps):
  """Run every test of `benchmarks` from build `name`, `loops` times, in random order.
  For each loop and benchmark the opt jobs are queued before the counters
  jobs; the whole list is then shuffled and executed with at most `jobs`
  concurrent jobs.
  """
  jobs_list = []
  for loop in range(0, loops):
    for bm in benchmarks:
      for cfg in ('opt', 'counters'):
        jobs_list.extend(_collect_bm_data(bm, cfg, name, reps, loop, loops))
  random.shuffle(jobs_list, random.SystemRandom().random)
  jobset.run(jobs_list, maxjobs=jobs)
 if __name__ == '__main__':
  args = _args()
  run(args.name, args.benchmarks, args.jobs, args.loops, args.repetitions)
--- a/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py
+++ b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py
@ -1,3 +1,5 @@
 #!/usr/bin/env python2.7
 #
 # Copyright 2017 gRPC authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@ -17,13 +19,17 @@ import math
 _THRESHOLD = 1e-10
 def scale(a, mul):
-  return [x*mul for x in a]
+  return [x * mul for x in a]
 def cmp(a, b):
  return stats.ttest_ind(a, b)
 def speedup(new, old):
  if (len(set(new))) == 1 and new == old: return 0
  s0, p0 = cmp(new, old)
  if math.isnan(p0): return 0
  if s0 == 0: return 0
@ -31,7 +37,7 @@ def speedup(new, old):
  if s0 < 0:
    pct = 1
    while pct < 101:
-      sp, pp = cmp(new, scale(old, 1 - pct/100.0))
+      sp, pp = cmp(new, scale(old, 1 - pct / 100.0))
      if sp > 0: break
      if pp > _THRESHOLD: break
      pct += 1
@ -39,14 +45,15 @@ def speedup(new, old):
  else:
    pct = 1
    while pct < 100000:
-      sp, pp = cmp(new, scale(old, 1 + pct/100.0))
+      sp, pp = cmp(new, scale(old, 1 + pct / 100.0))
      if sp < 0: break
      if pp > _THRESHOLD: break
      pct += 1
    return pct - 1
 if __name__ == "__main__":
-  new=[66034560.0, 126765693.0, 99074674.0, 98588433.0, 96731372.0, 110179725.0, 103802110.0, 101139800.0, 102357205.0, 99016353.0, 98840824.0, 99585632.0, 98791720.0, 96171521.0, 95327098.0, 95629704.0, 98209772.0, 99779411.0, 100182488.0, 98354192.0, 99644781.0, 98546709.0, 99019176.0, 99543014.0, 99077269.0, 98046601.0, 99319039.0, 98542572.0, 98886614.0, 72560968.0]
+  new = [1.0, 1.0, 1.0, 1.0]
-  old=[60423464.0, 71249570.0, 73213089.0, 73200055.0, 72911768.0, 72347798.0, 72494672.0, 72756976.0, 72116565.0, 71541342.0, 73442538.0, 74817383.0, 73007780.0, 72499062.0, 72404945.0, 71843504.0, 73245405.0, 72778304.0, 74004519.0, 73694464.0, 72919931.0, 72955481.0, 71583857.0, 71350467.0, 71836817.0, 70064115.0, 70355345.0, 72516202.0, 71716777.0, 71532266.0]
+  old = [2.0, 2.0, 2.0, 2.0]
  print speedup(new, old)
  print speedup(old, new)
--- a/tools/run_tests/run_microbenchmark.py
+++ b/tools/run_tests/run_microbenchmark.py
@ -23,18 +23,8 @@ import argparse
 import python_utils.jobset as jobset
 import python_utils.start_port_server as start_port_server
-_AVAILABLE_BENCHMARK_TESTS = ['bm_fullstack_unary_ping_pong',
+sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..', 'profiling', 'microbenchmarks', 'bm_diff'))
-                              'bm_fullstack_streaming_ping_pong',
+import bm_constants
                              'bm_fullstack_streaming_pump',
                              'bm_closure',
                              'bm_cq',
                              'bm_call_create',
                              'bm_error',
                              'bm_chttp2_hpack',
                              'bm_chttp2_transport',
                              'bm_pollset',
                              'bm_metadata',
                              'bm_fullstack_trickle']
 flamegraph_dir = os.path.join(os.path.expanduser('~'), 'FlameGraph')
@ -199,8 +189,8 @@ argp.add_argument('-c', '--collect',
                  default=sorted(collectors.keys()),
                  help='Which collectors should be run against each benchmark')
 argp.add_argument('-b', '--benchmarks',
-                  choices=_AVAILABLE_BENCHMARK_TESTS,
+                  choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
-                  default=_AVAILABLE_BENCHMARK_TESTS,
+                  default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
                  nargs='+',
                  type=str,
                  help='Which microbenchmarks should be run')