Merge pull request #10048 from ctiller/nudiff

Move to a customized benchmark diff tool
pull/9945/head^2
Craig Tiller 8 years ago committed by GitHub
commit 0de314b39c
  1. 145
      tools/profiling/microbenchmarks/bm2bq.py
  2. 101
      tools/profiling/microbenchmarks/bm_diff.py
  3. 176
      tools/profiling/microbenchmarks/bm_json.py
  4. 73
      tools/run_tests/run_microbenchmark.py

@@ -36,7 +36,7 @@
import sys
import json
import csv
import os
import bm_json
columns = [
('jenkins_build', 'integer'),
@@ -88,147 +88,8 @@ else:
# Emit one CSV row per benchmark on stdout, with a cell for each configured column.
writer = csv.DictWriter(sys.stdout, [col for col, _ctype in columns])
# How each benchmark family's C++ name is decomposed:
#   tpl - labels for the template arguments (the <...> part of the name)
#   dyn - labels for the dynamic arguments (the trailing /-separated values)
bm_specs = {
    'BM_UnaryPingPong': dict(tpl=['fixture', 'client_mutator', 'server_mutator'],
                             dyn=['request_size', 'response_size']),
    'BM_PumpStreamClientToServer': dict(tpl=['fixture'], dyn=['request_size']),
    'BM_PumpStreamServerToClient': dict(tpl=['fixture'], dyn=['request_size']),
    'BM_StreamingPingPong': dict(tpl=['fixture', 'client_mutator', 'server_mutator'],
                                 dyn=['request_size', 'request_count']),
    'BM_StreamingPingPongMsgs': dict(tpl=['fixture', 'client_mutator', 'server_mutator'],
                                     dyn=['request_size']),
    'BM_PumpStreamServerToClient_Trickle': dict(tpl=[],
                                                dyn=['request_size', 'bandwidth_kilobits']),
    'BM_ErrorStringOnNewError': dict(tpl=['fixture'], dyn=[]),
    'BM_ErrorStringRepeatedly': dict(tpl=['fixture'], dyn=[]),
    'BM_ErrorGetStatus': dict(tpl=['fixture'], dyn=[]),
    'BM_ErrorGetStatusCode': dict(tpl=['fixture'], dyn=[]),
    'BM_ErrorHttpError': dict(tpl=['fixture'], dyn=[]),
    'BM_HasClearGrpcStatus': dict(tpl=['fixture'], dyn=[]),
    'BM_IsolatedFilter': dict(tpl=['fixture', 'client_mutator'], dyn=[]),
    'BM_HpackEncoderEncodeHeader': dict(tpl=['fixture'],
                                        dyn=['end_of_stream', 'request_size']),
    'BM_HpackParserParseHeader': dict(tpl=['fixture'], dyn=[]),
    'BM_CallCreateDestroy': dict(tpl=['fixture'], dyn=[]),
}
def numericalize(s):
  """Convert a benchmark argument string to a number.

  Accepts plain integers plus 'k' (x1024) and 'M' (x1024*1024) suffixes.
  Returns '' for an empty input so the corresponding CSV cell stays blank.

  Raises:
    ValueError: if s is not in one of the recognized forms.
  """
  if not s:
    return ''
  if s[-1] == 'k':
    return int(s[:-1]) * 1024
  if s[-1] == 'M':
    return int(s[:-1]) * 1024 * 1024
  if '0' <= s[-1] <= '9':
    return int(s)
  # The original `assert 'not a number: %s' % s` was always true (a
  # non-empty string is truthy), so bad input silently fell through and
  # returned None; fail loudly instead.
  raise ValueError('not a number: %s' % s)
def parse_name(name):
  """Split a benchmark name like 'BM_Foo<A, B>/1/2' into labeled fields.

  Plain names with no template or dynamic arguments pass through untouched.
  Otherwise the template arguments (between the outermost <...>) and the
  '/'-separated dynamic arguments are matched against the bm_specs entry
  for the benchmark family.
  """
  if '<' not in name and '/' not in name and name not in bm_specs:
    return {'name': name}
  tpl_args, dyn_args = [], []
  rest = name
  lt = rest.find('<')
  if lt >= 0:
    gt = rest.rfind('>')
    # Split the template argument list on top-level commas only; nested
    # templates like Foo<Bar<int>, Baz> keep their inner commas.
    depth, cur = 0, []
    for ch in rest[lt + 1:gt]:
      if ch == ',' and depth == 0:
        tpl_args.append(''.join(cur).strip())
        cur = []
        continue
      if ch == '<':
        depth += 1
      elif ch == '>':
        depth -= 1
      cur.append(ch)
    tpl_args.append(''.join(cur).strip())
    rest = rest[:lt] + rest[gt + 1:]
  if '/' in rest:
    pieces = rest.split('/')
    rest, dyn_args = pieces[0], pieces[1:]
  assert rest in bm_specs, 'bm_specs needs to be expanded for %s' % rest
  spec = bm_specs[rest]
  assert len(dyn_args) == len(spec['dyn'])
  assert len(tpl_args) == len(spec['tpl'])
  out = {'name': rest}
  out.update((k, numericalize(v)) for k, v in zip(spec['dyn'], dyn_args))
  out.update(zip(spec['tpl'], tpl_args))
  return out
for bm in js['benchmarks']:
context = js['context']
if 'label' in bm:
labels_list = [s.split(':') for s in bm['label'].strip().split(' ') if len(s) and s[0] != '#']
for el in labels_list:
el[0] = el[0].replace('/iter', '_per_iteration')
labels = dict(labels_list)
else:
labels = {}
row = {
'jenkins_build': os.environ.get('BUILD_NUMBER', ''),
'jenkins_job': os.environ.get('JOB_NAME', ''),
}
row.update(context)
row.update(bm)
row.update(parse_name(row['name']))
row.update(labels)
for row in bm_json.expand_json(js, js2):
if 'label' in row:
del row['label']
if js2:
for bm2 in js2['benchmarks']:
if bm['name'] == bm2['name']:
row['cpu_time'] = bm2['cpu_time']
row['real_time'] = bm2['real_time']
row['iterations'] = bm2['iterations']
del row['cpp_name']
writer.writerow(row)

@@ -0,0 +1,101 @@
#!/usr/bin/env python2.7
# Copyright 2017, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import json
import bm_json
import tabulate
# argv[1..4]: new-counters, new-opt, old-counters, old-opt result files.
def _load(path):
  """Read one benchmark result file and return its parsed JSON."""
  with open(path) as f:
    return json.loads(f.read())

js_new_ctr = _load(sys.argv[1])
js_new_opt = _load(sys.argv[2])
js_old_ctr = _load(sys.argv[3])
js_old_opt = _load(sys.argv[4])

# Index each run's rows by fully-qualified C++ benchmark name so the two
# runs can be joined benchmark-by-benchmark.
new = {}
for row in bm_json.expand_json(js_new_ctr, js_new_opt):
  new[row['cpp_name']] = row
old = {}
for row in bm_json.expand_json(js_old_ctr, js_old_opt):
  old[row['cpp_name']] = row
def changed_ratio(n, o):
  """Return the relative change from old value o to new value n.

  0.1 means n is 10% larger than o; negative means a decrease.
  An old value of zero (common for counter fields such as
  allocs_per_iteration) would divide by zero, so it is special-cased:
  no change -> 0.0, any change -> inf (always flagged as interesting).
  """
  if float(o) == 0.0:
    return 0.0 if float(n) == 0.0 else float('inf')
  return float(n - o) / float(o)
def min_change(pct):
  """Build a predicate that flags a (new, old) pair whose relative change
  exceeds pct percent in either direction."""
  threshold = pct / 100.0
  def exceeds(n, o):
    return abs(changed_ratio(n, o)) > threshold
  return exceeds
# Per-field percentage thresholds: timing fields are noisy (10%); counter
# fields are more stable, so they get tighter bars.
_CHANGE_THRESHOLDS = (
    ('cpu_time', 10),
    ('real_time', 10),
    ('locks_per_iteration', 5),
    ('allocs_per_iteration', 5),
    ('writes_per_iteration', 5),
    ('atm_cas_per_iteration', 1),
    ('atm_add_per_iteration', 5),
)
# (field name, predicate) pairs used to decide which benchmarks to report.
_INTERESTING = tuple((fld, min_change(pct)) for fld, pct in _CHANGE_THRESHOLDS)
# Decide which fields get a table column: a field is 'changed' as soon as
# one benchmark present in both runs trips its predicate.
changed = []
for fld, chk in _INTERESTING:
  for bm, n in new.items():
    o = old.get(bm)
    if o is None or fld not in n or fld not in o:
      continue
    if chk(n[fld], o[fld]):
      changed.append((fld, chk))
      break
# Render one table row per benchmark that moved on at least one field.
# Iterate `changed` (not _INTERESTING) so every row has exactly one cell
# per header column; the original iterated _INTERESTING and skipped
# missing fields without padding, so cells could land under the wrong
# header whenever the two lists differed.
headers = ['Benchmark'] + [c[0] for c in changed] + ['Details']
rows = []
for bm in sorted(new.keys()):
  if bm not in old:
    continue
  n = new[bm]
  o = old[bm]
  row = [bm]
  any_changed = False
  details = ''
  for fld, chk in changed:
    if fld not in n or fld not in o:
      # Keep the column grid aligned even when a field is absent.
      row.append('')
      continue
    if chk(n[fld], o[fld]):
      row.append(changed_ratio(n[fld], o[fld]))
      if details:
        details += ', '
      details += '%s:%r-->%r' % (fld, o[fld], n[fld])
      any_changed = True
    else:
      row.append('')
  if any_changed:
    row.append(details)
    rows.append(row)
# Parenthesized form prints identically under Python 2 and parses under 3.
print(tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'))

@@ -0,0 +1,176 @@
# Copyright 2017, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
_BM_SPECS = {
'BM_UnaryPingPong': {
'tpl': ['fixture', 'client_mutator', 'server_mutator'],
'dyn': ['request_size', 'response_size'],
},
'BM_PumpStreamClientToServer': {
'tpl': ['fixture'],
'dyn': ['request_size'],
},
'BM_PumpStreamServerToClient': {
'tpl': ['fixture'],
'dyn': ['request_size'],
},
'BM_StreamingPingPong': {
'tpl': ['fixture', 'client_mutator', 'server_mutator'],
'dyn': ['request_size', 'request_count'],
},
'BM_StreamingPingPongMsgs': {
'tpl': ['fixture', 'client_mutator', 'server_mutator'],
'dyn': ['request_size'],
},
'BM_PumpStreamServerToClient_Trickle': {
'tpl': [],
'dyn': ['request_size', 'bandwidth_kilobits'],
},
'BM_ErrorStringOnNewError': {
'tpl': ['fixture'],
'dyn': [],
},
'BM_ErrorStringRepeatedly': {
'tpl': ['fixture'],
'dyn': [],
},
'BM_ErrorGetStatus': {
'tpl': ['fixture'],
'dyn': [],
},
'BM_ErrorGetStatusCode': {
'tpl': ['fixture'],
'dyn': [],
},
'BM_ErrorHttpError': {
'tpl': ['fixture'],
'dyn': [],
},
'BM_HasClearGrpcStatus': {
'tpl': ['fixture'],
'dyn': [],
},
'BM_IsolatedFilter' : {
'tpl': ['fixture', 'client_mutator'],
'dyn': [],
},
'BM_HpackEncoderEncodeHeader' : {
'tpl': ['fixture'],
'dyn': ['end_of_stream', 'request_size'],
},
'BM_HpackParserParseHeader' : {
'tpl': ['fixture'],
'dyn': [],
},
'BM_CallCreateDestroy' : {
'tpl': ['fixture'],
'dyn': [],
},
}
def numericalize(s):
  """Convert a benchmark argument string to a number.

  Accepts plain integers plus 'k' (x1024) and 'M' (x1024*1024) suffixes.
  Returns '' for an empty input so the corresponding cell stays blank.

  Raises:
    ValueError: if s is not in one of the recognized forms.
  """
  if not s:
    return ''
  if s[-1] == 'k':
    return int(s[:-1]) * 1024
  if s[-1] == 'M':
    return int(s[:-1]) * 1024 * 1024
  if '0' <= s[-1] <= '9':
    return int(s)
  # The original `assert 'not a number: %s' % s` was always true (a
  # non-empty string is truthy), so bad input silently fell through and
  # returned None; fail loudly instead.
  raise ValueError('not a number: %s' % s)
def parse_name(name):
  """Decode a C++ benchmark name into a dict of labeled fields.

  'BM_Foo<A, B>/1/2' yields {'name': 'BM_Foo', 'cpp_name': <original name>,
  ...} with the template arguments (between the outermost <...>) and the
  '/'-separated dynamic arguments matched against the _BM_SPECS entry for
  the benchmark family. Names with no such arguments pass through with
  name == cpp_name.
  """
  cpp_name = name
  if '<' not in name and '/' not in name and name not in _BM_SPECS:
    return {'name': name, 'cpp_name': name}
  tpl_args, dyn_args = [], []
  rest = name
  lt = rest.find('<')
  if lt >= 0:
    gt = rest.rfind('>')
    # Split the template argument list on top-level commas only; nested
    # templates like Foo<Bar<int>, Baz> keep their inner commas.
    depth, cur = 0, []
    for ch in rest[lt + 1:gt]:
      if ch == ',' and depth == 0:
        tpl_args.append(''.join(cur).strip())
        cur = []
        continue
      if ch == '<':
        depth += 1
      elif ch == '>':
        depth -= 1
      cur.append(ch)
    tpl_args.append(''.join(cur).strip())
    rest = rest[:lt] + rest[gt + 1:]
  if '/' in rest:
    pieces = rest.split('/')
    rest, dyn_args = pieces[0], pieces[1:]
  assert rest in _BM_SPECS, '_BM_SPECS needs to be expanded for %s' % rest
  spec = _BM_SPECS[rest]
  assert len(dyn_args) == len(spec['dyn'])
  assert len(tpl_args) == len(spec['tpl'])
  out = {'name': rest, 'cpp_name': cpp_name}
  out.update((k, numericalize(v)) for k, v in zip(spec['dyn'], dyn_args))
  out.update(zip(spec['tpl'], tpl_args))
  return out
def expand_json(js, js2=None):
  """Yield one flat row dict per benchmark in js.

  Each row merges: Jenkins environment metadata, the run context, the
  benchmark's own fields, the fields decoded from its name (see
  parse_name), and any 'label' key/value pairs. When js2 is given (a
  second run of the same binary), its cpu_time/real_time/iterations
  override the ones from js for the matching benchmark name.
  """
  # Index js2 by benchmark name once instead of rescanning its whole
  # benchmark list for every row (the original was O(n*m)). With several
  # entries per name the last one wins, matching the original
  # scan-and-overwrite behavior.
  overrides = {}
  if js2:
    for bm2 in js2['benchmarks']:
      overrides[bm2['name']] = bm2
  context = js['context']
  for bm in js['benchmarks']:
    if 'label' in bm:
      # Labels look like "k1:v1 k2:v2 ..."; '#'-prefixed entries are
      # skipped, and '/iter' is normalized to '_per_iteration'.
      labels_list = [s.split(':') for s in bm['label'].strip().split(' ')
                     if len(s) and s[0] != '#']
      for el in labels_list:
        el[0] = el[0].replace('/iter', '_per_iteration')
      labels = dict(labels_list)
    else:
      labels = {}
    row = {
      'jenkins_build': os.environ.get('BUILD_NUMBER', ''),
      'jenkins_job': os.environ.get('JOB_NAME', ''),
    }
    row.update(context)
    row.update(bm)
    row.update(parse_name(row['name']))
    row.update(labels)
    bm2 = overrides.get(bm['name'])
    if bm2 is not None:
      row['cpu_time'] = bm2['cpu_time']
      row['real_time'] = bm2['real_time']
      row['iterations'] = bm2['iterations']
    yield row

@@ -178,13 +178,15 @@ def run_summary(bm_name, cfg, base_json_name):
def collect_summary(bm_name, args):
  """Run the summary collector for one benchmark binary.

  Writes summary tables (with and without counters) into the report under
  per-benchmark file names and, when --bigquery_upload is set, converts
  the JSON results to CSV and loads them into the microbenchmarks
  BigQuery table.

  NOTE(review): this span of the diff interleaves removed and added
  lines; this body is the coherent added-side version of the function.
  """
  heading('Summary: %s [no counters]' % bm_name)
  text(run_summary(bm_name, 'opt', bm_name))
  heading('Summary: %s [with counters]' % bm_name)
  text(run_summary(bm_name, 'counters', bm_name))
  if args.bigquery_upload:
    with open('%s.csv' % bm_name, 'w') as f:
      f.write(subprocess.check_output(['tools/profiling/microbenchmarks/bm2bq.py',
                                       '%s.counters.json' % bm_name,
                                       '%s.opt.json' % bm_name]))
    subprocess.check_call(['bq', 'load', 'microbenchmarks.microbenchmarks',
                           '%s.csv' % bm_name])
collectors = {
'latency': collect_latency,
@@ -228,30 +230,39 @@ argp.add_argument('--summary_time',
help='Minimum time to run benchmarks for the summary collection')
args = argp.parse_args()
# Run the requested collectors for every benchmark, then optionally diff
# against the revision named by --diff_perf. The outer try/finally
# guarantees the HTML report is closed out and written even if a
# benchmark run fails.
# NOTE(review): this span of the diff interleaves removed and added
# lines; this body is the coherent added-side version of the script tail.
try:
  for bm_name in args.benchmarks:
    for collect in args.collect:
      collectors[collect](bm_name, args)
  if args.diff_perf:
    if 'summary' not in args.collect:
      # The summary collector produces the .json files the diff needs;
      # run it here when it was not already requested.
      for bm_name in args.benchmarks:
        run_summary(bm_name, 'opt', bm_name)
        run_summary(bm_name, 'counters', bm_name)
    where_am_i = subprocess.check_output(
        ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
    subprocess.check_call(['git', 'checkout', args.diff_perf])
    comparables = []
    subprocess.check_call(['make', 'clean'])
    try:
      for bm_name in args.benchmarks:
        try:
          # A benchmark may not exist at the old revision; skip it.
          # (py2-only `except X, e` bound an unused `e`; plain form is
          # valid in both Python 2 and 3.)
          run_summary(bm_name, 'opt', '%s.old' % bm_name)
          run_summary(bm_name, 'counters', '%s.old' % bm_name)
          comparables.append(bm_name)
        except subprocess.CalledProcessError:
          pass
    finally:
      # Always return to the branch we started on.
      subprocess.check_call(['git', 'checkout', where_am_i])
    for bm_name in comparables:
      diff = subprocess.check_output(['tools/profiling/microbenchmarks/bm_diff.py',
                                      '%s.counters.json' % bm_name,
                                      '%s.opt.json' % bm_name,
                                      '%s.old.counters.json' % bm_name,
                                      '%s.old.opt.json' % bm_name]).strip()
      if diff:
        heading('Performance diff: %s' % bm_name)
        text(diff)
finally:
  index_html += "</body>\n</html>\n"
  with open('reports/index.html', 'w') as f:
    f.write(index_html)

Loading…
Cancel
Save