Label microbenchmark differences similarly to bloat (#27998)

* benchmark differences as a label

* debug

* Automated change: Fix sanity tests

Co-authored-by: ctiller <ctiller@users.noreply.github.com>
Branch: pull/28012/head
Author: Craig Tiller (committed by GitHub)
Parent: 05b695c95a
Commit: 6003710534
Changed files:
    tools/profiling/microbenchmarks/bm_diff/bm_diff.py (42 lines changed)
    tools/profiling/microbenchmarks/bm_diff/bm_main.py (6 lines changed)

tools/profiling/microbenchmarks/bm_diff/bm_diff.py

@@ -101,6 +101,7 @@ class Benchmark:
             False: collections.defaultdict(list)
         }
         self.final = {}
+        self.speedup = {}

     def add_sample(self, track, data, new):
         for f in track:
@@ -117,6 +118,7 @@ class Benchmark:
             _maybe_print('%s: %s=%r %s=%r mdn_diff=%r' %
                          (f, new_name, new, old_name, old, mdn_diff))
             s = bm_speedup.speedup(new, old, 1e-5)
+            self.speedup[f] = s
             if abs(s) > 3:
                 if mdn_diff > 0.5:
                     self.final[f] = '%+d%%' % s
@@ -128,6 +130,11 @@ class Benchmark:
     def row(self, flds):
         return [self.final[f] if f in self.final else '' for f in flds]

+    def speedup(self, name):
+        if name in self.speedup:
+            return self.speedup[name]
+        return None
+

 def _read_json(filename, badjson_files, nonexistant_files):
     stripped = ".".join(filename.split(".")[:-2])
@@ -204,6 +211,36 @@ def diff(bms, loops, regex, track, old, new, counters):
         really_interesting.update(bm.process(track, new, old))
     fields = [f for f in track if f in really_interesting]
+
+    # figure out the significance of the changes... right now we take the 95%-ile
+    # benchmark delta %-age, and then apply some hand chosen thresholds
+    histogram = []
+    for bm in benchmarks.values():
+        if bm.skip():
+            continue
+        d = bm.speedup('cpu_time')
+        if d is None:
+            continue
+        histogram.append(d)
+    histogram.sort()
+    print("histogram of speedups: ", histogram)
+    if len(histogram) == 0:
+        significance = 0
+    else:
+        delta = histogram[int(len(histogram) * 0.95)]
+        mul = 1
+        if delta < 0:
+            delta = -delta
+            mul = -1
+        if delta < 2:
+            significance = 0
+        elif delta < 5:
+            significance = 1
+        elif delta < 10:
+            significance = 2
+        else:
+            significance = 3
+        significance *= mul

     headers = ['Benchmark'] + fields
     rows = []
     for name in sorted(benchmarks.keys()):
@@ -222,9 +259,10 @@ def diff(bms, loops, regex, track, old, new, counters):
         note = '\n\nMissing files (indicates new benchmark): \n%s' % fmt_dict(
             nonexistant_files)
     if rows:
-        return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'), note
+        return tabulate.tabulate(rows, headers=headers,
+                                 floatfmt='+.2f'), note, significance
     else:
-        return None, note
+        return None, note, 0


 if __name__ == '__main__':
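
The heuristic added to diff() reduces to: collect the per-benchmark cpu_time speedups (one signed percentage per benchmark, sign convention inherited from bm_speedup.speedup), take the 95th-percentile delta, and bucket its magnitude into a 0-3 level, signed by the direction of the change. A minimal standalone sketch of that logic follows; the names are illustrative, not from the patch. (Note the patch gives Benchmark both a self.speedup dict and a speedup() method; since an instance attribute shadows a method in Python, the sketch keeps the two names distinct.)

# Sketch of the significance bucketing added to diff() above.
# `speedups` stands in for the per-benchmark cpu_time deltas, in percent.
def significance_of(speedups):
    if not speedups:
        return 0
    ordered = sorted(speedups)
    # 95th-percentile delta; remember the sign so widespread slowdowns
    # and widespread speedups land on opposite sides of zero.
    delta = ordered[int(len(ordered) * 0.95)]
    mul = 1
    if delta < 0:
        delta = -delta
        mul = -1
    # Hand-chosen thresholds, mirroring the patch: under 2% is noise,
    # then increasingly significant buckets at 5% and 10%.
    if delta < 2:
        level = 0
    elif delta < 5:
        level = 1
    elif delta < 10:
        level = 2
    else:
        level = 3
    return level * mul

# For example: significance_of([-1.0, 0.5, 3.0, 7.5]) returns 2,
# since the 95%-ile delta is +7.5, which falls in the 5-10% bucket.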

tools/profiling/microbenchmarks/bm_diff/bm_main.py

@@ -135,8 +135,9 @@ def main(args):
     random.shuffle(jobs_list, random.SystemRandom().random)
     jobset.run(jobs_list, maxjobs=args.jobs)

-    diff, note = bm_diff.diff(args.benchmarks, args.loops, args.regex,
-                              args.track, old, 'new', args.counters)
+    diff, note, significance = bm_diff.diff(args.benchmarks, args.loops,
+                                            args.regex, args.track, old, 'new',
+                                            args.counters)
     if diff:
         text = '[%s] Performance differences noted:\n%s' % (
             args.pr_comment_name, diff)
@@ -146,6 +147,7 @@ def main(args):
         text = note + '\n\n' + text
     print('%s' % text)
     check_on_pr.check_on_pr('Benchmark', '```\n%s\n```' % text)
+    check_on_pr.label_significance_on_pr('perf-change', significance)


 if __name__ == '__main__':
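
On the bm_main.py side the contract change is simply that bm_diff.diff() now returns a three-tuple, with the extra significance value handed to check_on_pr.label_significance_on_pr, whose implementation is outside this diff. A hypothetical sketch of such a labeler follows, assuming one PR label per category with the level encoded in its name; the label format and the _set_pr_label helper are stand-ins, not the real check_on_pr API.

# Hypothetical sketch only: the real label_significance_on_pr is not part
# of this diff. It assumes significance in -3..3, as produced by bm_diff.diff().
def _set_pr_label(prefix, label):
    # Stand-in for the GitHub API call the real script would make:
    # drop any stale '<prefix>...' label, then apply `label`.
    print('would replace labels starting with %r with %r' % (prefix, label))

def label_significance_on_pr(category, significance):
    significance = max(-3, min(3, significance))  # clamp to the known range
    if significance == 0:
        return  # changes below the noise thresholds get no label
    label = '%s/%+d' % (category, significance)  # e.g. 'perf-change/+2'
    _set_pr_label(category + '/', label)

# Usage mirroring the call site above: label_significance_on_pr('perf-change', 2)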
