From 48d973a27603c14db440336da25af1315d47b1e6 Mon Sep 17 00:00:00 2001
From: ncteisen
Date: Tue, 9 May 2017 16:59:17 -0700
Subject: [PATCH] Add readme and flags

---
 tools/profiling/microbenchmarks/README.md     |   4 +
 .../microbenchmarks/bm_diff/README.md         | 101 ++++++++++++++++++
 .../microbenchmarks/bm_diff/bm_build.py       |  10 +-
 .../microbenchmarks/bm_diff/bm_diff.py        |   4 +-
 .../microbenchmarks/bm_diff/bm_main.py        |  36 ++++---
 .../microbenchmarks/bm_diff/bm_run.py         |  17 +--
 .../microbenchmarks/bm_diff/bm_speedup.py     |   3 -
 7 files changed, 144 insertions(+), 31 deletions(-)

diff --git a/tools/profiling/microbenchmarks/README.md b/tools/profiling/microbenchmarks/README.md
index e69de29bb2d..035888ee188 100644
--- a/tools/profiling/microbenchmarks/README.md
+++ b/tools/profiling/microbenchmarks/README.md
@@ -0,0 +1,4 @@
+Microbenchmarks
+====
+
+This directory contains helper scripts for the microbenchmark suites.
diff --git a/tools/profiling/microbenchmarks/bm_diff/README.md b/tools/profiling/microbenchmarks/bm_diff/README.md
index e69de29bb2d..e1c728ffef3 100644
--- a/tools/profiling/microbenchmarks/bm_diff/README.md
+++ b/tools/profiling/microbenchmarks/bm_diff/README.md
@@ -0,0 +1,101 @@
+The bm_diff Family
+====
+
+This family of python scripts can be incredibly useful for fast iteration over
+different performance tweaks. The tools allow you to save performance data
+from a baseline commit, then quickly compare data from your working branch to
+that baseline to see whether you have made any performance wins.
+
+The tools operate in three concrete steps, which can be invoked separately or
+all together via the driver script, bm_main.py. This readme describes the
+typical workflow for these scripts, then covers each script in detail for
+advanced usage.
+
+## Normal Workflow
+
+Let's say you are working on a performance optimization for grpc_error. You
+have made some significant changes and want to see some data. From your
+branch, run (ensure everything is committed first):
+
+`tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -d master`
+
+This will build the `bm_error` binary on your branch and on master. It will
+then run these benchmarks 5 times each. Lastly it will compute the
+statistically significant performance differences between the two branches.
+This should show the nice performance wins your changes have made.
+
+If you have already invoked bm_main with `-d master`, you should instead use
+`-o old` for subsequent runs. This lets the script skip re-building and
+re-running the unchanged master branch.
+
+## bm_build.py
+
+This script builds the benchmarks. It takes a name parameter and stores the
+resulting binaries based on that name. Both the `opt` and `counters`
+configurations are built. The `opt` build is used to get cpu_time and
+real_time, and the `counters` build is used to track other metrics such as
+allocs and atomic adds.
+
+For example, if you were to invoke (we assume everything is run from the
+root of the repo):
+
+`tools/profiling/microbenchmarks/bm_diff/bm_build.py -b bm_error -n baseline`
+
+then the microbenchmark binaries will show up under
+`bm_diff_baseline/{opt,counters}/bm_error`
+
+## bm_run.py
+
+This script runs the benchmarks. It takes a name parameter that must match the
+name that was passed to `bm_build.py`. The script then runs each benchmark
+multiple times (default is 20, can be toggled via the loops parameter). The
+output is saved as `....json`
+
+For example, if you were to run:
+
+`tools/profiling/microbenchmarks/bm_diff/bm_run.py -b bm_error -n baseline -l 5`
+
+Then an example output file would be `bm_error.opt.baseline.1.json`
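+
+If you want to inspect the raw data yourself, the output files are plain JSON.
+A minimal sketch of reading one file (this assumes the output follows Google
+Benchmark's JSON layout with a top-level `benchmarks` list; the field names
+below are illustrative rather than guaranteed by these scripts):
+
+```python
+import json
+
+# Hypothetical path, taken from the bm_run.py example above.
+with open('bm_error.opt.baseline.1.json') as f:
+  data = json.load(f)
+
+# Print the per-benchmark timings that bm_diff.py later compares.
+for bm in data.get('benchmarks', []):
+  print('%s: cpu_time=%s' % (bm.get('name'), bm.get('cpu_time')))
+```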
+
+## bm_diff.py
+
+This script takes the output from two benchmark runs, computes the diff
+between them, and prints any significant improvements or regressions. It takes
+two name parameters, old and new, which must have previously been built and
+run.
+
+For example, assuming you had already built and run a 'baseline' microbenchmark
+from master, and then you also built and ran a 'current' microbenchmark from
+the branch you were working on, you could invoke:
+
+`tools/profiling/microbenchmarks/bm_diff/bm_diff.py -b bm_error -o baseline -n current -l 5`
+
+This would output the percent difference between your branch and master.
+
+## bm_main.py
+
+This is the driver script. It uses the previous three modules and does
+everything for you. You pass in the benchmarks to be run, the number of loops,
+the number of CPUs to use, and the commit to compare to. The script will then:
+* Build the benchmarks at head, then check out the branch to compare to and
+  build the benchmarks there
+* Run both sets of microbenchmarks
+* Run bm_diff.py to compare the two and output the differences (see the sketch
+  after this section)
+
+For example, one might run:
+
+`tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -d master`
+
+This would compare the current branch's error benchmarks to master.
+
+This script is invoked by our infrastructure on every PR to protect against
+regressions and demonstrate performance wins.
+
+However, if you are iterating quickly over different performance tweaks, it is
+unnecessary to build and run the baseline commit every time. If you are sure
+the baseline benchmark has already been built and run, pass its name via the
+--old flag; then only the current branch will be built and run. For example:
+
+`tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -o old`
+
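+For reference, the three modules compose roughly as follows. This is a sketch
+of what bm_main.py does for you, assuming a baseline named 'old' has already
+been built and run, and using illustrative values for jobs, loops and
+repetitions; see bm_main.py itself for the real logic:
+
+```python
+import bm_build
+import bm_constants
+import bm_diff
+import bm_run
+
+benchmarks = ['bm_error']
+jobs, loops, reps = 8, 5, 1
+
+bm_build.build('new', benchmarks, jobs)           # step 1: build this branch
+bm_run.run('new', benchmarks, jobs, loops, reps)  # step 2: collect data
+print(bm_diff.diff(benchmarks, loops,             # step 3: compare to baseline
+                   sorted(bm_constants._INTERESTING), 'old', 'new'))
+```
+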
diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_build.py b/tools/profiling/microbenchmarks/bm_diff/bm_build.py
index a5d1ec34475..83c3c695e77 100755
--- a/tools/profiling/microbenchmarks/bm_diff/bm_build.py
+++ b/tools/profiling/microbenchmarks/bm_diff/bm_build.py
@@ -40,10 +40,12 @@ import shutil
 
 def _args():
   argp = argparse.ArgumentParser(description='Builds microbenchmarks')
-  argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS)
-  argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count())
-  argp.add_argument('-n', '--name', type=str, help='Unique name of this build')
-  return argp.parse_args()
+  argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Which benchmarks to build')
+  argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count(), help='How many CPUs to dedicate to this task')
+  argp.add_argument('-n', '--name', type=str, help='Unique name of this build. To be used as a handle to pass to the other bm* scripts')
+  args = argp.parse_args()
+  assert args.name
+  return args
 
 def _make_cmd(cfg, benchmarks, jobs):
   return ['make'] + benchmarks + [
diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py
index 3c871c1743b..7b1c7e28bf2 100755
--- a/tools/profiling/microbenchmarks/bm_diff/bm_diff.py
+++ b/tools/profiling/microbenchmarks/bm_diff/bm_diff.py
@@ -61,8 +61,8 @@ def _args():
     nargs='+',
     default=sorted(bm_constants._INTERESTING),
     help='Which metrics to track')
-  argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS)
-  argp.add_argument('-l', '--loops', type=int, default=20)
+  argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Which benchmarks to run')
+  argp.add_argument('-l', '--loops', type=int, default=20, help='Number of times to loop the benchmarks. Must match what was passed to bm_run.py')
   argp.add_argument('-n', '--new', type=str, help='New benchmark name')
   argp.add_argument('-o', '--old', type=str, help='Old benchmark name')
   argp.add_argument('-v', '--verbose', type=bool, help='print details of before/after')
diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_main.py b/tools/profiling/microbenchmarks/bm_diff/bm_main.py
index 1a46b170155..82b0a10e07c 100755
--- a/tools/profiling/microbenchmarks/bm_diff/bm_main.py
+++ b/tools/profiling/microbenchmarks/bm_diff/bm_main.py
@@ -51,13 +51,16 @@ def _args():
     nargs='+',
     default=sorted(bm_constants._INTERESTING),
     help='Which metrics to track')
-  argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS)
-  argp.add_argument('-d', '--diff_base', type=str)
-  argp.add_argument('-r', '--repetitions', type=int, default=1)
-  argp.add_argument('-l', '--loops', type=int, default=20)
-  argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count())
+  argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Which benchmarks to run')
+  argp.add_argument('-d', '--diff_base', type=str, help='Commit or branch to compare the current one to')
+  argp.add_argument('-o', '--old', type=str, help='Name of baseline run to compare to. Usually just called "old"')
+  argp.add_argument('-r', '--repetitions', type=int, default=1, help='Number of repetitions to pass to the benchmarks')
+  argp.add_argument('-l', '--loops', type=int, default=20, help='Number of times to loop the benchmarks. More loops cut down on noise')
+  argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count(), help='Number of CPUs to use')
   args = argp.parse_args()
-  assert args.diff_base
+  assert args.diff_base or args.old, "One of diff_base or old must be set!"
+  if args.loops < 3:
+    print "WARNING: This run will likely be noisy. Increase loops."
   return args
@@ -76,18 +79,21 @@ def main(args):
 
   bm_build.build('new', args.benchmarks, args.jobs)
 
-  where_am_i = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
-  subprocess.check_call(['git', 'checkout', args.diff_base])
-  try:
-    bm_build.build('old', args.benchmarks, args.jobs)
-  finally:
-    subprocess.check_call(['git', 'checkout', where_am_i])
-    subprocess.check_call(['git', 'submodule', 'update'])
+  old = args.old
+  if args.diff_base:
+    old = 'old'
+    where_am_i = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
+    subprocess.check_call(['git', 'checkout', args.diff_base])
+    try:
+      bm_build.build('old', args.benchmarks, args.jobs)
+    finally:
+      subprocess.check_call(['git', 'checkout', where_am_i])
+      subprocess.check_call(['git', 'submodule', 'update'])
 
   bm_run.run('new', args.benchmarks, args.jobs, args.loops, args.repetitions)
-  bm_run.run('old', args.benchmarks, args.jobs, args.loops, args.repetitions)
+  bm_run.run(old, args.benchmarks, args.jobs, args.loops, args.repetitions)
 
-  diff = bm_diff.diff(args.benchmarks, args.loops, args.track, 'old', 'new')
+  diff = bm_diff.diff(args.benchmarks, args.loops, args.track, old, 'new')
   if diff:
     text = 'Performance differences noted:\n' + diff
   else:
diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_run.py b/tools/profiling/microbenchmarks/bm_diff/bm_run.py
index 14b3718ecb3..b36e660f29f 100755
--- a/tools/profiling/microbenchmarks/bm_diff/bm_run.py
+++ b/tools/profiling/microbenchmarks/bm_diff/bm_run.py
@@ -44,12 +44,16 @@ import jobset
 
 def _args():
   argp = argparse.ArgumentParser(description='Runs microbenchmarks')
-  argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS)
-  argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count())
-  argp.add_argument('-n', '--name', type=str, help='Unique name of this build')
-  argp.add_argument('-r', '--repetitions', type=int, default=1)
-  argp.add_argument('-l', '--loops', type=int, default=20)
-  return argp.parse_args()
+  argp.add_argument('-b', '--benchmarks', nargs='+', choices=bm_constants._AVAILABLE_BENCHMARK_TESTS, default=bm_constants._AVAILABLE_BENCHMARK_TESTS, help='Benchmarks to run')
+  argp.add_argument('-j', '--jobs', type=int, default=multiprocessing.cpu_count(), help='Number of CPUs to use')
+  argp.add_argument('-n', '--name', type=str, help='Unique name of the build to run. Needs to match the handle passed to bm_build.py')
+  argp.add_argument('-r', '--repetitions', type=int, default=1, help='Number of repetitions to pass to the benchmarks')
+  argp.add_argument('-l', '--loops', type=int, default=20, help='Number of times to loop the benchmarks. More loops cut down on noise')
+  args = argp.parse_args()
+  assert args.name
+  if args.loops < 3:
+    print "WARNING: This run will likely be noisy. Increase loops."
+  return args
 
 def _collect_bm_data(bm, cfg, name, reps, idx, loops):
   cmd = ['bm_diff_%s/%s/%s' % (name, cfg, bm),
@@ -73,5 +77,4 @@ def run(name, benchmarks, jobs, loops, reps):
 
 if __name__ == '__main__':
   args = _args()
-  assert args.name
   run(args.name, args.benchmarks, args.jobs, args.loops, args.repetitions)
diff --git a/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py
index fb6622760b9..99f1a073f5d 100755
--- a/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py
+++ b/tools/profiling/microbenchmarks/bm_diff/bm_speedup.py
@@ -44,7 +44,6 @@ def cmp(a, b):
 def speedup(new, old):
   if (len(set(new))) == 1 and new == old: return 0
   s0, p0 = cmp(new, old)
-  print s0, p0
   if math.isnan(p0): return 0
   if s0 == 0: return 0
   if p0 > _THRESHOLD: return 0
@@ -52,7 +51,6 @@ def speedup(new, old):
   pct = 1
   while pct < 101:
     sp, pp = cmp(new, scale(old, 1 - pct/100.0))
-    print sp, pp
     if sp > 0: break
     if pp > _THRESHOLD: break
     pct += 1
@@ -61,7 +59,6 @@ def speedup(new, old):
   pct = 1
   while pct < 100000:
     sp, pp = cmp(new, scale(old, 1 + pct/100.0))
-    print sp, pp
     if sp < 0: break
     if pp > _THRESHOLD: break
     pct += 1
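
A note on the speedup() loop that loses its debug prints above: it scales the
baseline samples one percent at a time until a statistical test can no longer
tell them apart from the new samples, and reports that percentage as the
change. A rough sketch of that search, assuming scipy's two-sample t-test as a
stand-in for this module's cmp() and an illustrative threshold (the real test,
threshold, and sign handling live in bm_speedup.py):

```python
from scipy import stats

THRESHOLD = 1e-5  # illustrative cutoff, not the value bm_speedup.py uses


def scale(samples, factor):
  # Mirrors bm_speedup's scale(old, 1 +/- pct/100.0): stretch every sample.
  return [s * factor for s in samples]


def rough_speedup(new, old):
  # No significant difference between the raw sample sets: report no change.
  _, p = stats.ttest_ind(new, old)
  if p > THRESHOLD:
    return 0
  # Step the baseline toward the new data one percent at a time until the test
  # can no longer tell them apart; that percentage bounds the change
  # (negative means the new code is faster).
  faster = sum(new) < sum(old)
  limit = 100 if faster else 100000
  for pct in range(1, limit + 1):
    factor = 1 - pct / 100.0 if faster else 1 + pct / 100.0
    _, p = stats.ttest_ind(new, scale(old, factor))
    if p > THRESHOLD:
      return -pct if faster else pct
  return 0
```

The real implementation also guards against NaN p-values and constant sample
sets, as the context lines in the hunks above show.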