[cleanup] Remove old microbenchmarking diff framework (#36952)

There's something new in the works, so it's time that this unmaintained & broken system got garbage collected.

Closes #36952

PiperOrigin-RevId: 644184198
Branch: pull/36960/head
Author: Craig Tiller, committed by Copybara-Service (5 months ago)
Parent: bc26f27c32
Commit: 13a8023268
16 changed files (lines changed in parentheses):
1. .gitignore (3)
2. tools/internal_ci/linux/grpc_microbenchmark_diff.sh (26)
3. tools/internal_ci/linux/grpc_microbenchmark_diff_in_docker.sh (32)
4. tools/internal_ci/linux/pull_request/grpc_microbenchmark_diff.cfg (33)
5. tools/profiling/microbenchmarks/README.md (4)
6. tools/profiling/microbenchmarks/bm2bq.py (70)
7. tools/profiling/microbenchmarks/bm_diff/README.md (116)
8. tools/profiling/microbenchmarks/bm_diff/bm_build.py (98)
9. tools/profiling/microbenchmarks/bm_diff/bm_constants.py (41)
10. tools/profiling/microbenchmarks/bm_diff/bm_diff.py (300)
11. tools/profiling/microbenchmarks/bm_diff/bm_main.py (182)
12. tools/profiling/microbenchmarks/bm_diff/bm_run.py (148)
13. tools/profiling/microbenchmarks/bm_diff/bm_speedup.py (68)
14. tools/profiling/microbenchmarks/bm_json.py (214)
15. tools/profiling/qps/qps_diff.py (191)
16. tools/run_tests/run_microbenchmark.py (168)

.gitignore (vendored, 3 lines changed)

@@ -143,9 +143,6 @@ perf.data
perf.data.old
# bm_diff
bm_diff_new/
bm_diff_old/
bm_*.json
bloat_diff_new/
bloat_diff_old/
bloaty-build/

tools/internal_ci/linux/grpc_microbenchmark_diff.sh
@@ -1,26 +0,0 @@
#!/usr/bin/env bash
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script is invoked by Kokoro and runs a diff on the microbenchmarks
set -ex
# Enter the gRPC repo root
cd $(dirname $0)/../../..
source tools/internal_ci/helper_scripts/prepare_build_linux_rc
export DOCKERFILE_DIR=tools/dockerfile/test/cxx_debian11_x64
export DOCKER_RUN_SCRIPT=tools/internal_ci/linux/grpc_microbenchmark_diff_in_docker.sh
exec tools/run_tests/dockerize/build_and_run_docker.sh

tools/internal_ci/linux/grpc_microbenchmark_diff_in_docker.sh
@@ -1,32 +0,0 @@
#!/usr/bin/env bash
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -ex
# Enter the gRPC repo root
cd $(dirname $0)/../../..
# some extra pip packages are needed for the check_on_pr.py script to work
# TODO(jtattermusch): avoid needing to install these pip packages each time
time python3 -m pip install --user -r tools/internal_ci/helper_scripts/requirements.linux_perf.txt
# List of benchmarks that provide good signal for analyzing performance changes in pull requests
BENCHMARKS_TO_RUN="bm_fullstack_unary_ping_pong bm_fullstack_streaming_ping_pong bm_fullstack_streaming_pump bm_closure bm_cq bm_chttp2_hpack"
tools/run_tests/start_port_server.py
tools/internal_ci/linux/run_if_c_cpp_modified.sh tools/profiling/microbenchmarks/bm_diff/bm_main.py \
-d "origin/$KOKORO_GITHUB_PULL_REQUEST_TARGET_BRANCH" \
-b $BENCHMARKS_TO_RUN

tools/internal_ci/linux/pull_request/grpc_microbenchmark_diff.cfg
@@ -1,33 +0,0 @@
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Config file for the internal CI (in protobuf text format)
# Location of the continuous shell script in repository.
build_file: "grpc/tools/internal_ci/linux/grpc_microbenchmark_diff.sh"
timeout_mins: 120
before_action {
fetch_keystore {
keystore_resource {
keystore_config_id: 73836
keyname: "grpc_checks_private_key"
}
}
}
action {
define_artifacts {
regex: "**/*sponge_log.*"
regex: "github/grpc/reports/**"
}
}

tools/profiling/microbenchmarks/README.md
@@ -1,4 +0,0 @@
Microbenchmarks
====
This directory contains helper scripts for the microbenchmark suites.

tools/profiling/microbenchmarks/bm2bq.py
@@ -1,70 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Convert google-benchmark json output to something that can be uploaded to
# BigQuery
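#
# Rough usage (a sketch inferred from the argv handling below):
#   bm2bq.py --schema          print the target BigQuery schema
#   bm2bq.py results.json      emit CSV rows for the results on stdout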
import csv
import json
import subprocess
import sys
import bm_json
columns = []
for row in json.loads(
# TODO(jtattermusch): make sure the dataset name is not hardcoded
subprocess.check_output(
["bq", "--format=json", "show", "microbenchmarks.microbenchmarks"]
)
)["schema"]["fields"]:
columns.append((row["name"], row["type"].lower()))
SANITIZE = {
"integer": int,
"float": float,
"boolean": bool,
"string": str,
"timestamp": str,
}
# TODO(jtattermusch): add proper argparse argument, rather than trying
# to emulate with manual argv inspection.
if sys.argv[1] == "--schema":
print(",\n".join("%s:%s" % (k, t.upper()) for k, t in columns))
sys.exit(0)
with open(sys.argv[1]) as f:
js = json.loads(f.read())
if len(sys.argv) > 2:
with open(sys.argv[2]) as f:
js2 = json.loads(f.read())
else:
js2 = None
# TODO(jtattermusch): write directly to a file instead of stdout
writer = csv.DictWriter(sys.stdout, [c for c, t in columns])
for row in bm_json.expand_json(js, js2):
sane_row = {}
for name, sql_type in columns:
if name in row:
if row[name] == "":
continue
sane_row[name] = SANITIZE[sql_type](row[name])
writer.writerow(sane_row)

tools/profiling/microbenchmarks/bm_diff/README.md
@@ -1,116 +0,0 @@
The bm_diff Family
====
This family of python scripts can be incredibly useful for fast iteration over
different performance tweaks. The tools allow you to save performance data from
a baseline commit, then quickly compare data from your working branch to that
baseline data to see if you have made any performance wins.
The tools operate in three concrete steps, which can be invoked separately or
all together via the driver script, bm_main.py. This README describes the
typical workflow for these scripts, then covers each script in detail for
advanced usage.
## Normal Workflow
Let's say you are working on a performance optimization for grpc_error. You have
made some significant changes and want to see some data. From your branch, run
(ensure everything is committed first):
`tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -d master`
This will build the `bm_error` binary on your branch, then check out master
and build it there too. It will then run these benchmarks 5 times each.
Lastly it will compute the statistically significant performance differences
between the two branches. This should show the nice performance wins your
changes have made.
If you have already invoked bm_main with `-d master`, you should instead use
`-o` for subsequent runs. This allows the script to skip re-building and
re-running the unchanged master branch. For example:
`tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -o`
This will only build and run `bm_error` on your branch. It will then compare
the output to the saved runs from master.
## Advanced Workflow
If you have a deeper knowledge of these scripts, you can use them to do more
fine-tuned benchmark comparisons. For example, you could build, run, and save
the benchmark output from two different base branches. Then you could diff both
of these baselines against your working branch to see how the different metrics
change. The rest of this doc goes over the details of what each of the
individual modules accomplishes.
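For instance, a sketch of such a comparison, where `base1` and `base2` are
hypothetical names under which you have already built and run each baseline,
and `mine` is the build and run from your working branch (the individual flags
are documented in the sections below):
`tools/profiling/microbenchmarks/bm_diff/bm_diff.py -b bm_error -o base1 -n mine -l 5`
`tools/profiling/microbenchmarks/bm_diff/bm_diff.py -b bm_error -o base2 -n mine -l 5`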
## bm_build.py
This script builds the benchmarks. It takes a name parameter and stores the
built binaries under a directory derived from that name. Both the `opt` and
`counters` configurations are used: the `opt` build provides cpu_time and
real_time, while the `counters` build tracks other metrics such as allocs,
atomic adds, and so on.
For example, if you were to invoke (we assume everything is run from the
root of the repo):
`tools/profiling/microbenchmarks/bm_diff/bm_build.py -b bm_error -n baseline`
then the microbenchmark binaries will show up under
`bm_diff_baseline/{opt,counters}/bm_error`
## bm_run.py
This script runs the benchmarks. It takes a name parameter that must match the
name that was passed to `bm_build.py`. The script then runs the benchmark
multiple times (20 by default, configurable via the loops parameter). The
output is saved as `<benchmark name>.<config>.<name>.<loop idx>.json`
For example, if you were to run:
`tools/profiling/microbenchmarks/bm_diff/bm_run.py -b bm_error -n baseline -l 5`
Then an example output file would be `bm_error.opt.baseline.0.json`
## bm_diff.py
This script takes in the output from two benchmark runs, computes the diff
between them, and prints any significant improvements or regressions. It takes
in two name parameters, old and new. These must have previously been built and
run.
For example, assuming you had already built and run a 'baseline' microbenchmark
from master, and then you also built and ran a 'current' microbenchmark from
the branch you were working on, you could invoke:
`tools/profiling/microbenchmarks/bm_diff/bm_diff.py -b bm_error -o baseline -n current -l 5`
This would output the percent difference between your branch and master.
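Under the hood, a metric change is only reported when both a t-test based
speedup estimate and the raw median difference clear hand-chosen thresholds.
A minimal self-contained sketch of that rule (the sample values are made up,
and bm_speedup is assumed to be importable from this directory; the real logic
lives in bm_diff.py's Benchmark.process):

    from statistics import median
    import bm_speedup  # tools/profiling/microbenchmarks/bm_diff/bm_speedup.py

    old = [10.0, 10.1, 10.2, 10.05, 10.15]  # e.g. cpu_time samples from master
    new = [9.0, 9.1, 9.05, 9.2, 8.95]       # samples from the working branch
    s = bm_speedup.speedup(new, old, 1e-5)  # signed percent change estimate
    if abs(s) > 3 and abs(median(new) - median(old)) > 0.5:
        print("significant change: %+d%%" % s)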
## bm_main.py
This is the driver script. It uses the previous three modules and does
everything for you. You pass in the benchmarks to be run, the number of loops,
number of CPUs to use, and the commit to compare to. Then the script will:
* Build the benchmarks at head, then check out the branch to compare to and
build the benchmarks there
* Run both sets of microbenchmarks
* Run bm_diff.py to compare the two and output the difference
For example, one might run:
`tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -d master`
This would compare the current branch's error benchmarks to master.
This script is invoked by our infrastructure on every PR to protect against
regressions and demonstrate performance wins.
However, if you are iterating over different performance tweaks quickly, it is
unnecessary to build and run the baseline commit every time. If you are sure
the baseline benchmark has already been built and run, pass its name via the
--old flag instead. This will only build and run the current branch. For example:
`tools/profiling/microbenchmarks/bm_diff/bm_main.py -b bm_error -l 5 -o old`

tools/profiling/microbenchmarks/bm_diff/bm_build.py
@@ -1,98 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Python utility to build opt and counters benchmarks """
import argparse
import multiprocessing
import os
import shutil
import subprocess
import bm_constants
def _args():
argp = argparse.ArgumentParser(description="Builds microbenchmarks")
argp.add_argument(
"-b",
"--benchmarks",
nargs="+",
choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
help="Which benchmarks to build",
)
argp.add_argument(
"-j",
"--jobs",
type=int,
default=multiprocessing.cpu_count(),
help=(
"Deprecated. Bazel chooses number of CPUs to build with"
" automatically."
),
)
argp.add_argument(
"-n",
"--name",
type=str,
help=(
"Unique name of this build. To be used as a handle to pass to the"
" other bm* scripts"
),
)
args = argp.parse_args()
assert args.name
return args
def _build_cmd(cfg, benchmarks):
bazel_targets = [
"//test/cpp/microbenchmarks:%s" % benchmark for benchmark in benchmarks
]
# --dynamic_mode=off makes sure that we get a monolithic binary that can be safely
# moved outside of the bazel-bin directory
return [
"tools/bazel",
"build",
"--config=%s" % cfg,
"--dynamic_mode=off",
] + bazel_targets
def _build_config_and_copy(cfg, benchmarks, dest_dir):
"""Build given config and copy resulting binaries to dest_dir/CONFIG"""
subprocess.check_call(_build_cmd(cfg, benchmarks))
cfg_dir = dest_dir + "/%s" % cfg
os.makedirs(cfg_dir)
subprocess.check_call(
["cp"]
+ [
"bazel-bin/test/cpp/microbenchmarks/%s" % benchmark
for benchmark in benchmarks
]
+ [cfg_dir]
)
def build(name, benchmarks, jobs):
dest_dir = "bm_diff_%s" % name
shutil.rmtree(dest_dir, ignore_errors=True)
_build_config_and_copy("opt", benchmarks, dest_dir)
if __name__ == "__main__":
args = _args()
build(args.name, args.benchmarks, args.jobs)

tools/profiling/microbenchmarks/bm_diff/bm_constants.py
@@ -1,41 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Configurable constants for the bm_*.py family """
_AVAILABLE_BENCHMARK_TESTS = [
"bm_fullstack_unary_ping_pong",
"bm_fullstack_streaming_ping_pong",
"bm_fullstack_streaming_pump",
"bm_closure",
"bm_cq",
"bm_chttp2_hpack",
]
_INTERESTING = (
"cpu_time",
"real_time",
"locks_per_iteration",
"allocs_per_iteration",
"writes_per_iteration",
"atm_cas_per_iteration",
"atm_add_per_iteration",
"nows_per_iteration",
"cli_transport_stalls_per_iteration",
"cli_stream_stalls_per_iteration",
"svr_transport_stalls_per_iteration",
"svr_stream_stalls_per_iteration",
"http2_pings_sent_per_iteration",
)

tools/profiling/microbenchmarks/bm_diff/bm_diff.py
@@ -1,300 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Computes the diff between two bm runs and outputs significant results """
import argparse
import collections
import json
import os
import subprocess
import sys
sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), ".."))
import bm_constants
import bm_json
import bm_speedup
import tabulate
verbose = False
def _median(ary):
assert len(ary)
ary = sorted(ary)
n = len(ary)
if n % 2 == 0:
return (ary[(n - 1) // 2] + ary[(n - 1) // 2 + 1]) / 2.0
else:
return ary[n // 2]
def _args():
argp = argparse.ArgumentParser(
description="Perform diff on microbenchmarks"
)
argp.add_argument(
"-t",
"--track",
choices=sorted(bm_constants._INTERESTING),
nargs="+",
default=sorted(bm_constants._INTERESTING),
help="Which metrics to track",
)
argp.add_argument(
"-b",
"--benchmarks",
nargs="+",
choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
help="Which benchmarks to run",
)
argp.add_argument(
"-l",
"--loops",
type=int,
default=20,
help=(
"Number of times to loop the benchmarks. Must match what was"
" passed to bm_run.py"
),
)
argp.add_argument(
"-r",
"--regex",
type=str,
default="",
help="Regex to filter benchmarks run",
)
argp.add_argument("-n", "--new", type=str, help="New benchmark name")
argp.add_argument("-o", "--old", type=str, help="Old benchmark name")
argp.add_argument(
"-v", "--verbose", action="store_true", help="Print details of before/after"
)
args = argp.parse_args()
global verbose
if args.verbose:
verbose = True
assert args.new
assert args.old
return args
def _maybe_print(str):
if verbose:
print(str)
class Benchmark:
def __init__(self):
self.samples = {
True: collections.defaultdict(list),
False: collections.defaultdict(list),
}
self.final = {}
self.speedup = {}
def add_sample(self, track, data, new):
for f in track:
if f in data:
self.samples[new][f].append(float(data[f]))
def process(self, track, new_name, old_name):
for f in sorted(track):
new = self.samples[True][f]
old = self.samples[False][f]
if not new or not old:
continue
mdn_diff = abs(_median(new) - _median(old))
_maybe_print(
"%s: %s=%r %s=%r mdn_diff=%r"
% (f, new_name, new, old_name, old, mdn_diff)
)
s = bm_speedup.speedup(new, old, 1e-5)
self.speedup[f] = s
if abs(s) > 3:
if mdn_diff > 0.5:
self.final[f] = "%+d%%" % s
return self.final.keys()
def skip(self):
return not self.final
def row(self, flds):
return [self.final[f] if f in self.final else "" for f in flds]
def speedup(self, name):
if name in self.speedup:
return self.speedup[name]
return None
def _read_json(filename, badjson_files, nonexistant_files):
stripped = ".".join(filename.split(".")[:-2])
try:
with open(filename) as f:
r = f.read()
return json.loads(r)
except IOError as e:
if stripped in nonexistant_files:
nonexistant_files[stripped] += 1
else:
nonexistant_files[stripped] = 1
return None
except ValueError as e:
print(r)
if stripped in badjson_files:
badjson_files[stripped] += 1
else:
badjson_files[stripped] = 1
return None
def fmt_dict(d):
return "".join([" " + k + ": " + str(d[k]) + "\n" for k in d])
def diff(bms, loops, regex, track, old, new):
benchmarks = collections.defaultdict(Benchmark)
badjson_files = {}
nonexistant_files = {}
for bm in bms:
for loop in range(0, loops):
for line in subprocess.check_output(
[
"bm_diff_%s/opt/%s" % (old, bm),
"--benchmark_list_tests",
"--benchmark_filter=%s" % regex,
]
).splitlines():
line = line.decode("UTF-8")
stripped_line = (
line.strip()
.replace("/", "_")
.replace("<", "_")
.replace(">", "_")
.replace(", ", "_")
)
js_new_opt = _read_json(
"%s.%s.opt.%s.%d.json" % (bm, stripped_line, new, loop),
badjson_files,
nonexistant_files,
)
js_old_opt = _read_json(
"%s.%s.opt.%s.%d.json" % (bm, stripped_line, old, loop),
badjson_files,
nonexistant_files,
)
if js_new_opt:
for row in bm_json.expand_json(js_new_opt):
name = row["cpp_name"]
if name.endswith("_mean") or name.endswith("_stddev"):
continue
benchmarks[name].add_sample(track, row, True)
if js_old_opt:
for row in bm_json.expand_json(js_old_opt):
name = row["cpp_name"]
if name.endswith("_mean") or name.endswith("_stddev"):
continue
benchmarks[name].add_sample(track, row, False)
really_interesting = set()
for name, bm in benchmarks.items():
_maybe_print(name)
really_interesting.update(bm.process(track, new, old))
fields = [f for f in track if f in really_interesting]
# figure out the significance of the changes... right now we take the 95%-ile
# benchmark delta %-age, and then apply some hand chosen thresholds
histogram = []
_NOISY = ["BM_WellFlushed"]
for name, bm in benchmarks.items():
if name in _NOISY:
print(
"skipping noisy benchmark '%s' for labelling evaluation" % name
)
continue
if bm.skip():
continue
d = bm.speedup["cpu_time"]
if d is None:
continue
histogram.append(d)
histogram.sort()
print("histogram of speedups: ", histogram)
if len(histogram) == 0:
significance = 0
else:
delta = histogram[int(len(histogram) * 0.95)]
mul = 1
if delta < 0:
delta = -delta
mul = -1
if delta < 2:
significance = 0
elif delta < 5:
significance = 1
elif delta < 10:
significance = 2
else:
significance = 3
significance *= mul
headers = ["Benchmark"] + fields
rows = []
for name in sorted(benchmarks.keys()):
if benchmarks[name].skip():
continue
rows.append([name] + benchmarks[name].row(fields))
note = None
if len(badjson_files):
note = (
"Corrupt JSON data (indicates timeout or crash): \n%s"
% fmt_dict(badjson_files)
)
if len(nonexistant_files):
if note:
note += (
"\n\nMissing files (indicates new benchmark): \n%s"
% fmt_dict(nonexistant_files)
)
else:
note = (
"\n\nMissing files (indicates new benchmark): \n%s"
% fmt_dict(nonexistant_files)
)
if rows:
return (
tabulate.tabulate(rows, headers=headers, floatfmt="+.2f"),
note,
significance,
)
else:
return None, note, 0
if __name__ == "__main__":
args = _args()
diff_table, note, _significance = diff(
args.benchmarks,
args.loops,
args.regex,
args.track,
args.old,
args.new,
)
print("%s\n%s" % (note, diff_table if diff_table else "No performance differences"))

tools/profiling/microbenchmarks/bm_diff/bm_main.py
@@ -1,182 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Runs the entire bm_*.py pipeline, and possible comments on the PR """
import argparse
import errno
import multiprocessing
import os
import random
import subprocess
import sys
sys.path.append(
os.path.join(
os.path.dirname(sys.argv[0]), "..", "..", "run_tests", "python_utils"
)
)
sys.path.append(
os.path.join(
os.path.dirname(sys.argv[0]),
"..",
"..",
"..",
"run_tests",
"python_utils",
)
)
import bm_build
import bm_constants
import bm_diff
import bm_run
import check_on_pr
import jobset
def _args():
argp = argparse.ArgumentParser(
description="Perform diff on microbenchmarks"
)
argp.add_argument(
"-t",
"--track",
choices=sorted(bm_constants._INTERESTING),
nargs="+",
default=sorted(bm_constants._INTERESTING),
help="Which metrics to track",
)
argp.add_argument(
"-b",
"--benchmarks",
nargs="+",
choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
help="Which benchmarks to run",
)
argp.add_argument(
"-d",
"--diff_base",
type=str,
help="Commit or branch to compare the current one to",
)
argp.add_argument(
"-o",
"--old",
default="old",
type=str,
help='Name of baseline run to compare to. Usually just called "old"',
)
argp.add_argument(
"-r",
"--regex",
type=str,
default="",
help="Regex to filter benchmarks run",
)
argp.add_argument(
"-l",
"--loops",
type=int,
default=10,
help=(
"Number of times to loop the benchmarks. More loops cut down on"
" noise"
),
)
argp.add_argument(
"-j",
"--jobs",
type=int,
default=multiprocessing.cpu_count(),
help="Number of CPUs to use",
)
argp.add_argument(
"--pr_comment_name",
type=str,
default="microbenchmarks",
help="Name that Jenkins will use to comment on the PR",
)
args = argp.parse_args()
assert args.diff_base or args.old, "One of diff_base or old must be set!"
if args.loops < 3:
print("WARNING: This run will likely be noisy. Increase loops.")
return args
def eintr_be_gone(fn):
"""Run fn until it doesn't stop because of EINTR"""
def inner(*args):
while True:
try:
return fn(*args)
except IOError as e:
if e.errno != errno.EINTR:
raise
return inner
def main(args):
bm_build.build("new", args.benchmarks, args.jobs)
old = args.old
if args.diff_base:
old = "old"
where_am_i = subprocess.check_output(
["git", "rev-parse", "--abbrev-ref", "HEAD"]
).strip()
subprocess.check_call(["git", "checkout", args.diff_base])
try:
bm_build.build(old, args.benchmarks, args.jobs)
finally:
subprocess.check_call(["git", "checkout", where_am_i])
subprocess.check_call(["git", "submodule", "update"])
jobs_list = []
jobs_list += bm_run.create_jobs(
"new", args.benchmarks, args.loops, args.regex
)
jobs_list += bm_run.create_jobs(
old, args.benchmarks, args.loops, args.regex
)
# shuffle all jobs to eliminate noise from GCE CPU drift
random.SystemRandom().shuffle(jobs_list)
jobset.run(jobs_list, maxjobs=args.jobs)
diff, note, significance = bm_diff.diff(
args.benchmarks, args.loops, args.regex, args.track, old, "new"
)
if diff:
text = "[%s] Performance differences noted:\n%s" % (
args.pr_comment_name,
diff,
)
else:
text = (
"[%s] No significant performance differences" % args.pr_comment_name
)
if note:
text = note + "\n\n" + text
print("%s" % text)
check_on_pr.check_on_pr("Benchmark", "```\n%s\n```" % text)
if __name__ == "__main__":
args = _args()
main(args)

tools/profiling/microbenchmarks/bm_diff/bm_run.py
@@ -1,148 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Python utility to run opt and counters benchmarks and save json output """
import argparse
import itertools
import multiprocessing
import os
import random
import subprocess
import sys
import bm_constants
sys.path.append(
os.path.join(
os.path.dirname(sys.argv[0]),
"..",
"..",
"..",
"run_tests",
"python_utils",
)
)
import jobset
def _args():
argp = argparse.ArgumentParser(description="Runs microbenchmarks")
argp.add_argument(
"-b",
"--benchmarks",
nargs="+",
choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
help="Benchmarks to run",
)
argp.add_argument(
"-j",
"--jobs",
type=int,
default=multiprocessing.cpu_count(),
help="Number of CPUs to use",
)
argp.add_argument(
"-n",
"--name",
type=str,
help=(
"Unique name of the build to run. Needs to match the handle passed"
" to bm_build.py"
),
)
argp.add_argument(
"-r",
"--regex",
type=str,
default="",
help="Regex to filter benchmarks run",
)
argp.add_argument(
"-l",
"--loops",
type=int,
default=20,
help=(
"Number of times to loop the benchmarks. More loops cut down on"
" noise"
),
)
argp.add_argument("--counters", dest="counters", action="store_true")
argp.add_argument("--no-counters", dest="counters", action="store_false")
argp.set_defaults(counters=True)
args = argp.parse_args()
assert args.name
if args.loops < 3:
print(
"WARNING: This run will likely be noisy. Increase loops to at "
"least 3."
)
return args
def _collect_bm_data(bm, cfg, name, regex, idx, loops):
jobs_list = []
for line in subprocess.check_output(
[
"bm_diff_%s/%s/%s" % (name, cfg, bm),
"--benchmark_list_tests",
"--benchmark_filter=%s" % regex,
]
).splitlines():
line = line.decode("UTF-8")
stripped_line = (
line.strip()
.replace("/", "_")
.replace("<", "_")
.replace(">", "_")
.replace(", ", "_")
)
cmd = [
"bm_diff_%s/%s/%s" % (name, cfg, bm),
"--benchmark_filter=^%s$" % line,
"--benchmark_out=%s.%s.%s.%s.%d.json"
% (bm, stripped_line, cfg, name, idx),
"--benchmark_out_format=json",
]
jobs_list.append(
jobset.JobSpec(
cmd,
shortname="%s %s %s %s %d/%d"
% (bm, line, cfg, name, idx + 1, loops),
verbose_success=True,
cpu_cost=2,
timeout_seconds=60 * 60,
)
) # one hour
return jobs_list
def create_jobs(name, benchmarks, loops, regex):
jobs_list = []
for loop in range(0, loops):
for bm in benchmarks:
jobs_list += _collect_bm_data(bm, "opt", name, regex, loop, loops)
random.SystemRandom().shuffle(jobs_list)
return jobs_list
if __name__ == "__main__":
args = _args()
jobs_list = create_jobs(
args.name, args.benchmarks, args.loops, args.regex
)
jobset.run(jobs_list, maxjobs=args.jobs)

tools/profiling/microbenchmarks/bm_diff/bm_speedup.py
@@ -1,68 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from scipy import stats
_DEFAULT_THRESHOLD = 1e-10
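# p-value threshold below which a difference between two sample sets is
# treated as statistically significant by speedup().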
def scale(a, mul):
return [x * mul for x in a]
def cmp(a, b):
return stats.ttest_ind(a, b)
def speedup(new, old, threshold=_DEFAULT_THRESHOLD):
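# Estimate, as a signed percentage, how far the 'new' samples sit relative to
# the 'old' ones: scale the old samples down (or up) one percent at a time
# until the t-test against the new samples stops being significant or flips
# sign, and return the last significant percentage (negative when new < old).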
if (len(set(new))) == 1 and new == old:
return 0
s0, p0 = cmp(new, old)
if math.isnan(p0):
return 0
if s0 == 0:
return 0
if p0 > threshold:
return 0
if s0 < 0:
pct = 1
while pct < 100:
sp, pp = cmp(new, scale(old, 1 - pct / 100.0))
if sp > 0:
break
if pp > threshold:
break
pct += 1
return -(pct - 1)
else:
pct = 1
while pct < 10000:
sp, pp = cmp(new, scale(old, 1 + pct / 100.0))
if sp < 0:
break
if pp > threshold:
break
pct += 1
return pct - 1
if __name__ == "__main__":
new = [0.0, 0.0, 0.0, 0.0]
old = [2.96608e-06, 3.35076e-06, 3.45384e-06, 3.34407e-06]
print(speedup(new, old, 1e-5))
print(speedup(old, new, 1e-5))

tools/profiling/microbenchmarks/bm_json.py
@@ -1,214 +0,0 @@
#!/usr/bin/env python3
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Utilities for manipulating JSON data that represents microbenchmark results.
import os
# template arguments and dynamic arguments of individual benchmark types
# Example benchmark name: "BM_UnaryPingPong<TCP, NoOpMutator, NoOpMutator>/0/0"
_BM_SPECS = {
"BM_UnaryPingPong": {
"tpl": ["fixture", "client_mutator", "server_mutator"],
"dyn": ["request_size", "response_size"],
},
"BM_PumpStreamClientToServer": {
"tpl": ["fixture"],
"dyn": ["request_size"],
},
"BM_PumpStreamServerToClient": {
"tpl": ["fixture"],
"dyn": ["request_size"],
},
"BM_StreamingPingPong": {
"tpl": ["fixture", "client_mutator", "server_mutator"],
"dyn": ["request_size", "request_count"],
},
"BM_StreamingPingPongMsgs": {
"tpl": ["fixture", "client_mutator", "server_mutator"],
"dyn": ["request_size"],
},
"BM_PumpStreamServerToClient_Trickle": {
"tpl": [],
"dyn": ["request_size", "bandwidth_kilobits"],
},
"BM_PumpUnbalancedUnary_Trickle": {
"tpl": [],
"dyn": ["cli_req_size", "svr_req_size", "bandwidth_kilobits"],
},
"BM_ErrorStringOnNewError": {
"tpl": ["fixture"],
"dyn": [],
},
"BM_ErrorStringRepeatedly": {
"tpl": ["fixture"],
"dyn": [],
},
"BM_ErrorGetStatus": {
"tpl": ["fixture"],
"dyn": [],
},
"BM_ErrorGetStatusCode": {
"tpl": ["fixture"],
"dyn": [],
},
"BM_ErrorHttpError": {
"tpl": ["fixture"],
"dyn": [],
},
"BM_HasClearGrpcStatus": {
"tpl": ["fixture"],
"dyn": [],
},
"BM_IsolatedFilter": {
"tpl": ["fixture", "client_mutator"],
"dyn": [],
},
"BM_HpackEncoderEncodeHeader": {
"tpl": ["fixture"],
"dyn": ["end_of_stream", "request_size"],
},
"BM_HpackParserParseHeader": {
"tpl": ["fixture"],
"dyn": [],
},
"BM_CallCreateDestroy": {
"tpl": ["fixture"],
"dyn": [],
},
"BM_Zalloc": {
"tpl": [],
"dyn": ["request_size"],
},
"BM_PollEmptyPollset_SpeedOfLight": {
"tpl": [],
"dyn": ["request_size", "request_count"],
},
"BM_StreamCreateSendInitialMetadataDestroy": {
"tpl": ["fixture"],
"dyn": [],
},
"BM_TransportStreamSend": {
"tpl": [],
"dyn": ["request_size"],
},
"BM_TransportStreamRecv": {
"tpl": [],
"dyn": ["request_size"],
},
"BM_StreamingPingPongWithCoalescingApi": {
"tpl": ["fixture", "client_mutator", "server_mutator"],
"dyn": ["request_size", "request_count", "end_of_stream"],
},
"BM_Base16SomeStuff": {
"tpl": [],
"dyn": ["request_size"],
},
}
def numericalize(s):
"""Convert abbreviations like '100M' or '10k' to a number."""
if not s:
return ""
if s[-1] == "k":
return float(s[:-1]) * 1024
if s[-1] == "M":
return float(s[:-1]) * 1024 * 1024
if 0 <= (ord(s[-1]) - ord("0")) <= 9:
return float(s)
assert "not a number: %s" % s
def parse_name(name):
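# Split a benchmark name such as
# "BM_UnaryPingPong<TCP, NoOpMutator, NoOpMutator>/0/0" into its template
# arguments (here: fixture, client_mutator, server_mutator) and dynamic
# arguments (request_size, response_size), as declared in _BM_SPECS.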
cpp_name = name
if "<" not in name and "/" not in name and name not in _BM_SPECS:
return {"name": name, "cpp_name": name}
rest = name
out = {}
tpl_args = []
dyn_args = []
if "<" in rest:
tpl_bit = rest[rest.find("<") + 1 : rest.rfind(">")]
arg = ""
nesting = 0
for c in tpl_bit:
if c == "<":
nesting += 1
arg += c
elif c == ">":
nesting -= 1
arg += c
elif c == ",":
if nesting == 0:
tpl_args.append(arg.strip())
arg = ""
else:
arg += c
else:
arg += c
tpl_args.append(arg.strip())
rest = rest[: rest.find("<")] + rest[rest.rfind(">") + 1 :]
if "/" in rest:
s = rest.split("/")
rest = s[0]
dyn_args = s[1:]
name = rest
assert name in _BM_SPECS, "_BM_SPECS needs to be expanded for %s" % name
assert len(dyn_args) == len(_BM_SPECS[name]["dyn"])
assert len(tpl_args) == len(_BM_SPECS[name]["tpl"])
out["name"] = name
out["cpp_name"] = cpp_name
out.update(
dict(
(k, numericalize(v))
for k, v in zip(_BM_SPECS[name]["dyn"], dyn_args)
)
)
out.update(dict(zip(_BM_SPECS[name]["tpl"], tpl_args)))
return out
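# expand_json yields one flattened dict per benchmark result, merging each
# benchmark's own fields with the run context, any per-iteration counters
# parsed from its 'label' field, the parsed benchmark name, and Kokoro job
# metadata.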
def expand_json(js, js2=None):
for js in (js, js2):
if not js:
continue
for bm in js["benchmarks"]:
if bm["name"].endswith("_stddev") or bm["name"].endswith("_mean"):
continue
context = js["context"]
if "label" in bm:
labels_list = [
s.split(":")
for s in bm["label"].strip().split(" ")
if len(s) and s[0] != "#"
]
for el in labels_list:
el[0] = el[0].replace("/iter", "_per_iteration")
labels = dict(labels_list)
else:
labels = {}
# TODO(jtattermusch): grabbing kokoro env values shouldn't be buried
# deep in the JSON conversion logic.
# Link the data to a kokoro job run by adding
# well known kokoro env variables as metadata for each row
row = {
"jenkins_build": os.environ.get("KOKORO_BUILD_NUMBER", ""),
"jenkins_job": os.environ.get("KOKORO_JOB_NAME", ""),
}
row.update(context)
row.update(bm)
row.update(parse_name(row["name"]))
row.update(labels)
yield row

tools/profiling/qps/qps_diff.py
@@ -1,191 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Computes the diff between two qps runs and outputs significant results """
import argparse
import json
import multiprocessing
import os
import shutil
import subprocess
import sys
import qps_scenarios
import tabulate
sys.path.append(
os.path.join(
os.path.dirname(sys.argv[0]), "..", "microbenchmarks", "bm_diff"
)
)
import bm_speedup
sys.path.append(
os.path.join(
os.path.dirname(sys.argv[0]), "..", "..", "run_tests", "python_utils"
)
)
import check_on_pr
def _args():
argp = argparse.ArgumentParser(description="Perform diff on QPS Driver")
argp.add_argument(
"-d",
"--diff_base",
type=str,
help="Commit or branch to compare the current one to",
)
argp.add_argument(
"-l",
"--loops",
type=int,
default=4,
help=(
"Number of loops for each benchmark. More loops cuts down on noise"
),
)
argp.add_argument(
"-j",
"--jobs",
type=int,
default=multiprocessing.cpu_count(),
help="Number of CPUs to use",
)
args = argp.parse_args()
assert args.diff_base, "diff_base must be set"
return args
def _make_cmd(jobs):
return ["make", "-j", "%d" % jobs, "qps_json_driver", "qps_worker"]
def build(name, jobs):
shutil.rmtree("qps_diff_%s" % name, ignore_errors=True)
subprocess.check_call(["git", "submodule", "update"])
try:
subprocess.check_call(_make_cmd(jobs))
except subprocess.CalledProcessError as e:
subprocess.check_call(["make", "clean"])
subprocess.check_call(_make_cmd(jobs))
os.rename("bins", "qps_diff_%s" % name)
def _run_cmd(name, scenario, fname):
return [
"qps_diff_%s/opt/qps_json_driver" % name,
"--scenarios_json",
scenario,
"--json_file_out",
fname,
]
def run(name, scenarios, loops):
for sn in scenarios:
for i in range(0, loops):
fname = "%s.%s.%d.json" % (sn, name, i)
subprocess.check_call(_run_cmd(name, scenarios[sn], fname))
def _load_qps(fname):
try:
with open(fname) as f:
return json.loads(f.read())["qps"]
except IOError as e:
print(("IOError occurred reading file: %s" % fname))
return None
except ValueError as e:
print(("ValueError occurred reading file: %s" % fname))
return None
def _median(ary):
assert len(ary)
ary = sorted(ary)
n = len(ary)
if n % 2 == 0:
return (ary[(n - 1) // 2] + ary[(n - 1) // 2 + 1]) / 2.0
else:
return ary[n // 2]
def diff(scenarios, loops, old, new):
old_data = {}
new_data = {}
# collect data
for sn in scenarios:
old_data[sn] = []
new_data[sn] = []
for i in range(loops):
old_data[sn].append(_load_qps("%s.%s.%d.json" % (sn, old, i)))
new_data[sn].append(_load_qps("%s.%s.%d.json" % (sn, new, i)))
# crunch data
headers = ["Benchmark", "qps"]
rows = []
for sn in scenarios:
mdn_diff = abs(_median(new_data[sn]) - _median(old_data[sn]))
print(
"%s: %s=%r %s=%r mdn_diff=%r"
% (sn, new, new_data[sn], old, old_data[sn], mdn_diff)
)
s = bm_speedup.speedup(new_data[sn], old_data[sn], 10e-5)
if abs(s) > 3 and mdn_diff > 0.5:
rows.append([sn, "%+d%%" % s])
if rows:
return tabulate.tabulate(rows, headers=headers, floatfmt="+.2f")
else:
return None
def main(args):
build("new", args.jobs)
if args.diff_base:
where_am_i = (
subprocess.check_output(
["git", "rev-parse", "--abbrev-ref", "HEAD"]
)
.decode()
.strip()
)
subprocess.check_call(["git", "checkout", args.diff_base])
try:
build("old", args.jobs)
finally:
subprocess.check_call(["git", "checkout", where_am_i])
subprocess.check_call(["git", "submodule", "update"])
run("new", qps_scenarios._SCENARIOS, args.loops)
run("old", qps_scenarios._SCENARIOS, args.loops)
diff_output = diff(qps_scenarios._SCENARIOS, args.loops, "old", "new")
if diff_output:
text = "[qps] Performance differences noted:\n%s" % diff_output
else:
text = "[qps] No significant performance differences"
print(("%s" % text))
check_on_pr.check_on_pr("QPS", "```\n%s\n```" % text)
if __name__ == "__main__":
args = _args()
main(args)

tools/run_tests/run_microbenchmark.py
@@ -1,168 +0,0 @@
#!/usr/bin/env python3
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import html
import multiprocessing
import os
import subprocess
import sys
import python_utils.jobset as jobset
import python_utils.start_port_server as start_port_server
sys.path.append(
os.path.join(
os.path.dirname(sys.argv[0]),
"..",
"profiling",
"microbenchmarks",
"bm_diff",
)
)
import bm_constants
flamegraph_dir = os.path.join(os.path.expanduser("~"), "FlameGraph")
os.chdir(os.path.join(os.path.dirname(sys.argv[0]), "../.."))
if not os.path.exists("reports"):
os.makedirs("reports")
start_port_server.start_port_server()
def fnize(s):
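# Turn a benchmark name into a filesystem-friendly token by collapsing runs
# of '<', '>', ',', ' ' and '/' into single underscores.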
out = ""
for c in s:
if c in "<>, /":
if len(out) and out[-1] == "_":
continue
out += "_"
else:
out += c
return out
# index html
index_html = """
<html>
<head>
<title>Microbenchmark Results</title>
</head>
<body>
"""
def heading(name):
global index_html
index_html += "<h1>%s</h1>\n" % name
def link(txt, tgt):
global index_html
index_html += '<p><a href="%s">%s</a></p>\n' % (
html.escape(tgt, quote=True),
html.escape(txt),
)
def text(txt):
global index_html
index_html += "<p><pre>%s</pre></p>\n" % html.escape(txt)
def _bazel_build_benchmark(bm_name, cfg):
"""Build given benchmark with bazel"""
subprocess.check_call(
[
"tools/bazel",
"build",
"--config=%s" % cfg,
"//test/cpp/microbenchmarks:%s" % bm_name,
]
)
def run_summary(bm_name, cfg, base_json_name):
_bazel_build_benchmark(bm_name, cfg)
cmd = [
"bazel-bin/test/cpp/microbenchmarks/%s" % bm_name,
"--benchmark_out=%s.%s.json" % (base_json_name, cfg),
"--benchmark_out_format=json",
]
if args.summary_time is not None:
cmd += ["--benchmark_min_time=%d" % args.summary_time]
return subprocess.check_output(cmd).decode("UTF-8")
def collect_summary(bm_name, args):
# no counters, run microbenchmark and add summary
# both to HTML report and to console.
nocounters_heading = "Summary: %s" % bm_name
nocounters_summary = run_summary(bm_name, "opt", bm_name)
heading(nocounters_heading)
text(nocounters_summary)
print(nocounters_heading)
print(nocounters_summary)
collectors = {
"summary": collect_summary,
}
argp = argparse.ArgumentParser(description="Collect data from microbenchmarks")
argp.add_argument(
"-c",
"--collect",
choices=sorted(collectors.keys()),
nargs="*",
default=sorted(collectors.keys()),
help="Which collectors should be run against each benchmark",
)
argp.add_argument(
"-b",
"--benchmarks",
choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
nargs="+",
type=str,
help="Which microbenchmarks should be run",
)
argp.add_argument(
"--bq_result_table",
default="",
type=str,
help=(
"Upload results from summary collection to a specified bigquery table."
),
)
argp.add_argument(
"--summary_time",
default=None,
type=int,
help="Minimum time to run benchmarks for the summary collection",
)
args = argp.parse_args()
try:
for collect in args.collect:
for bm_name in args.benchmarks:
collectors[collect](bm_name, args)
finally:
if not os.path.exists("reports"):
os.makedirs("reports")
index_html += "</body>\n</html>\n"
with open("reports/index.html", "w") as f:
f.write(index_html)