|
|
|
#!/usr/bin/env python3
|
|
|
|
"""Benchmarks the current working directory against a given baseline.
|
|
|
|
|
|
|
|
This script benchmarks both size and speed. Sample output:
|
|
|
|
"""
|
|
|
|
|
|
|
|
import contextlib
|
|
|
|
import json
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import subprocess
|
|
|
|
import sys
|
|
|
|
import tempfile
|
|
|
|
|
|
|
|
@contextlib.contextmanager
def GitWorktree(commit):
    """Context manager that checks out `commit` in a temporary git worktree.

    A fresh temporary directory is created, `git worktree add` populates it
    with a detached checkout of `commit`, and the process changes into that
    directory for the duration of the `with` body.  On exit the previous
    working directory is restored and the worktree is removed again.

    Args:
        commit: Any commit-ish accepted by `git worktree add`.

    Yields:
        The path of the temporary worktree directory.

    Raises:
        subprocess.CalledProcessError: If `git worktree add` or
            `git worktree remove` exits with a non-zero status.
    """
    # Local import: shutil is only needed on the failure path below.
    import shutil

    tmpdir = tempfile.mkdtemp()
    try:
        subprocess.run(
            ['git', 'worktree', 'add', '-q', '-d', tmpdir, commit],
            check=True)
    except BaseException:
        # The checkout did not succeed (e.g. the baseline does not exist), so
        # there is nothing for `git worktree remove` to clean up later.  Delete
        # the directory we created ourselves instead of leaking it, then let
        # the original error propagate.
        shutil.rmtree(tmpdir, ignore_errors=True)
        raise

    cwd = os.getcwd()
    try:
        # chdir lives inside the try so that a failure here still removes the
        # worktree that was just registered.
        os.chdir(tmpdir)
        yield tmpdir
    finally:
        # Leave the worktree before asking git to delete it.
        os.chdir(cwd)
        subprocess.run(['git', 'worktree', 'remove', tmpdir], check=True)
|
|
|
|
|
|
|
|
def Run(cmd):
    """Execute the command string `cmd` through the shell.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    subprocess.run(cmd, shell=True, check=True)
|
|
|
|
|
|
|
|
def Benchmark(outbase, bench_cpu=True, runs=12):
    """Build and measure the tree in the current working directory.

    Runs the bazel test suite first.  When `bench_cpu` is true, the CPU
    benchmark binary is then built and run, and its JSON report is rewritten
    as `<outbase>.txt` in the line format benchstat reads.  Finally the
    conformance binary is built with debug info and copied to
    `<outbase>.bin` for the size comparison.

    Args:
        outbase: Path prefix for the outputs (`<outbase>.txt`, `<outbase>.bin`).
        bench_cpu: Whether to run the CPU benchmarks and write `<outbase>.txt`.
        runs: Value passed to `--benchmark_repetitions`.

    Raises:
        subprocess.CalledProcessError: If any build, test or copy command fails.
    """
    tmpfile = "/tmp/bench-output.json"
    # Drop any report left over from a previous invocation.
    Run("rm -rf {}".format(tmpfile))
    Run("CC=clang bazel test ...")

    if bench_cpu:
        Run("CC=clang bazel build -c opt --copt=-march=native benchmarks:benchmark")

        Run("./bazel-bin/benchmarks/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={}".format(tmpfile, runs))
        with open(tmpfile) as f:
            bench_json = json.load(f)

        # Fallback for benchmark reports that do not tag aggregate rows with
        # "run_type": the repetition summaries are recognised by name suffix.
        aggregate_suffixes = ("_mean", "_median", "_stddev")

        # Translate into the format expected by benchstat.
        with open(outbase + ".txt", "w") as f:
            for run in bench_json["benchmarks"]:
                # benchstat computes its own statistics, so only the raw
                # per-repetition rows are wanted.  Checking run_type also
                # catches aggregates whose suffix is not listed above
                # (for example "_cv").
                if run.get("run_type") == "aggregate":
                    continue
                name = re.sub(r'^BM_', 'Benchmark', run["name"])
                if name.endswith(aggregate_suffixes):
                    continue
                values = (name, run["iterations"], run["cpu_time"])
                print("{} {} {} ns/op".format(*values), file=f)

    # The size comparison needs a binary with debug info, whether or not the
    # CPU benchmarks ran.
    Run("CC=clang bazel build -c opt --copt=-g tests:conformance_upb")
    Run("cp -f bazel-bin/tests/conformance_upb {}.bin".format(outbase))
|
|
|
|
|
|
|
|
|
|
|
|
# Git ref to compare against; replaced by the first command-line argument
# when one is given.
baseline = "master"
|
Optimized short string copying.
This sped up the alias=false case:
Before:
------------------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------------------
BM_Parse_Upb_FileDesc_WithInitialBlock 4562 ns 4562 ns 153251 1.53276GB/s
Performance counter stats for 'bazel-bin/benchmarks/benchmark --benchmark_filter=BM_Parse_Upb_FileDesc_WithInitialBlock':
1,216.65 msec task-clock # 0.936 CPUs utilized
6 context-switches # 0.005 K/sec
0 cpu-migrations # 0.000 K/sec
200 page-faults # 0.164 K/sec
4,490,925,650 cycles # 3.691 GHz
16,516,403,731 instructions # 3.68 insn per cycle
2,828,536,650 branches # 2324.861 M/sec
5,425,830 branch-misses # 0.19% of all branches
1.300178903 seconds time elapsed
1.211475000 seconds user
0.072207000 seconds sys
After:
------------------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------------------
BM_Parse_Upb_FileDesc_WithInitialBlock 3587 ns 3587 ns 195749 1.94935GB/s
Performance counter stats for 'bazel-bin/benchmarks/benchmark --benchmark_filter=BM_Parse_Upb_FileDesc_WithInitialBlock':
1,109.69 msec task-clock # 0.930 CPUs utilized
5 context-switches # 0.005 K/sec
0 cpu-migrations # 0.000 K/sec
198 page-faults # 0.178 K/sec
4,094,010,257 cycles # 3.689 GHz
15,672,677,812 instructions # 3.83 insn per cycle
2,589,291,160 branches # 2333.346 M/sec
3,306,386 branch-misses # 0.13% of all branches
1.193221789 seconds time elapsed
1.102538000 seconds user
0.072166000 seconds sys
4 years ago
|
|
|
# CPU benchmarking is disabled here, so the benchstat step below is skipped
# and only the binary-size comparison runs.
bench_cpu = False

# An explicit baseline on the command line overrides the default.
if sys.argv[1:]:
    baseline = sys.argv[1]

# Fail fast on a bad baseline: creating (and immediately discarding) a
# worktree for it raises if git cannot check it out.
with GitWorktree(baseline):
    pass

# The working tree goes first because it is the likelier of the two to be
# broken.
Benchmark("/tmp/new", bench_cpu)

# Then the baseline, built inside its own temporary worktree.
with GitWorktree(baseline):
    Benchmark("/tmp/old", bench_cpu)

print()
print()

if bench_cpu:
    benchstat_cmd = "~/go/bin/benchstat /tmp/old.txt /tmp/new.txt"
    Run(benchstat_cmd)

print()
print()

# Produce stripped copies of both binaries; the unstripped ones are handed to
# bloaty as debug files.
for strip_cmd in (
    "objcopy --strip-debug /tmp/old.bin /tmp/old.bin.stripped",
    "objcopy --strip-debug /tmp/new.bin /tmp/new.bin.stripped",
):
    Run(strip_cmd)

bloaty_cmd = "~/code/bloaty/bloaty /tmp/new.bin.stripped -- /tmp/old.bin.stripped --debug-file=/tmp/old.bin --debug-file=/tmp/new.bin -d compileunits,symbols"
Run(bloaty_cmd)
|