|
|
|
#!/usr/bin/env python3
|
|
|
|
"""Benchmarks the current working directory against a given baseline.
|
|
|
|
|
|
|
|
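This script benchmarks both speed (compared with benchstat) and binary size
(compared with bloaty). The baseline commit is taken from the first
command-line argument and defaults to "master".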
|
|
|
|
"""
|
|
|
|
|
|
|
|
import contextlib
|
|
|
|
import json
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import subprocess
|
|
|
|
import sys
|
|
|
|
import tempfile
|
|
|
|
|
|
|
|
@contextlib.contextmanager
def GitWorktree(commit):
    """Context manager that runs its body inside a temporary git worktree.

    A fresh temporary directory is created and registered with
    `git worktree add -q -d` at `commit`. The process then changes into that
    directory for the duration of the `with` block. On exit the previous
    working directory is restored and the worktree is removed with
    `git worktree remove`.

    Args:
      commit: Commit-ish handed to `git worktree add`.

    Yields:
      The path of the temporary worktree directory.

    Raises:
      subprocess.CalledProcessError: if either git command exits non-zero.
    """
    import shutil  # Local import: only needed on the failure path below.

    tmpdir = tempfile.mkdtemp()
    try:
        subprocess.run(
            ['git', 'worktree', 'add', '-q', '-d', tmpdir, commit], check=True)
    except BaseException:
        # The worktree was not set up, so `git worktree remove` will never run
        # for this directory; delete it here instead of leaking it.
        shutil.rmtree(tmpdir, ignore_errors=True)
        raise

    cwd = os.getcwd()
    try:
        # chdir is inside the try so a failure here still removes the worktree.
        os.chdir(tmpdir)
        yield tmpdir
    finally:
        # Leave the worktree before asking git to delete it.
        os.chdir(cwd)
        subprocess.run(['git', 'worktree', 'remove', tmpdir], check=True)
|
|
|
|
|
|
|
|
def Run(cmd):
    """Execute `cmd` through the shell, raising if it exits non-zero.

    Args:
      cmd: Shell command line, passed as a single string.

    Raises:
      subprocess.CalledProcessError: if the command's exit status is not 0.
    """
    # shell=True is required: callers rely on shell syntax such as
    # `VAR=value cmd` prefixes and `~` expansion.
    subprocess.run(cmd, shell=True, check=True)
|
|
|
|
|
|
|
|
def Benchmark(outbase, runs=12):
    """Test, build, and benchmark the tree in the current working directory.

    Runs `bazel test :all`, builds and runs the optimized `:benchmark` target
    with JSON output, then builds `:conformance_upb` with debug info. Two
    files are left behind for later comparison:

      * outbase + ".bin": a copy of bazel-bin/conformance_upb.
      * outbase + ".txt": one line per individual benchmark repetition, in
        the format expected by benchstat.

    Args:
      outbase: Path prefix for the two output files.
      runs: Value passed to the benchmark binary's --benchmark_repetitions.

    Raises:
      subprocess.CalledProcessError: if any command run via Run() fails.
    """
    tmpfile = "/tmp/bench-output.json"

    # Drop any output left over from a previous invocation.
    Run("rm -rf {}".format(tmpfile))
    Run("CC=clang bazel test :all")
    Run("CC=clang bazel build -c opt --copt=-march=native :benchmark")

    Run("./bazel-bin/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={}".format(tmpfile, runs))

    Run("CC=clang bazel build -c opt --copt=-g :conformance_upb")
    Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase))

    with open(tmpfile) as f:
        bench_json = json.load(f)

    # Summary rows must not be fed to benchstat as if they were timings.
    # Prefer the explicit "run_type" field when the JSON has one, and fall
    # back to the name suffixes (including "_cv") when it does not.
    aggregate_suffixes = ("_mean", "_median", "_stddev", "_cv")

    # Translate into the format expected by benchstat.
    with open(outbase + ".txt", "w") as f:
        for run in bench_json["benchmarks"]:
            name = re.sub(r'^BM_', 'Benchmark', run["name"])
            if run.get("run_type") == "aggregate" or name.endswith(aggregate_suffixes):
                continue
            # NOTE(review): assumes cpu_time is reported in nanoseconds;
            # confirm against the benchmark's "time_unit" field.
            print("{} {} {} ns/op".format(name, run["iterations"], run["cpu_time"]), file=f)
|
|
|
|
|
|
|
|
# Baseline commit to compare against: first command-line argument if given,
# otherwise "master".
baseline = sys.argv[1] if len(sys.argv) > 1 else "master"

# Fail fast on a bad baseline: creating (and immediately discarding) a
# worktree for it raises before any benchmarking time is spent.
with GitWorktree(baseline):
    pass

# The current directory goes first, since it is the one more likely to be
# broken.
Benchmark("/tmp/new")

# Then the baseline, from inside its own temporary worktree.
with GitWorktree(baseline):
    Benchmark("/tmp/old")

print()
print()

# Speed comparison.
Run("~/go/bin/benchstat /tmp/old.txt /tmp/new.txt")

print()
print()

# Size comparison: diff the stripped binaries, using the unstripped ones as
# debug files.
Run("objcopy --strip-debug /tmp/old.bin /tmp/old.bin.stripped")
Run("objcopy --strip-debug /tmp/new.bin /tmp/new.bin.stripped")
Run(
    "~/code/bloaty/bloaty /tmp/new.bin.stripped -- /tmp/old.bin.stripped"
    " --debug-file=/tmp/old.bin --debug-file=/tmp/new.bin"
    " -d compileunits,symbols"
)