@@ -49,6 +49,14 @@ def changed_ratio(n, o):
   if o == 0: return 100
   return (float(n)-float(o))/float(o)

+def median(ary):
+  ary = sorted(ary)
+  n = len(ary)
+  if n%2 == 0:
+    return (ary[n/2 - 1] + ary[n/2]) / 2.0
+  else:
+    return ary[n/2]
+
 def min_change(pct):
   return lambda n, o: abs(changed_ratio(n,o)) > pct/100.0

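
The median helper added above follows the textbook rule: for an odd-length sample, take the middle element; for an even length, average the two middle ones. A minimal sanity check of that rule, written with // so it also runs under Python 3 (the patched file itself is Python 2); statistics.median in the Python 3 standard library computes the same thing:

    def median(ary):
        ary = sorted(ary)
        n = len(ary)
        if n % 2 == 0:
            # Even count: average the two middle elements.
            return (ary[n // 2 - 1] + ary[n // 2]) / 2.0
        # Odd count: the single middle element.
        return ary[n // 2]

    assert median([3, 1, 2]) == 2
    assert median([4, 1, 3, 2]) == 2.5  # (2 + 3) / 2
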
@@ -83,15 +91,15 @@ argp.add_argument('-t', '--track',
                   help='Which metrics to track')
 argp.add_argument('-b', '--benchmarks', nargs='+', choices=_AVAILABLE_BENCHMARK_TESTS, default=['bm_cq'])
 argp.add_argument('-d', '--diff_base', type=str)
-argp.add_argument('-r', '--repetitions', type=int, default=5)
-argp.add_argument('-p', '--p_threshold', type=float, default=0.05)
+argp.add_argument('-r', '--repetitions', type=int, default=7)
+argp.add_argument('-p', '--p_threshold', type=float, default=0.01)
 args = argp.parse_args()

 assert args.diff_base

 def avg(lst):
-  sum = 0
-  n = 0
+  sum = 0.0
+  n = 0.0
   for el in lst:
     sum += el
     n += 1
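
Two knobs tighten here: repetitions rise from 5 to 7 (more samples per benchmark) and the significance threshold drops from 0.05 to 0.01, so a difference must be more strongly supported before it is reported. The avg change is a Python 2 correctness fix: with integer accumulators, sum / n truncates, so avg([1, 2]) would return 1. A minimal sketch of the failure mode and the fix (total is used in place of sum, which shadows a builtin):

    # Python 2 semantics: int / int truncates, e.g. (1 + 2) / 2 == 1.
    # Seeding the accumulators with 0.0 forces float division throughout.
    def avg(lst):
        total = 0.0
        n = 0.0
        for el in lst:
            total += el
            n += 1
        return total / n

    assert avg([1, 2]) == 1.5
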
@@ -162,11 +170,14 @@ class Benchmark:
       new = self.samples[True][f]
       old = self.samples[False][f]
       if not new or not old: continue
       p = stats.ttest_ind(new, old)[1]
-      new_avg = avg(new)
-      old_avg = avg(old)
-      delta = new_avg - old_avg
-      ratio = changed_ratio(new_avg, old_avg)
-      if p < args.p_threshold and abs(delta) > 0.1 and abs(ratio) > 0.05:
+      new_mdn = median(new)
+      old_mdn = median(old)
+      delta = new_mdn - old_mdn
+      ratio = changed_ratio(new_mdn, old_mdn)
+      print 'new=%r old=%r new_mdn=%f old_mdn=%f delta=%f ratio=%f p=%f' % (
+          new, old, new_mdn, old_mdn, delta, ratio, p
+      )
+      if p < args.p_threshold and abs(delta) > 0.1 and abs(ratio) > 0.03:
         self.final[f] = delta
     return self.final.keys()
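
This hunk switches the comparison statistic from the mean to the median, which is far less sensitive to a single outlier run. stats.ttest_ind is scipy's two-sample t-test; it returns a (statistic, pvalue) pair, and [1] picks out the p-value. A field is now flagged only when the p-value clears args.p_threshold and both the absolute and relative median shifts are non-trivial. A self-contained sketch of the same gate, with the thresholds copied from the hunk and purely hypothetical sample data (significant is an illustrative helper, not part of the patch):

    from statistics import median  # stdlib equivalent of the helper above
    from scipy import stats

    def significant(new, old, p_threshold=0.01):
        # ttest_ind returns (statistic, pvalue); [1] is the p-value.
        p = stats.ttest_ind(new, old)[1]
        new_mdn, old_mdn = median(new), median(old)
        delta = new_mdn - old_mdn
        # Inlined changed_ratio: relative change, with the o == 0 guard.
        ratio = 100 if old_mdn == 0 else delta / old_mdn
        return p < p_threshold and abs(delta) > 0.1 and abs(ratio) > 0.03

    # A clear ~13% shift over 7 repetitions should trip the gate.
    print(significant([10.2, 10.4, 10.1, 10.3, 10.2, 10.4, 10.3],
                      [9.1, 9.0, 9.2, 9.1, 9.0, 9.2, 9.1]))  # True
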
@@ -199,7 +210,8 @@ for bm in comparables:
     benchmarks[name].add_sample(row, False)

 really_interesting = set()
-for bm in benchmarks.values():
+for name, bm in benchmarks.items():
+  print name
   really_interesting.update(bm.process())
 fields = [f for f in _INTERESTING if f in really_interesting]

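
Switching the loop from .values() to .items() exposes the benchmark name alongside the object, so the script can print a progress marker before each potentially slow process() call. The same pattern in isolation, with hypothetical stand-in data:

    # Stand-ins for the Benchmark objects: map each name to the fields
    # its process() call would report as interesting.
    results = {'bm_cq': ['cpu_time'], 'bm_fullstack': []}
    really_interesting = set()
    for name, fields in results.items():
        print(name)  # progress marker, as in the hunk
        really_interesting.update(fields)
    print(sorted(really_interesting))  # ['cpu_time']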