From 71735185a14ca1064472e4e7a8df9d275c762e1e Mon Sep 17 00:00:00 2001
From: Craig Tiller <ctiller@google.com>
Date: Thu, 15 Jan 2015 17:07:13 -0800
Subject: [PATCH 1/2] Cache for successful test runs

By default, don't re-run tests that have already passed against an
unchanged binary, to reduce the latency between making a change and
seeing whether it fixed the failure you were trying to fix.
---
 .gitignore                   |  3 ++
 tools/run_tests/jobset.py    | 52 +++++++++++++++++++++++++++--------
 tools/run_tests/run_tests.py | 53 ++++++++++++++++++++++++++++++++----
 3 files changed, 91 insertions(+), 17 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3cae07ed12f..bf57027c942 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,6 @@ gens
 libs
 objs
 *.pyc
+
+# cache for run_tests.py
+.run_tests_cache
diff --git a/tools/run_tests/jobset.py b/tools/run_tests/jobset.py
index bb1c862f1ad..7a6d979ba30 100755
--- a/tools/run_tests/jobset.py
+++ b/tools/run_tests/jobset.py
@@ -1,6 +1,8 @@
 """Run a group of subprocesses and then finish."""
 
+import hashlib
 import multiprocessing
+import os
 import random
 import subprocess
 import sys
@@ -75,30 +77,43 @@ def message(tag, message, explanatory_text=None, do_newline=False):
   sys.stdout.flush()
 
 
+def which(filename):
+  if '/' in filename:
+    return filename
+  for path in os.environ['PATH'].split(os.pathsep):
+    if os.path.exists(os.path.join(path, filename)):
+      return os.path.join(path, filename)
+  raise Exception('%s not found' % filename)
+
+
 class Job(object):
   """Manages one job."""
 
-  def __init__(self, cmdline, newline_on_success):
-    self._cmdline = ' '.join(cmdline)
+  def __init__(self, cmdline, bin_hash, newline_on_success):
+    self._cmdline = cmdline
+    self._bin_hash = bin_hash
     self._tempfile = tempfile.TemporaryFile()
     self._process = subprocess.Popen(args=cmdline,
                                      stderr=subprocess.STDOUT,
                                      stdout=self._tempfile)
     self._state = _RUNNING
     self._newline_on_success = newline_on_success
-    message('START', self._cmdline)
+    message('START', ' '.join(self._cmdline))
 
-  def state(self):
+  def state(self, update_cache):
     """Poll current state of the job. Prints messages at completion."""
     if self._state == _RUNNING and self._process.poll() is not None:
       if self._process.returncode != 0:
         self._state = _FAILURE
         self._tempfile.seek(0)
         stdout = self._tempfile.read()
-        message('FAILED', '%s [ret=%d]' % (self._cmdline, self._process.returncode), stdout)
+        message('FAILED', '%s [ret=%d]' % (
+            ' '.join(self._cmdline), self._process.returncode), stdout)
       else:
         self._state = _SUCCESS
-        message('PASSED', '%s' % self._cmdline, do_newline=self._newline_on_success)
+        message('PASSED', '%s' % ' '.join(self._cmdline),
+                do_newline=self._newline_on_success)
+        update_cache.finished(self._cmdline, self._bin_hash)
     return self._state
 
   def kill(self):
@@ -110,7 +125,7 @@ class Job(object):
 class Jobset(object):
   """Manages one run of jobs."""
 
-  def __init__(self, check_cancelled, maxjobs, newline_on_success):
+  def __init__(self, check_cancelled, maxjobs, newline_on_success, cache):
     self._running = set()
     self._check_cancelled = check_cancelled
     self._cancelled = False
@@ -118,6 +133,7 @@ class Jobset(object):
     self._completed = 0
     self._maxjobs = maxjobs
     self._newline_on_success = newline_on_success
+    self._cache = cache
 
   def start(self, cmdline):
     """Start a job. Return True on success, False on failure."""
@@ -125,7 +141,10 @@ class Jobset(object):
       if self.cancelled(): return False
       self.reap()
     if self.cancelled(): return False
-    self._running.add(Job(cmdline, self._newline_on_success))
+    with open(which(cmdline[0])) as f:
+      bin_hash = hashlib.sha1(f.read()).hexdigest()
+    if self._cache.should_run(cmdline, bin_hash):
+      self._running.add(Job(cmdline, bin_hash, self._newline_on_success))
     return True
 
   def reap(self):
@@ -133,7 +152,7 @@ class Jobset(object):
     while self._running:
       dead = set()
       for job in self._running:
-        st = job.state()
+        st = job.state(self._cache)
         if st == _RUNNING: continue
         if st == _FAILURE: self._failures += 1
         dead.add(job)
@@ -165,13 +184,24 @@ def _never_cancelled():
   return False
 
 
+# cache class that caches nothing
+class NoCache(object):
+  def should_run(self, cmdline, bin_hash):
+    return True
+
+  def finished(self, cmdline, bin_hash):
+    pass
+
+
 def run(cmdlines,
         check_cancelled=_never_cancelled,
         maxjobs=None,
-        newline_on_success=False):
+        newline_on_success=False,
+        cache=None):
   js = Jobset(check_cancelled,
               maxjobs if maxjobs is not None else _DEFAULT_MAX_JOBS,
-              newline_on_success)
+              newline_on_success,
+              cache if cache is not None else NoCache())
   for cmdline in shuffle_iteratable(cmdlines):
     if not js.start(cmdline):
       break
diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py
index aa3245d8303..8d54f884876 100755
--- a/tools/run_tests/run_tests.py
+++ b/tools/run_tests/run_tests.py
@@ -4,6 +4,7 @@
 import argparse
 import glob
 import itertools
+import simplejson
 import multiprocessing
 import sys
 import time
@@ -85,7 +86,37 @@ runs_per_test = args.runs_per_test
 forever = args.forever
 
 
-def _build_and_run(check_cancelled, newline_on_success, forever=False):
+class TestCache(object):
+  def __init__(self):
+    self._last_successful_run = {}
+
+  def should_run(self, cmdline, bin_hash):
+    cmdline = ' '.join(cmdline)
+    if cmdline not in self._last_successful_run:
+      return True
+    if self._last_successful_run[cmdline] != bin_hash:
+      return True
+    return False
+
+  def finished(self, cmdline, bin_hash):
+    self._last_successful_run[' '.join(cmdline)] = bin_hash
+
+  def dump(self):
+    return [{'cmdline': k, 'hash': v} for k, v in self._last_successful_run.iteritems()]
+
+  def parse(self, exdump):
+    self._last_successful_run = dict((o['cmdline'], o['hash']) for o in exdump)
+
+  def save(self):
+    with open('.run_tests_cache', 'w') as f:
+      f.write(simplejson.dumps(self.dump()))
+
+  def load(self):
+    with open('.run_tests_cache') as f:
+      self.parse(simplejson.loads(f.read()))
+
+
+def _build_and_run(check_cancelled, newline_on_success, cache):
   """Do one pass of building & running tests."""
   # build latest, sharing cpu between the various makes
   if not jobset.run(
@@ -109,12 +140,20 @@ def _build_and_run(check_cancelled, newline_on_success, forever=False):
                   runs_per_test)))),
               check_cancelled,
               newline_on_success=newline_on_success,
-              maxjobs=min(c.maxjobs for c in run_configs)):
+              maxjobs=min(c.maxjobs for c in run_configs),
+              cache=cache):
     return 2
 
   return 0
 
 
+test_cache = (None if runs_per_test != 1
+                       or 'gcov' in build_configs
+                       or 'valgrind' in build_configs
+                   else TestCache())
+if test_cache:
+  test_cache.load()
+
 if forever:
   success = True
   while True:
@@ -122,9 +161,9 @@ if forever:
     initial_time = dw.most_recent_change()
     have_files_changed = lambda: dw.most_recent_change() != initial_time
     previous_success = success
-    success = _build_and_run(have_files_changed,
+    success = _build_and_run(check_cancelled=have_files_changed,
                              newline_on_success=False,
-                             forever=True) == 0
+                             cache=test_cache) == 0
     if not previous_success and success:
       jobset.message('SUCCESS',
                      'All tests are now passing properly',
@@ -133,10 +172,12 @@ if forever:
     while not have_files_changed():
       time.sleep(1)
 else:
-  result = _build_and_run(lambda: False,
-                          newline_on_success=args.newline_on_success)
+  result = _build_and_run(check_cancelled=lambda: False,
+                          newline_on_success=args.newline_on_success,
+                          cache=test_cache)
   if result == 0:
     jobset.message('SUCCESS', 'All tests passed', do_newline=True)
   else:
     jobset.message('FAILED', 'Some tests failed', do_newline=True)
+  test_cache.save()
   sys.exit(result)

From b50d166634565b925c3ccdbf7ddab52344c24f0a Mon Sep 17 00:00:00 2001
From: Craig Tiller <ctiller@google.com>
Date: Thu, 15 Jan 2015 17:28:21 -0800
Subject: [PATCH 2/2] Make run_tests.py pass lint

---
 tools/run_tests/run_tests.py | 42 ++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py
index 8d54f884876..8c4c998d7be 100755
--- a/tools/run_tests/run_tests.py
+++ b/tools/run_tests/run_tests.py
@@ -4,16 +4,18 @@
 import argparse
 import glob
 import itertools
-import simplejson
 import multiprocessing
 import sys
 import time
 
 import jobset
+import simplejson
 import watch_dirs
 
+
 # SimpleConfig: just compile with CONFIG=config, and run the binary to test
 class SimpleConfig(object):
+
   def __init__(self, config):
     self.build_config = config
     self.maxjobs = 32 * multiprocessing.cpu_count()
@@ -24,6 +26,7 @@ class SimpleConfig(object):
 
 # ValgrindConfig: compile with some CONFIG=config, but use valgrind to run
 class ValgrindConfig(object):
+
   def __init__(self, config, tool):
     self.build_config = config
     self.tool = tool
@@ -35,15 +38,15 @@ class ValgrindConfig(object):
 
 # different configurations we can run under
 _CONFIGS = {
-  'dbg': SimpleConfig('dbg'),
-  'opt': SimpleConfig('opt'),
-  'tsan': SimpleConfig('tsan'),
-  'msan': SimpleConfig('msan'),
-  'asan': SimpleConfig('asan'),
-  'gcov': SimpleConfig('gcov'),
-  'memcheck': ValgrindConfig('valgrind', 'memcheck'),
-  'helgrind': ValgrindConfig('dbg', 'helgrind')
-  }
+    'dbg': SimpleConfig('dbg'),
+    'opt': SimpleConfig('opt'),
+    'tsan': SimpleConfig('tsan'),
+    'msan': SimpleConfig('msan'),
+    'asan': SimpleConfig('asan'),
+    'gcov': SimpleConfig('gcov'),
+    'memcheck': ValgrindConfig('valgrind', 'memcheck'),
+    'helgrind': ValgrindConfig('dbg', 'helgrind')
+    }
 
 
 _DEFAULT = ['dbg', 'opt']
@@ -87,6 +90,8 @@ forever = args.forever
 
 
 class TestCache(object):
+  """Cache for running tests."""
+
   def __init__(self):
     self._last_successful_run = {}
 
@@ -102,7 +107,8 @@ class TestCache(object):
     self._last_successful_run[' '.join(cmdline)] = bin_hash
 
   def dump(self):
-    return [{'cmdline': k, 'hash': v} for k, v in self._last_successful_run.iteritems()]
+    return [{'cmdline': k, 'hash': v}
+            for k, v in self._last_successful_run.iteritems()]
 
   def parse(self, exdump):
     self._last_successful_run = dict((o['cmdline'], o['hash']) for o in exdump)
@@ -138,19 +144,19 @@ def _build_and_run(check_cancelled, newline_on_success, cache):
                   glob.glob('bins/%s/%s_test' % (
                       config.build_config, filt)),
                   runs_per_test)))),
-              check_cancelled,
-              newline_on_success=newline_on_success,
-              maxjobs=min(c.maxjobs for c in run_configs),
-              cache=cache):
+      check_cancelled,
+      newline_on_success=newline_on_success,
+      maxjobs=min(c.maxjobs for c in run_configs),
+      cache=cache):
     return 2
 
   return 0
 
 
 test_cache = (None if runs_per_test != 1
-                       or 'gcov' in build_configs
-                       or 'valgrind' in build_configs
-                   else TestCache())
+              or 'gcov' in build_configs
+              or 'valgrind' in build_configs
+              else TestCache())
 if test_cache:
   test_cache.load()