Add CPU cost measurement, tune parameters, decrease default maxjobs

pull/4783/head
Craig Tiller 9 years ago
parent 56c6b6ab0a
commit 5f735a64e1
8 changed files:

1. build.yaml (17 lines changed)
2. test/core/bad_client/gen_build_yaml.py (11 lines changed)
3. test/core/bad_ssl/gen_build_yaml.py (9 lines changed)
4. test/core/end2end/gen_build_yaml.py (17 lines changed)
5. tools/buildgen/build-cleaner.py (1 line changed)
6. tools/run_tests/jobset.py (27 lines changed)
7. tools/run_tests/run_tests.py (6 lines changed)
8. tools/run_tests/tests.json (780 lines changed)

build.yaml

@@ -922,6 +922,7 @@ targets:
   - gpr_test_util
   - gpr
 - name: dualstack_socket_test
+  cpu_cost: 0.1
   build: test
   language: c
   src:
@@ -996,6 +997,7 @@ targets:
   - gpr_test_util
   - gpr
 - name: fling_stream_test
+  cpu_cost: 2
   build: test
   language: c
   src:
@@ -1005,12 +1007,12 @@ targets:
   - grpc
   - gpr_test_util
   - gpr
-  cpu_cost: 2
   platforms:
   - mac
   - linux
   - posix
 - name: fling_test
+  cpu_cost: 2
   build: test
   language: c
   src:
@@ -1020,7 +1022,6 @@ targets:
   - grpc
   - gpr_test_util
   - gpr
-  cpu_cost: 2
   platforms:
   - mac
   - linux
@@ -1120,6 +1121,7 @@ targets:
   - gpr_test_util
   - gpr
 - name: gpr_stack_lockfree_test
+  cpu_cost: 2
   build: test
   language: c
   src:
@@ -1136,6 +1138,7 @@ targets:
   - gpr_test_util
   - gpr
 - name: gpr_sync_test
+  cpu_cost: 10
   build: test
   language: c
   src:
@@ -1143,8 +1146,8 @@ targets:
   deps:
   - gpr_test_util
   - gpr
-  cpu_cost: 10
 - name: gpr_thd_test
+  cpu_cost: 10
   build: test
   language: c
   src:
@@ -1152,7 +1155,6 @@ targets:
   deps:
   - gpr_test_util
   - gpr
-  cpu_cost: 10
 - name: gpr_time_test
   build: test
   language: c
@@ -1372,6 +1374,7 @@ targets:
   - gpr_test_util
   - gpr
 - name: httpcli_test
+  cpu_cost: 0.5
   build: test
   language: c
   src:
@@ -1386,6 +1389,7 @@ targets:
   - linux
   - posix
 - name: httpscli_test
+  cpu_cost: 0.5
   build: test
   language: c
   src:
@@ -1467,6 +1471,7 @@ targets:
   - gpr_test_util
   - gpr
 - name: lb_policies_test
+  cpu_cost: 0.1
   build: test
   language: c
   src:
@@ -1519,6 +1524,7 @@ targets:
   - gpr_test_util
   - gpr
 - name: no_server_test
+  cpu_cost: 0.1
   build: test
   language: c
   src:
@@ -1579,6 +1585,7 @@ targets:
   - gpr_test_util
   - gpr
 - name: set_initial_connect_string_test
+  cpu_cost: 0.1
   build: test
   language: c
   src:
@@ -1624,6 +1631,7 @@ targets:
   - linux
   - posix
 - name: tcp_client_posix_test
+  cpu_cost: 0.5
   build: test
   language: c
   src:
@@ -1638,6 +1646,7 @@ targets:
   - linux
   - posix
 - name: tcp_posix_test
+  cpu_cost: 0.5
   build: test
   language: c
   src:

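For scale: cpu_cost expresses roughly how many cores a test keeps busy, so dualstack_socket_test (0.1) mostly waits while gpr_sync_test (10) saturates many threads. Presumably the runner weighs these costs when deciding how many tests to start at once; here is a minimal sketch of one way to admit jobs against a CPU budget (a hypothetical CpuBudget helper, not code from this commit, assuming the budget equals the core count):

    import multiprocessing

    class CpuBudget(object):
      """Admits jobs while the sum of running cpu_costs stays under a budget."""

      def __init__(self, budget=None):
        self._budget = budget or multiprocessing.cpu_count()
        self._in_flight = 0.0

      def try_admit(self, cpu_cost):
        # Always let one job run, so tests costing more than the whole
        # budget (e.g. cpu_cost: 10 on a 4-core box) still make progress.
        if self._in_flight > 0 and self._in_flight + cpu_cost > self._budget:
          return False
        self._in_flight += cpu_cost
        return True

      def release(self, cpu_cost):
        self._in_flight -= cpu_cost

Under such a policy ten cpu_cost: 0.1 tests draw on the budget as much as a single default test, which is why lowering per-test costs effectively raises parallelism.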
test/core/bad_client/gen_build_yaml.py

@@ -35,15 +35,15 @@
 import collections
 import yaml

-TestOptions = collections.namedtuple('TestOptions', 'flaky')
-default_test_options = TestOptions(False)
+TestOptions = collections.namedtuple('TestOptions', 'flaky cpu_cost')
+default_test_options = TestOptions(False, 1.0)

 # maps test names to options
 BAD_CLIENT_TESTS = {
     'badreq': default_test_options,
-    'connection_prefix': default_test_options,
-    'headers': default_test_options,
-    'initial_settings_frame': default_test_options,
+    'connection_prefix': default_test_options._replace(cpu_cost=0.2),
+    'headers': default_test_options._replace(cpu_cost=0.2),
+    'initial_settings_frame': default_test_options._replace(cpu_cost=0.2),
     'server_registered_method': default_test_options,
     'simple_request': default_test_options,
     'window_overflow': default_test_options,
@@ -75,6 +75,7 @@ def main():
       'targets': [
           {
             'name': '%s_bad_client_test' % t,
+            'cpu_cost': BAD_CLIENT_TESTS[t].cpu_cost,
             'build': 'test',
             'language': 'c',
             'secure': 'no',

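The _replace calls above work because namedtuples are immutable: _replace returns a new tuple with only the named fields changed, leaving the shared default_test_options untouched. A quick illustration using the same types:

    import collections

    TestOptions = collections.namedtuple('TestOptions', 'flaky cpu_cost')
    default_test_options = TestOptions(False, 1.0)

    # A cheap variant for tests that spend most of their time waiting.
    cheap = default_test_options._replace(cpu_cost=0.2)
    assert cheap == TestOptions(flaky=False, cpu_cost=0.2)
    assert default_test_options.cpu_cost == 1.0  # default left untouched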
test/core/bad_ssl/gen_build_yaml.py

@@ -35,13 +35,13 @@
 import collections
 import yaml

-TestOptions = collections.namedtuple('TestOptions', 'flaky')
-default_test_options = TestOptions(False)
+TestOptions = collections.namedtuple('TestOptions', 'flaky cpu_cost')
+default_test_options = TestOptions(False, 1.0)

 # maps test names to options
 BAD_CLIENT_TESTS = {
-    'cert': default_test_options,
-    'alpn': default_test_options,
+    'cert': default_test_options._replace(cpu_cost=0.1),
+    'alpn': default_test_options._replace(cpu_cost=0.1),
 }

 def main():
@@ -84,6 +84,7 @@ def main():
       for t in sorted(BAD_CLIENT_TESTS.keys())] + [
           {
             'name': 'bad_ssl_%s_test' % t,
+            'cpu_cost': BAD_CLIENT_TESTS[t].cpu_cost,
             'build': 'test',
             'language': 'c',
             'src': ['test/core/bad_ssl/bad_ssl_test.c'],

test/core/end2end/gen_build_yaml.py

@@ -81,7 +81,7 @@ TestOptions = collections.namedtuple(
 default_test_options = TestOptions(False, False, True, False, True, 1.0)
 connectivity_test_options = default_test_options._replace(needs_fullstack=True)

-LOWCPU = 0.01
+LOWCPU = 0.1

 # maps test names to options
 END2END_TESTS = {
@@ -94,26 +94,25 @@ END2END_TESTS = {
     'cancel_before_invoke': default_test_options._replace(cpu_cost=LOWCPU),
     'cancel_in_a_vacuum': default_test_options._replace(cpu_cost=LOWCPU),
     'cancel_with_status': default_test_options._replace(cpu_cost=LOWCPU),
-    'channel_connectivity': connectivity_test_options._replace(proxyable=False),
+    'channel_connectivity': connectivity_test_options._replace(proxyable=False, cpu_cost=LOWCPU),
     'channel_ping': connectivity_test_options._replace(proxyable=False),
-    'compressed_payload': default_test_options._replace(proxyable=False),
+    'compressed_payload': default_test_options._replace(proxyable=False, cpu_cost=LOWCPU),
     'default_host': default_test_options._replace(needs_fullstack=True,
                                                   needs_dns=True),
     'disappearing_server': connectivity_test_options,
     'empty_batch': default_test_options,
-    'graceful_server_shutdown': default_test_options,
+    'graceful_server_shutdown': default_test_options._replace(cpu_cost=LOWCPU),
     'hpack_size': default_test_options._replace(proxyable=False,
-                                                traceable=False,
-                                                cpu_cost=2.0),
+                                                traceable=False),
     'high_initial_seqno': default_test_options,
     'invoke_large_request': default_test_options,
     'large_metadata': default_test_options,
     'max_concurrent_streams': default_test_options._replace(proxyable=False),
-    'max_message_length': default_test_options,
+    'max_message_length': default_test_options._replace(cpu_cost=LOWCPU),
     'metadata': default_test_options,
     'negative_deadline': default_test_options,
     'no_op': default_test_options,
-    'payload': default_test_options,
+    'payload': default_test_options._replace(cpu_cost=LOWCPU),
     'ping_pong_streaming': default_test_options,
     'registered_call': default_test_options,
     'request_with_flags': default_test_options._replace(proxyable=False),
@@ -121,7 +120,7 @@ END2END_TESTS = {
     'server_finishes_request': default_test_options,
     'shutdown_finishes_calls': default_test_options,
     'shutdown_finishes_tags': default_test_options,
-    'simple_delayed_request': connectivity_test_options,
+    'simple_delayed_request': connectivity_test_options._replace(cpu_cost=LOWCPU),
     'simple_request': default_test_options,
     'trailing_metadata': default_test_options,
 }

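Each entry in END2END_TESTS ultimately becomes a target dict in the generated build.yaml, with cpu_cost carried along. A simplified sketch of that emission step (hypothetical emit_targets helper; the real generator also expands fixtures, platforms, and proxy variants):

    def emit_targets(tests):
      """Turns a {name: TestOptions} table into build.yaml target entries."""
      targets = []
      for name in sorted(tests.keys()):
        opts = tests[name]
        target = {
            'name': '%s_test' % name,
            'build': 'test',
            'language': 'c',
            'flaky': opts.flaky,
        }
        if opts.cpu_cost != 1.0:
          target['cpu_cost'] = opts.cpu_cost  # only non-default costs are emitted
        targets.append(target)
      return targets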
tools/buildgen/build-cleaner.py

@@ -41,6 +41,7 @@ _TOP_LEVEL_KEYS = ['settings', 'proto_deps', 'filegroups', 'libs', 'targets', 'v
 _VERSION_KEYS = ['major', 'minor', 'micro', 'build']
 _ELEM_KEYS = [
     'name',
+    'cpu_cost',
     'flaky',
     'build',
     'run',

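Placing 'cpu_cost' immediately after 'name' in _ELEM_KEYS is what makes build-cleaner move the attribute to just under each target's name, as the build.yaml hunks above show. The usual way such a canonical list is applied, sketched here rather than quoted from build-cleaner:

    import collections

    _ELEM_KEYS = ['name', 'cpu_cost', 'flaky', 'build', 'run']

    def reorder_elem(elem):
      # Known keys first, in canonical order; anything else alphabetically after.
      keys = [k for k in _ELEM_KEYS if k in elem]
      keys += sorted(k for k in elem if k not in _ELEM_KEYS)
      return collections.OrderedDict((k, elem[k]) for k in keys)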
tools/run_tests/jobset.py

@@ -33,6 +33,7 @@ import hashlib
 import multiprocessing
 import os
 import platform
+import re
 import signal
 import subprocess
 import sys
@@ -40,6 +41,10 @@ import tempfile
 import time

+# cpu cost measurement
+measure_cpu_costs = False

 _DEFAULT_MAX_JOBS = 16 * multiprocessing.cpu_count()
 _MAX_RESULT_SIZE = 8192
@@ -220,7 +225,10 @@ class Job(object):
     env.update(self._spec.environ)
     env.update(self._add_env)
     self._start = time.time()
-    try_start = lambda: subprocess.Popen(args=self._spec.cmdline,
+    cmdline = self._spec.cmdline
+    if measure_cpu_costs:
+      cmdline = ['time', '--portability'] + cmdline
+    try_start = lambda: subprocess.Popen(args=cmdline,
                                          stderr=subprocess.STDOUT,
                                          stdout=self._tempfile,
                                          cwd=self._spec.cwd,
@@ -269,14 +277,23 @@ class Job(object):
         self.result.returncode = self._process.returncode
       else:
         self._state = _SUCCESS
-        message('PASSED', '%s [time=%.1fsec; retries=%d;%d]' % (
-            self._spec.shortname, elapsed, self._retries, self._timeout_retries),
+        measurement = ''
+        if measure_cpu_costs:
+          m = re.search(r'real ([0-9.]+)\nuser ([0-9.]+)\nsys ([0-9.]+)', stdout())
+          real = float(m.group(1))
+          user = float(m.group(2))
+          sys = float(m.group(3))
+          if real > 0.5:
+            cores = (user + sys) / real
+            measurement = '; cpu_cost=%.01f' % cores
+        message('PASSED', '%s [time=%.1fsec; retries=%d:%d%s]' % (
+            self._spec.shortname, elapsed, self._retries, self._timeout_retries, measurement),
             do_newline=self._newline_on_success or self._travis)
         self.result.state = 'PASSED'
         if self._bin_hash:
           update_cache.finished(self._spec.identity(), self._bin_hash)
-    elif (self._state == _RUNNING and
-          self._spec.timeout_seconds is not None and
+    elif (self._state == _RUNNING and
+          self._spec.timeout_seconds is not None and
           time.time() - self._start > self._spec.timeout_seconds):
       if self._timeout_retries < self._spec.timeout_retries:
         message('TIMEOUT_FLAKE', '%s [pid=%d]' % (self._spec.shortname, self._process.pid), stdout(), do_newline=True)

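The measurement wraps each command in time --portability, whose POSIX-format report (real/user/sys lines, in seconds) ends up in the captured output because stderr is already merged into stdout. cpu_cost is then just CPU time divided by wall time, and runs under half a second are skipped as too noisy. Worked through on a made-up sample:

    import re

    sample = 'real 2.00\nuser 1.50\nsys 0.50\n'  # hypothetical time(1) report
    m = re.search(r'real ([0-9.]+)\nuser ([0-9.]+)\nsys ([0-9.]+)', sample)
    real, user, sys_time = [float(m.group(i)) for i in (1, 2, 3)]
    cores = (user + sys_time) / real  # 1.0: the test kept one core busy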
tools/run_tests/run_tests.py

@@ -603,7 +603,7 @@ argp.add_argument('-n', '--runs_per_test', default=1, type=runs_per_test_type,
                   help='A positive integer or "inf". If "inf", all tests will run in an '
                        'infinite loop. Especially useful in combination with "-f"')
 argp.add_argument('-r', '--regex', default='.*', type=str)
-argp.add_argument('-j', '--jobs', default=2 * multiprocessing.cpu_count(), type=int)
+argp.add_argument('-j', '--jobs', default=multiprocessing.cpu_count(), type=int)
 argp.add_argument('-s', '--slowdown', default=1.0, type=float)
 argp.add_argument('-f', '--forever',
                   default=False,
@@ -650,6 +650,8 @@ argp.add_argument('--build_only',
                   action='store_const',
                   const=True,
                   help='Perform all the build steps but dont run any tests.')
+argp.add_argument('--measure_cpu_costs', default=False, action='store_const', const=True,
+                  help='Measure the cpu costs of tests')
 argp.add_argument('--update_submodules', default=[], nargs='*',
                   help='Update some submodules before building. If any are updated, also run generate_projects. ' +
                        'Submodules are specified as SUBMODULE_NAME:BRANCH; if BRANCH is omitted, master is assumed.')
@@ -658,6 +660,8 @@ argp.add_argument('-x', '--xml_report', default=None, type=str,
                   help='Generates a JUnit-compatible XML report')
 args = argp.parse_args()

+jobset.measure_cpu_costs = args.measure_cpu_costs
+
 if args.use_docker:
   if not args.travis:
     print 'Seen --use_docker flag, will run tests under docker.'

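With the flag plumbed through to jobset, a calibration run is just tools/run_tests/run_tests.py --measure_cpu_costs: each passing test then reports a cpu_cost figure in its PASSED line that can be copied back into the tables above. Halving the default -j from two jobs per core to one keeps those measurements from being skewed by oversubscription.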
tools/run_tests/tests.json

(file diff suppressed because it is too large)