From 376dc34d78414c038f56a575a39706a91cf4f8ab Mon Sep 17 00:00:00 2001 From: Jan Tattermusch Date: Mon, 16 Oct 2017 16:00:35 +0200 Subject: [PATCH 1/2] adjust number of retries --- tools/run_tests/run_interop_tests.py | 4 ++-- tools/run_tests/run_tests.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/run_tests/run_interop_tests.py b/tools/run_tests/run_interop_tests.py index 4dd982756d2..2f826871a09 100755 --- a/tools/run_tests/run_interop_tests.py +++ b/tools/run_tests/run_interop_tests.py @@ -680,7 +680,7 @@ def cloud_to_prod_jobspec(language, test_case, server_host_name, shortname='%s:%s:%s:%s' % (suite_name, language, server_host_name, test_case), timeout_seconds=_TEST_TIMEOUT, - flake_retries=5 if args.allow_flakes else 0, + flake_retries=4 if args.allow_flakes else 0, timeout_retries=2 if args.allow_flakes else 0, kill_handler=_job_kill_handler) if docker_image: @@ -746,7 +746,7 @@ def cloud_to_cloud_jobspec(language, test_case, server_name, server_host, shortname='cloud_to_cloud:%s:%s_server:%s' % (language, server_name, test_case), timeout_seconds=_TEST_TIMEOUT, - flake_retries=5 if args.allow_flakes else 0, + flake_retries=4 if args.allow_flakes else 0, timeout_retries=2 if args.allow_flakes else 0, kill_handler=_job_kill_handler) if docker_image: diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py index ea21c81875d..9ba2c6f90ee 100755 --- a/tools/run_tests/run_tests.py +++ b/tools/run_tests/run_tests.py @@ -159,8 +159,8 @@ class Config(object): environ=actual_environ, cpu_cost=cpu_cost, timeout_seconds=(self.timeout_multiplier * timeout_seconds if timeout_seconds else None), - flake_retries=5 if flaky or args.allow_flakes else 0, - timeout_retries=3 if flaky or args.allow_flakes else 0) + flake_retries=4 if flaky or args.allow_flakes else 0, + timeout_retries=1 if flaky or args.allow_flakes else 0) def get_c_tests(travis, test_lang) : @@ -1493,7 +1493,7 @@ def build_step_environ(cfg): return environ build_steps = list(set( - jobset.JobSpec(cmdline, environ=build_step_environ(build_config), flake_retries=5) + jobset.JobSpec(cmdline, environ=build_step_environ(build_config), flake_retries=2) for l in languages for cmdline in l.pre_build_steps())) if make_targets: From 3d1b6c1035f006292a8631c5423c44089da9ad03 Mon Sep 17 00:00:00 2001 From: Jan Tattermusch Date: Mon, 16 Oct 2017 16:01:12 +0200 Subject: [PATCH 2/2] explain retries and jobset.max_time setting --- tools/run_tests/python_utils/jobset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/run_tests/python_utils/jobset.py b/tools/run_tests/python_utils/jobset.py index 658b814d81f..85eef444ef8 100755 --- a/tools/run_tests/python_utils/jobset.py +++ b/tools/run_tests/python_utils/jobset.py @@ -302,6 +302,7 @@ class Job(object): self._retries += 1 self.result.num_failures += 1 self.result.retries = self._timeout_retries + self._retries + # NOTE: job is restarted regardless of jobset's max_time setting self.start() else: self._state = _FAILURE @@ -344,6 +345,7 @@ class Job(object): if self._spec.kill_handler: self._spec.kill_handler(self) self._process.terminate() + # NOTE: job is restarted regardless of jobset's max_time setting self.start() else: message('TIMEOUT', '%s [pid=%d, time=%.1fsec]' % (self._spec.shortname, self._process.pid, elapsed), stdout(), do_newline=True)