From 39e3cb3a383c4ff2c66a0710136df7db939d3e8c Mon Sep 17 00:00:00 2001 From: Jan Tattermusch Date: Thu, 22 Oct 2015 18:21:08 -0700 Subject: [PATCH 1/2] prevent container name reuse error for timeout flakes --- tools/run_tests/jobset.py | 2 ++ tools/run_tests/run_interop_tests.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/tools/run_tests/jobset.py b/tools/run_tests/jobset.py index a9c96a6c95a..8343441a189 100755 --- a/tools/run_tests/jobset.py +++ b/tools/run_tests/jobset.py @@ -272,6 +272,8 @@ class Job(object): message('TIMEOUT_FLAKE', self._spec.shortname, stdout, do_newline=True) self._timeout_retries += 1 self.result.retries = self._timeout_retries + self._retries + if self._spec.kill_handler: + self._spec.kill_handler(self) self._process.terminate() self.start() else: diff --git a/tools/run_tests/run_interop_tests.py b/tools/run_tests/run_interop_tests.py index c2705c8cab2..0f3b824996a 100755 --- a/tools/run_tests/run_interop_tests.py +++ b/tools/run_tests/run_interop_tests.py @@ -344,6 +344,11 @@ def add_auth_options(language, test_case, cmdline, env): def _job_kill_handler(job): if job._spec.container_name: dockerjob.docker_kill(job._spec.container_name) + # When the job times out and we decide to kill it, + # we need to wait a bit before restarting the job + # to prevent "container name already in use" error. + # TODO(jtattermusch): figure out a cleaner way to do this. 
+ time.sleep(2) def cloud_to_prod_jobspec(language, test_case, docker_image=None, auth=False): From 29fd00567e02eabbde983770374f403a91b5a856 Mon Sep 17 00:00:00 2001 From: Jan Tattermusch Date: Thu, 22 Oct 2015 18:58:57 -0700 Subject: [PATCH 2/2] adjust paralellism and timeouts --- tools/jenkins/run_interop.sh | 2 +- tools/run_tests/run_interop_tests.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/jenkins/run_interop.sh b/tools/jenkins/run_interop.sh index b1cf7b57485..5dd477ed77a 100755 --- a/tools/jenkins/run_interop.sh +++ b/tools/jenkins/run_interop.sh @@ -34,4 +34,4 @@ set -ex # Enter the gRPC repo root cd $(dirname $0)/../.. -tools/run_tests/run_interop_tests.py -l all -s all --cloud_to_prod --cloud_to_prod_auth --use_docker -t -j 8 $@ || true +tools/run_tests/run_interop_tests.py -l all -s all --cloud_to_prod --cloud_to_prod_auth --use_docker -t -j 12 $@ || true diff --git a/tools/run_tests/run_interop_tests.py b/tools/run_tests/run_interop_tests.py index 0f3b824996a..a71ebc6b359 100755 --- a/tools/run_tests/run_interop_tests.py +++ b/tools/run_tests/run_interop_tests.py @@ -383,7 +383,7 @@ def cloud_to_prod_jobspec(language, test_case, docker_image=None, auth=False): cwd=cwd, environ=environ, shortname='%s:%s:%s' % (suite_name, language, test_case), - timeout_seconds=2*60, + timeout_seconds=90, flake_retries=5 if args.allow_flakes else 0, timeout_retries=2 if args.allow_flakes else 0, kill_handler=_job_kill_handler) @@ -419,7 +419,7 @@ def cloud_to_cloud_jobspec(language, test_case, server_name, server_host, environ=environ, shortname='cloud_to_cloud:%s:%s_server:%s' % (language, server_name, test_case), - timeout_seconds=2*60, + timeout_seconds=90, flake_retries=5 if args.allow_flakes else 0, timeout_retries=2 if args.allow_flakes else 0, kill_handler=_job_kill_handler)