Merge pull request #3939 from jtattermusch/interop_test_timeout_retry

Correct behavior for interop test timeout flakes.
pull/3946/head
Nicolas Noble 9 years ago
commit 2f7b678c9f
  1. 2
      tools/jenkins/run_interop.sh
  2. 2
      tools/run_tests/jobset.py
  3. 9
      tools/run_tests/run_interop_tests.py

@ -34,4 +34,4 @@ set -ex
# Enter the gRPC repo root
cd $(dirname $0)/../..
tools/run_tests/run_interop_tests.py -l all -s all --cloud_to_prod --cloud_to_prod_auth --use_docker -t -j 8 $@ || true
tools/run_tests/run_interop_tests.py -l all -s all --cloud_to_prod --cloud_to_prod_auth --use_docker -t -j 12 $@ || true

@ -272,6 +272,8 @@ class Job(object):
message('TIMEOUT_FLAKE', self._spec.shortname, stdout, do_newline=True)
self._timeout_retries += 1
self.result.retries = self._timeout_retries + self._retries
if self._spec.kill_handler:
self._spec.kill_handler(self)
self._process.terminate()
self.start()
else:

@ -344,6 +344,11 @@ def add_auth_options(language, test_case, cmdline, env):
def _job_kill_handler(job):
# Kill handler supplied as `kill_handler` to the interop job specs; it is
# invoked with the job right before its process is terminated and restarted
# on a timeout retry. Kills the job's docker container, if it has one.
if job._spec.container_name:
dockerjob.docker_kill(job._spec.container_name)
# When the job times out and we decide to kill it,
# we need to wait a bit before restarting the job
# to prevent the "container name already in use" error.
# TODO(jtattermusch): figure out a cleaner way to do this.
time.sleep(2)
def cloud_to_prod_jobspec(language, test_case, docker_image=None, auth=False):
@ -378,7 +383,7 @@ def cloud_to_prod_jobspec(language, test_case, docker_image=None, auth=False):
cwd=cwd,
environ=environ,
shortname='%s:%s:%s' % (suite_name, language, test_case),
timeout_seconds=2*60,
timeout_seconds=90,
flake_retries=5 if args.allow_flakes else 0,
timeout_retries=2 if args.allow_flakes else 0,
kill_handler=_job_kill_handler)
@ -414,7 +419,7 @@ def cloud_to_cloud_jobspec(language, test_case, server_name, server_host,
environ=environ,
shortname='cloud_to_cloud:%s:%s_server:%s' % (language, server_name,
test_case),
timeout_seconds=2*60,
timeout_seconds=90,
flake_retries=5 if args.allow_flakes else 0,
timeout_retries=2 if args.allow_flakes else 0,
kill_handler=_job_kill_handler)

Loading…
Cancel
Save