Merge pull request #3755 from jtattermusch/handle_interop_client_timeout

Kill interop clients on timeout
pull/3757/head
Jan Tattermusch 9 years ago
commit 21361863db
  1. 14
      tools/jenkins/build_docker_and_run_tests.sh
  2. 11
      tools/jenkins/build_interop_image.sh
  3. 34
      tools/run_tests/dockerjob.py
  4. 6
      tools/run_tests/jobset.py
  5. 35
      tools/run_tests/run_interop_tests.py

@ -53,8 +53,8 @@ DOCKER_IMAGE_NAME=grpc_jenkins_slave${docker_suffix}_`sha1sum tools/jenkins/grpc
# Make sure docker image has been built. Should be instantaneous if so.
docker build -t $DOCKER_IMAGE_NAME tools/jenkins/grpc_jenkins_slave$docker_suffix
# Make sure the CID file is gone.
rm -f docker.cid
# Choose random name for docker container
CONTAINER_NAME="run_tests_$(uuidgen)"
# Run tests inside docker
docker run \
@ -70,23 +70,21 @@ docker run \
-v /var/run/docker.sock:/var/run/docker.sock \
-v $(which docker):/bin/docker \
-w /var/local/git/grpc \
--cidfile=docker.cid \
--name=$CONTAINER_NAME \
$DOCKER_IMAGE_NAME \
bash -l /var/local/jenkins/grpc/tools/jenkins/docker_run_tests.sh || DOCKER_FAILED="true"
DOCKER_CID=`cat docker.cid`
if [ "$XML_REPORT" != "" ]
then
docker cp "$DOCKER_CID:/var/local/git/grpc/$XML_REPORT" $git_root
docker cp "$CONTAINER_NAME:/var/local/git/grpc/$XML_REPORT" $git_root
fi
docker cp "$DOCKER_CID:/var/local/git/grpc/reports.zip" $git_root || true
docker cp "$CONTAINER_NAME:/var/local/git/grpc/reports.zip" $git_root || true
unzip $git_root/reports.zip -d $git_root || true
rm -f reports.zip
# remove the container, possibly killing it first
docker rm -f $DOCKER_CID || true
docker rm -f $CONTAINER_NAME || true
if [ "$DOCKER_FAILED" != "" ] && [ "$XML_REPORT" == "" ]
then

@ -77,7 +77,7 @@ docker build -t $BASE_IMAGE --force-rm=true tools/jenkins/$BASE_NAME || exit $?
# Create a local branch so the child Docker script won't complain
git branch -f jenkins-docker
CIDFILE=`mktemp -u --suffix=.cid`
CONTAINER_NAME="build_${BASE_NAME}_$(uuidgen)"
# Prepare image for interop tests, commit it on success.
(docker run \
@ -85,17 +85,14 @@ CIDFILE=`mktemp -u --suffix=.cid`
-i $TTY_FLAG \
$MOUNT_ARGS \
-v /tmp/ccache:/tmp/ccache \
--cidfile=$CIDFILE \
--name=$CONTAINER_NAME \
$BASE_IMAGE \
bash -l /var/local/jenkins/grpc/tools/jenkins/$BASE_NAME/build_interop.sh \
&& docker commit `cat $CIDFILE` $INTEROP_IMAGE \
&& docker commit $CONTAINER_NAME $INTEROP_IMAGE \
&& echo "Successfully built image $INTEROP_IMAGE")
EXITCODE=$?
# remove intermediate container, possibly killing it first
docker rm -f `cat $CIDFILE`
# remove the cidfile
rm -rf `cat $CIDFILE`
docker rm -f $CONTAINER_NAME
exit $EXITCODE

@ -38,18 +38,15 @@ import subprocess
_DEVNULL = open(os.devnull, 'w')
def wait_for_file(filepath, timeout_seconds=15):
"""Wait until given file exists and returns its content."""
started = time.time()
while time.time() - started < timeout_seconds:
if os.path.isfile(filepath):
with open(filepath, 'r') as f:
content = f.read()
# make sure we don't return empty content
if content:
return content
time.sleep(1)
raise Exception('Failed to read file %s.' % filepath)
def random_name(base_name):
"""Randomizes given base name."""
return '%s_%s' % (base_name, uuid.uuid4())
def docker_kill(cid):
"""Kills a docker container. Returns True if successful."""
return subprocess.call(['docker','kill', str(cid)]) == 0
def docker_mapped_port(cid, port):
@ -92,23 +89,16 @@ class DockerJob:
def __init__(self, spec):
self._spec = spec
self._job = jobset.Job(spec, bin_hash=None, newline_on_success=True, travis=True, add_env={}, xml_report=None)
self._cidfile = spec.cidfile
self._cid = None
def cid(self):
"""Gets cid of this container"""
if not self._cid:
self._cid = wait_for_file(self._cidfile)
return self._cid
self._container_name = spec.container_name
def mapped_port(self, port):
return docker_mapped_port(self.cid(), port)
return docker_mapped_port(self._container_name, port)
def kill(self, suppress_failure=False):
"""Sends kill signal to the container."""
if suppress_failure:
self._job.suppress_failure_message()
return subprocess.call(['docker','kill', self.cid()]) == 0
return docker_kill(self._container_name)
def is_running(self):
"""Polls a job and returns True if given job is still running."""

@ -135,13 +135,14 @@ class JobSpec(object):
def __init__(self, cmdline, shortname=None, environ=None, hash_targets=None,
cwd=None, shell=False, timeout_seconds=5*60, flake_retries=0,
timeout_retries=0):
timeout_retries=0, kill_handler=None):
"""
Arguments:
cmdline: a list of arguments to pass as the command line
environ: a dictionary of environment variables to set in the child process
hash_targets: which files to include in the hash representing the jobs version
(or empty, indicating the job should not be hashed)
kill_handler: a handler that will be called whenever job.kill() is invoked
"""
if environ is None:
environ = {}
@ -156,6 +157,7 @@ class JobSpec(object):
self.timeout_seconds = timeout_seconds
self.flake_retries = flake_retries
self.timeout_retries = timeout_retries
self.kill_handler = kill_handler
def identity(self):
return '%r %r %r' % (self.cmdline, self.environ, self.hash_targets)
@ -254,6 +256,8 @@ class Job(object):
def kill(self):
if self._state == _RUNNING:
self._state = _KILLED
if self._spec.kill_handler:
self._spec.kill_handler(self)
self._process.terminate()
def suppress_failure_message(self):

@ -321,17 +321,29 @@ def add_auth_options(language, test_case, cmdline, env):
return (cmdline, env)
def _job_kill_handler(job):
if job._spec.container_name:
dockerjob.docker_kill(job._spec.container_name)
def cloud_to_prod_jobspec(language, test_case, docker_image=None, auth=False):
"""Creates jobspec for cloud-to-prod interop test"""
cmdline = language.cloud_to_prod_args() + ['--test_case=%s' % test_case]
cwd = language.client_cwd
environ = language.cloud_to_prod_env()
container_name = None
if auth:
cmdline, environ = add_auth_options(language, test_case, cmdline, environ)
cmdline = bash_login_cmdline(cmdline)
if docker_image:
cmdline = docker_run_cmdline(cmdline, image=docker_image, cwd=cwd, environ=environ)
container_name = dockerjob.random_name('interop_client_%s' % language)
cmdline = docker_run_cmdline(cmdline,
image=docker_image,
cwd=cwd,
environ=environ,
docker_args=['--net=host',
'--name', container_name])
cwd = None
environ = None
@ -343,7 +355,9 @@ def cloud_to_prod_jobspec(language, test_case, docker_image=None, auth=False):
shortname="%s:%s:%s" % (suite_name, language, test_case),
timeout_seconds=2*60,
flake_retries=5 if args.allow_flakes else 0,
timeout_retries=2 if args.allow_flakes else 0)
timeout_retries=2 if args.allow_flakes else 0,
kill_handler=_job_kill_handler)
test_job.container_name = container_name
return test_job
@ -356,11 +370,14 @@ def cloud_to_cloud_jobspec(language, test_case, server_name, server_host,
'--server_port=%s' % server_port ])
cwd = language.client_cwd
if docker_image:
container_name = dockerjob.random_name('interop_client_%s' % language)
cmdline = docker_run_cmdline(cmdline,
image=docker_image,
cwd=cwd,
docker_args=['--net=host'])
docker_args=['--net=host',
'--name', container_name])
cwd = None
test_job = jobset.JobSpec(
cmdline=cmdline,
cwd=cwd,
@ -368,25 +385,27 @@ def cloud_to_cloud_jobspec(language, test_case, server_name, server_host,
test_case),
timeout_seconds=2*60,
flake_retries=5 if args.allow_flakes else 0,
timeout_retries=2 if args.allow_flakes else 0)
timeout_retries=2 if args.allow_flakes else 0,
kill_handler=_job_kill_handler)
test_job.container_name = container_name
return test_job
def server_jobspec(language, docker_image):
"""Create jobspec for running a server"""
cidfile = tempfile.mktemp()
container_name = dockerjob.random_name('interop_server_%s' % language)
cmdline = bash_login_cmdline(language.server_args() +
['--port=%s' % _DEFAULT_SERVER_PORT])
docker_cmdline = docker_run_cmdline(cmdline,
image=docker_image,
cwd=language.server_cwd,
docker_args=['-p', str(_DEFAULT_SERVER_PORT),
'--cidfile', cidfile])
'--name', container_name])
server_job = jobset.JobSpec(
cmdline=docker_cmdline,
shortname="interop_server:%s" % language,
shortname="interop_server_%s" % language,
timeout_seconds=30*60)
server_job.cidfile = cidfile
server_job.container_name = container_name
return server_job

Loading…
Cancel
Save