From f0a293ed67bca4b79f377d3ba2d7f05de937fdee Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Mon, 12 Oct 2015 10:05:50 -0700 Subject: [PATCH 1/4] Robustness work for port_server startup --- tools/run_tests/port_server.py | 9 +++++++-- tools/run_tests/run_tests.py | 37 +++++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/tools/run_tests/port_server.py b/tools/run_tests/port_server.py index b953df952cb..4e473af4113 100755 --- a/tools/run_tests/port_server.py +++ b/tools/run_tests/port_server.py @@ -42,7 +42,7 @@ import time # increment this number whenever making a change to ensure that # the changes are picked up by running CI servers # note that all changes must be backwards compatible -_MY_VERSION = 2 +_MY_VERSION = 4 if len(sys.argv) == 2 and sys.argv[1] == 'dump_version': @@ -52,8 +52,13 @@ if len(sys.argv) == 2 and sys.argv[1] == 'dump_version': argp = argparse.ArgumentParser(description='Server for httpcli_test') argp.add_argument('-p', '--port', default=12345, type=int) +argp.add_argument('-l', '--logfile', default=None, type=str) args = argp.parse_args() +if args.logfile is not None: + sys.stderr = open(args.logfile, 'w') + sys.stdout = sys.stderr + print 'port server running on port %d' % args.port pool = [] @@ -146,6 +151,6 @@ class Handler(BaseHTTPServer.BaseHTTPRequestHandler): httpd = BaseHTTPServer.HTTPServer(('', args.port), Handler) while keep_running: httpd.handle_request() + sys.stderr.flush() print 'done' - diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py index 048ab90798d..71377590221 100755 --- a/tools/run_tests/run_tests.py +++ b/tools/run_tests/run_tests.py @@ -43,6 +43,8 @@ import re import socket import subprocess import sys +import tempfile +import traceback import time import xml.etree.cElementTree as ET import urllib2 @@ -704,35 +706,50 @@ def _start_port_server(port_server_port): urllib2.urlopen('http://localhost:%d/quitquitquit' % port_server_port).read() time.sleep(1) if not running: - print 'starting port_server' - port_log = open('portlog.txt', 'w') + fd, logfile = tempfile.mkstemp() + os.close(fd) + print 'starting port_server, with log file %s' % logfile port_server = subprocess.Popen( - [sys.executable, 'tools/run_tests/port_server.py', '-p', '%d' % port_server_port], - stderr=subprocess.STDOUT, - stdout=port_log) + [sys.executable, 'tools/run_tests/port_server.py', '-p', '%d' % port_server_port, '-l', logfile], + close_fds=True) + time.sleep(1) # ensure port server is up waits = 0 while True: if waits > 10: + print 'killing port server due to excessive start up waits' port_server.kill() if port_server.poll() is not None: print 'port_server failed to start' - port_log = open('portlog.txt', 'r').read() - print port_log - sys.exit(1) + # try one final time: maybe another build managed to start one + time.sleep(1) + try: + urllib2.urlopen('http://localhost:%d/get' % port_server_port, + timeout=1).read() + print 'last ditch attempt to contact port server succeeded' + break + except: + traceback.print_exc(); + port_log = open(logfile, 'r').read() + print port_log + sys.exit(1) try: urllib2.urlopen('http://localhost:%d/get' % port_server_port, timeout=1).read() + print 'port server is up and ready' break except socket.timeout: print 'waiting for port_server: timeout' - time.sleep(0.5) + traceback.print_exc(); + time.sleep(1) waits += 1 except urllib2.URLError: print 'waiting for port_server: urlerror' - time.sleep(0.5) + traceback.print_exc(); + time.sleep(1) waits += 1 except: + traceback.print_exc(); port_server.kill() raise From d2c39713bc821c59f3962fe4ae807b1d14c84ea8 Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Mon, 12 Oct 2015 11:08:49 -0700 Subject: [PATCH 2/4] Attempt to daemonize port server --- tools/run_tests/port_server.py | 11 +++++++---- tools/run_tests/run_tests.py | 12 ++++++++++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/run_tests/port_server.py b/tools/run_tests/port_server.py index 4e473af4113..0593d7dc974 100755 --- a/tools/run_tests/port_server.py +++ b/tools/run_tests/port_server.py @@ -42,7 +42,7 @@ import time # increment this number whenever making a change to ensure that # the changes are picked up by running CI servers # note that all changes must be backwards compatible -_MY_VERSION = 4 +_MY_VERSION = 5 if len(sys.argv) == 2 and sys.argv[1] == 'dump_version': @@ -124,9 +124,12 @@ class Handler(BaseHTTPServer.BaseHTTPRequestHandler): self.send_header('Content-Type', 'text/plain') self.end_headers() p = int(self.path[6:]) - del in_use[p] - pool.append(p) - self.log_message('drop port %d' % p) + if p in in_use: + del in_use[p] + pool.append(p) + self.log_message('drop known port %d' % p) + else: + self.log_message('drop unknown port %d' % p) elif self.path == '/version_number': # fetch a version string and the current process pid self.send_response(200) diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py index 71377590221..2934b5f70f8 100755 --- a/tools/run_tests/run_tests.py +++ b/tools/run_tests/run_tests.py @@ -709,9 +709,17 @@ def _start_port_server(port_server_port): fd, logfile = tempfile.mkstemp() os.close(fd) print 'starting port_server, with log file %s' % logfile - port_server = subprocess.Popen( - [sys.executable, 'tools/run_tests/port_server.py', '-p', '%d' % port_server_port, '-l', logfile], + args = [sys.executable, 'tools/run_tests/port_server.py', '-p', '%d' % port_server_port, '-l', logfile] + if platform.system() == 'Windows': + port_server = subprocess.Popen( + args, + creationflags = 0x00000008, # detached process close_fds=True) + else: + port_server = subprocess.Popen( + args, + preexec_fn=os.setsid, + close_fds=True) time.sleep(1) # ensure port server is up waits = 0 From 41a7bf544944e343a764b7e3e36fa80c27064149 Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Mon, 12 Oct 2015 12:54:55 -0700 Subject: [PATCH 3/4] Close standard pipes --- tools/run_tests/port_server.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/run_tests/port_server.py b/tools/run_tests/port_server.py index 0593d7dc974..3b85486ebfe 100755 --- a/tools/run_tests/port_server.py +++ b/tools/run_tests/port_server.py @@ -56,6 +56,9 @@ argp.add_argument('-l', '--logfile', default=None, type=str) args = argp.parse_args() if args.logfile is not None: + sys.stdin.close() + sys.stderr.close() + sys.stdout.close() sys.stderr = open(args.logfile, 'w') sys.stdout = sys.stderr From 367d41d304e2c69bfa1182c67f15b51551e96f8c Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Mon, 12 Oct 2015 13:00:22 -0700 Subject: [PATCH 4/4] Trying harder to avoid Jenkins killing us --- tools/run_tests/run_tests.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/run_tests/run_tests.py b/tools/run_tests/run_tests.py index 2934b5f70f8..8f69e4a18a9 100755 --- a/tools/run_tests/run_tests.py +++ b/tools/run_tests/run_tests.py @@ -710,14 +710,18 @@ def _start_port_server(port_server_port): os.close(fd) print 'starting port_server, with log file %s' % logfile args = [sys.executable, 'tools/run_tests/port_server.py', '-p', '%d' % port_server_port, '-l', logfile] + env = dict(os.environ) + env['BUILD_ID'] = 'pleaseDontKillMeJenkins' if platform.system() == 'Windows': port_server = subprocess.Popen( - args, - creationflags = 0x00000008, # detached process - close_fds=True) + args, + env=env, + creationflags = 0x00000008, # detached process + close_fds=True) else: port_server = subprocess.Popen( args, + env=env, preexec_fn=os.setsid, close_fds=True) time.sleep(1)