From c15d32bbe89a2bf950992ded06d1b3da7f1f39a6 Mon Sep 17 00:00:00 2001 From: Matt Kwong Date: Fri, 30 Jun 2017 11:30:21 -0700 Subject: [PATCH] Don't clear alarm in jobset when running performance tests --- tools/run_tests/python_utils/jobset.py | 13 +++++++++---- tools/run_tests/run_performance_tests.py | 10 +++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/tools/run_tests/python_utils/jobset.py b/tools/run_tests/python_utils/jobset.py index 044c6f3aa4f..08d652ae3f3 100755 --- a/tools/run_tests/python_utils/jobset.py +++ b/tools/run_tests/python_utils/jobset.py @@ -367,9 +367,10 @@ class Jobset(object): """Manages one run of jobs.""" def __init__(self, check_cancelled, maxjobs, newline_on_success, travis, - stop_on_failure, add_env, quiet_success, max_time): + stop_on_failure, add_env, quiet_success, max_time, clear_alarms): self._running = set() self._check_cancelled = check_cancelled + self._clear_alarms = clear_alarms self._cancelled = False self._failures = 0 self._completed = 0 @@ -473,7 +474,10 @@ class Jobset(object): while self._running: if self.cancelled(): pass # poll cancellation self.reap() - if platform_string() != 'windows': + # Clear the alarms when finished to avoid a race condition causing job + # failures. Don't do this when running multi-VM tests because clearing + # the alarms causes the test to stall + if platform_string() != 'windows' and self._clear_alarms: signal.alarm(0) return not self.cancelled() and self._failures == 0 @@ -503,7 +507,8 @@ def run(cmdlines, add_env={}, skip_jobs=False, quiet_success=False, - max_time=-1): + max_time=-1, + clear_alarms=True): if skip_jobs: resultset = {} skipped_job_result = JobResult() @@ -515,7 +520,7 @@ def run(cmdlines, js = Jobset(check_cancelled, maxjobs if maxjobs is not None else _DEFAULT_MAX_JOBS, newline_on_success, travis, stop_on_failure, add_env, - quiet_success, max_time) + quiet_success, max_time, clear_alarms) for cmdline, remaining in tag_remaining(cmdlines): if not js.start(cmdline): break diff --git a/tools/run_tests/run_performance_tests.py b/tools/run_tests/run_performance_tests.py index ad1fb054819..78d1079c15d 100755 --- a/tools/run_tests/run_performance_tests.py +++ b/tools/run_tests/run_performance_tests.py @@ -183,7 +183,7 @@ def archive_repo(languages): jobset.message('START', 'Archiving local repository.', do_newline=True) num_failures, _ = jobset.run( - [archive_job], newline_on_success=True, maxjobs=1) + [archive_job], newline_on_success=True, maxjobs=1, clear_alarms=False) if num_failures == 0: jobset.message('SUCCESS', 'Archive with local repository created successfully.', @@ -215,7 +215,7 @@ def prepare_remote_hosts(hosts, prepare_local=False): timeout_seconds=prepare_timeout)) jobset.message('START', 'Preparing hosts.', do_newline=True) num_failures, _ = jobset.run( - prepare_jobs, newline_on_success=True, maxjobs=10) + prepare_jobs, newline_on_success=True, maxjobs=10, clear_alarms=False) if num_failures == 0: jobset.message('SUCCESS', 'Prepare step completed successfully.', @@ -248,7 +248,7 @@ def build_on_remote_hosts(hosts, languages=scenario_config.LANGUAGES.keys(), bui timeout_seconds=build_timeout)) jobset.message('START', 'Building.', do_newline=True) num_failures, _ = jobset.run( - build_jobs, newline_on_success=True, maxjobs=10) + build_jobs, newline_on_success=True, maxjobs=10, clear_alarms=False) if num_failures == 0: jobset.message('SUCCESS', 'Built successfully.', @@ -414,7 +414,7 @@ def run_collect_perf_profile_jobs(hosts_and_base_names, scenario_name): perf_report_jobs.append(perf_report_processor_job(host, perf_base_name, output_filename)) jobset.message('START', 'Collecting perf reports from qps workers', do_newline=True) - failures, _ = jobset.run(perf_report_jobs, newline_on_success=True, maxjobs=1) + failures, _ = jobset.run(perf_report_jobs, newline_on_success=True, maxjobs=1, clear_alarms=False) jobset.message('END', 'Collecting perf reports from qps workers', do_newline=True) return failures @@ -556,7 +556,7 @@ for scenario in scenarios: jobs = [scenario.jobspec] if scenario.workers: jobs.append(create_quit_jobspec(scenario.workers, remote_host=args.remote_driver_host)) - scenario_failures, resultset = jobset.run(jobs, newline_on_success=True, maxjobs=1) + scenario_failures, resultset = jobset.run(jobs, newline_on_success=True, maxjobs=1, clear_alarms=False) total_scenario_failures += scenario_failures merged_resultset = dict(itertools.chain(six.iteritems(merged_resultset), six.iteritems(resultset)))