Merge branch 'revert-12289-sig_hand' of github.com:grpc/grpc into stats_json

pull/12509/head
Craig Tiller 7 years ago
commit 459098eead
  1. tools/run_tests/python_utils/jobset.py (20 changed lines)
  2. tools/run_tests/run_performance_tests.py (8 changed lines)

@@ -71,8 +71,10 @@ def platform_string():
if platform_string() == 'windows': if platform_string() == 'windows':
pass pass
else: else:
have_alarm = False
def alarm_handler(unused_signum, unused_frame): def alarm_handler(unused_signum, unused_frame):
pass global have_alarm
have_alarm = False
signal.signal(signal.SIGCHLD, lambda unused_signum, unused_frame: None) signal.signal(signal.SIGCHLD, lambda unused_signum, unused_frame: None)
signal.signal(signal.SIGALRM, alarm_handler) signal.signal(signal.SIGALRM, alarm_handler)
@@ -365,9 +367,10 @@ class Jobset(object):
"""Manages one run of jobs.""" """Manages one run of jobs."""
def __init__(self, check_cancelled, maxjobs, newline_on_success, travis, def __init__(self, check_cancelled, maxjobs, newline_on_success, travis,
stop_on_failure, add_env, quiet_success, max_time): stop_on_failure, add_env, quiet_success, max_time, clear_alarms):
self._running = set() self._running = set()
self._check_cancelled = check_cancelled self._check_cancelled = check_cancelled
self._clear_alarms = clear_alarms
self._cancelled = False self._cancelled = False
self._failures = 0 self._failures = 0
self._completed = 0 self._completed = 0
@@ -452,6 +455,9 @@ class Jobset(object):
if platform_string() == 'windows': if platform_string() == 'windows':
time.sleep(0.1) time.sleep(0.1)
else: else:
global have_alarm
if not have_alarm:
have_alarm = True
signal.alarm(10) signal.alarm(10)
signal.pause() signal.pause()
@@ -468,7 +474,10 @@ class Jobset(object):
while self._running: while self._running:
if self.cancelled(): pass # poll cancellation if self.cancelled(): pass # poll cancellation
self.reap() self.reap()
if platform_string() != 'windows': # Clear the alarms when finished to avoid a race condition causing job
# failures. Don't do this when running multi-VM tests because clearing
# the alarms causes the test to stall
if platform_string() != 'windows' and self._clear_alarms:
signal.alarm(0) signal.alarm(0)
return not self.cancelled() and self._failures == 0 return not self.cancelled() and self._failures == 0
@@ -498,7 +507,8 @@ def run(cmdlines,
add_env={}, add_env={},
skip_jobs=False, skip_jobs=False,
quiet_success=False, quiet_success=False,
max_time=-1): max_time=-1,
clear_alarms=True):
if skip_jobs: if skip_jobs:
resultset = {} resultset = {}
skipped_job_result = JobResult() skipped_job_result = JobResult()
@@ -510,7 +520,7 @@ def run(cmdlines,
js = Jobset(check_cancelled, js = Jobset(check_cancelled,
maxjobs if maxjobs is not None else _DEFAULT_MAX_JOBS, maxjobs if maxjobs is not None else _DEFAULT_MAX_JOBS,
newline_on_success, travis, stop_on_failure, add_env, newline_on_success, travis, stop_on_failure, add_env,
quiet_success, max_time) quiet_success, max_time, clear_alarms)
for cmdline, remaining in tag_remaining(cmdlines): for cmdline, remaining in tag_remaining(cmdlines):
if not js.start(cmdline): if not js.start(cmdline):
break break

@@ -183,7 +183,7 @@ def archive_repo(languages):
jobset.message('START', 'Archiving local repository.', do_newline=True) jobset.message('START', 'Archiving local repository.', do_newline=True)
num_failures, _ = jobset.run( num_failures, _ = jobset.run(
[archive_job], newline_on_success=True, maxjobs=1) [archive_job], newline_on_success=True, maxjobs=1, clear_alarms=False)
if num_failures == 0: if num_failures == 0:
jobset.message('SUCCESS', jobset.message('SUCCESS',
'Archive with local repository created successfully.', 'Archive with local repository created successfully.',
@@ -215,7 +215,7 @@ def prepare_remote_hosts(hosts, prepare_local=False):
timeout_seconds=prepare_timeout)) timeout_seconds=prepare_timeout))
jobset.message('START', 'Preparing hosts.', do_newline=True) jobset.message('START', 'Preparing hosts.', do_newline=True)
num_failures, _ = jobset.run( num_failures, _ = jobset.run(
prepare_jobs, newline_on_success=True, maxjobs=10) prepare_jobs, newline_on_success=True, maxjobs=10, clear_alarms=False)
if num_failures == 0: if num_failures == 0:
jobset.message('SUCCESS', jobset.message('SUCCESS',
'Prepare step completed successfully.', 'Prepare step completed successfully.',
@@ -248,7 +248,7 @@ def build_on_remote_hosts(hosts, languages=scenario_config.LANGUAGES.keys(), bui
timeout_seconds=build_timeout)) timeout_seconds=build_timeout))
jobset.message('START', 'Building.', do_newline=True) jobset.message('START', 'Building.', do_newline=True)
num_failures, _ = jobset.run( num_failures, _ = jobset.run(
build_jobs, newline_on_success=True, maxjobs=10) build_jobs, newline_on_success=True, maxjobs=10, clear_alarms=False)
if num_failures == 0: if num_failures == 0:
jobset.message('SUCCESS', jobset.message('SUCCESS',
'Built successfully.', 'Built successfully.',
@@ -414,7 +414,7 @@ def run_collect_perf_profile_jobs(hosts_and_base_names, scenario_name, flame_gra
perf_report_jobs.append(perf_report_processor_job(host, perf_base_name, output_filename, flame_graph_reports)) perf_report_jobs.append(perf_report_processor_job(host, perf_base_name, output_filename, flame_graph_reports))
jobset.message('START', 'Collecting perf reports from qps workers', do_newline=True) jobset.message('START', 'Collecting perf reports from qps workers', do_newline=True)
failures, _ = jobset.run(perf_report_jobs, newline_on_success=True, maxjobs=1) failures, _ = jobset.run(perf_report_jobs, newline_on_success=True, maxjobs=1, clear_alarms=False)
jobset.message('END', 'Collecting perf reports from qps workers', do_newline=True) jobset.message('END', 'Collecting perf reports from qps workers', do_newline=True)
return failures return failures

Loading…
Cancel
Save