Misc changes to stress test scripts

pull/6157/head
Sree Kuchibhotla 9 years ago
parent c2b6a0bcf9
commit 478bd4449b
  1. 22
      tools/gcp/stress_test/stress_test_utils.py
  2. 6
      tools/run_tests/stress_test/configs/asan.json
  3. 2
      tools/run_tests/stress_test/configs/opt.json
  4. 6
      tools/run_tests/stress_test/configs/tsan.json
  5. 18
      tools/run_tests/stress_test/run_on_gke.py

@ -103,23 +103,29 @@ class BigQueryHelper:
return bq_utils.insert_rows(self.bq, self.project_id, self.dataset_id, return bq_utils.insert_rows(self.bq, self.project_id, self.dataset_id,
self.qps_table_id, [row]) self.qps_table_id, [row])
def check_if_any_tests_failed(self, num_query_retries=3): def check_if_any_tests_failed(self, num_query_retries=3, timeout_msec=30000):
query = ('SELECT event_type FROM %s.%s WHERE run_id = \'%s\' AND ' query = ('SELECT event_type FROM %s.%s WHERE run_id = \'%s\' AND '
'event_type="%s"') % (self.dataset_id, self.summary_table_id, 'event_type="%s"') % (self.dataset_id, self.summary_table_id,
self.run_id, EventType.FAILURE) self.run_id, EventType.FAILURE)
page = None
try: try:
query_job = bq_utils.sync_query_job(self.bq, self.project_id, query) query_job = bq_utils.sync_query_job(self.bq, self.project_id, query)
job_id = query_job['jobReference']['jobId']
project_id = query_job['jobReference']['projectId']
page = self.bq.jobs().getQueryResults( page = self.bq.jobs().getQueryResults(
**query_job['jobReference']).execute(num_retries=num_query_retries) projectId=project_id,
jobId=job_id,
timeoutMs=timeout_msec).execute(num_retries=num_query_retries)
if not page['jobComplete']:
print('TIMEOUT ERROR: The query %s timed out. Current timeout value is'
' %d msec. Returning False (i.e assuming there are no failures)'
) % (query, timeout_msec)
return False
num_failures = int(page['totalRows']) num_failures = int(page['totalRows'])
print 'num rows: ', num_failures print 'num rows: ', num_failures
return num_failures > 0 return num_failures > 0
# TODO (sreek): Cleanup the following lines once we have a better idea of
# why we sometimes get KeyError exceptions in long running test cases
except KeyError:
print 'KeyError in check_if_any_tests_failed()'
print 'Query:', query
print 'Query result page:', page
except: except:
print 'Exception in check_if_any_tests_failed(). Info: ', sys.exc_info() print 'Exception in check_if_any_tests_failed(). Info: ', sys.exc_info()
print 'Query: ', query print 'Query: ', query

@ -11,13 +11,13 @@
"baseTemplates": { "baseTemplates": {
"default": { "default": {
"wrapperScriptPath": "/var/local/git/grpc/tools/gcp/stress_test/run_client.py", "wrapperScriptPath": "/var/local/git/grpc/tools/gcp/stress_test/run_client.py",
"pollIntervalSecs": 60, "pollIntervalSecs": 120,
"clientArgs": { "clientArgs": {
"num_channels_per_server":5, "num_channels_per_server":5,
"num_stubs_per_channel":10, "num_stubs_per_channel":10,
"test_cases": "empty_unary:1,large_unary:1,client_streaming:1,server_streaming:1,empty_stream:1", "test_cases": "empty_unary:1,large_unary:1,client_streaming:1,server_streaming:1,empty_stream:1",
"metrics_port": 8081, "metrics_port": 8081,
"metrics_collection_interval_secs":60 "metrics_collection_interval_secs":120
}, },
"metricsPort": 8081, "metricsPort": 8081,
"metricsArgs": { "metricsArgs": {
@ -66,7 +66,7 @@
"stress-client-asan": { "stress-client-asan": {
"clientTemplate": "cxx_client_asan", "clientTemplate": "cxx_client_asan",
"dockerImage": "grpc_stress_cxx_asan", "dockerImage": "grpc_stress_cxx_asan",
"numInstances": 20, "numInstances": 5,
"serverPodSpec": "stress-server-asan" "serverPodSpec": "stress-server-asan"
} }
} }

@ -66,7 +66,7 @@
"stress-client-opt": { "stress-client-opt": {
"clientTemplate": "cxx_client_opt", "clientTemplate": "cxx_client_opt",
"dockerImage": "grpc_stress_cxx_opt", "dockerImage": "grpc_stress_cxx_opt",
"numInstances": 10, "numInstances": 15,
"serverPodSpec": "stress-server-opt" "serverPodSpec": "stress-server-opt"
} }
} }

@ -11,13 +11,13 @@
"baseTemplates": { "baseTemplates": {
"default": { "default": {
"wrapperScriptPath": "/var/local/git/grpc/tools/gcp/stress_test/run_client.py", "wrapperScriptPath": "/var/local/git/grpc/tools/gcp/stress_test/run_client.py",
"pollIntervalSecs": 60, "pollIntervalSecs": 120,
"clientArgs": { "clientArgs": {
"num_channels_per_server":5, "num_channels_per_server":5,
"num_stubs_per_channel":10, "num_stubs_per_channel":10,
"test_cases": "empty_unary:1,large_unary:1,client_streaming:1,server_streaming:1,empty_stream:1", "test_cases": "empty_unary:1,large_unary:1,client_streaming:1,server_streaming:1,empty_stream:1",
"metrics_port": 8081, "metrics_port": 8081,
"metrics_collection_interval_secs":60 "metrics_collection_interval_secs":120
}, },
"metricsPort": 8081, "metricsPort": 8081,
"metricsArgs": { "metricsArgs": {
@ -66,7 +66,7 @@
"stress-client-tsan": { "stress-client-tsan": {
"clientTemplate": "cxx_client_tsan", "clientTemplate": "cxx_client_tsan",
"dockerImage": "grpc_stress_cxx_tsan", "dockerImage": "grpc_stress_cxx_tsan",
"numInstances": 20, "numInstances": 5,
"serverPodSpec": "stress-server-tsan" "serverPodSpec": "stress-server-tsan"
} }
} }

@ -604,6 +604,17 @@ def run_tests(config):
return is_success return is_success
def tear_down(config):
  """Delete the servers and clients for every pod spec listed in `config`.

  Builds a Gke helper from the global settings in `config` (GCP project id,
  summary and QPS table ids, Kubernetes proxy port) and asks it to delete
  the servers of each entry in `config.server_pod_specs_dict`, then the
  clients of each entry in `config.client_pod_specs_dict`.

  Args:
    config: Parsed test configuration exposing `global_settings`,
      `server_pod_specs_dict` and `client_pod_specs_dict`.
  """
  # NOTE(review): the second and third Gke arguments are passed as empty
  # strings; presumably they are identifiers that are not needed for
  # deletion -- confirm against the Gke constructor.
  gke = Gke(config.global_settings.gcp_project_id, '', '',
            config.global_settings.summary_table_id,
            config.global_settings.qps_table_id,
            config.global_settings.kubernetes_proxy_port)

  # Only the pod spec objects are needed; the dictionary keys are unused.
  for server_pod_spec in config.server_pod_specs_dict.values():
    gke.delete_servers(server_pod_spec)

  for client_pod_spec in config.client_pod_specs_dict.values():
    gke.delete_clients(client_pod_spec)
argp = argparse.ArgumentParser( argp = argparse.ArgumentParser(
description='Launch stress tests in GKE', description='Launch stress tests in GKE',
formatter_class=argparse.ArgumentDefaultsHelpFormatter) formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@ -614,6 +625,7 @@ argp.add_argument('--config_file',
required=True, required=True,
type=str, type=str,
help='The test config file') help='The test config file')
argp.add_argument('--tear_down', action='store_true', default=False)
if __name__ == '__main__': if __name__ == '__main__':
args = argp.parse_args() args = argp.parse_args()
@ -636,5 +648,11 @@ if __name__ == '__main__':
os.path.dirname(sys.argv[0]), '../../..')) os.path.dirname(sys.argv[0]), '../../..'))
os.chdir(grpc_root) os.chdir(grpc_root)
# Note that tear_down is only used in cases where we want to manually tear
# down a test that, for some reason, run_tests() could not clean up
if args.tear_down:
tear_down(config)
sys.exit(1)
if not run_tests(config): if not run_tests(config):
sys.exit(1) sys.exit(1)

Loading…
Cancel
Save