Add a will_run_forever flag to handle cases where the process terminates with an exit code of 0

9 years ago · 5bc112c0c9
parent 9192498a08
commit 5bc112c0c9
4 changed files with 28 additions and 10 deletions
--- a/tools/gcp/stress_test/run_client.py
+++ b/tools/gcp/stress_test/run_client.py
@@ -103,6 +103,11 @@ def run_client():
  dataset_id = env['DATASET_ID']
  summary_table_id = env['SUMMARY_TABLE_ID']
  qps_table_id = env['QPS_TABLE_ID']
+  # The following parameter tells us whether the stress client runs forever
+  # until forcefully stopped or naturally stops after some time. When it is
+  # expected to run forever, the stress client process should never terminate,
+  # so any termination (even with a success exit code) is flagged as a failure.
+  will_run_forever = env.get('WILL_RUN_FOREVER', '1')

  bq_helper = BigQueryHelper(run_id, image_type, pod_name, project_id,
                             dataset_id, summary_table_id, qps_table_id)
@@ -140,11 +145,12 @@ def run_client():
  while True:
    # Check if stress_client is still running. If so, collect metrics and upload
    # to BigQuery status table
+    # If stress_p.poll() is not None, it means that the stress client terminated
    if stress_p.poll() is not None:
      end_time = datetime.datetime.now().isoformat()
      event_type = EventType.SUCCESS
      details = 'End time: %s' % end_time
-      if stress_p.returncode != 0:
+      if will_run_forever == '1' or stress_p.returncode != 0:
        event_type = EventType.FAILURE
        details = 'Return code = %d. End time: %s' % (stress_p.returncode,
                                                      end_time)
--- a/tools/gcp/stress_test/run_server.py
+++ b/tools/gcp/stress_test/run_server.py
@@ -69,6 +69,11 @@ def run_server():
  dataset_id = env['DATASET_ID']
  summary_table_id = env['SUMMARY_TABLE_ID']
  qps_table_id = env['QPS_TABLE_ID']
+  # The following parameter tells us whether the server runs forever until
+  # forcefully stopped or naturally stops after some time. When it is expected
+  # to run forever, the process should never terminate, so any termination
+  # (even with a success exit code) is flagged as a failure.
+  will_run_forever = env.get('WILL_RUN_FOREVER', '1')

  logfile_name = env.get('LOGFILE_NAME')

@@ -106,7 +111,8 @@ def run_server():
                              stderr=subprocess.STDOUT)

  returncode = stress_p.wait()
-  if returncode != 0:
+
+  if will_run_forever == '1' or returncode != 0:
    end_time = datetime.datetime.now().isoformat()
    event_type = EventType.FAILURE
    details = 'Returncode: %d; End time: %s' % (returncode, end_time)
--- a/tools/run_tests/stress_test/configs/java.json
+++ b/tools/run_tests/stress_test/configs/java.json
@@ -43,7 +43,8 @@
        "wrapperScriptPath": "/var/local/git/grpc/tools/gcp/stress_test/run_server.py",
        "serverPort": 8080,
        "serverArgs": {
-          "port": 8080
+          "port": 8080,
+		  "use_tls": "false"
        }
      }
    },
@@ -70,7 +71,7 @@
      "java-stress-client": {
        "clientTemplate": "java_client",
        "dockerImage": "grpc_stress_java",
-        "numInstances": 15,
+        "numInstances": 10,
        "serverPodSpec": "java-stress-server"
      }
    }
--- a/tools/run_tests/stress_test/run_on_gke.py
+++ b/tools/run_tests/stress_test/run_on_gke.py
@@ -69,7 +69,7 @@ class ClientTemplate:

  def __init__(self, name, stress_client_cmd, metrics_client_cmd, metrics_port,
               wrapper_script_path, poll_interval_secs, client_args_dict,
-               metrics_args_dict):
+               metrics_args_dict, will_run_forever):
    self.name = name
    self.stress_client_cmd = stress_client_cmd
    self.metrics_client_cmd = metrics_client_cmd
@@ -78,18 +78,20 @@ class ClientTemplate:
    self.poll_interval_secs = poll_interval_secs
    self.client_args_dict = client_args_dict
    self.metrics_args_dict = metrics_args_dict
+    self.will_run_forever = will_run_forever


 class ServerTemplate:
  """ Contains all the common settings used by a stress server """

  def __init__(self, name, server_cmd, wrapper_script_path, server_port,
-               server_args_dict):
+               server_args_dict, will_run_forever):
    self.name = name
    self.server_cmd = server_cmd
    self.wrapper_script_path = wrapper_script_path
    self.server_port = server_port
    self.server_args_dict = server_args_dict
+    self.will_run_forever = will_run_forever


 class DockerImage:
@@ -242,7 +244,8 @@ class Gke:
        'STRESS_TEST_IMAGE_TYPE': 'SERVER',
        'STRESS_TEST_CMD': server_pod_spec.template.server_cmd,
        'STRESS_TEST_ARGS_STR': self._args_dict_to_str(
-            server_pod_spec.template.server_args_dict)
+            server_pod_spec.template.server_args_dict),
+        'WILL_RUN_FOREVER': str(server_pod_spec.template.will_run_forever)
    })

    for pod_name in server_pod_spec.pod_names():
@@ -288,7 +291,8 @@ class Gke:
        'METRICS_CLIENT_CMD': client_pod_spec.template.metrics_client_cmd,
        'METRICS_CLIENT_ARGS_STR': self._args_dict_to_str(
            client_pod_spec.template.metrics_args_dict),
-        'POLL_INTERVAL_SECS': str(client_pod_spec.template.poll_interval_secs)
+        'POLL_INTERVAL_SECS': str(client_pod_spec.template.poll_interval_secs),
+        'WILL_RUN_FOREVER': str(client_pod_spec.template.will_run_forever)
    })

    for pod_name in client_pod_spec.pod_names():
@@ -421,7 +425,7 @@ class Config:
          template_name, stress_client_cmd, metrics_client_cmd,
          temp_dict['metricsPort'], temp_dict['wrapperScriptPath'],
          temp_dict['pollIntervalSecs'], temp_dict['clientArgs'].copy(),
-          temp_dict['metricsArgs'].copy())
+          temp_dict['metricsArgs'].copy(), temp_dict.get('willRunForever', 1))

    return client_templates_dict

@@ -456,7 +460,8 @@ class Config:
      stress_server_cmd = ' '.join(temp_dict['stressServerCmd'])
      server_templates_dict[template_name] = ServerTemplate(
          template_name, stress_server_cmd, temp_dict['wrapperScriptPath'],
-          temp_dict['serverPort'], temp_dict['serverArgs'].copy())
+          temp_dict['serverPort'], temp_dict['serverArgs'].copy(),
+          temp_dict.get('willRunForever', 1))

    return server_templates_dict