From 6818c8740ff49432b1b582ea66adb1006a4672c1 Mon Sep 17 00:00:00 2001 From: Xuan Wang Date: Mon, 15 May 2023 17:49:34 -0700 Subject: [PATCH] [PSM Interop] Fail test if client or server pods restarted during test (#33040) Fail test if client or server pods restarted during test. #### Testing Tested locally, test will fail with message similar to: ``` ---------------------------------------------------------------------- Traceback (most recent call last): File "/usr/local/google/home/xuanwn/workspace/xds/grpc/tools/run_tests/xds_k8s_test_driver/framework/xds_k8s_testcase.py", line 501, in tearDown )) AssertionError: 5 != 0 : Server pods unexpectedly restarted 5 times during test. ---------------------------------------------------------------------- Ran 1 test in 886.867s ``` --- .../test_app/runners/k8s/k8s_base_runner.py | 11 +++++++ .../framework/xds_k8s_testcase.py | 29 +++++++++++++++++++ .../framework/xds_url_map_testcase.py | 16 ++++++++++ 3 files changed, 56 insertions(+) diff --git a/tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_base_runner.py b/tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_base_runner.py index 94e6858ea14..fce79512c6c 100644 --- a/tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_base_runner.py +++ b/tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_base_runner.py @@ -98,6 +98,17 @@ class KubernetesBaseRunner(base_runner.BaseRunner): self.pod_log_collectors = [] + def get_pod_restarts(self, deployment: k8s.V1Deployment) -> int: + if not self.k8s_namespace or not deployment: + return 0 + total_restart: int = 0 + pods: List[k8s.V1Pod] = self.k8s_namespace.list_deployment_pods( + deployment) + for pod in pods: + total_restart += sum(status.restart_count + for status in pod.status.container_statuses) + return total_restart + @classmethod def _render_template(cls, template_file, **kwargs): template =
mako.template.Template(filename=str(template_file)) diff --git a/tools/run_tests/xds_k8s_test_driver/framework/xds_k8s_testcase.py b/tools/run_tests/xds_k8s_test_driver/framework/xds_k8s_testcase.py index 178806e5646..e430644b613 100644 --- a/tools/run_tests/xds_k8s_test_driver/framework/xds_k8s_testcase.py +++ b/tools/run_tests/xds_k8s_test_driver/framework/xds_k8s_testcase.py @@ -492,6 +492,17 @@ class IsolatedXdsKubernetesTestCase(XdsKubernetesBaseTestCase, def tearDown(self): logger.info('----- TestMethod %s teardown -----', self.id()) + logger.debug('Getting pods restart times') + client_restarts: int = 0 + server_restarts: int = 0 + try: + client_restarts = self.client_runner.get_pod_restarts( + self.client_runner.deployment) + server_restarts = self.server_runner.get_pod_restarts( + self.server_runner.deployment) + except (retryers.RetryError, k8s.NotFound) as e: + logger.exception(e) + retryer = retryers.constant_retryer(wait_fixed=_timedelta(seconds=10), attempts=3, log_level=logging.INFO) @@ -499,6 +510,24 @@ class IsolatedXdsKubernetesTestCase(XdsKubernetesBaseTestCase, retryer(self.cleanup) except retryers.RetryError: logger.exception('Got error during teardown') + finally: + # Fail if any of the pods restarted. + self.assertEqual( + client_restarts, + 0, + msg= + ('Client pods unexpectedly restarted' + f' {client_restarts} times during test.' + ' In most cases, this is caused by the test client app crash.' + )) + self.assertEqual( + server_restarts, + 0, + msg= + ('Server pods unexpectedly restarted' + f' {server_restarts} times during test.' + ' In most cases, this is caused by the test server app crash.'
+ )) def cleanup(self): self.td.cleanup(force=self.force_cleanup) diff --git a/tools/run_tests/xds_k8s_test_driver/framework/xds_url_map_testcase.py b/tools/run_tests/xds_k8s_test_driver/framework/xds_url_map_testcase.py index 591b06bbb07..6c7ab20b9aa 100644 --- a/tools/run_tests/xds_k8s_test_driver/framework/xds_url_map_testcase.py +++ b/tools/run_tests/xds_k8s_test_driver/framework/xds_url_map_testcase.py @@ -34,6 +34,7 @@ from framework import xds_k8s_testcase from framework import xds_url_map_test_resources from framework.helpers import retryers from framework.helpers import skips +from framework.infrastructure import k8s from framework.test_app import client_app # Load existing flags @@ -364,6 +365,14 @@ class XdsUrlMapTestCase(absltest.TestCase, metaclass=_MetaXdsUrlMapTestCase): @classmethod def cleanupAfterTests(cls): logging.info('----- TestCase %s teardown -----', cls.__name__) + logging.debug('Getting pods restart times') + client_restarts: int = 0 + try: + client_restarts = cls.test_client_runner.get_pod_restarts( + cls.test_client_runner.deployment) + except (retryers.RetryError, k8s.NotFound) as e: + logging.exception(e) + retryer = retryers.constant_retryer(wait_fixed=_timedelta(seconds=10), attempts=3, log_level=logging.INFO) @@ -378,6 +387,13 @@ class XdsUrlMapTestCase(absltest.TestCase, metaclass=_MetaXdsUrlMapTestCase): retryer(cls._cleanup, cleanup_all) except retryers.RetryError: logging.exception('Got error during teardown') + finally: + # Fail if any of the pods restarted. + error_msg = ( + 'Client pods unexpectedly restarted' + f' {client_restarts} times during test.' + ' In most cases, this is caused by the test client app crash.') + assert client_restarts == 0, error_msg @classmethod def _cleanup(cls, cleanup_all: bool = False):