[PSM Interop] Fail test if client or server pods restarted during test (#33040)

Fail test if client or server pods restarted during test.

#### Testing
Tested locally; the test fails with a message similar to:
```
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/usr/local/google/home/xuanwn/workspace/xds/grpc/tools/run_tests/xds_k8s_test_driver/framework/xds_k8s_testcase.py", line 501, in tearDown
    ))
AssertionError: 5 != 0 : Server pods unexpectedly restarted {sever_restarts} times during test.

----------------------------------------------------------------------
Ran 1 test in 886.867s
```
pull/33137/head
Xuan Wang 2 years ago committed by GitHub
parent de295eb424
commit 6818c8740f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 11
      tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_base_runner.py
  2. 29
      tools/run_tests/xds_k8s_test_driver/framework/xds_k8s_testcase.py
  3. 16
      tools/run_tests/xds_k8s_test_driver/framework/xds_url_map_testcase.py

@ -98,6 +98,17 @@ class KubernetesBaseRunner(base_runner.BaseRunner):
self.pod_log_collectors = []
def get_pod_restarts(self, deployment: k8s.V1Deployment) -> int:
    """Return the total number of container restarts for a deployment.

    Args:
        deployment: The deployment whose pods are inspected.

    Returns:
        The sum of ``restart_count`` over every container status of every
        pod listed for ``deployment``. Returns 0 when this runner has no
        namespace or ``deployment`` is not set.
    """
    if not self.k8s_namespace or not deployment:
        return 0
    pods: List[k8s.V1Pod] = self.k8s_namespace.list_deployment_pods(
        deployment)
    # container_statuses is an optional field and may be None (presumably
    # while a pod has not reported any container state yet); treat that as
    # "no restarts" instead of raising TypeError during teardown.
    return sum(status.restart_count
               for pod in pods
               for status in (pod.status.container_statuses or ()))
@classmethod
def _render_template(cls, template_file, **kwargs):
template = mako.template.Template(filename=str(template_file))

@ -492,6 +492,17 @@ class IsolatedXdsKubernetesTestCase(XdsKubernetesBaseTestCase,
def tearDown(self):
logger.info('----- TestMethod %s teardown -----', self.id())
logger.debug('Getting pods restart times')
client_restarts: int = 0
server_restarts: int = 0
try:
client_restarts = self.client_runner.get_pod_restarts(
self.client_runner.deployment)
server_restarts = self.server_runner.get_pod_restarts(
self.server_runner.deployment)
except (retryers.RetryError, k8s.NotFound) as e:
logger.exception(e)
retryer = retryers.constant_retryer(wait_fixed=_timedelta(seconds=10),
attempts=3,
log_level=logging.INFO)
@ -499,6 +510,24 @@ class IsolatedXdsKubernetesTestCase(XdsKubernetesBaseTestCase,
retryer(self.cleanup)
except retryers.RetryError:
logger.exception('Got error during teardown')
finally:
# Fail if any of the pods restarted.
self.assertEqual(
client_restarts,
0,
msg=
('Client pods unexpectedly restarted'
f' {client_restarts} times during test.'
' In most cases, this is caused by the test client app crash.'
))
self.assertEqual(
server_restarts,
0,
msg=
('Server pods unexpectedly restarted'
f' {server_restarts} times during test.'
' In most cases, this is caused by the test client app crash.'
))
def cleanup(self):
self.td.cleanup(force=self.force_cleanup)

@ -34,6 +34,7 @@ from framework import xds_k8s_testcase
from framework import xds_url_map_test_resources
from framework.helpers import retryers
from framework.helpers import skips
from framework.infrastructure import k8s
from framework.test_app import client_app
# Load existing flags
@ -364,6 +365,14 @@ class XdsUrlMapTestCase(absltest.TestCase, metaclass=_MetaXdsUrlMapTestCase):
@classmethod
def cleanupAfterTests(cls):
logging.info('----- TestCase %s teardown -----', cls.__name__)
logging.debug('Getting pods restart times')
client_restarts: int = 0
try:
client_restarts = cls.test_client_runner.get_pod_restarts(
cls.test_client_runner.deployment)
except (retryers.RetryError, k8s.NotFound) as e:
logging.exception(e)
retryer = retryers.constant_retryer(wait_fixed=_timedelta(seconds=10),
attempts=3,
log_level=logging.INFO)
@ -378,6 +387,13 @@ class XdsUrlMapTestCase(absltest.TestCase, metaclass=_MetaXdsUrlMapTestCase):
retryer(cls._cleanup, cleanup_all)
except retryers.RetryError:
logging.exception('Got error during teardown')
finally:
# Fail if any of the pods restarted.
error_msg = (
'Client pods unexpectedly restarted'
f' {client_restarts} times during test.'
' In most cases, this is caused by the test client app crash.')
assert client_restarts == 0, error_msg
@classmethod
def _cleanup(cls, cleanup_all: bool = False):

Loading…
Cancel
Save