[PSM Interop] Add support to enable CSM Observability and a new test case (#34835)

This PR adds CSM Observability testing capability in the PSM Interop testing framework. This PR mostly changes the framework Python code.

This adds a flag `enable_csm_observability` to the client / server deployment yaml files. When it is enabled, we create a GMP (Google Managed Prometheus) `PodMonitoring` resource and pass the `--enable_csm_observability` flag to each language's client / server container, so that they actually enable the Prometheus endpoint.

I added a new test under `tests/gamma/csm_observability_test.py`. This is basically a copy of `tests/baseline_test.py`, but with `enable_csm_observability=True`.

Other PRs for this whole thing to work:
- https://github.com/grpc/grpc/pull/34752: The `PodMonitoring` resource yaml template
- https://github.com/grpc/grpc/pull/34832: Support for the `--enable_csm_observability` flag in the C++ client/server image

Closes #34835

COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/34835 from stanley-cheung:csm-o11y-framework-changes 0b3d0eb7ed
PiperOrigin-RevId: 595502496
pull/35445/head
Stanley Cheung 11 months ago committed by Copybara-Service
parent 691a068a07
commit 9e702debfb
  1. 2
      tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/gamma_server_runner.py
  2. 28
      tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_base_runner.py
  3. 12
      tools/run_tests/xds_k8s_test_driver/framework/test_app/runners/k8s/k8s_xds_client_runner.py
  4. 3
      tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/client.deployment.yaml
  5. 4
      tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/csm/pod-monitoring.yaml
  6. 3
      tools/run_tests/xds_k8s_test_driver/kubernetes-manifests/server.deployment.yaml
  7. 48
      tools/run_tests/xds_k8s_test_driver/tests/gamma/csm_observability_test.py

@ -115,6 +115,7 @@ class GammaServerRunner(KubernetesServerRunner):
log_to_stdout: bool = False,
bootstrap_version: Optional[str] = None,
route_template: str = "gamma/route_http.yaml",
enable_csm_observability: bool = False,
) -> List[XdsTestServer]:
if not maintenance_port:
maintenance_port = self._get_default_maintenance_port(secure_mode)
@ -206,6 +207,7 @@ class GammaServerRunner(KubernetesServerRunner):
bootstrap_version=bootstrap_version,
termination_grace_period_seconds=self.termination_grace_period_seconds,
pre_stop_hook=self.pre_stop_hook,
enable_csm_observability=enable_csm_observability,
)
servers = self._make_servers_for_deployment(

@ -299,6 +299,34 @@ class KubernetesBaseRunner(base_runner.BaseRunner, metaclass=ABCMeta):
logger.info("Reusing namespace: %s", self.k8s_namespace.name)
return self.k8s_namespace.get()
def _create_pod_monitoring(self, template, **kwargs) -> object:
    """Create a PodMonitoring resource from a manifest template.

    Args:
        template: Manifest template name, forwarded unchanged to
            self._create_from_template().
        **kwargs: Template variables, forwarded unchanged to
            self._create_from_template(). Must contain non-empty
            namespace_name, deployment_id and pod_monitoring_name.

    Returns:
        Whatever self._create_from_template() returned for the created
        PodMonitoring resource.

    Raises:
        _RunnerError: If a required variable is missing or empty, or if the
            created resource reports a namespace other than namespace_name.
    """
    # .get() makes an omitted variable fail with the same _RunnerError as
    # an empty one, instead of leaking a bare KeyError to the caller.
    for required in ("namespace_name", "deployment_id", "pod_monitoring_name"):
        if not kwargs.get(required):
            raise _RunnerError(
                f"{required} required to create PodMonitoring resource"
            )

    pod_monitoring = self._create_from_template(
        template, custom_object=True, **kwargs
    )

    # Sanity check: the resource must land in the namespace we asked for.
    if pod_monitoring.metadata.namespace != kwargs["namespace_name"]:
        raise _RunnerError(
            "PodMonitoring resource created with unexpected namespace: "
            f"{pod_monitoring.metadata.namespace}"
        )

    logger.debug(
        "PodMonitoring %s created at %s",
        pod_monitoring.metadata.name,
        pod_monitoring.metadata.creation_timestamp,
    )
    return pod_monitoring
def _create_namespace(self, template, **kwargs) -> k8s.V1Namespace:
namespace = self._create_from_template(template, **kwargs)
if not isinstance(namespace, k8s.V1Namespace):

@ -104,6 +104,7 @@ class KubernetesClientRunner(k8s_base_runner.KubernetesBaseRunner):
generate_mesh_id=False,
print_response=False,
log_to_stdout: bool = False,
enable_csm_observability: bool = False,
) -> XdsTestClient:
logger.info(
(
@ -158,8 +159,19 @@ class KubernetesClientRunner(k8s_base_runner.KubernetesBaseRunner):
config_mesh=config_mesh,
generate_mesh_id=generate_mesh_id,
print_response=print_response,
enable_csm_observability=enable_csm_observability,
)
# Create a PodMonitoring resource if CSM Observability is enabled
# This is GMP (Google Managed Prometheus)
if enable_csm_observability:
self._create_pod_monitoring(
"csm/pod-monitoring.yaml",
namespace_name=self.k8s_namespace.name,
deployment_id=self.deployment_id,
pod_monitoring_name="%s-gmp" % self.deployment_id,
)
# Load test client pod. We need only one client at the moment
pod_name = self._wait_deployment_pod_count(self.deployment)[0]
pod: k8s.V1Pod = self._wait_pod_started(pod_name)

@ -42,6 +42,9 @@ spec:
- "--rpc=${rpc}"
- "--metadata=${metadata}"
- "--print_response=${print_response}"
% if enable_csm_observability:
- "--enable_csm_observability"
% endif
ports:
- containerPort: ${stats_port}
env:

@ -3,7 +3,9 @@ apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
namespace: ${namespace_name}
name: ${deployment_id}-gmp
name: ${pod_monitoring_name}
labels:
owner: xds-k8s-interop-test
spec:
selector:
matchLabels:

@ -40,6 +40,9 @@ spec:
failureThreshold: 1000
args:
- "--port=${test_port}"
% if enable_csm_observability:
- "--enable_csm_observability"
% endif
ports:
- containerPort: ${test_port}
env:

@ -0,0 +1,48 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from absl import flags
from absl.testing import absltest
from framework import xds_gamma_testcase
from framework import xds_k8s_testcase
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Treat the key flags of xds_k8s_testcase as key flags of this module too.
flags.adopt_module_key_flags(xds_k8s_testcase)
# Short aliases used in the type annotations of the test below.
_XdsTestServer = xds_k8s_testcase.XdsTestServer
_XdsTestClient = xds_k8s_testcase.XdsTestClient
class CsmObservabilityTest(xds_gamma_testcase.GammaXdsKubernetesTestCase):
    """Ping-pong test with CSM Observability enabled on server and client."""

    def test_ping_pong(self):
        # Start one server and one client, both with
        # enable_csm_observability=True, then check that RPCs succeed.
        #
        # TODO(sergiitk): [GAMMA] Consider moving out custom gamma
        #   resource creation out of self.startTestServers()
        with self.subTest("1_run_test_server"):
            servers = self.startTestServers(enable_csm_observability=True)
            server: _XdsTestServer = servers[0]

        with self.subTest("2_start_test_client"):
            client: _XdsTestClient = self.startTestClient(
                server,
                enable_csm_observability=True,
            )

        with self.subTest("3_test_server_received_rpcs_from_test_client"):
            self.assertSuccessfulRpcs(client)
# Run the tests through absltest when this file is executed as a script.
if __name__ == "__main__":
absltest.main()
Loading…
Cancel
Save