[Python O11y] Implement CSM observability for Python (#36557)

Implement Python CSM observability.

Design: [go/grpc-python-opentelemetry](http://goto.google.com/grpc-python-opentelemetry)
<!--

If you know who should review your pull request, please assign it to that
person, otherwise the pull request would get assigned randomly.

If your pull request is for a specific language, please add the appropriate
lang label.

-->

Closes #36557

PiperOrigin-RevId: 639073741
pull/36714/head^2
Xuan Wang 6 months ago committed by Copybara-Service
parent 7ccb51e2ea
commit f3220d08d2
  1. 7
      requirements.bazel.txt
  2. 10
      src/python/grpcio/grpc/_cython/_cygrpc/observability.pyx.pxi
  3. 54
      src/python/grpcio/grpc/_observability.py
  4. 13
      src/python/grpcio/grpc/_server.py
  5. 4
      src/python/grpcio_csm_observability/MANIFEST.in
  6. 5
      src/python/grpcio_csm_observability/README.rst
  7. 36
      src/python/grpcio_csm_observability/grpc_csm_observability/BUILD.bazel
  8. 18
      src/python/grpcio_csm_observability/grpc_csm_observability/__init__.py
  9. 343
      src/python/grpcio_csm_observability/grpc_csm_observability/_csm_observability_plugin.py
  10. 17
      src/python/grpcio_csm_observability/grpc_version.py
  11. 63
      src/python/grpcio_csm_observability/setup.py
  12. 2
      src/python/grpcio_observability/grpc_observability/BUILD.bazel
  13. 7
      src/python/grpcio_observability/grpc_observability/_cyobservability.pxd
  14. 88
      src/python/grpcio_observability/grpc_observability/_cyobservability.pyx
  15. 167
      src/python/grpcio_observability/grpc_observability/_gcp_observability.py
  16. 23
      src/python/grpcio_observability/grpc_observability/_observability.py
  17. 3
      src/python/grpcio_observability/grpc_observability/_open_telemetry_measures.py
  18. 239
      src/python/grpcio_observability/grpc_observability/_open_telemetry_observability.py
  19. 108
      src/python/grpcio_observability/grpc_observability/_open_telemetry_plugin.py
  20. 85
      src/python/grpcio_observability/grpc_observability/client_call_tracer.cc
  21. 29
      src/python/grpcio_observability/grpc_observability/client_call_tracer.h
  22. 4
      src/python/grpcio_observability/grpc_observability/constants.h
  23. 115
      src/python/grpcio_observability/grpc_observability/metadata_exchange.cc
  24. 63
      src/python/grpcio_observability/grpc_observability/metadata_exchange.h
  25. 26
      src/python/grpcio_observability/grpc_observability/observability_util.cc
  26. 25
      src/python/grpcio_observability/grpc_observability/observability_util.h
  27. 1
      src/python/grpcio_observability/grpc_observability/python_observability_context.h
  28. 250
      src/python/grpcio_observability/grpc_observability/server_call_tracer.cc
  29. 84
      src/python/grpcio_observability/grpc_observability/server_call_tracer.h
  30. 4
      src/python/grpcio_observability/make_grpcio_observability.py
  31. 1
      src/python/grpcio_observability/observability_lib_deps.py
  32. 5
      src/python/grpcio_observability/setup.py
  33. 39
      src/python/grpcio_tests/tests/observability/BUILD.bazel
  34. 652
      src/python/grpcio_tests/tests/observability/_csm_observability_plugin_test.py
  35. 25
      src/python/grpcio_tests/tests/observability/_from_csm_observability_import_star.py
  36. 11
      src/python/grpcio_tests/tests/observability/_observability_api_test.py
  37. 363
      src/python/grpcio_tests/tests/observability/_observability_plugin_test.py
  38. 77
      src/python/grpcio_tests/tests/observability/_open_telemetry_observability_test.py
  39. 1
      src/python/grpcio_tests/tests/observability/_test_server.py
  40. 3
      src/python/grpcio_tests/tests/tests.json
  41. 19
      templates/src/python/grpcio_csm_observability/grpc_version.py.template
  42. 2
      tools/distrib/install_all_python_modules.sh
  43. 1
      tools/distrib/pylint_code.sh
  44. 8
      tools/run_tests/artifacts/build_artifact_python.sh
  45. 1
      tools/run_tests/helper_scripts/build_python.sh

@ -26,9 +26,12 @@ pyasn1==0.5.0
rsa==4.9
greenlet==1.1.3.post0
zope.interface==6.1
opentelemetry-sdk==1.21.0
opentelemetry-api==1.21.0
opentelemetry-sdk==1.24.0
opentelemetry-api==1.24.0
importlib-metadata==6.11.0
opentelemetry-resourcedetector-gcp==1.6.0a0
opentelemetry-exporter-prometheus==0.45b0
prometheus_client==0.20.0
Deprecated==1.2.14
opentelemetry-semantic-conventions==0.42b0
typing-extensions==4.9.0

@ -23,11 +23,13 @@ cdef const char* CLIENT_CALL_TRACER = "client_call_tracer"
cdef const char* SERVER_CALL_TRACER_FACTORY = "server_call_tracer_factory"
def set_server_call_tracer_factory(object observability_plugin) -> None:
capsule = observability_plugin.create_server_call_tracer_factory()
def get_server_call_tracer_factory_address(object observability_plugin, bint xds) -> Optional[int]:
capsule = observability_plugin.create_server_call_tracer_factory(xds=xds)
if capsule:
capsule_ptr = cpython.PyCapsule_GetPointer(capsule, SERVER_CALL_TRACER_FACTORY)
_register_server_call_tracer_factory(capsule_ptr)
return int(<uintptr_t>capsule_ptr)
else:
return None
def clear_server_call_tracer_factory() -> None:
_register_server_call_tracer_factory(NULL)

@ -21,6 +21,7 @@ import threading
from typing import Any, Generator, Generic, List, Optional, TypeVar
from grpc._cython import cygrpc as _cygrpc
from grpc._typing import ChannelArgumentType
_LOGGER = logging.getLogger(__name__)
@ -36,6 +37,20 @@ _SERVICES_TO_EXCLUDE: List[bytes] = [
]
class ServerCallTracerFactory:
"""An encapsulation of a ServerCallTracerFactory.
Instances of this class can be passed to a Channel as values for the
grpc.experimental.server_call_tracer_factory option
"""
def __init__(self, address):
self._address = address
def __int__(self):
return self._address
class ObservabilityPlugin(
Generic[ClientCallTracerCapsule, ServerCallTracerFactoryCapsule],
metaclass=abc.ABCMeta,
@ -126,19 +141,23 @@ class ObservabilityPlugin(
@abc.abstractmethod
def create_server_call_tracer_factory(
self,
) -> ServerCallTracerFactoryCapsule:
*,
xds: bool = False,
) -> Optional[ServerCallTracerFactoryCapsule]:
"""Creates a ServerCallTracerFactoryCapsule.
After register the plugin, if tracing or stats is enabled, this method
will be called by calling observability_init, the ServerCallTracerFactory
created by this method will be registered to gRPC core.
This method will be called at server initialization time to create a
ServerCallTracerFactory, which will be registered to gRPC core.
The ServerCallTracerFactory is an object which implements
`grpc_core::ServerCallTracerFactory` interface and wrapped in a PyCapsule
using `server_call_tracer_factory` as name.
Args:
xds: Whether the server is xds server.
Returns:
A PyCapsule which stores a ServerCallTracerFactory object.
A PyCapsule which stores a ServerCallTracerFactory object. Or None if
plugin decides not to create ServerCallTracerFactory.
"""
raise NotImplementedError()
@ -244,10 +263,6 @@ def observability_init(observability_plugin: ObservabilityPlugin) -> None:
time of calling this method.
"""
set_plugin(observability_plugin)
try:
_cygrpc.set_server_call_tracer_factory(observability_plugin)
except Exception: # pylint:disable=broad-except
_LOGGER.exception("Failed to set server call tracer factory!")
def observability_deinit() -> None:
@ -291,10 +306,27 @@ def maybe_record_rpc_latency(state: "_channel._RPCState") -> None:
if exclude_prefix in state.method.encode("utf8"):
return
with get_plugin() as plugin:
if not (plugin and plugin.stats_enabled):
return
if plugin and plugin.stats_enabled:
rpc_latency_s = state.rpc_end_time - state.rpc_start_time
rpc_latency_ms = rpc_latency_s * 1000
plugin.record_rpc_latency(
state.method, state.target, rpc_latency_ms, state.code
)
def create_server_call_tracer_factory_option(xds: bool) -> ChannelArgumentType:
with get_plugin() as plugin:
if plugin and plugin.stats_enabled:
server_call_tracer_factory_address = (
_cygrpc.get_server_call_tracer_factory_address(plugin, xds)
)
if server_call_tracer_factory_address:
return (
(
"grpc.experimental.server_call_tracer_factory",
ServerCallTracerFactory(
server_call_tracer_factory_address
),
),
)
return ()

@ -43,6 +43,7 @@ import grpc # pytype: disable=pyi-error
from grpc import _common # pytype: disable=pyi-error
from grpc import _compression # pytype: disable=pyi-error
from grpc import _interceptor # pytype: disable=pyi-error
from grpc import _observability # pytype: disable=pyi-error
from grpc._cython import cygrpc
from grpc._typing import ArityAgnosticMethodHandler
from grpc._typing import ChannelArgumentType
@ -1403,9 +1404,17 @@ def _validate_generic_rpc_handlers(
def _augment_options(
base_options: Sequence[ChannelArgumentType],
compression: Optional[grpc.Compression],
xds: bool,
) -> Sequence[ChannelArgumentType]:
compression_option = _compression.create_channel_option(compression)
return tuple(base_options) + compression_option
maybe_server_call_tracer_factory_option = (
_observability.create_server_call_tracer_factory_option(xds)
)
return (
tuple(base_options)
+ compression_option
+ maybe_server_call_tracer_factory_option
)
class _Server(grpc.Server):
@ -1423,7 +1432,7 @@ class _Server(grpc.Server):
xds: bool,
):
completion_queue = cygrpc.CompletionQueue()
server = cygrpc.Server(_augment_options(options, compression), xds)
server = cygrpc.Server(_augment_options(options, compression, xds), xds)
server.register_completion_queue(completion_queue)
self._state = _ServerState(
completion_queue,

@ -0,0 +1,4 @@
graft src/python/grpcio_csm_observability/grpc_csm_observability.egg-info
graft grpc_csm_observability
include grpc_version.py
include README.rst

@ -0,0 +1,5 @@
gRPC Python CSM Observability
=========================
Package for gRPC Python CSM Observability.
TODO(xuanwn): Add more content.

@ -0,0 +1,36 @@
# Copyright 2024 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("@grpc_python_dependencies//:requirements.bzl", "requirement")
package(default_visibility = ["//:__subpackages__"])
# Since packages in requirement() are non-hermetic,
# csm_observability is for internal use only.
py_library(
name = "csm_observability",
srcs = glob(["*.py"]),
imports = [
".",
"../",
],
srcs_version = "PY3ONLY",
deps = [
requirement("opentelemetry-resourcedetector-gcp"),
requirement("opentelemetry-sdk"),
"//src/python/grpcio/grpc:grpcio",
"//src/python/grpcio_observability/grpc_observability:pyobservability",
"@com_google_protobuf//:protobuf_python",
],
)

@ -0,0 +1,18 @@
# Copyright 2024 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from grpc_csm_observability._csm_observability_plugin import (
CsmOpenTelemetryPlugin,
)
__all__ = ("CsmOpenTelemetryPlugin",)

@ -0,0 +1,343 @@
# Copyright 2024 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import re
from typing import AnyStr, Callable, Dict, Iterable, List, Optional, Union
from google.protobuf import struct_pb2
from grpc_observability._observability import OptionalLabelType
from grpc_observability._open_telemetry_plugin import OpenTelemetryLabelInjector
from grpc_observability._open_telemetry_plugin import OpenTelemetryPlugin
from grpc_observability._open_telemetry_plugin import OpenTelemetryPluginOption
# pytype: disable=pyi-error
from opentelemetry.metrics import MeterProvider
from opentelemetry.resourcedetector.gcp_resource_detector import (
GoogleCloudResourceDetector,
)
from opentelemetry.sdk.resources import Resource
from opentelemetry.semconv.resource import ResourceAttributes
TRAFFIC_DIRECTOR_AUTHORITY = "traffic-director-global.xds.googleapis.com"
UNKNOWN_VALUE = "unknown"
TYPE_GCE = "gcp_compute_engine"
TYPE_GKE = "gcp_kubernetes_engine"
MESH_ID_PREFIX = "mesh:"
METADATA_EXCHANGE_KEY_FIXED_MAP = {
"type": "csm.remote_workload_type",
"canonical_service": "csm.remote_workload_canonical_service",
}
METADATA_EXCHANGE_KEY_GKE_MAP = {
"workload_name": "csm.remote_workload_name",
"namespace_name": "csm.remote_workload_namespace_name",
"cluster_name": "csm.remote_workload_cluster_name",
"location": "csm.remote_workload_location",
"project_id": "csm.remote_workload_project_id",
}
METADATA_EXCHANGE_KEY_GCE_MAP = {
"workload_name": "csm.remote_workload_name",
"location": "csm.remote_workload_location",
"project_id": "csm.remote_workload_project_id",
}
class CSMOpenTelemetryLabelInjector(OpenTelemetryLabelInjector):
"""
An implementation of OpenTelemetryLabelInjector for CSM.
This injector will fetch labels from GCP resource detector and
environment, it's also responsible for serialize and deserialize
metadata exchange labels.
"""
_exchange_labels: Dict[str, AnyStr]
_additional_exchange_labels: Dict[str, str]
def __init__(self):
fields = {}
self._exchange_labels = {}
self._additional_exchange_labels = {}
# Labels from environment
canonical_service_value = os.getenv(
"CSM_CANONICAL_SERVICE_NAME", UNKNOWN_VALUE
)
workload_name_value = os.getenv("CSM_WORKLOAD_NAME", UNKNOWN_VALUE)
gcp_resource = GoogleCloudResourceDetector().detect()
resource_type_value = get_resource_type(gcp_resource)
namespace_value = get_str_value_from_resource(
ResourceAttributes.K8S_NAMESPACE_NAME, gcp_resource
)
cluster_name_value = get_str_value_from_resource(
ResourceAttributes.K8S_CLUSTER_NAME, gcp_resource
)
# ResourceAttributes.CLOUD_AVAILABILITY_ZONE are called
# "zones" on Google Cloud.
location_value = get_str_value_from_resource("cloud.zone", gcp_resource)
if UNKNOWN_VALUE == location_value:
location_value = get_str_value_from_resource(
ResourceAttributes.CLOUD_REGION, gcp_resource
)
project_id_value = get_str_value_from_resource(
ResourceAttributes.CLOUD_ACCOUNT_ID, gcp_resource
)
fields["type"] = struct_pb2.Value(string_value=resource_type_value)
fields["canonical_service"] = struct_pb2.Value(
string_value=canonical_service_value
)
if resource_type_value == TYPE_GKE:
fields["workload_name"] = struct_pb2.Value(
string_value=workload_name_value
)
fields["namespace_name"] = struct_pb2.Value(
string_value=namespace_value
)
fields["cluster_name"] = struct_pb2.Value(
string_value=cluster_name_value
)
fields["location"] = struct_pb2.Value(string_value=location_value)
fields["project_id"] = struct_pb2.Value(
string_value=project_id_value
)
elif resource_type_value == TYPE_GCE:
fields["workload_name"] = struct_pb2.Value(
string_value=workload_name_value
)
fields["location"] = struct_pb2.Value(string_value=location_value)
fields["project_id"] = struct_pb2.Value(
string_value=project_id_value
)
serialized_struct = struct_pb2.Struct(fields=fields)
serialized_str = serialized_struct.SerializeToString()
self._exchange_labels = {"XEnvoyPeerMetadata": serialized_str}
self._additional_exchange_labels[
"csm.workload_canonical_service"
] = canonical_service_value
self._additional_exchange_labels["csm.mesh_id"] = get_mesh_id()
def get_labels_for_exchange(self) -> Dict[str, AnyStr]:
return self._exchange_labels
def get_additional_labels(
self, include_exchange_labels: bool
) -> Dict[str, str]:
if include_exchange_labels:
return self._additional_exchange_labels
else:
return {}
@staticmethod
def deserialize_labels(labels: Dict[str, AnyStr]) -> Dict[str, AnyStr]:
deserialized_labels = {}
for key, value in labels.items():
if "XEnvoyPeerMetadata" == key:
pb_struct = struct_pb2.Struct()
pb_struct.ParseFromString(value)
remote_type = get_value_from_struct("type", pb_struct)
for (
local_key,
remote_key,
) in METADATA_EXCHANGE_KEY_FIXED_MAP.items():
deserialized_labels[remote_key] = get_value_from_struct(
local_key, pb_struct
)
if remote_type == TYPE_GKE:
for (
local_key,
remote_key,
) in METADATA_EXCHANGE_KEY_GKE_MAP.items():
deserialized_labels[remote_key] = get_value_from_struct(
local_key, pb_struct
)
elif remote_type == TYPE_GCE:
for (
local_key,
remote_key,
) in METADATA_EXCHANGE_KEY_GCE_MAP.items():
deserialized_labels[remote_key] = get_value_from_struct(
local_key, pb_struct
)
# If CSM label injector is enabled on server side but client didn't send
# XEnvoyPeerMetadata, we'll record remote label as unknown.
else:
for _, remote_key in METADATA_EXCHANGE_KEY_FIXED_MAP.items():
deserialized_labels[remote_key] = UNKNOWN_VALUE
deserialized_labels[key] = value
return deserialized_labels
class CsmOpenTelemetryPluginOption(OpenTelemetryPluginOption):
"""
An implementation of OpenTelemetryPlugin for CSM.
"""
_label_injector: CSMOpenTelemetryLabelInjector
def __init__(self):
self._label_injector = CSMOpenTelemetryLabelInjector()
@staticmethod
def is_active_on_client_channel(target: str) -> bool:
"""Determines whether this plugin option is active on a channel based on target.
Args:
target: Required. The target for the RPC.
Returns:
True if this this plugin option is active on the channel, false otherwise.
"""
# CSM channels should have an "xds" scheme
if not target.startswith("xds:"):
return False
# If scheme is correct, the authority should be TD if exist
authority_pattern = r"^xds:\/\/([^/]+)"
match = re.search(authority_pattern, target)
if match:
return TRAFFIC_DIRECTOR_AUTHORITY in match.group(1)
else:
# Return True if the authority doesn't exist
return True
@staticmethod
def is_active_on_server(
xds: bool, # pylint: disable=unused-argument
) -> bool:
"""Determines whether this plugin option is active on a given server.
Since servers don't need to be xds enabled to work as part of a service
mesh, we're returning True and enable this PluginOption for all servers.
Note: This always returns true because server can be part of the mesh even
if it's not xds-enabled. And we want CSM labels for those servers too.
Args:
xds: Required. if this server is build for xds.
Returns:
True if this this plugin option is active on the server, false otherwise.
"""
return True
def get_label_injector(self) -> OpenTelemetryLabelInjector:
return self._label_injector
# pylint: disable=no-self-use
class CsmOpenTelemetryPlugin(OpenTelemetryPlugin):
"""Describes a Plugin for CSM OpenTelemetry observability.
This is class is part of an EXPERIMENTAL API.
"""
plugin_options: Iterable[OpenTelemetryPluginOption]
meter_provider: Optional[MeterProvider]
generic_method_attribute_filter: Callable[[str], bool]
def __init__(
self,
*,
plugin_options: Iterable[OpenTelemetryPluginOption] = [],
meter_provider: Optional[MeterProvider] = None,
generic_method_attribute_filter: Optional[Callable[[str], bool]] = None,
):
new_options = list(plugin_options) + [CsmOpenTelemetryPluginOption()]
super().__init__(
plugin_options=new_options,
meter_provider=meter_provider,
generic_method_attribute_filter=generic_method_attribute_filter,
)
def _get_enabled_optional_labels(self) -> List[OptionalLabelType]:
return [OptionalLabelType.XDS_SERVICE_LABELS]
def get_value_from_struct(key: str, struct: struct_pb2.Struct) -> str:
value = struct.fields.get(key)
if not value:
return UNKNOWN_VALUE
return value.string_value
def get_str_value_from_resource(
attribute: Union[ResourceAttributes, str], resource: Resource
) -> str:
value = resource.attributes.get(attribute, UNKNOWN_VALUE)
return str(value)
# pylint: disable=line-too-long
def get_resource_type(gcp_resource: Resource) -> str:
# Convert resource type from GoogleCloudResourceDetector to the value we used for
# metadata exchange.
# Reference: https://github.com/GoogleCloudPlatform/opentelemetry-operations-python/blob/cc61f23a5ff2f16f4aa2c38d07e55153828849cc/opentelemetry-resourcedetector-gcp/src/opentelemetry/resourcedetector/gcp_resource_detector/__init__.py#L96
gcp_resource_type = get_str_value_from_resource(
"gcp.resource_type", gcp_resource
)
if gcp_resource_type == "gke_container":
return TYPE_GKE
elif gcp_resource_type == "gce_instance":
return TYPE_GCE
else:
return gcp_resource_type
# Returns the mesh ID by reading and parsing the bootstrap file. Returns "unknown"
# if for some reason, mesh ID could not be figured out.
def get_mesh_id() -> str:
config_contents = get_bootstrap_config_contents()
try:
config_json = json.loads(config_contents)
# The expected format of the Node ID is -
# projects/[GCP Project number]/networks/mesh:[Mesh ID]/nodes/[UUID]
node_id_parts = config_json.get("node", {}).get("id", "").split("/")
if len(node_id_parts) == 6 and node_id_parts[3].startswith(
MESH_ID_PREFIX
):
return node_id_parts[3][len(MESH_ID_PREFIX) :]
except json.decoder.JSONDecodeError:
return UNKNOWN_VALUE
return UNKNOWN_VALUE
def get_bootstrap_config_contents() -> str:
"""Get the contents of the bootstrap config from environment variable or file.
Returns:
The content from environment variable. Or empty str if no config was found.
"""
contents_str = ""
for source in ("GRPC_XDS_BOOTSTRAP", "GRPC_XDS_BOOTSTRAP_CONFIG"):
config = os.getenv(source)
if config:
if os.path.isfile(config): # Prioritize file over raw config
with open(config, "r") as f:
contents_str = f.read()
else:
contents_str = config
return contents_str

@ -0,0 +1,17 @@
# Copyright 2024 The gRPC Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# AUTO-GENERATED FROM `$REPO_ROOT/templates/src/python/grpcio_csm_observability/grpc_version.py.template`!!!
VERSION = '1.65.0.dev0'

@ -0,0 +1,63 @@
# Copyright 2024 The gRPC Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import setuptools
_PACKAGE_PATH = os.path.realpath(os.path.dirname(__file__))
_README_PATH = os.path.join(_PACKAGE_PATH, "README.rst")
# Ensure we're in the proper directory whether or not we're being used by pip.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
import grpc_version
CLASSIFIERS = [
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: Apache Software License",
]
PACKAGE_DIRECTORIES = {
"": ".",
}
INSTALL_REQUIRES = (
"opentelemetry-sdk>=1.24.0",
"opentelemetry-resourcedetector-gcp>=1.6.0a0",
"grpcio=={version}".format(version=grpc_version.VERSION),
"protobuf>=5.26.1,<6.0dev",
)
setuptools.setup(
name="grpcio-csm-observability",
version=grpc_version.VERSION,
description="gRPC Python CSM observability package",
long_description=open(_README_PATH, "r").read(),
author="The gRPC Authors",
author_email="grpc-io@googlegroups.com",
url="https://grpc.io",
project_urls={
"Source Code": "https://github.com/grpc/grpc/tree/master/src/python/grpcio_csm_observability",
"Bug Tracker": "https://github.com/grpc/grpc/issues",
},
license="Apache License 2.0",
classifiers=CLASSIFIERS,
package_dir=PACKAGE_DIRECTORIES,
packages=setuptools.find_packages("."),
python_requires=">=3.8",
install_requires=INSTALL_REQUIRES,
)

@ -21,6 +21,7 @@ cc_library(
name = "observability",
srcs = [
"client_call_tracer.cc",
"metadata_exchange.cc",
"observability_util.cc",
"python_observability_context.cc",
"rpc_encoding.cc",
@ -30,6 +31,7 @@ cc_library(
hdrs = [
"client_call_tracer.h",
"constants.h",
"metadata_exchange.h",
"observability_util.h",
"python_observability_context.h",
"rpc_encoding.h",

@ -59,6 +59,7 @@ cdef extern from "python_observability_context.h" namespace "grpc_observability"
MeasurementType type
MeasurementValue value
bint registered_method
bint include_exchange_labels
ctypedef struct SpanCensusData:
string name
@ -80,8 +81,11 @@ cdef extern from "observability_util.h" namespace "grpc_observability":
const char* target,
const char* trace_id,
const char* parent_span_id,
const char* identifier,
const vector[Label] exchange_labels,
bint add_csm_optional_labels,
bint registered_method) except +
cdef void* CreateServerCallTracerFactory() except +
cdef void* CreateServerCallTracerFactory(const vector[Label] exchange_labels, const char* identifier) except +
cdef queue[NativeCensusData]* g_census_data_buffer
cdef void AwaitNextBatchLocked(unique_lock[mutex]&, int) nogil
cdef bint PythonCensusStatsEnabled() nogil
@ -91,6 +95,7 @@ cdef extern from "observability_util.h" namespace "grpc_observability":
cppclass NativeCensusData "::grpc_observability::CensusData":
DataType type
string identifier
Measurement measurement_data
SpanCensusData span_data
vector[Label] labels

@ -20,7 +20,7 @@ import functools
import logging
import os
from threading import Thread
from typing import List, Mapping, Tuple, Union
from typing import AnyStr, Dict, List, Mapping, Tuple, Union
from grpc_observability import _observability
@ -33,6 +33,8 @@ cdef const char* SERVER_CALL_TRACER_FACTORY = "server_call_tracer_factory"
cdef bint GLOBAL_SHUTDOWN_EXPORT_THREAD = False
cdef object GLOBAL_EXPORT_THREAD
PLUGIN_IDENTIFIER_SEP = ","
_LOGGER = logging.getLogger(__name__)
@ -114,10 +116,9 @@ def activate_config(object py_config) -> None:
def activate_stats() -> None:
EnablePythonCensusStats(True);
def create_client_call_tracer(bytes method_name, bytes target,
bytes trace_id, bint registered_method,
bytes parent_span_id=b'') -> cpython.PyObject:
def create_client_call_tracer(bytes method_name, bytes target, bytes trace_id, str identifier,
dict exchange_labels, object enabled_optional_labels,
bint registered_method, bytes parent_span_id=b'') -> cpython.PyObject:
"""Create a ClientCallTracer and save to PyCapsule.
Returns: A grpc_observability._observability.ClientCallTracerCapsule object.
@ -126,18 +127,30 @@ def create_client_call_tracer(bytes method_name, bytes target,
cdef char* c_target = cpython.PyBytes_AsString(target)
cdef char* c_trace_id = cpython.PyBytes_AsString(trace_id)
cdef char* c_parent_span_id = cpython.PyBytes_AsString(parent_span_id)
cdef void* call_tracer = CreateClientCallTracer(c_method, c_target, c_trace_id, c_parent_span_id, registered_method)
identifier_bytes = _encode(identifier)
cdef char* c_identifier = cpython.PyBytes_AsString(identifier_bytes)
cdef vector[Label] c_labels = _labels_to_c_labels(exchange_labels)
cdef bint add_csm_optional_labels = False
for label_type in enabled_optional_labels:
if label_type == _observability.OptionalLabelType.XDS_SERVICE_LABELS:
add_csm_optional_labels = True
cdef void* call_tracer = CreateClientCallTracer(c_method, c_target, c_trace_id, c_parent_span_id,
c_identifier, c_labels, add_csm_optional_labels,
registered_method)
capsule = cpython.PyCapsule_New(call_tracer, CLIENT_CALL_TRACER, NULL)
return capsule
def create_server_call_tracer_factory_capsule() -> cpython.PyObject:
def create_server_call_tracer_factory_capsule(dict exchange_labels, str identifier) -> cpython.PyObject:
"""Create a ServerCallTracerFactory and save to PyCapsule.
Returns: A grpc_observability._observability.ServerCallTracerFactoryCapsule object.
"""
cdef void* call_tracer_factory = CreateServerCallTracerFactory()
cdef vector[Label] c_labels = _labels_to_c_labels(exchange_labels)
cdef char* c_identifier = cpython.PyBytes_AsString(_encode(identifier))
cdef void* call_tracer_factory = CreateServerCallTracerFactory(c_labels, c_identifier)
capsule = cpython.PyCapsule_New(call_tracer_factory, SERVER_CALL_TRACER_FACTORY, NULL)
return capsule
@ -151,13 +164,25 @@ def delete_client_call_tracer(object client_call_tracer) -> None:
del call_tracer_ptr
def _c_label_to_labels(vector[Label] c_labels) -> Mapping[str, str]:
def _c_label_to_labels(vector[Label] c_labels) -> Dict[str, AnyStr]:
py_labels = {}
for label in c_labels:
py_labels[_decode(label.key)] = _decode(label.value)
py_labels[_decode(label.key)] = label.value
return py_labels
def _labels_to_c_labels(dict py_labels) -> vector[Label]:
cdef vector[Label] c_labels
cdef Label label
for key, value in py_labels.items():
label.key = _encode(key)
label.value = _encode(value)
c_labels.push_back(label)
return c_labels
def _c_measurement_to_measurement(object measurement
) -> Mapping[str, Union[enum, Mapping[str, Union[float, int], bool]]]:
"""Convert Cython Measurement to Python measurement.
@ -171,6 +196,7 @@ def _c_measurement_to_measurement(object measurement
name -> cMetricsName
type -> MeasurementType
registered_method -> bool
include_exchange_labels -> bool
value -> {value_double: float | value_int: int}
"""
measurement: Measurement
@ -179,6 +205,7 @@ def _c_measurement_to_measurement(object measurement
py_measurement['name'] = measurement['name']
py_measurement['type'] = measurement['type']
py_measurement['registered_method'] = measurement['registered_method']
py_measurement['include_exchange_labels'] = measurement['include_exchange_labels']
if measurement['type'] == kMeasurementDouble:
py_measurement['value'] = {'value_double': measurement['value']['value_double']}
else:
@ -208,7 +235,7 @@ def _cy_metric_name_to_py_metric_name(cMetricsName metric_name) -> MetricsName:
raise ValueError('Invalid metric name %s' % metric_name)
def _get_stats_data(object measurement, object labels) -> _observability.StatsData:
def _get_stats_data(object measurement, object labels, object identifier) -> _observability.StatsData:
"""Convert a Python measurement to StatsData.
Args:
@ -216,23 +243,31 @@ def _get_stats_data(object measurement, object labels) -> _observability.StatsDa
with keys and values as following:
name -> cMetricsName
type -> MeasurementType
registered_method -> bool
include_exchange_labels -> bool
value -> {value_double: float | value_int: int}
labels: Labels assciociated with stats data with type of dict[str, str].
labels: Labels assciociated with stats data with type of Mapping[str, AnyStr].
identifier: Specifies the plugins associated with this stats data.
"""
measurement: Measurement
labels: Mapping[str, str]
labels: Mapping[str, AnyStr]
metric_name = _cy_metric_name_to_py_metric_name(measurement['name'])
identifiers = set(identifier.split(PLUGIN_IDENTIFIER_SEP))
if measurement['type'] == kMeasurementDouble:
py_stat = _observability.StatsData(name=metric_name, measure_double=True,
value_float=measurement['value']['value_double'],
labels=labels,
identifiers=identifiers,
registered_method=measurement['registered_method'],
labels=labels)
include_exchange_labels=measurement['include_exchange_labels'],)
else:
py_stat = _observability.StatsData(name=metric_name, measure_double=False,
value_int=measurement['value']['value_int'],
labels=labels,
identifiers=identifiers,
registered_method=measurement['registered_method'],
labels=labels)
include_exchange_labels=measurement['include_exchange_labels'],)
return py_stat
@ -253,8 +288,8 @@ def _get_tracing_data(SpanCensusData span_data, vector[Label] span_labels,
span_annotations = py_span_annotations)
def _record_rpc_latency(object exporter, str method, str target,
float rpc_latency, str status_code, bint registered_method) -> None:
def _record_rpc_latency(object exporter, str method, str target, float rpc_latency,
str status_code, str identifier, bint registered_method) -> None:
exporter: _observability.Exporter
measurement = {}
@ -262,12 +297,13 @@ def _record_rpc_latency(object exporter, str method, str target,
measurement['type'] = kMeasurementDouble
measurement['value'] = {'value_double': rpc_latency}
measurement['registered_method'] = registered_method
measurement['include_exchange_labels'] = False
labels = {}
labels[_decode(kClientMethod)] = method.strip("/")
labels[_decode(kClientTarget)] = target
labels[_decode(kClientStatus)] = status_code
metric = _get_stats_data(measurement, labels)
metric = _get_stats_data(measurement, labels, identifier)
exporter.export_stats_data([metric])
@ -313,8 +349,9 @@ cdef void _flush_census_data(object exporter):
c_census_data = g_census_data_buffer.front()
if c_census_data.type == kMetricData:
py_labels = _c_label_to_labels(c_census_data.labels)
py_identifier = _decode(c_census_data.identifier)
py_measurement = _c_measurement_to_measurement(c_census_data.measurement_data)
py_metric = _get_stats_data(py_measurement, py_labels)
py_metric = _get_stats_data(py_measurement, py_labels, py_identifier)
py_metrics_batch.append(py_metric)
else:
py_span = _get_tracing_data(c_census_data.span_data, c_census_data.span_data.span_labels,
@ -344,3 +381,14 @@ cdef str _decode(bytes bytestring):
except UnicodeDecodeError:
_LOGGER.exception('Invalid encoding on %s', bytestring)
return bytestring.decode('latin1')
cdef bytes _encode(object string_or_none):
if string_or_none is None:
return b''
elif isinstance(string_or_none, (bytes,)):
return <bytes>string_or_none
elif isinstance(string_or_none, (unicode,)):
return string_or_none.encode('utf8')
else:
raise TypeError('Expected str, not {}'.format(type(string_or_none)))

@ -1,167 +0,0 @@
# Copyright 2023 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import logging
import time
from typing import Any, Set
import grpc
# pytype: disable=pyi-error
from grpc_observability import _cyobservability
from grpc_observability import _observability_config
_LOGGER = logging.getLogger(__name__)
ClientCallTracerCapsule = Any # it appears only once in the function signature
ServerCallTracerFactoryCapsule = (
Any # it appears only once in the function signature
)
grpc_observability = Any # grpc_observability.py imports this module.
GRPC_STATUS_CODE_TO_STRING = {
grpc.StatusCode.OK: "OK",
grpc.StatusCode.CANCELLED: "CANCELLED",
grpc.StatusCode.UNKNOWN: "UNKNOWN",
grpc.StatusCode.INVALID_ARGUMENT: "INVALID_ARGUMENT",
grpc.StatusCode.DEADLINE_EXCEEDED: "DEADLINE_EXCEEDED",
grpc.StatusCode.NOT_FOUND: "NOT_FOUND",
grpc.StatusCode.ALREADY_EXISTS: "ALREADY_EXISTS",
grpc.StatusCode.PERMISSION_DENIED: "PERMISSION_DENIED",
grpc.StatusCode.UNAUTHENTICATED: "UNAUTHENTICATED",
grpc.StatusCode.RESOURCE_EXHAUSTED: "RESOURCE_EXHAUSTED",
grpc.StatusCode.FAILED_PRECONDITION: "FAILED_PRECONDITION",
grpc.StatusCode.ABORTED: "ABORTED",
grpc.StatusCode.OUT_OF_RANGE: "OUT_OF_RANGE",
grpc.StatusCode.UNIMPLEMENTED: "UNIMPLEMENTED",
grpc.StatusCode.INTERNAL: "INTERNAL",
grpc.StatusCode.UNAVAILABLE: "UNAVAILABLE",
grpc.StatusCode.DATA_LOSS: "DATA_LOSS",
}
# pylint: disable=no-self-use
class GCPOpenCensusObservability(grpc._observability.ObservabilityPlugin):
"""GCP OpenCensus based plugin implementation.
If no exporter is passed, the default will be OpenCensus StackDriver
based exporter.
For more details, please refer to User Guide:
* https://cloud.google.com/stackdriver/docs/solutions/grpc
Attributes:
config: Configuration for GCP OpenCensus Observability.
exporter: Exporter used to export data.
"""
config: _observability_config.GcpObservabilityConfig
exporter: "grpc_observability.Exporter"
_registered_method: Set[bytes]
def __init__(self, exporter: "grpc_observability.Exporter" = None):
self.exporter = None
self.config = None
try:
self.config = _observability_config.read_config()
_cyobservability.activate_config(self.config)
except Exception as e: # pylint: disable=broad-except
raise ValueError(f"Reading configuration failed with: {e}")
if exporter:
self.exporter = exporter
else:
raise ValueError(f"Please provide an exporter!")
if self.config.tracing_enabled:
self.set_tracing(True)
if self.config.stats_enabled:
self.set_stats(True)
def __enter__(self):
try:
_cyobservability.cyobservability_init(self.exporter)
# TODO(xuanwn): Use specific exceptons
except Exception as e: # pylint: disable=broad-except
_LOGGER.exception("GCPOpenCensusObservability failed with: %s", e)
grpc._observability.observability_init(self)
return self
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
self.exit()
def exit(self) -> None:
# Sleep so we don't loss any data. If we shutdown export thread
# immediately after exit, it's possible that core didn't call RecordEnd
# in callTracer, and all data recorded by calling RecordEnd will be
# lost.
# CENSUS_EXPORT_BATCH_INTERVAL_SECS: The time equals to the time in
# AwaitNextBatchLocked.
# TODO(xuanwn): explicit synchronization
# https://github.com/grpc/grpc/issues/33262
time.sleep(_cyobservability.CENSUS_EXPORT_BATCH_INTERVAL_SECS)
self.set_tracing(False)
self.set_stats(False)
_cyobservability.observability_deinit()
grpc._observability.observability_deinit()
def create_client_call_tracer(
self, method_name: bytes, target: bytes
) -> ClientCallTracerCapsule:
trace_id = b"TRACE_ID"
capsule = _cyobservability.create_client_call_tracer(
method_name,
target,
trace_id,
method_name in self._registered_methods,
)
return capsule
def create_server_call_tracer_factory(
self,
) -> ServerCallTracerFactoryCapsule:
capsule = _cyobservability.create_server_call_tracer_factory_capsule()
return capsule
def delete_client_call_tracer(
self, client_call_tracer: ClientCallTracerCapsule
) -> None:
_cyobservability.delete_client_call_tracer(client_call_tracer)
def save_trace_context(
self, trace_id: str, span_id: str, is_sampled: bool
) -> None:
pass
def record_rpc_latency(
self,
method: str,
target: str,
rpc_latency: float,
status_code: grpc.StatusCode,
) -> None:
status_code = GRPC_STATUS_CODE_TO_STRING.get(status_code, "UNKNOWN")
_cyobservability._record_rpc_latency(
self.exporter,
method,
target,
rpc_latency,
status_code,
method in self._registered_methods,
)
def save_registered_method(self, method_name: bytes) -> None:
self._registered_methods.add(method_name)

@ -16,7 +16,8 @@ from __future__ import annotations
import abc
from dataclasses import dataclass
from dataclasses import field
from typing import List, Mapping, Tuple
import enum
from typing import AnyStr, Dict, List, Mapping, Set, Tuple
class Exporter(metaclass=abc.ABCMeta):
@ -52,18 +53,23 @@ class StatsData:
value.
value_int: The actual metric value if measure_double is False.
value_float: The actual metric value if measure_double is True.
registered_method: Whether the method in this data is a registered method
in stubs.
include_exchange_labels: Whether this data should include exchanged labels.
labels: A dictionary that maps label tags associated with this metric to
corresponding label value.
identifiers: A set of strings identifying which stats plugins this StatsData
belongs to.
registered_method: Whether the method in this data is a registered method
in stubs.
"""
name: "grpc_observability._cyobservability.MetricsName"
measure_double: bool
value_int: int = 0
value_float: float = 0.0
include_exchange_labels: bool = False
labels: Dict[str, AnyStr] = field(default_factory=dict)
identifiers: Set[str] = field(default_factory=set)
registered_method: bool = False
labels: Mapping[str, str] = field(default_factory=dict)
@dataclass(frozen=True)
@ -102,5 +108,12 @@ class TracingData:
status: str
should_sample: bool
child_span_count: int
span_labels: Mapping[str, str] = field(default_factory=dict)
span_labels: Mapping[str, AnyStr] = field(default_factory=dict)
span_annotations: List[Tuple[str, str]] = field(default_factory=list)
@enum.unique
class OptionalLabelType(enum.Enum):
"""What kinds of optional labels to add to metrics."""
XDS_SERVICE_LABELS = "kXdsServiceLabels"

@ -87,7 +87,8 @@ def base_metrics() -> List[Metric]:
return [
CLIENT_ATTEMPT_STARTED,
CLIENT_ATTEMPT_DURATION,
CLIENT_RPC_DURATION,
# CLIENT_RPC_DURATION is not required yet
# CLIENT_RPC_DURATION,
CLIENT_ATTEMPT_SEND_BYTES,
CLIENT_ATTEMPT_RECEIVED_BYTES,
SERVER_STARTED_RPCS,

@ -15,7 +15,7 @@
import logging
import threading
import time
from typing import Any, Dict, Iterable, List, Optional, Set, Union
from typing import Any, AnyStr, Dict, Iterable, List, Optional, Set, Union
import grpc
@ -24,6 +24,8 @@ from grpc_observability import _cyobservability
from grpc_observability import _observability
from grpc_observability import _open_telemetry_measures
from grpc_observability._cyobservability import MetricsName
from grpc_observability._cyobservability import PLUGIN_IDENTIFIER_SEP
from grpc_observability._observability import OptionalLabelType
from grpc_observability._observability import StatsData
from opentelemetry.metrics import Counter
from opentelemetry.metrics import Histogram
@ -37,9 +39,13 @@ ServerCallTracerFactoryCapsule = (
)
grpc_observability = Any # grpc_observability.py imports this module.
OpenTelemetryPlugin = Any # _open_telemetry_plugin.py imports this module.
OpenTelemetryPluginOption = (
Any # _open_telemetry_plugin.py imports this module.
)
GRPC_METHOD_LABEL = "grpc.method"
GRPC_TARGET_LABEL = "grpc.target"
GRPC_CLIENT_METRIC_PREFIX = "grpc.client"
GRPC_OTHER_LABEL_VALUE = "other"
_observability_lock: threading.RLock = threading.RLock()
_OPEN_TELEMETRY_OBSERVABILITY: Optional["OpenTelemetryObservability"] = None
@ -68,10 +74,16 @@ GRPC_STATUS_CODE_TO_STRING = {
class _OpenTelemetryPlugin:
_plugin: OpenTelemetryPlugin
_metric_to_recorder: Dict[MetricsName, Union[Counter, Histogram]]
_enabled_client_plugin_options: Optional[List[OpenTelemetryPluginOption]]
_enabled_server_plugin_options: Optional[List[OpenTelemetryPluginOption]]
identifier: str
def __init__(self, plugin: OpenTelemetryPlugin):
self._plugin = plugin
self._metric_to_recorder = dict()
self.identifier = str(id(self))
self._enabled_client_plugin_options = None
self._enabled_server_plugin_options = None
meter_provider = self._plugin.meter_provider
if meter_provider:
@ -87,16 +99,38 @@ class _OpenTelemetryPlugin:
def _record_stats_data(self, stats_data: StatsData) -> None:
recorder = self._metric_to_recorder[stats_data.name]
enabled_plugin_options = []
if GRPC_CLIENT_METRIC_PREFIX in recorder.name:
enabled_plugin_options = self._enabled_client_plugin_options
else:
enabled_plugin_options = self._enabled_server_plugin_options
# Only deserialize labels if we need add exchanged labels.
if stats_data.include_exchange_labels:
deserialized_labels = self._deserialize_labels(
stats_data.labels, enabled_plugin_options
)
else:
deserialized_labels = stats_data.labels
labels = self._maybe_add_labels(
stats_data.include_exchange_labels,
deserialized_labels,
enabled_plugin_options,
)
decoded_labels = self.decode_labels(labels)
target = stats_data.labels.get(GRPC_TARGET_LABEL, "")
target = decoded_labels.get(GRPC_TARGET_LABEL, "")
if not self._plugin.target_attribute_filter(target):
# Filter target name.
stats_data.labels[GRPC_TARGET_LABEL] = GRPC_OTHER_LABEL_VALUE
decoded_labels[GRPC_TARGET_LABEL] = GRPC_OTHER_LABEL_VALUE
method = stats_data.labels.get(GRPC_METHOD_LABEL, "")
if not self._plugin.generic_method_attribute_filter(method):
# Filter method name.
stats_data.labels[GRPC_METHOD_LABEL] = GRPC_OTHER_LABEL_VALUE
method = decoded_labels.get(GRPC_METHOD_LABEL, "")
if not (
stats_data.registered_method
or self._plugin.generic_method_attribute_filter(method)
):
# Filter method name if it's not registered method and
# generic_method_attribute_filter returns false.
decoded_labels[GRPC_METHOD_LABEL] = GRPC_OTHER_LABEL_VALUE
value = 0
if stats_data.measure_double:
@ -104,18 +138,109 @@ class _OpenTelemetryPlugin:
else:
value = stats_data.value_int
if isinstance(recorder, Counter):
recorder.add(value, attributes=stats_data.labels)
recorder.add(value, attributes=decoded_labels)
elif isinstance(recorder, Histogram):
recorder.record(value, attributes=stats_data.labels)
recorder.record(value, attributes=decoded_labels)
# pylint: disable=no-self-use
def maybe_record_stats_data(self, stats_data: List[StatsData]) -> None:
# Records stats data to MeterProvider.
if self._should_record(stats_data):
self._record_stats_data(stats_data)
def get_client_exchange_labels(self) -> Dict[str, AnyStr]:
"""Get labels used for client side Metadata Exchange."""
labels_for_exchange = {}
for plugin_option in self._enabled_client_plugin_options:
if hasattr(plugin_option, "get_label_injector") and hasattr(
plugin_option.get_label_injector(), "get_labels_for_exchange"
):
labels_for_exchange.update(
plugin_option.get_label_injector().get_labels_for_exchange()
)
return labels_for_exchange
def get_server_exchange_labels(self) -> Dict[str, str]:
"""Get labels used for server side Metadata Exchange."""
labels_for_exchange = {}
for plugin_option in self._enabled_server_plugin_options:
if hasattr(plugin_option, "get_label_injector") and hasattr(
plugin_option.get_label_injector(), "get_labels_for_exchange"
):
labels_for_exchange.update(
plugin_option.get_label_injector().get_labels_for_exchange()
)
return labels_for_exchange
def activate_client_plugin_options(self, target: bytes) -> None:
"""Activate client plugin options based on option settings."""
target_str = target.decode("utf-8", "replace")
if not self._enabled_client_plugin_options:
self._enabled_client_plugin_options = []
for plugin_option in self._plugin.plugin_options:
if hasattr(
plugin_option, "is_active_on_client_channel"
) and plugin_option.is_active_on_client_channel(target_str):
self._enabled_client_plugin_options.append(plugin_option)
def activate_server_plugin_options(self, xds: bool) -> None:
"""Activate server plugin options based on option settings."""
if not self._enabled_server_plugin_options:
self._enabled_server_plugin_options = []
for plugin_option in self._plugin.plugin_options:
if hasattr(
plugin_option, "is_active_on_server"
) and plugin_option.is_active_on_server(xds):
self._enabled_server_plugin_options.append(plugin_option)
@staticmethod
def _deserialize_labels(
labels: Dict[str, AnyStr],
enabled_plugin_options: List[OpenTelemetryPluginOption],
) -> Dict[str, AnyStr]:
for plugin_option in enabled_plugin_options:
if all(
[
hasattr(plugin_option, "get_label_injector"),
hasattr(
plugin_option.get_label_injector(), "deserialize_labels"
),
]
):
labels = plugin_option.get_label_injector().deserialize_labels(
labels
)
return labels
@staticmethod
def _maybe_add_labels(
include_exchange_labels: bool,
labels: Dict[str, str],
enabled_plugin_options: List[OpenTelemetryPluginOption],
) -> Dict[str, AnyStr]:
for plugin_option in enabled_plugin_options:
if all(
[
hasattr(plugin_option, "get_label_injector"),
hasattr(
plugin_option.get_label_injector(),
"get_additional_labels",
),
]
):
labels.update(
plugin_option.get_label_injector().get_additional_labels(
include_exchange_labels
)
)
return labels
def get_enabled_optional_labels(self) -> List[OptionalLabelType]:
return self._plugin._get_enabled_optional_labels()
@staticmethod
def _register_metrics(
self, meter: Meter, metrics: List[_open_telemetry_measures.Metric]
meter: Meter, metrics: List[_open_telemetry_measures.Metric]
) -> Dict[MetricsName, Union[Counter, Histogram]]:
metric_to_recorder_map = {}
recorder = None
@ -179,6 +304,15 @@ class _OpenTelemetryPlugin:
metric_to_recorder_map[metric.cyname] = recorder
return metric_to_recorder_map
@staticmethod
def decode_labels(labels: Dict[str, AnyStr]) -> Dict[str, str]:
decoded_labels = {}
for key, value in labels.items():
if isinstance(value, bytes):
value = value.decode()
decoded_labels[key] = value
return decoded_labels
def start_open_telemetry_observability(
*,
@ -220,19 +354,25 @@ class OpenTelemetryObservability(grpc._observability.ObservabilityPlugin):
This is class is part of an EXPERIMENTAL API.
Args:
plugin: _OpenTelemetryPlugin to enable.
plugins: _OpenTelemetryPlugins to enable.
"""
exporter: "grpc_observability.Exporter"
_exporter: "grpc_observability.Exporter"
_plugins: List[_OpenTelemetryPlugin]
_registered_method: Set[bytes]
_client_option_activated: bool
_server_option_activated: bool
def __init__(
self,
*,
plugins: Optional[Iterable[_OpenTelemetryPlugin]],
):
self.exporter = _OpenTelemetryExporterDelegator(plugins)
self._exporter = _OpenTelemetryExporterDelegator(plugins)
self._registered_methods = set()
self._plugins = plugins
self._client_option_activated = False
self._server_option_activated = False
def observability_init(self):
try:
@ -242,7 +382,7 @@ class OpenTelemetryObservability(grpc._observability.ObservabilityPlugin):
raise ValueError(f"Activate observability metrics failed with: {e}")
try:
_cyobservability.cyobservability_init(self.exporter)
_cyobservability.cyobservability_init(self._exporter)
# TODO(xuanwn): Use specific exceptons
except Exception as e: # pylint: disable=broad-except
_LOGGER.exception("Initiate observability failed with: %s", e)
@ -268,18 +408,34 @@ class OpenTelemetryObservability(grpc._observability.ObservabilityPlugin):
self, method_name: bytes, target: bytes
) -> ClientCallTracerCapsule:
trace_id = b"TRACE_ID"
registered_method = False
if method_name in self._registered_methods:
registered_method = True
self._maybe_activate_client_plugin_options(target)
exchange_labels = self._get_client_exchange_labels()
enabled_optional_labels = set()
for plugin in self._plugins:
enabled_optional_labels.update(plugin.get_enabled_optional_labels())
capsule = _cyobservability.create_client_call_tracer(
method_name, target, trace_id, registered_method
method_name,
target,
trace_id,
self._get_identifier(),
exchange_labels,
enabled_optional_labels,
method_name in self._registered_methods,
)
return capsule
def create_server_call_tracer_factory(
self,
) -> ServerCallTracerFactoryCapsule:
capsule = _cyobservability.create_server_call_tracer_factory_capsule()
*,
xds: bool = False,
) -> Optional[ServerCallTracerFactoryCapsule]:
capsule = None
self._maybe_activate_server_plugin_options(xds)
exchange_labels = self._get_server_exchange_labels()
capsule = _cyobservability.create_server_call_tracer_factory_capsule(
exchange_labels, self._get_identifier()
)
return capsule
def delete_client_call_tracer(
@ -300,22 +456,53 @@ class OpenTelemetryObservability(grpc._observability.ObservabilityPlugin):
status_code: grpc.StatusCode,
) -> None:
status_code = GRPC_STATUS_CODE_TO_STRING.get(status_code, "UNKNOWN")
registered_method = False
encoded_method = method.encode("utf8")
if encoded_method in self._registered_methods:
registered_method = True
_cyobservability._record_rpc_latency(
self.exporter,
self._exporter,
method,
target,
rpc_latency,
status_code,
registered_method,
self._get_identifier(),
encoded_method in self._registered_methods,
)
def save_registered_method(self, method_name: bytes) -> None:
self._registered_methods.add(method_name)
def _get_client_exchange_labels(self) -> Dict[str, AnyStr]:
client_exchange_labels = {}
for _plugin in self._plugins:
client_exchange_labels.update(_plugin.get_client_exchange_labels())
return client_exchange_labels
def _get_server_exchange_labels(self) -> Dict[str, AnyStr]:
server_exchange_labels = {}
for _plugin in self._plugins:
server_exchange_labels.update(_plugin.get_server_exchange_labels())
return server_exchange_labels
def _maybe_activate_client_plugin_options(self, target: bytes) -> None:
if not self._client_option_activated:
for _plugin in self._plugins:
_plugin.activate_client_plugin_options(target)
self._client_option_activated = True
def _maybe_activate_server_plugin_options(self, xds: bool) -> None:
if not self._server_option_activated:
for _plugin in self._plugins:
_plugin.activate_server_plugin_options(xds)
self._server_option_activated = True
def _get_identifier(self) -> str:
plugin_identifiers = []
for _plugin in self._plugins:
plugin_identifiers.append(_plugin.identifier)
return PLUGIN_IDENTIFIER_SEP.join(plugin_identifiers)
def get_enabled_optional_labels(self) -> List[OptionalLabelType]:
return []
def _start_open_telemetry_observability(
otel_o11y: OpenTelemetryObservability,

@ -12,70 +12,79 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
from typing import Callable, Dict, Iterable, List, Optional
from typing import AnyStr, Callable, Dict, Iterable, List, Optional
# pytype: disable=pyi-error
from grpc_observability import _open_telemetry_observability
from grpc_observability._observability import OptionalLabelType
from opentelemetry.metrics import MeterProvider
GRPC_METHOD_LABEL = "grpc.method"
GRPC_TARGET_LABEL = "grpc.target"
GRPC_CLIENT_METRIC_PREFIX = "grpc.client"
GRPC_OTHER_LABEL_VALUE = "other"
class OpenTelemetryLabelInjector(abc.ABC):
class OpenTelemetryLabelInjector:
"""
An interface that allows you to add additional labels on the calls traced.
Please note that this class is still work in progress and NOT READY to be used.
"""
_labels: List[Dict[str, str]]
def __init__(self):
# Calls Python OTel API to detect resource and get labels, save
# those lables to OpenTelemetryLabelInjector.labels.
pass
@abc.abstractmethod
def get_labels(self):
# Get additional labels for this OpenTelemetryLabelInjector.
raise NotImplementedError()
def get_labels_for_exchange(self) -> Dict[str, AnyStr]:
"""
Get labels used for metadata exchange.
class OpenTelemetryPluginOption(abc.ABC):
Returns:
A dict of labels, with a string as key representing label name, string or bytes
as value representing label value.
"""
An interface that allows you to add additional function to OpenTelemetryPlugin.
raise NotImplementedError()
Please note that this class is still work in progress and NOT READY to be used.
def get_additional_labels(
self, include_exchange_labels: bool
) -> Dict[str, str]:
"""
Get additional labels added by this injector.
@abc.abstractmethod
def is_active_on_method(self, method: str) -> bool:
"""Determines whether this plugin option is active on a given method.
The return value from this method will be added directly to metric data.
Args:
method: Required. The RPC method, for example: `/helloworld.Greeter/SayHello`.
include_exchange_labels: Whether to add additional metadata exchange related labels.
Returns:
True if this this plugin option is active on the giving method, false otherwise.
A dict of labels.
"""
raise NotImplementedError()
@abc.abstractmethod
def is_active_on_server(self, channel_args: List[str]) -> bool:
"""Determines whether this plugin option is active on a given server.
# pylint: disable=no-self-use
def deserialize_labels(
self, labels: Dict[str, AnyStr]
) -> Dict[str, AnyStr]:
"""
Deserialize the labels if required.
Args:
channel_args: Required. The channel args used for server.
TODO(xuanwn): detail on what channel_args will contain.
If this injector added labels for metadata exchange, this method will be called to
deserialize the exchanged labels.
For example, if this injector added xds_peer_metadata_label for exchange:
labels: {"labelA": b"valueA", "xds_peer_metadata_label": b"exchanged_bytes"}
This method should deserialize xds_peer_metadata_label and return labels as:
labels: {"labelA": b"valueA", "xds_label_A": "xds_label_A",
"xds_label_B": "xds_label_B"}
Returns:
True if this this plugin option is active on the server, false otherwise.
A dict of deserialized labels.
"""
raise NotImplementedError()
return labels
@abc.abstractmethod
def get_label_injector(self) -> Optional[OpenTelemetryLabelInjector]:
# Returns the LabelsInjector used by this plugin option, or None.
raise NotImplementedError()
class OpenTelemetryPluginOption:
"""
An interface that allows you to add additional function to OpenTelemetryPlugin.
"""
# pylint: disable=no-self-use
@ -86,7 +95,7 @@ class OpenTelemetryPlugin:
meter_provider: Optional[MeterProvider]
target_attribute_filter: Callable[[str], bool]
generic_method_attribute_filter: Callable[[str], bool]
_plugin: _open_telemetry_observability._OpenTelemetryPlugin
_plugins: List[_open_telemetry_observability._OpenTelemetryPlugin]
def __init__(
self,
@ -120,17 +129,15 @@ class OpenTelemetryPlugin:
"""
self.plugin_options = plugin_options
self.meter_provider = meter_provider
if target_attribute_filter:
self.target_attribute_filter = target_attribute_filter
else:
self.target_attribute_filter = lambda target: True
if generic_method_attribute_filter:
self.target_attribute_filter = target_attribute_filter or (
lambda target: True
)
self.generic_method_attribute_filter = (
generic_method_attribute_filter
generic_method_attribute_filter or (lambda target: False)
)
else:
self.generic_method_attribute_filter = lambda method: False
self._plugin = _open_telemetry_observability._OpenTelemetryPlugin(self)
self._plugins = [
_open_telemetry_observability._OpenTelemetryPlugin(self)
]
def register_global(self) -> None:
"""
@ -140,7 +147,7 @@ class OpenTelemetryPlugin:
RuntimeError: If a global plugin was already registered.
"""
_open_telemetry_observability.start_open_telemetry_observability(
plugins=[self._plugin]
plugins=self._plugins
)
def deregister_global(self) -> None:
@ -154,8 +161,11 @@ class OpenTelemetryPlugin:
def __enter__(self) -> None:
_open_telemetry_observability.start_open_telemetry_observability(
plugins=[self._plugin]
plugins=self._plugins
)
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
_open_telemetry_observability.end_open_telemetry_observability()
def _get_enabled_optional_labels(self) -> List[OptionalLabelType]:
return []

@ -22,6 +22,7 @@
#include "absl/strings/str_cat.h"
#include "absl/time/clock.h"
#include "constants.h"
#include "metadata_exchange.h"
#include "observability_util.h"
#include "python_observability_context.h"
@ -42,10 +43,15 @@ constexpr uint32_t
PythonOpenCensusCallTracer::PythonOpenCensusCallTracer(
const char* method, const char* target, const char* trace_id,
const char* parent_span_id, bool tracing_enabled, bool registered_method)
const char* parent_span_id, const char* identifier,
const std::vector<Label>& exchange_labels, bool tracing_enabled,
bool add_csm_optional_labels, bool registered_method)
: method_(GetMethod(method)),
target_(GetTarget(target)),
tracing_enabled_(tracing_enabled),
add_csm_optional_labels_(add_csm_optional_labels),
labels_injector_(exchange_labels),
identifier_(identifier),
registered_method_(registered_method) {
GenerateClientContext(absl::StrCat("Sent.", method_),
absl::string_view(trace_id),
@ -83,14 +89,15 @@ PythonOpenCensusCallTracer::~PythonOpenCensusCallTracer() {
if (PythonCensusStatsEnabled()) {
context_.Labels().emplace_back(kClientMethod, std::string(method_));
RecordIntMetric(kRpcClientRetriesPerCallMeasureName, retries_ - 1,
registered_method_,
context_.Labels()); // exclude first attempt
context_.Labels(), identifier_, registered_method_,
/*include_exchange_labels=*/true); // exclude first attempt
RecordIntMetric(kRpcClientTransparentRetriesPerCallMeasureName,
transparent_retries_, registered_method_,
context_.Labels());
transparent_retries_, context_.Labels(), identifier_,
registered_method_, /*include_exchange_labels=*/true);
RecordDoubleMetric(kRpcClientRetryDelayPerCallMeasureName,
ToDoubleSeconds(retry_delay_), registered_method_,
context_.Labels());
ToDoubleSeconds(retry_delay_), context_.Labels(),
identifier_, registered_method_,
/*include_exchange_labels=*/true);
}
if (tracing_enabled_) {
@ -153,8 +160,9 @@ PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
}
context_.Labels().emplace_back(kClientMethod, std::string(parent_->method_));
context_.Labels().emplace_back(kClientTarget, std::string(parent_->target_));
RecordIntMetric(kRpcClientStartedRpcsMeasureName, 1,
parent_->registered_method_, context_.Labels());
RecordIntMetric(kRpcClientStartedRpcsMeasureName, 1, context_.Labels(),
parent_->identifier_, parent_->registered_method_,
/*include_exchange_labels=*/false);
}
void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
@ -178,6 +186,19 @@ void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
send_initial_metadata->Set(grpc_core::GrpcTagsBinMetadata(),
grpc_core::Slice(tags));
}
parent_->labels_injector_.AddExchangeLabelsToMetadata(send_initial_metadata);
}
void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
RecordReceivedInitialMetadata(grpc_metadata_batch* recv_initial_metadata) {
if (recv_initial_metadata != nullptr &&
recv_initial_metadata->get(grpc_core::GrpcTrailersOnly())
.value_or(false)) {
is_trailers_only_ = true;
return;
}
labels_from_peer_ =
parent_->labels_injector_.GetExchangeLabels(recv_initial_metadata);
}
void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
@ -195,6 +216,12 @@ std::shared_ptr<grpc_core::TcpTracerInterface> PythonOpenCensusCallTracer::
return nullptr;
}
void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
SetOptionalLabel(OptionalLabelKey key,
grpc_core::RefCountedStringValue value) {
optional_labels_array_[static_cast<size_t>(key)] = std::move(value);
}
namespace {
// Returns 0 if no server stats are present in the metadata.
@ -225,6 +252,10 @@ void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
if (!PythonCensusStatsEnabled()) {
return;
}
if (is_trailers_only_) {
labels_from_peer_ =
parent_->labels_injector_.GetExchangeLabels(recv_trailing_metadata);
}
auto status_code_ = status.code();
uint64_t elapsed_time = 0;
if (recv_trailing_metadata != nullptr) {
@ -235,26 +266,40 @@ void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
context_.Labels().emplace_back(kClientMethod, std::string(parent_->method_));
context_.Labels().emplace_back(kClientTarget, std::string(parent_->target_));
context_.Labels().emplace_back(kClientStatus, final_status);
if (parent_->add_csm_optional_labels_) {
parent_->labels_injector_.AddXdsOptionalLabels(
/*is_client=*/true, optional_labels_array_, context_.Labels());
}
for (const auto& label : labels_from_peer_) {
context_.Labels().emplace_back(label);
}
RecordDoubleMetric(
kRpcClientSentBytesPerRpcMeasureName,
static_cast<double>(transport_stream_stats != nullptr
? transport_stream_stats->outgoing.data_bytes
: 0),
parent_->registered_method_, context_.Labels());
context_.Labels(), parent_->identifier_, parent_->registered_method_,
/*include_exchange_labels=*/true);
RecordDoubleMetric(
kRpcClientReceivedBytesPerRpcMeasureName,
static_cast<double>(transport_stream_stats != nullptr
? transport_stream_stats->incoming.data_bytes
: 0),
parent_->registered_method_, context_.Labels());
context_.Labels(), parent_->identifier_, parent_->registered_method_,
/*include_exchange_labels=*/true);
RecordDoubleMetric(kRpcClientServerLatencyMeasureName,
absl::ToDoubleSeconds(absl::Nanoseconds(elapsed_time)),
parent_->registered_method_, context_.Labels());
context_.Labels(), parent_->identifier_,
parent_->registered_method_,
/*include_exchange_labels=*/true);
RecordDoubleMetric(kRpcClientRoundtripLatencyMeasureName,
absl::ToDoubleSeconds(absl::Now() - start_time_),
parent_->registered_method_, context_.Labels());
RecordIntMetric(kRpcClientCompletedRpcMeasureName, 1,
parent_->registered_method_, context_.Labels());
context_.Labels(), parent_->identifier_,
parent_->registered_method_,
/*include_exchange_labels=*/true);
RecordIntMetric(kRpcClientCompletedRpcMeasureName, 1, context_.Labels(),
parent_->identifier_, parent_->registered_method_,
/*include_exchange_labels=*/true);
}
void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
@ -268,11 +313,13 @@ void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::RecordEnd(
context_.Labels().emplace_back(kClientStatus,
StatusCodeToString(status_code_));
RecordIntMetric(kRpcClientSentMessagesPerRpcMeasureName,
sent_message_count_, parent_->registered_method_,
context_.Labels());
sent_message_count_, context_.Labels(),
parent_->identifier_, parent_->registered_method_,
/*include_exchange_labels=*/true);
RecordIntMetric(kRpcClientReceivedMessagesPerRpcMeasureName,
recv_message_count_, parent_->registered_method_,
context_.Labels());
recv_message_count_, context_.Labels(),
parent_->identifier_, parent_->registered_method_,
/*include_exchange_labels=*/true);
grpc_core::MutexLock lock(&parent_->mu_);
if (--parent_->num_active_rpcs_ == 0) {

@ -24,6 +24,7 @@
#include "absl/strings/escaping.h"
#include "absl/strings/string_view.h"
#include "absl/time/time.h"
#include "metadata_exchange.h"
#include "python_observability_context.h"
#include <grpc/support/time.h>
@ -60,7 +61,7 @@ class PythonOpenCensusCallTracer : public grpc_core::ClientCallTracer {
void RecordSendCompressedMessage(
const grpc_core::SliceBuffer& /*send_compressed_message*/) override {}
void RecordReceivedInitialMetadata(
grpc_metadata_batch* /*recv_initial_metadata*/) override {}
grpc_metadata_batch* /*recv_initial_metadata*/) override;
void RecordReceivedMessage(
const grpc_core::SliceBuffer& /*recv_message*/) override;
void RecordReceivedDecompressedMessage(
@ -73,9 +74,8 @@ class PythonOpenCensusCallTracer : public grpc_core::ClientCallTracer {
void RecordAnnotation(absl::string_view annotation) override;
void RecordAnnotation(const Annotation& annotation) override;
std::shared_ptr<grpc_core::TcpTracerInterface> StartNewTcpTrace() override;
void SetOptionalLabel(OptionalLabelKey /*key*/,
grpc_core::RefCountedStringValue /*value*/) override {
}
void SetOptionalLabel(OptionalLabelKey key,
grpc_core::RefCountedStringValue value) override;
private:
// Maximum size of trace context is sent on the wire.
@ -91,13 +91,19 @@ class PythonOpenCensusCallTracer : public grpc_core::ClientCallTracer {
uint64_t sent_message_count_ = 0;
// End status code
absl::StatusCode status_code_;
// Avoid std::map to avoid per-call allocations.
std::array<grpc_core::RefCountedStringValue,
static_cast<size_t>(OptionalLabelKey::kSize)>
optional_labels_array_;
std::vector<Label> labels_from_peer_;
bool is_trailers_only_ = false;
};
explicit PythonOpenCensusCallTracer(const char* method, const char* target,
const char* trace_id,
const char* parent_span_id,
bool tracing_enabled,
bool registered_method);
explicit PythonOpenCensusCallTracer(
const char* method, const char* target, const char* trace_id,
const char* parent_span_id, const char* identifier,
const std::vector<Label>& exchange_labels, bool tracing_enabled,
bool add_csm_optional_labels, bool registered_method);
~PythonOpenCensusCallTracer() override;
std::string TraceId() override {
@ -128,8 +134,11 @@ class PythonOpenCensusCallTracer : public grpc_core::ClientCallTracer {
absl::string_view target_;
PythonCensusContext context_;
bool tracing_enabled_;
const bool registered_method_;
bool add_csm_optional_labels_;
mutable grpc_core::Mutex mu_;
PythonLabelsInjector labels_injector_;
std::string identifier_;
const bool registered_method_;
// Non-transparent attempts per call
uint64_t retries_ ABSL_GUARDED_BY(&mu_) = 0;
// Transparent retries per call

@ -15,6 +15,7 @@
#ifndef GRPC_PYTHON_OBSERVABILITY_CONSTANTS_H
#define GRPC_PYTHON_OBSERVABILITY_CONSTANTS_H
#include <set>
#include <string>
namespace grpc_observability {
@ -25,6 +26,7 @@ const std::string kClientStatus = "grpc.status";
const std::string kServerMethod = "grpc.method";
const std::string kServerStatus = "grpc.status";
const std::string kRegisteredMethod = "registerMethod";
const std::string kXEnvoyPeerMetadata = "XEnvoyPeerMetadata";
typedef enum { kMeasurementDouble = 0, kMeasurementInt } MeasurementType;
@ -53,6 +55,8 @@ typedef enum {
kRpcServerStartedRpcsMeasureName
} MetricsName;
const std::set<std::string> MetadataExchangeKeyNames = {kXEnvoyPeerMetadata};
} // namespace grpc_observability
#endif // GRPC_PYTHON_OBSERVABILITY_CONSTANTS_H

@ -0,0 +1,115 @@
//
//
// Copyright 2024 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
#include "metadata_exchange.h"
#include <stddef.h>
#include <algorithm>
#include <array>
#include <cstdint>
#include <unordered_map>
#include "absl/strings/string_view.h"
#include "constants.h"
#include <grpc/slice.h>
#include "src/core/telemetry/call_tracer.h"
namespace grpc_observability {
PythonLabelsInjector::PythonLabelsInjector(
const std::vector<Label>& exchange_labels) {
for (const auto& label : exchange_labels) {
auto it = MetadataExchangeKeyNames.find(label.key);
if (it != MetadataExchangeKeyNames.end()) {
metadata_to_exchange_.emplace_back(label.key, label.value);
}
}
}
std::vector<Label> PythonLabelsInjector::GetExchangeLabels(
grpc_metadata_batch* incoming_initial_metadata) const {
std::vector<Label> labels;
for (const auto& key : MetadataExchangeKeyNames) {
if (key == kXEnvoyPeerMetadata) {
auto xds_peer_metadata =
incoming_initial_metadata->Take(grpc_core::XEnvoyPeerMetadata());
grpc_core::Slice xds_remote_metadata = xds_peer_metadata.has_value()
? *std::move(xds_peer_metadata)
: grpc_core::Slice();
if (!xds_remote_metadata.empty()) {
std::string xds_decoded_metadata;
bool metadata_decoded = absl::Base64Unescape(
xds_remote_metadata.as_string_view(), &xds_decoded_metadata);
if (metadata_decoded) {
labels.emplace_back(kXEnvoyPeerMetadata, xds_decoded_metadata);
}
}
}
}
return labels;
}
void PythonLabelsInjector::AddExchangeLabelsToMetadata(
grpc_metadata_batch* outgoing_initial_metadata) const {
for (const auto& metadata : metadata_to_exchange_) {
if (metadata.first == kXEnvoyPeerMetadata) {
grpc_core::Slice metadata_slice = grpc_core::Slice::FromCopiedString(
absl::Base64Escape(absl::string_view(metadata.second)));
outgoing_initial_metadata->Set(grpc_core::XEnvoyPeerMetadata(),
metadata_slice.Ref());
}
}
}
void PythonLabelsInjector::AddXdsOptionalLabels(
bool is_client,
absl::Span<const grpc_core::RefCountedStringValue> optional_labels_span,
std::vector<Label>& labels) {
if (!is_client) {
// Currently the CSM optional labels are only set on client.
return;
}
// Performs JSON label name format to CSM Observability Metric spec format
// conversion.
absl::string_view service_name =
optional_labels_span[static_cast<size_t>(
grpc_core::ClientCallTracer::CallAttemptTracer::
OptionalLabelKey::kXdsServiceName)]
.as_string_view();
absl::string_view service_namespace =
optional_labels_span[static_cast<size_t>(
grpc_core::ClientCallTracer::CallAttemptTracer::
OptionalLabelKey::kXdsServiceNamespace)]
.as_string_view();
// According to the CSM Observability Metric spec, if the control plane fails
// to provide these labels, the client will set their values to "unknown".
if (service_name.empty()) {
service_name = "unknown";
}
if (service_namespace.empty()) {
service_namespace = "unknown";
}
labels.emplace_back("csm.service_name", std::string(service_name));
labels.emplace_back("csm.service_namespace_name",
std::string(service_namespace));
}
} // namespace grpc_observability

@ -0,0 +1,63 @@
//
//
// Copyright 2024 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
#ifndef GRPC_PYTHON_OBSERVABILITY_METADATA_EXCHANGE_H
#define GRPC_PYTHON_OBSERVABILITY_METADATA_EXCHANGE_H
#include <stddef.h>
#include <stdint.h>
#include <bitset>
#include <memory>
#include <string>
#include <utility>
#include "absl/strings/string_view.h"
#include "constants.h"
#include "python_observability_context.h"
#include "src/core/lib/transport/metadata_batch.h"
namespace grpc_observability {
class PythonLabelsInjector {
public:
explicit PythonLabelsInjector(const std::vector<Label>& exchange_labels);
// Read the incoming initial metadata to get the set of labels exchanged from
// peer.
std::vector<Label> GetExchangeLabels(
grpc_metadata_batch* incoming_initial_metadata) const;
// Add metadata_to_exchange_ to the outgoing initial metadata.
void AddExchangeLabelsToMetadata(
grpc_metadata_batch* outgoing_initial_metadata) const;
// Add optional xds labels from optional_labels_span to labels.
void AddXdsOptionalLabels(
bool is_client,
absl::Span<const grpc_core::RefCountedStringValue> optional_labels_span,
std::vector<Label>& labels);
private:
std::vector<std::pair<std::string, std::string>> metadata_to_exchange_;
};
} // namespace grpc_observability
#endif // GRPC_PYTHON_OBSERVABILITY_CONSTANTS_H

@ -61,28 +61,32 @@ int GetMaxExportBufferSize() {
} // namespace
void RecordIntMetric(MetricsName name, int64_t value,
const std::vector<Label>& labels, std::string identifier,
const bool registered_method,
const std::vector<Label>& labels) {
const bool include_exchange_labels) {
Measurement measurement_data;
measurement_data.type = kMeasurementInt;
measurement_data.name = name;
measurement_data.registered_method = registered_method;
measurement_data.include_exchange_labels = include_exchange_labels;
measurement_data.value.value_int = value;
CensusData data = CensusData(measurement_data, labels);
CensusData data = CensusData(measurement_data, labels, identifier);
AddCensusDataToBuffer(data);
}
void RecordDoubleMetric(MetricsName name, double value,
const bool registered_method,
const std::vector<Label>& labels) {
const std::vector<Label>& labels,
std::string identifier, const bool registered_method,
const bool include_exchange_labels) {
Measurement measurement_data;
measurement_data.type = kMeasurementDouble;
measurement_data.name = name;
measurement_data.registered_method = registered_method;
measurement_data.include_exchange_labels = include_exchange_labels;
measurement_data.value.value_double = value;
CensusData data = CensusData(measurement_data, labels);
CensusData data = CensusData(measurement_data, labels, identifier);
AddCensusDataToBuffer(data);
}
@ -97,16 +101,20 @@ void NativeObservabilityInit() {
void* CreateClientCallTracer(const char* method, const char* target,
const char* trace_id, const char* parent_span_id,
const char* identifier,
const std::vector<Label> exchange_labels,
bool add_csm_optional_labels,
bool registered_method) {
void* client_call_tracer = new PythonOpenCensusCallTracer(
method, target, trace_id, parent_span_id, PythonCensusTracingEnabled(),
registered_method);
method, target, trace_id, parent_span_id, identifier, exchange_labels,
PythonCensusTracingEnabled(), add_csm_optional_labels, registered_method);
return client_call_tracer;
}
void* CreateServerCallTracerFactory() {
void* CreateServerCallTracerFactory(const std::vector<Label> exchange_labels,
const char* identifier) {
void* server_call_tracer_factory =
new PythonOpenCensusServerCallTracerFactory();
new PythonOpenCensusServerCallTracerFactory(exchange_labels, identifier);
return server_call_tracer_factory;
}

@ -36,12 +36,17 @@ namespace grpc_observability {
struct CensusData {
DataType type;
std::vector<Label> labels;
// TODO(xuanwn): We can use union here
std::string identifier;
// TODO(xuanwn): We can use union for span_data and measurement_data
SpanCensusData span_data;
Measurement measurement_data;
CensusData() {}
CensusData(const Measurement& mm, const std::vector<Label>& labels)
: type(kMetricData), labels(std::move(labels)), measurement_data(mm) {}
CensusData(const Measurement& mm, const std::vector<Label>& labels,
std::string id)
: type(kMetricData),
labels(std::move(labels)),
identifier(id),
measurement_data(mm) {}
CensusData(const SpanCensusData& sd) : type(kSpanData), span_data(sd) {}
};
@ -52,9 +57,13 @@ extern std::condition_variable g_census_data_buffer_cv;
void* CreateClientCallTracer(const char* method, const char* target,
const char* trace_id, const char* parent_span_id,
const char* identifier,
const std::vector<Label> exchange_labels,
bool add_csm_optional_labels,
bool registered_method);
void* CreateServerCallTracerFactory();
void* CreateServerCallTracerFactory(const std::vector<Label> exchange_labels,
const char* identifier);
void NativeObservabilityInit();
@ -63,12 +72,14 @@ void AwaitNextBatchLocked(std::unique_lock<std::mutex>& lock, int timeout_ms);
void AddCensusDataToBuffer(const CensusData& buffer);
void RecordIntMetric(MetricsName name, int64_t value,
const std::vector<Label>& labels, std::string identifier,
const bool registered_method,
const std::vector<Label>& labels);
const bool include_exchange_labels);
void RecordDoubleMetric(MetricsName name, double value,
const bool registered_method,
const std::vector<Label>& labels);
const std::vector<Label>& labels,
std::string identifier, const bool registered_method,
const bool include_exchange_labels);
void RecordSpan(const SpanCensusData& span_census_data);

@ -106,6 +106,7 @@ struct Measurement {
MeasurementType type;
MeasurementValue value;
bool registered_method;
bool include_exchange_labels;
};
struct Annotation {

@ -75,121 +75,33 @@ void GetO11yMetadata(const grpc_metadata_batch* b, ServerO11yMetadata* som) {
}
}
} // namespace
//
// PythonOpenCensusServerCallTracer
//
class PythonOpenCensusServerCallTracer : public grpc_core::ServerCallTracer {
public:
// Maximum size of server stats that are sent on the wire.
static constexpr uint32_t kMaxServerStatsLen = 16;
bool KeyInLabels(std::string key, const std::vector<Label>& labels) {
const auto it = std::find_if(labels.begin(), labels.end(),
[&key](const Label& l) { return l.key == key; });
PythonOpenCensusServerCallTracer()
: start_time_(absl::Now()),
recv_message_count_(0),
sent_message_count_(0) {}
std::string TraceId() override {
return absl::BytesToHexString(
absl::string_view(context_.GetSpanContext().TraceId()));
if (it == labels.end()) {
return false;
}
std::string SpanId() override {
return absl::BytesToHexString(
absl::string_view(context_.GetSpanContext().SpanId()));
return true;
}
bool IsSampled() override { return context_.GetSpanContext().IsSampled(); }
// Please refer to `grpc_transport_stream_op_batch_payload` for details on
// arguments.
// It's not a requirement to have this metric thus left unimplemented.
void RecordSendInitialMetadata(
grpc_metadata_batch* /*send_initial_metadata*/) override {}
void RecordSendTrailingMetadata(
grpc_metadata_batch* send_trailing_metadata) override;
void RecordSendMessage(const grpc_core::SliceBuffer& send_message) override {
RecordAnnotation(
absl::StrFormat("Send message: %ld bytes", send_message.Length()));
++sent_message_count_;
}
void RecordSendCompressedMessage(
const grpc_core::SliceBuffer& send_compressed_message) override {
RecordAnnotation(absl::StrFormat("Send compressed message: %ld bytes",
send_compressed_message.Length()));
}
void RecordReceivedInitialMetadata(
grpc_metadata_batch* recv_initial_metadata) override;
void RecordReceivedMessage(
const grpc_core::SliceBuffer& recv_message) override {
RecordAnnotation(
absl::StrFormat("Received message: %ld bytes", recv_message.Length()));
++recv_message_count_;
}
void RecordReceivedDecompressedMessage(
const grpc_core::SliceBuffer& recv_decompressed_message) override {
RecordAnnotation(absl::StrFormat("Received decompressed message: %ld bytes",
recv_decompressed_message.Length()));
}
void RecordReceivedTrailingMetadata(
grpc_metadata_batch* /*recv_trailing_metadata*/) override {}
void RecordCancel(grpc_error_handle /*cancel_error*/) override {
elapsed_time_ = absl::Now() - start_time_;
}
void RecordEnd(const grpc_call_final_info* final_info) override;
void RecordAnnotation(absl::string_view annotation) override {
if (!context_.GetSpanContext().IsSampled()) {
return;
}
context_.AddSpanAnnotation(annotation);
}
} // namespace
void RecordAnnotation(const Annotation& annotation) override {
if (!context_.GetSpanContext().IsSampled()) {
return;
}
//
// PythonOpenCensusServerCallTracer
//
switch (annotation.type()) {
// Annotations are expensive to create. We should only create it if the
// call is being sampled by default.
default:
if (IsSampled()) {
context_.AddSpanAnnotation(annotation.ToString());
void PythonOpenCensusServerCallTracer::RecordSendInitialMetadata(
grpc_metadata_batch* send_initial_metadata) {
// Only add labels if exchange is needed (Client send metadata with keys in
// MetadataExchangeKeyNames).
for (const auto& key : MetadataExchangeKeyNames) {
if (KeyInLabels(key, labels_from_peer_)) {
labels_injector_.AddExchangeLabelsToMetadata(send_initial_metadata);
}
break;
}
}
std::shared_ptr<grpc_core::TcpTracerInterface> StartNewTcpTrace() override {
return nullptr;
}
private:
PythonCensusContext context_;
// server method
grpc_core::Slice path_;
absl::string_view method_;
absl::Time start_time_;
absl::Duration elapsed_time_;
bool registered_method_;
uint64_t recv_message_count_;
uint64_t sent_message_count_;
// Buffer needed for grpc_slice to reference it when adding metadata to
// response.
char stats_buf_[kMaxServerStatsLen];
};
void PythonOpenCensusServerCallTracer::RecordReceivedInitialMetadata(
grpc_metadata_batch* recv_initial_metadata) {
ServerO11yMetadata som;
@ -205,9 +117,12 @@ void PythonOpenCensusServerCallTracer::RecordReceivedInitialMetadata(
.value_or(nullptr) != nullptr;
if (PythonCensusStatsEnabled()) {
context_.Labels().emplace_back(kServerMethod, std::string(method_));
RecordIntMetric(kRpcServerStartedRpcsMeasureName, 1, registered_method_,
context_.Labels());
RecordIntMetric(kRpcServerStartedRpcsMeasureName, 1, context_.Labels(),
identifier_, registered_method_,
/*include_exchange_labels=*/false);
}
labels_from_peer_ = labels_injector_.GetExchangeLabels(recv_initial_metadata);
}
void PythonOpenCensusServerCallTracer::RecordSendTrailingMetadata(
@ -226,30 +141,70 @@ void PythonOpenCensusServerCallTracer::RecordSendTrailingMetadata(
}
}
void PythonOpenCensusServerCallTracer::RecordSendMessage(
const grpc_core::SliceBuffer& send_message) {
RecordAnnotation(
absl::StrFormat("Send message: %ld bytes", send_message.Length()));
++sent_message_count_;
}
void PythonOpenCensusServerCallTracer::RecordSendCompressedMessage(
const grpc_core::SliceBuffer& send_compressed_message) {
RecordAnnotation(absl::StrFormat("Send compressed message: %ld bytes",
send_compressed_message.Length()));
}
void PythonOpenCensusServerCallTracer::RecordReceivedMessage(
const grpc_core::SliceBuffer& recv_message) {
RecordAnnotation(
absl::StrFormat("Received message: %ld bytes", recv_message.Length()));
++recv_message_count_;
}
void PythonOpenCensusServerCallTracer::RecordReceivedDecompressedMessage(
const grpc_core::SliceBuffer& recv_decompressed_message) {
RecordAnnotation(absl::StrFormat("Received decompressed message: %ld bytes",
recv_decompressed_message.Length()));
}
void PythonOpenCensusServerCallTracer::RecordCancel(
grpc_error_handle /*cancel_error*/) {
elapsed_time_ = absl::Now() - start_time_;
}
void PythonOpenCensusServerCallTracer::RecordEnd(
const grpc_call_final_info* final_info) {
if (PythonCensusStatsEnabled()) {
const uint64_t request_size = GetOutgoingDataSize(final_info);
const uint64_t response_size = GetIncomingDataSize(final_info);
const uint64_t request_size = GetIncomingDataSize(final_info);
const uint64_t response_size = GetOutgoingDataSize(final_info);
double elapsed_time_s = absl::ToDoubleSeconds(elapsed_time_);
context_.Labels().emplace_back(kServerMethod, std::string(method_));
context_.Labels().emplace_back(
kServerStatus,
std::string(StatusCodeToString(final_info->final_status)));
for (const auto& label : labels_from_peer_) {
context_.Labels().emplace_back(label);
}
RecordDoubleMetric(kRpcServerSentBytesPerRpcMeasureName,
static_cast<double>(response_size), registered_method_,
context_.Labels());
static_cast<double>(response_size), context_.Labels(),
identifier_, registered_method_,
/*include_exchange_labels=*/true);
RecordDoubleMetric(kRpcServerReceivedBytesPerRpcMeasureName,
static_cast<double>(request_size), registered_method_,
context_.Labels());
static_cast<double>(request_size), context_.Labels(),
identifier_, registered_method_,
/*include_exchange_labels=*/true);
RecordDoubleMetric(kRpcServerServerLatencyMeasureName, elapsed_time_s,
registered_method_, context_.Labels());
RecordIntMetric(kRpcServerCompletedRpcMeasureName, 1, registered_method_,
context_.Labels());
RecordIntMetric(kRpcServerSentMessagesPerRpcMeasureName, registered_method_,
sent_message_count_, context_.Labels());
context_.Labels(), identifier_, registered_method_,
/*include_exchange_labels=*/true);
RecordIntMetric(kRpcServerCompletedRpcMeasureName, 1, context_.Labels(),
identifier_, registered_method_,
/*include_exchange_labels=*/true);
RecordIntMetric(kRpcServerSentMessagesPerRpcMeasureName,
sent_message_count_, context_.Labels(), identifier_,
registered_method_, /*include_exchange_labels=*/true);
RecordIntMetric(kRpcServerReceivedMessagesPerRpcMeasureName,
registered_method_, recv_message_count_, context_.Labels());
recv_message_count_, context_.Labels(), identifier_,
registered_method_, /*include_exchange_labels=*/true);
}
if (PythonCensusTracingEnabled()) {
context_.EndSpan();
@ -263,6 +218,50 @@ void PythonOpenCensusServerCallTracer::RecordEnd(
delete this;
}
void PythonOpenCensusServerCallTracer::RecordAnnotation(
absl::string_view annotation) {
if (!context_.GetSpanContext().IsSampled()) {
return;
}
context_.AddSpanAnnotation(annotation);
}
void PythonOpenCensusServerCallTracer::RecordAnnotation(
const Annotation& annotation) {
if (!context_.GetSpanContext().IsSampled()) {
return;
}
switch (annotation.type()) {
// Annotations are expensive to create. We should only create it if the
// call is being sampled by default.
default:
if (IsSampled()) {
context_.AddSpanAnnotation(annotation.ToString());
}
break;
}
}
std::shared_ptr<grpc_core::TcpTracerInterface>
PythonOpenCensusServerCallTracer::StartNewTcpTrace() {
return nullptr;
}
std::string PythonOpenCensusServerCallTracer::TraceId() {
return absl::BytesToHexString(
absl::string_view(context_.GetSpanContext().TraceId()));
}
std::string PythonOpenCensusServerCallTracer::SpanId() {
return absl::BytesToHexString(
absl::string_view(context_.GetSpanContext().SpanId()));
}
bool PythonOpenCensusServerCallTracer::IsSampled() {
return context_.GetSpanContext().IsSampled();
}
//
// PythonOpenCensusServerCallTracerFactory
//
@ -274,7 +273,18 @@ PythonOpenCensusServerCallTracerFactory::CreateNewServerCallTracer(
// the same DLL in Windows.
(void)arena;
(void)channel_args;
return new PythonOpenCensusServerCallTracer();
return new PythonOpenCensusServerCallTracer(exchange_labels_, identifier_);
}
bool PythonOpenCensusServerCallTracerFactory::IsServerTraced(
const grpc_core::ChannelArgs& args) {
// Returns true if a server is to be traced, false otherwise.
return true;
}
PythonOpenCensusServerCallTracerFactory::
PythonOpenCensusServerCallTracerFactory(
const std::vector<Label>& exchange_labels, const char* identifier)
: exchange_labels_(exchange_labels), identifier_(identifier) {}
} // namespace grpc_observability

@ -17,6 +17,9 @@
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
#include "constants.h"
#include "metadata_exchange.h"
#include "python_observability_context.h"
#include <grpc/support/port_platform.h>
@ -32,6 +35,14 @@ class PythonOpenCensusServerCallTracerFactory
grpc_core::ServerCallTracer* CreateNewServerCallTracer(
grpc_core::Arena* arena,
const grpc_core::ChannelArgs& channel_args) override;
explicit PythonOpenCensusServerCallTracerFactory(
const std::vector<Label>& exchange_labels, const char* identifier);
bool IsServerTraced(const grpc_core::ChannelArgs& args) override;
private:
const std::vector<Label> exchange_labels_;
std::string identifier_;
};
inline absl::string_view GetMethod(const grpc_core::Slice& path) {
@ -42,6 +53,79 @@ inline absl::string_view GetMethod(const grpc_core::Slice& path) {
return absl::StripPrefix(path.as_string_view(), "/");
}
class PythonOpenCensusServerCallTracer : public grpc_core::ServerCallTracer {
public:
// Maximum size of server stats that are sent on the wire.
static constexpr uint32_t kMaxServerStatsLen = 16;
PythonOpenCensusServerCallTracer(const std::vector<Label>& exchange_labels,
std::string identifier)
: start_time_(absl::Now()),
recv_message_count_(0),
sent_message_count_(0),
labels_injector_(exchange_labels),
identifier_(identifier) {}
std::string TraceId() override;
std::string SpanId() override;
bool IsSampled() override;
// Please refer to `grpc_transport_stream_op_batch_payload` for details on
// arguments.
// It's not a requirement to have this metric thus left unimplemented.
void RecordSendInitialMetadata(
grpc_metadata_batch* send_initial_metadata) override;
void RecordSendTrailingMetadata(
grpc_metadata_batch* send_trailing_metadata) override;
void RecordSendMessage(const grpc_core::SliceBuffer& send_message) override;
void RecordSendCompressedMessage(
const grpc_core::SliceBuffer& send_compressed_message) override;
void RecordReceivedInitialMetadata(
grpc_metadata_batch* recv_initial_metadata) override;
void RecordReceivedMessage(
const grpc_core::SliceBuffer& recv_message) override;
void RecordReceivedDecompressedMessage(
const grpc_core::SliceBuffer& recv_decompressed_message) override;
void RecordReceivedTrailingMetadata(
grpc_metadata_batch* /*recv_trailing_metadata*/) override {}
void RecordCancel(grpc_error_handle /*cancel_error*/) override;
void RecordEnd(const grpc_call_final_info* final_info) override;
void RecordAnnotation(absl::string_view annotation) override;
void RecordAnnotation(const Annotation& annotation) override;
std::shared_ptr<grpc_core::TcpTracerInterface> StartNewTcpTrace() override;
private:
PythonCensusContext context_;
// server method
grpc_core::Slice path_;
absl::string_view method_;
absl::Time start_time_;
absl::Duration elapsed_time_;
uint64_t recv_message_count_;
uint64_t sent_message_count_;
// Buffer needed for grpc_slice to reference it when adding metadata to
// response.
char stats_buf_[kMaxServerStatsLen];
PythonLabelsInjector labels_injector_;
std::vector<Label> labels_from_peer_;
std::string identifier_;
bool registered_method_ = false;
};
} // namespace grpc_observability
#endif // GRPC_PYTHON_OPENCENSUS_SERVER_CALL_TRACER_H

@ -90,9 +90,7 @@ BAZEL_DEPS = os.path.join(
)
# the bazel target to scrape to get list of sources for the build
BAZEL_DEPS_QUERIES = [
"//src/core:slice",
]
BAZEL_DEPS_QUERIES = ["//src/core:slice", "//src/core:ref_counted_string"]
def _bazel_query(query):

@ -29,6 +29,7 @@ CC_FILES=[
'grpc_root/src/core/lib/gprpp/posix/env.cc',
'grpc_root/src/core/lib/gprpp/posix/stat.cc',
'grpc_root/src/core/lib/gprpp/posix/thd.cc',
'grpc_root/src/core/lib/gprpp/ref_counted_string.cc',
'grpc_root/src/core/lib/gprpp/strerror.cc',
'grpc_root/src/core/lib/gprpp/tchar.cc',
'grpc_root/src/core/lib/gprpp/time_util.cc',

@ -47,12 +47,13 @@ CLASSIFIERS = [
]
O11Y_CC_SRCS = [
"server_call_tracer.cc",
"client_call_tracer.cc",
"metadata_exchange.cc",
"observability_util.cc",
"python_observability_context.cc",
"sampler.cc",
"rpc_encoding.cc",
"sampler.cc",
"server_call_tracer.cc",
]

@ -26,6 +26,11 @@ py_library(
srcs = ["_from_observability_import_star.py"],
)
py_library(
name = "_from_csm_observability_import_star",
srcs = ["_from_csm_observability_import_star.py"],
)
py_test(
name = "_open_telemetry_observability_test",
size = "small",
@ -41,6 +46,38 @@ py_test(
],
)
py_test(
name = "_observability_plugin_test",
size = "small",
srcs = ["_observability_plugin_test.py"],
imports = ["../../"],
main = "_observability_plugin_test.py",
deps = [
":test_server",
"//src/python/grpcio/grpc:grpcio",
"//src/python/grpcio_observability/grpc_observability:pyobservability",
"//src/python/grpcio_tests/tests/testing",
requirement("protobuf"),
requirement("opentelemetry-sdk"),
],
)
py_test(
name = "_csm_observability_plugin_test",
size = "small",
srcs = ["_csm_observability_plugin_test.py"],
imports = ["../../"],
main = "_csm_observability_plugin_test.py",
deps = [
":test_server",
"//src/python/grpcio/grpc:grpcio",
"//src/python/grpcio_csm_observability/grpc_csm_observability:csm_observability",
"//src/python/grpcio_tests/tests/testing",
requirement("protobuf"),
requirement("opentelemetry-sdk"),
],
)
py_test(
name = "_observability_api_test",
size = "small",
@ -48,8 +85,10 @@ py_test(
imports = ["../../"],
main = "_observability_api_test.py",
deps = [
":_from_csm_observability_import_star",
":_from_observability_import_star",
"//src/python/grpcio/grpc:grpcio",
"//src/python/grpcio_csm_observability/grpc_csm_observability:csm_observability",
"//src/python/grpcio_observability/grpc_observability:pyobservability",
"//src/python/grpcio_tests/tests/testing",
],

@ -0,0 +1,652 @@
# Copyright 2024 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
import datetime
import json
import logging
import os
import random
import sys
import time
from typing import Any, Callable, Dict, List, Optional, Set
import unittest
from unittest import mock
from grpc_csm_observability import CsmOpenTelemetryPlugin
from grpc_csm_observability._csm_observability_plugin import (
CSMOpenTelemetryLabelInjector,
)
from grpc_csm_observability._csm_observability_plugin import TYPE_GCE
from grpc_csm_observability._csm_observability_plugin import TYPE_GKE
from grpc_csm_observability._csm_observability_plugin import UNKNOWN_VALUE
import grpc_observability
from grpc_observability import _open_telemetry_measures
from grpc_observability._open_telemetry_plugin import OpenTelemetryLabelInjector
from grpc_observability._open_telemetry_plugin import OpenTelemetryPluginOption
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import AggregationTemporality
from opentelemetry.sdk.metrics.export import MetricExportResult
from opentelemetry.sdk.metrics.export import MetricExporter
from opentelemetry.sdk.metrics.export import MetricsData
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from tests.observability import _test_server
logger = logging.getLogger(__name__)
OTEL_EXPORT_INTERVAL_S = 0.5
# We only expect basic labels to be exchanged.
CSM_METADATA_EXCHANGE_DEFAULT_LABELS = [
"csm.remote_workload_type",
"csm.remote_workload_canonical_service",
]
# The following metrics should have optional labels when optional
# labels is enabled through OpenTelemetryPlugin.
METRIC_NAME_WITH_OPTIONAL_LABEL = [
"grpc.client.attempt.duration",
"grpc.client.attempt.sent_total_compressed_message_size",
"grpc.client.attempt.rcvd_total_compressed_message_size",
]
CSM_OPTIONAL_LABEL_KEYS = ["csm.service_name", "csm.service_namespace_name"]
# The following metrics should have metadata exchange labels when metadata
# exchange flow is triggered.
METRIC_NAME_WITH_EXCHANGE_LABEL = [
"grpc.client.attempt.duration",
"grpc.client.attempt.sent_total_compressed_message_size",
"grpc.client.attempt.rcvd_total_compressed_message_size",
"grpc.server.call.duration",
"grpc.server.call.sent_total_compressed_message_size",
"grpc.server.call.rcvd_total_compressed_message_size",
]
MOCK_GKE_RESOURCE = Resource.create(
attributes={
"gcp.resource_type": "gke_container",
"k8s.pod.name": "pod",
"k8s.container.name": "container",
"k8s.namespace.name": "namespace",
"k8s.cluster.name": "cluster",
"cloud.region": "region",
"cloud.account.id": "id",
}
)
MOCK_GCE_RESOURCE = Resource.create(
attributes={
"gcp.resource_type": "gce_instance",
"cloud.zone": "zone",
"cloud.account.id": "id",
}
)
MOCK_UNKNOWN_RESOURCE = Resource.create(
attributes={
"gcp.resource_type": "random",
}
)
class OTelMetricExporter(MetricExporter):
"""Implementation of :class:`MetricExporter` that export metrics to the
provided metric_list.
all_metrics: A dict which key is grpc_observability._opentelemetry_measures.Metric.name,
value is a list of labels recorded for that metric.
An example item of this dict:
{"grpc.client.attempt.started":
[{'grpc.method': 'test/UnaryUnary', 'grpc.target': 'localhost:42517'},
{'grpc.method': 'other', 'grpc.target': 'localhost:42517'}]}
"""
def __init__(
self,
all_metrics: Dict[str, List],
preferred_temporality: Dict[type, AggregationTemporality] = None,
preferred_aggregation: Dict[
type, "opentelemetry.sdk.metrics.view.Aggregation"
] = None,
):
super().__init__(
preferred_temporality=preferred_temporality,
preferred_aggregation=preferred_aggregation,
)
self.all_metrics = all_metrics
def export(
self,
metrics_data: MetricsData,
timeout_millis: float = 10_000,
**kwargs,
) -> MetricExportResult:
self.record_metric(metrics_data)
return MetricExportResult.SUCCESS
def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
pass
def force_flush(self, timeout_millis: float = 10_000) -> bool:
return True
def record_metric(self, metrics_data: MetricsData) -> None:
for resource_metric in metrics_data.resource_metrics:
for scope_metric in resource_metric.scope_metrics:
for metric in scope_metric.metrics:
for data_point in metric.data.data_points:
self.all_metrics[metric.name].append(
data_point.attributes
)
class TestOpenTelemetryPluginOption(OpenTelemetryPluginOption):
_label_injector: OpenTelemetryLabelInjector
_active_on_client: bool
_active_on_server: bool
def __init__(
self,
label_injector: OpenTelemetryLabelInjector,
active_on_client: Optional[bool] = True,
active_on_server: Optional[bool] = True,
):
self._label_injector = label_injector
self._active_on_client = active_on_client
self._active_on_server = active_on_server
def is_active_on_client_channel(self, target: str) -> bool:
return self._active_on_client
def is_active_on_server(self, xds: bool) -> bool:
return self._active_on_server
def get_label_injector(self) -> OpenTelemetryLabelInjector:
return self._label_injector
@unittest.skipIf(
os.name == "nt" or "darwin" in sys.platform,
"Observability is not supported in Windows and MacOS",
)
class CSMObservabilityPluginTest(unittest.TestCase):
def setUp(self):
self.all_metrics = defaultdict(list)
otel_exporter = OTelMetricExporter(self.all_metrics)
reader = PeriodicExportingMetricReader(
exporter=otel_exporter,
export_interval_millis=OTEL_EXPORT_INTERVAL_S * 1000,
)
self._provider = MeterProvider(metric_readers=[reader])
self._server = None
self._port = None
def tearDown(self):
if self._server:
self._server.stop(0)
def testOptionalXdsServiceLabelExist(self):
csm_plugin = CsmOpenTelemetryPlugin(
meter_provider=self._provider,
)
csm_plugin.register_global()
self._server, port = _test_server.start_server()
_test_server.unary_unary_call(port=port)
csm_plugin.deregister_global()
validate_metrics_exist(self, self.all_metrics)
for name, label_list in self.all_metrics.items():
if name in METRIC_NAME_WITH_OPTIONAL_LABEL:
self._validate_label_exist(
name, label_list, CSM_OPTIONAL_LABEL_KEYS
)
else:
self._validate_label_not_exist(
name, label_list, CSM_OPTIONAL_LABEL_KEYS
)
def testPluginOptionOnlyEnabledForXdsTargets(self):
csm_plugin = CsmOpenTelemetryPlugin(
meter_provider=self._provider,
)
csm_plugin_option = csm_plugin.plugin_options[0]
self.assertFalse(
csm_plugin_option.is_active_on_client_channel("foo.bar.google.com")
)
self.assertFalse(
csm_plugin_option.is_active_on_client_channel(
"dns:///foo.bar.google.com"
)
)
self.assertFalse(
csm_plugin_option.is_active_on_client_channel(
"dns:///foo.bar.google.com:1234"
)
)
self.assertFalse(
csm_plugin_option.is_active_on_client_channel(
"dns://authority/foo.bar.google.com:1234"
)
)
self.assertFalse(
csm_plugin_option.is_active_on_client_channel("xds://authority/foo")
)
self.assertTrue(
csm_plugin_option.is_active_on_client_channel("xds:///foo")
)
self.assertTrue(
csm_plugin_option.is_active_on_client_channel(
"xds://traffic-director-global.xds.googleapis.com/foo"
)
)
self.assertTrue(
csm_plugin_option.is_active_on_client_channel(
"xds://traffic-director-global.xds.googleapis.com/foo.bar"
)
)
def testGetMeshIdFromConfig(self):
config_json = {
"node": {
"id": "projects/12345/networks/mesh:test_mesh_id/nodes/abcdefg"
}
}
config_str = json.dumps(config_json)
with mock.patch.dict(
os.environ, {"GRPC_XDS_BOOTSTRAP_CONFIG": config_str}
):
csm_plugin = CsmOpenTelemetryPlugin(
meter_provider=self._provider,
)
csm_label_injector = csm_plugin.plugin_options[
0
].get_label_injector()
additional_labels = csm_label_injector.get_additional_labels(
include_exchange_labels=True
)
self.assertEqual(additional_labels["csm.mesh_id"], "test_mesh_id")
def testGetMeshIdFromFile(self):
config_json = {
"node": {
"id": "projects/12345/networks/mesh:test_mesh_id/nodes/abcdefg"
}
}
config_file_path = "/tmp/" + str(random.randint(0, 100000))
with open(config_file_path, "w", encoding="utf-8") as f:
f.write(json.dumps(config_json))
with mock.patch.dict(
os.environ, {"GRPC_XDS_BOOTSTRAP": config_file_path}
):
csm_plugin = CsmOpenTelemetryPlugin(
meter_provider=self._provider,
)
csm_label_injector = csm_plugin.plugin_options[
0
].get_label_injector()
additional_labels = csm_label_injector.get_additional_labels(
include_exchange_labels=True
)
self.assertEqual(additional_labels["csm.mesh_id"], "test_mesh_id")
def testGetMeshIdFromInvalidConfig(self):
config_json = {"node": {"id": "12345"}}
config_str = json.dumps(config_json)
with mock.patch.dict(
os.environ, {"GRPC_XDS_BOOTSTRAP_CONFIG": config_str}
):
csm_plugin = CsmOpenTelemetryPlugin(
meter_provider=self._provider,
)
csm_label_injector = csm_plugin.plugin_options[
0
].get_label_injector()
additional_labels = csm_label_injector.get_additional_labels(
include_exchange_labels=True
)
self.assertEqual(additional_labels["csm.mesh_id"], "unknown")
def _validate_all_metrics_names(self, metric_names: Set[str]) -> None:
self._validate_server_metrics_names(metric_names)
self._validate_client_metrics_names(metric_names)
def _validate_server_metrics_names(self, metric_names: Set[str]) -> None:
for base_metric in _open_telemetry_measures.base_metrics():
if "grpc.server" in base_metric.name:
self.assertTrue(
base_metric.name in metric_names,
msg=f"metric {base_metric.name} not found in exported metrics: {metric_names}!",
)
def _validate_client_metrics_names(self, metric_names: Set[str]) -> None:
for base_metric in _open_telemetry_measures.base_metrics():
if "grpc.client" in base_metric.name:
self.assertTrue(
base_metric.name in metric_names,
msg=f"metric {base_metric.name} not found in exported metrics: {metric_names}!",
)
def _validate_label_exist(
self,
metric_name: str,
metric_label_list: List[str],
labels_to_check: List[str],
) -> None:
for metric_label in metric_label_list:
for label in labels_to_check:
self.assertTrue(
label in metric_label,
msg=f"label with key {label} not found in metric {metric_name}, found label list: {metric_label}",
)
def _validate_label_not_exist(
self,
metric_name: str,
metric_label_list: List[str],
labels_to_check: List[str],
) -> None:
for metric_label in metric_label_list:
for label in labels_to_check:
self.assertFalse(
label in metric_label,
msg=f"found unexpected label with key {label} in metric {metric_name}, found label list: {metric_label}",
)
@unittest.skipIf(
os.name == "nt" or "darwin" in sys.platform,
"Observability is not supported in Windows and MacOS",
)
class MetadataExchangeTest(unittest.TestCase):
def setUp(self):
self.all_metrics = defaultdict(list)
otel_exporter = OTelMetricExporter(self.all_metrics)
reader = PeriodicExportingMetricReader(
exporter=otel_exporter,
export_interval_millis=OTEL_EXPORT_INTERVAL_S * 1000,
)
self._provider = MeterProvider(metric_readers=[reader])
self._server = None
self._port = None
def tearDown(self):
if self._server:
self._server.stop(0)
@mock.patch(
"opentelemetry.resourcedetector.gcp_resource_detector.GoogleCloudResourceDetector.detect"
)
def testMetadataExchangeClientDoesNotSendMetadata(self, mock_detector):
mock_detector.return_value = MOCK_GKE_RESOURCE
with mock.patch.dict(
os.environ,
{
"CSM_CANONICAL_SERVICE_NAME": "canonical_service",
"CSM_WORKLOAD_NAME": "workload",
},
):
plugin_option = TestOpenTelemetryPluginOption(
label_injector=CSMOpenTelemetryLabelInjector(),
active_on_client=False,
)
# Manually create csm_plugin so that it's always disabled on client.
csm_plugin = grpc_observability.OpenTelemetryPlugin(
meter_provider=self._provider, plugin_options=[plugin_option]
)
csm_plugin.register_global()
self._server, port = _test_server.start_server()
_test_server.unary_unary_call(port=port)
csm_plugin.deregister_global()
validate_metrics_exist(self, self.all_metrics)
for name, label_list in self.all_metrics.items():
for labels in label_list:
# Verifies that the server records unknown when the client does not send metadata
if name in ["grpc.server.call.duration"]:
self.assertEqual(
labels["csm.workload_canonical_service"],
"canonical_service",
)
self.assertEqual(
labels["csm.remote_workload_canonical_service"],
"unknown",
)
# Client metric should not have CSM labels.
elif "grpc.client" in name:
self.assertTrue(
"csm.workload_canonical_service" not in labels.keys()
)
self.assertTrue(
"csm.remote_workload_canonical_service"
not in labels.keys()
)
@mock.patch(
"opentelemetry.resourcedetector.gcp_resource_detector.GoogleCloudResourceDetector.detect"
)
def testResourceDetectorGCE(self, mock_detector):
mock_detector.return_value = MOCK_GCE_RESOURCE
with mock.patch.dict(
os.environ,
{
"CSM_CANONICAL_SERVICE_NAME": "canonical_service",
"CSM_WORKLOAD_NAME": "workload",
},
):
plugin_option = TestOpenTelemetryPluginOption(
label_injector=CSMOpenTelemetryLabelInjector(),
)
# Have to manually create csm_plugin so that we can enable it for all
# channels.
csm_plugin = grpc_observability.OpenTelemetryPlugin(
meter_provider=self._provider, plugin_options=[plugin_option]
)
csm_plugin.register_global()
self._server, port = _test_server.start_server()
_test_server.unary_unary_call(port=port)
_test_server.unary_unary_call(port=port)
csm_plugin.deregister_global()
validate_metrics_exist(self, self.all_metrics)
for name, label_list in self.all_metrics.items():
# started metrics shouldn't have any csm labels.
if name in [
"grpc.client.attempt.started",
"grpc.server.call.started",
]:
self._verify_no_service_mesh_attributes(label_list)
# duration metrics should have all csm related labels.
elif name in [
"grpc.client.attempt.duration",
"grpc.server.call.duration",
]:
self._verify_service_mesh_attributes(label_list, TYPE_GCE)
@mock.patch(
"opentelemetry.resourcedetector.gcp_resource_detector.GoogleCloudResourceDetector.detect"
)
def testResourceDetectorGKE(self, mock_detector):
mock_detector.return_value = MOCK_GKE_RESOURCE
with mock.patch.dict(
os.environ,
{
"CSM_CANONICAL_SERVICE_NAME": "canonical_service",
"CSM_WORKLOAD_NAME": "workload",
},
):
plugin_option = TestOpenTelemetryPluginOption(
label_injector=CSMOpenTelemetryLabelInjector()
)
# Have to manually create csm_plugin so that we can enable it for all
# channels.
csm_plugin = grpc_observability.OpenTelemetryPlugin(
meter_provider=self._provider, plugin_options=[plugin_option]
)
csm_plugin.register_global()
self._server, port = _test_server.start_server()
_test_server.unary_unary_call(port=port)
csm_plugin.deregister_global()
validate_metrics_exist(self, self.all_metrics)
for name, label_list in self.all_metrics.items():
# started metrics shouldn't have any csm labels.
if name in [
"grpc.client.attempt.started",
"grpc.server.call.started",
]:
self._verify_no_service_mesh_attributes(label_list)
# duration metrics should have all csm related labels.
elif name in [
"grpc.client.attempt.duration",
"grpc.server.call.duration",
]:
self._verify_service_mesh_attributes(label_list, TYPE_GKE)
@mock.patch(
"opentelemetry.resourcedetector.gcp_resource_detector.GoogleCloudResourceDetector.detect"
)
def testResourceDetectorUnknown(self, mock_detector):
mock_detector.return_value = MOCK_UNKNOWN_RESOURCE
with mock.patch.dict(
os.environ,
{
"CSM_CANONICAL_SERVICE_NAME": "canonical_service",
"CSM_WORKLOAD_NAME": "workload",
},
):
plugin_option = TestOpenTelemetryPluginOption(
label_injector=CSMOpenTelemetryLabelInjector()
)
# Have to manually create csm_plugin so that we can enable it for all
# channels.
csm_plugin = grpc_observability.OpenTelemetryPlugin(
meter_provider=self._provider, plugin_options=[plugin_option]
)
csm_plugin.register_global()
self._server, port = _test_server.start_server()
_test_server.unary_unary_call(port=port)
csm_plugin.deregister_global()
validate_metrics_exist(self, self.all_metrics)
for name, label_list in self.all_metrics.items():
# started metrics shouldn't have any csm labels.
if name in [
"grpc.client.attempt.started",
"grpc.server.call.started",
]:
self._verify_no_service_mesh_attributes(label_list)
# duration metrics should have all csm related labels.
elif name in [
"grpc.client.attempt.duration",
"grpc.server.call.duration",
]:
self._verify_service_mesh_attributes(label_list, UNKNOWN_VALUE)
def _verify_service_mesh_attributes(
self, label_list: List[Dict[str, str]], resource_type: str
):
for labels in label_list:
# Assuming attributes is a dictionary
self.assertEqual(
labels["csm.workload_canonical_service"], "canonical_service"
)
self.assertEqual(
labels["csm.remote_workload_canonical_service"],
"canonical_service",
)
if resource_type == TYPE_GKE:
self.assertEqual(
labels["csm.remote_workload_type"], "gcp_kubernetes_engine"
)
self.assertEqual(labels["csm.remote_workload_name"], "workload")
self.assertEqual(
labels["csm.remote_workload_namespace_name"], "namespace"
)
self.assertEqual(
labels["csm.remote_workload_cluster_name"], "cluster"
)
self.assertEqual(
labels["csm.remote_workload_location"], "region"
)
self.assertEqual(labels["csm.remote_workload_project_id"], "id")
elif resource_type == TYPE_GCE:
self.assertEqual(
labels["csm.remote_workload_type"], "gcp_compute_engine"
)
self.assertEqual(labels["csm.remote_workload_name"], "workload")
self.assertEqual(labels["csm.remote_workload_location"], "zone")
self.assertEqual(labels["csm.remote_workload_project_id"], "id")
elif resource_type == UNKNOWN_VALUE:
self.assertEqual(labels["csm.remote_workload_type"], "random")
def _verify_no_service_mesh_attributes(
self, label_list: List[Dict[str, str]]
):
for labels in label_list:
self.assertTrue(
"csm.remote_workload_canonical_service" not in labels.keys()
)
self.assertTrue("csm.remote_workload_type" not in labels.keys())
self.assertTrue(
"csm.workload_canonical_service" not in labels.keys()
)
self.assertTrue("csm.workload_type" not in labels.keys())
self.assertTrue("csm.mesh_id" not in labels.keys())
def validate_metrics_exist(
testCase: unittest.TestCase, all_metrics: Dict[str, Any]
) -> None:
# Sleep here to make sure we have at least one export from OTel MetricExporter.
assert_eventually(
testCase=testCase,
predicate=lambda: len(all_metrics.keys()) > 1,
message=lambda: f"No metrics was exported",
)
def assert_eventually(
testCase: unittest.TestCase,
predicate: Callable[[], bool],
*,
timeout: Optional[datetime.timedelta] = None,
message: Optional[Callable[[], str]] = None,
) -> None:
message = message or (lambda: "Proposition did not evaluate to true")
timeout = timeout or datetime.timedelta(seconds=5)
end = datetime.datetime.now() + timeout
while datetime.datetime.now() < end:
if predicate():
break
time.sleep(0.5)
else:
testCase.fail(message() + " after " + str(timeout))
if __name__ == "__main__":
logging.basicConfig()
unittest.main(verbosity=2)

@ -0,0 +1,25 @@
# Copyright 2024 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
_BEFORE_IMPORT = tuple(globals())
from grpc_csm_observability import * # pylint: disable=wildcard-import,unused-wildcard-import
_AFTER_IMPORT = tuple(globals())
GRPC_CSM_OBSERVABILITY_ELEMENTS = tuple(
element
for element in _AFTER_IMPORT
if element not in _BEFORE_IMPORT and element != "_BEFORE_IMPORT"
)

@ -16,11 +16,12 @@
import logging
import unittest
from tests.observability import _from_csm_observability_import_star
from tests.observability import _from_observability_import_star
class AllTest(unittest.TestCase):
def testAll(self):
def testBaseOtel(self):
expected_observability_code_elements = ("OpenTelemetryPlugin",)
self.assertCountEqual(
@ -28,6 +29,14 @@ class AllTest(unittest.TestCase):
_from_observability_import_star.GRPC_OBSERVABILITY_ELEMENTS,
)
def testCSMOtel(self):
expected_csm_observability_code_elements = ("CsmOpenTelemetryPlugin",)
self.assertCountEqual(
expected_csm_observability_code_elements,
_from_csm_observability_import_star.GRPC_CSM_OBSERVABILITY_ELEMENTS,
)
if __name__ == "__main__":
logging.basicConfig()

@ -0,0 +1,363 @@
# Copyright 2024 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
import datetime
import logging
import os
import sys
import time
from typing import Any, AnyStr, Callable, Dict, List, Optional, Set
import unittest
from google.protobuf import struct_pb2
import grpc_observability
from grpc_observability import _open_telemetry_measures
from grpc_observability._open_telemetry_plugin import OpenTelemetryLabelInjector
from grpc_observability._open_telemetry_plugin import OpenTelemetryPluginOption
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import AggregationTemporality
from opentelemetry.sdk.metrics.export import MetricExportResult
from opentelemetry.sdk.metrics.export import MetricExporter
from opentelemetry.sdk.metrics.export import MetricsData
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from tests.observability import _test_server
logger = logging.getLogger(__name__)
STREAM_LENGTH = 5
OTEL_EXPORT_INTERVAL_S = 0.5
CSM_METADATA_EXCHANGE_LABEL_KEY = "exchange_labels_key"
# The following metrics should have optional labels when optional
# labels is enabled through OpenTelemetryPlugin.
METRIC_NAME_WITH_OPTIONAL_LABEL = [
"grpc.client.attempt.duration"
"grpc.client.attempt.sent_total_compressed_message_size",
"grpc.client.attempt.rcvd_total_compressed_message_size",
]
CSM_OPTIONAL_LABEL_KEYS = ["csm.service_name", "csm.service_namespace_name"]
# The following metrics should have metadata exchange labels when metadata
# exchange flow is triggered.
METRIC_NAME_WITH_EXCHANGE_LABEL = [
"grpc.client.attempt.duration"
"grpc.client.attempt.sent_total_compressed_message_size",
"grpc.client.attempt.rcvd_total_compressed_message_size",
"grpc.server.call.duration",
"grpc.server.call.sent_total_compressed_message_size",
"grpc.server.call.rcvd_total_compressed_message_size",
]
class OTelMetricExporter(MetricExporter):
"""Implementation of :class:`MetricExporter` that export metrics to the
provided metric_list.
all_metrics: A dict which key is grpc_observability._opentelemetry_measures.Metric.name,
value is a list of labels recorded for that metric.
An example item of this dict:
{"grpc.client.attempt.started":
[{'grpc.method': 'test/UnaryUnary', 'grpc.target': 'localhost:42517'},
{'grpc.method': 'other', 'grpc.target': 'localhost:42517'}]}
"""
def __init__(
self,
all_metrics: Dict[str, List],
preferred_temporality: Dict[type, AggregationTemporality] = None,
preferred_aggregation: Dict[
type, "opentelemetry.sdk.metrics.view.Aggregation"
] = None,
):
super().__init__(
preferred_temporality=preferred_temporality,
preferred_aggregation=preferred_aggregation,
)
self.all_metrics = all_metrics
def export(
self,
metrics_data: MetricsData,
timeout_millis: float = 10_000,
**kwargs,
) -> MetricExportResult:
self.record_metric(metrics_data)
return MetricExportResult.SUCCESS
def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
pass
def force_flush(self, timeout_millis: float = 10_000) -> bool:
return True
def record_metric(self, metrics_data: MetricsData) -> None:
for resource_metric in metrics_data.resource_metrics:
for scope_metric in resource_metric.scope_metrics:
for metric in scope_metric.metrics:
for data_point in metric.data.data_points:
self.all_metrics[metric.name].append(
data_point.attributes
)
class TestLabelInjector(OpenTelemetryLabelInjector):
_exchange_labels: Dict[str, AnyStr]
_local_labels: Dict[str, str]
def __init__(
self, local_labels: Dict[str, str], exchange_labels: Dict[str, str]
):
self._exchange_labels = exchange_labels
self._local_labels = local_labels
def get_labels_for_exchange(self) -> Dict[str, AnyStr]:
return self._exchange_labels
def get_additional_labels(
self, include_exchange_labels: bool
) -> Dict[str, str]:
return self._local_labels
def deserialize_labels(
self, labels: Dict[str, AnyStr]
) -> Dict[str, AnyStr]:
deserialized_labels = {}
for key, value in labels.items():
if "XEnvoyPeerMetadata" == key:
struct = struct_pb2.Struct()
struct.ParseFromString(value)
exchange_labels_value = self._get_value_from_struct(
CSM_METADATA_EXCHANGE_LABEL_KEY, struct
)
deserialized_labels[
CSM_METADATA_EXCHANGE_LABEL_KEY
] = exchange_labels_value
else:
deserialized_labels[key] = value
return deserialized_labels
def _get_value_from_struct(
self, key: str, struct: struct_pb2.Struct
) -> str:
value = struct.fields.get(key)
if not value:
return "unknown"
return value.string_value
class TestOpenTelemetryPluginOption(OpenTelemetryPluginOption):
_label_injector: OpenTelemetryLabelInjector
_active_on_client: bool
_active_on_server: bool
def __init__(
self,
label_injector: OpenTelemetryLabelInjector,
active_on_client: Optional[bool] = True,
active_on_server: Optional[bool] = True,
):
self._label_injector = label_injector
self._active_on_client = active_on_client
self._active_on_server = active_on_server
def is_active_on_client_channel(self, target: str) -> bool:
return self._active_on_client
def is_active_on_server(self, xds: bool) -> bool:
return self._active_on_server
def get_label_injector(self) -> OpenTelemetryLabelInjector:
return self._label_injector
@unittest.skipIf(
os.name == "nt" or "darwin" in sys.platform,
"Observability is not supported in Windows and MacOS",
)
class ObservabilityPluginTest(unittest.TestCase):
def setUp(self):
self.all_metrics = defaultdict(list)
otel_exporter = OTelMetricExporter(self.all_metrics)
reader = PeriodicExportingMetricReader(
exporter=otel_exporter,
export_interval_millis=OTEL_EXPORT_INTERVAL_S * 1000,
)
self._provider = MeterProvider(metric_readers=[reader])
self._server = None
self._port = None
def tearDown(self):
if self._server:
self._server.stop(0)
def testLabelInjectorWithLocalLabels(self):
"""Local labels in label injector should be added to all metrics."""
label_injector = TestLabelInjector(
local_labels={"local_labels_key": "local_labels_value"},
exchange_labels={},
)
plugin_option = TestOpenTelemetryPluginOption(
label_injector=label_injector
)
otel_plugin = grpc_observability.OpenTelemetryPlugin(
meter_provider=self._provider, plugin_options=[plugin_option]
)
otel_plugin.register_global()
self._server, port = _test_server.start_server()
_test_server.unary_unary_call(port=port)
otel_plugin.deregister_global()
self._validate_metrics_exist(self.all_metrics)
for name, label_list in self.all_metrics.items():
self._validate_label_exist(name, label_list, ["local_labels_key"])
def testClientSidePluginOption(self):
label_injector = TestLabelInjector(
local_labels={"local_labels_key": "local_labels_value"},
exchange_labels={},
)
plugin_option = TestOpenTelemetryPluginOption(
label_injector=label_injector, active_on_server=False
)
otel_plugin = grpc_observability.OpenTelemetryPlugin(
meter_provider=self._provider, plugin_options=[plugin_option]
)
otel_plugin.register_global()
server, port = _test_server.start_server()
self._server = server
_test_server.unary_unary_call(port=port)
otel_plugin.deregister_global()
self._validate_metrics_exist(self.all_metrics)
for name, label_list in self.all_metrics.items():
if "grpc.client" in name:
self._validate_label_exist(
name, label_list, ["local_labels_key"]
)
for name, label_list in self.all_metrics.items():
if "grpc.server" in name:
self._validate_label_not_exist(
name, label_list, ["local_labels_key"]
)
def testServerSidePluginOption(self):
label_injector = TestLabelInjector(
local_labels={"local_labels_key": "local_labels_value"},
exchange_labels={},
)
plugin_option = TestOpenTelemetryPluginOption(
label_injector=label_injector, active_on_client=False
)
otel_plugin = grpc_observability.OpenTelemetryPlugin(
meter_provider=self._provider, plugin_options=[plugin_option]
)
otel_plugin.register_global()
server, port = _test_server.start_server()
self._server = server
_test_server.unary_unary_call(port=port)
otel_plugin.deregister_global()
self._validate_metrics_exist(self.all_metrics)
for name, label_list in self.all_metrics.items():
if "grpc.client" in name:
self._validate_label_not_exist(
name, label_list, ["local_labels_key"]
)
for name, label_list in self.all_metrics.items():
if "grpc.server" in name:
self._validate_label_exist(
name, label_list, ["local_labels_key"]
)
def assert_eventually(
self,
predicate: Callable[[], bool],
*,
timeout: Optional[datetime.timedelta] = None,
message: Optional[Callable[[], str]] = None,
) -> None:
message = message or (lambda: "Proposition did not evaluate to true")
timeout = timeout or datetime.timedelta(seconds=5)
end = datetime.datetime.now() + timeout
while datetime.datetime.now() < end:
if predicate():
break
time.sleep(0.5)
else:
self.fail(message() + " after " + str(timeout))
def _validate_metrics_exist(self, all_metrics: Dict[str, Any]) -> None:
# Sleep here to make sure we have at least one export from OTel MetricExporter.
self.assert_eventually(
lambda: len(all_metrics.keys()) > 1,
message=lambda: f"No metrics was exported",
)
def _validate_all_metrics_names(self, metric_names: Set[str]) -> None:
self._validate_server_metrics_names(metric_names)
self._validate_client_metrics_names(metric_names)
def _validate_server_metrics_names(self, metric_names: Set[str]) -> None:
for base_metric in _open_telemetry_measures.base_metrics():
if "grpc.server" in base_metric.name:
self.assertTrue(
base_metric.name in metric_names,
msg=f"metric {base_metric.name} not found in exported metrics: {metric_names}!",
)
def _validate_client_metrics_names(self, metric_names: Set[str]) -> None:
for base_metric in _open_telemetry_measures.base_metrics():
if "grpc.client" in base_metric.name:
self.assertTrue(
base_metric.name in metric_names,
msg=f"metric {base_metric.name} not found in exported metrics: {metric_names}!",
)
def _validate_label_exist(
self,
metric_name: str,
metric_label_list: List[str],
labels_to_check: List[str],
) -> None:
for metric_label in metric_label_list:
for label in labels_to_check:
self.assertTrue(
label in metric_label,
msg=f"label with key {label} not found in metric {metric_name}, found label list: {metric_label}",
)
def _validate_label_not_exist(
self,
metric_name: str,
metric_label_list: List[str],
labels_to_check: List[str],
) -> None:
for metric_label in metric_label_list:
for label in labels_to_check:
self.assertFalse(
label in metric_label,
msg=f"found unexpected label with key {label} in metric {metric_name}, found label list: {metric_label}",
)
if __name__ == "__main__":
logging.basicConfig()
unittest.main(verbosity=2)

@ -137,7 +137,7 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
_test_server.unary_unary_call(port=port)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
def testRecordUnaryUnaryUseGlobalInit(self):
otel_plugin = grpc_observability.OpenTelemetryPlugin(
@ -150,7 +150,7 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
_test_server.unary_unary_call(port=port)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
otel_plugin.deregister_global()
def testCallGlobalInitThrowErrorWhenGlobalCalled(self):
@ -248,7 +248,7 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
def testRecordUnaryUnaryWithServerInterceptor(self):
interceptor = _ServerInterceptor()
@ -260,7 +260,7 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
_test_server.unary_unary_call(port=port)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
def testRecordUnaryUnaryClientOnly(self):
server, port = _test_server.start_server()
@ -288,7 +288,7 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
_test_server.unary_unary_call(port=port)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
def testNoRecordAfterExitUseContextManager(self):
with grpc_observability.OpenTelemetryPlugin(
@ -300,7 +300,7 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
_test_server.unary_unary_call(port=port)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
self.all_metrics = defaultdict(list)
_test_server.unary_unary_call(port=self._port)
@ -320,7 +320,7 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
otel_plugin.deregister_global()
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
self.all_metrics = defaultdict(list)
_test_server.unary_unary_call(port=self._port)
@ -336,7 +336,7 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
_test_server.unary_stream_call(port=port)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
def testRecordStreamUnary(self):
with grpc_observability.OpenTelemetryPlugin(
@ -347,7 +347,7 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
_test_server.stream_unary_call(port=port)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
def testRecordStreamStream(self):
with grpc_observability.OpenTelemetryPlugin(
@ -358,7 +358,7 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
_test_server.stream_stream_call(port=port)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
def testTargetAttributeFilter(self):
main_server, main_port = _test_server.start_server()
@ -405,13 +405,13 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
meter_provider=self._provider,
generic_method_attribute_filter=method_filter,
):
server, port = _test_server.start_server()
server, port = _test_server.start_server(register_method=False)
self._server = server
_test_server.unary_unary_call(port=port, registered_method=True)
_test_server.unary_unary_filtered_call(port=port)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
method_values = set()
for label_list in self.all_metrics.values():
for labels in label_list:
@ -420,7 +420,36 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
self.assertTrue(GRPC_OTHER_LABEL_VALUE in method_values)
self.assertTrue(FILTERED_METHOD_NAME not in method_values)
def testNonRegisteredMethod(self):
def testClientNonRegisteredMethod(self):
UNARY_METHOD_NAME = "test/UnaryUnary"
with grpc_observability.OpenTelemetryPlugin(
meter_provider=self._provider
):
server, port = _test_server.start_server(register_method=True)
self._server = server
_test_server.unary_unary_call(port=port, registered_method=False)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics.keys())
client_method_values = set()
server_method_values = set()
for metric_name, label_list in self.all_metrics.items():
for labels in label_list:
if GRPC_METHOD_LABEL in labels:
if "grpc.client" in metric_name:
client_method_values.add(labels[GRPC_METHOD_LABEL])
elif "grpc.server" in metric_name:
server_method_values.add(labels[GRPC_METHOD_LABEL])
# For client metrics, all method name should be replaced with 'other'.
self.assertTrue(GRPC_OTHER_LABEL_VALUE in client_method_values)
self.assertTrue(UNARY_METHOD_NAME not in client_method_values)
# For server metrics, all method name should be 'test/UnaryUnary'.
self.assertTrue(GRPC_OTHER_LABEL_VALUE not in server_method_values)
self.assertTrue(UNARY_METHOD_NAME in server_method_values)
def testServerNonRegisteredMethod(self):
UNARY_METHOD_NAME = "test/UnaryUnary"
with grpc_observability.OpenTelemetryPlugin(
@ -431,15 +460,23 @@ class OpenTelemetryObservabilityTest(unittest.TestCase):
_test_server.unary_unary_call(port=port, registered_method=True)
self._validate_metrics_exist(self.all_metrics)
self._validate_all_metrics_names(self.all_metrics)
method_values = set()
for label_list in self.all_metrics.values():
self._validate_all_metrics_names(self.all_metrics.keys())
client_method_values = set()
server_method_values = set()
for metric_name, label_list in self.all_metrics.items():
for labels in label_list:
if GRPC_METHOD_LABEL in labels:
method_values.add(labels[GRPC_METHOD_LABEL])
# For un-registered method, all method name should be replaced with 'other'.
self.assertTrue(GRPC_OTHER_LABEL_VALUE in method_values)
self.assertTrue(UNARY_METHOD_NAME not in method_values)
if "grpc.client" in metric_name:
client_method_values.add(labels[GRPC_METHOD_LABEL])
elif "grpc.server" in metric_name:
server_method_values.add(labels[GRPC_METHOD_LABEL])
# For client metrics, all method name should be 'test/UnaryUnary'.
self.assertTrue(GRPC_OTHER_LABEL_VALUE not in client_method_values)
self.assertTrue(UNARY_METHOD_NAME in client_method_values)
# For server metrics, all method name should be replaced with 'other'.
self.assertTrue(GRPC_OTHER_LABEL_VALUE in server_method_values)
self.assertTrue(UNARY_METHOD_NAME not in server_method_values)
def assert_eventually(
self,

@ -99,6 +99,7 @@ class _GenericHandler(grpc.GenericRpcHandler):
RPC_METHOD_HANDLERS = {
_UNARY_UNARY_FILTERED: _MethodHandler(False, False),
_UNARY_UNARY: _MethodHandler(False, False),
_UNARY_STREAM: _MethodHandler(False, True),
_STREAM_UNARY: _MethodHandler(True, False),

@ -9,7 +9,10 @@
"tests.health_check._health_servicer_test.HealthServicerTest",
"tests.interop._insecure_intraop_test.InsecureIntraopTest",
"tests.interop._secure_intraop_test.SecureIntraopTest",
"tests.observability._csm_observability_plugin_test.CSMObservabilityPluginTest",
"tests.observability._csm_observability_plugin_test.MetadataExchangeTest",
"tests.observability._observability_api_test.AllTest",
"tests.observability._observability_plugin_test.ObservabilityPluginTest",
"tests.observability._open_telemetry_observability_test.OpenTelemetryObservabilityTest",
"tests.protoc_plugin._python_plugin_test.ModuleMainTest",
"tests.protoc_plugin._python_plugin_test.PythonPluginTest",

@ -0,0 +1,19 @@
%YAML 1.2
--- |
# Copyright 2024 The gRPC Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# AUTO-GENERATED FROM `$REPO_ROOT/templates/src/python/grpcio_csm_observability/grpc_version.py.template`!!!
VERSION = '${settings.python_version.pep440()}'

@ -28,7 +28,7 @@ set -e
BASEDIR=$(dirname "$0")
BASEDIR=$(realpath "$BASEDIR")/../..
PACKAGES="grpcio_channelz grpcio_csds grpcio_admin grpcio_health_checking grpcio_reflection grpcio_status grpcio_testing grpcio_tests"
PACKAGES="grpcio_channelz grpcio_csds grpcio_admin grpcio_health_checking grpcio_reflection grpcio_status grpcio_testing grpcio_csm_observability grpcio_tests"
(cd "$BASEDIR";
pip install --upgrade "cython<3.0.0rc1";

@ -29,6 +29,7 @@ DIRS=(
'src/python/grpcio_testing/grpc_testing'
'src/python/grpcio_status/grpc_status'
'src/python/grpcio_observability/grpc_observability'
'src/python/grpcio_csm_observability/grpc_csm_observability'
)
TEST_DIRS=(

@ -92,6 +92,7 @@ ancillary_package_dir=(
"src/python/grpcio_status/"
"src/python/grpcio_testing/"
"src/python/grpcio_observability/"
"src/python/grpcio_csm_observability/"
)
# Copy license to ancillary package directories so it will be distributed.
@ -240,6 +241,13 @@ if [ "$GRPC_BUILD_MAC" == "" ]; then
cp -r src/python/grpcio_observability/dist/*.whl "$ARTIFACT_DIR"
fi
cp -r src/python/grpcio_observability/dist/*.tar.gz "$ARTIFACT_DIR"
# Build grpcio_csm_observability distribution
if [ "$GRPC_BUILD_MAC" == "" ]; then
${SETARCH_CMD} "${PYTHON}" src/python/grpcio_csm_observability/setup.py \
sdist bdist_wheel
cp -r src/python/grpcio_csm_observability/dist/* "$ARTIFACT_DIR"
fi
fi
# We need to use the built grpcio-tools/grpcio to compile the health proto

@ -178,6 +178,7 @@ if [ "$(is_mingw)" ] || [ "$(is_darwin)" ]; then
else
$VENV_PYTHON "$ROOT/src/python/grpcio_observability/make_grpcio_observability.py"
pip_install_dir_and_deps "$ROOT/src/python/grpcio_observability"
pip_install_dir_and_deps "$ROOT/src/python/grpcio_csm_observability"
fi
# Build/install Channelz

Loading…
Cancel
Save