mirror of https://github.com/grpc/grpc.git
[Python Otel] Re-apply Otel (#35439)
This reverts commit 96b9e8d3e3 (pull/35459/head).
[Implement OpenTelemetry PR](https://github.com/grpc/grpc/pull/35292) was [reverted](96b9e8d3e3) because some tests started failing after importing the changes into g3. After investigation, we found the root cause; it can be fixed both on our side and on the gapic API side. We opened an issue with the [gapic API team](https://github.com/googleapis/python-api-core/issues/579), and this PR includes the fixes on our side. <!-- If you know who should review your pull request, please assign it to that person, otherwise the pull request would get assigned randomly. If your pull request is for a specific language, please add the appropriate lang label. --> Closes #35439 COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/35439 from XuanWang-Amos:reapply_otel 0133564438
PiperOrigin-RevId: 595746222
parent
808886375d
commit
48cf940fd1
30 changed files with 1318 additions and 294 deletions
@ -0,0 +1,38 @@ |
||||
# Copyright 2023 gRPC authors. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from typing import Iterable, List |
||||
|
||||
from grpc_observability import _observability # pytype: disable=pyi-error |
||||
from grpc_observability._open_telemetry_plugin import _OpenTelemetryPlugin |
||||
|
||||
|
||||
class _OpenTelemetryExporterDelegator(_observability.Exporter):
    """Exporter that hands every stats record to a group of plugins.

    Args:
        plugins: The wrapped plugins that should be offered each stats
            record. The iterable is copied into a list on construction.
    """

    _plugins: Iterable[_OpenTelemetryPlugin]

    def __init__(self, plugins: Iterable[_OpenTelemetryPlugin]):
        # Materialize once: export_stats_data() walks the plugins again for
        # every record, so a one-shot iterator (e.g. a generator) stored
        # as-is would be exhausted after the first record and all later
        # records would silently be dropped.
        self._plugins = list(plugins)

    def export_stats_data(
        self, stats_data: List[_observability.StatsData]
    ) -> None:
        """Offer each stats record to every plugin.

        Each plugin decides on its own, inside maybe_record_stats_data(),
        whether it records the entry.

        Args:
            stats_data: The batch of stats records to export.
        """
        for data in stats_data:
            for plugin in self._plugins:
                plugin.maybe_record_stats_data(data)

    def export_tracing_data(
        self, tracing_data: List[_observability.TracingData]
    ) -> None:
        """Accept tracing data and discard it; this exporter ignores traces.

        Args:
            tracing_data: The batch of tracing records (unused).
        """
        pass
@ -0,0 +1,97 @@ |
||||
# Copyright 2023 gRPC authors. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import collections |
||||
from typing import List |
||||
|
||||
from grpc_observability._cyobservability import MetricsName |
||||
|
||||
|
||||
class Metric(
    collections.namedtuple(
        "Metric",
        ["name", "cyname", "unit", "description"],
    )
):
    """Immutable description of one gRPC metric.

    Fields:
        name: The metric name used when the instrument is created.
        cyname: The key under which the metric is looked up when stats
            records arrive (a MetricsName member for the built-in metrics).
        unit: The unit string passed to the instrument.
        description: Human readable description of the metric.
    """

    # Without empty __slots__ a namedtuple subclass grows a per-instance
    # __dict__, which wastes memory and lets callers attach stray
    # attributes to what is meant to be an immutable record.
    __slots__ = ()
||||
|
||||
|
||||
# Client-side metrics.
CLIENT_ATTEMPT_STARTED = Metric(
    name="grpc.client.attempt.started",
    cyname=MetricsName.CLIENT_STARTED_RPCS,
    unit="{attempt}",
    description="Number of client call attempts started",
)
CLIENT_ATTEMPT_DURATION = Metric(
    name="grpc.client.attempt.duration",
    cyname=MetricsName.CLIENT_ROUNDTRIP_LATENCY,
    unit="s",
    description="End-to-end time taken to complete a client call attempt",
)
CLIENT_RPC_DURATION = Metric(
    name="grpc.client.call.duration",
    cyname=MetricsName.CLIENT_API_LATENCY,
    unit="s",
    description=(
        "End-to-end time taken to complete a call from client's perspective"
    ),
)
CLIENT_ATTEMPT_SEND_BYTES = Metric(
    name="grpc.client.attempt.sent_total_compressed_message_size",
    cyname=MetricsName.CLIENT_SEND_BYTES_PER_RPC,
    unit="By",
    description="Compressed message bytes sent per client call attempt",
)
CLIENT_ATTEMPT_RECEIVED_BYTES = Metric(
    name="grpc.client.attempt.rcvd_total_compressed_message_size",
    cyname=MetricsName.CLIENT_RECEIVED_BYTES_PER_RPC,
    unit="By",
    description="Compressed message bytes received per call attempt",
)

# Server-side metrics.
SERVER_STARTED_RPCS = Metric(
    name="grpc.server.call.started",
    cyname=MetricsName.SERVER_STARTED_RPCS,
    unit="{call}",
    description="Number of server calls started",
)
SERVER_RPC_DURATION = Metric(
    name="grpc.server.call.duration",
    cyname=MetricsName.SERVER_SERVER_LATENCY,
    unit="s",
    description=(
        "End-to-end time taken to complete a call from server transport's perspective"
    ),
)
SERVER_RPC_SEND_BYTES = Metric(
    name="grpc.server.call.sent_total_compressed_message_size",
    cyname=MetricsName.SERVER_SENT_BYTES_PER_RPC,
    unit="By",
    description="Compressed message bytes sent per server call",
)
SERVER_RPC_RECEIVED_BYTES = Metric(
    name="grpc.server.call.rcvd_total_compressed_message_size",
    cyname=MetricsName.SERVER_RECEIVED_BYTES_PER_RPC,
    unit="By",
    description="Compressed message bytes received per server call",
)
||||
|
||||
|
||||
def base_metrics() -> List[Metric]:
    """Return a fresh list of the base metrics.

    Returns:
        The five client metrics followed by the four server metrics.
    """
    client_side = [
        CLIENT_ATTEMPT_STARTED,
        CLIENT_ATTEMPT_DURATION,
        CLIENT_RPC_DURATION,
        CLIENT_ATTEMPT_SEND_BYTES,
        CLIENT_ATTEMPT_RECEIVED_BYTES,
    ]
    server_side = [
        SERVER_STARTED_RPCS,
        SERVER_RPC_DURATION,
        SERVER_RPC_SEND_BYTES,
        SERVER_RPC_RECEIVED_BYTES,
    ]
    return client_side + server_side
@ -0,0 +1,155 @@ |
||||
# Copyright 2023 gRPC authors. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import logging |
||||
import time |
||||
from typing import Any, Iterable, Optional |
||||
|
||||
import grpc |
||||
|
||||
# pytype: disable=pyi-error |
||||
from grpc_observability import _cyobservability |
||||
from grpc_observability._open_telemetry_exporter import ( |
||||
_OpenTelemetryExporterDelegator, |
||||
) |
||||
from grpc_observability._open_telemetry_plugin import OpenTelemetryPlugin |
||||
from grpc_observability._open_telemetry_plugin import _OpenTelemetryPlugin |
||||
|
||||
_LOGGER = logging.getLogger(__name__)

# Opaque capsule aliases; each appears only once in a function signature.
ClientCallTracerCapsule = Any
ServerCallTracerFactoryCapsule = Any
grpc_observability = Any  # grpc_observability.py imports this module.

# Maps each status code to its upper-case string form, which is exactly
# the name of the enum member.
GRPC_STATUS_CODE_TO_STRING = {
    code: code.name
    for code in (
        grpc.StatusCode.OK,
        grpc.StatusCode.CANCELLED,
        grpc.StatusCode.UNKNOWN,
        grpc.StatusCode.INVALID_ARGUMENT,
        grpc.StatusCode.DEADLINE_EXCEEDED,
        grpc.StatusCode.NOT_FOUND,
        grpc.StatusCode.ALREADY_EXISTS,
        grpc.StatusCode.PERMISSION_DENIED,
        grpc.StatusCode.UNAUTHENTICATED,
        grpc.StatusCode.RESOURCE_EXHAUSTED,
        grpc.StatusCode.FAILED_PRECONDITION,
        grpc.StatusCode.ABORTED,
        grpc.StatusCode.OUT_OF_RANGE,
        grpc.StatusCode.UNIMPLEMENTED,
        grpc.StatusCode.INTERNAL,
        grpc.StatusCode.UNAVAILABLE,
        grpc.StatusCode.DATA_LOSS,
    )
}
||||
|
||||
|
||||
# pylint: disable=no-self-use
class OpenTelemetryObservability(grpc._observability.ObservabilityPlugin):
    """OpenTelemetry based plugin implementation.

    Intended to be used as a context manager: entering initializes
    observability with the configured exporter, leaving calls exit().

    Args:
        plugins: OpenTelemetryPlugins to enable. Each one is wrapped in an
            internal _OpenTelemetryPlugin.
        exporter: Exporter used to export data. When omitted, an exporter
            that delegates to the wrapped plugins is created.

    Raises:
        ValueError: If activating stats collection fails.
    """

    exporter: "grpc_observability.Exporter"
    # NOTE(review): declared here but never assigned by this class.
    plugins: Iterable[OpenTelemetryPlugin]

    def __init__(
        self,
        *,
        plugins: Optional[Iterable[OpenTelemetryPlugin]] = None,
        exporter: Optional["grpc_observability.Exporter"] = None,
    ):
        wrapped_plugins = [
            _OpenTelemetryPlugin(plugin) for plugin in (plugins or ())
        ]

        # Compare against None explicitly so that a valid exporter object
        # which happens to be falsy is not silently replaced.
        if exporter is not None:
            self.exporter = exporter
        else:
            self.exporter = _OpenTelemetryExporterDelegator(wrapped_plugins)

        try:
            _cyobservability.activate_stats()
            self.set_stats(True)
        except Exception as e:  # pylint: disable=broad-except
            # Chain explicitly so the original failure stays attached as
            # the cause of the ValueError.
            raise ValueError(
                f"Activate observability metrics failed with: {e}"
            ) from e

    def __enter__(self):
        """Initialize observability and return this plugin.

        A failure of the Cython-level initialization is logged, not raised.
        """
        try:
            _cyobservability.cyobservability_init(self.exporter)
        # TODO(xuanwn): Use specific exceptions
        except Exception as e:  # pylint: disable=broad-except
            _LOGGER.exception("Initiate observability failed with: %s", e)

        grpc._observability.observability_init(self)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Tear down observability when leaving the context."""
        self.exit()

    def exit(self) -> None:
        """Disable tracing and stats and de-initialize observability.

        Blocks for CENSUS_EXPORT_BATCH_INTERVAL_SECS before tearing down.
        """
        # Sleep so we don't lose any data. If we shutdown export thread
        # immediately after exit, it's possible that core didn't call RecordEnd
        # in callTracer, and all data recorded by calling RecordEnd will be
        # lost.
        # CENSUS_EXPORT_BATCH_INTERVAL_SECS: The time equals to the time in
        # AwaitNextBatchLocked.
        # TODO(xuanwn): explicit synchronization
        # https://github.com/grpc/grpc/issues/33262
        time.sleep(_cyobservability.CENSUS_EXPORT_BATCH_INTERVAL_SECS)
        self.set_tracing(False)
        self.set_stats(False)
        _cyobservability.observability_deinit()
        grpc._observability.observability_deinit()

    def create_client_call_tracer(
        self, method_name: bytes, target: bytes
    ) -> ClientCallTracerCapsule:
        """Create a client call tracer capsule for one call.

        Args:
            method_name: The method name of the call.
            target: The target of the call.

        Returns:
            The capsule produced by the Cython layer.
        """
        # A fixed placeholder trace id is passed for every call.
        trace_id = b"TRACE_ID"
        return _cyobservability.create_client_call_tracer(
            method_name, target, trace_id
        )

    def create_server_call_tracer_factory(
        self,
    ) -> ServerCallTracerFactoryCapsule:
        """Create and return a server call tracer factory capsule."""
        return _cyobservability.create_server_call_tracer_factory_capsule()

    def delete_client_call_tracer(
        self, client_call_tracer: ClientCallTracerCapsule
    ) -> None:
        """Delete a capsule created by create_client_call_tracer().

        Args:
            client_call_tracer: The capsule to delete.
        """
        _cyobservability.delete_client_call_tracer(client_call_tracer)

    def save_trace_context(
        self, trace_id: str, span_id: str, is_sampled: bool
    ) -> None:
        """Do nothing; this plugin does not store trace context."""
        pass

    def record_rpc_latency(
        self,
        method: str,
        target: str,
        rpc_latency: float,
        status_code: grpc.StatusCode,
    ) -> None:
        """Record the latency of one RPC through the exporter.

        Args:
            method: The method name of the RPC.
            target: The target of the RPC.
            rpc_latency: The measured latency.
            status_code: The final status of the RPC; codes missing from
                GRPC_STATUS_CODE_TO_STRING are reported as "UNKNOWN".
        """
        # Use a separate local instead of rebinding the parameter to a
        # value of a different type.
        status_string = GRPC_STATUS_CODE_TO_STRING.get(status_code, "UNKNOWN")
        _cyobservability._record_rpc_latency(
            self.exporter, method, target, rpc_latency, status_string
        )
@ -0,0 +1,254 @@ |
||||
# Copyright 2023 gRPC authors. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
import abc |
||||
from typing import Dict, Iterable, List, Optional, Union |
||||
|
||||
# pytype: disable=pyi-error |
||||
import grpc |
||||
from grpc_observability import _open_telemetry_measures |
||||
from grpc_observability._cyobservability import MetricsName |
||||
from grpc_observability._observability import StatsData |
||||
from opentelemetry.sdk.metrics import Counter |
||||
from opentelemetry.sdk.metrics import Histogram |
||||
from opentelemetry.sdk.metrics import Meter |
||||
from opentelemetry.sdk.metrics import MeterProvider |
||||
|
||||
# Label key under which the RPC method name is stored on a stats record.
GRPC_METHOD_LABEL = "grpc.method"
# Label key under which the RPC target is stored on a stats record.
GRPC_TARGET_LABEL = "grpc.target"
# Replacement label value used when a plugin filter rejects the original
# target or method value.
GRPC_OTHER_LABEL_VALUE = "other"
||||
|
||||
|
||||
class OpenTelemetryLabelInjector(abc.ABC):
    """Interface for attaching additional labels to the calls traced.

    Please note that this class is still work in progress and NOT READY
    to be used.
    """

    _labels: List[Dict[str, str]]

    def __init__(self):
        """Placeholder constructor; currently does nothing.

        Intended to call the Python OTel API to detect the resource, get
        its labels and save those labels on the injector.
        """

    @abc.abstractmethod
    def get_labels(self):
        """Get additional labels for this OpenTelemetryLabelInjector."""
        raise NotImplementedError()
||||
|
||||
|
||||
class OpenTelemetryPluginOption(abc.ABC):
    """Interface for adding additional functionality to OpenTelemetryPlugin.

    Please note that this class is still work in progress and NOT READY
    to be used.
    """

    @abc.abstractmethod
    def is_active_on_method(self, method: str) -> bool:
        """Decide whether this plugin option applies to a given method.

        Args:
            method: Required. The RPC method, for example:
                `/helloworld.Greeter/SayHello`.

        Returns:
            True if this plugin option is active on the given method,
            False otherwise.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def is_active_on_server(self, channel_args: List[str]) -> bool:
        """Decide whether this plugin option applies to a given server.

        Args:
            channel_args: Required. The channel args used for the server.
                TODO(xuanwn): detail on what channel_args will contain.

        Returns:
            True if this plugin option is active on the server, False
            otherwise.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_label_injector(self) -> Optional[OpenTelemetryLabelInjector]:
        """Return the label injector used by this plugin option, or None."""
        raise NotImplementedError()
||||
|
||||
|
||||
# pylint: disable=no-self-use
class OpenTelemetryPlugin:
    """Describes a Plugin for OpenTelemetry observability.

    Subclasses override the methods below to customize the plugin; the
    defaults supply no plugin options and no meter provider.
    """

    def get_plugin_options(
        self,
    ) -> Iterable[OpenTelemetryPluginOption]:
        """Return the plugin options of this plugin (none by default)."""
        return []

    def get_meter_provider(self) -> Optional[MeterProvider]:
        """Return the MeterProvider to record to (None by default)."""
        return None

    def target_attribute_filter(
        self, target: str  # pylint: disable=unused-argument
    ) -> bool:
        """Decide whether the target attribute is recorded verbatim.

        If set, this will be called per channel to decide whether to
        record the target attribute on client or to replace it with
        "other". This helps reduce the cardinality on metrics in cases
        where many channels are created with different targets in the same
        binary (which might happen for example, if the channel target
        string uses IP addresses directly).

        Args:
            target: The target for the RPC.

        Returns:
            bool: True means the original target string will be used,
            False means target string will be replaced with "other".
        """
        return True

    def generic_method_attribute_filter(
        self, method: str  # pylint: disable=unused-argument
    ) -> bool:
        """Decide whether a generic method name is recorded verbatim.

        If set, this will be called with a generic method type to decide
        whether to record the method name or to replace it with "other".

        Note that pre-registered methods will always be recorded no matter
        what this function returns.

        Args:
            method: The method name for the RPC.

        Returns:
            bool: True means the original method name will be used, False
            means method name will be replaced with "other".
        """
        return False
||||
|
||||
|
||||
class _OpenTelemetryPlugin:
    """Internal wrapper that records stats for one OpenTelemetryPlugin.

    On construction it asks the wrapped plugin for a meter provider and,
    if one is supplied, registers an instrument for every base metric.
    Without a meter provider nothing is registered and nothing is recorded.

    Args:
        plugin: The user supplied plugin to wrap.
    """

    _plugin: OpenTelemetryPlugin
    _metric_to_recorder: Dict[MetricsName, Union[Counter, Histogram]]

    def __init__(self, plugin: OpenTelemetryPlugin):
        self._plugin = plugin
        self._metric_to_recorder = {}

        meter_provider = self._plugin.get_meter_provider()
        if meter_provider:
            meter = meter_provider.get_meter("grpc-python", grpc.__version__)
            enabled_metrics = _open_telemetry_measures.base_metrics()
            self._metric_to_recorder = self._register_metrics(
                meter, enabled_metrics
            )

    def _should_record(self, stats_data: StatsData) -> bool:
        """Return True if an instrument is registered for this record."""
        return stats_data.name in self._metric_to_recorder

    def _record_stats_data(self, stats_data: StatsData) -> None:
        """Record one stats record on the instrument registered for it.

        Args:
            stats_data: The record to write; its name must be registered.
        """
        recorder = self._metric_to_recorder[stats_data.name]

        # Work on a copy of the labels. The same StatsData object is handed
        # to every plugin, so rewriting its labels in place would leak one
        # plugin's filter decision ("other") into all plugins after it.
        labels = dict(stats_data.labels)

        target = labels.get(GRPC_TARGET_LABEL, "")
        if not self._plugin.target_attribute_filter(target):
            # Filter target name.
            labels[GRPC_TARGET_LABEL] = GRPC_OTHER_LABEL_VALUE

        method = labels.get(GRPC_METHOD_LABEL, "")
        if not self._plugin.generic_method_attribute_filter(method):
            # Filter method name.
            labels[GRPC_METHOD_LABEL] = GRPC_OTHER_LABEL_VALUE

        if stats_data.measure_double:
            value = stats_data.value_float
        else:
            value = stats_data.value_int

        if isinstance(recorder, Counter):
            recorder.add(value, attributes=labels)
        elif isinstance(recorder, Histogram):
            recorder.record(value, attributes=labels)

    # pylint: disable=no-self-use
    def maybe_record_stats_data(self, stats_data: StatsData) -> None:
        """Record a single stats record if this plugin has an instrument for it.

        Args:
            stats_data: One stats record (not a list of records).
        """
        if self._should_record(stats_data):
            self._record_stats_data(stats_data)

    def _register_metrics(
        self, meter: Meter, metrics: List[_open_telemetry_measures.Metric]
    ) -> Dict[MetricsName, Union[Counter, Histogram]]:
        """Create one instrument per known metric.

        Args:
            meter: The meter used to create the instruments.
            metrics: The metrics to register.

        Returns:
            A mapping from each metric's cyname to its instrument. Metrics
            that are not one of the known base metrics are skipped.
        """
        counter_metrics = (
            _open_telemetry_measures.CLIENT_ATTEMPT_STARTED,
            _open_telemetry_measures.SERVER_STARTED_RPCS,
        )
        histogram_metrics = (
            _open_telemetry_measures.CLIENT_ATTEMPT_DURATION,
            _open_telemetry_measures.CLIENT_RPC_DURATION,
            _open_telemetry_measures.CLIENT_ATTEMPT_SEND_BYTES,
            _open_telemetry_measures.CLIENT_ATTEMPT_RECEIVED_BYTES,
            _open_telemetry_measures.SERVER_RPC_DURATION,
            _open_telemetry_measures.SERVER_RPC_SEND_BYTES,
            _open_telemetry_measures.SERVER_RPC_RECEIVED_BYTES,
        )

        metric_to_recorder_map = {}
        for metric in metrics:
            if metric in counter_metrics:
                create_instrument = meter.create_counter
            elif metric in histogram_metrics:
                create_instrument = meter.create_histogram
            else:
                # Skip unknown metrics instead of binding them to whatever
                # instrument the previous loop iteration created.
                continue
            metric_to_recorder_map[metric.cyname] = create_instrument(
                name=metric.name,
                unit=metric.unit,
                description=metric.description,
            )
        return metric_to_recorder_map
@ -0,0 +1,299 @@ |
||||
# Copyright 2023 gRPC authors. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from collections import defaultdict |
||||
import datetime |
||||
import logging |
||||
import os |
||||
import sys |
||||
import time |
||||
from typing import Any, Callable, Dict, List, Optional, Set |
||||
import unittest |
||||
|
||||
import grpc_observability |
||||
from grpc_observability import _open_telemetry_measures |
||||
from grpc_observability._open_telemetry_plugin import GRPC_METHOD_LABEL |
||||
from grpc_observability._open_telemetry_plugin import GRPC_OTHER_LABEL_VALUE |
||||
from grpc_observability._open_telemetry_plugin import GRPC_TARGET_LABEL |
||||
from opentelemetry.sdk.metrics import MeterProvider |
||||
from opentelemetry.sdk.metrics.export import AggregationTemporality |
||||
from opentelemetry.sdk.metrics.export import MetricExportResult |
||||
from opentelemetry.sdk.metrics.export import MetricExporter |
||||
from opentelemetry.sdk.metrics.export import MetricsData |
||||
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader |
||||
|
||||
from tests.observability import _test_server |
||||
|
||||
logger = logging.getLogger(__name__) |
||||
|
||||
STREAM_LENGTH = 5 |
||||
OTEL_EXPORT_INTERVAL_S = 0.5 |
||||
|
||||
|
||||
class OTelMetricExporter(MetricExporter):
    """A :class:`MetricExporter` that collects exported metrics in a dict.

    all_metrics: A dict whose key is
        grpc_observability._opentelemetry_measures.Metric.name and whose
        value is a list of labels recorded for that metric.
        An example item of this dict:
            {"grpc.client.attempt.started":
              [{'grpc.method': 'test/UnaryUnary', 'grpc.target': 'localhost:42517'},
               {'grpc.method': 'other', 'grpc.target': 'localhost:42517'}]}
    """

    def __init__(
        self,
        all_metrics: Dict[str, List],
        preferred_temporality: Dict[type, AggregationTemporality] = None,
        preferred_aggregation: Dict[
            type, "opentelemetry.sdk.metrics.view.Aggregation"
        ] = None,
    ):
        super().__init__(
            preferred_temporality=preferred_temporality,
            preferred_aggregation=preferred_aggregation,
        )
        self.all_metrics = all_metrics

    def export(
        self,
        metrics_data: MetricsData,
        timeout_millis: float = 10_000,
        **kwargs,
    ) -> MetricExportResult:
        """Store the exported data points and report success."""
        self.record_metric(metrics_data)
        return MetricExportResult.SUCCESS

    def shutdown(self, timeout_millis: float = 30_000, **kwargs) -> None:
        """Nothing to release."""

    def force_flush(self, timeout_millis: float = 10_000) -> bool:
        """Nothing is buffered, so flushing always succeeds."""
        return True

    def record_metric(self, metrics_data: MetricsData) -> None:
        """Append the attributes of every data point to all_metrics.

        The entry for a metric name is only touched when that metric has
        at least one data point.
        """
        named_points = (
            (metric.name, point)
            for resource_metric in metrics_data.resource_metrics
            for scope_metric in resource_metric.scope_metrics
            for metric in scope_metric.metrics
            for point in metric.data.data_points
        )
        for metric_name, point in named_points:
            self.all_metrics[metric_name].append(point.attributes)
||||
|
||||
|
||||
class BaseTestOpenTelemetryPlugin(grpc_observability.OpenTelemetryPlugin):
    """Test plugin that hands out the MeterProvider it was built with."""

    def __init__(self, provider: MeterProvider):
        self.provider = provider

    def get_meter_provider(self) -> Optional[MeterProvider]:
        """Return the provider passed to the constructor."""
        meter_provider = self.provider
        return meter_provider
||||
|
||||
|
||||
@unittest.skipIf( |
||||
os.name == "nt" or "darwin" in sys.platform, |
||||
"Observability is not supported in Windows and MacOS", |
||||
) |
||||
class OpenTelemetryObservabilityTest(unittest.TestCase): |
||||
def setUp(self): |
||||
self.all_metrics = defaultdict(list) |
||||
otel_exporter = OTelMetricExporter(self.all_metrics) |
||||
reader = PeriodicExportingMetricReader( |
||||
exporter=otel_exporter, |
||||
export_interval_millis=OTEL_EXPORT_INTERVAL_S * 1000, |
||||
) |
||||
self._provider = MeterProvider(metric_readers=[reader]) |
||||
self._server = None |
||||
self._port = None |
||||
|
||||
def tearDown(self): |
||||
if self._server: |
||||
self._server.stop(0) |
||||
|
||||
def testRecordUnaryUnary(self): |
||||
otel_plugin = BaseTestOpenTelemetryPlugin(self._provider) |
||||
with grpc_observability.OpenTelemetryObservability( |
||||
plugins=[otel_plugin] |
||||
): |
||||
server, port = _test_server.start_server() |
||||
self._server = server |
||||
_test_server.unary_unary_call(port=port) |
||||
|
||||
self._validate_metrics_exist(self.all_metrics) |
||||
self._validate_all_metrics_names(self.all_metrics) |
||||
|
||||
def testRecordUnaryUnaryClientOnly(self): |
||||
server, port = _test_server.start_server() |
||||
self._server = server |
||||
|
||||
otel_plugin = BaseTestOpenTelemetryPlugin(self._provider) |
||||
with grpc_observability.OpenTelemetryObservability( |
||||
plugins=[otel_plugin] |
||||
): |
||||
_test_server.unary_unary_call(port=port) |
||||
|
||||
self._validate_metrics_exist(self.all_metrics) |
||||
self._validate_client_metrics_names(self.all_metrics) |
||||
|
||||
def testRecordUnaryStream(self): |
||||
otel_plugin = BaseTestOpenTelemetryPlugin(self._provider) |
||||
|
||||
with grpc_observability.OpenTelemetryObservability( |
||||
plugins=[otel_plugin] |
||||
): |
||||
server, port = _test_server.start_server() |
||||
self._server = server |
||||
_test_server.unary_stream_call(port=port) |
||||
|
||||
self._validate_metrics_exist(self.all_metrics) |
||||
self._validate_all_metrics_names(self.all_metrics) |
||||
|
||||
def testRecordStreamUnary(self): |
||||
otel_plugin = BaseTestOpenTelemetryPlugin(self._provider) |
||||
|
||||
with grpc_observability.OpenTelemetryObservability( |
||||
plugins=[otel_plugin] |
||||
): |
||||
server, port = _test_server.start_server() |
||||
self._server = server |
||||
_test_server.stream_unary_call(port=port) |
||||
|
||||
self._validate_metrics_exist(self.all_metrics) |
||||
self._validate_all_metrics_names(self.all_metrics) |
||||
|
||||
def testRecordStreamStream(self): |
||||
otel_plugin = BaseTestOpenTelemetryPlugin(self._provider) |
||||
|
||||
with grpc_observability.OpenTelemetryObservability( |
||||
plugins=[otel_plugin] |
||||
): |
||||
server, port = _test_server.start_server() |
||||
self._server = server |
||||
_test_server.stream_stream_call(port=port) |
||||
|
||||
self._validate_metrics_exist(self.all_metrics) |
||||
self._validate_all_metrics_names(self.all_metrics) |
||||
|
||||
def testTargetAttributeFilter(self): |
||||
main_server, main_port = _test_server.start_server() |
||||
backup_server, backup_port = _test_server.start_server() |
||||
main_target = f"localhost:{main_port}" |
||||
backup_target = f"localhost:{backup_port}" |
||||
|
||||
# Replace target label with 'other' for main_server. |
||||
def target_filter(target: str) -> bool: |
||||
if main_target in target: |
||||
return False |
||||
return True |
||||
|
||||
otel_plugin = BaseTestOpenTelemetryPlugin(self._provider) |
||||
otel_plugin.target_attribute_filter = target_filter |
||||
|
||||
with grpc_observability.OpenTelemetryObservability( |
||||
plugins=[otel_plugin] |
||||
): |
||||
_test_server.unary_unary_call(port=main_port) |
||||
_test_server.unary_unary_call(port=backup_port) |
||||
|
||||
self._validate_metrics_exist(self.all_metrics) |
||||
self._validate_client_metrics_names(self.all_metrics) |
||||
|
||||
target_values = set() |
||||
for label_list in self.all_metrics.values(): |
||||
for labels in label_list: |
||||
if GRPC_TARGET_LABEL in labels: |
||||
target_values.add(labels[GRPC_TARGET_LABEL]) |
||||
self.assertTrue(GRPC_OTHER_LABEL_VALUE in target_values) |
||||
self.assertTrue(backup_target in target_values) |
||||
|
||||
main_server.stop(0) |
||||
backup_server.stop(0) |
||||
|
||||
def testMethodAttributeFilter(self): |
||||
# If method name is 'test/UnaryUnaryFiltered', is should be replaced with 'other'. |
||||
FILTERED_METHOD_NAME = "test/UnaryUnaryFiltered" |
||||
|
||||
def method_filter(method: str) -> bool: |
||||
if FILTERED_METHOD_NAME in method: |
||||
return False |
||||
return True |
||||
|
||||
otel_plugin = BaseTestOpenTelemetryPlugin(self._provider) |
||||
otel_plugin.generic_method_attribute_filter = method_filter |
||||
|
||||
with grpc_observability.OpenTelemetryObservability( |
||||
plugins=[otel_plugin] |
||||
): |
||||
server, port = _test_server.start_server() |
||||
self._server = server |
||||
_test_server.unary_unary_call(port=port) |
||||
_test_server.unary_unary_filtered_call(port=port) |
||||
|
||||
self._validate_metrics_exist(self.all_metrics) |
||||
self._validate_all_metrics_names(self.all_metrics) |
||||
method_values = set() |
||||
for label_list in self.all_metrics.values(): |
||||
for labels in label_list: |
||||
if GRPC_METHOD_LABEL in labels: |
||||
method_values.add(labels[GRPC_METHOD_LABEL]) |
||||
self.assertTrue(GRPC_OTHER_LABEL_VALUE in method_values) |
||||
self.assertTrue(FILTERED_METHOD_NAME not in method_values) |
||||
|
||||
def assert_eventually( |
||||
self, |
||||
predicate: Callable[[], bool], |
||||
*, |
||||
timeout: Optional[datetime.timedelta] = None, |
||||
message: Optional[Callable[[], str]] = None, |
||||
) -> None: |
||||
message = message or (lambda: "Proposition did not evaluate to true") |
||||
timeout = timeout or datetime.timedelta(seconds=10) |
||||
end = datetime.datetime.now() + timeout |
||||
while datetime.datetime.now() < end: |
||||
if predicate(): |
||||
break |
||||
time.sleep(0.5) |
||||
else: |
||||
self.fail(message() + " after " + str(timeout)) |
||||
|
||||
def _validate_metrics_exist(self, all_metrics: Dict[str, Any]) -> None: |
||||
# Sleep here to make sure we have at least one export from OTel MetricExporter. |
||||
self.assert_eventually( |
||||
lambda: len(all_metrics.keys()) > 1, |
||||
message=lambda: f"No metrics was exported", |
||||
) |
||||
|
||||
def _validate_all_metrics_names(self, metric_names: Set[str]) -> None: |
||||
self._validate_server_metrics_names(metric_names) |
||||
self._validate_client_metrics_names(metric_names) |
||||
|
||||
def _validate_server_metrics_names(self, metric_names: Set[str]) -> None:
    """Assert that every server-side base metric name is in ``metric_names``.

    A base metric counts as server-side when its name contains
    ``"grpc.server"``.

    Args:
        metric_names: Container of exported metric names to check against.
    """
    for base_metric in _open_telemetry_measures.base_metrics():
        if "grpc.server" not in base_metric.name:
            continue
        # assertIn reports the missing member and the container on failure,
        # unlike assertTrue on a pre-evaluated boolean.
        self.assertIn(
            base_metric.name,
            metric_names,
            msg=f"metric {base_metric.name} not found in exported metrics: {metric_names}!",
        )
||||
|
||||
def _validate_client_metrics_names(self, metric_names: Set[str]) -> None:
    """Assert that every client-side base metric name is in ``metric_names``.

    A base metric counts as client-side when its name contains
    ``"grpc.client"``.

    Args:
        metric_names: Container of exported metric names to check against.
    """
    for base_metric in _open_telemetry_measures.base_metrics():
        if "grpc.client" not in base_metric.name:
            continue
        # assertIn reports the missing member and the container on failure,
        # unlike assertTrue on a pre-evaluated boolean.
        self.assertIn(
            base_metric.name,
            metric_names,
            msg=f"metric {base_metric.name} not found in exported metrics: {metric_names}!",
        )
||||
|
||||
|
||||
# Script entry point: only runs when this module is executed directly.
if __name__ == "__main__":
    # Set up default logging configuration before any test runs.
    logging.basicConfig()
    # Run the tests in this module with verbose (per-test) reporting.
    unittest.main(verbosity=2)
@ -0,0 +1,169 @@ |
||||
# Copyright 2023 gRPC authors. |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from concurrent import futures |
||||
from typing import Tuple |
||||
|
||||
import grpc |
||||
|
||||
# Fixed payloads sent by the client helpers and returned by the handlers.
_REQUEST = b"\x00\x00\x00"
_RESPONSE = b"\x00\x00\x00"

# Fully-qualified method paths recognised by _GenericHandler.
_UNARY_UNARY = "/test/UnaryUnary"
_UNARY_UNARY_FILTERED = "/test/UnaryUnaryFiltered"
_UNARY_STREAM = "/test/UnaryStream"
_STREAM_UNARY = "/test/StreamUnary"
_STREAM_STREAM = "/test/StreamStream"
# Number of messages used by the streaming handler and streaming call helpers.
STREAM_LENGTH = 5
# Metadata pair that makes handle_unary_unary act on the rest of the metadata.
TRIGGER_RPC_METADATA = ("control", "trigger_rpc")
# Metadata pair whose key makes handle_unary_unary start and call a second server.
TRIGGER_RPC_TO_NEW_SERVER_METADATA = ("to_new_server", "")
||||
|
||||
|
||||
def handle_unary_unary(request, servicer_context):
    """Unary-unary handler that can trigger nested RPCs via metadata.

    When the invocation metadata contains ``TRIGGER_RPC_METADATA``, every
    metadata entry is inspected:

    * a key containing ``"port"`` causes a nested ``unary_unary_call`` to the
      port given by the entry's value;
    * a key containing ``"to_new_server"`` starts a second in-process server
      on a port chosen by ``add_insecure_port("[::]:0")``, calls it once and
      stops it again.

    Args:
        request: Incoming request payload (not inspected).
        servicer_context: Context object providing ``invocation_metadata()``.

    Returns:
        The fixed ``_RESPONSE`` payload.
    """
    # Fetch the metadata once and reuse it for both the check and the loop.
    metadata = servicer_context.invocation_metadata()
    if TRIGGER_RPC_METADATA in metadata:
        for k, v in metadata:
            if "port" in k:
                unary_unary_call(port=int(v))
            if "to_new_server" in k:
                second_server = grpc.server(
                    futures.ThreadPoolExecutor(max_workers=10)
                )
                second_server.add_generic_rpc_handlers((_GenericHandler(),))
                second_server_port = second_server.add_insecure_port("[::]:0")
                second_server.start()
                try:
                    unary_unary_call(port=second_server_port)
                finally:
                    # Always stop the helper server, even if the nested call
                    # raised, so it does not outlive this handler.
                    second_server.stop(0)
    return _RESPONSE
||||
|
||||
|
||||
def handle_unary_stream(request, servicer_context):
    """Unary-stream handler: yield ``_RESPONSE`` ``STREAM_LENGTH`` times.

    Neither ``request`` nor ``servicer_context`` is inspected.
    """
    yield from [_RESPONSE] * STREAM_LENGTH
||||
|
||||
|
||||
def handle_stream_unary(request_iterator, servicer_context):
    """Stream-unary handler: reply with the fixed ``_RESPONSE`` payload.

    ``request_iterator`` is not read here and ``servicer_context`` is unused.
    """
    return _RESPONSE
||||
|
||||
|
||||
def handle_stream_stream(request_iterator, servicer_context):
    """Stream-stream handler: yield one ``_RESPONSE`` per incoming request.

    The request contents are ignored; ``servicer_context`` is unused.
    """
    yield from (_RESPONSE for _incoming in request_iterator)
||||
|
||||
|
||||
class _MethodHandler(grpc.RpcMethodHandler):
    """Method handler that exposes exactly one of this module's four handlers.

    The handler chosen depends on whether the request and/or the response is
    streaming; the three remaining behaviour attributes are set to ``None``.
    """

    def __init__(self, request_streaming, response_streaming):
        self.request_streaming = request_streaming
        self.response_streaming = response_streaming
        # No custom (de)serializers are configured for this handler.
        self.request_deserializer = None
        self.response_serializer = None

        # Map (request streams?, response streams?) to the attribute that
        # should carry the behaviour and the behaviour itself.
        arity = (bool(request_streaming), bool(response_streaming))
        active_slot, behavior = {
            (True, True): ("stream_stream", handle_stream_stream),
            (True, False): ("stream_unary", handle_stream_unary),
            (False, True): ("unary_stream", handle_unary_stream),
            (False, False): ("unary_unary", handle_unary_unary),
        }[arity]

        for slot in (
            "unary_unary",
            "unary_stream",
            "stream_unary",
            "stream_stream",
        ):
            setattr(self, slot, behavior if slot == active_slot else None)
||||
|
||||
|
||||
class _GenericHandler(grpc.GenericRpcHandler):
    """Generic handler routing the module's test method paths."""

    def service(self, handler_call_details):
        """Return a ``_MethodHandler`` for a known method path, else ``None``."""
        # (request_streaming, response_streaming) for each supported path.
        streaming_flags = {
            _UNARY_UNARY: (False, False),
            _UNARY_UNARY_FILTERED: (False, False),
            _UNARY_STREAM: (False, True),
            _STREAM_UNARY: (True, False),
            _STREAM_STREAM: (True, True),
        }.get(handler_call_details.method)
        if streaming_flags is None:
            return None
        return _MethodHandler(*streaming_flags)
||||
|
||||
|
||||
def start_server(interceptors=None) -> Tuple[grpc.Server, int]:
    """Create, configure and start the in-process test server.

    Args:
        interceptors: Optional server interceptors; only forwarded to
            ``grpc.server`` when truthy.

    Returns:
        A ``(server, port)`` pair, where ``port`` is the value returned by
        ``add_insecure_port("[::]:0")``.
    """
    executor = futures.ThreadPoolExecutor(max_workers=10)
    server_kwargs = {"interceptors": interceptors} if interceptors else {}
    server = grpc.server(executor, **server_kwargs)

    server.add_generic_rpc_handlers((_GenericHandler(),))
    bound_port = server.add_insecure_port("[::]:0")
    server.start()
    return server, bound_port
||||
|
||||
|
||||
def unary_unary_call(port, metadata=None):
    """Invoke ``/test/UnaryUnary`` on ``localhost:<port>`` via ``with_call``.

    Args:
        port: Port of the target server.
        metadata: Optional call metadata; only passed along when truthy.
    """
    call_kwargs = {"metadata": metadata} if metadata else {}
    target = f"localhost:{port}"
    with grpc.insecure_channel(target) as channel:
        invoke = channel.unary_unary(_UNARY_UNARY).with_call
        _response, _call = invoke(_REQUEST, **call_kwargs)
||||
|
||||
|
||||
def intercepted_unary_unary_call(port, interceptors, metadata=None):
    """Invoke ``/test/UnaryUnary`` through a channel wrapped with interceptors.

    Args:
        port: Port of the target server on ``localhost``.
        interceptors: Client interceptors handed to ``grpc.intercept_channel``.
        metadata: Optional call metadata; only passed along when truthy.
    """
    call_kwargs = {"metadata": metadata} if metadata else {}
    target = f"localhost:{port}"
    with grpc.insecure_channel(target) as channel:
        wrapped_channel = grpc.intercept_channel(channel, interceptors)
        invoke = wrapped_channel.unary_unary(_UNARY_UNARY).with_call
        _response, _call = invoke(_REQUEST, **call_kwargs)
||||
|
||||
|
||||
def unary_unary_filtered_call(port, metadata=None):
    """Invoke ``/test/UnaryUnaryFiltered`` on ``localhost:<port>`` via ``with_call``.

    Args:
        port: Port of the target server.
        metadata: Optional call metadata; only passed along when truthy.
    """
    call_kwargs = {"metadata": metadata} if metadata else {}
    target = f"localhost:{port}"
    with grpc.insecure_channel(target) as channel:
        invoke = channel.unary_unary(_UNARY_UNARY_FILTERED).with_call
        _response, _call = invoke(_REQUEST, **call_kwargs)
||||
|
||||
|
||||
def unary_stream_call(port):
    """Invoke ``/test/UnaryStream`` on ``localhost:<port>`` and drain the responses."""
    target = f"localhost:{port}"
    with grpc.insecure_channel(target) as channel:
        response_stream = channel.unary_stream(_UNARY_STREAM)(_REQUEST)
        # Iterate to exhaustion; the individual responses are discarded.
        for _response in response_stream:
            continue
||||
|
||||
|
||||
def stream_unary_call(port):
    """Invoke ``/test/StreamUnary`` on ``localhost:<port>`` with ``STREAM_LENGTH`` requests."""
    target = f"localhost:{port}"
    with grpc.insecure_channel(target) as channel:
        invoke = channel.stream_unary(_STREAM_UNARY).with_call
        outgoing = iter([_REQUEST for _ in range(STREAM_LENGTH)])
        _response, _call = invoke(outgoing)
||||
|
||||
|
||||
def stream_stream_call(port):
    """Invoke ``/test/StreamStream`` on ``localhost:<port>`` and drain the responses.

    ``STREAM_LENGTH`` copies of ``_REQUEST`` are sent as the request stream.
    """
    target = f"localhost:{port}"
    with grpc.insecure_channel(target) as channel:
        bidi = channel.stream_stream(_STREAM_STREAM)
        outgoing = iter([_REQUEST for _ in range(STREAM_LENGTH)])
        # Iterate to exhaustion; the individual responses are discarded.
        for _response in bidi(outgoing):
            continue
@ -0,0 +1,23 @@ |
||||
%YAML 1.2 |
||||
--- | |
||||
# Copyright 2023 The gRPC Authors |
||||
# |
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
# |
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
# |
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
# |
||||
# This file has been automatically generated from a template file. |
||||
# Please make modifications to |
||||
# `$REPO_ROOT/templates/src/python/grpcio/_parallel_compile_patch.py.template` |
||||
# instead. This file can be regenerated from the template by running |
||||
# `tools/buildgen/generate_projects.sh`. |
||||
|
||||
<%include file="../_parallel_compile_patch.py.include" /> |
Loading…
Reference in new issue