From c8765eb9c48a462a379ae8376c04b65cc85d6566 Mon Sep 17 00:00:00 2001 From: Yijie Ma Date: Mon, 23 Sep 2024 14:39:48 -0700 Subject: [PATCH] [OTel plugin] Do not crash when the gauge in `AddCallback` is different from the gauge in `Report` (#37764) Log error message instead of crashing for this API misuse. Closes #37764 COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/37764 from yijiem:report-different-gauge-wont-crash 1b6e912bfc65c44b754e9fa143f8c22e15ba49ef PiperOrigin-RevId: 677944595 --- src/cpp/ext/otel/otel_plugin.cc | 18 ++++++++++ test/cpp/ext/otel/otel_plugin_test.cc | 47 +++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/src/cpp/ext/otel/otel_plugin.cc b/src/cpp/ext/otel/otel_plugin.cc index c904df2e40a..7f71ec517f6 100644 --- a/src/cpp/ext/otel/otel_plugin.cc +++ b/src/cpp/ext/otel/otel_plugin.cc @@ -302,6 +302,15 @@ void OpenTelemetryPluginImpl::CallbackMetricReporter::ReportInt64( grpc_core::GlobalInstrumentsRegistry::GetInstrumentDescriptor(handle); CHECK(descriptor.label_keys.size() == label_values.size()); CHECK(descriptor.optional_label_keys.size() == optional_values.size()); + if ((*callback_gauge_state)->caches.find(key_) == + (*callback_gauge_state)->caches.end()) { + LOG(ERROR) << "This may occur when the gauge used in AddCallback is " + "different from the gauge used in Report. This indicates a " + "misuse of the API. The value " + << value << " will not be recorded for instrument " + << handle.index; + return; + } auto& cell = (*callback_gauge_state)->caches.at(key_); std::vector key; key.reserve(label_values.size() + @@ -330,6 +339,15 @@ void OpenTelemetryPluginImpl::CallbackMetricReporter::ReportDouble( grpc_core::GlobalInstrumentsRegistry::GetInstrumentDescriptor(handle); CHECK(descriptor.label_keys.size() == label_values.size()); CHECK(descriptor.optional_label_keys.size() == optional_values.size()); + if ((*callback_gauge_state)->caches.find(key_) == + (*callback_gauge_state)->caches.end()) { + LOG(ERROR) << "This may occur when the gauge used in AddCallback is " + "different from the gauge used in Report. This indicates a " + "misuse of the API. The value " + << value << " will not be recorded for instrument " + << handle.index; + return; + } auto& cell = (*callback_gauge_state)->caches.at(key_); std::vector key; key.reserve(label_values.size() + diff --git a/test/cpp/ext/otel/otel_plugin_test.cc b/test/cpp/ext/otel/otel_plugin_test.cc index 22b971ac370..d5f95377d00 100644 --- a/test/cpp/ext/otel/otel_plugin_test.cc +++ b/test/cpp/ext/otel/otel_plugin_test.cc @@ -2085,6 +2085,53 @@ TEST_F(OpenTelemetryPluginCallbackMetricsTest, VerifyCallbacksAreCleanedUp) { EXPECT_EQ(report_count_2, 2 * kIterations); } +TEST_F(OpenTelemetryPluginCallbackMetricsTest, + ReportDifferentGaugeThanRegisteredWontCrash) { + constexpr absl::string_view kInt64CallbackGaugeMetric = + "yet_another_int64_callback_gauge"; + constexpr absl::string_view kDoubleCallbackGaugeMetric = + "yet_another_double_callback_gauge"; + auto integer_gauge_handle = + grpc_core::GlobalInstrumentsRegistry::RegisterCallbackInt64Gauge( + kInt64CallbackGaugeMetric, "An int64 callback gauge.", "unit", + /*enable_by_default=*/true) + .Build(); + auto double_gauge_handle = + grpc_core::GlobalInstrumentsRegistry::RegisterCallbackDoubleGauge( + kDoubleCallbackGaugeMetric, "A double callback gauge.", "unit", + /*enable_by_default=*/true) + .Build(); + Init(std::move(Options().set_metric_names( + {kInt64CallbackGaugeMetric, kDoubleCallbackGaugeMetric}))); + auto stats_plugins = + grpc_core::GlobalStatsPluginRegistry::GetStatsPluginsForChannel( + grpc_core::experimental::StatsPluginChannelScope( + "dns:///localhost:8080", "", endpoint_config_)); + // Registers integer_gauge_handle but reports double_gauge_handle. + int report_count_1 = 0; + double double_value_1 = 0.5; + auto registered_metric_callback_1 = stats_plugins.RegisterCallback( + [&](grpc_core::CallbackMetricReporter& reporter) { + ++report_count_1; + reporter.Report(double_gauge_handle, double_value_1++, {}, {}); + }, + grpc_core::Duration::Milliseconds(50) * grpc_test_slowdown_factor(), + integer_gauge_handle); + constexpr int kIterations = 50; + { + MetricsCollectorThread collector{ + this, + grpc_core::Duration::Milliseconds(100) * grpc_test_slowdown_factor(), + kIterations, + [&](const absl::flat_hash_map< + std::string, + std::vector>& + data) { return false; }}; + } + // Verify that callbacks are invoked + EXPECT_EQ(report_count_1, kIterations); +} + TEST(OpenTelemetryPluginMetricsEnablingDisablingTest, TestEnableDisableAPIs) { grpc::internal::OpenTelemetryPluginBuilderImpl builder; // First disable all metrics