[Python Otel] Fix Segfault caused by CallTracer method lifetime issue (#37329)

We're seeing a segfault issue in observability tests:
```
2024-07-26T09:09:18.422255153Z *** SIGSEGV received at time=1721984958 on cpu 0 ***
2024-07-26T09:09:18.424985750Z PC: @     0x7e1acccb71c9  (unknown)  (unknown)
2024-07-26T09:09:18.425333774Z     @     0x7e1ac714ed8c         64  absl::lts_20240116::WriteFailureInfo()
2024-07-26T09:09:18.425356717Z     @     0x7e1ac714ea15        272  absl::lts_20240116::AbslFailureSignalHandler()
2024-07-26T09:09:18.425368880Z     @     0x7e1accb98050       1584  (unknown)
2024-07-26T09:09:18.426117382Z     @     0x7e1ac77f458c        112  absl::lts_20240116::string_view::operator std::__cxx11::basic_string<><>()
2024-07-26T09:09:18.426647368Z     @     0x7e1ac78008df        688  grpc_observability::PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::RecordEnd()
```

It points to `absl::string_view::operator std::__cxx11::basic_string<>()` which indicates the issue might be related to string conversion.

The most probable cause is that the `parent_->method_` string object is being destroyed before the `std::string` conversion is completed or used by `emplace_back`:

b056bc41d3/src/python/grpcio_observability/grpc_observability/client_call_tracer.cc (L325-L326)

Since it's difficult to manage the lifecycle of `method` in Python/Cython, this PR changes `method_` and `traget_` from `absl::string_view` to `std::string` so that they'll always be available.

<!--

If you know who should review your pull request, please assign it to that
person, otherwise the pull request would get assigned randomly.

If your pull request is for a specific language, please add the appropriate
lang label.

-->

Closes #37329

COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/37329 from XuanWang-Amos:fix_otel_segfault c579022529
PiperOrigin-RevId: 657366034
pull/37335/head
Xuan Wang 6 months ago committed by Copybara-Service
parent b056bc41d3
commit b6e0137608
  1. 13
      src/python/grpcio_observability/grpc_observability/client_call_tracer.cc
  2. 4
      src/python/grpcio_observability/grpc_observability/client_call_tracer.h
  3. 8
      src/python/grpcio_observability/grpc_observability/python_observability_context.h

@ -88,7 +88,7 @@ void PythonOpenCensusCallTracer::RecordAnnotation(
PythonOpenCensusCallTracer::~PythonOpenCensusCallTracer() {
if (PythonCensusStatsEnabled()) {
context_.Labels().emplace_back(kClientMethod, std::string(method_));
context_.Labels().emplace_back(kClientMethod, method_);
RecordIntMetric(kRpcClientRetriesPerCallMeasureName, retries_ - 1,
context_.Labels(), identifier_, registered_method_,
/*include_exchange_labels=*/true); // exclude first attempt
@ -159,8 +159,8 @@ PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
if (!PythonCensusStatsEnabled()) {
return;
}
context_.Labels().emplace_back(kClientMethod, std::string(parent_->method_));
context_.Labels().emplace_back(kClientTarget, std::string(parent_->target_));
context_.Labels().emplace_back(kClientMethod, parent_->method_);
context_.Labels().emplace_back(kClientTarget, parent_->target_);
RecordIntMetric(kRpcClientStartedRpcsMeasureName, 1, context_.Labels(),
parent_->identifier_, parent_->registered_method_,
/*include_exchange_labels=*/false);
@ -264,8 +264,8 @@ void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
}
std::string final_status = absl::StatusCodeToString(status_code_);
context_.Labels().emplace_back(kClientMethod, std::string(parent_->method_));
context_.Labels().emplace_back(kClientTarget, std::string(parent_->target_));
context_.Labels().emplace_back(kClientMethod, parent_->method_);
context_.Labels().emplace_back(kClientTarget, parent_->target_);
context_.Labels().emplace_back(kClientStatus, final_status);
if (parent_->add_csm_optional_labels_) {
parent_->labels_injector_.AddXdsOptionalLabels(
@ -322,8 +322,7 @@ void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::
void PythonOpenCensusCallTracer::PythonOpenCensusCallAttemptTracer::RecordEnd(
const gpr_timespec& /*latency*/) {
if (PythonCensusStatsEnabled()) {
context_.Labels().emplace_back(kClientMethod,
std::string(parent_->method_));
context_.Labels().emplace_back(kClientMethod, parent_->method_);
context_.Labels().emplace_back(kClientStatus,
StatusCodeToString(status_code_));
RecordIntMetric(kRpcClientSentMessagesPerRpcMeasureName,

@ -140,9 +140,9 @@ class PythonOpenCensusCallTracer : public grpc_core::ClientCallTracer {
PythonCensusContext CreateCensusContextForCallAttempt();
// Client method.
absl::string_view method_;
std::string method_;
// Client target.
absl::string_view target_;
std::string target_;
PythonCensusContext context_;
bool tracing_enabled_;
bool add_csm_optional_labels_;

@ -272,17 +272,15 @@ void GenerateClientContext(absl::string_view method, absl::string_view trace_id,
void GenerateServerContext(absl::string_view header, absl::string_view method,
PythonCensusContext* context);
inline absl::string_view GetMethod(const char* method) {
inline std::string GetMethod(const char* method) {
if (std::string(method).empty()) {
return "";
}
// Check for leading '/' and trim it if present.
return absl::StripPrefix(absl::string_view(method), "/");
return std::string(absl::StripPrefix(method, "/"));
}
inline absl::string_view GetTarget(const char* target) {
return absl::string_view(target);
}
inline std::string GetTarget(const char* target) { return std::string(target); }
// Fills a pre-allocated buffer with the value for the grpc-trace-bin header.
// The buffer must be at least kGrpcTraceBinHeaderLen bytes long.

Loading…
Cancel
Save