Fix race condition in health checking client.

pull/23074/head
Mark D. Roth 5 years ago
parent 71c8056669
commit 104aef6ddf
  1. 29
      src/core/ext/filters/client_channel/health/health_check_client.cc
  2. 6
      src/core/ext/filters/client_channel/health/health_check_client.h

@ -125,8 +125,7 @@ void HealthCheckClient::StartCallLocked() {
call_state_->StartCall(); call_state_->StartCall();
} }
void HealthCheckClient::StartRetryTimer() { void HealthCheckClient::StartRetryTimerLocked() {
MutexLock lock(&mu_);
SetHealthStatusLocked(GRPC_CHANNEL_TRANSIENT_FAILURE, SetHealthStatusLocked(GRPC_CHANNEL_TRANSIENT_FAILURE,
"health check call failed; will retry after backoff"); "health check call failed; will retry after backoff");
grpc_millis next_try = retry_backoff_.NextAttemptTime(); grpc_millis next_try = retry_backoff_.NextAttemptTime();
@ -297,14 +296,7 @@ void HealthCheckClient::CallState::StartCall() {
"checking call on subchannel (%s); will retry", "checking call on subchannel (%s); will retry",
health_check_client_.get(), this, grpc_error_string(error)); health_check_client_.get(), this, grpc_error_string(error));
GRPC_ERROR_UNREF(error); GRPC_ERROR_UNREF(error);
// Schedule instead of running directly, since we must not be CallEndedLocked(/*retry=*/true);
// holding health_check_client_->mu_ when CallEnded() is called.
call_->Ref(DEBUG_LOCATION, "call_end_closure").release();
ExecCtx::Run(
DEBUG_LOCATION,
GRPC_CLOSURE_INIT(&batch_.handler_private.closure, CallEndedRetry, this,
grpc_schedule_on_exec_ctx),
GRPC_ERROR_NONE);
return; return;
} }
// Initialize payload and batch. // Initialize payload and batch.
@ -582,18 +574,11 @@ void HealthCheckClient::CallState::RecvTrailingMetadataReady(
kErrorMessage); kErrorMessage);
retry = false; retry = false;
} }
self->CallEnded(retry); MutexLock lock(&self->health_check_client_->mu_);
} self->CallEndedLocked(retry);
void HealthCheckClient::CallState::CallEndedRetry(void* arg,
grpc_error* /*error*/) {
HealthCheckClient::CallState* self =
static_cast<HealthCheckClient::CallState*>(arg);
self->CallEnded(true /* retry */);
self->call_->Unref(DEBUG_LOCATION, "call_end_closure");
} }
void HealthCheckClient::CallState::CallEnded(bool retry) { void HealthCheckClient::CallState::CallEndedLocked(bool retry) {
// If this CallState is still in use, this call ended because of a failure, // If this CallState is still in use, this call ended because of a failure,
// so we need to stop using it and optionally create a new one. // so we need to stop using it and optionally create a new one.
// Otherwise, we have deliberately ended this call, and no further action // Otherwise, we have deliberately ended this call, and no further action
@ -606,10 +591,10 @@ void HealthCheckClient::CallState::CallEnded(bool retry) {
// If the call fails after we've gotten a successful response, reset // If the call fails after we've gotten a successful response, reset
// the backoff and restart the call immediately. // the backoff and restart the call immediately.
health_check_client_->retry_backoff_.Reset(); health_check_client_->retry_backoff_.Reset();
health_check_client_->StartCall(); health_check_client_->StartCallLocked();
} else { } else {
// If the call failed without receiving any messages, retry later. // If the call failed without receiving any messages, retry later.
health_check_client_->StartRetryTimer(); health_check_client_->StartRetryTimerLocked();
} }
} }
} }

@ -72,8 +72,8 @@ class HealthCheckClient : public InternallyRefCounted<HealthCheckClient> {
void StartBatch(grpc_transport_stream_op_batch* batch); void StartBatch(grpc_transport_stream_op_batch* batch);
static void StartBatchInCallCombiner(void* arg, grpc_error* error); static void StartBatchInCallCombiner(void* arg, grpc_error* error);
static void CallEndedRetry(void* arg, grpc_error* error); // Requires holding health_check_client_->mu_.
void CallEnded(bool retry); void CallEndedLocked(bool retry);
static void OnComplete(void* arg, grpc_error* error); static void OnComplete(void* arg, grpc_error* error);
static void RecvInitialMetadataReady(void* arg, grpc_error* error); static void RecvInitialMetadataReady(void* arg, grpc_error* error);
@ -143,7 +143,7 @@ class HealthCheckClient : public InternallyRefCounted<HealthCheckClient> {
void StartCall(); void StartCall();
void StartCallLocked(); // Requires holding mu_. void StartCallLocked(); // Requires holding mu_.
void StartRetryTimer(); void StartRetryTimerLocked(); // Requires holding mu_.
static void OnRetryTimer(void* arg, grpc_error* error); static void OnRetryTimer(void* arg, grpc_error* error);
void SetHealthStatus(grpc_connectivity_state state, const char* reason); void SetHealthStatus(grpc_connectivity_state state, const char* reason);

Loading…
Cancel
Save