[EventEngine] Fix lock acquisition ordering problem on Windows Connect (#35892)

CC @youyuanwu

This fixes a lock acquisition ordering problem in WindowsEventEngine's Connect behavior, between a ConnectionState's internal mutex, and a HandshakeManager's mutex. The connection state mutex should not be held when calling the on_connected callback.

This should unblock #34801

Closes #35892

COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/35892 from drfloob:connect-callback-lock-cycle 4d56120e2f
PiperOrigin-RevId: 606752276
pull/35800/head
AJ Heller 11 months ago committed by Copybara-Service
parent 1669b9e853
commit 510dbc92e1
  1. 27
      src/core/lib/event_engine/windows/windows_engine.cc

@ -49,6 +49,13 @@
namespace grpc_event_engine {
namespace experimental {
namespace {
EventEngine::OnConnectCallback CreateCrashingOnConnectCallback() {
return [](absl::StatusOr<std::unique_ptr<EventEngine::Endpoint>>) {
grpc_core::Crash("Internal Error: OnConnect callback called when unset");
};
}
} // namespace
// ---- IOCPWorkClosure ----
WindowsEventEngine::IOCPWorkClosure::IOCPWorkClosure(ThreadPool* thread_pool,
@ -250,6 +257,7 @@ void WindowsEventEngine::OnConnectCompleted(
// Connection attempt complete!
grpc_core::MutexLock lock(&state->mu);
cb = std::move(state->on_connected_user_callback);
state->on_connected_user_callback = CreateCrashingOnConnectCallback();
state->on_connected = nullptr;
{
grpc_core::MutexLock handle_lock(&connection_mu_);
@ -356,10 +364,13 @@ EventEngine::ConnectionHandle WindowsEventEngine::Connect(
});
connection_state->timer_handle =
RunAfter(timeout, [this, connection_state]() {
grpc_core::MutexLock lock(&connection_state->mu);
grpc_core::ReleasableMutexLock lock(&connection_state->mu);
if (CancelConnectFromDeadlineTimer(connection_state.get())) {
connection_state->on_connected_user_callback(
absl::DeadlineExceededError("Connection timed out"));
auto cb = std::move(connection_state->on_connected_user_callback);
connection_state->on_connected_user_callback =
CreateCrashingOnConnectCallback();
lock.Release();
cb(absl::DeadlineExceededError("Connection timed out"));
}
// else: The connection attempt could not be canceled. We can assume
// the connection callback will be called.
@ -380,8 +391,14 @@ EventEngine::ConnectionHandle WindowsEventEngine::Connect(
connection_state->socket->Shutdown(DEBUG_LOCATION, "ConnectEx");
Run([connection_state = std::move(connection_state),
status = GRPC_WSA_ERROR(WSAGetLastError(), "ConnectEx")]() mutable {
grpc_core::MutexLock lock(&connection_state->mu);
connection_state->on_connected_user_callback(status);
EventEngine::OnConnectCallback cb;
{
grpc_core::MutexLock lock(&connection_state->mu);
cb = std::move(connection_state->on_connected_user_callback);
connection_state->on_connected_user_callback =
CreateCrashingOnConnectCallback();
}
cb(status);
});
return EventEngine::ConnectionHandle::kInvalid;
}

Loading…
Cancel
Save