From 3ddc48bf62a73bff3ab5cbc254df7ccbdb3dcced Mon Sep 17 00:00:00 2001 From: AJ Heller Date: Tue, 6 Feb 2024 10:36:34 -0800 Subject: [PATCH] [server] Keep server running after failed UDS `getpeername` (#35823) Closes #35076. See discussion there for context. Previously, if `getpeername` failed on a recently-accepted Unix domain socket, the server would shut down. It's unclear if there's any valid reason `getpeername` would fail here, we could be looking at system issues / data corruption, but it may be implementation-dependent. I did not find a man page that specified this behavior: getpeername on a server errors with `EBADF` when a client closes its end of a UDS connection; the server's fd should still be valid. I was not able to reproduce the failure on recent Linux/Mac systems where clients close their connection after a listener accepted (basic, non-gRPC test: listener accepts a connection and sleeps, client closes their end, then listener wakes up and calls getpeername). If there are no valid reasons `getpeername` would fail in these spots, gRPC behavior is essentially undefined - we cannot defensively code around system failures in every place they may occur. If this _is_ a valid situation on some platforms, then this fix keeps the server alive as one would hope. There are arguments to be made for shutting down servers if the system is unstable, but I can't find an objectively correct behavior here. For those reasons, I think it may be best to keep the server running, which is what this PR does. Closes #35823 COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/35823 from drfloob:abort-listen-on-failed-getpeername 29e723c5bb2ac40c6f1d8927417cadcb8c3a98e9 PiperOrigin-RevId: 604694057 --- src/core/BUILD | 1 + .../posix_engine/posix_engine_listener.cc | 13 ++++++------- src/core/lib/iomgr/tcp_server_posix.cc | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 3db3e15eedb..b4779505fcb 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -2188,6 +2188,7 @@ grpc_cc_library( "posix_event_engine_tcp_socket_utils", "socket_mutator", "status_helper", + "strerror", "time", "//:event_engine_base_hdrs", "//:exec_ctx", diff --git a/src/core/lib/event_engine/posix_engine/posix_engine_listener.cc b/src/core/lib/event_engine/posix_engine/posix_engine_listener.cc index 09dc4d5929c..1048e589c2d 100644 --- a/src/core/lib/event_engine/posix_engine/posix_engine_listener.cc +++ b/src/core/lib/event_engine/posix_engine/posix_engine_listener.cc @@ -45,6 +45,7 @@ #include "src/core/lib/event_engine/posix_engine/tcp_socket_utils.h" #include "src/core/lib/event_engine/tcp_socket_utils.h" #include "src/core/lib/gprpp/status_helper.h" +#include "src/core/lib/gprpp/strerror.h" #include "src/core/lib/gprpp/time.h" #include "src/core/lib/iomgr/socket_mutator.h" @@ -173,7 +174,7 @@ void PosixEngineListenerImpl::AsyncConnectionAcceptor::NotifyOnAccept( return; default: gpr_log(GPR_ERROR, "Closing acceptor. Failed accept4: %s", - strerror(errno)); + grpc_core::StrError(errno).c_str()); // Shutting down the acceptor. Unref the ref grabbed in // AsyncConnectionAcceptor::Start(). Unref(); @@ -189,15 +190,13 @@ void PosixEngineListenerImpl::AsyncConnectionAcceptor::NotifyOnAccept( auto listener_addr_uri = ResolvedAddressToURI(socket_.addr); gpr_log( GPR_ERROR, - "Failed getpeername: %s. This is a critical failure, the " - "listener on %s:%d is shutting down.", - strerror(errno), + "Failed getpeername: %s. Dropping the connection, and continuing " + "to listen on %s:%d.", + grpc_core::StrError(errno).c_str(), listener_addr_uri.ok() ? listener_addr_uri->c_str() : "", socket_.port); close(fd); - // Shutting down the acceptor. Unref the ref grabbed in - // AsyncConnectionAcceptor::Start(). - Unref(); + handle_->NotifyOnRead(notify_on_accept_); return; } addr = EventEngine::ResolvedAddress(addr.address(), len); diff --git a/src/core/lib/iomgr/tcp_server_posix.cc b/src/core/lib/iomgr/tcp_server_posix.cc index 6f373751c82..85404201425 100644 --- a/src/core/lib/iomgr/tcp_server_posix.cc +++ b/src/core/lib/iomgr/tcp_server_posix.cc @@ -429,13 +429,13 @@ static void on_read(void* arg, grpc_error_handle err) { auto listener_addr_uri = grpc_sockaddr_to_uri(&sp->addr); gpr_log( GPR_ERROR, - "Failed getpeername: %s. This is a critical failure, the " - "listener on %s:%d is shutting down.", + "Failed getpeername: %s. Dropping the connection, and continuing " + "to listen on %s:%d.", grpc_core::StrError(errno).c_str(), listener_addr_uri.ok() ? listener_addr_uri->c_str() : "", sp->port); close(fd); - goto error; + continue; } }