From 2c28b44852ddf242ff9f100724af499412eee7ed Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 30 Aug 2022 17:01:59 -0700 Subject: [PATCH] subchannel: include IP address in status messages (#30784) * subchannel: include IP address in status messages * Automated change: Fix sanity tests * add comment about lock annotations * clang-format Co-authored-by: markdroth --- .../ext/filters/client_channel/subchannel.cc | 27 +++-- test/cpp/end2end/client_lb_end2end_test.cc | 109 +++++++++--------- .../end2end/xds/xds_cluster_end2end_test.cc | 30 ++--- .../end2end/xds/xds_ring_hash_end2end_test.cc | 18 ++- .../end2end/xds/xds_routing_end2end_test.cc | 6 +- 5 files changed, 109 insertions(+), 81 deletions(-) diff --git a/src/core/ext/filters/client_channel/subchannel.cc b/src/core/ext/filters/client_channel/subchannel.cc index 6cf7b32ecaa..3ef84e6587c 100644 --- a/src/core/ext/filters/client_channel/subchannel.cc +++ b/src/core/ext/filters/client_channel/subchannel.cc @@ -27,6 +27,9 @@ #include #include "absl/status/statusor.h" +#include "absl/strings/cord.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include #include @@ -62,10 +65,6 @@ #include "src/core/lib/transport/connectivity_state.h" #include "src/core/lib/transport/error_utils.h" -// Strong and weak refs. -#define INTERNAL_REF_BITS 16 -#define STRONG_REF_MASK (~(gpr_atm)((1 << INTERNAL_REF_BITS) - 1)) - // Backoff parameters. #define GRPC_SUBCHANNEL_INITIAL_CONNECT_BACKOFF_SECONDS 1 #define GRPC_SUBCHANNEL_RECONNECT_BACKOFF_MULTIPLIER 1.6 @@ -835,7 +834,21 @@ const char* SubchannelConnectivityStateChangeString( void Subchannel::SetConnectivityStateLocked(grpc_connectivity_state state, const absl::Status& status) { state_ = state; - status_ = status; + if (status.ok()) { + status_ = status; + } else { + // Augment status message to include IP address. + status_ = absl::Status(status.code(), + absl::StrCat(grpc_sockaddr_to_uri(&key_.address()) + .value_or(""), + ": ", status.message())); + status.ForEachPayload( + [this](absl::string_view key, const absl::Cord& value) + // Want to use ABSL_EXCLUSIVE_LOCKS_REQUIRED(&mu_) here, + // but that won't work, because we can't pass the lock + // annotation through absl::Status::ForEachPayload(). + ABSL_NO_THREAD_SAFETY_ANALYSIS { status_.SetPayload(key, value); }); + } if (channelz_node_ != nullptr) { channelz_node_->UpdateConnectivityState(state); channelz_node_->AddTraceEvent( @@ -844,9 +857,9 @@ void Subchannel::SetConnectivityStateLocked(grpc_connectivity_state state, SubchannelConnectivityStateChangeString(state))); } // Notify non-health watchers. - watcher_list_.NotifyLocked(state, status); + watcher_list_.NotifyLocked(state, status_); // Notify health watchers. - health_watcher_map_.NotifyLocked(state, status); + health_watcher_map_.NotifyLocked(state, status_); } void Subchannel::OnRetryTimer() { diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index 27ba2e97098..afa997499c9 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -1189,11 +1189,12 @@ TEST_F(PickFirstTest, ReresolutionNoSelected) { response_generator.SetNextResolution(dead_ports); gpr_log(GPR_INFO, "****** INITIAL RESOLUTION SET *******"); for (size_t i = 0; i < 10; ++i) { - CheckRpcSendFailure( - DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - "failed to connect to all addresses; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)"); + CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, + "failed to connect to all addresses; last error: " + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)"); } // Set a re-resolution result that contains reachable ports, so that the // pick_first LB policy can recover soon. @@ -1201,12 +1202,13 @@ TEST_F(PickFirstTest, ReresolutionNoSelected) { gpr_log(GPR_INFO, "****** RE-RESOLUTION SET *******"); WaitForServer(DEBUG_LOCATION, stub, 0, [](const Status& status) { EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code()); - EXPECT_THAT( - status.error_message(), - ::testing::ContainsRegex( - "failed to connect to all addresses; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)")); + EXPECT_THAT(status.error_message(), + ::testing::ContainsRegex( + "failed to connect to all addresses; last error: " + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)")); }); CheckRpcSendOk(DEBUG_LOCATION, stub); EXPECT_EQ(servers_[0]->service_.request_count(), 1); @@ -1271,10 +1273,7 @@ TEST_F(PickFirstTest, CheckStateBeforeStartWatch) { auto stub_2 = BuildStub(channel_2); response_generator_2.SetNextResolution(ports); gpr_log(GPR_INFO, "****** RESOLUTION SET FOR CHANNEL 2 *******"); - WaitForServer(DEBUG_LOCATION, stub_2, 0, [](const Status& status) { - EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code()); - EXPECT_EQ("failed to connect to all addresses", status.error_message()); - }); + WaitForServer(DEBUG_LOCATION, stub_2, 0); gpr_log(GPR_INFO, "****** CHANNEL 2 CONNECTED *******"); servers_[0]->Shutdown(); // Wait until the disconnection has triggered the connectivity notification. @@ -1354,10 +1353,7 @@ TEST_F(PickFirstTest, PendingUpdateAndSelectedSubchannelFails) { gpr_log(GPR_INFO, "Phase 4: Resuming connection attempt to second server."); hold->Resume(); WaitForChannelReady(channel.get()); - WaitForServer(DEBUG_LOCATION, stub, 1, [](const Status& status) { - EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code()); - EXPECT_EQ("failed to connect to all addresses", status.error_message()); - }); + WaitForServer(DEBUG_LOCATION, stub, 1); } TEST_F(PickFirstTest, StaysIdleUponEmptyUpdate) { @@ -1404,11 +1400,12 @@ TEST_F(PickFirstTest, response_generator.SetNextResolution(ports); EXPECT_EQ(GRPC_CHANNEL_IDLE, channel->GetState(false)); // Send an RPC, which should fail. - CheckRpcSendFailure( - DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - "failed to connect to all addresses; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)"); + CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, + "failed to connect to all addresses; last error: " + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)"); // Channel should be in TRANSIENT_FAILURE. EXPECT_EQ(GRPC_CHANNEL_TRANSIENT_FAILURE, channel->GetState(false)); // Now start a server on the last port. @@ -1650,11 +1647,12 @@ TEST_F(RoundRobinTest, TransientFailure) { return state == GRPC_CHANNEL_TRANSIENT_FAILURE; }; EXPECT_TRUE(WaitForChannelState(channel.get(), predicate)); - CheckRpcSendFailure( - DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - "connections to all backends failing; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)"); + CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, + "connections to all backends failing; last error: " + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)"); } TEST_F(RoundRobinTest, TransientFailureAtStartup) { @@ -1675,11 +1673,12 @@ TEST_F(RoundRobinTest, TransientFailureAtStartup) { return state == GRPC_CHANNEL_TRANSIENT_FAILURE; }; EXPECT_TRUE(WaitForChannelState(channel.get(), predicate, true)); - CheckRpcSendFailure( - DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - "connections to all backends failing; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)"); + CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, + "connections to all backends failing; last error: " + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)"); } TEST_F(RoundRobinTest, StaysInTransientFailureInSubsequentConnecting) { @@ -1711,11 +1710,12 @@ TEST_F(RoundRobinTest, StaysInTransientFailureInSubsequentConnecting) { // new picker, in case it was going to incorrectly do so. gpr_log(GPR_INFO, "=== EXPECTING RPCs TO FAIL ==="); for (size_t i = 0; i < 5; ++i) { - CheckRpcSendFailure( - DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - "connections to all backends failing; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)"); + CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, + "connections to all backends failing; last error: " + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)"); } // Clean up. hold->Resume(); @@ -1734,11 +1734,12 @@ TEST_F(RoundRobinTest, ReportsLatestStatusInTransientFailure) { response_generator.SetNextResolution(ports); // Allow first connection attempts to fail normally, and check that // the RPC fails with the right status message. - CheckRpcSendFailure( - DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - "connections to all backends failing; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)"); + CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, + "connections to all backends failing; last error: " + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)"); // Now intercept the next connection attempt for each port. auto hold1 = injector.AddHold(ports[0]); auto hold2 = injector.AddHold(ports[1]); @@ -1753,17 +1754,19 @@ TEST_F(RoundRobinTest, ReportsLatestStatusInTransientFailure) { while (true) { Status status = SendRpc(stub); EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code()); - if (status.error_message() == - "connections to all backends failing; last error: " - "UNKNOWN: Survey says... Bzzzzt!") { + if (::testing::Matches(::testing::MatchesRegex( + "connections to all backends failing; last error: " + "UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Survey says... Bzzzzt!"))(status.error_message())) { break; } - EXPECT_THAT( - status.error_message(), - ::testing::MatchesRegex( - "connections to all backends failing; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)")); + EXPECT_THAT(status.error_message(), + ::testing::MatchesRegex( + "connections to all backends failing; last error: " + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)")); EXPECT_LT(absl::Now(), deadline); if (absl::Now() >= deadline) break; } diff --git a/test/cpp/end2end/xds/xds_cluster_end2end_test.cc b/test/cpp/end2end/xds/xds_cluster_end2end_test.cc index 731026e2d54..868e0b6966f 100644 --- a/test/cpp/end2end/xds/xds_cluster_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_cluster_end2end_test.cc @@ -554,12 +554,13 @@ TEST_P(EdsTest, AllServersUnreachableFailFast) { // The error shouldn't be DEADLINE_EXCEEDED because timeout is set to 5 // seconds, and we should disocver in that time that the target backend is // down. - CheckRpcSendFailure( - DEBUG_LOCATION, StatusCode::UNAVAILABLE, - "connections to all backends failing; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)", - RpcOptions().set_timeout_ms(kRpcTimeoutMs)); + CheckRpcSendFailure(DEBUG_LOCATION, StatusCode::UNAVAILABLE, + "connections to all backends failing; last error: " + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)", + RpcOptions().set_timeout_ms(kRpcTimeoutMs)); } // Tests that RPCs fail when the backends are down, and will succeed again @@ -580,11 +581,12 @@ TEST_P(EdsTest, BackendsRestart) { ::testing::AnyOf(::testing::Eq(GRPC_CHANNEL_TRANSIENT_FAILURE), ::testing::Eq(GRPC_CHANNEL_CONNECTING))); // RPCs should fail. - CheckRpcSendFailure( - DEBUG_LOCATION, StatusCode::UNAVAILABLE, - "connections to all backends failing; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)"); + CheckRpcSendFailure(DEBUG_LOCATION, StatusCode::UNAVAILABLE, + "connections to all backends failing; last error: " + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)"); // Restart all backends. RPCs should start succeeding again. StartAllBackends(); CheckRpcSendOk(DEBUG_LOCATION, 1, @@ -1217,8 +1219,10 @@ TEST_P(FailoverTest, UpdateInitialUnavailable) { balancer_->ads_service()->SetEdsResource(BuildEdsResource(args)); constexpr char kErrorMessageRegex[] = "connections to all backends failing; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)"; + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)"; CheckRpcSendFailure(DEBUG_LOCATION, StatusCode::UNAVAILABLE, kErrorMessageRegex); args = EdsResourceArgs({ diff --git a/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc b/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc index 2e7a146fdd8..e7f4a3f604a 100644 --- a/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc @@ -995,8 +995,10 @@ TEST_P(RingHashTest, ReattemptWhenAllEndpointsUnreachable) { CheckRpcSendFailure( DEBUG_LOCATION, StatusCode::UNAVAILABLE, "ring hash cannot find a connected subchannel; first failure: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)", + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)", RpcOptions().set_metadata(std::move(metadata))); StartBackend(0); // Ensure we are actively connecting without any traffic. @@ -1035,8 +1037,10 @@ TEST_P(RingHashTest, TransientFailureSkipToAvailableReady) { CheckRpcSendFailure( DEBUG_LOCATION, StatusCode::UNAVAILABLE, "ring hash cannot find a connected subchannel; first failure: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)", + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)", rpc_options); gpr_log(GPR_INFO, "=== DONE WITH FIRST RPC ==="); EXPECT_EQ(GRPC_CHANNEL_TRANSIENT_FAILURE, channel_->GetState(false)); @@ -1072,8 +1076,10 @@ TEST_P(RingHashTest, TransientFailureSkipToAvailableReady) { CheckRpcSendFailure( DEBUG_LOCATION, StatusCode::UNAVAILABLE, "ring hash cannot find a connected subchannel; first failure: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)", + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)", rpc_options); gpr_log(GPR_INFO, "=== STARTING BACKEND 1 ==="); StartBackend(1); diff --git a/test/cpp/end2end/xds/xds_routing_end2end_test.cc b/test/cpp/end2end/xds/xds_routing_end2end_test.cc index 93a4f5ace80..7fda2b49424 100644 --- a/test/cpp/end2end/xds/xds_routing_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_routing_end2end_test.cc @@ -1836,8 +1836,10 @@ TEST_P(LdsRdsTest, XdsRoutingClusterUpdateClustersWithPickingDelays) { // that the client has received the update and attempted to connect. constexpr char kErrorMessageRegex[] = "connections to all backends failing; last error: " - "(UNKNOWN: Failed to connect to remote host: Connection refused|" - "UNAVAILABLE: Failed to connect to remote host: FD shutdown)"; + "(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: Connection refused|" + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "Failed to connect to remote host: FD shutdown)"; CheckRpcSendFailure(DEBUG_LOCATION, StatusCode::UNAVAILABLE, kErrorMessageRegex); // Now create a new cluster, pointing to backend 1.