subchannel: include IP address in status messages (#30784)

* subchannel: include IP address in status messages

* Automated change: Fix sanity tests

* add comment about lock annotations

* clang-format

Co-authored-by: markdroth <markdroth@users.noreply.github.com>
pull/30801/head
Mark D. Roth 3 years ago committed by GitHub
parent 32bc9a8656
commit 2c28b44852
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 27
      src/core/ext/filters/client_channel/subchannel.cc
  2. 109
      test/cpp/end2end/client_lb_end2end_test.cc
  3. 30
      test/cpp/end2end/xds/xds_cluster_end2end_test.cc
  4. 18
      test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc
  5. 6
      test/cpp/end2end/xds/xds_routing_end2end_test.cc

@ -27,6 +27,9 @@
#include <utility>
#include "absl/status/statusor.h"
#include "absl/strings/cord.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include <grpc/grpc.h>
#include <grpc/impl/codegen/grpc_types.h>
@ -62,10 +65,6 @@
#include "src/core/lib/transport/connectivity_state.h"
#include "src/core/lib/transport/error_utils.h"
// Strong and weak refs.
#define INTERNAL_REF_BITS 16
#define STRONG_REF_MASK (~(gpr_atm)((1 << INTERNAL_REF_BITS) - 1))
// Backoff parameters.
#define GRPC_SUBCHANNEL_INITIAL_CONNECT_BACKOFF_SECONDS 1
#define GRPC_SUBCHANNEL_RECONNECT_BACKOFF_MULTIPLIER 1.6
@ -835,7 +834,21 @@ const char* SubchannelConnectivityStateChangeString(
void Subchannel::SetConnectivityStateLocked(grpc_connectivity_state state,
const absl::Status& status) {
state_ = state;
status_ = status;
if (status.ok()) {
status_ = status;
} else {
// Augment status message to include IP address.
status_ = absl::Status(status.code(),
absl::StrCat(grpc_sockaddr_to_uri(&key_.address())
.value_or("<unknown address type>"),
": ", status.message()));
status.ForEachPayload(
[this](absl::string_view key, const absl::Cord& value)
// Want to use ABSL_EXCLUSIVE_LOCKS_REQUIRED(&mu_) here,
// but that won't work, because we can't pass the lock
// annotation through absl::Status::ForEachPayload().
ABSL_NO_THREAD_SAFETY_ANALYSIS { status_.SetPayload(key, value); });
}
if (channelz_node_ != nullptr) {
channelz_node_->UpdateConnectivityState(state);
channelz_node_->AddTraceEvent(
@ -844,9 +857,9 @@ void Subchannel::SetConnectivityStateLocked(grpc_connectivity_state state,
SubchannelConnectivityStateChangeString(state)));
}
// Notify non-health watchers.
watcher_list_.NotifyLocked(state, status);
watcher_list_.NotifyLocked(state, status_);
// Notify health watchers.
health_watcher_map_.NotifyLocked(state, status);
health_watcher_map_.NotifyLocked(state, status_);
}
void Subchannel::OnRetryTimer() {

@ -1189,11 +1189,12 @@ TEST_F(PickFirstTest, ReresolutionNoSelected) {
response_generator.SetNextResolution(dead_ports);
gpr_log(GPR_INFO, "****** INITIAL RESOLUTION SET *******");
for (size_t i = 0; i < 10; ++i) {
CheckRpcSendFailure(
DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"failed to connect to all addresses; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)");
CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"failed to connect to all addresses; last error: "
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)");
}
// Set a re-resolution result that contains reachable ports, so that the
// pick_first LB policy can recover soon.
@ -1201,12 +1202,13 @@ TEST_F(PickFirstTest, ReresolutionNoSelected) {
gpr_log(GPR_INFO, "****** RE-RESOLUTION SET *******");
WaitForServer(DEBUG_LOCATION, stub, 0, [](const Status& status) {
EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code());
EXPECT_THAT(
status.error_message(),
::testing::ContainsRegex(
"failed to connect to all addresses; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)"));
EXPECT_THAT(status.error_message(),
::testing::ContainsRegex(
"failed to connect to all addresses; last error: "
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)"));
});
CheckRpcSendOk(DEBUG_LOCATION, stub);
EXPECT_EQ(servers_[0]->service_.request_count(), 1);
@ -1271,10 +1273,7 @@ TEST_F(PickFirstTest, CheckStateBeforeStartWatch) {
auto stub_2 = BuildStub(channel_2);
response_generator_2.SetNextResolution(ports);
gpr_log(GPR_INFO, "****** RESOLUTION SET FOR CHANNEL 2 *******");
WaitForServer(DEBUG_LOCATION, stub_2, 0, [](const Status& status) {
EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code());
EXPECT_EQ("failed to connect to all addresses", status.error_message());
});
WaitForServer(DEBUG_LOCATION, stub_2, 0);
gpr_log(GPR_INFO, "****** CHANNEL 2 CONNECTED *******");
servers_[0]->Shutdown();
// Wait until the disconnection has triggered the connectivity notification.
@ -1354,10 +1353,7 @@ TEST_F(PickFirstTest, PendingUpdateAndSelectedSubchannelFails) {
gpr_log(GPR_INFO, "Phase 4: Resuming connection attempt to second server.");
hold->Resume();
WaitForChannelReady(channel.get());
WaitForServer(DEBUG_LOCATION, stub, 1, [](const Status& status) {
EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code());
EXPECT_EQ("failed to connect to all addresses", status.error_message());
});
WaitForServer(DEBUG_LOCATION, stub, 1);
}
TEST_F(PickFirstTest, StaysIdleUponEmptyUpdate) {
@ -1404,11 +1400,12 @@ TEST_F(PickFirstTest,
response_generator.SetNextResolution(ports);
EXPECT_EQ(GRPC_CHANNEL_IDLE, channel->GetState(false));
// Send an RPC, which should fail.
CheckRpcSendFailure(
DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"failed to connect to all addresses; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)");
CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"failed to connect to all addresses; last error: "
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)");
// Channel should be in TRANSIENT_FAILURE.
EXPECT_EQ(GRPC_CHANNEL_TRANSIENT_FAILURE, channel->GetState(false));
// Now start a server on the last port.
@ -1650,11 +1647,12 @@ TEST_F(RoundRobinTest, TransientFailure) {
return state == GRPC_CHANNEL_TRANSIENT_FAILURE;
};
EXPECT_TRUE(WaitForChannelState(channel.get(), predicate));
CheckRpcSendFailure(
DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)");
CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)");
}
TEST_F(RoundRobinTest, TransientFailureAtStartup) {
@ -1675,11 +1673,12 @@ TEST_F(RoundRobinTest, TransientFailureAtStartup) {
return state == GRPC_CHANNEL_TRANSIENT_FAILURE;
};
EXPECT_TRUE(WaitForChannelState(channel.get(), predicate, true));
CheckRpcSendFailure(
DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)");
CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)");
}
TEST_F(RoundRobinTest, StaysInTransientFailureInSubsequentConnecting) {
@ -1711,11 +1710,12 @@ TEST_F(RoundRobinTest, StaysInTransientFailureInSubsequentConnecting) {
// new picker, in case it was going to incorrectly do so.
gpr_log(GPR_INFO, "=== EXPECTING RPCs TO FAIL ===");
for (size_t i = 0; i < 5; ++i) {
CheckRpcSendFailure(
DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)");
CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)");
}
// Clean up.
hold->Resume();
@ -1734,11 +1734,12 @@ TEST_F(RoundRobinTest, ReportsLatestStatusInTransientFailure) {
response_generator.SetNextResolution(ports);
// Allow first connection attempts to fail normally, and check that
// the RPC fails with the right status message.
CheckRpcSendFailure(
DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)");
CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)");
// Now intercept the next connection attempt for each port.
auto hold1 = injector.AddHold(ports[0]);
auto hold2 = injector.AddHold(ports[1]);
@ -1753,17 +1754,19 @@ TEST_F(RoundRobinTest, ReportsLatestStatusInTransientFailure) {
while (true) {
Status status = SendRpc(stub);
EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code());
if (status.error_message() ==
"connections to all backends failing; last error: "
"UNKNOWN: Survey says... Bzzzzt!") {
if (::testing::Matches(::testing::MatchesRegex(
"connections to all backends failing; last error: "
"UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Survey says... Bzzzzt!"))(status.error_message())) {
break;
}
EXPECT_THAT(
status.error_message(),
::testing::MatchesRegex(
"connections to all backends failing; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)"));
EXPECT_THAT(status.error_message(),
::testing::MatchesRegex(
"connections to all backends failing; last error: "
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)"));
EXPECT_LT(absl::Now(), deadline);
if (absl::Now() >= deadline) break;
}

@ -554,12 +554,13 @@ TEST_P(EdsTest, AllServersUnreachableFailFast) {
// The error shouldn't be DEADLINE_EXCEEDED because timeout is set to 5
// seconds, and we should disocver in that time that the target backend is
// down.
CheckRpcSendFailure(
DEBUG_LOCATION, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)",
RpcOptions().set_timeout_ms(kRpcTimeoutMs));
CheckRpcSendFailure(DEBUG_LOCATION, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)",
RpcOptions().set_timeout_ms(kRpcTimeoutMs));
}
// Tests that RPCs fail when the backends are down, and will succeed again
@ -580,11 +581,12 @@ TEST_P(EdsTest, BackendsRestart) {
::testing::AnyOf(::testing::Eq(GRPC_CHANNEL_TRANSIENT_FAILURE),
::testing::Eq(GRPC_CHANNEL_CONNECTING)));
// RPCs should fail.
CheckRpcSendFailure(
DEBUG_LOCATION, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)");
CheckRpcSendFailure(DEBUG_LOCATION, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)");
// Restart all backends. RPCs should start succeeding again.
StartAllBackends();
CheckRpcSendOk(DEBUG_LOCATION, 1,
@ -1217,8 +1219,10 @@ TEST_P(FailoverTest, UpdateInitialUnavailable) {
balancer_->ads_service()->SetEdsResource(BuildEdsResource(args));
constexpr char kErrorMessageRegex[] =
"connections to all backends failing; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)";
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)";
CheckRpcSendFailure(DEBUG_LOCATION, StatusCode::UNAVAILABLE,
kErrorMessageRegex);
args = EdsResourceArgs({

@ -995,8 +995,10 @@ TEST_P(RingHashTest, ReattemptWhenAllEndpointsUnreachable) {
CheckRpcSendFailure(
DEBUG_LOCATION, StatusCode::UNAVAILABLE,
"ring hash cannot find a connected subchannel; first failure: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)",
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)",
RpcOptions().set_metadata(std::move(metadata)));
StartBackend(0);
// Ensure we are actively connecting without any traffic.
@ -1035,8 +1037,10 @@ TEST_P(RingHashTest, TransientFailureSkipToAvailableReady) {
CheckRpcSendFailure(
DEBUG_LOCATION, StatusCode::UNAVAILABLE,
"ring hash cannot find a connected subchannel; first failure: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)",
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)",
rpc_options);
gpr_log(GPR_INFO, "=== DONE WITH FIRST RPC ===");
EXPECT_EQ(GRPC_CHANNEL_TRANSIENT_FAILURE, channel_->GetState(false));
@ -1072,8 +1076,10 @@ TEST_P(RingHashTest, TransientFailureSkipToAvailableReady) {
CheckRpcSendFailure(
DEBUG_LOCATION, StatusCode::UNAVAILABLE,
"ring hash cannot find a connected subchannel; first failure: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)",
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)",
rpc_options);
gpr_log(GPR_INFO, "=== STARTING BACKEND 1 ===");
StartBackend(1);

@ -1836,8 +1836,10 @@ TEST_P(LdsRdsTest, XdsRoutingClusterUpdateClustersWithPickingDelays) {
// that the client has received the update and attempted to connect.
constexpr char kErrorMessageRegex[] =
"connections to all backends failing; last error: "
"(UNKNOWN: Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: Failed to connect to remote host: FD shutdown)";
"(UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: Connection refused|"
"UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"Failed to connect to remote host: FD shutdown)";
CheckRpcSendFailure(DEBUG_LOCATION, StatusCode::UNAVAILABLE,
kErrorMessageRegex);
// Now create a new cluster, pointing to backend 1.

Loading…
Cancel
Save