From 0fb47cd886895a015c85568c6863b5158db41e3d Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 18 Jan 2022 13:25:07 -0800 Subject: [PATCH] pick_first: make TRANSIENT_FAILURE sticky (#28571) --- .../lb_policy/pick_first/pick_first.cc | 8 ++- test/cpp/end2end/client_lb_end2end_test.cc | 58 +++++++++++++------ 2 files changed, 46 insertions(+), 20 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index a0b83d78683..e3de151eff6 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -377,9 +377,9 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( // for a subchannel in p->latest_pending_subchannel_list_. The // goal here is to find a subchannel from the update that we can // select in place of the current one. - subchannel_list()->set_in_transient_failure(false); switch (connectivity_state) { case GRPC_CHANNEL_READY: { + subchannel_list()->set_in_transient_failure(false); ProcessUnselectedReadyLocked(); break; } @@ -428,8 +428,10 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( } case GRPC_CHANNEL_CONNECTING: case GRPC_CHANNEL_IDLE: { - // Only update connectivity state in case 1. - if (subchannel_list() == p->subchannel_list_.get()) { + // Only update connectivity state in case 1, and only if we're not + // already in TRANSIENT_FAILURE. + if (subchannel_list() == p->subchannel_list_.get() && + !subchannel_list()->in_transient_failure()) { p->channel_control_helper()->UpdateState( GRPC_CHANNEL_CONNECTING, absl::Status(), absl::make_unique( diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index 8c0bf224f72..9bb3005df44 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -1,20 +1,16 @@ -/* - * - * Copyright 2016 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ +// Copyright 2016 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #include #include @@ -1079,6 +1075,34 @@ TEST_F(ClientLbEnd2endTest, PickFirstStaysIdleUponEmptyUpdate) { EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY); } +TEST_F(ClientLbEnd2endTest, + PickFirstStaysTransientFailureOnFailedConnectionAttemptUntilReady) { + // Allocate 3 ports, with no servers running. + std::vector ports = {grpc_pick_unused_port_or_die(), + grpc_pick_unused_port_or_die(), + grpc_pick_unused_port_or_die()}; + // Create channel with a 1-second backoff. + ChannelArguments args; + args.SetInt(GRPC_ARG_INITIAL_RECONNECT_BACKOFF_MS, + 1000 * grpc_test_slowdown_factor()); + auto response_generator = BuildResolverResponseGenerator(); + auto channel = BuildChannel("", response_generator, args); + auto stub = BuildStub(channel); + response_generator.SetNextResolution(ports); + EXPECT_EQ(GRPC_CHANNEL_IDLE, channel->GetState(false)); + // Send an RPC, which should fail. + CheckRpcSendFailure(stub); + // Channel should be in TRANSIENT_FAILURE. + EXPECT_EQ(GRPC_CHANNEL_TRANSIENT_FAILURE, channel->GetState(false)); + // Now start a server on the last port. + StartServers(1, {ports.back()}); + // Channel should remain in TRANSIENT_FAILURE until it transitions to READY. + EXPECT_TRUE(channel->WaitForStateChange(GRPC_CHANNEL_TRANSIENT_FAILURE, + grpc_timeout_seconds_to_deadline(4))); + EXPECT_EQ(GRPC_CHANNEL_READY, channel->GetState(false)); + CheckRpcSendOk(stub, DEBUG_LOCATION); +} + TEST_F(ClientLbEnd2endTest, RoundRobin) { // Start servers and send one RPC per server. const int kNumServers = 3;