Merge pull request #16306 from AspirinSJL/pf_check_state_before_watch

PF: Check connectivity state before watching
reviewable/pr16342/r9^2
Juanli Shen 7 years ago committed by GitHub
commit b8972046b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 112
      src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
  2. 55
      test/cpp/end2end/client_lb_end2end_test.cc

@ -80,6 +80,11 @@ class PickFirst : public LoadBalancingPolicy {
void ProcessConnectivityChangeLocked(
grpc_connectivity_state connectivity_state, grpc_error* error) override;
// Processes the connectivity change to READY for an unselected subchannel.
void ProcessUnselectedReadyLocked();
void CheckConnectivityStateAndStartWatchingLocked();
};
class PickFirstSubchannelList
@ -247,7 +252,8 @@ void PickFirst::StartPickingLocked() {
if (subchannel_list_ != nullptr) {
for (size_t i = 0; i < subchannel_list_->num_subchannels(); ++i) {
if (subchannel_list_->subchannel(i)->subchannel() != nullptr) {
subchannel_list_->subchannel(i)->StartConnectivityWatchLocked();
subchannel_list_->subchannel(i)
->CheckConnectivityStateAndStartWatchingLocked();
break;
}
}
@ -386,7 +392,8 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) {
// If we've started picking, start trying to connect to the first
// subchannel in the new list.
if (started_picking_) {
subchannel_list_->subchannel(0)->StartConnectivityWatchLocked();
subchannel_list_->subchannel(0)
->CheckConnectivityStateAndStartWatchingLocked();
}
} else {
// We do have a selected subchannel.
@ -440,7 +447,7 @@ void PickFirst::UpdateLocked(const grpc_channel_args& args) {
// subchannel in the new list.
if (started_picking_) {
latest_pending_subchannel_list_->subchannel(0)
->StartConnectivityWatchLocked();
->CheckConnectivityStateAndStartWatchingLocked();
}
}
}
@ -519,41 +526,7 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
// select in place of the current one.
switch (connectivity_state) {
case GRPC_CHANNEL_READY: {
// Case 2. Promote p->latest_pending_subchannel_list_ to
// p->subchannel_list_.
if (subchannel_list() == p->latest_pending_subchannel_list_.get()) {
if (grpc_lb_pick_first_trace.enabled()) {
gpr_log(GPR_INFO,
"Pick First %p promoting pending subchannel list %p to "
"replace %p",
p, p->latest_pending_subchannel_list_.get(),
p->subchannel_list_.get());
}
p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
}
// Cases 1 and 2.
grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_READY,
GRPC_ERROR_NONE, "connecting_ready");
p->selected_ = this;
if (grpc_lb_pick_first_trace.enabled()) {
gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p,
subchannel());
}
// Drop all other subchannels, since we are now connected.
p->DestroyUnselectedSubchannelsLocked();
// Update any calls that were waiting for a pick.
PickState* pick;
while ((pick = p->pending_picks_)) {
p->pending_picks_ = pick->next;
pick->connected_subchannel =
p->selected_->connected_subchannel()->Ref();
if (grpc_lb_pick_first_trace.enabled()) {
gpr_log(GPR_INFO,
"Servicing pending pick with selected subchannel %p",
p->selected_->subchannel());
}
GRPC_CLOSURE_SCHED(pick->on_complete, GRPC_ERROR_NONE);
}
ProcessUnselectedReadyLocked();
// Renew notification.
RenewConnectivityWatchLocked();
break;
@ -574,7 +547,7 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
&p->state_tracker_, GRPC_CHANNEL_TRANSIENT_FAILURE,
GRPC_ERROR_REF(error), "exhausted_subchannels");
}
sd->StartConnectivityWatchLocked();
sd->CheckConnectivityStateAndStartWatchingLocked();
break;
}
case GRPC_CHANNEL_CONNECTING:
@ -595,6 +568,67 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
GRPC_ERROR_UNREF(error);
}
void PickFirst::PickFirstSubchannelData::ProcessUnselectedReadyLocked() {
PickFirst* p = static_cast<PickFirst*>(subchannel_list()->policy());
// If we get here, there are two possible cases:
// 1. We do not currently have a selected subchannel, and the update is
// for a subchannel in p->subchannel_list_ that we're trying to
// connect to. The goal here is to find a subchannel that we can
// select.
// 2. We do currently have a selected subchannel, and the update is
// for a subchannel in p->latest_pending_subchannel_list_. The
// goal here is to find a subchannel from the update that we can
// select in place of the current one.
GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() ||
subchannel_list() == p->latest_pending_subchannel_list_.get());
// Case 2. Promote p->latest_pending_subchannel_list_ to p->subchannel_list_.
if (subchannel_list() == p->latest_pending_subchannel_list_.get()) {
if (grpc_lb_pick_first_trace.enabled()) {
gpr_log(GPR_INFO,
"Pick First %p promoting pending subchannel list %p to "
"replace %p",
p, p->latest_pending_subchannel_list_.get(),
p->subchannel_list_.get());
}
p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
}
// Cases 1 and 2.
grpc_connectivity_state_set(&p->state_tracker_, GRPC_CHANNEL_READY,
GRPC_ERROR_NONE, "subchannel_ready");
p->selected_ = this;
if (grpc_lb_pick_first_trace.enabled()) {
gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p, subchannel());
}
// Drop all other subchannels, since we are now connected.
p->DestroyUnselectedSubchannelsLocked();
// Update any calls that were waiting for a pick.
PickState* pick;
while ((pick = p->pending_picks_)) {
p->pending_picks_ = pick->next;
pick->connected_subchannel = p->selected_->connected_subchannel()->Ref();
if (grpc_lb_pick_first_trace.enabled()) {
gpr_log(GPR_INFO, "Servicing pending pick with selected subchannel %p",
p->selected_->subchannel());
}
GRPC_CLOSURE_SCHED(pick->on_complete, GRPC_ERROR_NONE);
}
}
void PickFirst::PickFirstSubchannelData::
CheckConnectivityStateAndStartWatchingLocked() {
PickFirst* p = static_cast<PickFirst*>(subchannel_list()->policy());
grpc_error* error = GRPC_ERROR_NONE;
if (p->selected_ != this &&
CheckConnectivityStateLocked(&error) == GRPC_CHANNEL_READY) {
// We must process the READY subchannel before we start watching it.
// Otherwise, we won't know it's READY because we will be waiting for its
// connectivity state to change from READY.
ProcessUnselectedReadyLocked();
}
GRPC_ERROR_UNREF(error);
StartConnectivityWatchLocked();
}
//
// factory
//

@ -291,6 +291,17 @@ class ClientLbEnd2endTest : public ::testing::Test {
ResetCounters();
}
bool WaitForChannelNotReady(Channel* channel, int timeout_seconds = 5) {
const gpr_timespec deadline =
grpc_timeout_seconds_to_deadline(timeout_seconds);
grpc_connectivity_state state;
while ((state = channel->GetState(false /* try_to_connect */)) ==
GRPC_CHANNEL_READY) {
if (!channel->WaitForStateChange(state, deadline)) return false;
}
return true;
}
bool SeenAllServers() {
for (const auto& server : servers_) {
if (server->service_.request_count() == 0) return false;
@ -590,6 +601,50 @@ TEST_F(ClientLbEnd2endTest, PickFirstReresolutionNoSelected) {
EXPECT_EQ("pick_first", channel->GetLoadBalancingPolicyName());
}
TEST_F(ClientLbEnd2endTest, PickFirstCheckStateBeforeStartWatch) {
std::vector<int> ports = {grpc_pick_unused_port_or_die()};
StartServers(1, ports);
auto channel_1 = BuildChannel("pick_first");
auto stub_1 = BuildStub(channel_1);
SetNextResolution(ports);
gpr_log(GPR_INFO, "****** RESOLUTION SET FOR CHANNEL 1 *******");
WaitForServer(stub_1, 0, DEBUG_LOCATION);
gpr_log(GPR_INFO, "****** CHANNEL 1 CONNECTED *******");
servers_[0]->Shutdown();
// Channel 1 will receive a re-resolution containing the same server. It will
// create a new subchannel and hold a ref to it.
servers_.clear();
StartServers(1, ports);
gpr_log(GPR_INFO, "****** SERVER RESTARTED *******");
auto channel_2 = BuildChannel("pick_first");
auto stub_2 = BuildStub(channel_2);
// TODO(juanlishen): This resolution result will only be visible to channel 2
// since the response generator is only associated with channel 2 now. We
// should change the response generator to be able to deliver updates to
// multiple channels at once.
SetNextResolution(ports);
gpr_log(GPR_INFO, "****** RESOLUTION SET FOR CHANNEL 2 *******");
WaitForServer(stub_2, 0, DEBUG_LOCATION, true);
gpr_log(GPR_INFO, "****** CHANNEL 2 CONNECTED *******");
servers_[0]->Shutdown();
// Wait until the disconnection has triggered the connectivity notification.
// Otherwise, the subchannel may be picked for next call but will fail soon.
EXPECT_TRUE(WaitForChannelNotReady(channel_2.get()));
// Channel 2 will also receive a re-resolution containing the same server.
// Both channels will ref the same subchannel that failed.
servers_.clear();
StartServers(1, ports);
gpr_log(GPR_INFO, "****** SERVER RESTARTED AGAIN *******");
gpr_log(GPR_INFO, "****** CHANNEL 2 STARTING A CALL *******");
// The first call after the server restart will succeed.
CheckRpcSendOk(stub_2, DEBUG_LOCATION);
gpr_log(GPR_INFO, "****** CHANNEL 2 FINISHED A CALL *******");
// Check LB policy name for the channel.
EXPECT_EQ("pick_first", channel_1->GetLoadBalancingPolicyName());
// Check LB policy name for the channel.
EXPECT_EQ("pick_first", channel_2->GetLoadBalancingPolicyName());
}
TEST_F(ClientLbEnd2endTest, RoundRobin) {
// Start servers and send one RPC per server.
const int kNumServers = 3;

Loading…
Cancel
Save