From db0a475df0f5f866af062177d9e190df9e0a5803 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Fri, 30 Mar 2018 14:44:27 -0700 Subject: [PATCH] more WIP --- .../lb_policy/pick_first/pick_first.cc | 2 +- .../lb_policy/round_robin/round_robin.cc | 17 ++++++++------ .../lb_policy/subchannel_list.h | 23 +++++++++++-------- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index b593f93d7b9..5fd4cade9ef 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -508,7 +508,7 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( case GRPC_CHANNEL_READY: { // Case 2. Promote p->latest_pending_subchannel_list_ to // p->subchannel_list_. - GetConnectedSubchannelFromSubchannelLocked(); + SetConnectedSubchannelFromSubchannelLocked(); if (p->latest_pending_subchannel_list_ == subchannel_list()) { GPR_ASSERT(p->subchannel_list_ != nullptr); p->subchannel_list_->ShutdownLocked("finish_update"); diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index a9d9227ea52..c697ebb402b 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -477,10 +477,11 @@ void RoundRobin::RoundRobinSubchannelData::ProcessConnectivityChangeLocked( if (grpc_lb_round_robin_trace.enabled()) { gpr_log( GPR_DEBUG, - "[RR %p] connectivity changed for subchannel %p, subchannel_list %p: " - "prev_state=%s new_state=%s p->shutdown=%d " - "sd->subchannel_list->shutting_down=%d error=%s", - p, subchannel(), subchannel_list(), + "[RR %p] connectivity changed for subchannel %p, subchannel_list %p " + "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s " + "p->shutdown=%d sd->subchannel_list->shutting_down=%d error=%s", + p, subchannel(), subchannel_list(), Index(), + subchannel_list()->num_subchannels(), grpc_connectivity_state_name(prev_connectivity_state_), grpc_connectivity_state_name(connectivity_state()), p->shutdown_, subchannel_list()->shutting_down(), @@ -510,7 +511,6 @@ void RoundRobin::RoundRobinSubchannelData::ProcessConnectivityChangeLocked( // subchannel, if any. switch (connectivity_state()) { case GRPC_CHANNEL_TRANSIENT_FAILURE: { - clear_connected_subchannel(); if (grpc_lb_round_robin_trace.enabled()) { gpr_log(GPR_DEBUG, "[RR %p] Subchannel %p has gone into TRANSIENT_FAILURE. " @@ -522,7 +522,7 @@ void RoundRobin::RoundRobinSubchannelData::ProcessConnectivityChangeLocked( } case GRPC_CHANNEL_READY: { if (connected_subchannel() == nullptr) { - GetConnectedSubchannelFromSubchannelLocked(); + SetConnectedSubchannelFromSubchannelLocked(); } if (p->subchannel_list_ != subchannel_list()) { // promote subchannel_list() to p->subchannel_list_. @@ -657,13 +657,16 @@ void RoundRobin::UpdateLocked(const grpc_channel_args& args) { } if (started_picking_) { for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { +// FIXME: this is wrong, because we should not reset +// curr_connectivity_state_ or pending_connectivity_state_unsafe_ unless +// the new state is TRANSIENT_FAILURE const grpc_connectivity_state subchannel_state = subchannel_list->subchannel(i)->CheckConnectivityStateLocked(); // Override the default setting of IDLE for connectivity notification // purposes if the subchannel is already in transient failure. Otherwise // we'd be immediately notified of the IDLE-TRANSIENT_FAILURE // discrepancy, attempt to re-resolve, and end up here again. -// FIXME +// FIXME: do this // TODO(roth): As part of C++-ifying the subchannel_list API, design a // better API for notifying the LB policy of subchannel states, which can // be used both for the subchannel's initial state and for subsequent diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h index d717ba0e371..0824fe373c6 100644 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h @@ -59,22 +59,22 @@ class SubchannelData { return connected_subchannel_.get(); } -// FIXME: maybe do this automatically in OnConnectivityChangedLocked() -// when the new state is TRANSIENT_FAILURE? - void clear_connected_subchannel() { connected_subchannel_.reset(); } - - void GetConnectedSubchannelFromSubchannelLocked() { + void SetConnectedSubchannelFromSubchannelLocked() { connected_subchannel_ = grpc_subchannel_get_connected_subchannel(subchannel_); } + // An alternative to SetConnectedSubchannelFromSubchannelLocked() for + // cases where we are retaining a connected subchannel from a previous + // subchannel list. This is slightly more efficient than getting the + // connected subchannel from the subchannel, because that approach + // requires the use of a mutex, whereas this one only mutates a + // refcount. void SetConnectedSubchannelFromLocked(SubchannelData* other) { + GPR_ASSERT(subchannel_ == other->subchannel_); connected_subchannel_ = other->connected_subchannel_; // Adds ref. } - bool connectivity_notification_pending() const { - return connectivity_notification_pending_; - } grpc_connectivity_state connectivity_state() const { return curr_connectivity_state_; } @@ -87,6 +87,7 @@ class SubchannelData { } // Unrefs the subchannel. +// FIXME: move this to private in favor of ShutdownLocked()? virtual void UnrefSubchannelLocked(const char* reason); /// Starts watching the connectivity state of the subchannel. @@ -129,7 +130,7 @@ class SubchannelData { // Notification that connectivity has changed on subchannel. grpc_closure connectivity_changed_closure_; // Is a connectivity notification pending? - bool connectivity_notification_pending_; + bool connectivity_notification_pending_ = false; // Connectivity state to be updated by // grpc_subchannel_notify_on_state_change(), not guarded by // the combiner. Will be copied to \a curr_connectivity_state_ by @@ -297,6 +298,10 @@ void SubchannelDatacurr_connectivity_state_ = sd->pending_connectivity_state_unsafe_; + // If we get TRANSIENT_FAILURE, unref the connected subchannel. + if (sd->curr_connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE) { + sd->connected_subchannel_.reset(); + } sd->ProcessConnectivityChangeLocked(error); }