diff --git a/BUILD b/BUILD index dba6592f172..290f4c56213 100644 --- a/BUILD +++ b/BUILD @@ -544,24 +544,24 @@ grpc_cc_library( grpc_cc_library( name = "debug_location", - public_hdrs = ["src/core/lib/support/debug_location.h"], language = "c++", + public_hdrs = ["src/core/lib/support/debug_location.h"], ) grpc_cc_library( name = "ref_counted", - public_hdrs = ["src/core/lib/support/ref_counted.h"], language = "c++", + public_hdrs = ["src/core/lib/support/ref_counted.h"], deps = [ - "grpc_trace", "debug_location", + "grpc_trace", ], ) grpc_cc_library( name = "ref_counted_ptr", - public_hdrs = ["src/core/lib/support/ref_counted_ptr.h"], language = "c++", + public_hdrs = ["src/core/lib/support/ref_counted_ptr.h"], ) grpc_cc_library( @@ -919,6 +919,7 @@ grpc_cc_library( deps = [ "grpc_base", "grpc_deadline_filter", + "ref_counted", ], ) diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index e99022a91b8..4f3b774212f 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -1004,7 +1004,7 @@ static void create_subchannel_call_locked(grpc_call_element* elem, grpc_error* error) { channel_data* chand = (channel_data*)elem->channel_data; call_data* calld = (call_data*)elem->call_data; - const grpc_connected_subchannel_call_args call_args = { + const grpc_connected_subchannel::CallArgs call_args = { calld->pollent, // pollent calld->path, // path calld->call_start_time, // start_time @@ -1013,8 +1013,8 @@ static void create_subchannel_call_locked(grpc_call_element* elem, calld->subchannel_call_context, // context calld->call_combiner // call_combiner }; - grpc_error* new_error = grpc_connected_subchannel_create_call( - calld->connected_subchannel, &call_args, &calld->subchannel_call); + grpc_error* new_error = calld->connected_subchannel->CreateCall( + &call_args, &calld->subchannel_call); if (grpc_client_channel_trace.enabled()) { gpr_log(GPR_DEBUG, "chand=%p calld=%p: create subchannel_call=%p: error=%s", chand, calld, calld->subchannel_call, grpc_error_string(new_error)); diff --git a/src/core/ext/filters/client_channel/lb_policy.h b/src/core/ext/filters/client_channel/lb_policy.h index 3572c97ed14..628127106b1 100644 --- a/src/core/ext/filters/client_channel/lb_policy.h +++ b/src/core/ext/filters/client_channel/lb_policy.h @@ -164,7 +164,7 @@ int grpc_lb_policy_pick_locked(grpc_lb_policy* policy, grpc_call_context_element* context, void** user_data, grpc_closure* on_complete); -/** Perform a connected subchannel ping (see \a grpc_connected_subchannel_ping) +/** Perform a connected subchannel ping (see \a grpc_connected_subchannel::Ping) against one of the connected subchannels managed by \a policy. */ void grpc_lb_policy_ping_one_locked(grpc_lb_policy* policy, grpc_closure* on_initiate, diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 0861261359f..a3b05aacaf6 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -225,8 +225,7 @@ static void pf_ping_one_locked(grpc_lb_policy* pol, grpc_closure* on_initiate, grpc_closure* on_ack) { pick_first_lb_policy* p = (pick_first_lb_policy*)pol; if (p->selected) { - grpc_connected_subchannel_ping(p->selected->connected_subchannel, - on_initiate, on_ack); + p->selected->connected_subchannel->Ping(on_initiate, on_ack); } else { GRPC_CLOSURE_SCHED(on_initiate, GRPC_ERROR_CREATE_FROM_STATIC_STRING("Not connected")); @@ -413,6 +412,18 @@ static void pf_connectivity_changed_locked(void* arg, grpc_error* error) { &p->state_tracker, GRPC_CHANNEL_TRANSIENT_FAILURE, GRPC_ERROR_REF(error), "selected_not_ready+switch_to_update"); } else { + if (sd->curr_connectivity_state < GRPC_CHANNEL_TRANSIENT_FAILURE) { + // Renew notification. + grpc_lb_subchannel_data_start_connectivity_watch(sd); + } else { // in transient failure or shutdown. Rely on re-resolution to + // recover. + p->selected = nullptr; + grpc_lb_subchannel_data_stop_connectivity_watch(sd); + grpc_lb_subchannel_list_unref_for_connectivity_watch( + sd->subchannel_list, "pf_selected_shutdown"); + grpc_lb_subchannel_data_unref_subchannel( + sd, "pf_selected_shutdown"); // Unrefs connected subchannel + } // TODO(juanlishen): we re-resolve when the selected subchannel goes to // TRANSIENT_FAILURE because we used to shut down in this case before // re-resolution is introduced. But we need to investigate whether we @@ -432,16 +443,6 @@ static void pf_connectivity_changed_locked(void* arg, grpc_error* error) { sd->curr_connectivity_state, GRPC_ERROR_REF(error), "selected_changed"); } - if (sd->curr_connectivity_state != GRPC_CHANNEL_SHUTDOWN) { - // Renew notification. - grpc_lb_subchannel_data_start_connectivity_watch(sd); - } else { - p->selected = nullptr; - grpc_lb_subchannel_data_stop_connectivity_watch(sd); - grpc_lb_subchannel_list_unref_for_connectivity_watch( - sd->subchannel_list, "pf_selected_shutdown"); - grpc_lb_subchannel_data_unref_subchannel(sd, "pf_selected_shutdown"); - } } return; } diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index b0c84017dfb..0836dad2f6b 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -442,8 +442,19 @@ static void rr_connectivity_changed_locked(void* arg, grpc_error* error) { // Update state counters and new overall state. update_state_counters_locked(sd); update_lb_connectivity_status_locked(sd, GRPC_ERROR_REF(error)); + // If the sd's new state is TRANSIENT_FAILURE, unref the *connected* + // subchannel, if any. + if (sd->curr_connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { + if (sd->connected_subchannel != nullptr) { + GRPC_CONNECTED_SUBCHANNEL_UNREF(sd->connected_subchannel, + "connected_subchannel_transient_failure"); + sd->connected_subchannel = nullptr; + } + // Renew notification. + grpc_lb_subchannel_data_start_connectivity_watch(sd); + } // If the sd's new state is SHUTDOWN, unref the subchannel. - if (sd->curr_connectivity_state == GRPC_CHANNEL_SHUTDOWN) { + else if (sd->curr_connectivity_state == GRPC_CHANNEL_SHUTDOWN) { grpc_lb_subchannel_data_stop_connectivity_watch(sd); grpc_lb_subchannel_data_unref_subchannel(sd, "rr_connectivity_shutdown"); grpc_lb_subchannel_list_unref_for_connectivity_watch( @@ -540,7 +551,7 @@ static void rr_ping_one_locked(grpc_lb_policy* pol, grpc_closure* on_initiate, &p->subchannel_list->subchannels[next_ready_index]; grpc_connected_subchannel* target = GRPC_CONNECTED_SUBCHANNEL_REF( selected->connected_subchannel, "rr_ping"); - grpc_connected_subchannel_ping(target, on_initiate, on_ack); + target->Ping(on_initiate, on_ack); GRPC_CONNECTED_SUBCHANNEL_UNREF(target, "rr_ping"); } else { GRPC_CLOSURE_SCHED(on_initiate, GRPC_ERROR_CREATE_FROM_STATIC_STRING( diff --git a/src/core/ext/filters/client_channel/subchannel.cc b/src/core/ext/filters/client_channel/subchannel.cc index f07394d29b2..25615b6326f 100644 --- a/src/core/ext/filters/client_channel/subchannel.cc +++ b/src/core/ext/filters/client_channel/subchannel.cc @@ -41,6 +41,7 @@ #include "src/core/lib/iomgr/timer.h" #include "src/core/lib/profiling/timers.h" #include "src/core/lib/slice/slice_internal.h" +#include "src/core/lib/support/debug_location.h" #include "src/core/lib/support/manual_constructor.h" #include "src/core/lib/surface/channel.h" #include "src/core/lib/surface/channel_init.h" @@ -96,7 +97,7 @@ struct grpc_subchannel { grpc_connect_out_args connecting_result; /** callback for connection finishing */ - grpc_closure connected; + grpc_closure on_connected; /** callback for our alarm */ grpc_closure on_alarm; @@ -139,11 +140,10 @@ struct grpc_subchannel_call { }; #define SUBCHANNEL_CALL_TO_CALL_STACK(call) ((grpc_call_stack*)((call) + 1)) -#define CHANNEL_STACK_FROM_CONNECTION(con) ((grpc_channel_stack*)(con)) #define CALLSTACK_TO_SUBCHANNEL_CALL(callstack) \ (((grpc_subchannel_call*)(callstack)) - 1) -static void subchannel_connected(void* subchannel, grpc_error* error); +static void on_subchannel_connected(void* subchannel, grpc_error* error); #ifndef NDEBUG #define REF_REASON reason @@ -161,20 +161,20 @@ static void subchannel_connected(void* subchannel, grpc_error* error); */ static void connection_destroy(void* arg, grpc_error* error) { - grpc_connected_subchannel* c = (grpc_connected_subchannel*)arg; - grpc_channel_stack_destroy(CHANNEL_STACK_FROM_CONNECTION(c)); - gpr_free(c); + grpc_channel_stack* stk = (grpc_channel_stack*)arg; + grpc_channel_stack_destroy(stk); + gpr_free(stk); } grpc_connected_subchannel* grpc_connected_subchannel_ref( grpc_connected_subchannel* c GRPC_SUBCHANNEL_REF_EXTRA_ARGS) { - GRPC_CHANNEL_STACK_REF(CHANNEL_STACK_FROM_CONNECTION(c), REF_REASON); + c->Ref(DEBUG_LOCATION, REF_REASON); return c; } void grpc_connected_subchannel_unref( grpc_connected_subchannel* c GRPC_SUBCHANNEL_REF_EXTRA_ARGS) { - GRPC_CHANNEL_STACK_UNREF(CHANNEL_STACK_FROM_CONNECTION(c), REF_REASON); + c->Unref(DEBUG_LOCATION, REF_REASON); } /* @@ -241,16 +241,15 @@ grpc_subchannel* grpc_subchannel_ref_from_weak_ref( } static void disconnect(grpc_subchannel* c) { - grpc_connected_subchannel* con; grpc_subchannel_index_unregister(c->key, c); gpr_mu_lock(&c->mu); GPR_ASSERT(!c->disconnected); c->disconnected = true; grpc_connector_shutdown(c->connector, GRPC_ERROR_CREATE_FROM_STATIC_STRING( "Subchannel disconnected")); - con = GET_CONNECTED_SUBCHANNEL(c, no_barrier); + grpc_connected_subchannel* con = GET_CONNECTED_SUBCHANNEL(c, no_barrier); if (con != nullptr) { - GRPC_CONNECTED_SUBCHANNEL_UNREF(con, "connection"); + GRPC_CONNECTED_SUBCHANNEL_UNREF(con, "disconnect"); gpr_atm_no_barrier_store(&c->connected_subchannel, (gpr_atm)0xdeadbeef); } gpr_mu_unlock(&c->mu); @@ -372,7 +371,7 @@ grpc_subchannel* grpc_subchannel_create(grpc_connector* connector, if (new_args != nullptr) grpc_channel_args_destroy(new_args); c->root_external_state_watcher.next = c->root_external_state_watcher.prev = &c->root_external_state_watcher; - GRPC_CLOSURE_INIT(&c->connected, subchannel_connected, c, + GRPC_CLOSURE_INIT(&c->on_connected, on_subchannel_connected, c, grpc_schedule_on_exec_ctx); grpc_connectivity_state_init(&c->state_tracker, GRPC_CHANNEL_IDLE, "subchannel"); @@ -395,7 +394,7 @@ static void continue_connect_locked(grpc_subchannel* c) { grpc_connectivity_state_set(&c->state_tracker, GRPC_CHANNEL_CONNECTING, GRPC_ERROR_NONE, "state_change"); grpc_connector_connect(c->connector, &args, &c->connecting_result, - &c->connected); + &c->on_connected); } grpc_connectivity_state grpc_subchannel_check_connectivity(grpc_subchannel* c, @@ -479,9 +478,10 @@ static void maybe_start_connecting_locked(grpc_subchannel* c) { const grpc_millis time_til_next = c->next_attempt_deadline - grpc_core::ExecCtx::Get()->Now(); if (time_til_next <= 0) { - gpr_log(GPR_INFO, "Retry immediately"); + gpr_log(GPR_INFO, "Subchannel %p: Retry immediately", c); } else { - gpr_log(GPR_INFO, "Retry in %" PRIdPTR " milliseconds", time_til_next); + gpr_log(GPR_INFO, "Subchannel %p: Retry in %" PRIdPTR " milliseconds", c, + time_til_next); } GRPC_CLOSURE_INIT(&c->on_alarm, on_alarm, c, grpc_schedule_on_exec_ctx); grpc_timer_init(&c->alarm, c->next_attempt_deadline, &c->on_alarm); @@ -525,75 +525,56 @@ void grpc_subchannel_notify_on_state_change( } } -void grpc_connected_subchannel_process_transport_op( - grpc_connected_subchannel* con, grpc_transport_op* op) { - grpc_channel_stack* channel_stack = CHANNEL_STACK_FROM_CONNECTION(con); - grpc_channel_element* top_elem = grpc_channel_stack_element(channel_stack, 0); - top_elem->filter->start_transport_op(top_elem, op); -} - -static void subchannel_on_child_state_changed(void* p, grpc_error* error) { - state_watcher* sw = (state_watcher*)p; - grpc_subchannel* c = sw->subchannel; +static void on_connected_subchannel_connectivity_changed(void* p, + grpc_error* error) { + state_watcher* connected_subchannel_watcher = (state_watcher*)p; + grpc_subchannel* c = connected_subchannel_watcher->subchannel; gpr_mu* mu = &c->mu; gpr_mu_lock(mu); + auto* con = GET_CONNECTED_SUBCHANNEL(c, no_barrier); /* if we failed just leave this closure */ - if (sw->connectivity_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - /* any errors on a subchannel ==> we're done, create a new one */ - sw->connectivity_state = GRPC_CHANNEL_SHUTDOWN; + if (connected_subchannel_watcher->connectivity_state == + GRPC_CHANNEL_TRANSIENT_FAILURE) { + if (!c->disconnected && con != nullptr) { + GRPC_CONNECTED_SUBCHANNEL_UNREF(con, "transient_failure"); + gpr_atm_no_barrier_store(&c->connected_subchannel, (gpr_atm) nullptr); + grpc_connectivity_state_set(&c->state_tracker, + GRPC_CHANNEL_TRANSIENT_FAILURE, + GRPC_ERROR_REF(error), "reflect_child"); + c->backoff_begun = false; + c->backoff->Reset(); + if (grpc_trace_stream_refcount.enabled()) { + gpr_log(GPR_INFO, + "Connected subchannel %p of subchannel %p has gone into " + "TRANSIENT_FAILURE. Attempting to reconnect.", + con, c); + } + maybe_start_connecting_locked(c); + goto done; + } else { + connected_subchannel_watcher->connectivity_state = GRPC_CHANNEL_SHUTDOWN; + } } - grpc_connectivity_state_set(&c->state_tracker, sw->connectivity_state, + grpc_connectivity_state_set(&c->state_tracker, + connected_subchannel_watcher->connectivity_state, GRPC_ERROR_REF(error), "reflect_child"); - if (sw->connectivity_state != GRPC_CHANNEL_SHUTDOWN) { - grpc_connected_subchannel_notify_on_state_change( - GET_CONNECTED_SUBCHANNEL(c, no_barrier), nullptr, - &sw->connectivity_state, &sw->closure); + if (connected_subchannel_watcher->connectivity_state < + GRPC_CHANNEL_TRANSIENT_FAILURE) { GRPC_SUBCHANNEL_WEAK_REF(c, "state_watcher"); - sw = nullptr; + con->NotifyOnStateChange(nullptr, + &connected_subchannel_watcher->connectivity_state, + &connected_subchannel_watcher->closure); + connected_subchannel_watcher = nullptr; } - +done: gpr_mu_unlock(mu); GRPC_SUBCHANNEL_WEAK_UNREF(c, "state_watcher"); - gpr_free(sw); -} - -static void connected_subchannel_state_op(grpc_connected_subchannel* con, - grpc_pollset_set* interested_parties, - grpc_connectivity_state* state, - grpc_closure* closure) { - grpc_transport_op* op = grpc_make_transport_op(nullptr); - grpc_channel_element* elem; - op->connectivity_state = state; - op->on_connectivity_state_change = closure; - op->bind_pollset_set = interested_parties; - elem = grpc_channel_stack_element(CHANNEL_STACK_FROM_CONNECTION(con), 0); - elem->filter->start_transport_op(elem, op); -} - -void grpc_connected_subchannel_notify_on_state_change( - grpc_connected_subchannel* con, grpc_pollset_set* interested_parties, - grpc_connectivity_state* state, grpc_closure* closure) { - connected_subchannel_state_op(con, interested_parties, state, closure); -} - -void grpc_connected_subchannel_ping(grpc_connected_subchannel* con, - grpc_closure* on_initiate, - grpc_closure* on_ack) { - grpc_transport_op* op = grpc_make_transport_op(nullptr); - grpc_channel_element* elem; - op->send_ping.on_initiate = on_initiate; - op->send_ping.on_ack = on_ack; - elem = grpc_channel_stack_element(CHANNEL_STACK_FROM_CONNECTION(con), 0); - elem->filter->start_transport_op(elem, op); + gpr_free(connected_subchannel_watcher); } static bool publish_transport_locked(grpc_subchannel* c) { - grpc_connected_subchannel* con; - grpc_channel_stack* stk; - state_watcher* sw_subchannel; - /* construct channel stack */ grpc_channel_stack_builder* builder = grpc_channel_stack_builder_create(); grpc_channel_stack_builder_set_channel_arguments( @@ -605,8 +586,9 @@ static bool publish_transport_locked(grpc_subchannel* c) { grpc_channel_stack_builder_destroy(builder); return false; } + grpc_channel_stack* stk; grpc_error* error = grpc_channel_stack_builder_finish( - builder, 0, 1, connection_destroy, nullptr, (void**)&con); + builder, 0, 1, connection_destroy, nullptr, (void**)&stk); if (error != GRPC_ERROR_NONE) { grpc_transport_destroy(c->connecting_result.transport); gpr_log(GPR_ERROR, "error initializing subchannel stack: %s", @@ -614,20 +596,21 @@ static bool publish_transport_locked(grpc_subchannel* c) { GRPC_ERROR_UNREF(error); return false; } - stk = CHANNEL_STACK_FROM_CONNECTION(con); memset(&c->connecting_result, 0, sizeof(c->connecting_result)); /* initialize state watcher */ - sw_subchannel = (state_watcher*)gpr_malloc(sizeof(*sw_subchannel)); - sw_subchannel->subchannel = c; - sw_subchannel->connectivity_state = GRPC_CHANNEL_READY; - GRPC_CLOSURE_INIT(&sw_subchannel->closure, subchannel_on_child_state_changed, - sw_subchannel, grpc_schedule_on_exec_ctx); + state_watcher* connected_subchannel_watcher = + (state_watcher*)gpr_zalloc(sizeof(*connected_subchannel_watcher)); + connected_subchannel_watcher->subchannel = c; + connected_subchannel_watcher->connectivity_state = GRPC_CHANNEL_READY; + GRPC_CLOSURE_INIT(&connected_subchannel_watcher->closure, + on_connected_subchannel_connectivity_changed, + connected_subchannel_watcher, grpc_schedule_on_exec_ctx); if (c->disconnected) { - gpr_free(sw_subchannel); + gpr_free(connected_subchannel_watcher); grpc_channel_stack_destroy(stk); - gpr_free(con); + gpr_free(stk); return false; } @@ -636,6 +619,8 @@ static bool publish_transport_locked(grpc_subchannel* c) { I'd have expected the rel_cas below to be enough, but seemingly it's not. Re-evaluate if we really need this. */ + grpc_connected_subchannel* con = + grpc_core::New(stk); gpr_atm_full_barrier(); GPR_ASSERT(gpr_atm_rel_cas(&c->connected_subchannel, 0, (gpr_atm)con)); @@ -643,9 +628,9 @@ static bool publish_transport_locked(grpc_subchannel* c) { ref for connecting is donated to the state watcher */ GRPC_SUBCHANNEL_WEAK_REF(c, "state_watcher"); GRPC_SUBCHANNEL_WEAK_UNREF(c, "connecting"); - grpc_connected_subchannel_notify_on_state_change( - con, c->pollset_set, &sw_subchannel->connectivity_state, - &sw_subchannel->closure); + con->NotifyOnStateChange(c->pollset_set, + &connected_subchannel_watcher->connectivity_state, + &connected_subchannel_watcher->closure); /* signal completion */ grpc_connectivity_state_set(&c->state_tracker, GRPC_CHANNEL_READY, @@ -653,11 +638,11 @@ static bool publish_transport_locked(grpc_subchannel* c) { return true; } -static void subchannel_connected(void* arg, grpc_error* error) { +static void on_subchannel_connected(void* arg, grpc_error* error) { grpc_subchannel* c = (grpc_subchannel*)arg; grpc_channel_args* delete_channel_args = c->connecting_result.channel_args; - GRPC_SUBCHANNEL_WEAK_REF(c, "connected"); + GRPC_SUBCHANNEL_WEAK_REF(c, "on_subchannel_connected"); gpr_mu_lock(&c->mu); c->connecting = false; if (c->connecting_result.transport != nullptr && @@ -736,36 +721,6 @@ const grpc_subchannel_key* grpc_subchannel_get_key( return subchannel->key; } -grpc_error* grpc_connected_subchannel_create_call( - grpc_connected_subchannel* con, - const grpc_connected_subchannel_call_args* args, - grpc_subchannel_call** call) { - grpc_channel_stack* chanstk = CHANNEL_STACK_FROM_CONNECTION(con); - *call = (grpc_subchannel_call*)gpr_arena_alloc( - args->arena, sizeof(grpc_subchannel_call) + chanstk->call_stack_size); - grpc_call_stack* callstk = SUBCHANNEL_CALL_TO_CALL_STACK(*call); - (*call)->connection = GRPC_CONNECTED_SUBCHANNEL_REF(con, "subchannel_call"); - const grpc_call_element_args call_args = { - callstk, /* call_stack */ - nullptr, /* server_transport_data */ - args->context, /* context */ - args->path, /* path */ - args->start_time, /* start_time */ - args->deadline, /* deadline */ - args->arena, /* arena */ - args->call_combiner /* call_combiner */ - }; - grpc_error* error = grpc_call_stack_init(chanstk, 1, subchannel_call_destroy, - *call, &call_args); - if (error != GRPC_ERROR_NONE) { - const char* error_string = grpc_error_string(error); - gpr_log(GPR_ERROR, "error: %s", error_string); - return error; - } - grpc_call_stack_set_pollset_or_pollset_set(callstk, args->pollent); - return GRPC_ERROR_NONE; -} - grpc_call_stack* grpc_subchannel_call_get_call_stack( grpc_subchannel_call* subchannel_call) { return SUBCHANNEL_CALL_TO_CALL_STACK(subchannel_call); @@ -801,3 +756,70 @@ grpc_arg grpc_create_subchannel_address_arg(const grpc_resolved_address* addr) { (char*)GRPC_ARG_SUBCHANNEL_ADDRESS, addr->len > 0 ? grpc_sockaddr_to_uri(addr) : gpr_strdup("")); } + +grpc_connected_subchannel::grpc_connected_subchannel( + grpc_channel_stack* channel_stack) + : grpc_core::RefCountedWithTracing(&grpc_trace_stream_refcount), + channel_stack_(channel_stack) {} + +grpc_connected_subchannel* grpc_connected_subchannel::Ref( + const grpc_core::DebugLocation& location, const char* reason) { + GRPC_CHANNEL_STACK_REF(channel_stack_, REF_REASON); + grpc_core::RefCountedWithTracing::Ref(location, reason); + return this; +} +void grpc_connected_subchannel::Unref(const grpc_core::DebugLocation& location, + const char* reason) { + GRPC_CHANNEL_STACK_UNREF(channel_stack_, REF_REASON); + grpc_core::RefCountedWithTracing::Unref(location, reason); +} + +void grpc_connected_subchannel::NotifyOnStateChange( + grpc_pollset_set* interested_parties, grpc_connectivity_state* state, + grpc_closure* closure) { + grpc_transport_op* op = grpc_make_transport_op(nullptr); + grpc_channel_element* elem; + op->connectivity_state = state; + op->on_connectivity_state_change = closure; + op->bind_pollset_set = interested_parties; + elem = grpc_channel_stack_element(channel_stack_, 0); + elem->filter->start_transport_op(elem, op); +} + +void grpc_connected_subchannel::Ping(grpc_closure* on_initiate, + grpc_closure* on_ack) { + grpc_transport_op* op = grpc_make_transport_op(nullptr); + grpc_channel_element* elem; + op->send_ping.on_initiate = on_initiate; + op->send_ping.on_ack = on_ack; + elem = grpc_channel_stack_element(channel_stack_, 0); + elem->filter->start_transport_op(elem, op); +} + +grpc_error* grpc_connected_subchannel::CreateCall(const CallArgs* args, + grpc_subchannel_call** call) { + *call = (grpc_subchannel_call*)gpr_arena_alloc( + args->arena, + sizeof(grpc_subchannel_call) + channel_stack_->call_stack_size); + grpc_call_stack* callstk = SUBCHANNEL_CALL_TO_CALL_STACK(*call); + (*call)->connection = Ref(DEBUG_LOCATION, "subchannel_call"); + const grpc_call_element_args call_args = { + callstk, /* call_stack */ + nullptr, /* server_transport_data */ + args->context, /* context */ + args->path, /* path */ + args->start_time, /* start_time */ + args->deadline, /* deadline */ + args->arena, /* arena */ + args->call_combiner /* call_combiner */ + }; + grpc_error* error = grpc_call_stack_init( + channel_stack_, 1, subchannel_call_destroy, *call, &call_args); + if (error != GRPC_ERROR_NONE) { + const char* error_string = grpc_error_string(error); + gpr_log(GPR_ERROR, "error: %s", error_string); + return error; + } + grpc_call_stack_set_pollset_or_pollset_set(callstk, args->pollent); + return GRPC_ERROR_NONE; +} diff --git a/src/core/ext/filters/client_channel/subchannel.h b/src/core/ext/filters/client_channel/subchannel.h index 9d34fff07a8..d9a850daaea 100644 --- a/src/core/ext/filters/client_channel/subchannel.h +++ b/src/core/ext/filters/client_channel/subchannel.h @@ -23,6 +23,7 @@ #include "src/core/lib/channel/channel_stack.h" #include "src/core/lib/iomgr/polling_entity.h" #include "src/core/lib/support/arena.h" +#include "src/core/lib/support/ref_counted.h" #include "src/core/lib/transport/connectivity_state.h" #include "src/core/lib/transport/metadata.h" @@ -32,7 +33,6 @@ /** A (sub-)channel that knows how to connect to exactly one target address. Provides a target for load balancing. */ typedef struct grpc_subchannel grpc_subchannel; -typedef struct grpc_connected_subchannel grpc_connected_subchannel; typedef struct grpc_subchannel_call grpc_subchannel_call; typedef struct grpc_subchannel_args grpc_subchannel_args; typedef struct grpc_subchannel_key grpc_subchannel_key; @@ -73,6 +73,34 @@ typedef struct grpc_subchannel_key grpc_subchannel_key; #define GRPC_SUBCHANNEL_REF_EXTRA_ARGS #endif +class grpc_connected_subchannel : public grpc_core::RefCountedWithTracing { + public: + struct CallArgs { + grpc_polling_entity* pollent; + grpc_slice path; + gpr_timespec start_time; + grpc_millis deadline; + gpr_arena* arena; + grpc_call_context_element* context; + grpc_call_combiner* call_combiner; + }; + + grpc_connected_subchannel(grpc_channel_stack* channel_stack); + grpc_connected_subchannel* Ref(const grpc_core::DebugLocation& location, + const char* reason); + void Unref(const grpc_core::DebugLocation& location, const char* reason); + grpc_channel_stack* channel_stack() { return channel_stack_; } + void NotifyOnStateChange(grpc_pollset_set* interested_parties, + grpc_connectivity_state* state, + grpc_closure* closure); + void Ping(grpc_closure* on_initiate, grpc_closure* on_ack); + + grpc_error* CreateCall(const CallArgs* args, grpc_subchannel_call** call); + + private: + grpc_channel_stack* channel_stack_; +}; + grpc_subchannel* grpc_subchannel_ref( grpc_subchannel* channel GRPC_SUBCHANNEL_REF_EXTRA_ARGS); grpc_subchannel* grpc_subchannel_ref_from_weak_ref( @@ -92,26 +120,6 @@ void grpc_subchannel_call_ref( void grpc_subchannel_call_unref( grpc_subchannel_call* call GRPC_SUBCHANNEL_REF_EXTRA_ARGS); -/** construct a subchannel call */ -typedef struct { - grpc_polling_entity* pollent; - grpc_slice path; - gpr_timespec start_time; - grpc_millis deadline; - gpr_arena* arena; - grpc_call_context_element* context; - grpc_call_combiner* call_combiner; -} grpc_connected_subchannel_call_args; - -grpc_error* grpc_connected_subchannel_create_call( - grpc_connected_subchannel* connected_subchannel, - const grpc_connected_subchannel_call_args* args, - grpc_subchannel_call** subchannel_call); - -/** process a transport level op */ -void grpc_connected_subchannel_process_transport_op( - grpc_connected_subchannel* subchannel, grpc_transport_op* op); - /** poll the current connectivity state of a channel */ grpc_connectivity_state grpc_subchannel_check_connectivity( grpc_subchannel* channel, grpc_error** error); @@ -121,12 +129,6 @@ grpc_connectivity_state grpc_subchannel_check_connectivity( void grpc_subchannel_notify_on_state_change( grpc_subchannel* channel, grpc_pollset_set* interested_parties, grpc_connectivity_state* state, grpc_closure* notify); -void grpc_connected_subchannel_notify_on_state_change( - grpc_connected_subchannel* channel, grpc_pollset_set* interested_parties, - grpc_connectivity_state* state, grpc_closure* notify); -void grpc_connected_subchannel_ping(grpc_connected_subchannel* channel, - grpc_closure* on_initiate, - grpc_closure* on_ack); /** retrieve the grpc_connected_subchannel - or NULL if called before the subchannel becomes connected */ diff --git a/src/core/lib/debug/trace.h b/src/core/lib/debug/trace.h index 69ddd802222..6ee1b49f747 100644 --- a/src/core/lib/debug/trace.h +++ b/src/core/lib/debug/trace.h @@ -88,9 +88,10 @@ class TraceFlag { #ifndef NDEBUG typedef TraceFlag DebugOnlyTraceFlag; #else -class DebugOnlyTraceFlag { +class DebugOnlyTraceFlag : public TraceFlag { public: - DebugOnlyTraceFlag(bool default_enabled, const char* name) {} + DebugOnlyTraceFlag(bool default_enabled, const char* name) + : TraceFlag(default_enabled, name) {} bool enabled() { return false; } private: diff --git a/src/core/lib/support/ref_counted.h b/src/core/lib/support/ref_counted.h index 4c662f91190..f2182baea19 100644 --- a/src/core/lib/support/ref_counted.h +++ b/src/core/lib/support/ref_counted.h @@ -23,6 +23,7 @@ #include #include "src/core/lib/debug/trace.h" +#include "src/core/lib/support/abstract.h" #include "src/core/lib/support/debug_location.h" #include "src/core/lib/support/memory.h" @@ -97,6 +98,7 @@ class RefCountedWithTracing { // Not copyable nor movable. RefCountedWithTracing(const RefCountedWithTracing&) = delete; RefCountedWithTracing& operator=(const RefCountedWithTracing&) = delete; + GRPC_ABSTRACT_BASE_CLASS protected: // Allow Delete() to access destructor. @@ -110,8 +112,6 @@ class RefCountedWithTracing { gpr_ref_init(&refs_, 1); } - virtual ~RefCountedWithTracing() {} - private: TraceFlag* trace_flag_ = nullptr; gpr_refcount refs_; diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index c6e9577f0ce..5a7e52e9e94 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -673,6 +673,42 @@ TEST_F(ClientLbEnd2endTest, RoundRobinReresolve) { GPR_ASSERT(gpr_time_cmp(deadline, now) > 0); } +TEST_F(ClientLbEnd2endTest, RoundRobinSingleReconnect) { + const int kNumServers = 3; + StartServers(kNumServers); + const auto ports = GetServersPorts(); + ResetStub(ports, "round_robin"); + SetNextResolution(ports); + for (size_t i = 0; i < kNumServers; ++i) WaitForServer(i); + for (size_t i = 0; i < servers_.size(); ++i) { + CheckRpcSendOk(); + EXPECT_EQ(1, servers_[i]->service_.request_count()) << "for backend #" << i; + } + // One request should have gone to each server. + for (size_t i = 0; i < servers_.size(); ++i) { + EXPECT_EQ(1, servers_[i]->service_.request_count()); + } + const auto pre_death = servers_[0]->service_.request_count(); + // Kill the first server. + servers_[0]->Shutdown(true); + // Client request still succeed. May need retrying if RR had returned a pick + // before noticing the change in the server's connectivity. + while (!SendRpc()) + ; // Retry until success. + // Send a bunch of RPCs that should succeed. + for (int i = 0; i < 10 * kNumServers; ++i) CheckRpcSendOk(); + const auto post_death = servers_[0]->service_.request_count(); + // No requests have gone to the deceased server. + EXPECT_EQ(pre_death, post_death); + // Bring the first server back up. + servers_[0].reset(new ServerData(server_host_, ports[0])); + // Requests should start arriving at the first server either right away (if + // the server managed to start before the RR policy retried the subchannel) or + // after the subchannel retry delay otherwise (RR's subchannel retried before + // the server was fully back up). + WaitForServer(0); +} + } // namespace } // namespace testing } // namespace grpc