diff --git a/CMakeLists.txt b/CMakeLists.txt index 1635d5dc2d5..fd53f621a65 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -482,6 +482,7 @@ add_dependencies(buildtests_c mlog_test) add_dependencies(buildtests_c multiple_server_queues_test) add_dependencies(buildtests_c murmur_hash_test) add_dependencies(buildtests_c no_server_test) +add_dependencies(buildtests_c num_external_connectivity_watchers_test) add_dependencies(buildtests_c parse_address_test) add_dependencies(buildtests_c percent_encoding_test) if(_gRPC_PLATFORM_LINUX) @@ -7523,6 +7524,37 @@ target_link_libraries(no_server_test endif (gRPC_BUILD_TESTS) if (gRPC_BUILD_TESTS) +add_executable(num_external_connectivity_watchers_test + test/core/surface/num_external_connectivity_watchers_test.c +) + + +target_include_directories(num_external_connectivity_watchers_test + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include + PRIVATE ${BORINGSSL_ROOT_DIR}/include + PRIVATE ${PROTOBUF_ROOT_DIR}/src + PRIVATE ${BENCHMARK_ROOT_DIR}/include + PRIVATE ${ZLIB_ROOT_DIR} + PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/zlib + PRIVATE ${CARES_BUILD_INCLUDE_DIR} + PRIVATE ${CARES_INCLUDE_DIR} + PRIVATE ${CARES_PLATFORM_INCLUDE_DIR} + PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/cares/cares + PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/third_party/gflags/include +) + +target_link_libraries(num_external_connectivity_watchers_test + ${_gRPC_ALLTARGETS_LIBRARIES} + grpc_test_util + grpc + gpr_test_util + gpr +) + +endif (gRPC_BUILD_TESTS) +if (gRPC_BUILD_TESTS) + add_executable(parse_address_test test/core/client_channel/parse_address_test.c ) diff --git a/Makefile b/Makefile index 188ca8b2682..82b2104b4a7 100644 --- a/Makefile +++ b/Makefile @@ -1063,6 +1063,7 @@ murmur_hash_test: $(BINDIR)/$(CONFIG)/murmur_hash_test nanopb_fuzzer_response_test: $(BINDIR)/$(CONFIG)/nanopb_fuzzer_response_test nanopb_fuzzer_serverlist_test: $(BINDIR)/$(CONFIG)/nanopb_fuzzer_serverlist_test no_server_test: $(BINDIR)/$(CONFIG)/no_server_test +num_external_connectivity_watchers_test: $(BINDIR)/$(CONFIG)/num_external_connectivity_watchers_test parse_address_test: $(BINDIR)/$(CONFIG)/parse_address_test percent_decode_fuzzer: $(BINDIR)/$(CONFIG)/percent_decode_fuzzer percent_encode_fuzzer: $(BINDIR)/$(CONFIG)/percent_encode_fuzzer @@ -1442,6 +1443,7 @@ buildtests_c: privatelibs_c \ $(BINDIR)/$(CONFIG)/multiple_server_queues_test \ $(BINDIR)/$(CONFIG)/murmur_hash_test \ $(BINDIR)/$(CONFIG)/no_server_test \ + $(BINDIR)/$(CONFIG)/num_external_connectivity_watchers_test \ $(BINDIR)/$(CONFIG)/parse_address_test \ $(BINDIR)/$(CONFIG)/percent_encoding_test \ $(BINDIR)/$(CONFIG)/pollset_set_test \ @@ -1911,6 +1913,8 @@ test_c: buildtests_c $(Q) $(BINDIR)/$(CONFIG)/murmur_hash_test || ( echo test murmur_hash_test failed ; exit 1 ) $(E) "[RUN] Testing no_server_test" $(Q) $(BINDIR)/$(CONFIG)/no_server_test || ( echo test no_server_test failed ; exit 1 ) + $(E) "[RUN] Testing num_external_connectivity_watchers_test" + $(Q) $(BINDIR)/$(CONFIG)/num_external_connectivity_watchers_test || ( echo test num_external_connectivity_watchers_test failed ; exit 1 ) $(E) "[RUN] Testing parse_address_test" $(Q) $(BINDIR)/$(CONFIG)/parse_address_test || ( echo test parse_address_test failed ; exit 1 ) $(E) "[RUN] Testing percent_encoding_test" @@ -11766,6 +11770,38 @@ endif endif +NUM_EXTERNAL_CONNECTIVITY_WATCHERS_TEST_SRC = \ + test/core/surface/num_external_connectivity_watchers_test.c \ + +NUM_EXTERNAL_CONNECTIVITY_WATCHERS_TEST_OBJS = $(addprefix $(OBJDIR)/$(CONFIG)/, $(addsuffix .o, $(basename $(NUM_EXTERNAL_CONNECTIVITY_WATCHERS_TEST_SRC)))) +ifeq ($(NO_SECURE),true) + +# You can't build secure targets if you don't have OpenSSL. + +$(BINDIR)/$(CONFIG)/num_external_connectivity_watchers_test: openssl_dep_error + +else + + + +$(BINDIR)/$(CONFIG)/num_external_connectivity_watchers_test: $(NUM_EXTERNAL_CONNECTIVITY_WATCHERS_TEST_OBJS) $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a + $(E) "[LD] Linking $@" + $(Q) mkdir -p `dirname $@` + $(Q) $(LD) $(LDFLAGS) $(NUM_EXTERNAL_CONNECTIVITY_WATCHERS_TEST_OBJS) $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a $(LDLIBS) $(LDLIBS_SECURE) -o $(BINDIR)/$(CONFIG)/num_external_connectivity_watchers_test + +endif + +$(OBJDIR)/$(CONFIG)/test/core/surface/num_external_connectivity_watchers_test.o: $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a + +deps_num_external_connectivity_watchers_test: $(NUM_EXTERNAL_CONNECTIVITY_WATCHERS_TEST_OBJS:.o=.dep) + +ifneq ($(NO_SECURE),true) +ifneq ($(NO_DEPS),true) +-include $(NUM_EXTERNAL_CONNECTIVITY_WATCHERS_TEST_OBJS:.o=.dep) +endif +endif + + PARSE_ADDRESS_TEST_SRC = \ test/core/client_channel/parse_address_test.c \ diff --git a/build.yaml b/build.yaml index d2ea16c8c20..26144e33def 100644 --- a/build.yaml +++ b/build.yaml @@ -2707,6 +2707,18 @@ targets: - grpc - gpr_test_util - gpr +- name: num_external_connectivity_watchers_test + build: test + language: c + src: + - test/core/surface/num_external_connectivity_watchers_test.c + deps: + - grpc_test_util + - grpc + - gpr_test_util + - gpr + exclude_iomgrs: + - uv - name: parse_address_test build: test language: c diff --git a/grpc.def b/grpc.def index 55b27c2c245..af4bd1674f2 100644 --- a/grpc.def +++ b/grpc.def @@ -65,6 +65,7 @@ EXPORTS grpc_alarm_cancel grpc_alarm_destroy grpc_channel_check_connectivity_state + grpc_channel_num_external_connectivity_watchers grpc_channel_watch_connectivity_state grpc_channel_create_call grpc_channel_ping diff --git a/include/grpc/grpc.h b/include/grpc/grpc.h index a36367fa8f9..47d1ea62380 100644 --- a/include/grpc/grpc.h +++ b/include/grpc/grpc.h @@ -178,6 +178,12 @@ GRPCAPI void grpc_alarm_destroy(grpc_alarm *alarm); GRPCAPI grpc_connectivity_state grpc_channel_check_connectivity_state( grpc_channel *channel, int try_to_connect); +/** Number of active "external connectivity state watchers" attached to a + * channel. + * Useful for testing. **/ +GRPCAPI int grpc_channel_num_external_connectivity_watchers( + grpc_channel *channel); + /** Watch for a change in connectivity state. Once the channel connectivity state is different from last_observed_state, tag will be enqueued on cq with success=1. diff --git a/src/core/ext/filters/client_channel/channel_connectivity.c b/src/core/ext/filters/client_channel/channel_connectivity.c index f83670db82e..04666edbec7 100644 --- a/src/core/ext/filters/client_channel/channel_connectivity.c +++ b/src/core/ext/filters/client_channel/channel_connectivity.c @@ -67,9 +67,8 @@ grpc_connectivity_state grpc_channel_check_connectivity_state( typedef enum { WAITING, - CALLING_BACK, + READY_TO_CALL_BACK, CALLING_BACK_AND_FINISHED, - CALLED_BACK } callback_phase; typedef struct { @@ -77,11 +76,13 @@ typedef struct { callback_phase phase; grpc_closure on_complete; grpc_closure on_timeout; + grpc_closure watcher_timer_init; grpc_timer alarm; grpc_connectivity_state state; grpc_completion_queue *cq; grpc_cq_completion completion_storage; grpc_channel *channel; + grpc_error *error; void *tag; } state_watcher; @@ -105,11 +106,8 @@ static void finished_completion(grpc_exec_ctx *exec_ctx, void *pw, gpr_mu_lock(&w->mu); switch (w->phase) { case WAITING: - case CALLED_BACK: + case READY_TO_CALL_BACK: GPR_UNREACHABLE_CODE(return ); - case CALLING_BACK: - w->phase = CALLED_BACK; - break; case CALLING_BACK_AND_FINISHED: delete = 1; break; @@ -123,10 +121,14 @@ static void finished_completion(grpc_exec_ctx *exec_ctx, void *pw, static void partly_done(grpc_exec_ctx *exec_ctx, state_watcher *w, bool due_to_completion, grpc_error *error) { - int delete = 0; - if (due_to_completion) { grpc_timer_cancel(exec_ctx, &w->alarm); + } else { + grpc_channel_element *client_channel_elem = grpc_channel_stack_last_element( + grpc_channel_get_channel_stack(w->channel)); + grpc_client_channel_watch_connectivity_state(exec_ctx, client_channel_elem, + grpc_cq_pollset(w->cq), NULL, + &w->on_complete, NULL); } gpr_mu_lock(&w->mu); @@ -147,25 +149,27 @@ static void partly_done(grpc_exec_ctx *exec_ctx, state_watcher *w, } switch (w->phase) { case WAITING: - w->phase = CALLING_BACK; - grpc_cq_end_op(exec_ctx, w->cq, w->tag, GRPC_ERROR_REF(error), - finished_completion, w, &w->completion_storage); + GRPC_ERROR_REF(error); + w->error = error; + w->phase = READY_TO_CALL_BACK; break; - case CALLING_BACK: + case READY_TO_CALL_BACK: + if (error != GRPC_ERROR_NONE) { + GPR_ASSERT(!due_to_completion); + GRPC_ERROR_UNREF(w->error); + GRPC_ERROR_REF(error); + w->error = error; + } w->phase = CALLING_BACK_AND_FINISHED; + grpc_cq_end_op(exec_ctx, w->cq, w->tag, w->error, finished_completion, w, + &w->completion_storage); break; case CALLING_BACK_AND_FINISHED: GPR_UNREACHABLE_CODE(return ); - case CALLED_BACK: - delete = 1; break; } gpr_mu_unlock(&w->mu); - if (delete) { - delete_state_watcher(exec_ctx, w); - } - GRPC_ERROR_UNREF(error); } @@ -179,6 +183,28 @@ static void timeout_complete(grpc_exec_ctx *exec_ctx, void *pw, partly_done(exec_ctx, pw, false, GRPC_ERROR_REF(error)); } +int grpc_channel_num_external_connectivity_watchers(grpc_channel *channel) { + grpc_channel_element *client_channel_elem = + grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel)); + return grpc_client_channel_num_external_connectivity_watchers( + client_channel_elem); +} + +typedef struct watcher_timer_init_arg { + state_watcher *w; + gpr_timespec deadline; +} watcher_timer_init_arg; + +static void watcher_timer_init(grpc_exec_ctx *exec_ctx, void *arg, + grpc_error *error_ignored) { + watcher_timer_init_arg *wa = (watcher_timer_init_arg *)arg; + + grpc_timer_init(exec_ctx, &wa->w->alarm, + gpr_convert_clock_type(wa->deadline, GPR_CLOCK_MONOTONIC), + &wa->w->on_timeout, gpr_now(GPR_CLOCK_MONOTONIC)); + gpr_free(wa); +} + void grpc_channel_watch_connectivity_state( grpc_channel *channel, grpc_connectivity_state last_observed_state, gpr_timespec deadline, grpc_completion_queue *cq, void *tag) { @@ -208,16 +234,19 @@ void grpc_channel_watch_connectivity_state( w->cq = cq; w->tag = tag; w->channel = channel; + w->error = NULL; - grpc_timer_init(&exec_ctx, &w->alarm, - gpr_convert_clock_type(deadline, GPR_CLOCK_MONOTONIC), - &w->on_timeout, gpr_now(GPR_CLOCK_MONOTONIC)); + watcher_timer_init_arg *wa = gpr_malloc(sizeof(watcher_timer_init_arg)); + wa->w = w; + wa->deadline = deadline; + grpc_closure_init(&w->watcher_timer_init, watcher_timer_init, wa, + grpc_schedule_on_exec_ctx); if (client_channel_elem->filter == &grpc_client_channel_filter) { GRPC_CHANNEL_INTERNAL_REF(channel, "watch_channel_connectivity"); - grpc_client_channel_watch_connectivity_state(&exec_ctx, client_channel_elem, - grpc_cq_pollset(cq), &w->state, - &w->on_complete); + grpc_client_channel_watch_connectivity_state( + &exec_ctx, client_channel_elem, grpc_cq_pollset(cq), &w->state, + &w->on_complete, &w->watcher_timer_init); } else { abort(); } diff --git a/src/core/ext/filters/client_channel/client_channel.c b/src/core/ext/filters/client_channel/client_channel.c index f2f27b9175a..8cebbe9eca6 100644 --- a/src/core/ext/filters/client_channel/client_channel.c +++ b/src/core/ext/filters/client_channel/client_channel.c @@ -167,6 +167,8 @@ static void *method_parameters_create_from_json(const grpc_json *json) { return value; } +struct external_connectivity_watcher; + /************************************************************************* * CHANNEL-WIDE FUNCTIONS */ @@ -204,6 +206,11 @@ typedef struct client_channel_channel_data { /** interested parties (owned) */ grpc_pollset_set *interested_parties; + /* external_connectivity_watcher_list head is guarded by its own mutex, since + * counts need to be grabbed immediately without polling on a cq */ + gpr_mu external_connectivity_watcher_list_mu; + struct external_connectivity_watcher *external_connectivity_watcher_list_head; + /* the following properties are guarded by a mutex since API's require them to be instantaneously available */ gpr_mu info_mu; @@ -661,6 +668,12 @@ static grpc_error *cc_init_channel_elem(grpc_exec_ctx *exec_ctx, // Initialize data members. chand->combiner = grpc_combiner_create(NULL); gpr_mu_init(&chand->info_mu); + gpr_mu_init(&chand->external_connectivity_watcher_list_mu); + + gpr_mu_lock(&chand->external_connectivity_watcher_list_mu); + chand->external_connectivity_watcher_list_head = NULL; + gpr_mu_unlock(&chand->external_connectivity_watcher_list_mu); + chand->owning_stack = args->channel_stack; grpc_closure_init(&chand->on_resolver_result_changed, on_resolver_result_changed_locked, chand, @@ -749,6 +762,7 @@ static void cc_destroy_channel_elem(grpc_exec_ctx *exec_ctx, grpc_pollset_set_destroy(exec_ctx, chand->interested_parties); GRPC_COMBINER_UNREF(exec_ctx, chand->combiner, "client_channel"); gpr_mu_destroy(&chand->info_mu); + gpr_mu_destroy(&chand->external_connectivity_watcher_list_mu); } /************************************************************************* @@ -1431,14 +1445,79 @@ grpc_connectivity_state grpc_client_channel_check_connectivity_state( return out; } -typedef struct { +typedef struct external_connectivity_watcher { channel_data *chand; grpc_pollset *pollset; grpc_closure *on_complete; + grpc_closure *watcher_timer_init; grpc_connectivity_state *state; grpc_closure my_closure; + struct external_connectivity_watcher *next; } external_connectivity_watcher; +static external_connectivity_watcher *lookup_external_connectivity_watcher( + channel_data *chand, grpc_closure *on_complete) { + gpr_mu_lock(&chand->external_connectivity_watcher_list_mu); + external_connectivity_watcher *w = + chand->external_connectivity_watcher_list_head; + while (w != NULL && w->on_complete != on_complete) { + w = w->next; + } + gpr_mu_unlock(&chand->external_connectivity_watcher_list_mu); + return w; +} + +static void external_connectivity_watcher_list_append( + channel_data *chand, external_connectivity_watcher *w) { + GPR_ASSERT(!lookup_external_connectivity_watcher(chand, w->on_complete)); + + gpr_mu_lock(&w->chand->external_connectivity_watcher_list_mu); + GPR_ASSERT(!w->next); + w->next = chand->external_connectivity_watcher_list_head; + chand->external_connectivity_watcher_list_head = w; + gpr_mu_unlock(&w->chand->external_connectivity_watcher_list_mu); +} + +static void external_connectivity_watcher_list_remove( + channel_data *chand, external_connectivity_watcher *too_remove) { + GPR_ASSERT( + lookup_external_connectivity_watcher(chand, too_remove->on_complete)); + gpr_mu_lock(&chand->external_connectivity_watcher_list_mu); + if (too_remove == chand->external_connectivity_watcher_list_head) { + chand->external_connectivity_watcher_list_head = too_remove->next; + gpr_mu_unlock(&chand->external_connectivity_watcher_list_mu); + return; + } + external_connectivity_watcher *w = + chand->external_connectivity_watcher_list_head; + while (w != NULL) { + if (w->next == too_remove) { + w->next = w->next->next; + gpr_mu_unlock(&chand->external_connectivity_watcher_list_mu); + return; + } + w = w->next; + } + GPR_UNREACHABLE_CODE(return ); +} + +int grpc_client_channel_num_external_connectivity_watchers( + grpc_channel_element *elem) { + channel_data *chand = elem->channel_data; + int count = 0; + + gpr_mu_lock(&chand->external_connectivity_watcher_list_mu); + external_connectivity_watcher *w = + chand->external_connectivity_watcher_list_head; + while (w != NULL) { + count++; + w = w->next; + } + gpr_mu_unlock(&chand->external_connectivity_watcher_list_mu); + + return count; +} + static void on_external_watch_complete(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error) { external_connectivity_watcher *w = arg; @@ -1447,6 +1526,7 @@ static void on_external_watch_complete(grpc_exec_ctx *exec_ctx, void *arg, w->pollset); GRPC_CHANNEL_STACK_UNREF(exec_ctx, w->chand->owning_stack, "external_connectivity_watcher"); + external_connectivity_watcher_list_remove(w->chand, w); gpr_free(w); grpc_closure_run(exec_ctx, follow_up, GRPC_ERROR_REF(error)); } @@ -1454,21 +1534,42 @@ static void on_external_watch_complete(grpc_exec_ctx *exec_ctx, void *arg, static void watch_connectivity_state_locked(grpc_exec_ctx *exec_ctx, void *arg, grpc_error *error_ignored) { external_connectivity_watcher *w = arg; - grpc_closure_init(&w->my_closure, on_external_watch_complete, w, - grpc_schedule_on_exec_ctx); - grpc_connectivity_state_notify_on_state_change( - exec_ctx, &w->chand->state_tracker, w->state, &w->my_closure); + external_connectivity_watcher *found = NULL; + if (w->state != NULL) { + external_connectivity_watcher_list_append(w->chand, w); + grpc_closure_run(exec_ctx, w->watcher_timer_init, GRPC_ERROR_NONE); + grpc_closure_init(&w->my_closure, on_external_watch_complete, w, + grpc_schedule_on_exec_ctx); + grpc_connectivity_state_notify_on_state_change( + exec_ctx, &w->chand->state_tracker, w->state, &w->my_closure); + } else { + GPR_ASSERT(w->watcher_timer_init == NULL); + found = lookup_external_connectivity_watcher(w->chand, w->on_complete); + if (found) { + GPR_ASSERT(found->on_complete == w->on_complete); + grpc_connectivity_state_notify_on_state_change( + exec_ctx, &found->chand->state_tracker, NULL, &found->my_closure); + } + grpc_pollset_set_del_pollset(exec_ctx, w->chand->interested_parties, + w->pollset); + GRPC_CHANNEL_STACK_UNREF(exec_ctx, w->chand->owning_stack, + "external_connectivity_watcher"); + gpr_free(w); + } } void grpc_client_channel_watch_connectivity_state( grpc_exec_ctx *exec_ctx, grpc_channel_element *elem, grpc_pollset *pollset, - grpc_connectivity_state *state, grpc_closure *closure) { + grpc_connectivity_state *state, grpc_closure *closure, + grpc_closure *watcher_timer_init) { channel_data *chand = elem->channel_data; - external_connectivity_watcher *w = gpr_malloc(sizeof(*w)); + external_connectivity_watcher *w = gpr_zalloc(sizeof(*w)); w->chand = chand; w->pollset = pollset; w->on_complete = closure; w->state = state; + w->watcher_timer_init = watcher_timer_init; + grpc_pollset_set_add_pollset(exec_ctx, chand->interested_parties, pollset); GRPC_CHANNEL_STACK_REF(w->chand->owning_stack, "external_connectivity_watcher"); diff --git a/src/core/ext/filters/client_channel/client_channel.h b/src/core/ext/filters/client_channel/client_channel.h index 8d2490ea55d..356a7ab0c15 100644 --- a/src/core/ext/filters/client_channel/client_channel.h +++ b/src/core/ext/filters/client_channel/client_channel.h @@ -53,9 +53,13 @@ extern const grpc_channel_filter grpc_client_channel_filter; grpc_connectivity_state grpc_client_channel_check_connectivity_state( grpc_exec_ctx *exec_ctx, grpc_channel_element *elem, int try_to_connect); +int grpc_client_channel_num_external_connectivity_watchers( + grpc_channel_element *elem); + void grpc_client_channel_watch_connectivity_state( grpc_exec_ctx *exec_ctx, grpc_channel_element *elem, grpc_pollset *pollset, - grpc_connectivity_state *state, grpc_closure *on_complete); + grpc_connectivity_state *state, grpc_closure *on_complete, + grpc_closure *watcher_timer_init); /* Debug helper: pull the subchannel call from a call stack element */ grpc_subchannel_call *grpc_client_channel_get_subchannel_call( diff --git a/src/csharp/Grpc.Auth/Grpc.Auth.csproj b/src/csharp/Grpc.Auth/Grpc.Auth.csproj index 6ac25aa1f02..188ddb95b99 100755 --- a/src/csharp/Grpc.Auth/Grpc.Auth.csproj +++ b/src/csharp/Grpc.Auth/Grpc.Auth.csproj @@ -16,6 +16,8 @@ https://github.com/grpc/grpc https://github.com/grpc/grpc/blob/master/LICENSE 1.6.0 + true + true @@ -23,7 +25,9 @@ - + + None + diff --git a/src/csharp/Grpc.Core.Testing/Grpc.Core.Testing.csproj b/src/csharp/Grpc.Core.Testing/Grpc.Core.Testing.csproj index f4dd5105fc7..45ec8743222 100755 --- a/src/csharp/Grpc.Core.Testing/Grpc.Core.Testing.csproj +++ b/src/csharp/Grpc.Core.Testing/Grpc.Core.Testing.csproj @@ -1,4 +1,4 @@ - + @@ -16,6 +16,8 @@ https://github.com/grpc/grpc https://github.com/grpc/grpc/blob/master/LICENSE 1.6.0 + true + true @@ -23,7 +25,9 @@ - + + None + diff --git a/src/csharp/Grpc.Core/Channel.cs b/src/csharp/Grpc.Core/Channel.cs index 4f29c35b321..51ae11fbdec 100644 --- a/src/csharp/Grpc.Core/Channel.cs +++ b/src/csharp/Grpc.Core/Channel.cs @@ -59,6 +59,8 @@ namespace Grpc.Core readonly ChannelSafeHandle handle; readonly Dictionary options; + readonly Task connectivityWatcherTask; + bool shutdownRequested; /// @@ -99,6 +101,9 @@ namespace Grpc.Core this.handle = ChannelSafeHandle.CreateInsecure(target, nativeChannelArgs); } } + // TODO(jtattermusch): Workaround for https://github.com/GoogleCloudPlatform/google-cloud-dotnet/issues/822. + // Remove once retries are supported in C core + this.connectivityWatcherTask = RunConnectivityWatcherAsync(); GrpcEnvironment.RegisterChannel(this); } @@ -244,7 +249,7 @@ namespace Grpc.Core handle.Dispose(); - await GrpcEnvironment.ReleaseAsync().ConfigureAwait(false); + await Task.WhenAll(GrpcEnvironment.ReleaseAsync(), connectivityWatcherTask).ConfigureAwait(false); } internal ChannelSafeHandle Handle @@ -299,6 +304,40 @@ namespace Grpc.Core } } + /// + /// Constantly Watches channel connectivity status to work around https://github.com/GoogleCloudPlatform/google-cloud-dotnet/issues/822 + /// + private async Task RunConnectivityWatcherAsync() + { + try + { + var lastState = State; + while (lastState != ChannelState.Shutdown) + { + lock (myLock) + { + if (shutdownRequested) + { + break; + } + } + + try + { + await WaitForStateChangedAsync(lastState, DateTime.UtcNow.AddSeconds(1)).ConfigureAwait(false); + } + catch (TaskCanceledException) + { + // ignore timeout + } + lastState = State; + } + } + catch (ObjectDisposedException) { + // during shutdown, channel is going to be disposed. + } + } + private static void EnsureUserAgentChannelOption(Dictionary options) { var key = ChannelOptions.PrimaryUserAgentString; diff --git a/src/csharp/Grpc.Core/Grpc.Core.csproj b/src/csharp/Grpc.Core/Grpc.Core.csproj index c0865001a80..ae0d8b2c8d8 100755 --- a/src/csharp/Grpc.Core/Grpc.Core.csproj +++ b/src/csharp/Grpc.Core/Grpc.Core.csproj @@ -1,4 +1,4 @@ - + @@ -15,6 +15,8 @@ https://github.com/grpc/grpc https://github.com/grpc/grpc/blob/master/LICENSE 1.6.0 + true + true diff --git a/src/csharp/Grpc.HealthCheck/Grpc.HealthCheck.csproj b/src/csharp/Grpc.HealthCheck/Grpc.HealthCheck.csproj index eac6e1fc95f..c3791a4e6b2 100755 --- a/src/csharp/Grpc.HealthCheck/Grpc.HealthCheck.csproj +++ b/src/csharp/Grpc.HealthCheck/Grpc.HealthCheck.csproj @@ -15,6 +15,8 @@ https://github.com/grpc/grpc https://github.com/grpc/grpc/blob/master/LICENSE 1.6.0 + true + true @@ -22,7 +24,9 @@ - + + None + diff --git a/src/csharp/Grpc.Reflection/Grpc.Reflection.csproj b/src/csharp/Grpc.Reflection/Grpc.Reflection.csproj index 70bfcc89c5c..3a075552483 100755 --- a/src/csharp/Grpc.Reflection/Grpc.Reflection.csproj +++ b/src/csharp/Grpc.Reflection/Grpc.Reflection.csproj @@ -15,6 +15,8 @@ https://github.com/grpc/grpc https://github.com/grpc/grpc/blob/master/LICENSE 1.6.0 + true + true @@ -22,7 +24,9 @@ - + + None + diff --git a/src/csharp/build_packages_dotnetcli.bat b/src/csharp/build_packages_dotnetcli.bat index d823942be52..aa8a8d3b17b 100755 --- a/src/csharp/build_packages_dotnetcli.bat +++ b/src/csharp/build_packages_dotnetcli.bat @@ -51,11 +51,11 @@ powershell -Command "cp -r ..\..\platform=*\artifacts\protoc_* protoc_plugins" @rem To be able to build, we also need to put grpc_csharp_ext to its normal location xcopy /Y /I nativelibs\csharp_ext_windows_x64\grpc_csharp_ext.dll ..\..\cmake\build\x64\Release\ -%DOTNET% pack --configuration Release --include-symbols --include-source Grpc.Core --output ..\..\..\artifacts || goto :error -%DOTNET% pack --configuration Release --include-symbols --include-source Grpc.Core.Testing --output ..\..\..\artifacts || goto :error -%DOTNET% pack --configuration Release --include-symbols --include-source Grpc.Auth --output ..\..\..\artifacts || goto :error -%DOTNET% pack --configuration Release --include-symbols --include-source Grpc.HealthCheck --output ..\..\..\artifacts || goto :error -%DOTNET% pack --configuration Release --include-symbols --include-source Grpc.Reflection --output ..\..\..\artifacts || goto :error +%DOTNET% pack --configuration Release Grpc.Core --output ..\..\..\artifacts || goto :error +%DOTNET% pack --configuration Release Grpc.Core.Testing --output ..\..\..\artifacts || goto :error +%DOTNET% pack --configuration Release Grpc.Auth --output ..\..\..\artifacts || goto :error +%DOTNET% pack --configuration Release Grpc.HealthCheck --output ..\..\..\artifacts || goto :error +%DOTNET% pack --configuration Release Grpc.Reflection --output ..\..\..\artifacts || goto :error %NUGET% pack Grpc.nuspec -Version %VERSION% -OutputDirectory ..\..\artifacts || goto :error %NUGET% pack Grpc.Tools.nuspec -Version %VERSION% -OutputDirectory ..\..\artifacts diff --git a/src/csharp/build_packages_dotnetcli.sh b/src/csharp/build_packages_dotnetcli.sh index f79c97fbbcd..d33923845c1 100755 --- a/src/csharp/build_packages_dotnetcli.sh +++ b/src/csharp/build_packages_dotnetcli.sh @@ -48,11 +48,11 @@ dotnet restore Grpc.sln mkdir -p ../../libs/opt cp nativelibs/csharp_ext_linux_x64/libgrpc_csharp_ext.so ../../libs/opt -dotnet pack --configuration Release --include-symbols --include-source Grpc.Core --output ../../../artifacts -dotnet pack --configuration Release --include-symbols --include-source Grpc.Core.Testing --output ../../../artifacts -dotnet pack --configuration Release --include-symbols --include-source Grpc.Auth --output ../../../artifacts -dotnet pack --configuration Release --include-symbols --include-source Grpc.HealthCheck --output ../../../artifacts -dotnet pack --configuration Release --include-symbols --include-source Grpc.Reflection --output ../../../artifacts +dotnet pack --configuration Release Grpc.Core --output ../../../artifacts +dotnet pack --configuration Release Grpc.Core.Testing --output ../../../artifacts +dotnet pack --configuration Release Grpc.Auth --output ../../../artifacts +dotnet pack --configuration Release Grpc.HealthCheck --output ../../../artifacts +dotnet pack --configuration Release Grpc.Reflection --output ../../../artifacts nuget pack Grpc.nuspec -Version "1.4.0-dev" -OutputDirectory ../../artifacts nuget pack Grpc.Tools.nuspec -Version "1.4.0-dev" -OutputDirectory ../../artifacts diff --git a/src/node/ext/call.cc b/src/node/ext/call.cc index 49179ab3596..9453000ad3f 100644 --- a/src/node/ext/call.cc +++ b/src/node/ext/call.cc @@ -508,9 +508,14 @@ void Call::DestroyCall() { } Call::Call(grpc_call *call) - : wrapped_call(call), pending_batches(0), has_final_op_completed(false) {} + : wrapped_call(call), pending_batches(0), has_final_op_completed(false) { + peer = grpc_call_get_peer(call); +} -Call::~Call() { DestroyCall(); } +Call::~Call() { + DestroyCall(); + gpr_free(peer); +} void Call::Init(Local exports) { HandleScope scope; @@ -662,6 +667,16 @@ NAN_METHOD(Call::StartBatch) { } Local callback_func = info[1].As(); Call *call = ObjectWrap::Unwrap(info.This()); + if (call->wrapped_call == NULL) { + /* This implies that the call has completed and has been destroyed. To + * emulate + * previous behavior, we should call the callback immediately with an error, + * as though the batch had failed in core */ + Local argv[] = { + Nan::Error("The async function failed because the call has completed")}; + Nan::Call(callback_func, Nan::New(), 1, argv); + return; + } Local obj = Nan::To(info[0]).ToLocalChecked(); Local keys = Nan::GetOwnPropertyNames(obj).ToLocalChecked(); size_t nops = keys->Length(); @@ -727,6 +742,11 @@ NAN_METHOD(Call::Cancel) { return Nan::ThrowTypeError("cancel can only be called on Call objects"); } Call *call = ObjectWrap::Unwrap(info.This()); + if (call->wrapped_call == NULL) { + /* Cancel is supposed to be idempotent. If the call has already finished, + * cancel should just complete silently */ + return; + } grpc_call_error error = grpc_call_cancel(call->wrapped_call, NULL); if (error != GRPC_CALL_OK) { return Nan::ThrowError(nanErrorWithCode("cancel failed", error)); @@ -747,6 +767,11 @@ NAN_METHOD(Call::CancelWithStatus) { "cancelWithStatus's second argument must be a string"); } Call *call = ObjectWrap::Unwrap(info.This()); + if (call->wrapped_call == NULL) { + /* Cancel is supposed to be idempotent. If the call has already finished, + * cancel should just complete silently */ + return; + } grpc_status_code code = static_cast(Nan::To(info[0]).FromJust()); if (code == GRPC_STATUS_OK) { @@ -763,9 +788,7 @@ NAN_METHOD(Call::GetPeer) { return Nan::ThrowTypeError("getPeer can only be called on Call objects"); } Call *call = ObjectWrap::Unwrap(info.This()); - char *peer = grpc_call_get_peer(call->wrapped_call); - Local peer_value = Nan::New(peer).ToLocalChecked(); - gpr_free(peer); + Local peer_value = Nan::New(call->peer).ToLocalChecked(); info.GetReturnValue().Set(peer_value); } @@ -780,6 +803,10 @@ NAN_METHOD(Call::SetCredentials) { "setCredentials' first argument must be a CallCredentials"); } Call *call = ObjectWrap::Unwrap(info.This()); + if (call->wrapped_call == NULL) { + return Nan::ThrowError( + "Cannot set credentials on a call that has already started"); + } CallCredentials *creds_object = ObjectWrap::Unwrap( Nan::To(info[0]).ToLocalChecked()); grpc_call_credentials *creds = creds_object->GetWrappedCredentials(); diff --git a/src/node/ext/call.h b/src/node/ext/call.h index 0bd24f56a9a..8f751279e43 100644 --- a/src/node/ext/call.h +++ b/src/node/ext/call.h @@ -96,6 +96,7 @@ class Call : public Nan::ObjectWrap { call, this is GRPC_OP_RECV_STATUS_ON_CLIENT and for a server call, this is GRPC_OP_SEND_STATUS_FROM_SERVER */ bool has_final_op_completed; + char *peer; }; class Op { diff --git a/src/node/src/client.js b/src/node/src/client.js index cf4c104144c..f59ac5c94c5 100644 --- a/src/node/src/client.js +++ b/src/node/src/client.js @@ -147,6 +147,12 @@ function _write(chunk, encoding, callback) { /* jshint validthis: true */ var batch = {}; var message; + var self = this; + if (this.writeFailed) { + /* Once a write fails, just call the callback immediately to let the caller + flush any pending writes. */ + setImmediate(callback); + } try { message = this.serialize(chunk); } catch (e) { @@ -167,8 +173,10 @@ function _write(chunk, encoding, callback) { batch[grpc.opType.SEND_MESSAGE] = message; this.call.startBatch(batch, function(err, event) { if (err) { - // Something has gone wrong. Stop writing by failing to call callback - return; + /* Assume that the call is complete and that writing failed because a + status was received. In that case, set a flag to discard all future + writes */ + self.writeFailed = true; } callback(); }); diff --git a/src/node/test/common_test.js b/src/node/test/common_test.js index b7c2c6a8d66..db80207e223 100644 --- a/src/node/test/common_test.js +++ b/src/node/test/common_test.js @@ -100,7 +100,6 @@ describe('Proto message long int serialize and deserialize', function() { var longNumDeserialize = deserializeCls(messages_proto.LongValues, num_options); var serialized = longSerialize({int_64: pos_value}); - console.log(longDeserialize(serialized)); assert.strictEqual(typeof longDeserialize(serialized).int_64, 'string'); /* With the longsAsStrings option disabled, long values are represented as * objects with 3 keys: low, high, and unsigned */ diff --git a/src/node/test/surface_test.js b/src/node/test/surface_test.js index 6f1c269267c..f8eaf62aaff 100644 --- a/src/node/test/surface_test.js +++ b/src/node/test/surface_test.js @@ -1110,6 +1110,18 @@ describe('Other conditions', function() { done(); }); }); + it('after the call has fully completed', function(done) { + var peer; + var call = client.unary({error: false}, function(err, data) { + assert.ifError(err); + setImmediate(function() { + assert.strictEqual(peer, call.getPeer()); + done(); + }); + }); + peer = call.getPeer(); + assert.strictEqual(typeof peer, 'string'); + }); }); }); describe('Call propagation', function() { @@ -1352,4 +1364,17 @@ describe('Cancelling surface client', function() { }); call.cancel(); }); + it('Should be idempotent', function(done) { + var call = client.div({'divisor': 0, 'dividend': 0}, function(err, resp) { + assert.strictEqual(err.code, grpc.status.CANCELLED); + // Call asynchronously to try cancelling after call is fully completed + setImmediate(function() { + assert.doesNotThrow(function() { + call.cancel(); + }); + done(); + }); + }); + call.cancel(); + }); }); diff --git a/src/python/grpcio/grpc/_channel.py b/src/python/grpcio/grpc/_channel.py index 4316449ac65..012ed8ec812 100644 --- a/src/python/grpcio/grpc/_channel.py +++ b/src/python/grpcio/grpc/_channel.py @@ -786,7 +786,7 @@ def _channel_managed_call_management(state): class _ChannelConnectivityState(object): def __init__(self, channel): - self.lock = threading.Lock() + self.lock = threading.RLock() self.channel = channel self.polling = False self.connectivity = None @@ -926,6 +926,11 @@ class Channel(grpc.Channel): self._call_state = _ChannelCallState(self._channel) self._connectivity_state = _ChannelConnectivityState(self._channel) + # TODO(https://github.com/grpc/grpc/issues/9884) + # Temporary work around UNAVAILABLE issues + # Remove this once c-core has retry support + _subscribe(self._connectivity_state, lambda *args: None, None) + def subscribe(self, callback, try_to_connect=None): _subscribe(self._connectivity_state, callback, try_to_connect) diff --git a/src/python/grpcio_tests/tests/tests.json b/src/python/grpcio_tests/tests/tests.json index 9f7587faa66..126e8ac60dd 100644 --- a/src/python/grpcio_tests/tests/tests.json +++ b/src/python/grpcio_tests/tests/tests.json @@ -32,6 +32,7 @@ "unit._invocation_defects_test.InvocationDefectsTest", "unit._metadata_code_details_test.MetadataCodeDetailsTest", "unit._metadata_test.MetadataTest", + "unit._reconnect_test.ReconnectTest", "unit._resource_exhausted_test.ResourceExhaustedTest", "unit._rpc_test.RPCTest", "unit._sanity._sanity_test.Sanity", diff --git a/src/python/grpcio_tests/tests/unit/_reconnect_test.py b/src/python/grpcio_tests/tests/unit/_reconnect_test.py new file mode 100644 index 00000000000..6c316476b3e --- /dev/null +++ b/src/python/grpcio_tests/tests/unit/_reconnect_test.py @@ -0,0 +1,70 @@ +# Copyright 2017, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +"""Tests that a channel will reconnect if a connection is dropped""" + +import unittest + +import grpc +from grpc.framework.foundation import logging_pool + +from tests.unit.framework.common import test_constants + +_REQUEST = b'\x00\x00\x00' +_RESPONSE = b'\x00\x00\x01' + +_UNARY_UNARY = '/test/UnaryUnary' + + +def _handle_unary_unary(unused_request, unused_servicer_context): + return _RESPONSE + + +class ReconnectTest(unittest.TestCase): + + def test_reconnect(self): + server_pool = logging_pool.pool(test_constants.THREAD_CONCURRENCY) + handler = grpc.method_handlers_generic_handler('test', { + 'UnaryUnary': + grpc.unary_unary_rpc_method_handler(_handle_unary_unary) + }) + server = grpc.server(server_pool, (handler,)) + port = server.add_insecure_port('[::]:0') + server.start() + channel = grpc.insecure_channel('localhost:%d' % port) + multi_callable = channel.unary_unary(_UNARY_UNARY) + self.assertEqual(_RESPONSE, multi_callable(_REQUEST)) + server.stop(None) + server = grpc.server(server_pool, (handler,)) + server.add_insecure_port('[::]:{}'.format(port)) + server.start() + self.assertEqual(_RESPONSE, multi_callable(_REQUEST)) + + +if __name__ == '__main__': + unittest.main(verbosity=2) diff --git a/src/ruby/end2end/channel_closing_driver.rb b/src/ruby/end2end/channel_closing_driver.rb index d3e5373b0bb..bed8c434058 100755 --- a/src/ruby/end2end/channel_closing_driver.rb +++ b/src/ruby/end2end/channel_closing_driver.rb @@ -61,6 +61,11 @@ def main 'channel is closed while connectivity is watched' end + client_exit_code = $CHILD_STATUS + if client_exit_code != 0 + fail "channel closing client failed, exit code #{client_exit_code}" + end + server_runner.stop end diff --git a/src/ruby/end2end/channel_state_driver.rb b/src/ruby/end2end/channel_state_driver.rb index 80fb62899e5..9910076dba1 100755 --- a/src/ruby/end2end/channel_state_driver.rb +++ b/src/ruby/end2end/channel_state_driver.rb @@ -58,6 +58,9 @@ def main 'It likely hangs when ended abruptly' end + # The interrupt in the child process should cause it to + # exit a non-zero status, so don't check it here. + # This test mainly tries to catch deadlock. server_runner.stop end diff --git a/src/ruby/end2end/grpc_class_init_client.rb b/src/ruby/end2end/grpc_class_init_client.rb index ee79292119a..e73ca76850f 100755 --- a/src/ruby/end2end/grpc_class_init_client.rb +++ b/src/ruby/end2end/grpc_class_init_client.rb @@ -34,44 +34,110 @@ require_relative './end2end_common' -def main - grpc_class = '' - OptionParser.new do |opts| - opts.on('--grpc_class=P', String) do |p| - grpc_class = p +def construct_many(test_proc) + thds = [] + 4.times do + thds << Thread.new do + 20.times do + test_proc.call + end end - end.parse! + end + 20.times do + test_proc.call + end + thds.each(&:join) +end + +def run_gc_stress_test(test_proc) + GC.disable + construct_many(test_proc) - test_proc = nil + GC.enable + construct_many(test_proc) + + GC.start(full_mark: true, immediate_sweep: true) + construct_many(test_proc) +end + +def run_concurrency_stress_test(test_proc) + 100.times do + Thread.new do + test_proc.call + end + end + + test_proc.call + + fail 'exception thrown while child thread initing class' +end +# default (no gc_stress and no concurrency_stress) +def run_default_test(test_proc) + thd = Thread.new do + test_proc.call + end + test_proc.call + thd.join +end + +def get_test_proc(grpc_class) case grpc_class when 'channel' - test_proc = proc do + return proc do GRPC::Core::Channel.new('dummy_host', nil, :this_channel_is_insecure) end when 'server' - test_proc = proc do + return proc do GRPC::Core::Server.new({}) end when 'channel_credentials' - test_proc = proc do + return proc do GRPC::Core::ChannelCredentials.new end when 'call_credentials' - test_proc = proc do + return proc do GRPC::Core::CallCredentials.new(proc { |noop| noop }) end when 'compression_options' - test_proc = proc do + return proc do GRPC::Core::CompressionOptions.new end else fail "bad --grpc_class=#{grpc_class} param" end +end - th = Thread.new { test_proc.call } - test_proc.call - th.join +def main + grpc_class = '' + stress_test = '' + OptionParser.new do |opts| + opts.on('--grpc_class=P', String) do |p| + grpc_class = p + end + opts.on('--stress_test=P') do |p| + stress_test = p + end + end.parse! + + test_proc = get_test_proc(grpc_class) + + # the different test configs need to be ran + # in separate processes, since each one tests + # clean shutdown in a different way + case stress_test + when 'gc' + p 'run gc stress' + run_gc_stress_test(test_proc) + when 'concurrency' + p 'run concurrency stress' + run_concurrency_stress_test(test_proc) + when '' + p 'run default' + run_default_test(test_proc) + else + fail "bad --stress_test=#{stress_test} param" + end end main diff --git a/src/ruby/end2end/grpc_class_init_driver.rb b/src/ruby/end2end/grpc_class_init_driver.rb index 764d029f149..c65ed547c54 100755 --- a/src/ruby/end2end/grpc_class_init_driver.rb +++ b/src/ruby/end2end/grpc_class_init_driver.rb @@ -38,29 +38,40 @@ def main call_credentials compression_options ) - native_grpc_classes.each do |grpc_class| - STDERR.puts 'start client' - this_dir = File.expand_path(File.dirname(__FILE__)) - client_path = File.join(this_dir, 'grpc_class_init_client.rb') - client_pid = Process.spawn(RbConfig.ruby, - client_path, - "--grpc_class=#{grpc_class}") - begin - Timeout.timeout(10) do - Process.wait(client_pid) + # there is room for false positives in this test, + # do a few runs for each config + 4.times do + native_grpc_classes.each do |grpc_class| + ['', 'gc', 'concurrency'].each do |stress_test_type| + STDERR.puts 'start client' + this_dir = File.expand_path(File.dirname(__FILE__)) + client_path = File.join(this_dir, 'grpc_class_init_client.rb') + client_pid = Process.spawn(RbConfig.ruby, + client_path, + "--grpc_class=#{grpc_class}", + "--stress_test=#{stress_test_type}") + begin + Timeout.timeout(10) do + Process.wait(client_pid) + end + rescue Timeout::Error + STDERR.puts "timeout waiting for client pid #{client_pid}" + Process.kill('SIGKILL', client_pid) + Process.wait(client_pid) + STDERR.puts 'killed client child' + raise 'Timed out waiting for client process. ' \ + 'It likely hangs when the first constructed gRPC object has ' \ + "type: #{grpc_class}" + end + + client_exit_code = $CHILD_STATUS + # concurrency stress test type is expected to exit with a + # non-zero status due to an exception being raised + if client_exit_code != 0 && stress_test_type != 'concurrency' + fail "client failed, exit code #{client_exit_code}" + end end - rescue Timeout::Error - STDERR.puts "timeout waiting for client pid #{client_pid}" - Process.kill('SIGKILL', client_pid) - Process.wait(client_pid) - STDERR.puts 'killed client child' - raise 'Timed out waiting for client process. ' \ - 'It likely hangs when the first constructed gRPC object has ' \ - "type: #{grpc_class}" end - - client_exit_code = $CHILD_STATUS - fail "client failed, exit code #{client_exit_code}" if client_exit_code != 0 end end diff --git a/src/ruby/end2end/multiple_killed_watching_threads_driver.rb b/src/ruby/end2end/multiple_killed_watching_threads_driver.rb new file mode 100755 index 00000000000..206ec8e801f --- /dev/null +++ b/src/ruby/end2end/multiple_killed_watching_threads_driver.rb @@ -0,0 +1,63 @@ +#!/usr/bin/env ruby + +# Copyright 2016, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +require_relative './end2end_common' + +Thread.abort_on_exception = true + +include GRPC::Core::ConnectivityStates + +def watch_state(ch) + thd = Thread.new do + state = ch.connectivity_state(false) + fail "non-idle state: #{state}" unless state == IDLE + ch.watch_connectivity_state(IDLE, Time.now + 360) + end + sleep 0.1 + thd.kill +end + +def main + channels = [] + 10.times do + ch = GRPC::Core::Channel.new('dummy_host', + nil, :this_channel_is_insecure) + watch_state(ch) + channels << ch + end + + # checking state should still be safe to call + channels.each do |c| + fail unless c.connectivity_state(false) == FATAL_FAILURE + end +end + +main diff --git a/src/ruby/end2end/sig_int_during_channel_watch_client.rb b/src/ruby/end2end/sig_int_during_channel_watch_client.rb index 389fc5ba332..0c6a3749254 100755 --- a/src/ruby/end2end/sig_int_during_channel_watch_client.rb +++ b/src/ruby/end2end/sig_int_during_channel_watch_client.rb @@ -46,6 +46,8 @@ def main end end.parse! + trap('SIGINT') { exit 0 } + thd = Thread.new do child_thread_channel = GRPC::Core::Channel.new("localhost:#{server_port}", {}, diff --git a/src/ruby/end2end/sig_int_during_channel_watch_driver.rb b/src/ruby/end2end/sig_int_during_channel_watch_driver.rb index 670cda0919f..79a8c133fa8 100755 --- a/src/ruby/end2end/sig_int_during_channel_watch_driver.rb +++ b/src/ruby/end2end/sig_int_during_channel_watch_driver.rb @@ -63,6 +63,11 @@ def main 'SIGINT is sent while there is an active connectivity_state call' end + client_exit_code = $CHILD_STATUS + if client_exit_code != 0 + fail "sig_int_during_channel_watch_client failed: #{client_exit_code}" + end + server_runner.stop end diff --git a/src/ruby/ext/grpc/rb_channel.c b/src/ruby/ext/grpc/rb_channel.c index a8021837264..f65388448a3 100644 --- a/src/ruby/ext/grpc/rb_channel.c +++ b/src/ruby/ext/grpc/rb_channel.c @@ -68,29 +68,53 @@ static VALUE grpc_rb_cChannel = Qnil; /* Used during the conversion of a hash to channel args during channel setup */ static VALUE grpc_rb_cChannelArgs; +typedef struct bg_watched_channel { + grpc_channel *channel; + // these fields must only be accessed under global_connection_polling_mu + struct bg_watched_channel *next; + int channel_destroyed; + int refcount; +} bg_watched_channel; + /* grpc_rb_channel wraps a grpc_channel. */ typedef struct grpc_rb_channel { VALUE credentials; - /* The actual channel */ - grpc_channel *wrapped; - int request_safe_destroy; - int safe_to_destroy; - grpc_connectivity_state current_connectivity_state; - - int mu_init_done; - int abort_watch_connectivity_state; - gpr_mu channel_mu; - gpr_cv channel_cv; + /* The actual channel (protected in a wrapper to tell when it's safe to + * destroy) */ + bg_watched_channel *bg_wrapped; } grpc_rb_channel; -/* Forward declarations of functions involved in temporary fix to - * https://github.com/grpc/grpc/issues/9941 */ +typedef enum { CONTINUOUS_WATCH, WATCH_STATE_API } watch_state_op_type; + +typedef struct watch_state_op { + watch_state_op_type op_type; + // from event.success + union { + struct { + int success; + // has been called back due to a cq next call + int called_back; + } api_callback_args; + struct { + bg_watched_channel *bg; + } continuous_watch_callback_args; + } op; +} watch_state_op; + +static bg_watched_channel *bg_watched_channel_list_head = NULL; + static void grpc_rb_channel_try_register_connection_polling( - grpc_rb_channel *wrapper); -static void grpc_rb_channel_safe_destroy(grpc_rb_channel *wrapper); + bg_watched_channel *bg); static void *wait_until_channel_polling_thread_started_no_gil(void *); static void wait_until_channel_polling_thread_started_unblocking_func(void *); +static void *channel_init_try_register_connection_polling_without_gil( + void *arg); + +typedef struct channel_init_try_register_stack { + grpc_channel *channel; + grpc_rb_channel *wrapper; +} channel_init_try_register_stack; static grpc_completion_queue *channel_polling_cq; static gpr_mu global_connection_polling_mu; @@ -98,6 +122,42 @@ static gpr_cv global_connection_polling_cv; static int abort_channel_polling = 0; static int channel_polling_thread_started = 0; +static int bg_watched_channel_list_lookup(bg_watched_channel *bg); +static bg_watched_channel *bg_watched_channel_list_create_and_add( + grpc_channel *channel); +static void bg_watched_channel_list_free_and_remove(bg_watched_channel *bg); +static void run_poll_channels_loop_unblocking_func(void *arg); + +// Needs to be called under global_connection_polling_mu +static void grpc_rb_channel_watch_connection_state_op_complete( + watch_state_op *op, int success) { + GPR_ASSERT(!op->op.api_callback_args.called_back); + op->op.api_callback_args.called_back = 1; + op->op.api_callback_args.success = success; + // wake up the watch API call thats waiting on this op + gpr_cv_broadcast(&global_connection_polling_cv); +} + +/* Avoids destroying a channel twice. */ +static void grpc_rb_channel_safe_destroy(bg_watched_channel *bg) { + gpr_mu_lock(&global_connection_polling_mu); + GPR_ASSERT(bg_watched_channel_list_lookup(bg)); + if (!bg->channel_destroyed) { + grpc_channel_destroy(bg->channel); + bg->channel_destroyed = 1; + } + bg->refcount--; + if (bg->refcount == 0) { + bg_watched_channel_list_free_and_remove(bg); + } + gpr_mu_unlock(&global_connection_polling_mu); +} + +static void *channel_safe_destroy_without_gil(void *arg) { + grpc_rb_channel_safe_destroy((bg_watched_channel *)arg); + return NULL; +} + /* Destroys Channel instances. */ static void grpc_rb_channel_free(void *p) { grpc_rb_channel *ch = NULL; @@ -106,14 +166,13 @@ static void grpc_rb_channel_free(void *p) { }; ch = (grpc_rb_channel *)p; - if (ch->wrapped != NULL) { - grpc_rb_channel_safe_destroy(ch); - ch->wrapped = NULL; - } - - if (ch->mu_init_done) { - gpr_mu_destroy(&ch->channel_mu); - gpr_cv_destroy(&ch->channel_cv); + if (ch->bg_wrapped != NULL) { + /* assumption made here: it's ok to directly gpr_mu_lock the global + * connection polling mutex becuse we're in a finalizer, + * and we can count on this thread to not be interrupted or + * yield the gil. */ + grpc_rb_channel_safe_destroy(ch->bg_wrapped); + ch->bg_wrapped = NULL; } xfree(p); @@ -146,7 +205,7 @@ static rb_data_type_t grpc_channel_data_type = {"grpc_channel", /* Allocates grpc_rb_channel instances. */ static VALUE grpc_rb_channel_alloc(VALUE cls) { grpc_rb_channel *wrapper = ALLOC(grpc_rb_channel); - wrapper->wrapped = NULL; + wrapper->bg_wrapped = NULL; wrapper->credentials = Qnil; return TypedData_Wrap_Struct(cls, &grpc_channel_data_type, wrapper); } @@ -168,18 +227,21 @@ static VALUE grpc_rb_channel_init(int argc, VALUE *argv, VALUE self) { grpc_channel_credentials *creds = NULL; char *target_chars = NULL; grpc_channel_args args; + channel_init_try_register_stack stack; + int stop_waiting_for_thread_start = 0; MEMZERO(&args, grpc_channel_args, 1); grpc_ruby_once_init(); rb_thread_call_without_gvl( - wait_until_channel_polling_thread_started_no_gil, NULL, - wait_until_channel_polling_thread_started_unblocking_func, NULL); + wait_until_channel_polling_thread_started_no_gil, + &stop_waiting_for_thread_start, + wait_until_channel_polling_thread_started_unblocking_func, + &stop_waiting_for_thread_start); /* "3" == 3 mandatory args */ rb_scan_args(argc, argv, "3", &target, &channel_args, &credentials); TypedData_Get_Struct(self, grpc_rb_channel, &grpc_channel_data_type, wrapper); - wrapper->mu_init_done = 0; target_chars = StringValueCStr(target); grpc_rb_hash_convert_to_channel_args(channel_args, &args); if (TYPE(credentials) == T_SYMBOL) { @@ -196,24 +258,11 @@ static VALUE grpc_rb_channel_init(int argc, VALUE *argv, VALUE self) { } GPR_ASSERT(ch); - - wrapper->wrapped = ch; - - gpr_mu_init(&wrapper->channel_mu); - gpr_cv_init(&wrapper->channel_cv); - wrapper->mu_init_done = 1; - - gpr_mu_lock(&wrapper->channel_mu); - wrapper->abort_watch_connectivity_state = 0; - wrapper->current_connectivity_state = - grpc_channel_check_connectivity_state(wrapper->wrapped, 0); - wrapper->safe_to_destroy = 0; - wrapper->request_safe_destroy = 0; - - gpr_cv_broadcast(&wrapper->channel_cv); - gpr_mu_unlock(&wrapper->channel_mu); - - grpc_rb_channel_try_register_connection_polling(wrapper); + stack.channel = ch; + stack.wrapper = wrapper; + rb_thread_call_without_gvl( + channel_init_try_register_connection_polling_without_gil, &stack, NULL, + NULL); if (args.args != NULL) { xfree(args.args); /* Allocated by grpc_rb_hash_convert_to_channel_args */ @@ -224,10 +273,31 @@ static VALUE grpc_rb_channel_init(int argc, VALUE *argv, VALUE self) { return Qnil; } rb_ivar_set(self, id_target, target); - wrapper->wrapped = ch; return self; } +typedef struct get_state_stack { + bg_watched_channel *bg; + int try_to_connect; + int out; +} get_state_stack; + +static void *get_state_without_gil(void *arg) { + get_state_stack *stack = (get_state_stack *)arg; + + gpr_mu_lock(&global_connection_polling_mu); + GPR_ASSERT(abort_channel_polling || channel_polling_thread_started); + if (stack->bg->channel_destroyed) { + stack->out = GRPC_CHANNEL_SHUTDOWN; + } else { + stack->out = grpc_channel_check_connectivity_state(stack->bg->channel, + stack->try_to_connect); + } + gpr_mu_unlock(&global_connection_polling_mu); + + return NULL; +} + /* call-seq: ch.connectivity_state -> state @@ -240,59 +310,69 @@ static VALUE grpc_rb_channel_init(int argc, VALUE *argv, VALUE self) { static VALUE grpc_rb_channel_get_connectivity_state(int argc, VALUE *argv, VALUE self) { VALUE try_to_connect_param = Qfalse; - int grpc_try_to_connect = 0; grpc_rb_channel *wrapper = NULL; - grpc_channel *ch = NULL; + get_state_stack stack; /* "01" == 0 mandatory args, 1 (try_to_connect) is optional */ rb_scan_args(argc, argv, "01", &try_to_connect_param); - grpc_try_to_connect = RTEST(try_to_connect_param) ? 1 : 0; TypedData_Get_Struct(self, grpc_rb_channel, &grpc_channel_data_type, wrapper); - ch = wrapper->wrapped; - if (ch == NULL) { + if (wrapper->bg_wrapped == NULL) { rb_raise(rb_eRuntimeError, "closed!"); return Qnil; } - return LONG2NUM(grpc_channel_check_connectivity_state(wrapper->wrapped, - grpc_try_to_connect)); + + stack.bg = wrapper->bg_wrapped; + stack.try_to_connect = RTEST(try_to_connect_param) ? 1 : 0; + rb_thread_call_without_gvl(get_state_without_gil, &stack, NULL, NULL); + + return LONG2NUM(stack.out); } typedef struct watch_state_stack { - grpc_rb_channel *wrapper; + grpc_channel *channel; gpr_timespec deadline; int last_state; } watch_state_stack; -static void *watch_channel_state_without_gvl(void *arg) { +static void *wait_for_watch_state_op_complete_without_gvl(void *arg) { watch_state_stack *stack = (watch_state_stack *)arg; - gpr_timespec deadline = stack->deadline; - grpc_rb_channel *wrapper = stack->wrapper; - int last_state = stack->last_state; - void *return_value = (void *)0; + watch_state_op *op = NULL; + void *success = (void *)0; - gpr_mu_lock(&wrapper->channel_mu); - while (wrapper->current_connectivity_state == last_state && - !wrapper->request_safe_destroy && !wrapper->safe_to_destroy && - !wrapper->abort_watch_connectivity_state && - gpr_time_cmp(deadline, gpr_now(GPR_CLOCK_REALTIME)) > 0) { - gpr_cv_wait(&wrapper->channel_cv, &wrapper->channel_mu, deadline); + gpr_mu_lock(&global_connection_polling_mu); + // its unsafe to do a "watch" after "channel polling abort" because the cq has + // been shut down. + if (abort_channel_polling) { + gpr_mu_unlock(&global_connection_polling_mu); + return (void *)0; } - if (wrapper->current_connectivity_state != last_state) { - return_value = (void *)1; + op = gpr_zalloc(sizeof(watch_state_op)); + op->op_type = WATCH_STATE_API; + grpc_channel_watch_connectivity_state(stack->channel, stack->last_state, + stack->deadline, channel_polling_cq, + op); + + while (!op->op.api_callback_args.called_back) { + gpr_cv_wait(&global_connection_polling_cv, &global_connection_polling_mu, + gpr_inf_future(GPR_CLOCK_REALTIME)); } - gpr_mu_unlock(&wrapper->channel_mu); + if (op->op.api_callback_args.success) { + success = (void *)1; + } + gpr_free(op); + gpr_mu_unlock(&global_connection_polling_mu); - return return_value; + return success; } - -static void watch_channel_state_unblocking_func(void *arg) { - grpc_rb_channel *wrapper = (grpc_rb_channel *)arg; - gpr_log(GPR_DEBUG, "GRPC_RUBY: watch channel state unblocking func called"); - gpr_mu_lock(&wrapper->channel_mu); - wrapper->abort_watch_connectivity_state = 1; - gpr_cv_broadcast(&wrapper->channel_cv); - gpr_mu_unlock(&wrapper->channel_mu); +static void wait_for_watch_state_op_complete_unblocking_func(void *arg) { + bg_watched_channel *bg = (bg_watched_channel *)arg; + gpr_mu_lock(&global_connection_polling_mu); + if (!bg->channel_destroyed) { + grpc_channel_destroy(bg->channel); + bg->channel_destroyed = 1; + } + gpr_mu_unlock(&global_connection_polling_mu); } /* Wait until the channel's connectivity state becomes different from @@ -307,11 +387,11 @@ static VALUE grpc_rb_channel_watch_connectivity_state(VALUE self, VALUE deadline) { grpc_rb_channel *wrapper = NULL; watch_state_stack stack; - void *out; + void *op_success = 0; TypedData_Get_Struct(self, grpc_rb_channel, &grpc_channel_data_type, wrapper); - if (wrapper->wrapped == NULL) { + if (wrapper->bg_wrapped == NULL) { rb_raise(rb_eRuntimeError, "closed!"); return Qnil; } @@ -323,16 +403,15 @@ static VALUE grpc_rb_channel_watch_connectivity_state(VALUE self, return Qnil; } - stack.wrapper = wrapper; - stack.deadline = grpc_rb_time_timeval(deadline, 0); + stack.channel = wrapper->bg_wrapped->channel; + stack.deadline = grpc_rb_time_timeval(deadline, 0), stack.last_state = NUM2LONG(last_state); - out = - rb_thread_call_without_gvl(watch_channel_state_without_gvl, &stack, - watch_channel_state_unblocking_func, wrapper); - if (out) { - return Qtrue; - } - return Qfalse; + + op_success = rb_thread_call_without_gvl( + wait_for_watch_state_op_complete_without_gvl, &stack, + wait_for_watch_state_op_complete_unblocking_func, wrapper->bg_wrapped); + + return op_success ? Qtrue : Qfalse; } /* Create a call given a grpc_channel, in order to call method. The request @@ -344,7 +423,6 @@ static VALUE grpc_rb_channel_create_call(VALUE self, VALUE parent, VALUE mask, grpc_rb_channel *wrapper = NULL; grpc_call *call = NULL; grpc_call *parent_call = NULL; - grpc_channel *ch = NULL; grpc_completion_queue *cq = NULL; int flags = GRPC_PROPAGATE_DEFAULTS; grpc_slice method_slice; @@ -366,8 +444,7 @@ static VALUE grpc_rb_channel_create_call(VALUE self, VALUE parent, VALUE mask, cq = grpc_completion_queue_create_for_pluck(NULL); TypedData_Get_Struct(self, grpc_rb_channel, &grpc_channel_data_type, wrapper); - ch = wrapper->wrapped; - if (ch == NULL) { + if (wrapper->bg_wrapped == NULL) { rb_raise(rb_eRuntimeError, "closed!"); return Qnil; } @@ -375,8 +452,8 @@ static VALUE grpc_rb_channel_create_call(VALUE self, VALUE parent, VALUE mask, method_slice = grpc_slice_from_copied_buffer(RSTRING_PTR(method), RSTRING_LEN(method)); - call = grpc_channel_create_call(ch, parent_call, flags, cq, method_slice, - host_slice_ptr, + call = grpc_channel_create_call(wrapper->bg_wrapped->channel, parent_call, + flags, cq, method_slice, host_slice_ptr, grpc_rb_time_timeval(deadline, /* absolute time */ 0), NULL); @@ -401,15 +478,16 @@ static VALUE grpc_rb_channel_create_call(VALUE self, VALUE parent, VALUE mask, } /* Closes the channel, calling it's destroy method */ +/* Note this is an API-level call; a wrapped channel's finalizer doesn't call + * this */ static VALUE grpc_rb_channel_destroy(VALUE self) { grpc_rb_channel *wrapper = NULL; - grpc_channel *ch = NULL; TypedData_Get_Struct(self, grpc_rb_channel, &grpc_channel_data_type, wrapper); - ch = wrapper->wrapped; - if (ch != NULL) { - grpc_rb_channel_safe_destroy(wrapper); - wrapper->wrapped = NULL; + if (wrapper->bg_wrapped != NULL) { + rb_thread_call_without_gvl(channel_safe_destroy_without_gil, + wrapper->bg_wrapped, NULL, NULL); + wrapper->bg_wrapped = NULL; } return Qnil; @@ -422,64 +500,110 @@ static VALUE grpc_rb_channel_get_target(VALUE self) { char *target = NULL; TypedData_Get_Struct(self, grpc_rb_channel, &grpc_channel_data_type, wrapper); - target = grpc_channel_get_target(wrapper->wrapped); + target = grpc_channel_get_target(wrapper->bg_wrapped->channel); res = rb_str_new2(target); gpr_free(target); return res; } -// Either start polling channel connection state or signal that it's free to -// destroy. -// Not safe to call while a channel's connection state is polled. -static void grpc_rb_channel_try_register_connection_polling( - grpc_rb_channel *wrapper) { - grpc_connectivity_state conn_state; - gpr_timespec sleep_time = gpr_time_add( - gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_millis(20, GPR_TIMESPAN)); - - GPR_ASSERT(wrapper); - GPR_ASSERT(wrapper->wrapped); - gpr_mu_lock(&wrapper->channel_mu); - if (wrapper->request_safe_destroy) { - wrapper->safe_to_destroy = 1; - gpr_cv_broadcast(&wrapper->channel_cv); - gpr_mu_unlock(&wrapper->channel_mu); - return; +/* Needs to be called under global_connection_polling_mu */ +static int bg_watched_channel_list_lookup(bg_watched_channel *target) { + bg_watched_channel *cur = bg_watched_channel_list_head; + + while (cur != NULL) { + if (cur == target) { + return 1; + } + cur = cur->next; } - gpr_mu_lock(&global_connection_polling_mu); - GPR_ASSERT(channel_polling_thread_started || abort_channel_polling); - conn_state = grpc_channel_check_connectivity_state(wrapper->wrapped, 0); - if (conn_state != wrapper->current_connectivity_state) { - wrapper->current_connectivity_state = conn_state; - gpr_cv_broadcast(&wrapper->channel_cv); - } - // avoid posting work to the channel polling cq if it's been shutdown - if (!abort_channel_polling && conn_state != GRPC_CHANNEL_SHUTDOWN) { - grpc_channel_watch_connectivity_state( - wrapper->wrapped, conn_state, sleep_time, channel_polling_cq, wrapper); - } else { - wrapper->safe_to_destroy = 1; - gpr_cv_broadcast(&wrapper->channel_cv); + return 0; +} + +/* Needs to be called under global_connection_polling_mu */ +static bg_watched_channel *bg_watched_channel_list_create_and_add( + grpc_channel *channel) { + bg_watched_channel *watched = gpr_zalloc(sizeof(bg_watched_channel)); + + watched->channel = channel; + watched->next = bg_watched_channel_list_head; + watched->refcount = 1; + bg_watched_channel_list_head = watched; + return watched; +} + +/* Needs to be called under global_connection_polling_mu */ +static void bg_watched_channel_list_free_and_remove( + bg_watched_channel *target) { + bg_watched_channel *bg = NULL; + + GPR_ASSERT(bg_watched_channel_list_lookup(target)); + GPR_ASSERT(target->channel_destroyed && target->refcount == 0); + if (bg_watched_channel_list_head == target) { + bg_watched_channel_list_head = target->next; + gpr_free(target); + return; + } + bg = bg_watched_channel_list_head; + while (bg != NULL && bg->next != NULL) { + if (bg->next == target) { + bg->next = bg->next->next; + gpr_free(target); + return; + } + bg = bg->next; } + GPR_ASSERT(0); +} + +/* Initialize a grpc_rb_channel's "protected grpc_channel" and try to push + * it onto the background thread for constant watches. */ +static void *channel_init_try_register_connection_polling_without_gil( + void *arg) { + channel_init_try_register_stack *stack = + (channel_init_try_register_stack *)arg; + + gpr_mu_lock(&global_connection_polling_mu); + stack->wrapper->bg_wrapped = + bg_watched_channel_list_create_and_add(stack->channel); + grpc_rb_channel_try_register_connection_polling(stack->wrapper->bg_wrapped); gpr_mu_unlock(&global_connection_polling_mu); - gpr_mu_unlock(&wrapper->channel_mu); + return NULL; } -// Note requires wrapper->wrapped, wrapper->channel_mu/cv initialized -static void grpc_rb_channel_safe_destroy(grpc_rb_channel *wrapper) { - gpr_mu_lock(&wrapper->channel_mu); - wrapper->request_safe_destroy = 1; +// Needs to be called under global_connection_poolling_mu +static void grpc_rb_channel_try_register_connection_polling( + bg_watched_channel *bg) { + grpc_connectivity_state conn_state; + watch_state_op *op = NULL; - while (!wrapper->safe_to_destroy) { - gpr_cv_wait(&wrapper->channel_cv, &wrapper->channel_mu, - gpr_inf_future(GPR_CLOCK_REALTIME)); + GPR_ASSERT(channel_polling_thread_started || abort_channel_polling); + + if (bg->refcount == 0) { + GPR_ASSERT(bg->channel_destroyed); + bg_watched_channel_list_free_and_remove(bg); + return; + } + GPR_ASSERT(bg->refcount == 1); + if (bg->channel_destroyed || abort_channel_polling) { + return; } - GPR_ASSERT(wrapper->safe_to_destroy); - gpr_mu_unlock(&wrapper->channel_mu); - grpc_channel_destroy(wrapper->wrapped); + conn_state = grpc_channel_check_connectivity_state(bg->channel, 0); + if (conn_state == GRPC_CHANNEL_SHUTDOWN) { + return; + } + GPR_ASSERT(bg_watched_channel_list_lookup(bg)); + // prevent bg from being free'd by GC while background thread is watching it + bg->refcount++; + + op = gpr_zalloc(sizeof(watch_state_op)); + op->op_type = CONTINUOUS_WATCH; + op->op.continuous_watch_callback_args.bg = bg; + grpc_channel_watch_connectivity_state(bg->channel, conn_state, + gpr_inf_future(GPR_CLOCK_REALTIME), + channel_polling_cq, op); } // Note this loop breaks out with a single call of @@ -490,6 +614,8 @@ static void grpc_rb_channel_safe_destroy(grpc_rb_channel *wrapper) { // early and falls back to current behavior. static void *run_poll_channels_loop_no_gil(void *arg) { grpc_event event; + watch_state_op *op = NULL; + bg_watched_channel *bg = NULL; (void)arg; gpr_log(GPR_DEBUG, "GRPC_RUBY: run_poll_channels_loop_no_gil - begin"); @@ -505,10 +631,22 @@ static void *run_poll_channels_loop_no_gil(void *arg) { if (event.type == GRPC_QUEUE_SHUTDOWN) { break; } + gpr_mu_lock(&global_connection_polling_mu); if (event.type == GRPC_OP_COMPLETE) { - grpc_rb_channel_try_register_connection_polling( - (grpc_rb_channel *)event.tag); + op = (watch_state_op *)event.tag; + if (op->op_type == CONTINUOUS_WATCH) { + bg = (bg_watched_channel *)op->op.continuous_watch_callback_args.bg; + bg->refcount--; + grpc_rb_channel_try_register_connection_polling(bg); + gpr_free(op); + } else if (op->op_type == WATCH_STATE_API) { + grpc_rb_channel_watch_connection_state_op_complete( + (watch_state_op *)event.tag, event.success); + } else { + GPR_ASSERT(0); + } } + gpr_mu_unlock(&global_connection_polling_mu); } grpc_completion_queue_destroy(channel_polling_cq); gpr_log(GPR_DEBUG, @@ -519,14 +657,36 @@ static void *run_poll_channels_loop_no_gil(void *arg) { // Notify the channel polling loop to cleanup and shutdown. static void run_poll_channels_loop_unblocking_func(void *arg) { + bg_watched_channel *bg = NULL; (void)arg; + gpr_mu_lock(&global_connection_polling_mu); gpr_log(GPR_DEBUG, - "GRPC_RUBY: grpc_rb_event_unblocking_func - begin aborting " + "GRPC_RUBY: run_poll_channels_loop_unblocking_func - begin aborting " "connection polling"); + // early out after first time through + if (abort_channel_polling) { + gpr_mu_unlock(&global_connection_polling_mu); + return; + } abort_channel_polling = 1; + + // force pending watches to end by switching to shutdown state + bg = bg_watched_channel_list_head; + while (bg != NULL) { + if (!bg->channel_destroyed) { + grpc_channel_destroy(bg->channel); + bg->channel_destroyed = 1; + } + bg = bg->next; + } + grpc_completion_queue_shutdown(channel_polling_cq); + gpr_cv_broadcast(&global_connection_polling_cv); gpr_mu_unlock(&global_connection_polling_mu); + gpr_log(GPR_DEBUG, + "GRPC_RUBY: run_poll_channels_loop_unblocking_func - end aborting " + "connection polling"); } // Poll channel connectivity states in background thread without the GIL. @@ -542,10 +702,11 @@ static VALUE run_poll_channels_loop(VALUE arg) { } static void *wait_until_channel_polling_thread_started_no_gil(void *arg) { - (void)arg; + int *stop_waiting = (int *)arg; gpr_log(GPR_DEBUG, "GRPC_RUBY: wait for channel polling thread to start"); gpr_mu_lock(&global_connection_polling_mu); - while (!channel_polling_thread_started && !abort_channel_polling) { + while (!channel_polling_thread_started && !abort_channel_polling && + !*stop_waiting) { gpr_cv_wait(&global_connection_polling_cv, &global_connection_polling_mu, gpr_inf_future(GPR_CLOCK_REALTIME)); } @@ -556,15 +717,22 @@ static void *wait_until_channel_polling_thread_started_no_gil(void *arg) { static void wait_until_channel_polling_thread_started_unblocking_func( void *arg) { - (void)arg; + int *stop_waiting = (int *)arg; gpr_mu_lock(&global_connection_polling_mu); gpr_log(GPR_DEBUG, - "GRPC_RUBY: " - "wait_until_channel_polling_thread_started_unblocking_func - begin " - "aborting connection polling"); + "GRPC_RUBY: interrupt wait for channel polling thread to start"); + *stop_waiting = 1; + gpr_cv_broadcast(&global_connection_polling_cv); + gpr_mu_unlock(&global_connection_polling_mu); +} + +static void *set_abort_channel_polling_without_gil(void *arg) { + (void)arg; + gpr_mu_lock(&global_connection_polling_mu); abort_channel_polling = 1; gpr_cv_broadcast(&global_connection_polling_cv); gpr_mu_unlock(&global_connection_polling_mu); + return NULL; } /* Temporary fix for @@ -592,10 +760,8 @@ void grpc_rb_channel_polling_thread_start() { if (!RTEST(background_thread)) { gpr_log(GPR_DEBUG, "GRPC_RUBY: failed to spawn channel polling thread"); - gpr_mu_lock(&global_connection_polling_mu); - abort_channel_polling = 1; - gpr_cv_broadcast(&global_connection_polling_cv); - gpr_mu_unlock(&global_connection_polling_mu); + rb_thread_call_without_gvl(set_abort_channel_polling_without_gil, NULL, + NULL, NULL); } } @@ -674,5 +840,5 @@ void Init_grpc_channel() { grpc_channel *grpc_rb_get_wrapped_channel(VALUE v) { grpc_rb_channel *wrapper = NULL; TypedData_Get_Struct(v, grpc_rb_channel, &grpc_channel_data_type, wrapper); - return wrapper->wrapped; + return wrapper->bg_wrapped->channel; } diff --git a/src/ruby/ext/grpc/rb_event_thread.c b/src/ruby/ext/grpc/rb_event_thread.c index 9a3b56ddfb7..71138265c84 100644 --- a/src/ruby/ext/grpc/rb_event_thread.c +++ b/src/ruby/ext/grpc/rb_event_thread.c @@ -105,16 +105,16 @@ static void *grpc_rb_wait_for_event_no_gil(void *param) { grpc_rb_event *event = NULL; (void)param; gpr_mu_lock(&event_queue.mu); - while ((event = grpc_rb_event_queue_dequeue()) == NULL) { - gpr_cv_wait(&event_queue.cv, &event_queue.mu, - gpr_inf_future(GPR_CLOCK_REALTIME)); - if (event_queue.abort) { + while (!event_queue.abort) { + if ((event = grpc_rb_event_queue_dequeue()) != NULL) { gpr_mu_unlock(&event_queue.mu); - return NULL; + return event; } + gpr_cv_wait(&event_queue.cv, &event_queue.mu, + gpr_inf_future(GPR_CLOCK_REALTIME)); } gpr_mu_unlock(&event_queue.mu); - return event; + return NULL; } static void grpc_rb_event_unblocking_func(void *arg) { diff --git a/src/ruby/ext/grpc/rb_grpc.c b/src/ruby/ext/grpc/rb_grpc.c index 5be8861e0c4..c319cd13917 100644 --- a/src/ruby/ext/grpc/rb_grpc.c +++ b/src/ruby/ext/grpc/rb_grpc.c @@ -292,11 +292,12 @@ static gpr_once g_once_init = GPR_ONCE_INIT; static void grpc_ruby_once_init_internal() { grpc_init(); - grpc_rb_event_queue_thread_start(); - grpc_rb_channel_polling_thread_start(); atexit(grpc_rb_shutdown); } +static VALUE bg_thread_init_rb_mu = Qundef; +static int bg_thread_init_done = 0; + void grpc_ruby_once_init() { /* ruby_vm_at_exit doesn't seem to be working. It would crash once every * blue moon, and some users are getting it repeatedly. See the discussions @@ -309,6 +310,18 @@ void grpc_ruby_once_init() { * schedule our initialization and destruction only once. */ gpr_once_init(&g_once_init, grpc_ruby_once_init_internal); + + // Avoid calling calling into ruby library (when creating threads here) + // in gpr_once_init. In general, it appears to be unsafe to call + // into the ruby library while holding a non-ruby mutex, because a gil yield + // could end up trying to lock onto that same mutex and deadlocking. + rb_mutex_lock(bg_thread_init_rb_mu); + if (!bg_thread_init_done) { + grpc_rb_event_queue_thread_start(); + grpc_rb_channel_polling_thread_start(); + bg_thread_init_done = 1; + } + rb_mutex_unlock(bg_thread_init_rb_mu); } void Init_grpc_c() { @@ -317,6 +330,9 @@ void Init_grpc_c() { return; } + bg_thread_init_rb_mu = rb_mutex_new(); + rb_global_variable(&bg_thread_init_rb_mu); + grpc_rb_mGRPC = rb_define_module("GRPC"); grpc_rb_mGrpcCore = rb_define_module_under(grpc_rb_mGRPC, "Core"); grpc_rb_sNewServerRpc = rb_struct_define( diff --git a/src/ruby/ext/grpc/rb_grpc_imports.generated.c b/src/ruby/ext/grpc/rb_grpc_imports.generated.c index 8f76420af6b..f6a4ff6795a 100644 --- a/src/ruby/ext/grpc/rb_grpc_imports.generated.c +++ b/src/ruby/ext/grpc/rb_grpc_imports.generated.c @@ -103,6 +103,7 @@ grpc_alarm_create_type grpc_alarm_create_import; grpc_alarm_cancel_type grpc_alarm_cancel_import; grpc_alarm_destroy_type grpc_alarm_destroy_import; grpc_channel_check_connectivity_state_type grpc_channel_check_connectivity_state_import; +grpc_channel_num_external_connectivity_watchers_type grpc_channel_num_external_connectivity_watchers_import; grpc_channel_watch_connectivity_state_type grpc_channel_watch_connectivity_state_import; grpc_channel_create_call_type grpc_channel_create_call_import; grpc_channel_ping_type grpc_channel_ping_import; @@ -405,6 +406,7 @@ void grpc_rb_load_imports(HMODULE library) { grpc_alarm_cancel_import = (grpc_alarm_cancel_type) GetProcAddress(library, "grpc_alarm_cancel"); grpc_alarm_destroy_import = (grpc_alarm_destroy_type) GetProcAddress(library, "grpc_alarm_destroy"); grpc_channel_check_connectivity_state_import = (grpc_channel_check_connectivity_state_type) GetProcAddress(library, "grpc_channel_check_connectivity_state"); + grpc_channel_num_external_connectivity_watchers_import = (grpc_channel_num_external_connectivity_watchers_type) GetProcAddress(library, "grpc_channel_num_external_connectivity_watchers"); grpc_channel_watch_connectivity_state_import = (grpc_channel_watch_connectivity_state_type) GetProcAddress(library, "grpc_channel_watch_connectivity_state"); grpc_channel_create_call_import = (grpc_channel_create_call_type) GetProcAddress(library, "grpc_channel_create_call"); grpc_channel_ping_import = (grpc_channel_ping_type) GetProcAddress(library, "grpc_channel_ping"); diff --git a/src/ruby/ext/grpc/rb_grpc_imports.generated.h b/src/ruby/ext/grpc/rb_grpc_imports.generated.h index 58467f97e84..0d64290b55a 100644 --- a/src/ruby/ext/grpc/rb_grpc_imports.generated.h +++ b/src/ruby/ext/grpc/rb_grpc_imports.generated.h @@ -260,6 +260,9 @@ extern grpc_alarm_destroy_type grpc_alarm_destroy_import; typedef grpc_connectivity_state(*grpc_channel_check_connectivity_state_type)(grpc_channel *channel, int try_to_connect); extern grpc_channel_check_connectivity_state_type grpc_channel_check_connectivity_state_import; #define grpc_channel_check_connectivity_state grpc_channel_check_connectivity_state_import +typedef int(*grpc_channel_num_external_connectivity_watchers_type)(grpc_channel *channel); +extern grpc_channel_num_external_connectivity_watchers_type grpc_channel_num_external_connectivity_watchers_import; +#define grpc_channel_num_external_connectivity_watchers grpc_channel_num_external_connectivity_watchers_import typedef void(*grpc_channel_watch_connectivity_state_type)(grpc_channel *channel, grpc_connectivity_state last_observed_state, gpr_timespec deadline, grpc_completion_queue *cq, void *tag); extern grpc_channel_watch_connectivity_state_type grpc_channel_watch_connectivity_state_import; #define grpc_channel_watch_connectivity_state grpc_channel_watch_connectivity_state_import diff --git a/src/ruby/spec/channel_connection_spec.rb b/src/ruby/spec/channel_connection_spec.rb index 940d68b9b04..c8a7856a099 100644 --- a/src/ruby/spec/channel_connection_spec.rb +++ b/src/ruby/spec/channel_connection_spec.rb @@ -28,6 +28,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. require 'grpc' +require 'timeout' + +include Timeout +include GRPC::Core # A test message class EchoMsg @@ -62,7 +66,7 @@ end EchoStub = EchoService.rpc_stub_class def start_server(port = 0) - @srv = GRPC::RpcServer.new + @srv = GRPC::RpcServer.new(pool_size: 1) server_port = @srv.add_http2_port("localhost:#{port}", :this_port_is_insecure) @srv.handle(EchoService) @server_thd = Thread.new { @srv.run } @@ -138,4 +142,32 @@ describe 'channel connection behavior' do stop_server end + + it 'concurrent watches on the same channel' do + timeout(180) do + port = start_server + ch = GRPC::Core::Channel.new("localhost:#{port}", {}, + :this_channel_is_insecure) + stop_server + + thds = [] + 50.times do + thds << Thread.new do + while ch.connectivity_state(true) != ConnectivityStates::READY + ch.watch_connectivity_state( + ConnectivityStates::READY, Time.now + 60) + break + end + end + end + + sleep 0.01 + + start_server(port) + + thds.each(&:join) + + stop_server + end + end end diff --git a/templates/src/csharp/build_packages_dotnetcli.bat.template b/templates/src/csharp/build_packages_dotnetcli.bat.template index 8624c34782b..3db1e0ac3de 100755 --- a/templates/src/csharp/build_packages_dotnetcli.bat.template +++ b/templates/src/csharp/build_packages_dotnetcli.bat.template @@ -53,11 +53,11 @@ @rem To be able to build, we also need to put grpc_csharp_ext to its normal location xcopy /Y /I nativelibs\csharp_ext_windows_x64\grpc_csharp_ext.dll ..\..\cmake\build\x64\Release${"\\"} - %%DOTNET% pack --configuration Release --include-symbols --include-source Grpc.Core --output ..\..\..\artifacts || goto :error - %%DOTNET% pack --configuration Release --include-symbols --include-source Grpc.Core.Testing --output ..\..\..\artifacts || goto :error - %%DOTNET% pack --configuration Release --include-symbols --include-source Grpc.Auth --output ..\..\..\artifacts || goto :error - %%DOTNET% pack --configuration Release --include-symbols --include-source Grpc.HealthCheck --output ..\..\..\artifacts || goto :error - %%DOTNET% pack --configuration Release --include-symbols --include-source Grpc.Reflection --output ..\..\..\artifacts || goto :error + %%DOTNET% pack --configuration Release Grpc.Core --output ..\..\..\artifacts || goto :error + %%DOTNET% pack --configuration Release Grpc.Core.Testing --output ..\..\..\artifacts || goto :error + %%DOTNET% pack --configuration Release Grpc.Auth --output ..\..\..\artifacts || goto :error + %%DOTNET% pack --configuration Release Grpc.HealthCheck --output ..\..\..\artifacts || goto :error + %%DOTNET% pack --configuration Release Grpc.Reflection --output ..\..\..\artifacts || goto :error %%NUGET% pack Grpc.nuspec -Version %VERSION% -OutputDirectory ..\..\artifacts || goto :error %%NUGET% pack Grpc.Tools.nuspec -Version %VERSION% -OutputDirectory ..\..\artifacts diff --git a/templates/src/csharp/build_packages_dotnetcli.sh.template b/templates/src/csharp/build_packages_dotnetcli.sh.template index bff497c85a4..65afec55c4d 100755 --- a/templates/src/csharp/build_packages_dotnetcli.sh.template +++ b/templates/src/csharp/build_packages_dotnetcli.sh.template @@ -50,11 +50,11 @@ mkdir -p ../../libs/opt cp nativelibs/csharp_ext_linux_x64/libgrpc_csharp_ext.so ../../libs/opt - dotnet pack --configuration Release --include-symbols --include-source Grpc.Core --output ../../../artifacts - dotnet pack --configuration Release --include-symbols --include-source Grpc.Core.Testing --output ../../../artifacts - dotnet pack --configuration Release --include-symbols --include-source Grpc.Auth --output ../../../artifacts - dotnet pack --configuration Release --include-symbols --include-source Grpc.HealthCheck --output ../../../artifacts - dotnet pack --configuration Release --include-symbols --include-source Grpc.Reflection --output ../../../artifacts + dotnet pack --configuration Release Grpc.Core --output ../../../artifacts + dotnet pack --configuration Release Grpc.Core.Testing --output ../../../artifacts + dotnet pack --configuration Release Grpc.Auth --output ../../../artifacts + dotnet pack --configuration Release Grpc.HealthCheck --output ../../../artifacts + dotnet pack --configuration Release Grpc.Reflection --output ../../../artifacts nuget pack Grpc.nuspec -Version "${settings.csharp_version}" -OutputDirectory ../../artifacts nuget pack Grpc.Tools.nuspec -Version "${settings.csharp_version}" -OutputDirectory ../../artifacts diff --git a/test/core/surface/concurrent_connectivity_test.c b/test/core/surface/concurrent_connectivity_test.c index 87ad095170f..7614696caed 100644 --- a/test/core/surface/concurrent_connectivity_test.c +++ b/test/core/surface/concurrent_connectivity_test.c @@ -61,6 +61,14 @@ #define DELAY_MILLIS 10 #define POLL_MILLIS 3000 +#define NUM_OUTER_LOOPS_SHORT_TIMEOUTS 10 +#define NUM_INNER_LOOPS_SHORT_TIMEOUTS 100 +#define DELAY_MILLIS_SHORT_TIMEOUTS 1 +// in a successful test run, POLL_MILLIS should never be reached beause all runs +// should +// end after the shorter delay_millis +#define POLL_MILLIS_SHORT_TIMEOUTS 30000 + static void *tag(int n) { return (void *)(uintptr_t)n; } static int detag(void *p) { return (int)(uintptr_t)p; } @@ -79,6 +87,8 @@ void create_loop_destroy(void *addr) { grpc_timeout_milliseconds_to_deadline(POLL_MILLIS); GPR_ASSERT(grpc_completion_queue_next(cq, poll_time, NULL).type == GRPC_OP_COMPLETE); + /* check that the watcher from "watch state" was free'd */ + GPR_ASSERT(grpc_channel_num_external_connectivity_watchers(chan) == 0); } grpc_channel_destroy(chan); grpc_completion_queue_destroy(cq); @@ -168,11 +178,10 @@ static void done_pollset_shutdown(grpc_exec_ctx *exec_ctx, void *pollset, gpr_free(pollset); } -int main(int argc, char **argv) { +int run_concurrent_connectivity_test() { struct server_thread_args args; memset(&args, 0, sizeof(args)); - grpc_test_init(argc, argv); grpc_init(); gpr_thd_id threads[NUM_THREADS]; @@ -242,3 +251,59 @@ int main(int argc, char **argv) { grpc_shutdown(); return 0; } + +void watches_with_short_timeouts(void *addr) { + for (int i = 0; i < NUM_OUTER_LOOPS_SHORT_TIMEOUTS; ++i) { + grpc_completion_queue *cq = grpc_completion_queue_create_for_next(NULL); + grpc_channel *chan = grpc_insecure_channel_create((char *)addr, NULL, NULL); + + for (int j = 0; j < NUM_INNER_LOOPS_SHORT_TIMEOUTS; ++j) { + gpr_timespec later_time = + grpc_timeout_milliseconds_to_deadline(DELAY_MILLIS_SHORT_TIMEOUTS); + grpc_connectivity_state state = + grpc_channel_check_connectivity_state(chan, 0); + GPR_ASSERT(state == GRPC_CHANNEL_IDLE); + grpc_channel_watch_connectivity_state(chan, state, later_time, cq, NULL); + gpr_timespec poll_time = + grpc_timeout_milliseconds_to_deadline(POLL_MILLIS_SHORT_TIMEOUTS); + grpc_event ev = grpc_completion_queue_next(cq, poll_time, NULL); + GPR_ASSERT(ev.type == GRPC_OP_COMPLETE); + GPR_ASSERT(ev.success == false); + /* check that the watcher from "watch state" was free'd */ + GPR_ASSERT(grpc_channel_num_external_connectivity_watchers(chan) == 0); + } + grpc_channel_destroy(chan); + grpc_completion_queue_destroy(cq); + } +} + +// This test tries to catch deadlock situations. +// With short timeouts on "watches" and long timeouts on cq next calls, +// so that a QUEUE_TIMEOUT likely means that something is stuck. +int run_concurrent_watches_with_short_timeouts_test() { + grpc_init(); + + gpr_thd_id threads[NUM_THREADS]; + + char *localhost = gpr_strdup("localhost:54321"); + gpr_thd_options options = gpr_thd_options_default(); + gpr_thd_options_set_joinable(&options); + + for (size_t i = 0; i < NUM_THREADS; ++i) { + gpr_thd_new(&threads[i], watches_with_short_timeouts, localhost, &options); + } + for (size_t i = 0; i < NUM_THREADS; ++i) { + gpr_thd_join(threads[i]); + } + gpr_free(localhost); + + grpc_shutdown(); + return 0; +} + +int main(int argc, char **argv) { + grpc_test_init(argc, argv); + + run_concurrent_connectivity_test(); + run_concurrent_watches_with_short_timeouts_test(); +} diff --git a/test/core/surface/num_external_connectivity_watchers_test.c b/test/core/surface/num_external_connectivity_watchers_test.c new file mode 100644 index 00000000000..93944c9ad5b --- /dev/null +++ b/test/core/surface/num_external_connectivity_watchers_test.c @@ -0,0 +1,214 @@ +/* + * + * Copyright 2016, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include + +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/iomgr/exec_ctx.h" +#include "test/core/end2end/data/ssl_test_data.h" +#include "test/core/util/port.h" +#include "test/core/util/test_config.h" + +typedef struct test_fixture { + const char *name; + grpc_channel *(*create_channel)(const char *addr); +} test_fixture; + +static size_t next_tag = 1; + +static void channel_idle_start_watch(grpc_channel *channel, + grpc_completion_queue *cq) { + gpr_timespec connect_deadline = grpc_timeout_milliseconds_to_deadline(1); + GPR_ASSERT(grpc_channel_check_connectivity_state(channel, 0) == + GRPC_CHANNEL_IDLE); + + grpc_channel_watch_connectivity_state( + channel, GRPC_CHANNEL_IDLE, connect_deadline, cq, (void *)(next_tag++)); + gpr_log(GPR_DEBUG, "number of active connect watchers: %d", + grpc_channel_num_external_connectivity_watchers(channel)); +} + +static void channel_idle_poll_for_timeout(grpc_channel *channel, + grpc_completion_queue *cq) { + grpc_event ev = + grpc_completion_queue_next(cq, gpr_inf_future(GPR_CLOCK_REALTIME), NULL); + + /* expect watch_connectivity_state to end with a timeout */ + GPR_ASSERT(ev.type == GRPC_OP_COMPLETE); + GPR_ASSERT(ev.success == false); + GPR_ASSERT(grpc_channel_check_connectivity_state(channel, 0) == + GRPC_CHANNEL_IDLE); +} + +/* Test and use the "num_external_watchers" call to make sure + * that "connectivity watcher" structs are free'd just after, if + * their corresponding timeouts occur. */ +static void run_timeouts_test(const test_fixture *fixture) { + gpr_log(GPR_INFO, "TEST: %s", fixture->name); + + char *addr; + + grpc_init(); + + gpr_join_host_port(&addr, "localhost", grpc_pick_unused_port_or_die()); + + grpc_channel *channel = fixture->create_channel(addr); + grpc_completion_queue *cq = grpc_completion_queue_create_for_next(NULL); + + /* start 1 watcher and then let it time out */ + channel_idle_start_watch(channel, cq); + channel_idle_poll_for_timeout(channel, cq); + GPR_ASSERT(grpc_channel_num_external_connectivity_watchers(channel) == 0); + + /* start 3 watchers and then let them all time out */ + for (size_t i = 1; i <= 3; i++) { + channel_idle_start_watch(channel, cq); + } + for (size_t i = 1; i <= 3; i++) { + channel_idle_poll_for_timeout(channel, cq); + } + GPR_ASSERT(grpc_channel_num_external_connectivity_watchers(channel) == 0); + + /* start 3 watchers, see one time out, start another 3, and then see them all + * time out */ + for (size_t i = 1; i <= 3; i++) { + channel_idle_start_watch(channel, cq); + } + channel_idle_poll_for_timeout(channel, cq); + for (size_t i = 3; i <= 5; i++) { + channel_idle_start_watch(channel, cq); + } + for (size_t i = 1; i <= 5; i++) { + channel_idle_poll_for_timeout(channel, cq); + } + GPR_ASSERT(grpc_channel_num_external_connectivity_watchers(channel) == 0); + + grpc_channel_destroy(channel); + grpc_completion_queue_shutdown(cq); + GPR_ASSERT( + grpc_completion_queue_next(cq, gpr_inf_future(GPR_CLOCK_REALTIME), NULL) + .type == GRPC_QUEUE_SHUTDOWN); + grpc_completion_queue_destroy(cq); + + grpc_shutdown(); + gpr_free(addr); +} + +/* An edge scenario; sets channel state to explicitly, and outside + * of a polling call. */ +static void run_channel_shutdown_before_timeout_test( + const test_fixture *fixture) { + gpr_log(GPR_INFO, "TEST: %s", fixture->name); + + char *addr; + + grpc_init(); + + gpr_join_host_port(&addr, "localhost", grpc_pick_unused_port_or_die()); + + grpc_channel *channel = fixture->create_channel(addr); + grpc_completion_queue *cq = grpc_completion_queue_create_for_next(NULL); + + /* start 1 watcher and then shut down the channel before the timer goes off */ + GPR_ASSERT(grpc_channel_num_external_connectivity_watchers(channel) == 0); + + /* expecting a 30 second timeout to go off much later than the shutdown. */ + gpr_timespec connect_deadline = grpc_timeout_seconds_to_deadline(30); + GPR_ASSERT(grpc_channel_check_connectivity_state(channel, 0) == + GRPC_CHANNEL_IDLE); + + grpc_channel_watch_connectivity_state(channel, GRPC_CHANNEL_IDLE, + connect_deadline, cq, (void *)1); + grpc_channel_destroy(channel); + + grpc_event ev = + grpc_completion_queue_next(cq, gpr_inf_future(GPR_CLOCK_REALTIME), NULL); + GPR_ASSERT(ev.type == GRPC_OP_COMPLETE); + /* expect success with a state transition to CHANNEL_SHUTDOWN */ + GPR_ASSERT(ev.success == true); + + grpc_completion_queue_shutdown(cq); + GPR_ASSERT( + grpc_completion_queue_next(cq, gpr_inf_future(GPR_CLOCK_REALTIME), NULL) + .type == GRPC_QUEUE_SHUTDOWN); + grpc_completion_queue_destroy(cq); + + grpc_shutdown(); + gpr_free(addr); +} + +static grpc_channel *insecure_test_create_channel(const char *addr) { + return grpc_insecure_channel_create(addr, NULL, NULL); +} + +static const test_fixture insecure_test = { + "insecure", insecure_test_create_channel, +}; + +static grpc_channel *secure_test_create_channel(const char *addr) { + grpc_channel_credentials *ssl_creds = + grpc_ssl_credentials_create(test_root_cert, NULL, NULL); + grpc_arg ssl_name_override = {GRPC_ARG_STRING, + GRPC_SSL_TARGET_NAME_OVERRIDE_ARG, + {"foo.test.google.fr"}}; + grpc_channel_args *new_client_args = + grpc_channel_args_copy_and_add(NULL, &ssl_name_override, 1); + grpc_channel *channel = + grpc_secure_channel_create(ssl_creds, addr, new_client_args, NULL); + { + grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; + grpc_channel_args_destroy(&exec_ctx, new_client_args); + grpc_exec_ctx_finish(&exec_ctx); + } + grpc_channel_credentials_release(ssl_creds); + return channel; +} + +static const test_fixture secure_test = { + "secure", secure_test_create_channel, +}; + +int main(int argc, char **argv) { + grpc_test_init(argc, argv); + + run_timeouts_test(&insecure_test); + run_timeouts_test(&secure_test); + + run_channel_shutdown_before_timeout_test(&insecure_test); + run_channel_shutdown_before_timeout_test(&secure_test); +} diff --git a/test/core/surface/sequential_connectivity_test.c b/test/core/surface/sequential_connectivity_test.c index fbecdd7e388..a0f18e9bbfb 100644 --- a/test/core/surface/sequential_connectivity_test.c +++ b/test/core/surface/sequential_connectivity_test.c @@ -100,6 +100,9 @@ static void run_test(const test_fixture *fixture) { connect_deadline, cq, NULL); grpc_event ev = grpc_completion_queue_next( cq, gpr_inf_future(GPR_CLOCK_REALTIME), NULL); + /* check that the watcher from "watch state" was free'd */ + GPR_ASSERT(grpc_channel_num_external_connectivity_watchers(channels[i]) == + 0); GPR_ASSERT(ev.type == GRPC_OP_COMPLETE); GPR_ASSERT(ev.tag == NULL); GPR_ASSERT(ev.success == true); diff --git a/tools/run_tests/generated/sources_and_headers.json b/tools/run_tests/generated/sources_and_headers.json index 97e079d8a8e..6c4462e9f21 100644 --- a/tools/run_tests/generated/sources_and_headers.json +++ b/tools/run_tests/generated/sources_and_headers.json @@ -1755,6 +1755,23 @@ "third_party": false, "type": "target" }, + { + "deps": [ + "gpr", + "gpr_test_util", + "grpc", + "grpc_test_util" + ], + "headers": [], + "is_filegroup": false, + "language": "c", + "name": "num_external_connectivity_watchers_test", + "src": [ + "test/core/surface/num_external_connectivity_watchers_test.c" + ], + "third_party": false, + "type": "target" + }, { "deps": [ "gpr", diff --git a/tools/run_tests/generated/tests.json b/tools/run_tests/generated/tests.json index 82db441a1a5..1242025fc20 100644 --- a/tools/run_tests/generated/tests.json +++ b/tools/run_tests/generated/tests.json @@ -1829,6 +1829,30 @@ "windows" ] }, + { + "args": [], + "ci_platforms": [ + "linux", + "mac", + "posix", + "windows" + ], + "cpu_cost": 1.0, + "exclude_configs": [], + "exclude_iomgrs": [ + "uv" + ], + "flaky": false, + "gtest": false, + "language": "c", + "name": "num_external_connectivity_watchers_test", + "platforms": [ + "linux", + "mac", + "posix", + "windows" + ] + }, { "args": [], "ci_platforms": [ diff --git a/tools/run_tests/helper_scripts/run_ruby_end2end_tests.sh b/tools/run_tests/helper_scripts/run_ruby_end2end_tests.sh index 6688025260f..ab882d62bc7 100755 --- a/tools/run_tests/helper_scripts/run_ruby_end2end_tests.sh +++ b/tools/run_tests/helper_scripts/run_ruby_end2end_tests.sh @@ -41,4 +41,5 @@ ruby src/ruby/end2end/sig_int_during_channel_watch_driver.rb || EXIT_CODE=1 ruby src/ruby/end2end/killed_client_thread_driver.rb || EXIT_CODE=1 ruby src/ruby/end2end/forking_client_driver.rb || EXIT_CODE=1 ruby src/ruby/end2end/grpc_class_init_driver.rb || EXIT_CODE=1 +ruby src/ruby/end2end/multiple_killed_watching_threads_driver.rb || EXIT_CODE=1 exit $EXIT_CODE diff --git a/vsprojects/buildtests_c.sln b/vsprojects/buildtests_c.sln index 284dd68c165..b7696a99651 100644 --- a/vsprojects/buildtests_c.sln +++ b/vsprojects/buildtests_c.sln @@ -1350,6 +1350,17 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "no_server_test", "vcxproj\t {B23D3D1A-9438-4EDA-BEB6-9A0A03D17792} = {B23D3D1A-9438-4EDA-BEB6-9A0A03D17792} EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "num_external_connectivity_watchers_test", "vcxproj\test\num_external_connectivity_watchers_test\num_external_connectivity_watchers_test.vcxproj", "{4E856E4A-7497-1B1A-1AED-D4C01E5D873A}" + ProjectSection(myProperties) = preProject + lib = "False" + EndProjectSection + ProjectSection(ProjectDependencies) = postProject + {17BCAFC0-5FDC-4C94-AEB9-95F3E220614B} = {17BCAFC0-5FDC-4C94-AEB9-95F3E220614B} + {29D16885-7228-4C31-81ED-5F9187C7F2A9} = {29D16885-7228-4C31-81ED-5F9187C7F2A9} + {EAB0A629-17A9-44DB-B5FF-E91A721FE037} = {EAB0A629-17A9-44DB-B5FF-E91A721FE037} + {B23D3D1A-9438-4EDA-BEB6-9A0A03D17792} = {B23D3D1A-9438-4EDA-BEB6-9A0A03D17792} + EndProjectSection +EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "parse_address_test", "vcxproj\test\parse_address_test\parse_address_test.vcxproj", "{EDEA8257-AEA8-1B0A-F95B-8D6CD7286463}" ProjectSection(myProperties) = preProject lib = "False" @@ -3750,6 +3761,22 @@ Global {A66AC548-E2B9-74CD-293C-43526EE51DCE}.Release-DLL|Win32.Build.0 = Release|Win32 {A66AC548-E2B9-74CD-293C-43526EE51DCE}.Release-DLL|x64.ActiveCfg = Release|x64 {A66AC548-E2B9-74CD-293C-43526EE51DCE}.Release-DLL|x64.Build.0 = Release|x64 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Debug|Win32.ActiveCfg = Debug|Win32 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Debug|x64.ActiveCfg = Debug|x64 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Release|Win32.ActiveCfg = Release|Win32 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Release|x64.ActiveCfg = Release|x64 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Debug|Win32.Build.0 = Debug|Win32 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Debug|x64.Build.0 = Debug|x64 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Release|Win32.Build.0 = Release|Win32 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Release|x64.Build.0 = Release|x64 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Debug-DLL|Win32.ActiveCfg = Debug|Win32 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Debug-DLL|Win32.Build.0 = Debug|Win32 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Debug-DLL|x64.ActiveCfg = Debug|x64 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Debug-DLL|x64.Build.0 = Debug|x64 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Release-DLL|Win32.ActiveCfg = Release|Win32 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Release-DLL|Win32.Build.0 = Release|Win32 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Release-DLL|x64.ActiveCfg = Release|x64 + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A}.Release-DLL|x64.Build.0 = Release|x64 {EDEA8257-AEA8-1B0A-F95B-8D6CD7286463}.Debug|Win32.ActiveCfg = Debug|Win32 {EDEA8257-AEA8-1B0A-F95B-8D6CD7286463}.Debug|x64.ActiveCfg = Debug|x64 {EDEA8257-AEA8-1B0A-F95B-8D6CD7286463}.Release|Win32.ActiveCfg = Release|Win32 diff --git a/vsprojects/vcxproj/test/num_external_connectivity_watchers_test/num_external_connectivity_watchers_test.vcxproj b/vsprojects/vcxproj/test/num_external_connectivity_watchers_test/num_external_connectivity_watchers_test.vcxproj new file mode 100644 index 00000000000..2b373e8a16d --- /dev/null +++ b/vsprojects/vcxproj/test/num_external_connectivity_watchers_test/num_external_connectivity_watchers_test.vcxproj @@ -0,0 +1,199 @@ + + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {4E856E4A-7497-1B1A-1AED-D4C01E5D873A} + true + $(SolutionDir)IntDir\$(MSBuildProjectName)\ + + + + v100 + + + v110 + + + v120 + + + v140 + + + Application + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + + num_external_connectivity_watchers_test + static + Debug + static + Debug + + + num_external_connectivity_watchers_test + static + Release + static + Release + + + + NotUsing + Level3 + Disabled + WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) + true + MultiThreadedDebug + true + None + false + + + Console + true + false + + + + + + NotUsing + Level3 + Disabled + WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) + true + MultiThreadedDebug + true + None + false + + + Console + true + false + + + + + + NotUsing + Level3 + MaxSpeed + WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) + true + true + true + MultiThreaded + true + None + false + + + Console + true + false + true + true + + + + + + NotUsing + Level3 + MaxSpeed + WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) + true + true + true + MultiThreaded + true + None + false + + + Console + true + false + true + true + + + + + + + + + + {17BCAFC0-5FDC-4C94-AEB9-95F3E220614B} + + + {29D16885-7228-4C31-81ED-5F9187C7F2A9} + + + {EAB0A629-17A9-44DB-B5FF-E91A721FE037} + + + {B23D3D1A-9438-4EDA-BEB6-9A0A03D17792} + + + + + + + + + + + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + + + + + + + + diff --git a/vsprojects/vcxproj/test/num_external_connectivity_watchers_test/num_external_connectivity_watchers_test.vcxproj.filters b/vsprojects/vcxproj/test/num_external_connectivity_watchers_test/num_external_connectivity_watchers_test.vcxproj.filters new file mode 100644 index 00000000000..92a4198e307 --- /dev/null +++ b/vsprojects/vcxproj/test/num_external_connectivity_watchers_test/num_external_connectivity_watchers_test.vcxproj.filters @@ -0,0 +1,21 @@ + + + + + test\core\surface + + + + + + {9557f01e-947a-775e-4540-bf9a1fd9b19a} + + + {2b3a6de2-5820-e21f-5b39-66012c94bfbb} + + + {e3f23659-fc16-a4cc-a9e2-c73b625c38f5} + + + +