From 3f19333ced55edaf58f6191b2d7179d1efb233a2 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 31 Mar 2021 15:39:14 -0700 Subject: [PATCH] Move retry code into its own filter in the DynamicFilter stack (#25820) * rename ChannelData to ClientChannel * make ClientChannel class definition public * move retry code to its own filter * move LB call factory method to ClientChannel class * move dynamic termination filter out of ClientChannel class * update comments * remove retry parsing from client channel service config parser * fix clang-tidy * fix service_config_test * clang-format --- BUILD | 4 + BUILD.gn | 4 + CMakeLists.txt | 4 + Makefile | 4 + build_autogenerated.yaml | 8 + config.m4 | 2 + config.w32 | 2 + gRPC-C++.podspec | 4 + gRPC-Core.podspec | 6 + grpc.gemspec | 4 + grpc.gyp | 4 + package.xml | 4 + .../client_channel/channel_connectivity.cc | 80 +- .../filters/client_channel/client_channel.cc | 3383 +++-------------- .../filters/client_channel/client_channel.h | 530 ++- .../client_channel/client_channel_plugin.cc | 5 +- .../client_channel/lb_policy/grpclb/grpclb.cc | 16 +- .../client_channel/resolver_result_parsing.cc | 251 +- .../client_channel/resolver_result_parsing.h | 65 +- .../filters/client_channel/retry_filter.cc | 2164 +++++++++++ .../ext/filters/client_channel/retry_filter.h | 30 + .../client_channel/retry_service_config.cc | 285 ++ .../client_channel/retry_service_config.h | 90 + src/core/ext/xds/xds_client.cc | 16 +- src/python/grpcio/grpc_core_dependencies.py | 2 + .../client_channel/service_config_test.cc | 288 +- test/cpp/microbenchmarks/bm_call_create.cc | 3 +- tools/doxygen/Doxyfile.c++.internal | 4 + tools/doxygen/Doxyfile.core.internal | 4 + 29 files changed, 3803 insertions(+), 3463 deletions(-) create mode 100644 src/core/ext/filters/client_channel/retry_filter.cc create mode 100644 src/core/ext/filters/client_channel/retry_filter.h create mode 100644 src/core/ext/filters/client_channel/retry_service_config.cc create mode 100644 
src/core/ext/filters/client_channel/retry_service_config.h diff --git a/BUILD b/BUILD index 4db31563827..4cd06c0e5f2 100644 --- a/BUILD +++ b/BUILD @@ -1135,6 +1135,8 @@ grpc_cc_library( "src/core/ext/filters/client_channel/resolver.cc", "src/core/ext/filters/client_channel/resolver_registry.cc", "src/core/ext/filters/client_channel/resolver_result_parsing.cc", + "src/core/ext/filters/client_channel/retry_filter.cc", + "src/core/ext/filters/client_channel/retry_service_config.cc", "src/core/ext/filters/client_channel/retry_throttle.cc", "src/core/ext/filters/client_channel/server_address.cc", "src/core/ext/filters/client_channel/service_config.cc", @@ -1167,6 +1169,8 @@ grpc_cc_library( "src/core/ext/filters/client_channel/resolver_factory.h", "src/core/ext/filters/client_channel/resolver_registry.h", "src/core/ext/filters/client_channel/resolver_result_parsing.h", + "src/core/ext/filters/client_channel/retry_filter.h", + "src/core/ext/filters/client_channel/retry_service_config.h", "src/core/ext/filters/client_channel/retry_throttle.h", "src/core/ext/filters/client_channel/server_address.h", "src/core/ext/filters/client_channel/service_config.h", diff --git a/BUILD.gn b/BUILD.gn index 1d8f3404a47..1f8089e009c 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -300,6 +300,10 @@ config("grpc_config") { "src/core/ext/filters/client_channel/resolver_registry.h", "src/core/ext/filters/client_channel/resolver_result_parsing.cc", "src/core/ext/filters/client_channel/resolver_result_parsing.h", + "src/core/ext/filters/client_channel/retry_filter.cc", + "src/core/ext/filters/client_channel/retry_filter.h", + "src/core/ext/filters/client_channel/retry_service_config.cc", + "src/core/ext/filters/client_channel/retry_service_config.h", "src/core/ext/filters/client_channel/retry_throttle.cc", "src/core/ext/filters/client_channel/retry_throttle.h", "src/core/ext/filters/client_channel/server_address.cc", diff --git a/CMakeLists.txt b/CMakeLists.txt index b2c4e8153f0..b48dc0fbc07 100644 
--- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1518,6 +1518,8 @@ add_library(grpc src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc src/core/ext/filters/client_channel/resolver_registry.cc src/core/ext/filters/client_channel/resolver_result_parsing.cc + src/core/ext/filters/client_channel/retry_filter.cc + src/core/ext/filters/client_channel/retry_service_config.cc src/core/ext/filters/client_channel/retry_throttle.cc src/core/ext/filters/client_channel/server_address.cc src/core/ext/filters/client_channel/service_config.cc @@ -2331,6 +2333,8 @@ add_library(grpc_unsecure src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc src/core/ext/filters/client_channel/resolver_registry.cc src/core/ext/filters/client_channel/resolver_result_parsing.cc + src/core/ext/filters/client_channel/retry_filter.cc + src/core/ext/filters/client_channel/retry_service_config.cc src/core/ext/filters/client_channel/retry_throttle.cc src/core/ext/filters/client_channel/server_address.cc src/core/ext/filters/client_channel/service_config.cc diff --git a/Makefile b/Makefile index a3404edeb2e..759d958301f 100644 --- a/Makefile +++ b/Makefile @@ -1086,6 +1086,8 @@ LIBGRPC_SRC = \ src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc \ src/core/ext/filters/client_channel/resolver_registry.cc \ src/core/ext/filters/client_channel/resolver_result_parsing.cc \ + src/core/ext/filters/client_channel/retry_filter.cc \ + src/core/ext/filters/client_channel/retry_service_config.cc \ src/core/ext/filters/client_channel/retry_throttle.cc \ src/core/ext/filters/client_channel/server_address.cc \ src/core/ext/filters/client_channel/service_config.cc \ @@ -1747,6 +1749,8 @@ LIBGRPC_UNSECURE_SRC = \ src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc \ src/core/ext/filters/client_channel/resolver_registry.cc \ src/core/ext/filters/client_channel/resolver_result_parsing.cc \ + src/core/ext/filters/client_channel/retry_filter.cc \ + 
src/core/ext/filters/client_channel/retry_service_config.cc \ src/core/ext/filters/client_channel/retry_throttle.cc \ src/core/ext/filters/client_channel/server_address.cc \ src/core/ext/filters/client_channel/service_config.cc \ diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index c0829c11f11..b5d69ca47ca 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -412,6 +412,8 @@ libs: - src/core/ext/filters/client_channel/resolver_factory.h - src/core/ext/filters/client_channel/resolver_registry.h - src/core/ext/filters/client_channel/resolver_result_parsing.h + - src/core/ext/filters/client_channel/retry_filter.h + - src/core/ext/filters/client_channel/retry_service_config.h - src/core/ext/filters/client_channel/retry_throttle.h - src/core/ext/filters/client_channel/server_address.h - src/core/ext/filters/client_channel/service_config.h @@ -928,6 +930,8 @@ libs: - src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc - src/core/ext/filters/client_channel/resolver_registry.cc - src/core/ext/filters/client_channel/resolver_result_parsing.cc + - src/core/ext/filters/client_channel/retry_filter.cc + - src/core/ext/filters/client_channel/retry_service_config.cc - src/core/ext/filters/client_channel/retry_throttle.cc - src/core/ext/filters/client_channel/server_address.cc - src/core/ext/filters/client_channel/service_config.cc @@ -1610,6 +1614,8 @@ libs: - src/core/ext/filters/client_channel/resolver_factory.h - src/core/ext/filters/client_channel/resolver_registry.h - src/core/ext/filters/client_channel/resolver_result_parsing.h + - src/core/ext/filters/client_channel/retry_filter.h + - src/core/ext/filters/client_channel/retry_service_config.h - src/core/ext/filters/client_channel/retry_throttle.h - src/core/ext/filters/client_channel/server_address.h - src/core/ext/filters/client_channel/service_config.h @@ -1863,6 +1869,8 @@ libs: - src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc - 
src/core/ext/filters/client_channel/resolver_registry.cc - src/core/ext/filters/client_channel/resolver_result_parsing.cc + - src/core/ext/filters/client_channel/retry_filter.cc + - src/core/ext/filters/client_channel/retry_service_config.cc - src/core/ext/filters/client_channel/retry_throttle.cc - src/core/ext/filters/client_channel/server_address.cc - src/core/ext/filters/client_channel/service_config.cc diff --git a/config.m4 b/config.m4 index 36f9596a634..63b6aa51d34 100644 --- a/config.m4 +++ b/config.m4 @@ -92,6 +92,8 @@ if test "$PHP_GRPC" != "no"; then src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc \ src/core/ext/filters/client_channel/resolver_registry.cc \ src/core/ext/filters/client_channel/resolver_result_parsing.cc \ + src/core/ext/filters/client_channel/retry_filter.cc \ + src/core/ext/filters/client_channel/retry_service_config.cc \ src/core/ext/filters/client_channel/retry_throttle.cc \ src/core/ext/filters/client_channel/server_address.cc \ src/core/ext/filters/client_channel/service_config.cc \ diff --git a/config.w32 b/config.w32 index 1cee2fb8b88..8f3770a63f1 100644 --- a/config.w32 +++ b/config.w32 @@ -58,6 +58,8 @@ if (PHP_GRPC != "no") { "src\\core\\ext\\filters\\client_channel\\resolver\\xds\\xds_resolver.cc " + "src\\core\\ext\\filters\\client_channel\\resolver_registry.cc " + "src\\core\\ext\\filters\\client_channel\\resolver_result_parsing.cc " + + "src\\core\\ext\\filters\\client_channel\\retry_filter.cc " + + "src\\core\\ext\\filters\\client_channel\\retry_service_config.cc " + "src\\core\\ext\\filters\\client_channel\\retry_throttle.cc " + "src\\core\\ext\\filters\\client_channel\\server_address.cc " + "src\\core\\ext\\filters\\client_channel\\service_config.cc " + diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index 41e52a5d671..f4faed350bd 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -239,6 +239,8 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/resolver_factory.h', 
'src/core/ext/filters/client_channel/resolver_registry.h', 'src/core/ext/filters/client_channel/resolver_result_parsing.h', + 'src/core/ext/filters/client_channel/retry_filter.h', + 'src/core/ext/filters/client_channel/retry_service_config.h', 'src/core/ext/filters/client_channel/retry_throttle.h', 'src/core/ext/filters/client_channel/server_address.h', 'src/core/ext/filters/client_channel/service_config.h', @@ -878,6 +880,8 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/resolver_factory.h', 'src/core/ext/filters/client_channel/resolver_registry.h', 'src/core/ext/filters/client_channel/resolver_result_parsing.h', + 'src/core/ext/filters/client_channel/retry_filter.h', + 'src/core/ext/filters/client_channel/retry_service_config.h', 'src/core/ext/filters/client_channel/retry_throttle.h', 'src/core/ext/filters/client_channel/server_address.h', 'src/core/ext/filters/client_channel/service_config.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index 95d784a61d6..e3e1cfc4509 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -279,6 +279,10 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/resolver_registry.h', 'src/core/ext/filters/client_channel/resolver_result_parsing.cc', 'src/core/ext/filters/client_channel/resolver_result_parsing.h', + 'src/core/ext/filters/client_channel/retry_filter.cc', + 'src/core/ext/filters/client_channel/retry_filter.h', + 'src/core/ext/filters/client_channel/retry_service_config.cc', + 'src/core/ext/filters/client_channel/retry_service_config.h', 'src/core/ext/filters/client_channel/retry_throttle.cc', 'src/core/ext/filters/client_channel/retry_throttle.h', 'src/core/ext/filters/client_channel/server_address.cc', @@ -1436,6 +1440,8 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/resolver_factory.h', 'src/core/ext/filters/client_channel/resolver_registry.h', 'src/core/ext/filters/client_channel/resolver_result_parsing.h', + 'src/core/ext/filters/client_channel/retry_filter.h', + 
'src/core/ext/filters/client_channel/retry_service_config.h', 'src/core/ext/filters/client_channel/retry_throttle.h', 'src/core/ext/filters/client_channel/server_address.h', 'src/core/ext/filters/client_channel/service_config.h', diff --git a/grpc.gemspec b/grpc.gemspec index 47091fc1621..9e3c88ef256 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -195,6 +195,10 @@ Gem::Specification.new do |s| s.files += %w( src/core/ext/filters/client_channel/resolver_registry.h ) s.files += %w( src/core/ext/filters/client_channel/resolver_result_parsing.cc ) s.files += %w( src/core/ext/filters/client_channel/resolver_result_parsing.h ) + s.files += %w( src/core/ext/filters/client_channel/retry_filter.cc ) + s.files += %w( src/core/ext/filters/client_channel/retry_filter.h ) + s.files += %w( src/core/ext/filters/client_channel/retry_service_config.cc ) + s.files += %w( src/core/ext/filters/client_channel/retry_service_config.h ) s.files += %w( src/core/ext/filters/client_channel/retry_throttle.cc ) s.files += %w( src/core/ext/filters/client_channel/retry_throttle.h ) s.files += %w( src/core/ext/filters/client_channel/server_address.cc ) diff --git a/grpc.gyp b/grpc.gyp index 479bf559fa5..5dbe669f626 100644 --- a/grpc.gyp +++ b/grpc.gyp @@ -496,6 +496,8 @@ 'src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc', 'src/core/ext/filters/client_channel/resolver_registry.cc', 'src/core/ext/filters/client_channel/resolver_result_parsing.cc', + 'src/core/ext/filters/client_channel/retry_filter.cc', + 'src/core/ext/filters/client_channel/retry_service_config.cc', 'src/core/ext/filters/client_channel/retry_throttle.cc', 'src/core/ext/filters/client_channel/server_address.cc', 'src/core/ext/filters/client_channel/service_config.cc', @@ -1135,6 +1137,8 @@ 'src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc', 'src/core/ext/filters/client_channel/resolver_registry.cc', 'src/core/ext/filters/client_channel/resolver_result_parsing.cc', + 
'src/core/ext/filters/client_channel/retry_filter.cc', + 'src/core/ext/filters/client_channel/retry_service_config.cc', 'src/core/ext/filters/client_channel/retry_throttle.cc', 'src/core/ext/filters/client_channel/server_address.cc', 'src/core/ext/filters/client_channel/service_config.cc', diff --git a/package.xml b/package.xml index df7be0db2d0..7bd61a7447b 100644 --- a/package.xml +++ b/package.xml @@ -175,6 +175,10 @@ + + + + diff --git a/src/core/ext/filters/client_channel/channel_connectivity.cc b/src/core/ext/filters/client_channel/channel_connectivity.cc index 4ebb976efe5..838ede6203e 100644 --- a/src/core/ext/filters/client_channel/channel_connectivity.cc +++ b/src/core/ext/filters/client_channel/channel_connectivity.cc @@ -32,27 +32,21 @@ grpc_connectivity_state grpc_channel_check_connectivity_state( grpc_channel* channel, int try_to_connect) { - /* forward through to the underlying client channel */ - grpc_channel_element* client_channel_elem = - grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel)); grpc_core::ApplicationCallbackExecCtx callback_exec_ctx; grpc_core::ExecCtx exec_ctx; - grpc_connectivity_state state; GRPC_API_TRACE( "grpc_channel_check_connectivity_state(channel=%p, try_to_connect=%d)", 2, (channel, try_to_connect)); - if (GPR_LIKELY(client_channel_elem->filter == &grpc_client_channel_filter)) { - state = grpc_client_channel_check_connectivity_state(client_channel_elem, - try_to_connect); - - return state; + // Forward through to the underlying client channel. 
+ grpc_core::ClientChannel* client_channel = + grpc_core::ClientChannel::GetFromChannel(channel); + if (GPR_UNLIKELY(client_channel == nullptr)) { + gpr_log(GPR_ERROR, + "grpc_channel_check_connectivity_state called on something that is " + "not a client channel"); + return GRPC_CHANNEL_SHUTDOWN; } - gpr_log(GPR_ERROR, - "grpc_channel_check_connectivity_state called on something that is " - "not a client channel, but '%s'", - client_channel_elem->filter->name); - - return GRPC_CHANNEL_SHUTDOWN; + return client_channel->CheckConnectivityState(try_to_connect); } typedef enum { @@ -79,13 +73,7 @@ struct state_watcher { } // namespace static void delete_state_watcher(state_watcher* w) { - grpc_channel_element* client_channel_elem = grpc_channel_stack_last_element( - grpc_channel_get_channel_stack(w->channel)); - if (client_channel_elem->filter == &grpc_client_channel_filter) { - GRPC_CHANNEL_INTERNAL_UNREF(w->channel, "watch_channel_connectivity"); - } else { - abort(); - } + GRPC_CHANNEL_INTERNAL_UNREF(w->channel, "watch_channel_connectivity"); gpr_mu_destroy(&w->mu); gpr_free(w); } @@ -120,12 +108,10 @@ static void partly_done(state_watcher* w, bool due_to_completion, if (due_to_completion) { grpc_timer_cancel(&w->alarm); } else { - grpc_channel_element* client_channel_elem = grpc_channel_stack_last_element( - grpc_channel_get_channel_stack(w->channel)); - grpc_client_channel_watch_connectivity_state( - client_channel_elem, - grpc_polling_entity_create_from_pollset(grpc_cq_pollset(w->cq)), - nullptr, &w->on_complete, nullptr); + grpc_core::ClientChannel* client_channel = + grpc_core::ClientChannel::GetFromChannel(w->channel); + GPR_ASSERT(client_channel != nullptr); + client_channel->CancelExternalConnectivityWatcher(&w->on_complete); } gpr_mu_lock(&w->mu); @@ -187,10 +173,15 @@ static void timeout_complete(void* pw, grpc_error* error) { } int grpc_channel_num_external_connectivity_watchers(grpc_channel* channel) { - grpc_channel_element* client_channel_elem = - 
grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel)); - return grpc_client_channel_num_external_connectivity_watchers( - client_channel_elem); + grpc_core::ClientChannel* client_channel = + grpc_core::ClientChannel::GetFromChannel(channel); + if (client_channel == nullptr) { + gpr_log(GPR_ERROR, + "grpc_channel_num_external_connectivity_watchers called on " + "something that is not a client channel"); + return 0; + } + return client_channel->NumExternalConnectivityWatchers(); } typedef struct watcher_timer_init_arg { @@ -207,20 +198,14 @@ static void watcher_timer_init(void* arg, grpc_error* /*error_ignored*/) { } int grpc_channel_support_connectivity_watcher(grpc_channel* channel) { - grpc_channel_element* client_channel_elem = - grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel)); - return client_channel_elem->filter != &grpc_client_channel_filter ? 0 : 1; + return grpc_core::ClientChannel::GetFromChannel(channel) != nullptr; } void grpc_channel_watch_connectivity_state( grpc_channel* channel, grpc_connectivity_state last_observed_state, gpr_timespec deadline, grpc_completion_queue* cq, void* tag) { - grpc_channel_element* client_channel_elem = - grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel)); grpc_core::ApplicationCallbackExecCtx callback_exec_ctx; grpc_core::ExecCtx exec_ctx; - state_watcher* w = static_cast(gpr_malloc(sizeof(*w))); - GRPC_API_TRACE( "grpc_channel_watch_connectivity_state(" "channel=%p, last_observed_state=%d, " @@ -233,6 +218,7 @@ void grpc_channel_watch_connectivity_state( GPR_ASSERT(grpc_cq_begin_op(cq, tag)); + state_watcher* w = static_cast(gpr_malloc(sizeof(*w))); gpr_mu_init(&w->mu); GRPC_CLOSURE_INIT(&w->on_complete, watch_complete, w, grpc_schedule_on_exec_ctx); @@ -252,13 +238,11 @@ void grpc_channel_watch_connectivity_state( GRPC_CLOSURE_INIT(&w->watcher_timer_init, watcher_timer_init, wa, grpc_schedule_on_exec_ctx); - if (client_channel_elem->filter == 
&grpc_client_channel_filter) { - GRPC_CHANNEL_INTERNAL_REF(channel, "watch_channel_connectivity"); - grpc_client_channel_watch_connectivity_state( - client_channel_elem, - grpc_polling_entity_create_from_pollset(grpc_cq_pollset(cq)), &w->state, - &w->on_complete, &w->watcher_timer_init); - } else { - abort(); - } + GRPC_CHANNEL_INTERNAL_REF(channel, "watch_channel_connectivity"); + grpc_core::ClientChannel* client_channel = + grpc_core::ClientChannel::GetFromChannel(channel); + GPR_ASSERT(client_channel != nullptr); + client_channel->AddExternalConnectivityWatcher( + grpc_polling_entity_create_from_pollset(grpc_cq_pollset(cq)), &w->state, + &w->on_complete, &w->watcher_timer_init); } diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index c86ff900577..5e8a9d1775b 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -51,7 +51,7 @@ #include "src/core/ext/filters/client_channel/proxy_mapper_registry.h" #include "src/core/ext/filters/client_channel/resolver_registry.h" #include "src/core/ext/filters/client_channel/resolver_result_parsing.h" -#include "src/core/ext/filters/client_channel/retry_throttle.h" +#include "src/core/ext/filters/client_channel/retry_filter.h" #include "src/core/ext/filters/client_channel/service_config.h" #include "src/core/ext/filters/client_channel/service_config_call_data.h" #include "src/core/ext/filters/client_channel/subchannel.h" @@ -61,7 +61,6 @@ #include "src/core/lib/channel/connected_channel.h" #include "src/core/lib/channel/status_util.h" #include "src/core/lib/gpr/string.h" -#include "src/core/lib/gprpp/manual_constructor.h" #include "src/core/lib/gprpp/sync.h" #include "src/core/lib/iomgr/iomgr.h" #include "src/core/lib/iomgr/polling_entity.h" @@ -89,309 +88,20 @@ // any even moderately compelling reason to do so. 
#define RETRY_BACKOFF_JITTER 0.2 -// Max number of batches that can be pending on a call at any given -// time. This includes one batch for each of the following ops: -// recv_initial_metadata -// send_initial_metadata -// recv_message -// send_message -// recv_trailing_metadata -// send_trailing_metadata -#define MAX_PENDING_BATCHES 6 - -// Channel arg containing a pointer to the ChannelData object. -#define GRPC_ARG_CLIENT_CHANNEL_DATA "grpc.internal.client_channel_data" - -// Channel arg containing a pointer to the RetryThrottleData object. -#define GRPC_ARG_RETRY_THROTTLE_DATA "grpc.internal.retry_throttle_data" - namespace grpc_core { using internal::ClientChannelGlobalParsedConfig; using internal::ClientChannelMethodParsedConfig; using internal::ClientChannelServiceConfigParser; -using internal::ServerRetryThrottleData; TraceFlag grpc_client_channel_call_trace(false, "client_channel_call"); TraceFlag grpc_client_channel_routing_trace(false, "client_channel_routing"); -namespace { - -// -// ChannelData definition -// - -class ChannelData { - public: - class CallData; - class RetryingCall; - class LoadBalancedCall; - - static grpc_error* Init(grpc_channel_element* elem, - grpc_channel_element_args* args); - static void Destroy(grpc_channel_element* elem); - static void StartTransportOp(grpc_channel_element* elem, - grpc_transport_op* op); - static void GetChannelInfo(grpc_channel_element* elem, - const grpc_channel_info* info); - - grpc_connectivity_state CheckConnectivityState(bool try_to_connect); - - void AddExternalConnectivityWatcher(grpc_polling_entity pollent, - grpc_connectivity_state* state, - grpc_closure* on_complete, - grpc_closure* watcher_timer_init) { - new ExternalConnectivityWatcher(this, pollent, state, on_complete, - watcher_timer_init); - } - - void RemoveExternalConnectivityWatcher(grpc_closure* on_complete, - bool cancel) { - ExternalConnectivityWatcher::RemoveWatcherFromExternalWatchersMap( - this, on_complete, cancel); - } - - int 
NumExternalConnectivityWatchers() const { - MutexLock lock(&external_watchers_mu_); - return static_cast(external_watchers_.size()); - } - - void AddConnectivityWatcher( - grpc_connectivity_state initial_state, - OrphanablePtr watcher); - void RemoveConnectivityWatcher( - AsyncConnectivityStateWatcherInterface* watcher); - - private: - class DynamicTerminationFilterChannelData; - class SubchannelWrapper; - class ClientChannelControlHelper; - class ConnectivityWatcherAdder; - class ConnectivityWatcherRemover; - - // Represents a pending connectivity callback from an external caller - // via grpc_client_channel_watch_connectivity_state(). - class ExternalConnectivityWatcher : public ConnectivityStateWatcherInterface { - public: - ExternalConnectivityWatcher(ChannelData* chand, grpc_polling_entity pollent, - grpc_connectivity_state* state, - grpc_closure* on_complete, - grpc_closure* watcher_timer_init); - - ~ExternalConnectivityWatcher() override; - - // Removes the watcher from the external_watchers_ map. - static void RemoveWatcherFromExternalWatchersMap(ChannelData* chand, - grpc_closure* on_complete, - bool cancel); - - void Notify(grpc_connectivity_state state, - const absl::Status& /* status */) override; - - void Cancel(); - - private: - // Adds the watcher to state_tracker_. Consumes the ref that is passed to it - // from Start(). 
- void AddWatcherLocked(); - void RemoveWatcherLocked(); - - ChannelData* chand_; - grpc_polling_entity pollent_; - grpc_connectivity_state initial_state_; - grpc_connectivity_state* state_; - grpc_closure* on_complete_; - grpc_closure* watcher_timer_init_; - Atomic done_{false}; - }; - - class ResolverResultHandler : public Resolver::ResultHandler { - public: - explicit ResolverResultHandler(ChannelData* chand) : chand_(chand) { - GRPC_CHANNEL_STACK_REF(chand_->owning_stack_, "ResolverResultHandler"); - } - - ~ResolverResultHandler() override { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { - gpr_log(GPR_INFO, "chand=%p: resolver shutdown complete", chand_); - } - GRPC_CHANNEL_STACK_UNREF(chand_->owning_stack_, "ResolverResultHandler"); - } - - void ReturnResult(Resolver::Result result) override { - chand_->OnResolverResultChangedLocked(std::move(result)); - } - - void ReturnError(grpc_error* error) override { - chand_->OnResolverErrorLocked(error); - } - - private: - ChannelData* chand_; - }; - - struct ResolverQueuedCall { - grpc_call_element* elem; - ResolverQueuedCall* next = nullptr; - }; - struct LbQueuedCall { - LoadBalancedCall* lb_call; - LbQueuedCall* next = nullptr; - }; - - ChannelData(grpc_channel_element_args* args, grpc_error** error); - ~ChannelData(); - - // Note: Does NOT return a new ref. - grpc_error* disconnect_error() const { - return disconnect_error_.Load(MemoryOrder::ACQUIRE); - } - - // Note: All methods with "Locked" suffix must be invoked from within - // work_serializer_. 
- - void OnResolverResultChangedLocked(Resolver::Result result); - void OnResolverErrorLocked(grpc_error* error); - - void CreateOrUpdateLbPolicyLocked( - RefCountedPtr lb_policy_config, - Resolver::Result result); - OrphanablePtr CreateLbPolicyLocked( - const grpc_channel_args& args); - - void UpdateStateAndPickerLocked( - grpc_connectivity_state state, const absl::Status& status, - const char* reason, - std::unique_ptr picker); - - void UpdateServiceConfigInControlPlaneLocked( - RefCountedPtr service_config, - RefCountedPtr config_selector, - const internal::ClientChannelGlobalParsedConfig* parsed_service_config, - const char* lb_policy_name); - - void UpdateServiceConfigInDataPlaneLocked(); - - void CreateResolverLocked(); - void DestroyResolverAndLbPolicyLocked(); - - grpc_error* DoPingLocked(grpc_transport_op* op); - - void StartTransportOpLocked(grpc_transport_op* op); - - void TryToConnectLocked(); - - // These methods all require holding resolution_mu_. - void AddResolverQueuedCall(ResolverQueuedCall* call, - grpc_polling_entity* pollent) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(resolution_mu_); - void RemoveResolverQueuedCall(ResolverQueuedCall* to_remove, - grpc_polling_entity* pollent) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(resolution_mu_); - - // These methods all require holding data_plane_mu_. - void AddLbQueuedCall(LbQueuedCall* call, grpc_polling_entity* pollent) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(data_plane_mu_); - void RemoveLbQueuedCall(LbQueuedCall* to_remove, grpc_polling_entity* pollent) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(data_plane_mu_); - RefCountedPtr GetConnectedSubchannelInDataPlane( - SubchannelInterface* subchannel) const - ABSL_EXCLUSIVE_LOCKS_REQUIRED(data_plane_mu_); - - // - // Fields set at construction and never modified. 
- // - const bool deadline_checking_enabled_; - const bool enable_retries_; - const size_t per_rpc_retry_buffer_size_; - grpc_channel_stack* owning_stack_; - ClientChannelFactory* client_channel_factory_; - const grpc_channel_args* channel_args_; - RefCountedPtr default_service_config_; - std::string server_name_; - UniquePtr target_uri_; - channelz::ChannelNode* channelz_node_; - - // - // Fields related to name resolution. Guarded by resolution_mu_. - // - mutable Mutex resolution_mu_; - // Linked list of calls queued waiting for resolver result. - ResolverQueuedCall* resolver_queued_calls_ ABSL_GUARDED_BY(resolution_mu_) = - nullptr; - // Data from service config. - grpc_error* resolver_transient_failure_error_ - ABSL_GUARDED_BY(resolution_mu_) = GRPC_ERROR_NONE; - bool received_service_config_data_ ABSL_GUARDED_BY(resolution_mu_) = false; - RefCountedPtr service_config_ ABSL_GUARDED_BY(resolution_mu_); - RefCountedPtr config_selector_ - ABSL_GUARDED_BY(resolution_mu_); - RefCountedPtr dynamic_filters_ - ABSL_GUARDED_BY(resolution_mu_); - - // - // Fields used in the data plane. Guarded by data_plane_mu_. - // - mutable Mutex data_plane_mu_; - std::unique_ptr picker_ - ABSL_GUARDED_BY(data_plane_mu_); - // Linked list of calls queued waiting for LB pick. - LbQueuedCall* lb_queued_calls_ ABSL_GUARDED_BY(data_plane_mu_) = nullptr; - - // - // Fields used in the control plane. Guarded by work_serializer. - // - std::shared_ptr work_serializer_; - grpc_pollset_set* interested_parties_; - ConnectivityStateTracker state_tracker_; - OrphanablePtr resolver_; - bool previous_resolution_contained_addresses_ = false; - RefCountedPtr saved_service_config_; - RefCountedPtr saved_config_selector_; - absl::optional health_check_service_name_; - OrphanablePtr lb_policy_; - RefCountedPtr subchannel_pool_; - // The number of SubchannelWrapper instances referencing a given Subchannel. - std::map subchannel_refcount_map_; - // The set of SubchannelWrappers that currently exist. 
- // No need to hold a ref, since the map is updated in the control-plane - // work_serializer when the SubchannelWrappers are created and destroyed. - std::set subchannel_wrappers_; - // Pending ConnectedSubchannel updates for each SubchannelWrapper. - // Updates are queued here in the control plane work_serializer and then - // applied in the data plane mutex when the picker is updated. - std::map, RefCountedPtr> - pending_subchannel_updates_; - int keepalive_time_ = -1; - - // - // Fields accessed from both data plane mutex and control plane - // work_serializer. - // - Atomic disconnect_error_; - - // - // Fields guarded by a mutex, since they need to be accessed - // synchronously via get_channel_info(). - // - Mutex info_mu_; - UniquePtr info_lb_policy_name_ ABSL_GUARDED_BY(info_mu_); - UniquePtr info_service_config_json_ ABSL_GUARDED_BY(info_mu_); - - // - // Fields guarded by a mutex, since they need to be accessed - // synchronously via grpc_channel_num_external_connectivity_watchers(). - // - mutable Mutex external_watchers_mu_; - std::map> - external_watchers_ ABSL_GUARDED_BY(external_watchers_mu_); -}; - // -// ChannelData::CallData definition +// ClientChannel::CallData definition // -class ChannelData::CallData { +class ClientChannel::CallData { public: static grpc_error* Init(grpc_call_element* elem, const grpc_call_element_args* args); @@ -405,12 +115,12 @@ class ChannelData::CallData { // Invoked by channel for queued calls when name resolution is completed. static void CheckResolution(void* arg, grpc_error* error); // Helper function for applying the service config to a call while - // holding ChannelData::resolution_mu_. + // holding ClientChannel::resolution_mu_. // Returns true if the service config has been applied to the call, in which // case the caller must invoke ResolutionDone() or AsyncResolutionDone() // with the returned error. 
bool CheckResolutionLocked(grpc_call_element* elem, grpc_error** error) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ChannelData::resolution_mu_); + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ClientChannel::resolution_mu_); // Schedules a callback to continue processing the call once // resolution is complete. The callback will not run until after this // method returns. @@ -419,7 +129,7 @@ class ChannelData::CallData { private: class ResolverQueuedCallCanceller; - CallData(grpc_call_element* elem, const ChannelData& chand, + CallData(grpc_call_element* elem, const ClientChannel& chand, const grpc_call_element_args& args); ~CallData(); @@ -457,18 +167,18 @@ class ChannelData::CallData { // the call should be failed. grpc_error* ApplyServiceConfigToCallLocked( grpc_call_element* elem, grpc_metadata_batch* initial_metadata) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ChannelData::resolution_mu_); + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ClientChannel::resolution_mu_); // Invoked when the resolver result is applied to the caller, on both // success or failure. static void ResolutionDone(void* arg, grpc_error* error); // Removes the call (if present) from the channel's list of calls queued // for name resolution. void MaybeRemoveCallFromResolverQueuedCallsLocked(grpc_call_element* elem) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ChannelData::resolution_mu_); + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ClientChannel::resolution_mu_); // Adds the call (if not already present) to the channel's list of // calls queued for name resolution. void MaybeAddCallToResolverQueuedCallsLocked(grpc_call_element* elem) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ChannelData::resolution_mu_); + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ClientChannel::resolution_mu_); static void RecvInitialMetadataReadyForConfigSelectorCommitCallback( void* arg, grpc_error* error); @@ -497,10 +207,10 @@ class ChannelData::CallData { grpc_closure pick_closure_; - // Accessed while holding ChannelData::resolution_mu_. + // Accessed while holding ClientChannel::resolution_mu_. 
bool service_config_applied_ = false; bool queued_pending_resolver_result_ = false; - ChannelData::ResolverQueuedCall resolver_queued_call_; + ClientChannel::ResolverQueuedCall resolver_queued_call_; ResolverQueuedCallCanceller* resolver_call_canceller_ = nullptr; std::function on_call_committed_; @@ -523,561 +233,68 @@ class ChannelData::CallData { }; // -// ChannelData::RetryingCall definition -// - -class ChannelData::RetryingCall { - public: - RetryingCall( - ChannelData* chand, const grpc_call_element_args& args, - grpc_polling_entity* pollent, - RefCountedPtr retry_throttle_data, - const ClientChannelMethodParsedConfig::RetryPolicy* retry_policy); - ~RetryingCall(); - - void StartTransportStreamOpBatch(grpc_transport_stream_op_batch* batch); - - RefCountedPtr subchannel_call() const; - - private: - // State used for starting a retryable batch on a subchannel call. - // This provides its own grpc_transport_stream_op_batch and other data - // structures needed to populate the ops in the batch. - // We allocate one struct on the arena for each attempt at starting a - // batch on a given subchannel call. - struct SubchannelCallBatchData { - // Creates a SubchannelCallBatchData object on the call's arena with the - // specified refcount. If set_on_complete is true, the batch's - // on_complete callback will be set to point to on_complete(); - // otherwise, the batch's on_complete callback will be null. - static SubchannelCallBatchData* Create(RetryingCall* call, int refcount, - bool set_on_complete); - - void Unref() { - if (gpr_unref(&refs)) Destroy(); - } - - SubchannelCallBatchData(RetryingCall* call, int refcount, - bool set_on_complete); - // All dtor code must be added in `Destroy()`. This is because we may - // call closures in `SubchannelCallBatchData` after they are unrefed by - // `Unref()`, and msan would complain about accessing this class - // after calling dtor. As a result we cannot call the `dtor` in `Unref()`. 
- // TODO(soheil): We should try to call the dtor in `Unref()`. - ~SubchannelCallBatchData() { Destroy(); } - void Destroy(); - - gpr_refcount refs; - grpc_call_element* elem; - RetryingCall* call; - RefCountedPtr lb_call; - // The batch to use in the subchannel call. - // Its payload field points to SubchannelCallRetryState::batch_payload. - grpc_transport_stream_op_batch batch; - // For intercepting on_complete. - grpc_closure on_complete; - }; - - // Retry state associated with a subchannel call. - // Stored in the parent_data of the subchannel call object. - struct SubchannelCallRetryState { - explicit SubchannelCallRetryState(grpc_call_context_element* context) - : batch_payload(context), - started_send_initial_metadata(false), - completed_send_initial_metadata(false), - started_send_trailing_metadata(false), - completed_send_trailing_metadata(false), - started_recv_initial_metadata(false), - completed_recv_initial_metadata(false), - started_recv_trailing_metadata(false), - completed_recv_trailing_metadata(false), - retry_dispatched(false) {} - - // SubchannelCallBatchData.batch.payload points to this. - grpc_transport_stream_op_batch_payload batch_payload; - // For send_initial_metadata. - // Note that we need to make a copy of the initial metadata for each - // subchannel call instead of just referring to the copy in call_data, - // because filters in the subchannel stack will probably add entries, - // so we need to start in a pristine state for each attempt of the call. - grpc_linked_mdelem* send_initial_metadata_storage; - grpc_metadata_batch send_initial_metadata; - // For send_message. - // TODO(roth): Restructure this to eliminate use of ManualConstructor. - ManualConstructor send_message; - // For send_trailing_metadata. - grpc_linked_mdelem* send_trailing_metadata_storage; - grpc_metadata_batch send_trailing_metadata; - // For intercepting recv_initial_metadata. 
- grpc_metadata_batch recv_initial_metadata; - grpc_closure recv_initial_metadata_ready; - bool trailing_metadata_available = false; - // For intercepting recv_message. - grpc_closure recv_message_ready; - OrphanablePtr recv_message; - // For intercepting recv_trailing_metadata. - grpc_metadata_batch recv_trailing_metadata; - grpc_transport_stream_stats collect_stats; - grpc_closure recv_trailing_metadata_ready; - // These fields indicate which ops have been started and completed on - // this subchannel call. - size_t started_send_message_count = 0; - size_t completed_send_message_count = 0; - size_t started_recv_message_count = 0; - size_t completed_recv_message_count = 0; - bool started_send_initial_metadata : 1; - bool completed_send_initial_metadata : 1; - bool started_send_trailing_metadata : 1; - bool completed_send_trailing_metadata : 1; - bool started_recv_initial_metadata : 1; - bool completed_recv_initial_metadata : 1; - bool started_recv_trailing_metadata : 1; - bool completed_recv_trailing_metadata : 1; - // State for callback processing. - SubchannelCallBatchData* recv_initial_metadata_ready_deferred_batch = - nullptr; - grpc_error* recv_initial_metadata_error = GRPC_ERROR_NONE; - SubchannelCallBatchData* recv_message_ready_deferred_batch = nullptr; - grpc_error* recv_message_error = GRPC_ERROR_NONE; - SubchannelCallBatchData* recv_trailing_metadata_internal_batch = nullptr; - // NOTE: Do not move this next to the metadata bitfields above. That would - // save space but will also result in a data race because compiler - // will generate a 2 byte store which overwrites the meta-data - // fields upon setting this field. - bool retry_dispatched : 1; - }; - - // Pending batches stored in call data. - struct PendingBatch { - // The pending batch. If nullptr, this slot is empty. - grpc_transport_stream_op_batch* batch = nullptr; - // Indicates whether payload for send ops has been cached in CallData. 
- bool send_ops_cached = false; - }; - - // Caches data for send ops so that it can be retried later, if not - // already cached. - void MaybeCacheSendOpsForBatch(PendingBatch* pending); - void FreeCachedSendInitialMetadata(); - // Frees cached send_message at index idx. - void FreeCachedSendMessage(size_t idx); - void FreeCachedSendTrailingMetadata(); - // Frees cached send ops that have already been completed after - // committing the call. - void FreeCachedSendOpDataAfterCommit(SubchannelCallRetryState* retry_state); - // Frees cached send ops that were completed by the completed batch in - // batch_data. Used when batches are completed after the call is committed. - void FreeCachedSendOpDataForCompletedBatch( - SubchannelCallBatchData* batch_data, - SubchannelCallRetryState* retry_state); - - // Returns the index into pending_batches_ to be used for batch. - static size_t GetBatchIndex(grpc_transport_stream_op_batch* batch); - void PendingBatchesAdd(grpc_transport_stream_op_batch* batch); - void PendingBatchClear(PendingBatch* pending); - void MaybeClearPendingBatch(PendingBatch* pending); - static void FailPendingBatchInCallCombiner(void* arg, grpc_error* error); - // A predicate type and some useful implementations for PendingBatchesFail(). - typedef bool (*YieldCallCombinerPredicate)( - const CallCombinerClosureList& closures); - static bool YieldCallCombiner(const CallCombinerClosureList& /*closures*/) { - return true; - } - static bool NoYieldCallCombiner(const CallCombinerClosureList& /*closures*/) { - return false; - } - static bool YieldCallCombinerIfPendingBatchesFound( - const CallCombinerClosureList& closures) { - return closures.size() > 0; - } - // Fails all pending batches. - // If yield_call_combiner_predicate returns true, assumes responsibility for - // yielding the call combiner. 
- void PendingBatchesFail( - grpc_error* error, - YieldCallCombinerPredicate yield_call_combiner_predicate); - static void ResumePendingBatchInCallCombiner(void* arg, grpc_error* ignored); - // Resumes all pending batches on lb_call_. - void PendingBatchesResume(); - // Returns a pointer to the first pending batch for which predicate(batch) - // returns true, or null if not found. - template - PendingBatch* PendingBatchFind(const char* log_message, Predicate predicate); - - // Commits the call so that no further retry attempts will be performed. - void RetryCommit(SubchannelCallRetryState* retry_state); - // Starts a retry after appropriate back-off. - void DoRetry(SubchannelCallRetryState* retry_state, - grpc_millis server_pushback_ms); - // Returns true if the call is being retried. - bool MaybeRetry(SubchannelCallBatchData* batch_data, grpc_status_code status, - grpc_mdelem* server_pushback_md); - - // Invokes recv_initial_metadata_ready for a subchannel batch. - static void InvokeRecvInitialMetadataCallback(void* arg, grpc_error* error); - // Intercepts recv_initial_metadata_ready callback for retries. - // Commits the call and returns the initial metadata up the stack. - static void RecvInitialMetadataReady(void* arg, grpc_error* error); - - // Invokes recv_message_ready for a subchannel batch. - static void InvokeRecvMessageCallback(void* arg, grpc_error* error); - // Intercepts recv_message_ready callback for retries. - // Commits the call and returns the message up the stack. - static void RecvMessageReady(void* arg, grpc_error* error); - - // Sets *status and *server_pushback_md based on md_batch and error. - // Only sets *server_pushback_md if server_pushback_md != nullptr. - void GetCallStatus(grpc_metadata_batch* md_batch, grpc_error* error, - grpc_status_code* status, - grpc_mdelem** server_pushback_md); - // Adds recv_trailing_metadata_ready closure to closures. 
- void AddClosureForRecvTrailingMetadataReady( - SubchannelCallBatchData* batch_data, grpc_error* error, - CallCombinerClosureList* closures); - // Adds any necessary closures for deferred recv_initial_metadata and - // recv_message callbacks to closures. - static void AddClosuresForDeferredRecvCallbacks( - SubchannelCallBatchData* batch_data, - SubchannelCallRetryState* retry_state, CallCombinerClosureList* closures); - // Returns true if any op in the batch was not yet started. - // Only looks at send ops, since recv ops are always started immediately. - bool PendingBatchIsUnstarted(PendingBatch* pending, - SubchannelCallRetryState* retry_state); - // For any pending batch containing an op that has not yet been started, - // adds the pending batch's completion closures to closures. - void AddClosuresToFailUnstartedPendingBatches( - SubchannelCallRetryState* retry_state, grpc_error* error, - CallCombinerClosureList* closures); - // Runs necessary closures upon completion of a call attempt. - void RunClosuresForCompletedCall(SubchannelCallBatchData* batch_data, - grpc_error* error); - // Intercepts recv_trailing_metadata_ready callback for retries. - // Commits the call and returns the trailing metadata up the stack. - static void RecvTrailingMetadataReady(void* arg, grpc_error* error); - - // Adds the on_complete closure for the pending batch completed in - // batch_data to closures. - void AddClosuresForCompletedPendingBatch(SubchannelCallBatchData* batch_data, - grpc_error* error, - CallCombinerClosureList* closures); - - // If there are any cached ops to replay or pending ops to start on the - // subchannel call, adds a closure to closures to invoke - // StartRetriableSubchannelBatches(). - void AddClosuresForReplayOrPendingSendOps( - SubchannelCallBatchData* batch_data, - SubchannelCallRetryState* retry_state, CallCombinerClosureList* closures); - - // Callback used to intercept on_complete from subchannel calls. - // Called only when retries are enabled. 
- static void OnComplete(void* arg, grpc_error* error); - - static void StartBatchInCallCombiner(void* arg, grpc_error* ignored); - // Adds a closure to closures that will execute batch in the call combiner. - void AddClosureForSubchannelBatch(grpc_transport_stream_op_batch* batch, - CallCombinerClosureList* closures); - // Adds retriable send_initial_metadata op to batch_data. - void AddRetriableSendInitialMetadataOp(SubchannelCallRetryState* retry_state, - SubchannelCallBatchData* batch_data); - // Adds retriable send_message op to batch_data. - void AddRetriableSendMessageOp(SubchannelCallRetryState* retry_state, - SubchannelCallBatchData* batch_data); - // Adds retriable send_trailing_metadata op to batch_data. - void AddRetriableSendTrailingMetadataOp(SubchannelCallRetryState* retry_state, - SubchannelCallBatchData* batch_data); - // Adds retriable recv_initial_metadata op to batch_data. - void AddRetriableRecvInitialMetadataOp(SubchannelCallRetryState* retry_state, - SubchannelCallBatchData* batch_data); - // Adds retriable recv_message op to batch_data. - void AddRetriableRecvMessageOp(SubchannelCallRetryState* retry_state, - SubchannelCallBatchData* batch_data); - // Adds retriable recv_trailing_metadata op to batch_data. - void AddRetriableRecvTrailingMetadataOp(SubchannelCallRetryState* retry_state, - SubchannelCallBatchData* batch_data); - // Helper function used to start a recv_trailing_metadata batch. This - // is used in the case where a recv_initial_metadata or recv_message - // op fails in a way that we know the call is over but when the application - // has not yet started its own recv_trailing_metadata op. - void StartInternalRecvTrailingMetadata(); - // If there are any cached send ops that need to be replayed on the - // current subchannel call, creates and returns a new subchannel batch - // to replay those ops. Otherwise, returns nullptr. 
- SubchannelCallBatchData* MaybeCreateSubchannelBatchForReplay( - SubchannelCallRetryState* retry_state); - // Adds subchannel batches for pending batches to closures. - void AddSubchannelBatchesForPendingBatches( - SubchannelCallRetryState* retry_state, CallCombinerClosureList* closures); - // Constructs and starts whatever subchannel batches are needed on the - // subchannel call. - static void StartRetriableSubchannelBatches(void* arg, grpc_error* ignored); - - static void CreateLbCall(void* arg, grpc_error* error); - - ChannelData* chand_; - grpc_polling_entity* pollent_; - RefCountedPtr retry_throttle_data_; - const ClientChannelMethodParsedConfig::RetryPolicy* retry_policy_ = nullptr; - BackOff retry_backoff_; - - grpc_slice path_; // Request path. - gpr_cycle_counter call_start_time_; - grpc_millis deadline_; - Arena* arena_; - grpc_call_stack* owning_call_; - CallCombiner* call_combiner_; - grpc_call_context_element* call_context_; - - grpc_closure retry_closure_; - - RefCountedPtr lb_call_; - - // Batches are added to this list when received from above. - // They are removed when we are done handling the batch (i.e., when - // either we have invoked all of the batch's callbacks or we have - // passed the batch down to the LB call and are not intercepting any of - // its callbacks). - // TODO(roth): Now that the retry code is split out into its own call - // object, revamp this to work in a cleaner way, since we no longer need - // for batches to ever wait for name resolution or LB picks. - PendingBatch pending_batches_[MAX_PENDING_BATCHES]; - bool pending_send_initial_metadata_ : 1; - bool pending_send_message_ : 1; - bool pending_send_trailing_metadata_ : 1; - - // Set when we get a cancel_stream op. - grpc_error* cancel_error_ = GRPC_ERROR_NONE; - - // Retry state. 
- bool enable_retries_ : 1; - bool retry_committed_ : 1; - bool last_attempt_got_server_pushback_ : 1; - int num_attempts_completed_ = 0; - size_t bytes_buffered_for_retry_ = 0; - grpc_timer retry_timer_; - - // The number of pending retriable subchannel batches containing send ops. - // We hold a ref to the call stack while this is non-zero, since replay - // batches may not complete until after all callbacks have been returned - // to the surface, and we need to make sure that the call is not destroyed - // until all of these batches have completed. - // Note that we actually only need to track replay batches, but it's - // easier to track all batches with send ops. - int num_pending_retriable_subchannel_send_batches_ = 0; - - // Cached data for retrying send ops. - // send_initial_metadata - bool seen_send_initial_metadata_ = false; - grpc_linked_mdelem* send_initial_metadata_storage_ = nullptr; - grpc_metadata_batch send_initial_metadata_; - uint32_t send_initial_metadata_flags_; - gpr_atm* peer_string_; - // send_message - // When we get a send_message op, we replace the original byte stream - // with a CachingByteStream that caches the slices to a local buffer for - // use in retries. - // Note: We inline the cache for the first 3 send_message ops and use - // dynamic allocation after that. This number was essentially picked - // at random; it could be changed in the future to tune performance. - absl::InlinedVector send_messages_; - // send_trailing_metadata - bool seen_send_trailing_metadata_ = false; - grpc_linked_mdelem* send_trailing_metadata_storage_ = nullptr; - grpc_metadata_batch send_trailing_metadata_; -}; - -// -// ChannelData::LoadBalancedCall definition +// Filter vtable // -// This object is ref-counted, but it cannot inherit from RefCounted<>, -// because it is allocated on the arena and can't free its memory when -// its refcount goes to zero. 
So instead, it manually implements the -// same API as RefCounted<>, so that it can be used with RefCountedPtr<>. -class ChannelData::LoadBalancedCall { - public: - static RefCountedPtr Create( - ChannelData* chand, const grpc_call_element_args& args, - grpc_polling_entity* pollent, size_t parent_data_size); - - LoadBalancedCall(ChannelData* chand, const grpc_call_element_args& args, - grpc_polling_entity* pollent); - ~LoadBalancedCall(); - - // Interface of RefCounted<>. - RefCountedPtr Ref() GRPC_MUST_USE_RESULT; - RefCountedPtr Ref(const DebugLocation& location, - const char* reason) GRPC_MUST_USE_RESULT; - // When refcount drops to 0, destroys itself and the associated call stack, - // but does NOT free the memory because it's in the call arena. - void Unref(); - void Unref(const DebugLocation& location, const char* reason); - - void* GetParentData(); - - void StartTransportStreamOpBatch(grpc_transport_stream_op_batch* batch); - - // Invoked by channel for queued LB picks when the picker is updated. - static void PickSubchannel(void* arg, grpc_error* error); - // Helper function for performing an LB pick while holding the data plane - // mutex. Returns true if the pick is complete, in which case the caller - // must invoke PickDone() or AsyncPickDone() with the returned error. - bool PickSubchannelLocked(grpc_error** error) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ChannelData::data_plane_mu_); - // Schedules a callback to process the completed pick. The callback - // will not run until after this method returns. - void AsyncPickDone(grpc_error* error); - - RefCountedPtr subchannel_call() const { - return subchannel_call_; - } - - private: - // Allow RefCountedPtr<> to access IncrementRefCount(). - template - friend class ::grpc_core::RefCountedPtr; - - class LbQueuedCallCanceller; - class Metadata; - class LbCallState; - - // Interface of RefCounted<>. 
- void IncrementRefCount(); - void IncrementRefCount(const DebugLocation& location, const char* reason); - - // Returns the index into pending_batches_ to be used for batch. - static size_t GetBatchIndex(grpc_transport_stream_op_batch* batch); - void PendingBatchesAdd(grpc_transport_stream_op_batch* batch); - static void FailPendingBatchInCallCombiner(void* arg, grpc_error* error); - // A predicate type and some useful implementations for PendingBatchesFail(). - typedef bool (*YieldCallCombinerPredicate)( - const CallCombinerClosureList& closures); - static bool YieldCallCombiner(const CallCombinerClosureList& /*closures*/) { - return true; - } - static bool NoYieldCallCombiner(const CallCombinerClosureList& /*closures*/) { - return false; - } - static bool YieldCallCombinerIfPendingBatchesFound( - const CallCombinerClosureList& closures) { - return closures.size() > 0; - } - // Fails all pending batches. - // If yield_call_combiner_predicate returns true, assumes responsibility for - // yielding the call combiner. - void PendingBatchesFail( - grpc_error* error, - YieldCallCombinerPredicate yield_call_combiner_predicate); - static void ResumePendingBatchInCallCombiner(void* arg, grpc_error* ignored); - // Resumes all pending batches on subchannel_call_. - void PendingBatchesResume(); - - static void RecvTrailingMetadataReadyForLoadBalancingPolicy( - void* arg, grpc_error* error); - void InjectRecvTrailingMetadataReadyForLoadBalancingPolicy( - grpc_transport_stream_op_batch* batch); - - void CreateSubchannelCall(); - // Invoked when a pick is completed, on both success or failure. - static void PickDone(void* arg, grpc_error* error); - // Removes the call from the channel's list of queued picks if present. - void MaybeRemoveCallFromLbQueuedCallsLocked() - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ChannelData::data_plane_mu_); - // Adds the call to the channel's list of queued picks if not already present. 
- void MaybeAddCallToLbQueuedCallsLocked() - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ChannelData::data_plane_mu_); - - RefCount refs_; - - ChannelData* chand_; - - // TODO(roth): Instead of duplicating these fields in every filter - // that uses any one of them, we should store them in the call - // context. This will save per-call memory overhead. - grpc_slice path_; // Request path. - gpr_cycle_counter call_start_time_; - grpc_millis deadline_; - Arena* arena_; - grpc_call_stack* owning_call_; - CallCombiner* call_combiner_; - grpc_call_context_element* call_context_; - - // Set when we get a cancel_stream op. - grpc_error* cancel_error_ = GRPC_ERROR_NONE; - - grpc_polling_entity* pollent_ = nullptr; - - grpc_closure pick_closure_; - - // Accessed while holding ChannelData::data_plane_mu_. - ChannelData::LbQueuedCall queued_call_; - bool queued_pending_lb_pick_ = false; - const LoadBalancingPolicy::BackendMetricData* backend_metric_data_ = nullptr; - RefCountedPtr connected_subchannel_; - std::function - lb_recv_trailing_metadata_ready_; - LbQueuedCallCanceller* lb_call_canceller_ = nullptr; - - RefCountedPtr subchannel_call_; - - // For intercepting recv_trailing_metadata_ready for the LB policy. - grpc_metadata_batch* recv_trailing_metadata_ = nullptr; - grpc_closure recv_trailing_metadata_ready_; - grpc_closure* original_recv_trailing_metadata_ready_ = nullptr; - - // Batches are added to this list when received from above. - // They are removed when we are done handling the batch (i.e., when - // either we have invoked all of the batch's callbacks or we have - // passed the batch down to the subchannel call and are not - // intercepting any of its callbacks). 
- grpc_transport_stream_op_batch* pending_batches_[MAX_PENDING_BATCHES] = {}; +const grpc_channel_filter ClientChannel::kFilterVtable = { + ClientChannel::CallData::StartTransportStreamOpBatch, + ClientChannel::StartTransportOp, + sizeof(ClientChannel::CallData), + ClientChannel::CallData::Init, + ClientChannel::CallData::SetPollent, + ClientChannel::CallData::Destroy, + sizeof(ClientChannel), + ClientChannel::Init, + ClientChannel::Destroy, + ClientChannel::GetChannelInfo, + "client-channel", }; // // dynamic termination filter // -// Channel arg pointer vtable for GRPC_ARG_CLIENT_CHANNEL_DATA. -void* ChannelDataArgCopy(void* p) { return p; } -void ChannelDataArgDestroy(void* /*p*/) {} -int ChannelDataArgCmp(void* p, void* q) { return GPR_ICMP(p, q); } -const grpc_arg_pointer_vtable kChannelDataArgPointerVtable = { - ChannelDataArgCopy, ChannelDataArgDestroy, ChannelDataArgCmp}; - -// Channel arg pointer vtable for GRPC_ARG_RETRY_THROTTLE_DATA. -void* RetryThrottleDataArgCopy(void* p) { - auto* retry_throttle_data = static_cast(p); - retry_throttle_data->Ref().release(); +namespace { + +// Channel arg pointer vtable for GRPC_ARG_CLIENT_CHANNEL. +void* ClientChannelArgCopy(void* p) { return p; } +void ClientChannelArgDestroy(void* /*p*/) {} +int ClientChannelArgCmp(void* p, void* q) { return GPR_ICMP(p, q); } +const grpc_arg_pointer_vtable kClientChannelArgPointerVtable = { + ClientChannelArgCopy, ClientChannelArgDestroy, ClientChannelArgCmp}; + +// Channel arg pointer vtable for GRPC_ARG_SERVICE_CONFIG_OBJ. 
+void* ServiceConfigObjArgCopy(void* p) { + auto* service_config = static_cast(p); + service_config->Ref().release(); return p; } -void RetryThrottleDataArgDestroy(void* p) { - auto* retry_throttle_data = static_cast(p); - retry_throttle_data->Unref(); +void ServiceConfigObjArgDestroy(void* p) { + auto* service_config = static_cast(p); + service_config->Unref(); } -int RetryThrottleDataArgCmp(void* p, void* q) { return GPR_ICMP(p, q); } -const grpc_arg_pointer_vtable kRetryThrottleDataArgPointerVtable = { - RetryThrottleDataArgCopy, RetryThrottleDataArgDestroy, - RetryThrottleDataArgCmp}; +int ServiceConfigObjArgCmp(void* p, void* q) { return GPR_ICMP(p, q); } +const grpc_arg_pointer_vtable kServiceConfigObjArgPointerVtable = { + ServiceConfigObjArgCopy, ServiceConfigObjArgDestroy, + ServiceConfigObjArgCmp}; -class ChannelData::DynamicTerminationFilterChannelData { +class DynamicTerminationFilter { public: - class DynamicTerminationFilterCallData; + class CallData; - static const grpc_channel_filter kDynamicTerminationFilterVtable; + static const grpc_channel_filter kFilterVtable; static grpc_error* Init(grpc_channel_element* elem, grpc_channel_element_args* args) { GPR_ASSERT(args->is_last); - GPR_ASSERT(elem->filter == &kDynamicTerminationFilterVtable); - new (elem->channel_data) - DynamicTerminationFilterChannelData(args->channel_args); + GPR_ASSERT(elem->filter == &kFilterVtable); + new (elem->channel_data) DynamicTerminationFilter(args->channel_args); return GRPC_ERROR_NONE; } static void Destroy(grpc_channel_element* elem) { - auto* chand = - static_cast(elem->channel_data); - chand->~DynamicTerminationFilterChannelData(); + auto* chand = static_cast(elem->channel_data); + chand->~DynamicTerminationFilter(); } // Will never be called. 
@@ -1087,52 +304,30 @@ class ChannelData::DynamicTerminationFilterChannelData { const grpc_channel_info* /*info*/) {} private: - static RefCountedPtr GetRetryThrottleDataFromArgs( - const grpc_channel_args* args) { - auto* retry_throttle_data = - grpc_channel_args_find_pointer( - args, GRPC_ARG_RETRY_THROTTLE_DATA); - if (retry_throttle_data == nullptr) return nullptr; - return retry_throttle_data->Ref(); - } - - explicit DynamicTerminationFilterChannelData(const grpc_channel_args* args) - : chand_(grpc_channel_args_find_pointer( - args, GRPC_ARG_CLIENT_CHANNEL_DATA)), - retry_throttle_data_(GetRetryThrottleDataFromArgs(args)) {} - - ChannelData* chand_; - RefCountedPtr retry_throttle_data_; + explicit DynamicTerminationFilter(const grpc_channel_args* args) + : chand_(grpc_channel_args_find_pointer( + args, GRPC_ARG_CLIENT_CHANNEL)) {} + + ClientChannel* chand_; }; -class ChannelData::DynamicTerminationFilterChannelData:: - DynamicTerminationFilterCallData { +class DynamicTerminationFilter::CallData { public: static grpc_error* Init(grpc_call_element* elem, const grpc_call_element_args* args) { - new (elem->call_data) DynamicTerminationFilterCallData(*args); + new (elem->call_data) CallData(*args); return GRPC_ERROR_NONE; } static void Destroy(grpc_call_element* elem, const grpc_call_final_info* /*final_info*/, grpc_closure* then_schedule_closure) { - auto* calld = - static_cast(elem->call_data); - auto* chand = - static_cast(elem->channel_data); + auto* calld = static_cast(elem->call_data); RefCountedPtr subchannel_call; - if (chand->chand_->enable_retries_) { - if (GPR_LIKELY(calld->retrying_call_ != nullptr)) { - subchannel_call = calld->retrying_call_->subchannel_call(); - calld->retrying_call_->~RetryingCall(); - } - } else { - if (GPR_LIKELY(calld->lb_call_ != nullptr)) { - subchannel_call = calld->lb_call_->subchannel_call(); - } + if (GPR_LIKELY(calld->lb_call_ != nullptr)) { + subchannel_call = calld->lb_call_->subchannel_call(); } - 
calld->~DynamicTerminationFilterCallData(); + calld->~CallData(); if (GPR_LIKELY(subchannel_call != nullptr)) { subchannel_call->SetAfterCallStackDestroy(then_schedule_closure); } else { @@ -1143,60 +338,30 @@ class ChannelData::DynamicTerminationFilterChannelData:: static void StartTransportStreamOpBatch( grpc_call_element* elem, grpc_transport_stream_op_batch* batch) { - auto* calld = - static_cast(elem->call_data); - auto* chand = - static_cast(elem->channel_data); - if (chand->chand_->enable_retries_) { - calld->retrying_call_->StartTransportStreamOpBatch(batch); - } else { - calld->lb_call_->StartTransportStreamOpBatch(batch); - } + auto* calld = static_cast(elem->call_data); + calld->lb_call_->StartTransportStreamOpBatch(batch); } static void SetPollent(grpc_call_element* elem, grpc_polling_entity* pollent) { - auto* calld = - static_cast(elem->call_data); - auto* chand = - static_cast(elem->channel_data); - ChannelData* client_channel = chand->chand_; + auto* calld = static_cast(elem->call_data); + auto* chand = static_cast(elem->channel_data); + ClientChannel* client_channel = chand->chand_; grpc_call_element_args args = { calld->owning_call_, nullptr, calld->call_context_, calld->path_, calld->call_start_time_, calld->deadline_, calld->arena_, calld->call_combiner_}; - if (client_channel->enable_retries_) { - // Get retry settings from service config. - auto* svc_cfg_call_data = static_cast( - calld->call_context_[GRPC_CONTEXT_SERVICE_CONFIG_CALL_DATA].value); - GPR_ASSERT(svc_cfg_call_data != nullptr); - auto* method_config = static_cast( - svc_cfg_call_data->GetMethodParsedConfig( - ClientChannelServiceConfigParser::ParserIndex())); - // Create retrying call. - calld->retrying_call_ = calld->arena_->New( - client_channel, args, pollent, chand->retry_throttle_data_, - method_config == nullptr ? 
nullptr : method_config->retry_policy()); - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { - gpr_log( - GPR_INFO, - "chand=%p dymamic_termination_calld=%p: create retrying_call=%p", - client_channel, calld, calld->retrying_call_); - } - } else { - calld->lb_call_ = ChannelData::LoadBalancedCall::Create(client_channel, - args, pollent, 0); - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { - gpr_log(GPR_INFO, - "chand=%p dynamic_termination_calld=%p: create lb_call=%p", - chand, client_channel, calld->lb_call_.get()); - } + calld->lb_call_ = client_channel->CreateLoadBalancedCall(args, pollent, 0); + if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { + gpr_log(GPR_INFO, + "chand=%p dynamic_termination_calld=%p: create lb_call=%p", chand, + client_channel, calld->lb_call_.get()); } } private: - explicit DynamicTerminationFilterCallData(const grpc_call_element_args& args) + explicit CallData(const grpc_call_element_args& args) : path_(grpc_slice_ref_internal(args.path)), call_start_time_(args.start_time), deadline_(args.deadline), @@ -1205,7 +370,7 @@ class ChannelData::DynamicTerminationFilterChannelData:: call_combiner_(args.call_combiner), call_context_(args.context) {} - ~DynamicTerminationFilterCallData() { grpc_slice_unref_internal(path_); } + ~CallData() { grpc_slice_unref_internal(path_); } grpc_slice path_; // Request path. 
gpr_cycle_counter call_start_time_; @@ -1215,32 +380,56 @@ class ChannelData::DynamicTerminationFilterChannelData:: CallCombiner* call_combiner_; grpc_call_context_element* call_context_; - ChannelData::RetryingCall* retrying_call_ = nullptr; - RefCountedPtr lb_call_; + RefCountedPtr lb_call_; }; -const grpc_channel_filter ChannelData::DynamicTerminationFilterChannelData:: - kDynamicTerminationFilterVtable = { - ChannelData::DynamicTerminationFilterChannelData:: - DynamicTerminationFilterCallData::StartTransportStreamOpBatch, - ChannelData::DynamicTerminationFilterChannelData::StartTransportOp, - sizeof(ChannelData::DynamicTerminationFilterChannelData:: - DynamicTerminationFilterCallData), - ChannelData::DynamicTerminationFilterChannelData:: - DynamicTerminationFilterCallData::Init, - ChannelData::DynamicTerminationFilterChannelData:: - DynamicTerminationFilterCallData::SetPollent, - ChannelData::DynamicTerminationFilterChannelData:: - DynamicTerminationFilterCallData::Destroy, - sizeof(ChannelData::DynamicTerminationFilterChannelData), - ChannelData::DynamicTerminationFilterChannelData::Init, - ChannelData::DynamicTerminationFilterChannelData::Destroy, - ChannelData::DynamicTerminationFilterChannelData::GetChannelInfo, - "dynamic_filter_termination", +const grpc_channel_filter DynamicTerminationFilter::kFilterVtable = { + DynamicTerminationFilter::CallData::StartTransportStreamOpBatch, + DynamicTerminationFilter::StartTransportOp, + sizeof(DynamicTerminationFilter::CallData), + DynamicTerminationFilter::CallData::Init, + DynamicTerminationFilter::CallData::SetPollent, + DynamicTerminationFilter::CallData::Destroy, + sizeof(DynamicTerminationFilter), + DynamicTerminationFilter::Init, + DynamicTerminationFilter::Destroy, + DynamicTerminationFilter::GetChannelInfo, + "dynamic_filter_termination", +}; + +} // namespace + +// +// ClientChannel::ResolverResultHandler +// + +class ClientChannel::ResolverResultHandler : public Resolver::ResultHandler { + public: + 
explicit ResolverResultHandler(ClientChannel* chand) : chand_(chand) { + GRPC_CHANNEL_STACK_REF(chand_->owning_stack_, "ResolverResultHandler"); + } + + ~ResolverResultHandler() override { + if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { + gpr_log(GPR_INFO, "chand=%p: resolver shutdown complete", chand_); + } + GRPC_CHANNEL_STACK_UNREF(chand_->owning_stack_, "ResolverResultHandler"); + } + + void ReturnResult(Resolver::Result result) override { + chand_->OnResolverResultChangedLocked(std::move(result)); + } + + void ReturnError(grpc_error* error) override { + chand_->OnResolverErrorLocked(error); + } + + private: + ClientChannel* chand_; }; // -// ChannelData::SubchannelWrapper +// ClientChannel::SubchannelWrapper // // This class is a wrapper for Subchannel that hides details of the @@ -1251,9 +440,9 @@ const grpc_channel_filter ChannelData::DynamicTerminationFilterChannelData:: // underlying subchannel is shared between channels, this wrapper will only // be used within one channel, so it will always be synchronized by the // control plane work_serializer. 
-class ChannelData::SubchannelWrapper : public SubchannelInterface { +class ClientChannel::SubchannelWrapper : public SubchannelInterface { public: - SubchannelWrapper(ChannelData* chand, RefCountedPtr subchannel, + SubchannelWrapper(ClientChannel* chand, RefCountedPtr subchannel, absl::optional health_check_service_name) : SubchannelInterface( GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace) @@ -1533,7 +722,7 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface { } } - ChannelData* chand_; + ClientChannel* chand_; RefCountedPtr subchannel_; absl::optional health_check_service_name_; // Maps from the address of the watcher passed to us by the LB policy @@ -1549,11 +738,11 @@ class ChannelData::SubchannelWrapper : public SubchannelInterface { }; // -// ChannelData::ExternalConnectivityWatcher +// ClientChannel::ExternalConnectivityWatcher // -ChannelData::ExternalConnectivityWatcher::ExternalConnectivityWatcher( - ChannelData* chand, grpc_polling_entity pollent, +ClientChannel::ExternalConnectivityWatcher::ExternalConnectivityWatcher( + ClientChannel* chand, grpc_polling_entity pollent, grpc_connectivity_state* state, grpc_closure* on_complete, grpc_closure* watcher_timer_init) : chand_(chand), @@ -1582,15 +771,15 @@ ChannelData::ExternalConnectivityWatcher::ExternalConnectivityWatcher( DEBUG_LOCATION); } -ChannelData::ExternalConnectivityWatcher::~ExternalConnectivityWatcher() { +ClientChannel::ExternalConnectivityWatcher::~ExternalConnectivityWatcher() { grpc_polling_entity_del_from_pollset_set(&pollent_, chand_->interested_parties_); GRPC_CHANNEL_STACK_UNREF(chand_->owning_stack_, "ExternalConnectivityWatcher"); } -void ChannelData::ExternalConnectivityWatcher:: - RemoveWatcherFromExternalWatchersMap(ChannelData* chand, +void ClientChannel::ExternalConnectivityWatcher:: + RemoveWatcherFromExternalWatchersMap(ClientChannel* chand, grpc_closure* on_complete, bool cancel) { RefCountedPtr watcher; @@ -1607,7 +796,7 @@ void 
ChannelData::ExternalConnectivityWatcher:: if (watcher != nullptr && cancel) watcher->Cancel(); } -void ChannelData::ExternalConnectivityWatcher::Notify( +void ClientChannel::ExternalConnectivityWatcher::Notify( grpc_connectivity_state state, const absl::Status& /* status */) { bool done = false; if (!done_.CompareExchangeStrong(&done, true, MemoryOrder::RELAXED, @@ -1615,7 +804,8 @@ void ChannelData::ExternalConnectivityWatcher::Notify( return; // Already done. } // Remove external watcher. - chand_->RemoveExternalConnectivityWatcher(on_complete_, /*cancel=*/false); + ExternalConnectivityWatcher::RemoveWatcherFromExternalWatchersMap( + chand_, on_complete_, /*cancel=*/false); // Report new state to the user. *state_ = state; ExecCtx::Run(DEBUG_LOCATION, on_complete_, GRPC_ERROR_NONE); @@ -1628,7 +818,7 @@ void ChannelData::ExternalConnectivityWatcher::Notify( } } -void ChannelData::ExternalConnectivityWatcher::Cancel() { +void ClientChannel::ExternalConnectivityWatcher::Cancel() { bool done = false; if (!done_.CompareExchangeStrong(&done, true, MemoryOrder::RELAXED, MemoryOrder::RELAXED)) { @@ -1640,25 +830,25 @@ void ChannelData::ExternalConnectivityWatcher::Cancel() { DEBUG_LOCATION); } -void ChannelData::ExternalConnectivityWatcher::AddWatcherLocked() { +void ClientChannel::ExternalConnectivityWatcher::AddWatcherLocked() { Closure::Run(DEBUG_LOCATION, watcher_timer_init_, GRPC_ERROR_NONE); // Add new watcher. Pass the ref of the object from creation to OrphanablePtr. 
chand_->state_tracker_.AddWatcher( initial_state_, OrphanablePtr(this)); } -void ChannelData::ExternalConnectivityWatcher::RemoveWatcherLocked() { +void ClientChannel::ExternalConnectivityWatcher::RemoveWatcherLocked() { chand_->state_tracker_.RemoveWatcher(this); } // -// ChannelData::ConnectivityWatcherAdder +// ClientChannel::ConnectivityWatcherAdder // -class ChannelData::ConnectivityWatcherAdder { +class ClientChannel::ConnectivityWatcherAdder { public: ConnectivityWatcherAdder( - ChannelData* chand, grpc_connectivity_state initial_state, + ClientChannel* chand, grpc_connectivity_state initial_state, OrphanablePtr watcher) : chand_(chand), initial_state_(initial_state), @@ -1675,18 +865,18 @@ class ChannelData::ConnectivityWatcherAdder { delete this; } - ChannelData* chand_; + ClientChannel* chand_; grpc_connectivity_state initial_state_; OrphanablePtr watcher_; }; // -// ChannelData::ConnectivityWatcherRemover +// ClientChannel::ConnectivityWatcherRemover // -class ChannelData::ConnectivityWatcherRemover { +class ClientChannel::ConnectivityWatcherRemover { public: - ConnectivityWatcherRemover(ChannelData* chand, + ConnectivityWatcherRemover(ClientChannel* chand, AsyncConnectivityStateWatcherInterface* watcher) : chand_(chand), watcher_(watcher) { GRPC_CHANNEL_STACK_REF(chand_->owning_stack_, "ConnectivityWatcherRemover"); @@ -1702,18 +892,18 @@ class ChannelData::ConnectivityWatcherRemover { delete this; } - ChannelData* chand_; + ClientChannel* chand_; AsyncConnectivityStateWatcherInterface* watcher_; }; // -// ChannelData::ClientChannelControlHelper +// ClientChannel::ClientChannelControlHelper // -class ChannelData::ClientChannelControlHelper +class ClientChannel::ClientChannelControlHelper : public LoadBalancingPolicy::ChannelControlHelper { public: - explicit ClientChannelControlHelper(ChannelData* chand) : chand_(chand) { + explicit ClientChannelControlHelper(ClientChannel* chand) : chand_(chand) { GRPC_CHANNEL_STACK_REF(chand_->owning_stack_, 
"ClientChannelControlHelper"); } @@ -1810,27 +1000,36 @@ class ChannelData::ClientChannelControlHelper return channelz::ChannelTrace::Error; } - ChannelData* chand_; + ClientChannel* chand_; }; // -// ChannelData implementation +// ClientChannel implementation // -grpc_error* ChannelData::Init(grpc_channel_element* elem, - grpc_channel_element_args* args) { +ClientChannel* ClientChannel::GetFromChannel(grpc_channel* channel) { + grpc_channel_element* elem = + grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel)); + if (elem->filter != &kFilterVtable) return nullptr; + return static_cast(elem->channel_data); +} + +grpc_error* ClientChannel::Init(grpc_channel_element* elem, + grpc_channel_element_args* args) { GPR_ASSERT(args->is_last); - GPR_ASSERT(elem->filter == &grpc_client_channel_filter); + GPR_ASSERT(elem->filter == &kFilterVtable); grpc_error* error = GRPC_ERROR_NONE; - new (elem->channel_data) ChannelData(args, &error); + new (elem->channel_data) ClientChannel(args, &error); return error; } -void ChannelData::Destroy(grpc_channel_element* elem) { - ChannelData* chand = static_cast(elem->channel_data); - chand->~ChannelData(); +void ClientChannel::Destroy(grpc_channel_element* elem) { + ClientChannel* chand = static_cast(elem->channel_data); + chand->~ClientChannel(); } +namespace { + bool GetEnableRetries(const grpc_channel_args* args) { return grpc_channel_arg_get_bool( grpc_channel_args_find(args, GRPC_ARG_ENABLE_RETRIES), true); @@ -1861,8 +1060,11 @@ channelz::ChannelNode* GetChannelzNode(const grpc_channel_args* args) { return nullptr; } -ChannelData::ChannelData(grpc_channel_element_args* args, grpc_error** error) - : deadline_checking_enabled_( +} // namespace + +ClientChannel::ClientChannel(grpc_channel_element_args* args, + grpc_error** error) + : deadline_checking_enabled_( grpc_deadline_checking_enabled(args->channel_args)), enable_retries_(GetEnableRetries(args->channel_args)), per_rpc_retry_buffer_size_( @@ -1889,8 +1091,8 @@ 
ChannelData::ChannelData(grpc_channel_element_args* args, grpc_error** error) return; } // Get server name to resolve, using proxy mapper if needed. - const char* server_uri = grpc_channel_arg_get_string( - grpc_channel_args_find(args->channel_args, GRPC_ARG_SERVER_URI)); + const char* server_uri = + grpc_channel_args_find_string(args->channel_args, GRPC_ARG_SERVER_URI); if (server_uri == nullptr) { *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( "server URI channel arg missing or wrong type in client channel " @@ -1899,8 +1101,8 @@ ChannelData::ChannelData(grpc_channel_element_args* args, grpc_error** error) } // Get default service config. If none is specified via the client API, // we use an empty config. - const char* service_config_json = grpc_channel_arg_get_string( - grpc_channel_args_find(args->channel_args, GRPC_ARG_SERVICE_CONFIG)); + const char* service_config_json = grpc_channel_args_find_string( + args->channel_args, GRPC_ARG_SERVICE_CONFIG); if (service_config_json == nullptr) service_config_json = "{}"; *error = GRPC_ERROR_NONE; default_service_config_ = @@ -1937,7 +1139,7 @@ ChannelData::ChannelData(grpc_channel_element_args* args, grpc_error** error) *error = GRPC_ERROR_NONE; } -ChannelData::~ChannelData() { +ClientChannel::~ClientChannel() { if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { gpr_log(GPR_INFO, "chand=%p: destroying channel", this); } @@ -1950,6 +1152,22 @@ ChannelData::~ChannelData() { GRPC_ERROR_UNREF(disconnect_error_.Load(MemoryOrder::RELAXED)); } +RefCountedPtr +ClientChannel::CreateLoadBalancedCall(const grpc_call_element_args& args, + grpc_polling_entity* pollent, + size_t parent_data_size) { + const size_t alloc_size = + parent_data_size > 0 + ? 
(GPR_ROUND_UP_TO_ALIGNMENT_SIZE(sizeof(LoadBalancedCall)) + + parent_data_size) + : sizeof(LoadBalancedCall); + auto* lb_call = static_cast(args.arena->Alloc(alloc_size)); + new (lb_call) LoadBalancedCall(this, args, pollent); + return lb_call; +} + +namespace { + RefCountedPtr ChooseLbPolicy( const Resolver::Result& resolver_result, const internal::ClientChannelGlobalParsedConfig* parsed_service_config) { @@ -1994,7 +1212,9 @@ RefCountedPtr ChooseLbPolicy( return lb_policy_config; } -void ChannelData::OnResolverResultChangedLocked(Resolver::Result result) { +} // namespace + +void ClientChannel::OnResolverResultChangedLocked(Resolver::Result result) { // Handle race conditions. if (resolver_ == nullptr) return; if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { @@ -2116,7 +1336,7 @@ void ChannelData::OnResolverResultChangedLocked(Resolver::Result result) { GRPC_ERROR_UNREF(service_config_error); } -void ChannelData::OnResolverErrorLocked(grpc_error* error) { +void ClientChannel::OnResolverErrorLocked(grpc_error* error) { if (resolver_ == nullptr) { GRPC_ERROR_UNREF(error); return; @@ -2157,7 +1377,7 @@ void ChannelData::OnResolverErrorLocked(grpc_error* error) { GRPC_ERROR_UNREF(error); } -void ChannelData::CreateOrUpdateLbPolicyLocked( +void ClientChannel::CreateOrUpdateLbPolicyLocked( RefCountedPtr lb_policy_config, Resolver::Result result) { // Construct update. @@ -2183,7 +1403,7 @@ void ChannelData::CreateOrUpdateLbPolicyLocked( } // Creates a new LB policy. 
-OrphanablePtr ChannelData::CreateLbPolicyLocked( +OrphanablePtr ClientChannel::CreateLbPolicyLocked( const grpc_channel_args& args) { LoadBalancingPolicy::Args lb_policy_args; lb_policy_args.work_serializer = work_serializer_; @@ -2202,8 +1422,8 @@ OrphanablePtr ChannelData::CreateLbPolicyLocked( return lb_policy; } -void ChannelData::AddResolverQueuedCall(ResolverQueuedCall* call, - grpc_polling_entity* pollent) { +void ClientChannel::AddResolverQueuedCall(ResolverQueuedCall* call, + grpc_polling_entity* pollent) { // Add call to queued calls list. call->next = resolver_queued_calls_; resolver_queued_calls_ = call; @@ -2212,8 +1432,8 @@ void ChannelData::AddResolverQueuedCall(ResolverQueuedCall* call, grpc_polling_entity_add_to_pollset_set(pollent, interested_parties_); } -void ChannelData::RemoveResolverQueuedCall(ResolverQueuedCall* to_remove, - grpc_polling_entity* pollent) { +void ClientChannel::RemoveResolverQueuedCall(ResolverQueuedCall* to_remove, + grpc_polling_entity* pollent) { // Remove call's pollent from channel's interested_parties. grpc_polling_entity_del_from_pollset_set(pollent, interested_parties_); // Remove from queued calls list. @@ -2226,7 +1446,7 @@ void ChannelData::RemoveResolverQueuedCall(ResolverQueuedCall* to_remove, } } -void ChannelData::UpdateServiceConfigInControlPlaneLocked( +void ClientChannel::UpdateServiceConfigInControlPlaneLocked( RefCountedPtr service_config, RefCountedPtr config_selector, const internal::ClientChannelGlobalParsedConfig* parsed_service_config, @@ -2266,7 +1486,7 @@ void ChannelData::UpdateServiceConfigInControlPlaneLocked( } } -void ChannelData::UpdateServiceConfigInDataPlaneLocked() { +void ClientChannel::UpdateServiceConfigInDataPlaneLocked() { // Grab ref to service config. RefCountedPtr service_config = saved_service_config_; // Grab ref to config selector. Use default if resolver didn't supply one. 
@@ -2279,33 +1499,22 @@ void ChannelData::UpdateServiceConfigInDataPlaneLocked() { config_selector = MakeRefCounted(saved_service_config_); } - // Get retry throttle data from service config. - const internal::ClientChannelGlobalParsedConfig* parsed_service_config = - static_cast( - saved_service_config_->GetGlobalParsedConfig( - internal::ClientChannelServiceConfigParser::ParserIndex())); - absl::optional - retry_throttle_config = parsed_service_config->retry_throttling(); - RefCountedPtr retry_throttle_data; - if (retry_throttle_config.has_value()) { - retry_throttle_data = internal::ServerRetryThrottleMap::GetDataForServer( - server_name_, retry_throttle_config.value().max_milli_tokens, - retry_throttle_config.value().milli_token_ratio); - } // Construct dynamic filter stack. std::vector filters = config_selector->GetFilters(); - filters.push_back( - &DynamicTerminationFilterChannelData::kDynamicTerminationFilterVtable); - absl::InlinedVector args_to_add; - args_to_add.push_back(grpc_channel_arg_pointer_create( - const_cast(GRPC_ARG_CLIENT_CHANNEL_DATA), this, - &kChannelDataArgPointerVtable)); - if (retry_throttle_data != nullptr) { - args_to_add.push_back(grpc_channel_arg_pointer_create( - const_cast(GRPC_ARG_RETRY_THROTTLE_DATA), - retry_throttle_data.get(), &kRetryThrottleDataArgPointerVtable)); - } + if (enable_retries_) { + filters.push_back(&kRetryFilterVtable); + } else { + filters.push_back(&DynamicTerminationFilter::kFilterVtable); + } + absl::InlinedVector args_to_add = { + grpc_channel_arg_pointer_create( + const_cast(GRPC_ARG_CLIENT_CHANNEL), this, + &kClientChannelArgPointerVtable), + grpc_channel_arg_pointer_create( + const_cast(GRPC_ARG_SERVICE_CONFIG_OBJ), service_config.get(), + &kServiceConfigObjArgPointerVtable), + }; grpc_channel_args* new_args = grpc_channel_args_copy_and_add( channel_args_, args_to_add.data(), args_to_add.size()); new_args = config_selector->ModifyChannelArgs(new_args); @@ -2343,7 +1552,7 @@ void 
ChannelData::UpdateServiceConfigInDataPlaneLocked() { // of scope. } -void ChannelData::CreateResolverLocked() { +void ClientChannel::CreateResolverLocked() { if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { gpr_log(GPR_INFO, "chand=%p: starting name resolution", this); } @@ -2362,7 +1571,7 @@ void ChannelData::CreateResolverLocked() { } } -void ChannelData::DestroyResolverAndLbPolicyLocked() { +void ClientChannel::DestroyResolverAndLbPolicyLocked() { if (resolver_ != nullptr) { if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { gpr_log(GPR_INFO, "chand=%p: shutting down resolver=%p", this, @@ -2381,7 +1590,7 @@ void ChannelData::DestroyResolverAndLbPolicyLocked() { } } -void ChannelData::UpdateStateAndPickerLocked( +void ClientChannel::UpdateStateAndPickerLocked( grpc_connectivity_state state, const absl::Status& status, const char* reason, std::unique_ptr picker) { @@ -2454,7 +1663,7 @@ void ChannelData::UpdateStateAndPickerLocked( pending_subchannel_updates_.clear(); } -grpc_error* ChannelData::DoPingLocked(grpc_transport_op* op) { +grpc_error* ClientChannel::DoPingLocked(grpc_transport_op* op) { if (state_tracker_.state() != GRPC_CHANNEL_READY) { return GRPC_ERROR_CREATE_FROM_STATIC_STRING("channel not connected"); } @@ -2480,7 +1689,7 @@ grpc_error* ChannelData::DoPingLocked(grpc_transport_op* op) { return result.error; } -void ChannelData::StartTransportOpLocked(grpc_transport_op* op) { +void ClientChannel::StartTransportOpLocked(grpc_transport_op* op) { // Connectivity watch. 
if (op->start_connectivity_watch != nullptr) { state_tracker_.AddWatcher(op->start_connectivity_watch_state, @@ -2539,9 +1748,9 @@ void ChannelData::StartTransportOpLocked(grpc_transport_op* op) { ExecCtx::Run(DEBUG_LOCATION, op->on_consumed, GRPC_ERROR_NONE); } -void ChannelData::StartTransportOp(grpc_channel_element* elem, - grpc_transport_op* op) { - ChannelData* chand = static_cast(elem->channel_data); +void ClientChannel::StartTransportOp(grpc_channel_element* elem, + grpc_transport_op* op) { + ClientChannel* chand = static_cast(elem->channel_data); GPR_ASSERT(op->set_accept_stream == false); // Handle bind_pollset. if (op->bind_pollset != nullptr) { @@ -2553,9 +1762,9 @@ void ChannelData::StartTransportOp(grpc_channel_element* elem, [chand, op]() { chand->StartTransportOpLocked(op); }, DEBUG_LOCATION); } -void ChannelData::GetChannelInfo(grpc_channel_element* elem, - const grpc_channel_info* info) { - ChannelData* chand = static_cast(elem->channel_data); +void ClientChannel::GetChannelInfo(grpc_channel_element* elem, + const grpc_channel_info* info) { + ClientChannel* chand = static_cast(elem->channel_data); MutexLock lock(&chand->info_mu_); if (info->lb_policy_name != nullptr) { *info->lb_policy_name = gpr_strdup(chand->info_lb_policy_name_.get()); @@ -2566,8 +1775,8 @@ void ChannelData::GetChannelInfo(grpc_channel_element* elem, } } -void ChannelData::AddLbQueuedCall(LbQueuedCall* call, - grpc_polling_entity* pollent) { +void ClientChannel::AddLbQueuedCall(LbQueuedCall* call, + grpc_polling_entity* pollent) { // Add call to queued picks list. 
call->next = lb_queued_calls_; lb_queued_calls_ = call; @@ -2576,8 +1785,8 @@ void ChannelData::AddLbQueuedCall(LbQueuedCall* call, grpc_polling_entity_add_to_pollset_set(pollent, interested_parties_); } -void ChannelData::RemoveLbQueuedCall(LbQueuedCall* to_remove, - grpc_polling_entity* pollent) { +void ClientChannel::RemoveLbQueuedCall(LbQueuedCall* to_remove, + grpc_polling_entity* pollent) { // Remove call's pollent from channel's interested_parties. grpc_polling_entity_del_from_pollset_set(pollent, interested_parties_); // Remove from queued picks list. @@ -2591,7 +1800,7 @@ void ChannelData::RemoveLbQueuedCall(LbQueuedCall* to_remove, } RefCountedPtr -ChannelData::GetConnectedSubchannelInDataPlane( +ClientChannel::GetConnectedSubchannelInDataPlane( SubchannelInterface* subchannel) const { SubchannelWrapper* subchannel_wrapper = static_cast(subchannel); @@ -2601,7 +1810,7 @@ ChannelData::GetConnectedSubchannelInDataPlane( return connected_subchannel->Ref(); } -void ChannelData::TryToConnectLocked() { +void ClientChannel::TryToConnectLocked() { if (lb_policy_ != nullptr) { lb_policy_->ExitIdleLocked(); } else if (resolver_ == nullptr) { @@ -2610,7 +1819,7 @@ void ChannelData::TryToConnectLocked() { GRPC_CHANNEL_STACK_UNREF(owning_stack_, "TryToConnect"); } -grpc_connectivity_state ChannelData::CheckConnectivityState( +grpc_connectivity_state ClientChannel::CheckConnectivityState( bool try_to_connect) { grpc_connectivity_state out = state_tracker_.state(); if (out == GRPC_CHANNEL_IDLE && try_to_connect) { @@ -2620,13 +1829,13 @@ grpc_connectivity_state ChannelData::CheckConnectivityState( return out; } -void ChannelData::AddConnectivityWatcher( +void ClientChannel::AddConnectivityWatcher( grpc_connectivity_state initial_state, OrphanablePtr watcher) { new ConnectivityWatcherAdder(this, initial_state, std::move(watcher)); } -void ChannelData::RemoveConnectivityWatcher( +void ClientChannel::RemoveConnectivityWatcher( AsyncConnectivityStateWatcherInterface* 
watcher) { new ConnectivityWatcherRemover(this, watcher); } @@ -2635,9 +1844,9 @@ void ChannelData::RemoveConnectivityWatcher( // CallData implementation // -ChannelData::CallData::CallData(grpc_call_element* elem, - const ChannelData& chand, - const grpc_call_element_args& args) +ClientChannel::CallData::CallData(grpc_call_element* elem, + const ClientChannel& chand, + const grpc_call_element_args& args) : deadline_state_(elem, args, GPR_LIKELY(chand.deadline_checking_enabled_) ? args.deadline @@ -2654,7 +1863,7 @@ ChannelData::CallData::CallData(grpc_call_element* elem, } } -ChannelData::CallData::~CallData() { +ClientChannel::CallData::~CallData() { grpc_slice_unref_internal(path_); GRPC_ERROR_UNREF(cancel_error_); // Make sure there are no remaining pending batches. @@ -2663,16 +1872,16 @@ ChannelData::CallData::~CallData() { } } -grpc_error* ChannelData::CallData::Init(grpc_call_element* elem, - const grpc_call_element_args* args) { - ChannelData* chand = static_cast(elem->channel_data); +grpc_error* ClientChannel::CallData::Init(grpc_call_element* elem, + const grpc_call_element_args* args) { + ClientChannel* chand = static_cast(elem->channel_data); new (elem->call_data) CallData(elem, *chand, *args); return GRPC_ERROR_NONE; } -void ChannelData::CallData::Destroy(grpc_call_element* elem, - const grpc_call_final_info* /*final_info*/, - grpc_closure* then_schedule_closure) { +void ClientChannel::CallData::Destroy( + grpc_call_element* elem, const grpc_call_final_info* /*final_info*/, + grpc_closure* then_schedule_closure) { CallData* calld = static_cast(elem->call_data); RefCountedPtr dynamic_call = std::move(calld->dynamic_call_); @@ -2685,11 +1894,11 @@ void ChannelData::CallData::Destroy(grpc_call_element* elem, } } -void ChannelData::CallData::StartTransportStreamOpBatch( +void ClientChannel::CallData::StartTransportStreamOpBatch( grpc_call_element* elem, grpc_transport_stream_op_batch* batch) { GPR_TIMER_SCOPE("cc_start_transport_stream_op_batch", 0); 
CallData* calld = static_cast(elem->call_data); - ChannelData* chand = static_cast(elem->channel_data); + ClientChannel* chand = static_cast(elem->channel_data); if (GPR_LIKELY(chand->deadline_checking_enabled_)) { grpc_deadline_state_client_start_transport_stream_op_batch(elem, batch); } @@ -2774,8 +1983,8 @@ void ChannelData::CallData::StartTransportStreamOpBatch( } } -void ChannelData::CallData::SetPollent(grpc_call_element* elem, - grpc_polling_entity* pollent) { +void ClientChannel::CallData::SetPollent(grpc_call_element* elem, + grpc_polling_entity* pollent) { CallData* calld = static_cast(elem->call_data); calld->pollent_ = pollent; } @@ -2784,7 +1993,7 @@ void ChannelData::CallData::SetPollent(grpc_call_element* elem, // pending_batches management // -size_t ChannelData::CallData::GetBatchIndex( +size_t ClientChannel::CallData::GetBatchIndex( grpc_transport_stream_op_batch* batch) { // Note: It is important the send_initial_metadata be the first entry // here, since the code in pick_subchannel_locked() assumes it will be. @@ -2798,9 +2007,9 @@ size_t ChannelData::CallData::GetBatchIndex( } // This is called via the call combiner, so access to calld is synchronized. -void ChannelData::CallData::PendingBatchesAdd( +void ClientChannel::CallData::PendingBatchesAdd( grpc_call_element* elem, grpc_transport_stream_op_batch* batch) { - ChannelData* chand = static_cast(elem->channel_data); + ClientChannel* chand = static_cast(elem->channel_data); const size_t idx = GetBatchIndex(batch); if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { gpr_log(GPR_INFO, @@ -2813,8 +2022,8 @@ void ChannelData::CallData::PendingBatchesAdd( } // This is called via the call combiner, so access to calld is synchronized. 
-void ChannelData::CallData::FailPendingBatchInCallCombiner(void* arg, - grpc_error* error) { +void ClientChannel::CallData::FailPendingBatchInCallCombiner( + void* arg, grpc_error* error) { grpc_transport_stream_op_batch* batch = static_cast(arg); CallData* calld = static_cast(batch->handler_private.extra_arg); @@ -2824,7 +2033,7 @@ void ChannelData::CallData::FailPendingBatchInCallCombiner(void* arg, } // This is called via the call combiner, so access to calld is synchronized. -void ChannelData::CallData::PendingBatchesFail( +void ClientChannel::CallData::PendingBatchesFail( grpc_call_element* elem, grpc_error* error, YieldCallCombinerPredicate yield_call_combiner_predicate) { GPR_ASSERT(error != GRPC_ERROR_NONE); @@ -2859,7 +2068,7 @@ void ChannelData::CallData::PendingBatchesFail( } // This is called via the call combiner, so access to calld is synchronized. -void ChannelData::CallData::ResumePendingBatchInCallCombiner( +void ClientChannel::CallData::ResumePendingBatchInCallCombiner( void* arg, grpc_error* /*ignored*/) { grpc_transport_stream_op_batch* batch = static_cast(arg); @@ -2871,8 +2080,8 @@ void ChannelData::CallData::ResumePendingBatchInCallCombiner( } // This is called via the call combiner, so access to calld is synchronized. -void ChannelData::CallData::PendingBatchesResume(grpc_call_element* elem) { - ChannelData* chand = static_cast(elem->channel_data); +void ClientChannel::CallData::PendingBatchesResume(grpc_call_element* elem) { + ClientChannel* chand = static_cast(elem->channel_data); // Retries not enabled; send down batches as-is. if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { size_t num_batches = 0; @@ -2906,7 +2115,7 @@ void ChannelData::CallData::PendingBatchesResume(grpc_call_element* elem) { // A class to handle the call combiner cancellation callback for a // queued pick. 
-class ChannelData::CallData::ResolverQueuedCallCanceller { +class ClientChannel::CallData::ResolverQueuedCallCanceller { public: explicit ResolverQueuedCallCanceller(grpc_call_element* elem) : elem_(elem) { auto* calld = static_cast(elem->call_data); @@ -2919,7 +2128,7 @@ class ChannelData::CallData::ResolverQueuedCallCanceller { private: static void CancelLocked(void* arg, grpc_error* error) { auto* self = static_cast(arg); - auto* chand = static_cast(self->elem_->channel_data); + auto* chand = static_cast(self->elem_->channel_data); auto* calld = static_cast(self->elem_->call_data); { MutexLock lock(&chand->resolution_mu_); @@ -2946,10 +2155,10 @@ class ChannelData::CallData::ResolverQueuedCallCanceller { grpc_closure closure_; }; -void ChannelData::CallData::MaybeRemoveCallFromResolverQueuedCallsLocked( +void ClientChannel::CallData::MaybeRemoveCallFromResolverQueuedCallsLocked( grpc_call_element* elem) { if (!queued_pending_resolver_result_) return; - auto* chand = static_cast(elem->channel_data); + auto* chand = static_cast(elem->channel_data); if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { gpr_log(GPR_INFO, "chand=%p calld=%p: removing from resolver queued picks list", @@ -2961,10 +2170,10 @@ void ChannelData::CallData::MaybeRemoveCallFromResolverQueuedCallsLocked( resolver_call_canceller_ = nullptr; } -void ChannelData::CallData::MaybeAddCallToResolverQueuedCallsLocked( +void ClientChannel::CallData::MaybeAddCallToResolverQueuedCallsLocked( grpc_call_element* elem) { if (queued_pending_resolver_result_) return; - auto* chand = static_cast(elem->channel_data); + auto* chand = static_cast(elem->channel_data); if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { gpr_log(GPR_INFO, "chand=%p calld=%p: adding to resolver queued picks list", chand, this); @@ -2976,9 +2185,9 @@ void ChannelData::CallData::MaybeAddCallToResolverQueuedCallsLocked( resolver_call_canceller_ = new ResolverQueuedCallCanceller(elem); } -grpc_error* 
ChannelData::CallData::ApplyServiceConfigToCallLocked( +grpc_error* ClientChannel::CallData::ApplyServiceConfigToCallLocked( grpc_call_element* elem, grpc_metadata_batch* initial_metadata) { - ChannelData* chand = static_cast(elem->channel_data); + ClientChannel* chand = static_cast(elem->channel_data); if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { gpr_log(GPR_INFO, "chand=%p calld=%p: applying service config to call", chand, this); @@ -3035,7 +2244,7 @@ grpc_error* ChannelData::CallData::ApplyServiceConfigToCallLocked( return GRPC_ERROR_NONE; } -void ChannelData::CallData:: +void ClientChannel::CallData:: RecvInitialMetadataReadyForConfigSelectorCommitCallback(void* arg, grpc_error* error) { auto* self = static_cast(arg); @@ -3050,7 +2259,7 @@ void ChannelData::CallData:: // TODO(roth): Consider not intercepting this callback unless we // actually need to, if this causes a performance problem. -void ChannelData::CallData:: +void ClientChannel::CallData:: InjectRecvInitialMetadataReadyForConfigSelectorCommitCallback( grpc_transport_stream_op_batch* batch) { original_recv_initial_metadata_ready_ = @@ -3062,15 +2271,15 @@ void ChannelData::CallData:: &recv_initial_metadata_ready_; } -void ChannelData::CallData::AsyncResolutionDone(grpc_call_element* elem, - grpc_error* error) { +void ClientChannel::CallData::AsyncResolutionDone(grpc_call_element* elem, + grpc_error* error) { GRPC_CLOSURE_INIT(&pick_closure_, ResolutionDone, elem, nullptr); ExecCtx::Run(DEBUG_LOCATION, &pick_closure_, error); } -void ChannelData::CallData::ResolutionDone(void* arg, grpc_error* error) { +void ClientChannel::CallData::ResolutionDone(void* arg, grpc_error* error) { grpc_call_element* elem = static_cast(arg); - ChannelData* chand = static_cast(elem->channel_data); + ClientChannel* chand = static_cast(elem->channel_data); CallData* calld = static_cast(elem->call_data); if (error != GRPC_ERROR_NONE) { if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { @@ 
-3084,10 +2293,10 @@ void ChannelData::CallData::ResolutionDone(void* arg, grpc_error* error) { calld->CreateDynamicCall(elem); } -void ChannelData::CallData::CheckResolution(void* arg, grpc_error* error) { +void ClientChannel::CallData::CheckResolution(void* arg, grpc_error* error) { grpc_call_element* elem = static_cast(arg); CallData* calld = static_cast(elem->call_data); - ChannelData* chand = static_cast(elem->channel_data); + ClientChannel* chand = static_cast(elem->channel_data); bool resolution_complete; { MutexLock lock(&chand->resolution_mu_); @@ -3099,9 +2308,9 @@ void ChannelData::CallData::CheckResolution(void* arg, grpc_error* error) { } } -bool ChannelData::CallData::CheckResolutionLocked(grpc_call_element* elem, - grpc_error** error) { - ChannelData* chand = static_cast(elem->channel_data); +bool ClientChannel::CallData::CheckResolutionLocked(grpc_call_element* elem, + grpc_error** error) { + ClientChannel* chand = static_cast(elem->channel_data); // If we're still in IDLE, we need to start resolving. 
if (GPR_UNLIKELY(chand->CheckConnectivityState(false) == GRPC_CHANNEL_IDLE)) { // Bounce into the control plane work serializer to start resolving, @@ -3113,7 +2322,7 @@ bool ChannelData::CallData::CheckResolutionLocked(grpc_call_element* elem, DEBUG_LOCATION, GRPC_CLOSURE_CREATE( [](void* arg, grpc_error* /*error*/) { - auto* chand = static_cast(arg); + auto* chand = static_cast(arg); chand->work_serializer_->Run( [chand]() { chand->CheckConnectivityState(/*try_to_connect=*/true); @@ -3160,8 +2369,8 @@ bool ChannelData::CallData::CheckResolutionLocked(grpc_call_element* elem, return true; } -void ChannelData::CallData::CreateDynamicCall(grpc_call_element* elem) { - auto* chand = static_cast(elem->channel_data); +void ClientChannel::CallData::CreateDynamicCall(grpc_call_element* elem) { + auto* chand = static_cast(elem->channel_data); DynamicFilters::Call::Args args = {std::move(dynamic_filters_), pollent_, path_, @@ -3192,63 +2401,119 @@ void ChannelData::CallData::CreateDynamicCall(grpc_call_element* elem) { } // -// RetryingCall implementation +// ClientChannel::LoadBalancedCall::Metadata // -// Retry support: +class ClientChannel::LoadBalancedCall::Metadata + : public LoadBalancingPolicy::MetadataInterface { + public: + Metadata(LoadBalancedCall* lb_call, grpc_metadata_batch* batch) + : lb_call_(lb_call), batch_(batch) {} + + void Add(absl::string_view key, absl::string_view value) override { + grpc_linked_mdelem* linked_mdelem = static_cast( + lb_call_->arena_->Alloc(sizeof(grpc_linked_mdelem))); + linked_mdelem->md = grpc_mdelem_from_slices( + ExternallyManagedSlice(key.data(), key.size()), + ExternallyManagedSlice(value.data(), value.size())); + GPR_ASSERT(grpc_metadata_batch_link_tail(batch_, linked_mdelem) == + GRPC_ERROR_NONE); + } + + iterator begin() const override { + static_assert(sizeof(grpc_linked_mdelem*) <= sizeof(intptr_t), + "iterator size too large"); + return iterator( + this, reinterpret_cast(MaybeSkipEntry(batch_->list.head))); + } + 
iterator end() const override { + static_assert(sizeof(grpc_linked_mdelem*) <= sizeof(intptr_t), + "iterator size too large"); + return iterator(this, 0); + } + + iterator erase(iterator it) override { + grpc_linked_mdelem* linked_mdelem = + reinterpret_cast(GetIteratorHandle(it)); + intptr_t handle = reinterpret_cast(linked_mdelem->next); + grpc_metadata_batch_remove(batch_, linked_mdelem); + return iterator(this, handle); + } + + private: + grpc_linked_mdelem* MaybeSkipEntry(grpc_linked_mdelem* entry) const { + if (entry != nullptr && batch_->idx.named.path == entry) { + return entry->next; + } + return entry; + } + + intptr_t IteratorHandleNext(intptr_t handle) const override { + grpc_linked_mdelem* linked_mdelem = + reinterpret_cast(handle); + return reinterpret_cast(MaybeSkipEntry(linked_mdelem->next)); + } + + std::pair IteratorHandleGet( + intptr_t handle) const override { + grpc_linked_mdelem* linked_mdelem = + reinterpret_cast(handle); + return std::make_pair(StringViewFromSlice(GRPC_MDKEY(linked_mdelem->md)), + StringViewFromSlice(GRPC_MDVALUE(linked_mdelem->md))); + } + + LoadBalancedCall* lb_call_; + grpc_metadata_batch* batch_; +}; + // -// In order to support retries, we act as a proxy for stream op batches. -// When we get a batch from the surface, we add it to our list of pending -// batches, and we then use those batches to construct separate "child" -// batches to be started on the subchannel call. When the child batches -// return, we then decide which pending batches have been completed and -// schedule their callbacks accordingly. If a subchannel call fails and -// we want to retry it, we do a new pick and start again, constructing -// new "child" batches for the new subchannel call. +// ClientChannel::LoadBalancedCall::LbCallState // -// Note that retries are committed when receiving data from the server -// (except for Trailers-Only responses). 
However, there may be many -// send ops started before receiving any data, so we may have already -// completed some number of send ops (and returned the completions up to -// the surface) by the time we realize that we need to retry. To deal -// with this, we cache data for send ops, so that we can replay them on a -// different subchannel call even after we have completed the original -// batches. + +class ClientChannel::LoadBalancedCall::LbCallState + : public LoadBalancingPolicy::CallState { + public: + explicit LbCallState(LoadBalancedCall* lb_call) : lb_call_(lb_call) {} + + void* Alloc(size_t size) override { return lb_call_->arena_->Alloc(size); } + + const LoadBalancingPolicy::BackendMetricData* GetBackendMetricData() + override { + if (lb_call_->backend_metric_data_ == nullptr) { + grpc_linked_mdelem* md = lb_call_->recv_trailing_metadata_->idx.named + .x_endpoint_load_metrics_bin; + if (md != nullptr) { + lb_call_->backend_metric_data_ = + ParseBackendMetricData(GRPC_MDVALUE(md->md), lb_call_->arena_); + } + } + return lb_call_->backend_metric_data_; + } + + absl::string_view ExperimentalGetCallAttribute(const char* key) override { + auto* service_config_call_data = static_cast( + lb_call_->call_context_[GRPC_CONTEXT_SERVICE_CONFIG_CALL_DATA].value); + auto& call_attributes = service_config_call_data->call_attributes(); + auto it = call_attributes.find(key); + if (it == call_attributes.end()) return absl::string_view(); + return it->second; + } + + private: + LoadBalancedCall* lb_call_; +}; + // -// There are two sets of data to maintain: -// - In call_data (in the parent channel), we maintain a list of pending -// ops and cached data for send ops. -// - In the subchannel call, we maintain state to indicate what ops have -// already been sent down to that call. +// LoadBalancedCall // -// When constructing the "child" batches, we compare those two sets of -// data to see which batches need to be sent to the subchannel call. 
- -// TODO(roth): In subsequent PRs: -// - add support for transparent retries (including initial metadata) -// - figure out how to record stats in census for retries -// (census filter is on top of this one) -// - add census stats for retries - -ChannelData::RetryingCall::RetryingCall( - ChannelData* chand, const grpc_call_element_args& args, - grpc_polling_entity* pollent, - RefCountedPtr retry_throttle_data, - const ClientChannelMethodParsedConfig::RetryPolicy* retry_policy) - : chand_(chand), - pollent_(pollent), - retry_throttle_data_(std::move(retry_throttle_data)), - retry_policy_(retry_policy), - retry_backoff_( - BackOff::Options() - .set_initial_backoff( - retry_policy_ == nullptr ? 0 : retry_policy_->initial_backoff) - .set_multiplier(retry_policy_ == nullptr - ? 0 - : retry_policy_->backoff_multiplier) - .set_jitter(RETRY_BACKOFF_JITTER) - .set_max_backoff( - retry_policy_ == nullptr ? 0 : retry_policy_->max_backoff)), + +ClientChannel::LoadBalancedCall::LoadBalancedCall( + ClientChannel* chand, const grpc_call_element_args& args, + grpc_polling_entity* pollent) + : refs_(1, GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace) + ? 
"LoadBalancedCall" + : nullptr), + chand_(chand), path_(grpc_slice_ref_internal(args.path)), call_start_time_(args.start_time), deadline_(args.deadline), @@ -3256,197 +2521,60 @@ ChannelData::RetryingCall::RetryingCall( owning_call_(args.call_stack), call_combiner_(args.call_combiner), call_context_(args.context), - pending_send_initial_metadata_(false), - pending_send_message_(false), - pending_send_trailing_metadata_(false), - enable_retries_(true), - retry_committed_(false), - last_attempt_got_server_pushback_(false) {} - -ChannelData::RetryingCall::~RetryingCall() { + pollent_(pollent) {} + +ClientChannel::LoadBalancedCall::~LoadBalancedCall() { grpc_slice_unref_internal(path_); GRPC_ERROR_UNREF(cancel_error_); + if (backend_metric_data_ != nullptr) { + backend_metric_data_ + ->LoadBalancingPolicy::BackendMetricData::~BackendMetricData(); + } // Make sure there are no remaining pending batches. for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - GPR_ASSERT(pending_batches_[i].batch == nullptr); - } -} - -void ChannelData::RetryingCall::StartTransportStreamOpBatch( - grpc_transport_stream_op_batch* batch) { - // If we've previously been cancelled, immediately fail any new batches. - if (GPR_UNLIKELY(cancel_error_ != GRPC_ERROR_NONE)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: failing batch with error: %s", chand_, - this, grpc_error_string(cancel_error_)); - } - // Note: This will release the call combiner. - grpc_transport_stream_op_batch_finish_with_failure( - batch, GRPC_ERROR_REF(cancel_error_), call_combiner_); - return; - } - // Handle cancellation. - if (GPR_UNLIKELY(batch->cancel_stream)) { - // Stash a copy of cancel_error in our call data, so that we can use - // it for subsequent operations. 
This ensures that if the call is - // cancelled before any batches are passed down (e.g., if the deadline - // is in the past when the call starts), we can return the right - // error to the caller when the first batch does get passed down. - GRPC_ERROR_UNREF(cancel_error_); - cancel_error_ = GRPC_ERROR_REF(batch->payload->cancel_stream.cancel_error); - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: recording cancel_error=%s", - chand_, this, grpc_error_string(cancel_error_)); - } - // If we do not have an LB call (i.e., a pick has not yet been started), - // fail all pending batches. Otherwise, send the cancellation down to the - // LB call. - if (lb_call_ == nullptr) { - // TODO(roth): If there is a pending retry callback, do we need to - // cancel it here? - PendingBatchesFail(GRPC_ERROR_REF(cancel_error_), NoYieldCallCombiner); - // Note: This will release the call combiner. - grpc_transport_stream_op_batch_finish_with_failure( - batch, GRPC_ERROR_REF(cancel_error_), call_combiner_); - } else { - // Note: This will release the call combiner. - lb_call_->StartTransportStreamOpBatch(batch); - } - return; - } - // Add the batch to the pending list. - PendingBatchesAdd(batch); - // Create LB call if needed. - // TODO(roth): If we get a new batch from the surface after the - // initial retry attempt has failed, while the retry timer is pending, - // we should queue the batch and not try to send it immediately. - if (lb_call_ == nullptr) { - // We do not yet have an LB call, so create one. - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: creating LB call", chand_, - this); - } - CreateLbCall(this, GRPC_ERROR_NONE); - return; - } - // Send batches to LB call. 
- if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: starting batch on lb_call=%p", - chand_, this, lb_call_.get()); + GPR_ASSERT(pending_batches_[i] == nullptr); } - PendingBatchesResume(); } -RefCountedPtr ChannelData::RetryingCall::subchannel_call() - const { - if (lb_call_ == nullptr) return nullptr; - return lb_call_->subchannel_call(); +RefCountedPtr +ClientChannel::LoadBalancedCall::Ref() { + IncrementRefCount(); + return RefCountedPtr(this); } -// -// send op data caching -// - -void ChannelData::RetryingCall::MaybeCacheSendOpsForBatch( - PendingBatch* pending) { - if (pending->send_ops_cached) return; - pending->send_ops_cached = true; - grpc_transport_stream_op_batch* batch = pending->batch; - // Save a copy of metadata for send_initial_metadata ops. - if (batch->send_initial_metadata) { - seen_send_initial_metadata_ = true; - GPR_ASSERT(send_initial_metadata_storage_ == nullptr); - grpc_metadata_batch* send_initial_metadata = - batch->payload->send_initial_metadata.send_initial_metadata; - send_initial_metadata_storage_ = - static_cast(arena_->Alloc( - sizeof(grpc_linked_mdelem) * send_initial_metadata->list.count)); - grpc_metadata_batch_copy(send_initial_metadata, &send_initial_metadata_, - send_initial_metadata_storage_); - send_initial_metadata_flags_ = - batch->payload->send_initial_metadata.send_initial_metadata_flags; - peer_string_ = batch->payload->send_initial_metadata.peer_string; - } - // Set up cache for send_message ops. - if (batch->send_message) { - ByteStreamCache* cache = arena_->New( - std::move(batch->payload->send_message.send_message)); - send_messages_.push_back(cache); - } - // Save metadata batch for send_trailing_metadata ops. 
- if (batch->send_trailing_metadata) { - seen_send_trailing_metadata_ = true; - GPR_ASSERT(send_trailing_metadata_storage_ == nullptr); - grpc_metadata_batch* send_trailing_metadata = - batch->payload->send_trailing_metadata.send_trailing_metadata; - send_trailing_metadata_storage_ = - static_cast(arena_->Alloc( - sizeof(grpc_linked_mdelem) * send_trailing_metadata->list.count)); - grpc_metadata_batch_copy(send_trailing_metadata, &send_trailing_metadata_, - send_trailing_metadata_storage_); - } +RefCountedPtr +ClientChannel::LoadBalancedCall::Ref(const DebugLocation& location, + const char* reason) { + IncrementRefCount(location, reason); + return RefCountedPtr(this); } -void ChannelData::RetryingCall::FreeCachedSendInitialMetadata() { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: destroying send_initial_metadata", - chand_, this); +void ClientChannel::LoadBalancedCall::Unref() { + if (GPR_UNLIKELY(refs_.Unref())) { + this->~LoadBalancedCall(); } - grpc_metadata_batch_destroy(&send_initial_metadata_); } -void ChannelData::RetryingCall::FreeCachedSendMessage(size_t idx) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: destroying send_messages[%" PRIuPTR "]", - chand_, this, idx); +void ClientChannel::LoadBalancedCall::Unref(const DebugLocation& location, + const char* reason) { + if (GPR_UNLIKELY(refs_.Unref(location, reason))) { + this->~LoadBalancedCall(); } - send_messages_[idx]->Destroy(); } -void ChannelData::RetryingCall::FreeCachedSendTrailingMetadata() { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand_=%p retrying_call=%p: destroying send_trailing_metadata", - chand_, this); - } - grpc_metadata_batch_destroy(&send_trailing_metadata_); -} +void ClientChannel::LoadBalancedCall::IncrementRefCount() { refs_.Ref(); } -void ChannelData::RetryingCall::FreeCachedSendOpDataAfterCommit( - 
SubchannelCallRetryState* retry_state) { - if (retry_state->completed_send_initial_metadata) { - FreeCachedSendInitialMetadata(); - } - for (size_t i = 0; i < retry_state->completed_send_message_count; ++i) { - FreeCachedSendMessage(i); - } - if (retry_state->completed_send_trailing_metadata) { - FreeCachedSendTrailingMetadata(); - } +void ClientChannel::LoadBalancedCall::IncrementRefCount( + const DebugLocation& location, const char* reason) { + refs_.Ref(location, reason); } -void ChannelData::RetryingCall::FreeCachedSendOpDataForCompletedBatch( - SubchannelCallBatchData* batch_data, - SubchannelCallRetryState* retry_state) { - if (batch_data->batch.send_initial_metadata) { - FreeCachedSendInitialMetadata(); - } - if (batch_data->batch.send_message) { - FreeCachedSendMessage(retry_state->completed_send_message_count - 1); - } - if (batch_data->batch.send_trailing_metadata) { - FreeCachedSendTrailingMetadata(); - } +void* ClientChannel::LoadBalancedCall::GetParentData() { + return reinterpret_cast(this) + + GPR_ROUND_UP_TO_ALIGNMENT_SIZE(sizeof(LoadBalancedCall)); } -// -// pending_batches management -// - -size_t ChannelData::RetryingCall::GetBatchIndex( +size_t ClientChannel::LoadBalancedCall::GetBatchIndex( grpc_transport_stream_op_batch* batch) { // Note: It is important the send_initial_metadata be the first entry // here, since the code in pick_subchannel_locked() assumes it will be. @@ -3460,133 +2588,46 @@ size_t ChannelData::RetryingCall::GetBatchIndex( } // This is called via the call combiner, so access to calld is synchronized. 
-void ChannelData::RetryingCall::PendingBatchesAdd( +void ClientChannel::LoadBalancedCall::PendingBatchesAdd( grpc_transport_stream_op_batch* batch) { const size_t idx = GetBatchIndex(batch); if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log( - GPR_INFO, - "chand_=%p retrying_call=%p: adding pending batch at index %" PRIuPTR, - chand_, this, idx); - } - PendingBatch* pending = &pending_batches_[idx]; - GPR_ASSERT(pending->batch == nullptr); - pending->batch = batch; - pending->send_ops_cached = false; - if (enable_retries_) { - // Update state in calld about pending batches. - // Also check if the batch takes us over the retry buffer limit. - // Note: We don't check the size of trailing metadata here, because - // gRPC clients do not send trailing metadata. - if (batch->send_initial_metadata) { - pending_send_initial_metadata_ = true; - bytes_buffered_for_retry_ += grpc_metadata_batch_size( - batch->payload->send_initial_metadata.send_initial_metadata); - } - if (batch->send_message) { - pending_send_message_ = true; - bytes_buffered_for_retry_ += - batch->payload->send_message.send_message->length(); - } - if (batch->send_trailing_metadata) { - pending_send_trailing_metadata_ = true; - } - if (GPR_UNLIKELY(bytes_buffered_for_retry_ > - chand_->per_rpc_retry_buffer_size_)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: exceeded retry buffer size, " - "committing", - chand_, this); - } - SubchannelCallRetryState* retry_state = - lb_call_ == nullptr ? nullptr - : static_cast( - lb_call_->GetParentData()); - RetryCommit(retry_state); - // If we are not going to retry and have not yet started, pretend - // retries are disabled so that we don't bother with retry overhead. 
- if (num_attempts_completed_ == 0) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: disabling retries before first " - "attempt", - chand_, this); - } - // TODO(roth): Treat this as a commit? - enable_retries_ = false; - } - } - } -} - -void ChannelData::RetryingCall::PendingBatchClear(PendingBatch* pending) { - if (enable_retries_) { - if (pending->batch->send_initial_metadata) { - pending_send_initial_metadata_ = false; - } - if (pending->batch->send_message) { - pending_send_message_ = false; - } - if (pending->batch->send_trailing_metadata) { - pending_send_trailing_metadata_ = false; - } - } - pending->batch = nullptr; -} - -void ChannelData::RetryingCall::MaybeClearPendingBatch(PendingBatch* pending) { - grpc_transport_stream_op_batch* batch = pending->batch; - // We clear the pending batch if all of its callbacks have been - // scheduled and reset to nullptr. - if (batch->on_complete == nullptr && - (!batch->recv_initial_metadata || - batch->payload->recv_initial_metadata.recv_initial_metadata_ready == - nullptr) && - (!batch->recv_message || - batch->payload->recv_message.recv_message_ready == nullptr) && - (!batch->recv_trailing_metadata || - batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready == - nullptr)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: clearing pending batch", - chand_, this); - } - PendingBatchClear(pending); + gpr_log(GPR_INFO, + "chand=%p lb_call=%p: adding pending batch at index %" PRIuPTR, + chand_, this, idx); } + GPR_ASSERT(pending_batches_[idx] == nullptr); + pending_batches_[idx] = batch; } // This is called via the call combiner, so access to calld is synchronized. 
-void ChannelData::RetryingCall::FailPendingBatchInCallCombiner( +void ClientChannel::LoadBalancedCall::FailPendingBatchInCallCombiner( void* arg, grpc_error* error) { grpc_transport_stream_op_batch* batch = static_cast(arg); - RetryingCall* call = - static_cast(batch->handler_private.extra_arg); + auto* self = static_cast(batch->handler_private.extra_arg); // Note: This will release the call combiner. grpc_transport_stream_op_batch_finish_with_failure( - batch, GRPC_ERROR_REF(error), call->call_combiner_); + batch, GRPC_ERROR_REF(error), self->call_combiner_); } // This is called via the call combiner, so access to calld is synchronized. -void ChannelData::RetryingCall::PendingBatchesFail( +void ClientChannel::LoadBalancedCall::PendingBatchesFail( grpc_error* error, YieldCallCombinerPredicate yield_call_combiner_predicate) { GPR_ASSERT(error != GRPC_ERROR_NONE); if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { size_t num_batches = 0; for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - if (pending_batches_[i].batch != nullptr) ++num_batches; + if (pending_batches_[i] != nullptr) ++num_batches; } gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: failing %" PRIuPTR - " pending batches: %s", + "chand=%p lb_call=%p: failing %" PRIuPTR " pending batches: %s", chand_, this, num_batches, grpc_error_string(error)); } CallCombinerClosureList closures; for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - PendingBatch* pending = &pending_batches_[i]; - grpc_transport_stream_op_batch* batch = pending->batch; + grpc_transport_stream_op_batch*& batch = pending_batches_[i]; if (batch != nullptr) { batch->handler_private.extra_arg = this; GRPC_CLOSURE_INIT(&batch->handler_private.closure, @@ -3594,7 +2635,7 @@ void ChannelData::RetryingCall::PendingBatchesFail( grpc_schedule_on_exec_ctx); closures.Add(&batch->handler_private.closure, GRPC_ERROR_REF(error), "PendingBatchesFail"); - PendingBatchClear(pending); + batch = nullptr; } } if 
(yield_call_combiner_predicate(closures)) { @@ -3606,1476 +2647,18 @@ void ChannelData::RetryingCall::PendingBatchesFail( } // This is called via the call combiner, so access to calld is synchronized. -void ChannelData::RetryingCall::ResumePendingBatchInCallCombiner( +void ClientChannel::LoadBalancedCall::ResumePendingBatchInCallCombiner( void* arg, grpc_error* /*ignored*/) { grpc_transport_stream_op_batch* batch = static_cast(arg); - auto* lb_call = static_cast( - batch->handler_private.extra_arg); + SubchannelCall* subchannel_call = + static_cast(batch->handler_private.extra_arg); // Note: This will release the call combiner. - lb_call->StartTransportStreamOpBatch(batch); + subchannel_call->StartTransportStreamOpBatch(batch); } // This is called via the call combiner, so access to calld is synchronized. -void ChannelData::RetryingCall::PendingBatchesResume() { - if (enable_retries_) { - StartRetriableSubchannelBatches(this, GRPC_ERROR_NONE); - return; - } - // Retries not enabled; send down batches as-is. - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - size_t num_batches = 0; - for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - if (pending_batches_[i].batch != nullptr) ++num_batches; - } - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: starting %" PRIuPTR - " pending batches on lb_call=%p", - chand_, this, num_batches, lb_call_.get()); - } - CallCombinerClosureList closures; - for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - PendingBatch* pending = &pending_batches_[i]; - grpc_transport_stream_op_batch* batch = pending->batch; - if (batch != nullptr) { - batch->handler_private.extra_arg = lb_call_.get(); - GRPC_CLOSURE_INIT(&batch->handler_private.closure, - ResumePendingBatchInCallCombiner, batch, nullptr); - closures.Add(&batch->handler_private.closure, GRPC_ERROR_NONE, - "PendingBatchesResume"); - PendingBatchClear(pending); - } - } - // Note: This will release the call combiner. 
- closures.RunClosures(call_combiner_); -} - -template -ChannelData::RetryingCall::PendingBatch* -ChannelData::RetryingCall::PendingBatchFind(const char* log_message, - Predicate predicate) { - for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - PendingBatch* pending = &pending_batches_[i]; - grpc_transport_stream_op_batch* batch = pending->batch; - if (batch != nullptr && predicate(batch)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log( - GPR_INFO, - "chand=%p retrying_call=%p: %s pending batch at index %" PRIuPTR, - chand_, this, log_message, i); - } - return pending; - } - } - return nullptr; -} - -// -// retry code -// - -void ChannelData::RetryingCall::RetryCommit( - SubchannelCallRetryState* retry_state) { - if (retry_committed_) return; - retry_committed_ = true; - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: committing retries", chand_, - this); - } - if (retry_state != nullptr) { - FreeCachedSendOpDataAfterCommit(retry_state); - } -} - -void ChannelData::RetryingCall::DoRetry(SubchannelCallRetryState* retry_state, - grpc_millis server_pushback_ms) { - GPR_ASSERT(retry_policy_ != nullptr); - // Reset LB call. - lb_call_.reset(); - // Compute backoff delay. - grpc_millis next_attempt_time; - if (server_pushback_ms >= 0) { - next_attempt_time = ExecCtx::Get()->Now() + server_pushback_ms; - last_attempt_got_server_pushback_ = true; - } else { - if (num_attempts_completed_ == 1 || last_attempt_got_server_pushback_) { - last_attempt_got_server_pushback_ = false; - } - next_attempt_time = retry_backoff_.NextAttemptTime(); - } - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: retrying failed call in %" PRId64 " ms", - chand_, this, next_attempt_time - ExecCtx::Get()->Now()); - } - // Schedule retry after computed delay. 
- GRPC_CLOSURE_INIT(&retry_closure_, CreateLbCall, this, nullptr); - grpc_timer_init(&retry_timer_, next_attempt_time, &retry_closure_); - // Update bookkeeping. - if (retry_state != nullptr) retry_state->retry_dispatched = true; -} - -bool ChannelData::RetryingCall::MaybeRetry(SubchannelCallBatchData* batch_data, - grpc_status_code status, - grpc_mdelem* server_pushback_md) { - // Get retry policy. - if (retry_policy_ == nullptr) return false; - // If we've already dispatched a retry from this call, return true. - // This catches the case where the batch has multiple callbacks - // (i.e., it includes either recv_message or recv_initial_metadata). - SubchannelCallRetryState* retry_state = nullptr; - if (batch_data != nullptr) { - retry_state = static_cast( - batch_data->lb_call->GetParentData()); - if (retry_state->retry_dispatched) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: retry already dispatched", - chand_, this); - } - return true; - } - } - // Check status. - if (GPR_LIKELY(status == GRPC_STATUS_OK)) { - if (retry_throttle_data_ != nullptr) { - retry_throttle_data_->RecordSuccess(); - } - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: call succeeded", chand_, - this); - } - return false; - } - // Status is not OK. Check whether the status is retryable. - if (!retry_policy_->retryable_status_codes.Contains(status)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log( - GPR_INFO, - "chand=%p retrying_call=%p: status %s not configured as retryable", - chand_, this, grpc_status_code_to_string(status)); - } - return false; - } - // Record the failure and check whether retries are throttled. 
- // Note that it's important for this check to come after the status - // code check above, since we should only record failures whose statuses - // match the configured retryable status codes, so that we don't count - // things like failures due to malformed requests (INVALID_ARGUMENT). - // Conversely, it's important for this to come before the remaining - // checks, so that we don't fail to record failures due to other factors. - if (retry_throttle_data_ != nullptr && - !retry_throttle_data_->RecordFailure()) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: retries throttled", chand_, - this); - } - return false; - } - // Check whether the call is committed. - if (retry_committed_) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: retries already committed", - chand_, this); - } - return false; - } - // Check whether we have retries remaining. - ++num_attempts_completed_; - if (num_attempts_completed_ >= retry_policy_->max_attempts) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: exceeded %d retry attempts", - chand_, this, retry_policy_->max_attempts); - } - return false; - } - // If the call was cancelled from the surface, don't retry. - if (cancel_error_ != GRPC_ERROR_NONE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: call cancelled from surface, not " - "retrying", - chand_, this); - } - return false; - } - // Check server push-back. - grpc_millis server_pushback_ms = -1; - if (server_pushback_md != nullptr) { - // If the value is "-1" or any other unparseable string, we do not retry. 
- uint32_t ms; - if (!grpc_parse_slice_to_uint32(GRPC_MDVALUE(*server_pushback_md), &ms)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log( - GPR_INFO, - "chand=%p retrying_call=%p: not retrying due to server push-back", - chand_, this); - } - return false; - } else { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: server push-back: retry in %u ms", - chand_, this, ms); - } - server_pushback_ms = static_cast(ms); - } - } - DoRetry(retry_state, server_pushback_ms); - return true; -} - -// -// ChannelData::RetryingCall::SubchannelCallBatchData -// - -ChannelData::RetryingCall::SubchannelCallBatchData* -ChannelData::RetryingCall::SubchannelCallBatchData::Create( - RetryingCall* call, int refcount, bool set_on_complete) { - return call->arena_->New(call, refcount, - set_on_complete); -} - -ChannelData::RetryingCall::SubchannelCallBatchData::SubchannelCallBatchData( - RetryingCall* call, int refcount, bool set_on_complete) - : call(call), lb_call(call->lb_call_) { - SubchannelCallRetryState* retry_state = - static_cast(lb_call->GetParentData()); - batch.payload = &retry_state->batch_payload; - gpr_ref_init(&refs, refcount); - if (set_on_complete) { - GRPC_CLOSURE_INIT(&on_complete, ChannelData::RetryingCall::OnComplete, this, - grpc_schedule_on_exec_ctx); - batch.on_complete = &on_complete; - } - GRPC_CALL_STACK_REF(call->owning_call_, "batch_data"); -} - -void ChannelData::RetryingCall::SubchannelCallBatchData::Destroy() { - SubchannelCallRetryState* retry_state = - static_cast(lb_call->GetParentData()); - if (batch.send_initial_metadata) { - grpc_metadata_batch_destroy(&retry_state->send_initial_metadata); - } - if (batch.send_trailing_metadata) { - grpc_metadata_batch_destroy(&retry_state->send_trailing_metadata); - } - if (batch.recv_initial_metadata) { - grpc_metadata_batch_destroy(&retry_state->recv_initial_metadata); - } - if (batch.recv_trailing_metadata) { - 
grpc_metadata_batch_destroy(&retry_state->recv_trailing_metadata); - } - lb_call.reset(); - GRPC_CALL_STACK_UNREF(call->owning_call_, "batch_data"); -} - -// -// recv_initial_metadata callback handling -// - -void ChannelData::RetryingCall::InvokeRecvInitialMetadataCallback( - void* arg, grpc_error* error) { - SubchannelCallBatchData* batch_data = - static_cast(arg); - // Find pending batch. - PendingBatch* pending = batch_data->call->PendingBatchFind( - "invoking recv_initial_metadata_ready for", - [](grpc_transport_stream_op_batch* batch) { - return batch->recv_initial_metadata && - batch->payload->recv_initial_metadata - .recv_initial_metadata_ready != nullptr; - }); - GPR_ASSERT(pending != nullptr); - // Return metadata. - SubchannelCallRetryState* retry_state = - static_cast( - batch_data->lb_call->GetParentData()); - grpc_metadata_batch_move( - &retry_state->recv_initial_metadata, - pending->batch->payload->recv_initial_metadata.recv_initial_metadata); - // Update bookkeeping. - // Note: Need to do this before invoking the callback, since invoking - // the callback will result in yielding the call combiner. - grpc_closure* recv_initial_metadata_ready = - pending->batch->payload->recv_initial_metadata - .recv_initial_metadata_ready; - pending->batch->payload->recv_initial_metadata.recv_initial_metadata_ready = - nullptr; - batch_data->call->MaybeClearPendingBatch(pending); - batch_data->Unref(); - // Invoke callback. 
- Closure::Run(DEBUG_LOCATION, recv_initial_metadata_ready, - GRPC_ERROR_REF(error)); -} - -void ChannelData::RetryingCall::RecvInitialMetadataReady(void* arg, - grpc_error* error) { - SubchannelCallBatchData* batch_data = - static_cast(arg); - RetryingCall* call = batch_data->call; - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log( - GPR_INFO, - "chand=%p retrying_call=%p: got recv_initial_metadata_ready, error=%s", - call->chand_, call, grpc_error_string(error)); - } - SubchannelCallRetryState* retry_state = - static_cast( - batch_data->lb_call->GetParentData()); - retry_state->completed_recv_initial_metadata = true; - // If a retry was already dispatched, then we're not going to use the - // result of this recv_initial_metadata op, so do nothing. - if (retry_state->retry_dispatched) { - GRPC_CALL_COMBINER_STOP( - call->call_combiner_, - "recv_initial_metadata_ready after retry dispatched"); - return; - } - // If we got an error or a Trailers-Only response and have not yet gotten - // the recv_trailing_metadata_ready callback, then defer propagating this - // callback back to the surface. We can evaluate whether to retry when - // recv_trailing_metadata comes back. - if (GPR_UNLIKELY((retry_state->trailing_metadata_available || - error != GRPC_ERROR_NONE) && - !retry_state->completed_recv_trailing_metadata)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log( - GPR_INFO, - "chand=%p retrying_call=%p: deferring recv_initial_metadata_ready " - "(Trailers-Only)", - call->chand_, call); - } - retry_state->recv_initial_metadata_ready_deferred_batch = batch_data; - retry_state->recv_initial_metadata_error = GRPC_ERROR_REF(error); - if (!retry_state->started_recv_trailing_metadata) { - // recv_trailing_metadata not yet started by application; start it - // ourselves to get status. 
- call->StartInternalRecvTrailingMetadata(); - } else { - GRPC_CALL_COMBINER_STOP( - call->call_combiner_, - "recv_initial_metadata_ready trailers-only or error"); - } - return; - } - // Received valid initial metadata, so commit the call. - call->RetryCommit(retry_state); - // Invoke the callback to return the result to the surface. - // Manually invoking a callback function; it does not take ownership of error. - call->InvokeRecvInitialMetadataCallback(batch_data, error); -} - -// -// recv_message callback handling -// - -void ChannelData::RetryingCall::InvokeRecvMessageCallback(void* arg, - grpc_error* error) { - SubchannelCallBatchData* batch_data = - static_cast(arg); - RetryingCall* call = batch_data->call; - // Find pending op. - PendingBatch* pending = call->PendingBatchFind( - "invoking recv_message_ready for", - [](grpc_transport_stream_op_batch* batch) { - return batch->recv_message && - batch->payload->recv_message.recv_message_ready != nullptr; - }); - GPR_ASSERT(pending != nullptr); - // Return payload. - SubchannelCallRetryState* retry_state = - static_cast( - batch_data->lb_call->GetParentData()); - *pending->batch->payload->recv_message.recv_message = - std::move(retry_state->recv_message); - // Update bookkeeping. - // Note: Need to do this before invoking the callback, since invoking - // the callback will result in yielding the call combiner. - grpc_closure* recv_message_ready = - pending->batch->payload->recv_message.recv_message_ready; - pending->batch->payload->recv_message.recv_message_ready = nullptr; - call->MaybeClearPendingBatch(pending); - batch_data->Unref(); - // Invoke callback. 
- Closure::Run(DEBUG_LOCATION, recv_message_ready, GRPC_ERROR_REF(error)); -} - -void ChannelData::RetryingCall::RecvMessageReady(void* arg, grpc_error* error) { - SubchannelCallBatchData* batch_data = - static_cast(arg); - RetryingCall* call = batch_data->call; - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: got recv_message_ready, error=%s", - call->chand_, call, grpc_error_string(error)); - } - SubchannelCallRetryState* retry_state = - static_cast( - batch_data->lb_call->GetParentData()); - ++retry_state->completed_recv_message_count; - // If a retry was already dispatched, then we're not going to use the - // result of this recv_message op, so do nothing. - if (retry_state->retry_dispatched) { - GRPC_CALL_COMBINER_STOP(call->call_combiner_, - "recv_message_ready after retry dispatched"); - return; - } - // If we got an error or the payload was nullptr and we have not yet gotten - // the recv_trailing_metadata_ready callback, then defer propagating this - // callback back to the surface. We can evaluate whether to retry when - // recv_trailing_metadata comes back. - if (GPR_UNLIKELY( - (retry_state->recv_message == nullptr || error != GRPC_ERROR_NONE) && - !retry_state->completed_recv_trailing_metadata)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log( - GPR_INFO, - "chand=%p retrying_call=%p: deferring recv_message_ready (nullptr " - "message and recv_trailing_metadata pending)", - call->chand_, call); - } - retry_state->recv_message_ready_deferred_batch = batch_data; - retry_state->recv_message_error = GRPC_ERROR_REF(error); - if (!retry_state->started_recv_trailing_metadata) { - // recv_trailing_metadata not yet started by application; start it - // ourselves to get status. 
- call->StartInternalRecvTrailingMetadata(); - } else { - GRPC_CALL_COMBINER_STOP(call->call_combiner_, "recv_message_ready null"); - } - return; - } - // Received a valid message, so commit the call. - call->RetryCommit(retry_state); - // Invoke the callback to return the result to the surface. - // Manually invoking a callback function; it does not take ownership of error. - call->InvokeRecvMessageCallback(batch_data, error); -} - -// -// recv_trailing_metadata handling -// - -void ChannelData::RetryingCall::GetCallStatus( - grpc_metadata_batch* md_batch, grpc_error* error, grpc_status_code* status, - grpc_mdelem** server_pushback_md) { - if (error != GRPC_ERROR_NONE) { - grpc_error_get_status(error, deadline_, status, nullptr, nullptr, nullptr); - } else { - GPR_ASSERT(md_batch->idx.named.grpc_status != nullptr); - *status = - grpc_get_status_code_from_metadata(md_batch->idx.named.grpc_status->md); - if (server_pushback_md != nullptr && - md_batch->idx.named.grpc_retry_pushback_ms != nullptr) { - *server_pushback_md = &md_batch->idx.named.grpc_retry_pushback_ms->md; - } - } - GRPC_ERROR_UNREF(error); -} - -void ChannelData::RetryingCall::AddClosureForRecvTrailingMetadataReady( - SubchannelCallBatchData* batch_data, grpc_error* error, - CallCombinerClosureList* closures) { - // Find pending batch. - PendingBatch* pending = PendingBatchFind( - "invoking recv_trailing_metadata for", - [](grpc_transport_stream_op_batch* batch) { - return batch->recv_trailing_metadata && - batch->payload->recv_trailing_metadata - .recv_trailing_metadata_ready != nullptr; - }); - // If we generated the recv_trailing_metadata op internally via - // StartInternalRecvTrailingMetadata(), then there will be no pending batch. - if (pending == nullptr) { - GRPC_ERROR_UNREF(error); - return; - } - // Return metadata. 
- SubchannelCallRetryState* retry_state = - static_cast( - batch_data->lb_call->GetParentData()); - grpc_metadata_batch_move( - &retry_state->recv_trailing_metadata, - pending->batch->payload->recv_trailing_metadata.recv_trailing_metadata); - // Add closure. - closures->Add(pending->batch->payload->recv_trailing_metadata - .recv_trailing_metadata_ready, - error, "recv_trailing_metadata_ready for pending batch"); - // Update bookkeeping. - pending->batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready = - nullptr; - MaybeClearPendingBatch(pending); -} - -void ChannelData::RetryingCall::AddClosuresForDeferredRecvCallbacks( - SubchannelCallBatchData* batch_data, SubchannelCallRetryState* retry_state, - CallCombinerClosureList* closures) { - if (batch_data->batch.recv_trailing_metadata) { - // Add closure for deferred recv_initial_metadata_ready. - if (GPR_UNLIKELY(retry_state->recv_initial_metadata_ready_deferred_batch != - nullptr)) { - GRPC_CLOSURE_INIT(&retry_state->recv_initial_metadata_ready, - InvokeRecvInitialMetadataCallback, - retry_state->recv_initial_metadata_ready_deferred_batch, - grpc_schedule_on_exec_ctx); - closures->Add(&retry_state->recv_initial_metadata_ready, - retry_state->recv_initial_metadata_error, - "resuming recv_initial_metadata_ready"); - retry_state->recv_initial_metadata_ready_deferred_batch = nullptr; - } - // Add closure for deferred recv_message_ready. 
- if (GPR_UNLIKELY(retry_state->recv_message_ready_deferred_batch != - nullptr)) { - GRPC_CLOSURE_INIT(&retry_state->recv_message_ready, - InvokeRecvMessageCallback, - retry_state->recv_message_ready_deferred_batch, - grpc_schedule_on_exec_ctx); - closures->Add(&retry_state->recv_message_ready, - retry_state->recv_message_error, - "resuming recv_message_ready"); - retry_state->recv_message_ready_deferred_batch = nullptr; - } - } -} - -bool ChannelData::RetryingCall::PendingBatchIsUnstarted( - PendingBatch* pending, SubchannelCallRetryState* retry_state) { - if (pending->batch == nullptr || pending->batch->on_complete == nullptr) { - return false; - } - if (pending->batch->send_initial_metadata && - !retry_state->started_send_initial_metadata) { - return true; - } - if (pending->batch->send_message && - retry_state->started_send_message_count < send_messages_.size()) { - return true; - } - if (pending->batch->send_trailing_metadata && - !retry_state->started_send_trailing_metadata) { - return true; - } - return false; -} - -void ChannelData::RetryingCall::AddClosuresToFailUnstartedPendingBatches( - SubchannelCallRetryState* retry_state, grpc_error* error, - CallCombinerClosureList* closures) { - for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - PendingBatch* pending = &pending_batches_[i]; - if (PendingBatchIsUnstarted(pending, retry_state)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: failing unstarted pending batch at " - "index " - "%" PRIuPTR, - chand_, this, i); - } - closures->Add(pending->batch->on_complete, GRPC_ERROR_REF(error), - "failing on_complete for pending batch"); - pending->batch->on_complete = nullptr; - MaybeClearPendingBatch(pending); - } - } - GRPC_ERROR_UNREF(error); -} - -void ChannelData::RetryingCall::RunClosuresForCompletedCall( - SubchannelCallBatchData* batch_data, grpc_error* error) { - SubchannelCallRetryState* retry_state = - static_cast( - 
batch_data->lb_call->GetParentData()); - // Construct list of closures to execute. - CallCombinerClosureList closures; - // First, add closure for recv_trailing_metadata_ready. - AddClosureForRecvTrailingMetadataReady(batch_data, GRPC_ERROR_REF(error), - &closures); - // If there are deferred recv_initial_metadata_ready or recv_message_ready - // callbacks, add them to closures. - AddClosuresForDeferredRecvCallbacks(batch_data, retry_state, &closures); - // Add closures to fail any pending batches that have not yet been started. - AddClosuresToFailUnstartedPendingBatches(retry_state, GRPC_ERROR_REF(error), - &closures); - // Don't need batch_data anymore. - batch_data->Unref(); - // Schedule all of the closures identified above. - // Note: This will release the call combiner. - closures.RunClosures(call_combiner_); - GRPC_ERROR_UNREF(error); -} - -void ChannelData::RetryingCall::RecvTrailingMetadataReady(void* arg, - grpc_error* error) { - SubchannelCallBatchData* batch_data = - static_cast(arg); - RetryingCall* call = batch_data->call; - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log( - GPR_INFO, - "chand=%p retrying_call=%p: got recv_trailing_metadata_ready, error=%s", - call->chand_, call, grpc_error_string(error)); - } - SubchannelCallRetryState* retry_state = - static_cast( - batch_data->lb_call->GetParentData()); - retry_state->completed_recv_trailing_metadata = true; - // Get the call's status and check for server pushback metadata. 
- grpc_status_code status = GRPC_STATUS_OK; - grpc_mdelem* server_pushback_md = nullptr; - grpc_metadata_batch* md_batch = - batch_data->batch.payload->recv_trailing_metadata.recv_trailing_metadata; - call->GetCallStatus(md_batch, GRPC_ERROR_REF(error), &status, - &server_pushback_md); - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: call finished, status=%s", - call->chand_, call, grpc_status_code_to_string(status)); - } - // Check if we should retry. - if (call->MaybeRetry(batch_data, status, server_pushback_md)) { - // Unref batch_data for deferred recv_initial_metadata_ready or - // recv_message_ready callbacks, if any. - if (retry_state->recv_initial_metadata_ready_deferred_batch != nullptr) { - batch_data->Unref(); - GRPC_ERROR_UNREF(retry_state->recv_initial_metadata_error); - } - if (retry_state->recv_message_ready_deferred_batch != nullptr) { - batch_data->Unref(); - GRPC_ERROR_UNREF(retry_state->recv_message_error); - } - batch_data->Unref(); - return; - } - // Not retrying, so commit the call. - call->RetryCommit(retry_state); - // Run any necessary closures. - call->RunClosuresForCompletedCall(batch_data, GRPC_ERROR_REF(error)); -} - -// -// on_complete callback handling -// - -void ChannelData::RetryingCall::AddClosuresForCompletedPendingBatch( - SubchannelCallBatchData* batch_data, grpc_error* error, - CallCombinerClosureList* closures) { - PendingBatch* pending = PendingBatchFind( - "completed", [batch_data](grpc_transport_stream_op_batch* batch) { - // Match the pending batch with the same set of send ops as the - // subchannel batch we've just completed. 
- return batch->on_complete != nullptr && - batch_data->batch.send_initial_metadata == - batch->send_initial_metadata && - batch_data->batch.send_message == batch->send_message && - batch_data->batch.send_trailing_metadata == - batch->send_trailing_metadata; - }); - // If batch_data is a replay batch, then there will be no pending - // batch to complete. - if (pending == nullptr) { - GRPC_ERROR_UNREF(error); - return; - } - // Add closure. - closures->Add(pending->batch->on_complete, error, - "on_complete for pending batch"); - pending->batch->on_complete = nullptr; - MaybeClearPendingBatch(pending); -} - -void ChannelData::RetryingCall::AddClosuresForReplayOrPendingSendOps( - SubchannelCallBatchData* batch_data, SubchannelCallRetryState* retry_state, - CallCombinerClosureList* closures) { - bool have_pending_send_message_ops = - retry_state->started_send_message_count < send_messages_.size(); - bool have_pending_send_trailing_metadata_op = - seen_send_trailing_metadata_ && - !retry_state->started_send_trailing_metadata; - if (!have_pending_send_message_ops && - !have_pending_send_trailing_metadata_op) { - for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - PendingBatch* pending = &pending_batches_[i]; - grpc_transport_stream_op_batch* batch = pending->batch; - if (batch == nullptr || pending->send_ops_cached) continue; - if (batch->send_message) have_pending_send_message_ops = true; - if (batch->send_trailing_metadata) { - have_pending_send_trailing_metadata_op = true; - } - } - } - if (have_pending_send_message_ops || have_pending_send_trailing_metadata_op) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: starting next batch for pending send " - "op(s)", - chand_, this); - } - GRPC_CLOSURE_INIT(&batch_data->batch.handler_private.closure, - StartRetriableSubchannelBatches, this, - grpc_schedule_on_exec_ctx); - closures->Add(&batch_data->batch.handler_private.closure, GRPC_ERROR_NONE, 
- "starting next batch for send_* op(s)"); - } -} - -void ChannelData::RetryingCall::OnComplete(void* arg, grpc_error* error) { - SubchannelCallBatchData* batch_data = - static_cast(arg); - RetryingCall* call = batch_data->call; - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: got on_complete, error=%s, batch=%s", - call->chand_, call, grpc_error_string(error), - grpc_transport_stream_op_batch_string(&batch_data->batch).c_str()); - } - SubchannelCallRetryState* retry_state = - static_cast( - batch_data->lb_call->GetParentData()); - // Update bookkeeping in retry_state. - if (batch_data->batch.send_initial_metadata) { - retry_state->completed_send_initial_metadata = true; - } - if (batch_data->batch.send_message) { - ++retry_state->completed_send_message_count; - } - if (batch_data->batch.send_trailing_metadata) { - retry_state->completed_send_trailing_metadata = true; - } - // If the call is committed, free cached data for send ops that we've just - // completed. - if (call->retry_committed_) { - call->FreeCachedSendOpDataForCompletedBatch(batch_data, retry_state); - } - // Construct list of closures to execute. - CallCombinerClosureList closures; - // If a retry was already dispatched, that means we saw - // recv_trailing_metadata before this, so we do nothing here. - // Otherwise, invoke the callback to return the result to the surface. - if (!retry_state->retry_dispatched) { - // Add closure for the completed pending batch, if any. - call->AddClosuresForCompletedPendingBatch(batch_data, GRPC_ERROR_REF(error), - &closures); - // If needed, add a callback to start any replay or pending send ops on - // the subchannel call. - if (!retry_state->completed_recv_trailing_metadata) { - call->AddClosuresForReplayOrPendingSendOps(batch_data, retry_state, - &closures); - } - } - // Track number of pending subchannel send batches and determine if this - // was the last one. 
- --call->num_pending_retriable_subchannel_send_batches_; - const bool last_send_batch_complete = - call->num_pending_retriable_subchannel_send_batches_ == 0; - // Don't need batch_data anymore. - batch_data->Unref(); - // Schedule all of the closures identified above. - // Note: This yeilds the call combiner. - closures.RunClosures(call->call_combiner_); - // If this was the last subchannel send batch, unref the call stack. - if (last_send_batch_complete) { - GRPC_CALL_STACK_UNREF(call->owning_call_, "subchannel_send_batches"); - } -} - -// -// subchannel batch construction -// - -void ChannelData::RetryingCall::StartBatchInCallCombiner( - void* arg, grpc_error* /*ignored*/) { - grpc_transport_stream_op_batch* batch = - static_cast(arg); - auto* lb_call = static_cast( - batch->handler_private.extra_arg); - // Note: This will release the call combiner. - lb_call->StartTransportStreamOpBatch(batch); -} - -void ChannelData::RetryingCall::AddClosureForSubchannelBatch( - grpc_transport_stream_op_batch* batch, CallCombinerClosureList* closures) { - batch->handler_private.extra_arg = lb_call_.get(); - GRPC_CLOSURE_INIT(&batch->handler_private.closure, StartBatchInCallCombiner, - batch, grpc_schedule_on_exec_ctx); - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: starting subchannel batch: %s", chand_, - this, grpc_transport_stream_op_batch_string(batch).c_str()); - } - closures->Add(&batch->handler_private.closure, GRPC_ERROR_NONE, - "start_subchannel_batch"); -} - -void ChannelData::RetryingCall::AddRetriableSendInitialMetadataOp( - SubchannelCallRetryState* retry_state, - SubchannelCallBatchData* batch_data) { - // Maps the number of retries to the corresponding metadata value slice. 
- const grpc_slice* retry_count_strings[] = {&GRPC_MDSTR_1, &GRPC_MDSTR_2, - &GRPC_MDSTR_3, &GRPC_MDSTR_4}; - // We need to make a copy of the metadata batch for each attempt, since - // the filters in the subchannel stack may modify this batch, and we don't - // want those modifications to be passed forward to subsequent attempts. - // - // If we've already completed one or more attempts, add the - // grpc-retry-attempts header. - retry_state->send_initial_metadata_storage = - static_cast(arena_->Alloc( - sizeof(grpc_linked_mdelem) * - (send_initial_metadata_.list.count + (num_attempts_completed_ > 0)))); - grpc_metadata_batch_copy(&send_initial_metadata_, - &retry_state->send_initial_metadata, - retry_state->send_initial_metadata_storage); - if (GPR_UNLIKELY(retry_state->send_initial_metadata.idx.named - .grpc_previous_rpc_attempts != nullptr)) { - grpc_metadata_batch_remove(&retry_state->send_initial_metadata, - GRPC_BATCH_GRPC_PREVIOUS_RPC_ATTEMPTS); - } - if (GPR_UNLIKELY(num_attempts_completed_ > 0)) { - grpc_mdelem retry_md = grpc_mdelem_create( - GRPC_MDSTR_GRPC_PREVIOUS_RPC_ATTEMPTS, - *retry_count_strings[num_attempts_completed_ - 1], nullptr); - grpc_error* error = grpc_metadata_batch_add_tail( - &retry_state->send_initial_metadata, - &retry_state - ->send_initial_metadata_storage[send_initial_metadata_.list.count], - retry_md, GRPC_BATCH_GRPC_PREVIOUS_RPC_ATTEMPTS); - if (GPR_UNLIKELY(error != GRPC_ERROR_NONE)) { - gpr_log(GPR_ERROR, "error adding retry metadata: %s", - grpc_error_string(error)); - GPR_ASSERT(false); - } - } - retry_state->started_send_initial_metadata = true; - batch_data->batch.send_initial_metadata = true; - batch_data->batch.payload->send_initial_metadata.send_initial_metadata = - &retry_state->send_initial_metadata; - batch_data->batch.payload->send_initial_metadata.send_initial_metadata_flags = - send_initial_metadata_flags_; - batch_data->batch.payload->send_initial_metadata.peer_string = peer_string_; -} - -void 
ChannelData::RetryingCall::AddRetriableSendMessageOp( - SubchannelCallRetryState* retry_state, - SubchannelCallBatchData* batch_data) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: starting calld->send_messages[%" PRIuPTR - "]", - chand_, this, retry_state->started_send_message_count); - } - ByteStreamCache* cache = - send_messages_[retry_state->started_send_message_count]; - ++retry_state->started_send_message_count; - retry_state->send_message.Init(cache); - batch_data->batch.send_message = true; - batch_data->batch.payload->send_message.send_message.reset( - retry_state->send_message.get()); -} - -void ChannelData::RetryingCall::AddRetriableSendTrailingMetadataOp( - SubchannelCallRetryState* retry_state, - SubchannelCallBatchData* batch_data) { - // We need to make a copy of the metadata batch for each attempt, since - // the filters in the subchannel stack may modify this batch, and we don't - // want those modifications to be passed forward to subsequent attempts. 
- retry_state->send_trailing_metadata_storage = - static_cast(arena_->Alloc( - sizeof(grpc_linked_mdelem) * send_trailing_metadata_.list.count)); - grpc_metadata_batch_copy(&send_trailing_metadata_, - &retry_state->send_trailing_metadata, - retry_state->send_trailing_metadata_storage); - retry_state->started_send_trailing_metadata = true; - batch_data->batch.send_trailing_metadata = true; - batch_data->batch.payload->send_trailing_metadata.send_trailing_metadata = - &retry_state->send_trailing_metadata; -} - -void ChannelData::RetryingCall::AddRetriableRecvInitialMetadataOp( - SubchannelCallRetryState* retry_state, - SubchannelCallBatchData* batch_data) { - retry_state->started_recv_initial_metadata = true; - batch_data->batch.recv_initial_metadata = true; - grpc_metadata_batch_init(&retry_state->recv_initial_metadata); - batch_data->batch.payload->recv_initial_metadata.recv_initial_metadata = - &retry_state->recv_initial_metadata; - batch_data->batch.payload->recv_initial_metadata.trailing_metadata_available = - &retry_state->trailing_metadata_available; - GRPC_CLOSURE_INIT(&retry_state->recv_initial_metadata_ready, - RecvInitialMetadataReady, batch_data, - grpc_schedule_on_exec_ctx); - batch_data->batch.payload->recv_initial_metadata.recv_initial_metadata_ready = - &retry_state->recv_initial_metadata_ready; -} - -void ChannelData::RetryingCall::AddRetriableRecvMessageOp( - SubchannelCallRetryState* retry_state, - SubchannelCallBatchData* batch_data) { - ++retry_state->started_recv_message_count; - batch_data->batch.recv_message = true; - batch_data->batch.payload->recv_message.recv_message = - &retry_state->recv_message; - GRPC_CLOSURE_INIT(&retry_state->recv_message_ready, RecvMessageReady, - batch_data, grpc_schedule_on_exec_ctx); - batch_data->batch.payload->recv_message.recv_message_ready = - &retry_state->recv_message_ready; -} - -void ChannelData::RetryingCall::AddRetriableRecvTrailingMetadataOp( - SubchannelCallRetryState* retry_state, - 
SubchannelCallBatchData* batch_data) { - retry_state->started_recv_trailing_metadata = true; - batch_data->batch.recv_trailing_metadata = true; - grpc_metadata_batch_init(&retry_state->recv_trailing_metadata); - batch_data->batch.payload->recv_trailing_metadata.recv_trailing_metadata = - &retry_state->recv_trailing_metadata; - batch_data->batch.payload->recv_trailing_metadata.collect_stats = - &retry_state->collect_stats; - GRPC_CLOSURE_INIT(&retry_state->recv_trailing_metadata_ready, - RecvTrailingMetadataReady, batch_data, - grpc_schedule_on_exec_ctx); - batch_data->batch.payload->recv_trailing_metadata - .recv_trailing_metadata_ready = - &retry_state->recv_trailing_metadata_ready; -} - -void ChannelData::RetryingCall::StartInternalRecvTrailingMetadata() { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log( - GPR_INFO, - "chand=%p retrying_call=%p: call failed but recv_trailing_metadata not " - "started; starting it internally", - chand_, this); - } - SubchannelCallRetryState* retry_state = - static_cast(lb_call_->GetParentData()); - // Create batch_data with 2 refs, since this batch will be unreffed twice: - // once for the recv_trailing_metadata_ready callback when the subchannel - // batch returns, and again when we actually get a recv_trailing_metadata - // op from the surface. - SubchannelCallBatchData* batch_data = - SubchannelCallBatchData::Create(this, 2, false /* set_on_complete */); - AddRetriableRecvTrailingMetadataOp(retry_state, batch_data); - retry_state->recv_trailing_metadata_internal_batch = batch_data; - // Note: This will release the call combiner. - lb_call_->StartTransportStreamOpBatch(&batch_data->batch); -} - -// If there are any cached send ops that need to be replayed on the -// current subchannel call, creates and returns a new subchannel batch -// to replay those ops. Otherwise, returns nullptr. 
-ChannelData::RetryingCall::SubchannelCallBatchData* -ChannelData::RetryingCall::MaybeCreateSubchannelBatchForReplay( - SubchannelCallRetryState* retry_state) { - SubchannelCallBatchData* replay_batch_data = nullptr; - // send_initial_metadata. - if (seen_send_initial_metadata_ && - !retry_state->started_send_initial_metadata && - !pending_send_initial_metadata_) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: replaying previously completed " - "send_initial_metadata op", - chand_, this); - } - replay_batch_data = - SubchannelCallBatchData::Create(this, 1, true /* set_on_complete */); - AddRetriableSendInitialMetadataOp(retry_state, replay_batch_data); - } - // send_message. - // Note that we can only have one send_message op in flight at a time. - if (retry_state->started_send_message_count < send_messages_.size() && - retry_state->started_send_message_count == - retry_state->completed_send_message_count && - !pending_send_message_) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: replaying previously completed " - "send_message op", - chand_, this); - } - if (replay_batch_data == nullptr) { - replay_batch_data = - SubchannelCallBatchData::Create(this, 1, true /* set_on_complete */); - } - AddRetriableSendMessageOp(retry_state, replay_batch_data); - } - // send_trailing_metadata. - // Note that we only add this op if we have no more send_message ops - // to start, since we can't send down any more send_message ops after - // send_trailing_metadata. 
- if (seen_send_trailing_metadata_ && - retry_state->started_send_message_count == send_messages_.size() && - !retry_state->started_send_trailing_metadata && - !pending_send_trailing_metadata_) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: replaying previously completed " - "send_trailing_metadata op", - chand_, this); - } - if (replay_batch_data == nullptr) { - replay_batch_data = - SubchannelCallBatchData::Create(this, 1, true /* set_on_complete */); - } - AddRetriableSendTrailingMetadataOp(retry_state, replay_batch_data); - } - return replay_batch_data; -} - -void ChannelData::RetryingCall::AddSubchannelBatchesForPendingBatches( - SubchannelCallRetryState* retry_state, CallCombinerClosureList* closures) { - for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - PendingBatch* pending = &pending_batches_[i]; - grpc_transport_stream_op_batch* batch = pending->batch; - if (batch == nullptr) continue; - // Skip any batch that either (a) has already been started on this - // subchannel call or (b) we can't start yet because we're still - // replaying send ops that need to be completed first. - // TODO(roth): Note that if any one op in the batch can't be sent - // yet due to ops that we're replaying, we don't start any of the ops - // in the batch. This is probably okay, but it could conceivably - // lead to increased latency in some cases -- e.g., we could delay - // starting a recv op due to it being in the same batch with a send - // op. If/when we revamp the callback protocol in - // transport_stream_op_batch, we may be able to fix this. 
- if (batch->send_initial_metadata && - retry_state->started_send_initial_metadata) { - continue; - } - if (batch->send_message && retry_state->completed_send_message_count < - retry_state->started_send_message_count) { - continue; - } - // Note that we only start send_trailing_metadata if we have no more - // send_message ops to start, since we can't send down any more - // send_message ops after send_trailing_metadata. - if (batch->send_trailing_metadata && - (retry_state->started_send_message_count + batch->send_message < - send_messages_.size() || - retry_state->started_send_trailing_metadata)) { - continue; - } - if (batch->recv_initial_metadata && - retry_state->started_recv_initial_metadata) { - continue; - } - if (batch->recv_message && retry_state->completed_recv_message_count < - retry_state->started_recv_message_count) { - continue; - } - if (batch->recv_trailing_metadata && - retry_state->started_recv_trailing_metadata) { - // If we previously completed a recv_trailing_metadata op - // initiated by StartInternalRecvTrailingMetadata(), use the - // result of that instead of trying to re-start this op. - if (GPR_UNLIKELY((retry_state->recv_trailing_metadata_internal_batch != - nullptr))) { - // If the batch completed, then trigger the completion callback - // directly, so that we return the previously returned results to - // the application. Otherwise, just unref the internally - // started subchannel batch, since we'll propagate the - // completion when it completes. - if (retry_state->completed_recv_trailing_metadata) { - // Batches containing recv_trailing_metadata always succeed. 
- closures->Add( - &retry_state->recv_trailing_metadata_ready, GRPC_ERROR_NONE, - "re-executing recv_trailing_metadata_ready to propagate " - "internally triggered result"); - } else { - retry_state->recv_trailing_metadata_internal_batch->Unref(); - } - retry_state->recv_trailing_metadata_internal_batch = nullptr; - } - continue; - } - // If we're not retrying, just send the batch as-is. - // TODO(roth): This condition doesn't seem exactly right -- maybe need a - // notion of "draining" once we've committed and are done replaying? - if (retry_policy_ == nullptr || retry_committed_) { - AddClosureForSubchannelBatch(batch, closures); - PendingBatchClear(pending); - continue; - } - // Create batch with the right number of callbacks. - const bool has_send_ops = batch->send_initial_metadata || - batch->send_message || - batch->send_trailing_metadata; - const int num_callbacks = has_send_ops + batch->recv_initial_metadata + - batch->recv_message + - batch->recv_trailing_metadata; - SubchannelCallBatchData* batch_data = SubchannelCallBatchData::Create( - this, num_callbacks, has_send_ops /* set_on_complete */); - // Cache send ops if needed. - MaybeCacheSendOpsForBatch(pending); - // send_initial_metadata. - if (batch->send_initial_metadata) { - AddRetriableSendInitialMetadataOp(retry_state, batch_data); - } - // send_message. - if (batch->send_message) { - AddRetriableSendMessageOp(retry_state, batch_data); - } - // send_trailing_metadata. - if (batch->send_trailing_metadata) { - AddRetriableSendTrailingMetadataOp(retry_state, batch_data); - } - // recv_initial_metadata. - if (batch->recv_initial_metadata) { - // recv_flags is only used on the server side. - GPR_ASSERT(batch->payload->recv_initial_metadata.recv_flags == nullptr); - AddRetriableRecvInitialMetadataOp(retry_state, batch_data); - } - // recv_message. - if (batch->recv_message) { - AddRetriableRecvMessageOp(retry_state, batch_data); - } - // recv_trailing_metadata. 
- if (batch->recv_trailing_metadata) { - AddRetriableRecvTrailingMetadataOp(retry_state, batch_data); - } - AddClosureForSubchannelBatch(&batch_data->batch, closures); - // Track number of pending subchannel send batches. - // If this is the first one, take a ref to the call stack. - if (batch->send_initial_metadata || batch->send_message || - batch->send_trailing_metadata) { - if (num_pending_retriable_subchannel_send_batches_ == 0) { - GRPC_CALL_STACK_REF(owning_call_, "subchannel_send_batches"); - } - ++num_pending_retriable_subchannel_send_batches_; - } - } -} - -void ChannelData::RetryingCall::StartRetriableSubchannelBatches( - void* arg, grpc_error* /*ignored*/) { - RetryingCall* call = static_cast(arg); - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: constructing retriable batches", - call->chand_, call); - } - SubchannelCallRetryState* retry_state = - static_cast(call->lb_call_->GetParentData()); - // Construct list of closures to execute, one for each pending batch. - CallCombinerClosureList closures; - // Replay previously-returned send_* ops if needed. - SubchannelCallBatchData* replay_batch_data = - call->MaybeCreateSubchannelBatchForReplay(retry_state); - if (replay_batch_data != nullptr) { - call->AddClosureForSubchannelBatch(&replay_batch_data->batch, &closures); - // Track number of pending subchannel send batches. - // If this is the first one, take a ref to the call stack. - if (call->num_pending_retriable_subchannel_send_batches_ == 0) { - GRPC_CALL_STACK_REF(call->owning_call_, "subchannel_send_batches"); - } - ++call->num_pending_retriable_subchannel_send_batches_; - } - // Now add pending batches. - call->AddSubchannelBatchesForPendingBatches(retry_state, &closures); - // Start batches on subchannel call. 
- if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p retrying_call=%p: starting %" PRIuPTR - " retriable batches on lb_call=%p", - call->chand_, call, closures.size(), call->lb_call_.get()); - } - // Note: This will yield the call combiner. - closures.RunClosures(call->call_combiner_); -} - -void ChannelData::RetryingCall::CreateLbCall(void* arg, grpc_error* /*error*/) { - auto* call = static_cast(arg); - const size_t parent_data_size = - call->enable_retries_ ? sizeof(SubchannelCallRetryState) : 0; - grpc_call_element_args args = {call->owning_call_, nullptr, - call->call_context_, call->path_, - call->call_start_time_, call->deadline_, - call->arena_, call->call_combiner_}; - call->lb_call_ = ChannelData::LoadBalancedCall::Create( - call->chand_, args, call->pollent_, parent_data_size); - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { - gpr_log(GPR_INFO, "chand=%p retrying_call=%p: create lb_call=%p", - call->chand_, call, call->lb_call_.get()); - } - if (parent_data_size > 0) { - new (call->lb_call_->GetParentData()) - SubchannelCallRetryState(call->call_context_); - } - call->PendingBatchesResume(); -} - -// -// ChannelData::LoadBalancedCall::Metadata -// - -class ChannelData::LoadBalancedCall::Metadata - : public LoadBalancingPolicy::MetadataInterface { - public: - Metadata(LoadBalancedCall* lb_call, grpc_metadata_batch* batch) - : lb_call_(lb_call), batch_(batch) {} - - void Add(absl::string_view key, absl::string_view value) override { - grpc_linked_mdelem* linked_mdelem = static_cast( - lb_call_->arena_->Alloc(sizeof(grpc_linked_mdelem))); - linked_mdelem->md = grpc_mdelem_from_slices( - ExternallyManagedSlice(key.data(), key.size()), - ExternallyManagedSlice(value.data(), value.size())); - GPR_ASSERT(grpc_metadata_batch_link_tail(batch_, linked_mdelem) == - GRPC_ERROR_NONE); - } - - iterator begin() const override { - static_assert(sizeof(grpc_linked_mdelem*) <= sizeof(intptr_t), - "iterator 
size too large"); - return iterator( - this, reinterpret_cast(MaybeSkipEntry(batch_->list.head))); - } - iterator end() const override { - static_assert(sizeof(grpc_linked_mdelem*) <= sizeof(intptr_t), - "iterator size too large"); - return iterator(this, 0); - } - - iterator erase(iterator it) override { - grpc_linked_mdelem* linked_mdelem = - reinterpret_cast(GetIteratorHandle(it)); - intptr_t handle = reinterpret_cast(linked_mdelem->next); - grpc_metadata_batch_remove(batch_, linked_mdelem); - return iterator(this, handle); - } - - private: - grpc_linked_mdelem* MaybeSkipEntry(grpc_linked_mdelem* entry) const { - if (entry != nullptr && batch_->idx.named.path == entry) { - return entry->next; - } - return entry; - } - - intptr_t IteratorHandleNext(intptr_t handle) const override { - grpc_linked_mdelem* linked_mdelem = - reinterpret_cast(handle); - return reinterpret_cast(MaybeSkipEntry(linked_mdelem->next)); - } - - std::pair IteratorHandleGet( - intptr_t handle) const override { - grpc_linked_mdelem* linked_mdelem = - reinterpret_cast(handle); - return std::make_pair(StringViewFromSlice(GRPC_MDKEY(linked_mdelem->md)), - StringViewFromSlice(GRPC_MDVALUE(linked_mdelem->md))); - } - - LoadBalancedCall* lb_call_; - grpc_metadata_batch* batch_; -}; - -// -// ChannelData::LoadBalancedCall::LbCallState -// - -class ChannelData::LoadBalancedCall::LbCallState - : public LoadBalancingPolicy::CallState { - public: - explicit LbCallState(LoadBalancedCall* lb_call) : lb_call_(lb_call) {} - - void* Alloc(size_t size) override { return lb_call_->arena_->Alloc(size); } - - const LoadBalancingPolicy::BackendMetricData* GetBackendMetricData() - override { - if (lb_call_->backend_metric_data_ == nullptr) { - grpc_linked_mdelem* md = lb_call_->recv_trailing_metadata_->idx.named - .x_endpoint_load_metrics_bin; - if (md != nullptr) { - lb_call_->backend_metric_data_ = - ParseBackendMetricData(GRPC_MDVALUE(md->md), lb_call_->arena_); - } - } - return lb_call_->backend_metric_data_; - 
} - - absl::string_view ExperimentalGetCallAttribute(const char* key) override { - auto* service_config_call_data = static_cast( - lb_call_->call_context_[GRPC_CONTEXT_SERVICE_CONFIG_CALL_DATA].value); - auto& call_attributes = service_config_call_data->call_attributes(); - auto it = call_attributes.find(key); - if (it == call_attributes.end()) return absl::string_view(); - return it->second; - } - - private: - LoadBalancedCall* lb_call_; -}; - -// -// LoadBalancedCall -// - -RefCountedPtr -ChannelData::LoadBalancedCall::Create(ChannelData* chand, - const grpc_call_element_args& args, - grpc_polling_entity* pollent, - size_t parent_data_size) { - const size_t alloc_size = - parent_data_size > 0 - ? (GPR_ROUND_UP_TO_ALIGNMENT_SIZE(sizeof(LoadBalancedCall)) + - parent_data_size) - : sizeof(LoadBalancedCall); - auto* lb_call = static_cast(args.arena->Alloc(alloc_size)); - new (lb_call) LoadBalancedCall(chand, args, pollent); - return lb_call; -} - -ChannelData::LoadBalancedCall::LoadBalancedCall( - ChannelData* chand, const grpc_call_element_args& args, - grpc_polling_entity* pollent) - : refs_(1, GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace) - ? "LoadBalancedCall" - : nullptr), - chand_(chand), - path_(grpc_slice_ref_internal(args.path)), - call_start_time_(args.start_time), - deadline_(args.deadline), - arena_(args.arena), - owning_call_(args.call_stack), - call_combiner_(args.call_combiner), - call_context_(args.context), - pollent_(pollent) {} - -ChannelData::LoadBalancedCall::~LoadBalancedCall() { - grpc_slice_unref_internal(path_); - GRPC_ERROR_UNREF(cancel_error_); - if (backend_metric_data_ != nullptr) { - backend_metric_data_ - ->LoadBalancingPolicy::BackendMetricData::~BackendMetricData(); - } - // Make sure there are no remaining pending batches. 
- for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - GPR_ASSERT(pending_batches_[i] == nullptr); - } -} - -RefCountedPtr -ChannelData::LoadBalancedCall::Ref() { - IncrementRefCount(); - return RefCountedPtr(this); -} - -RefCountedPtr ChannelData::LoadBalancedCall::Ref( - const DebugLocation& location, const char* reason) { - IncrementRefCount(location, reason); - return RefCountedPtr(this); -} - -void ChannelData::LoadBalancedCall::Unref() { - if (GPR_UNLIKELY(refs_.Unref())) { - this->~LoadBalancedCall(); - } -} - -void ChannelData::LoadBalancedCall::Unref(const DebugLocation& location, - const char* reason) { - if (GPR_UNLIKELY(refs_.Unref(location, reason))) { - this->~LoadBalancedCall(); - } -} - -void ChannelData::LoadBalancedCall::IncrementRefCount() { refs_.Ref(); } - -void ChannelData::LoadBalancedCall::IncrementRefCount( - const DebugLocation& location, const char* reason) { - refs_.Ref(location, reason); -} - -void* ChannelData::LoadBalancedCall::GetParentData() { - return reinterpret_cast(this) + - GPR_ROUND_UP_TO_ALIGNMENT_SIZE(sizeof(LoadBalancedCall)); -} - -size_t ChannelData::LoadBalancedCall::GetBatchIndex( - grpc_transport_stream_op_batch* batch) { - // Note: It is important the send_initial_metadata be the first entry - // here, since the code in pick_subchannel_locked() assumes it will be. - if (batch->send_initial_metadata) return 0; - if (batch->send_message) return 1; - if (batch->send_trailing_metadata) return 2; - if (batch->recv_initial_metadata) return 3; - if (batch->recv_message) return 4; - if (batch->recv_trailing_metadata) return 5; - GPR_UNREACHABLE_CODE(return (size_t)-1); -} - -// This is called via the call combiner, so access to calld is synchronized. 
-void ChannelData::LoadBalancedCall::PendingBatchesAdd( - grpc_transport_stream_op_batch* batch) { - const size_t idx = GetBatchIndex(batch); - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - gpr_log(GPR_INFO, - "chand=%p lb_call=%p: adding pending batch at index %" PRIuPTR, - chand_, this, idx); - } - GPR_ASSERT(pending_batches_[idx] == nullptr); - pending_batches_[idx] = batch; -} - -// This is called via the call combiner, so access to calld is synchronized. -void ChannelData::LoadBalancedCall::FailPendingBatchInCallCombiner( - void* arg, grpc_error* error) { - grpc_transport_stream_op_batch* batch = - static_cast(arg); - auto* self = static_cast(batch->handler_private.extra_arg); - // Note: This will release the call combiner. - grpc_transport_stream_op_batch_finish_with_failure( - batch, GRPC_ERROR_REF(error), self->call_combiner_); -} - -// This is called via the call combiner, so access to calld is synchronized. -void ChannelData::LoadBalancedCall::PendingBatchesFail( - grpc_error* error, - YieldCallCombinerPredicate yield_call_combiner_predicate) { - GPR_ASSERT(error != GRPC_ERROR_NONE); - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { - size_t num_batches = 0; - for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - if (pending_batches_[i] != nullptr) ++num_batches; - } - gpr_log(GPR_INFO, - "chand=%p lb_call=%p: failing %" PRIuPTR " pending batches: %s", - chand_, this, num_batches, grpc_error_string(error)); - } - CallCombinerClosureList closures; - for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { - grpc_transport_stream_op_batch*& batch = pending_batches_[i]; - if (batch != nullptr) { - batch->handler_private.extra_arg = this; - GRPC_CLOSURE_INIT(&batch->handler_private.closure, - FailPendingBatchInCallCombiner, batch, - grpc_schedule_on_exec_ctx); - closures.Add(&batch->handler_private.closure, GRPC_ERROR_REF(error), - "PendingBatchesFail"); - batch = nullptr; - } - } - if 
(yield_call_combiner_predicate(closures)) { - closures.RunClosures(call_combiner_); - } else { - closures.RunClosuresWithoutYielding(call_combiner_); - } - GRPC_ERROR_UNREF(error); -} - -// This is called via the call combiner, so access to calld is synchronized. -void ChannelData::LoadBalancedCall::ResumePendingBatchInCallCombiner( - void* arg, grpc_error* /*ignored*/) { - grpc_transport_stream_op_batch* batch = - static_cast(arg); - SubchannelCall* subchannel_call = - static_cast(batch->handler_private.extra_arg); - // Note: This will release the call combiner. - subchannel_call->StartTransportStreamOpBatch(batch); -} - -// This is called via the call combiner, so access to calld is synchronized. -void ChannelData::LoadBalancedCall::PendingBatchesResume() { +void ClientChannel::LoadBalancedCall::PendingBatchesResume() { if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_call_trace)) { size_t num_batches = 0; for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { @@ -5103,7 +2686,7 @@ void ChannelData::LoadBalancedCall::PendingBatchesResume() { closures.RunClosures(call_combiner_); } -void ChannelData::LoadBalancedCall::StartTransportStreamOpBatch( +void ClientChannel::LoadBalancedCall::StartTransportStreamOpBatch( grpc_transport_stream_op_batch* batch) { // Intercept recv_trailing_metadata_ready for LB callback. if (batch->recv_trailing_metadata) { @@ -5184,7 +2767,7 @@ void ChannelData::LoadBalancedCall::StartTransportStreamOpBatch( } } -void ChannelData::LoadBalancedCall:: +void ClientChannel::LoadBalancedCall:: RecvTrailingMetadataReadyForLoadBalancingPolicy(void* arg, grpc_error* error) { auto* self = static_cast(arg); @@ -5224,7 +2807,7 @@ void ChannelData::LoadBalancedCall:: // TODO(roth): Consider not intercepting this callback unless we // actually need to, if this causes a performance problem. 
-void ChannelData::LoadBalancedCall:: +void ClientChannel::LoadBalancedCall:: InjectRecvTrailingMetadataReadyForLoadBalancingPolicy( grpc_transport_stream_op_batch* batch) { recv_trailing_metadata_ = @@ -5238,7 +2821,7 @@ void ChannelData::LoadBalancedCall:: &recv_trailing_metadata_ready_; } -void ChannelData::LoadBalancedCall::CreateSubchannelCall() { +void ClientChannel::LoadBalancedCall::CreateSubchannelCall() { SubchannelCall::Args call_args = { std::move(connected_subchannel_), pollent_, path_, call_start_time_, deadline_, arena_, @@ -5266,7 +2849,7 @@ void ChannelData::LoadBalancedCall::CreateSubchannelCall() { // because there may be multiple LB picks happening in parallel. // Instead, we will probably need to maintain a list in the CallData // object of pending LB picks to be cancelled when the closure runs. -class ChannelData::LoadBalancedCall::LbQueuedCallCanceller { +class ClientChannel::LoadBalancedCall::LbQueuedCallCanceller { public: explicit LbQueuedCallCanceller(RefCountedPtr lb_call) : lb_call_(std::move(lb_call)) { @@ -5305,7 +2888,7 @@ class ChannelData::LoadBalancedCall::LbQueuedCallCanceller { grpc_closure closure_; }; -void ChannelData::LoadBalancedCall::MaybeRemoveCallFromLbQueuedCallsLocked() { +void ClientChannel::LoadBalancedCall::MaybeRemoveCallFromLbQueuedCallsLocked() { if (!queued_pending_lb_pick_) return; if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { gpr_log(GPR_INFO, "chand=%p lb_call=%p: removing from queued picks list", @@ -5317,7 +2900,7 @@ void ChannelData::LoadBalancedCall::MaybeRemoveCallFromLbQueuedCallsLocked() { lb_call_canceller_ = nullptr; } -void ChannelData::LoadBalancedCall::MaybeAddCallToLbQueuedCallsLocked() { +void ClientChannel::LoadBalancedCall::MaybeAddCallToLbQueuedCallsLocked() { if (queued_pending_lb_pick_) return; if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { gpr_log(GPR_INFO, "chand=%p lb_call=%p: adding to queued picks list", @@ -5330,12 +2913,12 @@ void 
ChannelData::LoadBalancedCall::MaybeAddCallToLbQueuedCallsLocked() { lb_call_canceller_ = new LbQueuedCallCanceller(Ref()); } -void ChannelData::LoadBalancedCall::AsyncPickDone(grpc_error* error) { +void ClientChannel::LoadBalancedCall::AsyncPickDone(grpc_error* error) { GRPC_CLOSURE_INIT(&pick_closure_, PickDone, this, grpc_schedule_on_exec_ctx); ExecCtx::Run(DEBUG_LOCATION, &pick_closure_, error); } -void ChannelData::LoadBalancedCall::PickDone(void* arg, grpc_error* error) { +void ClientChannel::LoadBalancedCall::PickDone(void* arg, grpc_error* error) { auto* self = static_cast(arg); if (error != GRPC_ERROR_NONE) { if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_routing_trace)) { @@ -5349,6 +2932,8 @@ void ChannelData::LoadBalancedCall::PickDone(void* arg, grpc_error* error) { self->CreateSubchannelCall(); } +namespace { + const char* PickResultTypeName( LoadBalancingPolicy::PickResult::ResultType type) { switch (type) { @@ -5362,8 +2947,10 @@ const char* PickResultTypeName( GPR_UNREACHABLE_CODE(return "UNKNOWN"); } -void ChannelData::LoadBalancedCall::PickSubchannel(void* arg, - grpc_error* error) { +} // namespace + +void ClientChannel::LoadBalancedCall::PickSubchannel(void* arg, + grpc_error* error) { auto* self = static_cast(arg); bool pick_complete; { @@ -5376,7 +2963,7 @@ void ChannelData::LoadBalancedCall::PickSubchannel(void* arg, } } -bool ChannelData::LoadBalancedCall::PickSubchannelLocked(grpc_error** error) { +bool ClientChannel::LoadBalancedCall::PickSubchannelLocked(grpc_error** error) { GPR_ASSERT(connected_subchannel_ == nullptr); GPR_ASSERT(subchannel_call_ == nullptr); // Grab initial metadata. 
@@ -5452,68 +3039,4 @@ bool ChannelData::LoadBalancedCall::PickSubchannelLocked(grpc_error** error) { } } -} // namespace } // namespace grpc_core - -/************************************************************************* - * EXPORTED SYMBOLS - */ - -using grpc_core::ChannelData; - -const grpc_channel_filter grpc_client_channel_filter = { - ChannelData::CallData::StartTransportStreamOpBatch, - ChannelData::StartTransportOp, - sizeof(ChannelData::CallData), - ChannelData::CallData::Init, - ChannelData::CallData::SetPollent, - ChannelData::CallData::Destroy, - sizeof(ChannelData), - ChannelData::Init, - ChannelData::Destroy, - ChannelData::GetChannelInfo, - "client-channel", -}; - -grpc_connectivity_state grpc_client_channel_check_connectivity_state( - grpc_channel_element* elem, int try_to_connect) { - auto* chand = static_cast(elem->channel_data); - return chand->CheckConnectivityState(try_to_connect); -} - -int grpc_client_channel_num_external_connectivity_watchers( - grpc_channel_element* elem) { - auto* chand = static_cast(elem->channel_data); - return chand->NumExternalConnectivityWatchers(); -} - -void grpc_client_channel_watch_connectivity_state( - grpc_channel_element* elem, grpc_polling_entity pollent, - grpc_connectivity_state* state, grpc_closure* on_complete, - grpc_closure* watcher_timer_init) { - auto* chand = static_cast(elem->channel_data); - if (state == nullptr) { - // Handle cancellation. - GPR_ASSERT(watcher_timer_init == nullptr); - chand->RemoveExternalConnectivityWatcher(on_complete, /*cancel=*/true); - return; - } - // Handle addition. 
- return chand->AddExternalConnectivityWatcher(pollent, state, on_complete, - watcher_timer_init); -} - -void grpc_client_channel_start_connectivity_watch( - grpc_channel_element* elem, grpc_connectivity_state initial_state, - grpc_core::OrphanablePtr - watcher) { - auto* chand = static_cast(elem->channel_data); - chand->AddConnectivityWatcher(initial_state, std::move(watcher)); -} - -void grpc_client_channel_stop_connectivity_watch( - grpc_channel_element* elem, - grpc_core::AsyncConnectivityStateWatcherInterface* watcher) { - auto* chand = static_cast(elem->channel_data); - chand->RemoveConnectivityWatcher(watcher); -} diff --git a/src/core/ext/filters/client_channel/client_channel.h b/src/core/ext/filters/client_channel/client_channel.h index af011cafcc7..fe61e9fb10f 100644 --- a/src/core/ext/filters/client_channel/client_channel.h +++ b/src/core/ext/filters/client_channel/client_channel.h @@ -1,76 +1,496 @@ -/* - * - * Copyright 2015 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ +// +// Copyright 2015 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// #ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_CLIENT_CHANNEL_H #define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_CLIENT_CHANNEL_H #include -#include "src/core/ext/filters/client_channel/client_channel_channelz.h" +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/types/optional.h" + +#include + #include "src/core/ext/filters/client_channel/client_channel_factory.h" +#include "src/core/ext/filters/client_channel/config_selector.h" +#include "src/core/ext/filters/client_channel/dynamic_filters.h" +#include "src/core/ext/filters/client_channel/lb_policy.h" #include "src/core/ext/filters/client_channel/resolver.h" -#include "src/core/lib/channel/channel_stack.h" +#include "src/core/ext/filters/client_channel/resolver_result_parsing.h" +#include "src/core/ext/filters/client_channel/retry_throttle.h" +#include "src/core/ext/filters/client_channel/service_config.h" +#include "src/core/ext/filters/client_channel/subchannel.h" +#include "src/core/ext/filters/client_channel/subchannel_pool_interface.h" +#include "src/core/lib/gprpp/sync.h" +#include "src/core/lib/iomgr/error.h" +#include "src/core/lib/iomgr/polling_entity.h" +#include "src/core/lib/iomgr/work_serializer.h" +#include "src/core/lib/surface/channel.h" +#include "src/core/lib/transport/connectivity_state.h" + +// +// Client channel filter +// + +// A client channel is a channel that begins disconnected, and can connect +// to some endpoint on demand. If that endpoint disconnects, it will be +// connected to again later. 
+// +// Calls on a disconnected client channel are queued until a connection is +// established. // Channel arg key for server URI string. #define GRPC_ARG_SERVER_URI "grpc.server_uri" -/* A client channel is a channel that begins disconnected, and can connect - to some endpoint on demand. If that endpoint disconnects, it will be - connected to again later. +// Channel arg containing a pointer to the ClientChannel object. +#define GRPC_ARG_CLIENT_CHANNEL "grpc.internal.client_channel" + +// Channel arg containing a pointer to the ServiceConfig object. +#define GRPC_ARG_SERVICE_CONFIG_OBJ "grpc.internal.service_config_obj" + +// Max number of batches that can be pending on a call at any given +// time. This includes one batch for each of the following ops: +// recv_initial_metadata +// send_initial_metadata +// recv_message +// send_message +// recv_trailing_metadata +// send_trailing_metadata +#define MAX_PENDING_BATCHES 6 + +namespace grpc_core { + +class ClientChannel { + public: + static const grpc_channel_filter kFilterVtable; + + class LoadBalancedCall; + + // Returns the ClientChannel object from channel, or null if channel + // is not a client channel. + static ClientChannel* GetFromChannel(grpc_channel* channel); + + grpc_connectivity_state CheckConnectivityState(bool try_to_connect); + + // Starts a one-time connectivity state watch. When the channel's state + // becomes different from *state, sets *state to the new state and + // schedules on_complete. The watcher_timer_init callback is invoked as + // soon as the watch is actually started (i.e., after hopping into the + // client channel combiner). I/O will be serviced via pollent. + // + // This is intended to be used when starting a watch from outside of C-core + // via grpc_channel_watch_connectivity_state(). It should not be used + // by other callers. 
+ void AddExternalConnectivityWatcher(grpc_polling_entity pollent, + grpc_connectivity_state* state, + grpc_closure* on_complete, + grpc_closure* watcher_timer_init) { + new ExternalConnectivityWatcher(this, pollent, state, on_complete, + watcher_timer_init); + } + + // Cancels a pending external watcher previously added by + // AddExternalConnectivityWatcher(). + void CancelExternalConnectivityWatcher(grpc_closure* on_complete) { + ExternalConnectivityWatcher::RemoveWatcherFromExternalWatchersMap( + this, on_complete, /*cancel=*/true); + } + + int NumExternalConnectivityWatchers() const { + MutexLock lock(&external_watchers_mu_); + return static_cast(external_watchers_.size()); + } + + // Starts and stops a connectivity watch. The watcher will be initially + // notified as soon as the state changes from initial_state and then on + // every subsequent state change until either the watch is stopped or + // it is notified that the state has changed to SHUTDOWN. + // + // This is intended to be used when starting watches from code inside of + // C-core (e.g., for a nested control plane channel for things like xds). + void AddConnectivityWatcher( + grpc_connectivity_state initial_state, + OrphanablePtr watcher); + void RemoveConnectivityWatcher( + AsyncConnectivityStateWatcherInterface* watcher); + + RefCountedPtr CreateLoadBalancedCall( + const grpc_call_element_args& args, grpc_polling_entity* pollent, + size_t parent_data_size); + + private: + class CallData; + class ResolverResultHandler; + class SubchannelWrapper; + class ClientChannelControlHelper; + class ConnectivityWatcherAdder; + class ConnectivityWatcherRemover; + + // Represents a pending connectivity callback from an external caller + // via grpc_client_channel_watch_connectivity_state(). 
+ class ExternalConnectivityWatcher : public ConnectivityStateWatcherInterface { + public: + ExternalConnectivityWatcher(ClientChannel* chand, + grpc_polling_entity pollent, + grpc_connectivity_state* state, + grpc_closure* on_complete, + grpc_closure* watcher_timer_init); + + ~ExternalConnectivityWatcher() override; + + // Removes the watcher from the external_watchers_ map. + static void RemoveWatcherFromExternalWatchersMap(ClientChannel* chand, + grpc_closure* on_complete, + bool cancel); + + void Notify(grpc_connectivity_state state, + const absl::Status& /* status */) override; + + void Cancel(); + + private: + // Adds the watcher to state_tracker_. Consumes the ref that is passed to it + // from Start(). + void AddWatcherLocked(); + void RemoveWatcherLocked(); + + ClientChannel* chand_; + grpc_polling_entity pollent_; + grpc_connectivity_state initial_state_; + grpc_connectivity_state* state_; + grpc_closure* on_complete_; + grpc_closure* watcher_timer_init_; + Atomic done_{false}; + }; + + struct ResolverQueuedCall { + grpc_call_element* elem; + ResolverQueuedCall* next = nullptr; + }; + struct LbQueuedCall { + LoadBalancedCall* lb_call; + LbQueuedCall* next = nullptr; + }; + + ClientChannel(grpc_channel_element_args* args, grpc_error** error); + ~ClientChannel(); + + // Filter vtable functions. + static grpc_error* Init(grpc_channel_element* elem, + grpc_channel_element_args* args); + static void Destroy(grpc_channel_element* elem); + static void StartTransportOp(grpc_channel_element* elem, + grpc_transport_op* op); + static void GetChannelInfo(grpc_channel_element* elem, + const grpc_channel_info* info); + + // Note: Does NOT return a new ref. + grpc_error* disconnect_error() const { + return disconnect_error_.Load(MemoryOrder::ACQUIRE); + } + + // Note: All methods with "Locked" suffix must be invoked from within + // work_serializer_. - Calls on a disconnected client channel are queued until a connection is - established. 
*/ + void OnResolverResultChangedLocked(Resolver::Result result); + void OnResolverErrorLocked(grpc_error* error); -extern const grpc_channel_filter grpc_client_channel_filter; + void CreateOrUpdateLbPolicyLocked( + RefCountedPtr lb_policy_config, + Resolver::Result result); + OrphanablePtr CreateLbPolicyLocked( + const grpc_channel_args& args); -grpc_connectivity_state grpc_client_channel_check_connectivity_state( - grpc_channel_element* elem, int try_to_connect); + void UpdateStateAndPickerLocked( + grpc_connectivity_state state, const absl::Status& status, + const char* reason, + std::unique_ptr picker); -int grpc_client_channel_num_external_connectivity_watchers( - grpc_channel_element* elem); + void UpdateServiceConfigInControlPlaneLocked( + RefCountedPtr service_config, + RefCountedPtr config_selector, + const internal::ClientChannelGlobalParsedConfig* parsed_service_config, + const char* lb_policy_name); + + void UpdateServiceConfigInDataPlaneLocked(); + + void CreateResolverLocked(); + void DestroyResolverAndLbPolicyLocked(); + + grpc_error* DoPingLocked(grpc_transport_op* op); + + void StartTransportOpLocked(grpc_transport_op* op); + + void TryToConnectLocked(); + + // These methods all require holding resolution_mu_. + void AddResolverQueuedCall(ResolverQueuedCall* call, + grpc_polling_entity* pollent) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(resolution_mu_); + void RemoveResolverQueuedCall(ResolverQueuedCall* to_remove, + grpc_polling_entity* pollent) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(resolution_mu_); + + // These methods all require holding data_plane_mu_. 
+ void AddLbQueuedCall(LbQueuedCall* call, grpc_polling_entity* pollent) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(data_plane_mu_); + void RemoveLbQueuedCall(LbQueuedCall* to_remove, grpc_polling_entity* pollent) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(data_plane_mu_); + RefCountedPtr GetConnectedSubchannelInDataPlane( + SubchannelInterface* subchannel) const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(data_plane_mu_); + + // + // Fields set at construction and never modified. + // + const bool deadline_checking_enabled_; + const bool enable_retries_; + const size_t per_rpc_retry_buffer_size_; + grpc_channel_stack* owning_stack_; + ClientChannelFactory* client_channel_factory_; + const grpc_channel_args* channel_args_; + RefCountedPtr default_service_config_; + std::string server_name_; + UniquePtr target_uri_; + channelz::ChannelNode* channelz_node_; + + // + // Fields related to name resolution. Guarded by resolution_mu_. + // + mutable Mutex resolution_mu_; + // Linked list of calls queued waiting for resolver result. + ResolverQueuedCall* resolver_queued_calls_ ABSL_GUARDED_BY(resolution_mu_) = + nullptr; + // Data from service config. + grpc_error* resolver_transient_failure_error_ + ABSL_GUARDED_BY(resolution_mu_) = GRPC_ERROR_NONE; + bool received_service_config_data_ ABSL_GUARDED_BY(resolution_mu_) = false; + RefCountedPtr service_config_ ABSL_GUARDED_BY(resolution_mu_); + RefCountedPtr config_selector_ + ABSL_GUARDED_BY(resolution_mu_); + RefCountedPtr dynamic_filters_ + ABSL_GUARDED_BY(resolution_mu_); + + // + // Fields used in the data plane. Guarded by data_plane_mu_. + // + mutable Mutex data_plane_mu_; + std::unique_ptr picker_ + ABSL_GUARDED_BY(data_plane_mu_); + // Linked list of calls queued waiting for LB pick. + LbQueuedCall* lb_queued_calls_ ABSL_GUARDED_BY(data_plane_mu_) = nullptr; + + // + // Fields used in the control plane. Guarded by work_serializer. 
+ // + std::shared_ptr work_serializer_; + grpc_pollset_set* interested_parties_; + ConnectivityStateTracker state_tracker_; + OrphanablePtr resolver_; + bool previous_resolution_contained_addresses_ = false; + RefCountedPtr saved_service_config_; + RefCountedPtr saved_config_selector_; + absl::optional health_check_service_name_; + OrphanablePtr lb_policy_; + RefCountedPtr subchannel_pool_; + // The number of SubchannelWrapper instances referencing a given Subchannel. + std::map subchannel_refcount_map_; + // The set of SubchannelWrappers that currently exist. + // No need to hold a ref, since the map is updated in the control-plane + // work_serializer when the SubchannelWrappers are created and destroyed. + std::set subchannel_wrappers_; + // Pending ConnectedSubchannel updates for each SubchannelWrapper. + // Updates are queued here in the control plane work_serializer and then + // applied in the data plane mutex when the picker is updated. + std::map, RefCountedPtr> + pending_subchannel_updates_; + int keepalive_time_ = -1; + + // + // Fields accessed from both data plane mutex and control plane + // work_serializer. + // + Atomic disconnect_error_; + + // + // Fields guarded by a mutex, since they need to be accessed + // synchronously via get_channel_info(). + // + Mutex info_mu_; + UniquePtr info_lb_policy_name_ ABSL_GUARDED_BY(info_mu_); + UniquePtr info_service_config_json_ ABSL_GUARDED_BY(info_mu_); + + // + // Fields guarded by a mutex, since they need to be accessed + // synchronously via grpc_channel_num_external_connectivity_watchers(). + // + mutable Mutex external_watchers_mu_; + std::map> + external_watchers_ ABSL_GUARDED_BY(external_watchers_mu_); +}; -// Starts a one-time connectivity state watch. When the channel's state -// becomes different from *state, sets *state to the new state and -// schedules on_complete. 
The watcher_timer_init callback is invoked as -// soon as the watch is actually started (i.e., after hopping into the -// client channel combiner). I/O will be serviced via pollent. // -// This is intended to be used when starting a watch from outside of C-core -// via grpc_channel_watch_connectivity_state(). It should not be used -// by other callers. -void grpc_client_channel_watch_connectivity_state( - grpc_channel_element* elem, grpc_polling_entity pollent, - grpc_connectivity_state* state, grpc_closure* on_complete, - grpc_closure* watcher_timer_init); - -// Starts and stops a connectivity watch. The watcher will be initially -// notified as soon as the state changes from initial_state and then on -// every subsequent state change until either the watch is stopped or -// it is notified that the state has changed to SHUTDOWN. +// ClientChannel::LoadBalancedCall // -// This is intended to be used when starting watches from code inside of -// C-core (e.g., for a nested control plane channel for things like xds). -void grpc_client_channel_start_connectivity_watch( - grpc_channel_element* elem, grpc_connectivity_state initial_state, - grpc_core::OrphanablePtr - watcher); -void grpc_client_channel_stop_connectivity_watch( - grpc_channel_element* elem, - grpc_core::AsyncConnectivityStateWatcherInterface* watcher); + +// This object is ref-counted, but it cannot inherit from RefCounted<>, +// because it is allocated on the arena and can't free its memory when +// its refcount goes to zero. So instead, it manually implements the +// same API as RefCounted<>, so that it can be used with RefCountedPtr<>. +class ClientChannel::LoadBalancedCall { + public: + LoadBalancedCall(ClientChannel* chand, const grpc_call_element_args& args, + grpc_polling_entity* pollent); + ~LoadBalancedCall(); + + // Interface of RefCounted<>. 
+ RefCountedPtr Ref() GRPC_MUST_USE_RESULT; + RefCountedPtr Ref(const DebugLocation& location, + const char* reason) GRPC_MUST_USE_RESULT; + // When refcount drops to 0, destroys itself and the associated call stack, + // but does NOT free the memory because it's in the call arena. + void Unref(); + void Unref(const DebugLocation& location, const char* reason); + + void* GetParentData(); + + void StartTransportStreamOpBatch(grpc_transport_stream_op_batch* batch); + + // Invoked by channel for queued LB picks when the picker is updated. + static void PickSubchannel(void* arg, grpc_error* error); + // Helper function for performing an LB pick while holding the data plane + // mutex. Returns true if the pick is complete, in which case the caller + // must invoke PickDone() or AsyncPickDone() with the returned error. + bool PickSubchannelLocked(grpc_error** error) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ClientChannel::data_plane_mu_); + // Schedules a callback to process the completed pick. The callback + // will not run until after this method returns. + void AsyncPickDone(grpc_error* error); + + RefCountedPtr subchannel_call() const { + return subchannel_call_; + } + + private: + // Allow RefCountedPtr<> to access IncrementRefCount(). + template + friend class ::grpc_core::RefCountedPtr; + + class LbQueuedCallCanceller; + class Metadata; + class LbCallState; + + // Interface of RefCounted<>. + void IncrementRefCount(); + void IncrementRefCount(const DebugLocation& location, const char* reason); + + // Returns the index into pending_batches_ to be used for batch. + static size_t GetBatchIndex(grpc_transport_stream_op_batch* batch); + void PendingBatchesAdd(grpc_transport_stream_op_batch* batch); + static void FailPendingBatchInCallCombiner(void* arg, grpc_error* error); + // A predicate type and some useful implementations for PendingBatchesFail(). 
+ typedef bool (*YieldCallCombinerPredicate)( + const CallCombinerClosureList& closures); + static bool YieldCallCombiner(const CallCombinerClosureList& /*closures*/) { + return true; + } + static bool NoYieldCallCombiner(const CallCombinerClosureList& /*closures*/) { + return false; + } + static bool YieldCallCombinerIfPendingBatchesFound( + const CallCombinerClosureList& closures) { + return closures.size() > 0; + } + // Fails all pending batches. + // If yield_call_combiner_predicate returns true, assumes responsibility for + // yielding the call combiner. + void PendingBatchesFail( + grpc_error* error, + YieldCallCombinerPredicate yield_call_combiner_predicate); + static void ResumePendingBatchInCallCombiner(void* arg, grpc_error* ignored); + // Resumes all pending batches on subchannel_call_. + void PendingBatchesResume(); + + static void RecvTrailingMetadataReadyForLoadBalancingPolicy( + void* arg, grpc_error* error); + void InjectRecvTrailingMetadataReadyForLoadBalancingPolicy( + grpc_transport_stream_op_batch* batch); + + void CreateSubchannelCall(); + // Invoked when a pick is completed, on both success or failure. + static void PickDone(void* arg, grpc_error* error); + // Removes the call from the channel's list of queued picks if present. + void MaybeRemoveCallFromLbQueuedCallsLocked() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ClientChannel::data_plane_mu_); + // Adds the call to the channel's list of queued picks if not already present. + void MaybeAddCallToLbQueuedCallsLocked() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&ClientChannel::data_plane_mu_); + + RefCount refs_; + + ClientChannel* chand_; + + // TODO(roth): Instead of duplicating these fields in every filter + // that uses any one of them, we should store them in the call + // context. This will save per-call memory overhead. + grpc_slice path_; // Request path. 
+ gpr_cycle_counter call_start_time_; + grpc_millis deadline_; + Arena* arena_; + grpc_call_stack* owning_call_; + CallCombiner* call_combiner_; + grpc_call_context_element* call_context_; + + // Set when we get a cancel_stream op. + grpc_error* cancel_error_ = GRPC_ERROR_NONE; + + grpc_polling_entity* pollent_ = nullptr; + + grpc_closure pick_closure_; + + // Accessed while holding ClientChannel::data_plane_mu_. + ClientChannel::LbQueuedCall queued_call_; + bool queued_pending_lb_pick_ = false; + const LoadBalancingPolicy::BackendMetricData* backend_metric_data_ = nullptr; + RefCountedPtr connected_subchannel_; + std::function + lb_recv_trailing_metadata_ready_; + LbQueuedCallCanceller* lb_call_canceller_ = nullptr; + + RefCountedPtr subchannel_call_; + + // For intercepting recv_trailing_metadata_ready for the LB policy. + grpc_metadata_batch* recv_trailing_metadata_ = nullptr; + grpc_closure recv_trailing_metadata_ready_; + grpc_closure* original_recv_trailing_metadata_ready_ = nullptr; + + // Batches are added to this list when received from above. + // They are removed when we are done handling the batch (i.e., when + // either we have invoked all of the batch's callbacks or we have + // passed the batch down to the subchannel call and are not + // intercepting any of its callbacks). 
+ grpc_transport_stream_op_batch* pending_batches_[MAX_PENDING_BATCHES] = {}; +}; + +} // namespace grpc_core #endif // GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_CLIENT_CHANNEL_H diff --git a/src/core/ext/filters/client_channel/client_channel_plugin.cc b/src/core/ext/filters/client_channel/client_channel_plugin.cc index 5690545cbb7..1d33d25b491 100644 --- a/src/core/ext/filters/client_channel/client_channel_plugin.cc +++ b/src/core/ext/filters/client_channel/client_channel_plugin.cc @@ -34,6 +34,7 @@ #include "src/core/ext/filters/client_channel/proxy_mapper_registry.h" #include "src/core/ext/filters/client_channel/resolver_registry.h" #include "src/core/ext/filters/client_channel/resolver_result_parsing.h" +#include "src/core/ext/filters/client_channel/retry_service_config.h" #include "src/core/ext/filters/client_channel/retry_throttle.h" #include "src/core/ext/filters/client_channel/service_config_parser.h" #include "src/core/lib/surface/channel_init.h" @@ -46,6 +47,7 @@ static bool append_filter(grpc_channel_stack_builder* builder, void* arg) { void grpc_client_channel_init(void) { grpc_core::ServiceConfigParser::Init(); grpc_core::internal::ClientChannelServiceConfigParser::Register(); + grpc_core::internal::RetryServiceConfigParser::Register(); grpc_core::LoadBalancingPolicyRegistry::Builder::InitRegistry(); grpc_core::ResolverRegistry::Builder::InitRegistry(); grpc_core::internal::ServerRetryThrottleMap::Init(); @@ -54,7 +56,8 @@ void grpc_client_channel_init(void) { grpc_core::GlobalSubchannelPool::Init(); grpc_channel_init_register_stage( GRPC_CLIENT_CHANNEL, GRPC_CHANNEL_INIT_BUILTIN_PRIORITY, append_filter, - const_cast(&grpc_client_channel_filter)); + const_cast( + &grpc_core::ClientChannel::kFilterVtable)); grpc_http_connect_register_handshaker_factory(); grpc_client_channel_global_init_backup_polling(); } diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc index 
3b6e59b5f5a..0cf36f9e305 100644 --- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc @@ -1420,13 +1420,12 @@ void GrpcLb::UpdateLocked(UpdateArgs args) { // Start watching the channel's connectivity state. If the channel // goes into state TRANSIENT_FAILURE before the timer fires, we go into // fallback mode even if the fallback timeout has not elapsed. - grpc_channel_element* client_channel_elem = grpc_channel_stack_last_element( - grpc_channel_get_channel_stack(lb_channel_)); - GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter); + ClientChannel* client_channel = ClientChannel::GetFromChannel(lb_channel_); + GPR_ASSERT(client_channel != nullptr); // Ref held by callback. watcher_ = new StateWatcher(Ref(DEBUG_LOCATION, "StateWatcher")); - grpc_client_channel_start_connectivity_watch( - client_channel_elem, GRPC_CHANNEL_IDLE, + client_channel->AddConnectivityWatcher( + GRPC_CHANNEL_IDLE, OrphanablePtr(watcher_)); // Start balancer call. 
StartBalancerCallLocked(); @@ -1490,10 +1489,9 @@ void GrpcLb::ProcessAddressesAndChannelArgsLocked( } void GrpcLb::CancelBalancerChannelConnectivityWatchLocked() { - grpc_channel_element* client_channel_elem = grpc_channel_stack_last_element( - grpc_channel_get_channel_stack(lb_channel_)); - GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter); - grpc_client_channel_stop_connectivity_watch(client_channel_elem, watcher_); + ClientChannel* client_channel = ClientChannel::GetFromChannel(lb_channel_); + GPR_ASSERT(client_channel != nullptr); + client_channel->RemoveConnectivityWatcher(watcher_); } // diff --git a/src/core/ext/filters/client_channel/resolver_result_parsing.cc b/src/core/ext/filters/client_channel/resolver_result_parsing.cc index 500c7408a15..96aff1400ac 100644 --- a/src/core/ext/filters/client_channel/resolver_result_parsing.cc +++ b/src/core/ext/filters/client_channel/resolver_result_parsing.cc @@ -1,20 +1,18 @@ -/* - * - * Copyright 2018 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ +// +// Copyright 2018 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// #include @@ -63,191 +61,6 @@ void ClientChannelServiceConfigParser::Register() { namespace { -std::unique_ptr ParseRetryPolicy( - const Json& json, grpc_error** error) { - GPR_DEBUG_ASSERT(error != nullptr && *error == GRPC_ERROR_NONE); - auto retry_policy = - absl::make_unique(); - if (json.type() != Json::Type::OBJECT) { - *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryPolicy error:should be of type object"); - return nullptr; - } - std::vector error_list; - // Parse maxAttempts. - auto it = json.object_value().find("maxAttempts"); - if (it != json.object_value().end()) { - if (it->second.type() != Json::Type::NUMBER) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:maxAttempts error:should be of type number")); - } else { - retry_policy->max_attempts = - gpr_parse_nonnegative_int(it->second.string_value().c_str()); - if (retry_policy->max_attempts <= 1) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:maxAttempts error:should be at least 2")); - } else if (retry_policy->max_attempts > MAX_MAX_RETRY_ATTEMPTS) { - gpr_log(GPR_ERROR, - "service config: clamped retryPolicy.maxAttempts at %d", - MAX_MAX_RETRY_ATTEMPTS); - retry_policy->max_attempts = MAX_MAX_RETRY_ATTEMPTS; - } - } - } - // Parse initialBackoff. 
- if (ParseJsonObjectFieldAsDuration(json.object_value(), "initialBackoff", - &retry_policy->initial_backoff, - &error_list) && - retry_policy->initial_backoff == 0) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:initialBackoff error:must be greater than 0")); - } - // Parse maxBackoff. - if (ParseJsonObjectFieldAsDuration(json.object_value(), "maxBackoff", - &retry_policy->max_backoff, &error_list) && - retry_policy->max_backoff == 0) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:maxBackoff error:should be greater than 0")); - } - // Parse backoffMultiplier. - it = json.object_value().find("backoffMultiplier"); - if (it != json.object_value().end()) { - if (it->second.type() != Json::Type::NUMBER) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:backoffMultiplier error:should be of type number")); - } else { - if (sscanf(it->second.string_value().c_str(), "%f", - &retry_policy->backoff_multiplier) != 1) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:backoffMultiplier error:failed to parse")); - } else if (retry_policy->backoff_multiplier <= 0) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:backoffMultiplier error:should be greater than 0")); - } - } - } - // Parse retryableStatusCodes. 
- it = json.object_value().find("retryableStatusCodes"); - if (it != json.object_value().end()) { - if (it->second.type() != Json::Type::ARRAY) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryableStatusCodes error:should be of type array")); - } else { - for (const Json& element : it->second.array_value()) { - if (element.type() != Json::Type::STRING) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryableStatusCodes error:status codes should be of type " - "string")); - continue; - } - grpc_status_code status; - if (!grpc_status_code_from_string(element.string_value().c_str(), - &status)) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryableStatusCodes error:failed to parse status code")); - continue; - } - retry_policy->retryable_status_codes.Add(status); - } - if (retry_policy->retryable_status_codes.Empty()) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryableStatusCodes error:should be non-empty")); - }; - } - } - // Make sure required fields are set. - if (error_list.empty()) { - if (retry_policy->max_attempts == 0 || retry_policy->initial_backoff == 0 || - retry_policy->max_backoff == 0 || - retry_policy->backoff_multiplier == 0 || - retry_policy->retryable_status_codes.Empty()) { - *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryPolicy error:Missing required field(s)"); - return nullptr; - } - } - *error = GRPC_ERROR_CREATE_FROM_VECTOR("retryPolicy", &error_list); - return *error == GRPC_ERROR_NONE ? std::move(retry_policy) : nullptr; -} - -grpc_error* ParseRetryThrottling( - const Json& json, - ClientChannelGlobalParsedConfig::RetryThrottling* retry_throttling) { - if (json.type() != Json::Type::OBJECT) { - return GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryThrottling error:Type should be object"); - } - std::vector error_list; - // Parse maxTokens. 
- auto it = json.object_value().find("maxTokens"); - if (it == json.object_value().end()) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryThrottling field:maxTokens error:Not found")); - } else if (it->second.type() != Json::Type::NUMBER) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryThrottling field:maxTokens error:Type should be " - "number")); - } else { - retry_throttling->max_milli_tokens = - gpr_parse_nonnegative_int(it->second.string_value().c_str()) * 1000; - if (retry_throttling->max_milli_tokens <= 0) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryThrottling field:maxTokens error:should be " - "greater than zero")); - } - } - // Parse tokenRatio. - it = json.object_value().find("tokenRatio"); - if (it == json.object_value().end()) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryThrottling field:tokenRatio error:Not found")); - } else if (it->second.type() != Json::Type::NUMBER) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryThrottling field:tokenRatio error:type should be " - "number")); - } else { - // We support up to 3 decimal digits. 
- size_t whole_len = it->second.string_value().size(); - const char* value = it->second.string_value().c_str(); - uint32_t multiplier = 1; - uint32_t decimal_value = 0; - const char* decimal_point = strchr(value, '.'); - if (decimal_point != nullptr) { - whole_len = static_cast(decimal_point - value); - multiplier = 1000; - size_t decimal_len = strlen(decimal_point + 1); - if (decimal_len > 3) decimal_len = 3; - if (!gpr_parse_bytes_to_uint32(decimal_point + 1, decimal_len, - &decimal_value)) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryThrottling field:tokenRatio error:Failed " - "parsing")); - return GRPC_ERROR_CREATE_FROM_VECTOR("retryPolicy", &error_list); - } - uint32_t decimal_multiplier = 1; - for (size_t i = 0; i < (3 - decimal_len); ++i) { - decimal_multiplier *= 10; - } - decimal_value *= decimal_multiplier; - } - uint32_t whole_value; - if (!gpr_parse_bytes_to_uint32(value, whole_len, &whole_value)) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryThrottling field:tokenRatio error:Failed " - "parsing")); - return GRPC_ERROR_CREATE_FROM_VECTOR("retryPolicy", &error_list); - } - retry_throttling->milli_token_ratio = - static_cast((whole_value * multiplier) + decimal_value); - if (retry_throttling->milli_token_ratio <= 0) { - error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( - "field:retryThrottling field:tokenRatio error:value should " - "be greater than 0")); - } - } - return GRPC_ERROR_CREATE_FROM_VECTOR("retryPolicy", &error_list); -} - absl::optional ParseHealthCheckConfig(const Json& field, grpc_error** error) { GPR_DEBUG_ASSERT(error != nullptr && *error == GRPC_ERROR_NONE); @@ -319,19 +132,6 @@ ClientChannelServiceConfigParser::ParseGlobalParams( } } } - // Parse retry throttling. 
- absl::optional - retry_throttling; - it = json.object_value().find("retryThrottling"); - if (it != json.object_value().end()) { - ClientChannelGlobalParsedConfig::RetryThrottling data; - grpc_error* parsing_error = ParseRetryThrottling(it->second, &data); - if (parsing_error != GRPC_ERROR_NONE) { - error_list.push_back(parsing_error); - } else { - retry_throttling.emplace(data); - } - } // Parse health check config. absl::optional health_check_service_name; it = json.object_value().find("healthCheckConfig"); @@ -348,7 +148,7 @@ ClientChannelServiceConfigParser::ParseGlobalParams( if (*error == GRPC_ERROR_NONE) { return absl::make_unique( std::move(parsed_lb_config), std::move(lb_policy_name), - retry_throttling, std::move(health_check_service_name)); + std::move(health_check_service_name)); } return nullptr; } @@ -358,10 +158,8 @@ ClientChannelServiceConfigParser::ParsePerMethodParams( const grpc_channel_args* /*args*/, const Json& json, grpc_error** error) { GPR_DEBUG_ASSERT(error != nullptr && *error == GRPC_ERROR_NONE); std::vector error_list; - absl::optional wait_for_ready; - grpc_millis timeout = 0; - std::unique_ptr retry_policy; // Parse waitForReady. + absl::optional wait_for_ready; auto it = json.object_value().find("waitForReady"); if (it != json.object_value().end()) { if (it->second.type() == Json::Type::JSON_TRUE) { @@ -374,21 +172,14 @@ ClientChannelServiceConfigParser::ParsePerMethodParams( } } // Parse timeout. + grpc_millis timeout = 0; ParseJsonObjectFieldAsDuration(json.object_value(), "timeout", &timeout, &error_list, false); - // Parse retry policy. - it = json.object_value().find("retryPolicy"); - if (it != json.object_value().end()) { - grpc_error* error = GRPC_ERROR_NONE; - retry_policy = ParseRetryPolicy(it->second, &error); - if (retry_policy == nullptr) { - error_list.push_back(error); - } - } + // Return result. 
*error = GRPC_ERROR_CREATE_FROM_VECTOR("Client channel parser", &error_list); if (*error == GRPC_ERROR_NONE) { - return absl::make_unique( - timeout, wait_for_ready, std::move(retry_policy)); + return absl::make_unique(timeout, + wait_for_ready); } return nullptr; } diff --git a/src/core/ext/filters/client_channel/resolver_result_parsing.h b/src/core/ext/filters/client_channel/resolver_result_parsing.h index cdf89d35ce1..98b1f3e7c36 100644 --- a/src/core/ext/filters/client_channel/resolver_result_parsing.h +++ b/src/core/ext/filters/client_channel/resolver_result_parsing.h @@ -1,20 +1,18 @@ -/* - * - * Copyright 2018 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ +// +// Copyright 2018 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// #ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RESOLVER_RESULT_PARSING_H #define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RESOLVER_RESULT_PARSING_H @@ -26,7 +24,6 @@ #include "src/core/ext/filters/client_channel/lb_policy.h" #include "src/core/ext/filters/client_channel/lb_policy_factory.h" #include "src/core/ext/filters/client_channel/resolver.h" -#include "src/core/ext/filters/client_channel/retry_throttle.h" #include "src/core/ext/filters/client_channel/service_config.h" #include "src/core/lib/channel/status_util.h" #include "src/core/lib/gprpp/ref_counted.h" @@ -40,19 +37,12 @@ namespace internal { class ClientChannelGlobalParsedConfig : public ServiceConfigParser::ParsedConfig { public: - struct RetryThrottling { - intptr_t max_milli_tokens = 0; - intptr_t milli_token_ratio = 0; - }; - ClientChannelGlobalParsedConfig( RefCountedPtr parsed_lb_config, std::string parsed_deprecated_lb_policy, - const absl::optional& retry_throttling, absl::optional health_check_service_name) : parsed_lb_config_(std::move(parsed_lb_config)), parsed_deprecated_lb_policy_(std::move(parsed_deprecated_lb_policy)), - retry_throttling_(retry_throttling), health_check_service_name_(std::move(health_check_service_name)) {} RefCountedPtr parsed_lb_config() const { @@ -63,10 +53,6 @@ class ClientChannelGlobalParsedConfig return parsed_deprecated_lb_policy_; } - absl::optional retry_throttling() const { - return retry_throttling_; - } - const absl::optional& health_check_service_name() const { return health_check_service_name_; } @@ -74,38 +60,23 @@ class ClientChannelGlobalParsedConfig private: RefCountedPtr parsed_lb_config_; std::string parsed_deprecated_lb_policy_; - absl::optional retry_throttling_; absl::optional health_check_service_name_; }; class ClientChannelMethodParsedConfig : public ServiceConfigParser::ParsedConfig { public: - struct RetryPolicy { - int max_attempts = 0; - grpc_millis initial_backoff = 0; - grpc_millis max_backoff = 0; - float backoff_multiplier = 0; - 
StatusCodeSet retryable_status_codes; - }; - ClientChannelMethodParsedConfig(grpc_millis timeout, - const absl::optional& wait_for_ready, - std::unique_ptr retry_policy) - : timeout_(timeout), - wait_for_ready_(wait_for_ready), - retry_policy_(std::move(retry_policy)) {} + const absl::optional& wait_for_ready) + : timeout_(timeout), wait_for_ready_(wait_for_ready) {} grpc_millis timeout() const { return timeout_; } absl::optional wait_for_ready() const { return wait_for_ready_; } - const RetryPolicy* retry_policy() const { return retry_policy_.get(); } - private: grpc_millis timeout_ = 0; absl::optional wait_for_ready_; - std::unique_ptr retry_policy_; }; class ClientChannelServiceConfigParser : public ServiceConfigParser::Parser { @@ -125,4 +96,4 @@ class ClientChannelServiceConfigParser : public ServiceConfigParser::Parser { } // namespace internal } // namespace grpc_core -#endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RESOLVER_RESULT_PARSING_H */ +#endif // GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RESOLVER_RESULT_PARSING_H diff --git a/src/core/ext/filters/client_channel/retry_filter.cc b/src/core/ext/filters/client_channel/retry_filter.cc new file mode 100644 index 00000000000..ba627bf387f --- /dev/null +++ b/src/core/ext/filters/client_channel/retry_filter.cc @@ -0,0 +1,2164 @@ +// +// Copyright 2015 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include + +#include "src/core/ext/filters/client_channel/retry_filter.h" + +#include "absl/container/inlined_vector.h" +#include "absl/status/statusor.h" +#include "absl/strings/strip.h" + +#include + +#include "src/core/ext/filters/client_channel/client_channel.h" +#include "src/core/ext/filters/client_channel/retry_service_config.h" +#include "src/core/ext/filters/client_channel/retry_throttle.h" +#include "src/core/ext/filters/client_channel/service_config.h" +#include "src/core/ext/filters/client_channel/service_config_call_data.h" +#include "src/core/lib/backoff/backoff.h" +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/channel/channel_stack.h" +#include "src/core/lib/channel/status_util.h" +#include "src/core/lib/gprpp/manual_constructor.h" +#include "src/core/lib/iomgr/polling_entity.h" +#include "src/core/lib/slice/slice_internal.h" +#include "src/core/lib/slice/slice_string_helpers.h" +#include "src/core/lib/transport/error_utils.h" +#include "src/core/lib/transport/metadata.h" +#include "src/core/lib/transport/metadata_batch.h" +#include "src/core/lib/transport/static_metadata.h" +#include "src/core/lib/transport/status_metadata.h" +#include "src/core/lib/uri/uri_parser.h" + +// +// Retry filter +// + +// This filter is intended to be used in the DynamicFilter stack in the +// client channel, which is situated between the name resolver and the +// LB policy. Normally, the last filter in the DynamicFilter stack is +// the DynamicTerminationFilter (see client_channel.cc), which creates a +// LoadBalancedCall and delegates to it. However, when retries are +// enabled, this filter is used instead of the DynamicTerminationFilter. +// +// In order to support retries, we act as a proxy for stream op batches. +// When we get a batch from the surface, we add it to our list of pending +// batches, and we then use those batches to construct separate "child" +// batches to be started on the subchannel call. 
When the child batches +// return, we then decide which pending batches have been completed and +// schedule their callbacks accordingly. If a subchannel call fails and +// we want to retry it, we do a new pick and start again, constructing +// new "child" batches for the new subchannel call. +// +// Note that retries are committed when receiving data from the server +// (except for Trailers-Only responses). However, there may be many +// send ops started before receiving any data, so we may have already +// completed some number of send ops (and returned the completions up to +// the surface) by the time we realize that we need to retry. To deal +// with this, we cache data for send ops, so that we can replay them on a +// different subchannel call even after we have completed the original +// batches. +// +// There are two sets of data to maintain: +// - In call_data (in the parent channel), we maintain a list of pending +// ops and cached data for send ops. +// - In the subchannel call, we maintain state to indicate what ops have +// already been sent down to that call. +// +// When constructing the "child" batches, we compare those two sets of +// data to see which batches need to be sent to the subchannel call. + +// TODO(roth): In subsequent PRs: +// - add support for transparent retries (including initial metadata) +// - figure out how to record stats in census for retries +// (census filter is on top of this one) +// - add census stats for retries + +// By default, we buffer 256 KiB per RPC for retries. +// TODO(roth): Do we have any data to suggest a better value? +#define DEFAULT_PER_RPC_RETRY_BUFFER_SIZE (256 << 10) + +// This value was picked arbitrarily. It can be changed if there is +// any even moderately compelling reason to do so. 
+#define RETRY_BACKOFF_JITTER 0.2 + +namespace grpc_core { + +namespace { + +using internal::RetryGlobalConfig; +using internal::RetryMethodConfig; +using internal::RetryServiceConfigParser; +using internal::ServerRetryThrottleData; + +TraceFlag grpc_retry_trace(false, "retry"); + +// +// RetryFilter +// + +class RetryFilter { + public: + class CallData; + + static grpc_error* Init(grpc_channel_element* elem, + grpc_channel_element_args* args) { + GPR_ASSERT(args->is_last); + GPR_ASSERT(elem->filter == &kRetryFilterVtable); + grpc_error* error = GRPC_ERROR_NONE; + new (elem->channel_data) RetryFilter(args->channel_args, &error); + return error; + } + + static void Destroy(grpc_channel_element* elem) { + auto* chand = static_cast(elem->channel_data); + chand->~RetryFilter(); + } + + // Will never be called. + static void StartTransportOp(grpc_channel_element* /*elem*/, + grpc_transport_op* /*op*/) {} + static void GetChannelInfo(grpc_channel_element* /*elem*/, + const grpc_channel_info* /*info*/) {} + + private: + static size_t GetMaxPerRpcRetryBufferSize(const grpc_channel_args* args) { + return static_cast(grpc_channel_args_find_integer( + args, GRPC_ARG_PER_RPC_RETRY_BUFFER_SIZE, + {DEFAULT_PER_RPC_RETRY_BUFFER_SIZE, 0, INT_MAX})); + } + + RetryFilter(const grpc_channel_args* args, grpc_error** error) + : client_channel_(grpc_channel_args_find_pointer( + args, GRPC_ARG_CLIENT_CHANNEL)), + per_rpc_retry_buffer_size_(GetMaxPerRpcRetryBufferSize(args)) { + // Get retry throttling parameters from service config. + auto* service_config = grpc_channel_args_find_pointer( + args, GRPC_ARG_SERVICE_CONFIG_OBJ); + if (service_config == nullptr) return; + const auto* config = static_cast( + service_config->GetGlobalParsedConfig( + RetryServiceConfigParser::ParserIndex())); + if (config == nullptr) return; + // Get server name from target URI. 
+ const char* server_uri = + grpc_channel_args_find_string(args, GRPC_ARG_SERVER_URI); + if (server_uri == nullptr) { + *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "server URI channel arg missing or wrong type in client channel " + "filter"); + return; + } + absl::StatusOr uri = URI::Parse(server_uri); + if (!uri.ok() || uri->path().empty()) { + *error = GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "could not extract server name from target URI"); + return; + } + std::string server_name(absl::StripPrefix(uri->path(), "/")); + // Get throttling config for server_name. + retry_throttle_data_ = internal::ServerRetryThrottleMap::GetDataForServer( + server_name, config->max_milli_tokens(), config->milli_token_ratio()); + } + + ClientChannel* client_channel_; + size_t per_rpc_retry_buffer_size_; + RefCountedPtr retry_throttle_data_; +}; + +// +// RetryFilter::CallData +// + +class RetryFilter::CallData { + public: + static grpc_error* Init(grpc_call_element* elem, + const grpc_call_element_args* args); + static void Destroy(grpc_call_element* elem, + const grpc_call_final_info* /*final_info*/, + grpc_closure* then_schedule_closure); + static void StartTransportStreamOpBatch( + grpc_call_element* elem, grpc_transport_stream_op_batch* batch); + static void SetPollent(grpc_call_element* elem, grpc_polling_entity* pollent); + + private: + CallData(RetryFilter* chand, const grpc_call_element_args& args); + ~CallData(); + + // State used for starting a retryable batch on a subchannel call. + // This provides its own grpc_transport_stream_op_batch and other data + // structures needed to populate the ops in the batch. + // We allocate one struct on the arena for each attempt at starting a + // batch on a given subchannel call. + struct SubchannelCallBatchData { + // Creates a SubchannelCallBatchData object on the call's arena with the + // specified refcount. 
If set_on_complete is true, the batch's + // on_complete callback will be set to point to on_complete(); + // otherwise, the batch's on_complete callback will be null. + static SubchannelCallBatchData* Create(CallData* call, int refcount, + bool set_on_complete); + + void Unref() { + if (gpr_unref(&refs)) Destroy(); + } + + SubchannelCallBatchData(CallData* call, int refcount, bool set_on_complete); + // All dtor code must be added in `Destroy()`. This is because we may + // call closures in `SubchannelCallBatchData` after they are unrefed by + // `Unref()`, and msan would complain about accessing this class + // after calling dtor. As a result we cannot call the `dtor` in `Unref()`. + // TODO(soheil): We should try to call the dtor in `Unref()`. + ~SubchannelCallBatchData() { Destroy(); } + void Destroy(); + + gpr_refcount refs; + grpc_call_element* elem; + CallData* call; + RefCountedPtr lb_call; + // The batch to use in the subchannel call. + // Its payload field points to SubchannelCallRetryState::batch_payload. + grpc_transport_stream_op_batch batch; + // For intercepting on_complete. + grpc_closure on_complete; + }; + + // Retry state associated with a subchannel call. + // Stored in the parent_data of the subchannel call object. + // TODO(roth): As part of implementing hedging, we'll need to store a + // ref to the LB call in this struct instead of doing the parent_data + // hack, since there will be multiple LB calls in flight at once. 
+ struct SubchannelCallRetryState { + explicit SubchannelCallRetryState(grpc_call_context_element* context) + : batch_payload(context), + started_send_initial_metadata(false), + completed_send_initial_metadata(false), + started_send_trailing_metadata(false), + completed_send_trailing_metadata(false), + started_recv_initial_metadata(false), + completed_recv_initial_metadata(false), + started_recv_trailing_metadata(false), + completed_recv_trailing_metadata(false), + retry_dispatched(false) {} + + // SubchannelCallBatchData.batch.payload points to this. + grpc_transport_stream_op_batch_payload batch_payload; + // For send_initial_metadata. + // Note that we need to make a copy of the initial metadata for each + // subchannel call instead of just referring to the copy in call_data, + // because filters in the subchannel stack will probably add entries, + // so we need to start in a pristine state for each attempt of the call. + grpc_linked_mdelem* send_initial_metadata_storage; + grpc_metadata_batch send_initial_metadata; + // For send_message. + // TODO(roth): Restructure this to eliminate use of ManualConstructor. + ManualConstructor send_message; + // For send_trailing_metadata. + grpc_linked_mdelem* send_trailing_metadata_storage; + grpc_metadata_batch send_trailing_metadata; + // For intercepting recv_initial_metadata. + grpc_metadata_batch recv_initial_metadata; + grpc_closure recv_initial_metadata_ready; + bool trailing_metadata_available = false; + // For intercepting recv_message. + grpc_closure recv_message_ready; + OrphanablePtr recv_message; + // For intercepting recv_trailing_metadata. + grpc_metadata_batch recv_trailing_metadata; + grpc_transport_stream_stats collect_stats; + grpc_closure recv_trailing_metadata_ready; + // These fields indicate which ops have been started and completed on + // this subchannel call. 
+ size_t started_send_message_count = 0; + size_t completed_send_message_count = 0; + size_t started_recv_message_count = 0; + size_t completed_recv_message_count = 0; + bool started_send_initial_metadata : 1; + bool completed_send_initial_metadata : 1; + bool started_send_trailing_metadata : 1; + bool completed_send_trailing_metadata : 1; + bool started_recv_initial_metadata : 1; + bool completed_recv_initial_metadata : 1; + bool started_recv_trailing_metadata : 1; + bool completed_recv_trailing_metadata : 1; + // State for callback processing. + SubchannelCallBatchData* recv_initial_metadata_ready_deferred_batch = + nullptr; + grpc_error* recv_initial_metadata_error = GRPC_ERROR_NONE; + SubchannelCallBatchData* recv_message_ready_deferred_batch = nullptr; + grpc_error* recv_message_error = GRPC_ERROR_NONE; + SubchannelCallBatchData* recv_trailing_metadata_internal_batch = nullptr; + // NOTE: Do not move this next to the metadata bitfields above. That would + // save space but will also result in a data race because compiler + // will generate a 2 byte store which overwrites the meta-data + // fields upon setting this field. + bool retry_dispatched : 1; + }; + + // Pending batches stored in call data. + struct PendingBatch { + // The pending batch. If nullptr, this slot is empty. + grpc_transport_stream_op_batch* batch = nullptr; + // Indicates whether payload for send ops has been cached in CallData. + bool send_ops_cached = false; + }; + + void StartTransportStreamOpBatch(grpc_transport_stream_op_batch* batch); + + // Caches data for send ops so that it can be retried later, if not + // already cached. + void MaybeCacheSendOpsForBatch(PendingBatch* pending); + void FreeCachedSendInitialMetadata(); + // Frees cached send_message at index idx. + void FreeCachedSendMessage(size_t idx); + void FreeCachedSendTrailingMetadata(); + // Frees cached send ops that have already been completed after + // committing the call. 
+ void FreeCachedSendOpDataAfterCommit(SubchannelCallRetryState* retry_state); + // Frees cached send ops that were completed by the completed batch in + // batch_data. Used when batches are completed after the call is committed. + void FreeCachedSendOpDataForCompletedBatch( + SubchannelCallBatchData* batch_data, + SubchannelCallRetryState* retry_state); + + // Returns the index into pending_batches_ to be used for batch. + static size_t GetBatchIndex(grpc_transport_stream_op_batch* batch); + void PendingBatchesAdd(grpc_transport_stream_op_batch* batch); + void PendingBatchClear(PendingBatch* pending); + void MaybeClearPendingBatch(PendingBatch* pending); + static void FailPendingBatchInCallCombiner(void* arg, grpc_error* error); + // A predicate type and some useful implementations for PendingBatchesFail(). + typedef bool (*YieldCallCombinerPredicate)( + const CallCombinerClosureList& closures); + static bool YieldCallCombiner(const CallCombinerClosureList& /*closures*/) { + return true; + } + static bool NoYieldCallCombiner(const CallCombinerClosureList& /*closures*/) { + return false; + } + static bool YieldCallCombinerIfPendingBatchesFound( + const CallCombinerClosureList& closures) { + return closures.size() > 0; + } + // Fails all pending batches. + // If yield_call_combiner_predicate returns true, assumes responsibility for + // yielding the call combiner. + void PendingBatchesFail( + grpc_error* error, + YieldCallCombinerPredicate yield_call_combiner_predicate); + static void ResumePendingBatchInCallCombiner(void* arg, grpc_error* ignored); + // Resumes all pending batches on lb_call_. + void PendingBatchesResume(); + // Returns a pointer to the first pending batch for which predicate(batch) + // returns true, or null if not found. + template + PendingBatch* PendingBatchFind(const char* log_message, Predicate predicate); + + // Commits the call so that no further retry attempts will be performed. 
+ void RetryCommit(SubchannelCallRetryState* retry_state); + // Starts a retry after appropriate back-off. + void DoRetry(SubchannelCallRetryState* retry_state, + grpc_millis server_pushback_ms); + // Returns true if the call is being retried. + bool MaybeRetry(SubchannelCallBatchData* batch_data, grpc_status_code status, + grpc_mdelem* server_pushback_md); + + // Invokes recv_initial_metadata_ready for a subchannel batch. + static void InvokeRecvInitialMetadataCallback(void* arg, grpc_error* error); + // Intercepts recv_initial_metadata_ready callback for retries. + // Commits the call and returns the initial metadata up the stack. + static void RecvInitialMetadataReady(void* arg, grpc_error* error); + + // Invokes recv_message_ready for a subchannel batch. + static void InvokeRecvMessageCallback(void* arg, grpc_error* error); + // Intercepts recv_message_ready callback for retries. + // Commits the call and returns the message up the stack. + static void RecvMessageReady(void* arg, grpc_error* error); + + // Sets *status and *server_pushback_md based on md_batch and error. + // Only sets *server_pushback_md if server_pushback_md != nullptr. + void GetCallStatus(grpc_metadata_batch* md_batch, grpc_error* error, + grpc_status_code* status, + grpc_mdelem** server_pushback_md); + // Adds recv_trailing_metadata_ready closure to closures. + void AddClosureForRecvTrailingMetadataReady( + SubchannelCallBatchData* batch_data, grpc_error* error, + CallCombinerClosureList* closures); + // Adds any necessary closures for deferred recv_initial_metadata and + // recv_message callbacks to closures. + static void AddClosuresForDeferredRecvCallbacks( + SubchannelCallBatchData* batch_data, + SubchannelCallRetryState* retry_state, CallCombinerClosureList* closures); + // Returns true if any op in the batch was not yet started. + // Only looks at send ops, since recv ops are always started immediately. 
+ bool PendingBatchIsUnstarted(PendingBatch* pending, + SubchannelCallRetryState* retry_state); + // For any pending batch containing an op that has not yet been started, + // adds the pending batch's completion closures to closures. + void AddClosuresToFailUnstartedPendingBatches( + SubchannelCallRetryState* retry_state, grpc_error* error, + CallCombinerClosureList* closures); + // Runs necessary closures upon completion of a call attempt. + void RunClosuresForCompletedCall(SubchannelCallBatchData* batch_data, + grpc_error* error); + // Intercepts recv_trailing_metadata_ready callback for retries. + // Commits the call and returns the trailing metadata up the stack. + static void RecvTrailingMetadataReady(void* arg, grpc_error* error); + + // Adds the on_complete closure for the pending batch completed in + // batch_data to closures. + void AddClosuresForCompletedPendingBatch(SubchannelCallBatchData* batch_data, + grpc_error* error, + CallCombinerClosureList* closures); + + // If there are any cached ops to replay or pending ops to start on the + // subchannel call, adds a closure to closures to invoke + // StartRetriableSubchannelBatches(). + void AddClosuresForReplayOrPendingSendOps( + SubchannelCallBatchData* batch_data, + SubchannelCallRetryState* retry_state, CallCombinerClosureList* closures); + + // Callback used to intercept on_complete from subchannel calls. + // Called only when retries are enabled. + static void OnComplete(void* arg, grpc_error* error); + + static void StartBatchInCallCombiner(void* arg, grpc_error* ignored); + // Adds a closure to closures that will execute batch in the call combiner. + void AddClosureForSubchannelBatch(grpc_transport_stream_op_batch* batch, + CallCombinerClosureList* closures); + // Adds retriable send_initial_metadata op to batch_data. + void AddRetriableSendInitialMetadataOp(SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data); + // Adds retriable send_message op to batch_data. 
+ void AddRetriableSendMessageOp(SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data); + // Adds retriable send_trailing_metadata op to batch_data. + void AddRetriableSendTrailingMetadataOp(SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data); + // Adds retriable recv_initial_metadata op to batch_data. + void AddRetriableRecvInitialMetadataOp(SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data); + // Adds retriable recv_message op to batch_data. + void AddRetriableRecvMessageOp(SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data); + // Adds retriable recv_trailing_metadata op to batch_data. + void AddRetriableRecvTrailingMetadataOp(SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data); + // Helper function used to start a recv_trailing_metadata batch. This + // is used in the case where a recv_initial_metadata or recv_message + // op fails in a way that we know the call is over but when the application + // has not yet started its own recv_trailing_metadata op. + void StartInternalRecvTrailingMetadata(); + // If there are any cached send ops that need to be replayed on the + // current subchannel call, creates and returns a new subchannel batch + // to replay those ops. Otherwise, returns nullptr. + SubchannelCallBatchData* MaybeCreateSubchannelBatchForReplay( + SubchannelCallRetryState* retry_state); + // Adds subchannel batches for pending batches to closures. + void AddSubchannelBatchesForPendingBatches( + SubchannelCallRetryState* retry_state, CallCombinerClosureList* closures); + // Constructs and starts whatever subchannel batches are needed on the + // subchannel call. 
+ static void StartRetriableSubchannelBatches(void* arg, grpc_error* ignored); + + static void CreateLbCall(void* arg, grpc_error* error); + + RetryFilter* chand_; + grpc_polling_entity* pollent_; + RefCountedPtr retry_throttle_data_; + const RetryMethodConfig* retry_policy_ = nullptr; + BackOff retry_backoff_; + + grpc_slice path_; // Request path. + gpr_cycle_counter call_start_time_; + grpc_millis deadline_; + Arena* arena_; + grpc_call_stack* owning_call_; + CallCombiner* call_combiner_; + grpc_call_context_element* call_context_; + + grpc_closure retry_closure_; + + // TODO(roth): Move this into the SubchannelCallRetryState struct as + // part of implementing hedging. + RefCountedPtr lb_call_; + + // Batches are added to this list when received from above. + // They are removed when we are done handling the batch (i.e., when + // either we have invoked all of the batch's callbacks or we have + // passed the batch down to the LB call and are not intercepting any of + // its callbacks). + // TODO(roth): Now that the retry code is split out into its own call + // object, revamp this to work in a cleaner way, since we no longer need + // for batches to ever wait for name resolution or LB picks. + PendingBatch pending_batches_[MAX_PENDING_BATCHES]; + bool pending_send_initial_metadata_ : 1; + bool pending_send_message_ : 1; + bool pending_send_trailing_metadata_ : 1; + + // Set when we get a cancel_stream op. + grpc_error* cancel_error_ = GRPC_ERROR_NONE; + + // Retry state. + bool enable_retries_ : 1; + bool retry_committed_ : 1; + bool last_attempt_got_server_pushback_ : 1; + int num_attempts_completed_ = 0; + size_t bytes_buffered_for_retry_ = 0; + grpc_timer retry_timer_; + + // The number of pending retriable subchannel batches containing send ops. 
+ // We hold a ref to the call stack while this is non-zero, since replay + // batches may not complete until after all callbacks have been returned + // to the surface, and we need to make sure that the call is not destroyed + // until all of these batches have completed. + // Note that we actually only need to track replay batches, but it's + // easier to track all batches with send ops. + int num_pending_retriable_subchannel_send_batches_ = 0; + + // Cached data for retrying send ops. + // send_initial_metadata + bool seen_send_initial_metadata_ = false; + grpc_linked_mdelem* send_initial_metadata_storage_ = nullptr; + grpc_metadata_batch send_initial_metadata_; + uint32_t send_initial_metadata_flags_; + gpr_atm* peer_string_; + // send_message + // When we get a send_message op, we replace the original byte stream + // with a CachingByteStream that caches the slices to a local buffer for + // use in retries. + // Note: We inline the cache for the first 3 send_message ops and use + // dynamic allocation after that. This number was essentially picked + // at random; it could be changed in the future to tune performance. 
+ absl::InlinedVector send_messages_; + // send_trailing_metadata + bool seen_send_trailing_metadata_ = false; + grpc_linked_mdelem* send_trailing_metadata_storage_ = nullptr; + grpc_metadata_batch send_trailing_metadata_; +}; + +// +// CallData vtable functions +// + +grpc_error* RetryFilter::CallData::Init(grpc_call_element* elem, + const grpc_call_element_args* args) { + auto* chand = static_cast(elem->channel_data); + new (elem->call_data) CallData(chand, *args); + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p: created call=%p", chand, elem->call_data); + } + return GRPC_ERROR_NONE; +} + +void RetryFilter::CallData::Destroy(grpc_call_element* elem, + const grpc_call_final_info* /*final_info*/, + grpc_closure* then_schedule_closure) { + auto* calld = static_cast(elem->call_data); + RefCountedPtr subchannel_call; + if (GPR_LIKELY(calld->lb_call_ != nullptr)) { + subchannel_call = calld->lb_call_->subchannel_call(); + } + calld->~CallData(); + if (GPR_LIKELY(subchannel_call != nullptr)) { + subchannel_call->SetAfterCallStackDestroy(then_schedule_closure); + } else { + // TODO(yashkt) : This can potentially be a Closure::Run + ExecCtx::Run(DEBUG_LOCATION, then_schedule_closure, GRPC_ERROR_NONE); + } +} + +void RetryFilter::CallData::StartTransportStreamOpBatch( + grpc_call_element* elem, grpc_transport_stream_op_batch* batch) { + auto* calld = static_cast(elem->call_data); + calld->StartTransportStreamOpBatch(batch); +} + +void RetryFilter::CallData::SetPollent(grpc_call_element* elem, + grpc_polling_entity* pollent) { + auto* calld = static_cast(elem->call_data); + calld->pollent_ = pollent; +} + +// +// CallData implementation +// + +const RetryMethodConfig* GetRetryPolicy( + const grpc_call_context_element* context) { + if (context == nullptr) return nullptr; + auto* svc_cfg_call_data = static_cast( + context[GRPC_CONTEXT_SERVICE_CONFIG_CALL_DATA].value); + if (svc_cfg_call_data == nullptr) return nullptr; + return static_cast( + 
svc_cfg_call_data->GetMethodParsedConfig( + RetryServiceConfigParser::ParserIndex())); +} + +RetryFilter::CallData::CallData(RetryFilter* chand, + const grpc_call_element_args& args) + : chand_(chand), + retry_throttle_data_(chand->retry_throttle_data_), + retry_policy_(GetRetryPolicy(args.context)), + retry_backoff_( + BackOff::Options() + .set_initial_backoff(retry_policy_ == nullptr + ? 0 + : retry_policy_->initial_backoff()) + .set_multiplier(retry_policy_ == nullptr + ? 0 + : retry_policy_->backoff_multiplier()) + .set_jitter(RETRY_BACKOFF_JITTER) + .set_max_backoff( + retry_policy_ == nullptr ? 0 : retry_policy_->max_backoff())), + path_(grpc_slice_ref_internal(args.path)), + call_start_time_(args.start_time), + deadline_(args.deadline), + arena_(args.arena), + owning_call_(args.call_stack), + call_combiner_(args.call_combiner), + call_context_(args.context), + pending_send_initial_metadata_(false), + pending_send_message_(false), + pending_send_trailing_metadata_(false), + enable_retries_(true), + retry_committed_(false), + last_attempt_got_server_pushback_(false) {} + +RetryFilter::CallData::~CallData() { + grpc_slice_unref_internal(path_); + GRPC_ERROR_UNREF(cancel_error_); + // Make sure there are no remaining pending batches. + for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { + GPR_ASSERT(pending_batches_[i].batch == nullptr); + } +} + +void RetryFilter::CallData::StartTransportStreamOpBatch( + grpc_transport_stream_op_batch* batch) { + // If we've previously been cancelled, immediately fail any new batches. + if (GPR_UNLIKELY(cancel_error_ != GRPC_ERROR_NONE)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: failing batch with error: %s", + chand_, this, grpc_error_string(cancel_error_)); + } + // Note: This will release the call combiner. + grpc_transport_stream_op_batch_finish_with_failure( + batch, GRPC_ERROR_REF(cancel_error_), call_combiner_); + return; + } + // Handle cancellation. 
+ if (GPR_UNLIKELY(batch->cancel_stream)) { + // Stash a copy of cancel_error in our call data, so that we can use + // it for subsequent operations. This ensures that if the call is + // cancelled before any batches are passed down (e.g., if the deadline + // is in the past when the call starts), we can return the right + // error to the caller when the first batch does get passed down. + GRPC_ERROR_UNREF(cancel_error_); + cancel_error_ = GRPC_ERROR_REF(batch->payload->cancel_stream.cancel_error); + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: recording cancel_error=%s", chand_, + this, grpc_error_string(cancel_error_)); + } + // If we do not have an LB call (i.e., a pick has not yet been started), + // fail all pending batches. Otherwise, send the cancellation down to the + // LB call. + if (lb_call_ == nullptr) { + // TODO(roth): If there is a pending retry callback, do we need to + // cancel it here? + PendingBatchesFail(GRPC_ERROR_REF(cancel_error_), NoYieldCallCombiner); + // Note: This will release the call combiner. + grpc_transport_stream_op_batch_finish_with_failure( + batch, GRPC_ERROR_REF(cancel_error_), call_combiner_); + } else { + // Note: This will release the call combiner. + lb_call_->StartTransportStreamOpBatch(batch); + } + return; + } + // Add the batch to the pending list. + PendingBatchesAdd(batch); + // Create LB call if needed. + // TODO(roth): If we get a new batch from the surface after the + // initial retry attempt has failed, while the retry timer is pending, + // we should queue the batch and not try to send it immediately. + if (lb_call_ == nullptr) { + // We do not yet have an LB call, so create one. + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: creating LB call", chand_, this); + } + CreateLbCall(this, GRPC_ERROR_NONE); + return; + } + // Send batches to LB call. 
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: starting batch on lb_call=%p", chand_, + this, lb_call_.get()); + } + PendingBatchesResume(); +} + +// +// send op data caching +// + +void RetryFilter::CallData::MaybeCacheSendOpsForBatch(PendingBatch* pending) { + if (pending->send_ops_cached) return; + pending->send_ops_cached = true; + grpc_transport_stream_op_batch* batch = pending->batch; + // Save a copy of metadata for send_initial_metadata ops. + if (batch->send_initial_metadata) { + seen_send_initial_metadata_ = true; + GPR_ASSERT(send_initial_metadata_storage_ == nullptr); + grpc_metadata_batch* send_initial_metadata = + batch->payload->send_initial_metadata.send_initial_metadata; + send_initial_metadata_storage_ = + static_cast(arena_->Alloc( + sizeof(grpc_linked_mdelem) * send_initial_metadata->list.count)); + grpc_metadata_batch_copy(send_initial_metadata, &send_initial_metadata_, + send_initial_metadata_storage_); + send_initial_metadata_flags_ = + batch->payload->send_initial_metadata.send_initial_metadata_flags; + peer_string_ = batch->payload->send_initial_metadata.peer_string; + } + // Set up cache for send_message ops. + if (batch->send_message) { + ByteStreamCache* cache = arena_->New( + std::move(batch->payload->send_message.send_message)); + send_messages_.push_back(cache); + } + // Save metadata batch for send_trailing_metadata ops. 
+ if (batch->send_trailing_metadata) { + seen_send_trailing_metadata_ = true; + GPR_ASSERT(send_trailing_metadata_storage_ == nullptr); + grpc_metadata_batch* send_trailing_metadata = + batch->payload->send_trailing_metadata.send_trailing_metadata; + send_trailing_metadata_storage_ = + static_cast(arena_->Alloc( + sizeof(grpc_linked_mdelem) * send_trailing_metadata->list.count)); + grpc_metadata_batch_copy(send_trailing_metadata, &send_trailing_metadata_, + send_trailing_metadata_storage_); + } +} + +void RetryFilter::CallData::FreeCachedSendInitialMetadata() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: destroying send_initial_metadata", + chand_, this); + } + grpc_metadata_batch_destroy(&send_initial_metadata_); +} + +void RetryFilter::CallData::FreeCachedSendMessage(size_t idx) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: destroying send_messages[%" PRIuPTR "]", chand_, + this, idx); + } + send_messages_[idx]->Destroy(); +} + +void RetryFilter::CallData::FreeCachedSendTrailingMetadata() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand_=%p calld=%p: destroying send_trailing_metadata", + chand_, this); + } + grpc_metadata_batch_destroy(&send_trailing_metadata_); +} + +void RetryFilter::CallData::FreeCachedSendOpDataAfterCommit( + SubchannelCallRetryState* retry_state) { + if (retry_state->completed_send_initial_metadata) { + FreeCachedSendInitialMetadata(); + } + for (size_t i = 0; i < retry_state->completed_send_message_count; ++i) { + FreeCachedSendMessage(i); + } + if (retry_state->completed_send_trailing_metadata) { + FreeCachedSendTrailingMetadata(); + } +} + +void RetryFilter::CallData::FreeCachedSendOpDataForCompletedBatch( + SubchannelCallBatchData* batch_data, + SubchannelCallRetryState* retry_state) { + if (batch_data->batch.send_initial_metadata) { + FreeCachedSendInitialMetadata(); + } + if (batch_data->batch.send_message) { + 
FreeCachedSendMessage(retry_state->completed_send_message_count - 1); + } + if (batch_data->batch.send_trailing_metadata) { + FreeCachedSendTrailingMetadata(); + } +} + +// +// pending_batches management +// + +size_t RetryFilter::CallData::GetBatchIndex( + grpc_transport_stream_op_batch* batch) { + // Note: It is important the send_initial_metadata be the first entry + // here, since the code in pick_subchannel_locked() assumes it will be. + if (batch->send_initial_metadata) return 0; + if (batch->send_message) return 1; + if (batch->send_trailing_metadata) return 2; + if (batch->recv_initial_metadata) return 3; + if (batch->recv_message) return 4; + if (batch->recv_trailing_metadata) return 5; + GPR_UNREACHABLE_CODE(return (size_t)-1); +} + +// This is called via the call combiner, so access to calld is synchronized. +void RetryFilter::CallData::PendingBatchesAdd( + grpc_transport_stream_op_batch* batch) { + const size_t idx = GetBatchIndex(batch); + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand_=%p calld=%p: adding pending batch at index %" PRIuPTR, + chand_, this, idx); + } + PendingBatch* pending = &pending_batches_[idx]; + GPR_ASSERT(pending->batch == nullptr); + pending->batch = batch; + pending->send_ops_cached = false; + if (enable_retries_) { + // Update state in calld about pending batches. + // Also check if the batch takes us over the retry buffer limit. + // Note: We don't check the size of trailing metadata here, because + // gRPC clients do not send trailing metadata. 
+ if (batch->send_initial_metadata) { + pending_send_initial_metadata_ = true; + bytes_buffered_for_retry_ += grpc_metadata_batch_size( + batch->payload->send_initial_metadata.send_initial_metadata); + } + if (batch->send_message) { + pending_send_message_ = true; + bytes_buffered_for_retry_ += + batch->payload->send_message.send_message->length(); + } + if (batch->send_trailing_metadata) { + pending_send_trailing_metadata_ = true; + } + if (GPR_UNLIKELY(bytes_buffered_for_retry_ > + chand_->per_rpc_retry_buffer_size_)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: exceeded retry buffer size, committing", + chand_, this); + } + SubchannelCallRetryState* retry_state = + lb_call_ == nullptr ? nullptr + : static_cast( + lb_call_->GetParentData()); + RetryCommit(retry_state); + // If we are not going to retry and have not yet started, pretend + // retries are disabled so that we don't bother with retry overhead. + if (num_attempts_completed_ == 0) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: disabling retries before first " + "attempt", + chand_, this); + } + // TODO(roth): Treat this as a commit? + enable_retries_ = false; + } + } + } +} + +void RetryFilter::CallData::PendingBatchClear(PendingBatch* pending) { + if (enable_retries_) { + if (pending->batch->send_initial_metadata) { + pending_send_initial_metadata_ = false; + } + if (pending->batch->send_message) { + pending_send_message_ = false; + } + if (pending->batch->send_trailing_metadata) { + pending_send_trailing_metadata_ = false; + } + } + pending->batch = nullptr; +} + +void RetryFilter::CallData::MaybeClearPendingBatch(PendingBatch* pending) { + grpc_transport_stream_op_batch* batch = pending->batch; + // We clear the pending batch if all of its callbacks have been + // scheduled and reset to nullptr. 
+ if (batch->on_complete == nullptr && + (!batch->recv_initial_metadata || + batch->payload->recv_initial_metadata.recv_initial_metadata_ready == + nullptr) && + (!batch->recv_message || + batch->payload->recv_message.recv_message_ready == nullptr) && + (!batch->recv_trailing_metadata || + batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready == + nullptr)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: clearing pending batch", chand_, + this); + } + PendingBatchClear(pending); + } +} + +// This is called via the call combiner, so access to calld is synchronized. +void RetryFilter::CallData::FailPendingBatchInCallCombiner(void* arg, + grpc_error* error) { + grpc_transport_stream_op_batch* batch = + static_cast(arg); + CallData* call = static_cast(batch->handler_private.extra_arg); + // Note: This will release the call combiner. + grpc_transport_stream_op_batch_finish_with_failure( + batch, GRPC_ERROR_REF(error), call->call_combiner_); +} + +// This is called via the call combiner, so access to calld is synchronized. 
+void RetryFilter::CallData::PendingBatchesFail( + grpc_error* error, + YieldCallCombinerPredicate yield_call_combiner_predicate) { + GPR_ASSERT(error != GRPC_ERROR_NONE); + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + size_t num_batches = 0; + for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { + if (pending_batches_[i].batch != nullptr) ++num_batches; + } + gpr_log(GPR_INFO, + "chand=%p calld=%p: failing %" PRIuPTR " pending batches: %s", + chand_, this, num_batches, grpc_error_string(error)); + } + CallCombinerClosureList closures; + for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { + PendingBatch* pending = &pending_batches_[i]; + grpc_transport_stream_op_batch* batch = pending->batch; + if (batch != nullptr) { + batch->handler_private.extra_arg = this; + GRPC_CLOSURE_INIT(&batch->handler_private.closure, + FailPendingBatchInCallCombiner, batch, + grpc_schedule_on_exec_ctx); + closures.Add(&batch->handler_private.closure, GRPC_ERROR_REF(error), + "PendingBatchesFail"); + PendingBatchClear(pending); + } + } + if (yield_call_combiner_predicate(closures)) { + closures.RunClosures(call_combiner_); + } else { + closures.RunClosuresWithoutYielding(call_combiner_); + } + GRPC_ERROR_UNREF(error); +} + +// This is called via the call combiner, so access to calld is synchronized. +void RetryFilter::CallData::ResumePendingBatchInCallCombiner( + void* arg, grpc_error* /*ignored*/) { + grpc_transport_stream_op_batch* batch = + static_cast(arg); + auto* lb_call = static_cast( + batch->handler_private.extra_arg); + // Note: This will release the call combiner. + lb_call->StartTransportStreamOpBatch(batch); +} + +// This is called via the call combiner, so access to calld is synchronized. +void RetryFilter::CallData::PendingBatchesResume() { + if (enable_retries_) { + StartRetriableSubchannelBatches(this, GRPC_ERROR_NONE); + return; + } + // Retries not enabled; send down batches as-is. 
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + size_t num_batches = 0; + for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { + if (pending_batches_[i].batch != nullptr) ++num_batches; + } + gpr_log(GPR_INFO, + "chand=%p calld=%p: starting %" PRIuPTR + " pending batches on lb_call=%p", + chand_, this, num_batches, lb_call_.get()); + } + CallCombinerClosureList closures; + for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { + PendingBatch* pending = &pending_batches_[i]; + grpc_transport_stream_op_batch* batch = pending->batch; + if (batch != nullptr) { + batch->handler_private.extra_arg = lb_call_.get(); + GRPC_CLOSURE_INIT(&batch->handler_private.closure, + ResumePendingBatchInCallCombiner, batch, nullptr); + closures.Add(&batch->handler_private.closure, GRPC_ERROR_NONE, + "PendingBatchesResume"); + PendingBatchClear(pending); + } + } + // Note: This will release the call combiner. + closures.RunClosures(call_combiner_); +} + +template +RetryFilter::CallData::PendingBatch* RetryFilter::CallData::PendingBatchFind( + const char* log_message, Predicate predicate) { + for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { + PendingBatch* pending = &pending_batches_[i]; + grpc_transport_stream_op_batch* batch = pending->batch; + if (batch != nullptr && predicate(batch)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: %s pending batch at index %" PRIuPTR, + chand_, this, log_message, i); + } + return pending; + } + } + return nullptr; +} + +// +// retry code +// + +void RetryFilter::CallData::RetryCommit(SubchannelCallRetryState* retry_state) { + if (retry_committed_) return; + retry_committed_ = true; + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: committing retries", chand_, this); + } + if (retry_state != nullptr) { + FreeCachedSendOpDataAfterCommit(retry_state); + } +} + +void RetryFilter::CallData::DoRetry(SubchannelCallRetryState* 
retry_state,
+                                 grpc_millis server_pushback_ms) {
+  GPR_ASSERT(retry_policy_ != nullptr);
+  // Reset LB call.
+  lb_call_.reset();
+  // Compute backoff delay.
+  grpc_millis next_attempt_time;
+  if (server_pushback_ms >= 0) {
+    next_attempt_time = ExecCtx::Get()->Now() + server_pushback_ms;
+    last_attempt_got_server_pushback_ = true;
+  } else {
+    if (num_attempts_completed_ == 1 || last_attempt_got_server_pushback_) {
+      last_attempt_got_server_pushback_ = false;
+    }
+    next_attempt_time = retry_backoff_.NextAttemptTime();
+  }
+  if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
+    gpr_log(GPR_INFO,
+            "chand=%p calld=%p: retrying failed call in %" PRId64 " ms", chand_,
+            this, next_attempt_time - ExecCtx::Get()->Now());
+  }
+  // Schedule retry after computed delay.
+  GRPC_CLOSURE_INIT(&retry_closure_, CreateLbCall, this, nullptr);
+  grpc_timer_init(&retry_timer_, next_attempt_time, &retry_closure_);
+  // Update bookkeeping.
+  if (retry_state != nullptr) retry_state->retry_dispatched = true;
+}
+
+bool RetryFilter::CallData::MaybeRetry(SubchannelCallBatchData* batch_data,
+                                       grpc_status_code status,
+                                       grpc_mdelem* server_pushback_md) {
+  // Get retry policy.
+  if (retry_policy_ == nullptr) return false;
+  // If we've already dispatched a retry from this call, return true.
+  // This catches the case where the batch has multiple callbacks
+  // (i.e., it includes either recv_message or recv_initial_metadata).
+  SubchannelCallRetryState* retry_state = nullptr;
+  if (batch_data != nullptr) {
+    retry_state = static_cast<SubchannelCallRetryState*>(
+        batch_data->lb_call->GetParentData());
+    if (retry_state->retry_dispatched) {
+      if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) {
+        gpr_log(GPR_INFO, "chand=%p calld=%p: retry already dispatched", chand_,
+                this);
+      }
+      return true;
+    }
+  }
+  // Check status.
+ if (GPR_LIKELY(status == GRPC_STATUS_OK)) { + if (retry_throttle_data_ != nullptr) { + retry_throttle_data_->RecordSuccess(); + } + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: call succeeded", chand_, this); + } + return false; + } + // Status is not OK. Check whether the status is retryable. + if (!retry_policy_->retryable_status_codes().Contains(status)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: status %s not configured as retryable", + chand_, this, grpc_status_code_to_string(status)); + } + return false; + } + // Record the failure and check whether retries are throttled. + // Note that it's important for this check to come after the status + // code check above, since we should only record failures whose statuses + // match the configured retryable status codes, so that we don't count + // things like failures due to malformed requests (INVALID_ARGUMENT). + // Conversely, it's important for this to come before the remaining + // checks, so that we don't fail to record failures due to other factors. + if (retry_throttle_data_ != nullptr && + !retry_throttle_data_->RecordFailure()) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: retries throttled", chand_, this); + } + return false; + } + // Check whether the call is committed. + if (retry_committed_) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: retries already committed", chand_, + this); + } + return false; + } + // Check whether we have retries remaining. + ++num_attempts_completed_; + if (num_attempts_completed_ >= retry_policy_->max_attempts()) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: exceeded %d retry attempts", chand_, + this, retry_policy_->max_attempts()); + } + return false; + } + // If the call was cancelled from the surface, don't retry. 
+ if (cancel_error_ != GRPC_ERROR_NONE) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: call cancelled from surface, not " + "retrying", + chand_, this); + } + return false; + } + // Check server push-back. + grpc_millis server_pushback_ms = -1; + if (server_pushback_md != nullptr) { + // If the value is "-1" or any other unparseable string, we do not retry. + uint32_t ms; + if (!grpc_parse_slice_to_uint32(GRPC_MDVALUE(*server_pushback_md), &ms)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: not retrying due to server push-back", + chand_, this); + } + return false; + } else { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: server push-back: retry in %u ms", + chand_, this, ms); + } + server_pushback_ms = static_cast(ms); + } + } + DoRetry(retry_state, server_pushback_ms); + return true; +} + +// +// RetryFilter::CallData::SubchannelCallBatchData +// + +RetryFilter::CallData::SubchannelCallBatchData* +RetryFilter::CallData::SubchannelCallBatchData::Create(CallData* call, + int refcount, + bool set_on_complete) { + return call->arena_->New(call, refcount, + set_on_complete); +} + +RetryFilter::CallData::SubchannelCallBatchData::SubchannelCallBatchData( + CallData* call, int refcount, bool set_on_complete) + : call(call), lb_call(call->lb_call_) { + SubchannelCallRetryState* retry_state = + static_cast(lb_call->GetParentData()); + batch.payload = &retry_state->batch_payload; + gpr_ref_init(&refs, refcount); + if (set_on_complete) { + GRPC_CLOSURE_INIT(&on_complete, RetryFilter::CallData::OnComplete, this, + grpc_schedule_on_exec_ctx); + batch.on_complete = &on_complete; + } + GRPC_CALL_STACK_REF(call->owning_call_, "batch_data"); +} + +void RetryFilter::CallData::SubchannelCallBatchData::Destroy() { + SubchannelCallRetryState* retry_state = + static_cast(lb_call->GetParentData()); + if (batch.send_initial_metadata) { + 
grpc_metadata_batch_destroy(&retry_state->send_initial_metadata); + } + if (batch.send_trailing_metadata) { + grpc_metadata_batch_destroy(&retry_state->send_trailing_metadata); + } + if (batch.recv_initial_metadata) { + grpc_metadata_batch_destroy(&retry_state->recv_initial_metadata); + } + if (batch.recv_trailing_metadata) { + grpc_metadata_batch_destroy(&retry_state->recv_trailing_metadata); + } + lb_call.reset(); + GRPC_CALL_STACK_UNREF(call->owning_call_, "batch_data"); +} + +// +// recv_initial_metadata callback handling +// + +void RetryFilter::CallData::InvokeRecvInitialMetadataCallback( + void* arg, grpc_error* error) { + SubchannelCallBatchData* batch_data = + static_cast(arg); + // Find pending batch. + PendingBatch* pending = batch_data->call->PendingBatchFind( + "invoking recv_initial_metadata_ready for", + [](grpc_transport_stream_op_batch* batch) { + return batch->recv_initial_metadata && + batch->payload->recv_initial_metadata + .recv_initial_metadata_ready != nullptr; + }); + GPR_ASSERT(pending != nullptr); + // Return metadata. + SubchannelCallRetryState* retry_state = + static_cast( + batch_data->lb_call->GetParentData()); + grpc_metadata_batch_move( + &retry_state->recv_initial_metadata, + pending->batch->payload->recv_initial_metadata.recv_initial_metadata); + // Update bookkeeping. + // Note: Need to do this before invoking the callback, since invoking + // the callback will result in yielding the call combiner. + grpc_closure* recv_initial_metadata_ready = + pending->batch->payload->recv_initial_metadata + .recv_initial_metadata_ready; + pending->batch->payload->recv_initial_metadata.recv_initial_metadata_ready = + nullptr; + batch_data->call->MaybeClearPendingBatch(pending); + batch_data->Unref(); + // Invoke callback. 
+ Closure::Run(DEBUG_LOCATION, recv_initial_metadata_ready, + GRPC_ERROR_REF(error)); +} + +void RetryFilter::CallData::RecvInitialMetadataReady(void* arg, + grpc_error* error) { + SubchannelCallBatchData* batch_data = + static_cast(arg); + CallData* call = batch_data->call; + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: got recv_initial_metadata_ready, error=%s", + call->chand_, call, grpc_error_string(error)); + } + SubchannelCallRetryState* retry_state = + static_cast( + batch_data->lb_call->GetParentData()); + retry_state->completed_recv_initial_metadata = true; + // If a retry was already dispatched, then we're not going to use the + // result of this recv_initial_metadata op, so do nothing. + if (retry_state->retry_dispatched) { + GRPC_CALL_COMBINER_STOP( + call->call_combiner_, + "recv_initial_metadata_ready after retry dispatched"); + return; + } + // If we got an error or a Trailers-Only response and have not yet gotten + // the recv_trailing_metadata_ready callback, then defer propagating this + // callback back to the surface. We can evaluate whether to retry when + // recv_trailing_metadata comes back. + if (GPR_UNLIKELY((retry_state->trailing_metadata_available || + error != GRPC_ERROR_NONE) && + !retry_state->completed_recv_trailing_metadata)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: deferring recv_initial_metadata_ready " + "(Trailers-Only)", + call->chand_, call); + } + retry_state->recv_initial_metadata_ready_deferred_batch = batch_data; + retry_state->recv_initial_metadata_error = GRPC_ERROR_REF(error); + if (!retry_state->started_recv_trailing_metadata) { + // recv_trailing_metadata not yet started by application; start it + // ourselves to get status. 
+ call->StartInternalRecvTrailingMetadata(); + } else { + GRPC_CALL_COMBINER_STOP( + call->call_combiner_, + "recv_initial_metadata_ready trailers-only or error"); + } + return; + } + // Received valid initial metadata, so commit the call. + call->RetryCommit(retry_state); + // Invoke the callback to return the result to the surface. + // Manually invoking a callback function; it does not take ownership of error. + call->InvokeRecvInitialMetadataCallback(batch_data, error); +} + +// +// recv_message callback handling +// + +void RetryFilter::CallData::InvokeRecvMessageCallback(void* arg, + grpc_error* error) { + SubchannelCallBatchData* batch_data = + static_cast(arg); + CallData* call = batch_data->call; + // Find pending op. + PendingBatch* pending = call->PendingBatchFind( + "invoking recv_message_ready for", + [](grpc_transport_stream_op_batch* batch) { + return batch->recv_message && + batch->payload->recv_message.recv_message_ready != nullptr; + }); + GPR_ASSERT(pending != nullptr); + // Return payload. + SubchannelCallRetryState* retry_state = + static_cast( + batch_data->lb_call->GetParentData()); + *pending->batch->payload->recv_message.recv_message = + std::move(retry_state->recv_message); + // Update bookkeeping. + // Note: Need to do this before invoking the callback, since invoking + // the callback will result in yielding the call combiner. + grpc_closure* recv_message_ready = + pending->batch->payload->recv_message.recv_message_ready; + pending->batch->payload->recv_message.recv_message_ready = nullptr; + call->MaybeClearPendingBatch(pending); + batch_data->Unref(); + // Invoke callback. 
+ Closure::Run(DEBUG_LOCATION, recv_message_ready, GRPC_ERROR_REF(error)); +} + +void RetryFilter::CallData::RecvMessageReady(void* arg, grpc_error* error) { + SubchannelCallBatchData* batch_data = + static_cast(arg); + CallData* call = batch_data->call; + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: got recv_message_ready, error=%s", + call->chand_, call, grpc_error_string(error)); + } + SubchannelCallRetryState* retry_state = + static_cast( + batch_data->lb_call->GetParentData()); + ++retry_state->completed_recv_message_count; + // If a retry was already dispatched, then we're not going to use the + // result of this recv_message op, so do nothing. + if (retry_state->retry_dispatched) { + GRPC_CALL_COMBINER_STOP(call->call_combiner_, + "recv_message_ready after retry dispatched"); + return; + } + // If we got an error or the payload was nullptr and we have not yet gotten + // the recv_trailing_metadata_ready callback, then defer propagating this + // callback back to the surface. We can evaluate whether to retry when + // recv_trailing_metadata comes back. + if (GPR_UNLIKELY( + (retry_state->recv_message == nullptr || error != GRPC_ERROR_NONE) && + !retry_state->completed_recv_trailing_metadata)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: deferring recv_message_ready (nullptr " + "message and recv_trailing_metadata pending)", + call->chand_, call); + } + retry_state->recv_message_ready_deferred_batch = batch_data; + retry_state->recv_message_error = GRPC_ERROR_REF(error); + if (!retry_state->started_recv_trailing_metadata) { + // recv_trailing_metadata not yet started by application; start it + // ourselves to get status. + call->StartInternalRecvTrailingMetadata(); + } else { + GRPC_CALL_COMBINER_STOP(call->call_combiner_, "recv_message_ready null"); + } + return; + } + // Received a valid message, so commit the call. 
+ call->RetryCommit(retry_state); + // Invoke the callback to return the result to the surface. + // Manually invoking a callback function; it does not take ownership of error. + call->InvokeRecvMessageCallback(batch_data, error); +} + +// +// recv_trailing_metadata handling +// + +void RetryFilter::CallData::GetCallStatus(grpc_metadata_batch* md_batch, + grpc_error* error, + grpc_status_code* status, + grpc_mdelem** server_pushback_md) { + if (error != GRPC_ERROR_NONE) { + grpc_error_get_status(error, deadline_, status, nullptr, nullptr, nullptr); + } else { + GPR_ASSERT(md_batch->idx.named.grpc_status != nullptr); + *status = + grpc_get_status_code_from_metadata(md_batch->idx.named.grpc_status->md); + if (server_pushback_md != nullptr && + md_batch->idx.named.grpc_retry_pushback_ms != nullptr) { + *server_pushback_md = &md_batch->idx.named.grpc_retry_pushback_ms->md; + } + } + GRPC_ERROR_UNREF(error); +} + +void RetryFilter::CallData::AddClosureForRecvTrailingMetadataReady( + SubchannelCallBatchData* batch_data, grpc_error* error, + CallCombinerClosureList* closures) { + // Find pending batch. + PendingBatch* pending = PendingBatchFind( + "invoking recv_trailing_metadata for", + [](grpc_transport_stream_op_batch* batch) { + return batch->recv_trailing_metadata && + batch->payload->recv_trailing_metadata + .recv_trailing_metadata_ready != nullptr; + }); + // If we generated the recv_trailing_metadata op internally via + // StartInternalRecvTrailingMetadata(), then there will be no pending batch. + if (pending == nullptr) { + GRPC_ERROR_UNREF(error); + return; + } + // Return metadata. + SubchannelCallRetryState* retry_state = + static_cast( + batch_data->lb_call->GetParentData()); + grpc_metadata_batch_move( + &retry_state->recv_trailing_metadata, + pending->batch->payload->recv_trailing_metadata.recv_trailing_metadata); + // Add closure. 
+ closures->Add(pending->batch->payload->recv_trailing_metadata + .recv_trailing_metadata_ready, + error, "recv_trailing_metadata_ready for pending batch"); + // Update bookkeeping. + pending->batch->payload->recv_trailing_metadata.recv_trailing_metadata_ready = + nullptr; + MaybeClearPendingBatch(pending); +} + +void RetryFilter::CallData::AddClosuresForDeferredRecvCallbacks( + SubchannelCallBatchData* batch_data, SubchannelCallRetryState* retry_state, + CallCombinerClosureList* closures) { + if (batch_data->batch.recv_trailing_metadata) { + // Add closure for deferred recv_initial_metadata_ready. + if (GPR_UNLIKELY(retry_state->recv_initial_metadata_ready_deferred_batch != + nullptr)) { + GRPC_CLOSURE_INIT(&retry_state->recv_initial_metadata_ready, + InvokeRecvInitialMetadataCallback, + retry_state->recv_initial_metadata_ready_deferred_batch, + grpc_schedule_on_exec_ctx); + closures->Add(&retry_state->recv_initial_metadata_ready, + retry_state->recv_initial_metadata_error, + "resuming recv_initial_metadata_ready"); + retry_state->recv_initial_metadata_ready_deferred_batch = nullptr; + } + // Add closure for deferred recv_message_ready. 
+ if (GPR_UNLIKELY(retry_state->recv_message_ready_deferred_batch != + nullptr)) { + GRPC_CLOSURE_INIT(&retry_state->recv_message_ready, + InvokeRecvMessageCallback, + retry_state->recv_message_ready_deferred_batch, + grpc_schedule_on_exec_ctx); + closures->Add(&retry_state->recv_message_ready, + retry_state->recv_message_error, + "resuming recv_message_ready"); + retry_state->recv_message_ready_deferred_batch = nullptr; + } + } +} + +bool RetryFilter::CallData::PendingBatchIsUnstarted( + PendingBatch* pending, SubchannelCallRetryState* retry_state) { + if (pending->batch == nullptr || pending->batch->on_complete == nullptr) { + return false; + } + if (pending->batch->send_initial_metadata && + !retry_state->started_send_initial_metadata) { + return true; + } + if (pending->batch->send_message && + retry_state->started_send_message_count < send_messages_.size()) { + return true; + } + if (pending->batch->send_trailing_metadata && + !retry_state->started_send_trailing_metadata) { + return true; + } + return false; +} + +void RetryFilter::CallData::AddClosuresToFailUnstartedPendingBatches( + SubchannelCallRetryState* retry_state, grpc_error* error, + CallCombinerClosureList* closures) { + for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { + PendingBatch* pending = &pending_batches_[i]; + if (PendingBatchIsUnstarted(pending, retry_state)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: failing unstarted pending batch at " + "index %" PRIuPTR, + chand_, this, i); + } + closures->Add(pending->batch->on_complete, GRPC_ERROR_REF(error), + "failing on_complete for pending batch"); + pending->batch->on_complete = nullptr; + MaybeClearPendingBatch(pending); + } + } + GRPC_ERROR_UNREF(error); +} + +void RetryFilter::CallData::RunClosuresForCompletedCall( + SubchannelCallBatchData* batch_data, grpc_error* error) { + SubchannelCallRetryState* retry_state = + static_cast( + batch_data->lb_call->GetParentData()); + // 
Construct list of closures to execute. + CallCombinerClosureList closures; + // First, add closure for recv_trailing_metadata_ready. + AddClosureForRecvTrailingMetadataReady(batch_data, GRPC_ERROR_REF(error), + &closures); + // If there are deferred recv_initial_metadata_ready or recv_message_ready + // callbacks, add them to closures. + AddClosuresForDeferredRecvCallbacks(batch_data, retry_state, &closures); + // Add closures to fail any pending batches that have not yet been started. + AddClosuresToFailUnstartedPendingBatches(retry_state, GRPC_ERROR_REF(error), + &closures); + // Don't need batch_data anymore. + batch_data->Unref(); + // Schedule all of the closures identified above. + // Note: This will release the call combiner. + closures.RunClosures(call_combiner_); + GRPC_ERROR_UNREF(error); +} + +void RetryFilter::CallData::RecvTrailingMetadataReady(void* arg, + grpc_error* error) { + SubchannelCallBatchData* batch_data = + static_cast(arg); + CallData* call = batch_data->call; + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: got recv_trailing_metadata_ready, error=%s", + call->chand_, call, grpc_error_string(error)); + } + SubchannelCallRetryState* retry_state = + static_cast( + batch_data->lb_call->GetParentData()); + retry_state->completed_recv_trailing_metadata = true; + // Get the call's status and check for server pushback metadata. + grpc_status_code status = GRPC_STATUS_OK; + grpc_mdelem* server_pushback_md = nullptr; + grpc_metadata_batch* md_batch = + batch_data->batch.payload->recv_trailing_metadata.recv_trailing_metadata; + call->GetCallStatus(md_batch, GRPC_ERROR_REF(error), &status, + &server_pushback_md); + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: call finished, status=%s", + call->chand_, call, grpc_status_code_to_string(status)); + } + // Check if we should retry. 
+ if (call->MaybeRetry(batch_data, status, server_pushback_md)) { + // Unref batch_data for deferred recv_initial_metadata_ready or + // recv_message_ready callbacks, if any. + if (retry_state->recv_initial_metadata_ready_deferred_batch != nullptr) { + batch_data->Unref(); + GRPC_ERROR_UNREF(retry_state->recv_initial_metadata_error); + } + if (retry_state->recv_message_ready_deferred_batch != nullptr) { + batch_data->Unref(); + GRPC_ERROR_UNREF(retry_state->recv_message_error); + } + batch_data->Unref(); + return; + } + // Not retrying, so commit the call. + call->RetryCommit(retry_state); + // Run any necessary closures. + call->RunClosuresForCompletedCall(batch_data, GRPC_ERROR_REF(error)); +} + +// +// on_complete callback handling +// + +void RetryFilter::CallData::AddClosuresForCompletedPendingBatch( + SubchannelCallBatchData* batch_data, grpc_error* error, + CallCombinerClosureList* closures) { + PendingBatch* pending = PendingBatchFind( + "completed", [batch_data](grpc_transport_stream_op_batch* batch) { + // Match the pending batch with the same set of send ops as the + // subchannel batch we've just completed. + return batch->on_complete != nullptr && + batch_data->batch.send_initial_metadata == + batch->send_initial_metadata && + batch_data->batch.send_message == batch->send_message && + batch_data->batch.send_trailing_metadata == + batch->send_trailing_metadata; + }); + // If batch_data is a replay batch, then there will be no pending + // batch to complete. + if (pending == nullptr) { + GRPC_ERROR_UNREF(error); + return; + } + // Add closure. 
+ closures->Add(pending->batch->on_complete, error, + "on_complete for pending batch"); + pending->batch->on_complete = nullptr; + MaybeClearPendingBatch(pending); +} + +void RetryFilter::CallData::AddClosuresForReplayOrPendingSendOps( + SubchannelCallBatchData* batch_data, SubchannelCallRetryState* retry_state, + CallCombinerClosureList* closures) { + bool have_pending_send_message_ops = + retry_state->started_send_message_count < send_messages_.size(); + bool have_pending_send_trailing_metadata_op = + seen_send_trailing_metadata_ && + !retry_state->started_send_trailing_metadata; + if (!have_pending_send_message_ops && + !have_pending_send_trailing_metadata_op) { + for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { + PendingBatch* pending = &pending_batches_[i]; + grpc_transport_stream_op_batch* batch = pending->batch; + if (batch == nullptr || pending->send_ops_cached) continue; + if (batch->send_message) have_pending_send_message_ops = true; + if (batch->send_trailing_metadata) { + have_pending_send_trailing_metadata_op = true; + } + } + } + if (have_pending_send_message_ops || have_pending_send_trailing_metadata_op) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: starting next batch for pending send op(s)", + chand_, this); + } + GRPC_CLOSURE_INIT(&batch_data->batch.handler_private.closure, + StartRetriableSubchannelBatches, this, + grpc_schedule_on_exec_ctx); + closures->Add(&batch_data->batch.handler_private.closure, GRPC_ERROR_NONE, + "starting next batch for send_* op(s)"); + } +} + +void RetryFilter::CallData::OnComplete(void* arg, grpc_error* error) { + SubchannelCallBatchData* batch_data = + static_cast(arg); + CallData* call = batch_data->call; + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: got on_complete, error=%s, batch=%s", + call->chand_, call, grpc_error_string(error), + grpc_transport_stream_op_batch_string(&batch_data->batch).c_str()); + } + 
SubchannelCallRetryState* retry_state =
+      static_cast<SubchannelCallRetryState*>(
+          batch_data->lb_call->GetParentData());
+  // Update bookkeeping in retry_state.
+  if (batch_data->batch.send_initial_metadata) {
+    retry_state->completed_send_initial_metadata = true;
+  }
+  if (batch_data->batch.send_message) {
+    ++retry_state->completed_send_message_count;
+  }
+  if (batch_data->batch.send_trailing_metadata) {
+    retry_state->completed_send_trailing_metadata = true;
+  }
+  // If the call is committed, free cached data for send ops that we've just
+  // completed.
+  if (call->retry_committed_) {
+    call->FreeCachedSendOpDataForCompletedBatch(batch_data, retry_state);
+  }
+  // Construct list of closures to execute.
+  CallCombinerClosureList closures;
+  // If a retry was already dispatched, that means we saw
+  // recv_trailing_metadata before this, so we do nothing here.
+  // Otherwise, invoke the callback to return the result to the surface.
+  if (!retry_state->retry_dispatched) {
+    // Add closure for the completed pending batch, if any.
+    call->AddClosuresForCompletedPendingBatch(batch_data, GRPC_ERROR_REF(error),
+                                              &closures);
+    // If needed, add a callback to start any replay or pending send ops on
+    // the subchannel call.
+    if (!retry_state->completed_recv_trailing_metadata) {
+      call->AddClosuresForReplayOrPendingSendOps(batch_data, retry_state,
+                                                 &closures);
+    }
+  }
+  // Track number of pending subchannel send batches and determine if this
+  // was the last one.
+  --call->num_pending_retriable_subchannel_send_batches_;
+  const bool last_send_batch_complete =
+      call->num_pending_retriable_subchannel_send_batches_ == 0;
+  // Don't need batch_data anymore.
+  batch_data->Unref();
+  // Schedule all of the closures identified above.
+  // Note: This yields the call combiner.
+  closures.RunClosures(call->call_combiner_);
+  // If this was the last subchannel send batch, unref the call stack.
+ if (last_send_batch_complete) { + GRPC_CALL_STACK_UNREF(call->owning_call_, "subchannel_send_batches"); + } +} + +// +// subchannel batch construction +// + +void RetryFilter::CallData::StartBatchInCallCombiner(void* arg, + grpc_error* /*ignored*/) { + grpc_transport_stream_op_batch* batch = + static_cast(arg); + auto* lb_call = static_cast( + batch->handler_private.extra_arg); + // Note: This will release the call combiner. + lb_call->StartTransportStreamOpBatch(batch); +} + +void RetryFilter::CallData::AddClosureForSubchannelBatch( + grpc_transport_stream_op_batch* batch, CallCombinerClosureList* closures) { + batch->handler_private.extra_arg = lb_call_.get(); + GRPC_CLOSURE_INIT(&batch->handler_private.closure, StartBatchInCallCombiner, + batch, grpc_schedule_on_exec_ctx); + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: starting subchannel batch: %s", + chand_, this, grpc_transport_stream_op_batch_string(batch).c_str()); + } + closures->Add(&batch->handler_private.closure, GRPC_ERROR_NONE, + "start_subchannel_batch"); +} + +void RetryFilter::CallData::AddRetriableSendInitialMetadataOp( + SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data) { + // Maps the number of retries to the corresponding metadata value slice. + const grpc_slice* retry_count_strings[] = {&GRPC_MDSTR_1, &GRPC_MDSTR_2, + &GRPC_MDSTR_3, &GRPC_MDSTR_4}; + // We need to make a copy of the metadata batch for each attempt, since + // the filters in the subchannel stack may modify this batch, and we don't + // want those modifications to be passed forward to subsequent attempts. + // + // If we've already completed one or more attempts, add the + // grpc-retry-attempts header. 
+ retry_state->send_initial_metadata_storage = + static_cast(arena_->Alloc( + sizeof(grpc_linked_mdelem) * + (send_initial_metadata_.list.count + (num_attempts_completed_ > 0)))); + grpc_metadata_batch_copy(&send_initial_metadata_, + &retry_state->send_initial_metadata, + retry_state->send_initial_metadata_storage); + if (GPR_UNLIKELY(retry_state->send_initial_metadata.idx.named + .grpc_previous_rpc_attempts != nullptr)) { + grpc_metadata_batch_remove(&retry_state->send_initial_metadata, + GRPC_BATCH_GRPC_PREVIOUS_RPC_ATTEMPTS); + } + if (GPR_UNLIKELY(num_attempts_completed_ > 0)) { + grpc_mdelem retry_md = grpc_mdelem_create( + GRPC_MDSTR_GRPC_PREVIOUS_RPC_ATTEMPTS, + *retry_count_strings[num_attempts_completed_ - 1], nullptr); + grpc_error* error = grpc_metadata_batch_add_tail( + &retry_state->send_initial_metadata, + &retry_state + ->send_initial_metadata_storage[send_initial_metadata_.list.count], + retry_md, GRPC_BATCH_GRPC_PREVIOUS_RPC_ATTEMPTS); + if (GPR_UNLIKELY(error != GRPC_ERROR_NONE)) { + gpr_log(GPR_ERROR, "error adding retry metadata: %s", + grpc_error_string(error)); + GPR_ASSERT(false); + } + } + retry_state->started_send_initial_metadata = true; + batch_data->batch.send_initial_metadata = true; + batch_data->batch.payload->send_initial_metadata.send_initial_metadata = + &retry_state->send_initial_metadata; + batch_data->batch.payload->send_initial_metadata.send_initial_metadata_flags = + send_initial_metadata_flags_; + batch_data->batch.payload->send_initial_metadata.peer_string = peer_string_; +} + +void RetryFilter::CallData::AddRetriableSendMessageOp( + SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: starting calld->send_messages[%" PRIuPTR "]", + chand_, this, retry_state->started_send_message_count); + } + ByteStreamCache* cache = + send_messages_[retry_state->started_send_message_count]; + 
++retry_state->started_send_message_count; + retry_state->send_message.Init(cache); + batch_data->batch.send_message = true; + batch_data->batch.payload->send_message.send_message.reset( + retry_state->send_message.get()); +} + +void RetryFilter::CallData::AddRetriableSendTrailingMetadataOp( + SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data) { + // We need to make a copy of the metadata batch for each attempt, since + // the filters in the subchannel stack may modify this batch, and we don't + // want those modifications to be passed forward to subsequent attempts. + retry_state->send_trailing_metadata_storage = + static_cast(arena_->Alloc( + sizeof(grpc_linked_mdelem) * send_trailing_metadata_.list.count)); + grpc_metadata_batch_copy(&send_trailing_metadata_, + &retry_state->send_trailing_metadata, + retry_state->send_trailing_metadata_storage); + retry_state->started_send_trailing_metadata = true; + batch_data->batch.send_trailing_metadata = true; + batch_data->batch.payload->send_trailing_metadata.send_trailing_metadata = + &retry_state->send_trailing_metadata; +} + +void RetryFilter::CallData::AddRetriableRecvInitialMetadataOp( + SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data) { + retry_state->started_recv_initial_metadata = true; + batch_data->batch.recv_initial_metadata = true; + grpc_metadata_batch_init(&retry_state->recv_initial_metadata); + batch_data->batch.payload->recv_initial_metadata.recv_initial_metadata = + &retry_state->recv_initial_metadata; + batch_data->batch.payload->recv_initial_metadata.trailing_metadata_available = + &retry_state->trailing_metadata_available; + GRPC_CLOSURE_INIT(&retry_state->recv_initial_metadata_ready, + RecvInitialMetadataReady, batch_data, + grpc_schedule_on_exec_ctx); + batch_data->batch.payload->recv_initial_metadata.recv_initial_metadata_ready = + &retry_state->recv_initial_metadata_ready; +} + +void RetryFilter::CallData::AddRetriableRecvMessageOp( + 
SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data) { + ++retry_state->started_recv_message_count; + batch_data->batch.recv_message = true; + batch_data->batch.payload->recv_message.recv_message = + &retry_state->recv_message; + GRPC_CLOSURE_INIT(&retry_state->recv_message_ready, RecvMessageReady, + batch_data, grpc_schedule_on_exec_ctx); + batch_data->batch.payload->recv_message.recv_message_ready = + &retry_state->recv_message_ready; +} + +void RetryFilter::CallData::AddRetriableRecvTrailingMetadataOp( + SubchannelCallRetryState* retry_state, + SubchannelCallBatchData* batch_data) { + retry_state->started_recv_trailing_metadata = true; + batch_data->batch.recv_trailing_metadata = true; + grpc_metadata_batch_init(&retry_state->recv_trailing_metadata); + batch_data->batch.payload->recv_trailing_metadata.recv_trailing_metadata = + &retry_state->recv_trailing_metadata; + batch_data->batch.payload->recv_trailing_metadata.collect_stats = + &retry_state->collect_stats; + GRPC_CLOSURE_INIT(&retry_state->recv_trailing_metadata_ready, + RecvTrailingMetadataReady, batch_data, + grpc_schedule_on_exec_ctx); + batch_data->batch.payload->recv_trailing_metadata + .recv_trailing_metadata_ready = + &retry_state->recv_trailing_metadata_ready; +} + +void RetryFilter::CallData::StartInternalRecvTrailingMetadata() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: call failed but recv_trailing_metadata not " + "started; starting it internally", + chand_, this); + } + SubchannelCallRetryState* retry_state = + static_cast(lb_call_->GetParentData()); + // Create batch_data with 2 refs, since this batch will be unreffed twice: + // once for the recv_trailing_metadata_ready callback when the subchannel + // batch returns, and again when we actually get a recv_trailing_metadata + // op from the surface. 
+ SubchannelCallBatchData* batch_data = + SubchannelCallBatchData::Create(this, 2, false /* set_on_complete */); + AddRetriableRecvTrailingMetadataOp(retry_state, batch_data); + retry_state->recv_trailing_metadata_internal_batch = batch_data; + // Note: This will release the call combiner. + lb_call_->StartTransportStreamOpBatch(&batch_data->batch); +} + +// If there are any cached send ops that need to be replayed on the +// current subchannel call, creates and returns a new subchannel batch +// to replay those ops. Otherwise, returns nullptr. +RetryFilter::CallData::SubchannelCallBatchData* +RetryFilter::CallData::MaybeCreateSubchannelBatchForReplay( + SubchannelCallRetryState* retry_state) { + SubchannelCallBatchData* replay_batch_data = nullptr; + // send_initial_metadata. + if (seen_send_initial_metadata_ && + !retry_state->started_send_initial_metadata && + !pending_send_initial_metadata_) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: replaying previously completed " + "send_initial_metadata op", + chand_, this); + } + replay_batch_data = + SubchannelCallBatchData::Create(this, 1, true /* set_on_complete */); + AddRetriableSendInitialMetadataOp(retry_state, replay_batch_data); + } + // send_message. + // Note that we can only have one send_message op in flight at a time. + if (retry_state->started_send_message_count < send_messages_.size() && + retry_state->started_send_message_count == + retry_state->completed_send_message_count && + !pending_send_message_) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: replaying previously completed " + "send_message op", + chand_, this); + } + if (replay_batch_data == nullptr) { + replay_batch_data = + SubchannelCallBatchData::Create(this, 1, true /* set_on_complete */); + } + AddRetriableSendMessageOp(retry_state, replay_batch_data); + } + // send_trailing_metadata. 
+ // Note that we only add this op if we have no more send_message ops + // to start, since we can't send down any more send_message ops after + // send_trailing_metadata. + if (seen_send_trailing_metadata_ && + retry_state->started_send_message_count == send_messages_.size() && + !retry_state->started_send_trailing_metadata && + !pending_send_trailing_metadata_) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: replaying previously completed " + "send_trailing_metadata op", + chand_, this); + } + if (replay_batch_data == nullptr) { + replay_batch_data = + SubchannelCallBatchData::Create(this, 1, true /* set_on_complete */); + } + AddRetriableSendTrailingMetadataOp(retry_state, replay_batch_data); + } + return replay_batch_data; +} + +void RetryFilter::CallData::AddSubchannelBatchesForPendingBatches( + SubchannelCallRetryState* retry_state, CallCombinerClosureList* closures) { + for (size_t i = 0; i < GPR_ARRAY_SIZE(pending_batches_); ++i) { + PendingBatch* pending = &pending_batches_[i]; + grpc_transport_stream_op_batch* batch = pending->batch; + if (batch == nullptr) continue; + // Skip any batch that either (a) has already been started on this + // subchannel call or (b) we can't start yet because we're still + // replaying send ops that need to be completed first. + // TODO(roth): Note that if any one op in the batch can't be sent + // yet due to ops that we're replaying, we don't start any of the ops + // in the batch. This is probably okay, but it could conceivably + // lead to increased latency in some cases -- e.g., we could delay + // starting a recv op due to it being in the same batch with a send + // op. If/when we revamp the callback protocol in + // transport_stream_op_batch, we may be able to fix this. 
+ if (batch->send_initial_metadata && + retry_state->started_send_initial_metadata) { + continue; + } + if (batch->send_message && retry_state->completed_send_message_count < + retry_state->started_send_message_count) { + continue; + } + // Note that we only start send_trailing_metadata if we have no more + // send_message ops to start, since we can't send down any more + // send_message ops after send_trailing_metadata. + if (batch->send_trailing_metadata && + (retry_state->started_send_message_count + batch->send_message < + send_messages_.size() || + retry_state->started_send_trailing_metadata)) { + continue; + } + if (batch->recv_initial_metadata && + retry_state->started_recv_initial_metadata) { + continue; + } + if (batch->recv_message && retry_state->completed_recv_message_count < + retry_state->started_recv_message_count) { + continue; + } + if (batch->recv_trailing_metadata && + retry_state->started_recv_trailing_metadata) { + // If we previously completed a recv_trailing_metadata op + // initiated by StartInternalRecvTrailingMetadata(), use the + // result of that instead of trying to re-start this op. + if (GPR_UNLIKELY((retry_state->recv_trailing_metadata_internal_batch != + nullptr))) { + // If the batch completed, then trigger the completion callback + // directly, so that we return the previously returned results to + // the application. Otherwise, just unref the internally + // started subchannel batch, since we'll propagate the + // completion when it completes. + if (retry_state->completed_recv_trailing_metadata) { + // Batches containing recv_trailing_metadata always succeed. 
+ closures->Add( + &retry_state->recv_trailing_metadata_ready, GRPC_ERROR_NONE, + "re-executing recv_trailing_metadata_ready to propagate " + "internally triggered result"); + } else { + retry_state->recv_trailing_metadata_internal_batch->Unref(); + } + retry_state->recv_trailing_metadata_internal_batch = nullptr; + } + continue; + } + // If we're not retrying, just send the batch as-is. + // TODO(roth): This condition doesn't seem exactly right -- maybe need a + // notion of "draining" once we've committed and are done replaying? + if (retry_policy_ == nullptr || retry_committed_) { + AddClosureForSubchannelBatch(batch, closures); + PendingBatchClear(pending); + continue; + } + // Create batch with the right number of callbacks. + const bool has_send_ops = batch->send_initial_metadata || + batch->send_message || + batch->send_trailing_metadata; + const int num_callbacks = has_send_ops + batch->recv_initial_metadata + + batch->recv_message + + batch->recv_trailing_metadata; + SubchannelCallBatchData* batch_data = SubchannelCallBatchData::Create( + this, num_callbacks, has_send_ops /* set_on_complete */); + // Cache send ops if needed. + MaybeCacheSendOpsForBatch(pending); + // send_initial_metadata. + if (batch->send_initial_metadata) { + AddRetriableSendInitialMetadataOp(retry_state, batch_data); + } + // send_message. + if (batch->send_message) { + AddRetriableSendMessageOp(retry_state, batch_data); + } + // send_trailing_metadata. + if (batch->send_trailing_metadata) { + AddRetriableSendTrailingMetadataOp(retry_state, batch_data); + } + // recv_initial_metadata. + if (batch->recv_initial_metadata) { + // recv_flags is only used on the server side. + GPR_ASSERT(batch->payload->recv_initial_metadata.recv_flags == nullptr); + AddRetriableRecvInitialMetadataOp(retry_state, batch_data); + } + // recv_message. + if (batch->recv_message) { + AddRetriableRecvMessageOp(retry_state, batch_data); + } + // recv_trailing_metadata. 
+ if (batch->recv_trailing_metadata) { + AddRetriableRecvTrailingMetadataOp(retry_state, batch_data); + } + AddClosureForSubchannelBatch(&batch_data->batch, closures); + // Track number of pending subchannel send batches. + // If this is the first one, take a ref to the call stack. + if (batch->send_initial_metadata || batch->send_message || + batch->send_trailing_metadata) { + if (num_pending_retriable_subchannel_send_batches_ == 0) { + GRPC_CALL_STACK_REF(owning_call_, "subchannel_send_batches"); + } + ++num_pending_retriable_subchannel_send_batches_; + } + } +} + +void RetryFilter::CallData::StartRetriableSubchannelBatches( + void* arg, grpc_error* /*ignored*/) { + CallData* call = static_cast(arg); + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: constructing retriable batches", + call->chand_, call); + } + SubchannelCallRetryState* retry_state = + static_cast(call->lb_call_->GetParentData()); + // Construct list of closures to execute, one for each pending batch. + CallCombinerClosureList closures; + // Replay previously-returned send_* ops if needed. + SubchannelCallBatchData* replay_batch_data = + call->MaybeCreateSubchannelBatchForReplay(retry_state); + if (replay_batch_data != nullptr) { + call->AddClosureForSubchannelBatch(&replay_batch_data->batch, &closures); + // Track number of pending subchannel send batches. + // If this is the first one, take a ref to the call stack. + if (call->num_pending_retriable_subchannel_send_batches_ == 0) { + GRPC_CALL_STACK_REF(call->owning_call_, "subchannel_send_batches"); + } + ++call->num_pending_retriable_subchannel_send_batches_; + } + // Now add pending batches. + call->AddSubchannelBatchesForPendingBatches(retry_state, &closures); + // Start batches on subchannel call. 
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, + "chand=%p calld=%p: starting %" PRIuPTR + " retriable batches on lb_call=%p", + call->chand_, call, closures.size(), call->lb_call_.get()); + } + // Note: This will yield the call combiner. + closures.RunClosures(call->call_combiner_); +} + +void RetryFilter::CallData::CreateLbCall(void* arg, grpc_error* /*error*/) { + auto* calld = static_cast(arg); + const size_t parent_data_size = + calld->enable_retries_ ? sizeof(SubchannelCallRetryState) : 0; + grpc_call_element_args args = { + calld->owning_call_, nullptr, + calld->call_context_, calld->path_, + calld->call_start_time_, calld->deadline_, + calld->arena_, calld->call_combiner_}; + calld->lb_call_ = calld->chand_->client_channel_->CreateLoadBalancedCall( + args, calld->pollent_, parent_data_size); + if (GRPC_TRACE_FLAG_ENABLED(grpc_retry_trace)) { + gpr_log(GPR_INFO, "chand=%p calld=%p: create lb_call=%p", calld->chand_, + calld, calld->lb_call_.get()); + } + if (parent_data_size > 0) { + new (calld->lb_call_->GetParentData()) + SubchannelCallRetryState(calld->call_context_); + } + calld->PendingBatchesResume(); +} + +} // namespace + +const grpc_channel_filter kRetryFilterVtable = { + RetryFilter::CallData::StartTransportStreamOpBatch, + RetryFilter::StartTransportOp, + sizeof(RetryFilter::CallData), + RetryFilter::CallData::Init, + RetryFilter::CallData::SetPollent, + RetryFilter::CallData::Destroy, + sizeof(RetryFilter), + RetryFilter::Init, + RetryFilter::Destroy, + RetryFilter::GetChannelInfo, + "retry_filter", +}; + +} // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/retry_filter.h b/src/core/ext/filters/client_channel/retry_filter.h new file mode 100644 index 00000000000..a96df8af1fc --- /dev/null +++ b/src/core/ext/filters/client_channel/retry_filter.h @@ -0,0 +1,30 @@ +// +// Copyright 2021 gRPC authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RETRY_FILTER_H +#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RETRY_FILTER_H + +#include + +#include "src/core/lib/channel/channel_stack.h" + +namespace grpc_core { + +extern const grpc_channel_filter kRetryFilterVtable; + +} // namespace grpc_core + +#endif // GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RETRY_FILTER_H diff --git a/src/core/ext/filters/client_channel/retry_service_config.cc b/src/core/ext/filters/client_channel/retry_service_config.cc new file mode 100644 index 00000000000..f2a2e5e2d88 --- /dev/null +++ b/src/core/ext/filters/client_channel/retry_service_config.cc @@ -0,0 +1,285 @@ +// +// Copyright 2018 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include + +#include "src/core/ext/filters/client_channel/retry_service_config.h" + +#include +#include +#include + +#include "absl/strings/str_cat.h" +#include "absl/types/optional.h" + +#include +#include +#include + +#include "src/core/ext/filters/client_channel/client_channel.h" +#include "src/core/ext/filters/client_channel/lb_policy_registry.h" +#include "src/core/ext/filters/client_channel/server_address.h" +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/channel/status_util.h" +#include "src/core/lib/gpr/string.h" +#include "src/core/lib/gprpp/memory.h" +#include "src/core/lib/json/json_util.h" +#include "src/core/lib/uri/uri_parser.h" + +// As per the retry design, we do not allow more than 5 retry attempts. +#define MAX_MAX_RETRY_ATTEMPTS 5 + +namespace grpc_core { +namespace internal { + +namespace { +size_t g_retry_service_config_parser_index; +} + +size_t RetryServiceConfigParser::ParserIndex() { + return g_retry_service_config_parser_index; +} + +void RetryServiceConfigParser::Register() { + g_retry_service_config_parser_index = ServiceConfigParser::RegisterParser( + absl::make_unique()); +} + +namespace { + +grpc_error* ParseRetryThrottling(const Json& json, intptr_t* max_milli_tokens, + intptr_t* milli_token_ratio) { + if (json.type() != Json::Type::OBJECT) { + return GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryThrottling error:Type should be object"); + } + std::vector error_list; + // Parse maxTokens. 
+ auto it = json.object_value().find("maxTokens"); + if (it == json.object_value().end()) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryThrottling field:maxTokens error:Not found")); + } else if (it->second.type() != Json::Type::NUMBER) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryThrottling field:maxTokens error:Type should be " + "number")); + } else { + *max_milli_tokens = + gpr_parse_nonnegative_int(it->second.string_value().c_str()) * 1000; + if (*max_milli_tokens <= 0) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryThrottling field:maxTokens error:should be " + "greater than zero")); + } + } + // Parse tokenRatio. + it = json.object_value().find("tokenRatio"); + if (it == json.object_value().end()) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryThrottling field:tokenRatio error:Not found")); + } else if (it->second.type() != Json::Type::NUMBER) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryThrottling field:tokenRatio error:type should be " + "number")); + } else { + // We support up to 3 decimal digits. 
+ size_t whole_len = it->second.string_value().size(); + const char* value = it->second.string_value().c_str(); + uint32_t multiplier = 1; + uint32_t decimal_value = 0; + const char* decimal_point = strchr(value, '.'); + if (decimal_point != nullptr) { + whole_len = static_cast(decimal_point - value); + multiplier = 1000; + size_t decimal_len = strlen(decimal_point + 1); + if (decimal_len > 3) decimal_len = 3; + if (!gpr_parse_bytes_to_uint32(decimal_point + 1, decimal_len, + &decimal_value)) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryThrottling field:tokenRatio error:Failed " + "parsing")); + return GRPC_ERROR_CREATE_FROM_VECTOR("retryThrottling", &error_list); + } + uint32_t decimal_multiplier = 1; + for (size_t i = 0; i < (3 - decimal_len); ++i) { + decimal_multiplier *= 10; + } + decimal_value *= decimal_multiplier; + } + uint32_t whole_value; + if (!gpr_parse_bytes_to_uint32(value, whole_len, &whole_value)) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryThrottling field:tokenRatio error:Failed " + "parsing")); + return GRPC_ERROR_CREATE_FROM_VECTOR("retryThrottling", &error_list); + } + *milli_token_ratio = + static_cast((whole_value * multiplier) + decimal_value); + if (*milli_token_ratio <= 0) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryThrottling field:tokenRatio error:value should " + "be greater than 0")); + } + } + return GRPC_ERROR_CREATE_FROM_VECTOR("retryThrottling", &error_list); +} + +} // namespace + +std::unique_ptr +RetryServiceConfigParser::ParseGlobalParams(const grpc_channel_args* /*args*/, + const Json& json, + grpc_error** error) { + GPR_DEBUG_ASSERT(error != nullptr && *error == GRPC_ERROR_NONE); + auto it = json.object_value().find("retryThrottling"); + if (it == json.object_value().end()) return nullptr; + intptr_t max_milli_tokens = 0; + intptr_t milli_token_ratio = 0; + *error = + ParseRetryThrottling(it->second, &max_milli_tokens, 
&milli_token_ratio); + if (*error != GRPC_ERROR_NONE) return nullptr; + return absl::make_unique(max_milli_tokens, + milli_token_ratio); +} + +namespace { + +grpc_error* ParseRetryPolicy(const Json& json, int* max_attempts, + grpc_millis* initial_backoff, + grpc_millis* max_backoff, + float* backoff_multiplier, + StatusCodeSet* retryable_status_codes) { + if (json.type() != Json::Type::OBJECT) { + return GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryPolicy error:should be of type object"); + } + std::vector error_list; + // Parse maxAttempts. + auto it = json.object_value().find("maxAttempts"); + if (it != json.object_value().end()) { + if (it->second.type() != Json::Type::NUMBER) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:maxAttempts error:should be of type number")); + } else { + *max_attempts = + gpr_parse_nonnegative_int(it->second.string_value().c_str()); + if (*max_attempts <= 1) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:maxAttempts error:should be at least 2")); + } else if (*max_attempts > MAX_MAX_RETRY_ATTEMPTS) { + gpr_log(GPR_ERROR, + "service config: clamped retryPolicy.maxAttempts at %d", + MAX_MAX_RETRY_ATTEMPTS); + *max_attempts = MAX_MAX_RETRY_ATTEMPTS; + } + } + } + // Parse initialBackoff. + if (ParseJsonObjectFieldAsDuration(json.object_value(), "initialBackoff", + initial_backoff, &error_list) && + *initial_backoff == 0) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:initialBackoff error:must be greater than 0")); + } + // Parse maxBackoff. + if (ParseJsonObjectFieldAsDuration(json.object_value(), "maxBackoff", + max_backoff, &error_list) && + *max_backoff == 0) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:maxBackoff error:should be greater than 0")); + } + // Parse backoffMultiplier. 
+ it = json.object_value().find("backoffMultiplier"); + if (it != json.object_value().end()) { + if (it->second.type() != Json::Type::NUMBER) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:backoffMultiplier error:should be of type number")); + } else { + if (sscanf(it->second.string_value().c_str(), "%f", backoff_multiplier) != + 1) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:backoffMultiplier error:failed to parse")); + } else if (*backoff_multiplier <= 0) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:backoffMultiplier error:should be greater than 0")); + } + } + } + // Parse retryableStatusCodes. + it = json.object_value().find("retryableStatusCodes"); + if (it != json.object_value().end()) { + if (it->second.type() != Json::Type::ARRAY) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryableStatusCodes error:should be of type array")); + } else { + for (const Json& element : it->second.array_value()) { + if (element.type() != Json::Type::STRING) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryableStatusCodes error:status codes should be of type " + "string")); + continue; + } + grpc_status_code status; + if (!grpc_status_code_from_string(element.string_value().c_str(), + &status)) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryableStatusCodes error:failed to parse status code")); + continue; + } + retryable_status_codes->Add(status); + } + if (retryable_status_codes->Empty()) { + error_list.push_back(GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryableStatusCodes error:should be non-empty")); + }; + } + } + // Make sure required fields are set. 
+ if (error_list.empty()) { + if (*max_attempts == 0 || *initial_backoff == 0 || *max_backoff == 0 || + *backoff_multiplier == 0 || retryable_status_codes->Empty()) { + return GRPC_ERROR_CREATE_FROM_STATIC_STRING( + "field:retryPolicy error:Missing required field(s)"); + } + } + return GRPC_ERROR_CREATE_FROM_VECTOR("retryPolicy", &error_list); +} + +} // namespace + +std::unique_ptr +RetryServiceConfigParser::ParsePerMethodParams( + const grpc_channel_args* /*args*/, const Json& json, grpc_error** error) { + GPR_DEBUG_ASSERT(error != nullptr && *error == GRPC_ERROR_NONE); + // Parse retry policy. + auto it = json.object_value().find("retryPolicy"); + if (it == json.object_value().end()) return nullptr; + int max_attempts = 0; + grpc_millis initial_backoff = 0; + grpc_millis max_backoff = 0; + float backoff_multiplier = 0; + StatusCodeSet retryable_status_codes; + *error = ParseRetryPolicy(it->second, &max_attempts, &initial_backoff, + &max_backoff, &backoff_multiplier, + &retryable_status_codes); + if (*error != GRPC_ERROR_NONE) return nullptr; + return absl::make_unique(max_attempts, initial_backoff, + max_backoff, backoff_multiplier, + retryable_status_codes); +} + +} // namespace internal +} // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/retry_service_config.h b/src/core/ext/filters/client_channel/retry_service_config.h new file mode 100644 index 00000000000..837c755e11d --- /dev/null +++ b/src/core/ext/filters/client_channel/retry_service_config.h @@ -0,0 +1,90 @@ +// +// Copyright 2018 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RETRY_SERVICE_CONFIG_H +#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RETRY_SERVICE_CONFIG_H + +#include + +#include + +#include "src/core/ext/filters/client_channel/retry_throttle.h" +#include "src/core/ext/filters/client_channel/service_config_parser.h" +#include "src/core/lib/channel/status_util.h" +#include "src/core/lib/iomgr/exec_ctx.h" // for grpc_millis + +namespace grpc_core { +namespace internal { + +class RetryGlobalConfig : public ServiceConfigParser::ParsedConfig { + public: + RetryGlobalConfig(intptr_t max_milli_tokens, intptr_t milli_token_ratio) + : max_milli_tokens_(max_milli_tokens), + milli_token_ratio_(milli_token_ratio) {} + + intptr_t max_milli_tokens() const { return max_milli_tokens_; } + intptr_t milli_token_ratio() const { return milli_token_ratio_; } + + private: + intptr_t max_milli_tokens_ = 0; + intptr_t milli_token_ratio_ = 0; +}; + +class RetryMethodConfig : public ServiceConfigParser::ParsedConfig { + public: + RetryMethodConfig(int max_attempts, grpc_millis initial_backoff, + grpc_millis max_backoff, float backoff_multiplier, + StatusCodeSet retryable_status_codes) + : max_attempts_(max_attempts), + initial_backoff_(initial_backoff), + max_backoff_(max_backoff), + backoff_multiplier_(backoff_multiplier), + retryable_status_codes_(retryable_status_codes) {} + + int max_attempts() const { return max_attempts_; } + grpc_millis initial_backoff() const { return initial_backoff_; } + grpc_millis max_backoff() const { return max_backoff_; } + float backoff_multiplier() 
const { return backoff_multiplier_; } + StatusCodeSet retryable_status_codes() const { + return retryable_status_codes_; + } + + private: + int max_attempts_ = 0; + grpc_millis initial_backoff_ = 0; + grpc_millis max_backoff_ = 0; + float backoff_multiplier_ = 0; + StatusCodeSet retryable_status_codes_; +}; + +class RetryServiceConfigParser : public ServiceConfigParser::Parser { + public: + std::unique_ptr ParseGlobalParams( + const grpc_channel_args* /*args*/, const Json& json, + grpc_error** error) override; + + std::unique_ptr ParsePerMethodParams( + const grpc_channel_args* /*args*/, const Json& json, + grpc_error** error) override; + + static size_t ParserIndex(); + static void Register(); +}; + +} // namespace internal +} // namespace grpc_core + +#endif // GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_RETRY_SERVICE_CONFIG_H diff --git a/src/core/ext/xds/xds_client.cc b/src/core/ext/xds/xds_client.cc index 666e5359b2e..0d39254fec8 100644 --- a/src/core/ext/xds/xds_client.cc +++ b/src/core/ext/xds/xds_client.cc @@ -529,20 +529,18 @@ void XdsClient::ChannelState::MaybeStartLrsCall() { void XdsClient::ChannelState::StopLrsCall() { lrs_calld_.reset(); } void XdsClient::ChannelState::StartConnectivityWatchLocked() { - grpc_channel_element* client_channel_elem = - grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel_)); - GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter); + ClientChannel* client_channel = ClientChannel::GetFromChannel(channel_); + GPR_ASSERT(client_channel != nullptr); watcher_ = new StateWatcher(Ref(DEBUG_LOCATION, "ChannelState+watch")); - grpc_client_channel_start_connectivity_watch( - client_channel_elem, GRPC_CHANNEL_IDLE, + client_channel->AddConnectivityWatcher( + GRPC_CHANNEL_IDLE, OrphanablePtr(watcher_)); } void XdsClient::ChannelState::CancelConnectivityWatchLocked() { - grpc_channel_element* client_channel_elem = - grpc_channel_stack_last_element(grpc_channel_get_channel_stack(channel_)); - 
GPR_ASSERT(client_channel_elem->filter == &grpc_client_channel_filter); - grpc_client_channel_stop_connectivity_watch(client_channel_elem, watcher_); + ClientChannel* client_channel = ClientChannel::GetFromChannel(channel_); + GPR_ASSERT(client_channel != nullptr); + client_channel->RemoveConnectivityWatcher(watcher_); } void XdsClient::ChannelState::SubscribeLocked(const std::string& type_url, diff --git a/src/python/grpcio/grpc_core_dependencies.py b/src/python/grpcio/grpc_core_dependencies.py index 9cdca2257b1..652fba96f59 100644 --- a/src/python/grpcio/grpc_core_dependencies.py +++ b/src/python/grpcio/grpc_core_dependencies.py @@ -67,6 +67,8 @@ CORE_SOURCE_FILES = [ 'src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc', 'src/core/ext/filters/client_channel/resolver_registry.cc', 'src/core/ext/filters/client_channel/resolver_result_parsing.cc', + 'src/core/ext/filters/client_channel/retry_filter.cc', + 'src/core/ext/filters/client_channel/retry_service_config.cc', 'src/core/ext/filters/client_channel/retry_throttle.cc', 'src/core/ext/filters/client_channel/server_address.cc', 'src/core/ext/filters/client_channel/service_config.cc', diff --git a/test/core/client_channel/service_config_test.cc b/test/core/client_channel/service_config_test.cc index cc170c008e9..baea0e6e725 100644 --- a/test/core/client_channel/service_config_test.cc +++ b/test/core/client_channel/service_config_test.cc @@ -23,6 +23,7 @@ #include #include "src/core/ext/filters/client_channel/resolver_result_parsing.h" +#include "src/core/ext/filters/client_channel/retry_service_config.h" #include "src/core/ext/filters/client_channel/service_config.h" #include "src/core/ext/filters/client_channel/service_config_parser.h" #include "src/core/ext/filters/message_size/message_size_filter.h" @@ -33,6 +34,10 @@ namespace grpc_core { namespace testing { +// +// ServiceConfig tests +// + // Set this channel arg to true to disable parsing. 
#define GRPC_ARG_DISABLE_PARSING "disable_parsing" @@ -462,6 +467,10 @@ TEST_F(ErroredParsersScopingTest, MethodParams) { GRPC_ERROR_UNREF(error); } +// +// client_channel parser tests +// + class ClientChannelParserTest : public ::testing::Test { protected: void SetUp() override { @@ -621,84 +630,6 @@ TEST_F(ClientChannelParserTest, LoadBalancingPolicyXdsNotAllowed) { GRPC_ERROR_UNREF(error); } -TEST_F(ClientChannelParserTest, ValidRetryThrottling) { - const char* test_json = - "{\n" - " \"retryThrottling\": {\n" - " \"maxTokens\": 2,\n" - " \"tokenRatio\": 1.0\n" - " }\n" - "}"; - grpc_error* error = GRPC_ERROR_NONE; - auto svc_cfg = ServiceConfig::Create(nullptr, test_json, &error); - ASSERT_EQ(error, GRPC_ERROR_NONE) << grpc_error_string(error); - const auto* parsed_config = - static_cast( - svc_cfg->GetGlobalParsedConfig(0)); - const auto retryThrottling = parsed_config->retry_throttling(); - ASSERT_TRUE(retryThrottling.has_value()); - EXPECT_EQ(retryThrottling.value().max_milli_tokens, 2000); - EXPECT_EQ(retryThrottling.value().milli_token_ratio, 1000); -} - -TEST_F(ClientChannelParserTest, RetryThrottlingMissingFields) { - const char* test_json = - "{\n" - " \"retryThrottling\": {\n" - " }\n" - "}"; - grpc_error* error = GRPC_ERROR_NONE; - auto svc_cfg = ServiceConfig::Create(nullptr, test_json, &error); - EXPECT_THAT(grpc_error_string(error), - ::testing::ContainsRegex( - "Service config parsing error.*referenced_errors.*" - "Global Params.*referenced_errors.*" - "Client channel global parser.*referenced_errors.*" - "field:retryThrottling field:maxTokens error:Not found.*" - "field:retryThrottling field:tokenRatio error:Not found")); - GRPC_ERROR_UNREF(error); -} - -TEST_F(ClientChannelParserTest, InvalidRetryThrottlingNegativeMaxTokens) { - const char* test_json = - "{\n" - " \"retryThrottling\": {\n" - " \"maxTokens\": -2,\n" - " \"tokenRatio\": 1.0\n" - " }\n" - "}"; - grpc_error* error = GRPC_ERROR_NONE; - auto svc_cfg = ServiceConfig::Create(nullptr, 
test_json, &error); - EXPECT_THAT(grpc_error_string(error), - ::testing::ContainsRegex( - "Service config parsing error.*referenced_errors.*" - "Global Params.*referenced_errors.*" - "Client channel global parser.*referenced_errors.*" - "field:retryThrottling field:maxTokens error:should " - "be greater than zero")); - GRPC_ERROR_UNREF(error); -} - -TEST_F(ClientChannelParserTest, InvalidRetryThrottlingInvalidTokenRatio) { - const char* test_json = - "{\n" - " \"retryThrottling\": {\n" - " \"maxTokens\": 2,\n" - " \"tokenRatio\": -1\n" - " }\n" - "}"; - grpc_error* error = GRPC_ERROR_NONE; - auto svc_cfg = ServiceConfig::Create(nullptr, test_json, &error); - EXPECT_THAT(grpc_error_string(error), - ::testing::ContainsRegex( - "Service config parsing error.*referenced_errors.*" - "Global Params.*referenced_errors.*" - "Client channel global parser.*referenced_errors.*" - "field:retryThrottling field:tokenRatio " - "error:Failed parsing")); - GRPC_ERROR_UNREF(error); -} - TEST_F(ClientChannelParserTest, ValidTimeout) { const char* test_json = "{\n" @@ -796,7 +727,136 @@ TEST_F(ClientChannelParserTest, InvalidWaitForReady) { GRPC_ERROR_UNREF(error); } -TEST_F(ClientChannelParserTest, ValidRetryPolicy) { +TEST_F(ClientChannelParserTest, ValidHealthCheck) { + const char* test_json = + "{\n" + " \"healthCheckConfig\": {\n" + " \"serviceName\": \"health_check_service_name\"\n" + " }\n" + "}"; + grpc_error* error = GRPC_ERROR_NONE; + auto svc_cfg = ServiceConfig::Create(nullptr, test_json, &error); + ASSERT_EQ(error, GRPC_ERROR_NONE) << grpc_error_string(error); + const auto* parsed_config = + static_cast( + svc_cfg->GetGlobalParsedConfig(0)); + ASSERT_NE(parsed_config, nullptr); + EXPECT_EQ(parsed_config->health_check_service_name(), + "health_check_service_name"); +} + +TEST_F(ClientChannelParserTest, InvalidHealthCheckMultipleEntries) { + const char* test_json = + "{\n" + " \"healthCheckConfig\": {\n" + " \"serviceName\": \"health_check_service_name\"\n" + " },\n" + " 
\"healthCheckConfig\": {\n" + " \"serviceName\": \"health_check_service_name1\"\n" + " }\n" + "}"; + grpc_error* error = GRPC_ERROR_NONE; + auto svc_cfg = ServiceConfig::Create(nullptr, test_json, &error); + EXPECT_THAT(grpc_error_string(error), + ::testing::ContainsRegex( + "JSON parsing failed.*referenced_errors.*" + "duplicate key \"healthCheckConfig\" at index 104")); + GRPC_ERROR_UNREF(error); +} + +// +// retry parser tests +// + +class RetryParserTest : public ::testing::Test { + protected: + void SetUp() override { + ServiceConfigParser::Shutdown(); + ServiceConfigParser::Init(); + EXPECT_EQ(ServiceConfigParser::RegisterParser( + absl::make_unique()), + 0); + } +}; + +TEST_F(RetryParserTest, ValidRetryThrottling) { + const char* test_json = + "{\n" + " \"retryThrottling\": {\n" + " \"maxTokens\": 2,\n" + " \"tokenRatio\": 1.0\n" + " }\n" + "}"; + grpc_error* error = GRPC_ERROR_NONE; + auto svc_cfg = ServiceConfig::Create(nullptr, test_json, &error); + ASSERT_EQ(error, GRPC_ERROR_NONE) << grpc_error_string(error); + const auto* parsed_config = + static_cast( + svc_cfg->GetGlobalParsedConfig(0)); + ASSERT_NE(parsed_config, nullptr); + EXPECT_EQ(parsed_config->max_milli_tokens(), 2000); + EXPECT_EQ(parsed_config->milli_token_ratio(), 1000); +} + +TEST_F(RetryParserTest, RetryThrottlingMissingFields) { + const char* test_json = + "{\n" + " \"retryThrottling\": {\n" + " }\n" + "}"; + grpc_error* error = GRPC_ERROR_NONE; + auto svc_cfg = ServiceConfig::Create(nullptr, test_json, &error); + EXPECT_THAT(grpc_error_string(error), + ::testing::ContainsRegex( + "Service config parsing error.*referenced_errors.*" + "Global Params.*referenced_errors.*" + "retryThrottling.*referenced_errors.*" + "field:retryThrottling field:maxTokens error:Not found.*" + "field:retryThrottling field:tokenRatio error:Not found")); + GRPC_ERROR_UNREF(error); +} + +TEST_F(RetryParserTest, InvalidRetryThrottlingNegativeMaxTokens) { + const char* test_json = + "{\n" + " \"retryThrottling\": 
{\n" + " \"maxTokens\": -2,\n" + " \"tokenRatio\": 1.0\n" + " }\n" + "}"; + grpc_error* error = GRPC_ERROR_NONE; + auto svc_cfg = ServiceConfig::Create(nullptr, test_json, &error); + EXPECT_THAT(grpc_error_string(error), + ::testing::ContainsRegex( + "Service config parsing error.*referenced_errors.*" + "Global Params.*referenced_errors.*" + "retryThrottling.*referenced_errors.*" + "field:retryThrottling field:maxTokens error:should " + "be greater than zero")); + GRPC_ERROR_UNREF(error); +} + +TEST_F(RetryParserTest, InvalidRetryThrottlingInvalidTokenRatio) { + const char* test_json = + "{\n" + " \"retryThrottling\": {\n" + " \"maxTokens\": 2,\n" + " \"tokenRatio\": -1\n" + " }\n" + "}"; + grpc_error* error = GRPC_ERROR_NONE; + auto svc_cfg = ServiceConfig::Create(nullptr, test_json, &error); + EXPECT_THAT(grpc_error_string(error), + ::testing::ContainsRegex( + "Service config parsing error.*referenced_errors.*" + "Global Params.*referenced_errors.*" + "retryThrottling.*referenced_errors.*" + "field:retryThrottling field:tokenRatio " + "error:Failed parsing")); + GRPC_ERROR_UNREF(error); +} + +TEST_F(RetryParserTest, ValidRetryPolicy) { const char* test_json = "{\n" " \"methodConfig\": [ {\n" @@ -819,18 +879,18 @@ TEST_F(ClientChannelParserTest, ValidRetryPolicy) { grpc_slice_from_static_string("/TestServ/TestMethod")); ASSERT_NE(vector_ptr, nullptr); const auto* parsed_config = - static_cast( + static_cast( ((*vector_ptr)[0]).get()); - ASSERT_NE(parsed_config->retry_policy(), nullptr); - EXPECT_EQ(parsed_config->retry_policy()->max_attempts, 3); - EXPECT_EQ(parsed_config->retry_policy()->initial_backoff, 1000); - EXPECT_EQ(parsed_config->retry_policy()->max_backoff, 120000); - EXPECT_EQ(parsed_config->retry_policy()->backoff_multiplier, 1.6f); - EXPECT_TRUE(parsed_config->retry_policy()->retryable_status_codes.Contains( - GRPC_STATUS_ABORTED)); + ASSERT_NE(parsed_config, nullptr); + EXPECT_EQ(parsed_config->max_attempts(), 3); + 
EXPECT_EQ(parsed_config->initial_backoff(), 1000); + EXPECT_EQ(parsed_config->max_backoff(), 120000); + EXPECT_EQ(parsed_config->backoff_multiplier(), 1.6f); + EXPECT_TRUE( + parsed_config->retryable_status_codes().Contains(GRPC_STATUS_ABORTED)); } -TEST_F(ClientChannelParserTest, InvalidRetryPolicyMaxAttempts) { +TEST_F(RetryParserTest, InvalidRetryPolicyMaxAttempts) { const char* test_json = "{\n" " \"methodConfig\": [ {\n" @@ -853,13 +913,12 @@ TEST_F(ClientChannelParserTest, InvalidRetryPolicyMaxAttempts) { "Service config parsing error.*referenced_errors.*" "Method Params.*referenced_errors.*" "methodConfig.*referenced_errors.*" - "Client channel parser.*referenced_errors.*" "retryPolicy.*referenced_errors.*" "field:maxAttempts error:should be at least 2")); GRPC_ERROR_UNREF(error); } -TEST_F(ClientChannelParserTest, InvalidRetryPolicyInitialBackoff) { +TEST_F(RetryParserTest, InvalidRetryPolicyInitialBackoff) { const char* test_json = "{\n" " \"methodConfig\": [ {\n" @@ -882,14 +941,13 @@ TEST_F(ClientChannelParserTest, InvalidRetryPolicyInitialBackoff) { "Service config parsing error.*referenced_errors.*" "Method Params.*referenced_errors.*" "methodConfig.*referenced_errors.*" - "Client channel parser.*referenced_errors.*" "retryPolicy.*referenced_errors.*" "field:initialBackoff error:type should be STRING of the " "form given by google.proto.Duration")); GRPC_ERROR_UNREF(error); } -TEST_F(ClientChannelParserTest, InvalidRetryPolicyMaxBackoff) { +TEST_F(RetryParserTest, InvalidRetryPolicyMaxBackoff) { const char* test_json = "{\n" " \"methodConfig\": [ {\n" @@ -912,14 +970,13 @@ TEST_F(ClientChannelParserTest, InvalidRetryPolicyMaxBackoff) { "Service config parsing error.*referenced_errors.*" "Method Params.*referenced_errors.*" "methodConfig.*referenced_errors.*" - "Client channel parser.*referenced_errors.*" "retryPolicy.*referenced_errors.*" "field:maxBackoff error:type should be STRING of the form " "given by google.proto.Duration")); 
GRPC_ERROR_UNREF(error); } -TEST_F(ClientChannelParserTest, InvalidRetryPolicyBackoffMultiplier) { +TEST_F(RetryParserTest, InvalidRetryPolicyBackoffMultiplier) { const char* test_json = "{\n" " \"methodConfig\": [ {\n" @@ -942,13 +999,12 @@ TEST_F(ClientChannelParserTest, InvalidRetryPolicyBackoffMultiplier) { "Service config parsing error.*referenced_errors.*" "Method Params.*referenced_errors.*" "methodConfig.*referenced_errors.*" - "Client channel parser.*referenced_errors.*" "retryPolicy.*referenced_errors.*" "field:backoffMultiplier error:should be of type number")); GRPC_ERROR_UNREF(error); } -TEST_F(ClientChannelParserTest, InvalidRetryPolicyRetryableStatusCodes) { +TEST_F(RetryParserTest, InvalidRetryPolicyRetryableStatusCodes) { const char* test_json = "{\n" " \"methodConfig\": [ {\n" @@ -971,48 +1027,14 @@ TEST_F(ClientChannelParserTest, InvalidRetryPolicyRetryableStatusCodes) { "Service config parsing error.*referenced_errors.*" "Method Params.*referenced_errors.*" "methodConfig.*referenced_errors.*" - "Client channel parser.*referenced_errors.*" "retryPolicy.*referenced_errors.*" "field:retryableStatusCodes error:should be non-empty")); GRPC_ERROR_UNREF(error); } -TEST_F(ClientChannelParserTest, ValidHealthCheck) { - const char* test_json = - "{\n" - " \"healthCheckConfig\": {\n" - " \"serviceName\": \"health_check_service_name\"\n" - " }\n" - "}"; - grpc_error* error = GRPC_ERROR_NONE; - auto svc_cfg = ServiceConfig::Create(nullptr, test_json, &error); - ASSERT_EQ(error, GRPC_ERROR_NONE) << grpc_error_string(error); - const auto* parsed_config = - static_cast( - svc_cfg->GetGlobalParsedConfig(0)); - ASSERT_NE(parsed_config, nullptr); - EXPECT_EQ(parsed_config->health_check_service_name(), - "health_check_service_name"); -} - -TEST_F(ClientChannelParserTest, InvalidHealthCheckMultipleEntries) { - const char* test_json = - "{\n" - " \"healthCheckConfig\": {\n" - " \"serviceName\": \"health_check_service_name\"\n" - " },\n" - " \"healthCheckConfig\": 
{\n" - " \"serviceName\": \"health_check_service_name1\"\n" - " }\n" - "}"; - grpc_error* error = GRPC_ERROR_NONE; - auto svc_cfg = ServiceConfig::Create(nullptr, test_json, &error); - EXPECT_THAT(grpc_error_string(error), - ::testing::ContainsRegex( - "JSON parsing failed.*referenced_errors.*" - "duplicate key \"healthCheckConfig\" at index 104")); - GRPC_ERROR_UNREF(error); -} +// +// message_size parser tests +// class MessageSizeParserTest : public ::testing::Test { protected: diff --git a/test/cpp/microbenchmarks/bm_call_create.cc b/test/cpp/microbenchmarks/bm_call_create.cc index ab0c02aeea8..236ab1cb343 100644 --- a/test/cpp/microbenchmarks/bm_call_create.cc +++ b/test/cpp/microbenchmarks/bm_call_create.cc @@ -568,7 +568,8 @@ BENCHMARK_TEMPLATE(BM_IsolatedFilter, NoFilter, NoOp); typedef Fixture<&phony_filter::phony_filter, 0> PhonyFilter; BENCHMARK_TEMPLATE(BM_IsolatedFilter, PhonyFilter, NoOp); BENCHMARK_TEMPLATE(BM_IsolatedFilter, PhonyFilter, SendEmptyMetadata); -typedef Fixture<&grpc_client_channel_filter, 0> ClientChannelFilter; +typedef Fixture<&grpc_core::ClientChannel::kFilterVtable, 0> + ClientChannelFilter; BENCHMARK_TEMPLATE(BM_IsolatedFilter, ClientChannelFilter, NoOp); typedef Fixture<&grpc_message_compress_filter, CHECKS_NOT_LAST> CompressFilter; BENCHMARK_TEMPLATE(BM_IsolatedFilter, CompressFilter, NoOp); diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index 7d55ce4139a..e7afde3ab2c 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -1131,6 +1131,10 @@ src/core/ext/filters/client_channel/resolver_registry.cc \ src/core/ext/filters/client_channel/resolver_registry.h \ src/core/ext/filters/client_channel/resolver_result_parsing.cc \ src/core/ext/filters/client_channel/resolver_result_parsing.h \ +src/core/ext/filters/client_channel/retry_filter.cc \ +src/core/ext/filters/client_channel/retry_filter.h \ +src/core/ext/filters/client_channel/retry_service_config.cc \ 
+src/core/ext/filters/client_channel/retry_service_config.h \ src/core/ext/filters/client_channel/retry_throttle.cc \ src/core/ext/filters/client_channel/retry_throttle.h \ src/core/ext/filters/client_channel/server_address.cc \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index 0259d038040..f8275d564a0 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -959,6 +959,10 @@ src/core/ext/filters/client_channel/resolver_registry.cc \ src/core/ext/filters/client_channel/resolver_registry.h \ src/core/ext/filters/client_channel/resolver_result_parsing.cc \ src/core/ext/filters/client_channel/resolver_result_parsing.h \ +src/core/ext/filters/client_channel/retry_filter.cc \ +src/core/ext/filters/client_channel/retry_filter.h \ +src/core/ext/filters/client_channel/retry_service_config.cc \ +src/core/ext/filters/client_channel/retry_service_config.h \ src/core/ext/filters/client_channel/retry_throttle.cc \ src/core/ext/filters/client_channel/retry_throttle.h \ src/core/ext/filters/client_channel/server_address.cc \