Enable retries by default (#26766)

* Enable retries by default, but add a separate arg for hedging.

* don't need to explicitly enable retries in xDS config selector

* clang-format

* don't need retry_enabled bit anymore

* fix HTTP client filter to restore the send_message op in the batch

* fix retry cancellation when a batch fails on call attempt

* fix clang-tidy

* fix client channel to pass down batches even after cancellation

* fix retry code to pass transport stats back up to the surface

* add some missing payload propagation

* fix retry handling of callbacks for pending batches

* avoid scheduling the same callback twice

* fix some trace messages

* don't avoid starting recv_initial_metadata or recv_message due to recv_trailing_metadata already being started internally

* avoid restarting recv_trailing_metadata after commit if we've already started it internally

* use fast path when retries are not configured
pull/26360/head
Mark D. Roth 4 years ago committed by GitHub
parent a3d264e8fd
commit d140f14caf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      include/grpc/impl/codegen/grpc_types.h
  2. 2
      src/core/ext/filters/client_channel/client_channel.cc
  3. 15
      src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc
  4. 2
      test/core/end2end/tests/retry.cc
  5. 2
      test/core/end2end/tests/retry_cancel_during_delay.cc
  6. 2
      test/core/end2end/tests/retry_cancellation.cc
  7. 44
      test/core/end2end/tests/retry_disabled.cc
  8. 2
      test/core/end2end/tests/retry_exceeds_buffer_size_in_delay.cc
  9. 2
      test/core/end2end/tests/retry_exceeds_buffer_size_in_initial_batch.cc
  10. 2
      test/core/end2end/tests/retry_exceeds_buffer_size_in_subsequent_batch.cc
  11. 2
      test/core/end2end/tests/retry_lb_drop.cc
  12. 2
      test/core/end2end/tests/retry_non_retriable_status.cc
  13. 2
      test/core/end2end/tests/retry_non_retriable_status_before_recv_trailing_metadata_started.cc
  14. 2
      test/core/end2end/tests/retry_per_attempt_recv_timeout.cc
  15. 2
      test/core/end2end/tests/retry_per_attempt_recv_timeout_on_last_attempt.cc
  16. 2
      test/core/end2end/tests/retry_recv_initial_metadata.cc
  17. 2
      test/core/end2end/tests/retry_recv_message.cc
  18. 2
      test/core/end2end/tests/retry_recv_trailing_metadata_error.cc
  19. 2
      test/core/end2end/tests/retry_send_op_fails.cc
  20. 2
      test/core/end2end/tests/retry_server_pushback_delay.cc
  21. 2
      test/core/end2end/tests/retry_server_pushback_disabled.cc
  22. 2
      test/core/end2end/tests/retry_streaming.cc
  23. 2
      test/core/end2end/tests/retry_streaming_after_commit.cc
  24. 2
      test/core/end2end/tests/retry_streaming_succeeds_before_replay_finished.cc
  25. 2
      test/core/end2end/tests/retry_throttled.cc
  26. 2
      test/core/end2end/tests/retry_too_many_attempts.cc

@ -384,7 +384,7 @@ typedef struct {
Defaults to "blend". In the current implementation "blend" is equivalent to Defaults to "blend". In the current implementation "blend" is equivalent to
"latency". */ "latency". */
#define GRPC_ARG_OPTIMIZATION_TARGET "grpc.optimization_target" #define GRPC_ARG_OPTIMIZATION_TARGET "grpc.optimization_target"
/** Enables retry functionality. Defaults to false. When enabled, /** Enables retry functionality. Defaults to true. When enabled,
configurable retries are enabled when they are configured via the configurable retries are enabled when they are configured via the
service config. For details, see: service config. For details, see:
https://github.com/grpc/proposal/blob/master/A6-client-retries.md https://github.com/grpc/proposal/blob/master/A6-client-retries.md

@ -1516,7 +1516,7 @@ void ClientChannel::UpdateServiceConfigInDataPlaneLocked() {
channel_args_, args_to_add.data(), args_to_add.size()); channel_args_, args_to_add.data(), args_to_add.size());
new_args = config_selector->ModifyChannelArgs(new_args); new_args = config_selector->ModifyChannelArgs(new_args);
bool enable_retries = bool enable_retries =
grpc_channel_args_find_bool(new_args, GRPC_ARG_ENABLE_RETRIES, false); grpc_channel_args_find_bool(new_args, GRPC_ARG_ENABLE_RETRIES, true);
// Construct dynamic filter stack. // Construct dynamic filter stack.
std::vector<const grpc_channel_filter*> filters = std::vector<const grpc_channel_filter*> filters =
config_selector->GetFilters(); config_selector->GetFilters();

@ -242,7 +242,6 @@ class XdsResolver : public Resolver {
RouteTable route_table_; RouteTable route_table_;
std::map<absl::string_view, RefCountedPtr<ClusterState>> clusters_; std::map<absl::string_view, RefCountedPtr<ClusterState>> clusters_;
std::vector<const grpc_channel_filter*> filters_; std::vector<const grpc_channel_filter*> filters_;
bool retry_enabled_ = false;
}; };
void OnListenerUpdate(XdsApi::LdsUpdate listener); void OnListenerUpdate(XdsApi::LdsUpdate listener);
@ -471,7 +470,6 @@ grpc_error_handle XdsResolver::XdsConfigSelector::CreateMethodConfig(
std::vector<std::string> fields; std::vector<std::string> fields;
// Set retry policy if any. // Set retry policy if any.
if (route.retry_policy.has_value()) { if (route.retry_policy.has_value()) {
retry_enabled_ = true;
std::vector<std::string> retry_parts; std::vector<std::string> retry_parts;
retry_parts.push_back(absl::StrFormat( retry_parts.push_back(absl::StrFormat(
"\"retryPolicy\": {\n" "\"retryPolicy\": {\n"
@ -575,18 +573,7 @@ grpc_error_handle XdsResolver::XdsConfigSelector::CreateMethodConfig(
grpc_channel_args* XdsResolver::XdsConfigSelector::ModifyChannelArgs( grpc_channel_args* XdsResolver::XdsConfigSelector::ModifyChannelArgs(
grpc_channel_args* args) { grpc_channel_args* args) {
// The max number of args to add is 1 so far; when more args need to be added return args;
// we will increase the size of args_to_add accordingly;
absl::InlinedVector<grpc_arg, 1> args_to_add;
if (retry_enabled_) {
args_to_add.emplace_back(grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1));
}
if (args_to_add.empty()) return args;
grpc_channel_args* new_args = grpc_channel_args_copy_and_add(
args, args_to_add.data(), args_to_add.size());
grpc_channel_args_destroy(args);
return new_args;
} }
void XdsResolver::XdsConfigSelector::MaybeAddCluster(const std::string& name) { void XdsResolver::XdsConfigSelector::MaybeAddCluster(const std::string& name) {

@ -121,8 +121,6 @@ static void test_retry(grpc_end2end_test_config config) {
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -120,8 +120,6 @@ static void test_retry_cancel_during_delay(grpc_end2end_test_config config,
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -122,8 +122,6 @@ static void test_retry_cancellation(grpc_end2end_test_config config,
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -93,13 +93,11 @@ static void end_test(grpc_end2end_test_fixture* f) {
grpc_completion_queue_destroy(f->shutdown_cq); grpc_completion_queue_destroy(f->shutdown_cq);
} }
// Tests that we don't retry when retries are not enabled via the // Tests that we don't retry when retries are disabled via the
// GRPC_ARG_ENABLE_RETRIES channel arg, even when there is retry // GRPC_ARG_ENABLE_RETRIES channel arg, even when there is retry
// configuration in the service config. // configuration in the service config.
// - 1 retry allowed for ABORTED status // - 1 retry allowed for ABORTED status
// - first attempt returns ABORTED but does not retry // - first attempt returns ABORTED but does not retry
// TODO(roth): Update this when we change the default of
// GRPC_ARG_ENABLE_RETRIES to true.
static void test_retry_disabled(grpc_end2end_test_config config) { static void test_retry_disabled(grpc_end2end_test_config config) {
grpc_call* c; grpc_call* c;
grpc_call* s; grpc_call* s;
@ -123,24 +121,28 @@ static void test_retry_disabled(grpc_end2end_test_config config) {
int was_cancelled = 2; int was_cancelled = 2;
char* peer; char* peer;
grpc_arg arg = grpc_channel_arg_string_create( grpc_arg args[] = {
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), grpc_channel_arg_integer_create(
const_cast<char*>( const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 0),
"{\n" grpc_channel_arg_string_create(
" \"methodConfig\": [ {\n" const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
" \"name\": [\n" const_cast<char*>(
" { \"service\": \"service\", \"method\": \"method\" }\n" "{\n"
" ],\n" " \"methodConfig\": [ {\n"
" \"retryPolicy\": {\n" " \"name\": [\n"
" \"maxAttempts\": 2,\n" " { \"service\": \"service\", \"method\": \"method\" }\n"
" \"initialBackoff\": \"1s\",\n" " ],\n"
" \"maxBackoff\": \"120s\",\n" " \"retryPolicy\": {\n"
" \"backoffMultiplier\": 1.6,\n" " \"maxAttempts\": 2,\n"
" \"retryableStatusCodes\": [ \"ABORTED\" ]\n" " \"initialBackoff\": \"1s\",\n"
" }\n" " \"maxBackoff\": \"120s\",\n"
" } ]\n" " \"backoffMultiplier\": 1.6,\n"
"}")); " \"retryableStatusCodes\": [ \"ABORTED\" ]\n"
grpc_channel_args client_args = {1, &arg}; " }\n"
" } ]\n"
"}")),
};
grpc_channel_args client_args = {GPR_ARRAY_SIZE(args), args};
grpc_end2end_test_fixture f = grpc_end2end_test_fixture f =
begin_test(config, "retry_disabled", &client_args, nullptr); begin_test(config, "retry_disabled", &client_args, nullptr);

@ -127,8 +127,6 @@ static void test_retry_exceeds_buffer_size_in_delay(
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -124,8 +124,6 @@ static void test_retry_exceeds_buffer_size_in_initial_batch(
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -129,8 +129,6 @@ static void test_retry_exceeds_buffer_size_in_subsequent_batch(
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -181,8 +181,6 @@ static void test_retry_lb_drop(grpc_end2end_test_config config) {
grpc_core::g_pick_args_vector = &pick_args_seen; grpc_core::g_pick_args_vector = &pick_args_seen;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -120,8 +120,6 @@ static void test_retry_non_retriable_status(grpc_end2end_test_config config) {
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -123,8 +123,6 @@ test_retry_non_retriable_status_before_recv_trailing_metadata_started(
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -122,8 +122,6 @@ static void test_retry_per_attempt_recv_timeout(
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_integer_create( grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_EXPERIMENTAL_ENABLE_HEDGING), 1), const_cast<char*>(GRPC_ARG_EXPERIMENTAL_ENABLE_HEDGING), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(

@ -118,8 +118,6 @@ static void test_retry_per_attempt_recv_timeout_on_last_attempt(
grpc_slice details; grpc_slice details;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_integer_create( grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_EXPERIMENTAL_ENABLE_HEDGING), 1), const_cast<char*>(GRPC_ARG_EXPERIMENTAL_ENABLE_HEDGING), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(

@ -125,8 +125,6 @@ static void test_retry_recv_initial_metadata(grpc_end2end_test_config config) {
{{nullptr, nullptr, nullptr, nullptr}}}; {{nullptr, nullptr, nullptr, nullptr}}};
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -121,8 +121,6 @@ static void test_retry_recv_message(grpc_end2end_test_config config) {
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -125,8 +125,6 @@ static void test_retry_recv_trailing_metadata_error(
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -127,8 +127,6 @@ static void test_retry_send_op_fails(grpc_end2end_test_config config) {
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -126,8 +126,6 @@ static void test_retry_server_pushback_delay(grpc_end2end_test_config config) {
pushback_md.value = grpc_slice_from_static_string("2000"); pushback_md.value = grpc_slice_from_static_string("2000");
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -127,8 +127,6 @@ static void test_retry_server_pushback_disabled(
pushback_md.value = grpc_slice_from_static_string("-1"); pushback_md.value = grpc_slice_from_static_string("-1");
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -137,8 +137,6 @@ static void test_retry_streaming(grpc_end2end_test_config config) {
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_integer_create( grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_MAX_CHANNEL_TRACE_EVENT_MEMORY_PER_NODE), const_cast<char*>(GRPC_ARG_MAX_CHANNEL_TRACE_EVENT_MEMORY_PER_NODE),
1024 * 8), 1024 * 8),

@ -127,8 +127,6 @@ static void test_retry_streaming_after_commit(grpc_end2end_test_config config) {
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -128,8 +128,6 @@ static void test_retry_streaming_succeeds_before_replay_finished(
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -120,8 +120,6 @@ static void test_retry_throttled(grpc_end2end_test_config config) {
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

@ -121,8 +121,6 @@ static void test_retry_too_many_attempts(grpc_end2end_test_config config) {
char* peer; char* peer;
grpc_arg args[] = { grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create( grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>( const_cast<char*>(

Loading…
Cancel
Save