Enable retries by default (#26766)

* Enable retries by default, but add a separate arg for hedging.

* don't need to explicitly enable retries in xDS config selector

* clang-format

* don't need retry_enabled bit anymore

* fix HTTP client filter to restore the send_message op in the batch

* fix retry cancellation when a batch fails on call attempt

* fix clang-tidy

* fix client channel to pass down batches even after cancellation

* fix retry code to pass transport stats back up to the surface

* add some missing payload propagation

* fix retry handling of callbacks for pending batches

* avoid scheduling the same callback twice

* fix some trace messages

* don't avoid starting recv_initial_metadata or recv_message due to recv_trailing_metadata already being started internally

* avoid restarting recv_trailing_metadata after commit if we've already started it internally

* use fast path when retries are not configured
pull/26360/head
Mark D. Roth 4 years ago committed by GitHub
parent a3d264e8fd
commit d140f14caf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      include/grpc/impl/codegen/grpc_types.h
  2. 2
      src/core/ext/filters/client_channel/client_channel.cc
  3. 15
      src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc
  4. 2
      test/core/end2end/tests/retry.cc
  5. 2
      test/core/end2end/tests/retry_cancel_during_delay.cc
  6. 2
      test/core/end2end/tests/retry_cancellation.cc
  7. 44
      test/core/end2end/tests/retry_disabled.cc
  8. 2
      test/core/end2end/tests/retry_exceeds_buffer_size_in_delay.cc
  9. 2
      test/core/end2end/tests/retry_exceeds_buffer_size_in_initial_batch.cc
  10. 2
      test/core/end2end/tests/retry_exceeds_buffer_size_in_subsequent_batch.cc
  11. 2
      test/core/end2end/tests/retry_lb_drop.cc
  12. 2
      test/core/end2end/tests/retry_non_retriable_status.cc
  13. 2
      test/core/end2end/tests/retry_non_retriable_status_before_recv_trailing_metadata_started.cc
  14. 2
      test/core/end2end/tests/retry_per_attempt_recv_timeout.cc
  15. 2
      test/core/end2end/tests/retry_per_attempt_recv_timeout_on_last_attempt.cc
  16. 2
      test/core/end2end/tests/retry_recv_initial_metadata.cc
  17. 2
      test/core/end2end/tests/retry_recv_message.cc
  18. 2
      test/core/end2end/tests/retry_recv_trailing_metadata_error.cc
  19. 2
      test/core/end2end/tests/retry_send_op_fails.cc
  20. 2
      test/core/end2end/tests/retry_server_pushback_delay.cc
  21. 2
      test/core/end2end/tests/retry_server_pushback_disabled.cc
  22. 2
      test/core/end2end/tests/retry_streaming.cc
  23. 2
      test/core/end2end/tests/retry_streaming_after_commit.cc
  24. 2
      test/core/end2end/tests/retry_streaming_succeeds_before_replay_finished.cc
  25. 2
      test/core/end2end/tests/retry_throttled.cc
  26. 2
      test/core/end2end/tests/retry_too_many_attempts.cc

@ -384,7 +384,7 @@ typedef struct {
Defaults to "blend". In the current implementation "blend" is equivalent to
"latency". */
#define GRPC_ARG_OPTIMIZATION_TARGET "grpc.optimization_target"
/** Enables retry functionality. Defaults to false. When enabled,
/** Enables retry functionality. Defaults to true. When enabled,
configurable retries are enabled when they are configured via the
service config. For details, see:
https://github.com/grpc/proposal/blob/master/A6-client-retries.md

@ -1516,7 +1516,7 @@ void ClientChannel::UpdateServiceConfigInDataPlaneLocked() {
channel_args_, args_to_add.data(), args_to_add.size());
new_args = config_selector->ModifyChannelArgs(new_args);
bool enable_retries =
grpc_channel_args_find_bool(new_args, GRPC_ARG_ENABLE_RETRIES, false);
grpc_channel_args_find_bool(new_args, GRPC_ARG_ENABLE_RETRIES, true);
// Construct dynamic filter stack.
std::vector<const grpc_channel_filter*> filters =
config_selector->GetFilters();

@ -242,7 +242,6 @@ class XdsResolver : public Resolver {
RouteTable route_table_;
std::map<absl::string_view, RefCountedPtr<ClusterState>> clusters_;
std::vector<const grpc_channel_filter*> filters_;
bool retry_enabled_ = false;
};
void OnListenerUpdate(XdsApi::LdsUpdate listener);
@ -471,7 +470,6 @@ grpc_error_handle XdsResolver::XdsConfigSelector::CreateMethodConfig(
std::vector<std::string> fields;
// Set retry policy if any.
if (route.retry_policy.has_value()) {
retry_enabled_ = true;
std::vector<std::string> retry_parts;
retry_parts.push_back(absl::StrFormat(
"\"retryPolicy\": {\n"
@ -575,18 +573,7 @@ grpc_error_handle XdsResolver::XdsConfigSelector::CreateMethodConfig(
grpc_channel_args* XdsResolver::XdsConfigSelector::ModifyChannelArgs(
grpc_channel_args* args) {
// The max number of args to add is 1 so far; when more args need to be added
// we will increase the size of args_to_add accordingly;
absl::InlinedVector<grpc_arg, 1> args_to_add;
if (retry_enabled_) {
args_to_add.emplace_back(grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1));
}
if (args_to_add.empty()) return args;
grpc_channel_args* new_args = grpc_channel_args_copy_and_add(
args, args_to_add.data(), args_to_add.size());
grpc_channel_args_destroy(args);
return new_args;
return args;
}
void XdsResolver::XdsConfigSelector::MaybeAddCluster(const std::string& name) {

@ -121,8 +121,6 @@ static void test_retry(grpc_end2end_test_config config) {
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -120,8 +120,6 @@ static void test_retry_cancel_during_delay(grpc_end2end_test_config config,
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -122,8 +122,6 @@ static void test_retry_cancellation(grpc_end2end_test_config config,
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -93,13 +93,11 @@ static void end_test(grpc_end2end_test_fixture* f) {
grpc_completion_queue_destroy(f->shutdown_cq);
}
// Tests that we don't retry when retries are not enabled via the
// Tests that we don't retry when retries are disabled via the
// GRPC_ARG_ENABLE_RETRIES channel arg, even when there is retry
// configuration in the service config.
// - 1 retry allowed for ABORTED status
// - first attempt returns ABORTED but does not retry
// TODO(roth): Update this when we change the default of
// GRPC_ARG_ENABLE_RETRIES to true.
static void test_retry_disabled(grpc_end2end_test_config config) {
grpc_call* c;
grpc_call* s;
@ -123,24 +121,28 @@ static void test_retry_disabled(grpc_end2end_test_config config) {
int was_cancelled = 2;
char* peer;
grpc_arg arg = grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(
"{\n"
" \"methodConfig\": [ {\n"
" \"name\": [\n"
" { \"service\": \"service\", \"method\": \"method\" }\n"
" ],\n"
" \"retryPolicy\": {\n"
" \"maxAttempts\": 2,\n"
" \"initialBackoff\": \"1s\",\n"
" \"maxBackoff\": \"120s\",\n"
" \"backoffMultiplier\": 1.6,\n"
" \"retryableStatusCodes\": [ \"ABORTED\" ]\n"
" }\n"
" } ]\n"
"}"));
grpc_channel_args client_args = {1, &arg};
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 0),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(
"{\n"
" \"methodConfig\": [ {\n"
" \"name\": [\n"
" { \"service\": \"service\", \"method\": \"method\" }\n"
" ],\n"
" \"retryPolicy\": {\n"
" \"maxAttempts\": 2,\n"
" \"initialBackoff\": \"1s\",\n"
" \"maxBackoff\": \"120s\",\n"
" \"backoffMultiplier\": 1.6,\n"
" \"retryableStatusCodes\": [ \"ABORTED\" ]\n"
" }\n"
" } ]\n"
"}")),
};
grpc_channel_args client_args = {GPR_ARRAY_SIZE(args), args};
grpc_end2end_test_fixture f =
begin_test(config, "retry_disabled", &client_args, nullptr);

@ -127,8 +127,6 @@ static void test_retry_exceeds_buffer_size_in_delay(
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -124,8 +124,6 @@ static void test_retry_exceeds_buffer_size_in_initial_batch(
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -129,8 +129,6 @@ static void test_retry_exceeds_buffer_size_in_subsequent_batch(
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -181,8 +181,6 @@ static void test_retry_lb_drop(grpc_end2end_test_config config) {
grpc_core::g_pick_args_vector = &pick_args_seen;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -120,8 +120,6 @@ static void test_retry_non_retriable_status(grpc_end2end_test_config config) {
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -123,8 +123,6 @@ test_retry_non_retriable_status_before_recv_trailing_metadata_started(
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -122,8 +122,6 @@ static void test_retry_per_attempt_recv_timeout(
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_EXPERIMENTAL_ENABLE_HEDGING), 1),
grpc_channel_arg_string_create(

@ -118,8 +118,6 @@ static void test_retry_per_attempt_recv_timeout_on_last_attempt(
grpc_slice details;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_EXPERIMENTAL_ENABLE_HEDGING), 1),
grpc_channel_arg_string_create(

@ -125,8 +125,6 @@ static void test_retry_recv_initial_metadata(grpc_end2end_test_config config) {
{{nullptr, nullptr, nullptr, nullptr}}};
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -121,8 +121,6 @@ static void test_retry_recv_message(grpc_end2end_test_config config) {
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -125,8 +125,6 @@ static void test_retry_recv_trailing_metadata_error(
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -127,8 +127,6 @@ static void test_retry_send_op_fails(grpc_end2end_test_config config) {
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -126,8 +126,6 @@ static void test_retry_server_pushback_delay(grpc_end2end_test_config config) {
pushback_md.value = grpc_slice_from_static_string("2000");
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -127,8 +127,6 @@ static void test_retry_server_pushback_disabled(
pushback_md.value = grpc_slice_from_static_string("-1");
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -137,8 +137,6 @@ static void test_retry_streaming(grpc_end2end_test_config config) {
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_MAX_CHANNEL_TRACE_EVENT_MEMORY_PER_NODE),
1024 * 8),

@ -127,8 +127,6 @@ static void test_retry_streaming_after_commit(grpc_end2end_test_config config) {
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -128,8 +128,6 @@ static void test_retry_streaming_succeeds_before_replay_finished(
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -120,8 +120,6 @@ static void test_retry_throttled(grpc_end2end_test_config config) {
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

@ -121,8 +121,6 @@ static void test_retry_too_many_attempts(grpc_end2end_test_config config) {
char* peer;
grpc_arg args[] = {
grpc_channel_arg_integer_create(
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1),
grpc_channel_arg_string_create(
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG),
const_cast<char*>(

Loading…
Cancel
Save