mirror of https://github.com/grpc/grpc.git
fix retry code to handle failed send ops (#26418)
* optimize retry per-call-attempt memory usage * fix retry code to handle failed send ops * clang-format * fix memory leak * clang-format * fix proxy tests * fix race condition in test
pull/26205/head
parent
2d8546a3c4
commit
1e2f19b286
10 changed files with 575 additions and 82 deletions
@ -0,0 +1,396 @@ |
|||||||
|
/*
|
||||||
|
* |
||||||
|
* Copyright 2017 gRPC authors. |
||||||
|
* |
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
* you may not use this file except in compliance with the License. |
||||||
|
* You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
* |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "test/core/end2end/end2end_tests.h" |
||||||
|
|
||||||
|
#include <stdio.h> |
||||||
|
#include <string.h> |
||||||
|
|
||||||
|
#include <grpc/byte_buffer.h> |
||||||
|
#include <grpc/grpc.h> |
||||||
|
#include <grpc/support/alloc.h> |
||||||
|
#include <grpc/support/log.h> |
||||||
|
#include <grpc/support/string_util.h> |
||||||
|
#include <grpc/support/time.h> |
||||||
|
|
||||||
|
#include "src/core/lib/channel/channel_args.h" |
||||||
|
#include "src/core/lib/channel/channel_stack.h" |
||||||
|
#include "src/core/lib/channel/channel_stack_builder.h" |
||||||
|
#include "src/core/lib/gpr/string.h" |
||||||
|
#include "src/core/lib/gpr/useful.h" |
||||||
|
#include "src/core/lib/iomgr/exec_ctx.h" |
||||||
|
#include "src/core/lib/surface/channel_init.h" |
||||||
|
#include "src/core/lib/transport/static_metadata.h" |
||||||
|
|
||||||
|
#include "test/core/end2end/cq_verifier.h" |
||||||
|
#include "test/core/end2end/tests/cancel_test_helpers.h" |
||||||
|
|
||||||
|
// Turns an integer into an opaque completion-queue tag pointer.
static void* tag(intptr_t t) {
  void* const as_pointer = reinterpret_cast<void*>(t);
  return as_pointer;
}
||||||
|
|
||||||
|
// Logs the test name, creates the fixture through the config callbacks and
// brings up first the server and then the client.  Returns the fixture by
// value.
static grpc_end2end_test_fixture begin_test(grpc_end2end_test_config config,
                                            const char* test_name,
                                            grpc_channel_args* client_args,
                                            grpc_channel_args* server_args) {
  gpr_log(GPR_INFO, "Running test: %s/%s", test_name, config.name);
  grpc_end2end_test_fixture fixture =
      config.create_fixture(client_args, server_args);
  config.init_server(&fixture, server_args);
  config.init_client(&fixture, client_args);
  return fixture;
}
||||||
|
|
||||||
|
// Returns a deadline n seconds in the future, as computed by
// grpc_timeout_seconds_to_deadline().
static gpr_timespec n_seconds_from_now(int n) {
  const gpr_timespec deadline = grpc_timeout_seconds_to_deadline(n);
  return deadline;
}
||||||
|
|
||||||
|
// Convenience wrapper: a deadline five seconds in the future.
static gpr_timespec five_seconds_from_now(void) {
  const int kTimeoutSeconds = 5;
  return n_seconds_from_now(kTimeoutSeconds);
}
||||||
|
|
||||||
|
// Pulls events off the completion queue, discarding them, until the queue
// reports GRPC_QUEUE_SHUTDOWN.  Each wait uses a fresh five-second deadline.
static void drain_cq(grpc_completion_queue* cq) {
  for (;;) {
    const grpc_event event =
        grpc_completion_queue_next(cq, five_seconds_from_now(), nullptr);
    if (event.type == GRPC_QUEUE_SHUTDOWN) break;
  }
}
||||||
|
|
||||||
|
// Shuts down and destroys the fixture's server, if there is one.
// The shutdown notification is requested on f->shutdown_cq with tag 1000 and
// must arrive as GRPC_OP_COMPLETE before the server is destroyed.
static void shutdown_server(grpc_end2end_test_fixture* f) {
  if (f->server == nullptr) return;
  grpc_server* const server = f->server;
  grpc_server_shutdown_and_notify(server, f->shutdown_cq, tag(1000));
  GPR_ASSERT(grpc_completion_queue_pluck(f->shutdown_cq, tag(1000),
                                         grpc_timeout_seconds_to_deadline(5),
                                         nullptr)
                 .type == GRPC_OP_COMPLETE);
  grpc_server_destroy(server);
  // Clear the pointer so a second call is a no-op.
  f->server = nullptr;
}
||||||
|
|
||||||
|
// Destroys the fixture's client channel, if there is one, and clears the
// pointer so a second call is a no-op.
static void shutdown_client(grpc_end2end_test_fixture* f) {
  if (f->client == nullptr) return;
  grpc_channel* const channel = f->client;
  grpc_channel_destroy(channel);
  f->client = nullptr;
}
||||||
|
|
||||||
|
// Tears down the fixture: server first, then client, then the main
// completion queue (shut down, drained, destroyed) and finally the
// shutdown completion queue.
static void end_test(grpc_end2end_test_fixture* f) {
  shutdown_server(f);
  shutdown_client(f);

  grpc_completion_queue* const main_cq = f->cq;
  grpc_completion_queue_shutdown(main_cq);
  drain_cq(main_cq);
  grpc_completion_queue_destroy(main_cq);

  grpc_completion_queue_destroy(f->shutdown_cq);
}
||||||
|
|
||||||
|
// Tests failure on a send op batch:
|
||||||
|
// - 2 retries allowed for ABORTED status
|
||||||
|
// - on the first call attempt, the batch containing the
|
||||||
|
// send_initial_metadata op fails, and then the call returns ABORTED,
|
||||||
|
// all without ever going out on the wire
|
||||||
|
// - second attempt returns ABORTED but does not retry, because only 2
|
||||||
|
// attempts are allowed
|
||||||
|
static void test_retry_send_op_fails(grpc_end2end_test_config config) { |
||||||
|
grpc_call* c; |
||||||
|
grpc_call* s; |
||||||
|
grpc_op ops[6]; |
||||||
|
grpc_op* op; |
||||||
|
grpc_metadata_array initial_metadata_recv; |
||||||
|
grpc_metadata_array trailing_metadata_recv; |
||||||
|
grpc_metadata_array request_metadata_recv; |
||||||
|
grpc_call_details call_details; |
||||||
|
grpc_slice request_payload_slice = grpc_slice_from_static_string("foo"); |
||||||
|
grpc_slice response_payload_slice = grpc_slice_from_static_string("bar"); |
||||||
|
grpc_byte_buffer* request_payload = |
||||||
|
grpc_raw_byte_buffer_create(&request_payload_slice, 1); |
||||||
|
grpc_byte_buffer* response_payload = |
||||||
|
grpc_raw_byte_buffer_create(&response_payload_slice, 1); |
||||||
|
grpc_byte_buffer* request_payload_recv = nullptr; |
||||||
|
grpc_byte_buffer* response_payload_recv = nullptr; |
||||||
|
grpc_status_code status; |
||||||
|
grpc_call_error error; |
||||||
|
grpc_slice details; |
||||||
|
int was_cancelled = 2; |
||||||
|
char* peer; |
||||||
|
|
||||||
|
grpc_arg args[] = { |
||||||
|
grpc_channel_arg_integer_create( |
||||||
|
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1), |
||||||
|
grpc_channel_arg_string_create( |
||||||
|
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), |
||||||
|
const_cast<char*>( |
||||||
|
"{\n" |
||||||
|
" \"methodConfig\": [ {\n" |
||||||
|
" \"name\": [\n" |
||||||
|
" { \"service\": \"service\", \"method\": \"method\" }\n" |
||||||
|
" ],\n" |
||||||
|
" \"retryPolicy\": {\n" |
||||||
|
" \"maxAttempts\": 2,\n" |
||||||
|
" \"initialBackoff\": \"1s\",\n" |
||||||
|
" \"maxBackoff\": \"120s\",\n" |
||||||
|
" \"backoffMultiplier\": 1.6,\n" |
||||||
|
" \"retryableStatusCodes\": [ \"ABORTED\" ]\n" |
||||||
|
" }\n" |
||||||
|
" } ]\n" |
||||||
|
"}")), |
||||||
|
}; |
||||||
|
grpc_channel_args client_args = {GPR_ARRAY_SIZE(args), args}; |
||||||
|
grpc_end2end_test_fixture f = |
||||||
|
begin_test(config, "retry_send_op_fails", &client_args, nullptr); |
||||||
|
|
||||||
|
cq_verifier* cqv = cq_verifier_create(f.cq); |
||||||
|
|
||||||
|
gpr_timespec deadline = five_seconds_from_now(); |
||||||
|
c = grpc_channel_create_call(f.client, nullptr, GRPC_PROPAGATE_DEFAULTS, f.cq, |
||||||
|
grpc_slice_from_static_string("/service/method"), |
||||||
|
nullptr, deadline, nullptr); |
||||||
|
GPR_ASSERT(c); |
||||||
|
|
||||||
|
peer = grpc_call_get_peer(c); |
||||||
|
GPR_ASSERT(peer != nullptr); |
||||||
|
gpr_log(GPR_DEBUG, "client_peer_before_call=%s", peer); |
||||||
|
gpr_free(peer); |
||||||
|
|
||||||
|
grpc_metadata_array_init(&initial_metadata_recv); |
||||||
|
grpc_metadata_array_init(&trailing_metadata_recv); |
||||||
|
grpc_metadata_array_init(&request_metadata_recv); |
||||||
|
grpc_call_details_init(&call_details); |
||||||
|
grpc_slice status_details = grpc_slice_from_static_string("xyz"); |
||||||
|
|
||||||
|
// Start a batch containing send ops.
|
||||||
|
memset(ops, 0, sizeof(ops)); |
||||||
|
op = ops; |
||||||
|
op->op = GRPC_OP_SEND_INITIAL_METADATA; |
||||||
|
op->data.send_initial_metadata.count = 0; |
||||||
|
op++; |
||||||
|
op->op = GRPC_OP_SEND_MESSAGE; |
||||||
|
op->data.send_message.send_message = request_payload; |
||||||
|
op++; |
||||||
|
op->op = GRPC_OP_SEND_CLOSE_FROM_CLIENT; |
||||||
|
op++; |
||||||
|
error = grpc_call_start_batch(c, ops, static_cast<size_t>(op - ops), tag(1), |
||||||
|
nullptr); |
||||||
|
GPR_ASSERT(GRPC_CALL_OK == error); |
||||||
|
|
||||||
|
// Start a batch containing recv ops.
|
||||||
|
memset(ops, 0, sizeof(ops)); |
||||||
|
op = ops; |
||||||
|
op->op = GRPC_OP_RECV_MESSAGE; |
||||||
|
op->data.recv_message.recv_message = &response_payload_recv; |
||||||
|
op++; |
||||||
|
op->op = GRPC_OP_RECV_INITIAL_METADATA; |
||||||
|
op->data.recv_initial_metadata.recv_initial_metadata = &initial_metadata_recv; |
||||||
|
op++; |
||||||
|
op->op = GRPC_OP_RECV_STATUS_ON_CLIENT; |
||||||
|
op->data.recv_status_on_client.trailing_metadata = &trailing_metadata_recv; |
||||||
|
op->data.recv_status_on_client.status = &status; |
||||||
|
op->data.recv_status_on_client.status_details = &details; |
||||||
|
op++; |
||||||
|
error = grpc_call_start_batch(c, ops, static_cast<size_t>(op - ops), tag(2), |
||||||
|
nullptr); |
||||||
|
GPR_ASSERT(GRPC_CALL_OK == error); |
||||||
|
|
||||||
|
// Client send ops should now complete.
|
||||||
|
CQ_EXPECT_COMPLETION(cqv, tag(1), true); |
||||||
|
cq_verify(cqv); |
||||||
|
|
||||||
|
// Server should get a call.
|
||||||
|
error = |
||||||
|
grpc_server_request_call(f.server, &s, &call_details, |
||||||
|
&request_metadata_recv, f.cq, f.cq, tag(101)); |
||||||
|
GPR_ASSERT(GRPC_CALL_OK == error); |
||||||
|
CQ_EXPECT_COMPLETION(cqv, tag(101), true); |
||||||
|
cq_verify(cqv); |
||||||
|
|
||||||
|
// Server fails with status ABORTED.
|
||||||
|
memset(ops, 0, sizeof(ops)); |
||||||
|
op = ops; |
||||||
|
op->op = GRPC_OP_SEND_INITIAL_METADATA; |
||||||
|
op->data.send_initial_metadata.count = 0; |
||||||
|
op++; |
||||||
|
op->op = GRPC_OP_SEND_STATUS_FROM_SERVER; |
||||||
|
op->data.send_status_from_server.trailing_metadata_count = 0; |
||||||
|
op->data.send_status_from_server.status = GRPC_STATUS_ABORTED; |
||||||
|
op->data.send_status_from_server.status_details = &status_details; |
||||||
|
op++; |
||||||
|
op->op = GRPC_OP_RECV_CLOSE_ON_SERVER; |
||||||
|
op->data.recv_close_on_server.cancelled = &was_cancelled; |
||||||
|
op++; |
||||||
|
error = grpc_call_start_batch(s, ops, static_cast<size_t>(op - ops), tag(102), |
||||||
|
nullptr); |
||||||
|
GPR_ASSERT(GRPC_CALL_OK == error); |
||||||
|
|
||||||
|
// In principle, the server batch should complete before the client
|
||||||
|
// recv ops batch, but in the proxy fixtures, there are multiple threads
|
||||||
|
// involved, so the completion order tends to be a little racy.
|
||||||
|
CQ_EXPECT_COMPLETION(cqv, tag(102), true); |
||||||
|
CQ_EXPECT_COMPLETION(cqv, tag(2), true); |
||||||
|
cq_verify(cqv); |
||||||
|
|
||||||
|
GPR_ASSERT(status == GRPC_STATUS_ABORTED); |
||||||
|
GPR_ASSERT(0 == grpc_slice_str_cmp(details, "xyz")); |
||||||
|
GPR_ASSERT(0 == grpc_slice_str_cmp(call_details.method, "/service/method")); |
||||||
|
GPR_ASSERT(0 == call_details.flags); |
||||||
|
GPR_ASSERT(was_cancelled == 0); |
||||||
|
|
||||||
|
// Make sure the "grpc-previous-rpc-attempts" header was sent in the retry.
|
||||||
|
bool found_retry_header = false; |
||||||
|
for (size_t i = 0; i < request_metadata_recv.count; ++i) { |
||||||
|
if (grpc_slice_eq(request_metadata_recv.metadata[i].key, |
||||||
|
GRPC_MDSTR_GRPC_PREVIOUS_RPC_ATTEMPTS)) { |
||||||
|
GPR_ASSERT( |
||||||
|
grpc_slice_eq(request_metadata_recv.metadata[i].value, GRPC_MDSTR_1)); |
||||||
|
found_retry_header = true; |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
GPR_ASSERT(found_retry_header); |
||||||
|
|
||||||
|
grpc_slice_unref(details); |
||||||
|
grpc_metadata_array_destroy(&initial_metadata_recv); |
||||||
|
grpc_metadata_array_destroy(&trailing_metadata_recv); |
||||||
|
grpc_metadata_array_destroy(&request_metadata_recv); |
||||||
|
grpc_call_details_destroy(&call_details); |
||||||
|
grpc_byte_buffer_destroy(request_payload); |
||||||
|
grpc_byte_buffer_destroy(response_payload); |
||||||
|
grpc_byte_buffer_destroy(request_payload_recv); |
||||||
|
grpc_byte_buffer_destroy(response_payload_recv); |
||||||
|
|
||||||
|
grpc_call_unref(c); |
||||||
|
grpc_call_unref(s); |
||||||
|
|
||||||
|
cq_verifier_destroy(cqv); |
||||||
|
|
||||||
|
end_test(&f); |
||||||
|
config.tear_down_data(&f); |
||||||
|
} |
||||||
|
|
||||||
|
namespace { |
||||||
|
|
||||||
|
// A filter that, for the first call it sees, will fail the batch
|
||||||
|
// containing send_initial_metadata and then fail the call with status
|
||||||
|
// ABORTED. All subsequent calls are allowed through without failures.
|
||||||
|
class FailFirstSendOpFilter { |
||||||
|
public: |
||||||
|
static grpc_channel_filter kFilterVtable; |
||||||
|
|
||||||
|
public: |
||||||
|
class CallData { |
||||||
|
public: |
||||||
|
static grpc_error_handle Init(grpc_call_element* elem, |
||||||
|
const grpc_call_element_args* args) { |
||||||
|
new (elem->call_data) CallData(args); |
||||||
|
return GRPC_ERROR_NONE; |
||||||
|
} |
||||||
|
|
||||||
|
static void Destroy(grpc_call_element* elem, |
||||||
|
const grpc_call_final_info* /*final_info*/, |
||||||
|
grpc_closure* /*ignored*/) { |
||||||
|
auto* calld = static_cast<CallData*>(elem->call_data); |
||||||
|
calld->~CallData(); |
||||||
|
} |
||||||
|
|
||||||
|
static void StartTransportStreamOpBatch( |
||||||
|
grpc_call_element* elem, grpc_transport_stream_op_batch* batch) { |
||||||
|
auto* chand = static_cast<FailFirstSendOpFilter*>(elem->channel_data); |
||||||
|
auto* calld = static_cast<CallData*>(elem->call_data); |
||||||
|
if (!chand->seen_first_) { |
||||||
|
chand->seen_first_ = true; |
||||||
|
calld->fail_ = true; |
||||||
|
} |
||||||
|
if (calld->fail_ && !batch->cancel_stream) { |
||||||
|
grpc_transport_stream_op_batch_finish_with_failure( |
||||||
|
batch, |
||||||
|
grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING( |
||||||
|
"FailFirstSendOpFilter failing batch"), |
||||||
|
GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_ABORTED), |
||||||
|
calld->call_combiner_); |
||||||
|
return; |
||||||
|
} |
||||||
|
grpc_call_next_op(elem, batch); |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
explicit CallData(const grpc_call_element_args* args) |
||||||
|
: call_combiner_(args->call_combiner) {} |
||||||
|
|
||||||
|
grpc_core::CallCombiner* call_combiner_; |
||||||
|
bool fail_ = false; |
||||||
|
}; |
||||||
|
|
||||||
|
static grpc_error_handle Init(grpc_channel_element* elem, |
||||||
|
grpc_channel_element_args* /*args*/) { |
||||||
|
new (elem->channel_data) FailFirstSendOpFilter(); |
||||||
|
return GRPC_ERROR_NONE; |
||||||
|
} |
||||||
|
|
||||||
|
static void Destroy(grpc_channel_element* elem) { |
||||||
|
auto* chand = static_cast<FailFirstSendOpFilter*>(elem->channel_data); |
||||||
|
chand->~FailFirstSendOpFilter(); |
||||||
|
} |
||||||
|
|
||||||
|
bool seen_first_ = false; |
||||||
|
}; |
||||||
|
|
||||||
|
// Filter vtable wiring FailFirstSendOpFilter into a channel stack.
// NOTE(review): entries are positional; confirm the field order against the
// grpc_channel_filter declaration in channel_stack.h when editing.
grpc_channel_filter FailFirstSendOpFilter::kFilterVtable = {
    // Per-call batch handler; channel-level ops are simply forwarded.
    FailFirstSendOpFilter::CallData::StartTransportStreamOpBatch,
    grpc_channel_next_op,
    // Per-call storage and lifecycle.
    sizeof(FailFirstSendOpFilter::CallData),
    FailFirstSendOpFilter::CallData::Init,
    grpc_call_stack_ignore_set_pollset_or_pollset_set,
    FailFirstSendOpFilter::CallData::Destroy,
    // Per-channel storage and lifecycle.
    sizeof(FailFirstSendOpFilter),
    FailFirstSendOpFilter::Init,
    FailFirstSendOpFilter::Destroy,
    grpc_channel_next_get_info,
    // Filter name.
    "FailFirstSendOpFilter",
};
||||||
|
|
||||||
|
bool g_enable_filter = false; |
||||||
|
|
||||||
|
bool MaybeAddFilter(grpc_channel_stack_builder* builder, void* /*arg*/) { |
||||||
|
// Skip if filter is not enabled.
|
||||||
|
if (!g_enable_filter) return true; |
||||||
|
// Skip on proxy (which explicitly disables retries).
|
||||||
|
const grpc_channel_args* args = |
||||||
|
grpc_channel_stack_builder_get_channel_arguments(builder); |
||||||
|
if (!grpc_channel_args_find_bool(args, GRPC_ARG_ENABLE_RETRIES, true)) { |
||||||
|
return true; |
||||||
|
} |
||||||
|
// Install filter.
|
||||||
|
return grpc_channel_stack_builder_prepend_filter( |
||||||
|
builder, &FailFirstSendOpFilter::kFilterVtable, nullptr, nullptr); |
||||||
|
} |
||||||
|
|
||||||
|
// Plugin init hook: registers MaybeAddFilter as a channel-init stage for
// GRPC_CLIENT_SUBCHANNEL stacks, with priority 0 and no user argument.
void InitPlugin(void) {
  const int kPriority = 0;
  grpc_channel_init_register_stage(GRPC_CLIENT_SUBCHANNEL, kPriority,
                                   MaybeAddFilter, nullptr);
}
||||||
|
|
||||||
|
// Plugin destroy hook: there is nothing to clean up.
void DestroyPlugin() {
}
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Test entry point.  Requires a fixture that supports the client channel,
// and keeps the failing filter enabled only for the duration of the test.
void retry_send_op_fails(grpc_end2end_test_config config) {
  GPR_ASSERT(config.feature_mask & FEATURE_MASK_SUPPORTS_CLIENT_CHANNEL);
  // Turns the filter on here and off again when the scope ends.
  struct ScopedFilterEnable {
    ScopedFilterEnable() { g_enable_filter = true; }
    ~ScopedFilterEnable() { g_enable_filter = false; }
  } enable_filter;
  test_retry_send_op_fails(config);
}
||||||
|
|
||||||
|
// Pre-init hook: registers the plugin whose init function installs the
// channel-init stage for FailFirstSendOpFilter.
void retry_send_op_fails_pre_init(void) {
  grpc_register_plugin(&InitPlugin, &DestroyPlugin);
}
Loading…
Reference in new issue