mirror of https://github.com/grpc/grpc.git
fix retry code to handle failed send ops (#26418)
* optimize retry per-call-attempt memory usage * fix retry code to handle failed send ops * clang-format * fix memory leak * clang-format * fix proxy tests * fix race condition in testpull/26205/head
parent
2d8546a3c4
commit
1e2f19b286
10 changed files with 575 additions and 82 deletions
@ -0,0 +1,396 @@ |
||||
/*
|
||||
* |
||||
* Copyright 2017 gRPC authors. |
||||
* |
||||
* Licensed under the Apache License, Version 2.0 (the "License"); |
||||
* you may not use this file except in compliance with the License. |
||||
* You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
* |
||||
*/ |
||||
|
||||
#include "test/core/end2end/end2end_tests.h" |
||||
|
||||
#include <stdio.h> |
||||
#include <string.h> |
||||
|
||||
#include <grpc/byte_buffer.h> |
||||
#include <grpc/grpc.h> |
||||
#include <grpc/support/alloc.h> |
||||
#include <grpc/support/log.h> |
||||
#include <grpc/support/string_util.h> |
||||
#include <grpc/support/time.h> |
||||
|
||||
#include "src/core/lib/channel/channel_args.h" |
||||
#include "src/core/lib/channel/channel_stack.h" |
||||
#include "src/core/lib/channel/channel_stack_builder.h" |
||||
#include "src/core/lib/gpr/string.h" |
||||
#include "src/core/lib/gpr/useful.h" |
||||
#include "src/core/lib/iomgr/exec_ctx.h" |
||||
#include "src/core/lib/surface/channel_init.h" |
||||
#include "src/core/lib/transport/static_metadata.h" |
||||
|
||||
#include "test/core/end2end/cq_verifier.h" |
||||
#include "test/core/end2end/tests/cancel_test_helpers.h" |
||||
|
||||
static void* tag(intptr_t t) { return reinterpret_cast<void*>(t); } |
||||
|
||||
static grpc_end2end_test_fixture begin_test(grpc_end2end_test_config config, |
||||
const char* test_name, |
||||
grpc_channel_args* client_args, |
||||
grpc_channel_args* server_args) { |
||||
grpc_end2end_test_fixture f; |
||||
gpr_log(GPR_INFO, "Running test: %s/%s", test_name, config.name); |
||||
f = config.create_fixture(client_args, server_args); |
||||
config.init_server(&f, server_args); |
||||
config.init_client(&f, client_args); |
||||
return f; |
||||
} |
||||
|
||||
static gpr_timespec n_seconds_from_now(int n) { |
||||
return grpc_timeout_seconds_to_deadline(n); |
||||
} |
||||
|
||||
static gpr_timespec five_seconds_from_now(void) { |
||||
return n_seconds_from_now(5); |
||||
} |
||||
|
||||
static void drain_cq(grpc_completion_queue* cq) { |
||||
grpc_event ev; |
||||
do { |
||||
ev = grpc_completion_queue_next(cq, five_seconds_from_now(), nullptr); |
||||
} while (ev.type != GRPC_QUEUE_SHUTDOWN); |
||||
} |
||||
|
||||
static void shutdown_server(grpc_end2end_test_fixture* f) { |
||||
if (!f->server) return; |
||||
grpc_server_shutdown_and_notify(f->server, f->shutdown_cq, tag(1000)); |
||||
GPR_ASSERT(grpc_completion_queue_pluck(f->shutdown_cq, tag(1000), |
||||
grpc_timeout_seconds_to_deadline(5), |
||||
nullptr) |
||||
.type == GRPC_OP_COMPLETE); |
||||
grpc_server_destroy(f->server); |
||||
f->server = nullptr; |
||||
} |
||||
|
||||
static void shutdown_client(grpc_end2end_test_fixture* f) { |
||||
if (!f->client) return; |
||||
grpc_channel_destroy(f->client); |
||||
f->client = nullptr; |
||||
} |
||||
|
||||
static void end_test(grpc_end2end_test_fixture* f) { |
||||
shutdown_server(f); |
||||
shutdown_client(f); |
||||
|
||||
grpc_completion_queue_shutdown(f->cq); |
||||
drain_cq(f->cq); |
||||
grpc_completion_queue_destroy(f->cq); |
||||
grpc_completion_queue_destroy(f->shutdown_cq); |
||||
} |
||||
|
||||
// Tests failure on a send op batch:
|
||||
// - 2 retries allowed for ABORTED status
|
||||
// - on the first call attempt, the batch containing the
|
||||
// send_initial_metadata op fails, and then the call returns ABORTED,
|
||||
// all without ever going out on the wire
|
||||
// - second attempt returns ABORTED but does not retry, because only 2
|
||||
// attempts are allowed
|
||||
static void test_retry_send_op_fails(grpc_end2end_test_config config) { |
||||
grpc_call* c; |
||||
grpc_call* s; |
||||
grpc_op ops[6]; |
||||
grpc_op* op; |
||||
grpc_metadata_array initial_metadata_recv; |
||||
grpc_metadata_array trailing_metadata_recv; |
||||
grpc_metadata_array request_metadata_recv; |
||||
grpc_call_details call_details; |
||||
grpc_slice request_payload_slice = grpc_slice_from_static_string("foo"); |
||||
grpc_slice response_payload_slice = grpc_slice_from_static_string("bar"); |
||||
grpc_byte_buffer* request_payload = |
||||
grpc_raw_byte_buffer_create(&request_payload_slice, 1); |
||||
grpc_byte_buffer* response_payload = |
||||
grpc_raw_byte_buffer_create(&response_payload_slice, 1); |
||||
grpc_byte_buffer* request_payload_recv = nullptr; |
||||
grpc_byte_buffer* response_payload_recv = nullptr; |
||||
grpc_status_code status; |
||||
grpc_call_error error; |
||||
grpc_slice details; |
||||
int was_cancelled = 2; |
||||
char* peer; |
||||
|
||||
grpc_arg args[] = { |
||||
grpc_channel_arg_integer_create( |
||||
const_cast<char*>(GRPC_ARG_ENABLE_RETRIES), 1), |
||||
grpc_channel_arg_string_create( |
||||
const_cast<char*>(GRPC_ARG_SERVICE_CONFIG), |
||||
const_cast<char*>( |
||||
"{\n" |
||||
" \"methodConfig\": [ {\n" |
||||
" \"name\": [\n" |
||||
" { \"service\": \"service\", \"method\": \"method\" }\n" |
||||
" ],\n" |
||||
" \"retryPolicy\": {\n" |
||||
" \"maxAttempts\": 2,\n" |
||||
" \"initialBackoff\": \"1s\",\n" |
||||
" \"maxBackoff\": \"120s\",\n" |
||||
" \"backoffMultiplier\": 1.6,\n" |
||||
" \"retryableStatusCodes\": [ \"ABORTED\" ]\n" |
||||
" }\n" |
||||
" } ]\n" |
||||
"}")), |
||||
}; |
||||
grpc_channel_args client_args = {GPR_ARRAY_SIZE(args), args}; |
||||
grpc_end2end_test_fixture f = |
||||
begin_test(config, "retry_send_op_fails", &client_args, nullptr); |
||||
|
||||
cq_verifier* cqv = cq_verifier_create(f.cq); |
||||
|
||||
gpr_timespec deadline = five_seconds_from_now(); |
||||
c = grpc_channel_create_call(f.client, nullptr, GRPC_PROPAGATE_DEFAULTS, f.cq, |
||||
grpc_slice_from_static_string("/service/method"), |
||||
nullptr, deadline, nullptr); |
||||
GPR_ASSERT(c); |
||||
|
||||
peer = grpc_call_get_peer(c); |
||||
GPR_ASSERT(peer != nullptr); |
||||
gpr_log(GPR_DEBUG, "client_peer_before_call=%s", peer); |
||||
gpr_free(peer); |
||||
|
||||
grpc_metadata_array_init(&initial_metadata_recv); |
||||
grpc_metadata_array_init(&trailing_metadata_recv); |
||||
grpc_metadata_array_init(&request_metadata_recv); |
||||
grpc_call_details_init(&call_details); |
||||
grpc_slice status_details = grpc_slice_from_static_string("xyz"); |
||||
|
||||
// Start a batch containing send ops.
|
||||
memset(ops, 0, sizeof(ops)); |
||||
op = ops; |
||||
op->op = GRPC_OP_SEND_INITIAL_METADATA; |
||||
op->data.send_initial_metadata.count = 0; |
||||
op++; |
||||
op->op = GRPC_OP_SEND_MESSAGE; |
||||
op->data.send_message.send_message = request_payload; |
||||
op++; |
||||
op->op = GRPC_OP_SEND_CLOSE_FROM_CLIENT; |
||||
op++; |
||||
error = grpc_call_start_batch(c, ops, static_cast<size_t>(op - ops), tag(1), |
||||
nullptr); |
||||
GPR_ASSERT(GRPC_CALL_OK == error); |
||||
|
||||
// Start a batch containing recv ops.
|
||||
memset(ops, 0, sizeof(ops)); |
||||
op = ops; |
||||
op->op = GRPC_OP_RECV_MESSAGE; |
||||
op->data.recv_message.recv_message = &response_payload_recv; |
||||
op++; |
||||
op->op = GRPC_OP_RECV_INITIAL_METADATA; |
||||
op->data.recv_initial_metadata.recv_initial_metadata = &initial_metadata_recv; |
||||
op++; |
||||
op->op = GRPC_OP_RECV_STATUS_ON_CLIENT; |
||||
op->data.recv_status_on_client.trailing_metadata = &trailing_metadata_recv; |
||||
op->data.recv_status_on_client.status = &status; |
||||
op->data.recv_status_on_client.status_details = &details; |
||||
op++; |
||||
error = grpc_call_start_batch(c, ops, static_cast<size_t>(op - ops), tag(2), |
||||
nullptr); |
||||
GPR_ASSERT(GRPC_CALL_OK == error); |
||||
|
||||
// Client send ops should now complete.
|
||||
CQ_EXPECT_COMPLETION(cqv, tag(1), true); |
||||
cq_verify(cqv); |
||||
|
||||
// Server should get a call.
|
||||
error = |
||||
grpc_server_request_call(f.server, &s, &call_details, |
||||
&request_metadata_recv, f.cq, f.cq, tag(101)); |
||||
GPR_ASSERT(GRPC_CALL_OK == error); |
||||
CQ_EXPECT_COMPLETION(cqv, tag(101), true); |
||||
cq_verify(cqv); |
||||
|
||||
// Server fails with status ABORTED.
|
||||
memset(ops, 0, sizeof(ops)); |
||||
op = ops; |
||||
op->op = GRPC_OP_SEND_INITIAL_METADATA; |
||||
op->data.send_initial_metadata.count = 0; |
||||
op++; |
||||
op->op = GRPC_OP_SEND_STATUS_FROM_SERVER; |
||||
op->data.send_status_from_server.trailing_metadata_count = 0; |
||||
op->data.send_status_from_server.status = GRPC_STATUS_ABORTED; |
||||
op->data.send_status_from_server.status_details = &status_details; |
||||
op++; |
||||
op->op = GRPC_OP_RECV_CLOSE_ON_SERVER; |
||||
op->data.recv_close_on_server.cancelled = &was_cancelled; |
||||
op++; |
||||
error = grpc_call_start_batch(s, ops, static_cast<size_t>(op - ops), tag(102), |
||||
nullptr); |
||||
GPR_ASSERT(GRPC_CALL_OK == error); |
||||
|
||||
// In principle, the server batch should complete before the client
|
||||
// recv ops batch, but in the proxy fixtures, there are multiple threads
|
||||
// involved, so the completion order tends to be a little racy.
|
||||
CQ_EXPECT_COMPLETION(cqv, tag(102), true); |
||||
CQ_EXPECT_COMPLETION(cqv, tag(2), true); |
||||
cq_verify(cqv); |
||||
|
||||
GPR_ASSERT(status == GRPC_STATUS_ABORTED); |
||||
GPR_ASSERT(0 == grpc_slice_str_cmp(details, "xyz")); |
||||
GPR_ASSERT(0 == grpc_slice_str_cmp(call_details.method, "/service/method")); |
||||
GPR_ASSERT(0 == call_details.flags); |
||||
GPR_ASSERT(was_cancelled == 0); |
||||
|
||||
// Make sure the "grpc-previous-rpc-attempts" header was sent in the retry.
|
||||
bool found_retry_header = false; |
||||
for (size_t i = 0; i < request_metadata_recv.count; ++i) { |
||||
if (grpc_slice_eq(request_metadata_recv.metadata[i].key, |
||||
GRPC_MDSTR_GRPC_PREVIOUS_RPC_ATTEMPTS)) { |
||||
GPR_ASSERT( |
||||
grpc_slice_eq(request_metadata_recv.metadata[i].value, GRPC_MDSTR_1)); |
||||
found_retry_header = true; |
||||
break; |
||||
} |
||||
} |
||||
GPR_ASSERT(found_retry_header); |
||||
|
||||
grpc_slice_unref(details); |
||||
grpc_metadata_array_destroy(&initial_metadata_recv); |
||||
grpc_metadata_array_destroy(&trailing_metadata_recv); |
||||
grpc_metadata_array_destroy(&request_metadata_recv); |
||||
grpc_call_details_destroy(&call_details); |
||||
grpc_byte_buffer_destroy(request_payload); |
||||
grpc_byte_buffer_destroy(response_payload); |
||||
grpc_byte_buffer_destroy(request_payload_recv); |
||||
grpc_byte_buffer_destroy(response_payload_recv); |
||||
|
||||
grpc_call_unref(c); |
||||
grpc_call_unref(s); |
||||
|
||||
cq_verifier_destroy(cqv); |
||||
|
||||
end_test(&f); |
||||
config.tear_down_data(&f); |
||||
} |
||||
|
||||
namespace { |
||||
|
||||
// A filter that, for the first call it sees, will fail the batch
|
||||
// containing send_initial_metadata and then fail the call with status
|
||||
// ABORTED. All subsequent calls are allowed through without failures.
|
||||
class FailFirstSendOpFilter { |
||||
public: |
||||
static grpc_channel_filter kFilterVtable; |
||||
|
||||
public: |
||||
class CallData { |
||||
public: |
||||
static grpc_error_handle Init(grpc_call_element* elem, |
||||
const grpc_call_element_args* args) { |
||||
new (elem->call_data) CallData(args); |
||||
return GRPC_ERROR_NONE; |
||||
} |
||||
|
||||
static void Destroy(grpc_call_element* elem, |
||||
const grpc_call_final_info* /*final_info*/, |
||||
grpc_closure* /*ignored*/) { |
||||
auto* calld = static_cast<CallData*>(elem->call_data); |
||||
calld->~CallData(); |
||||
} |
||||
|
||||
static void StartTransportStreamOpBatch( |
||||
grpc_call_element* elem, grpc_transport_stream_op_batch* batch) { |
||||
auto* chand = static_cast<FailFirstSendOpFilter*>(elem->channel_data); |
||||
auto* calld = static_cast<CallData*>(elem->call_data); |
||||
if (!chand->seen_first_) { |
||||
chand->seen_first_ = true; |
||||
calld->fail_ = true; |
||||
} |
||||
if (calld->fail_ && !batch->cancel_stream) { |
||||
grpc_transport_stream_op_batch_finish_with_failure( |
||||
batch, |
||||
grpc_error_set_int(GRPC_ERROR_CREATE_FROM_STATIC_STRING( |
||||
"FailFirstSendOpFilter failing batch"), |
||||
GRPC_ERROR_INT_GRPC_STATUS, GRPC_STATUS_ABORTED), |
||||
calld->call_combiner_); |
||||
return; |
||||
} |
||||
grpc_call_next_op(elem, batch); |
||||
} |
||||
|
||||
private: |
||||
explicit CallData(const grpc_call_element_args* args) |
||||
: call_combiner_(args->call_combiner) {} |
||||
|
||||
grpc_core::CallCombiner* call_combiner_; |
||||
bool fail_ = false; |
||||
}; |
||||
|
||||
static grpc_error_handle Init(grpc_channel_element* elem, |
||||
grpc_channel_element_args* /*args*/) { |
||||
new (elem->channel_data) FailFirstSendOpFilter(); |
||||
return GRPC_ERROR_NONE; |
||||
} |
||||
|
||||
static void Destroy(grpc_channel_element* elem) { |
||||
auto* chand = static_cast<FailFirstSendOpFilter*>(elem->channel_data); |
||||
chand->~FailFirstSendOpFilter(); |
||||
} |
||||
|
||||
bool seen_first_ = false; |
||||
}; |
||||
|
||||
grpc_channel_filter FailFirstSendOpFilter::kFilterVtable = { |
||||
CallData::StartTransportStreamOpBatch, |
||||
grpc_channel_next_op, |
||||
sizeof(CallData), |
||||
CallData::Init, |
||||
grpc_call_stack_ignore_set_pollset_or_pollset_set, |
||||
CallData::Destroy, |
||||
sizeof(FailFirstSendOpFilter), |
||||
Init, |
||||
Destroy, |
||||
grpc_channel_next_get_info, |
||||
"FailFirstSendOpFilter", |
||||
}; |
||||
|
||||
bool g_enable_filter = false; |
||||
|
||||
bool MaybeAddFilter(grpc_channel_stack_builder* builder, void* /*arg*/) { |
||||
// Skip if filter is not enabled.
|
||||
if (!g_enable_filter) return true; |
||||
// Skip on proxy (which explicitly disables retries).
|
||||
const grpc_channel_args* args = |
||||
grpc_channel_stack_builder_get_channel_arguments(builder); |
||||
if (!grpc_channel_args_find_bool(args, GRPC_ARG_ENABLE_RETRIES, true)) { |
||||
return true; |
||||
} |
||||
// Install filter.
|
||||
return grpc_channel_stack_builder_prepend_filter( |
||||
builder, &FailFirstSendOpFilter::kFilterVtable, nullptr, nullptr); |
||||
} |
||||
|
||||
void InitPlugin(void) { |
||||
grpc_channel_init_register_stage(GRPC_CLIENT_SUBCHANNEL, 0, MaybeAddFilter, |
||||
nullptr); |
||||
} |
||||
|
||||
void DestroyPlugin(void) {} |
||||
|
||||
} // namespace
|
||||
|
||||
void retry_send_op_fails(grpc_end2end_test_config config) { |
||||
GPR_ASSERT(config.feature_mask & FEATURE_MASK_SUPPORTS_CLIENT_CHANNEL); |
||||
g_enable_filter = true; |
||||
test_retry_send_op_fails(config); |
||||
g_enable_filter = false; |
||||
} |
||||
|
||||
void retry_send_op_fails_pre_init(void) { |
||||
grpc_register_plugin(InitPlugin, DestroyPlugin); |
||||
} |
Loading…
Reference in new issue