From ac88ca1907669fc542598504ebfdb818ebbf96bc Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Wed, 16 Nov 2016 14:21:29 -0800 Subject: [PATCH] Fix logic race in chttp2 write path IF: - we schedule a write in chttp2 in response to some stream op (which will cause a write that's covered by a poller to be scheduled on the combiner lock) - AND then, under that same combiner lock, we process a RST_STREAM - then we'll remove the op that's being processed, consequently removing the polling coverage - and then, IF that is the last poll on said transport, the transport will never write, which CAN cause servers to fail to shutdown --- .../chttp2/transport/chttp2_transport.c | 59 ++++++++++++++++--- .../ext/transport/chttp2/transport/internal.h | 10 ++++ .../ext/transport/chttp2/transport/writing.c | 2 + src/core/lib/iomgr/combiner.c | 11 +++- 4 files changed, 71 insertions(+), 11 deletions(-) diff --git a/src/core/ext/transport/chttp2/transport/chttp2_transport.c b/src/core/ext/transport/chttp2/transport/chttp2_transport.c index 127e1cdc132..570f2893021 100644 --- a/src/core/ext/transport/chttp2/transport/chttp2_transport.c +++ b/src/core/ext/transport/chttp2/transport/chttp2_transport.c @@ -111,9 +111,6 @@ static void incoming_byte_stream_update_flow_control(grpc_exec_ctx *exec_ctx, static void incoming_byte_stream_destroy_locked(grpc_exec_ctx *exec_ctx, void *byte_stream, grpc_error *error_ignored); -static void fail_pending_writes(grpc_exec_ctx *exec_ctx, - grpc_chttp2_transport *t, grpc_chttp2_stream *s, - grpc_error *error); static void benign_reclaimer(grpc_exec_ctx *exec_ctx, void *t, grpc_error *error); @@ -428,6 +425,7 @@ static void close_transport_locked(grpc_exec_ctx *exec_ctx, /* flush writable stream list to avoid dangling references */ grpc_chttp2_stream *s; while (grpc_chttp2_list_pop_writable_stream(t, &s)) { + grpc_chttp2_leave_writing_lists(exec_ctx, t, s); GRPC_CHTTP2_STREAM_UNREF(exec_ctx, s, "chttp2_writing:close"); } end_all_the_calls(exec_ctx, t, GRPC_ERROR_REF(error)); @@ -523,6 +521,10 @@ static void destroy_stream_locked(grpc_exec_ctx *exec_ctx, void *sp, } } + if (s->fail_pending_writes_on_writes_finished_error != NULL) { + GRPC_ERROR_UNREF(s->fail_pending_writes_on_writes_finished_error); + } + GPR_ASSERT(s->send_initial_metadata_finished == NULL); GPR_ASSERT(s->fetching_send_message == NULL); GPR_ASSERT(s->send_trailing_metadata_finished == NULL); @@ -704,8 +706,6 @@ static void write_action_end_locked(grpc_exec_ctx *exec_ctx, void *tp, } } - grpc_chttp2_end_write(exec_ctx, t, GRPC_ERROR_REF(error)); - switch (t->write_state) { case GRPC_CHTTP2_WRITE_STATE_IDLE: GPR_UNREACHABLE_CODE(break); @@ -734,6 +734,8 @@ static void write_action_end_locked(grpc_exec_ctx *exec_ctx, void *tp, break; } + grpc_chttp2_end_write(exec_ctx, t, GRPC_ERROR_REF(error)); + GRPC_CHTTP2_UNREF_TRANSPORT(exec_ctx, t, "writing"); GPR_TIMER_END("terminate_writing_with_lock", 0); } @@ -1388,6 +1390,7 @@ static void remove_stream(grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t, } } if (grpc_chttp2_list_remove_writable_stream(t, s)) { + grpc_chttp2_leave_writing_lists(exec_ctx, t, s); GRPC_CHTTP2_STREAM_UNREF(exec_ctx, s, "chttp2_writing:remove_stream"); } @@ -1518,9 +1521,41 @@ static grpc_error *removal_error(grpc_error *extra_error, grpc_chttp2_stream *s, return error; } -static void fail_pending_writes(grpc_exec_ctx *exec_ctx, - grpc_chttp2_transport *t, grpc_chttp2_stream *s, - grpc_error *error) { +void grpc_chttp2_leave_writing_lists(grpc_exec_ctx *exec_ctx, + grpc_chttp2_transport *t, + grpc_chttp2_stream *s) { + if (s->need_fail_pending_writes_on_writes_finished) { + grpc_error *error = s->fail_pending_writes_on_writes_finished_error; + s->fail_pending_writes_on_writes_finished_error = NULL; + s->need_fail_pending_writes_on_writes_finished = false; + grpc_chttp2_fail_pending_writes(exec_ctx, t, s, error); + } +} + +void grpc_chttp2_fail_pending_writes(grpc_exec_ctx *exec_ctx, + grpc_chttp2_transport *t, + grpc_chttp2_stream *s, grpc_error *error) { + if (s->need_fail_pending_writes_on_writes_finished || + (t->write_state != GRPC_CHTTP2_WRITE_STATE_IDLE && + (s->included[GRPC_CHTTP2_LIST_WRITABLE] || + s->included[GRPC_CHTTP2_LIST_WRITING]))) { + /* If a write is in progress, and it involves this stream, wait for the + * write to complete before cancelling things out. If we don't do this, then + * our combiner lock might think that some operation on its queue might be + * covering a completion even though there is none, in which case we might + * offload to another thread, which isn't guarateed to exist */ + if (error != GRPC_ERROR_NONE) { + if (s->fail_pending_writes_on_writes_finished_error == GRPC_ERROR_NONE) { + s->fail_pending_writes_on_writes_finished_error = GRPC_ERROR_CREATE( + "Post-poned fail writes due to in-progress write"); + } + s->fail_pending_writes_on_writes_finished_error = grpc_error_add_child( + s->fail_pending_writes_on_writes_finished_error, error); + } + s->need_fail_pending_writes_on_writes_finished = true; + return; /* early out */ + } + error = removal_error(error, s, "Pending writes failed due to stream closure"); s->send_initial_metadata = NULL; @@ -1529,6 +1564,12 @@ static void fail_pending_writes(grpc_exec_ctx *exec_ctx, "send_initial_metadata_finished"); s->send_trailing_metadata = NULL; + if (s->send_trailing_metadata_finished) { + const char *why = grpc_error_string(error); + gpr_log(GPR_DEBUG, "cancel send_trailing_metadata: writing=%d %s", + t->write_state, why); + grpc_error_free_string(why); + } grpc_chttp2_complete_closure_step( exec_ctx, t, s, &s->send_trailing_metadata_finished, GRPC_ERROR_REF(error), "send_trailing_metadata_finished"); @@ -1574,7 +1615,7 @@ void grpc_chttp2_mark_stream_closed(grpc_exec_ctx *exec_ctx, if (close_writes && !s->write_closed) { s->write_closed_error = GRPC_ERROR_REF(error); s->write_closed = true; - fail_pending_writes(exec_ctx, t, s, GRPC_ERROR_REF(error)); + grpc_chttp2_fail_pending_writes(exec_ctx, t, s, GRPC_ERROR_REF(error)); grpc_chttp2_maybe_complete_recv_trailing_metadata(exec_ctx, t, s); } if (s->read_closed && s->write_closed) { diff --git a/src/core/ext/transport/chttp2/transport/internal.h b/src/core/ext/transport/chttp2/transport/internal.h index b74233d9923..6cba1e7fd20 100644 --- a/src/core/ext/transport/chttp2/transport/internal.h +++ b/src/core/ext/transport/chttp2/transport/internal.h @@ -409,6 +409,9 @@ struct grpc_chttp2_stream { grpc_error *read_closed_error; /** the error that resulted in this stream being write-closed */ grpc_error *write_closed_error; + /** should any writes be cleared once this stream becomes non-writable */ + bool need_fail_pending_writes_on_writes_finished; + grpc_error *fail_pending_writes_on_writes_finished_error; grpc_published_metadata_method published_metadata[2]; bool final_metadata_requested; @@ -689,4 +692,11 @@ void grpc_chttp2_maybe_complete_recv_trailing_metadata(grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t, grpc_chttp2_stream *s); +void grpc_chttp2_leave_writing_lists(grpc_exec_ctx *exec_ctx, + grpc_chttp2_transport *t, + grpc_chttp2_stream *s); +void grpc_chttp2_fail_pending_writes(grpc_exec_ctx *exec_ctx, + grpc_chttp2_transport *t, + grpc_chttp2_stream *s, grpc_error *error); + #endif /* GRPC_CORE_EXT_TRANSPORT_CHTTP2_TRANSPORT_INTERNAL_H */ diff --git a/src/core/ext/transport/chttp2/transport/writing.c b/src/core/ext/transport/chttp2/transport/writing.c index 139e7387c4d..769b229a0da 100644 --- a/src/core/ext/transport/chttp2/transport/writing.c +++ b/src/core/ext/transport/chttp2/transport/writing.c @@ -208,6 +208,7 @@ bool grpc_chttp2_begin_write(grpc_exec_ctx *exec_ctx, GRPC_CHTTP2_STREAM_UNREF(exec_ctx, s, "chttp2_writing:already_writing"); } } else { + grpc_chttp2_leave_writing_lists(exec_ctx, t, s); GRPC_CHTTP2_STREAM_UNREF(exec_ctx, s, "chttp2_writing:no_write"); } } @@ -252,6 +253,7 @@ void grpc_chttp2_end_write(grpc_exec_ctx *exec_ctx, grpc_chttp2_transport *t, grpc_chttp2_mark_stream_closed(exec_ctx, t, s, !t->is_client, 1, GRPC_ERROR_REF(error)); } + grpc_chttp2_leave_writing_lists(exec_ctx, t, s); GRPC_CHTTP2_STREAM_UNREF(exec_ctx, s, "chttp2_writing:end"); } grpc_slice_buffer_reset_and_unref(&t->outbuf); diff --git a/src/core/lib/iomgr/combiner.c b/src/core/lib/iomgr/combiner.c index 60ee14eb232..cfc67020ae7 100644 --- a/src/core/lib/iomgr/combiner.c +++ b/src/core/lib/iomgr/combiner.c @@ -90,6 +90,12 @@ static bool is_covered_by_poller(grpc_combiner *lock) { gpr_atm_acq_load(&lock->elements_covered_by_poller) > 0; } +#define IS_COVERED_BY_POLLER_FMT "(final=%d elems=%" PRIdPTR ")->%d" +#define IS_COVERED_BY_POLLER_ARGS(lock) \ + (lock)->final_list_covered_by_poller, \ + gpr_atm_acq_load(&(lock)->elements_covered_by_poller), \ + is_covered_by_poller((lock)) + grpc_combiner *grpc_combiner_create(grpc_workqueue *optional_workqueue) { grpc_combiner *lock = gpr_malloc(sizeof(*lock)); lock->next_combiner_on_this_exec_ctx = NULL; @@ -197,9 +203,10 @@ bool grpc_combiner_continue_exec_ctx(grpc_exec_ctx *exec_ctx) { GRPC_COMBINER_TRACE( gpr_log(GPR_DEBUG, "C:%p grpc_combiner_continue_exec_ctx workqueue=%p " - "is_covered_by_poller=%d exec_ctx_ready_to_finish=%d " + "is_covered_by_poller=" IS_COVERED_BY_POLLER_FMT + " exec_ctx_ready_to_finish=%d " "time_to_execute_final_list=%d", - lock, lock->optional_workqueue, is_covered_by_poller(lock), + lock, lock->optional_workqueue, IS_COVERED_BY_POLLER_ARGS(lock), grpc_exec_ctx_ready_to_finish(exec_ctx), lock->time_to_execute_final_list));