From a9bd94335cd20ab918daf74c4e8881d56670633e Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 30 Nov 2016 09:38:55 -0800 Subject: [PATCH 1/3] Fix unref-while-lock-held bug. Only shut down handshaker if in progress. --- src/core/lib/channel/handshaker.c | 35 +++++++++++++++++++------------ 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/core/lib/channel/handshaker.c b/src/core/lib/channel/handshaker.c index f3bd91284ee..927723499a5 100644 --- a/src/core/lib/channel/handshaker.c +++ b/src/core/lib/channel/handshaker.c @@ -141,7 +141,8 @@ void grpc_handshake_manager_destroy(grpc_exec_ctx* exec_ctx, void grpc_handshake_manager_shutdown(grpc_exec_ctx* exec_ctx, grpc_handshake_manager* mgr) { gpr_mu_lock(&mgr->mu); - if (mgr->index > 0) { + // Shutdown the handshaker that's currently in progress, if any. + if (mgr->index > 0 && mgr->index <= mgr->count) { grpc_handshaker_shutdown(exec_ctx, mgr->handshakers[mgr->index - 1]); } gpr_mu_unlock(&mgr->mu); @@ -149,28 +150,27 @@ void grpc_handshake_manager_shutdown(grpc_exec_ctx* exec_ctx, // Helper function to call either the next handshaker or the // on_handshake_done callback. -static void call_next_handshaker_locked(grpc_exec_ctx* exec_ctx, +// Returns true if we've scheduled the on_handshake_done callback. +static bool call_next_handshaker_locked(grpc_exec_ctx* exec_ctx, grpc_handshake_manager* mgr, grpc_error* error) { GPR_ASSERT(mgr->index <= mgr->count); // If we got an error or we've finished the last handshaker, invoke // the on_handshake_done callback. Otherwise, call the next handshaker. + bool done = false; if (error != GRPC_ERROR_NONE || mgr->index == mgr->count) { // Cancel deadline timer, since we're invoking the on_handshake_done // callback now. grpc_timer_cancel(exec_ctx, &mgr->deadline_timer); grpc_exec_ctx_sched(exec_ctx, &mgr->on_handshake_done, error, NULL); - // Since we're invoking the final callback, we won't be coming back - // to this function, so we can release our reference to the - // handshake manager. - grpc_handshake_manager_unref(exec_ctx, mgr); - return; + done = true; + } else { + grpc_handshaker_do_handshake(exec_ctx, mgr->handshakers[mgr->index], + mgr->acceptor, &mgr->call_next_handshaker, + &mgr->args); } - // Call the next handshaker. - grpc_handshaker_do_handshake(exec_ctx, mgr->handshakers[mgr->index], - mgr->acceptor, &mgr->call_next_handshaker, - &mgr->args); ++mgr->index; + return done; } // A function used as the handshaker-done callback when chaining @@ -179,8 +179,14 @@ static void call_next_handshaker(grpc_exec_ctx* exec_ctx, void* arg, grpc_error* error) { grpc_handshake_manager* mgr = arg; gpr_mu_lock(&mgr->mu); - call_next_handshaker_locked(exec_ctx, mgr, GRPC_ERROR_REF(error)); + bool done = call_next_handshaker_locked(exec_ctx, mgr, GRPC_ERROR_REF(error)); gpr_mu_unlock(&mgr->mu); + // If we're invoked the final callback, we won't be coming back + // to this function, so we can release our reference to the + // handshake manager. + if (done) { + grpc_handshake_manager_unref(exec_ctx, mgr); + } } // Callback invoked when deadline is exceeded. @@ -217,6 +223,9 @@ void grpc_handshake_manager_do_handshake( on_timeout, mgr, gpr_now(GPR_CLOCK_MONOTONIC)); // Start first handshaker, which also owns a ref. gpr_ref(&mgr->refs); - call_next_handshaker_locked(exec_ctx, mgr, GRPC_ERROR_NONE); + bool done = call_next_handshaker_locked(exec_ctx, mgr, GRPC_ERROR_NONE); gpr_mu_unlock(&mgr->mu); + if (done) { + grpc_handshake_manager_unref(exec_ctx, mgr); + } } From c584d995d87038368959ed88f7e4acd875de09ce Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 30 Nov 2016 09:45:32 -0800 Subject: [PATCH 2/3] Eliminate some code duplication. --- .../client_channel/http_connect_handshaker.c | 76 +++++++++---------- 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/src/core/ext/client_channel/http_connect_handshaker.c b/src/core/ext/client_channel/http_connect_handshaker.c index 61fec5cba9a..6a34e390bce 100644 --- a/src/core/ext/client_channel/http_connect_handshaker.c +++ b/src/core/ext/client_channel/http_connect_handshaker.c @@ -106,6 +106,31 @@ static void cleanup_args_for_failure_locked( handshaker->args->args = NULL; } +// If the handshake failed or we're shutting down, clean up and invoke the +// callback with the error. +static void handshake_failed_locked(grpc_exec_ctx* exec_ctx, + http_connect_handshaker* handshaker, + grpc_error* error) { + if (error == GRPC_ERROR_NONE) { + // If we were shut down after an endpoint operation succeeded but + // before the endpoint callback was invoked, we need to generate our + // own error. + error = GRPC_ERROR_CREATE("Handshaker shutdown"); + } + if (!handshaker->shutdown) { + // TODO(ctiller): It is currently necessary to shutdown endpoints + // before destroying them, even if we know that there are no + // pending read/write callbacks. This should be fixed, at which + // point this can be removed. + grpc_endpoint_shutdown(exec_ctx, handshaker->args->endpoint); + // Not shutting down, so the handshake failed. Clean up before + // invoking the callback. + cleanup_args_for_failure_locked(handshaker); + } + // Invoke callback. + grpc_exec_ctx_sched(exec_ctx, handshaker->on_handshake_done, error, NULL); +} + // Callback invoked when finished writing HTTP CONNECT request. static void on_write_done(grpc_exec_ctx* exec_ctx, void* arg, grpc_error* error) { @@ -114,25 +139,7 @@ static void on_write_done(grpc_exec_ctx* exec_ctx, void* arg, if (error != GRPC_ERROR_NONE || handshaker->shutdown) { // If the write failed or we're shutting down, clean up and invoke the // callback with the error. - if (error == GRPC_ERROR_NONE) { - // If we were shut down after the write succeeded but before this - // callback was invoked, we need to generate our own error. - error = GRPC_ERROR_CREATE("Handshaker shutdown"); - } else { - GRPC_ERROR_REF(error); // Take ref for the handshake-done callback. - } - if (!handshaker->shutdown) { - // TODO(ctiller): It is currently necessary to shutdown endpoints - // before destroying them, even if we know that there are no - // pending read/write callbacks. This should be fixed, at which - // point this can be removed. - grpc_endpoint_shutdown(exec_ctx, handshaker->args->endpoint); - // Not shutting down, so the write failed. Clean up before - // invoking the callback. - cleanup_args_for_failure_locked(handshaker); - } - // Invoke callback. - grpc_exec_ctx_sched(exec_ctx, handshaker->on_handshake_done, error, NULL); + handshake_failed_locked(exec_ctx, handshaker, GRPC_ERROR_REF(error)); gpr_mu_unlock(&handshaker->mu); http_connect_handshaker_unref(exec_ctx, handshaker); } else { @@ -151,25 +158,9 @@ static void on_read_done(grpc_exec_ctx* exec_ctx, void* arg, http_connect_handshaker* handshaker = arg; gpr_mu_lock(&handshaker->mu); if (error != GRPC_ERROR_NONE || handshaker->shutdown) { - // If the write failed or we're shutting down, clean up and invoke the + // If the read failed or we're shutting down, clean up and invoke the // callback with the error. - if (error == GRPC_ERROR_NONE) { - // If we were shut down after the write succeeded but before this - // callback was invoked, we need to generate our own error. - error = GRPC_ERROR_CREATE("Handshaker shutdown"); - } else { - GRPC_ERROR_REF(error); // Take ref for the handshake-done callback. - } - if (!handshaker->shutdown) { - // TODO(ctiller): It is currently necessary to shutdown endpoints - // before destroying them, even if we know that there are no - // pending read/write callbacks. This should be fixed, at which - // point this can be removed. - grpc_endpoint_shutdown(exec_ctx, handshaker->args->endpoint); - // Not shutting down, so the write failed. Clean up before - // invoking the callback. - cleanup_args_for_failure_locked(handshaker); - } + handshake_failed_locked(exec_ctx, handshaker, GRPC_ERROR_REF(error)); goto done; } // Add buffer to parser. @@ -179,7 +170,10 @@ static void on_read_done(grpc_exec_ctx* exec_ctx, void* arg, error = grpc_http_parser_parse(&handshaker->http_parser, handshaker->args->read_buffer->slices[i], &body_start_offset); - if (error != GRPC_ERROR_NONE) goto done; + if (error != GRPC_ERROR_NONE) { + handshake_failed_locked(exec_ctx, handshaker, error); + goto done; + } if (handshaker->http_parser.state == GRPC_HTTP_BODY) { // Remove the data we've already read from the read buffer, // leaving only the leftover bytes (if any). @@ -228,10 +222,12 @@ static void on_read_done(grpc_exec_ctx* exec_ctx, void* arg, handshaker->http_response.status); error = GRPC_ERROR_CREATE(msg); gpr_free(msg); + handshake_failed_locked(exec_ctx, handshaker, error); + goto done; } -done: - // Invoke handshake-done callback. + // Success. Invoke handshake-done callback. grpc_exec_ctx_sched(exec_ctx, handshaker->on_handshake_done, error, NULL); +done: gpr_mu_unlock(&handshaker->mu); http_connect_handshaker_unref(exec_ctx, handshaker); } From b8f97a4ac02536bcb88867061b9dad6c35818f2a Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 30 Nov 2016 11:05:27 -0800 Subject: [PATCH 3/3] Allow handshaking to be retried. --- src/core/lib/channel/handshaker.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/core/lib/channel/handshaker.c b/src/core/lib/channel/handshaker.c index 927723499a5..992cd420de4 100644 --- a/src/core/lib/channel/handshaker.c +++ b/src/core/lib/channel/handshaker.c @@ -142,7 +142,7 @@ void grpc_handshake_manager_shutdown(grpc_exec_ctx* exec_ctx, grpc_handshake_manager* mgr) { gpr_mu_lock(&mgr->mu); // Shutdown the handshaker that's currently in progress, if any. - if (mgr->index > 0 && mgr->index <= mgr->count) { + if (mgr->index > 0) { grpc_handshaker_shutdown(exec_ctx, mgr->handshakers[mgr->index - 1]); } gpr_mu_unlock(&mgr->mu); @@ -157,20 +157,21 @@ static bool call_next_handshaker_locked(grpc_exec_ctx* exec_ctx, GPR_ASSERT(mgr->index <= mgr->count); // If we got an error or we've finished the last handshaker, invoke // the on_handshake_done callback. Otherwise, call the next handshaker. - bool done = false; if (error != GRPC_ERROR_NONE || mgr->index == mgr->count) { // Cancel deadline timer, since we're invoking the on_handshake_done // callback now. grpc_timer_cancel(exec_ctx, &mgr->deadline_timer); grpc_exec_ctx_sched(exec_ctx, &mgr->on_handshake_done, error, NULL); - done = true; - } else { - grpc_handshaker_do_handshake(exec_ctx, mgr->handshakers[mgr->index], - mgr->acceptor, &mgr->call_next_handshaker, - &mgr->args); + // Reset index to 0 so that we can start over if we re-attempt the + // connection. + mgr->index = 0; + return true; } + grpc_handshaker_do_handshake(exec_ctx, mgr->handshakers[mgr->index], + mgr->acceptor, &mgr->call_next_handshaker, + &mgr->args); ++mgr->index; - return done; + return false; } // A function used as the handshaker-done callback when chaining