From 7f43eaf365334f2c394e5e5c2b6e0777c72beaaa Mon Sep 17 00:00:00 2001 From: David Klempner Date: Wed, 18 Feb 2015 17:00:31 -0800 Subject: [PATCH 1/3] Ensure there is no concurrent poller for unary->multipoll The race here is: Initially unary poller has one FD, FD_1 Thread 1 adds FD_2, promoting to multipoll, entailing epoll_ctl on FD_1 and FD_2. Concurrently, Thread 2 returns from unary poll and executes one of FD_1's callbacks, which ultimately leads to a close(FD_1) which races with the epoll_ctl by thread 1. The solution is to ensure that we don't concurrently poll and promote, which requires a bit of extra care. --- src/core/iomgr/pollset_posix.c | 65 +++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/src/core/iomgr/pollset_posix.c b/src/core/iomgr/pollset_posix.c index 05b78adeb61..00a22e7ae37 100644 --- a/src/core/iomgr/pollset_posix.c +++ b/src/core/iomgr/pollset_posix.c @@ -201,11 +201,43 @@ static void become_empty_pollset(grpc_pollset *pollset) { * via poll() */ -static void unary_poll_pollset_add_fd(grpc_pollset *pollset, grpc_fd *fd) { + +typedef struct grpc_unary_promote_args { + const grpc_pollset_vtable *original_vtable; + grpc_pollset *pollset; + grpc_fd *fd; +} grpc_unary_promote_args; + +static void unary_poll_do_promote(void *args, int success) { + grpc_unary_promote_args *up_args = args; + const grpc_pollset_vtable *original_vtable = up_args->original_vtable; + grpc_pollset *pollset = up_args->pollset; + grpc_fd *fd = up_args->fd; grpc_fd *fds[2]; + gpr_free(up_args); + + gpr_mu_lock(&pollset->mu); + /* First we need to ensure that nobody is polling concurrently */ + while (pollset->counter != 0 && pollset->vtable == original_vtable) { + grpc_pollset_kick(pollset); + gpr_cv_wait(&pollset->cv, &pollset->mu, gpr_inf_future); + } + /* At this point the pollset may no longer be a unary poller. In that case + * we should just call the right add function and be done. */ + /* TODO(klempner): If we're not careful this could cause infinite recursion. + * That's not a problem for now because empty_pollset has a trivial poller + * and we don't have any mechanism to unbecome multipoller. */ + if (pollset->vtable != original_vtable) { + pollset->vtable->add_fd(pollset, fd); + gpr_cv_broadcast(&pollset->cv); + gpr_mu_unlock(&pollset->mu); + return; + } + if (fd == pollset->data.ptr) return; fds[0] = pollset->data.ptr; fds[1] = fd; + if (!grpc_fd_is_orphaned(fds[0])) { grpc_platform_become_multipoller(pollset, fds, GPR_ARRAY_SIZE(fds)); grpc_fd_unref(fds[0]); @@ -216,6 +248,37 @@ static void unary_poll_pollset_add_fd(grpc_pollset *pollset, grpc_fd *fd) { pollset->data.ptr = fd; grpc_fd_ref(fd); } + + gpr_cv_broadcast(&pollset->cv); + gpr_mu_unlock(&pollset->mu); + + /* Matching ref in unary_poll_pollset_add_fd */ + grpc_fd_unref(fd); +} + +static void unary_poll_pollset_add_fd(grpc_pollset *pollset, grpc_fd *fd) { + grpc_unary_promote_args *up_args; + if (fd == pollset->data.ptr) return; + + if (grpc_fd_is_orphaned(pollset->data.ptr)) { + /* old fd is orphaned and we haven't cleaned it up until now, so remain a + * unary poller */ + grpc_fd_unref(pollset->data.ptr); + pollset->data.ptr = fd; + grpc_fd_ref(fd); + return; + } + + /* Now we need to promote. This needs to happen when we're not polling. Since + * this may be called from poll, the wait needs to happen asynchronously. */ + grpc_fd_ref(fd); + up_args = gpr_malloc(sizeof(*up_args)); + up_args->pollset = pollset; + up_args->fd = fd; + up_args->original_vtable = pollset->vtable; + grpc_iomgr_add_callback(unary_poll_do_promote, up_args); + + grpc_pollset_kick(pollset); } static void unary_poll_pollset_del_fd(grpc_pollset *pollset, grpc_fd *fd) { From b505661b1ae833154deebb689c2c1ba415bc1c11 Mon Sep 17 00:00:00 2001 From: David Klempner Date: Tue, 24 Feb 2015 14:22:50 -0800 Subject: [PATCH 2/3] Add a shutdown API to pollset This allows us to safely asynchronously add FDs in the possibly-promoting unary add case. Also fix the unary add async path to properly handle more of the extra cases it needs to handle. --- src/core/iomgr/pollset.h | 5 ++ src/core/iomgr/pollset_posix.c | 122 ++++++++++++++++++++++------ src/core/iomgr/pollset_posix.h | 4 + src/core/iomgr/pollset_windows.c | 6 ++ src/core/surface/completion_queue.c | 9 +- 5 files changed, 117 insertions(+), 29 deletions(-) diff --git a/src/core/iomgr/pollset.h b/src/core/iomgr/pollset.h index 9d04b014ba7..c26947f37cb 100644 --- a/src/core/iomgr/pollset.h +++ b/src/core/iomgr/pollset.h @@ -52,9 +52,14 @@ #include "src/core/iomgr/pollset_windows.h" #endif + void grpc_pollset_init(grpc_pollset *pollset); +void grpc_pollset_shutdown(grpc_pollset *pollset, + void (*shutdown_done)(void *arg), + void *shutdown_done_arg); void grpc_pollset_destroy(grpc_pollset *pollset); + /* Do some work on a pollset. May involve invoking asynchronous callbacks, or actually polling file descriptors. diff --git a/src/core/iomgr/pollset_posix.c b/src/core/iomgr/pollset_posix.c index 39945ff3cb7..f0a8453fd77 100644 --- a/src/core/iomgr/pollset_posix.c +++ b/src/core/iomgr/pollset_posix.c @@ -55,6 +55,7 @@ static grpc_pollset g_backup_pollset; static int g_shutdown_backup_poller; static gpr_event g_backup_poller_done; +static gpr_event g_backup_pollset_shutdown_done; static void backup_poller(void *p) { gpr_timespec delta = gpr_time_from_millis(100); @@ -104,9 +105,14 @@ void grpc_pollset_global_init(void) { /* start the backup poller thread */ g_shutdown_backup_poller = 0; gpr_event_init(&g_backup_poller_done); + gpr_event_init(&g_backup_pollset_shutdown_done); gpr_thd_new(&id, backup_poller, NULL, NULL); } +static void on_backup_pollset_shutdown_done(void *arg) { + gpr_event_set(&g_backup_pollset_shutdown_done, (void *)1); +} + void grpc_pollset_global_shutdown(void) { /* terminate the backup poller thread */ gpr_mu_lock(&g_backup_pollset.mu); @@ -114,6 +120,10 @@ void grpc_pollset_global_shutdown(void) { gpr_mu_unlock(&g_backup_pollset.mu); gpr_event_wait(&g_backup_poller_done, gpr_inf_future); + grpc_pollset_shutdown(&g_backup_pollset, on_backup_pollset_shutdown_done, + NULL); + gpr_event_wait(&g_backup_pollset_shutdown_done, gpr_inf_future); + /* destroy the backup pollset */ grpc_pollset_destroy(&g_backup_pollset); @@ -130,6 +140,8 @@ void grpc_pollset_init(grpc_pollset *pollset) { gpr_mu_init(&pollset->mu); gpr_cv_init(&pollset->cv); grpc_pollset_kick_init(&pollset->kick_state); + pollset->in_flight_cbs = 0; + pollset->shutting_down = 0; become_empty_pollset(pollset); } @@ -163,7 +175,24 @@ int grpc_pollset_work(grpc_pollset *pollset, gpr_timespec deadline) { return pollset->vtable->maybe_work(pollset, deadline, now, 1); } +void grpc_pollset_shutdown(grpc_pollset *pollset, + void (*shutdown_done)(void *arg), + void *shutdown_done_arg) { + int in_flight_cbs; + gpr_mu_lock(&pollset->mu); + pollset->shutting_down = 1; + in_flight_cbs = pollset->in_flight_cbs; + pollset->shutdown_done_cb = shutdown_done; + pollset->shutdown_done_arg = shutdown_done_arg; + gpr_mu_unlock(&pollset->mu); + if (in_flight_cbs == 0) { + shutdown_done(shutdown_done_arg); + } +} + void grpc_pollset_destroy(grpc_pollset *pollset) { + GPR_ASSERT(pollset->shutting_down); + GPR_ASSERT(pollset->in_flight_cbs == 0); pollset->vtable->destroy(pollset); grpc_pollset_kick_destroy(&pollset->kick_state); gpr_mu_destroy(&pollset->mu); @@ -213,12 +242,21 @@ static void unary_poll_do_promote(void *args, int success) { const grpc_pollset_vtable *original_vtable = up_args->original_vtable; grpc_pollset *pollset = up_args->pollset; grpc_fd *fd = up_args->fd; - grpc_fd *fds[2]; + int do_shutdown_cb = 0; gpr_free(up_args); + /* + * This is quite tricky. There are a number of cases to keep in mind here: + * 1. fd may have been orphaned + * 2. The pollset may no longer be a unary poller (and we can't let case #1 + * leak to other pollset types!) + * 3. pollset's fd (which may have changed) may have been orphaned + * 4. The pollset may be shutting down. + */ + gpr_mu_lock(&pollset->mu); /* First we need to ensure that nobody is polling concurrently */ - while (pollset->counter != 0 && pollset->vtable == original_vtable) { + while (pollset->counter != 0) { grpc_pollset_kick(pollset); gpr_cv_wait(&pollset->cv, &pollset->mu, gpr_inf_future); } @@ -227,31 +265,44 @@ static void unary_poll_do_promote(void *args, int success) { /* TODO(klempner): If we're not careful this could cause infinite recursion. * That's not a problem for now because empty_pollset has a trivial poller * and we don't have any mechanism to unbecome multipoller. */ - if (pollset->vtable != original_vtable) { + pollset->in_flight_cbs--; + if (pollset->shutting_down) { + gpr_log(GPR_INFO, "Shutting down"); + /* We don't care about this pollset anymore. */ + if (pollset->in_flight_cbs == 0) { + do_shutdown_cb = 1; + } + } else if (grpc_fd_is_orphaned(fd)) { + /* Don't try to add it to anything, we'll drop our ref on it below */ + } else if (pollset->vtable != original_vtable) { + gpr_log(GPR_INFO, "Not original vtable"); pollset->vtable->add_fd(pollset, fd); - gpr_cv_broadcast(&pollset->cv); - gpr_mu_unlock(&pollset->mu); - return; - } - - if (fd == pollset->data.ptr) return; - fds[0] = pollset->data.ptr; - fds[1] = fd; - - if (!grpc_fd_is_orphaned(fds[0])) { - grpc_platform_become_multipoller(pollset, fds, GPR_ARRAY_SIZE(fds)); - grpc_fd_unref(fds[0]); - } else { - /* old fd is orphaned and we haven't cleaned it up until now, so remain a - * unary poller */ - grpc_fd_unref(fds[0]); - pollset->data.ptr = fd; - grpc_fd_ref(fd); + } else if (fd != pollset->data.ptr) { + grpc_fd *fds[2]; + fds[0] = pollset->data.ptr; + fds[1] = fd; + + if (!grpc_fd_is_orphaned(fds[0])) { + grpc_platform_become_multipoller(pollset, fds, GPR_ARRAY_SIZE(fds)); + grpc_fd_unref(fds[0]); + } else { + /* old fd is orphaned and we haven't cleaned it up until now, so remain a + * unary poller */ + /* Note that it is possible that fds[1] is also orphaned at this point. + * That's okay, we'll correct it at the next add or poll. */ + grpc_fd_unref(fds[0]); + pollset->data.ptr = fd; + grpc_fd_ref(fd); + } } gpr_cv_broadcast(&pollset->cv); gpr_mu_unlock(&pollset->mu); + if (do_shutdown_cb) { + pollset->shutdown_done_cb(pollset->shutdown_done_arg); + } + /* Matching ref in unary_poll_pollset_add_fd */ grpc_fd_unref(fd); } @@ -260,18 +311,31 @@ static void unary_poll_pollset_add_fd(grpc_pollset *pollset, grpc_fd *fd) { grpc_unary_promote_args *up_args; if (fd == pollset->data.ptr) return; - if (grpc_fd_is_orphaned(pollset->data.ptr)) { - /* old fd is orphaned and we haven't cleaned it up until now, so remain a - * unary poller */ - grpc_fd_unref(pollset->data.ptr); - pollset->data.ptr = fd; - grpc_fd_ref(fd); + if (!pollset->counter) { + /* Fast path -- no in flight cbs */ + /* TODO(klempner): Comment this out and fix any test failures or establish + * they are due to timing issues */ + grpc_fd *fds[2]; + fds[0] = pollset->data.ptr; + fds[1] = fd; + + if (!grpc_fd_is_orphaned(fds[0])) { + grpc_platform_become_multipoller(pollset, fds, GPR_ARRAY_SIZE(fds)); + grpc_fd_unref(fds[0]); + } else { + /* old fd is orphaned and we haven't cleaned it up until now, so remain a + * unary poller */ + grpc_fd_unref(fds[0]); + pollset->data.ptr = fd; + grpc_fd_ref(fd); + } return; } /* Now we need to promote. This needs to happen when we're not polling. Since * this may be called from poll, the wait needs to happen asynchronously. */ grpc_fd_ref(fd); + pollset->in_flight_cbs++; up_args = gpr_malloc(sizeof(*up_args)); up_args->pollset = pollset; up_args->fd = fd; @@ -301,6 +365,10 @@ static int unary_poll_pollset_maybe_work(grpc_pollset *pollset, if (pollset->counter) { return 0; } + if (pollset->in_flight_cbs) { + /* Give do_promote priority so we don't starve it out */ + return 0; + } fd = pollset->data.ptr; if (grpc_fd_is_orphaned(fd)) { grpc_fd_unref(fd); diff --git a/src/core/iomgr/pollset_posix.h b/src/core/iomgr/pollset_posix.h index 03b4c775b7f..86b6c9f20e8 100644 --- a/src/core/iomgr/pollset_posix.h +++ b/src/core/iomgr/pollset_posix.h @@ -55,6 +55,10 @@ typedef struct grpc_pollset { gpr_cv cv; grpc_pollset_kick_state kick_state; int counter; + int in_flight_cbs; + int shutting_down; + void (*shutdown_done_cb)(void *arg); + void *shutdown_done_arg; union { int fd; void *ptr; diff --git a/src/core/iomgr/pollset_windows.c b/src/core/iomgr/pollset_windows.c index d21072b2832..bea67116116 100644 --- a/src/core/iomgr/pollset_windows.c +++ b/src/core/iomgr/pollset_windows.c @@ -46,6 +46,12 @@ void grpc_pollset_init(grpc_pollset *pollset) { gpr_cv_init(&pollset->cv); } +void grpc_pollset_shutdown(grpc_pollset *pollset, + void (*shutdown_done)(void *arg), + void *shutdown_done_arg) { + shutdown_done(shutdown_done_arg); +} + void grpc_pollset_destroy(grpc_pollset *pollset) { gpr_mu_destroy(&pollset->mu); gpr_cv_destroy(&pollset->cv); diff --git a/src/core/surface/completion_queue.c b/src/core/surface/completion_queue.c index 2efc084d7b3..c4b8d60782f 100644 --- a/src/core/surface/completion_queue.c +++ b/src/core/surface/completion_queue.c @@ -389,12 +389,17 @@ void grpc_completion_queue_shutdown(grpc_completion_queue *cc) { } } -void grpc_completion_queue_destroy(grpc_completion_queue *cc) { - GPR_ASSERT(cc->queue == NULL); +static void on_pollset_destroy_done(void *arg) { + grpc_completion_queue *cc = arg; grpc_pollset_destroy(&cc->pollset); gpr_free(cc); } +void grpc_completion_queue_destroy(grpc_completion_queue *cc) { + GPR_ASSERT(cc->queue == NULL); + grpc_pollset_shutdown(&cc->pollset, on_pollset_destroy_done, cc); +} + void grpc_event_finish(grpc_event *base) { event *ev = (event *)base; ev->on_finish(ev->on_finish_user_data, GRPC_OP_OK); From c6bccc24d39cf651219e96e91ccc7767943272b8 Mon Sep 17 00:00:00 2001 From: David Klempner Date: Tue, 24 Feb 2015 17:33:05 -0800 Subject: [PATCH 3/3] Move close() to when the fd refcount goes to zero Instead, we do a shutdown() at the point we are currently closing, to kick off socket teardown while ensuring an fd ref is sufficient to make concurrent syscalls like epoll_ctl safe. --- src/core/iomgr/fd_posix.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/iomgr/fd_posix.c b/src/core/iomgr/fd_posix.c index 41fd24e05a8..abdd49bbda3 100644 --- a/src/core/iomgr/fd_posix.c +++ b/src/core/iomgr/fd_posix.c @@ -38,6 +38,7 @@ #include "src/core/iomgr/fd_posix.h" #include +#include #include #include "src/core/iomgr/iomgr_internal.h" @@ -113,6 +114,7 @@ static void ref_by(grpc_fd *fd, int n) { static void unref_by(grpc_fd *fd, int n) { gpr_atm old = gpr_atm_full_fetch_add(&fd->refst, -n); if (old == n) { + close(fd->fd); grpc_iomgr_add_callback(fd->on_done, fd->on_done_user_data); freelist_fd(fd); grpc_iomgr_unref(); @@ -158,9 +160,9 @@ static void wake_watchers(grpc_fd *fd) { void grpc_fd_orphan(grpc_fd *fd, grpc_iomgr_cb_func on_done, void *user_data) { fd->on_done = on_done ? on_done : do_nothing; fd->on_done_user_data = user_data; + shutdown(fd->fd, SHUT_RDWR); ref_by(fd, 1); /* remove active status, but keep referenced */ wake_watchers(fd); - close(fd->fd); unref_by(fd, 2); /* drop the reference */ }