From e6506bc9b2cf32351587e90dca72aa18640615d6 Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Tue, 18 Jul 2017 21:43:45 -0700 Subject: [PATCH] Fix race-condition in epoll1 poller's begin_worker() and pollset_kick() which caused the designated poller to miss a kick in some cases --- src/core/lib/iomgr/ev_epoll1_linux.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/core/lib/iomgr/ev_epoll1_linux.c b/src/core/lib/iomgr/ev_epoll1_linux.c index f579b16fa58..bd7c955d035 100644 --- a/src/core/lib/iomgr/ev_epoll1_linux.c +++ b/src/core/lib/iomgr/ev_epoll1_linux.c @@ -600,6 +600,7 @@ static bool begin_worker(grpc_pollset *pollset, grpc_pollset_worker *worker, } gpr_mu_unlock(&neighbourhood->mu); } + worker_insert(pollset, worker); pollset->begin_refs--; if (worker->kick_state == UNKICKED) { @@ -628,7 +629,18 @@ static bool begin_worker(grpc_pollset *pollset, grpc_pollset_worker *worker, pollset->shutting_down); } - return worker->kick_state == DESIGNATED_POLLER && !pollset->shutting_down; + /* We release pollset lock in this function at a couple of places: + * 1. Briefly when assigning pollset to a neighbourhood + * 2. When doing gpr_cv_wait() + * It is possible that 'kicked_without_poller' was set to true during (1) and + * 'shutting_down' is set to true during (1) or (2). If either of them is + * true, this worker cannot do polling */ + + /* TODO(sreek): Perhaps there is a better way to handle kicked_without_poller + * case; especially when the worker is the DESIGNATED_POLLER */ + + return worker->kick_state == DESIGNATED_POLLER && !pollset->shutting_down && + !pollset->kicked_without_poller; } static bool check_neighbourhood_for_available_poller(