Merge pull request #12413 from sreecha/fix-epoll1-bug

Fix epoll1 hang
reviewable/pr11758/r1
Sree Kuchibhotla 8 years ago committed by GitHub
commit f54af4832e
  1. 45
      src/core/lib/iomgr/ev_epoll1_linux.c

@@ -698,22 +698,30 @@ static bool begin_worker(grpc_pollset *pollset, grpc_pollset_worker *worker,
gpr_mu_unlock(&pollset->mu); gpr_mu_unlock(&pollset->mu);
goto retry_lock_neighbourhood; goto retry_lock_neighbourhood;
} }
pollset->seen_inactive = false;
if (neighbourhood->active_root == NULL) { /* In the brief time we released the pollset locks above, the worker MAY
neighbourhood->active_root = pollset->next = pollset->prev = pollset; have been kicked. In this case, the worker should get out of this
/* TODO: sreek. Why would this worker state be other than UNKICKED pollset ASAP and hence this should neither add the pollset to
* here ? (since the worker isn't added to the pollset yet, there is no neighbourhood nor mark the pollset as active.
* way it can be "found" by other threads to get kicked). */
On a side note, the only way a worker's kick state could have changed
/* If there is no designated poller, make this the designated poller */ at this point is if it were "kicked specifically". Since the worker has
if (worker->kick_state == UNKICKED && not added itself to the pollset yet (by calling worker_insert()), it is
gpr_atm_no_barrier_cas(&g_active_poller, 0, (gpr_atm)worker)) { not visible in the "kick any" path yet */
SET_KICK_STATE(worker, DESIGNATED_POLLER); if (worker->kick_state == UNKICKED) {
pollset->seen_inactive = false;
if (neighbourhood->active_root == NULL) {
neighbourhood->active_root = pollset->next = pollset->prev = pollset;
/* Make this the designated poller if there isn't one already */
if (worker->kick_state == UNKICKED &&
gpr_atm_no_barrier_cas(&g_active_poller, 0, (gpr_atm)worker)) {
SET_KICK_STATE(worker, DESIGNATED_POLLER);
}
} else {
pollset->next = neighbourhood->active_root;
pollset->prev = pollset->next->prev;
pollset->next->prev = pollset->prev->next = pollset;
} }
} else {
pollset->next = neighbourhood->active_root;
pollset->prev = pollset->next->prev;
pollset->next->prev = pollset->prev->next = pollset;
} }
} }
if (is_reassigning) { if (is_reassigning) {
@@ -1001,6 +1009,7 @@ static grpc_error *pollset_kick(grpc_pollset *pollset,
gpr_log(GPR_ERROR, "%s", tmp); gpr_log(GPR_ERROR, "%s", tmp);
gpr_free(tmp); gpr_free(tmp);
} }
if (specific_worker == NULL) { if (specific_worker == NULL) {
if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)pollset) { if (gpr_tls_get(&g_current_thread_pollset) != (intptr_t)pollset) {
grpc_pollset_worker *root_worker = pollset->root_worker; grpc_pollset_worker *root_worker = pollset->root_worker;
@@ -1076,7 +1085,11 @@ static grpc_error *pollset_kick(grpc_pollset *pollset,
} }
goto done; goto done;
} }
} else if (specific_worker->kick_state == KICKED) {
GPR_UNREACHABLE_CODE(goto done);
}
if (specific_worker->kick_state == KICKED) {
if (GRPC_TRACER_ON(grpc_polling_trace)) { if (GRPC_TRACER_ON(grpc_polling_trace)) {
gpr_log(GPR_ERROR, " .. specific worker already kicked"); gpr_log(GPR_ERROR, " .. specific worker already kicked");
} }

Loading…
Cancel
Save