From b17addf8058df70a760d52004c5b32a9e8905820 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Fri, 20 Sep 2024 16:00:58 -0700 Subject: [PATCH] Don't grab GIL from C-Core during shutdown (retry with fix) This is a retry of #37762 with a fix. We no longer hold the C mutex and GIL at the same to avoid a deadlock. MetadataPluginCallCredentials passes a function pointer to C-core to be called during destruction. This function grabs GIL which may race between GIL destruction during process shutdown. Since GIL destruction happens after Python's exit handlers, we cana mark that Python is shutting down from an exit handler and don't grab GIL in this function afterwards using a C mutex. --- .../grpc/_cython/_cygrpc/credentials.pyx.pxi | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/src/python/grpcio/grpc/_cython/_cygrpc/credentials.pyx.pxi b/src/python/grpcio/grpc/_cython/_cygrpc/credentials.pyx.pxi index 181704cb85a..2e689bcec3d 100644 --- a/src/python/grpcio/grpc/_cython/_cygrpc/credentials.pyx.pxi +++ b/src/python/grpcio/grpc/_cython/_cygrpc/credentials.pyx.pxi @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import atexit def _spawn_callback_in_thread(cb_func, args): t = ForkManagedThread(target=cb_func, args=args) @@ -65,11 +66,59 @@ cdef int _get_metadata(void *state, return 0 # Asynchronous return -cdef void _destroy(void *state) except * with gil: - cpython.Py_DECREF(state) +# Protects access to GIL from _destroy() and to g_shutting_down. +# Do NOT hold this while holding GIL to prevent a deadlock. +cdef mutex g_shutdown_mu +# Number of C-core clean up calls in progress. Set to -1 when Python is shutting +# down. +cdef int g_shutting_down = 0 + +# This is called by C-core when the plugin is destroyed, which may race between +# GIL destruction during process shutdown. Since GIL destruction happens after +# Python's exit handlers, we mark that Python is shutting down from an exit +# handler and don't grab GIL in this function afterwards using a C mutex. +cdef void _destroy(void *state) nogil: + global g_shutdown_mu + global g_shutting_down + g_shutdown_mu.lock() + if g_shutting_down > -1: + g_shutting_down += 1 + g_shutdown_mu.unlock() + with gil: + cpython.Py_DECREF(state) + g_shutdown_mu.lock() + g_shutting_down -= 1 + g_shutdown_mu.unlock() grpc_shutdown() +g_shutdown_handler_registered = False + +def _maybe_register_shutdown_handler(): + global g_shutdown_handler_registered + if g_shutdown_handler_registered: + return + g_shutdown_handler_registered = True + atexit.register(_on_shutdown) + +cdef void _on_shutdown() nogil: + global g_shutdown_mu + global g_shutting_down + # Wait for up to ~2s if C-core is still cleaning up. + cdef int wait_ms = 10 + while wait_ms < 1500: + g_shutdown_mu.lock() + if g_shutting_down == 0: + g_shutting_down = -1 + g_shutdown_mu.unlock() + return + g_shutdown_mu.unlock() + with gil: + time.sleep(wait_ms / 1000) + wait_ms = wait_ms * 2 + with gil: + _LOGGER.error('Timed out waiting for C-core clean-up') + cdef class MetadataPluginCallCredentials(CallCredentials): def __cinit__(self, metadata_plugin, name): @@ -83,6 +132,7 @@ cdef class MetadataPluginCallCredentials(CallCredentials): c_metadata_plugin.state = self._metadata_plugin c_metadata_plugin.type = self._name cpython.Py_INCREF(self._metadata_plugin) + _maybe_register_shutdown_handler() fork_handlers_and_grpc_init() # TODO(yihuazhang): Expose min_security_level via the Python API so that # applications can decide what minimum security level their plugins require.