From 7ea88447d9c2a91ea0f013b335b1e4a6166aa529 Mon Sep 17 00:00:00 2001 From: Xuan Wang Date: Mon, 14 Oct 2024 15:16:27 -0700 Subject: [PATCH] [Cython AIO] Fix Aio tests time out issue (#37917) ### What's happening Some of our asyncio tests began timing out following a Cython upgrade to 3.0. This issue occurs consistently across both our Bazel and setup.py test environments. ### Why the time out After some investigation, we found that our code here:https://github.com/grpc/grpc/blob/4ffcdd4ab7976cac397fcff25b6f952d0b5c8168/src/python/grpcio/grpc/_cython/_cygrpc/aio/completion_queue.pyx.pxi#L115-L116 Was translated to this in Cython 0.29: ``` __pyx_f_7_cython_6cygrpc__unified_socket_write(__pyx_v_self->_write_fd); ``` And it changed to this in Cython 3.0: ``` __pyx_f_7_cython_6cygrpc__unified_socket_write(__pyx_v_self->_write_fd); if (unlikely(__Pyx_ErrOccurredWithGIL())) __PYX_ERR(7, 136, __pyx_L1_error) ``` Which indicates that this `nogil` function `_unified_socket_write` now requires GIL. ### What's new in Cython 3 * Cython 3 `cdef` functions with `void` return type will default to use `except *` as exception specification. * If function have `void` return type and defined as `nogil`, Cython will always re-acquire the GIL after the function call to check if an exception has been raised. * In some cases, this will cause a deadlock, especially if the function was called inside another `nogil` function. ### What's the fix * This PR changes those functions to use `noexcept` as exception specification since we don't expect them to throw any exception, and this is also the suggested workarounds in Cython documentation: https://cython.readthedocs.io/en/latest/src/userguide/language_basics.html#error-return-values ### Test * Tested locally by running `bazel test`, time our rate decreased from 5% to 0.3% in 3000 runs and 10s test time out. Closes #37917 COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/37917 from XuanWang-Amos:fix_cython_aio 063d27aee9192a26f36d16e6f056dcab1a48b9c9 PiperOrigin-RevId: 685851320 --- .../grpc/_cython/_cygrpc/aio/completion_queue.pxd.pxi | 4 ++-- .../grpc/_cython/_cygrpc/aio/completion_queue.pyx.pxi | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/python/grpcio/grpc/_cython/_cygrpc/aio/completion_queue.pxd.pxi b/src/python/grpcio/grpc/_cython/_cygrpc/aio/completion_queue.pxd.pxi index 578131f7eef..ed9cab2b2b0 100644 --- a/src/python/grpcio/grpc/_cython/_cygrpc/aio/completion_queue.pxd.pxi +++ b/src/python/grpcio/grpc/_cython/_cygrpc/aio/completion_queue.pxd.pxi @@ -23,7 +23,7 @@ IF UNAME_SYSNAME == "Windows": int win_socket_send "send" (WIN_SOCKET s, const char *buf, int len, int flags) -cdef void _unified_socket_write(int fd) nogil +cdef void _unified_socket_write(int fd) noexcept nogil cdef class BaseCompletionQueue: @@ -48,5 +48,5 @@ cdef class PollerCompletionQueue(BaseCompletionQueue): cdef object _write_socket # socket.socket cdef dict _loops # Mapping[asyncio.AbstractLoop, _BoundEventLoop] - cdef void _poll(self) nogil + cdef void _poll(self) noexcept nogil cdef shutdown(self) diff --git a/src/python/grpcio/grpc/_cython/_cygrpc/aio/completion_queue.pyx.pxi b/src/python/grpcio/grpc/_cython/_cygrpc/aio/completion_queue.pyx.pxi index eb33e41aebb..288da3c6f7e 100644 --- a/src/python/grpcio/grpc/_cython/_cygrpc/aio/completion_queue.pyx.pxi +++ b/src/python/grpcio/grpc/_cython/_cygrpc/aio/completion_queue.pyx.pxi @@ -22,12 +22,12 @@ cdef float _POLL_AWAKE_INTERVAL_S = 0.2 cdef bint _has_fd_monitoring = True IF UNAME_SYSNAME == "Windows": - cdef void _unified_socket_write(int fd) nogil: + cdef void _unified_socket_write(int fd) noexcept nogil: win_socket_send(fd, b"1", 1, 0) ELSE: from posix cimport unistd - cdef void _unified_socket_write(int fd) nogil: + cdef void _unified_socket_write(int fd) noexcept nogil: unistd.write(fd, b"1", 1) @@ -94,7 +94,7 @@ cdef class PollerCompletionQueue(BaseCompletionQueue): else: self._loops[loop] = _BoundEventLoop(loop, self._read_socket, self._handle_events) - cdef void _poll(self) nogil: + cdef void _poll(self) noexcept nogil: cdef grpc_event event cdef CallbackContext *context