Adopt reviewer's advice

* Make graceful shutdown support calls from multi-coroutine * Added comments * Make graceful shutdown success test case more strict * Add 2 more concurrent graceful shutdown tests
5 years ago · 8168b9e1c9
parent 983e36d5a3
commit 8168b9e1c9
7 changed files with 153 additions and 75 deletions
--- a/src/python/grpcio/grpc/_cython/_cygrpc/aio/callbackcontext.pxd.pxi
+++ b/src/python/grpcio/grpc/_cython/_cygrpc/aio/callbackcontext.pxd.pxi
@ -15,6 +15,15 @@
 cimport cpython

 cdef struct CallbackContext:
+    # C struct to store callback context in the form of pointers.
+    #    
+    #   Attributes:
+    #     functor: A grpc_experimental_completion_queue_functor represents the
+    #       callback function in the only way C-Core understands.
+    #     waiter: An asyncio.Future object that fulfills when the callback is
+    #       invoked by C-Core.
+    #     failure_handler: A CallbackFailureHandler object that called when C-Core
+    #       returns 'success == 0' state.
    grpc_experimental_completion_queue_functor functor
-    cpython.PyObject *waiter  # asyncio.Future
-    cpython.PyObject *failure_handler  # cygrpc.CallbackFailureHandler
+    cpython.PyObject *waiter
+    cpython.PyObject *failure_handler
--- a/src/python/grpcio/grpc/_cython/_cygrpc/aio/server.pxd.pxi
+++ b/src/python/grpcio/grpc/_cython/_cygrpc/aio/server.pxd.pxi
@ -61,3 +61,6 @@ cdef class AioServer:
    cdef AioServerStatus _status
    cdef object _loop  # asyncio.EventLoop
    cdef object _serving_task  # asyncio.Task
+    cdef object _shutdown_lock  # asyncio.Lock
+    cdef object _shutdown_completed  # asyncio.Future
+    cdef CallbackWrapper _shutdown_callback_wrapper
--- a/src/python/grpcio/grpc/_cython/_cygrpc/aio/server.pyx.pxi
+++ b/src/python/grpcio/grpc/_cython/_cygrpc/aio/server.pyx.pxi
@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# TODO(https://github.com/grpc/grpc/issues/20850) refactor this.
 _LOGGER = logging.getLogger(__name__)


@ -282,6 +283,12 @@ cdef class AioServer:
        self.add_generic_rpc_handlers(generic_handlers)
        self._serving_task = None

+        self._shutdown_lock = asyncio.Lock()
+        self._shutdown_completed = self._loop.create_future()
+        self._shutdown_callback_wrapper = CallbackWrapper(
+            self._shutdown_completed,
+            SERVER_SHUTDOWN_FAILURE_HANDLER)
+
        if interceptors:
            raise NotImplementedError()
        if maximum_concurrent_rpcs:
@ -309,7 +316,7 @@ cdef class AioServer:
        server_started.set_result(True)

        while True:
-            # When shutdown process starts, no more new connections.
+            # When shutdown begins, no more new connections.
            if self._status != AIO_SERVER_STATUS_RUNNING:
                break

@ -336,34 +343,14 @@ cdef class AioServer:
        # Otherwise, the actual start time of the server is un-controllable.
        await server_started

-    async def shutdown(self, grace):
-        """Gracefully shutdown the C-Core server.
-
-        Application should only call shutdown once.
-
-        Args:
-          grace: An optional float indicates the length of grace period in
-            seconds.
-        """
-        if self._status != AIO_SERVER_STATUS_RUNNING:
-            # The server either is shutting down, or not started.
-            return
-        cdef object shutdown_completed = self._loop.create_future()
-        cdef CallbackWrapper wrapper = CallbackWrapper(
-            shutdown_completed,
-            SERVER_SHUTDOWN_FAILURE_HANDLER)
-        # NOTE(lidiz) Without Py_INCREF, the wrapper object will be destructed
-        # when calling "await". This is an over-optimization by Cython.
-        cpython.Py_INCREF(wrapper)
-
+    async def _start_shutting_down(self):
+        """Prepares the server to shutting down (NOT coroutine-safe)."""
        # Starts the shutdown process.
-        # The shutdown callback won't be called unless there is no live RPC.
+        # The shutdown callback won't be called until there is no live RPC.
        grpc_server_shutdown_and_notify(
            self._server.c_server,
            self._cq._cq,
-            wrapper.c_functor())
-        self._server.is_shutting_down = True
-        self._status = AIO_SERVER_STATUS_STOPPING
+            self._shutdown_callback_wrapper.c_functor())

        # Ensures the serving task (coroutine) exits.
        try:
@ -371,28 +358,56 @@ cdef class AioServer:
        except _RequestCallError:
            pass

+    async def shutdown(self, grace):
+        """Gracefully shutdown the C-Core server.
+
+        Application should only call shutdown once.
+
+        Args:
+          grace: An optional float indicating the length of grace period in
+            seconds.
+        """
+        if self._status == AIO_SERVER_STATUS_READY or self._status == AIO_SERVER_STATUS_STOPPED:
+            return
+
+        async with self._shutdown_lock:
+            if self._status == AIO_SERVER_STATUS_RUNNING:
+                await self._start_shutting_down()
+                self._server.is_shutting_down = True
+                self._status = AIO_SERVER_STATUS_STOPPING
+
        if grace is None:
            # Directly cancels all calls
            grpc_server_cancel_all_calls(self._server.c_server)
-            await shutdown_completed
+            await self._shutdown_completed
        else:
            try:
-                await asyncio.wait_for(asyncio.shield(shutdown_completed), grace)
+                await asyncio.wait_for(asyncio.shield(self._shutdown_completed), grace)
            except asyncio.TimeoutError:
                # Cancels all ongoing calls by the end of grace period.
                grpc_server_cancel_all_calls(self._server.c_server)
-                await shutdown_completed
+                await self._shutdown_completed

-        # Keeps wrapper object alive until now.
-        cpython.Py_DECREF(wrapper)
-        grpc_server_destroy(self._server.c_server)
-        self._server.c_server = NULL
-        self._server.is_shutdown = True
-        self._status = AIO_SERVER_STATUS_STOPPED
+        async with self._shutdown_lock:
+            if self._status == AIO_SERVER_STATUS_STOPPING:
+                grpc_server_destroy(self._server.c_server)
+                self._server.c_server = NULL
+                self._server.is_shutdown = True
+                self._status = AIO_SERVER_STATUS_STOPPED

-        # Shuts down the completion queue
-        await self._cq.shutdown()
+                # Shuts down the completion queue
+                await self._cq.shutdown()
+    
+    async def wait_for_termination(self, float timeout):
+        if timeout is None:
+            await self._shutdown_completed
+        else:
+            try:
+                await asyncio.wait_for(self._shutdown_completed, timeout)
+            except asyncio.TimeoutError:
+                return False
+        return True

    def __dealloc__(self):
        if self._status != AIO_SERVER_STATUS_STOPPED:
-            _LOGGER.error('Server is not stopped while deallocation: %d', self._status)
+            _LOGGER.error('__dealloc__ called on running server: %d', self._status)
--- a/src/python/grpcio/grpc/_cython/_cygrpc/server.pyx.pxi
+++ b/src/python/grpcio/grpc/_cython/_cygrpc/server.pyx.pxi
@ -66,8 +66,8 @@ cdef class Server:
    
    Args:
      backup_queue: a bool indicates whether to spawn a backup completion
-        queue. In case of no CQ is bound to the server, and the shutdown
-        process of server becomes un-observable.
+        queue. In the case that no CQ is bound to the server, and the shutdown
+        of server becomes un-observable.
    """
    if self.is_started:
      raise ValueError("the server has already started")
--- a/src/python/grpcio/grpc/experimental/aio/_server.py
+++ b/src/python/grpcio/grpc/experimental/aio/_server.py
@ -29,8 +29,6 @@ class Server:
        self._server = cygrpc.AioServer(self._loop, thread_pool,
                                        generic_handlers, interceptors, options,
                                        maximum_concurrent_rpcs, compression)
-        self._shutdown_started = False
-        self._shutdown_future = self._loop.create_future()

    def add_generic_rpc_handlers(
            self,
@ -92,26 +90,23 @@ class Server:
        This method immediately stops the server from servicing new RPCs in
        all cases.

-        If a grace period is specified, all RPCs active at the end of the grace
-        period are aborted.
+        If a grace period is specified, this method returns immediately and all
+        RPCs active at the end of the grace period are aborted. If a grace
+        period is not specified (by passing None for grace), all existing RPCs
+        are aborted immediately and this method blocks until the last RPC
+        handler terminates.

-        If a grace period is not specified (by passing None for `grace`), all
-        existing RPCs are aborted immediately and this method blocks until the
-        last RPC handler terminates.
-
-        Only the first call to "stop" sets the length of grace period.
-        Additional calls is allowed and will block until the termination of
-        the server.
+        This method is idempotent and may be called at any time. Passing a
+        smaller grace value in a subsequent call will have the effect of
+        stopping the Server sooner (passing None will have the effect of
+        stopping the server immediately). Passing a larger grace value in a
+        subsequent call will not have the effect of stopping the server later
+        (i.e. the most restrictive grace value is used).

        Args:
          grace: A duration of time in seconds or None.
        """
-        if self._shutdown_started:
-            await self._shutdown_future
-        else:
-            self._shutdown_started = True
-            await self._server.shutdown(grace)
-            self._shutdown_future.set_result(None)
+        await self._server.shutdown(grace)

    async def wait_for_termination(self,
                                   timeout: Optional[float] = None) -> bool:
@ -135,14 +130,7 @@ class Server:
        Returns:
          A bool indicates if the operation times out.
        """
-        if timeout is None:
-            await self._shutdown_future
-        else:
-            try:
-                await asyncio.wait_for(self._shutdown_future, timeout)
-            except asyncio.TimeoutError:
-                return False
-        return True
+        return await self._server.wait_for_termination(timeout)


 def server(migration_thread_pool=None,
--- a/src/python/grpcio_tests/tests_aio/unit/channel_test.py
+++ b/src/python/grpcio_tests/tests_aio/unit/channel_test.py
@ -108,7 +108,7 @@ class TestChannel(AioTestBase):
                response_deserializer=messages_pb2.SimpleResponse.FromString)
            response = await hi(messages_pb2.SimpleRequest())

-            self.assertEqual(type(response), messages_pb2.SimpleResponse)
+            self.assertIs(type(response), messages_pb2.SimpleResponse)

            await channel.close()

--- a/src/python/grpcio_tests/tests_aio/unit/server_test.py
+++ b/src/python/grpcio_tests/tests_aio/unit/server_test.py
@ -14,6 +14,7 @@

 import logging
 import unittest
+import time

 import grpc
 from grpc.experimental import aio
@ -22,7 +23,7 @@ from tests.unit.framework.common import test_constants

 _SIMPLE_UNARY_UNARY = '/test/SimpleUnaryUnary'
 _BLOCK_FOREVER = '/test/BlockForever'
-_BLOCK_SHORTLY = '/test/BlockShortly'
+_BLOCK_BRIEFLY = '/test/BlockBriefly'

 _REQUEST = b'\x00\x00\x00'
 _RESPONSE = b'\x01\x01\x01'
@ -40,7 +41,7 @@ class _GenericHandler(grpc.GenericRpcHandler):
    async def _block_forever(self, unused_request, unused_context):
        await asyncio.get_event_loop().create_future()

-    async def _block_shortly(self, unused_request, unused_context):
+    async def _BLOCK_BRIEFLY(self, unused_request, unused_context):
        await asyncio.sleep(test_constants.SHORT_TIMEOUT / 2)
        return _RESPONSE

@ -50,8 +51,8 @@ class _GenericHandler(grpc.GenericRpcHandler):
            return grpc.unary_unary_rpc_method_handler(self._unary_unary)
        if handler_details.method == _BLOCK_FOREVER:
            return grpc.unary_unary_rpc_method_handler(self._block_forever)
-        if handler_details.method == _BLOCK_SHORTLY:
-            return grpc.unary_unary_rpc_method_handler(self._block_shortly)
+        if handler_details.method == _BLOCK_BRIEFLY:
+            return grpc.unary_unary_rpc_method_handler(self._BLOCK_BRIEFLY)

    async def wait_for_call(self):
        await self._called
@ -87,6 +88,7 @@ class TestServer(AioTestBase):
            await server.stop(None)

        self.loop.run_until_complete(test_shutdown_body())
+        # Ensures no SIGSEGV triggered, and ends within timeout.

    def test_shutdown_after_call(self):

@ -107,12 +109,18 @@ class TestServer(AioTestBase):

            channel = aio.insecure_channel(server_target)
            call_task = self.loop.create_task(
-                channel.unary_unary(_BLOCK_SHORTLY)(_REQUEST))
+                channel.unary_unary(_BLOCK_BRIEFLY)(_REQUEST))
            await generic_handler.wait_for_call()

+            shutdown_start_time = time.time()
            await server.stop(test_constants.SHORT_TIMEOUT)
+            grace_period_length = time.time() - shutdown_start_time
+            self.assertGreater(grace_period_length,
+                               test_constants.SHORT_TIMEOUT / 3)
+
+            # Validates the states.
            await channel.close()
-            self.assertEqual(await call_task, _RESPONSE)
+            self.assertEqual(_RESPONSE, await call_task)
            self.assertTrue(call_task.done())

        self.loop.run_until_complete(test_graceful_shutdown_success_body())
@ -131,13 +139,68 @@ class TestServer(AioTestBase):

            with self.assertRaises(aio.AioRpcError) as exception_context:
                await call_task
-            self.assertEqual(exception_context.exception.code(),
-                             grpc.StatusCode.UNAVAILABLE)
+            self.assertEqual(grpc.StatusCode.UNAVAILABLE,
+                             exception_context.exception.code())
            self.assertIn('GOAWAY', exception_context.exception.details())
            await channel.close()

        self.loop.run_until_complete(test_graceful_shutdown_failed_body())

+    def test_concurrent_graceful_shutdown(self):
+
+        async def test_concurrent_graceful_shutdown_body():
+            server_target, server, generic_handler = await _start_test_server()
+
+            channel = aio.insecure_channel(server_target)
+            call_task = self.loop.create_task(
+                channel.unary_unary(_BLOCK_BRIEFLY)(_REQUEST))
+            await generic_handler.wait_for_call()
+
+            # Expects the shortest grace period to be effective.
+            shutdown_start_time = time.time()
+            await asyncio.gather(
+                server.stop(test_constants.LONG_TIMEOUT),
+                server.stop(test_constants.SHORT_TIMEOUT),
+                server.stop(test_constants.LONG_TIMEOUT),
+            )
+            grace_period_length = time.time() - shutdown_start_time
+            self.assertGreater(grace_period_length,
+                               test_constants.SHORT_TIMEOUT / 3)
+
+            await channel.close()
+            self.assertEqual(_RESPONSE, await call_task)
+            self.assertTrue(call_task.done())
+
+        self.loop.run_until_complete(test_concurrent_graceful_shutdown_body())
+
+    def test_concurrent_graceful_shutdown_immediate(self):
+
+        async def test_concurrent_graceful_shutdown_immediate_body():
+            server_target, server, generic_handler = await _start_test_server()
+
+            channel = aio.insecure_channel(server_target)
+            call_task = self.loop.create_task(
+                channel.unary_unary(_BLOCK_FOREVER)(_REQUEST))
+            await generic_handler.wait_for_call()
+
+            # Expects no grace period, due to the "server.stop(None)".
+            await asyncio.gather(
+                server.stop(test_constants.LONG_TIMEOUT),
+                server.stop(None),
+                server.stop(test_constants.SHORT_TIMEOUT),
+                server.stop(test_constants.LONG_TIMEOUT),
+            )
+
+            with self.assertRaises(aio.AioRpcError) as exception_context:
+                await call_task
+            self.assertEqual(grpc.StatusCode.UNAVAILABLE,
+                             exception_context.exception.code())
+            self.assertIn('GOAWAY', exception_context.exception.details())
+            await channel.close()
+
+        self.loop.run_until_complete(
+            test_concurrent_graceful_shutdown_immediate_body())
+
    @unittest.skip('https://github.com/grpc/grpc/issues/20818')
    def test_shutdown_before_call(self):