From c73932b261fdb09d766cf2f847824e53a4989f14 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Fri, 8 Jan 2021 08:44:09 -0800 Subject: [PATCH 1/5] add timeout tests --- tools/run_tests/run_xds_tests.py | 131 ++++++++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 3 deletions(-) diff --git a/tools/run_tests/run_xds_tests.py b/tools/run_tests/run_xds_tests.py index e0122ac4723..6d68fb3696f 100755 --- a/tools/run_tests/run_xds_tests.py +++ b/tools/run_tests/run_xds_tests.py @@ -68,6 +68,7 @@ _ADDITIONAL_TEST_CASES = [ 'path_matching', 'header_matching', 'circuit_breaking', + 'timeout', ] @@ -345,7 +346,7 @@ def get_client_accumulated_stats(): return response -def configure_client(rpc_types, metadata): +def configure_client(rpc_types, metadata=[], timeout_sec=None): if CLIENT_HOSTS: hosts = CLIENT_HOSTS else: @@ -361,6 +362,8 @@ def configure_client(rpc_types, metadata): md.type = rpc_type md.key = md_key md.value = md_value + if timeout_sec: + request.timeout_sec = timeout_sec logger.debug( 'Invoking XdsUpdateClientConfigureService RPC to %s:%d: %s', host, args.stats_port, request) @@ -1395,7 +1398,7 @@ def test_circuit_breaking(gcp, original_backend_service, instance_group, configure_client([ messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL, messages_pb2.ClientConfigureRequest.RpcType.EMPTY_CALL - ], []) + ]) logger.info('Patching url map with %s', route_rules) patch_url_map_backend_service(gcp, extra_backend_service, @@ -1445,7 +1448,7 @@ def test_circuit_breaking(gcp, original_backend_service, instance_group, # Avoid new RPCs being outstanding (some test clients create threads # for sending RPCs) after restoring backend services. configure_client( - [messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL], []) + [messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL]) finally: patch_url_map_backend_service(gcp, original_backend_service) patch_backend_service(gcp, original_backend_service, [instance_group]) @@ -1454,6 +1457,125 @@ def test_circuit_breaking(gcp, original_backend_service, instance_group, set_validate_for_proxyless(gcp, True) +def test_timeout(gcp, original_backend_service, instance_group): + logger.info('Running test_timeout') + + logger.info('waiting for original backends to become healthy') + wait_for_healthy_backends(gcp, original_backend_service, instance_group) + + # UnaryCall -> maxStreamDuration:3s + route_rules = [{ + 'priority': 0, + 'matchRules': [{ + 'fullPathMatch': '/grpc.testing.TestService/UnaryCall' + }], + 'service': original_backend_service.url, + 'routeAction': { + 'maxStreamDuration': {'seconds': 3}, + }, + }] + patch_url_map_backend_service(gcp, + original_backend_service, + route_rules=route_rules) + # A list of tuples (testcase_name, {client_config}, {expected_results}) + test_cases = [ + ( + 'app_timeout_exceeded', + # UnaryCall only with sleep-2; timeout=1s; calls timeout. + { + 'rpc_types': [ + messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL, + ], + 'metadata': [ + (messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL, + 'rpc-behavior', 'sleep-2'), + ], + 'timeout_sec': 1, + }, + { + 'UNARY_CALL': 4, # DEADLINE_EXCEEDED + }, + ), ( + 'timeout_not_exceeded', + # UnaryCall only with no sleep; calls succeed. + { + 'rpc_types': [ + messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL, + ], + }, + { + 'UNARY_CALL': 0, + }, + ), ( + 'timeout_exceeded (UNARY_CALL), timeout_different_route (EMPTY_CALL)', + # UnaryCall and EmptyCall both sleep-4. + # UnaryCall timeouts, EmptyCall succeeds. + { + 'rpc_types': [ + messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL, + messages_pb2.ClientConfigureRequest.RpcType.EMPTY_CALL, + ], + 'metadata': [ + (messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL, + 'rpc-behavior', 'sleep-4'), + (messages_pb2.ClientConfigureRequest.RpcType.EMPTY_CALL, + 'rpc-behavior', 'sleep-4'), + ], + }, + { + 'UNARY_CALL': 4, # DEADLINE_EXCEEDED + 'EMPTY_CALL': 0, + }, + ), + ] + + try: + for (testcase_name, client_config, expected_results) in test_cases: + logger.info('starting case %s', testcase_name) + configure_client(**client_config) + # wait a second to help ensure the client stops sending RPCs with + # the old config. We will make multiple attempts if it is failing, + # but this improves confidence that the test is valid if the + # previous client_config would lead to the same results. + time.sleep(1) + # Each attempt takes 10 seconds; 20 attempts is equivalent to 200 + # second timeout. + attempt_count = 20 + before_stats = get_client_accumulated_stats() + if not before_stats.stats_per_method: + raise ValueError( + 'stats.stats_per_method is None, the interop client stats service does not support this test case' + ) + for i in range(attempt_count): + logger.info('%s: attempt %d', testcase_name, i) + + test_runtime_secs = 10 + time.sleep(test_runtime_secs) + after_stats = get_client_accumulated_stats() + + success = True + for rpc, status in expected_results.items(): + qty = (after_stats.stats_per_method[rpc].result[status] - + before_stats.stats_per_method[rpc].result[status]) + want = test_runtime_secs * args.qps + # Allow 10% deviation from expectation to reduce flakiness + if qty < (want * .9) or qty > (want * 1.1): + logger.info('%s: failed due to %s[%s]: got %d want ~%d', + testcase_name, rpc, status, qty, want) + success = False + if success: + break + logger.info('%s attempt %d failed', testcase_name, i) + before_stats = after_stats + else: + raise Exception( + '%s: timeout waiting for expected results: %s; got %s' + % (testcase_name, expected_results, + after_stats.stats_per_method)) + finally: + patch_url_map_backend_service(gcp, original_backend_service) + + def set_validate_for_proxyless(gcp, validate_for_proxyless): if not gcp.alpha_compute: logger.debug( @@ -2279,6 +2401,7 @@ try: bootstrap_path = bootstrap_file.name client_env['GRPC_XDS_BOOTSTRAP'] = bootstrap_path client_env['GRPC_XDS_EXPERIMENTAL_CIRCUIT_BREAKING'] = 'true' + client_env['GRPC_XDS_EXPERIMENTAL_ENABLE_TIMEOUT'] = 'true' test_results = {} failed_tests = [] for test_case in args.test_case: @@ -2386,6 +2509,8 @@ try: elif test_case == 'circuit_breaking': test_circuit_breaking(gcp, backend_service, instance_group, same_zone_instance_group) + elif test_case == 'timeout': + test_timeout(gcp, backend_service, instance_group) else: logger.error('Unknown test case: %s', test_case) sys.exit(1) From dcbfcdbf80b6f8a89aea1b513f358ea2ee354f7b Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Mon, 1 Feb 2021 13:19:05 -0800 Subject: [PATCH 2/5] formatting --- tools/run_tests/run_xds_tests.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/run_tests/run_xds_tests.py b/tools/run_tests/run_xds_tests.py index 6d68fb3696f..12d85614263 100755 --- a/tools/run_tests/run_xds_tests.py +++ b/tools/run_tests/run_xds_tests.py @@ -1471,7 +1471,9 @@ def test_timeout(gcp, original_backend_service, instance_group): }], 'service': original_backend_service.url, 'routeAction': { - 'maxStreamDuration': {'seconds': 3}, + 'maxStreamDuration': { + 'seconds': 3, + }, }, }] patch_url_map_backend_service(gcp, @@ -1495,7 +1497,8 @@ def test_timeout(gcp, original_backend_service, instance_group): { 'UNARY_CALL': 4, # DEADLINE_EXCEEDED }, - ), ( + ), + ( 'timeout_not_exceeded', # UnaryCall only with no sleep; calls succeed. { @@ -1506,7 +1509,8 @@ def test_timeout(gcp, original_backend_service, instance_group): { 'UNARY_CALL': 0, }, - ), ( + ), + ( 'timeout_exceeded (UNARY_CALL), timeout_different_route (EMPTY_CALL)', # UnaryCall and EmptyCall both sleep-4. # UnaryCall timeouts, EmptyCall succeeds. @@ -1569,9 +1573,9 @@ def test_timeout(gcp, original_backend_service, instance_group): before_stats = after_stats else: raise Exception( - '%s: timeout waiting for expected results: %s; got %s' - % (testcase_name, expected_results, - after_stats.stats_per_method)) + '%s: timeout waiting for expected results: %s; got %s' % + (testcase_name, expected_results, + after_stats.stats_per_method)) finally: patch_url_map_backend_service(gcp, original_backend_service) From 7927145445b4fb4be78948c73116ad2d89142e13 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Tue, 2 Feb 2021 10:34:28 -0800 Subject: [PATCH 3/5] add test description --- doc/xds-test-descriptions.md | 38 +++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/doc/xds-test-descriptions.md b/doc/xds-test-descriptions.md index ec1b09763b9..692ffb62613 100644 --- a/doc/xds-test-descriptions.md +++ b/doc/xds-test-descriptions.md @@ -532,4 +532,40 @@ The test driver updates MIG_1's circuit breakers with max_request = 800. Test driver asserts: -1. After reaching steady state, there are 800 UnaryCall RPCs in-flight. \ No newline at end of file +1. After reaching steady state, there are 800 UnaryCall RPCs in-flight. + +### timeout + +This test verifies that traffic along a route with a `max_stream_duration` set +will cause timeouts on streams open longer than that duration. + +Client parameters: + +1. `--num_channels=1` +1. `--qps=100` + +Route Configuration: + +Two routes: + +1. Path match for `/grpc.testing.TestService/UnaryCall`, with a `route_action` + containing `max_stream_duration` of 3 seconds. +1. Default route containing no `max_stream_duration` setting. + +There are four sub-tests: + +1. `app_timeout_exceeded` + 1. Test client configured to send UnaryCall RPCs with a 1s application + timeout, and a metadata of `rpc-behavior: sleep-2`. + 1. Test driver asserts client recieves ~100% status `DEADLINE_EXCEEDED`. +1. `timeout_not_exceeded` + 1. Test client configured to send UnaryCall RPCs with the default + application timeout (20 seconds), and no metadata. + 1. Test driver asserts client recieves ~100% status `OK`. +1. `timeout_exceeded` (executed with the below test case) +1. `timeout_different_route` + 1. Test client configured to send UnaryCall RPCs and EmptyCall RPCs with + the default application timeout (20 seconds), and a metadata of + `rpc-behavior: sleep-2`. + 1. Test driver asserts client recieves ~100% status `OK` for EmptyCall + and ~100% status `DEADLINE_EXCEEDED` for UnaryCall. From 90fdf4cfe612cfe090d882091c0f3e65cf079b48 Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Tue, 2 Feb 2021 10:36:03 -0800 Subject: [PATCH 4/5] 2->4 --- doc/xds-test-descriptions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/xds-test-descriptions.md b/doc/xds-test-descriptions.md index 692ffb62613..9c15f5b798f 100644 --- a/doc/xds-test-descriptions.md +++ b/doc/xds-test-descriptions.md @@ -566,6 +566,6 @@ There are four sub-tests: 1. `timeout_different_route` 1. Test client configured to send UnaryCall RPCs and EmptyCall RPCs with the default application timeout (20 seconds), and a metadata of - `rpc-behavior: sleep-2`. + `rpc-behavior: sleep-4`. 1. Test driver asserts client recieves ~100% status `OK` for EmptyCall and ~100% status `DEADLINE_EXCEEDED` for UnaryCall. From 26b5c14e0211a3902e5de4d3c671df64cd4629ac Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Tue, 2 Feb 2021 10:36:33 -0800 Subject: [PATCH 5/5] not 'a metadata' --- doc/xds-test-descriptions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/xds-test-descriptions.md b/doc/xds-test-descriptions.md index 9c15f5b798f..7f460873f84 100644 --- a/doc/xds-test-descriptions.md +++ b/doc/xds-test-descriptions.md @@ -556,7 +556,7 @@ There are four sub-tests: 1. `app_timeout_exceeded` 1. Test client configured to send UnaryCall RPCs with a 1s application - timeout, and a metadata of `rpc-behavior: sleep-2`. + timeout, and metadata of `rpc-behavior: sleep-2`. 1. Test driver asserts client recieves ~100% status `DEADLINE_EXCEEDED`. 1. `timeout_not_exceeded` 1. Test client configured to send UnaryCall RPCs with the default @@ -565,7 +565,7 @@ There are four sub-tests: 1. `timeout_exceeded` (executed with the below test case) 1. `timeout_different_route` 1. Test client configured to send UnaryCall RPCs and EmptyCall RPCs with - the default application timeout (20 seconds), and a metadata of + the default application timeout (20 seconds), and metadata of `rpc-behavior: sleep-4`. 1. Test driver asserts client recieves ~100% status `OK` for EmptyCall and ~100% status `DEADLINE_EXCEEDED` for UnaryCall.