diff --git a/doc/xds-test-descriptions.md b/doc/xds-test-descriptions.md index b64431fe029..ec1b09763b9 100644 --- a/doc/xds-test-descriptions.md +++ b/doc/xds-test-descriptions.md @@ -463,6 +463,42 @@ Test driver asserts: 1. All backends in the primary locality receive at least 1 RPC. 1. No backends in the secondary locality receive RPCs. + +### load_report_based_failover + +This test verifies that traffic is partially diverted to a secondary locality +when the QPS is greater than the configured RPS in the primary locality. + +Client parameters: + +1. --num_channels=1 +1. --qps=100 + +Load balancer configuration: + +1. The primary MIG with 2 backends in the same zone as the client +1. The secondary MIG with 2 backends in a different zone + +Test driver asserts: + +1. All backends in the primary locality receive at least 1 RPC. +1. No backends in the secondary locality receive RPCs. + +The test driver sets `balancingMode` to `RATE`, and `maxRate` to 20 in the primary locality. + +Test driver asserts: + +1. All backends in the primary locality receive at least 1 RPC. +1. All backends in the secondary locality receive at least 1 RPC. + +The test driver sets `maxRate` to 120 in the primary locality. + +Test driver asserts: + +1. All backends in the primary locality receive at least 1 RPC. +1. No backends in the secondary locality receive RPCs. 
+ + ### circuit_breaking This test verifies that the maximum number of outstanding requests is limited diff --git a/tools/run_tests/run_xds_tests.py b/tools/run_tests/run_xds_tests.py index 8e712375afc..783170f6707 100755 --- a/tools/run_tests/run_xds_tests.py +++ b/tools/run_tests/run_xds_tests.py @@ -51,6 +51,7 @@ _TEST_CASES = [ 'backends_restart', 'change_backend_service', 'gentle_failover', + 'load_report_based_failover', 'ping_pong', 'remove_instance_group', 'round_robin', @@ -619,6 +620,56 @@ def test_gentle_failover(gcp, _WAIT_FOR_BACKEND_SEC) +def test_load_report_based_failover(gcp, backend_service, + primary_instance_group, + secondary_instance_group): + logger.info('Running test_load_report_based_failover') + try: + patch_backend_service( + gcp, backend_service, + [primary_instance_group, secondary_instance_group]) + primary_instance_names = get_instance_names(gcp, primary_instance_group) + secondary_instance_names = get_instance_names(gcp, + secondary_instance_group) + wait_for_healthy_backends(gcp, backend_service, primary_instance_group) + wait_for_healthy_backends(gcp, backend_service, + secondary_instance_group) + wait_until_all_rpcs_go_to_given_backends(primary_instance_names, + _WAIT_FOR_STATS_SEC) + # Set primary locality's balance mode to RATE, and RPS to 20% of the + # client's QPS. The secondary locality will be used. + max_rate = int(args.qps * 1 / 5) + logger.info('Patching backend service to RATE with %d max_rate', + max_rate) + patch_backend_service( + gcp, + backend_service, [primary_instance_group, secondary_instance_group], + balancing_mode='RATE', + max_rate=max_rate) + wait_until_all_rpcs_go_to_given_backends( + primary_instance_names + secondary_instance_names, + _WAIT_FOR_BACKEND_SEC) + + # Set primary locality's balance mode to RATE, and RPS to 120% of the + # client's QPS. Only the primary locality will be used. 
+ max_rate = int(args.qps * 6 / 5) + logger.info('Patching backend service to RATE with %d max_rate', + max_rate) + patch_backend_service( + gcp, + backend_service, [primary_instance_group, secondary_instance_group], + balancing_mode='RATE', + max_rate=max_rate) + wait_until_all_rpcs_go_to_given_backends(primary_instance_names, + _WAIT_FOR_BACKEND_SEC) + logger.info("success") + finally: + patch_backend_service(gcp, backend_service, [primary_instance_group]) + instance_names = get_instance_names(gcp, primary_instance_group) + wait_until_all_rpcs_go_to_given_backends(instance_names, + _WAIT_FOR_BACKEND_SEC) + + def test_ping_pong(gcp, backend_service, instance_group): logger.info('Running test_ping_pong') wait_for_healthy_backends(gcp, backend_service, instance_group) @@ -1765,6 +1816,7 @@ def patch_backend_service(gcp, backend_service, instance_groups, balancing_mode='UTILIZATION', + max_rate=1, circuit_breakers=None): if gcp.alpha_compute: compute_to_use = gcp.alpha_compute @@ -1774,7 +1826,7 @@ def patch_backend_service(gcp, 'backends': [{ 'group': instance_group.url, 'balancingMode': balancing_mode, - 'maxRate': 1 if balancing_mode == 'RATE' else None + 'maxRate': max_rate if balancing_mode == 'RATE' else None } for instance_group in instance_groups], 'circuitBreakers': circuit_breakers, } @@ -2193,6 +2245,10 @@ try: elif test_case == 'gentle_failover': test_gentle_failover(gcp, backend_service, instance_group, secondary_zone_instance_group) + elif test_case == 'load_report_based_failover': + test_load_report_based_failover( + gcp, backend_service, instance_group, + secondary_zone_instance_group) elif test_case == 'ping_pong': test_ping_pong(gcp, backend_service, instance_group) elif test_case == 'remove_instance_group':