xds interop: add traffic splitting

pull/23009/head
Menghan Li 5 years ago
parent cf0727665d
commit 6764d1b441
  1. 156
      tools/run_tests/run_xds_tests.py

@ -52,6 +52,7 @@ _TEST_CASES = [
'round_robin',
'secondary_locality_gets_no_requests_on_partial_primary_failure',
'secondary_locality_gets_requests_on_primary_failure',
'traffic_splitting',
]
@ -103,7 +104,7 @@ argp.add_argument('--zone', default='us-central1-a')
argp.add_argument('--secondary_zone',
default='us-west1-b',
help='Zone to use for secondary TD locality tests')
argp.add_argument('--qps', default=10, type=int, help='Client QPS')
argp.add_argument('--qps', default=100, type=int, help='Client QPS')
argp.add_argument(
'--wait_for_backend_sec',
default=1200,
@ -290,7 +291,7 @@ def _verify_rpcs_to_given_backends(backends, timeout_sec, num_rpcs,
def wait_until_all_rpcs_go_to_given_backends_or_fail(backends,
timeout_sec,
num_rpcs=100):
num_rpcs=_NUM_TEST_RPCS):
_verify_rpcs_to_given_backends(backends,
timeout_sec,
num_rpcs,
@ -299,13 +300,48 @@ def wait_until_all_rpcs_go_to_given_backends_or_fail(backends,
def wait_until_all_rpcs_go_to_given_backends(backends,
timeout_sec,
num_rpcs=100):
num_rpcs=_NUM_TEST_RPCS):
_verify_rpcs_to_given_backends(backends,
timeout_sec,
num_rpcs,
allow_failures=False)
def compareDistributions(actual_distribution, expected_distribution, threshold):
"""Compare if two distributions are similar.
Args:
actual_distribution: A list of floats, contains the actual distribution.
expected_distribution: A list of floats, contains the expected distribution.
threshold: Number within [0,100], the threshold percentage by which the
actual distribution can differ from the expected distribution.
Returns:
The similarity between the distributions as a boolean. Returns true if the
actual distribution lies within the threshold of the expected
distribution, false otherwise.
Raises:
ValueError: if threshold is not with in [0,100].
Exception: containing detailed error messages.
"""
if len(expected_distribution) != len(actual_distribution):
raise Exception(
'Error: expected and actual distributions have different size (%d vs %d)'
% (len(expected_distribution), len(actual_distribution)))
if threshold < 0 or threshold > 100:
raise ValueError('Value error: Threshold should be between 0 to 100')
threshold_fraction = threshold / 100.0
for expected, actual in zip(expected_distribution, actual_distribution):
if actual < (expected * (1 - threshold_fraction)):
raise Exception("actual(%f) < expected(%f-%d%%)" %
(actual, expected, threshold))
if actual > (expected * (1 + threshold_fraction)):
raise Exception("actual(%f) > expected(%f+%d%%)" %
(actual, expected, threshold))
return True
def test_backends_restart(gcp, backend_service, instance_group):
logger.info('Running test_backends_restart')
instance_names = get_instance_names(gcp, instance_group)
@ -499,6 +535,87 @@ def test_secondary_locality_gets_requests_on_primary_failure(
patch_backend_instances(gcp, backend_service, [primary_instance_group])
def test_traffic_splitting(gcp, original_backend_service, instance_group,
alternate_backend_service, same_zone_instance_group):
logger.info('Running test_traffic_splitting')
logger.info('waiting for original to become healthy')
wait_for_healthy_backends(gcp, original_backend_service, instance_group)
patch_backend_instances(gcp, alternate_backend_service,
[same_zone_instance_group])
logger.info('waiting for alternate to become healthy')
wait_for_healthy_backends(gcp, alternate_backend_service,
same_zone_instance_group)
original_backend_instances = get_instance_names(gcp, instance_group)
logger.info('original backends instances: %s', original_backend_instances)
alternate_backend_instances = get_instance_names(gcp,
same_zone_instance_group)
logger.info('alternate backends instances: %s', alternate_backend_instances)
# Start with all traffic going to original_backend_service.
logger.info('waiting for traffic to all go to original')
wait_until_all_rpcs_go_to_given_backends(original_backend_instances,
_WAIT_FOR_STATS_SEC)
try:
# Path urlmap, change route action to traffic splitting between original
# and alternate.
logger.info('patching url map with traffic splitting')
expected_service_percentage = [20, 80]
patch_url_map_weighted_backend_services(
gcp, {
original_backend_service: expected_service_percentage[0],
alternate_backend_service: expected_service_percentage[1],
})
expected_instance_percentage = [
expected_service_percentage[0] * 1.0 /
len(original_backend_instances)
] * len(original_backend_instances) + [
expected_service_percentage[1] * 1.0 /
len(alternate_backend_instances)
] * len(alternate_backend_instances)
# Wait for traffic to go to both services.
logger.info(
'waiting for traffic to go to all backends (including alternate)')
wait_until_all_rpcs_go_to_given_backends(
original_backend_instances + alternate_backend_instances,
_WAIT_FOR_STATS_SEC)
# Verify that weights between two services is expected.
retry_count = 3
for i in range(retry_count):
stats = get_client_stats(_NUM_TEST_RPCS, _WAIT_FOR_STATS_SEC)
got_instance_count = [
stats.rpcs_by_peer[i] for i in original_backend_instances
] + [stats.rpcs_by_peer[i] for i in alternate_backend_instances]
total_count = sum(got_instance_count)
got_instance_percentage = [
x * 100.0 / total_count for x in got_instance_count
]
try:
compareDistributions(got_instance_percentage,
expected_instance_percentage, 5)
except Exception as e:
logger.warning('attempt %d', i)
logger.warning('got percentage: %s', got_instance_percentage)
logger.warning('expected percentage: %s',
expected_instance_percentage)
logger.warning(e)
if i == retry_count - 1:
raise Exception(
'RPC distribution (%s) differs from expected (%s)',
got_instance_percentage, expected_instance_percentage)
else:
logger.info("success")
finally:
patch_url_map_backend_service(gcp, original_backend_service)
patch_backend_instances(gcp, alternate_backend_service, [])
def get_startup_script(path_to_server_binary, service_port):
if path_to_server_binary:
return "nohup %s --port=%d 1>/dev/null &" % (path_to_server_binary,
@ -947,12 +1064,41 @@ def resize_instance_group(gcp,
def patch_url_map_backend_service(gcp, backend_service):
'''change url_map's backend service'''
config = {
'defaultService':
backend_service.url,
'pathMatchers': [{
'name': _PATH_MATCHER_NAME,
'defaultService': backend_service.url,
'defaultRouteAction': None,
}]
}
logger.debug('Sending GCP request with body=%s', config)
result = gcp.compute.urlMaps().patch(
project=gcp.project, urlMap=gcp.url_map.name,
body=config).execute(num_retries=_GCP_API_RETRIES)
wait_for_global_operation(gcp, result['name'])
def patch_url_map_weighted_backend_services(gcp, servicesWithWeights):
'''
change url_map's only path matcher's default route action
to traffic splitting. serviceWithWeights is a map from service
to weights.
'''
weightedBackendServices = [{
'backendService': service.url,
'weight': w,
} for service, w in servicesWithWeights.items()]
logger.debug('patching route action to %s', weightedBackendServices)
config = {
'pathMatchers': [{
'name': _PATH_MATCHER_NAME,
'defaultService': None,
'defaultRouteAction': {
'weightedBackendServices': weightedBackendServices
}
}]
}
logger.debug('Sending GCP request with body=%s', config)
@ -1245,6 +1391,10 @@ try:
test_secondary_locality_gets_requests_on_primary_failure(
gcp, backend_service, instance_group,
secondary_zone_instance_group)
elif test_case == 'traffic_splitting':
test_traffic_splitting(gcp, backend_service, instance_group,
alternate_backend_service,
same_zone_instance_group)
else:
logger.error('Unknown test case: %s', test_case)
sys.exit(1)

Loading…
Cancel
Save