Add additional xds test cases

pull/22193/head
Eric Gribkoff 5 years ago
parent 1576de5d35
commit ac6a79a108
  1. 261
      tools/run_tests/run_xds_tests.py

@ -53,13 +53,20 @@ argp.add_argument(
default='', default='',
help='Optional suffix for all generated GCP resource names. Useful to ' help='Optional suffix for all generated GCP resource names. Useful to '
'ensure distinct names across test runs.') 'ensure distinct names across test runs.')
argp.add_argument('--test_case', argp.add_argument(
default=None, '--test_case',
choices=[ default=None,
'all', choices=[
'ping_pong', 'all',
'round_robin', 'backends_restart',
]) 'change_backend_service',
'new_instance_group_receives_traffic',
'ping_pong',
'remove_instance_group',
'round_robin',
'secondary_locality_gets_requests_on_primary_failure',
'secondary_locality_gets_no_requests_on_partial_primary_failure',
])
argp.add_argument( argp.add_argument(
'--client_cmd', '--client_cmd',
default=None, default=None,
@ -202,6 +209,78 @@ def wait_until_only_given_instances_receive_load(backends,
raise Exception(error_msg) raise Exception(error_msg)
def test_backends_restart(gcp, backend_service, instance_group):
    """Check that load distribution is unchanged after all backends restart.

    Records the per-peer RPC distribution, scales the instance group down to
    zero, scales it back up to its original size, and then compares the new
    per-peer distribution with the recorded one.

    Args:
      gcp: GCP state object, passed through to the helper functions.
      backend_service: backend service whose health is awaited after restart.
      instance_group: instance group that is emptied and then refilled.

    Raises:
      Exception: if the sorted per-peer RPC counts before and after the
        restart differ (or if any of the wait helpers gives up).
    """
    instance_names = get_instance_names(gcp, instance_group)
    num_instances = len(instance_names)
    wait_until_only_given_instances_receive_load(instance_names,
                                                 _WAIT_FOR_STATS_SEC)
    stats = get_client_stats(_NUM_TEST_RPCS, _WAIT_FOR_STATS_SEC)
    try:
        resize_instance_group(gcp, instance_group, 0)
        # With no backends left, RPC failures are expected.
        wait_until_only_given_instances_receive_load([],
                                                     _WAIT_FOR_BACKEND_SEC,
                                                     allow_failures=True)
    finally:
        # Restore the original size even if the drain step failed, so that
        # later test cases do not start with an empty instance group.
        resize_instance_group(gcp, instance_group, num_instances)
    wait_for_healthy_backends(gcp, backend_service, instance_group)
    new_instance_names = get_instance_names(gcp, instance_group)
    wait_until_only_given_instances_receive_load(new_instance_names,
                                                 _WAIT_FOR_BACKEND_SEC)
    new_stats = get_client_stats(_NUM_TEST_RPCS, _WAIT_FOR_STATS_SEC)
    # Peer names may differ after the restart, so only the sorted per-peer
    # counts are compared.
    original_distribution = sorted(stats.rpcs_by_peer.values())
    new_distribution = sorted(new_stats.rpcs_by_peer.values())
    if original_distribution != new_distribution:
        raise Exception('Distributions do not match: ', stats, new_stats)
def test_change_backend_service(gcp, original_backend_service, instance_group,
                                alternate_backend_service,
                                same_zone_instance_group):
    """Check that traffic follows the URL map to a different backend service.

    Attaches same_zone_instance_group to the alternate backend service,
    points the URL map at it, and verifies that RPCs move to the alternate
    instances without failures. The URL map and the alternate backend
    service are reset afterwards.

    Args:
      gcp: GCP state object, passed through to the helper functions.
      original_backend_service: backend service the URL map starts with.
      instance_group: instance group behind original_backend_service.
      alternate_backend_service: backend service to switch the URL map to.
      same_zone_instance_group: instance group attached to the alternate
        backend service for the duration of the test.

    Raises:
      Exception: if any RPC fails right after the URL map is switched (or if
        any of the wait helpers gives up).
    """
    original_backend_instances = get_instance_names(gcp, instance_group)
    alternate_backend_instances = get_instance_names(gcp,
                                                     same_zone_instance_group)
    patch_backend_instances(gcp, alternate_backend_service,
                            [same_zone_instance_group])
    # The health check needs the backend service object, not the list of
    # instance names.
    wait_for_healthy_backends(gcp, original_backend_service, instance_group)
    wait_for_healthy_backends(gcp, alternate_backend_service,
                              same_zone_instance_group)
    wait_until_only_given_instances_receive_load(original_backend_instances,
                                                 _WAIT_FOR_STATS_SEC)
    try:
        patch_url_map_backend_service(gcp, alternate_backend_service)
        stats = get_client_stats(_NUM_TEST_RPCS, _WAIT_FOR_STATS_SEC)
        if stats.num_failures > 0:
            raise Exception('Unexpected failure: %s' % stats)
        wait_until_only_given_instances_receive_load(
            alternate_backend_instances, _WAIT_FOR_STATS_SEC)
    finally:
        # Undo the URL map switch and detach the alternate backends.
        patch_url_map_backend_service(gcp, original_backend_service)
        patch_backend_instances(gcp, alternate_backend_service, [])
def test_new_instance_group_receives_traffic(gcp, backend_service,
                                             instance_group,
                                             same_zone_instance_group):
    """Check that a newly attached instance group starts receiving RPCs.

    Starts from a state where only instance_group receives load, attaches
    same_zone_instance_group to the same backend service in RATE balancing
    mode, and waits until exactly the union of both groups' instances
    receives load. The backend service is reset to instance_group alone
    afterwards.

    Args:
      gcp: GCP state object, passed through to the helper functions.
      backend_service: backend service the groups are attached to.
      instance_group: instance group that is attached throughout.
      same_zone_instance_group: instance group added during the test.
    """
    initial_names = get_instance_names(gcp, instance_group)
    wait_until_only_given_instances_receive_load(initial_names,
                                                 _WAIT_FOR_STATS_SEC)
    both_groups = [instance_group, same_zone_instance_group]
    try:
        patch_backend_instances(gcp,
                                backend_service,
                                both_groups,
                                balancing_mode='RATE')
        for group in both_groups:
            wait_for_healthy_backends(gcp, backend_service, group)
        added_names = get_instance_names(gcp, same_zone_instance_group)
        wait_until_only_given_instances_receive_load(
            initial_names + added_names, _WAIT_FOR_BACKEND_SEC)
    finally:
        # Detach the extra group again regardless of the outcome.
        patch_backend_instances(gcp, backend_service, [instance_group])
def test_ping_pong(gcp, backend_service, instance_group): def test_ping_pong(gcp, backend_service, instance_group):
wait_for_healthy_backends(gcp, backend_service, instance_group) wait_for_healthy_backends(gcp, backend_service, instance_group)
instance_names = get_instance_names(gcp, instance_group) instance_names = get_instance_names(gcp, instance_group)
@ -222,6 +301,32 @@ def test_ping_pong(gcp, backend_service, instance_group):
raise Exception(error_msg) raise Exception(error_msg)
def test_remove_instance_group(gcp, backend_service, instance_group,
                               same_zone_instance_group):
    """Check that a detached instance group stops receiving RPCs.

    Attaches both groups to the backend service in RATE balancing mode,
    waits until both receive load, then detaches instance_group and waits
    until only same_zone_instance_group's instances receive load. Finally
    the backend service is reset to instance_group alone and the test waits
    for load to return to it.

    Args:
      gcp: GCP state object, passed through to the helper functions.
      backend_service: backend service the groups are attached to.
      instance_group: instance group that is removed during the test.
      same_zone_instance_group: instance group that stays attached.
    """
    # Looked up before the try block so that the cleanup in `finally` can
    # always refer to it, even when the setup steps below fail.
    instance_names = get_instance_names(gcp, instance_group)
    try:
        patch_backend_instances(gcp,
                                backend_service,
                                [instance_group, same_zone_instance_group],
                                balancing_mode='RATE')
        wait_for_healthy_backends(gcp, backend_service, instance_group)
        wait_for_healthy_backends(gcp, backend_service,
                                  same_zone_instance_group)
        same_zone_instance_names = get_instance_names(gcp,
                                                      same_zone_instance_group)
        wait_until_only_given_instances_receive_load(
            instance_names + same_zone_instance_names, _WAIT_FOR_BACKEND_SEC)
        patch_backend_instances(gcp,
                                backend_service, [same_zone_instance_group],
                                balancing_mode='RATE')
        wait_until_only_given_instances_receive_load(same_zone_instance_names,
                                                     _WAIT_FOR_BACKEND_SEC)
    finally:
        # Restore the single-group configuration and wait for it to take
        # effect before returning.
        patch_backend_instances(gcp, backend_service, [instance_group])
        wait_until_only_given_instances_receive_load(instance_names,
                                                     _WAIT_FOR_BACKEND_SEC)
def test_round_robin(gcp, backend_service, instance_group): def test_round_robin(gcp, backend_service, instance_group):
wait_for_healthy_backends(gcp, backend_service, instance_group) wait_for_healthy_backends(gcp, backend_service, instance_group)
instance_names = get_instance_names(gcp, instance_group) instance_names = get_instance_names(gcp, instance_group)
@ -242,6 +347,61 @@ def test_round_robin(gcp, backend_service, instance_group):
'for instance %s (%s)', threshold, instance, stats) 'for instance %s (%s)', threshold, instance, stats)
def test_secondary_locality_gets_no_requests_on_partial_primary_failure(
        gcp, backend_service, primary_instance_group,
        secondary_zone_instance_group):
    """Check that the secondary group stays idle when one primary is lost.

    Attaches both groups to the backend service, waits until only the
    primary instances receive load, shrinks the primary group by one
    instance, and waits until only the remaining primary instances receive
    load. The backend service and the primary group's size are restored
    afterwards.

    Args:
      gcp: GCP state object, passed through to the helper functions.
      backend_service: backend service the groups are attached to.
      primary_instance_group: group that is expected to take all traffic.
      secondary_zone_instance_group: group that must not receive traffic.
    """
    # Stays None until the primary group's size is known, so that cleanup
    # does not touch an unbound name when setup fails early.
    original_size = None
    try:
        patch_backend_instances(
            gcp, backend_service,
            [primary_instance_group, secondary_zone_instance_group])
        wait_for_healthy_backends(gcp, backend_service, primary_instance_group)
        wait_for_healthy_backends(gcp, backend_service,
                                  secondary_zone_instance_group)
        # Use the group passed in as a parameter rather than whatever
        # `instance_group` happens to be defined at module level.
        primary_instance_names = get_instance_names(gcp,
                                                    primary_instance_group)
        wait_until_only_given_instances_receive_load(primary_instance_names,
                                                     _WAIT_FOR_STATS_SEC)
        original_size = len(primary_instance_names)
        resize_instance_group(gcp, primary_instance_group, original_size - 1)
        remaining_instance_names = get_instance_names(gcp,
                                                      primary_instance_group)
        wait_until_only_given_instances_receive_load(remaining_instance_names,
                                                     _WAIT_FOR_BACKEND_SEC)
    finally:
        patch_backend_instances(gcp, backend_service, [primary_instance_group])
        if original_size is not None:
            resize_instance_group(gcp, primary_instance_group, original_size)
def test_secondary_locality_gets_requests_on_primary_failure(
        gcp, backend_service, primary_instance_group,
        secondary_zone_instance_group):
    """Check failover to the secondary group and failback to the primary.

    Attaches both groups to the backend service, waits until only the
    primary instances receive load, empties the primary group and waits
    until only the secondary instances receive load, then refills the
    primary group and waits until only its new instances receive load. The
    backend service is reset to the primary group alone afterwards.

    Args:
      gcp: GCP state object, passed through to the helper functions.
      backend_service: backend service the groups are attached to.
      primary_instance_group: group that is emptied and refilled.
      secondary_zone_instance_group: group expected to take over while the
        primary group is empty.
    """
    original_size = None
    # True while the primary group may be below its original size.
    primary_drained = False
    try:
        patch_backend_instances(
            gcp, backend_service,
            [primary_instance_group, secondary_zone_instance_group])
        wait_for_healthy_backends(gcp, backend_service, primary_instance_group)
        wait_for_healthy_backends(gcp, backend_service,
                                  secondary_zone_instance_group)
        # Use the group passed in as a parameter rather than whatever
        # `instance_group` happens to be defined at module level.
        primary_instance_names = get_instance_names(gcp,
                                                    primary_instance_group)
        secondary_instance_names = get_instance_names(
            gcp, secondary_zone_instance_group)
        wait_until_only_given_instances_receive_load(primary_instance_names,
                                                     _WAIT_FOR_BACKEND_SEC)
        original_size = len(primary_instance_names)
        # Flag is set before the call: a resize that times out may still
        # have removed instances.
        primary_drained = True
        resize_instance_group(gcp, primary_instance_group, 0)
        wait_until_only_given_instances_receive_load(secondary_instance_names,
                                                     _WAIT_FOR_BACKEND_SEC)
        resize_instance_group(gcp, primary_instance_group, original_size)
        primary_drained = False
        new_instance_names = get_instance_names(gcp, primary_instance_group)
        wait_for_healthy_backends(gcp, backend_service, primary_instance_group)
        wait_until_only_given_instances_receive_load(new_instance_names,
                                                     _WAIT_FOR_BACKEND_SEC)
    finally:
        patch_backend_instances(gcp, backend_service, [primary_instance_group])
        if primary_drained:
            # The test failed between draining and refilling; do not leave
            # the primary group empty for whatever runs next.
            resize_instance_group(gcp, primary_instance_group, original_size)
def create_instance_template(gcp, name, network, source_image): def create_instance_template(gcp, name, network, source_image):
config = { config = {
'name': name, 'name': name,
@ -496,6 +656,58 @@ def delete_instance_template(gcp):
logger.info('Delete failed: %s', http_error) logger.info('Delete failed: %s', http_error)
def patch_backend_instances(gcp,
                            backend_service,
                            instance_groups,
                            balancing_mode='UTILIZATION'):
    """Replace the backends of a backend service and wait for completion.

    Args:
      gcp: object exposing the compute API client as `compute` and the
        project id as `project`.
      backend_service: object whose `name` identifies the service to patch.
      instance_groups: iterable of objects whose `url` identifies each
        instance group to attach; an empty list detaches all backends.
      balancing_mode: value sent as each backend's 'balancingMode'. When it
        is 'RATE', 'maxRate' is sent as 1; otherwise 'maxRate' is None.
    """
    if balancing_mode == 'RATE':
        max_rate = 1
    else:
        max_rate = None
    backends = []
    for group in instance_groups:
        backends.append({
            'group': group.url,
            'balancingMode': balancing_mode,
            'maxRate': max_rate,
        })
    request = gcp.compute.backendServices().patch(
        project=gcp.project,
        backendService=backend_service.name,
        body={'backends': backends})
    operation = request.execute()
    # The patch call returns an operation; block until it has finished.
    wait_for_global_operation(gcp, operation['name'])
def resize_instance_group(gcp, instance_group, new_size, timeout_sec=120):
    """Resize a managed instance group and wait until the size is reached.

    Issues the resize request, waits for the resulting zone operation, and
    then polls the group's instance list once per second until it contains
    exactly new_size instances.

    Args:
      gcp: object exposing the compute API client as `compute` and the
        project id as `project`.
      instance_group: object whose `name` and `zone` identify the group.
      new_size: target number of instances.
      timeout_sec: how long to keep polling the instance count after the
        resize operation itself has completed.

    Raises:
      Exception: if the group does not reach new_size within timeout_sec.
    """
    result = gcp.compute.instanceGroupManagers().resize(
        project=gcp.project,
        zone=instance_group.zone,
        instanceGroupManager=instance_group.name,
        size=new_size).execute()
    # NOTE: the operation wait uses its own fixed 360s limit; timeout_sec
    # only bounds the polling loop below.
    wait_for_zone_operation(gcp,
                            instance_group.zone,
                            result['name'],
                            timeout_sec=360)
    start_time = time.time()
    while True:
        current_size = len(get_instance_names(gcp, instance_group))
        if current_size == new_size:
            break
        if time.time() - start_time > timeout_sec:
            # Name the actual group and sizes: this helper is used for any
            # instance group, not only the primary one.
            raise Exception(
                'Failed to resize instance group %s to %s instances '
                '(current size: %s)' %
                (instance_group.name, new_size, current_size))
        time.sleep(1)
def patch_url_map_backend_service(gcp, backend_service):
    """Point the URL map at the given backend service and wait for it.

    Both the URL map's default service and the default service of the path
    matcher named _PATH_MATCHER_NAME are set to backend_service.url.

    Args:
      gcp: object exposing the compute API client as `compute`, the project
        id as `project`, and the URL map as `url_map`.
      backend_service: object whose `url` becomes the new default service.
    """
    target_url = backend_service.url
    path_matcher = {
        'name': _PATH_MATCHER_NAME,
        'defaultService': target_url,
    }
    body = {
        'defaultService': target_url,
        'pathMatchers': [path_matcher],
    }
    request = gcp.compute.urlMaps().patch(project=gcp.project,
                                          urlMap=gcp.url_map.name,
                                          body=body)
    operation = request.execute()
    # Block until the global patch operation has finished.
    wait_for_global_operation(gcp, operation['name'])
def wait_for_global_operation(gcp, def wait_for_global_operation(gcp,
operation, operation,
timeout_sec=_WAIT_FOR_OPERATION_SEC): timeout_sec=_WAIT_FOR_OPERATION_SEC):
@ -665,8 +877,7 @@ try:
backend_service = add_backend_service(gcp, backend_service_name) backend_service = add_backend_service(gcp, backend_service_name)
alternate_backend_service = add_backend_service( alternate_backend_service = add_backend_service(
gcp, alternate_backend_service_name) gcp, alternate_backend_service_name)
create_url_map(gcp, url_map_name, gcp.backend_services[0], create_url_map(gcp, url_map_name, backend_service, service_host_name)
service_host_name)
create_target_http_proxy(gcp, target_http_proxy_name) create_target_http_proxy(gcp, target_http_proxy_name)
potential_service_ports = list(args.service_port_range) potential_service_ports = list(args.service_port_range)
random.shuffle(potential_service_ports) random.shuffle(potential_service_ports)
@ -766,12 +977,44 @@ try:
client_process = start_xds_client(cmd, gcp.service_port) client_process = start_xds_client(cmd, gcp.service_port)
if args.test_case == 'all': if args.test_case == 'all':
test_backends_restart(gcp, backend_service, instance_group)
test_change_backend_service(gcp, backend_service, instance_group,
alternate_backend_service,
same_zone_instance_group)
test_new_instance_group_receives_traffic(gcp, backend_service,
instance_group,
same_zone_instance_group)
test_ping_pong(gcp, backend_service, instance_group) test_ping_pong(gcp, backend_service, instance_group)
test_remove_instance_group(gcp, backend_service, instance_group,
same_zone_instance_group)
test_round_robin(gcp, backend_service, instance_group) test_round_robin(gcp, backend_service, instance_group)
test_secondary_locality_gets_no_requests_on_partial_primary_failure(
gcp, backend_service, instance_group, secondary_zone_instance_group)
test_secondary_locality_gets_requests_on_primary_failure(
gcp, backend_service, instance_group, secondary_zone_instance_group)
elif args.test_case == 'backends_restart':
test_backends_restart(gcp, backend_service, instance_group)
elif args.test_case == 'change_backend_service':
test_change_backend_service(gcp, backend_service, instance_group,
alternate_backend_service,
same_zone_instance_group)
elif args.test_case == 'new_instance_group_receives_traffic':
test_new_instance_group_receives_traffic(gcp, backend_service,
instance_group,
same_zone_instance_group)
elif args.test_case == 'ping_pong': elif args.test_case == 'ping_pong':
test_ping_pong(gcp, backend_service, instance_group) test_ping_pong(gcp, backend_service, instance_group)
elif args.test_case == 'remove_instance_group':
test_remove_instance_group(gcp, backend_service, instance_group,
same_zone_instance_group)
elif args.test_case == 'round_robin': elif args.test_case == 'round_robin':
test_round_robin(gcp, backend_service, instance_group) test_round_robin(gcp, backend_service, instance_group)
elif args.test_case == 'secondary_locality_gets_no_requests_on_partial_primary_failure':
test_secondary_locality_gets_no_requests_on_partial_primary_failure(
gcp, backend_service, instance_group, secondary_zone_instance_group)
elif args.test_case == 'secondary_locality_gets_requests_on_primary_failure':
test_secondary_locality_gets_requests_on_primary_failure(
gcp, backend_service, instance_group, secondary_zone_instance_group)
else: else:
logger.error('Unknown test case: %s', args.test_case) logger.error('Unknown test case: %s', args.test_case)
sys.exit(1) sys.exit(1)

Loading…
Cancel
Save