run_xds_tests: add fault_injection tests (#25641)

pull/25661/head
Doug Fawley 4 years ago committed by GitHub
parent 5139a012e7
commit 6276b66490
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 183
      tools/run_tests/run_xds_tests.py

@ -69,10 +69,11 @@ _ADDITIONAL_TEST_CASES = [
'header_matching',
'circuit_breaking',
'timeout',
'fault_injection',
]
# Test cases that require the V3 API. Skipped in older runs.
_V3_TEST_CASES = frozenset(['timeout'])
_V3_TEST_CASES = frozenset(['timeout', 'fault_injection'])
# Test cases that require the alpha API. Skipped for stable API runs.
_ALPHA_TEST_CASES = frozenset(['timeout'])
@ -1574,6 +1575,7 @@ def test_timeout(gcp, original_backend_service, instance_group):
testcase_name, rpc, status, qty, want)
success = False
if success:
logger.info('success')
break
logger.info('%s attempt %d failed', testcase_name, i)
before_stats = after_stats
@ -1586,6 +1588,182 @@ def test_timeout(gcp, original_backend_service, instance_group):
patch_url_map_backend_service(gcp, original_backend_service)
def test_fault_injection(gcp, original_backend_service, instance_group):
    """Check that per-route fault injection policies shape client RPC results.

    The URL map is patched with one route per scenario, each selected by an
    exact match on the ``fi_testcase`` request header, plus a catch-all route
    with no fault injection. For every scenario the interop client is
    reconfigured to send UNARY_CALL RPCs carrying that header, and the
    accumulated per-status RPC counts are compared against the expected
    distribution.

    Args:
        gcp: GCP state object passed through to the URL-map and validation
            helpers.
        original_backend_service: Backend service that every route points to;
            its ``url`` attribute is used in the route rules.
        instance_group: Instance group whose backends must be healthy before
            the test starts.

    Raises:
        ValueError: If the client reports no per-method accumulated stats.
        Exception: If a scenario never reaches its expected status
            distribution within the allotted attempts.
    """
    logger.info('Running test_fault_injection')

    logger.info('waiting for original backends to become healthy')
    wait_for_healthy_backends(gcp, original_backend_service, instance_group)

    testcase_header = 'fi_testcase'

    def _route(pri, name, fi_policy):
        # Route matched only when the request carries testcase_header == name.
        return {
            'priority': pri,
            'matchRules': [{
                'prefixMatch':
                    '/',
                'headerMatches': [{
                    'headerName': testcase_header,
                    'exactMatch': name,
                }],
            }],
            'service': original_backend_service.url,
            'routeAction': {
                'faultInjectionPolicy': fi_policy
            },
        }

    def _abort(pct):
        # Abort `pct` percent of requests with HTTP 401; the test cases below
        # expect this to surface as status 16 (UNAUTHENTICATED).
        return {
            'abort': {
                'httpStatus': 401,
                'percentage': pct,
            }
        }

    def _delay(pct):
        # Delay `pct` percent of requests by 20 seconds, well beyond the
        # 2 second client timeout used by the delay test cases.
        return {
            'delay': {
                'fixedDelay': {
                    'seconds': '20'
                },
                'percentage': pct,
            }
        }

    # Policy with both abort and delay configured at 0%.
    zero_route = _abort(0)
    zero_route.update(_delay(0))
    route_rules = [
        _route(0, 'zero_percent_fault_injection', zero_route),
        _route(1, 'always_delay', _delay(100)),
        _route(2, 'always_abort', _abort(100)),
        _route(3, 'delay_half', _delay(50)),
        _route(4, 'abort_half', _abort(50)),
        {
            'priority': 5,
            'matchRules': [{
                'prefixMatch': '/'
            }],
            'service': original_backend_service.url,
        },
    ]

    # A list of tuples (testcase_name, {client_config}, {code: percent}). Each
    # test case will set the testcase_header with the testcase_name for routing
    # to the appropriate config for the case, defined above.
    test_cases = [
        (
            'zero_percent_fault_injection',
            {},
            {
                0: 1
            },  # OK
        ),
        (
            'non_matching_fault_injection',  # Not in route_rules, above.
            {},
            {
                0: 1
            },  # OK
        ),
        (
            'always_delay',
            {
                'timeout_sec': 2
            },
            {
                4: 1
            },  # DEADLINE_EXCEEDED
        ),
        (
            'always_abort',
            {},
            {
                16: 1
            },  # UNAUTHENTICATED
        ),
        (
            'delay_half',
            {
                'timeout_sec': 2
            },
            {
                4: .5,
                0: .5
            },  # DEADLINE_EXCEEDED / OK: 50% / 50%
        ),
        (
            'abort_half',
            {},
            {
                16: .5,
                0: .5
            },  # UNAUTHENTICATED / OK: 50% / 50%
        )
    ]

    # Each attempt takes 10 seconds; 20 attempts is equivalent to 200
    # second timeout.
    attempt_count = 20
    test_runtime_secs = 10
    # Allow 10% deviation from expectation to reduce flakiness
    variance_allowed = 0.1
    rpc = 'UNARY_CALL'

    try:
        # The setup lives inside the try so that a failure while patching the
        # URL map still restores the original map and re-enables validation.
        set_validate_for_proxyless(gcp, False)
        patch_url_map_backend_service(gcp,
                                      original_backend_service,
                                      route_rules=route_rules)

        for (testcase_name, client_config, expected_results) in test_cases:
            logger.info('starting case %s', testcase_name)

            client_config['metadata'] = [
                (messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
                 testcase_header, testcase_name)
            ]
            client_config['rpc_types'] = [
                messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
            ]
            configure_client(**client_config)
            # wait a second to help ensure the client stops sending RPCs with
            # the old config. We will make multiple attempts if it is failing,
            # but this improves confidence that the test is valid if the
            # previous client_config would lead to the same results.
            time.sleep(1)

            before_stats = get_client_accumulated_stats()
            if not before_stats.stats_per_method:
                raise ValueError(
                    'stats.stats_per_method is None, the interop client stats service does not support this test case'
                )
            for i in range(attempt_count):
                logger.info('%s: attempt %d', testcase_name, i)

                time.sleep(test_runtime_secs)
                after_stats = get_client_accumulated_stats()

                success = True
                for status, pct in expected_results.items():
                    # Number of RPCs that finished with `status` during this
                    # attempt's window.
                    qty = (after_stats.stats_per_method[rpc].result[status] -
                           before_stats.stats_per_method[rpc].result[status])
                    want = pct * args.qps * test_runtime_secs
                    if abs(qty - want) > want * variance_allowed:
                        logger.info('%s: failed due to %s[%s]: got %d want ~%d',
                                    testcase_name, rpc, status, qty, want)
                        success = False
                if success:
                    logger.info('success')
                    break
                logger.info('%s attempt %d failed', testcase_name, i)
                # Slide the measurement window forward for the next attempt.
                before_stats = after_stats
            else:
                # for/else: reached only when no attempt hit `break`.
                raise Exception(
                    '%s: timeout waiting for expected results: %s; got %s' %
                    (testcase_name, expected_results,
                     after_stats.stats_per_method))
    finally:
        # Nested so that validation is re-enabled even if restoring the URL
        # map itself fails.
        try:
            patch_url_map_backend_service(gcp, original_backend_service)
        finally:
            set_validate_for_proxyless(gcp, True)
def set_validate_for_proxyless(gcp, validate_for_proxyless):
if not gcp.alpha_compute:
logger.debug(
@ -2417,6 +2595,7 @@ try:
client_env['GRPC_XDS_BOOTSTRAP'] = bootstrap_path
client_env['GRPC_XDS_EXPERIMENTAL_CIRCUIT_BREAKING'] = 'true'
client_env['GRPC_XDS_EXPERIMENTAL_ENABLE_TIMEOUT'] = 'true'
client_env['GRPC_XDS_EXPERIMENTAL_FAULT_INJECTION'] = 'true'
test_results = {}
failed_tests = []
for test_case in args.test_case:
@ -2534,6 +2713,8 @@ try:
same_zone_instance_group)
elif test_case == 'timeout':
test_timeout(gcp, backend_service, instance_group)
elif test_case == 'fault_injection':
test_fault_injection(gcp, backend_service, instance_group)
else:
logger.error('Unknown test case: %s', test_case)
sys.exit(1)

Loading…
Cancel
Save