Merge pull request #22763 from ericgribkoff/fail_on_failure_flag

Add --fail_on_failed_rpc flag to xds tests
pull/22778/head
Eric Gribkoff 5 years ago committed by GitHub
commit 04277beda6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 7
      doc/xds-test-descriptions.md
  2. 16
      test/cpp/interop/xds_interop_client.cc
  3. 2
      tools/internal_ci/linux/grpc_xds_bazel_test_in_docker.sh
  4. 30
      tools/run_tests/run_xds_tests.py

@ -25,6 +25,9 @@ The code for the xDS test client can be found at:
Clients should accept these arguments: Clients should accept these arguments:
* --fail_on_failed_rpc=BOOL
* If true, the client should exit with a non-zero return code if any RPCs
fail. Default is false.
* --num_channels=CHANNELS * --num_channels=CHANNELS
* The number of channels to create to the server. * The number of channels to create to the server.
* --qps=QPS * --qps=QPS
@ -88,6 +91,7 @@ Client parameters:
1. --num_channels=1 1. --num_channels=1
1. --qps=10 1. --qps=10
1. --fail_on_failed_rpc=true
Load balancer configuration: Load balancer configuration:
@ -106,6 +110,7 @@ Client parameters:
1. --num_channels=1 1. --num_channels=1
1. --qps=10 1. --qps=10
1. --fail_on_failed_rpc=true
Load balancer configuration: Load balancer configuration:
@ -220,6 +225,7 @@ Client parameters:
1. --num_channels=1 1. --num_channels=1
1. --qps=10 1. --qps=10
1. --fail_on_failed_rpc=true
Load balancer configuration: Load balancer configuration:
@ -268,6 +274,7 @@ Client parameters:
1. --num_channels=1 1. --num_channels=1
1. --qps=10 1. --qps=10
1. --fail_on_failed_rpc=true
Load balancer configuration: Load balancer configuration:

@ -38,10 +38,11 @@
#include "test/core/util/test_config.h" #include "test/core/util/test_config.h"
#include "test/cpp/util/test_config.h" #include "test/cpp/util/test_config.h"
DEFINE_bool(fail_on_failed_rpc, false, "Fail client if any RPCs fail.");
DEFINE_int32(num_channels, 1, "Number of channels."); DEFINE_int32(num_channels, 1, "Number of channels.");
DEFINE_bool(print_response, false, "Write RPC response to stdout."); DEFINE_bool(print_response, false, "Write RPC response to stdout.");
DEFINE_int32(qps, 1, "Qps per channel."); DEFINE_int32(qps, 1, "Qps per channel.");
DEFINE_int32(rpc_timeout_sec, 10, "Per RPC timeout seconds."); DEFINE_int32(rpc_timeout_sec, 30, "Per RPC timeout seconds.");
DEFINE_string(server, "localhost:50051", "Address of server."); DEFINE_string(server, "localhost:50051", "Address of server.");
DEFINE_int32(stats_port, 50052, DEFINE_int32(stats_port, 50052,
"Port to expose peer distribution stats service."); "Port to expose peer distribution stats service.");
@ -155,14 +156,19 @@ class TestClient {
} }
} }
if (!call->status.ok()) {
if (FLAGS_print_response || FLAGS_fail_on_failed_rpc) {
std::cout << "RPC failed: " << call->status.error_code() << ": "
<< call->status.error_message() << std::endl;
}
if (FLAGS_fail_on_failed_rpc) {
abort();
}
} else {
if (FLAGS_print_response) { if (FLAGS_print_response) {
if (call->status.ok()) {
std::cout << "Greeting: Hello world, this is " std::cout << "Greeting: Hello world, this is "
<< call->response.hostname() << ", from " << call->response.hostname() << ", from "
<< call->context.peer() << std::endl; << call->context.peer() << std::endl;
} else {
std::cout << "RPC failed: " << call->status.error_code() << ": "
<< call->status.error_message() << std::endl;
} }
} }

@ -56,4 +56,4 @@ GRPC_VERBOSITY=debug GRPC_TRACE=xds_client,xds_resolver,cds_lb,eds_lb,priority_l
--path_to_server_binary=/java_server/grpc-java/interop-testing/build/install/grpc-interop-testing/bin/xds-test-server \ --path_to_server_binary=/java_server/grpc-java/interop-testing/build/install/grpc-interop-testing/bin/xds-test-server \
--gcp_suffix=$(date '+%s') \ --gcp_suffix=$(date '+%s') \
--verbose \ --verbose \
--client_cmd='bazel-bin/test/cpp/interop/xds_interop_client --server=xds-experimental:///{server_uri} --stats_port={stats_port} --qps={qps}' --client_cmd='bazel-bin/test/cpp/interop/xds_interop_client --server=xds-experimental:///{server_uri} --stats_port={stats_port} --qps={qps} {fail_on_failed_rpc}'

@ -199,6 +199,7 @@ _INSTANCE_GROUP_SIZE = args.instance_group_size
_NUM_TEST_RPCS = 10 * args.qps _NUM_TEST_RPCS = 10 * args.qps
_WAIT_FOR_STATS_SEC = 180 _WAIT_FOR_STATS_SEC = 180
_WAIT_FOR_URL_MAP_PATCH_SEC = 300 _WAIT_FOR_URL_MAP_PATCH_SEC = 300
_CONNECTION_TIMEOUT_SEC = 60
_GCP_API_RETRIES = 5 _GCP_API_RETRIES = 5
_BOOTSTRAP_TEMPLATE = """ _BOOTSTRAP_TEMPLATE = """
{{ {{
@ -221,6 +222,10 @@ _BOOTSTRAP_TEMPLATE = """
] ]
}}] }}]
}}""" % (args.network.split('/')[-1], args.zone, args.xds_server) }}""" % (args.network.split('/')[-1], args.zone, args.xds_server)
_TESTS_TO_FAIL_ON_RPC_FAILURE = [
'change_backend_service', 'new_instance_group_receives_traffic',
'ping_pong', 'round_robin'
]
_TESTS_USING_SECONDARY_IG = [ _TESTS_USING_SECONDARY_IG = [
'secondary_locality_gets_no_requests_on_partial_primary_failure', 'secondary_locality_gets_no_requests_on_partial_primary_failure',
'secondary_locality_gets_requests_on_primary_failure' 'secondary_locality_gets_requests_on_primary_failure'
@ -249,15 +254,12 @@ def get_client_stats(num_rpcs, timeout_sec):
request = messages_pb2.LoadBalancerStatsRequest() request = messages_pb2.LoadBalancerStatsRequest()
request.num_rpcs = num_rpcs request.num_rpcs = num_rpcs
request.timeout_sec = timeout_sec request.timeout_sec = timeout_sec
rpc_timeout = timeout_sec * 2 # Allow time for connection establishment rpc_timeout = timeout_sec + _CONNECTION_TIMEOUT_SEC
try:
response = stub.GetClientStats(request, response = stub.GetClientStats(request,
wait_for_ready=True, wait_for_ready=True,
timeout=rpc_timeout) timeout=rpc_timeout)
logger.debug('Invoked GetClientStats RPC: %s', response) logger.debug('Invoked GetClientStats RPC: %s', response)
return response return response
except grpc.RpcError as rpc_error:
logger.exception('GetClientStats RPC failed')
def _verify_rpcs_to_given_backends(backends, timeout_sec, num_rpcs, def _verify_rpcs_to_given_backends(backends, timeout_sec, num_rpcs,
@ -1178,7 +1180,6 @@ try:
wait_for_healthy_backends(gcp, backend_service, instance_group) wait_for_healthy_backends(gcp, backend_service, instance_group)
if args.test_case: if args.test_case:
if gcp.service_port == _DEFAULT_SERVICE_PORT: if gcp.service_port == _DEFAULT_SERVICE_PORT:
server_uri = service_host_name server_uri = service_host_name
else: else:
@ -1192,10 +1193,6 @@ try:
node_id=socket.gethostname()).encode('utf-8')) node_id=socket.gethostname()).encode('utf-8'))
bootstrap_path = bootstrap_file.name bootstrap_path = bootstrap_file.name
client_env = dict(os.environ, GRPC_XDS_BOOTSTRAP=bootstrap_path) client_env = dict(os.environ, GRPC_XDS_BOOTSTRAP=bootstrap_path)
client_cmd = shlex.split(
args.client_cmd.format(server_uri=server_uri,
stats_port=args.stats_port,
qps=args.qps))
test_results = {} test_results = {}
failed_tests = [] failed_tests = []
@ -1207,6 +1204,15 @@ try:
test_log_filename = os.path.join(log_dir, _SPONGE_LOG_NAME) test_log_filename = os.path.join(log_dir, _SPONGE_LOG_NAME)
test_log_file = open(test_log_filename, 'w+') test_log_file = open(test_log_filename, 'w+')
client_process = None client_process = None
if test_case in _TESTS_TO_FAIL_ON_RPC_FAILURE:
fail_on_failed_rpc = '--fail_on_failed_rpc=true'
else:
fail_on_failed_rpc = '--fail_on_failed_rpc=false'
client_cmd = shlex.split(
args.client_cmd.format(server_uri=server_uri,
stats_port=args.stats_port,
qps=args.qps,
fail_on_failed_rpc=fail_on_failed_rpc))
try: try:
client_process = subprocess.Popen(client_cmd, client_process = subprocess.Popen(client_cmd,
env=client_env, env=client_env,
@ -1242,6 +1248,10 @@ try:
else: else:
logger.error('Unknown test case: %s', test_case) logger.error('Unknown test case: %s', test_case)
sys.exit(1) sys.exit(1)
if client_process.poll() is not None:
raise Exception(
'Client process exited prematurely with exit code %d' %
client_process.returncode)
result.state = 'PASSED' result.state = 'PASSED'
result.returncode = 0 result.returncode = 0
except Exception as e: except Exception as e:
@ -1250,7 +1260,7 @@ try:
result.state = 'FAILED' result.state = 'FAILED'
result.message = str(e) result.message = str(e)
finally: finally:
if client_process: if client_process and not client_process.returncode:
client_process.terminate() client_process.terminate()
test_log_file.close() test_log_file.close()
# Workaround for Python 3, as report_utils will invoke decode() on # Workaround for Python 3, as report_utils will invoke decode() on

Loading…
Cancel
Save