diff --git a/doc/xds-test-descriptions.md b/doc/xds-test-descriptions.md index 999238a162e..46414384e95 100644 --- a/doc/xds-test-descriptions.md +++ b/doc/xds-test-descriptions.md @@ -25,6 +25,9 @@ The code for the xDS test client can be at: Clients should accept these arguments: +* --fail_on_failed_rpc=BOOL + * If true, the client should exit with a non-zero return code if any RPCs + fail. Default is false. * --num_channels=CHANNELS * The number of channels to create to the server. * --qps=QPS @@ -88,6 +91,7 @@ Client parameters: 1. --num_channels=1 1. --qps=10 +1. --fail_on_failed_rpc=true Load balancer configuration: @@ -106,6 +110,7 @@ Client parameters: 1. --num_channels=1 1. --qps=10 +1. --fail_on_failed_rpc=true Load balancer configuration: @@ -220,6 +225,7 @@ Client parameters: 1. --num_channels=1 1. --qps=10 +1. --fail_on_failed_rpc=true Load balancer configuration: @@ -268,6 +274,7 @@ Client parameters: 1. --num_channels=1 1. --qps=10 +1. --fail_on_failed_rpc=true Load balancer configuration: diff --git a/test/cpp/interop/xds_interop_client.cc b/test/cpp/interop/xds_interop_client.cc index 6cb32b2b0e8..6d7f3c21690 100644 --- a/test/cpp/interop/xds_interop_client.cc +++ b/test/cpp/interop/xds_interop_client.cc @@ -38,10 +38,11 @@ #include "test/core/util/test_config.h" #include "test/cpp/util/test_config.h" +DEFINE_bool(fail_on_failed_rpc, false, "Fail client if any RPCs fail."); DEFINE_int32(num_channels, 1, "Number of channels."); DEFINE_bool(print_response, false, "Write RPC response to stdout."); DEFINE_int32(qps, 1, "Qps per channel."); -DEFINE_int32(rpc_timeout_sec, 10, "Per RPC timeout seconds."); +DEFINE_int32(rpc_timeout_sec, 30, "Per RPC timeout seconds."); DEFINE_string(server, "localhost:50051", "Address of server."); DEFINE_int32(stats_port, 50052, "Port to expose peer distribution stats service."); @@ -155,14 +156,19 @@ class TestClient { } } - if (FLAGS_print_response) { - if (call->status.ok()) { + if (!call->status.ok()) { + if 
(FLAGS_print_response || FLAGS_fail_on_failed_rpc) { + std::cout << "RPC failed: " << call->status.error_code() << ": " + << call->status.error_message() << std::endl; + } + if (FLAGS_fail_on_failed_rpc) { + abort(); + } + } else { + if (FLAGS_print_response) { std::cout << "Greeting: Hello world, this is " << call->response.hostname() << ", from " << call->context.peer() << std::endl; - } else { - std::cout << "RPC failed: " << call->status.error_code() << ": " - << call->status.error_message() << std::endl; } } diff --git a/tools/internal_ci/linux/grpc_xds_bazel_test_in_docker.sh b/tools/internal_ci/linux/grpc_xds_bazel_test_in_docker.sh index 943ba2b1981..54ade769e75 100755 --- a/tools/internal_ci/linux/grpc_xds_bazel_test_in_docker.sh +++ b/tools/internal_ci/linux/grpc_xds_bazel_test_in_docker.sh @@ -56,4 +56,4 @@ GRPC_VERBOSITY=debug GRPC_TRACE=xds_client,xds_resolver,cds_lb,eds_lb,priority_l --path_to_server_binary=/java_server/grpc-java/interop-testing/build/install/grpc-interop-testing/bin/xds-test-server \ --gcp_suffix=$(date '+%s') \ --verbose \ - --client_cmd='bazel-bin/test/cpp/interop/xds_interop_client --server=xds-experimental:///{server_uri} --stats_port={stats_port} --qps={qps}' + --client_cmd='bazel-bin/test/cpp/interop/xds_interop_client --server=xds-experimental:///{server_uri} --stats_port={stats_port} --qps={qps} {fail_on_failed_rpc}' diff --git a/tools/run_tests/run_xds_tests.py b/tools/run_tests/run_xds_tests.py index 8f1d9b151f8..4dc18494e6b 100755 --- a/tools/run_tests/run_xds_tests.py +++ b/tools/run_tests/run_xds_tests.py @@ -199,6 +199,7 @@ _INSTANCE_GROUP_SIZE = args.instance_group_size _NUM_TEST_RPCS = 10 * args.qps _WAIT_FOR_STATS_SEC = 180 _WAIT_FOR_URL_MAP_PATCH_SEC = 300 +_CONNECTION_TIMEOUT_SEC = 60 _GCP_API_RETRIES = 5 _BOOTSTRAP_TEMPLATE = """ {{ @@ -221,6 +222,10 @@ _BOOTSTRAP_TEMPLATE = """ ] }}] }}""" % (args.network.split('/')[-1], args.zone, args.xds_server) +_TESTS_TO_FAIL_ON_RPC_FAILURE = [ + 'change_backend_service', 
'new_instance_group_receives_traffic', + 'ping_pong', 'round_robin' +] _TESTS_USING_SECONDARY_IG = [ 'secondary_locality_gets_no_requests_on_partial_primary_failure', 'secondary_locality_gets_requests_on_primary_failure' @@ -249,15 +254,12 @@ def get_client_stats(num_rpcs, timeout_sec): request = messages_pb2.LoadBalancerStatsRequest() request.num_rpcs = num_rpcs request.timeout_sec = timeout_sec - rpc_timeout = timeout_sec * 2 # Allow time for connection establishment - try: - response = stub.GetClientStats(request, - wait_for_ready=True, - timeout=rpc_timeout) - logger.debug('Invoked GetClientStats RPC: %s', response) - return response - except grpc.RpcError as rpc_error: - logger.exception('GetClientStats RPC failed') + rpc_timeout = timeout_sec + _CONNECTION_TIMEOUT_SEC + response = stub.GetClientStats(request, + wait_for_ready=True, + timeout=rpc_timeout) + logger.debug('Invoked GetClientStats RPC: %s', response) + return response def _verify_rpcs_to_given_backends(backends, timeout_sec, num_rpcs, @@ -1178,7 +1180,6 @@ try: wait_for_healthy_backends(gcp, backend_service, instance_group) if args.test_case: - if gcp.service_port == _DEFAULT_SERVICE_PORT: server_uri = service_host_name else: @@ -1192,10 +1193,6 @@ try: node_id=socket.gethostname()).encode('utf-8')) bootstrap_path = bootstrap_file.name client_env = dict(os.environ, GRPC_XDS_BOOTSTRAP=bootstrap_path) - client_cmd = shlex.split( - args.client_cmd.format(server_uri=server_uri, - stats_port=args.stats_port, - qps=args.qps)) test_results = {} failed_tests = [] @@ -1207,6 +1204,15 @@ try: test_log_filename = os.path.join(log_dir, _SPONGE_LOG_NAME) test_log_file = open(test_log_filename, 'w+') client_process = None + if test_case in _TESTS_TO_FAIL_ON_RPC_FAILURE: + fail_on_failed_rpc = '--fail_on_failed_rpc=true' + else: + fail_on_failed_rpc = '--fail_on_failed_rpc=false' + client_cmd = shlex.split( + args.client_cmd.format(server_uri=server_uri, + stats_port=args.stats_port, + qps=args.qps, + 
fail_on_failed_rpc=fail_on_failed_rpc)) try: client_process = subprocess.Popen(client_cmd, env=client_env, @@ -1242,6 +1248,10 @@ try: else: logger.error('Unknown test case: %s', test_case) sys.exit(1) + if client_process.poll() is not None: + raise Exception( + 'Client process exited prematurely with exit code %d' % + client_process.returncode) result.state = 'PASSED' result.returncode = 0 except Exception as e: @@ -1250,7 +1260,7 @@ try: result.state = 'FAILED' result.message = str(e) finally: - if client_process: + if client_process and not client_process.returncode: client_process.terminate() test_log_file.close() # Workaround for Python 3, as report_utils will invoke decode() on