diff --git a/tools/run_tests/run_xds_tests.py b/tools/run_tests/run_xds_tests.py index 303deef98e4..591e8004624 100755 --- a/tools/run_tests/run_xds_tests.py +++ b/tools/run_tests/run_xds_tests.py @@ -966,6 +966,12 @@ def test_header_matching(gcp, original_backend_service, instance_group, set_validate_for_proxyless(gcp, True) +def get_serving_status(instance, service_port): + with grpc.insecure_channel('%s:%d' % (instance, service_port)) as channel: + health_stub = health_pb2_grpc.HealthStub(channel) + return health_stub.Check(health_pb2.HealthCheckRequest()) + + def set_serving_status(instances, service_port, serving): logger.info('setting %s serving status to %s', instances, serving) for instance in instances: @@ -973,16 +979,13 @@ def set_serving_status(instances, service_port, serving): (instance, service_port)) as channel: logger.info('setting %s serving status to %s', instance, serving) stub = test_pb2_grpc.XdsUpdateHealthServiceStub(channel) - health_stub = health_pb2_grpc.HealthStub(channel) - retry_count = 5 for i in range(5): if serving: stub.SetServing(empty_pb2.Empty()) else: stub.SetNotServing(empty_pb2.Empty()) - serving_status = health_stub.Check( - health_pb2.HealthCheckRequest()) + serving_status = get_serving_status(instance, service_port) logger.info('got instance service status %s', serving_status) want_status = health_pb2.HealthCheckResponse.SERVING if serving else health_pb2.HealthCheckResponse.NOT_SERVING if serving_status.status == want_status: @@ -1550,14 +1553,23 @@ def wait_for_healthy_backends(gcp, timeout_sec=_WAIT_FOR_BACKEND_SEC): start_time = time.time() config = {'group': instance_group.url} - expected_size = len(get_instance_names(gcp, instance_group)) + instance_names = get_instance_names(gcp, instance_group) + expected_size = len(instance_names) while time.time() - start_time <= timeout_sec: + for instance_name in instance_names: + try: + status = get_serving_status(instance_name, gcp.service_port) + logger.info('serving status response from %s: %s', + instance_name, status) + except grpc.RpcError as rpc_error: + logger.info('checking serving status of %s failed: %s', + instance_name, rpc_error) result = gcp.compute.backendServices().getHealth( project=gcp.project, backendService=backend_service.name, body=config).execute(num_retries=_GCP_API_RETRIES) if 'healthStatus' in result: - logger.info('received healthStatus: %s', result['healthStatus']) + logger.info('received GCP healthStatus: %s', result['healthStatus']) healthy = True for instance in result['healthStatus']: if instance['healthState'] != 'HEALTHY': @@ -1565,7 +1577,9 @@ def wait_for_healthy_backends(gcp, break if healthy and expected_size == len(result['healthStatus']): return - time.sleep(2) + else: + logger.info('no healthStatus received from GCP') + time.sleep(5) raise Exception('Not all backends became healthy within %d seconds: %s' % (timeout_sec, result))