Reduce xds_end2end_test's error tolerance to 0.05 (#25738)

* WIP sync API with 1.6k threads doesn't work * Please don't fail * Fix the tests * Spread to DropTest * Code clean-up and extend delay RPC timeout * Address comments * Address comments * Deflake MSAN * Make MSAN more robust && normal runs faster * Address comments * Remove the clock change in fault injection filter * Polish comments && stricten the connection timeout
4 years ago · 13e04b4b9c
parent e372a63cf7
commit 13e04b4b9c
1 changed files with 285 additions and 272 deletions
--- a/test/cpp/end2end/xds_end2end_test.cc
+++ b/test/cpp/end2end/xds_end2end_test.cc
@ -1606,6 +1606,34 @@ grpc_millis NowFromCycleCounter() {
  return grpc_cycle_counter_to_millis_round_up(now);
 }

+// Returns the number of RPCs needed to pass error_tolerance at 99.99% chance.
+// Rolling dices in drop/fault-injection generates a binomial distribution (if
+// our code is not horribly wrong). Let's make "n" the number of samples, "p"
+// the probabilty. If we have np>5 & n(1-p)>5, we can approximately treat the
+// binomial distribution as a normal distribution.
+//
+// For normal distribution, we can easily look up how many standard deviation we
+// need to reach 99.99%. Based on Wiki's table
+// https://en.wikipedia.org/wiki/Standard_normal_table, we need 3.89 sigma
+// (standard deviation) to cover the probability area of 99.99%. In another
+// word, for a sample with size "n" probability "p" error-tolerance "k", we want
+// the error always land within 3.89 sigma. The sigma of binominal distribution
+// and be computed as sqrt(np(1-p)). Hence, we have the equation:
+//
+//   kn <= 3.89 * sqrt(np(1-p))
+//
+// E.g., with p=0.5 k=0.1, n >= 378; with p=0.5 k=0.05, n >= 1513; with p=0.5
+// k=0.01, n >= 37830.
+size_t ComputeIdealNumRpcs(double p, double error_tolerance) {
+  GPR_ASSERT(p >= 0 && p <= 1);
+  size_t num_rpcs =
+      ceil(p * (1 - p) * 3.89 * 3.89 / error_tolerance / error_tolerance);
+  gpr_log(GPR_INFO,
+          "Sending %" PRIuPTR " RPCs for percentage=%.3f error_tolerance=%.3f",
+          num_rpcs, p, error_tolerance);
+  return num_rpcs;
+}
+
 // Channel arg pointer vtable for storing xDS channel args in the parent
 // channel's channel args.
 void* ChannelArgsArgCopy(void* p) {
@ -2557,6 +2585,49 @@ class XdsEnd2endTest : public ::testing::TestWithParam<TestType> {
    Status status_;
  };

+  struct ConcurrentRpc {
+    ClientContext context;
+    Status status;
+    grpc_millis elapsed_time;
+    EchoResponse response;
+  };
+
+  std::vector<ConcurrentRpc> SendConcurrentRpcs(
+      grpc::testing::EchoTestService::Stub* stub, size_t num_rpcs,
+      const RpcOptions& rpc_options) {
+    // Variables for RPCs.
+    std::vector<ConcurrentRpc> rpcs(num_rpcs);
+    EchoRequest request;
+    // Variables for synchronization
+    absl::Mutex mu;
+    absl::CondVar cv;
+    size_t completed = 0;
+    // Set-off callback RPCs
+    for (size_t i = 0; i < num_rpcs; i++) {
+      ConcurrentRpc* rpc = &rpcs[i];
+      rpc_options.SetupRpc(&rpc->context, &request);
+      grpc_millis t0 = NowFromCycleCounter();
+      stub->experimental_async()->Echo(
+          &rpc->context, &request, &rpc->response,
+          [rpc, &mu, &completed, &cv, num_rpcs, t0](Status s) {
+            rpc->status = s;
+            rpc->elapsed_time = NowFromCycleCounter() - t0;
+            bool done;
+            {
+              absl::MutexLock lock(&mu);
+              done = (++completed) == num_rpcs;
+            }
+            if (done) cv.Signal();
+          });
+    }
+    {
+      absl::MutexLock lock(&mu);
+      cv.Wait(&mu);
+    }
+    EXPECT_EQ(completed, num_rpcs);
+    return rpcs;
+  }
+
  const size_t num_backends_;
  const size_t num_balancers_;
  const int client_load_reporting_interval_seconds_;
@ -4567,10 +4638,14 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedCluster) {
  const char* kNewCluster2Name = "new_cluster_2";
  const char* kNewEdsService2Name = "new_eds_service_name_2";
  const char* kNotUsedClusterName = "not_used_cluster";
-  const size_t kNumEcho1Rpcs = 1000;
-  const size_t kNumEchoRpcs = 10;
+  const size_t kNumEchoRpcs = 10;  // RPCs that will go to a fixed backend.
  const size_t kWeight75 = 75;
  const size_t kWeight25 = 25;
+  const double kErrorTolerance = 0.05;
+  const double kWeight75Percent = static_cast<double>(kWeight75) / 100;
+  const double kWeight25Percent = static_cast<double>(kWeight25) / 100;
+  const size_t kNumEcho1Rpcs =
+      ComputeIdealNumRpcs(kWeight75Percent, kErrorTolerance);
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
  // Populate new EDS resources.
@ -4637,24 +4712,12 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedCluster) {
  EXPECT_EQ(0, backends_[2]->backend_service()->request_count());
  const int weight_25_request_count =
      backends_[2]->backend_service1()->request_count();
-  const double kErrorTolerance = 0.2;
-  EXPECT_THAT(
-      weight_75_request_count,
-      ::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight75 / 100 * (1 - kErrorTolerance)),
-                       ::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight75 / 100 * (1 + kErrorTolerance))));
-  // TODO(@donnadionne): Reduce tolerance: increased the tolerance to keep the
-  // test from flaking while debugging potential root cause.
-  const double kErrorToleranceSmallLoad = 0.3;
  gpr_log(GPR_INFO, "target_75 received %d rpcs and target_25 received %d rpcs",
          weight_75_request_count, weight_25_request_count);
-  EXPECT_THAT(weight_25_request_count,
-              ::testing::AllOf(
-                  ::testing::Ge(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
-                                100 * (1 - kErrorToleranceSmallLoad)),
-                  ::testing::Le(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
-                                100 * (1 + kErrorToleranceSmallLoad))));
+  EXPECT_THAT(static_cast<double>(weight_75_request_count) / kNumEcho1Rpcs,
+              ::testing::DoubleNear(kWeight75Percent, kErrorTolerance));
+  EXPECT_THAT(static_cast<double>(weight_25_request_count) / kNumEcho1Rpcs,
+              ::testing::DoubleNear(kWeight25Percent, kErrorTolerance));
 }

 TEST_P(LdsRdsTest, RouteActionWeightedTargetDefaultRoute) {
@ -4662,9 +4725,13 @@ TEST_P(LdsRdsTest, RouteActionWeightedTargetDefaultRoute) {
  const char* kNewEdsService1Name = "new_eds_service_name_1";
  const char* kNewCluster2Name = "new_cluster_2";
  const char* kNewEdsService2Name = "new_eds_service_name_2";
-  const size_t kNumEchoRpcs = 1000;
  const size_t kWeight75 = 75;
  const size_t kWeight25 = 25;
+  const double kErrorTolerance = 0.05;
+  const double kWeight75Percent = static_cast<double>(kWeight75) / 100;
+  const double kWeight25Percent = static_cast<double>(kWeight25) / 100;
+  const size_t kNumEchoRpcs =
+      ComputeIdealNumRpcs(kWeight75Percent, kErrorTolerance);
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
  // Populate new EDS resources.
@ -4718,24 +4785,12 @@ TEST_P(LdsRdsTest, RouteActionWeightedTargetDefaultRoute) {
      backends_[1]->backend_service()->request_count();
  const int weight_25_request_count =
      backends_[2]->backend_service()->request_count();
-  const double kErrorTolerance = 0.2;
-  EXPECT_THAT(
-      weight_75_request_count,
-      ::testing::AllOf(::testing::Ge(static_cast<double>(kNumEchoRpcs) *
-                                     kWeight75 / 100 * (1 - kErrorTolerance)),
-                       ::testing::Le(static_cast<double>(kNumEchoRpcs) *
-                                     kWeight75 / 100 * (1 + kErrorTolerance))));
-  // TODO(@donnadionne): Reduce tolerance: increased the tolerance to keep the
-  // test from flaking while debugging potential root cause.
-  const double kErrorToleranceSmallLoad = 0.3;
  gpr_log(GPR_INFO, "target_75 received %d rpcs and target_25 received %d rpcs",
          weight_75_request_count, weight_25_request_count);
-  EXPECT_THAT(weight_25_request_count,
-              ::testing::AllOf(
-                  ::testing::Ge(static_cast<double>(kNumEchoRpcs) * kWeight25 /
-                                100 * (1 - kErrorToleranceSmallLoad)),
-                  ::testing::Le(static_cast<double>(kNumEchoRpcs) * kWeight25 /
-                                100 * (1 + kErrorToleranceSmallLoad))));
+  EXPECT_THAT(static_cast<double>(weight_75_request_count) / kNumEchoRpcs,
+              ::testing::DoubleNear(kWeight75Percent, kErrorTolerance));
+  EXPECT_THAT(static_cast<double>(weight_25_request_count) / kNumEchoRpcs,
+              ::testing::DoubleNear(kWeight25Percent, kErrorTolerance));
 }

 TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
@ -4745,11 +4800,18 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
  const char* kNewEdsService2Name = "new_eds_service_name_2";
  const char* kNewCluster3Name = "new_cluster_3";
  const char* kNewEdsService3Name = "new_eds_service_name_3";
-  const size_t kNumEcho1Rpcs = 1000;
  const size_t kNumEchoRpcs = 10;
  const size_t kWeight75 = 75;
  const size_t kWeight25 = 25;
  const size_t kWeight50 = 50;
+  const double kErrorTolerance = 0.05;
+  const double kWeight75Percent = static_cast<double>(kWeight75) / 100;
+  const double kWeight25Percent = static_cast<double>(kWeight25) / 100;
+  const double kWeight50Percent = static_cast<double>(kWeight50) / 100;
+  const size_t kNumEcho1Rpcs7525 =
+      ComputeIdealNumRpcs(kWeight75Percent, kErrorTolerance);
+  const size_t kNumEcho1Rpcs5050 =
+      ComputeIdealNumRpcs(kWeight50Percent, kErrorTolerance);
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
  // Populate new EDS resources.
@ -4811,7 +4873,8 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
  WaitForAllBackends(0, 1);
  WaitForAllBackends(1, 3, true, RpcOptions().set_rpc_service(SERVICE_ECHO1));
  CheckRpcSendOk(kNumEchoRpcs);
-  CheckRpcSendOk(kNumEcho1Rpcs, RpcOptions().set_rpc_service(SERVICE_ECHO1));
+  CheckRpcSendOk(kNumEcho1Rpcs7525,
+                 RpcOptions().set_rpc_service(SERVICE_ECHO1));
  // Make sure RPCs all go to the correct backend.
  EXPECT_EQ(kNumEchoRpcs, backends_[0]->backend_service()->request_count());
  EXPECT_EQ(0, backends_[0]->backend_service1()->request_count());
@ -4824,24 +4887,12 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
      backends_[2]->backend_service1()->request_count();
  EXPECT_EQ(0, backends_[3]->backend_service()->request_count());
  EXPECT_EQ(0, backends_[3]->backend_service1()->request_count());
-  const double kErrorTolerance = 0.2;
-  EXPECT_THAT(
-      weight_75_request_count,
-      ::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight75 / 100 * (1 - kErrorTolerance)),
-                       ::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight75 / 100 * (1 + kErrorTolerance))));
-  // TODO(@donnadionne): Reduce tolerance: increased the tolerance to keep the
-  // test from flaking while debugging potential root cause.
-  const double kErrorToleranceSmallLoad = 0.3;
  gpr_log(GPR_INFO, "target_75 received %d rpcs and target_25 received %d rpcs",
          weight_75_request_count, weight_25_request_count);
-  EXPECT_THAT(weight_25_request_count,
-              ::testing::AllOf(
-                  ::testing::Ge(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
-                                100 * (1 - kErrorToleranceSmallLoad)),
-                  ::testing::Le(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
-                                100 * (1 + kErrorToleranceSmallLoad))));
+  EXPECT_THAT(static_cast<double>(weight_75_request_count) / kNumEcho1Rpcs7525,
+              ::testing::DoubleNear(kWeight75Percent, kErrorTolerance));
+  EXPECT_THAT(static_cast<double>(weight_25_request_count) / kNumEcho1Rpcs7525,
+              ::testing::DoubleNear(kWeight25Percent, kErrorTolerance));
  // Change Route Configurations: same clusters different weights.
  weighted_cluster1->mutable_weight()->set_value(kWeight50);
  weighted_cluster2->mutable_weight()->set_value(kWeight50);
@ -4852,7 +4903,8 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
  ResetBackendCounters();
  WaitForAllBackends(3, 4);
  CheckRpcSendOk(kNumEchoRpcs);
-  CheckRpcSendOk(kNumEcho1Rpcs, RpcOptions().set_rpc_service(SERVICE_ECHO1));
+  CheckRpcSendOk(kNumEcho1Rpcs5050,
+                 RpcOptions().set_rpc_service(SERVICE_ECHO1));
  // Make sure RPCs all go to the correct backend.
  EXPECT_EQ(0, backends_[0]->backend_service()->request_count());
  EXPECT_EQ(0, backends_[0]->backend_service1()->request_count());
@ -4865,17 +4917,11 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
  EXPECT_EQ(kNumEchoRpcs, backends_[3]->backend_service()->request_count());
  EXPECT_EQ(0, backends_[3]->backend_service1()->request_count());
  EXPECT_THAT(
-      weight_50_request_count_1,
-      ::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight50 / 100 * (1 - kErrorTolerance)),
-                       ::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight50 / 100 * (1 + kErrorTolerance))));
+      static_cast<double>(weight_50_request_count_1) / kNumEcho1Rpcs5050,
+      ::testing::DoubleNear(kWeight50Percent, kErrorTolerance));
  EXPECT_THAT(
-      weight_50_request_count_2,
-      ::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight50 / 100 * (1 - kErrorTolerance)),
-                       ::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight50 / 100 * (1 + kErrorTolerance))));
+      static_cast<double>(weight_50_request_count_2) / kNumEcho1Rpcs5050,
+      ::testing::DoubleNear(kWeight50Percent, kErrorTolerance));
 }

 TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
@ -4885,11 +4931,18 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
  const char* kNewEdsService2Name = "new_eds_service_name_2";
  const char* kNewCluster3Name = "new_cluster_3";
  const char* kNewEdsService3Name = "new_eds_service_name_3";
-  const size_t kNumEcho1Rpcs = 1000;
  const size_t kNumEchoRpcs = 10;
  const size_t kWeight75 = 75;
  const size_t kWeight25 = 25;
  const size_t kWeight50 = 50;
+  const double kErrorTolerance = 0.05;
+  const double kWeight75Percent = static_cast<double>(kWeight75) / 100;
+  const double kWeight25Percent = static_cast<double>(kWeight25) / 100;
+  const double kWeight50Percent = static_cast<double>(kWeight50) / 100;
+  const size_t kNumEcho1Rpcs7525 =
+      ComputeIdealNumRpcs(kWeight75Percent, kErrorTolerance);
+  const size_t kNumEcho1Rpcs5050 =
+      ComputeIdealNumRpcs(kWeight50Percent, kErrorTolerance);
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
  // Populate new EDS resources.
@ -4951,7 +5004,8 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
  WaitForAllBackends(0, 1);
  WaitForAllBackends(1, 2, true, RpcOptions().set_rpc_service(SERVICE_ECHO1));
  CheckRpcSendOk(kNumEchoRpcs);
-  CheckRpcSendOk(kNumEcho1Rpcs, RpcOptions().set_rpc_service(SERVICE_ECHO1));
+  CheckRpcSendOk(kNumEcho1Rpcs7525,
+                 RpcOptions().set_rpc_service(SERVICE_ECHO1));
  // Make sure RPCs all go to the correct backend.
  EXPECT_EQ(kNumEchoRpcs, backends_[0]->backend_service()->request_count());
  int weight_25_request_count =
@ -4963,24 +5017,12 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
  EXPECT_EQ(0, backends_[2]->backend_service1()->request_count());
  EXPECT_EQ(0, backends_[3]->backend_service()->request_count());
  EXPECT_EQ(0, backends_[3]->backend_service1()->request_count());
-  const double kErrorTolerance = 0.2;
-  EXPECT_THAT(
-      weight_75_request_count,
-      ::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight75 / 100 * (1 - kErrorTolerance)),
-                       ::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight75 / 100 * (1 + kErrorTolerance))));
-  // TODO(@donnadionne): Reduce tolerance: increased the tolerance to keep the
-  // test from flaking while debugging potential root cause.
-  const double kErrorToleranceSmallLoad = 0.3;
  gpr_log(GPR_INFO, "target_75 received %d rpcs and target_25 received %d rpcs",
          weight_75_request_count, weight_25_request_count);
-  EXPECT_THAT(weight_25_request_count,
-              ::testing::AllOf(
-                  ::testing::Ge(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
-                                100 * (1 - kErrorToleranceSmallLoad)),
-                  ::testing::Le(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
-                                100 * (1 + kErrorToleranceSmallLoad))));
+  EXPECT_THAT(static_cast<double>(weight_75_request_count) / kNumEcho1Rpcs7525,
+              ::testing::DoubleNear(kWeight75Percent, kErrorTolerance));
+  EXPECT_THAT(static_cast<double>(weight_25_request_count) / kNumEcho1Rpcs7525,
+              ::testing::DoubleNear(kWeight25Percent, kErrorTolerance));
  // Change Route Configurations: new set of clusters with different weights.
  weighted_cluster1->mutable_weight()->set_value(kWeight50);
  weighted_cluster2->set_name(kNewCluster2Name);
@ -4989,7 +5031,8 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
  ResetBackendCounters();
  WaitForAllBackends(2, 3, true, RpcOptions().set_rpc_service(SERVICE_ECHO1));
  CheckRpcSendOk(kNumEchoRpcs);
-  CheckRpcSendOk(kNumEcho1Rpcs, RpcOptions().set_rpc_service(SERVICE_ECHO1));
+  CheckRpcSendOk(kNumEcho1Rpcs5050,
+                 RpcOptions().set_rpc_service(SERVICE_ECHO1));
  // Make sure RPCs all go to the correct backend.
  EXPECT_EQ(kNumEchoRpcs, backends_[0]->backend_service()->request_count());
  EXPECT_EQ(0, backends_[0]->backend_service1()->request_count());
@ -5002,17 +5045,11 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
  EXPECT_EQ(0, backends_[3]->backend_service()->request_count());
  EXPECT_EQ(0, backends_[3]->backend_service1()->request_count());
  EXPECT_THAT(
-      weight_50_request_count_1,
-      ::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight50 / 100 * (1 - kErrorTolerance)),
-                       ::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight50 / 100 * (1 + kErrorTolerance))));
+      static_cast<double>(weight_50_request_count_1) / kNumEcho1Rpcs5050,
+      ::testing::DoubleNear(kWeight50Percent, kErrorTolerance));
  EXPECT_THAT(
-      weight_50_request_count_2,
-      ::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight50 / 100 * (1 - kErrorTolerance)),
-                       ::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight50 / 100 * (1 + kErrorTolerance))));
+      static_cast<double>(weight_50_request_count_2) / kNumEcho1Rpcs5050,
+      ::testing::DoubleNear(kWeight50Percent, kErrorTolerance));
  // Change Route Configurations.
  weighted_cluster1->mutable_weight()->set_value(kWeight75);
  weighted_cluster2->set_name(kNewCluster3Name);
@ -5021,7 +5058,8 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
  ResetBackendCounters();
  WaitForAllBackends(3, 4, true, RpcOptions().set_rpc_service(SERVICE_ECHO1));
  CheckRpcSendOk(kNumEchoRpcs);
-  CheckRpcSendOk(kNumEcho1Rpcs, RpcOptions().set_rpc_service(SERVICE_ECHO1));
+  CheckRpcSendOk(kNumEcho1Rpcs7525,
+                 RpcOptions().set_rpc_service(SERVICE_ECHO1));
  // Make sure RPCs all go to the correct backend.
  EXPECT_EQ(kNumEchoRpcs, backends_[0]->backend_service()->request_count());
  EXPECT_EQ(0, backends_[0]->backend_service1()->request_count());
@ -5031,22 +5069,12 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
  EXPECT_EQ(0, backends_[2]->backend_service1()->request_count());
  EXPECT_EQ(0, backends_[3]->backend_service()->request_count());
  weight_25_request_count = backends_[3]->backend_service1()->request_count();
-  EXPECT_THAT(
-      weight_75_request_count,
-      ::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight75 / 100 * (1 - kErrorTolerance)),
-                       ::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
-                                     kWeight75 / 100 * (1 + kErrorTolerance))));
-  // TODO(@donnadionne): Reduce tolerance: increased the tolerance to keep the
-  // test from flaking while debugging potential root cause.
  gpr_log(GPR_INFO, "target_75 received %d rpcs and target_25 received %d rpcs",
          weight_75_request_count, weight_25_request_count);
-  EXPECT_THAT(weight_25_request_count,
-              ::testing::AllOf(
-                  ::testing::Ge(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
-                                100 * (1 - kErrorToleranceSmallLoad)),
-                  ::testing::Le(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
-                                100 * (1 + kErrorToleranceSmallLoad))));
+  EXPECT_THAT(static_cast<double>(weight_75_request_count) / kNumEcho1Rpcs7525,
+              ::testing::DoubleNear(kWeight75Percent, kErrorTolerance));
+  EXPECT_THAT(static_cast<double>(weight_25_request_count) / kNumEcho1Rpcs7525,
+              ::testing::DoubleNear(kWeight25Percent, kErrorTolerance));
 }

 TEST_P(LdsRdsTest, XdsRoutingClusterUpdateClusters) {
@ -5625,7 +5653,12 @@ TEST_P(LdsRdsTest, XdsRoutingHeadersMatchingSpecialCasesToIgnore) {
 TEST_P(LdsRdsTest, XdsRoutingRuntimeFractionMatching) {
  const char* kNewClusterName = "new_cluster";
  const char* kNewEdsServiceName = "new_eds_service_name";
-  const size_t kNumRpcs = 1000;
+  const double kErrorTolerance = 0.05;
+  const size_t kRouteMatchNumerator = 25;
+  const double kRouteMatchPercent =
+      static_cast<double>(kRouteMatchNumerator) / 100;
+  const size_t kNumRpcs =
+      ComputeIdealNumRpcs(kRouteMatchPercent, kErrorTolerance);
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
  // Populate new EDS resources.
@ -5650,7 +5683,7 @@ TEST_P(LdsRdsTest, XdsRoutingRuntimeFractionMatching) {
  route1->mutable_match()
      ->mutable_runtime_fraction()
      ->mutable_default_value()
-      ->set_numerator(25);
+      ->set_numerator(kRouteMatchNumerator);
  route1->mutable_route()->set_cluster(kNewClusterName);
  auto* default_route = route_config.mutable_virtual_hosts(0)->add_routes();
  default_route->mutable_match()->set_prefix("");
@ -5662,19 +5695,10 @@ TEST_P(LdsRdsTest, XdsRoutingRuntimeFractionMatching) {
      backends_[0]->backend_service()->request_count();
  const int matched_backend_count =
      backends_[1]->backend_service()->request_count();
-  const double kErrorTolerance = 0.2;
-  EXPECT_THAT(
-      default_backend_count,
-      ::testing::AllOf(::testing::Ge(static_cast<double>(kNumRpcs) * 75 / 100 *
-                                     (1 - kErrorTolerance)),
-                       ::testing::Le(static_cast<double>(kNumRpcs) * 75 / 100 *
-                                     (1 + kErrorTolerance))));
-  EXPECT_THAT(
-      matched_backend_count,
-      ::testing::AllOf(::testing::Ge(static_cast<double>(kNumRpcs) * 25 / 100 *
-                                     (1 - kErrorTolerance)),
-                       ::testing::Le(static_cast<double>(kNumRpcs) * 25 / 100 *
-                                     (1 + kErrorTolerance))));
+  EXPECT_THAT(static_cast<double>(default_backend_count) / kNumRpcs,
+              ::testing::DoubleNear(1 - kRouteMatchPercent, kErrorTolerance));
+  EXPECT_THAT(static_cast<double>(matched_backend_count) / kNumRpcs,
+              ::testing::DoubleNear(kRouteMatchPercent, kErrorTolerance));
  const auto response_state = RouteConfigurationResponseState(0);
  EXPECT_EQ(response_state.state, AdsServiceImpl::ResponseState::ACKED);
 }
@ -8854,7 +8878,6 @@ using LocalityMapTest = BasicTest;
 TEST_P(LocalityMapTest, WeightedRoundRobin) {
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
-  const size_t kNumRpcs = 5000;
  const int kLocalityWeight0 = 2;
  const int kLocalityWeight1 = 8;
  const int kTotalLocalityWeight = kLocalityWeight0 + kLocalityWeight1;
@ -8862,6 +8885,9 @@ TEST_P(LocalityMapTest, WeightedRoundRobin) {
      static_cast<double>(kLocalityWeight0) / kTotalLocalityWeight;
  const double kLocalityWeightRate1 =
      static_cast<double>(kLocalityWeight1) / kTotalLocalityWeight;
+  const double kErrorTolerance = 0.05;
+  const size_t kNumRpcs =
+      ComputeIdealNumRpcs(kLocalityWeightRate0, kErrorTolerance);
  // ADS response contains 2 localities, each of which contains 1 backend.
  AdsServiceImpl::EdsResourceArgs args({
      {"locality0", GetBackendPorts(0, 1), kLocalityWeight0},
@ -8880,15 +8906,10 @@ TEST_P(LocalityMapTest, WeightedRoundRobin) {
  const double locality_picked_rate_1 =
      static_cast<double>(backends_[1]->backend_service()->request_count()) /
      kNumRpcs;
-  const double kErrorTolerance = 0.2;
  EXPECT_THAT(locality_picked_rate_0,
-              ::testing::AllOf(
-                  ::testing::Ge(kLocalityWeightRate0 * (1 - kErrorTolerance)),
-                  ::testing::Le(kLocalityWeightRate0 * (1 + kErrorTolerance))));
+              ::testing::DoubleNear(kLocalityWeightRate0, kErrorTolerance));
  EXPECT_THAT(locality_picked_rate_1,
-              ::testing::AllOf(
-                  ::testing::Ge(kLocalityWeightRate1 * (1 - kErrorTolerance)),
-                  ::testing::Le(kLocalityWeightRate1 * (1 + kErrorTolerance))));
+              ::testing::DoubleNear(kLocalityWeightRate1, kErrorTolerance));
 }

 // Tests that we correctly handle a locality containing no endpoints.
@ -9305,13 +9326,15 @@ using DropTest = BasicTest;
 TEST_P(DropTest, Vanilla) {
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
-  const size_t kNumRpcs = 5000;
  const uint32_t kDropPerMillionForLb = 100000;
  const uint32_t kDropPerMillionForThrottle = 200000;
  const double kDropRateForLb = kDropPerMillionForLb / 1000000.0;
  const double kDropRateForThrottle = kDropPerMillionForThrottle / 1000000.0;
-  const double KDropRateForLbAndThrottle =
+  const double kDropRateForLbAndThrottle =
      kDropRateForLb + (1 - kDropRateForLb) * kDropRateForThrottle;
+  const double kErrorTolerance = 0.05;
+  const size_t kNumRpcs =
+      ComputeIdealNumRpcs(kDropRateForLbAndThrottle, kErrorTolerance);
  // The ADS response contains two drop categories.
  AdsServiceImpl::EdsResourceArgs args({
      {"locality0", GetBackendPorts()},
@ -9337,21 +9360,18 @@ TEST_P(DropTest, Vanilla) {
  }
  // The drop rate should be roughly equal to the expectation.
  const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
-  const double kErrorTolerance = 0.2;
-  EXPECT_THAT(
-      seen_drop_rate,
-      ::testing::AllOf(
-          ::testing::Ge(KDropRateForLbAndThrottle * (1 - kErrorTolerance)),
-          ::testing::Le(KDropRateForLbAndThrottle * (1 + kErrorTolerance))));
+  EXPECT_THAT(seen_drop_rate, ::testing::DoubleNear(kDropRateForLbAndThrottle,
+                                                    kErrorTolerance));
 }

 // Tests that drop config is converted correctly from per hundred.
 TEST_P(DropTest, DropPerHundred) {
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
-  const size_t kNumRpcs = 5000;
  const uint32_t kDropPerHundredForLb = 10;
  const double kDropRateForLb = kDropPerHundredForLb / 100.0;
+  const double kErrorTolerance = 0.05;
+  const size_t kNumRpcs = ComputeIdealNumRpcs(kDropRateForLb, kErrorTolerance);
  // The ADS response contains one drop category.
  AdsServiceImpl::EdsResourceArgs args({
      {"locality0", GetBackendPorts()},
@ -9377,20 +9397,18 @@ TEST_P(DropTest, DropPerHundred) {
  }
  // The drop rate should be roughly equal to the expectation.
  const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
-  const double kErrorTolerance = 0.2;
-  EXPECT_THAT(
-      seen_drop_rate,
-      ::testing::AllOf(::testing::Ge(kDropRateForLb * (1 - kErrorTolerance)),
-                       ::testing::Le(kDropRateForLb * (1 + kErrorTolerance))));
+  EXPECT_THAT(seen_drop_rate,
+              ::testing::DoubleNear(kDropRateForLb, kErrorTolerance));
 }

 // Tests that drop config is converted correctly from per ten thousand.
 TEST_P(DropTest, DropPerTenThousand) {
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
-  const size_t kNumRpcs = 5000;
  const uint32_t kDropPerTenThousandForLb = 1000;
  const double kDropRateForLb = kDropPerTenThousandForLb / 10000.0;
+  const double kErrorTolerance = 0.05;
+  const size_t kNumRpcs = ComputeIdealNumRpcs(kDropRateForLb, kErrorTolerance);
  // The ADS response contains one drop category.
  AdsServiceImpl::EdsResourceArgs args({
      {"locality0", GetBackendPorts()},
@ -9416,24 +9434,25 @@ TEST_P(DropTest, DropPerTenThousand) {
  }
  // The drop rate should be roughly equal to the expectation.
  const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
-  const double kErrorTolerance = 0.2;
-  EXPECT_THAT(
-      seen_drop_rate,
-      ::testing::AllOf(::testing::Ge(kDropRateForLb * (1 - kErrorTolerance)),
-                       ::testing::Le(kDropRateForLb * (1 + kErrorTolerance))));
+  EXPECT_THAT(seen_drop_rate,
+              ::testing::DoubleNear(kDropRateForLb, kErrorTolerance));
 }

 // Tests that drop is working correctly after update.
 TEST_P(DropTest, Update) {
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
-  const size_t kNumRpcs = 3000;
  const uint32_t kDropPerMillionForLb = 100000;
  const uint32_t kDropPerMillionForThrottle = 200000;
+  const double kErrorTolerance = 0.05;
  const double kDropRateForLb = kDropPerMillionForLb / 1000000.0;
  const double kDropRateForThrottle = kDropPerMillionForThrottle / 1000000.0;
-  const double KDropRateForLbAndThrottle =
+  const double kDropRateForLbAndThrottle =
      kDropRateForLb + (1 - kDropRateForLb) * kDropRateForThrottle;
+  const size_t kNumRpcsLbOnly =
+      ComputeIdealNumRpcs(kDropRateForLb, kErrorTolerance);
+  const size_t kNumRpcsBoth =
+      ComputeIdealNumRpcs(kDropRateForLbAndThrottle, kErrorTolerance);
  // The first ADS response contains one drop category.
  AdsServiceImpl::EdsResourceArgs args({
      {"locality0", GetBackendPorts()},
@ -9442,10 +9461,10 @@ TEST_P(DropTest, Update) {
  balancers_[0]->ads_service()->SetEdsResource(
      BuildEdsResource(args, DefaultEdsServiceName()));
  WaitForAllBackends();
-  // Send kNumRpcs RPCs and count the drops.
+  // Send kNumRpcsLbOnly RPCs and count the drops.
  size_t num_drops = 0;
  gpr_log(GPR_INFO, "========= BEFORE FIRST BATCH ==========");
-  for (size_t i = 0; i < kNumRpcs; ++i) {
+  for (size_t i = 0; i < kNumRpcsLbOnly; ++i) {
    EchoResponse response;
    const Status status = SendRpc(RpcOptions(), &response);
    if (!status.ok() &&
@ -9459,13 +9478,10 @@ TEST_P(DropTest, Update) {
  }
  gpr_log(GPR_INFO, "========= DONE WITH FIRST BATCH ==========");
  // The drop rate should be roughly equal to the expectation.
-  double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
+  double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcsLbOnly;
  gpr_log(GPR_INFO, "First batch drop rate %f", seen_drop_rate);
-  const double kErrorTolerance = 0.3;
-  EXPECT_THAT(
-      seen_drop_rate,
-      ::testing::AllOf(::testing::Ge(kDropRateForLb * (1 - kErrorTolerance)),
-                       ::testing::Le(kDropRateForLb * (1 + kErrorTolerance))));
+  EXPECT_THAT(seen_drop_rate,
+              ::testing::DoubleNear(kDropRateForLb, kErrorTolerance));
  // The second ADS response contains two drop categories, send an update EDS
  // response.
  args.drop_categories = {{kLbDropType, kDropPerMillionForLb},
@ -9475,8 +9491,8 @@ TEST_P(DropTest, Update) {
  // Wait until the drop rate increases to the middle of the two configs, which
  // implies that the update has been in effect.
  const double kDropRateThreshold =
-      (kDropRateForLb + KDropRateForLbAndThrottle) / 2;
-  size_t num_rpcs = kNumRpcs;
+      (kDropRateForLb + kDropRateForLbAndThrottle) / 2;
+  size_t num_rpcs = kNumRpcsBoth;
  while (seen_drop_rate < kDropRateThreshold) {
    EchoResponse response;
    const Status status = SendRpc(RpcOptions(), &response);
@ -9491,10 +9507,10 @@ TEST_P(DropTest, Update) {
    }
    seen_drop_rate = static_cast<double>(num_drops) / num_rpcs;
  }
-  // Send kNumRpcs RPCs and count the drops.
+  // Send kNumRpcsBoth RPCs and count the drops.
  num_drops = 0;
  gpr_log(GPR_INFO, "========= BEFORE SECOND BATCH ==========");
-  for (size_t i = 0; i < kNumRpcs; ++i) {
+  for (size_t i = 0; i < kNumRpcsBoth; ++i) {
    EchoResponse response;
    const Status status = SendRpc(RpcOptions(), &response);
    if (!status.ok() &&
@ -9508,13 +9524,10 @@ TEST_P(DropTest, Update) {
  }
  gpr_log(GPR_INFO, "========= DONE WITH SECOND BATCH ==========");
  // The new drop rate should be roughly equal to the expectation.
-  seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
+  seen_drop_rate = static_cast<double>(num_drops) / kNumRpcsBoth;
  gpr_log(GPR_INFO, "Second batch drop rate %f", seen_drop_rate);
-  EXPECT_THAT(
-      seen_drop_rate,
-      ::testing::AllOf(
-          ::testing::Ge(KDropRateForLbAndThrottle * (1 - kErrorTolerance)),
-          ::testing::Le(KDropRateForLbAndThrottle * (1 + kErrorTolerance))));
+  EXPECT_THAT(seen_drop_rate, ::testing::DoubleNear(kDropRateForLbAndThrottle,
+                                                    kErrorTolerance));
 }

 // Tests that all the RPCs are dropped if any drop category drops 100%.
@ -9979,13 +9992,15 @@ TEST_P(ClientLoadReportingWithDropTest, Vanilla) {
  }
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
-  const size_t kNumRpcs = 3000;
  const uint32_t kDropPerMillionForLb = 100000;
  const uint32_t kDropPerMillionForThrottle = 200000;
+  const double kErrorTolerance = 0.05;
  const double kDropRateForLb = kDropPerMillionForLb / 1000000.0;
  const double kDropRateForThrottle = kDropPerMillionForThrottle / 1000000.0;
-  const double KDropRateForLbAndThrottle =
+  const double kDropRateForLbAndThrottle =
      kDropRateForLb + (1 - kDropRateForLb) * kDropRateForThrottle;
+  const size_t kNumRpcs =
+      ComputeIdealNumRpcs(kDropRateForLbAndThrottle, kErrorTolerance);
  // The ADS response contains two drop categories.
  AdsServiceImpl::EdsResourceArgs args({
      {"locality0", GetBackendPorts()},
@ -10014,12 +10029,8 @@ TEST_P(ClientLoadReportingWithDropTest, Vanilla) {
  }
  // The drop rate should be roughly equal to the expectation.
  const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
-  const double kErrorTolerance = 0.2;
-  EXPECT_THAT(
-      seen_drop_rate,
-      ::testing::AllOf(
-          ::testing::Ge(KDropRateForLbAndThrottle * (1 - kErrorTolerance)),
-          ::testing::Le(KDropRateForLbAndThrottle * (1 + kErrorTolerance))));
+  EXPECT_THAT(seen_drop_rate, ::testing::DoubleNear(kDropRateForLbAndThrottle,
+                                                    kErrorTolerance));
  // Check client stats.
  const size_t total_rpc = num_warmup + kNumRpcs;
  ClientStats client_stats;
@ -10033,17 +10044,13 @@ TEST_P(ClientLoadReportingWithDropTest, Vanilla) {
               client_stats.total_dropped_requests() <
           total_rpc);
  EXPECT_EQ(num_drops, client_stats.total_dropped_requests());
+  EXPECT_THAT(static_cast<double>(client_stats.dropped_requests(kLbDropType)) /
+                  total_rpc,
+              ::testing::DoubleNear(kDropRateForLb, kErrorTolerance));
  EXPECT_THAT(
-      client_stats.dropped_requests(kLbDropType),
-      ::testing::AllOf(
-          ::testing::Ge(total_rpc * kDropRateForLb * (1 - kErrorTolerance)),
-          ::testing::Le(total_rpc * kDropRateForLb * (1 + kErrorTolerance))));
-  EXPECT_THAT(client_stats.dropped_requests(kThrottleDropType),
-              ::testing::AllOf(
-                  ::testing::Ge(total_rpc * (1 - kDropRateForLb) *
-                                kDropRateForThrottle * (1 - kErrorTolerance)),
-                  ::testing::Le(total_rpc * (1 - kDropRateForLb) *
-                                kDropRateForThrottle * (1 + kErrorTolerance))));
+      static_cast<double>(client_stats.dropped_requests(kThrottleDropType)) /
+          (total_rpc * (1 - kDropRateForLb)),
+      ::testing::DoubleNear(kDropRateForThrottle, kErrorTolerance));
 }

 class FaultInjectionTest : public XdsEnd2endTest {
@ -10147,10 +10154,10 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionWithoutListenerFilter) {
 }

 TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageAbort) {
-  const size_t kNumRpcs = 2000;
  const uint32_t kAbortPercentagePerHundred = 50;
  const double kAbortRate = kAbortPercentagePerHundred / 100.0;
-  const double kErrorTolerance = 0.2;
+  const double kErrorTolerance = 0.05;
+  const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance);
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
  // Create an EDS resource
@ -10179,16 +10186,15 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageAbort) {
  // The abort rate should be roughly equal to the expectation.
  const double seen_abort_rate = static_cast<double>(num_aborted) / kNumRpcs;
  EXPECT_THAT(seen_abort_rate,
-              ::testing::AllOf(::testing::Ge(kAbortRate - kErrorTolerance),
-                               ::testing::Le(kAbortRate + kErrorTolerance)));
+              ::testing::DoubleNear(kAbortRate, kErrorTolerance));
 }

 TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageAbortViaHeaders) {
-  const size_t kNumRpcs = 2000;
  const uint32_t kAbortPercentageCap = 100;
  const uint32_t kAbortPercentage = 50;
  const double kAbortRate = kAbortPercentage / 100.0;
-  const double kErrorTolerance = 0.2;
+  const double kErrorTolerance = 0.05;
+  const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance);
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
  // Create an EDS resource
@ -10220,19 +10226,16 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageAbortViaHeaders) {
  // The abort rate should be roughly equal to the expectation.
  const double seen_abort_rate = static_cast<double>(num_aborted) / kNumRpcs;
  EXPECT_THAT(seen_abort_rate,
-              ::testing::AllOf(::testing::Ge(kAbortRate - kErrorTolerance),
-                               ::testing::Le(kAbortRate + kErrorTolerance)));
+              ::testing::DoubleNear(kAbortRate, kErrorTolerance));
 }

-// TODO(lidiz) reduce the error tolerance to a lower level without dramatically
-// increase the duration of fault injection tests.
 TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelay) {
-  const size_t kNumRpcs = 100;
+  const uint32_t kRpcTimeoutMilliseconds = grpc_test_slowdown_factor() * 3000;
  const uint32_t kFixedDelaySeconds = 100;
-  const uint32_t kRpcTimeoutMilliseconds = 10;  // 10 ms
-  const uint32_t kDelayPercentagePerHundred = 95;
+  const uint32_t kDelayPercentagePerHundred = 50;
  const double kDelayRate = kDelayPercentagePerHundred / 100.0;
-  const double kErrorTolerance = 0.2;
+  const double kErrorTolerance = 0.05;
+  const size_t kNumRpcs = ComputeIdealNumRpcs(kDelayRate, kErrorTolerance);
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
  // Create an EDS resource
@ -10251,30 +10254,31 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelay) {
  // Config fault injection via different setup
  SetFilterConfig(http_fault);
  // Send kNumRpcs RPCs and count the delays.
-  int num_total = 0, num_ok = 0, num_delayed = 0, num_dropped = 0;
-  RpcOptions options = RpcOptions()
-                           .set_timeout_ms(kRpcTimeoutMilliseconds)
-                           .set_skip_cancelled_check(true);
-  for (size_t i = 0; i < kNumRpcs; ++i) {
-    SendRpcAndCount(&num_total, &num_ok, &num_delayed, &num_dropped, options);
+  RpcOptions rpc_options = RpcOptions()
+                               .set_timeout_ms(kRpcTimeoutMilliseconds)
+                               .set_skip_cancelled_check(true);
+  std::vector<ConcurrentRpc> rpcs =
+      SendConcurrentRpcs(stub_.get(), kNumRpcs, rpc_options);
+  size_t num_delayed = 0;
+  for (auto& rpc : rpcs) {
+    if (rpc.status.error_code() == grpc::OK) continue;
+    EXPECT_EQ(grpc::DEADLINE_EXCEEDED, rpc.status.error_code());
+    ++num_delayed;
  }
-  EXPECT_EQ(kNumRpcs, num_total);
-  EXPECT_EQ(0, num_dropped);
  // The delay rate should be roughly equal to the expectation.
  const double seen_delay_rate = static_cast<double>(num_delayed) / kNumRpcs;
  EXPECT_THAT(seen_delay_rate,
-              ::testing::AllOf(::testing::Ge(kDelayRate - kErrorTolerance),
-                               ::testing::Le(kDelayRate + kErrorTolerance)));
+              ::testing::DoubleNear(kDelayRate, kErrorTolerance));
 }

 TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelayViaHeaders) {
-  const size_t kNumRpcs = 100;
-  const uint32_t kFixedDelayMilliseconds = 100000;  // 100 seconds
-  const uint32_t kRpcTimeoutMilliseconds = 10;      // 10 ms
+  const uint32_t kFixedDelayMilliseconds = 100000;
+  const uint32_t kRpcTimeoutMilliseconds = grpc_test_slowdown_factor() * 3000;
  const uint32_t kDelayPercentageCap = 100;
  const uint32_t kDelayPercentage = 50;
  const double kDelayRate = kDelayPercentage / 100.0;
-  const double kErrorTolerance = 0.2;
+  const double kErrorTolerance = 0.05;
+  const size_t kNumRpcs = ComputeIdealNumRpcs(kDelayRate, kErrorTolerance);
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
  // Create an EDS resource
@ -10296,27 +10300,33 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelayViaHeaders) {
      {"x-envoy-fault-delay-request-percentage",
       std::to_string(kDelayPercentage)},
  };
-  int num_total = 0, num_ok = 0, num_delayed = 0, num_dropped = 0;
-  RpcOptions options = RpcOptions()
-                           .set_metadata(metadata)
-                           .set_timeout_ms(kRpcTimeoutMilliseconds)
-                           .set_skip_cancelled_check(true);
-  for (size_t i = 0; i < kNumRpcs; ++i) {
-    SendRpcAndCount(&num_total, &num_ok, &num_delayed, &num_dropped, options);
+  RpcOptions rpc_options = RpcOptions()
+                               .set_metadata(metadata)
+                               .set_timeout_ms(kRpcTimeoutMilliseconds)
+                               .set_skip_cancelled_check(true);
+  std::vector<ConcurrentRpc> rpcs =
+      SendConcurrentRpcs(stub_.get(), kNumRpcs, rpc_options);
+  size_t num_delayed = 0;
+  for (auto& rpc : rpcs) {
+    if (rpc.status.error_code() == grpc::OK) continue;
+    EXPECT_EQ(grpc::DEADLINE_EXCEEDED, rpc.status.error_code());
+    ++num_delayed;
  }
  // The delay rate should be roughly equal to the expectation.
  const double seen_delay_rate = static_cast<double>(num_delayed) / kNumRpcs;
  EXPECT_THAT(seen_delay_rate,
-              ::testing::AllOf(::testing::Ge(kDelayRate - kErrorTolerance),
-                               ::testing::Le(kDelayRate + kErrorTolerance)));
+              ::testing::DoubleNear(kDelayRate, kErrorTolerance));
 }

 TEST_P(FaultInjectionTest, XdsFaultInjectionAlwaysDelayPercentageAbort) {
-  const size_t kNumRpcs = 500;
  const uint32_t kAbortPercentagePerHundred = 50;
  const double kAbortRate = kAbortPercentagePerHundred / 100.0;
-  const uint32_t kFixedDelayNanos = 10 * 1000 * 1000;  // 10 ms
-  const double kErrorTolerance = 0.2;
+  const uint32_t kFixedDelaySeconds = 1;
+  const uint32_t kRpcTimeoutMilliseconds = 100 * 1000;  // 100s should not reach
+  const uint32_t kConnectionTimeoutMilliseconds =
+      10 * 1000;  // 10s should not reach
+  const double kErrorTolerance = 0.05;
+  const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance);
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
  // Create an EDS resource
@ -10336,25 +10346,29 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionAlwaysDelayPercentageAbort) {
  delay_percentage->set_numerator(1000000);  // Always inject DELAY!
  delay_percentage->set_denominator(FractionalPercent::MILLION);
  auto* fixed_delay = http_fault.mutable_delay()->mutable_fixed_delay();
-  fixed_delay->set_nanos(kFixedDelayNanos);
+  fixed_delay->set_seconds(kFixedDelaySeconds);
  // Config fault injection via different setup
  SetFilterConfig(http_fault);
+  // Allow the channel to connect to one backends, so the herd of queued RPCs
+  // won't be executed on the same ExecCtx object and using the cached Now()
+  // value, which causes millisecond level delay error.
+  channel_->WaitForConnected(
+      grpc_timeout_milliseconds_to_deadline(kConnectionTimeoutMilliseconds));
  // Send kNumRpcs RPCs and count the aborts.
-  int num_total = 0, num_ok = 0, num_failure = 0, num_aborted = 0;
-  for (size_t i = 0; i < kNumRpcs; ++i) {
-    grpc_millis t0 = NowFromCycleCounter();
-    SendRpcAndCount(&num_total, &num_ok, &num_failure, &num_aborted,
-                    RpcOptions(), "Fault injected");
-    grpc_millis t1 = NowFromCycleCounter();
-    EXPECT_GE(t1, t0 + kFixedDelayNanos / 1000 / 1000);
+  int num_aborted = 0;
+  RpcOptions rpc_options = RpcOptions().set_timeout_ms(kRpcTimeoutMilliseconds);
+  std::vector<ConcurrentRpc> rpcs =
+      SendConcurrentRpcs(stub_.get(), kNumRpcs, rpc_options);
+  for (auto& rpc : rpcs) {
+    EXPECT_GE(rpc.elapsed_time, kFixedDelaySeconds * 1000);
+    if (rpc.status.error_code() == grpc::OK) continue;
+    EXPECT_EQ("Fault injected", rpc.status.error_message());
+    ++num_aborted;
  }
-  EXPECT_EQ(kNumRpcs, num_total);
-  EXPECT_EQ(0, num_failure);
  // The abort rate should be roughly equal to the expectation.
  const double seen_abort_rate = static_cast<double>(num_aborted) / kNumRpcs;
  EXPECT_THAT(seen_abort_rate,
-              ::testing::AllOf(::testing::Ge(kAbortRate - kErrorTolerance),
-                               ::testing::Le(kAbortRate + kErrorTolerance)));
+              ::testing::DoubleNear(kAbortRate, kErrorTolerance));
 }

 // This test and the above test apply different denominators to delay and abort.
@ -10362,11 +10376,14 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionAlwaysDelayPercentageAbort) {
 // in our code.
 TEST_P(FaultInjectionTest,
       XdsFaultInjectionAlwaysDelayPercentageAbortSwitchDenominator) {
-  const size_t kNumRpcs = 100;
  const uint32_t kAbortPercentagePerMillion = 500000;
  const double kAbortRate = kAbortPercentagePerMillion / 1000000.0;
-  const uint32_t kFixedDelayNanos = 10 * 1000 * 1000;  // 10 ms
-  const double kErrorTolerance = 0.2;
+  const uint32_t kFixedDelaySeconds = 1;                // 1s
+  const uint32_t kRpcTimeoutMilliseconds = 100 * 1000;  // 100s should not reach
+  const uint32_t kConnectionTimeoutMilliseconds =
+      10 * 1000;  // 10s should not reach
+  const double kErrorTolerance = 0.05;
+  const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance);
  SetNextResolution({});
  SetNextResolutionForLbChannelAllBalancers();
  // Create an EDS resource
@ -10386,31 +10403,35 @@ TEST_P(FaultInjectionTest,
  delay_percentage->set_numerator(100);  // Always inject DELAY!
  delay_percentage->set_denominator(FractionalPercent::HUNDRED);
  auto* fixed_delay = http_fault.mutable_delay()->mutable_fixed_delay();
-  fixed_delay->set_nanos(kFixedDelayNanos);
+  fixed_delay->set_seconds(kFixedDelaySeconds);
  // Config fault injection via different setup
  SetFilterConfig(http_fault);
+  // Allow the channel to connect to one backends, so the herd of queued RPCs
+  // won't be executed on the same ExecCtx object and using the cached Now()
+  // value, which causes millisecond level delay error.
+  channel_->WaitForConnected(
+      grpc_timeout_milliseconds_to_deadline(kConnectionTimeoutMilliseconds));
  // Send kNumRpcs RPCs and count the aborts.
-  int num_total = 0, num_ok = 0, num_failure = 0, num_aborted = 0;
-  for (size_t i = 0; i < kNumRpcs; ++i) {
-    grpc_millis t0 = NowFromCycleCounter();
-    SendRpcAndCount(&num_total, &num_ok, &num_failure, &num_aborted,
-                    RpcOptions(), "Fault injected");
-    grpc_millis t1 = NowFromCycleCounter();
-    EXPECT_GE(t1, t0 + kFixedDelayNanos / 1000 / 1000);
+  int num_aborted = 0;
+  RpcOptions rpc_options = RpcOptions().set_timeout_ms(kRpcTimeoutMilliseconds);
+  std::vector<ConcurrentRpc> rpcs =
+      SendConcurrentRpcs(stub_.get(), kNumRpcs, rpc_options);
+  for (auto& rpc : rpcs) {
+    EXPECT_GE(rpc.elapsed_time, kFixedDelaySeconds * 1000);
+    if (rpc.status.error_code() == grpc::OK) continue;
+    EXPECT_EQ("Fault injected", rpc.status.error_message());
+    ++num_aborted;
  }
-  EXPECT_EQ(kNumRpcs, num_total);
-  EXPECT_EQ(0, num_failure);
  // The abort rate should be roughly equal to the expectation.
  const double seen_abort_rate = static_cast<double>(num_aborted) / kNumRpcs;
  EXPECT_THAT(seen_abort_rate,
-              ::testing::AllOf(::testing::Ge(kAbortRate - kErrorTolerance),
-                               ::testing::Le(kAbortRate + kErrorTolerance)));
+              ::testing::DoubleNear(kAbortRate, kErrorTolerance));
 }

 TEST_P(FaultInjectionTest, XdsFaultInjectionMaxFault) {
  const uint32_t kMaxFault = 10;
  const uint32_t kNumRpcs = 30;  // kNumRpcs should be bigger than kMaxFault
-  const uint32_t kRpcTimeoutMs = 2000;     // 2 seconds
+  const uint32_t kRpcTimeoutMs = 4000;     // 4 seconds
  const uint32_t kLongDelaySeconds = 100;  // 100 seconds
  const uint32_t kAlwaysDelayPercentage = 100;
  SetNextResolution({});
@ -10434,25 +10455,17 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionMaxFault) {
  SetFilterConfig(http_fault);
  // Sends a batch of long running RPCs with long timeout to consume all
  // active faults quota.
-  int num_ok = 0, num_delayed = 0;
-  LongRunningRpc rpcs[kNumRpcs];
+  int num_delayed = 0;
  RpcOptions rpc_options = RpcOptions().set_timeout_ms(kRpcTimeoutMs);
-  for (size_t i = 0; i < kNumRpcs; ++i) {
-    rpcs[i].StartRpc(stub_.get(), rpc_options);
-  }
-  for (size_t i = 0; i < kNumRpcs; ++i) {
-    Status status = rpcs[i].GetStatus();
-    if (status.ok()) {
-      ++num_ok;
-    } else {
-      EXPECT_EQ(StatusCode::DEADLINE_EXCEEDED, status.error_code());
-      ++num_delayed;
-    }
+  std::vector<ConcurrentRpc> rpcs =
+      SendConcurrentRpcs(stub_.get(), kNumRpcs, rpc_options);
+  for (auto& rpc : rpcs) {
+    if (rpc.status.error_code() == grpc::OK) continue;
+    EXPECT_EQ(StatusCode::DEADLINE_EXCEEDED, rpc.status.error_code());
+    ++num_delayed;
  }
  // Only kMaxFault number of RPC should be fault injected..
  EXPECT_EQ(kMaxFault, num_delayed);
-  // Other RPCs should be ok.
-  EXPECT_EQ(kNumRpcs - kMaxFault, num_ok);
 }

 class BootstrapSourceTest : public XdsEnd2endTest {