Reduce xds_end2end_test's error tolerance to 0.05 (#25738)

* WIP sync API with 1.6k threads doesn't work

* Please don't fail

* Fix the tests

* Spread to DropTest

* Code clean-up and extend delay RPC timeout

* Address comments

* Address comments

* Deflake MSAN

* Make MSAN more robust && normal runs faster

* Address comments

* Remove the clock change in fault injection filter

* Polish comments && stricten the connection timeout
pull/26118/head
Lidi Zheng 4 years ago committed by GitHub
parent e372a63cf7
commit 13e04b4b9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 557
      test/cpp/end2end/xds_end2end_test.cc

@ -1606,6 +1606,34 @@ grpc_millis NowFromCycleCounter() {
return grpc_cycle_counter_to_millis_round_up(now);
}
// Returns the number of RPCs needed to pass error_tolerance at 99.99% chance.
// Rolling dices in drop/fault-injection generates a binomial distribution (if
// our code is not horribly wrong). Let's make "n" the number of samples, "p"
// the probabilty. If we have np>5 & n(1-p)>5, we can approximately treat the
// binomial distribution as a normal distribution.
//
// For normal distribution, we can easily look up how many standard deviation we
// need to reach 99.99%. Based on Wiki's table
// https://en.wikipedia.org/wiki/Standard_normal_table, we need 3.89 sigma
// (standard deviation) to cover the probability area of 99.99%. In another
// word, for a sample with size "n" probability "p" error-tolerance "k", we want
// the error always land within 3.89 sigma. The sigma of binominal distribution
// and be computed as sqrt(np(1-p)). Hence, we have the equation:
//
// kn <= 3.89 * sqrt(np(1-p))
//
// E.g., with p=0.5 k=0.1, n >= 378; with p=0.5 k=0.05, n >= 1513; with p=0.5
// k=0.01, n >= 37830.
size_t ComputeIdealNumRpcs(double p, double error_tolerance) {
GPR_ASSERT(p >= 0 && p <= 1);
size_t num_rpcs =
ceil(p * (1 - p) * 3.89 * 3.89 / error_tolerance / error_tolerance);
gpr_log(GPR_INFO,
"Sending %" PRIuPTR " RPCs for percentage=%.3f error_tolerance=%.3f",
num_rpcs, p, error_tolerance);
return num_rpcs;
}
// Channel arg pointer vtable for storing xDS channel args in the parent
// channel's channel args.
void* ChannelArgsArgCopy(void* p) {
@ -2557,6 +2585,49 @@ class XdsEnd2endTest : public ::testing::TestWithParam<TestType> {
Status status_;
};
struct ConcurrentRpc {
ClientContext context;
Status status;
grpc_millis elapsed_time;
EchoResponse response;
};
std::vector<ConcurrentRpc> SendConcurrentRpcs(
grpc::testing::EchoTestService::Stub* stub, size_t num_rpcs,
const RpcOptions& rpc_options) {
// Variables for RPCs.
std::vector<ConcurrentRpc> rpcs(num_rpcs);
EchoRequest request;
// Variables for synchronization
absl::Mutex mu;
absl::CondVar cv;
size_t completed = 0;
// Set-off callback RPCs
for (size_t i = 0; i < num_rpcs; i++) {
ConcurrentRpc* rpc = &rpcs[i];
rpc_options.SetupRpc(&rpc->context, &request);
grpc_millis t0 = NowFromCycleCounter();
stub->experimental_async()->Echo(
&rpc->context, &request, &rpc->response,
[rpc, &mu, &completed, &cv, num_rpcs, t0](Status s) {
rpc->status = s;
rpc->elapsed_time = NowFromCycleCounter() - t0;
bool done;
{
absl::MutexLock lock(&mu);
done = (++completed) == num_rpcs;
}
if (done) cv.Signal();
});
}
{
absl::MutexLock lock(&mu);
cv.Wait(&mu);
}
EXPECT_EQ(completed, num_rpcs);
return rpcs;
}
const size_t num_backends_;
const size_t num_balancers_;
const int client_load_reporting_interval_seconds_;
@ -4567,10 +4638,14 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedCluster) {
const char* kNewCluster2Name = "new_cluster_2";
const char* kNewEdsService2Name = "new_eds_service_name_2";
const char* kNotUsedClusterName = "not_used_cluster";
const size_t kNumEcho1Rpcs = 1000;
const size_t kNumEchoRpcs = 10;
const size_t kNumEchoRpcs = 10; // RPCs that will go to a fixed backend.
const size_t kWeight75 = 75;
const size_t kWeight25 = 25;
const double kErrorTolerance = 0.05;
const double kWeight75Percent = static_cast<double>(kWeight75) / 100;
const double kWeight25Percent = static_cast<double>(kWeight25) / 100;
const size_t kNumEcho1Rpcs =
ComputeIdealNumRpcs(kWeight75Percent, kErrorTolerance);
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Populate new EDS resources.
@ -4637,24 +4712,12 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedCluster) {
EXPECT_EQ(0, backends_[2]->backend_service()->request_count());
const int weight_25_request_count =
backends_[2]->backend_service1()->request_count();
const double kErrorTolerance = 0.2;
EXPECT_THAT(
weight_75_request_count,
::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
kWeight75 / 100 * (1 - kErrorTolerance)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
kWeight75 / 100 * (1 + kErrorTolerance))));
// TODO(@donnadionne): Reduce tolerance: increased the tolerance to keep the
// test from flaking while debugging potential root cause.
const double kErrorToleranceSmallLoad = 0.3;
gpr_log(GPR_INFO, "target_75 received %d rpcs and target_25 received %d rpcs",
weight_75_request_count, weight_25_request_count);
EXPECT_THAT(weight_25_request_count,
::testing::AllOf(
::testing::Ge(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
100 * (1 - kErrorToleranceSmallLoad)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
100 * (1 + kErrorToleranceSmallLoad))));
EXPECT_THAT(static_cast<double>(weight_75_request_count) / kNumEcho1Rpcs,
::testing::DoubleNear(kWeight75Percent, kErrorTolerance));
EXPECT_THAT(static_cast<double>(weight_25_request_count) / kNumEcho1Rpcs,
::testing::DoubleNear(kWeight25Percent, kErrorTolerance));
}
TEST_P(LdsRdsTest, RouteActionWeightedTargetDefaultRoute) {
@ -4662,9 +4725,13 @@ TEST_P(LdsRdsTest, RouteActionWeightedTargetDefaultRoute) {
const char* kNewEdsService1Name = "new_eds_service_name_1";
const char* kNewCluster2Name = "new_cluster_2";
const char* kNewEdsService2Name = "new_eds_service_name_2";
const size_t kNumEchoRpcs = 1000;
const size_t kWeight75 = 75;
const size_t kWeight25 = 25;
const double kErrorTolerance = 0.05;
const double kWeight75Percent = static_cast<double>(kWeight75) / 100;
const double kWeight25Percent = static_cast<double>(kWeight25) / 100;
const size_t kNumEchoRpcs =
ComputeIdealNumRpcs(kWeight75Percent, kErrorTolerance);
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Populate new EDS resources.
@ -4718,24 +4785,12 @@ TEST_P(LdsRdsTest, RouteActionWeightedTargetDefaultRoute) {
backends_[1]->backend_service()->request_count();
const int weight_25_request_count =
backends_[2]->backend_service()->request_count();
const double kErrorTolerance = 0.2;
EXPECT_THAT(
weight_75_request_count,
::testing::AllOf(::testing::Ge(static_cast<double>(kNumEchoRpcs) *
kWeight75 / 100 * (1 - kErrorTolerance)),
::testing::Le(static_cast<double>(kNumEchoRpcs) *
kWeight75 / 100 * (1 + kErrorTolerance))));
// TODO(@donnadionne): Reduce tolerance: increased the tolerance to keep the
// test from flaking while debugging potential root cause.
const double kErrorToleranceSmallLoad = 0.3;
gpr_log(GPR_INFO, "target_75 received %d rpcs and target_25 received %d rpcs",
weight_75_request_count, weight_25_request_count);
EXPECT_THAT(weight_25_request_count,
::testing::AllOf(
::testing::Ge(static_cast<double>(kNumEchoRpcs) * kWeight25 /
100 * (1 - kErrorToleranceSmallLoad)),
::testing::Le(static_cast<double>(kNumEchoRpcs) * kWeight25 /
100 * (1 + kErrorToleranceSmallLoad))));
EXPECT_THAT(static_cast<double>(weight_75_request_count) / kNumEchoRpcs,
::testing::DoubleNear(kWeight75Percent, kErrorTolerance));
EXPECT_THAT(static_cast<double>(weight_25_request_count) / kNumEchoRpcs,
::testing::DoubleNear(kWeight25Percent, kErrorTolerance));
}
TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
@ -4745,11 +4800,18 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
const char* kNewEdsService2Name = "new_eds_service_name_2";
const char* kNewCluster3Name = "new_cluster_3";
const char* kNewEdsService3Name = "new_eds_service_name_3";
const size_t kNumEcho1Rpcs = 1000;
const size_t kNumEchoRpcs = 10;
const size_t kWeight75 = 75;
const size_t kWeight25 = 25;
const size_t kWeight50 = 50;
const double kErrorTolerance = 0.05;
const double kWeight75Percent = static_cast<double>(kWeight75) / 100;
const double kWeight25Percent = static_cast<double>(kWeight25) / 100;
const double kWeight50Percent = static_cast<double>(kWeight50) / 100;
const size_t kNumEcho1Rpcs7525 =
ComputeIdealNumRpcs(kWeight75Percent, kErrorTolerance);
const size_t kNumEcho1Rpcs5050 =
ComputeIdealNumRpcs(kWeight50Percent, kErrorTolerance);
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Populate new EDS resources.
@ -4811,7 +4873,8 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
WaitForAllBackends(0, 1);
WaitForAllBackends(1, 3, true, RpcOptions().set_rpc_service(SERVICE_ECHO1));
CheckRpcSendOk(kNumEchoRpcs);
CheckRpcSendOk(kNumEcho1Rpcs, RpcOptions().set_rpc_service(SERVICE_ECHO1));
CheckRpcSendOk(kNumEcho1Rpcs7525,
RpcOptions().set_rpc_service(SERVICE_ECHO1));
// Make sure RPCs all go to the correct backend.
EXPECT_EQ(kNumEchoRpcs, backends_[0]->backend_service()->request_count());
EXPECT_EQ(0, backends_[0]->backend_service1()->request_count());
@ -4824,24 +4887,12 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
backends_[2]->backend_service1()->request_count();
EXPECT_EQ(0, backends_[3]->backend_service()->request_count());
EXPECT_EQ(0, backends_[3]->backend_service1()->request_count());
const double kErrorTolerance = 0.2;
EXPECT_THAT(
weight_75_request_count,
::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
kWeight75 / 100 * (1 - kErrorTolerance)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
kWeight75 / 100 * (1 + kErrorTolerance))));
// TODO(@donnadionne): Reduce tolerance: increased the tolerance to keep the
// test from flaking while debugging potential root cause.
const double kErrorToleranceSmallLoad = 0.3;
gpr_log(GPR_INFO, "target_75 received %d rpcs and target_25 received %d rpcs",
weight_75_request_count, weight_25_request_count);
EXPECT_THAT(weight_25_request_count,
::testing::AllOf(
::testing::Ge(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
100 * (1 - kErrorToleranceSmallLoad)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
100 * (1 + kErrorToleranceSmallLoad))));
EXPECT_THAT(static_cast<double>(weight_75_request_count) / kNumEcho1Rpcs7525,
::testing::DoubleNear(kWeight75Percent, kErrorTolerance));
EXPECT_THAT(static_cast<double>(weight_25_request_count) / kNumEcho1Rpcs7525,
::testing::DoubleNear(kWeight25Percent, kErrorTolerance));
// Change Route Configurations: same clusters different weights.
weighted_cluster1->mutable_weight()->set_value(kWeight50);
weighted_cluster2->mutable_weight()->set_value(kWeight50);
@ -4852,7 +4903,8 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
ResetBackendCounters();
WaitForAllBackends(3, 4);
CheckRpcSendOk(kNumEchoRpcs);
CheckRpcSendOk(kNumEcho1Rpcs, RpcOptions().set_rpc_service(SERVICE_ECHO1));
CheckRpcSendOk(kNumEcho1Rpcs5050,
RpcOptions().set_rpc_service(SERVICE_ECHO1));
// Make sure RPCs all go to the correct backend.
EXPECT_EQ(0, backends_[0]->backend_service()->request_count());
EXPECT_EQ(0, backends_[0]->backend_service1()->request_count());
@ -4865,17 +4917,11 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateWeights) {
EXPECT_EQ(kNumEchoRpcs, backends_[3]->backend_service()->request_count());
EXPECT_EQ(0, backends_[3]->backend_service1()->request_count());
EXPECT_THAT(
weight_50_request_count_1,
::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
kWeight50 / 100 * (1 - kErrorTolerance)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
kWeight50 / 100 * (1 + kErrorTolerance))));
static_cast<double>(weight_50_request_count_1) / kNumEcho1Rpcs5050,
::testing::DoubleNear(kWeight50Percent, kErrorTolerance));
EXPECT_THAT(
weight_50_request_count_2,
::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
kWeight50 / 100 * (1 - kErrorTolerance)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
kWeight50 / 100 * (1 + kErrorTolerance))));
static_cast<double>(weight_50_request_count_2) / kNumEcho1Rpcs5050,
::testing::DoubleNear(kWeight50Percent, kErrorTolerance));
}
TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
@ -4885,11 +4931,18 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
const char* kNewEdsService2Name = "new_eds_service_name_2";
const char* kNewCluster3Name = "new_cluster_3";
const char* kNewEdsService3Name = "new_eds_service_name_3";
const size_t kNumEcho1Rpcs = 1000;
const size_t kNumEchoRpcs = 10;
const size_t kWeight75 = 75;
const size_t kWeight25 = 25;
const size_t kWeight50 = 50;
const double kErrorTolerance = 0.05;
const double kWeight75Percent = static_cast<double>(kWeight75) / 100;
const double kWeight25Percent = static_cast<double>(kWeight25) / 100;
const double kWeight50Percent = static_cast<double>(kWeight50) / 100;
const size_t kNumEcho1Rpcs7525 =
ComputeIdealNumRpcs(kWeight75Percent, kErrorTolerance);
const size_t kNumEcho1Rpcs5050 =
ComputeIdealNumRpcs(kWeight50Percent, kErrorTolerance);
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Populate new EDS resources.
@ -4951,7 +5004,8 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
WaitForAllBackends(0, 1);
WaitForAllBackends(1, 2, true, RpcOptions().set_rpc_service(SERVICE_ECHO1));
CheckRpcSendOk(kNumEchoRpcs);
CheckRpcSendOk(kNumEcho1Rpcs, RpcOptions().set_rpc_service(SERVICE_ECHO1));
CheckRpcSendOk(kNumEcho1Rpcs7525,
RpcOptions().set_rpc_service(SERVICE_ECHO1));
// Make sure RPCs all go to the correct backend.
EXPECT_EQ(kNumEchoRpcs, backends_[0]->backend_service()->request_count());
int weight_25_request_count =
@ -4963,24 +5017,12 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
EXPECT_EQ(0, backends_[2]->backend_service1()->request_count());
EXPECT_EQ(0, backends_[3]->backend_service()->request_count());
EXPECT_EQ(0, backends_[3]->backend_service1()->request_count());
const double kErrorTolerance = 0.2;
EXPECT_THAT(
weight_75_request_count,
::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
kWeight75 / 100 * (1 - kErrorTolerance)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
kWeight75 / 100 * (1 + kErrorTolerance))));
// TODO(@donnadionne): Reduce tolerance: increased the tolerance to keep the
// test from flaking while debugging potential root cause.
const double kErrorToleranceSmallLoad = 0.3;
gpr_log(GPR_INFO, "target_75 received %d rpcs and target_25 received %d rpcs",
weight_75_request_count, weight_25_request_count);
EXPECT_THAT(weight_25_request_count,
::testing::AllOf(
::testing::Ge(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
100 * (1 - kErrorToleranceSmallLoad)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
100 * (1 + kErrorToleranceSmallLoad))));
EXPECT_THAT(static_cast<double>(weight_75_request_count) / kNumEcho1Rpcs7525,
::testing::DoubleNear(kWeight75Percent, kErrorTolerance));
EXPECT_THAT(static_cast<double>(weight_25_request_count) / kNumEcho1Rpcs7525,
::testing::DoubleNear(kWeight25Percent, kErrorTolerance));
// Change Route Configurations: new set of clusters with different weights.
weighted_cluster1->mutable_weight()->set_value(kWeight50);
weighted_cluster2->set_name(kNewCluster2Name);
@ -4989,7 +5031,8 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
ResetBackendCounters();
WaitForAllBackends(2, 3, true, RpcOptions().set_rpc_service(SERVICE_ECHO1));
CheckRpcSendOk(kNumEchoRpcs);
CheckRpcSendOk(kNumEcho1Rpcs, RpcOptions().set_rpc_service(SERVICE_ECHO1));
CheckRpcSendOk(kNumEcho1Rpcs5050,
RpcOptions().set_rpc_service(SERVICE_ECHO1));
// Make sure RPCs all go to the correct backend.
EXPECT_EQ(kNumEchoRpcs, backends_[0]->backend_service()->request_count());
EXPECT_EQ(0, backends_[0]->backend_service1()->request_count());
@ -5002,17 +5045,11 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
EXPECT_EQ(0, backends_[3]->backend_service()->request_count());
EXPECT_EQ(0, backends_[3]->backend_service1()->request_count());
EXPECT_THAT(
weight_50_request_count_1,
::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
kWeight50 / 100 * (1 - kErrorTolerance)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
kWeight50 / 100 * (1 + kErrorTolerance))));
static_cast<double>(weight_50_request_count_1) / kNumEcho1Rpcs5050,
::testing::DoubleNear(kWeight50Percent, kErrorTolerance));
EXPECT_THAT(
weight_50_request_count_2,
::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
kWeight50 / 100 * (1 - kErrorTolerance)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
kWeight50 / 100 * (1 + kErrorTolerance))));
static_cast<double>(weight_50_request_count_2) / kNumEcho1Rpcs5050,
::testing::DoubleNear(kWeight50Percent, kErrorTolerance));
// Change Route Configurations.
weighted_cluster1->mutable_weight()->set_value(kWeight75);
weighted_cluster2->set_name(kNewCluster3Name);
@ -5021,7 +5058,8 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
ResetBackendCounters();
WaitForAllBackends(3, 4, true, RpcOptions().set_rpc_service(SERVICE_ECHO1));
CheckRpcSendOk(kNumEchoRpcs);
CheckRpcSendOk(kNumEcho1Rpcs, RpcOptions().set_rpc_service(SERVICE_ECHO1));
CheckRpcSendOk(kNumEcho1Rpcs7525,
RpcOptions().set_rpc_service(SERVICE_ECHO1));
// Make sure RPCs all go to the correct backend.
EXPECT_EQ(kNumEchoRpcs, backends_[0]->backend_service()->request_count());
EXPECT_EQ(0, backends_[0]->backend_service1()->request_count());
@ -5031,22 +5069,12 @@ TEST_P(LdsRdsTest, XdsRoutingWeightedClusterUpdateClusters) {
EXPECT_EQ(0, backends_[2]->backend_service1()->request_count());
EXPECT_EQ(0, backends_[3]->backend_service()->request_count());
weight_25_request_count = backends_[3]->backend_service1()->request_count();
EXPECT_THAT(
weight_75_request_count,
::testing::AllOf(::testing::Ge(static_cast<double>(kNumEcho1Rpcs) *
kWeight75 / 100 * (1 - kErrorTolerance)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) *
kWeight75 / 100 * (1 + kErrorTolerance))));
// TODO(@donnadionne): Reduce tolerance: increased the tolerance to keep the
// test from flaking while debugging potential root cause.
gpr_log(GPR_INFO, "target_75 received %d rpcs and target_25 received %d rpcs",
weight_75_request_count, weight_25_request_count);
EXPECT_THAT(weight_25_request_count,
::testing::AllOf(
::testing::Ge(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
100 * (1 - kErrorToleranceSmallLoad)),
::testing::Le(static_cast<double>(kNumEcho1Rpcs) * kWeight25 /
100 * (1 + kErrorToleranceSmallLoad))));
EXPECT_THAT(static_cast<double>(weight_75_request_count) / kNumEcho1Rpcs7525,
::testing::DoubleNear(kWeight75Percent, kErrorTolerance));
EXPECT_THAT(static_cast<double>(weight_25_request_count) / kNumEcho1Rpcs7525,
::testing::DoubleNear(kWeight25Percent, kErrorTolerance));
}
TEST_P(LdsRdsTest, XdsRoutingClusterUpdateClusters) {
@ -5625,7 +5653,12 @@ TEST_P(LdsRdsTest, XdsRoutingHeadersMatchingSpecialCasesToIgnore) {
TEST_P(LdsRdsTest, XdsRoutingRuntimeFractionMatching) {
const char* kNewClusterName = "new_cluster";
const char* kNewEdsServiceName = "new_eds_service_name";
const size_t kNumRpcs = 1000;
const double kErrorTolerance = 0.05;
const size_t kRouteMatchNumerator = 25;
const double kRouteMatchPercent =
static_cast<double>(kRouteMatchNumerator) / 100;
const size_t kNumRpcs =
ComputeIdealNumRpcs(kRouteMatchPercent, kErrorTolerance);
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Populate new EDS resources.
@ -5650,7 +5683,7 @@ TEST_P(LdsRdsTest, XdsRoutingRuntimeFractionMatching) {
route1->mutable_match()
->mutable_runtime_fraction()
->mutable_default_value()
->set_numerator(25);
->set_numerator(kRouteMatchNumerator);
route1->mutable_route()->set_cluster(kNewClusterName);
auto* default_route = route_config.mutable_virtual_hosts(0)->add_routes();
default_route->mutable_match()->set_prefix("");
@ -5662,19 +5695,10 @@ TEST_P(LdsRdsTest, XdsRoutingRuntimeFractionMatching) {
backends_[0]->backend_service()->request_count();
const int matched_backend_count =
backends_[1]->backend_service()->request_count();
const double kErrorTolerance = 0.2;
EXPECT_THAT(
default_backend_count,
::testing::AllOf(::testing::Ge(static_cast<double>(kNumRpcs) * 75 / 100 *
(1 - kErrorTolerance)),
::testing::Le(static_cast<double>(kNumRpcs) * 75 / 100 *
(1 + kErrorTolerance))));
EXPECT_THAT(
matched_backend_count,
::testing::AllOf(::testing::Ge(static_cast<double>(kNumRpcs) * 25 / 100 *
(1 - kErrorTolerance)),
::testing::Le(static_cast<double>(kNumRpcs) * 25 / 100 *
(1 + kErrorTolerance))));
EXPECT_THAT(static_cast<double>(default_backend_count) / kNumRpcs,
::testing::DoubleNear(1 - kRouteMatchPercent, kErrorTolerance));
EXPECT_THAT(static_cast<double>(matched_backend_count) / kNumRpcs,
::testing::DoubleNear(kRouteMatchPercent, kErrorTolerance));
const auto response_state = RouteConfigurationResponseState(0);
EXPECT_EQ(response_state.state, AdsServiceImpl::ResponseState::ACKED);
}
@ -8854,7 +8878,6 @@ using LocalityMapTest = BasicTest;
TEST_P(LocalityMapTest, WeightedRoundRobin) {
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
const size_t kNumRpcs = 5000;
const int kLocalityWeight0 = 2;
const int kLocalityWeight1 = 8;
const int kTotalLocalityWeight = kLocalityWeight0 + kLocalityWeight1;
@ -8862,6 +8885,9 @@ TEST_P(LocalityMapTest, WeightedRoundRobin) {
static_cast<double>(kLocalityWeight0) / kTotalLocalityWeight;
const double kLocalityWeightRate1 =
static_cast<double>(kLocalityWeight1) / kTotalLocalityWeight;
const double kErrorTolerance = 0.05;
const size_t kNumRpcs =
ComputeIdealNumRpcs(kLocalityWeightRate0, kErrorTolerance);
// ADS response contains 2 localities, each of which contains 1 backend.
AdsServiceImpl::EdsResourceArgs args({
{"locality0", GetBackendPorts(0, 1), kLocalityWeight0},
@ -8880,15 +8906,10 @@ TEST_P(LocalityMapTest, WeightedRoundRobin) {
const double locality_picked_rate_1 =
static_cast<double>(backends_[1]->backend_service()->request_count()) /
kNumRpcs;
const double kErrorTolerance = 0.2;
EXPECT_THAT(locality_picked_rate_0,
::testing::AllOf(
::testing::Ge(kLocalityWeightRate0 * (1 - kErrorTolerance)),
::testing::Le(kLocalityWeightRate0 * (1 + kErrorTolerance))));
::testing::DoubleNear(kLocalityWeightRate0, kErrorTolerance));
EXPECT_THAT(locality_picked_rate_1,
::testing::AllOf(
::testing::Ge(kLocalityWeightRate1 * (1 - kErrorTolerance)),
::testing::Le(kLocalityWeightRate1 * (1 + kErrorTolerance))));
::testing::DoubleNear(kLocalityWeightRate1, kErrorTolerance));
}
// Tests that we correctly handle a locality containing no endpoints.
@ -9305,13 +9326,15 @@ using DropTest = BasicTest;
TEST_P(DropTest, Vanilla) {
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
const size_t kNumRpcs = 5000;
const uint32_t kDropPerMillionForLb = 100000;
const uint32_t kDropPerMillionForThrottle = 200000;
const double kDropRateForLb = kDropPerMillionForLb / 1000000.0;
const double kDropRateForThrottle = kDropPerMillionForThrottle / 1000000.0;
const double KDropRateForLbAndThrottle =
const double kDropRateForLbAndThrottle =
kDropRateForLb + (1 - kDropRateForLb) * kDropRateForThrottle;
const double kErrorTolerance = 0.05;
const size_t kNumRpcs =
ComputeIdealNumRpcs(kDropRateForLbAndThrottle, kErrorTolerance);
// The ADS response contains two drop categories.
AdsServiceImpl::EdsResourceArgs args({
{"locality0", GetBackendPorts()},
@ -9337,21 +9360,18 @@ TEST_P(DropTest, Vanilla) {
}
// The drop rate should be roughly equal to the expectation.
const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
const double kErrorTolerance = 0.2;
EXPECT_THAT(
seen_drop_rate,
::testing::AllOf(
::testing::Ge(KDropRateForLbAndThrottle * (1 - kErrorTolerance)),
::testing::Le(KDropRateForLbAndThrottle * (1 + kErrorTolerance))));
EXPECT_THAT(seen_drop_rate, ::testing::DoubleNear(kDropRateForLbAndThrottle,
kErrorTolerance));
}
// Tests that drop config is converted correctly from per hundred.
TEST_P(DropTest, DropPerHundred) {
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
const size_t kNumRpcs = 5000;
const uint32_t kDropPerHundredForLb = 10;
const double kDropRateForLb = kDropPerHundredForLb / 100.0;
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kDropRateForLb, kErrorTolerance);
// The ADS response contains one drop category.
AdsServiceImpl::EdsResourceArgs args({
{"locality0", GetBackendPorts()},
@ -9377,20 +9397,18 @@ TEST_P(DropTest, DropPerHundred) {
}
// The drop rate should be roughly equal to the expectation.
const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
const double kErrorTolerance = 0.2;
EXPECT_THAT(
seen_drop_rate,
::testing::AllOf(::testing::Ge(kDropRateForLb * (1 - kErrorTolerance)),
::testing::Le(kDropRateForLb * (1 + kErrorTolerance))));
EXPECT_THAT(seen_drop_rate,
::testing::DoubleNear(kDropRateForLb, kErrorTolerance));
}
// Tests that drop config is converted correctly from per ten thousand.
TEST_P(DropTest, DropPerTenThousand) {
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
const size_t kNumRpcs = 5000;
const uint32_t kDropPerTenThousandForLb = 1000;
const double kDropRateForLb = kDropPerTenThousandForLb / 10000.0;
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kDropRateForLb, kErrorTolerance);
// The ADS response contains one drop category.
AdsServiceImpl::EdsResourceArgs args({
{"locality0", GetBackendPorts()},
@ -9416,24 +9434,25 @@ TEST_P(DropTest, DropPerTenThousand) {
}
// The drop rate should be roughly equal to the expectation.
const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
const double kErrorTolerance = 0.2;
EXPECT_THAT(
seen_drop_rate,
::testing::AllOf(::testing::Ge(kDropRateForLb * (1 - kErrorTolerance)),
::testing::Le(kDropRateForLb * (1 + kErrorTolerance))));
EXPECT_THAT(seen_drop_rate,
::testing::DoubleNear(kDropRateForLb, kErrorTolerance));
}
// Tests that drop is working correctly after update.
TEST_P(DropTest, Update) {
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
const size_t kNumRpcs = 3000;
const uint32_t kDropPerMillionForLb = 100000;
const uint32_t kDropPerMillionForThrottle = 200000;
const double kErrorTolerance = 0.05;
const double kDropRateForLb = kDropPerMillionForLb / 1000000.0;
const double kDropRateForThrottle = kDropPerMillionForThrottle / 1000000.0;
const double KDropRateForLbAndThrottle =
const double kDropRateForLbAndThrottle =
kDropRateForLb + (1 - kDropRateForLb) * kDropRateForThrottle;
const size_t kNumRpcsLbOnly =
ComputeIdealNumRpcs(kDropRateForLb, kErrorTolerance);
const size_t kNumRpcsBoth =
ComputeIdealNumRpcs(kDropRateForLbAndThrottle, kErrorTolerance);
// The first ADS response contains one drop category.
AdsServiceImpl::EdsResourceArgs args({
{"locality0", GetBackendPorts()},
@ -9442,10 +9461,10 @@ TEST_P(DropTest, Update) {
balancers_[0]->ads_service()->SetEdsResource(
BuildEdsResource(args, DefaultEdsServiceName()));
WaitForAllBackends();
// Send kNumRpcs RPCs and count the drops.
// Send kNumRpcsLbOnly RPCs and count the drops.
size_t num_drops = 0;
gpr_log(GPR_INFO, "========= BEFORE FIRST BATCH ==========");
for (size_t i = 0; i < kNumRpcs; ++i) {
for (size_t i = 0; i < kNumRpcsLbOnly; ++i) {
EchoResponse response;
const Status status = SendRpc(RpcOptions(), &response);
if (!status.ok() &&
@ -9459,13 +9478,10 @@ TEST_P(DropTest, Update) {
}
gpr_log(GPR_INFO, "========= DONE WITH FIRST BATCH ==========");
// The drop rate should be roughly equal to the expectation.
double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcsLbOnly;
gpr_log(GPR_INFO, "First batch drop rate %f", seen_drop_rate);
const double kErrorTolerance = 0.3;
EXPECT_THAT(
seen_drop_rate,
::testing::AllOf(::testing::Ge(kDropRateForLb * (1 - kErrorTolerance)),
::testing::Le(kDropRateForLb * (1 + kErrorTolerance))));
EXPECT_THAT(seen_drop_rate,
::testing::DoubleNear(kDropRateForLb, kErrorTolerance));
// The second ADS response contains two drop categories, send an update EDS
// response.
args.drop_categories = {{kLbDropType, kDropPerMillionForLb},
@ -9475,8 +9491,8 @@ TEST_P(DropTest, Update) {
// Wait until the drop rate increases to the middle of the two configs, which
// implies that the update has been in effect.
const double kDropRateThreshold =
(kDropRateForLb + KDropRateForLbAndThrottle) / 2;
size_t num_rpcs = kNumRpcs;
(kDropRateForLb + kDropRateForLbAndThrottle) / 2;
size_t num_rpcs = kNumRpcsBoth;
while (seen_drop_rate < kDropRateThreshold) {
EchoResponse response;
const Status status = SendRpc(RpcOptions(), &response);
@ -9491,10 +9507,10 @@ TEST_P(DropTest, Update) {
}
seen_drop_rate = static_cast<double>(num_drops) / num_rpcs;
}
// Send kNumRpcs RPCs and count the drops.
// Send kNumRpcsBoth RPCs and count the drops.
num_drops = 0;
gpr_log(GPR_INFO, "========= BEFORE SECOND BATCH ==========");
for (size_t i = 0; i < kNumRpcs; ++i) {
for (size_t i = 0; i < kNumRpcsBoth; ++i) {
EchoResponse response;
const Status status = SendRpc(RpcOptions(), &response);
if (!status.ok() &&
@ -9508,13 +9524,10 @@ TEST_P(DropTest, Update) {
}
gpr_log(GPR_INFO, "========= DONE WITH SECOND BATCH ==========");
// The new drop rate should be roughly equal to the expectation.
seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
seen_drop_rate = static_cast<double>(num_drops) / kNumRpcsBoth;
gpr_log(GPR_INFO, "Second batch drop rate %f", seen_drop_rate);
EXPECT_THAT(
seen_drop_rate,
::testing::AllOf(
::testing::Ge(KDropRateForLbAndThrottle * (1 - kErrorTolerance)),
::testing::Le(KDropRateForLbAndThrottle * (1 + kErrorTolerance))));
EXPECT_THAT(seen_drop_rate, ::testing::DoubleNear(kDropRateForLbAndThrottle,
kErrorTolerance));
}
// Tests that all the RPCs are dropped if any drop category drops 100%.
@ -9979,13 +9992,15 @@ TEST_P(ClientLoadReportingWithDropTest, Vanilla) {
}
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
const size_t kNumRpcs = 3000;
const uint32_t kDropPerMillionForLb = 100000;
const uint32_t kDropPerMillionForThrottle = 200000;
const double kErrorTolerance = 0.05;
const double kDropRateForLb = kDropPerMillionForLb / 1000000.0;
const double kDropRateForThrottle = kDropPerMillionForThrottle / 1000000.0;
const double KDropRateForLbAndThrottle =
const double kDropRateForLbAndThrottle =
kDropRateForLb + (1 - kDropRateForLb) * kDropRateForThrottle;
const size_t kNumRpcs =
ComputeIdealNumRpcs(kDropRateForLbAndThrottle, kErrorTolerance);
// The ADS response contains two drop categories.
AdsServiceImpl::EdsResourceArgs args({
{"locality0", GetBackendPorts()},
@ -10014,12 +10029,8 @@ TEST_P(ClientLoadReportingWithDropTest, Vanilla) {
}
// The drop rate should be roughly equal to the expectation.
const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
const double kErrorTolerance = 0.2;
EXPECT_THAT(
seen_drop_rate,
::testing::AllOf(
::testing::Ge(KDropRateForLbAndThrottle * (1 - kErrorTolerance)),
::testing::Le(KDropRateForLbAndThrottle * (1 + kErrorTolerance))));
EXPECT_THAT(seen_drop_rate, ::testing::DoubleNear(kDropRateForLbAndThrottle,
kErrorTolerance));
// Check client stats.
const size_t total_rpc = num_warmup + kNumRpcs;
ClientStats client_stats;
@ -10033,17 +10044,13 @@ TEST_P(ClientLoadReportingWithDropTest, Vanilla) {
client_stats.total_dropped_requests() <
total_rpc);
EXPECT_EQ(num_drops, client_stats.total_dropped_requests());
EXPECT_THAT(static_cast<double>(client_stats.dropped_requests(kLbDropType)) /
total_rpc,
::testing::DoubleNear(kDropRateForLb, kErrorTolerance));
EXPECT_THAT(
client_stats.dropped_requests(kLbDropType),
::testing::AllOf(
::testing::Ge(total_rpc * kDropRateForLb * (1 - kErrorTolerance)),
::testing::Le(total_rpc * kDropRateForLb * (1 + kErrorTolerance))));
EXPECT_THAT(client_stats.dropped_requests(kThrottleDropType),
::testing::AllOf(
::testing::Ge(total_rpc * (1 - kDropRateForLb) *
kDropRateForThrottle * (1 - kErrorTolerance)),
::testing::Le(total_rpc * (1 - kDropRateForLb) *
kDropRateForThrottle * (1 + kErrorTolerance))));
static_cast<double>(client_stats.dropped_requests(kThrottleDropType)) /
(total_rpc * (1 - kDropRateForLb)),
::testing::DoubleNear(kDropRateForThrottle, kErrorTolerance));
}
class FaultInjectionTest : public XdsEnd2endTest {
@ -10147,10 +10154,10 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionWithoutListenerFilter) {
}
TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageAbort) {
const size_t kNumRpcs = 2000;
const uint32_t kAbortPercentagePerHundred = 50;
const double kAbortRate = kAbortPercentagePerHundred / 100.0;
const double kErrorTolerance = 0.2;
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance);
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Create an EDS resource
@ -10179,16 +10186,15 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageAbort) {
// The abort rate should be roughly equal to the expectation.
const double seen_abort_rate = static_cast<double>(num_aborted) / kNumRpcs;
EXPECT_THAT(seen_abort_rate,
::testing::AllOf(::testing::Ge(kAbortRate - kErrorTolerance),
::testing::Le(kAbortRate + kErrorTolerance)));
::testing::DoubleNear(kAbortRate, kErrorTolerance));
}
TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageAbortViaHeaders) {
const size_t kNumRpcs = 2000;
const uint32_t kAbortPercentageCap = 100;
const uint32_t kAbortPercentage = 50;
const double kAbortRate = kAbortPercentage / 100.0;
const double kErrorTolerance = 0.2;
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance);
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Create an EDS resource
@ -10220,19 +10226,16 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageAbortViaHeaders) {
// The abort rate should be roughly equal to the expectation.
const double seen_abort_rate = static_cast<double>(num_aborted) / kNumRpcs;
EXPECT_THAT(seen_abort_rate,
::testing::AllOf(::testing::Ge(kAbortRate - kErrorTolerance),
::testing::Le(kAbortRate + kErrorTolerance)));
::testing::DoubleNear(kAbortRate, kErrorTolerance));
}
// TODO(lidiz) reduce the error tolerance to a lower level without dramatically
// increase the duration of fault injection tests.
TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelay) {
const size_t kNumRpcs = 100;
const uint32_t kRpcTimeoutMilliseconds = grpc_test_slowdown_factor() * 3000;
const uint32_t kFixedDelaySeconds = 100;
const uint32_t kRpcTimeoutMilliseconds = 10; // 10 ms
const uint32_t kDelayPercentagePerHundred = 95;
const uint32_t kDelayPercentagePerHundred = 50;
const double kDelayRate = kDelayPercentagePerHundred / 100.0;
const double kErrorTolerance = 0.2;
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kDelayRate, kErrorTolerance);
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Create an EDS resource
@ -10251,30 +10254,31 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelay) {
// Config fault injection via different setup
SetFilterConfig(http_fault);
// Send kNumRpcs RPCs and count the delays.
int num_total = 0, num_ok = 0, num_delayed = 0, num_dropped = 0;
RpcOptions options = RpcOptions()
.set_timeout_ms(kRpcTimeoutMilliseconds)
.set_skip_cancelled_check(true);
for (size_t i = 0; i < kNumRpcs; ++i) {
SendRpcAndCount(&num_total, &num_ok, &num_delayed, &num_dropped, options);
RpcOptions rpc_options = RpcOptions()
.set_timeout_ms(kRpcTimeoutMilliseconds)
.set_skip_cancelled_check(true);
std::vector<ConcurrentRpc> rpcs =
SendConcurrentRpcs(stub_.get(), kNumRpcs, rpc_options);
size_t num_delayed = 0;
for (auto& rpc : rpcs) {
if (rpc.status.error_code() == grpc::OK) continue;
EXPECT_EQ(grpc::DEADLINE_EXCEEDED, rpc.status.error_code());
++num_delayed;
}
EXPECT_EQ(kNumRpcs, num_total);
EXPECT_EQ(0, num_dropped);
// The delay rate should be roughly equal to the expectation.
const double seen_delay_rate = static_cast<double>(num_delayed) / kNumRpcs;
EXPECT_THAT(seen_delay_rate,
::testing::AllOf(::testing::Ge(kDelayRate - kErrorTolerance),
::testing::Le(kDelayRate + kErrorTolerance)));
::testing::DoubleNear(kDelayRate, kErrorTolerance));
}
TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelayViaHeaders) {
const size_t kNumRpcs = 100;
const uint32_t kFixedDelayMilliseconds = 100000; // 100 seconds
const uint32_t kRpcTimeoutMilliseconds = 10; // 10 ms
const uint32_t kFixedDelayMilliseconds = 100000;
const uint32_t kRpcTimeoutMilliseconds = grpc_test_slowdown_factor() * 3000;
const uint32_t kDelayPercentageCap = 100;
const uint32_t kDelayPercentage = 50;
const double kDelayRate = kDelayPercentage / 100.0;
const double kErrorTolerance = 0.2;
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kDelayRate, kErrorTolerance);
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Create an EDS resource
@ -10296,27 +10300,33 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelayViaHeaders) {
{"x-envoy-fault-delay-request-percentage",
std::to_string(kDelayPercentage)},
};
int num_total = 0, num_ok = 0, num_delayed = 0, num_dropped = 0;
RpcOptions options = RpcOptions()
.set_metadata(metadata)
.set_timeout_ms(kRpcTimeoutMilliseconds)
.set_skip_cancelled_check(true);
for (size_t i = 0; i < kNumRpcs; ++i) {
SendRpcAndCount(&num_total, &num_ok, &num_delayed, &num_dropped, options);
RpcOptions rpc_options = RpcOptions()
.set_metadata(metadata)
.set_timeout_ms(kRpcTimeoutMilliseconds)
.set_skip_cancelled_check(true);
std::vector<ConcurrentRpc> rpcs =
SendConcurrentRpcs(stub_.get(), kNumRpcs, rpc_options);
size_t num_delayed = 0;
for (auto& rpc : rpcs) {
if (rpc.status.error_code() == grpc::OK) continue;
EXPECT_EQ(grpc::DEADLINE_EXCEEDED, rpc.status.error_code());
++num_delayed;
}
// The delay rate should be roughly equal to the expectation.
const double seen_delay_rate = static_cast<double>(num_delayed) / kNumRpcs;
EXPECT_THAT(seen_delay_rate,
::testing::AllOf(::testing::Ge(kDelayRate - kErrorTolerance),
::testing::Le(kDelayRate + kErrorTolerance)));
::testing::DoubleNear(kDelayRate, kErrorTolerance));
}
TEST_P(FaultInjectionTest, XdsFaultInjectionAlwaysDelayPercentageAbort) {
const size_t kNumRpcs = 500;
const uint32_t kAbortPercentagePerHundred = 50;
const double kAbortRate = kAbortPercentagePerHundred / 100.0;
const uint32_t kFixedDelayNanos = 10 * 1000 * 1000; // 10 ms
const double kErrorTolerance = 0.2;
const uint32_t kFixedDelaySeconds = 1;
const uint32_t kRpcTimeoutMilliseconds = 100 * 1000; // 100s should not reach
const uint32_t kConnectionTimeoutMilliseconds =
10 * 1000; // 10s should not reach
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance);
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Create an EDS resource
@ -10336,25 +10346,29 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionAlwaysDelayPercentageAbort) {
delay_percentage->set_numerator(1000000); // Always inject DELAY!
delay_percentage->set_denominator(FractionalPercent::MILLION);
auto* fixed_delay = http_fault.mutable_delay()->mutable_fixed_delay();
fixed_delay->set_nanos(kFixedDelayNanos);
fixed_delay->set_seconds(kFixedDelaySeconds);
// Config fault injection via different setup
SetFilterConfig(http_fault);
// Allow the channel to connect to one backends, so the herd of queued RPCs
// won't be executed on the same ExecCtx object and using the cached Now()
// value, which causes millisecond level delay error.
channel_->WaitForConnected(
grpc_timeout_milliseconds_to_deadline(kConnectionTimeoutMilliseconds));
// Send kNumRpcs RPCs and count the aborts.
int num_total = 0, num_ok = 0, num_failure = 0, num_aborted = 0;
for (size_t i = 0; i < kNumRpcs; ++i) {
grpc_millis t0 = NowFromCycleCounter();
SendRpcAndCount(&num_total, &num_ok, &num_failure, &num_aborted,
RpcOptions(), "Fault injected");
grpc_millis t1 = NowFromCycleCounter();
EXPECT_GE(t1, t0 + kFixedDelayNanos / 1000 / 1000);
int num_aborted = 0;
RpcOptions rpc_options = RpcOptions().set_timeout_ms(kRpcTimeoutMilliseconds);
std::vector<ConcurrentRpc> rpcs =
SendConcurrentRpcs(stub_.get(), kNumRpcs, rpc_options);
for (auto& rpc : rpcs) {
EXPECT_GE(rpc.elapsed_time, kFixedDelaySeconds * 1000);
if (rpc.status.error_code() == grpc::OK) continue;
EXPECT_EQ("Fault injected", rpc.status.error_message());
++num_aborted;
}
EXPECT_EQ(kNumRpcs, num_total);
EXPECT_EQ(0, num_failure);
// The abort rate should be roughly equal to the expectation.
const double seen_abort_rate = static_cast<double>(num_aborted) / kNumRpcs;
EXPECT_THAT(seen_abort_rate,
::testing::AllOf(::testing::Ge(kAbortRate - kErrorTolerance),
::testing::Le(kAbortRate + kErrorTolerance)));
::testing::DoubleNear(kAbortRate, kErrorTolerance));
}
// This test and the above test apply different denominators to delay and abort.
@ -10362,11 +10376,14 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionAlwaysDelayPercentageAbort) {
// in our code.
TEST_P(FaultInjectionTest,
XdsFaultInjectionAlwaysDelayPercentageAbortSwitchDenominator) {
const size_t kNumRpcs = 100;
const uint32_t kAbortPercentagePerMillion = 500000;
const double kAbortRate = kAbortPercentagePerMillion / 1000000.0;
const uint32_t kFixedDelayNanos = 10 * 1000 * 1000; // 10 ms
const double kErrorTolerance = 0.2;
const uint32_t kFixedDelaySeconds = 1; // 1s
const uint32_t kRpcTimeoutMilliseconds = 100 * 1000; // 100s should not reach
const uint32_t kConnectionTimeoutMilliseconds =
10 * 1000; // 10s should not reach
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance);
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Create an EDS resource
@ -10386,31 +10403,35 @@ TEST_P(FaultInjectionTest,
delay_percentage->set_numerator(100); // Always inject DELAY!
delay_percentage->set_denominator(FractionalPercent::HUNDRED);
auto* fixed_delay = http_fault.mutable_delay()->mutable_fixed_delay();
fixed_delay->set_nanos(kFixedDelayNanos);
fixed_delay->set_seconds(kFixedDelaySeconds);
// Config fault injection via different setup
SetFilterConfig(http_fault);
// Allow the channel to connect to one backends, so the herd of queued RPCs
// won't be executed on the same ExecCtx object and using the cached Now()
// value, which causes millisecond level delay error.
channel_->WaitForConnected(
grpc_timeout_milliseconds_to_deadline(kConnectionTimeoutMilliseconds));
// Send kNumRpcs RPCs and count the aborts.
int num_total = 0, num_ok = 0, num_failure = 0, num_aborted = 0;
for (size_t i = 0; i < kNumRpcs; ++i) {
grpc_millis t0 = NowFromCycleCounter();
SendRpcAndCount(&num_total, &num_ok, &num_failure, &num_aborted,
RpcOptions(), "Fault injected");
grpc_millis t1 = NowFromCycleCounter();
EXPECT_GE(t1, t0 + kFixedDelayNanos / 1000 / 1000);
int num_aborted = 0;
RpcOptions rpc_options = RpcOptions().set_timeout_ms(kRpcTimeoutMilliseconds);
std::vector<ConcurrentRpc> rpcs =
SendConcurrentRpcs(stub_.get(), kNumRpcs, rpc_options);
for (auto& rpc : rpcs) {
EXPECT_GE(rpc.elapsed_time, kFixedDelaySeconds * 1000);
if (rpc.status.error_code() == grpc::OK) continue;
EXPECT_EQ("Fault injected", rpc.status.error_message());
++num_aborted;
}
EXPECT_EQ(kNumRpcs, num_total);
EXPECT_EQ(0, num_failure);
// The abort rate should be roughly equal to the expectation.
const double seen_abort_rate = static_cast<double>(num_aborted) / kNumRpcs;
EXPECT_THAT(seen_abort_rate,
::testing::AllOf(::testing::Ge(kAbortRate - kErrorTolerance),
::testing::Le(kAbortRate + kErrorTolerance)));
::testing::DoubleNear(kAbortRate, kErrorTolerance));
}
TEST_P(FaultInjectionTest, XdsFaultInjectionMaxFault) {
const uint32_t kMaxFault = 10;
const uint32_t kNumRpcs = 30; // kNumRpcs should be bigger than kMaxFault
const uint32_t kRpcTimeoutMs = 2000; // 2 seconds
const uint32_t kRpcTimeoutMs = 4000; // 4 seconds
const uint32_t kLongDelaySeconds = 100; // 100 seconds
const uint32_t kAlwaysDelayPercentage = 100;
SetNextResolution({});
@ -10434,25 +10455,17 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionMaxFault) {
SetFilterConfig(http_fault);
// Sends a batch of long running RPCs with long timeout to consume all
// active faults quota.
int num_ok = 0, num_delayed = 0;
LongRunningRpc rpcs[kNumRpcs];
int num_delayed = 0;
RpcOptions rpc_options = RpcOptions().set_timeout_ms(kRpcTimeoutMs);
for (size_t i = 0; i < kNumRpcs; ++i) {
rpcs[i].StartRpc(stub_.get(), rpc_options);
}
for (size_t i = 0; i < kNumRpcs; ++i) {
Status status = rpcs[i].GetStatus();
if (status.ok()) {
++num_ok;
} else {
EXPECT_EQ(StatusCode::DEADLINE_EXCEEDED, status.error_code());
++num_delayed;
}
std::vector<ConcurrentRpc> rpcs =
SendConcurrentRpcs(stub_.get(), kNumRpcs, rpc_options);
for (auto& rpc : rpcs) {
if (rpc.status.error_code() == grpc::OK) continue;
EXPECT_EQ(StatusCode::DEADLINE_EXCEEDED, rpc.status.error_code());
++num_delayed;
}
// Only kMaxFault number of RPC should be fault injected..
EXPECT_EQ(kMaxFault, num_delayed);
// Other RPCs should be ok.
EXPECT_EQ(kNumRpcs - kMaxFault, num_ok);
}
class BootstrapSourceTest : public XdsEnd2endTest {

Loading…
Cancel
Save