From b97693d57231a4266742d9e33d1c7c576f96b65e Mon Sep 17 00:00:00 2001 From: Lidi Zheng Date: Fri, 9 Jul 2021 10:33:43 -0700 Subject: [PATCH] [Roll Forward] Tighten the error tolerance requirement by 100x (#26626) * Tighten the error tolerance requirement by 10x * Make it 5 sigma instead of 4.5 * Rewrap comments * Loosen the max concurrent requests in certain test cases --- test/cpp/end2end/xds_end2end_test.cc | 55 ++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/test/cpp/end2end/xds_end2end_test.cc b/test/cpp/end2end/xds_end2end_test.cc index 7e49da8d420..e9f4c6815db 100644 --- a/test/cpp/end2end/xds_end2end_test.cc +++ b/test/cpp/end2end/xds_end2end_test.cc @@ -1615,28 +1615,25 @@ grpc_millis NowFromCycleCounter() { return grpc_cycle_counter_to_millis_round_up(now); } -// Returns the number of RPCs needed to pass error_tolerance at 99.995% chance. -// Rolling dices in drop/fault-injection generates a binomial distribution (if -// our code is not horribly wrong). Let's make "n" the number of samples, "p" -// the probabilty. If we have np>5 & n(1-p)>5, we can approximately treat the -// binomial distribution as a normal distribution. +// Returns the number of RPCs needed to pass error_tolerance at 99.99994% +// chance. Rolling dice in drop/fault-injection generates a binomial +// distribution (if our code is not horribly wrong). Let's make "n" the number +// of samples, "p" the probability. If we have np>5 & n(1-p)>5, we can +// approximately treat the binomial distribution as a normal distribution. // // For normal distribution, we can easily look up how many standard deviation we // need to reach 99.995%. Based on Wiki's table -// https://en.wikipedia.org/wiki/Standard_normal_table, we need 4.00 sigma -// (standard deviation) to cover the probability area of 99.995%. In another -// word, for a sample with size "n" probability "p" error-tolerance "k", we want -// the error always land within 4.00 sigma. 
The sigma of binominal distribution -// and be computed as sqrt(np(1-p)). Hence, we have the equation: +// https://en.wikipedia.org/wiki/68%E2%80%9395%E2%80%9399.7_rule, we need 5.00 +// sigma (standard deviation) to cover the probability area of 99.99994%. In +// other words, for a sample with size "n" probability "p" error-tolerance "k", +// we want the error to always land within 5.00 sigma. The sigma of binomial +// distribution can be computed as sqrt(np(1-p)). Hence, we have the equation: // -// kn <= 4.00 * sqrt(np(1-p)) -// -// E.g., with p=0.5 k=0.1, n >= 400; with p=0.5 k=0.05, n >= 1600; with p=0.5 -// k=0.01, n >= 40000. +// kn >= 5.00 * sqrt(np(1-p)), i.e. n >= 25.00 * p(1-p) / k^2 size_t ComputeIdealNumRpcs(double p, double error_tolerance) { GPR_ASSERT(p >= 0 && p <= 1); size_t num_rpcs = - ceil(p * (1 - p) * 4.00 * 4.00 / error_tolerance / error_tolerance); + ceil(p * (1 - p) * 5.00 * 5.00 / error_tolerance / error_tolerance); gpr_log(GPR_INFO, "Sending %" PRIuPTR " RPCs for percentage=%.3f error_tolerance=%.3f", num_rpcs, p, error_tolerance); @@ -11537,6 +11534,7 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelay) { const double kDelayRate = kDelayPercentagePerHundred / 100.0; const double kErrorTolerance = 0.05; const size_t kNumRpcs = ComputeIdealNumRpcs(kDelayRate, kErrorTolerance); + const size_t kMaxConcurrentRequests = kNumRpcs; SetNextResolution({}); SetNextResolutionForLbChannelAllBalancers(); // Create an EDS resource @@ -11545,6 +11543,12 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelay) { }); balancers_[0]->ads_service()->SetEdsResource( BuildEdsResource(args, DefaultEdsServiceName())); + // Loosen the max concurrent request limit + Cluster cluster = default_cluster_; + auto* threshold = cluster.mutable_circuit_breakers()->add_thresholds(); + threshold->set_priority(RoutingPriority::DEFAULT); + threshold->mutable_max_requests()->set_value(kMaxConcurrentRequests); + balancers_[0]->ads_service()->SetCdsResource(cluster); // Construct the fault 
injection filter config HTTPFault http_fault; auto* delay_percentage = http_fault.mutable_delay()->mutable_percentage(); @@ -11580,6 +11584,7 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelayViaHeaders) { const double kDelayRate = kDelayPercentage / 100.0; const double kErrorTolerance = 0.05; const size_t kNumRpcs = ComputeIdealNumRpcs(kDelayRate, kErrorTolerance); + const size_t kMaxConcurrentRequests = kNumRpcs; SetNextResolution({}); SetNextResolutionForLbChannelAllBalancers(); // Create an EDS resource @@ -11588,6 +11593,12 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelayViaHeaders) { }); balancers_[0]->ads_service()->SetEdsResource( BuildEdsResource(args, DefaultEdsServiceName())); + // Loosen the max concurrent request limit + Cluster cluster = default_cluster_; + auto* threshold = cluster.mutable_circuit_breakers()->add_thresholds(); + threshold->set_priority(RoutingPriority::DEFAULT); + threshold->mutable_max_requests()->set_value(kMaxConcurrentRequests); + balancers_[0]->ads_service()->SetCdsResource(cluster); // Construct the fault injection filter config HTTPFault http_fault; http_fault.mutable_delay()->mutable_header_delay(); @@ -11628,6 +11639,7 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionAlwaysDelayPercentageAbort) { 10 * 1000; // 10s should not reach const double kErrorTolerance = 0.05; const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance); + const size_t kMaxConcurrentRequests = kNumRpcs; SetNextResolution({}); SetNextResolutionForLbChannelAllBalancers(); // Create an EDS resource @@ -11636,6 +11648,12 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionAlwaysDelayPercentageAbort) { }); balancers_[0]->ads_service()->SetEdsResource( BuildEdsResource(args, DefaultEdsServiceName())); + // Loosen the max concurrent request limit + Cluster cluster = default_cluster_; + auto* threshold = cluster.mutable_circuit_breakers()->add_thresholds(); + threshold->set_priority(RoutingPriority::DEFAULT); + 
threshold->mutable_max_requests()->set_value(kMaxConcurrentRequests); + balancers_[0]->ads_service()->SetCdsResource(cluster); // Construct the fault injection filter config HTTPFault http_fault; auto* abort_percentage = http_fault.mutable_abort()->mutable_percentage(); @@ -11685,6 +11703,7 @@ TEST_P(FaultInjectionTest, 10 * 1000; // 10s should not reach const double kErrorTolerance = 0.05; const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance); + const size_t kMaxConcurrentRequests = kNumRpcs; SetNextResolution({}); SetNextResolutionForLbChannelAllBalancers(); // Create an EDS resource @@ -11693,6 +11712,12 @@ TEST_P(FaultInjectionTest, }); balancers_[0]->ads_service()->SetEdsResource( BuildEdsResource(args, DefaultEdsServiceName())); + // Loosen the max concurrent request limit + Cluster cluster = default_cluster_; + auto* threshold = cluster.mutable_circuit_breakers()->add_thresholds(); + threshold->set_priority(RoutingPriority::DEFAULT); + threshold->mutable_max_requests()->set_value(kMaxConcurrentRequests); + balancers_[0]->ads_service()->SetCdsResource(cluster); // Construct the fault injection filter config HTTPFault http_fault; auto* abort_percentage = http_fault.mutable_abort()->mutable_percentage();