[Roll Forward] Tighten the error tolerance requirement by 100x (#26626)

* Tighten the error tolerance requirement by 10x

* Make it 5 sigma instead of 4.5

* Rewrap comments

* Loosen the max concurrent requests in certain test cases
pull/26645/head
Lidi Zheng 4 years ago committed by GitHub
parent dc6f6e36fc
commit b97693d572
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 55
      test/cpp/end2end/xds_end2end_test.cc

@ -1615,28 +1615,25 @@ grpc_millis NowFromCycleCounter() {
return grpc_cycle_counter_to_millis_round_up(now);
}
// Returns the number of RPCs needed to pass error_tolerance at 99.995% chance.
// Rolling dice in drop/fault-injection generates a binomial distribution (if
// our code is not horribly wrong). Let's make "n" the number of samples, "p"
// the probability. If we have np>5 & n(1-p)>5, we can approximately treat the
// binomial distribution as a normal distribution.
// Returns the number of RPCs needed to pass error_tolerance at 99.99994%
// chance. Rolling dice in drop/fault-injection generates a binomial
// distribution (if our code is not horribly wrong). Let's make "n" the number
// of samples, "p" the probability. If we have np>5 & n(1-p)>5, we can
// approximately treat the binomial distribution as a normal distribution.
//
// For a normal distribution, we can look up how many standard deviations we
// need to reach 99.995%. Based on Wiki's table
// https://en.wikipedia.org/wiki/Standard_normal_table, we need 4.00 sigma
// (standard deviation) to cover the probability area of 99.995%. In other
// words, for a sample with size "n" probability "p" error-tolerance "k", we
// want the error to always land within 4.00 sigma. The sigma of a binomial
// distribution can be computed as sqrt(np(1-p)). Hence, we have the equation:
// https://en.wikipedia.org/wiki/68%E2%80%9395%E2%80%9399.7_rule, we need 5.00
// sigma (standard deviation) to cover the probability area of 99.99994%. In
// other words, for a sample with size "n" probability "p" error-tolerance "k",
// we want the error to always land within 5.00 sigma. The sigma of a binomial
// distribution can be computed as sqrt(np(1-p)). Hence, we have the equation:
//
// kn >= 4.00 * sqrt(np(1-p))
//
// E.g., with p=0.5 k=0.1, n >= 400; with p=0.5 k=0.05, n >= 1600; with p=0.5
// k=0.01, n >= 40000.
// kn >= 5.00 * sqrt(np(1-p)), i.e. n >= 5.00^2 * p(1-p) / k^2
size_t ComputeIdealNumRpcs(double p, double error_tolerance) {
GPR_ASSERT(p >= 0 && p <= 1);
size_t num_rpcs =
ceil(p * (1 - p) * 4.00 * 4.00 / error_tolerance / error_tolerance);
ceil(p * (1 - p) * 5.00 * 5.00 / error_tolerance / error_tolerance);
gpr_log(GPR_INFO,
"Sending %" PRIuPTR " RPCs for percentage=%.3f error_tolerance=%.3f",
num_rpcs, p, error_tolerance);
@ -11537,6 +11534,7 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelay) {
const double kDelayRate = kDelayPercentagePerHundred / 100.0;
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kDelayRate, kErrorTolerance);
const size_t kMaxConcurrentRequests = kNumRpcs;
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Create an EDS resource
@ -11545,6 +11543,12 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelay) {
});
balancers_[0]->ads_service()->SetEdsResource(
BuildEdsResource(args, DefaultEdsServiceName()));
// Loosen the max concurrent request limit
Cluster cluster = default_cluster_;
auto* threshold = cluster.mutable_circuit_breakers()->add_thresholds();
threshold->set_priority(RoutingPriority::DEFAULT);
threshold->mutable_max_requests()->set_value(kMaxConcurrentRequests);
balancers_[0]->ads_service()->SetCdsResource(cluster);
// Construct the fault injection filter config
HTTPFault http_fault;
auto* delay_percentage = http_fault.mutable_delay()->mutable_percentage();
@ -11580,6 +11584,7 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelayViaHeaders) {
const double kDelayRate = kDelayPercentage / 100.0;
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kDelayRate, kErrorTolerance);
const size_t kMaxConcurrentRequests = kNumRpcs;
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Create an EDS resource
@ -11588,6 +11593,12 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelayViaHeaders) {
});
balancers_[0]->ads_service()->SetEdsResource(
BuildEdsResource(args, DefaultEdsServiceName()));
// Loosen the max concurrent request limit
Cluster cluster = default_cluster_;
auto* threshold = cluster.mutable_circuit_breakers()->add_thresholds();
threshold->set_priority(RoutingPriority::DEFAULT);
threshold->mutable_max_requests()->set_value(kMaxConcurrentRequests);
balancers_[0]->ads_service()->SetCdsResource(cluster);
// Construct the fault injection filter config
HTTPFault http_fault;
http_fault.mutable_delay()->mutable_header_delay();
@ -11628,6 +11639,7 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionAlwaysDelayPercentageAbort) {
10 * 1000; // 10s should not reach
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance);
const size_t kMaxConcurrentRequests = kNumRpcs;
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Create an EDS resource
@ -11636,6 +11648,12 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionAlwaysDelayPercentageAbort) {
});
balancers_[0]->ads_service()->SetEdsResource(
BuildEdsResource(args, DefaultEdsServiceName()));
// Loosen the max concurrent request limit
Cluster cluster = default_cluster_;
auto* threshold = cluster.mutable_circuit_breakers()->add_thresholds();
threshold->set_priority(RoutingPriority::DEFAULT);
threshold->mutable_max_requests()->set_value(kMaxConcurrentRequests);
balancers_[0]->ads_service()->SetCdsResource(cluster);
// Construct the fault injection filter config
HTTPFault http_fault;
auto* abort_percentage = http_fault.mutable_abort()->mutable_percentage();
@ -11685,6 +11703,7 @@ TEST_P(FaultInjectionTest,
10 * 1000; // 10s should not reach
const double kErrorTolerance = 0.05;
const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance);
const size_t kMaxConcurrentRequests = kNumRpcs;
SetNextResolution({});
SetNextResolutionForLbChannelAllBalancers();
// Create an EDS resource
@ -11693,6 +11712,12 @@ TEST_P(FaultInjectionTest,
});
balancers_[0]->ads_service()->SetEdsResource(
BuildEdsResource(args, DefaultEdsServiceName()));
// Loosen the max concurrent request limit
Cluster cluster = default_cluster_;
auto* threshold = cluster.mutable_circuit_breakers()->add_thresholds();
threshold->set_priority(RoutingPriority::DEFAULT);
threshold->mutable_max_requests()->set_value(kMaxConcurrentRequests);
balancers_[0]->ads_service()->SetCdsResource(cluster);
// Construct the fault injection filter config
HTTPFault http_fault;
auto* abort_percentage = http_fault.mutable_abort()->mutable_percentage();

Loading…
Cancel
Save