From 02df22f52f85916d026f08ab449c720da3faaa9d Mon Sep 17 00:00:00 2001 From: Yash Tibrewal Date: Thu, 18 Aug 2022 15:34:55 -0700 Subject: [PATCH] FaultInjection: Fix random number generation (#30623) * FaultInjection: Fix random number generation * Put random generation under a mutex * Fix IWYU * Regenerate projects * Modify timeouts * Dbg build knobs * Remove unnecessary slowdown factor * Tune error tolerance and add note on broken computation of ComputeIdealNumRpcs --- BUILD | 2 ++ .../fault_injection/fault_injection_filter.cc | 33 ++++++++++++------- .../fault_injection/fault_injection_filter.h | 8 +++++ test/cpp/end2end/xds/xds_end2end_test_lib.h | 3 ++ .../xds/xds_fault_injection_end2end_test.cc | 16 ++++----- tools/distrib/fix_build_deps.py | 2 ++ 6 files changed, 44 insertions(+), 20 deletions(-) diff --git a/BUILD b/BUILD index 2cdd312dab6..6c801fda52d 100644 --- a/BUILD +++ b/BUILD @@ -3842,7 +3842,9 @@ grpc_cc_library( "src/core/ext/filters/fault_injection/service_config_parser.h", ], external_deps = [ + "absl/base:core_headers", "absl/memory", + "absl/random", "absl/status", "absl/status:statusor", "absl/strings", diff --git a/src/core/ext/filters/fault_injection/fault_injection_filter.cc b/src/core/ext/filters/fault_injection/fault_injection_filter.cc index f2113a282d9..8011ac59d2a 100644 --- a/src/core/ext/filters/fault_injection/fault_injection_filter.cc +++ b/src/core/ext/filters/fault_injection/fault_injection_filter.cc @@ -19,7 +19,6 @@ #include "src/core/ext/filters/fault_injection/fault_injection_filter.h" #include -#include #include #include @@ -70,12 +69,14 @@ auto AsInt(absl::string_view s) -> absl::optional { return absl::nullopt; } -inline bool UnderFraction(const uint32_t numerator, +inline bool UnderFraction(absl::InsecureBitGen* rand_generator, + const uint32_t numerator, const uint32_t denominator) { if (numerator <= 0) return false; if (numerator >= denominator) return true; // Generate a random number in [0, denominator). - const uint32_t random_number = rand() % denominator; + const uint32_t random_number = + absl::Uniform(absl::IntervalClosedOpen, *rand_generator, 0u, denominator); return random_number < numerator; } @@ -139,7 +140,8 @@ FaultInjectionFilter::FaultInjectionFilter(ChannelFilter::Args filter_args) filter_args.channel_stack(), filter_args.uninitialized_channel_element())), service_config_parser_index_( - FaultInjectionServiceConfigParser::ParserIndex()) {} + FaultInjectionServiceConfigParser::ParserIndex()), + mu_(new Mutex) {} // Construct a promise for one call. ArenaPromise FaultInjectionFilter::MakeCallPromise( @@ -219,14 +221,21 @@ FaultInjectionFilter::MakeInjectionDecision( } } // Roll the dice - const bool delay_request = - delay != Duration::Zero() && - UnderFraction(delay_percentage_numerator, - fi_policy->delay_percentage_denominator); - const bool abort_request = - abort_code != GRPC_STATUS_OK && - UnderFraction(abort_percentage_numerator, - fi_policy->abort_percentage_denominator); + bool delay_request = delay != Duration::Zero(); + bool abort_request = abort_code != GRPC_STATUS_OK; + if (delay_request || abort_request) { + MutexLock lock(mu_.get()); + if (delay_request) { + delay_request = + UnderFraction(&delay_rand_generator_, delay_percentage_numerator, + fi_policy->delay_percentage_denominator); + } + if (abort_request) { + abort_request = + UnderFraction(&abort_rand_generator_, abort_percentage_numerator, + fi_policy->abort_percentage_denominator); + } + } return InjectionDecision( fi_policy->max_faults, delay_request ? delay : Duration::Zero(), diff --git a/src/core/ext/filters/fault_injection/fault_injection_filter.h b/src/core/ext/filters/fault_injection/fault_injection_filter.h index 8ac7ab9585c..611c3d7f45b 100644 --- a/src/core/ext/filters/fault_injection/fault_injection_filter.h +++ b/src/core/ext/filters/fault_injection/fault_injection_filter.h @@ -21,11 +21,16 @@ #include +#include + +#include "absl/base/thread_annotations.h" +#include "absl/random/random.h" #include "absl/status/statusor.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/channel/channel_fwd.h" #include "src/core/lib/channel/promise_based_filter.h" +#include "src/core/lib/gprpp/sync.h" #include "src/core/lib/promise/arena_promise.h" #include "src/core/lib/transport/transport.h" @@ -60,6 +65,9 @@ class FaultInjectionFilter : public ChannelFilter { // The relative index of instances of the same filter. size_t index_; const size_t service_config_parser_index_; + std::unique_ptr mu_; + absl::InsecureBitGen abort_rand_generator_ ABSL_GUARDED_BY(mu_); + absl::InsecureBitGen delay_rand_generator_ ABSL_GUARDED_BY(mu_); }; } // namespace grpc_core diff --git a/test/cpp/end2end/xds/xds_end2end_test_lib.h b/test/cpp/end2end/xds/xds_end2end_test_lib.h index 9b7b4d1a367..7ed7a5a672a 100644 --- a/test/cpp/end2end/xds/xds_end2end_test_lib.h +++ b/test/cpp/end2end/xds/xds_end2end_test_lib.h @@ -1025,6 +1025,9 @@ class XdsEnd2endTest : public ::testing::TestWithParam { // the equation: // // kn <= 5.00 * sqrt(np(1-p)) + // TODO(yashykt): The above explanation assumes a normal distribution, but we + // use a uniform distribution instead. We need a better estimate of how many + // RPCs are needed with what error tolerance. static size_t ComputeIdealNumRpcs(double p, double error_tolerance) { GPR_ASSERT(p >= 0 && p <= 1); size_t num_rpcs = diff --git a/test/cpp/end2end/xds/xds_fault_injection_end2end_test.cc b/test/cpp/end2end/xds/xds_fault_injection_end2end_test.cc index 72700576f38..96591d49f3e 100644 --- a/test/cpp/end2end/xds/xds_fault_injection_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_fault_injection_end2end_test.cc @@ -148,7 +148,7 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageAbort) { CreateAndStartBackends(1); const uint32_t kAbortPercentagePerHundred = 50; const double kAbortRate = kAbortPercentagePerHundred / 100.0; - const double kErrorTolerance = 0.05; + const double kErrorTolerance = 0.1; const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance); // Create an EDS resource EdsResourceArgs args({{"locality0", CreateEndpointsForBackends()}}); @@ -176,7 +176,7 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageAbortViaHeaders) { const uint32_t kAbortPercentageCap = 100; const uint32_t kAbortPercentage = 50; const double kAbortRate = kAbortPercentage / 100.0; - const double kErrorTolerance = 0.05; + const double kErrorTolerance = 0.1; const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance); // Create an EDS resource EdsResourceArgs args({{"locality0", CreateEndpointsForBackends()}}); @@ -204,11 +204,11 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageAbortViaHeaders) { TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelay) { CreateAndStartBackends(1); - const uint32_t kRpcTimeoutMilliseconds = grpc_test_slowdown_factor() * 3000; + const uint32_t kRpcTimeoutMilliseconds = 10000; const uint32_t kFixedDelaySeconds = 100; const uint32_t kDelayPercentagePerHundred = 50; const double kDelayRate = kDelayPercentagePerHundred / 100.0; - const double kErrorTolerance = 0.05; + const double kErrorTolerance = 0.1; const size_t kNumRpcs = ComputeIdealNumRpcs(kDelayRate, kErrorTolerance); const size_t kMaxConcurrentRequests = kNumRpcs; // Create an EDS resource @@ -250,11 +250,11 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelay) { TEST_P(FaultInjectionTest, XdsFaultInjectionPercentageDelayViaHeaders) { CreateAndStartBackends(1); const uint32_t kFixedDelayMilliseconds = 100000; - const uint32_t kRpcTimeoutMilliseconds = grpc_test_slowdown_factor() * 3000; + const uint32_t kRpcTimeoutMilliseconds = 10000; const uint32_t kDelayPercentageCap = 100; const uint32_t kDelayPercentage = 50; const double kDelayRate = kDelayPercentage / 100.0; - const double kErrorTolerance = 0.05; + const double kErrorTolerance = 0.1; const size_t kNumRpcs = ComputeIdealNumRpcs(kDelayRate, kErrorTolerance); const size_t kMaxConcurrentRequests = kNumRpcs; // Create an EDS resource @@ -338,7 +338,7 @@ TEST_P(FaultInjectionTest, XdsFaultInjectionAlwaysDelayPercentageAbort) { const uint32_t kRpcTimeoutMilliseconds = 100 * 1000; // 100s should not reach const uint32_t kConnectionTimeoutMilliseconds = 10 * 1000; // 10s should not reach - const double kErrorTolerance = 0.05; + const double kErrorTolerance = 0.1; const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance); const size_t kMaxConcurrentRequests = kNumRpcs; // Create an EDS resource @@ -399,7 +399,7 @@ TEST_P(FaultInjectionTest, const uint32_t kRpcTimeoutMilliseconds = 100 * 1000; // 100s should not reach const uint32_t kConnectionTimeoutMilliseconds = 10 * 1000; // 10s should not reach - const double kErrorTolerance = 0.05; + const double kErrorTolerance = 0.1; const size_t kNumRpcs = ComputeIdealNumRpcs(kAbortRate, kErrorTolerance); const size_t kMaxConcurrentRequests = kNumRpcs; // Create an EDS resource diff --git a/tools/distrib/fix_build_deps.py b/tools/distrib/fix_build_deps.py index cff53e5dc15..d907d195e94 100755 --- a/tools/distrib/fix_build_deps.py +++ b/tools/distrib/fix_build_deps.py @@ -85,6 +85,8 @@ EXTERNAL_DEPS = { 'absl/meta:type_traits', 'absl/random/random.h': 'absl/random', + 'absl/random/distributions.h': + 'absl/random:distributions', 'absl/status/status.h': 'absl/status', 'absl/status/statusor.h':