[WRR] fix some edge cases in scheduler logic (#33045)

This corresponds to two recent changes made to our internal
implementation. See b/276292666 for details.
Mark D. Roth, 2 years ago (committed by GitHub)
parent f1bba530a5
commit e872fb91d9
Changed files:
  82  src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc
  38  test/core/client_channel/lb_policy/static_stride_scheduler_test.cc

@@ -31,7 +31,50 @@
namespace grpc_core {
namespace {
constexpr uint16_t kMaxWeight = std::numeric_limits<uint16_t>::max();
// Assuming the mean of all known weights is M, StaticStrideScheduler will cap
// from above all known weights that are bigger than M*kMaxRatio (to
// M*kMaxRatio).
//
// This is done to limit the number of rounds for picks.
constexpr double kMaxRatio = 10;
// Assuming the mean of all known weights is M, StaticStrideScheduler will cap
// from below all known weights to M*kMinRatio.
//
// This is done as a performance optimization for edge cases when channels with
// large weights are non-accepting (and thus WeightedRoundRobin will retry
// picking them over and over again), and there are also channels with near-zero
// weights that are possibly accepting. In this case, without kMinRatio, it
// would potentially require WeightedRoundRobin to perform thousands of picks
// until it gets a single channel with near-zero weight. This was part of what
// happened in b/276292666.
//
// The current value of 0.01 was chosen without any experimentation. It should
// ensure that WeightedRoundRobin doesn't do much more than on the order of 100
// picks of non-accepting channels with high weights in such corner cases. But
// it also makes WeightedRoundRobin send slightly more requests to potentially
// very bad tasks (those that would have near-zero weights) than zero. This is
// not necessarily a downside, though. Perhaps this is not a problem at all, and
// we should increase this value (to 0.05 or 0.1) to save CPU cycles.
//
// Note that this class treats weights that are exactly equal to zero as
// unknown and therefore replaces them with M. That behavior by itself makes
// sense (fresh channels without feedback information will get an average flow
// of requests). However, it follows that this class replaces weight = 0 with M
// but weight = epsilon with M*kMinRatio, and this step function is logically
// faulty. A demonstration of this is that the function that computes weights
// in WeightedRoundRobin
// (http://google3/production/rpc/stubs/core/loadbalancing/weightedroundrobin.cc;l=324-325;rcl=514986476)
// will cap some epsilon values to zero. There should be a clear distinction
// between "task is new, weight is unknown" and "task is unhealthy, weight is
// very low". A better solution would be not to mix "unknown" and "weight" into
// a single value, but to represent weights as std::optional<float> or, if
// memory usage is a concern, to use NaN as the indicator of an unknown weight.
constexpr double kMinRatio = 0.01;
} // namespace
absl::optional<StaticStrideScheduler> StaticStrideScheduler::Make(
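To make the effect of these two constants concrete, here is a small standalone sketch (not part of the patch; EffectiveWeight, kMaxRatioExample, and kMinRatioExample are illustrative names) of how a weight is bounded relative to the mean M of the known weights, ignoring the integer scaling that the real implementation applies afterwards:

// Illustrative only: the real logic lives in StaticStrideScheduler::Make().
#include <algorithm>

constexpr double kMaxRatioExample = 10;    // mirrors kMaxRatio above
constexpr double kMinRatioExample = 0.01;  // mirrors kMinRatio above

// Returns the weight effectively used for a channel, in unscaled (float) terms.
double EffectiveWeight(double weight, double mean_of_known_weights) {
  if (weight == 0) return mean_of_known_weights;  // unknown -> use the mean
  return std::clamp(weight, mean_of_known_weights * kMinRatioExample,
                    mean_of_known_weights * kMaxRatioExample);
}

// E.g. for weights {81, 1, 1, ..., 1} (20 entries) the mean is 5, so 81 is
// capped to 5 * 10 = 50; for weights {100, 1e-10} the mean is ~50, so 1e-10
// is raised to 50 * 0.01 = 0.5. Both cases appear in the tests below.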
@@ -45,10 +88,10 @@ absl::optional<StaticStrideScheduler> StaticStrideScheduler::Make(
const size_t n = float_weights.size();
size_t num_zero_weight_channels = 0;
double sum = 0;
- float max = 0;
+ float unscaled_max = 0;
for (const float weight : float_weights) {
sum += weight;
- max = std::max(max, weight);
+ unscaled_max = std::max(unscaled_max, weight);
if (weight == 0) {
++num_zero_weight_channels;
}
@@ -59,6 +102,13 @@ absl::optional<StaticStrideScheduler> StaticStrideScheduler::Make(
// Mean of non-zero weights before scaling to `kMaxWeight`.
const double unscaled_mean =
sum / static_cast<double>(n - num_zero_weight_channels);
const double ratio = unscaled_max / unscaled_mean;
// Adjust the max value so that the ratio does not exceed kMaxRatio. This
// should ensure that we do, on average, at most kMaxRatio rounds per pick.
if (ratio > kMaxRatio) {
unscaled_max = kMaxRatio * unscaled_mean;
}
// Scale weights such that the largest is equal to `kMaxWeight`. This should
// be accurate enough once we convert to an integer. Quantisation errors won't
@@ -66,15 +116,35 @@ absl::optional<StaticStrideScheduler> StaticStrideScheduler::Make(
// TODO(b/190488683): it may be more stable over updates if we try to keep
// `scaling_factor` consistent, and only change it when we can't accurately
// represent the new weights.
- const double scaling_factor = kMaxWeight / max;
+ const double scaling_factor = kMaxWeight / unscaled_max;
// Note that since we cap the weights to stay within kMaxRatio, `mean` might
// not match the actual mean of the values that end up in the scheduler.
const uint16_t mean = std::lround(scaling_factor * unscaled_mean);
// We compute weight_lower_bound and cap it to 1 from below so that in the
// worst case we represent tiny weights as 1 but not as 0 (which would cause
// an infinite loop as in b/276292666). This capping to 1 is probably only
// useful in case someone misconfigures kMinRatio to be very small.
//
// NOMUTANTS -- We have tests for this expression, but they are not precise
// enough to catch off-by-one errors, which is what mutation testing checks.
const uint16_t weight_lower_bound =
std::max(static_cast<uint16_t>(1),
static_cast<uint16_t>(std::lround(mean * kMinRatio)));
std::vector<uint16_t> weights;
weights.reserve(n);
for (size_t i = 0; i < n; ++i) {
- weights.push_back(float_weights[i] == 0
-                       ? mean
-                       : std::lround(float_weights[i] * scaling_factor));
+ if (float_weights[i] == 0) {  // Weight is unknown.
+   weights.push_back(mean);
+ } else {
+   const double float_weight_capped_from_above =
+       std::min(float_weights[i], unscaled_max);
+   const uint16_t weight =
+       std::lround(float_weight_capped_from_above * scaling_factor);
+   weights.push_back(std::max(weight, weight_lower_bound));
+ }
}
GPR_ASSERT(weights.size() == float_weights.size());
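Since the changed lines above are spread over several hunks, here is a condensed, self-contained sketch of the resulting weight derivation (a sketch only: BuildWeightsSketch is a made-up name, it assumes at least one non-zero weight, and it omits whatever validation the real StaticStrideScheduler::Make() performs):

// Condensed restatement of the steps above; illustrative, not the real code.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

std::vector<uint16_t> BuildWeightsSketch(const std::vector<float>& float_weights) {
  constexpr uint16_t kMaxWeight = std::numeric_limits<uint16_t>::max();
  constexpr double kMaxRatio = 10;
  constexpr double kMinRatio = 0.01;
  const size_t n = float_weights.size();
  size_t num_zero_weight_channels = 0;
  double sum = 0;
  float unscaled_max = 0;
  for (const float weight : float_weights) {
    sum += weight;
    unscaled_max = std::max(unscaled_max, weight);
    if (weight == 0) ++num_zero_weight_channels;
  }
  const double unscaled_mean =
      sum / static_cast<double>(n - num_zero_weight_channels);
  // Cap the largest weight so that max/mean never exceeds kMaxRatio.
  if (unscaled_max / unscaled_mean > kMaxRatio) {
    unscaled_max = kMaxRatio * unscaled_mean;
  }
  // Scale so that the (possibly capped) largest weight maps to kMaxWeight.
  const double scaling_factor = kMaxWeight / unscaled_max;
  const uint16_t mean =
      static_cast<uint16_t>(std::lround(scaling_factor * unscaled_mean));
  // Known weights never drop below max(1, mean * kMinRatio) after scaling.
  const uint16_t weight_lower_bound =
      std::max(static_cast<uint16_t>(1),
               static_cast<uint16_t>(std::lround(mean * kMinRatio)));
  std::vector<uint16_t> weights;
  weights.reserve(n);
  for (const float w : float_weights) {
    if (w == 0) {  // Unknown weight: use the mean.
      weights.push_back(mean);
    } else {
      const double capped = std::min<double>(w, unscaled_max);
      const uint16_t scaled =
          static_cast<uint16_t>(std::lround(capped * scaling_factor));
      weights.push_back(std::max(scaled, weight_lower_bound));
    }
  }
  return weights;
}

// For {81, 1 x 19}: unscaled mean 5, max capped to 50, so the scaled weights
// come out roughly as {65535, 1311 x 19}. For {100, 1e-10}: mean ~50, lower
// bound ~328, so the scaled weights come out roughly as {65535, 328}.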

@@ -187,6 +187,44 @@ TEST(StaticStrideSchedulerTest, LargestIsPickedEveryGeneration) {
EXPECT_EQ(largest_weight_pick_count, kMaxWeight);
}
TEST(StaticStrideSchedulerTest, MaxIsClampedForHighRatio) {
uint32_t sequence = 0;
const std::vector<float> weights{81, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
const absl::optional<StaticStrideScheduler> scheduler =
StaticStrideScheduler::Make(absl::MakeSpan(weights),
[&] { return sequence++; });
ASSERT_TRUE(scheduler.has_value());
// The max weight gets clamped to mean*kMaxRatio = 5*10 = 50 for this set of
// weights, so if we pick 50 + 19 = 69 times we should see every possible pick.
std::vector<int> picks(weights.size());
for (int i = 0; i < 69; ++i) {
++picks[scheduler->Pick()];
}
EXPECT_THAT(picks, ElementsAre(50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1));
}
TEST(StaticStrideSchedulerTest, MinIsClampedForHighRatio) {
uint32_t sequence = 0;
const std::vector<float> weights{100, 1e-10};
const absl::optional<StaticStrideScheduler> scheduler =
StaticStrideScheduler::Make(absl::MakeSpan(weights),
[&] { return sequence++; });
ASSERT_TRUE(scheduler.has_value());
// We pick 201 times and ensure that the second channel (with epsilon weight)
// also gets picked. The math: the mean of the weights is ~50, so the first
// channel keeps its weight of 100, while the second channel's weight gets
// raised (capped from below) to 50*0.01 = 0.5.
std::vector<int> picks(weights.size());
for (int i = 0; i < 201; ++i) {
++picks[scheduler->Pick()];
}
EXPECT_THAT(picks, ElementsAre(200, 1));
}
} // namespace
} // namespace grpc_core
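For reference, the expected counts in these two tests follow directly from the capping rules: in the first test the weights sum to 100 over 20 channels, so the mean is 5 and the largest weight is capped at 5 * kMaxRatio = 50, giving proportions 50:1:...:1 and hence 50 + 19 = 69 picks for one full cycle; in the second test the mean is about 50, so the near-zero weight is raised to roughly 50 * kMinRatio = 0.5, giving a 100:0.5 = 200:1 split and therefore the (200, 1) distribution over 201 picks.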
