|
|
|
@ -31,7 +31,50 @@ |
|
|
|
|
namespace grpc_core { |
|
|
|
|
|
|
|
|
|
namespace { |
|
|
|
|
|
|
|
|
|
constexpr uint16_t kMaxWeight = std::numeric_limits<uint16_t>::max(); |
|
|
|
|
|
|
|
|
|
// Assumsing the mean of all known weights is M, StaticStrideScheduler will cap
|
|
|
|
|
// from above all known weights that are bigger than M*kMaxRatio (to
|
|
|
|
|
// M*kMaxRatio).
|
|
|
|
|
//
|
|
|
|
|
// This is done to limit the number of rounds for picks.
|
|
|
|
|
constexpr double kMaxRatio = 10; |
|
|
|
|
|
|
|
|
|
// Assumsing the mean of all known weights is M, StaticStrideScheduler will cap
|
|
|
|
|
// from below all known weights to M*kMinRatio.
|
|
|
|
|
//
|
|
|
|
|
// This is done as a performance optimization for edge cases when channels with
|
|
|
|
|
// large weights are non-accepting (and thus WeightedRoundRobin will retry
|
|
|
|
|
// picking them over and over again), and there are also channels with near-zero
|
|
|
|
|
// weights that are possibly accepting. In this case, without kMinRatio, it
|
|
|
|
|
// would potentially require WeightedRoundRobin to perform thousands of picks
|
|
|
|
|
// until it gets a single channel with near-zero weight. This was a part of what
|
|
|
|
|
// hapenned in b/276292666.
|
|
|
|
|
//
|
|
|
|
|
// The current value of 0.01 was chosen without any experimenting. It should
|
|
|
|
|
// ensure that WeightedRoundRobin doesn't do much more than an order of 100
|
|
|
|
|
// picks of non-accepting channels with high weights in such corner cases. But
|
|
|
|
|
// it also makes WeightedRoundRobin to send slightly more requests to
|
|
|
|
|
// potentially very bad tasks (that would have near-zero weights) than zero.
|
|
|
|
|
// This is not necesserily a downside, though. Perhaps this is not a problem at
|
|
|
|
|
// all and we should increase this value (to 0.05 or 0.1) to save CPU cycles.
|
|
|
|
|
//
|
|
|
|
|
// Note that this class treats weights that are exactly equal to zero as unknown
|
|
|
|
|
// and thus needing to be replaced with M. This behavior itself makes sense
|
|
|
|
|
// (fresh channels without feedback information will get an average flow of
|
|
|
|
|
// requests). However, it follows from this that this class will replace weight
|
|
|
|
|
// = 0 with M, but weight = epsilon with M*kMinRatio, and this step function is
|
|
|
|
|
// logically faulty. A demonstration of this is that the function that computes
|
|
|
|
|
// weights in WeightedRoundRobin
|
|
|
|
|
// (http://google3/production/rpc/stubs/core/loadbalancing/weightedroundrobin.cc;l=324-325;rcl=514986476)
|
|
|
|
|
// will cap some epsilon values to zero. There should be a clear distinction
|
|
|
|
|
// between "task is new, weight is unknown" and "task is unhealthy, weight is
|
|
|
|
|
// very low". A better solution would be to not mix "unknown" and "weight" into
|
|
|
|
|
// a single value but represent weights as std::optional<float> or, if memory
|
|
|
|
|
// usage is a concern, use NaN as the indicator of unknown weight.
|
|
|
|
|
constexpr double kMinRatio = 0.01; |
|
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
|
absl::optional<StaticStrideScheduler> StaticStrideScheduler::Make( |
|
|
|
@ -45,10 +88,10 @@ absl::optional<StaticStrideScheduler> StaticStrideScheduler::Make( |
|
|
|
|
const size_t n = float_weights.size(); |
|
|
|
|
size_t num_zero_weight_channels = 0; |
|
|
|
|
double sum = 0; |
|
|
|
|
float max = 0; |
|
|
|
|
float unscaled_max = 0; |
|
|
|
|
for (const float weight : float_weights) { |
|
|
|
|
sum += weight; |
|
|
|
|
max = std::max(max, weight); |
|
|
|
|
unscaled_max = std::max(unscaled_max, weight); |
|
|
|
|
if (weight == 0) { |
|
|
|
|
++num_zero_weight_channels; |
|
|
|
|
} |
|
|
|
@ -59,6 +102,13 @@ absl::optional<StaticStrideScheduler> StaticStrideScheduler::Make( |
|
|
|
|
// Mean of non-zero weights before scaling to `kMaxWeight`.
|
|
|
|
|
const double unscaled_mean = |
|
|
|
|
sum / static_cast<double>(n - num_zero_weight_channels); |
|
|
|
|
const double ratio = unscaled_max / unscaled_mean; |
|
|
|
|
|
|
|
|
|
// Adjust max value such that ratio does not exceed kMaxRatio. This should
|
|
|
|
|
// ensure that we on average do at most kMaxRatio rounds for picks.
|
|
|
|
|
if (ratio > kMaxRatio) { |
|
|
|
|
unscaled_max = kMaxRatio * unscaled_mean; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Scale weights such that the largest is equal to `kMaxWeight`. This should
|
|
|
|
|
// be accurate enough once we convert to an integer. Quantisation errors won't
|
|
|
|
@ -66,15 +116,35 @@ absl::optional<StaticStrideScheduler> StaticStrideScheduler::Make( |
|
|
|
|
// TODO(b/190488683): it may be more stable over updates if we try to keep
|
|
|
|
|
// `scaling_factor` consistent, and only change it when we can't accurately
|
|
|
|
|
// represent the new weights.
|
|
|
|
|
const double scaling_factor = kMaxWeight / max; |
|
|
|
|
const double scaling_factor = kMaxWeight / unscaled_max; |
|
|
|
|
|
|
|
|
|
// Note that since we cap the weights to stay within kMaxRatio, `mean` might
|
|
|
|
|
// not match the actual mean of the values that end up in the scheduler.
|
|
|
|
|
const uint16_t mean = std::lround(scaling_factor * unscaled_mean); |
|
|
|
|
|
|
|
|
|
// We compute weight_lower_bound and cap it to 1 from below so that in the
|
|
|
|
|
// worst case we represent tiny weights as 1 but not as 0 (which would cause
|
|
|
|
|
// an infinite loop as in b/276292666). This capping to 1 is probably only
|
|
|
|
|
// useful in case someone misconfigures kMinRatio to be very small.
|
|
|
|
|
//
|
|
|
|
|
// NOMUTANTS -- We have tests for this expression, but they are not precise
|
|
|
|
|
// enough to catch errors of plus/minus 1, what mutation testing does.
|
|
|
|
|
const uint16_t weight_lower_bound = |
|
|
|
|
std::max(static_cast<uint16_t>(1), |
|
|
|
|
static_cast<uint16_t>(std::lround(mean * kMinRatio))); |
|
|
|
|
|
|
|
|
|
std::vector<uint16_t> weights; |
|
|
|
|
weights.reserve(n); |
|
|
|
|
for (size_t i = 0; i < n; ++i) { |
|
|
|
|
weights.push_back(float_weights[i] == 0 |
|
|
|
|
? mean |
|
|
|
|
: std::lround(float_weights[i] * scaling_factor)); |
|
|
|
|
if (float_weights[i] == 0) { // Weight is unknown.
|
|
|
|
|
weights.push_back(mean); |
|
|
|
|
} else { |
|
|
|
|
const double float_weight_capped_from_above = |
|
|
|
|
std::min(float_weights[i], unscaled_max); |
|
|
|
|
const uint16_t weight = |
|
|
|
|
std::lround(float_weight_capped_from_above * scaling_factor); |
|
|
|
|
weights.push_back(std::max(weight, weight_lower_bound)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
GPR_ASSERT(weights.size() == float_weights.size()); |
|
|
|
|