use new top-level fields in LRS

pull/37467/head
Mark D. Roth 5 months ago
parent ef8008b7fc
commit 1e8b534734
  1. 1
      BUILD
  2. 53
      src/core/xds/xds_client/lrs_client.cc
  3. 25
      src/proto/grpc/testing/xds/v3/load_report.proto
  4. 59
      test/cpp/end2end/xds/xds_cluster_end2end_test.cc
  5. 30
      test/cpp/end2end/xds/xds_server.h

@ -4415,6 +4415,7 @@ grpc_cc_library(
external_deps = [
"absl/base:core_headers",
"absl/cleanup",
"absl/functional:function_ref",
"absl/log:check",
"absl/log:log",
"absl/memory",

@ -22,6 +22,7 @@
#include <vector>
#include "absl/cleanup/cleanup.h"
#include "absl/functional/function_ref.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/strings/string_view.h"
@ -1095,21 +1096,19 @@ std::string LrsClient::CreateLrsInitialRequest() {
namespace {
void MaybeAddLoadMetric(
void MaybeAddUnnamedMetric(
const LrsApiContext& context,
envoy_config_endpoint_v3_UpstreamLocalityStats* output,
absl::string_view metric_name,
const LrsClient::ClusterLocalityStats::BackendMetric& backend_metric) {
const LrsClient::ClusterLocalityStats::BackendMetric& backend_metric,
absl::FunctionRef<envoy_config_endpoint_v3_UnnamedEndpointLoadMetricStats*(
envoy_config_endpoint_v3_UpstreamLocalityStats*, upb_Arena*)>
add_field,
envoy_config_endpoint_v3_UpstreamLocalityStats* output) {
if (backend_metric.IsZero()) return;
envoy_config_endpoint_v3_EndpointLoadMetricStats* load_metric =
envoy_config_endpoint_v3_UpstreamLocalityStats_add_load_metric_stats(
output, context.arena);
envoy_config_endpoint_v3_EndpointLoadMetricStats_set_metric_name(
load_metric, StdStringToUpbString(metric_name));
envoy_config_endpoint_v3_EndpointLoadMetricStats_set_num_requests_finished_with_metric(
load_metric, backend_metric.num_requests_finished_with_metric);
envoy_config_endpoint_v3_EndpointLoadMetricStats_set_total_metric_value(
load_metric, backend_metric.total_metric_value);
auto* metric_proto = add_field(output, context.arena);
envoy_config_endpoint_v3_UnnamedEndpointLoadMetricStats_set_num_requests_finished_with_metric(
metric_proto, backend_metric.num_requests_finished_with_metric);
envoy_config_endpoint_v3_UnnamedEndpointLoadMetricStats_set_total_metric_value(
metric_proto, backend_metric.total_metric_value);
}
void LocalityStatsPopulate(
@ -1143,17 +1142,31 @@ void LocalityStatsPopulate(
envoy_config_endpoint_v3_UpstreamLocalityStats_set_total_issued_requests(
output, snapshot.total_issued_requests);
// Add backend metrics.
MaybeAddLoadMetric(context, output, "cpu_utilization",
snapshot.cpu_utilization);
MaybeAddLoadMetric(context, output, "mem_utilization",
snapshot.mem_utilization);
MaybeAddLoadMetric(context, output, "application_utilization",
snapshot.application_utilization);
MaybeAddUnnamedMetric(
context, snapshot.cpu_utilization,
envoy_config_endpoint_v3_UpstreamLocalityStats_mutable_cpu_utilization,
output);
MaybeAddUnnamedMetric(
context, snapshot.mem_utilization,
envoy_config_endpoint_v3_UpstreamLocalityStats_mutable_mem_utilization,
output);
MaybeAddUnnamedMetric(
context, snapshot.application_utilization,
envoy_config_endpoint_v3_UpstreamLocalityStats_mutable_application_utilization,
output);
for (const auto& p : snapshot.backend_metrics) {
const std::string& metric_name = p.first;
const LrsClient::ClusterLocalityStats::BackendMetric& metric_value =
p.second;
MaybeAddLoadMetric(context, output, metric_name, metric_value);
envoy_config_endpoint_v3_EndpointLoadMetricStats* load_metric =
envoy_config_endpoint_v3_UpstreamLocalityStats_add_load_metric_stats(
output, context.arena);
envoy_config_endpoint_v3_EndpointLoadMetricStats_set_metric_name(
load_metric, StdStringToUpbString(metric_name));
envoy_config_endpoint_v3_EndpointLoadMetricStats_set_num_requests_finished_with_metric(
load_metric, metric_value.num_requests_finished_with_metric);
envoy_config_endpoint_v3_EndpointLoadMetricStats_set_total_metric_value(
load_metric, metric_value.total_metric_value);
}
}

@ -51,7 +51,20 @@ message UpstreamLocalityStats {
// upstream endpoints in the locality.
uint64 total_issued_requests = 8;
// Stats for multi-dimensional load balancing.
// CPU utilization stats for multi-dimensional load balancing.
// This typically comes from endpoint metrics reported via ORCA.
UnnamedEndpointLoadMetricStats cpu_utilization = 12;
// Memory utilization for multi-dimensional load balancing.
// This typically comes from endpoint metrics reported via ORCA.
UnnamedEndpointLoadMetricStats mem_utilization = 13;
// Blended application-defined utilization for multi-dimensional load balancing.
// This typically comes from endpoint metrics reported via ORCA.
UnnamedEndpointLoadMetricStats application_utilization = 14;
// Named stats for multi-dimensional load balancing.
// These typically come from endpoint metrics reported via ORCA.
repeated EndpointLoadMetricStats load_metric_stats = 5;
// Endpoint granularity stats information for this locality. This information
@ -117,6 +130,16 @@ message EndpointLoadMetricStats {
double total_metric_value = 3;
}
// Same as EndpointLoadMetricStats, except without the metric_name field.
// Used by the dedicated cpu_utilization, mem_utilization, and
// application_utilization fields of UpstreamLocalityStats, where the field
// itself identifies the metric and so no name needs to be carried.
message UnnamedEndpointLoadMetricStats {
// Number of calls that finished and included this metric.
uint64 num_requests_finished_with_metric = 1;
// Sum of metric values across all calls that finished with this metric for
// load_reporting_interval.
double total_metric_value = 2;
}
// Per cluster load stats. Envoy reports these stats to a management server in a
// :ref:`LoadStatsRequest<envoy_api_msg_service.load_stats.v3.LoadStatsRequest>`
// [#not-implemented-hide:] Not configuration. TBD how to doc proto APIs.

@ -1797,6 +1797,9 @@ TEST_P(ClientLoadReportingTest, OrcaPropagation) {
::testing::Pair("locality1", ::testing::_)));
size_t num_successful_rpcs = 0;
size_t num_failed_rpcs = 0;
ClientStats::LocalityStats::LoadMetric cpu_utilization;
ClientStats::LocalityStats::LoadMetric mem_utilization;
ClientStats::LocalityStats::LoadMetric application_utilization;
std::map<std::string, ClientStats::LocalityStats::LoadMetric>
named_metrics_total;
for (const auto& p : client_stats.locality_stats()) {
@ -1806,6 +1809,9 @@ TEST_P(ClientLoadReportingTest, OrcaPropagation) {
p.second.total_successful_requests + p.second.total_error_requests);
num_successful_rpcs += p.second.total_successful_requests;
num_failed_rpcs += p.second.total_error_requests;
cpu_utilization += p.second.cpu_utilization;
mem_utilization += p.second.mem_utilization;
application_utilization += p.second.application_utilization;
for (const auto& s : p.second.load_metrics) {
named_metrics_total[s.first] += s.second;
}
@ -1813,37 +1819,32 @@ TEST_P(ClientLoadReportingTest, OrcaPropagation) {
EXPECT_EQ(num_successful_rpcs, total_successful_rpcs_sent);
EXPECT_EQ(num_failed_rpcs, total_failed_rpcs_sent);
EXPECT_EQ(num_successful_rpcs + num_failed_rpcs, total_rpcs_sent);
EXPECT_THAT(
cpu_utilization,
LoadMetricEq(
(kNumRpcsPerAddress + kNumFailuresPerAddress) * backends_.size(),
(kNumRpcsPerAddress * backends_.size()) * 0.8 +
(kNumFailuresPerAddress * backends_.size()) * 0.4));
EXPECT_THAT(
mem_utilization,
LoadMetricEq(
(kNumRpcsPerAddress + kNumFailuresPerAddress) * backends_.size(),
(kNumRpcsPerAddress * backends_.size()) * 0.6 +
(kNumFailuresPerAddress * backends_.size()) * 0.3));
EXPECT_THAT(
application_utilization,
LoadMetricEq(
(kNumRpcsPerAddress + kNumFailuresPerAddress) * backends_.size(),
(kNumRpcsPerAddress * backends_.size()) * 0.4 +
(kNumFailuresPerAddress * backends_.size()) * 0.2));
EXPECT_THAT(
named_metrics_total,
::testing::UnorderedElementsAre(
::testing::Pair(
"named_metrics.foo",
LoadMetricEq(
(kNumRpcsPerAddress + kNumFailuresPerAddress) *
backends_.size(),
(kNumRpcsPerAddress * backends_.size()) * 1.0 +
(kNumFailuresPerAddress * backends_.size()) * 0.3)),
::testing::Pair(
"cpu_utilization",
LoadMetricEq(
(kNumRpcsPerAddress + kNumFailuresPerAddress) *
backends_.size(),
(kNumRpcsPerAddress * backends_.size()) * 0.8 +
(kNumFailuresPerAddress * backends_.size()) * 0.4)),
::testing::Pair(
"mem_utilization",
LoadMetricEq(
(kNumRpcsPerAddress + kNumFailuresPerAddress) *
backends_.size(),
(kNumRpcsPerAddress * backends_.size()) * 0.6 +
(kNumFailuresPerAddress * backends_.size()) * 0.3)),
::testing::Pair(
"application_utilization",
LoadMetricEq(
(kNumRpcsPerAddress + kNumFailuresPerAddress) *
backends_.size(),
(kNumRpcsPerAddress * backends_.size()) * 0.4 +
(kNumFailuresPerAddress * backends_.size()) * 0.2))));
::testing::UnorderedElementsAre(::testing::Pair(
"named_metrics.foo",
LoadMetricEq(
(kNumRpcsPerAddress + kNumFailuresPerAddress) * backends_.size(),
(kNumRpcsPerAddress * backends_.size()) * 1.0 +
(kNumFailuresPerAddress * backends_.size()) * 0.3))));
// The LRS service got a single request, and sent a single response.
EXPECT_EQ(1U, balancer_->lrs_service()->request_count());
EXPECT_EQ(1U, balancer_->lrs_service()->response_count());

@ -617,8 +617,19 @@ class LrsServiceImpl
// Stats for a given locality.
struct LocalityStats {
struct LoadMetric {
uint64_t num_requests_finished_with_metric;
double total_metric_value;
uint64_t num_requests_finished_with_metric = 0;
double total_metric_value = 0;
LoadMetric() = default;
// Works for both EndpointLoadMetricStats and
// UnnamedEndpointLoadMetricStats.
template <typename T>
explicit LoadMetric(const T& stats)
: num_requests_finished_with_metric(
stats.num_requests_finished_with_metric()),
total_metric_value(stats.total_metric_value()) {}
LoadMetric& operator+=(const LoadMetric& other) {
num_requests_finished_with_metric +=
other.num_requests_finished_with_metric;
@ -640,10 +651,13 @@ class LrsServiceImpl
total_error_requests(
upstream_locality_stats.total_error_requests()),
total_issued_requests(
upstream_locality_stats.total_issued_requests()) {
upstream_locality_stats.total_issued_requests()),
cpu_utilization(upstream_locality_stats.cpu_utilization()),
mem_utilization(upstream_locality_stats.mem_utilization()),
application_utilization(
upstream_locality_stats.application_utilization()) {
for (const auto& s : upstream_locality_stats.load_metric_stats()) {
load_metrics[s.metric_name()] += LoadMetric{
s.num_requests_finished_with_metric(), s.total_metric_value()};
load_metrics[s.metric_name()] += LoadMetric(s);
}
}
@ -652,6 +666,9 @@ class LrsServiceImpl
total_requests_in_progress += other.total_requests_in_progress;
total_error_requests += other.total_error_requests;
total_issued_requests += other.total_issued_requests;
cpu_utilization += other.cpu_utilization;
mem_utilization += other.mem_utilization;
application_utilization += other.application_utilization;
for (const auto& p : other.load_metrics) {
load_metrics[p.first] += p.second;
}
@ -662,6 +679,9 @@ class LrsServiceImpl
uint64_t total_requests_in_progress = 0;
uint64_t total_error_requests = 0;
uint64_t total_issued_requests = 0;
LoadMetric cpu_utilization;
LoadMetric mem_utilization;
LoadMetric application_utilization;
std::map<std::string, LoadMetric> load_metrics;
};

Loading…
Cancel
Save