Merge branch 'xds_client_lrs_refactor' into orca_lrs_propagation_changes

pull/37467/head
Mark D. Roth 2 months ago
commit 1af67d4da5
  1. 7
      BUILD
  2. 2
      CMakeLists.txt
  3. 2
      Makefile
  4. 6
      Package.swift
  5. 6
      build_autogenerated.yaml
  6. 2
      config.m4
  7. 2
      config.w32
  8. 8
      gRPC-C++.podspec
  9. 10
      gRPC-Core.podspec
  10. 6
      grpc.gemspec
  11. 6
      package.xml
  12. 17
      src/core/BUILD
  13. 52
      src/core/load_balancing/xds/xds_cluster_impl.cc
  14. 2
      src/core/load_balancing/xds/xds_wrr_locality.cc
  15. 41
      src/core/xds/grpc/xds_client_grpc.cc
  16. 10
      src/core/xds/grpc/xds_client_grpc.h
  17. 2
      src/core/xds/grpc/xds_cluster.cc
  18. 16
      src/core/xds/grpc/xds_cluster.h
  19. 2
      src/core/xds/grpc/xds_cluster_parser.cc
  20. 2
      src/core/xds/grpc/xds_endpoint.h
  21. 117
      src/core/xds/grpc/xds_transport_grpc.cc
  22. 40
      src/core/xds/grpc/xds_transport_grpc.h
  23. 1248
      src/core/xds/xds_client/lrs_client.cc
  24. 374
      src/core/xds/xds_client/lrs_client.h
  25. 262
      src/core/xds/xds_client/xds_api.cc
  26. 37
      src/core/xds/xds_client/xds_api.h
  27. 41
      src/core/xds/xds_client/xds_backend_metric_propagation.h
  28. 601
      src/core/xds/xds_client/xds_client.cc
  29. 94
      src/core/xds/xds_client/xds_client.h
  30. 206
      src/core/xds/xds_client/xds_client_stats.cc
  31. 285
      src/core/xds/xds_client/xds_client_stats.h
  32. 103
      src/core/xds/xds_client/xds_locality.h
  33. 40
      src/core/xds/xds_client/xds_transport.h
  34. 2
      src/python/grpcio/grpc_core_dependencies.py
  35. 11
      test/core/xds/xds_client_fuzzer.cc
  36. 70
      test/core/xds/xds_client_test.cc
  37. 4
      test/core/xds/xds_cluster_resource_type_test.cc
  38. 2
      test/core/xds/xds_endpoint_resource_type_test.cc
  39. 71
      test/core/xds/xds_transport_fake.cc
  40. 61
      test/core/xds/xds_transport_fake.h
  41. 6
      tools/doxygen/Doxyfile.c++.internal
  42. 6
      tools/doxygen/Doxyfile.core.internal

@ -4390,17 +4390,18 @@ grpc_cc_library(
grpc_cc_library(
name = "xds_client",
srcs = [
"//src/core:xds/xds_client/lrs_client.cc",
"//src/core:xds/xds_client/xds_api.cc",
"//src/core:xds/xds_client/xds_bootstrap.cc",
"//src/core:xds/xds_client/xds_client.cc",
"//src/core:xds/xds_client/xds_client_stats.cc",
],
hdrs = [
"//src/core:xds/xds_client/lrs_client.h",
"//src/core:xds/xds_client/xds_api.h",
"//src/core:xds/xds_client/xds_bootstrap.h",
"//src/core:xds/xds_client/xds_channel_args.h",
"//src/core:xds/xds_client/xds_client.h",
"//src/core:xds/xds_client/xds_client_stats.h",
"//src/core:xds/xds_client/xds_locality.h",
"//src/core:xds/xds_client/xds_metrics.h",
"//src/core:xds/xds_client/xds_resource_type.h",
"//src/core:xds/xds_client/xds_resource_type_impl.h",
@ -4459,9 +4460,11 @@ grpc_cc_library(
"//src/core:json",
"//src/core:per_cpu",
"//src/core:ref_counted",
"//src/core:ref_counted_string",
"//src/core:time",
"//src/core:upb_utils",
"//src/core:useful",
"//src/core:xds_backend_metric_propagation",
],
)

2
CMakeLists.txt generated

@ -2661,10 +2661,10 @@ add_library(grpc
src/core/xds/grpc/xds_routing.cc
src/core/xds/grpc/xds_server_grpc.cc
src/core/xds/grpc/xds_transport_grpc.cc
src/core/xds/xds_client/lrs_client.cc
src/core/xds/xds_client/xds_api.cc
src/core/xds/xds_client/xds_bootstrap.cc
src/core/xds/xds_client/xds_client.cc
src/core/xds/xds_client/xds_client_stats.cc
)
target_compile_features(grpc PUBLIC cxx_std_14)

2
Makefile generated

@ -1515,10 +1515,10 @@ LIBGRPC_SRC = \
src/core/xds/grpc/xds_routing.cc \
src/core/xds/grpc/xds_server_grpc.cc \
src/core/xds/grpc/xds_transport_grpc.cc \
src/core/xds/xds_client/lrs_client.cc \
src/core/xds/xds_client/xds_api.cc \
src/core/xds/xds_client/xds_bootstrap.cc \
src/core/xds/xds_client/xds_client.cc \
src/core/xds/xds_client/xds_client_stats.cc \
third_party/abseil-cpp/absl/base/internal/cycleclock.cc \
third_party/abseil-cpp/absl/base/internal/low_level_alloc.cc \
third_party/abseil-cpp/absl/base/internal/raw_logging.cc \

6
Package.swift generated

@ -2031,15 +2031,17 @@ let package = Package(
"src/core/xds/grpc/xds_server_grpc.h",
"src/core/xds/grpc/xds_transport_grpc.cc",
"src/core/xds/grpc/xds_transport_grpc.h",
"src/core/xds/xds_client/lrs_client.cc",
"src/core/xds/xds_client/lrs_client.h",
"src/core/xds/xds_client/xds_api.cc",
"src/core/xds/xds_client/xds_api.h",
"src/core/xds/xds_client/xds_backend_metric_propagation.h",
"src/core/xds/xds_client/xds_bootstrap.cc",
"src/core/xds/xds_client/xds_bootstrap.h",
"src/core/xds/xds_client/xds_channel_args.h",
"src/core/xds/xds_client/xds_client.cc",
"src/core/xds/xds_client/xds_client.h",
"src/core/xds/xds_client/xds_client_stats.cc",
"src/core/xds/xds_client/xds_client_stats.h",
"src/core/xds/xds_client/xds_locality.h",
"src/core/xds/xds_client/xds_metrics.h",
"src/core/xds/xds_client/xds_resource_type.h",
"src/core/xds/xds_client/xds_resource_type_impl.h",

@ -1256,11 +1256,13 @@ libs:
- src/core/xds/grpc/xds_routing.h
- src/core/xds/grpc/xds_server_grpc.h
- src/core/xds/grpc/xds_transport_grpc.h
- src/core/xds/xds_client/lrs_client.h
- src/core/xds/xds_client/xds_api.h
- src/core/xds/xds_client/xds_backend_metric_propagation.h
- src/core/xds/xds_client/xds_bootstrap.h
- src/core/xds/xds_client/xds_channel_args.h
- src/core/xds/xds_client/xds_client.h
- src/core/xds/xds_client/xds_client_stats.h
- src/core/xds/xds_client/xds_locality.h
- src/core/xds/xds_client/xds_metrics.h
- src/core/xds/xds_client/xds_resource_type.h
- src/core/xds/xds_client/xds_resource_type_impl.h
@ -2076,10 +2078,10 @@ libs:
- src/core/xds/grpc/xds_routing.cc
- src/core/xds/grpc/xds_server_grpc.cc
- src/core/xds/grpc/xds_transport_grpc.cc
- src/core/xds/xds_client/lrs_client.cc
- src/core/xds/xds_client/xds_api.cc
- src/core/xds/xds_client/xds_bootstrap.cc
- src/core/xds/xds_client/xds_client.cc
- src/core/xds/xds_client/xds_client_stats.cc
deps:
- upb_json_lib
- upb_textformat_lib

2
config.m4 generated

@ -890,10 +890,10 @@ if test "$PHP_GRPC" != "no"; then
src/core/xds/grpc/xds_routing.cc \
src/core/xds/grpc/xds_server_grpc.cc \
src/core/xds/grpc/xds_transport_grpc.cc \
src/core/xds/xds_client/lrs_client.cc \
src/core/xds/xds_client/xds_api.cc \
src/core/xds/xds_client/xds_bootstrap.cc \
src/core/xds/xds_client/xds_client.cc \
src/core/xds/xds_client/xds_client_stats.cc \
src/php/ext/grpc/byte_buffer.c \
src/php/ext/grpc/call.c \
src/php/ext/grpc/call_credentials.c \

2
config.w32 generated

@ -855,10 +855,10 @@ if (PHP_GRPC != "no") {
"src\\core\\xds\\grpc\\xds_routing.cc " +
"src\\core\\xds\\grpc\\xds_server_grpc.cc " +
"src\\core\\xds\\grpc\\xds_transport_grpc.cc " +
"src\\core\\xds\\xds_client\\lrs_client.cc " +
"src\\core\\xds\\xds_client\\xds_api.cc " +
"src\\core\\xds\\xds_client\\xds_bootstrap.cc " +
"src\\core\\xds\\xds_client\\xds_client.cc " +
"src\\core\\xds\\xds_client\\xds_client_stats.cc " +
"src\\php\\ext\\grpc\\byte_buffer.c " +
"src\\php\\ext\\grpc\\call.c " +
"src\\php\\ext\\grpc\\call_credentials.c " +

8
gRPC-C++.podspec generated

@ -1370,11 +1370,13 @@ Pod::Spec.new do |s|
'src/core/xds/grpc/xds_routing.h',
'src/core/xds/grpc/xds_server_grpc.h',
'src/core/xds/grpc/xds_transport_grpc.h',
'src/core/xds/xds_client/lrs_client.h',
'src/core/xds/xds_client/xds_api.h',
'src/core/xds/xds_client/xds_backend_metric_propagation.h',
'src/core/xds/xds_client/xds_bootstrap.h',
'src/core/xds/xds_client/xds_channel_args.h',
'src/core/xds/xds_client/xds_client.h',
'src/core/xds/xds_client/xds_client_stats.h',
'src/core/xds/xds_client/xds_locality.h',
'src/core/xds/xds_client/xds_metrics.h',
'src/core/xds/xds_client/xds_resource_type.h',
'src/core/xds/xds_client/xds_resource_type_impl.h',
@ -2667,11 +2669,13 @@ Pod::Spec.new do |s|
'src/core/xds/grpc/xds_routing.h',
'src/core/xds/grpc/xds_server_grpc.h',
'src/core/xds/grpc/xds_transport_grpc.h',
'src/core/xds/xds_client/lrs_client.h',
'src/core/xds/xds_client/xds_api.h',
'src/core/xds/xds_client/xds_backend_metric_propagation.h',
'src/core/xds/xds_client/xds_bootstrap.h',
'src/core/xds/xds_client/xds_channel_args.h',
'src/core/xds/xds_client/xds_client.h',
'src/core/xds/xds_client/xds_client_stats.h',
'src/core/xds/xds_client/xds_locality.h',
'src/core/xds/xds_client/xds_metrics.h',
'src/core/xds/xds_client/xds_resource_type.h',
'src/core/xds/xds_client/xds_resource_type_impl.h',

10
gRPC-Core.podspec generated

@ -2147,15 +2147,17 @@ Pod::Spec.new do |s|
'src/core/xds/grpc/xds_server_grpc.h',
'src/core/xds/grpc/xds_transport_grpc.cc',
'src/core/xds/grpc/xds_transport_grpc.h',
'src/core/xds/xds_client/lrs_client.cc',
'src/core/xds/xds_client/lrs_client.h',
'src/core/xds/xds_client/xds_api.cc',
'src/core/xds/xds_client/xds_api.h',
'src/core/xds/xds_client/xds_backend_metric_propagation.h',
'src/core/xds/xds_client/xds_bootstrap.cc',
'src/core/xds/xds_client/xds_bootstrap.h',
'src/core/xds/xds_client/xds_channel_args.h',
'src/core/xds/xds_client/xds_client.cc',
'src/core/xds/xds_client/xds_client.h',
'src/core/xds/xds_client/xds_client_stats.cc',
'src/core/xds/xds_client/xds_client_stats.h',
'src/core/xds/xds_client/xds_locality.h',
'src/core/xds/xds_client/xds_metrics.h',
'src/core/xds/xds_client/xds_resource_type.h',
'src/core/xds/xds_client/xds_resource_type_impl.h',
@ -3450,11 +3452,13 @@ Pod::Spec.new do |s|
'src/core/xds/grpc/xds_routing.h',
'src/core/xds/grpc/xds_server_grpc.h',
'src/core/xds/grpc/xds_transport_grpc.h',
'src/core/xds/xds_client/lrs_client.h',
'src/core/xds/xds_client/xds_api.h',
'src/core/xds/xds_client/xds_backend_metric_propagation.h',
'src/core/xds/xds_client/xds_bootstrap.h',
'src/core/xds/xds_client/xds_channel_args.h',
'src/core/xds/xds_client/xds_client.h',
'src/core/xds/xds_client/xds_client_stats.h',
'src/core/xds/xds_client/xds_locality.h',
'src/core/xds/xds_client/xds_metrics.h',
'src/core/xds/xds_client/xds_resource_type.h',
'src/core/xds/xds_client/xds_resource_type_impl.h',

6
grpc.gemspec generated

@ -2033,15 +2033,17 @@ Gem::Specification.new do |s|
s.files += %w( src/core/xds/grpc/xds_server_grpc.h )
s.files += %w( src/core/xds/grpc/xds_transport_grpc.cc )
s.files += %w( src/core/xds/grpc/xds_transport_grpc.h )
s.files += %w( src/core/xds/xds_client/lrs_client.cc )
s.files += %w( src/core/xds/xds_client/lrs_client.h )
s.files += %w( src/core/xds/xds_client/xds_api.cc )
s.files += %w( src/core/xds/xds_client/xds_api.h )
s.files += %w( src/core/xds/xds_client/xds_backend_metric_propagation.h )
s.files += %w( src/core/xds/xds_client/xds_bootstrap.cc )
s.files += %w( src/core/xds/xds_client/xds_bootstrap.h )
s.files += %w( src/core/xds/xds_client/xds_channel_args.h )
s.files += %w( src/core/xds/xds_client/xds_client.cc )
s.files += %w( src/core/xds/xds_client/xds_client.h )
s.files += %w( src/core/xds/xds_client/xds_client_stats.cc )
s.files += %w( src/core/xds/xds_client/xds_client_stats.h )
s.files += %w( src/core/xds/xds_client/xds_locality.h )
s.files += %w( src/core/xds/xds_client/xds_metrics.h )
s.files += %w( src/core/xds/xds_client/xds_resource_type.h )
s.files += %w( src/core/xds/xds_client/xds_resource_type_impl.h )

6
package.xml generated

@ -2015,15 +2015,17 @@
<file baseinstalldir="/" name="src/core/xds/grpc/xds_server_grpc.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/grpc/xds_transport_grpc.cc" role="src" />
<file baseinstalldir="/" name="src/core/xds/grpc/xds_transport_grpc.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/lrs_client.cc" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/lrs_client.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_api.cc" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_api.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_backend_metric_propagation.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_bootstrap.cc" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_bootstrap.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_channel_args.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_client.cc" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_client.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_client_stats.cc" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_client_stats.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_locality.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_metrics.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_resource_type.h" role="src" />
<file baseinstalldir="/" name="src/core/xds/xds_client/xds_resource_type_impl.h" role="src" />

@ -5630,6 +5630,22 @@ grpc_cc_library(
],
)
# Header-only target (no srcs) exposing xds_backend_metric_propagation.h.
# It depends only on the ref_counted target and absl's flat_hash_set, and
# its visibility is limited to the xds_client_core group.
grpc_cc_library(
name = "xds_backend_metric_propagation",
hdrs = [
"xds/xds_client/xds_backend_metric_propagation.h",
],
external_deps = [
"absl/container:flat_hash_set",
],
language = "c++",
tags = ["nofixdeps"],
visibility = ["@grpc:xds_client_core"],
deps = [
"ref_counted",
],
)
# TODO(roth): Split this up into individual targets.
grpc_cc_library(
name = "grpc_xds_client",
@ -5676,6 +5692,7 @@ grpc_cc_library(
external_deps = [
"absl/base:core_headers",
"absl/cleanup",
"absl/container:flat_hash_map",
"absl/functional:bind_front",
"absl/log:check",
"absl/log:log",

@ -73,7 +73,7 @@
#include "src/core/xds/grpc/xds_endpoint.h"
#include "src/core/xds/xds_client/xds_bootstrap.h"
#include "src/core/xds/xds_client/xds_client.h"
#include "src/core/xds/xds_client/xds_client_stats.h"
#include "src/core/xds/xds_client/xds_locality.h"
namespace grpc_core {
@ -189,13 +189,13 @@ class XdsClusterImplLb final : public LoadBalancingPolicy {
private:
class StatsSubchannelWrapper final : public DelegatingSubchannel {
public:
// If load reporting is enabled and we have an XdsClusterLocalityStats
// If load reporting is enabled and we have a ClusterLocalityStats
// object, that object already contains the locality label. We
// need to store the locality label directly only in the case where
// load reporting is disabled.
using LocalityData = absl::variant<
RefCountedStringValue /*locality*/,
RefCountedPtr<XdsClusterLocalityStats> /*locality_stats*/>;
RefCountedPtr<LrsClient::ClusterLocalityStats> /*locality_stats*/>;
StatsSubchannelWrapper(
RefCountedPtr<SubchannelInterface> wrapped_subchannel,
@ -213,20 +213,20 @@ class XdsClusterImplLb final : public LoadBalancingPolicy {
return Match(
locality_data_,
[](RefCountedStringValue locality) { return locality; },
[](const RefCountedPtr<XdsClusterLocalityStats>& locality_stats) {
[](const RefCountedPtr<LrsClient::ClusterLocalityStats>&
locality_stats) {
return locality_stats->locality_name()->human_readable_string();
});
}
XdsClusterLocalityStats* locality_stats() const {
LrsClient::ClusterLocalityStats* locality_stats() const {
return Match(
locality_data_,
[](const RefCountedStringValue&) {
return static_cast<XdsClusterLocalityStats*>(nullptr);
return static_cast<LrsClient::ClusterLocalityStats*>(nullptr);
},
[](const RefCountedPtr<XdsClusterLocalityStats>& locality_stats) {
return locality_stats.get();
});
[](const RefCountedPtr<LrsClient::ClusterLocalityStats>&
locality_stats) { return locality_stats.get(); });
}
RefCountedPtr<const BackendMetricPropagation> backend_metric_propagation()
@ -261,7 +261,7 @@ class XdsClusterImplLb final : public LoadBalancingPolicy {
RefCountedStringValue service_telemetry_label_;
RefCountedStringValue namespace_telemetry_label_;
RefCountedPtr<XdsEndpointResource::DropConfig> drop_config_;
RefCountedPtr<XdsClusterDropStats> drop_stats_;
RefCountedPtr<LrsClient::ClusterDropStats> drop_stats_;
RefCountedPtr<SubchannelPicker> picker_;
};
@ -315,7 +315,7 @@ class XdsClusterImplLb final : public LoadBalancingPolicy {
RefCountedPtr<GrpcXdsClient> xds_client_;
// The stats for client-side load reporting.
RefCountedPtr<XdsClusterDropStats> drop_stats_;
RefCountedPtr<LrsClient::ClusterDropStats> drop_stats_;
OrphanablePtr<LoadBalancingPolicy> child_policy_;
@ -335,7 +335,7 @@ class XdsClusterImplLb::Picker::SubchannelCallTracker final
SubchannelCallTracker(
std::unique_ptr<LoadBalancingPolicy::SubchannelCallTrackerInterface>
original_subchannel_call_tracker,
RefCountedPtr<XdsClusterLocalityStats> locality_stats,
RefCountedPtr<LrsClient::ClusterLocalityStats> locality_stats,
RefCountedPtr<const BackendMetricPropagation> backend_metric_propagation,
RefCountedPtr<CircuitBreakerCallCounterMap::CallCounter> call_counter)
: original_subchannel_call_tracker_(
@ -390,7 +390,7 @@ class XdsClusterImplLb::Picker::SubchannelCallTracker final
private:
std::unique_ptr<LoadBalancingPolicy::SubchannelCallTrackerInterface>
original_subchannel_call_tracker_;
RefCountedPtr<XdsClusterLocalityStats> locality_stats_;
RefCountedPtr<LrsClient::ClusterLocalityStats> locality_stats_;
RefCountedPtr<const BackendMetricPropagation> backend_metric_propagation_;
RefCountedPtr<CircuitBreakerCallCounterMap::CallCounter> call_counter_;
#ifndef NDEBUG
@ -465,7 +465,7 @@ LoadBalancingPolicy::PickResult XdsClusterImplLb::Picker::Pick(
subchannel_wrapper->locality());
}
// Handle load reporting.
RefCountedPtr<XdsClusterLocalityStats> locality_stats;
RefCountedPtr<LrsClient::ClusterLocalityStats> locality_stats;
if (subchannel_wrapper->locality_stats() != nullptr) {
locality_stats = subchannel_wrapper->locality_stats()->Ref(
DEBUG_LOCATION, "SubchannelCallTracker");
@ -630,14 +630,15 @@ absl::Status XdsClusterImplLb::UpdateLocked(UpdateArgs args) {
// Note: We need a drop stats object whenever load reporting is enabled,
// even if we have no EDS drop config, because we also use it when
// reporting circuit breaker drops.
if (!new_cluster_config.cluster->lrs_load_reporting_server.has_value()) {
if (new_cluster_config.cluster->lrs_load_reporting_server == nullptr) {
drop_stats_.reset();
} else if (cluster_resource_ == nullptr ||
old_eds_service_name != new_eds_service_name ||
cluster_resource_->lrs_load_reporting_server !=
new_cluster_config.cluster->lrs_load_reporting_server) {
drop_stats_ = xds_client_->AddClusterDropStats(
*new_cluster_config.cluster->lrs_load_reporting_server,
!LrsServersEqual(
cluster_resource_->lrs_load_reporting_server,
new_cluster_config.cluster->lrs_load_reporting_server)) {
drop_stats_ = xds_client_->lrs_client().AddClusterDropStats(
new_cluster_config.cluster->lrs_load_reporting_server,
new_config->cluster_name(), new_eds_service_name);
if (drop_stats_ == nullptr) {
LOG(ERROR)
@ -831,12 +832,13 @@ RefCountedPtr<SubchannelInterface> XdsClusterImplLb::Helper::CreateSubchannel(
// (if load reporting is enabled) the locality stats object, which
// will be used by the picker.
auto locality_name = per_address_args.GetObjectRef<XdsLocalityName>();
RefCountedPtr<XdsClusterLocalityStats> locality_stats;
if (parent()->cluster_resource_->lrs_load_reporting_server.has_value()) {
locality_stats = parent()->xds_client_->AddClusterLocalityStats(
parent()->cluster_resource_->lrs_load_reporting_server.value(),
parent()->config_->cluster_name(),
GetEdsResourceName(*parent()->cluster_resource_), locality_name);
RefCountedPtr<LrsClient::ClusterLocalityStats> locality_stats;
if (parent()->cluster_resource_->lrs_load_reporting_server != nullptr) {
locality_stats =
parent()->xds_client_->lrs_client().AddClusterLocalityStats(
parent()->cluster_resource_->lrs_load_reporting_server,
parent()->config_->cluster_name(),
GetEdsResourceName(*parent()->cluster_resource_), locality_name);
if (locality_stats == nullptr) {
LOG(ERROR)
<< "[xds_cluster_impl_lb " << parent()

@ -50,7 +50,7 @@
#include "src/core/util/json/json_args.h"
#include "src/core/util/json/json_object_loader.h"
#include "src/core/util/json/json_writer.h"
#include "src/core/xds/xds_client/xds_client_stats.h"
#include "src/core/xds/xds_client/xds_locality.h"
namespace grpc_core {

@ -242,7 +242,7 @@ absl::StatusOr<RefCountedPtr<GrpcXdsClient>> GrpcXdsClient::GetOrCreate(
auto channel_args = ChannelArgs::FromC(xds_channel_args);
return MakeRefCounted<GrpcXdsClient>(
key, std::move(*bootstrap), channel_args,
MakeOrphanable<GrpcXdsTransportFactory>(channel_args));
MakeRefCounted<GrpcXdsTransportFactory>(channel_args));
}
// Otherwise, use the global instance.
MutexLock lock(g_mu);
@ -265,7 +265,7 @@ absl::StatusOr<RefCountedPtr<GrpcXdsClient>> GrpcXdsClient::GetOrCreate(
auto channel_args = ChannelArgs::FromC(g_channel_args);
auto xds_client = MakeRefCounted<GrpcXdsClient>(
key, std::move(*bootstrap), channel_args,
MakeOrphanable<GrpcXdsTransportFactory>(channel_args));
MakeRefCounted<GrpcXdsTransportFactory>(channel_args));
g_xds_client_map->emplace(xds_client->key(), xds_client.get());
GRPC_TRACE_LOG(xds_client, INFO) << "[xds_client " << xds_client.get()
<< "] Created xDS client for key " << key;
@ -286,21 +286,28 @@ GlobalStatsPluginRegistry::StatsPluginGroup GetStatsPluginGroupForKey(
return GlobalStatsPluginRegistry::GetStatsPluginsForChannel(scope);
}
// Builds the user-agent name string: the literal "gRPC C-core " followed
// by GPR_PLATFORM_STRING and GRPC_XDS_USER_AGENT_NAME_SUFFIX_STRING.
// Factored out so the same value can be handed to both the XdsClient base
// class and the LrsClient created by the GrpcXdsClient constructor.
std::string UserAgentName() {
return absl::StrCat("gRPC C-core ", GPR_PLATFORM_STRING,
GRPC_XDS_USER_AGENT_NAME_SUFFIX_STRING);
}
// Builds the user-agent version string: the literal "C-core " followed by
// grpc_version_string(), GRPC_XDS_USER_AGENT_NAME_SUFFIX_STRING and
// GRPC_XDS_USER_AGENT_VERSION_SUFFIX_STRING, in that order.
// Factored out so the same value can be handed to both the XdsClient base
// class and the LrsClient created by the GrpcXdsClient constructor.
std::string UserAgentVersion() {
return absl::StrCat("C-core ", grpc_version_string(),
GRPC_XDS_USER_AGENT_NAME_SUFFIX_STRING,
GRPC_XDS_USER_AGENT_VERSION_SUFFIX_STRING);
}
} // namespace
GrpcXdsClient::GrpcXdsClient(
absl::string_view key, std::unique_ptr<GrpcXdsBootstrap> bootstrap,
absl::string_view key, std::shared_ptr<GrpcXdsBootstrap> bootstrap,
const ChannelArgs& args,
OrphanablePtr<XdsTransportFactory> transport_factory)
RefCountedPtr<XdsTransportFactory> transport_factory)
: XdsClient(
std::move(bootstrap), std::move(transport_factory),
bootstrap, transport_factory,
grpc_event_engine::experimental::GetDefaultEventEngine(),
std::make_unique<MetricsReporter>(*this),
absl::StrCat("gRPC C-core ", GPR_PLATFORM_STRING,
GRPC_XDS_USER_AGENT_NAME_SUFFIX_STRING),
absl::StrCat("C-core ", grpc_version_string(),
GRPC_XDS_USER_AGENT_NAME_SUFFIX_STRING,
GRPC_XDS_USER_AGENT_VERSION_SUFFIX_STRING),
std::make_unique<MetricsReporter>(*this), UserAgentName(),
UserAgentVersion(),
std::max(Duration::Zero(),
args.GetDurationFromIntMillis(
GRPC_ARG_XDS_RESOURCE_DOES_NOT_EXIST_TIMEOUT_MS)
@ -314,11 +321,16 @@ GrpcXdsClient::GrpcXdsClient(
[this](CallbackMetricReporter& reporter) {
ReportCallbackMetrics(reporter);
},
Duration::Seconds(5), kMetricConnected, kMetricResources)) {}
Duration::Seconds(5), kMetricConnected, kMetricResources)),
lrs_client_(MakeRefCounted<LrsClient>(
std::move(bootstrap), UserAgentName(), UserAgentVersion(),
std::move(transport_factory),
grpc_event_engine::experimental::GetDefaultEventEngine())) {}
void GrpcXdsClient::Orphaned() {
registered_metric_callback_.reset();
XdsClient::Orphaned();
lrs_client_.reset();
MutexLock lock(g_mu);
auto it = g_xds_client_map->find(key_);
if (it != g_xds_client_map->end() && it->second == this) {
@ -326,6 +338,11 @@ void GrpcXdsClient::Orphaned() {
}
}
// Resets backoff for this client: first delegates to the XdsClient base
// implementation, then forwards the request to the owned LrsClient so that
// both are reset together.
void GrpcXdsClient::ResetBackoff() {
XdsClient::ResetBackoff();
lrs_client_->ResetBackoff();
}
grpc_pollset_set* GrpcXdsClient::interested_parties() const {
return reinterpret_cast<GrpcXdsTransportFactory*>(transport_factory())
->interested_parties();

@ -34,6 +34,7 @@
#include "src/core/util/useful.h"
#include "src/core/xds/grpc/certificate_provider_store.h"
#include "src/core/xds/grpc/xds_bootstrap_grpc.h"
#include "src/core/xds/xds_client/lrs_client.h"
#include "src/core/xds/xds_client/xds_client.h"
#include "src/core/xds/xds_client/xds_transport.h"
@ -61,9 +62,9 @@ class GrpcXdsClient final : public XdsClient {
// that also use certificate_provider_store(), but we should consider
// alternatives for that case as well.
GrpcXdsClient(absl::string_view key,
std::unique_ptr<GrpcXdsBootstrap> bootstrap,
std::shared_ptr<GrpcXdsBootstrap> bootstrap,
const ChannelArgs& args,
OrphanablePtr<XdsTransportFactory> transport_factory);
RefCountedPtr<XdsTransportFactory> transport_factory);
// Helpers for encoding the XdsClient object in channel args.
static absl::string_view ChannelArgName() {
@ -73,6 +74,8 @@ class GrpcXdsClient final : public XdsClient {
return QsortCompare(a, b);
}
void ResetBackoff() override;
grpc_pollset_set* interested_parties() const;
CertificateProviderStore& certificate_provider_store() const {
@ -81,6 +84,8 @@ class GrpcXdsClient final : public XdsClient {
absl::string_view key() const { return key_; }
LrsClient& lrs_client() { return *lrs_client_; }
// Builds ClientStatusResponse containing all resources from all XdsClients
static grpc_slice DumpAllClientConfigs();
@ -94,6 +99,7 @@ class GrpcXdsClient final : public XdsClient {
OrphanablePtr<CertificateProviderStore> certificate_provider_store_;
GlobalStatsPluginRegistry::StatsPluginGroup stats_plugin_group_;
std::unique_ptr<RegisteredMetricCallback> registered_metric_callback_;
RefCountedPtr<LrsClient> lrs_client_;
};
namespace internal {

@ -49,7 +49,7 @@ std::string XdsClusterResource::ToString() const {
});
contents.push_back(absl::StrCat("lb_policy_config=",
JsonDump(Json::FromArray(lb_policy_config))));
if (lrs_load_reporting_server.has_value()) {
if (lrs_load_reporting_server != nullptr) {
contents.push_back(absl::StrCat("lrs_load_reporting_server_name=",
lrs_load_reporting_server->server_uri()));
}

@ -36,6 +36,15 @@
namespace grpc_core {
// Compares two optional LRS server configs held by shared_ptr.
//
// Two null pointers are considered equal, a null and a non-null pointer
// are considered different, and two non-null pointers are compared by the
// value of the GrpcXdsServer objects they point to (not by address).
inline bool LrsServersEqual(
    const std::shared_ptr<const GrpcXdsServer>& lrs_server1,
    const std::shared_ptr<const GrpcXdsServer>& lrs_server2) {
  const bool has_server1 = lrs_server1 != nullptr;
  const bool has_server2 = lrs_server2 != nullptr;
  // If at least one side is null, the configs match only when both are.
  if (!has_server1 || !has_server2) return has_server1 == has_server2;
  // Both sides are set, so compare the pointed-to servers.
  return *lrs_server1 == *lrs_server2;
}
struct XdsClusterResource : public XdsResourceType::ResourceData {
struct Eds {
// If empty, defaults to the cluster name.
@ -72,8 +81,8 @@ struct XdsClusterResource : public XdsResourceType::ResourceData {
// Note: Remaining fields are not used for aggregate clusters.
// The LRS server to use for load reporting.
// If not set, load reporting will be disabled.
absl::optional<GrpcXdsServer> lrs_load_reporting_server;
// If null, load reporting will be disabled.
std::shared_ptr<const GrpcXdsServer> lrs_load_reporting_server;
// The set of metrics to propagate from ORCA to LRS.
RefCountedPtr<const BackendMetricPropagation> lrs_backend_metric_propagation;
@ -95,7 +104,8 @@ struct XdsClusterResource : public XdsResourceType::ResourceData {
bool operator==(const XdsClusterResource& other) const {
return type == other.type && lb_policy_config == other.lb_policy_config &&
lrs_load_reporting_server == other.lrs_load_reporting_server &&
LrsServersEqual(lrs_load_reporting_server,
other.lrs_load_reporting_server) &&
lrs_backend_metric_propagation ==
other.lrs_backend_metric_propagation &&
common_tls_context == other.common_tls_context &&

@ -455,7 +455,7 @@ absl::StatusOr<std::shared_ptr<const XdsClusterResource>> CdsResourceParse(
ValidationErrors::ScopedField field(&errors, ".lrs_server");
errors.AddError("ConfigSource is not self");
}
cds_update->lrs_load_reporting_server.emplace(
cds_update->lrs_load_reporting_server = std::make_shared<GrpcXdsServer>(
static_cast<const GrpcXdsServer&>(context.server));
}
// Record LRS metric propagation.

@ -29,7 +29,7 @@
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/sync.h"
#include "src/core/resolver/endpoint_addresses.h"
#include "src/core/xds/xds_client/xds_client_stats.h"
#include "src/core/xds/xds_client/xds_locality.h"
#include "src/core/xds/xds_client/xds_resource_type.h"
#include "src/core/xds/xds_client/xds_resource_type_impl.h"

@ -41,6 +41,7 @@
#include "src/core/lib/channel/channel_fwd.h"
#include "src/core/lib/channel/channel_stack.h"
#include "src/core/lib/config/core_configuration.h"
#include "src/core/lib/debug/trace.h"
#include "src/core/lib/event_engine/default_event_engine.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/orphanable.h"
@ -68,7 +69,7 @@ namespace grpc_core {
//
GrpcXdsTransportFactory::GrpcXdsTransport::GrpcStreamingCall::GrpcStreamingCall(
RefCountedPtr<GrpcXdsTransportFactory> factory, Channel* channel,
WeakRefCountedPtr<GrpcXdsTransportFactory> factory, Channel* channel,
const char* method,
std::unique_ptr<StreamingCall::EventHandler> event_handler)
: factory_(std::move(factory)), event_handler_(std::move(event_handler)) {
@ -229,25 +230,24 @@ void GrpcXdsTransportFactory::GrpcXdsTransport::GrpcStreamingCall::
class GrpcXdsTransportFactory::GrpcXdsTransport::StateWatcher final
: public AsyncConnectivityStateWatcherInterface {
public:
explicit StateWatcher(
std::function<void(absl::Status)> on_connectivity_failure)
: on_connectivity_failure_(std::move(on_connectivity_failure)) {}
explicit StateWatcher(RefCountedPtr<ConnectivityFailureWatcher> watcher)
: watcher_(std::move(watcher)) {}
private:
void OnConnectivityStateChange(grpc_connectivity_state new_state,
const absl::Status& status) override {
if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
on_connectivity_failure_(absl::Status(
watcher_->OnConnectivityFailure(absl::Status(
status.code(),
absl::StrCat("channel in TRANSIENT_FAILURE: ", status.message())));
}
}
std::function<void(absl::Status)> on_connectivity_failure_;
RefCountedPtr<ConnectivityFailureWatcher> watcher_;
};
//
// GrpcXdsClient::GrpcXdsTransport
// GrpcXdsTransportFactory::GrpcXdsTransport
//
namespace {
@ -264,35 +264,74 @@ RefCountedPtr<Channel> CreateXdsChannel(const ChannelArgs& args,
} // namespace
GrpcXdsTransportFactory::GrpcXdsTransport::GrpcXdsTransport(
GrpcXdsTransportFactory* factory, const XdsBootstrap::XdsServer& server,
std::function<void(absl::Status)> on_connectivity_failure,
absl::Status* status)
: factory_(factory) {
channel_ = CreateXdsChannel(factory->args_,
WeakRefCountedPtr<GrpcXdsTransportFactory> factory,
const XdsBootstrap::XdsServer& server, absl::Status* status)
: XdsTransport(GRPC_TRACE_FLAG_ENABLED(xds_client_refcount)
? "GrpcXdsTransport"
: nullptr),
factory_(std::move(factory)),
key_(server.Key()) {
GRPC_TRACE_LOG(xds_client, INFO)
<< "[GrpcXdsTransport " << this << "] created";
channel_ = CreateXdsChannel(factory_->args_,
static_cast<const GrpcXdsServer&>(server));
CHECK(channel_ != nullptr);
if (channel_->IsLame()) {
*status = absl::UnavailableError("xds client has a lame channel");
} else {
watcher_ = new StateWatcher(std::move(on_connectivity_failure));
channel_->AddConnectivityWatcher(
GRPC_CHANNEL_IDLE,
OrphanablePtr<AsyncConnectivityStateWatcherInterface>(watcher_));
}
}
void GrpcXdsTransportFactory::GrpcXdsTransport::Orphan() {
if (!channel_->IsLame()) {
channel_->RemoveConnectivityWatcher(watcher_);
// Destructor. Does no explicit cleanup of its own; it only emits a trace
// log line (gated on the xds_client trace flag) recording that this
// transport object is being destroyed.
GrpcXdsTransportFactory::GrpcXdsTransport::~GrpcXdsTransport() {
GRPC_TRACE_LOG(xds_client, INFO)
<< "[GrpcXdsTransport " << this << "] destroying";
}
void GrpcXdsTransportFactory::GrpcXdsTransport::Orphaned() {
GRPC_TRACE_LOG(xds_client, INFO)
<< "[GrpcXdsTransport " << this << "] orphaned";
{
MutexLock lock(&factory_->mu_);
auto it = factory_->transports_.find(key_);
if (it != factory_->transports_.end() && it->second == this) {
factory_->transports_.erase(it);
}
}
// Do an async hop before unreffing. This avoids a deadlock upon
// shutdown in the case where the xDS channel is itself an xDS channel
// (e.g., when using one control plane to find another control plane).
grpc_event_engine::experimental::GetDefaultEventEngine()->Run([this]() {
ApplicationCallbackExecCtx application_exec_ctx;
ExecCtx exec_ctx;
Unref();
});
grpc_event_engine::experimental::GetDefaultEventEngine()->Run(
[self = WeakRefAsSubclass<GrpcXdsTransport>()]() mutable {
ApplicationCallbackExecCtx application_exec_ctx;
ExecCtx exec_ctx;
self.reset();
});
}
// Starts reporting channel connectivity failures to `watcher`.
//
// Does nothing if the channel is lame. Otherwise a StateWatcher wrapping
// `watcher` is created, recorded in watchers_ (keyed by `watcher`) so that
// StopConnectivityFailureWatch() can later find it, and then handed to the
// channel as a connectivity watcher starting from GRPC_CHANNEL_IDLE.
void GrpcXdsTransportFactory::GrpcXdsTransport::StartConnectivityFailureWatch(
    RefCountedPtr<ConnectivityFailureWatcher> watcher) {
  if (channel_->IsLame()) return;
  // The StateWatcher keeps its own ref to the caller's watcher.
  auto* state_watcher = new StateWatcher(watcher);
  {
    MutexLock lock(&mu_);
    // This is the last use of the by-value parameter, so transfer its ref
    // into the map key instead of taking (and then dropping) another one.
    watchers_.emplace(std::move(watcher), state_watcher);
  }
  // Register with the channel only after the map entry exists; the raw
  // pointer is wrapped in an OrphanablePtr that is passed to the channel.
  channel_->AddConnectivityWatcher(
      GRPC_CHANNEL_IDLE,
      OrphanablePtr<AsyncConnectivityStateWatcherInterface>(state_watcher));
}
void GrpcXdsTransportFactory::GrpcXdsTransport::StopConnectivityFailureWatch(
const RefCountedPtr<ConnectivityFailureWatcher>& watcher) {
if (channel_->IsLame()) return;
StateWatcher* state_watcher = nullptr;
{
MutexLock lock(&mu_);
auto it = watchers_.find(watcher);
if (it == watchers_.end()) return;
state_watcher = it->second;
watchers_.erase(it);
}
channel_->RemoveConnectivityWatcher(state_watcher);
}
OrphanablePtr<XdsTransportFactory::XdsTransport::StreamingCall>
@ -300,9 +339,8 @@ GrpcXdsTransportFactory::GrpcXdsTransport::CreateStreamingCall(
const char* method,
std::unique_ptr<StreamingCall::EventHandler> event_handler) {
return MakeOrphanable<GrpcStreamingCall>(
factory_->RefAsSubclass<GrpcXdsTransportFactory>(DEBUG_LOCATION,
"StreamingCall"),
channel_.get(), method, std::move(event_handler));
factory_.WeakRef(DEBUG_LOCATION, "StreamingCall"), channel_.get(), method,
std::move(event_handler));
}
void GrpcXdsTransportFactory::GrpcXdsTransport::ResetBackoff() {
@ -336,13 +374,22 @@ GrpcXdsTransportFactory::~GrpcXdsTransportFactory() {
ShutdownInternally();
}
OrphanablePtr<XdsTransportFactory::XdsTransport>
GrpcXdsTransportFactory::Create(
const XdsBootstrap::XdsServer& server,
std::function<void(absl::Status)> on_connectivity_failure,
absl::Status* status) {
return MakeOrphanable<GrpcXdsTransport>(
this, server, std::move(on_connectivity_failure), status);
// Returns the transport for `server`, creating it if no usable one is
// registered.
//
// transports_ maps the server key to a raw pointer to the transport, so an
// entry may refer to a transport whose strong ref count has already dropped
// to zero but which has not yet removed itself in Orphaned().  In that case
// RefIfNonZero() yields null and a new transport is created.
//
// `status` is forwarded to the GrpcXdsTransport constructor.
RefCountedPtr<XdsTransportFactory::XdsTransport>
GrpcXdsTransportFactory::GetTransport(const XdsBootstrap::XdsServer& server,
                                      absl::Status* status) {
  std::string key = server.Key();
  RefCountedPtr<GrpcXdsTransport> transport;
  MutexLock lock(&mu_);
  auto it = transports_.find(key);
  if (it != transports_.end()) {
    transport = it->second->RefIfNonZero().TakeAsSubclass<GrpcXdsTransport>();
  }
  if (transport == nullptr) {
    transport = MakeRefCounted<GrpcXdsTransport>(
        WeakRefAsSubclass<GrpcXdsTransportFactory>(), server, status);
    // Replace any stale entry rather than leaving it in place: emplace()
    // would be a no-op when the key already exists, so the new transport
    // would never be registered and every later call would create yet
    // another one.  The dying transport's Orphaned() only erases the entry
    // if it still points at itself, so overwriting here is safe.
    transports_.insert_or_assign(std::move(key), transport.get());
  }
  return transport;
}
} // namespace grpc_core

@ -21,6 +21,7 @@
#include <memory>
#include <string>
#include "absl/container/flat_hash_map.h"
#include "absl/status/status.h"
#include <grpc/grpc.h>
@ -31,6 +32,7 @@
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/sync.h"
#include "src/core/lib/iomgr/closure.h"
#include "src/core/lib/iomgr/error.h"
#include "src/core/lib/iomgr/iomgr_fwd.h"
@ -47,18 +49,20 @@ class GrpcXdsTransportFactory final : public XdsTransportFactory {
explicit GrpcXdsTransportFactory(const ChannelArgs& args);
~GrpcXdsTransportFactory() override;
void Orphan() override { Unref(); }
void Orphaned() override {}
OrphanablePtr<XdsTransport> Create(
const XdsBootstrap::XdsServer& server,
std::function<void(absl::Status)> on_connectivity_failure,
absl::Status* status) override;
RefCountedPtr<XdsTransport> GetTransport(
const XdsBootstrap::XdsServer& server, absl::Status* status) override;
grpc_pollset_set* interested_parties() const { return interested_parties_; }
private:
ChannelArgs args_;
grpc_pollset_set* interested_parties_;
Mutex mu_;
absl::flat_hash_map<std::string /*XdsServer key*/, GrpcXdsTransport*>
transports_ ABSL_GUARDED_BY(&mu_);
};
class GrpcXdsTransportFactory::GrpcXdsTransport final
@ -66,12 +70,16 @@ class GrpcXdsTransportFactory::GrpcXdsTransport final
public:
class GrpcStreamingCall;
GrpcXdsTransport(GrpcXdsTransportFactory* factory,
const XdsBootstrap::XdsServer& server,
std::function<void(absl::Status)> on_connectivity_failure,
absl::Status* status);
GrpcXdsTransport(WeakRefCountedPtr<GrpcXdsTransportFactory> factory,
const XdsBootstrap::XdsServer& server, absl::Status* status);
~GrpcXdsTransport() override;
void Orphan() override;
void Orphaned() override;
void StartConnectivityFailureWatch(
RefCountedPtr<ConnectivityFailureWatcher> watcher) override;
void StopConnectivityFailureWatch(
const RefCountedPtr<ConnectivityFailureWatcher>& watcher) override;
OrphanablePtr<StreamingCall> CreateStreamingCall(
const char* method,
@ -82,15 +90,19 @@ class GrpcXdsTransportFactory::GrpcXdsTransport final
private:
class StateWatcher;
GrpcXdsTransportFactory* factory_; // Not owned.
WeakRefCountedPtr<GrpcXdsTransportFactory> factory_;
std::string key_;
RefCountedPtr<Channel> channel_;
StateWatcher* watcher_;
Mutex mu_;
absl::flat_hash_map<RefCountedPtr<ConnectivityFailureWatcher>, StateWatcher*>
watchers_ ABSL_GUARDED_BY(&mu_);
};
class GrpcXdsTransportFactory::GrpcXdsTransport::GrpcStreamingCall final
: public XdsTransportFactory::XdsTransport::StreamingCall {
public:
GrpcStreamingCall(RefCountedPtr<GrpcXdsTransportFactory> factory,
GrpcStreamingCall(WeakRefCountedPtr<GrpcXdsTransportFactory> factory,
Channel* channel, const char* method,
std::unique_ptr<StreamingCall::EventHandler> event_handler);
~GrpcStreamingCall() override;
@ -107,7 +119,7 @@ class GrpcXdsTransportFactory::GrpcXdsTransport::GrpcStreamingCall final
static void OnResponseReceived(void* arg, grpc_error_handle /*error*/);
static void OnStatusReceived(void* arg, grpc_error_handle /*error*/);
RefCountedPtr<GrpcXdsTransportFactory> factory_;
WeakRefCountedPtr<GrpcXdsTransportFactory> factory_;
std::unique_ptr<StreamingCall::EventHandler> event_handler_;

File diff suppressed because it is too large Load Diff

@ -0,0 +1,374 @@
//
// Copyright 2019 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef GRPC_SRC_CORE_XDS_XDS_CLIENT_LRS_CLIENT_H
#define GRPC_SRC_CORE_XDS_XDS_CLIENT_LRS_CLIENT_H
#include <atomic>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include "absl/base/thread_annotations.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "upb/reflection/def.hpp"
#include <grpc/event_engine/event_engine.h>
#include "src/core/lib/debug/trace.h"
#include "src/core/lib/gprpp/dual_ref_counted.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/per_cpu.h"
#include "src/core/lib/gprpp/ref_counted.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/sync.h"
#include "src/core/lib/gprpp/time.h"
#include "src/core/lib/gprpp/work_serializer.h"
#include "src/core/lib/uri/uri_parser.h"
#include "src/core/xds/xds_client/xds_api.h"
#include "src/core/xds/xds_client/xds_backend_metric_propagation.h"
#include "src/core/xds/xds_client/xds_bootstrap.h"
#include "src/core/xds/xds_client/xds_locality.h"
#include "src/core/xds/xds_client/xds_metrics.h"
#include "src/core/xds/xds_client/xds_resource_type.h"
#include "src/core/xds/xds_client/xds_transport.h"
namespace grpc_core {
bool XdsOrcaLrsPropagationChangesEnabled();
class LrsClient : public DualRefCounted<LrsClient> {
public:
// Drop stats for an xds cluster.
class ClusterDropStats final : public RefCounted<ClusterDropStats> {
public:
// Drop counts keyed by drop category.
using CategorizedDropsMap = std::map<std::string /* category */, uint64_t>;

// A copy of the drop counters.  The total number of requests dropped for any
// reason is uncategorized_drops plus the sum of the values in
// categorized_drops.
struct Snapshot {
  uint64_t uncategorized_drops = 0;
  // The number of requests dropped for the specific drop categories
  // outlined in the drop_overloads field in the EDS response.
  CategorizedDropsMap categorized_drops;

  // Adds every counter of `other` into this snapshot.
  Snapshot& operator+=(const Snapshot& other) {
    for (auto it = other.categorized_drops.begin();
         it != other.categorized_drops.end(); ++it) {
      categorized_drops[it->first] += it->second;
    }
    uncategorized_drops += other.uncategorized_drops;
    return *this;
  }

  // True when no drops at all are recorded.  Categories that are present
  // with a count of zero do not make the snapshot non-zero.
  bool IsZero() const {
    bool all_zero = (uncategorized_drops == 0);
    for (auto it = categorized_drops.begin();
         all_zero && it != categorized_drops.end(); ++it) {
      all_zero = (it->second == 0);
    }
    return all_zero;
  }
};
ClusterDropStats(RefCountedPtr<LrsClient> lrs_client,
absl::string_view lrs_server,
absl::string_view cluster_name,
absl::string_view eds_service_name);
~ClusterDropStats() override;
// Returns a snapshot of this instance and resets all the counters.
Snapshot GetSnapshotAndReset();
void AddUncategorizedDrops();
void AddCallDropped(const std::string& category);
private:
RefCountedPtr<LrsClient> lrs_client_;
absl::string_view lrs_server_;
absl::string_view cluster_name_;
absl::string_view eds_service_name_;
std::atomic<uint64_t> uncategorized_drops_{0};
// Protects categorized_drops_. A mutex is necessary because
// categorized_drops_ can be accessed by both the picker (from data plane
// mutex) and the load reporting thread (from the control plane combiner).
Mutex mu_;
CategorizedDropsMap categorized_drops_ ABSL_GUARDED_BY(mu_);
};
// Locality stats for an xds cluster.
class ClusterLocalityStats final : public RefCounted<ClusterLocalityStats> {
public:
// Aggregated value of one backend metric: how many finished requests
// reported the metric, and the sum of the reported values.
//
// Moving from a BackendMetric resets the source to zero.  The type is
// move-only: the user-declared move operations suppress the implicit copy
// operations.
struct BackendMetric {
  uint64_t num_requests_finished_with_metric = 0;
  double total_metric_value = 0;

  // Declaring a move constructor suppresses the implicit default
  // constructor, so it must be requested explicitly.  Without it, any
  // aggregate holding a BackendMetric by value, and
  // std::map<..., BackendMetric>::operator[], cannot default-construct one.
  BackendMetric() = default;

  // Takes the counters from `other` and leaves `other` zeroed.
  BackendMetric(BackendMetric&& other) noexcept
      : num_requests_finished_with_metric(
            std::exchange(other.num_requests_finished_with_metric, 0)),
        total_metric_value(std::exchange(other.total_metric_value, 0)) {}

  // Takes the counters from `other` and leaves `other` zeroed.
  // Self-assignment leaves the value unchanged.
  BackendMetric& operator=(BackendMetric&& other) noexcept {
    num_requests_finished_with_metric =
        std::exchange(other.num_requests_finished_with_metric, 0);
    total_metric_value = std::exchange(other.total_metric_value, 0);
    return *this;
  }

  // Adds the counters of `other` into this metric.
  BackendMetric& operator+=(const BackendMetric& other) {
    num_requests_finished_with_metric +=
        other.num_requests_finished_with_metric;
    total_metric_value += other.total_metric_value;
    return *this;
  }

  // True when both the request count and the accumulated value are zero.
  bool IsZero() const {
    return num_requests_finished_with_metric == 0 &&
           total_metric_value == 0;
  }
};
struct Snapshot {
uint64_t total_successful_requests = 0;
uint64_t total_requests_in_progress = 0;
uint64_t total_error_requests = 0;
uint64_t total_issued_requests = 0;
BackendMetric cpu_utilization;
BackendMetric mem_utilization;
BackendMetric application_utilization;
std::map<std::string, BackendMetric> backend_metrics;
Snapshot& operator+=(const Snapshot& other) {
total_successful_requests += other.total_successful_requests;
total_requests_in_progress += other.total_requests_in_progress;
total_error_requests += other.total_error_requests;
total_issued_requests += other.total_issued_requests;
cpu_utilization += other.cpu_utilization;
mem_utilization += other.mem_utilization;
application_utilization += other.application_utilization;
for (const auto& p : other.backend_metrics) {
backend_metrics[p.first] += p.second;
}
return *this;
}
bool IsZero() const {
if (total_successful_requests != 0 || total_requests_in_progress != 0 ||
total_error_requests != 0 || total_issued_requests != 0 ||
!cpu_utilization.IsZero() || !mem_utilization.IsZero() ||
!application_utilization.IsZero()) {
return false;
}
for (const auto& p : backend_metrics) {
if (!p.second.IsZero()) return false;
}
return true;
}
};
ClusterLocalityStats(RefCountedPtr<LrsClient> lrs_client,
absl::string_view lrs_server,
absl::string_view cluster_name,
absl::string_view eds_service_name,
RefCountedPtr<XdsLocalityName> name);
~ClusterLocalityStats() override;
// Returns a snapshot of this instance and resets all the counters.
Snapshot GetSnapshotAndReset();
void AddCallStarted();
void AddCallFinished(const BackendMetricPropagation& propagation,
const BackendMetricData* backend_metrics,
bool fail = false);
XdsLocalityName* locality_name() const { return name_.get(); }
private:
// Counters held per shard; stats_ below stores these in a PerCpu<Stats>.
struct Stats {
// Request counters.  These are atomics and are not guarded by
// backend_metrics_mu.
std::atomic<uint64_t> total_successful_requests{0};
std::atomic<uint64_t> total_requests_in_progress{0};
std::atomic<uint64_t> total_error_requests{0};
std::atomic<uint64_t> total_issued_requests{0};
// Guards all of the backend metric fields that follow.
Mutex backend_metrics_mu;
BackendMetric cpu_utilization ABSL_GUARDED_BY(backend_metrics_mu);
BackendMetric mem_utilization ABSL_GUARDED_BY(backend_metrics_mu);
BackendMetric application_utilization ABSL_GUARDED_BY(backend_metrics_mu);
// Named backend metrics, keyed by metric name.
std::map<std::string, BackendMetric> backend_metrics
ABSL_GUARDED_BY(backend_metrics_mu);
};
RefCountedPtr<LrsClient> lrs_client_;
absl::string_view lrs_server_;
absl::string_view cluster_name_;
absl::string_view eds_service_name_;
RefCountedPtr<XdsLocalityName> name_;
PerCpu<Stats> stats_{PerCpuOptions().SetMaxShards(32).SetCpusPerShard(4)};
};
LrsClient(
std::shared_ptr<XdsBootstrap> bootstrap, std::string user_agent_name,
std::string user_agent_version,
RefCountedPtr<XdsTransportFactory> transport_factory,
std::shared_ptr<grpc_event_engine::experimental::EventEngine> engine);
~LrsClient() override;
// Adds drop stats for cluster_name and eds_service_name.
RefCountedPtr<ClusterDropStats> AddClusterDropStats(
std::shared_ptr<const XdsBootstrap::XdsServer> lrs_server,
absl::string_view cluster_name, absl::string_view eds_service_name);
// Adds locality stats for cluster_name and eds_service_name for the
// specified locality.
RefCountedPtr<ClusterLocalityStats> AddClusterLocalityStats(
std::shared_ptr<const XdsBootstrap::XdsServer> lrs_server,
absl::string_view cluster_name, absl::string_view eds_service_name,
RefCountedPtr<XdsLocalityName> locality);
// Resets connection backoff state.
void ResetBackoff();
XdsTransportFactory* transport_factory() const {
return transport_factory_.get();
}
grpc_event_engine::experimental::EventEngine* engine() {
return engine_.get();
}
private:
// Contains a channel to the LRS server and all the data related to the
// channel.
class LrsChannel final : public DualRefCounted<LrsChannel> {
public:
// Wrapper around a call of type T; defined outside this header.
template <typename T>
class RetryableCall;
// The LRS call type wrapped by RetryableCall; defined outside this header.
class LrsCall;
// `lrs_client` is held as a weak ref to the owning LrsClient; `server`
// identifies the LRS server this channel is for.
LrsChannel(WeakRefCountedPtr<LrsClient> lrs_client,
std::shared_ptr<const XdsBootstrap::XdsServer> server);
~LrsChannel() override;
// Returns the owning LrsClient as a raw pointer (no ref is taken).
LrsClient* lrs_client() const { return lrs_client_.get(); }
void ResetBackoff();
// NOTE(review): presumably starts the LRS call only if one is not already
// running -- confirm against the implementation in lrs_client.cc.
void MaybeStartLrsCall();
// URI of the LRS server, as reported by the XdsServer config.
absl::string_view server_uri() const { return server_->server_uri(); }
private:
// DualRefCounted hook.
void Orphaned() override;
// Must be called with LrsClient::mu_ held.
void StopLrsCallLocked() ABSL_EXCLUSIVE_LOCKS_REQUIRED(&LrsClient::mu_);
// The owning LrsClient.
WeakRefCountedPtr<LrsClient> lrs_client_;
std::shared_ptr<const XdsBootstrap::XdsServer> server_;
// Transport used by this channel.
RefCountedPtr<XdsTransportFactory::XdsTransport> transport_;
// The retryable LRS call.
OrphanablePtr<RetryableCall<LrsCall>> lrs_call_;
};
// Load-reporting state for one (cluster_name, eds_service_name) pair; this
// is the mapped type of LoadReportMap below.
struct LoadReportState {
// State for a single locality within the cluster.
struct LocalityState {
// Raw pointer to the stats object for this locality, or null.
// NOTE(review): presumably non-owning and cleared by
// RemoveClusterLocalityStats() -- confirm in lrs_client.cc.
ClusterLocalityStats* locality_stats = nullptr;
// NOTE(review): presumably accumulates the final snapshots of stats
// objects that have been removed -- confirm in lrs_client.cc.
ClusterLocalityStats::Snapshot deleted_locality_stats;
};
// Raw pointer to the drop stats object for this cluster, or null.
// NOTE(review): presumably non-owning and cleared by
// RemoveClusterDropStats() -- confirm in lrs_client.cc.
ClusterDropStats* drop_stats = nullptr;
// NOTE(review): presumably accumulates the final snapshots of drop stats
// objects that have been removed -- confirm in lrs_client.cc.
ClusterDropStats::Snapshot deleted_drop_stats;
// Per-locality state, ordered by XdsLocalityName::Less.
std::map<RefCountedPtr<XdsLocalityName>, LocalityState,
XdsLocalityName::Less>
locality_stats;
// Initialized to the time at which this state object is created.
Timestamp last_report_time = Timestamp::Now();
};
// Load report data.
using LoadReportMap = std::map<
std::pair<std::string /*cluster_name*/, std::string /*eds_service_name*/>,
LoadReportState>;
struct LoadReportServer {
RefCountedPtr<LrsChannel> lrs_channel;
LoadReportMap load_report_map;
};
struct ClusterLoadReport {
ClusterDropStats::Snapshot dropped_requests;
std::map<RefCountedPtr<XdsLocalityName>, ClusterLocalityStats::Snapshot,
XdsLocalityName::Less>
locality_stats;
Duration load_report_interval;
};
using ClusterLoadReportMap = std::map<
std::pair<std::string /*cluster_name*/, std::string /*eds_service_name*/>,
ClusterLoadReport>;
void Orphaned() override;
ClusterLoadReportMap BuildLoadReportSnapshotLocked(
const XdsBootstrap::XdsServer& lrs_server, bool send_all_clusters,
const std::set<std::string>& clusters) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
RefCountedPtr<LrsChannel> GetOrCreateLrsChannelLocked(
std::shared_ptr<const XdsBootstrap::XdsServer> server, const char* reason)
ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
static bool LoadReportCountersAreZero(const ClusterLoadReportMap& snapshot);
void RemoveClusterDropStats(absl::string_view lrs_server,
absl::string_view cluster_name,
absl::string_view eds_service_name,
ClusterDropStats* cluster_drop_stats);
void RemoveClusterLocalityStats(
absl::string_view lrs_server, absl::string_view cluster_name,
absl::string_view eds_service_name,
const RefCountedPtr<XdsLocalityName>& locality,
ClusterLocalityStats* cluster_locality_stats);
// Creates an initial LRS request.
std::string CreateLrsInitialRequest() ABSL_EXCLUSIVE_LOCKS_REQUIRED(&mu_);
// Creates an LRS request sending a client-side load report.
std::string CreateLrsRequest(ClusterLoadReportMap cluster_load_report_map)
ABSL_EXCLUSIVE_LOCKS_REQUIRED(&mu_);
// Parses the LRS response and populates send_all_clusters,
// cluster_names, and load_reporting_interval.
absl::Status ParseLrsResponse(absl::string_view encoded_response,
bool* send_all_clusters,
std::set<std::string>* cluster_names,
Duration* load_reporting_interval)
ABSL_EXCLUSIVE_LOCKS_REQUIRED(&mu_);
std::shared_ptr<XdsBootstrap> bootstrap_;
const std::string user_agent_name_;
const std::string user_agent_version_;
RefCountedPtr<XdsTransportFactory> transport_factory_;
std::shared_ptr<grpc_event_engine::experimental::EventEngine> engine_;
Mutex mu_;
upb::DefPool def_pool_ ABSL_GUARDED_BY(mu_);
// Map of existing LRS channels.
std::map<std::string /*XdsServer key*/, LrsChannel*> lrs_channel_map_
ABSL_GUARDED_BY(mu_);
std::map<std::string /*XdsServer key*/, LoadReportServer, std::less<>>
load_report_map_ ABSL_GUARDED_BY(mu_);
};
} // namespace grpc_core
#endif // GRPC_SRC_CORE_XDS_XDS_CLIENT_LRS_CLIENT_H

@ -154,42 +154,50 @@ std::string SerializeDiscoveryRequest(
void XdsApi::PopulateNode(envoy_config_core_v3_Node* node_msg,
upb_Arena* arena) {
if (node_ != nullptr) {
if (!node_->id().empty()) {
PopulateXdsNode(node_, user_agent_name_, user_agent_version_, node_msg,
arena);
}
void PopulateXdsNode(const XdsBootstrap::Node* node,
absl::string_view user_agent_name,
absl::string_view user_agent_version,
envoy_config_core_v3_Node* node_msg, upb_Arena* arena) {
if (node != nullptr) {
if (!node->id().empty()) {
envoy_config_core_v3_Node_set_id(node_msg,
StdStringToUpbString(node_->id()));
StdStringToUpbString(node->id()));
}
if (!node_->cluster().empty()) {
if (!node->cluster().empty()) {
envoy_config_core_v3_Node_set_cluster(
node_msg, StdStringToUpbString(node_->cluster()));
node_msg, StdStringToUpbString(node->cluster()));
}
if (!node_->metadata().empty()) {
if (!node->metadata().empty()) {
google_protobuf_Struct* metadata =
envoy_config_core_v3_Node_mutable_metadata(node_msg, arena);
PopulateMetadata(metadata, node_->metadata(), arena);
PopulateMetadata(metadata, node->metadata(), arena);
}
if (!node_->locality_region().empty() || !node_->locality_zone().empty() ||
!node_->locality_sub_zone().empty()) {
if (!node->locality_region().empty() || !node->locality_zone().empty() ||
!node->locality_sub_zone().empty()) {
envoy_config_core_v3_Locality* locality =
envoy_config_core_v3_Node_mutable_locality(node_msg, arena);
if (!node_->locality_region().empty()) {
if (!node->locality_region().empty()) {
envoy_config_core_v3_Locality_set_region(
locality, StdStringToUpbString(node_->locality_region()));
locality, StdStringToUpbString(node->locality_region()));
}
if (!node_->locality_zone().empty()) {
if (!node->locality_zone().empty()) {
envoy_config_core_v3_Locality_set_zone(
locality, StdStringToUpbString(node_->locality_zone()));
locality, StdStringToUpbString(node->locality_zone()));
}
if (!node_->locality_sub_zone().empty()) {
if (!node->locality_sub_zone().empty()) {
envoy_config_core_v3_Locality_set_sub_zone(
locality, StdStringToUpbString(node_->locality_sub_zone()));
locality, StdStringToUpbString(node->locality_sub_zone()));
}
}
}
envoy_config_core_v3_Node_set_user_agent_name(
node_msg, StdStringToUpbString(user_agent_name_));
node_msg, StdStringToUpbString(user_agent_name));
envoy_config_core_v3_Node_set_user_agent_version(
node_msg, StdStringToUpbString(user_agent_version_));
node_msg, StdStringToUpbString(user_agent_version));
envoy_config_core_v3_Node_add_client_features(
node_msg,
upb_StringView_FromString("envoy.lb.does_not_support_overprovisioning"),
@ -344,224 +352,4 @@ absl::Status XdsApi::ParseAdsResponse(absl::string_view encoded_response,
return absl::OkStatus();
}
namespace {
void MaybeLogLrsRequest(
const XdsApiContext& context,
const envoy_service_load_stats_v3_LoadStatsRequest* request) {
if (GRPC_TRACE_FLAG_ENABLED_OBJ(*context.tracer) && ABSL_VLOG_IS_ON(2)) {
const upb_MessageDef* msg_type =
envoy_service_load_stats_v3_LoadStatsRequest_getmsgdef(
context.def_pool);
char buf[10240];
upb_TextEncode(reinterpret_cast<const upb_Message*>(request), msg_type,
nullptr, 0, buf, sizeof(buf));
VLOG(2) << "[xds_client " << context.client
<< "] constructed LRS request: " << buf;
}
}
// Serializes `request` using the context's arena and returns the bytes as a
// std::string.
std::string SerializeLrsRequest(
    const XdsApiContext& context,
    const envoy_service_load_stats_v3_LoadStatsRequest* request) {
  size_t serialized_size;
  const char* serialized =
      envoy_service_load_stats_v3_LoadStatsRequest_serialize(
          request, context.arena, &serialized_size);
  return std::string(serialized, serialized_size);
}
} // namespace
std::string XdsApi::CreateLrsInitialRequest() {
upb::Arena arena;
const XdsApiContext context = {client_, tracer_, def_pool_->ptr(),
arena.ptr()};
// Create a request.
envoy_service_load_stats_v3_LoadStatsRequest* request =
envoy_service_load_stats_v3_LoadStatsRequest_new(arena.ptr());
// Populate node.
envoy_config_core_v3_Node* node_msg =
envoy_service_load_stats_v3_LoadStatsRequest_mutable_node(request,
arena.ptr());
PopulateNode(node_msg, arena.ptr());
envoy_config_core_v3_Node_add_client_features(
node_msg,
upb_StringView_FromString("envoy.lrs.supports_send_all_clusters"),
arena.ptr());
MaybeLogLrsRequest(context, request);
return SerializeLrsRequest(context, request);
}
namespace {
// Fills `output` with the locality name and the counters from `snapshot`.
// Empty region/zone/sub_zone strings are left unset.  Only the named
// metrics in snapshot.backend_metrics are written as load_metric_stats.
void LocalityStatsPopulate(
    const XdsApiContext& context,
    envoy_config_endpoint_v3_UpstreamLocalityStats* output,
    const XdsLocalityName& locality_name,
    const XdsClusterLocalityStats::Snapshot& snapshot) {
  // Locality identity.
  envoy_config_core_v3_Locality* locality_msg =
      envoy_config_endpoint_v3_UpstreamLocalityStats_mutable_locality(
          output, context.arena);
  const bool has_region = !locality_name.region().empty();
  if (has_region) {
    envoy_config_core_v3_Locality_set_region(
        locality_msg, StdStringToUpbString(locality_name.region()));
  }
  const bool has_zone = !locality_name.zone().empty();
  if (has_zone) {
    envoy_config_core_v3_Locality_set_zone(
        locality_msg, StdStringToUpbString(locality_name.zone()));
  }
  const bool has_sub_zone = !locality_name.sub_zone().empty();
  if (has_sub_zone) {
    envoy_config_core_v3_Locality_set_sub_zone(
        locality_msg, StdStringToUpbString(locality_name.sub_zone()));
  }
  // Request counters.
  envoy_config_endpoint_v3_UpstreamLocalityStats_set_total_successful_requests(
      output, snapshot.total_successful_requests);
  envoy_config_endpoint_v3_UpstreamLocalityStats_set_total_requests_in_progress(
      output, snapshot.total_requests_in_progress);
  envoy_config_endpoint_v3_UpstreamLocalityStats_set_total_error_requests(
      output, snapshot.total_error_requests);
  envoy_config_endpoint_v3_UpstreamLocalityStats_set_total_issued_requests(
      output, snapshot.total_issued_requests);
  // Named backend metrics, one load_metric_stats entry per map element.
  for (auto it = snapshot.backend_metrics.begin();
       it != snapshot.backend_metrics.end(); ++it) {
    envoy_config_endpoint_v3_EndpointLoadMetricStats* metric_msg =
        envoy_config_endpoint_v3_UpstreamLocalityStats_add_load_metric_stats(
            output, context.arena);
    envoy_config_endpoint_v3_EndpointLoadMetricStats_set_metric_name(
        metric_msg, StdStringToUpbString(it->first));
    envoy_config_endpoint_v3_EndpointLoadMetricStats_set_num_requests_finished_with_metric(
        metric_msg, it->second.num_requests_finished_with_metric);
    envoy_config_endpoint_v3_EndpointLoadMetricStats_set_total_metric_value(
        metric_msg, it->second.total_metric_value);
  }
}
} // namespace
// Builds and serializes a LoadStatsRequest with one cluster_stats entry per
// element of `cluster_load_report_map`.  Each entry carries the cluster
// name, the EDS service name (when non-empty), per-locality stats,
// per-category drop counts, the total drop count (categorized plus
// uncategorized), and the load report interval.
std::string XdsApi::CreateLrsRequest(
    ClusterLoadReportMap cluster_load_report_map) {
  upb::Arena arena;
  upb_Arena* const arena_ptr = arena.ptr();
  const XdsApiContext context = {client_, tracer_, def_pool_->ptr(),
                                 arena_ptr};
  envoy_service_load_stats_v3_LoadStatsRequest* request =
      envoy_service_load_stats_v3_LoadStatsRequest_new(arena_ptr);
  for (const auto& cluster_entry : cluster_load_report_map) {
    const std::string& cluster_name = cluster_entry.first.first;
    const std::string& eds_service_name = cluster_entry.first.second;
    const ClusterLoadReport& load_report = cluster_entry.second;
    // New cluster_stats entry, identified by cluster name and (optionally)
    // EDS service name.
    envoy_config_endpoint_v3_ClusterStats* cluster_stats =
        envoy_service_load_stats_v3_LoadStatsRequest_add_cluster_stats(
            request, arena_ptr);
    envoy_config_endpoint_v3_ClusterStats_set_cluster_name(
        cluster_stats, StdStringToUpbString(cluster_name));
    if (!eds_service_name.empty()) {
      envoy_config_endpoint_v3_ClusterStats_set_cluster_service_name(
          cluster_stats, StdStringToUpbString(eds_service_name));
    }
    // Per-locality stats.
    for (const auto& locality_entry : load_report.locality_stats) {
      envoy_config_endpoint_v3_UpstreamLocalityStats* locality_stats =
          envoy_config_endpoint_v3_ClusterStats_add_upstream_locality_stats(
              cluster_stats, arena_ptr);
      LocalityStatsPopulate(context, locality_stats, *locality_entry.first,
                            locality_entry.second);
    }
    // Per-category drops; the running total starts from the uncategorized
    // count and has each category's count added to it.
    uint64_t total_dropped_requests =
        load_report.dropped_requests.uncategorized_drops;
    for (const auto& drop_entry :
         load_report.dropped_requests.categorized_drops) {
      const uint64_t drop_count = drop_entry.second;
      envoy_config_endpoint_v3_ClusterStats_DroppedRequests* dropped_requests =
          envoy_config_endpoint_v3_ClusterStats_add_dropped_requests(
              cluster_stats, arena_ptr);
      envoy_config_endpoint_v3_ClusterStats_DroppedRequests_set_category(
          dropped_requests, StdStringToUpbString(drop_entry.first));
      envoy_config_endpoint_v3_ClusterStats_DroppedRequests_set_dropped_count(
          dropped_requests, drop_count);
      total_dropped_requests += drop_count;
    }
    envoy_config_endpoint_v3_ClusterStats_set_total_dropped_requests(
        cluster_stats, total_dropped_requests);
    // Actual interval covered by this report.
    const gpr_timespec interval =
        load_report.load_report_interval.as_timespec();
    google_protobuf_Duration* interval_msg =
        envoy_config_endpoint_v3_ClusterStats_mutable_load_report_interval(
            cluster_stats, arena_ptr);
    google_protobuf_Duration_set_seconds(interval_msg, interval.tv_sec);
    google_protobuf_Duration_set_nanos(interval_msg, interval.tv_nsec);
  }
  MaybeLogLrsRequest(context, request);
  return SerializeLrsRequest(context, request);
}
namespace {
void MaybeLogLrsResponse(
const XdsApiContext& context,
const envoy_service_load_stats_v3_LoadStatsResponse* response) {
if (GRPC_TRACE_FLAG_ENABLED_OBJ(*context.tracer) && ABSL_VLOG_IS_ON(2)) {
const upb_MessageDef* msg_type =
envoy_service_load_stats_v3_LoadStatsResponse_getmsgdef(
context.def_pool);
char buf[10240];
upb_TextEncode(reinterpret_cast<const upb_Message*>(response), msg_type,
nullptr, 0, buf, sizeof(buf));
VLOG(2) << "[xds_client " << context.client
<< "] received LRS response: " << buf;
}
}
} // namespace
// Decodes a serialized LoadStatsResponse.
//
// On success:
//  - *send_all_clusters is set to true if the response requests all
//    clusters; it is left untouched otherwise.
//  - otherwise the cluster names listed in the response are added to
//    *cluster_names.
//  - *load_reporting_interval is set from the response, or to zero when the
//    response does not carry the field.
//
// Returns UnavailableError if the payload cannot be decoded, in which case
// none of the output parameters are modified.
absl::Status XdsApi::ParseLrsResponse(absl::string_view encoded_response,
                                      bool* send_all_clusters,
                                      std::set<std::string>* cluster_names,
                                      Duration* load_reporting_interval) {
  upb::Arena arena;
  // Decode the response.
  const envoy_service_load_stats_v3_LoadStatsResponse* decoded_response =
      envoy_service_load_stats_v3_LoadStatsResponse_parse(
          encoded_response.data(), encoded_response.size(), arena.ptr());
  // Parse the response.
  if (decoded_response == nullptr) {
    return absl::UnavailableError("Can't decode response.");
  }
  const XdsApiContext context = {client_, tracer_, def_pool_->ptr(),
                                 arena.ptr()};
  MaybeLogLrsResponse(context, decoded_response);
  // Check send_all_clusters.
  if (envoy_service_load_stats_v3_LoadStatsResponse_send_all_clusters(
          decoded_response)) {
    *send_all_clusters = true;
  } else {
    // Store the cluster names.
    size_t size;
    const upb_StringView* clusters =
        envoy_service_load_stats_v3_LoadStatsResponse_clusters(decoded_response,
                                                               &size);
    for (size_t i = 0; i < size; ++i) {
      cluster_names->emplace(UpbStringToStdString(clusters[i]));
    }
  }
  // Get the load report interval.  The sub-message getter returns null when
  // the server omitted the field; the Duration accessors must not be called
  // on a null message, so treat a missing interval as zero.
  const google_protobuf_Duration* load_reporting_interval_duration =
      envoy_service_load_stats_v3_LoadStatsResponse_load_reporting_interval(
          decoded_response);
  if (load_reporting_interval_duration == nullptr) {
    *load_reporting_interval = Duration::Zero();
  } else {
    *load_reporting_interval = Duration::FromSecondsAndNanoseconds(
        google_protobuf_Duration_seconds(load_reporting_interval_duration),
        google_protobuf_Duration_nanos(load_reporting_interval_duration));
  }
  return absl::OkStatus();
}
} // namespace grpc_core

@ -38,16 +38,14 @@
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/time.h"
#include "src/core/xds/xds_client/xds_bootstrap.h"
#include "src/core/xds/xds_client/xds_client_stats.h"
#include "src/core/xds/xds_client/xds_locality.h"
namespace grpc_core {
class XdsClient;
// TODO(roth): When we have time, split this into multiple pieces:
// - ADS request/response handling
// - LRS request/response handling
// - CSDS response generation
// TODO(roth): When we have time, remove this class and move its
// functionality directly inside of XdsClient.
class XdsApi final {
public:
// Interface defined by caller and passed to ParseAdsResponse().
@ -81,17 +79,6 @@ class XdsApi final {
absl::string_view message) = 0;
};
struct ClusterLoadReport {
XdsClusterDropStats::Snapshot dropped_requests;
std::map<RefCountedPtr<XdsLocalityName>, XdsClusterLocalityStats::Snapshot,
XdsLocalityName::Less>
locality_stats;
Duration load_report_interval;
};
using ClusterLoadReportMap = std::map<
std::pair<std::string /*cluster_name*/, std::string /*eds_service_name*/>,
ClusterLoadReport>;
// The metadata of the xDS resource; used by the xDS config dump.
struct ResourceMetadata {
// Resource status from the view of a xDS client, which tells the
@ -160,19 +147,6 @@ class XdsApi final {
absl::Status ParseAdsResponse(absl::string_view encoded_response,
AdsResponseParserInterface* parser);
// Creates an initial LRS request.
std::string CreateLrsInitialRequest();
// Creates an LRS request sending a client-side load report.
std::string CreateLrsRequest(ClusterLoadReportMap cluster_load_report_map);
// Parses the LRS response and populates send_all_clusters,
// cluster_names, and load_reporting_interval.
absl::Status ParseLrsResponse(absl::string_view encoded_response,
bool* send_all_clusters,
std::set<std::string>* cluster_names,
Duration* load_reporting_interval);
void PopulateNode(envoy_config_core_v3_Node* node_msg, upb_Arena* arena);
private:
@ -184,6 +158,11 @@ class XdsApi final {
const std::string user_agent_version_;
};
void PopulateXdsNode(const XdsBootstrap::Node* node,
absl::string_view user_agent_name,
absl::string_view user_agent_version,
envoy_config_core_v3_Node* node_msg, upb_Arena* arena);
} // namespace grpc_core
#endif // GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_API_H

@ -0,0 +1,41 @@
//
// Copyright 2024 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_BACKEND_METRIC_PROPAGATION_H
#define GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_BACKEND_METRIC_PROPAGATION_H
#include <string>
#include "absl/container/flat_hash_set.h"
#include "src/core/lib/gprpp/ref_counted.h"
namespace grpc_core {
// A selection of backend metrics: the fixed metrics are chosen through bit
// flags in propagation_bits, named metrics through named_metric_keys.
//
// The base class is RefCounted<>, not RefCountedPtr<>.  RefCountedPtr<> is
// the smart pointer; deriving from it does not give the type the ref-count
// interface that a RefCountedPtr<BackendMetricPropagation> needs.
struct BackendMetricPropagation
    : public RefCounted<BackendMetricPropagation> {
  // Bit flags for propagation_bits.
  static constexpr uint8_t kCpuUtilization = 1;
  static constexpr uint8_t kMemUtilization = 2;
  static constexpr uint8_t kApplicationUtilization = 4;
  // NOTE(review): presumably selects every named metric regardless of
  // named_metric_keys -- confirm against the code that reads this flag.
  static constexpr uint8_t kNamedMetricsAll = 8;

  // Bitwise OR of the k* flags above.
  uint8_t propagation_bits = 0;
  // Individually selected named metric keys.
  absl::flat_hash_set<std::string> named_metric_keys;
};
} // namespace grpc_core
#endif // GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_BACKEND_METRIC_PROPAGATION_H

@ -56,7 +56,7 @@
#include "src/core/util/upb_utils.h"
#include "src/core/xds/xds_client/xds_api.h"
#include "src/core/xds/xds_client/xds_bootstrap.h"
#include "src/core/xds/xds_client/xds_client_stats.h"
#include "src/core/xds/xds_client/xds_locality.h"
#define GRPC_XDS_INITIAL_CONNECT_BACKOFF_SECONDS 1
#define GRPC_XDS_RECONNECT_BACKOFF_MULTIPLIER 1.6
@ -74,6 +74,9 @@ using ::grpc_event_engine::experimental::EventEngine;
// An xds call wrapper that can restart a call upon failure. Holds a ref to
// the xds channel. The template parameter is the kind of wrapped xds call.
// TODO(roth): This is basically the same code as in LrsClient, and
// probably very similar to many other places in the codebase.
// Consider refactoring this into a common utility library somehow.
template <typename T>
class XdsClient::XdsChannel::RetryableCall final
: public InternallyRefCounted<RetryableCall<T>> {
@ -355,98 +358,22 @@ class XdsClient::XdsChannel::AdsCall final
std::map<const XdsResourceType*, ResourceTypeState> state_map_;
};
// Contains an LRS call to the xds server.
class XdsClient::XdsChannel::LrsCall final
: public InternallyRefCounted<LrsCall> {
public:
// The ctor and dtor should not be used directly.
explicit LrsCall(RefCountedPtr<RetryableCall<LrsCall>> retryable_call);
//
// XdsClient::XdsChannel::ConnectivityFailureWatcher
//
void Orphan() override;
class XdsClient::XdsChannel::ConnectivityFailureWatcher
: public XdsTransportFactory::XdsTransport::ConnectivityFailureWatcher {
public:
explicit ConnectivityFailureWatcher(WeakRefCountedPtr<XdsChannel> xds_channel)
: xds_channel_(std::move(xds_channel)) {}
RetryableCall<LrsCall>* retryable_call() { return retryable_call_.get(); }
XdsChannel* xds_channel() const { return retryable_call_->xds_channel(); }
XdsClient* xds_client() const { return xds_channel()->xds_client(); }
bool seen_response() const { return seen_response_; }
void OnConnectivityFailure(absl::Status status) override {
xds_channel_->OnConnectivityFailure(std::move(status));
}
private:
class StreamEventHandler final
: public XdsTransportFactory::XdsTransport::StreamingCall::EventHandler {
public:
explicit StreamEventHandler(RefCountedPtr<LrsCall> lrs_call)
: lrs_call_(std::move(lrs_call)) {}
void OnRequestSent(bool /*ok*/) override { lrs_call_->OnRequestSent(); }
void OnRecvMessage(absl::string_view payload) override {
lrs_call_->OnRecvMessage(payload);
}
void OnStatusReceived(absl::Status status) override {
lrs_call_->OnStatusReceived(std::move(status));
}
private:
RefCountedPtr<LrsCall> lrs_call_;
};
// A repeating timer for a particular duration.
class Timer final : public InternallyRefCounted<Timer> {
public:
explicit Timer(RefCountedPtr<LrsCall> lrs_call)
: lrs_call_(std::move(lrs_call)) {}
~Timer() override { lrs_call_.reset(DEBUG_LOCATION, "LRS timer"); }
// Disable thread-safety analysis because this method is called via
// OrphanablePtr<>, but there's no way to pass the lock annotation
// through there.
void Orphan() override ABSL_NO_THREAD_SAFETY_ANALYSIS;
void ScheduleNextReportLocked()
ABSL_EXCLUSIVE_LOCKS_REQUIRED(&XdsClient::mu_);
private:
bool IsCurrentTimerOnCall() const {
return this == lrs_call_->timer_.get();
}
XdsClient* xds_client() const { return lrs_call_->xds_client(); }
void OnNextReportTimer();
// The owning LRS call.
RefCountedPtr<LrsCall> lrs_call_;
absl::optional<EventEngine::TaskHandle> timer_handle_
ABSL_GUARDED_BY(&XdsClient::mu_);
};
void MaybeScheduleNextReportLocked()
ABSL_EXCLUSIVE_LOCKS_REQUIRED(&XdsClient::mu_);
void SendReportLocked() ABSL_EXCLUSIVE_LOCKS_REQUIRED(&XdsClient::mu_);
void SendMessageLocked(std::string payload)
ABSL_EXCLUSIVE_LOCKS_REQUIRED(&XdsClient::mu_);
void OnRequestSent();
void OnRecvMessage(absl::string_view payload);
void OnStatusReceived(absl::Status status);
bool IsCurrentCallOnChannel() const;
// The owning RetryableCall<>.
RefCountedPtr<RetryableCall<LrsCall>> retryable_call_;
OrphanablePtr<XdsTransportFactory::XdsTransport::StreamingCall>
streaming_call_;
bool seen_response_ = false;
bool send_message_pending_ ABSL_GUARDED_BY(&XdsClient::mu_) = false;
// Load reporting state.
bool send_all_clusters_ = false;
std::set<std::string> cluster_names_; // Asked for by the LRS server.
Duration load_reporting_interval_;
bool last_report_counters_were_zero_ = false;
OrphanablePtr<Timer> timer_;
WeakRefCountedPtr<XdsChannel> xds_channel_;
};
//
@ -464,15 +391,15 @@ XdsClient::XdsChannel::XdsChannel(WeakRefCountedPtr<XdsClient> xds_client,
<< "[xds_client " << xds_client_.get() << "] creating channel " << this
<< " for server " << server.server_uri();
absl::Status status;
transport_ = xds_client_->transport_factory_->Create(
server,
[self = WeakRef(DEBUG_LOCATION, "OnConnectivityFailure")](
absl::Status status) {
self->OnConnectivityFailure(std::move(status));
},
&status);
transport_ = xds_client_->transport_factory_->GetTransport(server, &status);
CHECK(transport_ != nullptr);
if (!status.ok()) SetChannelStatusLocked(std::move(status));
if (!status.ok()) {
SetChannelStatusLocked(std::move(status));
} else {
failure_watcher_ = MakeRefCounted<ConnectivityFailureWatcher>(
WeakRef(DEBUG_LOCATION, "OnConnectivityFailure"));
transport_->StartConnectivityFailureWatch(failure_watcher_);
}
}
XdsClient::XdsChannel::~XdsChannel() {
@ -491,13 +418,16 @@ void XdsClient::XdsChannel::Orphaned() ABSL_NO_THREAD_SAFETY_ANALYSIS {
<< "[xds_client " << xds_client() << "] orphaning xds channel " << this
<< " for server " << server_.server_uri();
shutting_down_ = true;
if (failure_watcher_ != nullptr) {
transport_->StopConnectivityFailureWatch(failure_watcher_);
failure_watcher_.reset();
}
transport_.reset();
// At this time, all strong refs are removed, remove from channel map to
// prevent subsequent subscription from trying to use this XdsChannel as
// it is shutting down.
xds_client_->xds_channel_map_.erase(server_.Key());
ads_call_.reset();
lrs_call_.reset();
}
// Forwards the backoff reset request to this channel's transport.
void XdsClient::XdsChannel::ResetBackoff() {
  transport_->ResetBackoff();
}
@ -506,21 +436,6 @@ XdsClient::XdsChannel::AdsCall* XdsClient::XdsChannel::ads_call() const {
return ads_call_->call();
}
// Returns the LrsCall currently wrapped by the retryable LRS call.
// Like the original, this dereferences lrs_call_ without a null check.
XdsClient::XdsChannel::LrsCall* XdsClient::XdsChannel::lrs_call() const {
  auto* retryable = lrs_call_.get();
  return retryable->call();
}
// Creates the retryable LRS call for this channel unless one already exists.
void XdsClient::XdsChannel::MaybeStartLrsCall() {
  if (lrs_call_ == nullptr) {
    // The retryable call is handed a weak ref to this channel.
    lrs_call_.reset(
        new RetryableCall<LrsCall>(WeakRef(DEBUG_LOCATION, "XdsChannel+lrs")));
  }
}
// Removes this server's entry from the XdsClient's load-report server map
// and then releases the retryable LRS call held by this channel.
void XdsClient::XdsChannel::StopLrsCallLocked() {
xds_client_->xds_load_report_server_map_.erase(server_.Key());
lrs_call_.reset();
}
void XdsClient::XdsChannel::SubscribeLocked(const XdsResourceType* type,
const XdsResourceName& name) {
if (ads_call_ == nullptr) {
@ -1280,241 +1195,6 @@ XdsClient::XdsChannel::AdsCall::ResourceNamesForRequest(
return resource_names;
}
//
// XdsClient::XdsChannel::LrsCall::Timer
//
void XdsClient::XdsChannel::LrsCall::Timer::Orphan() {
if (timer_handle_.has_value()) {
xds_client()->engine()->Cancel(*timer_handle_);
timer_handle_.reset();
}
Unref(DEBUG_LOCATION, "Orphan");
}
// Schedules OnNextReportTimer() to run on the event engine after the owning
// call's load_reporting_interval_ and records the resulting task handle.
void XdsClient::XdsChannel::LrsCall::Timer::ScheduleNextReportLocked() {
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << xds_client() << "] xds server "
<< lrs_call_->xds_channel()->server_.server_uri()
<< ": scheduling next load report in "
<< lrs_call_->load_reporting_interval_;
timer_handle_ = xds_client()->engine()->RunAfter(
lrs_call_->load_reporting_interval_,
// The callback captures its own ref to this timer.
[self = Ref(DEBUG_LOCATION, "timer")]() {
// Exec contexts are set up before calling back into the timer.
ApplicationCallbackExecCtx callback_exec_ctx;
ExecCtx exec_ctx;
self->OnNextReportTimer();
});
}
// Timer callback.  Takes the XdsClient mutex, clears the stored task handle,
// and sends a load report only if this timer is still the one installed on
// the owning LRS call.
void XdsClient::XdsChannel::LrsCall::Timer::OnNextReportTimer() {
MutexLock lock(&xds_client()->mu_);
timer_handle_.reset();
if (IsCurrentTimerOnCall()) lrs_call_->SendReportLocked();
}
//
// XdsClient::XdsChannel::LrsCall
//
// Creates the StreamLoadStats streaming call on the channel's transport,
// sends the initial LRS request, and starts reading the first response.
// The ref-count trace name is only set when the xds_client_refcount trace
// flag is enabled.
XdsClient::XdsChannel::LrsCall::LrsCall(
RefCountedPtr<RetryableCall<LrsCall>> retryable_call)
: InternallyRefCounted<LrsCall>(
GRPC_TRACE_FLAG_ENABLED(xds_client_refcount) ? "LrsCall" : nullptr),
retryable_call_(std::move(retryable_call)) {
// Init the LRS call. Note that the call will progress every time there's
// activity in xds_client()->interested_parties_, which is comprised of
// the polling entities from client_channel.
CHECK_NE(xds_client(), nullptr);
const char* method =
"/envoy.service.load_stats.v3.LoadReportingService/StreamLoadStats";
streaming_call_ = xds_channel()->transport_->CreateStreamingCall(
method, std::make_unique<StreamEventHandler>(
// Passing the initial ref here. This ref will go away when
// the StreamEventHandler is destroyed.
RefCountedPtr<LrsCall>(this)));
CHECK(streaming_call_ != nullptr);
// Start the call.
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << xds_client() << "] xds server "
<< xds_channel()->server_.server_uri()
<< ": starting LRS call (lrs_call=" << this
<< ", streaming_call=" << streaming_call_.get() << ")";
// Send the initial request.
std::string serialized_payload = xds_client()->api_.CreateLrsInitialRequest();
SendMessageLocked(std::move(serialized_payload));
// Read initial response.
streaming_call_->StartRecvMessage();
}
// Releases the report timer and this object's handle on the streaming call.
void XdsClient::XdsChannel::LrsCall::Orphan() {
timer_.reset();
// Note that the initial ref is held by the StreamEventHandler, which
// will be destroyed when streaming_call_ is destroyed, which may not happen
// here, since there may be other refs held to streaming_call_ by internal
// callbacks.
streaming_call_.reset();
}
// Decides what happens after a report was sent or the LRS config changed.
// If nothing is registered for this server, the LRS call is stopped.
// Otherwise, once the previous send has completed and a response has been
// seen, the report timer is (created if needed and) armed.
void XdsClient::XdsChannel::LrsCall::MaybeScheduleNextReportLocked() {
  // If there are no more registered stats to report, cancel the call.
  auto it = xds_client()->xds_load_report_server_map_.find(
      xds_channel()->server_.Key());
  if (it == xds_client()->xds_load_report_server_map_.end()) {
    // There is no map entry for this server, so `it` must not be
    // dereferenced.  Stop the call through the channel that owns this call.
    xds_channel()->StopLrsCallLocked();
    return;
  }
  if (it->second.load_report_map.empty()) {
    it->second.xds_channel->StopLrsCallLocked();
    return;
  }
  // Don't start if the previous send_message op hasn't completed yet.
  // If this happens, we'll be called again from OnRequestSent().
  if (send_message_pending_) return;
  // Don't start if no LRS response has arrived.
  if (!seen_response()) return;
  // If there is no timer, create one.
  // This happens on the initial response and whenever the interval changes.
  if (timer_ == nullptr) {
    timer_ = MakeOrphanable<Timer>(Ref(DEBUG_LOCATION, "LRS timer"));
  }
  // Schedule the next load report.
  timer_->ScheduleNextReportLocked();
}
namespace {
// Returns true only if every cluster in the snapshot has zero dropped
// requests and every one of its locality snapshots is zero.
bool LoadReportCountersAreZero(const XdsApi::ClusterLoadReportMap& snapshot) {
  for (const auto& cluster_entry : snapshot) {
    const XdsApi::ClusterLoadReport& report = cluster_entry.second;
    if (!report.dropped_requests.IsZero()) return false;
    for (auto locality_it = report.locality_stats.begin();
         locality_it != report.locality_stats.end(); ++locality_it) {
      const XdsClusterLocalityStats::Snapshot& locality_counters =
          locality_it->second;
      if (!locality_counters.IsZero()) return false;
    }
  }
  return true;
}
} // namespace
void XdsClient::XdsChannel::LrsCall::SendReportLocked() {
// Construct snapshot from all reported stats.
XdsApi::ClusterLoadReportMap snapshot =
xds_client()->BuildLoadReportSnapshotLocked(
xds_channel()->server_, send_all_clusters_, cluster_names_);
// Skip client load report if the counters were all zero in the last
// report and they are still zero in this one.
const bool old_val = last_report_counters_were_zero_;
last_report_counters_were_zero_ = LoadReportCountersAreZero(snapshot);
if (old_val && last_report_counters_were_zero_) {
MaybeScheduleNextReportLocked();
return;
}
// Send a request that contains the snapshot.
std::string serialized_payload =
xds_client()->api_.CreateLrsRequest(std::move(snapshot));
SendMessageLocked(std::move(serialized_payload));
}
// Marks a send as in flight and hands the payload to the streaming call.
// The flag is cleared again in OnRequestSent().
void XdsClient::XdsChannel::LrsCall::SendMessageLocked(std::string payload) {
send_message_pending_ = true;
streaming_call_->SendMessage(std::move(payload));
}
// Send-completion callback: clears the in-flight flag under the XdsClient
// mutex and, if this is still the channel's current LRS call, considers
// scheduling the next report.
void XdsClient::XdsChannel::LrsCall::OnRequestSent() {
  MutexLock lock(&xds_client()->mu_);
  send_message_pending_ = false;
  if (!IsCurrentCallOnChannel()) return;
  MaybeScheduleNextReportLocked();
}
// Handles one LRS response: parses it, clamps the reporting interval to the
// configured minimum, records the new config if it differs from the current
// one, and restarts the report timer when the interval changed.  Responses
// for a stale call are ignored.
void XdsClient::XdsChannel::LrsCall::OnRecvMessage(absl::string_view payload) {
MutexLock lock(&xds_client()->mu_);
// If we're no longer the current call, ignore the result.
if (!IsCurrentCallOnChannel()) return;
// Start recv after any code branch
auto cleanup = absl::MakeCleanup(
[call = streaming_call_.get()]() { call->StartRecvMessage(); });
// Parse the response.
bool send_all_clusters = false;
std::set<std::string> new_cluster_names;
Duration new_load_reporting_interval;
absl::Status status = xds_client()->api_.ParseLrsResponse(
payload, &send_all_clusters, &new_cluster_names,
&new_load_reporting_interval);
if (!status.ok()) {
// A response that fails to parse is logged and otherwise dropped.
LOG(ERROR) << "[xds_client " << xds_client() << "] xds server "
<< xds_channel()->server_.server_uri()
<< ": LRS response parsing failed: " << status;
return;
}
seen_response_ = true;
if (GRPC_TRACE_FLAG_ENABLED(xds_client)) {
LOG(INFO) << "[xds_client " << xds_client() << "] xds server "
<< xds_channel()->server_.server_uri()
<< ": LRS response received, " << new_cluster_names.size()
<< " cluster names, send_all_clusters=" << send_all_clusters
<< ", load_report_interval="
<< new_load_reporting_interval.millis() << "ms";
size_t i = 0;
for (const auto& name : new_cluster_names) {
LOG(INFO) << "[xds_client " << xds_client() << "] cluster_name " << i++
<< ": " << name;
}
}
// Enforce the client-side lower bound on the reporting interval.
if (new_load_reporting_interval <
Duration::Milliseconds(GRPC_XDS_MIN_CLIENT_LOAD_REPORTING_INTERVAL_MS)) {
new_load_reporting_interval =
Duration::Milliseconds(GRPC_XDS_MIN_CLIENT_LOAD_REPORTING_INTERVAL_MS);
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << xds_client() << "] xds server "
<< xds_channel()->server_.server_uri()
<< ": increased load_report_interval to minimum value "
<< GRPC_XDS_MIN_CLIENT_LOAD_REPORTING_INTERVAL_MS << "ms";
}
// Ignore identical update.
if (send_all_clusters == send_all_clusters_ &&
cluster_names_ == new_cluster_names &&
load_reporting_interval_ == new_load_reporting_interval) {
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << xds_client() << "] xds server "
<< xds_channel()->server_.server_uri()
<< ": incoming LRS response identical to current, ignoring.";
return;
}
// If the interval has changed, we'll need to restart the timer below.
const bool restart_timer =
load_reporting_interval_ != new_load_reporting_interval;
// Record the new config.
send_all_clusters_ = send_all_clusters;
cluster_names_ = std::move(new_cluster_names);
load_reporting_interval_ = new_load_reporting_interval;
// Restart timer if needed.
if (restart_timer) {
timer_.reset();
MaybeScheduleNextReportLocked();
}
}
// Called with the final status of the streaming call.  Logs the status and,
// if this is still the channel's current LRS call, notifies the owning
// RetryableCall that the call finished.
void XdsClient::XdsChannel::LrsCall::OnStatusReceived(absl::Status status) {
MutexLock lock(&xds_client()->mu_);
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << xds_client() << "] xds server "
<< xds_channel()->server_.server_uri()
<< ": LRS call status received (xds_channel=" << xds_channel()
<< ", lrs_call=" << this << ", streaming_call=" << streaming_call_.get()
<< "): " << status;
// Ignore status from a stale call.
if (IsCurrentCallOnChannel()) {
// Try to restart the call.
retryable_call_->OnCallFinishedLocked();
}
}
// Reports whether this object is the LRS call currently installed on its
// channel.
bool XdsClient::XdsChannel::LrsCall::IsCurrentCallOnChannel() const {
  const auto& retryable = xds_channel()->lrs_call_;
  // A null retryable LRS call (which only happens when the xds channel is
  // shutting down) means every LRS call is stale.
  return retryable != nullptr && this == retryable->call();
}
//
// XdsClient
//
@ -1522,8 +1202,8 @@ bool XdsClient::XdsChannel::LrsCall::IsCurrentCallOnChannel() const {
constexpr absl::string_view XdsClient::kOldStyleAuthority;
XdsClient::XdsClient(
std::unique_ptr<XdsBootstrap> bootstrap,
OrphanablePtr<XdsTransportFactory> transport_factory,
std::shared_ptr<XdsBootstrap> bootstrap,
RefCountedPtr<XdsTransportFactory> transport_factory,
std::shared_ptr<grpc_event_engine::experimental::EventEngine> engine,
std::unique_ptr<XdsMetricsReporter> metrics_reporter,
std::string user_agent_name, std::string user_agent_version,
@ -1562,13 +1242,6 @@ void XdsClient::Orphaned() {
// Clear cache and any remaining watchers that may not have been cancelled.
authority_state_map_.clear();
invalid_watchers_.clear();
// We may still be sending lingering queued load report data, so don't
// just clear the load reporting map, but we do want to clear the refs
// we're holding to the XdsChannel objects, to make sure that
// everything shuts down properly.
for (auto& p : xds_load_report_server_map_) {
p.second.xds_channel.reset(DEBUG_LOCATION, "XdsClient::Orphan()");
}
}
RefCountedPtr<XdsClient::XdsChannel> XdsClient::GetOrCreateXdsChannelLocked(
@ -1842,140 +1515,6 @@ std::string XdsClient::ConstructFullXdsResourceName(
return key.id;
}
// Returns a drop-stats object for (cluster_name, eds_service_name) reporting
// to xds_server.  An existing object is reused if a ref can still be taken
// on it; otherwise a new one is created, after folding the old object's
// final snapshot into deleted_drop_stats.  Also makes sure the server's xDS
// channel exists and its LRS call has been started.
RefCountedPtr<XdsClusterDropStats> XdsClient::AddClusterDropStats(
const XdsBootstrap::XdsServer& xds_server, absl::string_view cluster_name,
absl::string_view eds_service_name) {
auto key =
std::make_pair(std::string(cluster_name), std::string(eds_service_name));
RefCountedPtr<XdsClusterDropStats> cluster_drop_stats;
{
MutexLock lock(&mu_);
// We jump through some hoops here to make sure that the
// absl::string_views stored in the XdsClusterDropStats object point
// to the strings in the xds_load_report_server_map_ keys, so that
// they have the same lifetime.
auto server_it = xds_load_report_server_map_
.emplace(xds_server.Key(), LoadReportServer())
.first;
if (server_it->second.xds_channel == nullptr) {
server_it->second.xds_channel = GetOrCreateXdsChannelLocked(
xds_server, "load report map (drop stats)");
}
auto load_report_it = server_it->second.load_report_map
.emplace(std::move(key), LoadReportState())
.first;
LoadReportState& load_report_state = load_report_it->second;
// Try to reuse the existing stats object; RefIfNonZero() may return null.
if (load_report_state.drop_stats != nullptr) {
cluster_drop_stats = load_report_state.drop_stats->RefIfNonZero();
}
if (cluster_drop_stats == nullptr) {
// Preserve the counters of the old object before replacing it.
if (load_report_state.drop_stats != nullptr) {
load_report_state.deleted_drop_stats +=
load_report_state.drop_stats->GetSnapshotAndReset();
}
cluster_drop_stats = MakeRefCounted<XdsClusterDropStats>(
Ref(DEBUG_LOCATION, "DropStats"), server_it->first /*xds_server*/,
load_report_it->first.first /*cluster_name*/,
load_report_it->first.second /*eds_service_name*/);
// The map keeps only a raw pointer to the stats object.
load_report_state.drop_stats = cluster_drop_stats.get();
}
server_it->second.xds_channel->MaybeStartLrsCall();
}
// Runs after mu_ has been released.
work_serializer_.DrainQueue();
return cluster_drop_stats;
}
// Unregisters cluster_drop_stats for the given server / cluster / EDS
// service.  Does nothing if no matching entry exists or if the registered
// object is a different one.
void XdsClient::RemoveClusterDropStats(
absl::string_view xds_server_key, absl::string_view cluster_name,
absl::string_view eds_service_name,
XdsClusterDropStats* cluster_drop_stats) {
MutexLock lock(&mu_);
auto server_it = xds_load_report_server_map_.find(xds_server_key);
if (server_it == xds_load_report_server_map_.end()) return;
auto load_report_it = server_it->second.load_report_map.find(
std::make_pair(std::string(cluster_name), std::string(eds_service_name)));
if (load_report_it == server_it->second.load_report_map.end()) return;
LoadReportState& load_report_state = load_report_it->second;
if (load_report_state.drop_stats == cluster_drop_stats) {
// Record final snapshot in deleted_drop_stats, which will be
// added to the next load report.
load_report_state.deleted_drop_stats +=
load_report_state.drop_stats->GetSnapshotAndReset();
load_report_state.drop_stats = nullptr;
}
}
// Returns a locality-stats object for the given locality of
// (cluster_name, eds_service_name) reporting to xds_server.  An existing
// object is reused if a ref can still be taken on it; otherwise a new one is
// created, after folding the old object's final snapshot into
// deleted_locality_stats.  Also makes sure the server's xDS channel exists
// and its LRS call has been started.
RefCountedPtr<XdsClusterLocalityStats> XdsClient::AddClusterLocalityStats(
const XdsBootstrap::XdsServer& xds_server, absl::string_view cluster_name,
absl::string_view eds_service_name,
RefCountedPtr<XdsLocalityName> locality) {
auto key =
std::make_pair(std::string(cluster_name), std::string(eds_service_name));
RefCountedPtr<XdsClusterLocalityStats> cluster_locality_stats;
{
MutexLock lock(&mu_);
// We jump through some hoops here to make sure that the
// absl::string_views stored in the XdsClusterLocalityStats object point
// to the strings in the xds_load_report_server_map_ keys, so that
// they have the same lifetime.
auto server_it = xds_load_report_server_map_
.emplace(xds_server.Key(), LoadReportServer())
.first;
if (server_it->second.xds_channel == nullptr) {
server_it->second.xds_channel = GetOrCreateXdsChannelLocked(
xds_server, "load report map (locality stats)");
}
auto load_report_it = server_it->second.load_report_map
.emplace(std::move(key), LoadReportState())
.first;
LoadReportState& load_report_state = load_report_it->second;
// operator[] creates the per-locality entry if it does not exist yet.
LoadReportState::LocalityState& locality_state =
load_report_state.locality_stats[locality];
// Try to reuse the existing stats object; RefIfNonZero() may return null.
if (locality_state.locality_stats != nullptr) {
cluster_locality_stats = locality_state.locality_stats->RefIfNonZero();
}
if (cluster_locality_stats == nullptr) {
// Preserve the counters of the old object before replacing it.
if (locality_state.locality_stats != nullptr) {
locality_state.deleted_locality_stats +=
locality_state.locality_stats->GetSnapshotAndReset();
}
cluster_locality_stats = MakeRefCounted<XdsClusterLocalityStats>(
Ref(DEBUG_LOCATION, "LocalityStats"), server_it->first /*xds_server*/,
load_report_it->first.first /*cluster_name*/,
load_report_it->first.second /*eds_service_name*/,
std::move(locality));
// The map keeps only a raw pointer to the stats object.
locality_state.locality_stats = cluster_locality_stats.get();
}
server_it->second.xds_channel->MaybeStartLrsCall();
}
// Runs after mu_ has been released.
work_serializer_.DrainQueue();
return cluster_locality_stats;
}
// Unregisters cluster_locality_stats for the given server / cluster / EDS
// service / locality.  Does nothing if no matching entry exists or if the
// registered object is a different one.
void XdsClient::RemoveClusterLocalityStats(
absl::string_view xds_server_key, absl::string_view cluster_name,
absl::string_view eds_service_name,
const RefCountedPtr<XdsLocalityName>& locality,
XdsClusterLocalityStats* cluster_locality_stats) {
MutexLock lock(&mu_);
auto server_it = xds_load_report_server_map_.find(xds_server_key);
if (server_it == xds_load_report_server_map_.end()) return;
auto load_report_it = server_it->second.load_report_map.find(
std::make_pair(std::string(cluster_name), std::string(eds_service_name)));
if (load_report_it == server_it->second.load_report_map.end()) return;
LoadReportState& load_report_state = load_report_it->second;
auto locality_it = load_report_state.locality_stats.find(locality);
if (locality_it == load_report_state.locality_stats.end()) return;
LoadReportState::LocalityState& locality_state = locality_it->second;
if (locality_state.locality_stats == cluster_locality_stats) {
// Record final snapshot in deleted_locality_stats, which will be
// added to the next load report.
locality_state.deleted_locality_stats +=
locality_state.locality_stats->GetSnapshotAndReset();
locality_state.locality_stats = nullptr;
}
}
void XdsClient::ResetBackoff() {
MutexLock lock(&mu_);
for (auto& p : xds_channel_map_) {
@ -2018,86 +1557,6 @@ void XdsClient::NotifyWatchersOnResourceDoesNotExist(
DEBUG_LOCATION);
}
// Builds the per-cluster load report for xds_server.
//
// For every (cluster, EDS service) entry registered for the server, this
// drains the drop and locality counters (both the live stats objects and the
// snapshots saved from deleted ones), computes the interval since the last
// report, and prunes entries that no longer have a live stats object.
// A cluster's snapshot is included in the result only if send_all_clusters
// is true or the cluster name appears in `clusters`.
// Returns an empty map if nothing is registered for the server.
XdsApi::ClusterLoadReportMap XdsClient::BuildLoadReportSnapshotLocked(
const XdsBootstrap::XdsServer& xds_server, bool send_all_clusters,
const std::set<std::string>& clusters) {
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << this << "] start building load report";
XdsApi::ClusterLoadReportMap snapshot_map;
auto server_it = xds_load_report_server_map_.find(xds_server.Key());
if (server_it == xds_load_report_server_map_.end()) return snapshot_map;
auto& load_report_map = server_it->second.load_report_map;
// The iterator is advanced at the bottom of the loop because entries may be
// erased while iterating.
for (auto load_report_it = load_report_map.begin();
load_report_it != load_report_map.end();) {
// Cluster key is cluster and EDS service name.
const auto& cluster_key = load_report_it->first;
LoadReportState& load_report = load_report_it->second;
// If the CDS response for a cluster indicates to use LRS but the
// LRS server does not say that it wants reports for this cluster,
// then we'll have stats objects here whose data we're not going to
// include in the load report. However, we still need to clear out
// the data from the stats objects, so that if the LRS server starts
// asking for the data in the future, we don't incorrectly include
// data from previous reporting intervals in that future report.
const bool record_stats =
send_all_clusters || clusters.find(cluster_key.first) != clusters.end();
XdsApi::ClusterLoadReport snapshot;
// Aggregate drop stats.
snapshot.dropped_requests = std::move(load_report.deleted_drop_stats);
if (load_report.drop_stats != nullptr) {
snapshot.dropped_requests +=
load_report.drop_stats->GetSnapshotAndReset();
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << this << "] cluster=" << cluster_key.first
<< " eds_service_name=" << cluster_key.second
<< " drop_stats=" << load_report.drop_stats;
}
// Aggregate locality stats.
for (auto it = load_report.locality_stats.begin();
it != load_report.locality_stats.end();) {
const RefCountedPtr<XdsLocalityName>& locality_name = it->first;
auto& locality_state = it->second;
XdsClusterLocalityStats::Snapshot& locality_snapshot =
snapshot.locality_stats[locality_name];
locality_snapshot = std::move(locality_state.deleted_locality_stats);
if (locality_state.locality_stats != nullptr) {
locality_snapshot +=
locality_state.locality_stats->GetSnapshotAndReset();
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << this
<< "] cluster=" << cluster_key.first.c_str()
<< " eds_service_name=" << cluster_key.second.c_str()
<< " locality=" << locality_name->human_readable_string().c_str()
<< " locality_stats=" << locality_state.locality_stats;
}
// If the only thing left in this entry was final snapshots from
// deleted locality stats objects, remove the entry.
if (locality_state.locality_stats == nullptr) {
it = load_report.locality_stats.erase(it);
} else {
++it;
}
}
// Compute load report interval.
const Timestamp now = Timestamp::Now();
snapshot.load_report_interval = now - load_report.last_report_time;
load_report.last_report_time = now;
// Record snapshot.
if (record_stats) {
snapshot_map[cluster_key] = std::move(snapshot);
}
// If the only thing left in this entry was final snapshots from
// deleted stats objects, remove the entry.
if (load_report.locality_stats.empty() &&
load_report.drop_stats == nullptr) {
load_report_it = load_report_map.erase(load_report_it);
} else {
++load_report_it;
}
}
return snapshot_map;
}
namespace {
google_protobuf_Timestamp* EncodeTimestamp(Timestamp value, upb_Arena* arena) {

@ -44,7 +44,7 @@
#include "src/core/lib/uri/uri_parser.h"
#include "src/core/xds/xds_client/xds_api.h"
#include "src/core/xds/xds_client/xds_bootstrap.h"
#include "src/core/xds/xds_client/xds_client_stats.h"
#include "src/core/xds/xds_client/xds_locality.h"
#include "src/core/xds/xds_client/xds_metrics.h"
#include "src/core/xds/xds_client/xds_resource_type.h"
#include "src/core/xds/xds_client/xds_transport.h"
@ -84,22 +84,14 @@ class XdsClient : public DualRefCounted<XdsClient> {
};
XdsClient(
std::unique_ptr<XdsBootstrap> bootstrap,
OrphanablePtr<XdsTransportFactory> transport_factory,
std::shared_ptr<XdsBootstrap> bootstrap,
RefCountedPtr<XdsTransportFactory> transport_factory,
std::shared_ptr<grpc_event_engine::experimental::EventEngine> engine,
std::unique_ptr<XdsMetricsReporter> metrics_reporter,
std::string user_agent_name, std::string user_agent_version,
Duration resource_request_timeout = Duration::Seconds(15));
~XdsClient() override;
const XdsBootstrap& bootstrap() const {
return *bootstrap_; // ctor asserts that it is non-null
}
XdsTransportFactory* transport_factory() const {
return transport_factory_.get();
}
// Start and cancel watch for a resource.
//
// The XdsClient takes ownership of the watcher, but the caller may
@ -126,29 +118,16 @@ class XdsClient : public DualRefCounted<XdsClient> {
ResourceWatcherInterface* watcher,
bool delay_unsubscription = false);
// Adds and removes drop stats for cluster_name and eds_service_name.
RefCountedPtr<XdsClusterDropStats> AddClusterDropStats(
const XdsBootstrap::XdsServer& xds_server, absl::string_view cluster_name,
absl::string_view eds_service_name);
void RemoveClusterDropStats(absl::string_view xds_server,
absl::string_view cluster_name,
absl::string_view eds_service_name,
XdsClusterDropStats* cluster_drop_stats);
// Adds and removes locality stats for cluster_name and eds_service_name
// for the specified locality.
RefCountedPtr<XdsClusterLocalityStats> AddClusterLocalityStats(
const XdsBootstrap::XdsServer& xds_server, absl::string_view cluster_name,
absl::string_view eds_service_name,
RefCountedPtr<XdsLocalityName> locality);
void RemoveClusterLocalityStats(
absl::string_view xds_server, absl::string_view cluster_name,
absl::string_view eds_service_name,
const RefCountedPtr<XdsLocalityName>& locality,
XdsClusterLocalityStats* cluster_locality_stats);
// Resets connection backoff state.
void ResetBackoff();
virtual void ResetBackoff();
const XdsBootstrap& bootstrap() const {
return *bootstrap_; // ctor asserts that it is non-null
}
XdsTransportFactory* transport_factory() const {
return transport_factory_.get();
}
grpc_event_engine::experimental::EventEngine* engine() {
return engine_.get();
@ -212,7 +191,6 @@ class XdsClient : public DualRefCounted<XdsClient> {
class RetryableCall;
class AdsCall;
class LrsCall;
XdsChannel(WeakRefCountedPtr<XdsClient> xds_client,
const XdsBootstrap::XdsServer& server);
@ -220,13 +198,9 @@ class XdsClient : public DualRefCounted<XdsClient> {
XdsClient* xds_client() const { return xds_client_.get(); }
AdsCall* ads_call() const;
LrsCall* lrs_call() const;
void ResetBackoff();
void MaybeStartLrsCall();
void StopLrsCallLocked() ABSL_EXCLUSIVE_LOCKS_REQUIRED(&XdsClient::mu_);
// Returns non-OK if there has been an error since the last time the
// ADS stream saw a response.
const absl::Status& status() const { return status_; }
@ -242,6 +216,8 @@ class XdsClient : public DualRefCounted<XdsClient> {
absl::string_view server_uri() const { return server_.server_uri(); }
private:
class ConnectivityFailureWatcher;
// Attempts to find a suitable Xds fallback server. Returns true if
// a connection to a suitable server had been established.
bool MaybeFallbackLocked(const std::string& authority,
@ -262,13 +238,14 @@ class XdsClient : public DualRefCounted<XdsClient> {
const XdsBootstrap::XdsServer& server_; // Owned by bootstrap.
OrphanablePtr<XdsTransportFactory::XdsTransport> transport_;
RefCountedPtr<XdsTransportFactory::XdsTransport> transport_;
RefCountedPtr<XdsTransportFactory::XdsTransport::ConnectivityFailureWatcher>
failure_watcher_;
bool shutting_down_ = false;
// The retryable ADS and LRS calls.
OrphanablePtr<RetryableCall<AdsCall>> ads_call_;
OrphanablePtr<RetryableCall<LrsCall>> lrs_call_;
// Stores the most recent accepted resource version for each resource type.
std::map<const XdsResourceType*, std::string /*version*/>
@ -292,30 +269,6 @@ class XdsClient : public DualRefCounted<XdsClient> {
resource_map;
};
// Load-reporting bookkeeping for one entry of a LoadReportMap.
struct LoadReportState {
// Bookkeeping for a single locality.
struct LocalityState {
// Raw pointer to the currently registered locality stats object, or null
// if there is none.
XdsClusterLocalityStats* locality_stats = nullptr;
// Accumulated final snapshots of stats objects that were removed or
// replaced; merged into the next load report.
XdsClusterLocalityStats::Snapshot deleted_locality_stats;
};
// Raw pointer to the currently registered drop stats object, or null if
// there is none.
XdsClusterDropStats* drop_stats = nullptr;
// Accumulated final snapshots of drop stats objects that were removed or
// replaced; merged into the next load report.
XdsClusterDropStats::Snapshot deleted_drop_stats;
// Per-locality state, ordered by XdsLocalityName::Less.
std::map<RefCountedPtr<XdsLocalityName>, LocalityState,
XdsLocalityName::Less>
locality_stats;
// Initialized to the creation time of this state; used to compute the
// interval covered by a report.
Timestamp last_report_time = Timestamp::Now();
};
// Load report data.
using LoadReportMap = std::map<
std::pair<std::string /*cluster_name*/, std::string /*eds_service_name*/>,
LoadReportState>;
// Load-reporting state kept per xDS server key.
struct LoadReportServer {
// Ref to the channel used for this server; may be null.
RefCountedPtr<XdsChannel> xds_channel;
// Stats registered for this server, keyed by (cluster, EDS service name).
LoadReportMap load_report_map;
};
// Sends an error notification to a specific set of watchers.
void NotifyWatchersOnErrorLocked(
const std::map<ResourceWatcherInterface*,
@ -334,22 +287,20 @@ class XdsClient : public DualRefCounted<XdsClient> {
const XdsResourceType* GetResourceTypeLocked(absl::string_view resource_type)
ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
bool HasUncachedResources(const AuthorityState& authority_state);
absl::StatusOr<XdsResourceName> ParseXdsResourceName(
absl::string_view name, const XdsResourceType* type);
static std::string ConstructFullXdsResourceName(
absl::string_view authority, absl::string_view resource_type,
const XdsResourceKey& key);
XdsApi::ClusterLoadReportMap BuildLoadReportSnapshotLocked(
const XdsBootstrap::XdsServer& xds_server, bool send_all_clusters,
const std::set<std::string>& clusters) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
RefCountedPtr<XdsChannel> GetOrCreateXdsChannelLocked(
const XdsBootstrap::XdsServer& server, const char* reason)
ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
bool HasUncachedResources(const AuthorityState& authority_state);
std::unique_ptr<XdsBootstrap> bootstrap_;
OrphanablePtr<XdsTransportFactory> transport_factory_;
std::shared_ptr<XdsBootstrap> bootstrap_;
RefCountedPtr<XdsTransportFactory> transport_factory_;
const Duration request_timeout_;
const bool xds_federation_enabled_;
XdsApi api_;
@ -371,9 +322,6 @@ class XdsClient : public DualRefCounted<XdsClient> {
std::map<std::string /*authority*/, AuthorityState> authority_state_map_
ABSL_GUARDED_BY(mu_);
std::map<std::string /*XdsServer key*/, LoadReportServer, std::less<>>
xds_load_report_server_map_ ABSL_GUARDED_BY(mu_);
// Stores started watchers whose resource name was not parsed successfully,
// waiting to be cancelled or reset in Orphan().
std::map<ResourceWatcherInterface*, RefCountedPtr<ResourceWatcherInterface>>

@ -1,206 +0,0 @@
//
//
// Copyright 2018 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
#include "src/core/xds/xds_client/xds_client_stats.h"
#include "absl/log/log.h"
#include <grpc/support/port_platform.h>
#include "src/core/lib/debug/trace.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/xds/xds_client/xds_client.h"
namespace grpc_core {
namespace {
// Atomically replaces the value stored in *from with zero and returns the
// value it held immediately before the replacement.
uint64_t GetAndResetCounter(std::atomic<uint64_t>* from) {
  const uint64_t previous = from->exchange(0, std::memory_order_relaxed);
  return previous;
}
} // namespace
//
// XdsClusterDropStats
//
// Constructs drop stats for the {lrs_server, cluster_name, eds_service_name}
// triple and takes ownership of the passed-in XdsClient ref.
// The RefCounted base is given a trace label only when the
// xds_client_refcount trace flag is enabled.
XdsClusterDropStats::XdsClusterDropStats(RefCountedPtr<XdsClient> xds_client,
absl::string_view lrs_server,
absl::string_view cluster_name,
absl::string_view eds_service_name)
: RefCounted(GRPC_TRACE_FLAG_ENABLED(xds_client_refcount)
? "XdsClusterDropStats"
: nullptr),
xds_client_(std::move(xds_client)),
lrs_server_(lrs_server),
cluster_name_(cluster_name),
eds_service_name_(eds_service_name) {
// Trace-only: records which object was created and for which key.
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << xds_client_.get() << "] created drop stats " << this
<< " for {" << lrs_server_ << ", " << cluster_name_ << ", "
<< eds_service_name_ << "}";
}
// Logs the destruction, unregisters this object from the XdsClient, and
// then releases the XdsClient ref.
XdsClusterDropStats::~XdsClusterDropStats() {
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << xds_client_.get() << "] destroying drop stats "
<< this << " for {" << lrs_server_ << ", " << cluster_name_ << ", "
<< eds_service_name_ << "}";
// Must happen before the ref is dropped below, since it calls through
// xds_client_.
xds_client_->RemoveClusterDropStats(lrs_server_, cluster_name_,
eds_service_name_, this);
xds_client_.reset(DEBUG_LOCATION, "DropStats");
}
// Returns the drop counts accumulated so far and resets them.
// The uncategorized counter is reset atomically; the categorized map is
// handed over to the snapshot while holding mu_.
XdsClusterDropStats::Snapshot XdsClusterDropStats::GetSnapshotAndReset() {
  Snapshot snapshot;
  snapshot.uncategorized_drops = GetAndResetCounter(&uncategorized_drops_);
  MutexLock lock(&mu_);
  // snapshot.categorized_drops is still empty here, so swapping transfers
  // the accumulated counts and is guaranteed to leave categorized_drops_
  // empty.  Move-assigning instead would leave categorized_drops_ in a
  // valid but unspecified state, which is not a guaranteed reset.
  snapshot.categorized_drops.swap(categorized_drops_);
  return snapshot;
}
// Records one dropped request that has no drop category.
void XdsClusterDropStats::AddUncategorizedDrops() {
  // Pure counter: relaxed ordering matches the relaxed exchange used to
  // read and reset it in GetAndResetCounter() and the other request
  // counters in this file.
  uncategorized_drops_.fetch_add(1, std::memory_order_relaxed);
}
// Records one dropped request under the given category.  The map entry is
// created with a zero count on first use.
void XdsClusterDropStats::AddCallDropped(const std::string& category) {
  MutexLock lock(&mu_);
  categorized_drops_[category] += 1;
}
//
// XdsClusterLocalityStats
//
// TODO(roth): Remove this once the feature passes interop tests.
// Returns true only when GRPC_EXPERIMENTAL_XDS_ORCA_LRS_PROPAGATION is set
// and its value parses as boolean true; an unset or unparseable value
// yields false.
bool XdsOrcaLrsPropagationChangesEnabled() {
  const auto env_value = GetEnv("GRPC_EXPERIMENTAL_XDS_ORCA_LRS_PROPAGATION");
  if (env_value.has_value()) {
    bool enabled;
    if (gpr_parse_bool_value(env_value->c_str(), &enabled)) return enabled;
  }
  return false;
}
// Constructs locality stats for the {lrs_server, cluster_name,
// eds_service_name, name} tuple and takes ownership of the passed-in
// XdsClient and locality-name refs.
// The RefCounted base is given a trace label only when the
// xds_client_refcount trace flag is enabled.
XdsClusterLocalityStats::XdsClusterLocalityStats(
RefCountedPtr<XdsClient> xds_client, absl::string_view lrs_server,
absl::string_view cluster_name, absl::string_view eds_service_name,
RefCountedPtr<XdsLocalityName> name)
: RefCounted(GRPC_TRACE_FLAG_ENABLED(xds_client_refcount)
? "XdsClusterLocalityStats"
: nullptr),
xds_client_(std::move(xds_client)),
lrs_server_(lrs_server),
cluster_name_(cluster_name),
eds_service_name_(eds_service_name),
name_(std::move(name)) {
// Trace-only; a null locality name is logged as "<none>".
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << xds_client_.get() << "] created locality stats "
<< this << " for {" << lrs_server_ << ", " << cluster_name_ << ", "
<< eds_service_name_ << ", "
<< (name_ == nullptr ? "<none>" : name_->human_readable_string().c_str())
<< "}";
}
// Logs the destruction, unregisters this object from the XdsClient, and
// then releases the XdsClient ref.
XdsClusterLocalityStats::~XdsClusterLocalityStats() {
// Trace-only; a null locality name is logged as "<none>".
GRPC_TRACE_LOG(xds_client, INFO)
<< "[xds_client " << xds_client_.get() << "] destroying locality stats "
<< this << " for {" << lrs_server_ << ", " << cluster_name_ << ", "
<< eds_service_name_ << ", "
<< (name_ == nullptr ? "<none>" : name_->human_readable_string().c_str())
<< "}";
// Must happen before the ref is dropped below, since it calls through
// xds_client_.
xds_client_->RemoveClusterLocalityStats(lrs_server_, cluster_name_,
eds_service_name_, name_, this);
xds_client_.reset(DEBUG_LOCATION, "LocalityStats");
}
// Folds every per-CPU shard into a single snapshot and returns it.
// All request counters except total_requests_in_progress are reset as they
// are read; the backend metric accumulators are transferred out of each
// shard while holding that shard's backend_metrics_mu.
XdsClusterLocalityStats::Snapshot
XdsClusterLocalityStats::GetSnapshotAndReset() {
  Snapshot snapshot;
  for (auto& percpu_stats : stats_) {
    Snapshot percpu_snapshot = {
        GetAndResetCounter(&percpu_stats.total_successful_requests),
        // Don't reset total_requests_in_progress because it's
        // not related to a single reporting interval.
        percpu_stats.total_requests_in_progress.load(std::memory_order_relaxed),
        GetAndResetCounter(&percpu_stats.total_error_requests),
        GetAndResetCounter(&percpu_stats.total_issued_requests),
        {},
        {},
        {},
        {}};
    {
      MutexLock lock(&percpu_stats.backend_metrics_mu);
      // Every accumulator is taken from the same shard (percpu_stats) and
      // stored into the same per-shard snapshot (percpu_snapshot).
      percpu_snapshot.cpu_utilization = std::move(percpu_stats.cpu_utilization);
      percpu_snapshot.mem_utilization = std::move(percpu_stats.mem_utilization);
      percpu_snapshot.application_utilization =
          std::move(percpu_stats.application_utilization);
      // The snapshot map was brace-initialized empty above, so swapping
      // both transfers the entries and leaves the shard's map empty.
      percpu_snapshot.backend_metrics.swap(percpu_stats.backend_metrics);
    }
    snapshot += percpu_snapshot;
  }
  return snapshot;
}
void XdsClusterLocalityStats::AddCallStarted() {
Stats& stats = stats_.this_cpu();
stats.total_issued_requests.fetch_add(1, std::memory_order_relaxed);
stats.total_requests_in_progress.fetch_add(1, std::memory_order_relaxed);
}
// Records the completion of a call on the calling CPU's shard.
//
// propagation:     selects which backend metrics are recorded when the ORCA
//                  LRS propagation experiment is enabled.
// backend_metrics: metrics reported for the call; may be null, in which
//                  case only the request counters are updated.
// fail:            true counts the call as an error, false as a success.
void XdsClusterLocalityStats::AddCallFinished(
    const BackendMetricPropagation& propagation,
    const BackendMetricData* backend_metrics, bool fail) {
  Stats& stats = stats_.this_cpu();
  std::atomic<uint64_t>& to_increment =
      fail ? stats.total_error_requests : stats.total_successful_requests;
  to_increment.fetch_add(1, std::memory_order_relaxed);
  stats.total_requests_in_progress.fetch_sub(1, std::memory_order_acq_rel);
  if (backend_metrics == nullptr) return;
  // Read the experiment setting before taking the lock so that the
  // environment lookup and parse do not run inside the critical section.
  const bool propagation_enabled = XdsOrcaLrsPropagationChangesEnabled();
  MutexLock lock(&stats.backend_metrics_mu);
  if (!propagation_enabled) {
    // Experiment off: record every named metric under its bare name.
    for (const auto& m : backend_metrics->named_metrics) {
      stats.backend_metrics[std::string(m.first)] += BackendMetric{1, m.second};
    }
    return;
  }
  if (propagation.propagation_bits &
      BackendMetricPropagation::kCpuUtilization) {
    stats.cpu_utilization += BackendMetric{1, backend_metrics->cpu_utilization};
  }
  if (propagation.propagation_bits &
      BackendMetricPropagation::kMemUtilization) {
    stats.mem_utilization += BackendMetric{1, backend_metrics->mem_utilization};
  }
  if (propagation.propagation_bits &
      BackendMetricPropagation::kApplicationUtilization) {
    stats.application_utilization +=
        BackendMetric{1, backend_metrics->application_utilization};
  }
  // Named metrics are recorded either wholesale (kNamedMetricsAll) or only
  // for the keys listed in named_metric_keys, under a "named_metrics."
  // prefix.
  const bool all_named_metrics =
      (propagation.propagation_bits &
       BackendMetricPropagation::kNamedMetricsAll) != 0;
  if (all_named_metrics || !propagation.named_metric_keys.empty()) {
    for (const auto& m : backend_metrics->named_metrics) {
      if (all_named_metrics ||
          propagation.named_metric_keys.contains(m.first)) {
        stats.backend_metrics[absl::StrCat("named_metrics.", m.first)] +=
            BackendMetric{1, m.second};
      }
    }
  }
}
} // namespace grpc_core

@ -1,285 +0,0 @@
//
//
// Copyright 2018 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
#ifndef GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_CLIENT_STATS_H
#define GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_CLIENT_STATS_H
#include <atomic>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include "absl/base/thread_annotations.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include <grpc/support/port_platform.h>
#include "src/core/lib/gprpp/per_cpu.h"
#include "src/core/lib/gprpp/ref_counted.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/sync.h"
#include "src/core/resolver/endpoint_addresses.h"
#include "src/core/telemetry/call_tracer.h"
#include "src/core/util/useful.h"
#include "src/core/xds/xds_client/xds_bootstrap.h"
namespace grpc_core {
// Forward declaration to avoid circular dependency.
class XdsClient;
// Locality name: a {region, zone, sub_zone} triple.
class XdsLocalityName final : public RefCounted<XdsLocalityName> {
 public:
  // Ordering functor for containers keyed by locality name.  Accepts raw
  // pointers or RefCountedPtrs; either operand may be null.
  struct Less {
    bool operator()(const XdsLocalityName* lhs,
                    const XdsLocalityName* rhs) const {
      // QsortCompare() is a three-way comparison, so its result has to be
      // tested against zero.  Converting it straight to bool would report
      // "less" for both argument orders whenever exactly one operand is
      // null, which violates strict weak ordering.
      if (lhs == nullptr || rhs == nullptr) return QsortCompare(lhs, rhs) < 0;
      return lhs->Compare(*rhs) < 0;
    }

    bool operator()(const RefCountedPtr<XdsLocalityName>& lhs,
                    const RefCountedPtr<XdsLocalityName>& rhs) const {
      return (*this)(lhs.get(), rhs.get());
    }
  };

  // Stores the three components and precomputes the human-readable form.
  XdsLocalityName(std::string region, std::string zone, std::string sub_zone)
      : region_(std::move(region)),
        zone_(std::move(zone)),
        sub_zone_(std::move(sub_zone)),
        human_readable_string_(
            absl::StrFormat("{region=\"%s\", zone=\"%s\", sub_zone=\"%s\"}",
                            region_, zone_, sub_zone_)) {}

  bool operator==(const XdsLocalityName& other) const {
    return region_ == other.region_ && zone_ == other.zone_ &&
           sub_zone_ == other.sub_zone_;
  }

  bool operator!=(const XdsLocalityName& other) const {
    return !(*this == other);
  }

  // Three-way lexicographic comparison on (region, zone, sub_zone).
  // Returns a negative, zero, or positive value.
  int Compare(const XdsLocalityName& other) const {
    int cmp_result = region_.compare(other.region_);
    if (cmp_result != 0) return cmp_result;
    cmp_result = zone_.compare(other.zone_);
    if (cmp_result != 0) return cmp_result;
    return sub_zone_.compare(other.sub_zone_);
  }

  const std::string& region() const { return region_; }
  const std::string& zone() const { return zone_; }
  const std::string& sub_zone() const { return sub_zone_; }

  const RefCountedStringValue& human_readable_string() const {
    return human_readable_string_;
  }

  // Channel args traits.
  static absl::string_view ChannelArgName() {
    return GRPC_ARG_NO_SUBCHANNEL_PREFIX "xds_locality_name";
  }
  // Both pointers are dereferenced without a null check.
  static int ChannelArgsCompare(const XdsLocalityName* a,
                                const XdsLocalityName* b) {
    return a->Compare(*b);
  }

 private:
  std::string region_;
  std::string zone_;
  std::string sub_zone_;
  // Built once in the constructor from the three fields above.
  RefCountedStringValue human_readable_string_;
};
// Drop stats for an xds cluster.
class XdsClusterDropStats final : public RefCounted<XdsClusterDropStats> {
 public:
  // Drop counts keyed by drop category.
  using CategorizedDropsMap = std::map<std::string /* category */, uint64_t>;

  // A copy of the drop counters.  The total number of requests dropped for
  // any reason is uncategorized_drops plus the sum of the values in
  // categorized_drops.
  struct Snapshot {
    uint64_t uncategorized_drops = 0;
    // The number of requests dropped for the specific drop categories
    // outlined in the drop_overloads field in the EDS response.
    CategorizedDropsMap categorized_drops;

    // Adds the counts in other to this snapshot, category by category.
    Snapshot& operator+=(const Snapshot& other) {
      uncategorized_drops += other.uncategorized_drops;
      for (const auto& entry : other.categorized_drops) {
        categorized_drops[entry.first] += entry.second;
      }
      return *this;
    }

    // True when no drops of any kind are recorded.
    bool IsZero() const {
      if (uncategorized_drops != 0) return false;
      auto it = categorized_drops.begin();
      while (it != categorized_drops.end() && it->second == 0) ++it;
      return it == categorized_drops.end();
    }
  };

  XdsClusterDropStats(RefCountedPtr<XdsClient> xds_client,
                      absl::string_view lrs_server,
                      absl::string_view cluster_name,
                      absl::string_view eds_service_name);
  ~XdsClusterDropStats() override;

  // Returns a snapshot of this instance and resets all the counters.
  Snapshot GetSnapshotAndReset();

  // Records one drop without a category.
  void AddUncategorizedDrops();
  // Records one drop under the given category.
  void AddCallDropped(const std::string& category);

 private:
  RefCountedPtr<XdsClient> xds_client_;
  // These are views, not copies: the strings they refer to are owned
  // elsewhere.
  absl::string_view lrs_server_;
  absl::string_view cluster_name_;
  absl::string_view eds_service_name_;
  std::atomic<uint64_t> uncategorized_drops_{0};
  // Protects categorized_drops_.  A mutex is necessary because the map can
  // be accessed by both the picker (from data plane mutex) and the load
  // reporting thread (from the control plane combiner).
  Mutex mu_;
  CategorizedDropsMap categorized_drops_ ABSL_GUARDED_BY(mu_);
};
bool XdsOrcaLrsPropagationChangesEnabled();
// Locality stats for an xds cluster.
class XdsClusterLocalityStats final
    : public RefCounted<XdsClusterLocalityStats> {
 public:
  // Accumulator for one backend metric: how many finished requests
  // reported it, and the sum of the reported values.
  struct BackendMetric {
    uint64_t num_requests_finished_with_metric = 0;
    double total_metric_value = 0;

    // Declaring the move constructor below suppresses the implicit default
    // constructor and move assignment, and the type is not an aggregate.
    // The operations used on this type are therefore spelled out here:
    // default construction (Snapshot/Stats members, map operator[]),
    // {count, value} construction, and move assignment.
    BackendMetric() = default;
    BackendMetric(uint64_t num_requests_finished, double value)
        : num_requests_finished_with_metric(num_requests_finished),
          total_metric_value(value) {}

    // Moving transfers the accumulated values and zeroes the source, so a
    // moved-from accumulator can keep being used as a fresh one.
    BackendMetric(BackendMetric&& other) noexcept
        : num_requests_finished_with_metric(
              std::exchange(other.num_requests_finished_with_metric, 0)),
          total_metric_value(std::exchange(other.total_metric_value, 0)) {}
    BackendMetric& operator=(BackendMetric&& other) noexcept {
      num_requests_finished_with_metric =
          std::exchange(other.num_requests_finished_with_metric, 0);
      total_metric_value = std::exchange(other.total_metric_value, 0);
      return *this;
    }

    BackendMetric& operator+=(const BackendMetric& other) {
      num_requests_finished_with_metric +=
          other.num_requests_finished_with_metric;
      total_metric_value += other.total_metric_value;
      return *this;
    }

    bool IsZero() const {
      return num_requests_finished_with_metric == 0 && total_metric_value == 0;
    }
  };

  // A copy of the request counters and backend metric accumulators.
  struct Snapshot {
    uint64_t total_successful_requests = 0;
    uint64_t total_requests_in_progress = 0;
    uint64_t total_error_requests = 0;
    uint64_t total_issued_requests = 0;
    BackendMetric cpu_utilization;
    BackendMetric mem_utilization;
    BackendMetric application_utilization;
    // Accumulators for other metrics, keyed by metric name.
    std::map<std::string, BackendMetric> backend_metrics;

    // Adds every counter and accumulator of other to this snapshot.
    Snapshot& operator+=(const Snapshot& other) {
      total_successful_requests += other.total_successful_requests;
      total_requests_in_progress += other.total_requests_in_progress;
      total_error_requests += other.total_error_requests;
      total_issued_requests += other.total_issued_requests;
      cpu_utilization += other.cpu_utilization;
      mem_utilization += other.mem_utilization;
      application_utilization += other.application_utilization;
      for (const auto& p : other.backend_metrics) {
        backend_metrics[p.first] += p.second;
      }
      return *this;
    }

    // True when every counter and every accumulator is zero.
    bool IsZero() const {
      if (total_successful_requests != 0 || total_requests_in_progress != 0 ||
          total_error_requests != 0 || total_issued_requests != 0 ||
          !cpu_utilization.IsZero() || !mem_utilization.IsZero() ||
          !application_utilization.IsZero()) {
        return false;
      }
      for (const auto& p : backend_metrics) {
        if (!p.second.IsZero()) return false;
      }
      return true;
    }
  };

  // Selects which backend metrics AddCallFinished() records.
  // This is a ref-counted object, so it derives from RefCounted<>;
  // RefCountedPtr<> is the smart pointer type that refers to such objects
  // and is not usable as a base class for this purpose.
  struct BackendMetricPropagation
      : public RefCounted<BackendMetricPropagation> {
    // Bit flags for propagation_bits.
    static constexpr uint8_t kCpuUtilization = 1;
    static constexpr uint8_t kMemUtilization = 2;
    static constexpr uint8_t kApplicationUtilization = 4;
    static constexpr uint8_t kNamedMetricsAll = 8;

    uint8_t propagation_bits = 0;
    // Named metrics to record when kNamedMetricsAll is not set.
    absl::flat_hash_set<std::string> named_metric_keys;
  };

  XdsClusterLocalityStats(RefCountedPtr<XdsClient> xds_client,
                          absl::string_view lrs_server,
                          absl::string_view cluster_name,
                          absl::string_view eds_service_name,
                          RefCountedPtr<XdsLocalityName> name);
  ~XdsClusterLocalityStats() override;

  // Returns a snapshot of this instance and resets all the counters.
  Snapshot GetSnapshotAndReset();

  // Records the start of a call.
  void AddCallStarted();
  // Records the end of a call; backend_metrics may be null.
  void AddCallFinished(const BackendMetricPropagation& propagation,
                       const BackendMetricData* backend_metrics,
                       bool fail = false);

  XdsLocalityName* locality_name() const { return name_.get(); }

 private:
  // One shard of counters; stats_ holds one of these per CPU shard.
  struct Stats {
    std::atomic<uint64_t> total_successful_requests{0};
    std::atomic<uint64_t> total_requests_in_progress{0};
    std::atomic<uint64_t> total_error_requests{0};
    std::atomic<uint64_t> total_issued_requests{0};

    // Protects the backend metric accumulators below.
    Mutex backend_metrics_mu;
    BackendMetric cpu_utilization ABSL_GUARDED_BY(backend_metrics_mu);
    BackendMetric mem_utilization ABSL_GUARDED_BY(backend_metrics_mu);
    BackendMetric application_utilization ABSL_GUARDED_BY(backend_metrics_mu);
    std::map<std::string, BackendMetric> backend_metrics
        ABSL_GUARDED_BY(backend_metrics_mu);
  };

  RefCountedPtr<XdsClient> xds_client_;
  // These are views, not copies: the strings they refer to are owned
  // elsewhere.
  absl::string_view lrs_server_;
  absl::string_view cluster_name_;
  absl::string_view eds_service_name_;
  RefCountedPtr<XdsLocalityName> name_;
  PerCpu<Stats> stats_{PerCpuOptions().SetMaxShards(32).SetCpusPerShard(4)};
};
} // namespace grpc_core
#endif // GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_CLIENT_STATS_H

@ -0,0 +1,103 @@
//
//
// Copyright 2018 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
#ifndef GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_LOCALITY_H
#define GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_LOCALITY_H
#include <string>
#include <utility>
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "src/core/lib/gprpp/ref_counted.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/ref_counted_string.h"
#include "src/core/resolver/endpoint_addresses.h"
#include "src/core/util/useful.h"
namespace grpc_core {
// An xDS locality name: a {region, zone, sub_zone} triple.
class XdsLocalityName final : public RefCounted<XdsLocalityName> {
 public:
  // Ordering functor for containers keyed by locality name.  Accepts raw
  // pointers or RefCountedPtrs; either operand may be null.
  struct Less {
    bool operator()(const XdsLocalityName* lhs,
                    const XdsLocalityName* rhs) const {
      // QsortCompare() is a three-way comparison, so its result has to be
      // tested against zero.  Converting it straight to bool would report
      // "less" for both argument orders whenever exactly one operand is
      // null, which violates strict weak ordering.
      if (lhs == nullptr || rhs == nullptr) return QsortCompare(lhs, rhs) < 0;
      return lhs->Compare(*rhs) < 0;
    }

    bool operator()(const RefCountedPtr<XdsLocalityName>& lhs,
                    const RefCountedPtr<XdsLocalityName>& rhs) const {
      return (*this)(lhs.get(), rhs.get());
    }
  };

  // Stores the three components and precomputes the human-readable form.
  XdsLocalityName(std::string region, std::string zone, std::string sub_zone)
      : region_(std::move(region)),
        zone_(std::move(zone)),
        sub_zone_(std::move(sub_zone)),
        human_readable_string_(
            absl::StrFormat("{region=\"%s\", zone=\"%s\", sub_zone=\"%s\"}",
                            region_, zone_, sub_zone_)) {}

  bool operator==(const XdsLocalityName& other) const {
    return region_ == other.region_ && zone_ == other.zone_ &&
           sub_zone_ == other.sub_zone_;
  }

  bool operator!=(const XdsLocalityName& other) const {
    return !(*this == other);
  }

  // Three-way lexicographic comparison on (region, zone, sub_zone).
  // Returns a negative, zero, or positive value.
  int Compare(const XdsLocalityName& other) const {
    int cmp_result = region_.compare(other.region_);
    if (cmp_result != 0) return cmp_result;
    cmp_result = zone_.compare(other.zone_);
    if (cmp_result != 0) return cmp_result;
    return sub_zone_.compare(other.sub_zone_);
  }

  const std::string& region() const { return region_; }
  const std::string& zone() const { return zone_; }
  const std::string& sub_zone() const { return sub_zone_; }

  const RefCountedStringValue& human_readable_string() const {
    return human_readable_string_;
  }

  // Channel args traits.
  static absl::string_view ChannelArgName() {
    return GRPC_ARG_NO_SUBCHANNEL_PREFIX "xds_locality_name";
  }
  // Both pointers are dereferenced without a null check.
  static int ChannelArgsCompare(const XdsLocalityName* a,
                                const XdsLocalityName* b) {
    return a->Compare(*b);
  }

 private:
  std::string region_;
  std::string zone_;
  std::string sub_zone_;
  // Built once in the constructor from the three fields above.
  RefCountedStringValue human_readable_string_;
};
} // namespace grpc_core
#endif // GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_LOCALITY_H

@ -17,25 +17,23 @@
#ifndef GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_TRANSPORT_H
#define GRPC_SRC_CORE_XDS_XDS_CLIENT_XDS_TRANSPORT_H
#include <functional>
#include <memory>
#include <string>
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include <grpc/support/port_platform.h>
#include "src/core/lib/gprpp/dual_ref_counted.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/xds/xds_client/xds_bootstrap.h"
namespace grpc_core {
// A factory for creating new XdsTransport instances.
class XdsTransportFactory : public InternallyRefCounted<XdsTransportFactory> {
class XdsTransportFactory : public DualRefCounted<XdsTransportFactory> {
public:
// Represents a transport for xDS communication (e.g., a gRPC channel).
class XdsTransport : public InternallyRefCounted<XdsTransport> {
class XdsTransport : public DualRefCounted<XdsTransport> {
public:
// Represents a bidi streaming RPC call.
class StreamingCall : public InternallyRefCounted<StreamingCall> {
@ -63,6 +61,25 @@ class XdsTransportFactory : public InternallyRefCounted<XdsTransportFactory> {
virtual void StartRecvMessage() = 0;
};
// A watcher for connectivity failures.
// Ref-counted; instances are registered and unregistered on a transport
// via StartConnectivityFailureWatch() / StopConnectivityFailureWatch().
class ConnectivityFailureWatcher
: public RefCounted<ConnectivityFailureWatcher> {
public:
// Will be invoked whenever there is a connectivity failure on the
// transport.
// NOTE(review): status presumably describes the failure -- confirm with
// the transport implementations.
virtual void OnConnectivityFailure(absl::Status status) = 0;
};
explicit XdsTransport(const char* trace = nullptr)
: DualRefCounted(trace) {}
// Starts a connectivity failure watcher on the transport.
virtual void StartConnectivityFailureWatch(
RefCountedPtr<ConnectivityFailureWatcher> watcher) = 0;
// Stops a connectivity failure watcher on the transport.
virtual void StopConnectivityFailureWatch(
const RefCountedPtr<ConnectivityFailureWatcher>& watcher) = 0;
// Create a streaming call on this transport for the specified method.
// Events on the stream will be reported to event_handler.
virtual OrphanablePtr<StreamingCall> CreateStreamingCall(
@ -73,15 +90,14 @@ class XdsTransportFactory : public InternallyRefCounted<XdsTransportFactory> {
virtual void ResetBackoff() = 0;
};
// Creates a new transport for the specified server.
// The on_connectivity_failure callback will be invoked whenever there is
// a connectivity failure on the transport.
// Returns a transport for the specified server. If there is already
// a transport for the server, returns a new ref to that transport;
// otherwise, creates a new transport.
//
// *status will be set if there is an error creating the channel,
// although the returned channel must still accept calls (which may fail).
virtual OrphanablePtr<XdsTransport> Create(
const XdsBootstrap::XdsServer& server,
std::function<void(absl::Status)> on_connectivity_failure,
absl::Status* status) = 0;
virtual RefCountedPtr<XdsTransport> GetTransport(
const XdsBootstrap::XdsServer& server, absl::Status* status) = 0;
};
} // namespace grpc_core

@ -864,10 +864,10 @@ CORE_SOURCE_FILES = [
'src/core/xds/grpc/xds_routing.cc',
'src/core/xds/grpc/xds_server_grpc.cc',
'src/core/xds/grpc/xds_transport_grpc.cc',
'src/core/xds/xds_client/lrs_client.cc',
'src/core/xds/xds_client/xds_api.cc',
'src/core/xds/xds_client/xds_bootstrap.cc',
'src/core/xds/xds_client/xds_client.cc',
'src/core/xds/xds_client/xds_client_stats.cc',
'third_party/abseil-cpp/absl/base/internal/cycleclock.cc',
'third_party/abseil-cpp/absl/base/internal/low_level_alloc.cc',
'third_party/abseil-cpp/absl/base/internal/raw_logging.cc',

@ -61,13 +61,12 @@ class Fuzzer {
// Leave xds_client_ unset, so Act() will be a no-op.
return;
}
auto transport_factory = MakeOrphanable<FakeXdsTransportFactory>(
transport_factory_ = MakeRefCounted<FakeXdsTransportFactory>(
[]() { Crash("Multiple concurrent reads"); });
transport_factory->SetAutoCompleteMessagesFromClient(false);
transport_factory->SetAbortOnUndrainedMessages(false);
transport_factory_ = transport_factory.get();
transport_factory_->SetAutoCompleteMessagesFromClient(false);
transport_factory_->SetAbortOnUndrainedMessages(false);
xds_client_ = MakeRefCounted<XdsClient>(
std::move(*bootstrap), std::move(transport_factory),
std::move(*bootstrap), transport_factory_,
grpc_event_engine::experimental::GetDefaultEventEngine(),
/*metrics_reporter=*/nullptr, "foo agent", "foo version");
}
@ -322,7 +321,7 @@ class Fuzzer {
}
RefCountedPtr<XdsClient> xds_client_;
FakeXdsTransportFactory* transport_factory_;
RefCountedPtr<FakeXdsTransportFactory> transport_factory_;
// Maps of currently active watchers for each resource type, keyed by
// resource name.

@ -729,14 +729,12 @@ class XdsClientTest : public ::testing::Test {
void InitXdsClient(
FakeXdsBootstrap::Builder bootstrap_builder = FakeXdsBootstrap::Builder(),
Duration resource_request_timeout = Duration::Seconds(15)) {
auto transport_factory = MakeOrphanable<FakeXdsTransportFactory>(
transport_factory_ = MakeRefCounted<FakeXdsTransportFactory>(
[]() { FAIL() << "Multiple concurrent reads"; });
transport_factory_ =
transport_factory->Ref().TakeAsSubclass<FakeXdsTransportFactory>();
auto metrics_reporter = std::make_unique<MetricsReporter>();
metrics_reporter_ = metrics_reporter.get();
xds_client_ = MakeRefCounted<XdsClient>(
bootstrap_builder.Build(), std::move(transport_factory),
bootstrap_builder.Build(), transport_factory_,
grpc_event_engine::experimental::GetDefaultEventEngine(),
std::move(metrics_reporter), "foo agent", "foo version",
resource_request_timeout * grpc_test_slowdown_factor());
@ -986,7 +984,7 @@ TEST_F(XdsClientTest, BasicWatch) {
/*resource_names=*/{"foo1"});
// Cancel watch.
CancelFooWatch(watcher.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
// Check metric data.
EXPECT_TRUE(metrics_reporter_->WaitForMetricsReporterData(
::testing::ElementsAre(::testing::Pair(
@ -1081,7 +1079,7 @@ TEST_F(XdsClientTest, UpdateFromServer) {
/*resource_names=*/{"foo1"});
// Cancel watch.
CancelFooWatch(watcher.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, MultipleWatchersForSameResource) {
@ -1185,7 +1183,7 @@ TEST_F(XdsClientTest, MultipleWatchersForSameResource) {
ASSERT_FALSE(WaitForRequest(stream.get()));
// Now cancel the second watcher.
CancelFooWatch(watcher2.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, SubscribeToMultipleResources) {
@ -1319,7 +1317,7 @@ TEST_F(XdsClientTest, SubscribeToMultipleResources) {
/*error_detail=*/absl::OkStatus(), /*resource_names=*/{"foo2"});
// Now cancel watch for "foo2".
CancelFooWatch(watcher2.get(), "foo2");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, UpdateContainsOnlyChangedResource) {
@ -1441,7 +1439,7 @@ TEST_F(XdsClientTest, UpdateContainsOnlyChangedResource) {
/*error_detail=*/absl::OkStatus(), /*resource_names=*/{"foo2"});
// Now cancel watch for "foo2".
CancelFooWatch(watcher2.get(), "foo2");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, ResourceValidationFailure) {
@ -1568,7 +1566,7 @@ TEST_F(XdsClientTest, ResourceValidationFailure) {
// Cancel watch.
CancelFooWatch(watcher.get(), "foo1");
CancelFooWatch(watcher2.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, ResourceValidationFailureMultipleResources) {
@ -1767,7 +1765,7 @@ TEST_F(XdsClientTest, ResourceValidationFailureMultipleResources) {
CancelFooWatch(watcher2.get(), "foo2", /*delay_unsubscription=*/true);
CancelFooWatch(watcher3.get(), "foo3", /*delay_unsubscription=*/true);
CancelFooWatch(watcher4.get(), "foo4");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, ResourceValidationFailureForCachedResource) {
@ -1882,7 +1880,7 @@ TEST_F(XdsClientTest, ResourceValidationFailureForCachedResource) {
// Cancel watches.
CancelFooWatch(watcher.get(), "foo1");
CancelFooWatch(watcher2.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, WildcardCapableResponseWithEmptyResource) {
@ -1949,7 +1947,7 @@ TEST_F(XdsClientTest, WildcardCapableResponseWithEmptyResource) {
/*resource_names=*/{"wc1"});
// Cancel watch.
CancelWildcardCapableWatch(watcher.get(), "wc1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
// This tests resource removal triggered by the server when using a
@ -2079,7 +2077,7 @@ TEST_F(XdsClientTest, ResourceDeletion) {
// Cancel watch.
CancelWildcardCapableWatch(watcher.get(), "wc1");
CancelWildcardCapableWatch(watcher2.get(), "wc1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
// This tests that when we ignore resource deletions from the server
@ -2213,7 +2211,7 @@ TEST_F(XdsClientTest, ResourceDeletionIgnoredWhenConfigured) {
// Cancel watch.
CancelWildcardCapableWatch(watcher.get(), "wc1");
CancelWildcardCapableWatch(watcher2.get(), "wc1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, StreamClosedByServer) {
@ -2262,7 +2260,7 @@ TEST_F(XdsClientTest, StreamClosedByServer) {
// XdsClient should NOT report error to watcher, because we saw a
// response on the stream before it failed.
// Stream should be orphaned.
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
// Check metric data.
EXPECT_THAT(GetServerConnections(), ::testing::ElementsAre(::testing::Pair(
kDefaultXdsServerUrl, true)));
@ -2306,7 +2304,7 @@ TEST_F(XdsClientTest, StreamClosedByServer) {
// Cancel watcher.
CancelFooWatch(watcher.get(), "foo1");
CancelFooWatch(watcher2.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, StreamClosedByServerWithoutSeeingResponse) {
@ -2390,7 +2388,7 @@ TEST_F(XdsClientTest, StreamClosedByServerWithoutSeeingResponse) {
/*resource_names=*/{"foo1"});
// Cancel watcher.
CancelFooWatch(watcher.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, ConnectionFails) {
@ -2486,7 +2484,7 @@ TEST_F(XdsClientTest, ConnectionFails) {
// Cancel watches.
CancelFooWatch(watcher.get(), "foo1");
CancelFooWatch(watcher2.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, ResourceDoesNotExistUponTimeout) {
@ -2572,7 +2570,7 @@ TEST_F(XdsClientTest, ResourceDoesNotExistUponTimeout) {
// Cancel watch.
CancelFooWatch(watcher.get(), "foo1");
CancelFooWatch(watcher2.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, ResourceDoesNotExistAfterStreamRestart) {
@ -2687,7 +2685,7 @@ TEST_F(XdsClientTest, ResourceDoesNotExistAfterStreamRestart) {
/*resource_names=*/{"foo1"});
// Cancel watcher.
CancelFooWatch(watcher.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, DoesNotExistTimerNotStartedUntilSendCompletes) {
@ -2765,7 +2763,7 @@ TEST_F(XdsClientTest, DoesNotExistTimerNotStartedUntilSendCompletes) {
stream->CompleteSendMessageFromClient();
// Cancel watch.
CancelFooWatch(watcher.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
// In https://github.com/grpc/grpc/issues/29583, we ran into a case
@ -2921,7 +2919,7 @@ TEST_F(XdsClientTest,
// Cancel watches.
CancelFooWatch(watcher.get(), "foo1", /*delay_unsubscription=*/true);
CancelFooWatch(watcher2.get(), "foo2");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, DoNotSendDoesNotExistForCachedResource) {
@ -3041,7 +3039,7 @@ TEST_F(XdsClientTest, DoNotSendDoesNotExistForCachedResource) {
/*resource_names=*/{"foo1"});
// Cancel watch.
CancelFooWatch(watcher.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, ResourceWrappedInResourceMessage) {
@ -3096,7 +3094,7 @@ TEST_F(XdsClientTest, ResourceWrappedInResourceMessage) {
/*resource_names=*/{"foo1"});
// Cancel watch.
CancelFooWatch(watcher.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, MultipleResourceTypes) {
@ -3211,7 +3209,7 @@ TEST_F(XdsClientTest, MultipleResourceTypes) {
/*error_detail=*/absl::OkStatus(), /*resource_names=*/{});
// Now cancel watch for "bar1".
CancelBarWatch(watcher2.get(), "bar1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, Federation) {
@ -3360,10 +3358,10 @@ TEST_F(XdsClientTest, Federation) {
/*resource_names=*/{kXdstpResourceName});
// Cancel watch for "foo1".
CancelFooWatch(watcher.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
// Now cancel watch for xdstp resource name.
CancelFooWatch(watcher2.get(), kXdstpResourceName);
EXPECT_TRUE(stream2->Orphaned());
EXPECT_TRUE(stream2->IsOrphaned());
}
TEST_F(XdsClientTest, FederationAuthorityDefaultsToTopLevelXdsServer) {
@ -3486,7 +3484,7 @@ TEST_F(XdsClientTest, FederationAuthorityDefaultsToTopLevelXdsServer) {
/*resource_names=*/{kXdstpResourceName});
// Now cancel watch for xdstp resource name.
CancelFooWatch(watcher2.get(), kXdstpResourceName);
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, FederationWithUnknownAuthority) {
@ -3566,7 +3564,7 @@ TEST_F(XdsClientTest, FederationDisabledWithNewStyleName) {
/*resource_names=*/{kXdstpResourceName});
// Cancel watch.
CancelFooWatch(watcher.get(), kXdstpResourceName);
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, FederationChannelFailureReportedToWatchers) {
@ -3719,10 +3717,10 @@ TEST_F(XdsClientTest, FederationChannelFailureReportedToWatchers) {
::testing::Pair(authority_server.server_uri(), 1))));
// Cancel watch for "foo1".
CancelFooWatch(watcher.get(), "foo1");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
// Now cancel watch for xdstp resource name.
CancelFooWatch(watcher2.get(), kXdstpResourceName);
EXPECT_TRUE(stream2->Orphaned());
EXPECT_TRUE(stream2->IsOrphaned());
}
TEST_F(XdsClientTest, AdsReadWaitsForHandleRelease) {
@ -3808,7 +3806,7 @@ TEST_F(XdsClientTest, AdsReadWaitsForHandleRelease) {
/*error_detail=*/absl::OkStatus(),
/*resource_names=*/{"foo2"});
CancelFooWatch(watcher2.get(), "foo2");
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
TEST_F(XdsClientTest, FallbackAndRecover) {
@ -4017,7 +4015,7 @@ TEST_F(XdsClientTest, FallbackAndRecover) {
EXPECT_THAT(GetServerConnections(), ::testing::ElementsAre(::testing::Pair(
kDefaultXdsServerUrl, true)));
// Result (remote): The stream to the fallback server has been orphaned.
EXPECT_TRUE(stream2->Orphaned());
EXPECT_TRUE(stream2->IsOrphaned());
// Result (local): Resources are delivered to watchers.
resource = watcher->WaitForNextResource();
ASSERT_NE(resource, nullptr);
@ -4038,7 +4036,7 @@ TEST_F(XdsClientTest, FallbackAndRecover) {
CancelFooWatch(watcher.get(), "foo1", /*delay_unsubscription=*/true);
CancelFooWatch(watcher2.get(), "foo2");
// Result (remote): The stream to the primary server has been orphaned.
EXPECT_TRUE(stream->Orphaned());
EXPECT_TRUE(stream->IsOrphaned());
}
// Test for both servers being unavailable
@ -4165,7 +4163,7 @@ TEST_F(XdsClientTest, FallbackOnStartup) {
.set_nonce("D")
.AddFooResource(XdsFooResource("foo1", 42))
.Serialize());
EXPECT_TRUE(fallback_stream->Orphaned());
EXPECT_TRUE(fallback_stream->IsOrphaned());
resource = watcher->WaitForNextResource();
ASSERT_NE(resource, nullptr);
EXPECT_EQ(resource->name, "foo1");

@ -170,7 +170,7 @@ TEST_F(XdsClusterTest, MinimumValidConfig) {
EXPECT_EQ(JsonDump(Json::FromArray(resource.lb_policy_config)),
"[{\"xds_wrr_locality_experimental\":{\"childPolicy\":"
"[{\"round_robin\":{}}]}}]");
EXPECT_FALSE(resource.lrs_load_reporting_server.has_value());
EXPECT_EQ(resource.lrs_load_reporting_server, nullptr);
EXPECT_EQ(resource.max_concurrent_requests, 1024);
EXPECT_FALSE(resource.outlier_detection.has_value());
}
@ -1134,7 +1134,7 @@ TEST_F(LrsTest, Valid) {
EXPECT_EQ(*decode_result.name, "foo");
auto& resource =
static_cast<const XdsClusterResource&>(**decode_result.resource);
ASSERT_TRUE(resource.lrs_load_reporting_server.has_value());
ASSERT_NE(resource.lrs_load_reporting_server, nullptr);
EXPECT_EQ(*resource.lrs_load_reporting_server,
*xds_client_->bootstrap().servers().front());
}

@ -49,7 +49,7 @@
#include "src/core/xds/grpc/xds_health_status.h"
#include "src/core/xds/xds_client/xds_bootstrap.h"
#include "src/core/xds/xds_client/xds_client.h"
#include "src/core/xds/xds_client/xds_client_stats.h"
#include "src/core/xds/xds_client/xds_locality.h"
#include "src/core/xds/xds_client/xds_resource_type.h"
#include "src/proto/grpc/testing/xds/v3/address.pb.h"
#include "src/proto/grpc/testing/xds/v3/base.pb.h"

@ -187,7 +187,7 @@ void FakeXdsTransportFactory::FakeStreamingCall::MaybeSendStatusToClient(
event_handler->OnStatusReceived(std::move(status));
}
bool FakeXdsTransportFactory::FakeStreamingCall::Orphaned() {
bool FakeXdsTransportFactory::FakeStreamingCall::IsOrphaned() {
MutexLock lock(&mu_);
return orphaned_;
}
@ -198,18 +198,18 @@ bool FakeXdsTransportFactory::FakeStreamingCall::Orphaned() {
void FakeXdsTransportFactory::FakeXdsTransport::TriggerConnectionFailure(
absl::Status status) {
RefCountedPtr<RefCountedOnConnectivityFailure> on_connectivity_failure;
std::set<RefCountedPtr<ConnectivityFailureWatcher>> watchers;
{
MutexLock lock(&mu_);
on_connectivity_failure = on_connectivity_failure_;
watchers = watchers_;
}
ExecCtx exec_ctx;
if (on_connectivity_failure != nullptr) {
on_connectivity_failure->Run(std::move(status));
for (const auto& watcher : watchers) {
watcher->OnConnectivityFailure(status);
}
}
void FakeXdsTransportFactory::FakeXdsTransport::Orphan() {
void FakeXdsTransportFactory::FakeXdsTransport::Orphaned() {
{
MutexLock lock(&factory_->mu_);
auto it = factory_->transport_map_.find(server_.Key());
@ -220,16 +220,14 @@ void FakeXdsTransportFactory::FakeXdsTransport::Orphan() {
factory_.reset();
{
MutexLock lock(&mu_);
// Can't destroy on_connectivity_failure_ synchronously, since that
// operation will trigger code in XdsClient that acquires its mutex, but
// it was already holding its mutex when it called us, so it would deadlock.
GetDefaultEventEngine()->Run([on_connectivity_failure = std::move(
on_connectivity_failure_)]() mutable {
// Can't destroy watchers synchronously, since that operation will trigger
// code in XdsClient that acquires its mutex, but it was already holding
// its mutex when it called us, so it would deadlock.
GetDefaultEventEngine()->Run([watchers = std::move(watchers_)]() mutable {
ExecCtx exec_ctx;
on_connectivity_failure.reset();
watchers.clear();
});
}
Unref();
}
RefCountedPtr<FakeXdsTransportFactory::FakeStreamingCall>
@ -255,12 +253,24 @@ void FakeXdsTransportFactory::FakeXdsTransport::RemoveStream(
}
}
// Registers `watcher` with this fake transport by adding it to watchers_,
// taking ownership of the ref that was passed in.
void FakeXdsTransportFactory::FakeXdsTransport::StartConnectivityFailureWatch(
    RefCountedPtr<ConnectivityFailureWatcher> watcher) {
  // The watcher set is only touched while mu_ is held.
  MutexLock guard(&mu_);
  watchers_.insert(std::move(watcher));
}
// Unregisters `watcher` from this fake transport by removing it from
// watchers_.  A watcher that is not in the set is silently ignored.
void FakeXdsTransportFactory::FakeXdsTransport::StopConnectivityFailureWatch(
    const RefCountedPtr<ConnectivityFailureWatcher>& watcher) {
  // The watcher set is only touched while mu_ is held.
  MutexLock guard(&mu_);
  const auto pos = watchers_.find(watcher);
  if (pos != watchers_.end()) {
    watchers_.erase(pos);
  }
}
OrphanablePtr<XdsTransportFactory::XdsTransport::StreamingCall>
FakeXdsTransportFactory::FakeXdsTransport::CreateStreamingCall(
const char* method,
std::unique_ptr<StreamingCall::EventHandler> event_handler) {
auto call = MakeOrphanable<FakeStreamingCall>(
RefAsSubclass<FakeXdsTransport>(), method, std::move(event_handler));
WeakRefAsSubclass<FakeXdsTransport>(), method, std::move(event_handler));
MutexLock lock(&mu_);
active_calls_[method] = call->Ref().TakeAsSubclass<FakeStreamingCall>();
cv_.Signal();
@ -274,19 +284,18 @@ FakeXdsTransportFactory::FakeXdsTransport::CreateStreamingCall(
constexpr char FakeXdsTransportFactory::kAdsMethod[];
constexpr char FakeXdsTransportFactory::kLrsMethod[];
OrphanablePtr<XdsTransportFactory::XdsTransport>
FakeXdsTransportFactory::Create(
const XdsBootstrap::XdsServer& server,
std::function<void(absl::Status)> on_connectivity_failure,
absl::Status* /*status*/) {
RefCountedPtr<XdsTransportFactory::XdsTransport>
FakeXdsTransportFactory::GetTransport(const XdsBootstrap::XdsServer& server,
absl::Status* /*status*/) {
std::string key = server.Key();
MutexLock lock(&mu_);
auto& entry = transport_map_[server.Key()];
CHECK(entry == nullptr);
auto transport = MakeOrphanable<FakeXdsTransport>(
RefAsSubclass<FakeXdsTransportFactory>(), server,
std::move(on_connectivity_failure), auto_complete_messages_from_client_,
abort_on_undrained_messages_);
entry = transport->Ref().TakeAsSubclass<FakeXdsTransport>();
auto transport = GetTransportLocked(key);
if (transport == nullptr) {
transport = MakeRefCounted<FakeXdsTransport>(
WeakRefAsSubclass<FakeXdsTransportFactory>(), server,
auto_complete_messages_from_client_, abort_on_undrained_messages_);
transport_map_.emplace(std::move(key), transport.get());
}
return transport;
}
@ -318,8 +327,16 @@ FakeXdsTransportFactory::WaitForStream(const XdsBootstrap::XdsServer& server,
RefCountedPtr<FakeXdsTransportFactory::FakeXdsTransport>
FakeXdsTransportFactory::GetTransport(const XdsBootstrap::XdsServer& server) {
std::string key = server.Key();
MutexLock lock(&mu_);
return transport_map_[server.Key()];
return GetTransportLocked(key);
}
// Looks up the transport registered under `key` in transport_map_.
// Returns nullptr when the map has no entry for `key`; otherwise returns
// whatever RefIfNonZero() yields for the stored raw pointer, downcast to
// FakeXdsTransport.
// NOTE(review): presumably RefIfNonZero() yields a null ref when the
// transport's strong refcount has already reached zero -- confirm.
// The caller is expected to hold mu_ while calling this.
RefCountedPtr<FakeXdsTransportFactory::FakeXdsTransport>
FakeXdsTransportFactory::GetTransportLocked(const std::string& key) {
  const auto entry = transport_map_.find(key);
  if (entry != transport_map_.end()) {
    return entry->second->RefIfNonZero().TakeAsSubclass<FakeXdsTransport>();
  }
  return nullptr;
}
} // namespace grpc_core

@ -57,7 +57,7 @@ class FakeXdsTransportFactory : public XdsTransportFactory {
class FakeStreamingCall : public XdsTransport::StreamingCall {
public:
FakeStreamingCall(
RefCountedPtr<FakeXdsTransport> transport, const char* method,
WeakRefCountedPtr<FakeXdsTransport> transport, const char* method,
std::unique_ptr<StreamingCall::EventHandler> event_handler)
: transport_(std::move(transport)),
method_(method),
@ -68,6 +68,8 @@ class FakeXdsTransportFactory : public XdsTransportFactory {
void Orphan() override;
bool IsOrphaned();
void StartRecvMessage() override;
using StreamingCall::Ref; // Make it public.
@ -86,8 +88,6 @@ class FakeXdsTransportFactory : public XdsTransportFactory {
void SendMessageToClient(absl::string_view payload);
void MaybeSendStatusToClient(absl::Status status);
bool Orphaned();
bool WaitForReadsStarted(size_t expected, absl::Duration timeout) {
MutexLock lock(&mu_);
const absl::Time deadline = absl::Now() + timeout;
@ -124,7 +124,7 @@ class FakeXdsTransportFactory : public XdsTransportFactory {
ABSL_EXCLUSIVE_LOCKS_REQUIRED(&mu_);
void MaybeDeliverMessageToClient();
RefCountedPtr<FakeXdsTransport> transport_;
WeakRefCountedPtr<FakeXdsTransport> transport_;
const char* method_;
Mutex mu_;
@ -144,8 +144,6 @@ class FakeXdsTransportFactory : public XdsTransportFactory {
: too_many_pending_reads_callback_(
std::move(too_many_pending_reads_callback)) {}
using XdsTransportFactory::Ref; // Make it public.
void TriggerConnectionFailure(const XdsBootstrap::XdsServer& server,
absl::Status status);
@ -174,26 +172,22 @@ class FakeXdsTransportFactory : public XdsTransportFactory {
const XdsBootstrap::XdsServer& server, const char* method,
absl::Duration timeout);
void Orphan() override { Unref(); }
void Orphaned() override {}
private:
class FakeXdsTransport : public XdsTransport {
public:
FakeXdsTransport(RefCountedPtr<FakeXdsTransportFactory> factory,
FakeXdsTransport(WeakRefCountedPtr<FakeXdsTransportFactory> factory,
const XdsBootstrap::XdsServer& server,
std::function<void(absl::Status)> on_connectivity_failure,
bool auto_complete_messages_from_client,
bool abort_on_undrained_messages)
: factory_(std::move(factory)),
server_(server),
auto_complete_messages_from_client_(
auto_complete_messages_from_client),
abort_on_undrained_messages_(abort_on_undrained_messages),
on_connectivity_failure_(
MakeRefCounted<RefCountedOnConnectivityFailure>(
std::move(on_connectivity_failure))) {}
abort_on_undrained_messages_(abort_on_undrained_messages) {}
void Orphan() override;
void Orphaned() override;
bool auto_complete_messages_from_client() const {
return auto_complete_messages_from_client_;
@ -203,8 +197,6 @@ class FakeXdsTransportFactory : public XdsTransportFactory {
return abort_on_undrained_messages_;
}
using XdsTransport::Ref; // Make it public.
void TriggerConnectionFailure(absl::Status status);
RefCountedPtr<FakeStreamingCall> WaitForStream(const char* method,
@ -217,20 +209,10 @@ class FakeXdsTransportFactory : public XdsTransportFactory {
const XdsBootstrap::XdsServer* server() const { return &server_; }
private:
class RefCountedOnConnectivityFailure
: public RefCounted<RefCountedOnConnectivityFailure> {
public:
explicit RefCountedOnConnectivityFailure(
std::function<void(absl::Status)> on_connectivity_failure)
: on_connectivity_failure_(std::move(on_connectivity_failure)) {}
void Run(absl::Status status) {
on_connectivity_failure_(std::move(status));
}
private:
std::function<void(absl::Status)> on_connectivity_failure_;
};
void StartConnectivityFailureWatch(
RefCountedPtr<ConnectivityFailureWatcher> watcher) override;
void StopConnectivityFailureWatch(
const RefCountedPtr<ConnectivityFailureWatcher>& watcher) override;
OrphanablePtr<StreamingCall> CreateStreamingCall(
const char* method,
@ -238,30 +220,33 @@ class FakeXdsTransportFactory : public XdsTransportFactory {
void ResetBackoff() override {}
RefCountedPtr<FakeXdsTransportFactory> factory_;
WeakRefCountedPtr<FakeXdsTransportFactory> factory_;
const XdsBootstrap::XdsServer& server_;
const bool auto_complete_messages_from_client_;
const bool abort_on_undrained_messages_;
Mutex mu_;
CondVar cv_;
RefCountedPtr<RefCountedOnConnectivityFailure> on_connectivity_failure_
std::set<RefCountedPtr<ConnectivityFailureWatcher>> watchers_
ABSL_GUARDED_BY(&mu_);
std::map<std::string /*method*/, RefCountedPtr<FakeStreamingCall>>
active_calls_ ABSL_GUARDED_BY(&mu_);
};
OrphanablePtr<XdsTransport> Create(
const XdsBootstrap::XdsServer& server,
std::function<void(absl::Status)> on_connectivity_failure,
absl::Status* status) override;
// Returns an existing transport or creates a new one.
RefCountedPtr<XdsTransport> GetTransport(
const XdsBootstrap::XdsServer& server, absl::Status* /*status*/) override;
// Returns an existing transport, if any, or nullptr.
RefCountedPtr<FakeXdsTransport> GetTransport(
const XdsBootstrap::XdsServer& server);
RefCountedPtr<FakeXdsTransport> GetTransportLocked(const std::string& key)
ABSL_EXCLUSIVE_LOCKS_REQUIRED(&mu_);
Mutex mu_;
std::map<std::string /*XdsServer key*/, RefCountedPtr<FakeXdsTransport>>
transport_map_ ABSL_GUARDED_BY(&mu_);
std::map<std::string /*XdsServer key*/, FakeXdsTransport*> transport_map_
ABSL_GUARDED_BY(&mu_);
bool auto_complete_messages_from_client_ ABSL_GUARDED_BY(&mu_) = true;
bool abort_on_undrained_messages_ ABSL_GUARDED_BY(&mu_) = true;
std::function<void()> too_many_pending_reads_callback_;

@ -3039,15 +3039,17 @@ src/core/xds/grpc/xds_server_grpc.cc \
src/core/xds/grpc/xds_server_grpc.h \
src/core/xds/grpc/xds_transport_grpc.cc \
src/core/xds/grpc/xds_transport_grpc.h \
src/core/xds/xds_client/lrs_client.cc \
src/core/xds/xds_client/lrs_client.h \
src/core/xds/xds_client/xds_api.cc \
src/core/xds/xds_client/xds_api.h \
src/core/xds/xds_client/xds_backend_metric_propagation.h \
src/core/xds/xds_client/xds_bootstrap.cc \
src/core/xds/xds_client/xds_bootstrap.h \
src/core/xds/xds_client/xds_channel_args.h \
src/core/xds/xds_client/xds_client.cc \
src/core/xds/xds_client/xds_client.h \
src/core/xds/xds_client/xds_client_stats.cc \
src/core/xds/xds_client/xds_client_stats.h \
src/core/xds/xds_client/xds_locality.h \
src/core/xds/xds_client/xds_metrics.h \
src/core/xds/xds_client/xds_resource_type.h \
src/core/xds/xds_client/xds_resource_type_impl.h \

@ -2817,15 +2817,17 @@ src/core/xds/grpc/xds_server_grpc.cc \
src/core/xds/grpc/xds_server_grpc.h \
src/core/xds/grpc/xds_transport_grpc.cc \
src/core/xds/grpc/xds_transport_grpc.h \
src/core/xds/xds_client/lrs_client.cc \
src/core/xds/xds_client/lrs_client.h \
src/core/xds/xds_client/xds_api.cc \
src/core/xds/xds_client/xds_api.h \
src/core/xds/xds_client/xds_backend_metric_propagation.h \
src/core/xds/xds_client/xds_bootstrap.cc \
src/core/xds/xds_client/xds_bootstrap.h \
src/core/xds/xds_client/xds_channel_args.h \
src/core/xds/xds_client/xds_client.cc \
src/core/xds/xds_client/xds_client.h \
src/core/xds/xds_client/xds_client_stats.cc \
src/core/xds/xds_client/xds_client_stats.h \
src/core/xds/xds_client/xds_locality.h \
src/core/xds/xds_client/xds_metrics.h \
src/core/xds/xds_client/xds_resource_type.h \
src/core/xds/xds_client/xds_resource_type_impl.h \

Loading…
Cancel
Save