[LB policies] revert changes for dualstack design (#33718)

This reverts the following PRs: #32692 #33087 #33093 #33427 #33568

These changes seem to have introduced some flaky crashes. Reverting
while I investigate.
pull/33720/head
Mark D. Roth 2 years ago committed by GitHub
parent c96f3dce4e
commit 083bbee480
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      CMakeLists.txt
  2. 2
      Makefile
  3. 3
      Package.swift
  4. 6
      build_autogenerated.yaml
  5. 1
      config.m4
  6. 1
      config.w32
  7. 4
      gRPC-C++.podspec
  8. 5
      gRPC-Core.podspec
  9. 3
      grpc.gemspec
  10. 2
      grpc.gyp
  11. 3
      package.xml
  12. 41
      src/core/BUILD
  13. 188
      src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc
  14. 214
      src/core/ext/filters/client_channel/lb_policy/endpoint_list.h
  15. 47
      src/core/ext/filters/client_channel/lb_policy/health_check_client.cc
  16. 5
      src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h
  17. 113
      src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc
  18. 7
      src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h
  19. 531
      src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
  20. 16
      src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h
  21. 891
      src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc
  22. 438
      src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc
  23. 488
      src/core/ext/filters/client_channel/lb_policy/subchannel_list.h
  24. 652
      src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc
  25. 41
      src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc
  26. 2
      src/core/ext/filters/client_channel/subchannel.h
  27. 1
      src/core/lib/resolver/server_address.h
  28. 1
      src/python/grpcio/grpc_core_dependencies.py
  29. 146
      test/core/client_channel/lb_policy/lb_policy_test_lib.h
  30. 13
      test/core/client_channel/lb_policy/outlier_detection_test.cc
  31. 31
      test/core/client_channel/lb_policy/pick_first_test.cc
  32. 4
      test/core/client_channel/lb_policy/round_robin_test.cc
  33. 4
      test/core/client_channel/lb_policy/weighted_round_robin_test.cc
  34. 36
      test/core/client_channel/lb_policy/xds_override_host_test.cc
  35. 15
      test/cpp/end2end/client_lb_end2end_test.cc
  36. 6
      test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc
  37. 3
      tools/doxygen/Doxyfile.c++.internal
  38. 3
      tools/doxygen/Doxyfile.core.internal

2
CMakeLists.txt generated

@ -1698,7 +1698,6 @@ add_library(grpc
src/core/ext/filters/client_channel/http_proxy.cc
src/core/ext/filters/client_channel/lb_policy/address_filtering.cc
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc
@ -2740,7 +2739,6 @@ add_library(grpc_unsecure
src/core/ext/filters/client_channel/http_proxy.cc
src/core/ext/filters/client_channel/lb_policy/address_filtering.cc
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc

2
Makefile generated

@ -980,7 +980,6 @@ LIBGRPC_SRC = \
src/core/ext/filters/client_channel/http_proxy.cc \
src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \
@ -1875,7 +1874,6 @@ LIBGRPC_UNSECURE_SRC = \
src/core/ext/filters/client_channel/http_proxy.cc \
src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \

3
Package.swift generated

@ -148,8 +148,6 @@ let package = Package(
"src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h",
"src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc",
"src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h",
"src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc",
"src/core/ext/filters/client_channel/lb_policy/endpoint_list.h",
"src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc",
"src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h",
"src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc",
@ -175,6 +173,7 @@ let package = Package(
"src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h",
"src/core/ext/filters/client_channel/lb_policy/rls/rls.cc",
"src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc",
"src/core/ext/filters/client_channel/lb_policy/subchannel_list.h",
"src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc",
"src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h",
"src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc",

@ -231,7 +231,6 @@ libs:
- src/core/ext/filters/client_channel/lb_policy/address_filtering.h
- src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h
- src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h
- src/core/ext/filters/client_channel/lb_policy/endpoint_list.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h
@ -244,6 +243,7 @@ libs:
- src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h
- src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h
- src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h
- src/core/ext/filters/client_channel/lb_policy/subchannel_list.h
- src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h
- src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h
- src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h
@ -1037,7 +1037,6 @@ libs:
- src/core/ext/filters/client_channel/http_proxy.cc
- src/core/ext/filters/client_channel/lb_policy/address_filtering.cc
- src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc
- src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc
@ -1958,7 +1957,6 @@ libs:
- src/core/ext/filters/client_channel/lb_policy/address_filtering.h
- src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h
- src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h
- src/core/ext/filters/client_channel/lb_policy/endpoint_list.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h
@ -1970,6 +1968,7 @@ libs:
- src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h
- src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h
- src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h
- src/core/ext/filters/client_channel/lb_policy/subchannel_list.h
- src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h
- src/core/ext/filters/client_channel/local_subchannel_pool.h
- src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.h
@ -2371,7 +2370,6 @@ libs:
- src/core/ext/filters/client_channel/http_proxy.cc
- src/core/ext/filters/client_channel/lb_policy/address_filtering.cc
- src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc
- src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc

1
config.m4 generated

@ -59,7 +59,6 @@ if test "$PHP_GRPC" != "no"; then
src/core/ext/filters/client_channel/http_proxy.cc \
src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \

1
config.w32 generated

@ -24,7 +24,6 @@ if (PHP_GRPC != "no") {
"src\\core\\ext\\filters\\client_channel\\http_proxy.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\address_filtering.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\child_policy_handler.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\endpoint_list.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\client_load_reporting_filter.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\grpclb.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\grpclb_balancer_addresses.cc " +

4
gRPC-C++.podspec generated

@ -263,7 +263,6 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/address_filtering.h',
'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h',
@ -276,6 +275,7 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h',
'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h',
'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h',
'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h',
'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h',
'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h',
'src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h',
@ -1327,7 +1327,6 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/address_filtering.h',
'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h',
@ -1340,6 +1339,7 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h',
'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h',
'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h',
'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h',
'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h',
'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h',
'src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h',

5
gRPC-Core.podspec generated

@ -249,8 +249,6 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc',
@ -276,6 +274,7 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h',
'src/core/ext/filters/client_channel/lb_policy/rls/rls.cc',
'src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc',
'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h',
'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc',
'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h',
'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc',
@ -2079,7 +2078,6 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/address_filtering.h',
'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h',
@ -2092,6 +2090,7 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h',
'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h',
'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h',
'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h',
'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h',
'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h',
'src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h',

3
grpc.gemspec generated

@ -154,8 +154,6 @@ Gem::Specification.new do |s|
s.files += %w( src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/endpoint_list.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc )
@ -181,6 +179,7 @@ Gem::Specification.new do |s|
s.files += %w( src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/rls/rls.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/subchannel_list.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc )

2
grpc.gyp generated

@ -284,7 +284,6 @@
'src/core/ext/filters/client_channel/http_proxy.cc',
'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc',
@ -1119,7 +1118,6 @@
'src/core/ext/filters/client_channel/http_proxy.cc',
'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc',

3
package.xml generated

@ -136,8 +136,6 @@
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc" role="src" />
@ -163,6 +161,7 @@
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/rls/rls.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/subchannel_list.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc" role="src" />

@ -4582,40 +4582,32 @@ grpc_cc_library(
"//:grpc_trace",
"//:orphanable",
"//:ref_counted_ptr",
"//:sockaddr_utils",
"//:work_serializer",
],
)
grpc_cc_library(
name = "lb_endpoint_list",
srcs = [
"ext/filters/client_channel/lb_policy/endpoint_list.cc",
],
name = "grpc_lb_subchannel_list",
hdrs = [
"ext/filters/client_channel/lb_policy/endpoint_list.h",
"ext/filters/client_channel/lb_policy/subchannel_list.h",
],
external_deps = [
"absl/functional:any_invocable",
"absl/status",
"absl/status:statusor",
"absl/types:optional",
],
language = "c++",
deps = [
"channel_args",
"delegating_helper",
"grpc_lb_policy_pick_first",
"json",
"dual_ref_counted",
"gpr_manual_constructor",
"health_check_client",
"iomgr_fwd",
"lb_policy",
"lb_policy_registry",
"pollset_set",
"subchannel_interface",
"//:config",
"//:debug_location",
"//:gpr",
"//:grpc_base",
"//:orphanable",
"//:grpc_client_channel",
"//:ref_counted_ptr",
"//:server_address",
"//:work_serializer",
@ -4642,8 +4634,8 @@ grpc_cc_library(
deps = [
"channel_args",
"env",
"health_check_client",
"iomgr_fwd",
"grpc_lb_subchannel_list",
"grpc_outlier_detection_header",
"json",
"json_args",
"json_object_loader",
@ -4659,6 +4651,7 @@ grpc_cc_library(
"//:orphanable",
"//:ref_counted_ptr",
"//:server_address",
"//:work_serializer",
],
)
@ -4683,18 +4676,16 @@ grpc_cc_library(
deps = [
"channel_args",
"closure",
"delegating_helper",
"error",
"grpc_lb_policy_pick_first",
"grpc_lb_subchannel_list",
"grpc_service_config",
"json",
"json_args",
"json_object_loader",
"lb_policy",
"lb_policy_factory",
"lb_policy_registry",
"pollset_set",
"ref_counted",
"subchannel_interface",
"unique_type_name",
"validation_errors",
"//:config",
@ -4727,10 +4718,11 @@ grpc_cc_library(
language = "c++",
deps = [
"channel_args",
"grpc_lb_subchannel_list",
"json",
"lb_endpoint_list",
"lb_policy",
"lb_policy_factory",
"subchannel_interface",
"//:config",
"//:debug_location",
"//:gpr",
@ -4772,16 +4764,15 @@ grpc_cc_library(
"absl/status:statusor",
"absl/strings",
"absl/types:optional",
"absl/types:variant",
],
language = "c++",
deps = [
"channel_args",
"grpc_backend_metric_data",
"grpc_lb_subchannel_list",
"json",
"json_args",
"json_object_loader",
"lb_endpoint_list",
"lb_policy",
"lb_policy_factory",
"ref_counted",
@ -4819,6 +4810,7 @@ grpc_cc_library(
"time",
"validation_errors",
"//:gpr_platform",
"//:server_address",
],
)
@ -4845,6 +4837,7 @@ grpc_cc_library(
"lb_policy",
"lb_policy_factory",
"lb_policy_registry",
"match",
"pollset_set",
"ref_counted",
"subchannel_interface",

@ -1,188 +0,0 @@
//
// Copyright 2015 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <grpc/support/port_platform.h>
#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h"
#include <stdlib.h>
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/types/optional.h"
#include <grpc/impl/connectivity_state.h>
#include <grpc/support/json.h>
#include <grpc/support/log.h>
#include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/config/core_configuration.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/iomgr/pollset_set.h"
#include "src/core/lib/json/json.h"
#include "src/core/lib/load_balancing/delegating_helper.h"
#include "src/core/lib/load_balancing/lb_policy.h"
#include "src/core/lib/load_balancing/lb_policy_registry.h"
#include "src/core/lib/resolver/server_address.h"
namespace grpc_core {
//
// EndpointList::Endpoint::Helper
//
class EndpointList::Endpoint::Helper
: public LoadBalancingPolicy::DelegatingChannelControlHelper {
public:
explicit Helper(RefCountedPtr<Endpoint> endpoint)
: endpoint_(std::move(endpoint)) {}
~Helper() override { endpoint_.reset(DEBUG_LOCATION, "Helper"); }
RefCountedPtr<SubchannelInterface> CreateSubchannel(
ServerAddress address, const ChannelArgs& args) override {
return endpoint_->CreateSubchannel(std::move(address), args);
}
void UpdateState(
grpc_connectivity_state state, const absl::Status& status,
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> picker) override {
auto old_state = std::exchange(endpoint_->connectivity_state_, state);
endpoint_->picker_ = std::move(picker);
endpoint_->OnStateUpdate(old_state, state, status);
}
private:
LoadBalancingPolicy::ChannelControlHelper* parent_helper() const override {
return endpoint_->endpoint_list_->channel_control_helper();
}
RefCountedPtr<Endpoint> endpoint_;
};
//
// EndpointList::Endpoint
//
void EndpointList::Endpoint::Init(
const ServerAddress& address, const ChannelArgs& args,
std::shared_ptr<WorkSerializer> work_serializer) {
ChannelArgs child_args =
args.Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true)
.Set(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX, true);
LoadBalancingPolicy::Args lb_policy_args;
lb_policy_args.work_serializer = std::move(work_serializer);
lb_policy_args.args = child_args;
lb_policy_args.channel_control_helper =
std::make_unique<Helper>(Ref(DEBUG_LOCATION, "Helper"));
child_policy_ =
CoreConfiguration::Get().lb_policy_registry().CreateLoadBalancingPolicy(
"pick_first", std::move(lb_policy_args));
if (GPR_UNLIKELY(endpoint_list_->tracer_ != nullptr)) {
gpr_log(GPR_INFO, "[%s %p] endpoint %p: created child policy %p",
endpoint_list_->tracer_, endpoint_list_->policy_.get(), this,
child_policy_.get());
}
// Add our interested_parties pollset_set to that of the newly created
// child policy. This will make the child policy progress upon activity on
// this policy, which in turn is tied to the application's call.
grpc_pollset_set_add_pollset_set(
child_policy_->interested_parties(),
endpoint_list_->policy_->interested_parties());
// Construct pick_first config.
auto config =
CoreConfiguration::Get().lb_policy_registry().ParseLoadBalancingConfig(
Json::FromArray(
{Json::FromObject({{"pick_first", Json::FromObject({})}})}));
GPR_ASSERT(config.ok());
// Update child policy.
LoadBalancingPolicy::UpdateArgs update_args;
update_args.addresses.emplace().emplace_back(address);
update_args.args = child_args;
update_args.config = std::move(*config);
// TODO(roth): If the child reports a non-OK status with the update,
// we need to propagate that back to the resolver somehow.
(void)child_policy_->UpdateLocked(std::move(update_args));
}
void EndpointList::Endpoint::Orphan() {
// Remove pollset_set linkage.
grpc_pollset_set_del_pollset_set(
child_policy_->interested_parties(),
endpoint_list_->policy_->interested_parties());
child_policy_.reset();
picker_.reset();
Unref();
}
void EndpointList::Endpoint::ResetBackoffLocked() {
if (child_policy_ != nullptr) child_policy_->ResetBackoffLocked();
}
void EndpointList::Endpoint::ExitIdleLocked() {
if (child_policy_ != nullptr) child_policy_->ExitIdleLocked();
}
size_t EndpointList::Endpoint::Index() const {
for (size_t i = 0; i < endpoint_list_->endpoints_.size(); ++i) {
if (endpoint_list_->endpoints_[i].get() == this) return i;
}
return -1;
}
RefCountedPtr<SubchannelInterface> EndpointList::Endpoint::CreateSubchannel(
ServerAddress address, const ChannelArgs& args) {
return endpoint_list_->channel_control_helper()->CreateSubchannel(
std::move(address), args);
}
//
// EndpointList
//
void EndpointList::Init(
const ServerAddressList& addresses, const ChannelArgs& args,
absl::AnyInvocable<OrphanablePtr<Endpoint>(
RefCountedPtr<EndpointList>, const ServerAddress&, const ChannelArgs&)>
create_endpoint) {
for (const ServerAddress& address : addresses) {
endpoints_.push_back(
create_endpoint(Ref(DEBUG_LOCATION, "Endpoint"), address, args));
}
}
void EndpointList::ResetBackoffLocked() {
for (const auto& endpoint : endpoints_) {
endpoint->ResetBackoffLocked();
}
}
bool EndpointList::AllEndpointsSeenInitialState() const {
for (const auto& endpoint : endpoints_) {
if (!endpoint->connectivity_state().has_value()) return false;
}
return true;
}
} // namespace grpc_core

@ -1,214 +0,0 @@
//
// Copyright 2015 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H
#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H
#include <grpc/support/port_platform.h>
#include <stdlib.h>
#include <memory>
#include <utility>
#include <vector>
#include "absl/functional/any_invocable.h"
#include "absl/status/status.h"
#include "absl/types/optional.h"
#include <grpc/impl/connectivity_state.h>
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/work_serializer.h"
#include "src/core/lib/load_balancing/lb_policy.h"
#include "src/core/lib/load_balancing/subchannel_interface.h"
#include "src/core/lib/resolver/server_address.h"
namespace grpc_core {
// A list of endpoints for use in a petiole LB policy. Each endpoint may
// have one or more addresses, which will be passed down to a pick_first
// child policy.
//
// To use this, a petiole policy must define its own subclass of both
// EndpointList and EndpointList::Endpoint, like so:
/*
class MyEndpointList : public EndpointList {
public:
MyEndpointList(RefCountedPtr<MyLbPolicy> lb_policy,
const ServerAddressList& addresses, const ChannelArgs& args)
: EndpointList(std::move(lb_policy),
GRPC_TRACE_FLAG_ENABLED(grpc_my_tracer)
? "MyEndpointList"
: nullptr) {
Init(addresses, args,
[&](RefCountedPtr<MyEndpointList> endpoint_list,
const ServerAddress& address, const ChannelArgs& args) {
return MakeOrphanable<MyEndpoint>(
std::move(endpoint_list), address, args,
policy<MyLbPolicy>()->work_serializer());
});
}
private:
class MyEndpoint : public Endpoint {
public:
MyEndpoint(RefCountedPtr<MyEndpointList> endpoint_list,
const ServerAddress& address, const ChannelArgs& args,
std::shared_ptr<WorkSerializer> work_serializer)
: Endpoint(std::move(endpoint_list)) {
Init(address, args, std::move(work_serializer));
}
private:
void OnStateUpdate(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state,
const absl::Status& status) override {
// ...handle connectivity state change...
}
};
LoadBalancingPolicy::ChannelControlHelper* channel_control_helper()
const override {
return policy<MyLbPolicy>()->channel_control_helper();
}
};
*/
// TODO(roth): Consider wrapping this in an LB policy subclass for petiole
// policies to inherit from.
class EndpointList : public InternallyRefCounted<EndpointList> {
public:
// An individual endpoint.
class Endpoint : public InternallyRefCounted<Endpoint> {
public:
~Endpoint() override { endpoint_list_.reset(DEBUG_LOCATION, "Endpoint"); }
void Orphan() override;
void ResetBackoffLocked();
void ExitIdleLocked();
absl::optional<grpc_connectivity_state> connectivity_state() const {
return connectivity_state_;
}
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> picker() const {
return picker_;
}
protected:
// We use two-phase initialization here to ensure that the vtable is
// initialized before we need to use it. Subclass must invoke Init()
// from inside its ctor.
explicit Endpoint(RefCountedPtr<EndpointList> endpoint_list)
: endpoint_list_(std::move(endpoint_list)) {}
void Init(const ServerAddress& address, const ChannelArgs& args,
std::shared_ptr<WorkSerializer> work_serializer);
// Templated for convenience, to provide a short-hand for
// down-casting in the caller.
template <typename T>
T* endpoint_list() const {
return static_cast<T*>(endpoint_list_.get());
}
// Templated for convenience, to provide a short-hand for down-casting
// in the caller.
template <typename T>
T* policy() const {
return endpoint_list_->policy<T>();
}
// Returns the index of this endpoint within the EndpointList.
// Intended for trace logging.
size_t Index() const;
private:
class Helper;
// Called when the child policy reports a connectivity state update.
virtual void OnStateUpdate(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state, const absl::Status& status) = 0;
// Called to create a subchannel. Subclasses may override.
virtual RefCountedPtr<SubchannelInterface> CreateSubchannel(
ServerAddress address, const ChannelArgs& args);
RefCountedPtr<EndpointList> endpoint_list_;
OrphanablePtr<LoadBalancingPolicy> child_policy_;
absl::optional<grpc_connectivity_state> connectivity_state_;
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> picker_;
};
~EndpointList() override { policy_.reset(DEBUG_LOCATION, "EndpointList"); }
void Orphan() override {
endpoints_.clear();
Unref();
}
size_t size() const { return endpoints_.size(); }
const std::vector<OrphanablePtr<Endpoint>>& endpoints() const {
return endpoints_;
}
void ResetBackoffLocked();
protected:
// We use two-phase initialization here to ensure that the vtable is
// initialized before we need to use it. Subclass must invoke Init()
// from inside its ctor.
EndpointList(RefCountedPtr<LoadBalancingPolicy> policy, const char* tracer)
: policy_(std::move(policy)), tracer_(tracer) {}
void Init(const ServerAddressList& addresses, const ChannelArgs& args,
absl::AnyInvocable<OrphanablePtr<Endpoint>(
RefCountedPtr<EndpointList>, const ServerAddress&,
const ChannelArgs&)>
create_endpoint);
// Templated for convenience, to provide a short-hand for down-casting
// in the caller.
template <typename T>
T* policy() const {
return static_cast<T*>(policy_.get());
}
// Returns true if all endpoints have seen their initial connectivity
// state notification.
bool AllEndpointsSeenInitialState() const;
private:
// Returns the parent policy's helper. Needed because the accessor
// method is protected on LoadBalancingPolicy.
virtual LoadBalancingPolicy::ChannelControlHelper* channel_control_helper()
const = 0;
RefCountedPtr<LoadBalancingPolicy> policy_;
const char* tracer_;
std::vector<OrphanablePtr<Endpoint>> endpoints_;
};
} // namespace grpc_core
#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H

@ -28,7 +28,6 @@
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "upb/base/string_view.h"
@ -45,7 +44,6 @@
#include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h"
#include "src/core/ext/filters/client_channel/subchannel.h"
#include "src/core/ext/filters/client_channel/subchannel_stream_client.h"
#include "src/core/lib/address_utils/sockaddr_utils.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/channel/channel_trace.h"
#include "src/core/lib/debug/trace.h"
@ -116,7 +114,7 @@ void HealthProducer::HealthChecker::Orphan() {
void HealthProducer::HealthChecker::AddWatcherLocked(HealthWatcher* watcher) {
watchers_.insert(watcher);
if (state_.has_value()) watcher->Notify(*state_, status_);
watcher->Notify(state_, status_);
}
bool HealthProducer::HealthChecker::RemoveWatcherLocked(
@ -130,18 +128,13 @@ void HealthProducer::HealthChecker::OnConnectivityStateChangeLocked(
if (state == GRPC_CHANNEL_READY) {
// We should already be in CONNECTING, and we don't want to change
// that until we see the initial response on the stream.
if (!state_.has_value()) {
state_ = GRPC_CHANNEL_CONNECTING;
status_ = absl::OkStatus();
} else {
GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING);
}
GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING);
// Start the health watch stream.
StartHealthStreamLocked();
} else {
state_ = state;
status_ = status;
NotifyWatchersLocked(*state_, status_);
NotifyWatchersLocked(state_, status_);
// We're not connected, so stop health checking.
stream_client_.reset();
}
@ -184,21 +177,12 @@ void HealthProducer::HealthChecker::NotifyWatchersLocked(
void HealthProducer::HealthChecker::OnHealthWatchStatusChange(
grpc_connectivity_state state, const absl::Status& status) {
if (state == GRPC_CHANNEL_SHUTDOWN) return;
// Prepend the subchannel's address to the status if needed.
absl::Status use_status;
if (!status.ok()) {
std::string address_str =
grpc_sockaddr_to_uri(&producer_->subchannel_->address())
.value_or("<unknown address type>");
use_status = absl::Status(
status.code(), absl::StrCat(address_str, ": ", status.message()));
}
work_serializer_->Schedule(
[self = Ref(), state, status = std::move(use_status)]() mutable {
[self = Ref(), state, status]() {
MutexLock lock(&self->producer_->mu_);
if (self->stream_client_ != nullptr) {
self->state_ = state;
self->status_ = std::move(status);
self->status_ = status;
for (HealthWatcher* watcher : self->watchers_) {
watcher->Notify(state, self->status_);
}
@ -378,7 +362,7 @@ void HealthProducer::AddWatcher(
grpc_pollset_set_add_pollset_set(interested_parties_,
watcher->interested_parties());
if (!health_check_service_name.has_value()) {
if (state_.has_value()) watcher->Notify(*state_, status_);
watcher->Notify(state_, status_);
non_health_watchers_.insert(watcher);
} else {
auto it =
@ -435,13 +419,6 @@ void HealthProducer::OnConnectivityStateChange(grpc_connectivity_state state,
//
HealthWatcher::~HealthWatcher() {
if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) {
gpr_log(GPR_INFO,
"HealthWatcher %p: unregistering from producer %p "
"(health_check_service_name=\"%s\")",
this, producer_.get(),
health_check_service_name_.value_or("N/A").c_str());
}
if (producer_ != nullptr) {
producer_->RemoveWatcher(this, health_check_service_name_);
}
@ -468,13 +445,6 @@ void HealthWatcher::SetSubchannel(Subchannel* subchannel) {
if (created) producer_->Start(subchannel->Ref());
// Register ourself with the producer.
producer_->AddWatcher(this, health_check_service_name_);
if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) {
gpr_log(GPR_INFO,
"HealthWatcher %p: registered with producer %p (created=%d, "
"health_check_service_name=\"%s\")",
this, producer_.get(), created,
health_check_service_name_.value_or("N/A").c_str());
}
}
void HealthWatcher::Notify(grpc_connectivity_state state, absl::Status status) {
@ -500,11 +470,6 @@ MakeHealthCheckWatcher(
health_check_service_name =
args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME);
}
if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) {
gpr_log(GPR_INFO,
"creating HealthWatcher -- health_check_service_name=\"%s\"",
health_check_service_name.value_or("N/A").c_str());
}
return std::make_unique<HealthWatcher>(std::move(work_serializer),
std::move(health_check_service_name),
std::move(watcher));

@ -127,8 +127,7 @@ class HealthProducer : public Subchannel::DataProducerInterface {
std::shared_ptr<WorkSerializer> work_serializer_ =
std::make_shared<WorkSerializer>();
absl::optional<grpc_connectivity_state> state_
ABSL_GUARDED_BY(&HealthProducer::mu_);
grpc_connectivity_state state_ ABSL_GUARDED_BY(&HealthProducer::mu_);
absl::Status status_ ABSL_GUARDED_BY(&HealthProducer::mu_);
OrphanablePtr<SubchannelStreamClient> stream_client_
ABSL_GUARDED_BY(&HealthProducer::mu_);
@ -144,7 +143,7 @@ class HealthProducer : public Subchannel::DataProducerInterface {
grpc_pollset_set* interested_parties_;
Mutex mu_;
absl::optional<grpc_connectivity_state> state_ ABSL_GUARDED_BY(&mu_);
grpc_connectivity_state state_ ABSL_GUARDED_BY(&mu_);
absl::Status status_ ABSL_GUARDED_BY(&mu_);
RefCountedPtr<ConnectedSubchannel> connected_subchannel_
ABSL_GUARDED_BY(&mu_);

@ -50,6 +50,7 @@
#include "src/core/lib/config/core_configuration.h"
#include "src/core/lib/debug/trace.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/match.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
@ -124,9 +125,12 @@ class OutlierDetectionLb : public LoadBalancingPolicy {
class SubchannelWrapper : public DelegatingSubchannel {
public:
SubchannelWrapper(RefCountedPtr<SubchannelState> subchannel_state,
RefCountedPtr<SubchannelInterface> subchannel)
RefCountedPtr<SubchannelInterface> subchannel,
bool disable_via_raw_connectivity_watch)
: DelegatingSubchannel(std::move(subchannel)),
subchannel_state_(std::move(subchannel_state)) {
subchannel_state_(std::move(subchannel_state)),
disable_via_raw_connectivity_watch_(
disable_via_raw_connectivity_watch) {
if (subchannel_state_ != nullptr) {
subchannel_state_->AddSubchannel(this);
if (subchannel_state_->ejection_time().has_value()) {
@ -145,6 +149,12 @@ class OutlierDetectionLb : public LoadBalancingPolicy {
void Uneject();
void WatchConnectivityState(
std::unique_ptr<ConnectivityStateWatcherInterface> watcher) override;
void CancelConnectivityStateWatch(
ConnectivityStateWatcherInterface* watcher) override;
void AddDataWatcher(std::unique_ptr<DataWatcherInterface> watcher) override;
RefCountedPtr<SubchannelState> subchannel_state() const {
@ -152,6 +162,11 @@ class OutlierDetectionLb : public LoadBalancingPolicy {
}
private:
// TODO(roth): As a temporary hack, this needs to handle watchers
// stored as both unique_ptr<> and shared_ptr<>, since the former is
// used for raw connectivity state watches and the latter is used
// for health watches. This hack will go away as part of implementing
// dualstack backend support.
class WatcherWrapper
: public SubchannelInterface::ConnectivityStateWatcherInterface {
public:
@ -161,10 +176,16 @@ class OutlierDetectionLb : public LoadBalancingPolicy {
bool ejected)
: watcher_(std::move(health_watcher)), ejected_(ejected) {}
WatcherWrapper(std::unique_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>
watcher,
bool ejected)
: watcher_(std::move(watcher)), ejected_(ejected) {}
void Eject() {
ejected_ = true;
if (last_seen_state_.has_value()) {
watcher_->OnConnectivityStateChange(
watcher()->OnConnectivityStateChange(
GRPC_CHANNEL_TRANSIENT_FAILURE,
absl::UnavailableError(
"subchannel ejected by outlier detection"));
@ -174,8 +195,8 @@ class OutlierDetectionLb : public LoadBalancingPolicy {
void Uneject() {
ejected_ = false;
if (last_seen_state_.has_value()) {
watcher_->OnConnectivityStateChange(*last_seen_state_,
last_seen_status_);
watcher()->OnConnectivityStateChange(*last_seen_state_,
last_seen_status_);
}
}
@ -190,16 +211,30 @@ class OutlierDetectionLb : public LoadBalancingPolicy {
status = absl::UnavailableError(
"subchannel ejected by outlier detection");
}
watcher_->OnConnectivityStateChange(new_state, status);
watcher()->OnConnectivityStateChange(new_state, status);
}
}
grpc_pollset_set* interested_parties() override {
return watcher_->interested_parties();
return watcher()->interested_parties();
}
private:
std::shared_ptr<SubchannelInterface::ConnectivityStateWatcherInterface>
SubchannelInterface::ConnectivityStateWatcherInterface* watcher() const {
return Match(
watcher_,
[](const std::shared_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>&
watcher) { return watcher.get(); },
[](const std::unique_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>&
watcher) { return watcher.get(); });
}
absl::variant<std::shared_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>,
std::unique_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>>
watcher_;
absl::optional<grpc_connectivity_state> last_seen_state_;
absl::Status last_seen_status_;
@ -207,8 +242,12 @@ class OutlierDetectionLb : public LoadBalancingPolicy {
};
RefCountedPtr<SubchannelState> subchannel_state_;
const bool disable_via_raw_connectivity_watch_;
bool ejected_ = false;
WatcherWrapper* watcher_wrapper_ = nullptr;
std::map<SubchannelInterface::ConnectivityStateWatcherInterface*,
WatcherWrapper*>
watchers_;
WatcherWrapper* watcher_wrapper_ = nullptr; // For health watching.
};
class SubchannelState : public RefCounted<SubchannelState> {
@ -389,14 +428,50 @@ class OutlierDetectionLb : public LoadBalancingPolicy {
void OutlierDetectionLb::SubchannelWrapper::Eject() {
ejected_ = true;
// Ejecting the subchannel may cause the child policy to cancel the watch,
// so we need to be prepared for the map to be modified while we are
// iterating.
for (auto it = watchers_.begin(); it != watchers_.end();) {
WatcherWrapper* watcher = it->second;
++it;
watcher->Eject();
}
if (watcher_wrapper_ != nullptr) watcher_wrapper_->Eject();
}
void OutlierDetectionLb::SubchannelWrapper::Uneject() {
ejected_ = false;
for (auto& watcher : watchers_) {
watcher.second->Uneject();
}
if (watcher_wrapper_ != nullptr) watcher_wrapper_->Uneject();
}
void OutlierDetectionLb::SubchannelWrapper::WatchConnectivityState(
std::unique_ptr<ConnectivityStateWatcherInterface> watcher) {
if (disable_via_raw_connectivity_watch_) {
wrapped_subchannel()->WatchConnectivityState(std::move(watcher));
return;
}
ConnectivityStateWatcherInterface* watcher_ptr = watcher.get();
auto watcher_wrapper =
std::make_unique<WatcherWrapper>(std::move(watcher), ejected_);
watchers_.emplace(watcher_ptr, watcher_wrapper.get());
wrapped_subchannel()->WatchConnectivityState(std::move(watcher_wrapper));
}
void OutlierDetectionLb::SubchannelWrapper::CancelConnectivityStateWatch(
ConnectivityStateWatcherInterface* watcher) {
if (disable_via_raw_connectivity_watch_) {
wrapped_subchannel()->CancelConnectivityStateWatch(watcher);
return;
}
auto it = watchers_.find(watcher);
if (it == watchers_.end()) return;
wrapped_subchannel()->CancelConnectivityStateWatch(it->second);
watchers_.erase(it);
}
void OutlierDetectionLb::SubchannelWrapper::AddDataWatcher(
std::unique_ptr<DataWatcherInterface> watcher) {
auto* w = static_cast<InternalSubchannelDataWatcherInterface*>(watcher.get());
@ -702,12 +777,22 @@ OrphanablePtr<LoadBalancingPolicy> OutlierDetectionLb::CreateChildPolicyLocked(
RefCountedPtr<SubchannelInterface> OutlierDetectionLb::Helper::CreateSubchannel(
ServerAddress address, const ChannelArgs& args) {
if (parent()->shutting_down_) return nullptr;
// If the address has the DisableOutlierDetectionAttribute attribute,
// ignore it for raw connectivity state updates.
// TODO(roth): This is a hack to prevent outlier_detection from
// working with pick_first, as per discussion in
// https://github.com/grpc/grpc/issues/32967. Remove this as part of
// implementing dualstack backend support.
const bool disable_via_raw_connectivity_watch =
address.args().GetInt(GRPC_ARG_OUTLIER_DETECTION_DISABLE) == 1;
RefCountedPtr<SubchannelState> subchannel_state;
std::string key = MakeKeyForAddress(address);
if (GRPC_TRACE_FLAG_ENABLED(grpc_outlier_detection_lb_trace)) {
gpr_log(GPR_INFO,
"[outlier_detection_lb %p] using key %s for subchannel address %s",
parent(), key.c_str(), address.ToString().c_str());
"[outlier_detection_lb %p] using key %s for subchannel "
"address %s, disable_via_raw_connectivity_watch=%d",
parent(), key.c_str(), address.ToString().c_str(),
disable_via_raw_connectivity_watch);
}
if (!key.empty()) {
auto it = parent()->subchannel_state_map_.find(key);
@ -716,8 +801,10 @@ RefCountedPtr<SubchannelInterface> OutlierDetectionLb::Helper::CreateSubchannel(
}
}
auto subchannel = MakeRefCounted<SubchannelWrapper>(
subchannel_state, parent()->channel_control_helper()->CreateSubchannel(
std::move(address), args));
subchannel_state,
parent()->channel_control_helper()->CreateSubchannel(std::move(address),
args),
disable_via_raw_connectivity_watch);
if (subchannel_state != nullptr) {
subchannel_state->AddSubchannel(subchannel.get());
}

@ -28,6 +28,7 @@
#include "src/core/lib/json/json.h"
#include "src/core/lib/json/json_args.h"
#include "src/core/lib/json/json_object_loader.h"
#include "src/core/lib/resolver/server_address.h"
namespace grpc_core {
@ -89,6 +90,12 @@ struct OutlierDetectionConfig {
ValidationErrors* errors);
};
// TODO(roth): This is a horrible hack used to disable outlier detection
// when used with the pick_first policy. Remove this as part of
// implementing the dualstack backend design.
#define GRPC_ARG_OUTLIER_DETECTION_DISABLE \
GRPC_ARG_NO_SUBCHANNEL_PREFIX "outlier_detection_disable"
} // namespace grpc_core
#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_OUTLIER_DETECTION_OUTLIER_DETECTION_H

@ -16,8 +16,6 @@
#include <grpc/support/port_platform.h>
#include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h"
#include <inttypes.h>
#include <string.h>
@ -35,21 +33,22 @@
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include <grpc/grpc.h>
#include <grpc/impl/connectivity_state.h>
#include <grpc/support/log.h>
#include "src/core/ext/filters/client_channel/lb_policy/health_check_client.h"
#include "src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h"
#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/config/core_configuration.h"
#include "src/core/lib/debug/trace.h"
#include "src/core/lib/gpr/string.h"
#include "src/core/lib/gprpp/crash.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/env.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/validation_errors.h"
#include "src/core/lib/iomgr/iomgr_fwd.h"
#include "src/core/lib/gprpp/work_serializer.h"
#include "src/core/lib/json/json.h"
#include "src/core/lib/json/json_args.h"
#include "src/core/lib/json/json_object_loader.h"
@ -118,87 +117,52 @@ class PickFirst : public LoadBalancingPolicy {
private:
~PickFirst() override;
class SubchannelList : public InternallyRefCounted<SubchannelList> {
public:
class SubchannelData {
public:
SubchannelData(SubchannelList* subchannel_list,
RefCountedPtr<SubchannelInterface> subchannel);
SubchannelInterface* subchannel() const { return subchannel_.get(); }
absl::optional<grpc_connectivity_state> connectivity_state() const {
return connectivity_state_;
}
// Returns the index into the subchannel list of this object.
size_t Index() const {
return static_cast<size_t>(this -
&subchannel_list_->subchannels_.front());
}
// Resets the connection backoff.
void ResetBackoffLocked() {
if (subchannel_ != nullptr) subchannel_->ResetBackoff();
}
// Cancels any pending connectivity watch and unrefs the subchannel.
void ShutdownLocked();
private:
// Watcher for subchannel connectivity state.
class Watcher
: public SubchannelInterface::ConnectivityStateWatcherInterface {
public:
Watcher(SubchannelData* subchannel_data,
RefCountedPtr<SubchannelList> subchannel_list)
: subchannel_data_(subchannel_data),
subchannel_list_(std::move(subchannel_list)) {}
~Watcher() override {
subchannel_list_.reset(DEBUG_LOCATION, "Watcher dtor");
}
class PickFirstSubchannelList;
void OnConnectivityStateChange(grpc_connectivity_state new_state,
absl::Status status) override {
subchannel_data_->OnConnectivityStateChange(new_state,
std::move(status));
}
class PickFirstSubchannelData
: public SubchannelData<PickFirstSubchannelList,
PickFirstSubchannelData> {
public:
PickFirstSubchannelData(
SubchannelList<PickFirstSubchannelList, PickFirstSubchannelData>*
subchannel_list,
const ServerAddress& address,
RefCountedPtr<SubchannelInterface> subchannel)
: SubchannelData(subchannel_list, address, std::move(subchannel)) {}
void ProcessConnectivityChangeLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) override;
// Processes the connectivity change to READY for an unselected subchannel.
void ProcessUnselectedReadyLocked();
};
grpc_pollset_set* interested_parties() override {
return subchannel_list_->policy()->interested_parties();
}
class PickFirstSubchannelList
: public SubchannelList<PickFirstSubchannelList,
PickFirstSubchannelData> {
public:
PickFirstSubchannelList(PickFirst* policy, ServerAddressList addresses,
const ChannelArgs& args)
: SubchannelList(policy,
(GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)
? "PickFirstSubchannelList"
: nullptr),
std::move(addresses), policy->channel_control_helper(),
args) {
// Need to maintain a ref to the LB policy as long as we maintain
// any references to subchannels, since the subchannels'
// pollset_sets will include the LB policy's pollset_set.
policy->Ref(DEBUG_LOCATION, "subchannel_list").release();
// Note that we do not start trying to connect to any subchannel here,
// since we will wait until we see the initial connectivity state for all
// subchannels before doing that.
}
private:
SubchannelData* subchannel_data_;
RefCountedPtr<SubchannelList> subchannel_list_;
};
// This method will be invoked once soon after instantiation to report
// the current connectivity state, and it will then be invoked again
// whenever the connectivity state changes.
void OnConnectivityStateChange(grpc_connectivity_state new_state,
absl::Status status);
// Processes the connectivity change to READY for an unselected
// subchannel.
void ProcessUnselectedReadyLocked();
// Backpointer to owning subchannel list. Not owned.
SubchannelList* subchannel_list_;
// The subchannel.
RefCountedPtr<SubchannelInterface> subchannel_;
// Will be non-null when the subchannel's state is being watched.
SubchannelInterface::ConnectivityStateWatcherInterface* pending_watcher_ =
nullptr;
// Data updated by the watcher.
absl::optional<grpc_connectivity_state> connectivity_state_;
absl::Status connectivity_status_;
};
SubchannelList(RefCountedPtr<PickFirst> policy, ServerAddressList addresses,
const ChannelArgs& args);
~SubchannelList() override;
~PickFirstSubchannelList() override {
PickFirst* p = static_cast<PickFirst*>(policy());
p->Unref(DEBUG_LOCATION, "subchannel_list");
}
bool in_transient_failure() const { return in_transient_failure_; }
void set_in_transient_failure(bool in_transient_failure) {
@ -208,63 +172,15 @@ class PickFirst : public LoadBalancingPolicy {
size_t attempting_index() const { return attempting_index_; }
void set_attempting_index(size_t index) { attempting_index_ = index; }
// The number of subchannels in the list.
size_t size() const { return subchannels_.size(); }
// Returns true if the subchannel list is shutting down.
bool shutting_down() const { return shutting_down_; }
// Accessors.
PickFirst* policy() const { return policy_.get(); }
// Resets connection backoff of all subchannels.
void ResetBackoffLocked();
// Returns true if all subchannels have seen their initial
// connectivity state notifications.
bool AllSubchannelsSeenInitialState();
void Orphan() override;
private:
// Backpointer to owning policy.
RefCountedPtr<PickFirst> policy_;
ChannelArgs args_;
// The list of subchannels.
std::vector<SubchannelData> subchannels_;
// Is this list shutting down? This may be true due to the shutdown of the
// policy itself or because a newer update has arrived while this one hadn't
// finished processing.
bool shutting_down_ = false;
std::shared_ptr<WorkSerializer> work_serializer() const override {
return static_cast<PickFirst*>(policy())->work_serializer();
}
bool in_transient_failure_ = false;
size_t attempting_index_ = 0;
};
class HealthWatcher
: public SubchannelInterface::ConnectivityStateWatcherInterface {
public:
explicit HealthWatcher(RefCountedPtr<PickFirst> policy)
: policy_(std::move(policy)) {}
~HealthWatcher() override {
policy_.reset(DEBUG_LOCATION, "HealthWatcher dtor");
}
void OnConnectivityStateChange(grpc_connectivity_state new_state,
absl::Status status) override;
grpc_pollset_set* interested_parties() override {
return policy_->interested_parties();
}
private:
RefCountedPtr<PickFirst> policy_;
};
class Picker : public SubchannelPicker {
public:
explicit Picker(RefCountedPtr<SubchannelInterface> subchannel)
@ -282,24 +198,14 @@ class PickFirst : public LoadBalancingPolicy {
void AttemptToConnectUsingLatestUpdateArgsLocked();
void UnsetSelectedSubchannel();
// Whether we should enable health watching.
const bool enable_health_watch_;
// Whether we should omit our status message prefix.
const bool omit_status_message_prefix_;
// Lateset update args.
UpdateArgs latest_update_args_;
// All our subchannels.
OrphanablePtr<SubchannelList> subchannel_list_;
RefCountedPtr<PickFirstSubchannelList> subchannel_list_;
// Latest pending subchannel list.
OrphanablePtr<SubchannelList> latest_pending_subchannel_list_;
RefCountedPtr<PickFirstSubchannelList> latest_pending_subchannel_list_;
// Selected subchannel in \a subchannel_list_.
SubchannelList::SubchannelData* selected_ = nullptr;
// Health watcher for the selected subchannel.
SubchannelInterface::ConnectivityStateWatcherInterface* health_watcher_ =
nullptr;
SubchannelInterface::DataWatcherInterface* health_data_watcher_ = nullptr;
PickFirstSubchannelData* selected_ = nullptr;
// Are we in IDLE state?
bool idle_ = false;
// Are we shut down?
@ -308,16 +214,7 @@ class PickFirst : public LoadBalancingPolicy {
absl::BitGen bit_gen_;
};
PickFirst::PickFirst(Args args)
: LoadBalancingPolicy(std::move(args)),
enable_health_watch_(
channel_args()
.GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING)
.value_or(false)),
omit_status_message_prefix_(
channel_args()
.GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX)
.value_or(false)) {
PickFirst::PickFirst(Args args) : LoadBalancingPolicy(std::move(args)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO, "Pick First %p created.", this);
}
@ -336,7 +233,6 @@ void PickFirst::ShutdownLocked() {
gpr_log(GPR_INFO, "Pick First %p Shutting down", this);
}
shutdown_ = true;
UnsetSelectedSubchannel();
subchannel_list_.reset();
latest_pending_subchannel_list_.reset();
}
@ -372,11 +268,13 @@ void PickFirst::AttemptToConnectUsingLatestUpdateArgsLocked() {
"[PF %p] Shutting down previous pending subchannel list %p", this,
latest_pending_subchannel_list_.get());
}
latest_pending_subchannel_list_ = MakeOrphanable<SubchannelList>(
Ref(), std::move(addresses), latest_update_args_.args);
latest_pending_subchannel_list_ = MakeRefCounted<PickFirstSubchannelList>(
this, std::move(addresses), latest_update_args_.args);
latest_pending_subchannel_list_->StartWatchingLocked(
latest_update_args_.args);
// Empty update or no valid subchannels. Put the channel in
// TRANSIENT_FAILURE and request re-resolution.
if (latest_pending_subchannel_list_->size() == 0) {
if (latest_pending_subchannel_list_->num_subchannels() == 0) {
absl::Status status =
latest_update_args_.addresses.ok()
? absl::UnavailableError(absl::StrCat(
@ -389,8 +287,9 @@ void PickFirst::AttemptToConnectUsingLatestUpdateArgsLocked() {
}
// If the new update is empty or we don't yet have a selected subchannel in
// the current list, replace the current subchannel list immediately.
if (latest_pending_subchannel_list_->size() == 0 || selected_ == nullptr) {
UnsetSelectedSubchannel();
if (latest_pending_subchannel_list_->num_subchannels() == 0 ||
selected_ == nullptr) {
selected_ = nullptr;
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) &&
subchannel_list_ != nullptr) {
gpr_log(GPR_INFO, "[PF %p] Shutting down previous subchannel list %p",
@ -411,6 +310,8 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) {
this, args.addresses.status().ToString().c_str());
}
}
// Add GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg.
args.args = args.args.Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, 1);
// Set return status based on the address list.
absl::Status status;
if (!args.addresses.ok()) {
@ -423,6 +324,19 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) {
absl::c_shuffle(*args.addresses, bit_gen_);
}
}
// TODO(roth): This is a hack to disable outlier_detection when used
// with pick_first, for the reasons described in
// https://github.com/grpc/grpc/issues/32967. Remove this when
// implementing the dualstack design.
if (args.addresses.ok()) {
ServerAddressList addresses;
for (const auto& address : *args.addresses) {
addresses.emplace_back(
address.address(),
address.args().Set(GRPC_ARG_OUTLIER_DETECTION_DISABLE, 1));
}
args.addresses = std::move(addresses);
}
// If the update contains a resolver error and we have a previous update
// that was not a resolver error, keep using the previous addresses.
if (!args.addresses.ok() && latest_update_args_.config != nullptr) {
@ -438,122 +352,18 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) {
return status;
}
void PickFirst::UnsetSelectedSubchannel() {
if (selected_ != nullptr && health_data_watcher_ != nullptr) {
selected_->subchannel()->CancelDataWatcher(health_data_watcher_);
}
selected_ = nullptr;
health_watcher_ = nullptr;
health_data_watcher_ = nullptr;
}
//
// PickFirst::HealthWatcher
//
void PickFirst::HealthWatcher::OnConnectivityStateChange(
grpc_connectivity_state new_state, absl::Status status) {
if (policy_->health_watcher_ != this) return;
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO, "[PF %p] health watch state update: %s (%s)",
policy_.get(), ConnectivityStateName(new_state),
status.ToString().c_str());
}
switch (new_state) {
case GRPC_CHANNEL_READY:
policy_->channel_control_helper()->UpdateState(
GRPC_CHANNEL_READY, absl::OkStatus(),
MakeRefCounted<Picker>(policy_->selected_->subchannel()->Ref()));
break;
case GRPC_CHANNEL_IDLE:
// If the subchannel becomes disconnected, the health watcher
// might happen to see the change before the raw connectivity
// state watcher does. In this case, ignore it, since the raw
// connectivity state watcher will handle it shortly.
break;
case GRPC_CHANNEL_CONNECTING:
policy_->channel_control_helper()->UpdateState(
new_state, absl::OkStatus(),
MakeRefCounted<QueuePicker>(policy_->Ref()));
break;
case GRPC_CHANNEL_TRANSIENT_FAILURE:
policy_->channel_control_helper()->UpdateState(
GRPC_CHANNEL_TRANSIENT_FAILURE, status,
MakeRefCounted<TransientFailurePicker>(status));
break;
case GRPC_CHANNEL_SHUTDOWN:
Crash("health watcher reported state SHUTDOWN");
}
}
//
// PickFirst::SubchannelList::SubchannelData
//
PickFirst::SubchannelList::SubchannelData::SubchannelData(
SubchannelList* subchannel_list,
RefCountedPtr<SubchannelInterface> subchannel)
: subchannel_list_(subchannel_list), subchannel_(std::move(subchannel)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
" (subchannel %p): starting watch",
subchannel_list_->policy(), subchannel_list_, Index(),
subchannel_list_->size(), subchannel_.get());
}
auto watcher = std::make_unique<Watcher>(
this, subchannel_list_->Ref(DEBUG_LOCATION, "Watcher"));
pending_watcher_ = watcher.get();
subchannel_->WatchConnectivityState(std::move(watcher));
}
void PickFirst::SubchannelList::SubchannelData::ShutdownLocked() {
if (subchannel_ != nullptr) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
" (subchannel %p): cancelling watch and unreffing subchannel",
subchannel_list_->policy(), subchannel_list_, Index(),
subchannel_list_->size(), subchannel_.get());
}
subchannel_->CancelConnectivityStateWatch(pending_watcher_);
pending_watcher_ = nullptr;
subchannel_.reset();
}
}
void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange(
grpc_connectivity_state new_state, absl::Status status) {
PickFirst* p = subchannel_list_->policy();
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(
GPR_INFO,
"[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
" (subchannel %p): connectivity changed: old_state=%s, new_state=%s, "
"status=%s, shutting_down=%d, pending_watcher=%p, "
"p->selected_=%p, p->subchannel_list_=%p, "
"p->latest_pending_subchannel_list_=%p",
p, subchannel_list_, Index(), subchannel_list_->size(),
subchannel_.get(),
(connectivity_state_.has_value()
? ConnectivityStateName(*connectivity_state_)
: "N/A"),
ConnectivityStateName(new_state), status.ToString().c_str(),
subchannel_list_->shutting_down(), pending_watcher_, p->selected_,
p->subchannel_list_.get(), p->latest_pending_subchannel_list_.get());
}
if (subchannel_list_->shutting_down() || pending_watcher_ == nullptr) return;
void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) {
PickFirst* p = static_cast<PickFirst*>(subchannel_list()->policy());
// The notification must be for a subchannel in either the current or
// latest pending subchannel lists.
GPR_ASSERT(subchannel_list_ == p->subchannel_list_.get() ||
subchannel_list_ == p->latest_pending_subchannel_list_.get());
GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() ||
subchannel_list() == p->latest_pending_subchannel_list_.get());
GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN);
absl::optional<grpc_connectivity_state> old_state = connectivity_state_;
connectivity_state_ = new_state;
connectivity_status_ = status;
// Handle updates for the currently selected subchannel.
if (p->selected_ == this) {
GPR_ASSERT(subchannel_list_ == p->subchannel_list_.get());
GPR_ASSERT(subchannel_list() == p->subchannel_list_.get());
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"Pick First %p selected subchannel connectivity changed to %s", p,
@ -570,15 +380,17 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange(
p, p->latest_pending_subchannel_list_.get(),
p->subchannel_list_.get());
}
p->UnsetSelectedSubchannel();
p->selected_ = nullptr;
p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
// Set our state to that of the pending subchannel list.
if (p->subchannel_list_->in_transient_failure()) {
absl::Status status = absl::UnavailableError(absl::StrCat(
"selected subchannel failed; switching to pending update; "
"last failure: ",
p->subchannel_list_->subchannels_.back()
.connectivity_status_.ToString()));
p->subchannel_list_
->subchannel(p->subchannel_list_->num_subchannels())
->connectivity_status()
.ToString()));
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_TRANSIENT_FAILURE, status,
MakeRefCounted<TransientFailurePicker>(status));
@ -598,7 +410,7 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange(
// and we could switch to that rather than going IDLE.
// Enter idle.
p->idle_ = true;
p->UnsetSelectedSubchannel();
p->selected_ = nullptr;
p->subchannel_list_.reset();
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_IDLE, absl::Status(),
@ -616,7 +428,7 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange(
// select in place of the current one.
// If the subchannel is READY, use it.
if (new_state == GRPC_CHANNEL_READY) {
subchannel_list_->set_in_transient_failure(false);
subchannel_list()->set_in_transient_failure(false);
ProcessUnselectedReadyLocked();
return;
}
@ -626,36 +438,36 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange(
// Otherwise, do nothing, since we'll continue to wait until all of
// the subchannels report their state.
if (!old_state.has_value()) {
if (subchannel_list_->AllSubchannelsSeenInitialState()) {
subchannel_list_->subchannels_.front().subchannel_->RequestConnection();
if (subchannel_list()->AllSubchannelsSeenInitialState()) {
subchannel_list()->subchannel(0)->subchannel()->RequestConnection();
}
return;
}
// Ignore any other updates for subchannels we're not currently trying to
// connect to.
if (Index() != subchannel_list_->attempting_index()) return;
if (Index() != subchannel_list()->attempting_index()) return;
// Otherwise, process connectivity state.
switch (new_state) {
case GRPC_CHANNEL_READY:
// Already handled this case above, so this should not happen.
GPR_UNREACHABLE_CODE(break);
case GRPC_CHANNEL_TRANSIENT_FAILURE: {
size_t next_index = (Index() + 1) % subchannel_list_->size();
subchannel_list_->set_attempting_index(next_index);
SubchannelData& sd = subchannel_list_->subchannels_[next_index];
size_t next_index = (Index() + 1) % subchannel_list()->num_subchannels();
subchannel_list()->set_attempting_index(next_index);
PickFirstSubchannelData* sd = subchannel_list()->subchannel(next_index);
// If we're tried all subchannels, set state to TRANSIENT_FAILURE.
if (sd.Index() == 0) {
if (sd->Index() == 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"Pick First %p subchannel list %p failed to connect to "
"all subchannels",
p, subchannel_list_);
p, subchannel_list());
}
subchannel_list_->set_in_transient_failure(true);
subchannel_list()->set_in_transient_failure(true);
// In case 2, swap to the new subchannel list. This means reporting
// TRANSIENT_FAILURE and dropping the existing (working) connection,
// but we can't ignore what the control plane has told us.
if (subchannel_list_ == p->latest_pending_subchannel_list_.get()) {
if (subchannel_list() == p->latest_pending_subchannel_list_.get()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"Pick First %p promoting pending subchannel list %p to "
@ -663,19 +475,17 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange(
p, p->latest_pending_subchannel_list_.get(),
p->subchannel_list_.get());
}
p->UnsetSelectedSubchannel();
p->selected_ = nullptr; // owned by p->subchannel_list_
p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
}
// If this is the current subchannel list (either because we were
// in case 1 or because we were in case 2 and just promoted it to
// be the current list), re-resolve and report new state.
if (subchannel_list_ == p->subchannel_list_.get()) {
if (subchannel_list() == p->subchannel_list_.get()) {
p->channel_control_helper()->RequestReresolution();
absl::Status status = absl::UnavailableError(absl::StrCat(
(p->omit_status_message_prefix_
? ""
: "failed to connect to all addresses; last error: "),
connectivity_status_.ToString()));
absl::Status status = absl::UnavailableError(
absl::StrCat("failed to connect to all addresses; last error: ",
connectivity_status().ToString()));
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_TRANSIENT_FAILURE, status,
MakeRefCounted<TransientFailurePicker>(status));
@ -687,21 +497,21 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange(
// If it's already in CONNECTING, we don't need to do this.
// If it's in TRANSIENT_FAILURE, then we will trigger the
// connection attempt later when it reports IDLE.
auto sd_state = sd.connectivity_state();
auto sd_state = sd->connectivity_state();
if (sd_state.has_value() && *sd_state == GRPC_CHANNEL_IDLE) {
sd.subchannel_->RequestConnection();
sd->subchannel()->RequestConnection();
}
break;
}
case GRPC_CHANNEL_IDLE: {
subchannel_->RequestConnection();
subchannel()->RequestConnection();
break;
}
case GRPC_CHANNEL_CONNECTING: {
// Only update connectivity state in case 1, and only if we're not
// already in TRANSIENT_FAILURE.
if (subchannel_list_ == p->subchannel_list_.get() &&
!subchannel_list_->in_transient_failure()) {
if (subchannel_list() == p->subchannel_list_.get() &&
!subchannel_list()->in_transient_failure()) {
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_CONNECTING, absl::Status(),
MakeRefCounted<QueuePicker>(p->Ref(DEBUG_LOCATION, "QueuePicker")));
@ -713,8 +523,8 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange(
}
}
void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() {
PickFirst* p = static_cast<PickFirst*>(subchannel_list_->policy());
void PickFirst::PickFirstSubchannelData::ProcessUnselectedReadyLocked() {
PickFirst* p = static_cast<PickFirst*>(subchannel_list()->policy());
// If we get here, there are two possible cases:
// 1. We do not currently have a selected subchannel, and the update is
// for a subchannel in p->subchannel_list_ that we're trying to
@ -724,10 +534,10 @@ void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() {
// for a subchannel in p->latest_pending_subchannel_list_. The
// goal here is to find a subchannel from the update that we can
// select in place of the current one.
GPR_ASSERT(subchannel_list_ == p->subchannel_list_.get() ||
subchannel_list_ == p->latest_pending_subchannel_list_.get());
GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() ||
subchannel_list() == p->latest_pending_subchannel_list_.get());
// Case 2. Promote p->latest_pending_subchannel_list_ to p->subchannel_list_.
if (subchannel_list_ == p->latest_pending_subchannel_list_.get()) {
if (subchannel_list() == p->latest_pending_subchannel_list_.get()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"Pick First %p promoting pending subchannel list %p to "
@ -739,114 +549,17 @@ void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() {
}
// Cases 1 and 2.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p,
subchannel_.get());
gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p, subchannel());
}
p->selected_ = this;
// If health checking is enabled, start the health watch, but don't
// report a new picker -- we want to stay in CONNECTING while we wait
// for the health status notification.
// If health checking is NOT enabled, report READY.
if (p->enable_health_watch_) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO, "[PF %p] starting health watch", p);
}
auto watcher = std::make_unique<HealthWatcher>(
p->Ref(DEBUG_LOCATION, "HealthWatcher"));
p->health_watcher_ = watcher.get();
auto health_data_watcher = MakeHealthCheckWatcher(
p->work_serializer(), subchannel_list_->args_, std::move(watcher));
p->health_data_watcher_ = health_data_watcher.get();
subchannel_->AddDataWatcher(std::move(health_data_watcher));
} else {
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_READY, absl::Status(),
MakeRefCounted<Picker>(subchannel_->Ref()));
}
// Unref all other subchannels in the list.
for (size_t i = 0; i < subchannel_list_->size(); ++i) {
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_READY, absl::Status(),
MakeRefCounted<Picker>(subchannel()->Ref()));
for (size_t i = 0; i < subchannel_list()->num_subchannels(); ++i) {
if (i != Index()) {
subchannel_list_->subchannels_[i].ShutdownLocked();
}
}
}
//
// PickFirst::SubchannelList
//
PickFirst::SubchannelList::SubchannelList(RefCountedPtr<PickFirst> policy,
ServerAddressList addresses,
const ChannelArgs& args)
: InternallyRefCounted<SubchannelList>(
GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) ? "SubchannelList"
: nullptr),
policy_(std::move(policy)),
args_(args.Remove(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING)
.Remove(
GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"[PF %p] Creating subchannel list %p for %" PRIuPTR
" subchannels - channel args: %s",
policy_.get(), this, addresses.size(), args_.ToString().c_str());
}
subchannels_.reserve(addresses.size());
// Create a subchannel for each address.
for (const ServerAddress& address : addresses) {
RefCountedPtr<SubchannelInterface> subchannel =
policy_->channel_control_helper()->CreateSubchannel(address, args_);
if (subchannel == nullptr) {
// Subchannel could not be created.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"[PF %p] could not create subchannel for address %s, ignoring",
policy_.get(), address.ToString().c_str());
}
continue;
subchannel_list()->subchannel(i)->ShutdownLocked();
}
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"[PF %p] subchannel list %p index %" PRIuPTR
": Created subchannel %p for address %s",
policy_.get(), this, subchannels_.size(), subchannel.get(),
address.ToString().c_str());
}
subchannels_.emplace_back(this, std::move(subchannel));
}
}
PickFirst::SubchannelList::~SubchannelList() {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO, "[PF %p] Destroying subchannel_list %p", policy_.get(),
this);
}
}
void PickFirst::SubchannelList::Orphan() {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO, "[PF %p] Shutting down subchannel_list %p", policy_.get(),
this);
}
GPR_ASSERT(!shutting_down_);
shutting_down_ = true;
for (auto& sd : subchannels_) {
sd.ShutdownLocked();
}
Unref();
}
void PickFirst::SubchannelList::ResetBackoffLocked() {
for (auto& sd : subchannels_) {
sd.ResetBackoffLocked();
}
}
bool PickFirst::SubchannelList::AllSubchannelsSeenInitialState() {
for (auto& sd : subchannels_) {
if (!sd.connectivity_state().has_value()) return false;
}
return true;
}
//

@ -17,22 +17,6 @@
#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_PICK_FIRST_PICK_FIRST_H
#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_PICK_FIRST_PICK_FIRST_H
#include <grpc/support/port_platform.h>
#include "src/core/lib/resolver/server_address.h"
// Internal channel arg to enable health checking in pick_first.
// Intended to be used by petiole policies (e.g., round_robin) that
// delegate to pick_first.
#define GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING \
GRPC_ARG_NO_SUBCHANNEL_PREFIX "pick_first_enable_health_checking"
// Internal channel arg to tell pick_first to omit the prefix it normally
// adds to error status messages. Intended to be used by petiole policies
// (e.g., round_robin) that want to add their own prefixes.
#define GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX \
GRPC_ARG_NO_SUBCHANNEL_PREFIX "pick_first_omit_status_message_prefix"
namespace grpc_core {
// TODO(eostroukhov): Remove once this feature is no longer experimental.
bool ShufflePickFirstEnabled();

@ -37,7 +37,7 @@
#include <grpc/impl/connectivity_state.h>
#include <grpc/support/log.h>
#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h"
#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/config/core_configuration.h"
#include "src/core/lib/debug/trace.h"
@ -48,6 +48,7 @@
#include "src/core/lib/json/json.h"
#include "src/core/lib/load_balancing/lb_policy.h"
#include "src/core/lib/load_balancing/lb_policy_factory.h"
#include "src/core/lib/load_balancing/subchannel_interface.h"
#include "src/core/lib/resolver/server_address.h"
#include "src/core/lib/transport/connectivity_state.h"
@ -73,60 +74,93 @@ class RoundRobin : public LoadBalancingPolicy {
void ResetBackoffLocked() override;
private:
class RoundRobinEndpointList : public EndpointList {
~RoundRobin() override;
// Forward declaration.
class RoundRobinSubchannelList;
// Data for a particular subchannel in a subchannel list.
// This subclass adds the following functionality:
// - Tracks the previous connectivity state of the subchannel, so that
// we know how many subchannels are in each state.
class RoundRobinSubchannelData
: public SubchannelData<RoundRobinSubchannelList,
RoundRobinSubchannelData> {
public:
RoundRobinEndpointList(RefCountedPtr<RoundRobin> round_robin,
const ServerAddressList& addresses,
const ChannelArgs& args)
: EndpointList(std::move(round_robin),
GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)
? "RoundRobinEndpointList"
: nullptr) {
Init(addresses, args,
[&](RefCountedPtr<RoundRobinEndpointList> endpoint_list,
const ServerAddress& address, const ChannelArgs& args) {
return MakeOrphanable<RoundRobinEndpoint>(
std::move(endpoint_list), address, args,
policy<RoundRobin>()->work_serializer());
});
RoundRobinSubchannelData(
SubchannelList<RoundRobinSubchannelList, RoundRobinSubchannelData>*
subchannel_list,
const ServerAddress& address,
RefCountedPtr<SubchannelInterface> subchannel)
: SubchannelData(subchannel_list, address, std::move(subchannel)) {}
absl::optional<grpc_connectivity_state> connectivity_state() const {
return logical_connectivity_state_;
}
private:
class RoundRobinEndpoint : public Endpoint {
public:
RoundRobinEndpoint(RefCountedPtr<RoundRobinEndpointList> endpoint_list,
const ServerAddress& address, const ChannelArgs& args,
std::shared_ptr<WorkSerializer> work_serializer)
: Endpoint(std::move(endpoint_list)) {
Init(address, args, std::move(work_serializer));
}
private:
// Called when the child policy reports a connectivity state update.
void OnStateUpdate(absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state,
const absl::Status& status) override;
};
LoadBalancingPolicy::ChannelControlHelper* channel_control_helper()
const override {
return policy<RoundRobin>()->channel_control_helper();
// Performs connectivity state updates that need to be done only
// after we have started watching.
void ProcessConnectivityChangeLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) override;
// Updates the logical connectivity state.
void UpdateLogicalConnectivityStateLocked(
grpc_connectivity_state connectivity_state);
// The logical connectivity state of the subchannel.
// Note that the logical connectivity state may differ from the
// actual reported state in some cases (e.g., after we see
// TRANSIENT_FAILURE, we ignore any subsequent state changes until
// we see READY).
absl::optional<grpc_connectivity_state> logical_connectivity_state_;
};
// A list of subchannels.
class RoundRobinSubchannelList
: public SubchannelList<RoundRobinSubchannelList,
RoundRobinSubchannelData> {
public:
RoundRobinSubchannelList(RoundRobin* policy, ServerAddressList addresses,
const ChannelArgs& args)
: SubchannelList(policy,
(GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)
? "RoundRobinSubchannelList"
: nullptr),
std::move(addresses), policy->channel_control_helper(),
args) {
// Need to maintain a ref to the LB policy as long as we maintain
// any references to subchannels, since the subchannels'
// pollset_sets will include the LB policy's pollset_set.
policy->Ref(DEBUG_LOCATION, "subchannel_list").release();
}
// Updates the counters of children in each state when a
// child transitions from old_state to new_state.
~RoundRobinSubchannelList() override {
RoundRobin* p = static_cast<RoundRobin*>(policy());
p->Unref(DEBUG_LOCATION, "subchannel_list");
}
// Updates the counters of subchannels in each state when a
// subchannel transitions from old_state to new_state.
void UpdateStateCountersLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state);
// Ensures that the right child list is used and then updates
// the RR policy's connectivity state based on the child list's
// Ensures that the right subchannel list is used and then updates
// the RR policy's connectivity state based on the subchannel list's
// state counters.
void MaybeUpdateRoundRobinConnectivityStateLocked(
absl::Status status_for_tf);
private:
std::shared_ptr<WorkSerializer> work_serializer() const override {
return static_cast<RoundRobin*>(policy())->work_serializer();
}
std::string CountersString() const {
return absl::StrCat("num_children=", size(), " num_ready=", num_ready_,
return absl::StrCat("num_subchannels=", num_subchannels(),
" num_ready=", num_ready_,
" num_connecting=", num_connecting_,
" num_transient_failure=", num_transient_failure_);
}
@ -140,9 +174,7 @@ class RoundRobin : public LoadBalancingPolicy {
class Picker : public SubchannelPicker {
public:
Picker(RoundRobin* parent,
std::vector<RefCountedPtr<LoadBalancingPolicy::SubchannelPicker>>
pickers);
Picker(RoundRobin* parent, RoundRobinSubchannelList* subchannel_list);
PickResult Pick(PickArgs args) override;
@ -151,20 +183,18 @@ class RoundRobin : public LoadBalancingPolicy {
RoundRobin* parent_;
std::atomic<size_t> last_picked_index_;
std::vector<RefCountedPtr<LoadBalancingPolicy::SubchannelPicker>> pickers_;
std::vector<RefCountedPtr<SubchannelInterface>> subchannels_;
};
~RoundRobin() override;
void ShutdownLocked() override;
// Current child list.
OrphanablePtr<RoundRobinEndpointList> endpoint_list_;
// Latest pending child list.
// When we get an updated address list, we create a new child list
// for it here, and we wait to swap it into endpoint_list_ until the new
// List of subchannels.
RefCountedPtr<RoundRobinSubchannelList> subchannel_list_;
// Latest pending subchannel list.
// When we get an updated address list, we create a new subchannel list
// for it here, and we wait to swap it into subchannel_list_ until the new
// list becomes READY.
OrphanablePtr<RoundRobinEndpointList> latest_pending_endpoint_list_;
RefCountedPtr<RoundRobinSubchannelList> latest_pending_subchannel_list_;
bool shutdown_ = false;
@ -175,32 +205,38 @@ class RoundRobin : public LoadBalancingPolicy {
// RoundRobin::Picker
//
RoundRobin::Picker::Picker(
RoundRobin* parent,
std::vector<RefCountedPtr<LoadBalancingPolicy::SubchannelPicker>> pickers)
: parent_(parent), pickers_(std::move(pickers)) {
RoundRobin::Picker::Picker(RoundRobin* parent,
RoundRobinSubchannelList* subchannel_list)
: parent_(parent) {
for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) {
RoundRobinSubchannelData* sd = subchannel_list->subchannel(i);
if (sd->connectivity_state().value_or(GRPC_CHANNEL_IDLE) ==
GRPC_CHANNEL_READY) {
subchannels_.push_back(sd->subchannel()->Ref());
}
}
// For discussion on why we generate a random starting index for
// the picker, see https://github.com/grpc/grpc-go/issues/2580.
size_t index = absl::Uniform<size_t>(parent->bit_gen_, 0, pickers_.size());
size_t index =
absl::Uniform<size_t>(parent->bit_gen_, 0, subchannels_.size());
last_picked_index_.store(index, std::memory_order_relaxed);
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO,
"[RR %p picker %p] created picker from endpoint_list=%p "
"with %" PRIuPTR " READY children; last_picked_index_=%" PRIuPTR,
parent_, this, parent_->endpoint_list_.get(), pickers_.size(),
index);
"[RR %p picker %p] created picker from subchannel_list=%p "
"with %" PRIuPTR " READY subchannels; last_picked_index_=%" PRIuPTR,
parent_, this, subchannel_list, subchannels_.size(), index);
}
}
RoundRobin::PickResult RoundRobin::Picker::Pick(PickArgs args) {
RoundRobin::PickResult RoundRobin::Picker::Pick(PickArgs /*args*/) {
size_t index = last_picked_index_.fetch_add(1, std::memory_order_relaxed) %
pickers_.size();
subchannels_.size();
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO,
"[RR %p picker %p] using picker index %" PRIuPTR ", picker=%p",
parent_, this, index, pickers_[index].get());
"[RR %p picker %p] returning index %" PRIuPTR ", subchannel=%p",
parent_, this, index, subchannels_[index].get());
}
return pickers_[index]->Pick(args);
return PickResult::Complete(subchannels_[index]);
}
//
@ -217,8 +253,8 @@ RoundRobin::~RoundRobin() {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO, "[RR %p] Destroying Round Robin policy", this);
}
GPR_ASSERT(endpoint_list_ == nullptr);
GPR_ASSERT(latest_pending_endpoint_list_ == nullptr);
GPR_ASSERT(subchannel_list_ == nullptr);
GPR_ASSERT(latest_pending_subchannel_list_ == nullptr);
}
void RoundRobin::ShutdownLocked() {
@ -226,14 +262,14 @@ void RoundRobin::ShutdownLocked() {
gpr_log(GPR_INFO, "[RR %p] Shutting down", this);
}
shutdown_ = true;
endpoint_list_.reset();
latest_pending_endpoint_list_.reset();
subchannel_list_.reset();
latest_pending_subchannel_list_.reset();
}
void RoundRobin::ResetBackoffLocked() {
endpoint_list_->ResetBackoffLocked();
if (latest_pending_endpoint_list_ != nullptr) {
latest_pending_endpoint_list_->ResetBackoffLocked();
subchannel_list_->ResetBackoffLocked();
if (latest_pending_subchannel_list_ != nullptr) {
latest_pending_subchannel_list_->ResetBackoffLocked();
}
}
@ -250,31 +286,28 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) {
gpr_log(GPR_INFO, "[RR %p] received update with address error: %s", this,
args.addresses.status().ToString().c_str());
}
// If we already have a child list, then keep using the existing
// If we already have a subchannel list, then keep using the existing
// list, but still report back that the update was not accepted.
if (endpoint_list_ != nullptr) return args.addresses.status();
if (subchannel_list_ != nullptr) return args.addresses.status();
}
// Create new child list, replacing the previous pending list, if any.
// Create new subchannel list, replacing the previous pending list, if any.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) &&
latest_pending_endpoint_list_ != nullptr) {
gpr_log(GPR_INFO, "[RR %p] replacing previous pending child list %p", this,
latest_pending_endpoint_list_.get());
latest_pending_subchannel_list_ != nullptr) {
gpr_log(GPR_INFO, "[RR %p] replacing previous pending subchannel list %p",
this, latest_pending_subchannel_list_.get());
}
latest_pending_endpoint_list_ = MakeOrphanable<RoundRobinEndpointList>(
Ref(DEBUG_LOCATION, "RoundRobinEndpointList"), std::move(addresses),
args.args);
latest_pending_subchannel_list_ = MakeRefCounted<RoundRobinSubchannelList>(
this, std::move(addresses), args.args);
latest_pending_subchannel_list_->StartWatchingLocked(args.args);
// If the new list is empty, immediately promote it to
// endpoint_list_ and report TRANSIENT_FAILURE.
// TODO(roth): As part of adding dualstack backend support, we need to
// also handle the case where the list of addresses for a given
// endpoint is empty.
if (latest_pending_endpoint_list_->size() == 0) {
// subchannel_list_ and report TRANSIENT_FAILURE.
if (latest_pending_subchannel_list_->num_subchannels() == 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) &&
endpoint_list_ != nullptr) {
gpr_log(GPR_INFO, "[RR %p] replacing previous child list %p", this,
endpoint_list_.get());
subchannel_list_ != nullptr) {
gpr_log(GPR_INFO, "[RR %p] replacing previous subchannel list %p", this,
subchannel_list_.get());
}
endpoint_list_ = std::move(latest_pending_endpoint_list_);
subchannel_list_ = std::move(latest_pending_subchannel_list_);
absl::Status status =
args.addresses.ok() ? absl::UnavailableError(absl::StrCat(
"empty address list: ", args.resolution_note))
@ -285,64 +318,26 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) {
return status;
}
// Otherwise, if this is the initial update, immediately promote it to
// endpoint_list_.
if (endpoint_list_ == nullptr) {
endpoint_list_ = std::move(latest_pending_endpoint_list_);
// subchannel_list_.
if (subchannel_list_.get() == nullptr) {
subchannel_list_ = std::move(latest_pending_subchannel_list_);
}
return absl::OkStatus();
}
//
// RoundRobin::RoundRobinEndpointList::RoundRobinEndpoint
//
void RoundRobin::RoundRobinEndpointList::RoundRobinEndpoint::OnStateUpdate(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state, const absl::Status& status) {
auto* rr_endpoint_list = endpoint_list<RoundRobinEndpointList>();
auto* round_robin = policy<RoundRobin>();
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO,
"[RR %p] connectivity changed for child %p, endpoint_list %p "
"(index %" PRIuPTR " of %" PRIuPTR
"): prev_state=%s new_state=%s "
"(%s)",
round_robin, this, rr_endpoint_list, Index(),
rr_endpoint_list->size(),
(old_state.has_value() ? ConnectivityStateName(*old_state) : "N/A"),
ConnectivityStateName(new_state), status.ToString().c_str());
}
if (new_state == GRPC_CHANNEL_IDLE) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO, "[RR %p] child %p reported IDLE; requesting connection",
round_robin, this);
}
ExitIdleLocked();
}
// If state changed, update state counters.
if (!old_state.has_value() || *old_state != new_state) {
rr_endpoint_list->UpdateStateCountersLocked(old_state, new_state);
}
// Update the policy state.
rr_endpoint_list->MaybeUpdateRoundRobinConnectivityStateLocked(status);
}
//
// RoundRobin::RoundRobinEndpointList
// RoundRobinSubchannelList
//
void RoundRobin::RoundRobinEndpointList::UpdateStateCountersLocked(
void RoundRobin::RoundRobinSubchannelList::UpdateStateCountersLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) {
// We treat IDLE the same as CONNECTING, since it will immediately
// transition into that state anyway.
if (old_state.has_value()) {
GPR_ASSERT(*old_state != GRPC_CHANNEL_SHUTDOWN);
if (*old_state == GRPC_CHANNEL_READY) {
GPR_ASSERT(num_ready_ > 0);
--num_ready_;
} else if (*old_state == GRPC_CHANNEL_CONNECTING ||
*old_state == GRPC_CHANNEL_IDLE) {
} else if (*old_state == GRPC_CHANNEL_CONNECTING) {
GPR_ASSERT(num_connecting_ > 0);
--num_connecting_;
} else if (*old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
@ -353,90 +348,161 @@ void RoundRobin::RoundRobinEndpointList::UpdateStateCountersLocked(
GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN);
if (new_state == GRPC_CHANNEL_READY) {
++num_ready_;
} else if (new_state == GRPC_CHANNEL_CONNECTING ||
new_state == GRPC_CHANNEL_IDLE) {
} else if (new_state == GRPC_CHANNEL_CONNECTING) {
++num_connecting_;
} else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
++num_transient_failure_;
}
}
void RoundRobin::RoundRobinEndpointList::
void RoundRobin::RoundRobinSubchannelList::
MaybeUpdateRoundRobinConnectivityStateLocked(absl::Status status_for_tf) {
auto* round_robin = policy<RoundRobin>();
// If this is latest_pending_endpoint_list_, then swap it into
// endpoint_list_ in the following cases:
// - endpoint_list_ has no READY children.
// - This list has at least one READY child and we have seen the
// initial connectivity state notification for all children.
// - All of the children in this list are in TRANSIENT_FAILURE.
RoundRobin* p = static_cast<RoundRobin*>(policy());
// If this is latest_pending_subchannel_list_, then swap it into
// subchannel_list_ in the following cases:
// - subchannel_list_ has no READY subchannels.
// - This list has at least one READY subchannel and we have seen the
// initial connectivity state notification for all subchannels.
// - All of the subchannels in this list are in TRANSIENT_FAILURE.
// (This may cause the channel to go from READY to TRANSIENT_FAILURE,
// but we're doing what the control plane told us to do.)
if (round_robin->latest_pending_endpoint_list_.get() == this &&
(round_robin->endpoint_list_->num_ready_ == 0 ||
(num_ready_ > 0 && AllEndpointsSeenInitialState()) ||
num_transient_failure_ == size())) {
if (p->latest_pending_subchannel_list_.get() == this &&
(p->subchannel_list_->num_ready_ == 0 ||
(num_ready_ > 0 && AllSubchannelsSeenInitialState()) ||
num_transient_failure_ == num_subchannels())) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
const std::string old_counters_string =
round_robin->endpoint_list_ != nullptr
? round_robin->endpoint_list_->CountersString()
: "";
gpr_log(GPR_INFO,
"[RR %p] swapping out child list %p (%s) in favor of %p (%s)",
round_robin, round_robin->endpoint_list_.get(),
old_counters_string.c_str(), this, CountersString().c_str());
p->subchannel_list_ != nullptr ? p->subchannel_list_->CountersString()
: "";
gpr_log(
GPR_INFO,
"[RR %p] swapping out subchannel list %p (%s) in favor of %p (%s)", p,
p->subchannel_list_.get(), old_counters_string.c_str(), this,
CountersString().c_str());
}
round_robin->endpoint_list_ =
std::move(round_robin->latest_pending_endpoint_list_);
p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
}
// Only set connectivity state if this is the current child list.
if (round_robin->endpoint_list_.get() != this) return;
// FIXME: scan children each time instead of keeping counters?
// Only set connectivity state if this is the current subchannel list.
if (p->subchannel_list_.get() != this) return;
// First matching rule wins:
// 1) ANY child is READY => policy is READY.
// 2) ANY child is CONNECTING => policy is CONNECTING.
// 3) ALL children are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE.
// 1) ANY subchannel is READY => policy is READY.
// 2) ANY subchannel is CONNECTING => policy is CONNECTING.
// 3) ALL subchannels are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE.
if (num_ready_ > 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO, "[RR %p] reporting READY with child list %p",
round_robin, this);
gpr_log(GPR_INFO, "[RR %p] reporting READY with subchannel list %p", p,
this);
}
std::vector<RefCountedPtr<LoadBalancingPolicy::SubchannelPicker>> pickers;
for (const auto& endpoint : endpoints()) {
auto state = endpoint->connectivity_state();
if (state.has_value() && *state == GRPC_CHANNEL_READY) {
pickers.push_back(endpoint->picker());
}
}
GPR_ASSERT(!pickers.empty());
round_robin->channel_control_helper()->UpdateState(
GRPC_CHANNEL_READY, absl::OkStatus(),
MakeRefCounted<Picker>(round_robin, std::move(pickers)));
p->channel_control_helper()->UpdateState(GRPC_CHANNEL_READY, absl::Status(),
MakeRefCounted<Picker>(p, this));
} else if (num_connecting_ > 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO, "[RR %p] reporting CONNECTING with child list %p",
round_robin, this);
gpr_log(GPR_INFO, "[RR %p] reporting CONNECTING with subchannel list %p",
p, this);
}
round_robin->channel_control_helper()->UpdateState(
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_CONNECTING, absl::Status(),
MakeRefCounted<QueuePicker>(nullptr));
} else if (num_transient_failure_ == size()) {
MakeRefCounted<QueuePicker>(p->Ref(DEBUG_LOCATION, "QueuePicker")));
} else if (num_transient_failure_ == num_subchannels()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO,
"[RR %p] reporting TRANSIENT_FAILURE with child list %p: %s",
round_robin, this, status_for_tf.ToString().c_str());
"[RR %p] reporting TRANSIENT_FAILURE with subchannel list %p: %s",
p, this, status_for_tf.ToString().c_str());
}
if (!status_for_tf.ok()) {
last_failure_ = absl::UnavailableError(
absl::StrCat("connections to all backends failing; last error: ",
status_for_tf.message()));
status_for_tf.ToString()));
}
round_robin->channel_control_helper()->UpdateState(
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_TRANSIENT_FAILURE, last_failure_,
MakeRefCounted<TransientFailurePicker>(last_failure_));
}
}
//
// RoundRobinSubchannelData
//
void RoundRobin::RoundRobinSubchannelData::ProcessConnectivityChangeLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) {
RoundRobin* p = static_cast<RoundRobin*>(subchannel_list()->policy());
GPR_ASSERT(subchannel() != nullptr);
// If this is not the initial state notification and the new state is
// TRANSIENT_FAILURE or IDLE, re-resolve.
// Note that we don't want to do this on the initial state notification,
// because that would result in an endless loop of re-resolution.
if (old_state.has_value() && (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE ||
new_state == GRPC_CHANNEL_IDLE)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO,
"[RR %p] Subchannel %p reported %s; requesting re-resolution", p,
subchannel(), ConnectivityStateName(new_state));
}
p->channel_control_helper()->RequestReresolution();
}
if (new_state == GRPC_CHANNEL_IDLE) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO,
"[RR %p] Subchannel %p reported IDLE; requesting connection", p,
subchannel());
}
subchannel()->RequestConnection();
}
// Update logical connectivity state.
UpdateLogicalConnectivityStateLocked(new_state);
// Update the policy state.
subchannel_list()->MaybeUpdateRoundRobinConnectivityStateLocked(
connectivity_status());
}
void RoundRobin::RoundRobinSubchannelData::UpdateLogicalConnectivityStateLocked(
grpc_connectivity_state connectivity_state) {
RoundRobin* p = static_cast<RoundRobin*>(subchannel_list()->policy());
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(
GPR_INFO,
"[RR %p] connectivity changed for subchannel %p, subchannel_list %p "
"(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s",
p, subchannel(), subchannel_list(), Index(),
subchannel_list()->num_subchannels(),
(logical_connectivity_state_.has_value()
? ConnectivityStateName(*logical_connectivity_state_)
: "N/A"),
ConnectivityStateName(connectivity_state));
}
// Decide what state to report for aggregation purposes.
// If the last logical state was TRANSIENT_FAILURE, then ignore the
// state change unless the new state is READY.
if (logical_connectivity_state_.has_value() &&
*logical_connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE &&
connectivity_state != GRPC_CHANNEL_READY) {
return;
}
// If the new state is IDLE, treat it as CONNECTING, since it will
// immediately transition into CONNECTING anyway.
if (connectivity_state == GRPC_CHANNEL_IDLE) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO,
"[RR %p] subchannel %p, subchannel_list %p (index %" PRIuPTR
" of %" PRIuPTR "): treating IDLE as CONNECTING",
p, subchannel(), subchannel_list(), Index(),
subchannel_list()->num_subchannels());
}
connectivity_state = GRPC_CHANNEL_CONNECTING;
}
// If no change, return false.
if (logical_connectivity_state_.has_value() &&
*logical_connectivity_state_ == connectivity_state) {
return;
}
// Otherwise, update counters and logical state.
subchannel_list()->UpdateStateCountersLocked(logical_connectivity_state_,
connectivity_state);
logical_connectivity_state_ = connectivity_state;
}
//
// factory
//

@ -0,0 +1,488 @@
//
// Copyright 2015 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_SUBCHANNEL_LIST_H
#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_SUBCHANNEL_LIST_H
#include <grpc/support/port_platform.h>
#include <inttypes.h>
#include <string.h>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/status/status.h"
#include "absl/types/optional.h"
#include <grpc/grpc.h>
#include <grpc/impl/connectivity_state.h>
#include <grpc/support/log.h>
#include "src/core/ext/filters/client_channel/client_channel_internal.h"
#include "src/core/ext/filters/client_channel/lb_policy/health_check_client.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/dual_ref_counted.h"
#include "src/core/lib/gprpp/manual_constructor.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/work_serializer.h"
#include "src/core/lib/iomgr/iomgr_fwd.h"
#include "src/core/lib/load_balancing/lb_policy.h"
#include "src/core/lib/load_balancing/subchannel_interface.h"
#include "src/core/lib/resolver/server_address.h"
#include "src/core/lib/transport/connectivity_state.h"
// Code for maintaining a list of subchannels within an LB policy.
//
// To use this, callers must create their own subclasses, like so:
//
// class MySubchannelList; // Forward declaration.
// class MySubchannelData
// : public SubchannelData<MySubchannelList, MySubchannelData> {
// public:
// void ProcessConnectivityChangeLocked(
// absl::optional<grpc_connectivity_state> old_state,
// grpc_connectivity_state new_state) override {
// // ...code to handle connectivity changes...
// }
// };
// class MySubchannelList
// : public SubchannelList<MySubchannelList, MySubchannelData> {
// };
//
// All methods will be called from within the client_channel work serializer.
namespace grpc_core {
// Forward declaration.
template <typename SubchannelListType, typename SubchannelDataType>
class SubchannelList;
// Stores data for a particular subchannel in a subchannel list.
// Callers must create a subclass that implements the
// ProcessConnectivityChangeLocked() method.
template <typename SubchannelListType, typename SubchannelDataType>
class SubchannelData {
public:
// Returns a pointer to the subchannel list containing this object.
SubchannelListType* subchannel_list() const {
return static_cast<SubchannelListType*>(subchannel_list_);
}
// Returns the index into the subchannel list of this object.
size_t Index() const {
return static_cast<size_t>(static_cast<const SubchannelDataType*>(this) -
subchannel_list_->subchannel(0));
}
// Returns a pointer to the subchannel.
SubchannelInterface* subchannel() const { return subchannel_.get(); }
// Returns the cached connectivity state, if any.
absl::optional<grpc_connectivity_state> connectivity_state() {
return connectivity_state_;
}
absl::Status connectivity_status() { return connectivity_status_; }
// Resets the connection backoff.
void ResetBackoffLocked();
// Cancels any pending connectivity watch and unrefs the subchannel.
void ShutdownLocked();
protected:
SubchannelData(
SubchannelList<SubchannelListType, SubchannelDataType>* subchannel_list,
const ServerAddress& address,
RefCountedPtr<SubchannelInterface> subchannel);
virtual ~SubchannelData();
// This method will be invoked once soon after instantiation to report
// the current connectivity state, and it will then be invoked again
// whenever the connectivity state changes.
virtual void ProcessConnectivityChangeLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) = 0;
private:
// For accessing StartConnectivityWatchLocked().
friend class SubchannelList<SubchannelListType, SubchannelDataType>;
// Watcher for subchannel connectivity state.
class Watcher
: public SubchannelInterface::ConnectivityStateWatcherInterface {
public:
Watcher(
SubchannelData<SubchannelListType, SubchannelDataType>* subchannel_data,
WeakRefCountedPtr<SubchannelListType> subchannel_list)
: subchannel_data_(subchannel_data),
subchannel_list_(std::move(subchannel_list)) {}
~Watcher() override {
subchannel_list_.reset(DEBUG_LOCATION, "Watcher dtor");
}
void OnConnectivityStateChange(grpc_connectivity_state new_state,
absl::Status status) override;
grpc_pollset_set* interested_parties() override {
return subchannel_list_->policy()->interested_parties();
}
private:
SubchannelData<SubchannelListType, SubchannelDataType>* subchannel_data_;
WeakRefCountedPtr<SubchannelListType> subchannel_list_;
};
// Starts watching the connectivity state of the subchannel.
// ProcessConnectivityChangeLocked() will be called whenever the
// connectivity state changes.
void StartConnectivityWatchLocked(const ChannelArgs& args);
// Cancels watching the connectivity state of the subchannel.
void CancelConnectivityWatchLocked(const char* reason);
// Unrefs the subchannel.
void UnrefSubchannelLocked(const char* reason);
// Backpointer to owning subchannel list. Not owned.
SubchannelList<SubchannelListType, SubchannelDataType>* subchannel_list_;
// The subchannel.
RefCountedPtr<SubchannelInterface> subchannel_;
// Will be non-null when the subchannel's state is being watched.
SubchannelInterface::ConnectivityStateWatcherInterface* pending_watcher_ =
nullptr;
SubchannelInterface::DataWatcherInterface* health_watcher_ = nullptr;
// Data updated by the watcher.
absl::optional<grpc_connectivity_state> connectivity_state_;
absl::Status connectivity_status_;
};
// A list of subchannels.
template <typename SubchannelListType, typename SubchannelDataType>
class SubchannelList : public DualRefCounted<SubchannelListType> {
public:
// Starts watching the connectivity state of all subchannels.
// Must be called immediately after instantiation.
void StartWatchingLocked(const ChannelArgs& args);
// The number of subchannels in the list.
size_t num_subchannels() const { return subchannels_.size(); }
// The data for the subchannel at a particular index.
SubchannelDataType* subchannel(size_t index) {
return subchannels_[index].get();
}
// Returns true if the subchannel list is shutting down.
bool shutting_down() const { return shutting_down_; }
// Accessors.
LoadBalancingPolicy* policy() const { return policy_; }
const char* tracer() const { return tracer_; }
// Resets connection backoff of all subchannels.
void ResetBackoffLocked();
// Returns true if all subchannels have seen their initial
// connectivity state notifications.
bool AllSubchannelsSeenInitialState();
void Orphan() override;
protected:
SubchannelList(LoadBalancingPolicy* policy, const char* tracer,
ServerAddressList addresses,
LoadBalancingPolicy::ChannelControlHelper* helper,
const ChannelArgs& args);
virtual ~SubchannelList();
private:
// For accessing Ref() and Unref().
friend class SubchannelData<SubchannelListType, SubchannelDataType>;
virtual std::shared_ptr<WorkSerializer> work_serializer() const = 0;
// Backpointer to owning policy.
LoadBalancingPolicy* policy_;
const char* tracer_;
absl::optional<std::string> health_check_service_name_;
// The list of subchannels.
// We use ManualConstructor here to support SubchannelDataType classes
// that are not copyable.
std::vector<ManualConstructor<SubchannelDataType>> subchannels_;
// Is this list shutting down? This may be true due to the shutdown of the
// policy itself or because a newer update has arrived while this one hadn't
// finished processing.
bool shutting_down_ = false;
};
//
// implementation -- no user-servicable parts below
//
//
// SubchannelData::Watcher
//
template <typename SubchannelListType, typename SubchannelDataType>
void SubchannelData<SubchannelListType, SubchannelDataType>::Watcher::
OnConnectivityStateChange(grpc_connectivity_state new_state,
absl::Status status) {
if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) {
gpr_log(
GPR_INFO,
"[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
" (subchannel %p): connectivity changed: old_state=%s, new_state=%s, "
"status=%s, shutting_down=%d, pending_watcher=%p, health_watcher=%p",
subchannel_list_->tracer(), subchannel_list_->policy(),
subchannel_list_.get(), subchannel_data_->Index(),
subchannel_list_->num_subchannels(),
subchannel_data_->subchannel_.get(),
(subchannel_data_->connectivity_state_.has_value()
? ConnectivityStateName(*subchannel_data_->connectivity_state_)
: "N/A"),
ConnectivityStateName(new_state), status.ToString().c_str(),
subchannel_list_->shutting_down(), subchannel_data_->pending_watcher_,
subchannel_data_->health_watcher_);
}
if (!subchannel_list_->shutting_down() &&
(subchannel_data_->pending_watcher_ != nullptr ||
subchannel_data_->health_watcher_ != nullptr)) {
absl::optional<grpc_connectivity_state> old_state =
subchannel_data_->connectivity_state_;
subchannel_data_->connectivity_state_ = new_state;
subchannel_data_->connectivity_status_ = status;
// Call the subclass's ProcessConnectivityChangeLocked() method.
subchannel_data_->ProcessConnectivityChangeLocked(old_state, new_state);
}
}
//
// SubchannelData
//
template <typename SubchannelListType, typename SubchannelDataType>
SubchannelData<SubchannelListType, SubchannelDataType>::SubchannelData(
SubchannelList<SubchannelListType, SubchannelDataType>* subchannel_list,
const ServerAddress& /*address*/,
RefCountedPtr<SubchannelInterface> subchannel)
: subchannel_list_(subchannel_list), subchannel_(std::move(subchannel)) {}
template <typename SubchannelListType, typename SubchannelDataType>
SubchannelData<SubchannelListType, SubchannelDataType>::~SubchannelData() {
GPR_ASSERT(subchannel_ == nullptr);
}
template <typename SubchannelListType, typename SubchannelDataType>
void SubchannelData<SubchannelListType, SubchannelDataType>::
UnrefSubchannelLocked(const char* reason) {
if (subchannel_ != nullptr) {
if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) {
gpr_log(GPR_INFO,
"[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
" (subchannel %p): unreffing subchannel (%s)",
subchannel_list_->tracer(), subchannel_list_->policy(),
subchannel_list_, Index(), subchannel_list_->num_subchannels(),
subchannel_.get(), reason);
}
subchannel_.reset();
}
}
template <typename SubchannelListType, typename SubchannelDataType>
void SubchannelData<SubchannelListType,
SubchannelDataType>::ResetBackoffLocked() {
if (subchannel_ != nullptr) {
subchannel_->ResetBackoff();
}
}
template <typename SubchannelListType, typename SubchannelDataType>
void SubchannelData<SubchannelListType, SubchannelDataType>::
StartConnectivityWatchLocked(const ChannelArgs& args) {
if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) {
gpr_log(
GPR_INFO,
"[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
" (subchannel %p): starting watch "
"(health_check_service_name=\"%s\")",
subchannel_list_->tracer(), subchannel_list_->policy(),
subchannel_list_, Index(), subchannel_list_->num_subchannels(),
subchannel_.get(),
subchannel_list()->health_check_service_name_.value_or("N/A").c_str());
}
GPR_ASSERT(pending_watcher_ == nullptr);
GPR_ASSERT(health_watcher_ == nullptr);
auto watcher = std::make_unique<Watcher>(
this, subchannel_list()->WeakRef(DEBUG_LOCATION, "Watcher"));
if (subchannel_list()->health_check_service_name_.has_value()) {
auto health_watcher = MakeHealthCheckWatcher(
subchannel_list_->work_serializer(), args, std::move(watcher));
health_watcher_ = health_watcher.get();
subchannel_->AddDataWatcher(std::move(health_watcher));
} else {
pending_watcher_ = watcher.get();
subchannel_->WatchConnectivityState(std::move(watcher));
}
}
template <typename SubchannelListType, typename SubchannelDataType>
void SubchannelData<SubchannelListType, SubchannelDataType>::
CancelConnectivityWatchLocked(const char* reason) {
if (pending_watcher_ != nullptr) {
if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) {
gpr_log(GPR_INFO,
"[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
" (subchannel %p): canceling connectivity watch (%s)",
subchannel_list_->tracer(), subchannel_list_->policy(),
subchannel_list_, Index(), subchannel_list_->num_subchannels(),
subchannel_.get(), reason);
}
subchannel_->CancelConnectivityStateWatch(pending_watcher_);
pending_watcher_ = nullptr;
} else if (health_watcher_ != nullptr) {
if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) {
gpr_log(GPR_INFO,
"[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
" (subchannel %p): canceling health watch (%s)",
subchannel_list_->tracer(), subchannel_list_->policy(),
subchannel_list_, Index(), subchannel_list_->num_subchannels(),
subchannel_.get(), reason);
}
subchannel_->CancelDataWatcher(health_watcher_);
health_watcher_ = nullptr;
}
}
template <typename SubchannelListType, typename SubchannelDataType>
void SubchannelData<SubchannelListType, SubchannelDataType>::ShutdownLocked() {
CancelConnectivityWatchLocked("shutdown");
UnrefSubchannelLocked("shutdown");
}
//
// SubchannelList
//
template <typename SubchannelListType, typename SubchannelDataType>
SubchannelList<SubchannelListType, SubchannelDataType>::SubchannelList(
LoadBalancingPolicy* policy, const char* tracer,
ServerAddressList addresses,
LoadBalancingPolicy::ChannelControlHelper* helper, const ChannelArgs& args)
: DualRefCounted<SubchannelListType>(tracer),
policy_(policy),
tracer_(tracer) {
if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) {
health_check_service_name_ =
args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME);
}
if (GPR_UNLIKELY(tracer_ != nullptr)) {
gpr_log(GPR_INFO,
"[%s %p] Creating subchannel list %p for %" PRIuPTR " subchannels",
tracer_, policy, this, addresses.size());
}
subchannels_.reserve(addresses.size());
// Create a subchannel for each address.
for (ServerAddress address : addresses) {
RefCountedPtr<SubchannelInterface> subchannel =
helper->CreateSubchannel(address, args);
if (subchannel == nullptr) {
// Subchannel could not be created.
if (GPR_UNLIKELY(tracer_ != nullptr)) {
gpr_log(GPR_INFO,
"[%s %p] could not create subchannel for address %s, ignoring",
tracer_, policy_, address.ToString().c_str());
}
continue;
}
if (GPR_UNLIKELY(tracer_ != nullptr)) {
gpr_log(GPR_INFO,
"[%s %p] subchannel list %p index %" PRIuPTR
": Created subchannel %p for address %s",
tracer_, policy_, this, subchannels_.size(), subchannel.get(),
address.ToString().c_str());
}
subchannels_.emplace_back();
subchannels_.back().Init(this, std::move(address), std::move(subchannel));
}
}
template <typename SubchannelListType, typename SubchannelDataType>
SubchannelList<SubchannelListType, SubchannelDataType>::~SubchannelList() {
if (GPR_UNLIKELY(tracer_ != nullptr)) {
gpr_log(GPR_INFO, "[%s %p] Destroying subchannel_list %p", tracer_, policy_,
this);
}
for (auto& sd : subchannels_) {
sd.Destroy();
}
}
template <typename SubchannelListType, typename SubchannelDataType>
void SubchannelList<SubchannelListType, SubchannelDataType>::
StartWatchingLocked(const ChannelArgs& args) {
for (auto& sd : subchannels_) {
sd->StartConnectivityWatchLocked(args);
}
}
template <typename SubchannelListType, typename SubchannelDataType>
void SubchannelList<SubchannelListType, SubchannelDataType>::Orphan() {
if (GPR_UNLIKELY(tracer_ != nullptr)) {
gpr_log(GPR_INFO, "[%s %p] Shutting down subchannel_list %p", tracer_,
policy_, this);
}
GPR_ASSERT(!shutting_down_);
shutting_down_ = true;
for (auto& sd : subchannels_) {
sd->ShutdownLocked();
}
}
template <typename SubchannelListType, typename SubchannelDataType>
void SubchannelList<SubchannelListType,
SubchannelDataType>::ResetBackoffLocked() {
for (auto& sd : subchannels_) {
sd->ResetBackoffLocked();
}
}
template <typename SubchannelListType, typename SubchannelDataType>
bool SubchannelList<SubchannelListType,
SubchannelDataType>::AllSubchannelsSeenInitialState() {
for (size_t i = 0; i < num_subchannels(); ++i) {
if (!subchannel(i)->connectivity_state().has_value()) return false;
}
return true;
}
} // namespace grpc_core
#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_SUBCHANNEL_LIST_H

@ -39,15 +39,14 @@
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "absl/types/variant.h"
#include <grpc/event_engine/event_engine.h>
#include <grpc/impl/connectivity_state.h>
#include <grpc/support/log.h>
#include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h"
#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h"
#include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h"
#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h"
#include "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h"
#include "src/core/lib/address_utils/sockaddr_utils.h"
#include "src/core/lib/channel/channel_args.h"
@ -153,11 +152,11 @@ class WeightedRoundRobin : public LoadBalancingPolicy {
private:
// Represents the weight for a given address.
class EndpointWeight : public RefCounted<EndpointWeight> {
class AddressWeight : public RefCounted<AddressWeight> {
public:
EndpointWeight(RefCountedPtr<WeightedRoundRobin> wrr, std::string key)
AddressWeight(RefCountedPtr<WeightedRoundRobin> wrr, std::string key)
: wrr_(std::move(wrr)), key_(std::move(key)) {}
~EndpointWeight() override;
~AddressWeight() override;
void MaybeUpdateWeight(double qps, double eps, double utilization,
float error_utilization_penalty);
@ -177,83 +176,109 @@ class WeightedRoundRobin : public LoadBalancingPolicy {
Timestamp last_update_time_ ABSL_GUARDED_BY(&mu_) = Timestamp::InfPast();
};
class WrrEndpointList : public EndpointList {
// Forward declaration.
class WeightedRoundRobinSubchannelList;
// Data for a particular subchannel in a subchannel list.
// This subclass adds the following functionality:
// - Tracks the previous connectivity state of the subchannel, so that
// we know how many subchannels are in each state.
class WeightedRoundRobinSubchannelData
: public SubchannelData<WeightedRoundRobinSubchannelList,
WeightedRoundRobinSubchannelData> {
public:
class WrrEndpoint : public Endpoint {
WeightedRoundRobinSubchannelData(
SubchannelList<WeightedRoundRobinSubchannelList,
WeightedRoundRobinSubchannelData>* subchannel_list,
const ServerAddress& address, RefCountedPtr<SubchannelInterface> sc);
absl::optional<grpc_connectivity_state> connectivity_state() const {
return logical_connectivity_state_;
}
RefCountedPtr<AddressWeight> weight() const { return weight_; }
private:
class OobWatcher : public OobBackendMetricWatcher {
public:
WrrEndpoint(RefCountedPtr<WrrEndpointList> endpoint_list,
const ServerAddress& address, const ChannelArgs& args,
std::shared_ptr<WorkSerializer> work_serializer)
: Endpoint(std::move(endpoint_list)),
weight_(policy<WeightedRoundRobin>()->GetOrCreateWeight(
address.address())) {
Init(address, args, std::move(work_serializer));
}
OobWatcher(RefCountedPtr<AddressWeight> weight,
float error_utilization_penalty)
: weight_(std::move(weight)),
error_utilization_penalty_(error_utilization_penalty) {}
RefCountedPtr<EndpointWeight> weight() const { return weight_; }
void OnBackendMetricReport(
const BackendMetricData& backend_metric_data) override;
private:
class OobWatcher : public OobBackendMetricWatcher {
public:
OobWatcher(RefCountedPtr<EndpointWeight> weight,
float error_utilization_penalty)
: weight_(std::move(weight)),
error_utilization_penalty_(error_utilization_penalty) {}
void OnBackendMetricReport(
const BackendMetricData& backend_metric_data) override;
private:
RefCountedPtr<EndpointWeight> weight_;
const float error_utilization_penalty_;
};
RefCountedPtr<SubchannelInterface> CreateSubchannel(
ServerAddress address, const ChannelArgs& args) override;
// Called when the child policy reports a connectivity state update.
void OnStateUpdate(absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state,
const absl::Status& status) override;
RefCountedPtr<EndpointWeight> weight_;
RefCountedPtr<AddressWeight> weight_;
const float error_utilization_penalty_;
};
WrrEndpointList(RefCountedPtr<WeightedRoundRobin> wrr,
const ServerAddressList& addresses, const ChannelArgs& args)
: EndpointList(std::move(wrr),
GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)
? "WrrEndpointList"
: nullptr) {
Init(addresses, args,
[&](RefCountedPtr<WrrEndpointList> endpoint_list,
const ServerAddress& address, const ChannelArgs& args) {
return MakeOrphanable<WrrEndpoint>(
std::move(endpoint_list), address, args,
policy<WeightedRoundRobin>()->work_serializer());
});
// Performs connectivity state updates that need to be done only
// after we have started watching.
void ProcessConnectivityChangeLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) override;
// Updates the logical connectivity state.
void UpdateLogicalConnectivityStateLocked(
grpc_connectivity_state connectivity_state);
// The logical connectivity state of the subchannel.
// Note that the logical connectivity state may differ from the
// actual reported state in some cases (e.g., after we see
// TRANSIENT_FAILURE, we ignore any subsequent state changes until
// we see READY).
absl::optional<grpc_connectivity_state> logical_connectivity_state_;
RefCountedPtr<AddressWeight> weight_;
};
// A list of subchannels.
class WeightedRoundRobinSubchannelList
: public SubchannelList<WeightedRoundRobinSubchannelList,
WeightedRoundRobinSubchannelData> {
public:
WeightedRoundRobinSubchannelList(WeightedRoundRobin* policy,
ServerAddressList addresses,
const ChannelArgs& args)
: SubchannelList(policy,
(GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)
? "WeightedRoundRobinSubchannelList"
: nullptr),
std::move(addresses), policy->channel_control_helper(),
args) {
// Need to maintain a ref to the LB policy as long as we maintain
// any references to subchannels, since the subchannels'
// pollset_sets will include the LB policy's pollset_set.
policy->Ref(DEBUG_LOCATION, "subchannel_list").release();
}
private:
LoadBalancingPolicy::ChannelControlHelper* channel_control_helper()
const override {
return policy<WeightedRoundRobin>()->channel_control_helper();
~WeightedRoundRobinSubchannelList() override {
WeightedRoundRobin* p = static_cast<WeightedRoundRobin*>(policy());
p->Unref(DEBUG_LOCATION, "subchannel_list");
}
// Updates the counters of children in each state when a
// child transitions from old_state to new_state.
// Updates the counters of subchannels in each state when a
// subchannel transitions from old_state to new_state.
void UpdateStateCountersLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state);
// Ensures that the right child list is used and then updates
// the WRR policy's connectivity state based on the child list's
// Ensures that the right subchannel list is used and then updates
// the aggregated connectivity state based on the subchannel list's
// state counters.
void MaybeUpdateAggregatedConnectivityStateLocked(
absl::Status status_for_tf);
private:
std::shared_ptr<WorkSerializer> work_serializer() const override {
return static_cast<WeightedRoundRobin*>(policy())->work_serializer();
}
std::string CountersString() const {
return absl::StrCat("num_children=", size(), " num_ready=", num_ready_,
return absl::StrCat("num_subchannels=", num_subchannels(),
" num_ready=", num_ready_,
" num_connecting=", num_connecting_,
" num_transient_failure=", num_transient_failure_);
}
@ -270,7 +295,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy {
class Picker : public SubchannelPicker {
public:
Picker(RefCountedPtr<WeightedRoundRobin> wrr,
WrrEndpointList* endpoint_list);
WeightedRoundRobinSubchannelList* subchannel_list);
~Picker() override;
@ -282,34 +307,31 @@ class WeightedRoundRobin : public LoadBalancingPolicy {
// A call tracker that collects per-call endpoint utilization reports.
class SubchannelCallTracker : public SubchannelCallTrackerInterface {
public:
SubchannelCallTracker(
RefCountedPtr<EndpointWeight> weight, float error_utilization_penalty,
std::unique_ptr<SubchannelCallTrackerInterface> child_tracker)
SubchannelCallTracker(RefCountedPtr<AddressWeight> weight,
float error_utilization_penalty)
: weight_(std::move(weight)),
error_utilization_penalty_(error_utilization_penalty),
child_tracker_(std::move(child_tracker)) {}
error_utilization_penalty_(error_utilization_penalty) {}
void Start() override;
void Start() override {}
void Finish(FinishArgs args) override;
private:
RefCountedPtr<EndpointWeight> weight_;
RefCountedPtr<AddressWeight> weight_;
const float error_utilization_penalty_;
std::unique_ptr<SubchannelCallTrackerInterface> child_tracker_;
};
// Info stored about each endpoint.
struct EndpointInfo {
EndpointInfo(RefCountedPtr<SubchannelPicker> picker,
RefCountedPtr<EndpointWeight> weight)
: picker(std::move(picker)), weight(std::move(weight)) {}
// Info stored about each subchannel.
struct SubchannelInfo {
SubchannelInfo(RefCountedPtr<SubchannelInterface> subchannel,
RefCountedPtr<AddressWeight> weight)
: subchannel(std::move(subchannel)), weight(std::move(weight)) {}
RefCountedPtr<SubchannelPicker> picker;
RefCountedPtr<EndpointWeight> weight;
RefCountedPtr<SubchannelInterface> subchannel;
RefCountedPtr<AddressWeight> weight;
};
// Returns the index into endpoints_ to be picked.
// Returns the index into subchannels_ to be picked.
size_t PickIndex();
// Builds a new scheduler and swaps it into place, then starts a
@ -319,7 +341,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy {
RefCountedPtr<WeightedRoundRobin> wrr_;
RefCountedPtr<WeightedRoundRobinConfig> config_;
std::vector<EndpointInfo> endpoints_;
std::vector<SubchannelInfo> subchannels_;
Mutex scheduler_mu_;
std::shared_ptr<StaticStrideScheduler> scheduler_
@ -337,22 +359,23 @@ class WeightedRoundRobin : public LoadBalancingPolicy {
void ShutdownLocked() override;
RefCountedPtr<EndpointWeight> GetOrCreateWeight(
RefCountedPtr<AddressWeight> GetOrCreateWeight(
const grpc_resolved_address& address);
RefCountedPtr<WeightedRoundRobinConfig> config_;
// List of endpoints.
OrphanablePtr<WrrEndpointList> endpoint_list_;
// Latest pending endpoint list.
// When we get an updated address list, we create a new endpoint list
// for it here, and we wait to swap it into endpoint_list_ until the new
// List of subchannels.
RefCountedPtr<WeightedRoundRobinSubchannelList> subchannel_list_;
// Latest pending subchannel list.
// When we get an updated address list, we create a new subchannel list
// for it here, and we wait to swap it into subchannel_list_ until the new
// list becomes READY.
OrphanablePtr<WrrEndpointList> latest_pending_endpoint_list_;
RefCountedPtr<WeightedRoundRobinSubchannelList>
latest_pending_subchannel_list_;
Mutex endpoint_weight_map_mu_;
std::map<std::string, EndpointWeight*, std::less<>> endpoint_weight_map_
ABSL_GUARDED_BY(&endpoint_weight_map_mu_);
Mutex address_weight_map_mu_;
std::map<std::string, AddressWeight*, std::less<>> address_weight_map_
ABSL_GUARDED_BY(&address_weight_map_mu_);
bool shutdown_ = false;
@ -363,18 +386,18 @@ class WeightedRoundRobin : public LoadBalancingPolicy {
};
//
// WeightedRoundRobin::EndpointWeight
// WeightedRoundRobin::AddressWeight
//
WeightedRoundRobin::EndpointWeight::~EndpointWeight() {
MutexLock lock(&wrr_->endpoint_weight_map_mu_);
auto it = wrr_->endpoint_weight_map_.find(key_);
if (it != wrr_->endpoint_weight_map_.end() && it->second == this) {
wrr_->endpoint_weight_map_.erase(it);
WeightedRoundRobin::AddressWeight::~AddressWeight() {
MutexLock lock(&wrr_->address_weight_map_mu_);
auto it = wrr_->address_weight_map_.find(key_);
if (it != wrr_->address_weight_map_.end() && it->second == this) {
wrr_->address_weight_map_.erase(it);
}
}
void WeightedRoundRobin::EndpointWeight::MaybeUpdateWeight(
void WeightedRoundRobin::AddressWeight::MaybeUpdateWeight(
double qps, double eps, double utilization,
float error_utilization_penalty) {
// Compute weight.
@ -414,7 +437,7 @@ void WeightedRoundRobin::EndpointWeight::MaybeUpdateWeight(
last_update_time_ = now;
}
float WeightedRoundRobin::EndpointWeight::GetWeight(
float WeightedRoundRobin::AddressWeight::GetWeight(
Timestamp now, Duration weight_expiration_period,
Duration blackout_period) {
MutexLock lock(&mu_);
@ -445,7 +468,7 @@ float WeightedRoundRobin::EndpointWeight::GetWeight(
return weight_;
}
void WeightedRoundRobin::EndpointWeight::ResetNonEmptySince() {
void WeightedRoundRobin::AddressWeight::ResetNonEmptySince() {
MutexLock lock(&mu_);
non_empty_since_ = Timestamp::InfFuture();
}
@ -454,13 +477,8 @@ void WeightedRoundRobin::EndpointWeight::ResetNonEmptySince() {
// WeightedRoundRobin::Picker::SubchannelCallTracker
//
void WeightedRoundRobin::Picker::SubchannelCallTracker::Start() {
if (child_tracker_ != nullptr) child_tracker_->Start();
}
void WeightedRoundRobin::Picker::SubchannelCallTracker::Finish(
FinishArgs args) {
if (child_tracker_ != nullptr) child_tracker_->Finish(args);
auto* backend_metric_data =
args.backend_metric_accessor->GetBackendMetricData();
double qps = 0;
@ -481,22 +499,23 @@ void WeightedRoundRobin::Picker::SubchannelCallTracker::Finish(
// WeightedRoundRobin::Picker
//
WeightedRoundRobin::Picker::Picker(RefCountedPtr<WeightedRoundRobin> wrr,
WrrEndpointList* endpoint_list)
WeightedRoundRobin::Picker::Picker(
RefCountedPtr<WeightedRoundRobin> wrr,
WeightedRoundRobinSubchannelList* subchannel_list)
: wrr_(std::move(wrr)),
config_(wrr_->config_),
last_picked_index_(absl::Uniform<size_t>(wrr_->bit_gen_)) {
for (auto& endpoint : endpoint_list->endpoints()) {
auto* ep = static_cast<WrrEndpointList::WrrEndpoint*>(endpoint.get());
if (ep->connectivity_state() == GRPC_CHANNEL_READY) {
endpoints_.emplace_back(ep->picker(), ep->weight());
for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) {
WeightedRoundRobinSubchannelData* sd = subchannel_list->subchannel(i);
if (sd->connectivity_state() == GRPC_CHANNEL_READY) {
subchannels_.emplace_back(sd->subchannel()->Ref(), sd->weight());
}
}
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p picker %p] created picker from endpoint_list=%p "
"[WRR %p picker %p] created picker from subchannel_list=%p "
"with %" PRIuPTR " subchannels",
wrr_.get(), this, endpoint_list, endpoints_.size());
wrr_.get(), this, subchannel_list, subchannels_.size());
}
BuildSchedulerAndStartTimerLocked();
}
@ -514,30 +533,26 @@ void WeightedRoundRobin::Picker::Orphan() {
}
wrr_->channel_control_helper()->GetEventEngine()->Cancel(*timer_handle_);
timer_handle_.reset();
wrr_.reset();
}
WeightedRoundRobin::PickResult WeightedRoundRobin::Picker::Pick(PickArgs args) {
WeightedRoundRobin::PickResult WeightedRoundRobin::Picker::Pick(
PickArgs /*args*/) {
size_t index = PickIndex();
GPR_ASSERT(index < endpoints_.size());
auto& endpoint_info = endpoints_[index];
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p picker %p] returning index %" PRIuPTR ", picker=%p",
wrr_.get(), this, index, endpoint_info.picker.get());
}
auto result = endpoint_info.picker->Pick(args);
GPR_ASSERT(index < subchannels_.size());
auto& subchannel_info = subchannels_[index];
// Collect per-call utilization data if needed.
std::unique_ptr<SubchannelCallTrackerInterface> subchannel_call_tracker;
if (!config_->enable_oob_load_report()) {
auto* complete = absl::get_if<PickResult::Complete>(&result.result);
if (complete != nullptr) {
complete->subchannel_call_tracker =
std::make_unique<SubchannelCallTracker>(
endpoint_info.weight, config_->error_utilization_penalty(),
std::move(complete->subchannel_call_tracker));
}
subchannel_call_tracker = std::make_unique<SubchannelCallTracker>(
subchannel_info.weight, config_->error_utilization_penalty());
}
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p picker %p] returning index %" PRIuPTR ", subchannel=%p",
wrr_.get(), this, index, subchannel_info.subchannel.get());
}
return result;
return PickResult::Complete(subchannel_info.subchannel,
std::move(subchannel_call_tracker));
}
size_t WeightedRoundRobin::Picker::PickIndex() {
@ -551,16 +566,16 @@ size_t WeightedRoundRobin::Picker::PickIndex() {
if (scheduler != nullptr) return scheduler->Pick();
// We don't have a scheduler (i.e., either all of the weights are 0 or
// there is only one subchannel), so fall back to RR.
return last_picked_index_.fetch_add(1) % endpoints_.size();
return last_picked_index_.fetch_add(1) % subchannels_.size();
}
void WeightedRoundRobin::Picker::BuildSchedulerAndStartTimerLocked() {
// Build scheduler.
const Timestamp now = Timestamp::Now();
std::vector<float> weights;
weights.reserve(endpoints_.size());
for (const auto& endpoint : endpoints_) {
weights.push_back(endpoint.weight->GetWeight(
weights.reserve(subchannels_.size());
for (const auto& subchannel : subchannels_) {
weights.push_back(subchannel.weight->GetWeight(
now, config_->weight_expiration_period(), config_->blackout_period()));
}
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
@ -586,10 +601,6 @@ void WeightedRoundRobin::Picker::BuildSchedulerAndStartTimerLocked() {
scheduler_ = std::move(scheduler);
}
// Start timer.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO, "[WRR %p picker %p] scheduling timer for %s", wrr_.get(),
this, config_->weight_update_period().ToString().c_str());
}
WeakRefCountedPtr<Picker> self = WeakRef();
timer_handle_ = wrr_->channel_control_helper()->GetEventEngine()->RunAfter(
config_->weight_update_period(), [self = std::move(self)]() mutable {
@ -625,8 +636,8 @@ WeightedRoundRobin::~WeightedRoundRobin() {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO, "[WRR %p] Destroying Round Robin policy", this);
}
GPR_ASSERT(endpoint_list_ == nullptr);
GPR_ASSERT(latest_pending_endpoint_list_ == nullptr);
GPR_ASSERT(subchannel_list_ == nullptr);
GPR_ASSERT(latest_pending_subchannel_list_ == nullptr);
}
void WeightedRoundRobin::ShutdownLocked() {
@ -634,14 +645,14 @@ void WeightedRoundRobin::ShutdownLocked() {
gpr_log(GPR_INFO, "[WRR %p] Shutting down", this);
}
shutdown_ = true;
endpoint_list_.reset();
latest_pending_endpoint_list_.reset();
subchannel_list_.reset();
latest_pending_subchannel_list_.reset();
}
void WeightedRoundRobin::ResetBackoffLocked() {
endpoint_list_->ResetBackoffLocked();
if (latest_pending_endpoint_list_ != nullptr) {
latest_pending_endpoint_list_->ResetBackoffLocked();
subchannel_list_->ResetBackoffLocked();
if (latest_pending_subchannel_list_ != nullptr) {
latest_pending_subchannel_list_->ResetBackoffLocked();
}
}
@ -681,28 +692,27 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) {
}
// If we already have a subchannel list, then keep using the existing
// list, but still report back that the update was not accepted.
if (endpoint_list_ != nullptr) return args.addresses.status();
if (subchannel_list_ != nullptr) return args.addresses.status();
}
// Create new subchannel list, replacing the previous pending list, if any.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) &&
latest_pending_endpoint_list_ != nullptr) {
latest_pending_subchannel_list_ != nullptr) {
gpr_log(GPR_INFO, "[WRR %p] replacing previous pending subchannel list %p",
this, latest_pending_endpoint_list_.get());
this, latest_pending_subchannel_list_.get());
}
latest_pending_endpoint_list_ =
MakeOrphanable<WrrEndpointList>(Ref(), std::move(addresses), args.args);
latest_pending_subchannel_list_ =
MakeRefCounted<WeightedRoundRobinSubchannelList>(
this, std::move(addresses), args.args);
latest_pending_subchannel_list_->StartWatchingLocked(args.args);
// If the new list is empty, immediately promote it to
// endpoint_list_ and report TRANSIENT_FAILURE.
// TODO(roth): As part of adding dualstack backend support, we need to
// also handle the case where the list of addresses for a given
// endpoint is empty.
if (latest_pending_endpoint_list_->size() == 0) {
// subchannel_list_ and report TRANSIENT_FAILURE.
if (latest_pending_subchannel_list_->num_subchannels() == 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) &&
endpoint_list_ != nullptr) {
subchannel_list_ != nullptr) {
gpr_log(GPR_INFO, "[WRR %p] replacing previous subchannel list %p", this,
endpoint_list_.get());
subchannel_list_.get());
}
endpoint_list_ = std::move(latest_pending_endpoint_list_);
subchannel_list_ = std::move(latest_pending_subchannel_list_);
absl::Status status =
args.addresses.ok() ? absl::UnavailableError(absl::StrCat(
"empty address list: ", args.resolution_note))
@ -713,126 +723,42 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) {
return status;
}
// Otherwise, if this is the initial update, immediately promote it to
// endpoint_list_.
if (endpoint_list_.get() == nullptr) {
endpoint_list_ = std::move(latest_pending_endpoint_list_);
// subchannel_list_.
if (subchannel_list_.get() == nullptr) {
subchannel_list_ = std::move(latest_pending_subchannel_list_);
}
return absl::OkStatus();
}
RefCountedPtr<WeightedRoundRobin::EndpointWeight>
RefCountedPtr<WeightedRoundRobin::AddressWeight>
WeightedRoundRobin::GetOrCreateWeight(const grpc_resolved_address& address) {
auto key = grpc_sockaddr_to_uri(&address);
if (!key.ok()) return nullptr;
MutexLock lock(&endpoint_weight_map_mu_);
auto it = endpoint_weight_map_.find(*key);
if (it != endpoint_weight_map_.end()) {
MutexLock lock(&address_weight_map_mu_);
auto it = address_weight_map_.find(*key);
if (it != address_weight_map_.end()) {
auto weight = it->second->RefIfNonZero();
if (weight != nullptr) return weight;
}
auto weight = MakeRefCounted<EndpointWeight>(
Ref(DEBUG_LOCATION, "EndpointWeight"), *key);
endpoint_weight_map_.emplace(*key, weight.get());
auto weight =
MakeRefCounted<AddressWeight>(Ref(DEBUG_LOCATION, "AddressWeight"), *key);
address_weight_map_.emplace(*key, weight.get());
return weight;
}
//
// WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OobWatcher
//
void WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OobWatcher::
OnBackendMetricReport(const BackendMetricData& backend_metric_data) {
double utilization = backend_metric_data.application_utilization;
if (utilization <= 0) {
utilization = backend_metric_data.cpu_utilization;
}
weight_->MaybeUpdateWeight(backend_metric_data.qps, backend_metric_data.eps,
utilization, error_utilization_penalty_);
}
//
// WeightedRoundRobin::WrrEndpointList::WrrEndpoint
//
RefCountedPtr<SubchannelInterface>
WeightedRoundRobin::WrrEndpointList::WrrEndpoint::CreateSubchannel(
ServerAddress address, const ChannelArgs& args) {
auto* wrr = policy<WeightedRoundRobin>();
auto subchannel =
wrr->channel_control_helper()->CreateSubchannel(std::move(address), args);
// Start OOB watch if configured.
if (wrr->config_->enable_oob_load_report()) {
subchannel->AddDataWatcher(MakeOobBackendMetricWatcher(
wrr->config_->oob_reporting_period(),
std::make_unique<OobWatcher>(
weight_, wrr->config_->error_utilization_penalty())));
}
return subchannel;
}
void WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OnStateUpdate(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state, const absl::Status& status) {
auto* wrr_endpoint_list = endpoint_list<WrrEndpointList>();
auto* wrr = policy<WeightedRoundRobin>();
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p] connectivity changed for child %p, endpoint_list %p "
"(index %" PRIuPTR " of %" PRIuPTR
"): prev_state=%s new_state=%s (%s)",
wrr, this, wrr_endpoint_list, Index(), wrr_endpoint_list->size(),
(old_state.has_value() ? ConnectivityStateName(*old_state) : "N/A"),
ConnectivityStateName(new_state), status.ToString().c_str());
}
if (new_state == GRPC_CHANNEL_IDLE) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p] child %p reported IDLE; requesting connection", wrr,
this);
}
ExitIdleLocked();
} else if (new_state == GRPC_CHANNEL_READY) {
// If we transition back to READY state, restart the blackout period.
// Skip this if this is the initial notification for this
// subchannel (which happens whenever we get updated addresses and
// create a new endpoint list). Also skip it if the previous state
// was READY (which should never happen in practice, but we've seen
// at least one bug that caused this in the outlier_detection
// policy, so let's be defensive here).
//
// Note that we cannot guarantee that we will never receive
// lingering callbacks for backend metric reports from the previous
// connection after the new connection has been established, but they
// should be masked by new backend metric reports from the new
// connection by the time the blackout period ends.
if (old_state.has_value() && old_state != GRPC_CHANNEL_READY) {
weight_->ResetNonEmptySince();
}
}
// If state changed, update state counters.
if (!old_state.has_value() || *old_state != new_state) {
wrr_endpoint_list->UpdateStateCountersLocked(old_state, new_state);
}
// Update the policy state.
wrr_endpoint_list->MaybeUpdateAggregatedConnectivityStateLocked(status);
}
//
// WeightedRoundRobin::WrrEndpointList
// WeightedRoundRobin::WeightedRoundRobinSubchannelList
//
void WeightedRoundRobin::WrrEndpointList::UpdateStateCountersLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) {
// We treat IDLE the same as CONNECTING, since it will immediately
// transition into that state anyway.
void WeightedRoundRobin::WeightedRoundRobinSubchannelList::
UpdateStateCountersLocked(absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) {
if (old_state.has_value()) {
GPR_ASSERT(*old_state != GRPC_CHANNEL_SHUTDOWN);
if (*old_state == GRPC_CHANNEL_READY) {
GPR_ASSERT(num_ready_ > 0);
--num_ready_;
} else if (*old_state == GRPC_CHANNEL_CONNECTING ||
*old_state == GRPC_CHANNEL_IDLE) {
} else if (*old_state == GRPC_CHANNEL_CONNECTING) {
GPR_ASSERT(num_connecting_ > 0);
--num_connecting_;
} else if (*old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
@ -843,79 +769,217 @@ void WeightedRoundRobin::WrrEndpointList::UpdateStateCountersLocked(
GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN);
if (new_state == GRPC_CHANNEL_READY) {
++num_ready_;
} else if (new_state == GRPC_CHANNEL_CONNECTING ||
new_state == GRPC_CHANNEL_IDLE) {
} else if (new_state == GRPC_CHANNEL_CONNECTING) {
++num_connecting_;
} else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
++num_transient_failure_;
}
}
void WeightedRoundRobin::WrrEndpointList::
void WeightedRoundRobin::WeightedRoundRobinSubchannelList::
MaybeUpdateAggregatedConnectivityStateLocked(absl::Status status_for_tf) {
auto* wrr = policy<WeightedRoundRobin>();
// If this is latest_pending_endpoint_list_, then swap it into
// endpoint_list_ in the following cases:
// - endpoint_list_ has no READY children.
// - This list has at least one READY child and we have seen the
// initial connectivity state notification for all children.
// - All of the children in this list are in TRANSIENT_FAILURE.
WeightedRoundRobin* p = static_cast<WeightedRoundRobin*>(policy());
// If this is latest_pending_subchannel_list_, then swap it into
// subchannel_list_ in the following cases:
// - subchannel_list_ has no READY subchannels.
// - This list has at least one READY subchannel and we have seen the
// initial connectivity state notification for all subchannels.
// - All of the subchannels in this list are in TRANSIENT_FAILURE.
// (This may cause the channel to go from READY to TRANSIENT_FAILURE,
// but we're doing what the control plane told us to do.)
if (wrr->latest_pending_endpoint_list_.get() == this &&
(wrr->endpoint_list_->num_ready_ == 0 ||
(num_ready_ > 0 && AllEndpointsSeenInitialState()) ||
num_transient_failure_ == size())) {
if (p->latest_pending_subchannel_list_.get() == this &&
(p->subchannel_list_->num_ready_ == 0 ||
(num_ready_ > 0 && AllSubchannelsSeenInitialState()) ||
num_transient_failure_ == num_subchannels())) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
const std::string old_counters_string =
wrr->endpoint_list_ != nullptr ? wrr->endpoint_list_->CountersString()
p->subchannel_list_ != nullptr ? p->subchannel_list_->CountersString()
: "";
gpr_log(GPR_INFO,
"[WRR %p] swapping out endpoint list %p (%s) in favor of %p (%s)",
wrr, wrr->endpoint_list_.get(), old_counters_string.c_str(), this,
CountersString().c_str());
gpr_log(
GPR_INFO,
"[WRR %p] swapping out subchannel list %p (%s) in favor of %p (%s)",
p, p->subchannel_list_.get(), old_counters_string.c_str(), this,
CountersString().c_str());
}
wrr->endpoint_list_ = std::move(wrr->latest_pending_endpoint_list_);
p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
}
// Only set connectivity state if this is the current endpoint list.
if (wrr->endpoint_list_.get() != this) return;
// Only set connectivity state if this is the current subchannel list.
if (p->subchannel_list_.get() != this) return;
// First matching rule wins:
// 1) ANY child is READY => policy is READY.
// 2) ANY child is CONNECTING => policy is CONNECTING.
// 3) ALL children are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE.
// 1) ANY subchannel is READY => policy is READY.
// 2) ANY subchannel is CONNECTING => policy is CONNECTING.
// 3) ALL subchannels are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE.
if (num_ready_ > 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO, "[WRR %p] reporting READY with endpoint list %p", wrr,
gpr_log(GPR_INFO, "[WRR %p] reporting READY with subchannel list %p", p,
this);
}
wrr->channel_control_helper()->UpdateState(
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_READY, absl::Status(),
MakeRefCounted<Picker>(wrr->Ref(), this));
MakeRefCounted<Picker>(p->Ref(), this));
} else if (num_connecting_ > 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO, "[WRR %p] reporting CONNECTING with endpoint list %p",
wrr, this);
gpr_log(GPR_INFO, "[WRR %p] reporting CONNECTING with subchannel list %p",
p, this);
}
wrr->channel_control_helper()->UpdateState(
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_CONNECTING, absl::Status(),
MakeRefCounted<QueuePicker>(nullptr));
} else if (num_transient_failure_ == size()) {
MakeRefCounted<QueuePicker>(p->Ref(DEBUG_LOCATION, "QueuePicker")));
} else if (num_transient_failure_ == num_subchannels()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p] reporting TRANSIENT_FAILURE with endpoint list %p: %s",
wrr, this, status_for_tf.ToString().c_str());
gpr_log(
GPR_INFO,
"[WRR %p] reporting TRANSIENT_FAILURE with subchannel list %p: %s", p,
this, status_for_tf.ToString().c_str());
}
if (!status_for_tf.ok()) {
last_failure_ = absl::UnavailableError(
absl::StrCat("connections to all backends failing; last error: ",
status_for_tf.ToString()));
}
wrr->channel_control_helper()->UpdateState(
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_TRANSIENT_FAILURE, last_failure_,
MakeRefCounted<TransientFailurePicker>(last_failure_));
}
}
//
// WeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher
//
void WeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher::
OnBackendMetricReport(const BackendMetricData& backend_metric_data) {
double utilization = backend_metric_data.application_utilization;
if (utilization <= 0) {
utilization = backend_metric_data.cpu_utilization;
}
weight_->MaybeUpdateWeight(backend_metric_data.qps, backend_metric_data.eps,
utilization, error_utilization_penalty_);
}
//
// WeightedRoundRobin::WeightedRoundRobinSubchannelData
//
WeightedRoundRobin::WeightedRoundRobinSubchannelData::
WeightedRoundRobinSubchannelData(
SubchannelList<WeightedRoundRobinSubchannelList,
WeightedRoundRobinSubchannelData>* subchannel_list,
const ServerAddress& address, RefCountedPtr<SubchannelInterface> sc)
: SubchannelData(subchannel_list, address, std::move(sc)),
weight_(static_cast<WeightedRoundRobin*>(subchannel_list->policy())
->GetOrCreateWeight(address.address())) {
// Start OOB watch if configured.
WeightedRoundRobin* p =
static_cast<WeightedRoundRobin*>(subchannel_list->policy());
if (p->config_->enable_oob_load_report()) {
subchannel()->AddDataWatcher(MakeOobBackendMetricWatcher(
p->config_->oob_reporting_period(),
std::make_unique<OobWatcher>(weight_,
p->config_->error_utilization_penalty())));
}
}
void WeightedRoundRobin::WeightedRoundRobinSubchannelData::
ProcessConnectivityChangeLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) {
WeightedRoundRobin* p =
static_cast<WeightedRoundRobin*>(subchannel_list()->policy());
GPR_ASSERT(subchannel() != nullptr);
// If this is not the initial state notification and the new state is
// TRANSIENT_FAILURE or IDLE, re-resolve.
// Note that we don't want to do this on the initial state notification,
// because that would result in an endless loop of re-resolution.
if (old_state.has_value() && (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE ||
new_state == GRPC_CHANNEL_IDLE)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p] Subchannel %p reported %s; requesting re-resolution", p,
subchannel(), ConnectivityStateName(new_state));
}
p->channel_control_helper()->RequestReresolution();
}
if (new_state == GRPC_CHANNEL_IDLE) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p] Subchannel %p reported IDLE; requesting connection", p,
subchannel());
}
subchannel()->RequestConnection();
} else if (new_state == GRPC_CHANNEL_READY) {
// If we transition back to READY state, restart the blackout period.
// Skip this if this is the initial notification for this
// subchannel (which happens whenever we get updated addresses and
// create a new endpoint list). Also skip it if the previous state
// was READY (which should never happen in practice, but we've seen
// at least one bug that caused this in the outlier_detection
// policy, so let's be defensive here).
//
// Note that we cannot guarantee that we will never receive
// lingering callbacks for backend metric reports from the previous
// connection after the new connection has been established, but they
// should be masked by new backend metric reports from the new
// connection by the time the blackout period ends.
if (old_state.has_value() && old_state != GRPC_CHANNEL_READY) {
weight_->ResetNonEmptySince();
}
}
// Update logical connectivity state.
UpdateLogicalConnectivityStateLocked(new_state);
// Update the policy state.
subchannel_list()->MaybeUpdateAggregatedConnectivityStateLocked(
connectivity_status());
}
void WeightedRoundRobin::WeightedRoundRobinSubchannelData::
UpdateLogicalConnectivityStateLocked(
grpc_connectivity_state connectivity_state) {
WeightedRoundRobin* p =
static_cast<WeightedRoundRobin*>(subchannel_list()->policy());
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(
GPR_INFO,
"[WRR %p] connectivity changed for subchannel %p, subchannel_list %p "
"(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s",
p, subchannel(), subchannel_list(), Index(),
subchannel_list()->num_subchannels(),
(logical_connectivity_state_.has_value()
? ConnectivityStateName(*logical_connectivity_state_)
: "N/A"),
ConnectivityStateName(connectivity_state));
}
// Decide what state to report for aggregation purposes.
// If the last logical state was TRANSIENT_FAILURE, then ignore the
// state change unless the new state is READY.
if (logical_connectivity_state_.has_value() &&
*logical_connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE &&
connectivity_state != GRPC_CHANNEL_READY) {
return;
}
// If the new state is IDLE, treat it as CONNECTING, since it will
// immediately transition into CONNECTING anyway.
if (connectivity_state == GRPC_CHANNEL_IDLE) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) {
gpr_log(GPR_INFO,
"[WRR %p] subchannel %p, subchannel_list %p (index %" PRIuPTR
" of %" PRIuPTR "): treating IDLE as CONNECTING",
p, subchannel(), subchannel_list(), Index(),
subchannel_list()->num_subchannels());
}
connectivity_state = GRPC_CHANNEL_CONNECTING;
}
// If no change, return false.
if (logical_connectivity_state_.has_value() &&
*logical_connectivity_state_ == connectivity_state) {
return;
}
// Otherwise, update counters and logical state.
subchannel_list()->UpdateStateCountersLocked(logical_connectivity_state_,
connectivity_state);
logical_connectivity_state_ = connectivity_state;
}
//
// factory
//

@ -18,7 +18,6 @@
#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h"
#include <inttypes.h>
#include <stddef.h>
#include <algorithm>
@ -431,10 +430,7 @@ void XdsOverrideHostLb::ResetBackoffLocked() {
absl::Status XdsOverrideHostLb::UpdateLocked(UpdateArgs args) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] Received update with %" PRIuPTR
" addresses",
this, args.addresses.ok() ? args.addresses->size() : 0);
gpr_log(GPR_INFO, "[xds_override_host_lb %p] Received update", this);
}
auto old_config = std::move(config_);
// Update config.
@ -502,10 +498,6 @@ OrphanablePtr<LoadBalancingPolicy> XdsOverrideHostLb::CreateChildPolicyLocked(
absl::StatusOr<ServerAddressList> XdsOverrideHostLb::UpdateAddressMap(
absl::StatusOr<ServerAddressList> addresses) {
if (!addresses.ok()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO, "[xds_override_host_lb %p] address error: %s", this,
addresses.status().ToString().c_str());
}
return addresses;
}
ServerAddressList return_value;
@ -513,30 +505,13 @@ absl::StatusOr<ServerAddressList> XdsOverrideHostLb::UpdateAddressMap(
for (const auto& address : *addresses) {
XdsHealthStatus status = GetAddressHealthStatus(address);
if (status.status() != XdsHealthStatus::kDraining) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] address %s: not draining, "
"passing to child",
this, address.ToString().c_str());
}
return_value.push_back(address);
} else if (!config_->override_host_status_set().Contains(status)) {
// Skip draining hosts if not in the override status set.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] address %s: draining but not in "
"override_host_status set -- ignoring",
this, address.ToString().c_str());
}
continue;
}
auto key = grpc_sockaddr_to_uri(&address.address());
if (key.ok()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] address %s: adding map key %s", this,
address.ToString().c_str(), key->c_str());
}
addresses_for_map.emplace(std::move(*key), status);
}
}
@ -544,10 +519,6 @@ absl::StatusOr<ServerAddressList> XdsOverrideHostLb::UpdateAddressMap(
MutexLock lock(&subchannel_map_mu_);
for (auto it = subchannel_map_.begin(); it != subchannel_map_.end();) {
if (addresses_for_map.find(it->first) == addresses_for_map.end()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO, "[xds_override_host_lb %p] removing map key %s",
this, it->first.c_str());
}
it = subchannel_map_.erase(it);
} else {
++it;
@ -556,20 +527,10 @@ absl::StatusOr<ServerAddressList> XdsOverrideHostLb::UpdateAddressMap(
for (const auto& key_status : addresses_for_map) {
auto it = subchannel_map_.find(key_status.first);
if (it == subchannel_map_.end()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO, "[xds_override_host_lb %p] adding map key %s", this,
key_status.first.c_str());
}
subchannel_map_.emplace(std::piecewise_construct,
std::forward_as_tuple(key_status.first),
std::forward_as_tuple(key_status.second));
} else {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] setting EDS health status for "
"%s to %s",
this, key_status.first.c_str(), key_status.second.ToString());
}
it->second.SetEdsHealthStatus(key_status.second);
}
}

@ -212,8 +212,6 @@ class Subchannel : public DualRefCounted<Subchannel> {
channelz::SubchannelNode* channelz_node();
const grpc_resolved_address& address() const { return key_.address(); }
// Starts watching the subchannel's connectivity state.
// The first callback to the watcher will be delivered ~immediately.
// Subsequent callbacks will be delivered as the subchannel's state

@ -58,7 +58,6 @@ class ServerAddress {
ServerAddress& operator=(ServerAddress&& other) noexcept;
bool operator==(const ServerAddress& other) const { return Cmp(other) == 0; }
bool operator<(const ServerAddress& other) const { return Cmp(other) < 0; }
int Cmp(const ServerAddress& other) const;

@ -33,7 +33,6 @@ CORE_SOURCE_FILES = [
'src/core/ext/filters/client_channel/http_proxy.cc',
'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc',

@ -57,10 +57,8 @@
#include "src/core/ext/filters/client_channel/client_channel_internal.h"
#include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h"
#include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h"
#include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h"
#include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h"
#include "src/core/ext/filters/client_channel/subchannel_interface_internal.h"
#include "src/core/ext/filters/client_channel/subchannel_pool_interface.h"
#include "src/core/lib/address_utils/parse_address.h"
#include "src/core/lib/address_utils/sockaddr_utils.h"
@ -113,10 +111,7 @@ class LoadBalancingPolicyTest : public ::testing::Test {
~FakeSubchannel() override {
if (orca_watcher_ != nullptr) {
MutexLock lock(&state_->backend_metric_watcher_mu_);
state_->orca_watchers_.erase(orca_watcher_.get());
}
for (const auto& p : watcher_map_) {
state_->state_tracker_.RemoveWatcher(p.second);
state_->watchers_.erase(orca_watcher_.get());
}
}
@ -126,11 +121,6 @@ class LoadBalancingPolicyTest : public ::testing::Test {
// Converts between
// SubchannelInterface::ConnectivityStateWatcherInterface and
// ConnectivityStateWatcherInterface.
//
// We support both unique_ptr<> and shared_ptr<>, since raw
// connectivity watches use the latter but health watches use the
// former.
// TODO(roth): Clean this up.
class WatcherWrapper : public AsyncConnectivityStateWatcherInterface {
public:
WatcherWrapper(
@ -142,59 +132,33 @@ class LoadBalancingPolicyTest : public ::testing::Test {
std::move(work_serializer)),
watcher_(std::move(watcher)) {}
WatcherWrapper(
std::shared_ptr<WorkSerializer> work_serializer,
std::shared_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>
watcher)
: AsyncConnectivityStateWatcherInterface(
std::move(work_serializer)),
watcher_(std::move(watcher)) {}
void OnConnectivityStateChange(grpc_connectivity_state new_state,
const absl::Status& status) override {
watcher()->OnConnectivityStateChange(new_state, status);
watcher_->OnConnectivityStateChange(new_state, status);
}
private:
SubchannelInterface::ConnectivityStateWatcherInterface* watcher()
const {
return Match(
watcher_,
[](const std::unique_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>&
watcher) { return watcher.get(); },
[](const std::shared_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>&
watcher) { return watcher.get(); });
}
absl::variant<
std::unique_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>,
std::shared_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>>
std::unique_ptr<SubchannelInterface::ConnectivityStateWatcherInterface>
watcher_;
};
void WatchConnectivityState(
std::unique_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>
watcher) override
ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) {
auto* watcher_ptr = watcher.get();
watcher) override {
auto watcher_wrapper = MakeOrphanable<WatcherWrapper>(
work_serializer_, std::move(watcher));
watcher_map_[watcher_ptr] = watcher_wrapper.get();
watcher_map_[watcher.get()] = watcher_wrapper.get();
MutexLock lock(&state_->mu_);
state_->state_tracker_.AddWatcher(GRPC_CHANNEL_SHUTDOWN,
std::move(watcher_wrapper));
}
void CancelConnectivityStateWatch(
ConnectivityStateWatcherInterface* watcher) override
ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) {
ConnectivityStateWatcherInterface* watcher) override {
auto it = watcher_map_.find(watcher);
if (it == watcher_map_.end()) return;
MutexLock lock(&state_->mu_);
state_->state_tracker_.RemoveWatcher(it->second);
watcher_map_.erase(it);
}
@ -204,56 +168,19 @@ class LoadBalancingPolicyTest : public ::testing::Test {
state_->requested_connection_ = true;
}
void AddDataWatcher(std::unique_ptr<DataWatcherInterface> watcher)
override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) {
void AddDataWatcher(
std::unique_ptr<DataWatcherInterface> watcher) override {
MutexLock lock(&state_->backend_metric_watcher_mu_);
auto* w =
static_cast<InternalSubchannelDataWatcherInterface*>(watcher.get());
if (w->type() == OrcaProducer::Type()) {
GPR_ASSERT(orca_watcher_ == nullptr);
orca_watcher_.reset(static_cast<OrcaWatcher*>(watcher.release()));
state_->orca_watchers_.insert(orca_watcher_.get());
} else if (w->type() == HealthProducer::Type()) {
// TODO(roth): Support health checking in test framework.
// For now, we just hard-code this to the raw connectivity state.
GPR_ASSERT(health_watcher_ == nullptr);
GPR_ASSERT(health_watcher_wrapper_ == nullptr);
health_watcher_.reset(static_cast<HealthWatcher*>(watcher.release()));
auto connectivity_watcher = health_watcher_->TakeWatcher();
auto* connectivity_watcher_ptr = connectivity_watcher.get();
auto watcher_wrapper = MakeOrphanable<WatcherWrapper>(
work_serializer_, std::move(connectivity_watcher));
health_watcher_wrapper_ = watcher_wrapper.get();
state_->state_tracker_.AddWatcher(GRPC_CHANNEL_SHUTDOWN,
std::move(watcher_wrapper));
gpr_log(GPR_INFO,
"AddDataWatcher(): added HealthWatch=%p "
"connectivity_watcher=%p watcher_wrapper=%p",
health_watcher_.get(), connectivity_watcher_ptr,
health_watcher_wrapper_);
}
GPR_ASSERT(orca_watcher_ == nullptr);
orca_watcher_.reset(static_cast<OrcaWatcher*>(watcher.release()));
state_->watchers_.insert(orca_watcher_.get());
}
void CancelDataWatcher(DataWatcherInterface* watcher) override
ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) {
void CancelDataWatcher(DataWatcherInterface* watcher) override {
MutexLock lock(&state_->backend_metric_watcher_mu_);
auto* w = static_cast<InternalSubchannelDataWatcherInterface*>(watcher);
if (w->type() == OrcaProducer::Type()) {
if (orca_watcher_.get() != static_cast<OrcaWatcher*>(watcher)) return;
state_->orca_watchers_.erase(orca_watcher_.get());
orca_watcher_.reset();
} else if (w->type() == HealthProducer::Type()) {
if (health_watcher_.get() != static_cast<HealthWatcher*>(watcher)) {
return;
}
gpr_log(GPR_INFO,
"CancelDataWatcher(): cancelling HealthWatch=%p "
"watcher_wrapper=%p",
health_watcher_.get(), health_watcher_wrapper_);
state_->state_tracker_.RemoveWatcher(health_watcher_wrapper_);
health_watcher_wrapper_ = nullptr;
health_watcher_.reset();
}
if (orca_watcher_.get() != static_cast<OrcaWatcher*>(watcher)) return;
state_->watchers_.erase(orca_watcher_.get());
orca_watcher_.reset();
}
// Don't need this method, so it's a no-op.
@ -264,16 +191,11 @@ class LoadBalancingPolicyTest : public ::testing::Test {
std::map<SubchannelInterface::ConnectivityStateWatcherInterface*,
WatcherWrapper*>
watcher_map_;
std::unique_ptr<HealthWatcher> health_watcher_;
WatcherWrapper* health_watcher_wrapper_ = nullptr;
std::unique_ptr<OrcaWatcher> orca_watcher_;
};
SubchannelState(absl::string_view address,
std::shared_ptr<WorkSerializer> work_serializer)
: address_(address),
work_serializer_(std::move(work_serializer)),
state_tracker_("LoadBalancingPolicyTest") {}
explicit SubchannelState(absl::string_view address)
: address_(address), state_tracker_("LoadBalancingPolicyTest") {}
const std::string& address() const { return address_; }
@ -328,14 +250,10 @@ class LoadBalancingPolicyTest : public ::testing::Test {
<< "bug in test: " << ConnectivityStateName(state)
<< " must have OK status: " << status;
}
work_serializer_->Run(
[this, state, status, location]()
ABSL_EXCLUSIVE_LOCKS_REQUIRED(*work_serializer_) {
AssertValidConnectivityStateTransition(state_tracker_.state(),
state, location);
state_tracker_.SetState(state, status, "set from test");
},
DEBUG_LOCATION);
MutexLock lock(&mu_);
AssertValidConnectivityStateTransition(state_tracker_.state(), state,
location);
state_tracker_.SetState(state, status, "set from test");
}
// Indicates if any of the associated SubchannelInterface objects
@ -355,7 +273,7 @@ class LoadBalancingPolicyTest : public ::testing::Test {
// Sends an OOB backend metric report to all watchers.
void SendOobBackendMetricReport(const BackendMetricData& backend_metrics) {
MutexLock lock(&backend_metric_watcher_mu_);
for (const auto* watcher : orca_watchers_) {
for (const auto* watcher : watchers_) {
watcher->watcher()->OnBackendMetricReport(backend_metrics);
}
}
@ -364,7 +282,7 @@ class LoadBalancingPolicyTest : public ::testing::Test {
void CheckOobReportingPeriod(Duration expected,
SourceLocation location = SourceLocation()) {
MutexLock lock(&backend_metric_watcher_mu_);
for (const auto* watcher : orca_watchers_) {
for (const auto* watcher : watchers_) {
EXPECT_EQ(watcher->report_interval(), expected)
<< location.file() << ":" << location.line();
}
@ -372,15 +290,16 @@ class LoadBalancingPolicyTest : public ::testing::Test {
private:
const std::string address_;
std::shared_ptr<WorkSerializer> work_serializer_;
ConnectivityStateTracker state_tracker_ ABSL_GUARDED_BY(*work_serializer_);
Mutex mu_;
ConnectivityStateTracker state_tracker_ ABSL_GUARDED_BY(&mu_);
Mutex requested_connection_mu_;
bool requested_connection_ ABSL_GUARDED_BY(&requested_connection_mu_) =
false;
Mutex backend_metric_watcher_mu_;
std::set<OrcaWatcher*> orca_watchers_
std::set<OrcaWatcher*> watchers_
ABSL_GUARDED_BY(&backend_metric_watcher_mu_);
};
@ -498,8 +417,7 @@ class LoadBalancingPolicyTest : public ::testing::Test {
GPR_ASSERT(address_uri.ok());
it = test_->subchannel_pool_
.emplace(std::piecewise_construct, std::forward_as_tuple(key),
std::forward_as_tuple(std::move(*address_uri),
work_serializer_))
std::forward_as_tuple(std::move(*address_uri)))
.first;
}
return it->second.CreateSubchannel(work_serializer_);
@ -1009,6 +927,7 @@ class LoadBalancingPolicyTest : public ::testing::Test {
// Expect startup with RR with a set of addresses.
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> ExpectRoundRobinStartup(
absl::Span<const absl::string_view> addresses) {
ExpectConnectingUpdate();
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> picker;
for (size_t i = 0; i < addresses.size(); ++i) {
auto* subchannel = FindSubchannel(addresses[i]);
@ -1016,7 +935,6 @@ class LoadBalancingPolicyTest : public ::testing::Test {
if (subchannel == nullptr) return nullptr;
EXPECT_TRUE(subchannel->ConnectionRequested());
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
if (i == 0) ExpectConnectingUpdate();
subchannel->SetConnectivityState(GRPC_CHANNEL_READY);
if (i == 0) {
picker = WaitForConnected();
@ -1086,7 +1004,7 @@ class LoadBalancingPolicyTest : public ::testing::Test {
SubchannelKey key(MakeAddress(address), args);
auto it = subchannel_pool_
.emplace(std::piecewise_construct, std::forward_as_tuple(key),
std::forward_as_tuple(address, work_serializer_))
std::forward_as_tuple(address))
.first;
return &it->second;
}

@ -37,6 +37,7 @@
#include <grpc/support/log.h>
#include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/time.h"
@ -183,6 +184,8 @@ TEST_F(OutlierDetectionTest, Basic) {
absl::Status status = ApplyUpdate(
BuildUpdate({kAddressUri}, ConfigBuilder().Build()), lb_policy_.get());
EXPECT_TRUE(status.ok()) << status;
// LB policy should have reported CONNECTING state.
ExpectConnectingUpdate();
// LB policy should have created a subchannel for the address.
auto* subchannel = FindSubchannel(kAddressUri);
ASSERT_NE(subchannel, nullptr);
@ -191,8 +194,6 @@ TEST_F(OutlierDetectionTest, Basic) {
EXPECT_TRUE(subchannel->ConnectionRequested());
// This causes the subchannel to start to connect, so it reports CONNECTING.
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
// LB policy should have reported CONNECTING state.
ExpectConnectingUpdate();
// When the subchannel becomes connected, it reports READY.
subchannel->SetConnectivityState(GRPC_CHANNEL_READY);
// The LB policy will report CONNECTING some number of times (doesn't
@ -229,6 +230,8 @@ TEST_F(OutlierDetectionTest, FailurePercentage) {
time_cache_.IncrementBy(Duration::Seconds(10));
RunTimerCallback();
gpr_log(GPR_INFO, "### ejection complete");
// Expect a re-resolution request.
ExpectReresolutionRequest();
// Expect a picker update.
std::vector<absl::string_view> remaining_addresses;
for (const auto& addr : kAddresses) {
@ -251,8 +254,10 @@ TEST_F(OutlierDetectionTest, DoesNotWorkWithPickFirst) {
.Build()),
lb_policy_.get());
EXPECT_TRUE(status.ok()) << status;
// LB policy should have created a subchannel for the first address.
auto* subchannel = FindSubchannel(kAddresses[0]);
// LB policy should have created a subchannel for the first address with
// the GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg.
auto* subchannel = FindSubchannel(
kAddresses[0], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
ASSERT_NE(subchannel, nullptr);
// When the LB policy receives the subchannel's initial connectivity
// state notification (IDLE), it will request a connection.

@ -33,6 +33,7 @@
#include <grpc/grpc.h>
#include <grpc/support/json.h>
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
@ -73,7 +74,8 @@ class PickFirstTest : public LoadBalancingPolicyTest {
// We will remove entries as each subchannel starts to connect.
std::map<SubchannelState*, absl::string_view> subchannels;
for (auto address : addresses) {
auto* subchannel = FindSubchannel(address);
auto* subchannel = FindSubchannel(
address, ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
ASSERT_NE(subchannel, nullptr);
subchannels.emplace(subchannel, address);
}
@ -134,10 +136,13 @@ TEST_F(PickFirstTest, FirstAddressWorks) {
absl::Status status = ApplyUpdate(
BuildUpdate(kAddresses, MakePickFirstConfig(false)), lb_policy_.get());
EXPECT_TRUE(status.ok()) << status;
// LB policy should have created a subchannel for both addresses.
auto* subchannel = FindSubchannel(kAddresses[0]);
// LB policy should have created a subchannel for both addresses with
// the GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg.
auto* subchannel = FindSubchannel(
kAddresses[0], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
ASSERT_NE(subchannel, nullptr);
auto* subchannel2 = FindSubchannel(kAddresses[1]);
auto* subchannel2 = FindSubchannel(
kAddresses[1], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
ASSERT_NE(subchannel2, nullptr);
// When the LB policy receives the first subchannel's initial connectivity
// state notification (IDLE), it will request a connection.
@ -167,10 +172,13 @@ TEST_F(PickFirstTest, FirstAddressFails) {
absl::Status status = ApplyUpdate(
BuildUpdate(kAddresses, MakePickFirstConfig(false)), lb_policy_.get());
EXPECT_TRUE(status.ok()) << status;
// LB policy should have created a subchannel for both addresses.
auto* subchannel = FindSubchannel(kAddresses[0]);
// LB policy should have created a subchannel for both addresses with
// the GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg.
auto* subchannel = FindSubchannel(
kAddresses[0], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
ASSERT_NE(subchannel, nullptr);
auto* subchannel2 = FindSubchannel(kAddresses[1]);
auto* subchannel2 = FindSubchannel(
kAddresses[1], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
ASSERT_NE(subchannel2, nullptr);
// When the LB policy receives the first subchannel's initial connectivity
// state notification (IDLE), it will request a connection.
@ -209,10 +217,13 @@ TEST_F(PickFirstTest, GoesIdleWhenConnectionFailsThenCanReconnect) {
absl::Status status = ApplyUpdate(
BuildUpdate(kAddresses, MakePickFirstConfig(false)), lb_policy_.get());
EXPECT_TRUE(status.ok()) << status;
// LB policy should have created a subchannel for both addresses.
auto* subchannel = FindSubchannel(kAddresses[0]);
// LB policy should have created a subchannel for both addresses with
// the GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg.
auto* subchannel = FindSubchannel(
kAddresses[0], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
ASSERT_NE(subchannel, nullptr);
auto* subchannel2 = FindSubchannel(kAddresses[1]);
auto* subchannel2 = FindSubchannel(
kAddresses[1], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
ASSERT_NE(subchannel2, nullptr);
// When the LB policy receives the first subchannel's initial connectivity
// state notification (IDLE), it will request a connection.

@ -42,6 +42,8 @@ class RoundRobinTest : public LoadBalancingPolicyTest {
void ExpectStartup(absl::Span<const absl::string_view> addresses) {
EXPECT_EQ(ApplyUpdate(BuildUpdate(addresses, nullptr), lb_policy_.get()),
absl::OkStatus());
// Expect the initial CONNECTNG update with a picker that queues.
ExpectConnectingUpdate();
// RR should have created a subchannel for each address.
for (size_t i = 0; i < addresses.size(); ++i) {
auto* subchannel = FindSubchannel(addresses[i]);
@ -50,8 +52,6 @@ class RoundRobinTest : public LoadBalancingPolicyTest {
EXPECT_TRUE(subchannel->ConnectionRequested());
// The subchannel will connect successfully.
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
// Expect the initial CONNECTNG update with a picker that queues.
if (i == 0) ExpectConnectingUpdate();
subchannel->SetConnectivityState(GRPC_CHANNEL_READY);
// As each subchannel becomes READY, we should get a new picker that
// includes the behavior. Note that there may be any number of

@ -127,6 +127,8 @@ class WeightedRoundRobinTest : public TimeAwareLoadBalancingPolicyTest {
EXPECT_EQ(ApplyUpdate(BuildUpdate(update_addresses, config_builder.Build()),
lb_policy_.get()),
absl::OkStatus());
// Expect the initial CONNECTNG update with a picker that queues.
ExpectConnectingUpdate(location);
// RR should have created a subchannel for each address.
for (size_t i = 0; i < addresses.size(); ++i) {
auto* subchannel = FindSubchannel(addresses[i]);
@ -140,8 +142,6 @@ class WeightedRoundRobinTest : public TimeAwareLoadBalancingPolicyTest {
<< location.line();
// The subchannel will connect successfully.
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
// Expect the initial CONNECTNG update with a picker that queues.
if (i == 0) ExpectConnectingUpdate(location);
subchannel->SetConnectivityState(GRPC_CHANNEL_READY);
}
return WaitForConnected(location);

@ -29,7 +29,6 @@
#include <grpc/grpc.h>
#include <grpc/support/json.h>
#include <grpc/support/log.h>
#include "src/core/ext/filters/stateful_session/stateful_session_filter.h"
#include "src/core/ext/xds/xds_health_status.h"
@ -210,31 +209,18 @@ TEST_F(XdsOverrideHostTest, FailedSubchannelIsNotPicked) {
EXPECT_EQ(ExpectPickComplete(picker.get(),
MakeOverrideHostAttribute(kAddresses[1])),
kAddresses[1]);
// Subchannel for address 1 becomes disconnected.
gpr_log(GPR_INFO, "### subchannel 1 reporting IDLE");
auto subchannel = FindSubchannel(kAddresses[1]);
ASSERT_NE(subchannel, nullptr);
subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE);
gpr_log(GPR_INFO, "### expecting re-resolution request");
ExpectReresolutionRequest();
gpr_log(GPR_INFO,
"### expecting RR picks to exclude the disconnected subchannel");
ExpectRoundRobinPicks(ExpectState(GRPC_CHANNEL_READY).get(),
{kAddresses[0], kAddresses[2]});
// It starts trying to reconnect...
gpr_log(GPR_INFO, "### subchannel 1 reporting CONNECTING");
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
gpr_log(GPR_INFO, "### expecting RR picks again");
ExpectRoundRobinPicks(ExpectState(GRPC_CHANNEL_READY).get(),
{kAddresses[0], kAddresses[2]});
// ...but the connection attempt fails.
gpr_log(GPR_INFO, "### subchannel 1 reporting TRANSIENT_FAILURE");
subchannel->SetConnectivityState(GRPC_CHANNEL_TRANSIENT_FAILURE,
absl::ResourceExhaustedError("Hmmmm"));
gpr_log(GPR_INFO, "### expecting re-resolution request");
ExpectReresolutionRequest();
// The host override is not used.
gpr_log(GPR_INFO, "### checking that host override is not used");
picker = ExpectState(GRPC_CHANNEL_READY);
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]},
MakeOverrideHostAttribute(kAddresses[1]));
@ -306,12 +292,6 @@ TEST_F(XdsOverrideHostTest, DrainingSubchannelIsConnecting) {
EXPECT_EQ(ExpectPickComplete(picker.get(),
MakeOverrideHostAttribute(kAddresses[1])),
kAddresses[1]);
// Send an update that marks the endpoints with different EDS health
// states, but those states are present in override_host_status.
// The picker should use the DRAINING host when a call's override
// points to that hose, but the host should not be used if there is no
// override pointing to it.
gpr_log(GPR_INFO, "### sending update with DRAINING host");
ApplyUpdateWithHealthStatuses(
{{kAddresses[0], XdsHealthStatus::HealthStatus::kUnknown},
{kAddresses[1], XdsHealthStatus::HealthStatus::kDraining},
@ -319,35 +299,23 @@ TEST_F(XdsOverrideHostTest, DrainingSubchannelIsConnecting) {
{"UNKNOWN", "HEALTHY", "DRAINING"});
auto subchannel = FindSubchannel(kAddresses[1]);
ASSERT_NE(subchannel, nullptr);
// There are two notifications - one from child policy and one from the parent
// policy due to draining channel update
picker = ExpectState(GRPC_CHANNEL_READY);
EXPECT_EQ(ExpectPickComplete(picker.get(),
MakeOverrideHostAttribute(kAddresses[1])),
kAddresses[1]);
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]});
// Now the connection to the draining host gets dropped.
// The picker should queue picks where the override host is IDLE.
// All picks without an override host should not use this host.
gpr_log(GPR_INFO, "### closing connection to DRAINING host");
subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE);
picker = ExpectState(GRPC_CHANNEL_READY);
ExpectPickQueued(picker.get(), MakeOverrideHostAttribute(kAddresses[1]));
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]});
// The subchannel should have been asked to reconnect as a result of the
// queued pick above. It will therefore transition into state CONNECTING.
// The pick behavior is the same as above: The picker should queue
// picks where the override host is CONNECTING. All picks without an
// override host should not use this host.
gpr_log(GPR_INFO, "### subchannel starts reconnecting");
EXPECT_TRUE(subchannel->ConnectionRequested());
ExpectQueueEmpty();
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
picker = ExpectState(GRPC_CHANNEL_READY);
ExpectPickQueued(picker.get(), MakeOverrideHostAttribute(kAddresses[1]));
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]});
// The subchannel now becomes connected again.
// Now picks with this override host can be completed again.
// Picks without an override host still don't use the draining host.
gpr_log(GPR_INFO, "### subchannel becomes reconnected");
subchannel->SetConnectivityState(GRPC_CHANNEL_READY);
picker = ExpectState(GRPC_CHANNEL_READY);
EXPECT_EQ(ExpectPickComplete(picker.get(),

@ -2080,8 +2080,7 @@ TEST_F(RoundRobinTest, HealthChecking) {
EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"backend unhealthy");
"UNAVAILABLE: backend unhealthy");
// Clean up.
EnableDefaultHealthCheckService(false);
}
@ -2139,8 +2138,7 @@ TEST_F(RoundRobinTest, WithHealthCheckingInhibitPerChannel) {
EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1));
CheckRpcSendFailure(DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"backend unhealthy");
"UNAVAILABLE: backend unhealthy");
// Second channel should be READY.
EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1));
CheckRpcSendOk(DEBUG_LOCATION, stub2);
@ -2185,8 +2183,7 @@ TEST_F(RoundRobinTest, HealthCheckingServiceNamePerChannel) {
EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1));
CheckRpcSendFailure(DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"backend unhealthy");
"UNAVAILABLE: backend unhealthy");
// Second channel should be READY.
EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1));
CheckRpcSendOk(DEBUG_LOCATION, stub2);
@ -2894,8 +2891,10 @@ TEST_F(ClientLbAddressTest, Basic) {
// Make sure that the attributes wind up on the subchannels.
std::vector<std::string> expected;
for (const int port : GetServersPorts()) {
expected.emplace_back(absl::StrCat(ipv6_only_ ? "[::1]:" : "127.0.0.1:",
port, " args={test_key=test_value}"));
expected.emplace_back(absl::StrCat(
ipv6_only_ ? "[::1]:" : "127.0.0.1:", port,
" args={grpc.internal.no_subchannel.outlier_detection_disable=1, "
"test_key=test_value}"));
}
EXPECT_EQ(addresses_seen(), expected);
}

@ -995,7 +995,7 @@ TEST_P(RingHashTest, ReattemptWhenAllEndpointsUnreachable) {
CheckRpcSendFailure(
DEBUG_LOCATION, StatusCode::UNAVAILABLE,
MakeConnectionFailureRegex(
"ring hash cannot find a connected endpoint; first failure: "),
"ring hash cannot find a connected subchannel; first failure: "),
RpcOptions().set_metadata(std::move(metadata)));
StartBackend(0);
// Ensure we are actively connecting without any traffic.
@ -1034,7 +1034,7 @@ TEST_P(RingHashTest, TransientFailureSkipToAvailableReady) {
CheckRpcSendFailure(
DEBUG_LOCATION, StatusCode::UNAVAILABLE,
MakeConnectionFailureRegex(
"ring hash cannot find a connected endpoint; first failure: "),
"ring hash cannot find a connected subchannel; first failure: "),
rpc_options);
gpr_log(GPR_INFO, "=== DONE WITH FIRST RPC ===");
EXPECT_EQ(GRPC_CHANNEL_TRANSIENT_FAILURE, channel_->GetState(false));
@ -1070,7 +1070,7 @@ TEST_P(RingHashTest, TransientFailureSkipToAvailableReady) {
CheckRpcSendFailure(
DEBUG_LOCATION, StatusCode::UNAVAILABLE,
MakeConnectionFailureRegex(
"ring hash cannot find a connected endpoint; first failure: "),
"ring hash cannot find a connected subchannel; first failure: "),
rpc_options);
gpr_log(GPR_INFO, "=== STARTING BACKEND 1 ===");
StartBackend(1);

@ -1113,8 +1113,6 @@ src/core/ext/filters/client_channel/lb_policy/address_filtering.h \
src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.h \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \
@ -1140,6 +1138,7 @@ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \
src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \
src/core/ext/filters/client_channel/lb_policy/rls/rls.cc \
src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \
src/core/ext/filters/client_channel/lb_policy/subchannel_list.h \
src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc \
src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h \
src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc \

@ -919,8 +919,6 @@ src/core/ext/filters/client_channel/lb_policy/address_filtering.h \
src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.h \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \
@ -946,6 +944,7 @@ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \
src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \
src/core/ext/filters/client_channel/lb_policy/rls/rls.cc \
src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \
src/core/ext/filters/client_channel/lb_policy/subchannel_list.h \
src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc \
src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h \
src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc \

Loading…
Cancel
Save