[round robin] delegate to pick_first instead of creating subchannels directly (#32692)

More work on the dualstack backend design:
- Change round_robin to delegate to pick_first instead of creating
subchannels directly.
- Change pick_first such that when it is the child of a petiole policy,
it will unconditionally start a health watch.
- Change the client-side health checking code such that if client-side
health checking is not enabled, it will return the subchannel's raw
connectivity state.
- As part of this, we introduce a new endpoint_list library to be used
by petiole policies, which is intended to replace the existing
subchannel_list library. The only policy that will still directly
interact with subchannels is pick_first, so the relevant parts of the
subchannel_list functionality have been copied directly into that
policy. The subchannel_list library will be removed after all petiole
policies are updated to delegate to pick_first.
pull/33087/head^2
Mark D. Roth 1 year ago committed by GitHub
parent 875b7fdcff
commit 27a778fece
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      CMakeLists.txt
  2. 2
      Makefile
  3. 3
      Package.swift
  4. 6
      build_autogenerated.yaml
  5. 1
      config.m4
  6. 1
      config.w32
  7. 4
      gRPC-C++.podspec
  8. 5
      gRPC-Core.podspec
  9. 3
      grpc.gemspec
  10. 2
      grpc.gyp
  11. 3
      package.xml
  12. 46
      src/core/BUILD
  13. 188
      src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc
  14. 212
      src/core/ext/filters/client_channel/lb_policy/endpoint_list.h
  15. 40
      src/core/ext/filters/client_channel/lb_policy/health_check_client.cc
  16. 5
      src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h
  17. 515
      src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc
  18. 36
      src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h
  19. 438
      src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc
  20. 41
      src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc
  21. 2
      src/core/ext/filters/client_channel/subchannel.h
  22. 1
      src/python/grpcio/grpc_core_dependencies.py
  23. 146
      test/core/client_channel/lb_policy/lb_policy_test_lib.h
  24. 13
      test/core/client_channel/lb_policy/outlier_detection_test.cc
  25. 31
      test/core/client_channel/lb_policy/pick_first_test.cc
  26. 4
      test/core/client_channel/lb_policy/round_robin_test.cc
  27. 36
      test/core/client_channel/lb_policy/xds_override_host_test.cc
  28. 9
      test/cpp/end2end/client_lb_end2end_test.cc
  29. 3
      tools/doxygen/Doxyfile.c++.internal
  30. 3
      tools/doxygen/Doxyfile.core.internal

2
CMakeLists.txt generated

@ -1690,6 +1690,7 @@ add_library(grpc
src/core/ext/filters/client_channel/http_proxy.cc
src/core/ext/filters/client_channel/lb_policy/address_filtering.cc
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc
@ -2729,6 +2730,7 @@ add_library(grpc_unsecure
src/core/ext/filters/client_channel/http_proxy.cc
src/core/ext/filters/client_channel/lb_policy/address_filtering.cc
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc

2
Makefile generated

@ -980,6 +980,7 @@ LIBGRPC_SRC = \
src/core/ext/filters/client_channel/http_proxy.cc \
src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \
@ -1872,6 +1873,7 @@ LIBGRPC_UNSECURE_SRC = \
src/core/ext/filters/client_channel/http_proxy.cc \
src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \

@ -148,6 +148,8 @@ let package = Package(
"src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h",
"src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc",
"src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h",
"src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc",
"src/core/ext/filters/client_channel/lb_policy/endpoint_list.h",
"src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc",
"src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h",
"src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc",
@ -167,6 +169,7 @@ let package = Package(
"src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc",
"src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h",
"src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc",
"src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h",
"src/core/ext/filters/client_channel/lb_policy/priority/priority.cc",
"src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc",
"src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h",

@ -231,6 +231,7 @@ libs:
- src/core/ext/filters/client_channel/lb_policy/address_filtering.h
- src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h
- src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h
- src/core/ext/filters/client_channel/lb_policy/endpoint_list.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h
@ -241,6 +242,7 @@ libs:
- src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h
- src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h
- src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h
- src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h
- src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h
- src/core/ext/filters/client_channel/lb_policy/subchannel_list.h
- src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h
@ -1034,6 +1036,7 @@ libs:
- src/core/ext/filters/client_channel/http_proxy.cc
- src/core/ext/filters/client_channel/lb_policy/address_filtering.cc
- src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc
- src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc
@ -1952,6 +1955,7 @@ libs:
- src/core/ext/filters/client_channel/lb_policy/address_filtering.h
- src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h
- src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h
- src/core/ext/filters/client_channel/lb_policy/endpoint_list.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h
@ -1962,6 +1966,7 @@ libs:
- src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h
- src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h
- src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h
- src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h
- src/core/ext/filters/client_channel/lb_policy/subchannel_list.h
- src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h
- src/core/ext/filters/client_channel/local_subchannel_pool.h
@ -2363,6 +2368,7 @@ libs:
- src/core/ext/filters/client_channel/http_proxy.cc
- src/core/ext/filters/client_channel/lb_policy/address_filtering.cc
- src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc
- src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc
- src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc

1
config.m4 generated

@ -59,6 +59,7 @@ if test "$PHP_GRPC" != "no"; then
src/core/ext/filters/client_channel/http_proxy.cc \
src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \

1
config.w32 generated

@ -24,6 +24,7 @@ if (PHP_GRPC != "no") {
"src\\core\\ext\\filters\\client_channel\\http_proxy.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\address_filtering.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\child_policy_handler.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\endpoint_list.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\client_load_reporting_filter.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\grpclb.cc " +
"src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\grpclb_balancer_addresses.cc " +

4
gRPC-C++.podspec generated

@ -263,6 +263,7 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/address_filtering.h',
'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h',
@ -273,6 +274,7 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h',
'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h',
'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h',
'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h',
'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h',
'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h',
'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h',
@ -1324,6 +1326,7 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/address_filtering.h',
'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h',
@ -1334,6 +1337,7 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h',
'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h',
'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h',
'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h',
'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h',
'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h',
'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h',

5
gRPC-Core.podspec generated

@ -249,6 +249,8 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc',
@ -268,6 +270,7 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc',
'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h',
'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc',
'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h',
'src/core/ext/filters/client_channel/lb_policy/priority/priority.cc',
'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc',
'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h',
@ -2073,6 +2076,7 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/address_filtering.h',
'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h',
@ -2083,6 +2087,7 @@ Pod::Spec.new do |s|
'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h',
'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h',
'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h',
'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h',
'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h',
'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h',
'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h',

3
grpc.gemspec generated

@ -154,6 +154,8 @@ Gem::Specification.new do |s|
s.files += %w( src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/endpoint_list.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc )
@ -173,6 +175,7 @@ Gem::Specification.new do |s|
s.files += %w( src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/priority/priority.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc )
s.files += %w( src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h )

2
grpc.gyp generated

@ -284,6 +284,7 @@
'src/core/ext/filters/client_channel/http_proxy.cc',
'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc',
@ -1116,6 +1117,7 @@
'src/core/ext/filters/client_channel/http_proxy.cc',
'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc',

3
package.xml generated

@ -136,6 +136,8 @@
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc" role="src" />
@ -155,6 +157,7 @@
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/priority/priority.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc" role="src" />
<file baseinstalldir="/" name="src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h" role="src" />

@ -4577,6 +4577,7 @@ grpc_cc_library(
"//:grpc_trace",
"//:orphanable",
"//:ref_counted_ptr",
"//:sockaddr_utils",
"//:work_serializer",
],
)
@ -4609,11 +4610,49 @@ grpc_cc_library(
],
)
# Shared EndpointList library for petiole LB policies. endpoint_list.cc
# includes pick_first.h and creates a "pick_first" child policy through the
# LB policy registry, which is why grpc_lb_policy_pick_first and
# lb_policy_registry appear in deps.
grpc_cc_library(
name = "lb_endpoint_list",
srcs = [
"ext/filters/client_channel/lb_policy/endpoint_list.cc",
],
hdrs = [
"ext/filters/client_channel/lb_policy/endpoint_list.h",
],
external_deps = [
"absl/functional:any_invocable",
"absl/status",
"absl/status:statusor",
"absl/types:optional",
],
language = "c++",
deps = [
"channel_args",
"delegating_helper",
"grpc_lb_policy_pick_first",
"json",
"lb_policy",
"lb_policy_registry",
"pollset_set",
"subchannel_interface",
"//:config",
"//:debug_location",
"//:gpr",
"//:grpc_base",
"//:orphanable",
"//:ref_counted_ptr",
"//:server_address",
"//:work_serializer",
],
)
grpc_cc_library(
name = "grpc_lb_policy_pick_first",
srcs = [
"ext/filters/client_channel/lb_policy/pick_first/pick_first.cc",
],
hdrs = [
"ext/filters/client_channel/lb_policy/pick_first/pick_first.h",
],
external_deps = [
"absl/algorithm:container",
"absl/random",
@ -4626,8 +4665,9 @@ grpc_cc_library(
deps = [
"channel_args",
"env",
"grpc_lb_subchannel_list",
"grpc_outlier_detection_header",
"health_check_client",
"iomgr_fwd",
"json",
"json_args",
"json_object_loader",
@ -4643,7 +4683,6 @@ grpc_cc_library(
"//:orphanable",
"//:ref_counted_ptr",
"//:server_address",
"//:work_serializer",
],
)
@ -4710,11 +4749,10 @@ grpc_cc_library(
language = "c++",
deps = [
"channel_args",
"grpc_lb_subchannel_list",
"json",
"lb_endpoint_list",
"lb_policy",
"lb_policy_factory",
"subchannel_interface",
"//:config",
"//:debug_location",
"//:gpr",

@ -0,0 +1,188 @@
//
// Copyright 2015 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <grpc/support/port_platform.h>
#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h"
#include <stdlib.h>
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/types/optional.h"
#include <grpc/impl/connectivity_state.h>
#include <grpc/support/json.h>
#include <grpc/support/log.h>
#include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/config/core_configuration.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/iomgr/pollset_set.h"
#include "src/core/lib/json/json.h"
#include "src/core/lib/load_balancing/delegating_helper.h"
#include "src/core/lib/load_balancing/lb_policy.h"
#include "src/core/lib/load_balancing/lb_policy_registry.h"
#include "src/core/lib/resolver/server_address.h"
namespace grpc_core {
//
// EndpointList::Endpoint::Helper
//
// Channel control helper given to an endpoint's child policy.  Subchannel
// creation and connectivity state updates are routed through the owning
// Endpoint; the remaining helper methods come from
// DelegatingChannelControlHelper, which is pointed at the parent policy's
// helper via parent_helper().
class EndpointList::Endpoint::Helper
    : public LoadBalancingPolicy::DelegatingChannelControlHelper {
 public:
  explicit Helper(RefCountedPtr<Endpoint> endpoint)
      : endpoint_(std::move(endpoint)) {}

  ~Helper() override { endpoint_.reset(DEBUG_LOCATION, "Helper"); }

  // Forwards subchannel creation to the endpoint's (overridable) hook.
  RefCountedPtr<SubchannelInterface> CreateSubchannel(
      ServerAddress address, const ChannelArgs& args) override {
    Endpoint* owner = endpoint_.get();
    return owner->CreateSubchannel(std::move(address), args);
  }

  // Records the child's new state and picker on the endpoint, then notifies
  // the endpoint, passing along the state it had before this update.
  void UpdateState(
      grpc_connectivity_state state, const absl::Status& status,
      RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> picker) override {
    Endpoint* owner = endpoint_.get();
    const absl::optional<grpc_connectivity_state> previous_state =
        owner->connectivity_state_;
    owner->connectivity_state_ = state;
    owner->picker_ = std::move(picker);
    owner->OnStateUpdate(previous_state, state, status);
  }

 private:
  LoadBalancingPolicy::ChannelControlHelper* parent_helper() const override {
    EndpointList* list = endpoint_->endpoint_list_.get();
    return list->channel_control_helper();
  }

  RefCountedPtr<Endpoint> endpoint_;
};
//
// EndpointList::Endpoint
//
void EndpointList::Endpoint::Init(
const ServerAddress& address, const ChannelArgs& args,
std::shared_ptr<WorkSerializer> work_serializer) {
ChannelArgs child_args =
args.Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true)
.Set(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX, true);
LoadBalancingPolicy::Args lb_policy_args;
lb_policy_args.work_serializer = std::move(work_serializer);
lb_policy_args.args = child_args;
lb_policy_args.channel_control_helper =
std::make_unique<Helper>(Ref(DEBUG_LOCATION, "Helper"));
child_policy_ =
CoreConfiguration::Get().lb_policy_registry().CreateLoadBalancingPolicy(
"pick_first", std::move(lb_policy_args));
if (GPR_UNLIKELY(endpoint_list_->tracer_ != nullptr)) {
gpr_log(GPR_INFO, "[%s %p] endpoint %p: created child policy %p",
endpoint_list_->tracer_, endpoint_list_->policy_.get(), this,
child_policy_.get());
}
// Add our interested_parties pollset_set to that of the newly created
// child policy. This will make the child policy progress upon activity on
// this policy, which in turn is tied to the application's call.
grpc_pollset_set_add_pollset_set(
child_policy_->interested_parties(),
endpoint_list_->policy_->interested_parties());
// Construct pick_first config.
auto config =
CoreConfiguration::Get().lb_policy_registry().ParseLoadBalancingConfig(
Json::FromArray(
{Json::FromObject({{"pick_first", Json::FromObject({})}})}));
GPR_ASSERT(config.ok());
// Update child policy.
LoadBalancingPolicy::UpdateArgs update_args;
update_args.addresses.emplace().emplace_back(address);
update_args.args = child_args;
update_args.config = std::move(*config);
// TODO(roth): If the child reports a non-OK status with the update,
// we need to propagate that back to the resolver somehow.
(void)child_policy_->UpdateLocked(std::move(update_args));
}
void EndpointList::Endpoint::Orphan() {
// Remove pollset_set linkage.
grpc_pollset_set_del_pollset_set(
child_policy_->interested_parties(),
endpoint_list_->policy_->interested_parties());
child_policy_.reset();
picker_.reset();
Unref();
}
// Forwards a backoff reset to the child policy; no-op if there is no child.
void EndpointList::Endpoint::ResetBackoffLocked() {
  if (child_policy_ == nullptr) return;
  child_policy_->ResetBackoffLocked();
}
// Forwards an exit-idle request to the child policy; no-op if there is no
// child.
void EndpointList::Endpoint::ExitIdleLocked() {
  if (child_policy_ == nullptr) return;
  child_policy_->ExitIdleLocked();
}
size_t EndpointList::Endpoint::Index() const {
for (size_t i = 0; i < endpoint_list_->endpoints_.size(); ++i) {
if (endpoint_list_->endpoints_[i].get() == this) return i;
}
return -1;
}
// Default subchannel creation: hands the request to the helper returned by
// the owning list's channel_control_helper().
RefCountedPtr<SubchannelInterface> EndpointList::Endpoint::CreateSubchannel(
    ServerAddress address, const ChannelArgs& args) {
  LoadBalancingPolicy::ChannelControlHelper* parent_helper =
      endpoint_list_->channel_control_helper();
  return parent_helper->CreateSubchannel(std::move(address), args);
}
//
// EndpointList
//
// Second-phase initialization: builds one endpoint per address, in order,
// using the supplied factory.  Each endpoint is handed its own ref to this
// list.
void EndpointList::Init(
    const ServerAddressList& addresses, const ChannelArgs& args,
    absl::AnyInvocable<OrphanablePtr<Endpoint>(
        RefCountedPtr<EndpointList>, const ServerAddress&, const ChannelArgs&)>
        create_endpoint) {
  for (const ServerAddress& addr : addresses) {
    OrphanablePtr<Endpoint> new_endpoint =
        create_endpoint(Ref(DEBUG_LOCATION, "Endpoint"), addr, args);
    endpoints_.push_back(std::move(new_endpoint));
  }
}
void EndpointList::ResetBackoffLocked() {
for (const auto& endpoint : endpoints_) {
endpoint->ResetBackoffLocked();
}
}
bool EndpointList::AllEndpointsSeenInitialState() const {
for (const auto& endpoint : endpoints_) {
if (!endpoint->connectivity_state().has_value()) return false;
}
return true;
}
} // namespace grpc_core

@ -0,0 +1,212 @@
//
// Copyright 2015 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H
#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H
#include <grpc/support/port_platform.h>
#include <stdlib.h>
#include <memory>
#include <utility>
#include <vector>
#include "absl/functional/any_invocable.h"
#include "absl/status/status.h"
#include "absl/types/optional.h"
#include <grpc/impl/connectivity_state.h>
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/work_serializer.h"
#include "src/core/lib/load_balancing/lb_policy.h"
#include "src/core/lib/load_balancing/subchannel_interface.h"
#include "src/core/lib/resolver/server_address.h"
namespace grpc_core {
// A list of endpoints for use in a petiole LB policy. Each endpoint may
// have one or more addresses, which will be passed down to a pick_first
// child policy.
//
// To use this, a petiole policy must define its own subclass of both
// EndpointList and EndpointList::Endpoint, like so:
/*
class MyEndpointList : public EndpointList {
public:
MyEndpointList(RefCountedPtr<MyLbPolicy> lb_policy,
const ServerAddressList& addresses, const ChannelArgs& args)
: EndpointList(std::move(lb_policy),
GRPC_TRACE_FLAG_ENABLED(grpc_my_tracer)
? "MyEndpointList"
: nullptr) {
Init(addresses, args,
[&](RefCountedPtr<EndpointList> endpoint_list,
const ServerAddress& address, const ChannelArgs& args) {
return MakeOrphanable<MyEndpoint>(
std::move(endpoint_list), address, args,
policy<MyLbPolicy>()->work_serializer());
});
}
private:
class MyEndpoint : public Endpoint {
public:
MyEndpoint(RefCountedPtr<EndpointList> endpoint_list,
const ServerAddress& address, const ChannelArgs& args,
std::shared_ptr<WorkSerializer> work_serializer)
: Endpoint(std::move(endpoint_list)) {
Init(address, args, std::move(work_serializer));
}
private:
void OnStateUpdate(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state,
const absl::Status& status) override {
// ...handle connectivity state change...
}
};
LoadBalancingPolicy::ChannelControlHelper* channel_control_helper()
const override {
return policy<MyLbPolicy>()->channel_control_helper();
}
};
*/
class EndpointList : public InternallyRefCounted<EndpointList> {
public:
// An individual endpoint.
// Owns one child LB policy (created in Init()) and caches the most recent
// connectivity state and picker that child reported.
class Endpoint : public InternallyRefCounted<Endpoint> {
public:
// Releases this endpoint's ref to the owning EndpointList.
~Endpoint() override { endpoint_list_.reset(DEBUG_LOCATION, "Endpoint"); }
// Unlinks the child's pollset_set from the parent policy's, drops the
// child policy and cached picker, and releases the ref.
void Orphan() override;
// Both of these forward to the child policy and are no-ops if the
// child policy is null.
void ResetBackoffLocked();
void ExitIdleLocked();
// Last state reported by the child policy; unset until the child has
// reported its first state.
absl::optional<grpc_connectivity_state> connectivity_state() const {
return connectivity_state_;
}
// Picker from the child's most recent state update (null before the
// first update and after Orphan()).
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> picker() const {
return picker_;
}
protected:
// We use two-phase initialization here to ensure that the vtable is
// initialized before we need to use it. Subclass must invoke Init()
// from inside its ctor.
explicit Endpoint(RefCountedPtr<EndpointList> endpoint_list)
: endpoint_list_(std::move(endpoint_list)) {}
// Creates the "pick_first" child policy and sends it an update
// containing `address`, using `args` plus internal pick_first flags.
void Init(const ServerAddress& address, const ChannelArgs& args,
std::shared_ptr<WorkSerializer> work_serializer);
// Templated for convenience, to provide a short-hand for
// down-casting in the caller.
template <typename T>
T* endpoint_list() const {
return static_cast<T*>(endpoint_list_.get());
}
// Templated for convenience, to provide a short-hand for down-casting
// in the caller.
template <typename T>
T* policy() const {
return endpoint_list_->policy<T>();
}
// Returns the index of this endpoint within the EndpointList.
// Intended for trace logging.
// Yields size_t(-1) if this endpoint is not found in the list.
size_t Index() const;
private:
// ChannelControlHelper implementation handed to the child policy;
// defined in endpoint_list.cc.
class Helper;
// Called when the child policy reports a connectivity state update.
// old_state is the previously recorded state (unset on the first update).
virtual void OnStateUpdate(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state, const absl::Status& status) = 0;
// Called to create a subchannel. Subclasses may override.
// The default implementation delegates to the owning list's
// channel_control_helper().
virtual RefCountedPtr<SubchannelInterface> CreateSubchannel(
ServerAddress address, const ChannelArgs& args);
// Owning list; the ref is dropped in the destructor.
RefCountedPtr<EndpointList> endpoint_list_;
// Child policy created by Init(); reset in Orphan().
OrphanablePtr<LoadBalancingPolicy> child_policy_;
// Written by Helper::UpdateState().
absl::optional<grpc_connectivity_state> connectivity_state_;
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> picker_;
};
// Releases the list's ref to the parent LB policy.
~EndpointList() override { policy_.reset(DEBUG_LOCATION, "EndpointList"); }
// Destroys the endpoint pointers (clearing the vector) and then releases
// the ref.
void Orphan() override {
endpoints_.clear();
Unref();
}
// Number of endpoints currently in the list.
size_t size() const { return endpoints_.size(); }
const std::vector<OrphanablePtr<Endpoint>>& endpoints() const {
return endpoints_;
}
// Calls ResetBackoffLocked() on every endpoint.
void ResetBackoffLocked();
protected:
// We use two-phase initialization here to ensure that the vtable is
// initialized before we need to use it. Subclass must invoke Init()
// from inside its ctor.
// `tracer` is the name printed in trace log lines; nullptr disables that
// logging.
EndpointList(RefCountedPtr<LoadBalancingPolicy> policy, const char* tracer)
: policy_(std::move(policy)), tracer_(tracer) {}
// Invokes create_endpoint once per address, in order, passing a new ref to
// this list, and appends each result to the endpoint vector.
void Init(const ServerAddressList& addresses, const ChannelArgs& args,
absl::AnyInvocable<OrphanablePtr<Endpoint>(
RefCountedPtr<EndpointList>, const ServerAddress&,
const ChannelArgs&)>
create_endpoint);
// Templated for convenience, to provide a short-hand for down-casting
// in the caller.
template <typename T>
T* policy() const {
return static_cast<T*>(policy_.get());
}
// Returns true if all endpoints have seen their initial connectivity
// state notification.
bool AllEndpointsSeenInitialState() const;
private:
// Returns the parent policy's helper. Needed because the accessor
// method is protected on LoadBalancingPolicy.
virtual LoadBalancingPolicy::ChannelControlHelper* channel_control_helper()
const = 0;
// Parent LB policy; the ref is dropped in the destructor.
RefCountedPtr<LoadBalancingPolicy> policy_;
// Trace name, or nullptr when tracing is off.
const char* tracer_;
std::vector<OrphanablePtr<Endpoint>> endpoints_;
};
} // namespace grpc_core
#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H

@ -28,6 +28,7 @@
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "upb/base/string_view.h"
@ -44,6 +45,7 @@
#include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h"
#include "src/core/ext/filters/client_channel/subchannel.h"
#include "src/core/ext/filters/client_channel/subchannel_stream_client.h"
#include "src/core/lib/address_utils/sockaddr_utils.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/channel/channel_trace.h"
#include "src/core/lib/debug/trace.h"
@ -114,7 +116,7 @@ void HealthProducer::HealthChecker::Orphan() {
void HealthProducer::HealthChecker::AddWatcherLocked(HealthWatcher* watcher) {
watchers_.insert(watcher);
watcher->Notify(state_, status_);
if (state_.has_value()) watcher->Notify(*state_, status_);
}
bool HealthProducer::HealthChecker::RemoveWatcherLocked(
@ -128,13 +130,18 @@ void HealthProducer::HealthChecker::OnConnectivityStateChangeLocked(
if (state == GRPC_CHANNEL_READY) {
// We should already be in CONNECTING, and we don't want to change
// that until we see the initial response on the stream.
GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING);
if (!state_.has_value()) {
state_ = GRPC_CHANNEL_CONNECTING;
status_ = absl::OkStatus();
} else {
GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING);
}
// Start the health watch stream.
StartHealthStreamLocked();
} else {
state_ = state;
status_ = status;
NotifyWatchersLocked(state_, status_);
NotifyWatchersLocked(*state_, status_);
// We're not connected, so stop health checking.
stream_client_.reset();
}
@ -177,12 +184,21 @@ void HealthProducer::HealthChecker::NotifyWatchersLocked(
void HealthProducer::HealthChecker::OnHealthWatchStatusChange(
grpc_connectivity_state state, const absl::Status& status) {
if (state == GRPC_CHANNEL_SHUTDOWN) return;
// Prepend the subchannel's address to the status if needed.
absl::Status use_status;
if (!status.ok()) {
std::string address_str =
grpc_sockaddr_to_uri(&producer_->subchannel_->address())
.value_or("<unknown address type>");
use_status = absl::Status(
status.code(), absl::StrCat(address_str, ": ", status.message()));
}
work_serializer_->Schedule(
[self = Ref(), state, status]() {
[self = Ref(), state, status = std::move(use_status)]() mutable {
MutexLock lock(&self->producer_->mu_);
if (self->stream_client_ != nullptr) {
self->state_ = state;
self->status_ = status;
self->status_ = std::move(status);
for (HealthWatcher* watcher : self->watchers_) {
watcher->Notify(state, self->status_);
}
@ -362,7 +378,7 @@ void HealthProducer::AddWatcher(
grpc_pollset_set_add_pollset_set(interested_parties_,
watcher->interested_parties());
if (!health_check_service_name.has_value()) {
watcher->Notify(state_, status_);
if (state_.has_value()) watcher->Notify(*state_, status_);
non_health_watchers_.insert(watcher);
} else {
auto it =
@ -445,6 +461,13 @@ void HealthWatcher::SetSubchannel(Subchannel* subchannel) {
if (created) producer_->Start(subchannel->Ref());
// Register ourself with the producer.
producer_->AddWatcher(this, health_check_service_name_);
if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) {
gpr_log(GPR_INFO,
"HealthWatcher %p: registered with producer %p (created=%d, "
"health_check_service_name=\"%s\")",
this, producer_.get(), created,
health_check_service_name_.value_or("N/A").c_str());
}
}
void HealthWatcher::Notify(grpc_connectivity_state state, absl::Status status) {
@ -470,6 +493,11 @@ MakeHealthCheckWatcher(
health_check_service_name =
args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME);
}
if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) {
gpr_log(GPR_INFO,
"creating HealthWatcher -- health_check_service_name=\"%s\"",
health_check_service_name.value_or("N/A").c_str());
}
return std::make_unique<HealthWatcher>(std::move(work_serializer),
std::move(health_check_service_name),
std::move(watcher));

@ -127,7 +127,8 @@ class HealthProducer : public Subchannel::DataProducerInterface {
std::shared_ptr<WorkSerializer> work_serializer_ =
std::make_shared<WorkSerializer>();
grpc_connectivity_state state_ ABSL_GUARDED_BY(&HealthProducer::mu_);
absl::optional<grpc_connectivity_state> state_
ABSL_GUARDED_BY(&HealthProducer::mu_);
absl::Status status_ ABSL_GUARDED_BY(&HealthProducer::mu_);
OrphanablePtr<SubchannelStreamClient> stream_client_
ABSL_GUARDED_BY(&HealthProducer::mu_);
@ -143,7 +144,7 @@ class HealthProducer : public Subchannel::DataProducerInterface {
grpc_pollset_set* interested_parties_;
Mutex mu_;
grpc_connectivity_state state_ ABSL_GUARDED_BY(&mu_);
absl::optional<grpc_connectivity_state> state_ ABSL_GUARDED_BY(&mu_);
absl::Status status_ ABSL_GUARDED_BY(&mu_);
RefCountedPtr<ConnectedSubchannel> connected_subchannel_
ABSL_GUARDED_BY(&mu_);

@ -16,6 +16,8 @@
#include <grpc/support/port_platform.h>
#include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h"
#include <inttypes.h>
#include <string.h>
@ -33,22 +35,22 @@
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include <grpc/grpc.h>
#include <grpc/impl/connectivity_state.h>
#include <grpc/support/log.h>
#include "src/core/ext/filters/client_channel/lb_policy/health_check_client.h"
#include "src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h"
#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/config/core_configuration.h"
#include "src/core/lib/debug/trace.h"
#include "src/core/lib/gpr/string.h"
#include "src/core/lib/gprpp/crash.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/env.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/validation_errors.h"
#include "src/core/lib/gprpp/work_serializer.h"
#include "src/core/lib/iomgr/iomgr_fwd.h"
#include "src/core/lib/json/json.h"
#include "src/core/lib/json/json_args.h"
#include "src/core/lib/json/json_object_loader.h"
@ -117,52 +119,87 @@ class PickFirst : public LoadBalancingPolicy {
private:
~PickFirst() override;
class PickFirstSubchannelList;
class PickFirstSubchannelData
: public SubchannelData<PickFirstSubchannelList,
PickFirstSubchannelData> {
class SubchannelList : public InternallyRefCounted<SubchannelList> {
public:
PickFirstSubchannelData(
SubchannelList<PickFirstSubchannelList, PickFirstSubchannelData>*
subchannel_list,
const ServerAddress& address,
RefCountedPtr<SubchannelInterface> subchannel)
: SubchannelData(subchannel_list, address, std::move(subchannel)) {}
void ProcessConnectivityChangeLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) override;
// Processes the connectivity change to READY for an unselected subchannel.
void ProcessUnselectedReadyLocked();
};
class SubchannelData {
public:
SubchannelData(SubchannelList* subchannel_list,
RefCountedPtr<SubchannelInterface> subchannel);
SubchannelInterface* subchannel() const { return subchannel_.get(); }
absl::optional<grpc_connectivity_state> connectivity_state() const {
return connectivity_state_;
}
class PickFirstSubchannelList
: public SubchannelList<PickFirstSubchannelList,
PickFirstSubchannelData> {
public:
PickFirstSubchannelList(PickFirst* policy, ServerAddressList addresses,
const ChannelArgs& args)
: SubchannelList(policy,
(GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)
? "PickFirstSubchannelList"
: nullptr),
std::move(addresses), policy->channel_control_helper(),
args) {
// Need to maintain a ref to the LB policy as long as we maintain
// any references to subchannels, since the subchannels'
// pollset_sets will include the LB policy's pollset_set.
policy->Ref(DEBUG_LOCATION, "subchannel_list").release();
// Note that we do not start trying to connect to any subchannel here,
// since we will wait until we see the initial connectivity state for all
// subchannels before doing that.
}
// Returns the index into the subchannel list of this object.
size_t Index() const {
return static_cast<size_t>(this -
&subchannel_list_->subchannels_.front());
}
~PickFirstSubchannelList() override {
PickFirst* p = static_cast<PickFirst*>(policy());
p->Unref(DEBUG_LOCATION, "subchannel_list");
}
// Resets the connection backoff.
void ResetBackoffLocked() {
if (subchannel_ != nullptr) subchannel_->ResetBackoff();
}
// Cancels any pending connectivity watch and unrefs the subchannel.
void ShutdownLocked();
private:
// Watcher for subchannel connectivity state.
class Watcher
: public SubchannelInterface::ConnectivityStateWatcherInterface {
public:
Watcher(SubchannelData* subchannel_data,
RefCountedPtr<SubchannelList> subchannel_list)
: subchannel_data_(subchannel_data),
subchannel_list_(std::move(subchannel_list)) {}
~Watcher() override {
subchannel_list_.reset(DEBUG_LOCATION, "Watcher dtor");
}
void OnConnectivityStateChange(grpc_connectivity_state new_state,
absl::Status status) override {
subchannel_data_->OnConnectivityStateChange(new_state,
std::move(status));
}
grpc_pollset_set* interested_parties() override {
return subchannel_list_->policy()->interested_parties();
}
private:
SubchannelData* subchannel_data_;
RefCountedPtr<SubchannelList> subchannel_list_;
};
// This method will be invoked once soon after instantiation to report
// the current connectivity state, and it will then be invoked again
// whenever the connectivity state changes.
void OnConnectivityStateChange(grpc_connectivity_state new_state,
absl::Status status);
// Processes the connectivity change to READY for an unselected
// subchannel.
void ProcessUnselectedReadyLocked();
// Backpointer to owning subchannel list. Not owned.
SubchannelList* subchannel_list_;
// The subchannel.
RefCountedPtr<SubchannelInterface> subchannel_;
// Will be non-null when the subchannel's state is being watched.
SubchannelInterface::ConnectivityStateWatcherInterface* pending_watcher_ =
nullptr;
// Data updated by the watcher.
absl::optional<grpc_connectivity_state> connectivity_state_;
absl::Status connectivity_status_;
};
SubchannelList(RefCountedPtr<PickFirst> policy, ServerAddressList addresses,
const ChannelArgs& args);
~SubchannelList() override;
bool in_transient_failure() const { return in_transient_failure_; }
void set_in_transient_failure(bool in_transient_failure) {
@ -172,15 +209,64 @@ class PickFirst : public LoadBalancingPolicy {
size_t attempting_index() const { return attempting_index_; }
void set_attempting_index(size_t index) { attempting_index_ = index; }
// The number of subchannels in the list.
size_t size() const { return subchannels_.size(); }
// Returns true if the subchannel list is shutting down.
bool shutting_down() const { return shutting_down_; }
// Accessors.
PickFirst* policy() const { return policy_.get(); }
// Resets connection backoff of all subchannels.
void ResetBackoffLocked();
// Returns true if all subchannels have seen their initial
// connectivity state notifications.
bool AllSubchannelsSeenInitialState();
void Orphan() override;
private:
std::shared_ptr<WorkSerializer> work_serializer() const override {
return static_cast<PickFirst*>(policy())->work_serializer();
}
// Backpointer to owning policy.
RefCountedPtr<PickFirst> policy_;
const bool enable_health_watch_;
ChannelArgs args_;
// The list of subchannels.
std::vector<SubchannelData> subchannels_;
// Is this list shutting down? This may be true due to the shutdown of the
// policy itself or because a newer update has arrived while this one hadn't
// finished processing.
bool shutting_down_ = false;
bool in_transient_failure_ = false;
size_t attempting_index_ = 0;
};
class HealthWatcher
: public SubchannelInterface::ConnectivityStateWatcherInterface {
public:
explicit HealthWatcher(RefCountedPtr<PickFirst> policy)
: policy_(std::move(policy)) {}
~HealthWatcher() override {
policy_.reset(DEBUG_LOCATION, "HealthWatcher dtor");
}
void OnConnectivityStateChange(grpc_connectivity_state new_state,
absl::Status status) override;
grpc_pollset_set* interested_parties() override {
return policy_->interested_parties();
}
private:
RefCountedPtr<PickFirst> policy_;
};
class Picker : public SubchannelPicker {
public:
explicit Picker(RefCountedPtr<SubchannelInterface> subchannel)
@ -198,14 +284,22 @@ class PickFirst : public LoadBalancingPolicy {
void AttemptToConnectUsingLatestUpdateArgsLocked();
void UnsetSelectedSubchannel();
// Whether we should omit our status message prefix.
const bool omit_status_message_prefix_;
// Latest update args.
UpdateArgs latest_update_args_;
// All our subchannels.
RefCountedPtr<PickFirstSubchannelList> subchannel_list_;
OrphanablePtr<SubchannelList> subchannel_list_;
// Latest pending subchannel list.
RefCountedPtr<PickFirstSubchannelList> latest_pending_subchannel_list_;
OrphanablePtr<SubchannelList> latest_pending_subchannel_list_;
// Selected subchannel in \a subchannel_list_.
PickFirstSubchannelData* selected_ = nullptr;
SubchannelList::SubchannelData* selected_ = nullptr;
// Health watcher for the selected subchannel.
SubchannelInterface::ConnectivityStateWatcherInterface* health_watcher_ =
nullptr;
SubchannelInterface::DataWatcherInterface* health_data_watcher_ = nullptr;
// Are we in IDLE state?
bool idle_ = false;
// Are we shut down?
@ -214,7 +308,12 @@ class PickFirst : public LoadBalancingPolicy {
absl::BitGen bit_gen_;
};
PickFirst::PickFirst(Args args) : LoadBalancingPolicy(std::move(args)) {
PickFirst::PickFirst(Args args)
: LoadBalancingPolicy(std::move(args)),
omit_status_message_prefix_(
channel_args()
.GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX)
.value_or(false)) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO, "Pick First %p created.", this);
}
@ -233,6 +332,7 @@ void PickFirst::ShutdownLocked() {
gpr_log(GPR_INFO, "Pick First %p Shutting down", this);
}
shutdown_ = true;
UnsetSelectedSubchannel();
subchannel_list_.reset();
latest_pending_subchannel_list_.reset();
}
@ -268,13 +368,11 @@ void PickFirst::AttemptToConnectUsingLatestUpdateArgsLocked() {
"[PF %p] Shutting down previous pending subchannel list %p", this,
latest_pending_subchannel_list_.get());
}
latest_pending_subchannel_list_ = MakeRefCounted<PickFirstSubchannelList>(
this, std::move(addresses), latest_update_args_.args);
latest_pending_subchannel_list_->StartWatchingLocked(
latest_update_args_.args);
latest_pending_subchannel_list_ = MakeOrphanable<SubchannelList>(
Ref(), std::move(addresses), latest_update_args_.args);
// Empty update or no valid subchannels. Put the channel in
// TRANSIENT_FAILURE and request re-resolution.
if (latest_pending_subchannel_list_->num_subchannels() == 0) {
if (latest_pending_subchannel_list_->size() == 0) {
absl::Status status =
latest_update_args_.addresses.ok()
? absl::UnavailableError(absl::StrCat(
@ -287,9 +385,8 @@ void PickFirst::AttemptToConnectUsingLatestUpdateArgsLocked() {
}
// If the new update is empty or we don't yet have a selected subchannel in
// the current list, replace the current subchannel list immediately.
if (latest_pending_subchannel_list_->num_subchannels() == 0 ||
selected_ == nullptr) {
selected_ = nullptr;
if (latest_pending_subchannel_list_->size() == 0 || selected_ == nullptr) {
UnsetSelectedSubchannel();
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) &&
subchannel_list_ != nullptr) {
gpr_log(GPR_INFO, "[PF %p] Shutting down previous subchannel list %p",
@ -310,8 +407,6 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) {
this, args.addresses.status().ToString().c_str());
}
}
// Add GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg.
args.args = args.args.Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, 1);
// Set return status based on the address list.
absl::Status status;
if (!args.addresses.ok()) {
@ -352,18 +447,122 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) {
return status;
}
void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) {
PickFirst* p = static_cast<PickFirst*>(subchannel_list()->policy());
// Forgets the currently selected subchannel.  If a health data watcher was
// registered on that subchannel, it is cancelled first; afterwards all three
// selection-related pointers are cleared.
void PickFirst::UnsetSelectedSubchannel() {
  const bool has_health_watch =
      selected_ != nullptr && health_data_watcher_ != nullptr;
  if (has_health_watch) {
    // Must happen before selected_ is cleared below.
    selected_->subchannel()->CancelDataWatcher(health_data_watcher_);
  }
  health_data_watcher_ = nullptr;
  health_watcher_ = nullptr;
  selected_ = nullptr;
}
//
// PickFirst::HealthWatcher
//
// Translates a health-watch notification for the selected subchannel into a
// connectivity state update (plus a matching picker) reported through the
// policy's channel control helper.
void PickFirst::HealthWatcher::OnConnectivityStateChange(
    grpc_connectivity_state new_state, absl::Status status) {
  // Only the watcher the policy currently tracks may report state.
  if (policy_->health_watcher_ != this) return;
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
    gpr_log(GPR_INFO, "[PF %p] health watch state update: %s (%s)",
            policy_.get(), ConnectivityStateName(new_state),
            status.ToString().c_str());
  }
  switch (new_state) {
    case GRPC_CHANNEL_READY: {
      // Healthy: route picks to the selected subchannel.
      auto ready_picker =
          MakeRefCounted<Picker>(policy_->selected_->subchannel()->Ref());
      policy_->channel_control_helper()->UpdateState(
          GRPC_CHANNEL_READY, absl::OkStatus(), std::move(ready_picker));
      return;
    }
    case GRPC_CHANNEL_IDLE:
      // If the subchannel becomes disconnected, the health watcher
      // might happen to see the change before the raw connectivity
      // state watcher does.  In this case, ignore it, since the raw
      // connectivity state watcher will handle it shortly.
      return;
    case GRPC_CHANNEL_CONNECTING: {
      // Health status not known yet: queue picks.
      auto queue_picker = MakeRefCounted<QueuePicker>(policy_->Ref());
      policy_->channel_control_helper()->UpdateState(
          GRPC_CHANNEL_CONNECTING, absl::OkStatus(), std::move(queue_picker));
      return;
    }
    case GRPC_CHANNEL_TRANSIENT_FAILURE: {
      // Unhealthy: fail picks with the reported status.
      auto failure_picker = MakeRefCounted<TransientFailurePicker>(status);
      policy_->channel_control_helper()->UpdateState(
          GRPC_CHANNEL_TRANSIENT_FAILURE, status, std::move(failure_picker));
      return;
    }
    case GRPC_CHANNEL_SHUTDOWN:
      Crash("health watcher reported state SHUTDOWN");
  }
}
//
// PickFirst::SubchannelList::SubchannelData
//
// Constructs the per-subchannel state and immediately starts a connectivity
// state watch on `subchannel`.
//
// Parameters:
//   subchannel_list - the owning list (not owned by this object).
//   subchannel      - the subchannel to watch.
//
// The watcher keeps a raw pointer to this object and a ref to the owning
// list; pending_watcher_ remembers the watcher so that it can be cancelled
// later.
PickFirst::SubchannelList::SubchannelData::SubchannelData(
    SubchannelList* subchannel_list,
    RefCountedPtr<SubchannelInterface> subchannel)
    : subchannel_list_(subchannel_list), subchannel_(std::move(subchannel)) {
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
    // Do not call Index() here.  This ctor runs while the element is being
    // appended to subchannels_ via emplace_back(), so the vector does not
    // contain it yet.  Index() evaluates subchannels_.front(), which is
    // undefined behavior on an empty vector (i.e. for the first element).
    // Because the element is appended at the end, its index is simply the
    // list's current size, which is also what Index() would have yielded.
    const size_t index = subchannel_list_->size();
    gpr_log(GPR_INFO,
            "[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
            " (subchannel %p): starting watch",
            subchannel_list_->policy(), subchannel_list_, index,
            subchannel_list_->size(), subchannel_.get());
  }
  auto watcher = std::make_unique<Watcher>(
      this, subchannel_list_->Ref(DEBUG_LOCATION, "Watcher"));
  // Keep a non-owning handle; ownership moves to the subchannel below.
  pending_watcher_ = watcher.get();
  subchannel_->WatchConnectivityState(std::move(watcher));
}
// Cancels the pending connectivity watch and releases the subchannel ref.
// Does nothing if the subchannel has already been released.
void PickFirst::SubchannelList::SubchannelData::ShutdownLocked() {
  if (subchannel_ == nullptr) return;
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
    gpr_log(GPR_INFO,
            "[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
            " (subchannel %p): cancelling watch and unreffing subchannel",
            subchannel_list_->policy(), subchannel_list_, Index(),
            subchannel_list_->size(), subchannel_.get());
  }
  // Stop the watch before dropping our ref to the subchannel.
  subchannel_->CancelConnectivityStateWatch(pending_watcher_);
  pending_watcher_ = nullptr;
  subchannel_.reset();
}
void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange(
grpc_connectivity_state new_state, absl::Status status) {
PickFirst* p = subchannel_list_->policy();
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(
GPR_INFO,
"[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR
" (subchannel %p): connectivity changed: old_state=%s, new_state=%s, "
"status=%s, shutting_down=%d, pending_watcher=%p, "
"p->selected_=%p, p->subchannel_list_=%p, "
"p->latest_pending_subchannel_list_=%p",
p, subchannel_list_, Index(), subchannel_list_->size(),
subchannel_.get(),
(connectivity_state_.has_value()
? ConnectivityStateName(*connectivity_state_)
: "N/A"),
ConnectivityStateName(new_state), status.ToString().c_str(),
subchannel_list_->shutting_down(), pending_watcher_, p->selected_,
p->subchannel_list_.get(), p->latest_pending_subchannel_list_.get());
}
if (subchannel_list_->shutting_down() || pending_watcher_ == nullptr) return;
// The notification must be for a subchannel in either the current or
// latest pending subchannel lists.
GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() ||
subchannel_list() == p->latest_pending_subchannel_list_.get());
GPR_ASSERT(subchannel_list_ == p->subchannel_list_.get() ||
subchannel_list_ == p->latest_pending_subchannel_list_.get());
GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN);
absl::optional<grpc_connectivity_state> old_state = connectivity_state_;
connectivity_state_ = new_state;
connectivity_status_ = status;
// Handle updates for the currently selected subchannel.
if (p->selected_ == this) {
GPR_ASSERT(subchannel_list() == p->subchannel_list_.get());
GPR_ASSERT(subchannel_list_ == p->subchannel_list_.get());
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"Pick First %p selected subchannel connectivity changed to %s", p,
@ -380,17 +579,15 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
p, p->latest_pending_subchannel_list_.get(),
p->subchannel_list_.get());
}
p->selected_ = nullptr;
p->UnsetSelectedSubchannel();
p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
// Set our state to that of the pending subchannel list.
if (p->subchannel_list_->in_transient_failure()) {
absl::Status status = absl::UnavailableError(absl::StrCat(
"selected subchannel failed; switching to pending update; "
"last failure: ",
p->subchannel_list_
->subchannel(p->subchannel_list_->num_subchannels())
->connectivity_status()
.ToString()));
p->subchannel_list_->subchannels_.back()
.connectivity_status_.ToString()));
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_TRANSIENT_FAILURE, status,
MakeRefCounted<TransientFailurePicker>(status));
@ -410,7 +607,7 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
// and we could switch to that rather than going IDLE.
// Enter idle.
p->idle_ = true;
p->selected_ = nullptr;
p->UnsetSelectedSubchannel();
p->subchannel_list_.reset();
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_IDLE, absl::Status(),
@ -428,7 +625,7 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
// select in place of the current one.
// If the subchannel is READY, use it.
if (new_state == GRPC_CHANNEL_READY) {
subchannel_list()->set_in_transient_failure(false);
subchannel_list_->set_in_transient_failure(false);
ProcessUnselectedReadyLocked();
return;
}
@ -438,36 +635,36 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
// Otherwise, do nothing, since we'll continue to wait until all of
// the subchannels report their state.
if (!old_state.has_value()) {
if (subchannel_list()->AllSubchannelsSeenInitialState()) {
subchannel_list()->subchannel(0)->subchannel()->RequestConnection();
if (subchannel_list_->AllSubchannelsSeenInitialState()) {
subchannel_list_->subchannels_.front().subchannel_->RequestConnection();
}
return;
}
// Ignore any other updates for subchannels we're not currently trying to
// connect to.
if (Index() != subchannel_list()->attempting_index()) return;
if (Index() != subchannel_list_->attempting_index()) return;
// Otherwise, process connectivity state.
switch (new_state) {
case GRPC_CHANNEL_READY:
// Already handled this case above, so this should not happen.
GPR_UNREACHABLE_CODE(break);
case GRPC_CHANNEL_TRANSIENT_FAILURE: {
size_t next_index = (Index() + 1) % subchannel_list()->num_subchannels();
subchannel_list()->set_attempting_index(next_index);
PickFirstSubchannelData* sd = subchannel_list()->subchannel(next_index);
size_t next_index = (Index() + 1) % subchannel_list_->size();
subchannel_list_->set_attempting_index(next_index);
SubchannelData& sd = subchannel_list_->subchannels_[next_index];
// If we've tried all subchannels, set state to TRANSIENT_FAILURE.
if (sd->Index() == 0) {
if (sd.Index() == 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"Pick First %p subchannel list %p failed to connect to "
"all subchannels",
p, subchannel_list());
p, subchannel_list_);
}
subchannel_list()->set_in_transient_failure(true);
subchannel_list_->set_in_transient_failure(true);
// In case 2, swap to the new subchannel list. This means reporting
// TRANSIENT_FAILURE and dropping the existing (working) connection,
// but we can't ignore what the control plane has told us.
if (subchannel_list() == p->latest_pending_subchannel_list_.get()) {
if (subchannel_list_ == p->latest_pending_subchannel_list_.get()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"Pick First %p promoting pending subchannel list %p to "
@ -475,17 +672,19 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
p, p->latest_pending_subchannel_list_.get(),
p->subchannel_list_.get());
}
p->selected_ = nullptr; // owned by p->subchannel_list_
p->UnsetSelectedSubchannel();
p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
}
// If this is the current subchannel list (either because we were
// in case 1 or because we were in case 2 and just promoted it to
// be the current list), re-resolve and report new state.
if (subchannel_list() == p->subchannel_list_.get()) {
if (subchannel_list_ == p->subchannel_list_.get()) {
p->channel_control_helper()->RequestReresolution();
absl::Status status = absl::UnavailableError(
absl::StrCat("failed to connect to all addresses; last error: ",
connectivity_status().ToString()));
absl::Status status = absl::UnavailableError(absl::StrCat(
(p->omit_status_message_prefix_
? ""
: "failed to connect to all addresses; last error: "),
connectivity_status_.ToString()));
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_TRANSIENT_FAILURE, status,
MakeRefCounted<TransientFailurePicker>(status));
@ -497,21 +696,21 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
// If it's already in CONNECTING, we don't need to do this.
// If it's in TRANSIENT_FAILURE, then we will trigger the
// connection attempt later when it reports IDLE.
auto sd_state = sd->connectivity_state();
auto sd_state = sd.connectivity_state();
if (sd_state.has_value() && *sd_state == GRPC_CHANNEL_IDLE) {
sd->subchannel()->RequestConnection();
sd.subchannel_->RequestConnection();
}
break;
}
case GRPC_CHANNEL_IDLE: {
subchannel()->RequestConnection();
subchannel_->RequestConnection();
break;
}
case GRPC_CHANNEL_CONNECTING: {
// Only update connectivity state in case 1, and only if we're not
// already in TRANSIENT_FAILURE.
if (subchannel_list() == p->subchannel_list_.get() &&
!subchannel_list()->in_transient_failure()) {
if (subchannel_list_ == p->subchannel_list_.get() &&
!subchannel_list_->in_transient_failure()) {
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_CONNECTING, absl::Status(),
MakeRefCounted<QueuePicker>(p->Ref(DEBUG_LOCATION, "QueuePicker")));
@ -523,8 +722,8 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked(
}
}
void PickFirst::PickFirstSubchannelData::ProcessUnselectedReadyLocked() {
PickFirst* p = static_cast<PickFirst*>(subchannel_list()->policy());
void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() {
PickFirst* p = static_cast<PickFirst*>(subchannel_list_->policy());
// If we get here, there are two possible cases:
// 1. We do not currently have a selected subchannel, and the update is
// for a subchannel in p->subchannel_list_ that we're trying to
@ -534,10 +733,10 @@ void PickFirst::PickFirstSubchannelData::ProcessUnselectedReadyLocked() {
// for a subchannel in p->latest_pending_subchannel_list_. The
// goal here is to find a subchannel from the update that we can
// select in place of the current one.
GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() ||
subchannel_list() == p->latest_pending_subchannel_list_.get());
GPR_ASSERT(subchannel_list_ == p->subchannel_list_.get() ||
subchannel_list_ == p->latest_pending_subchannel_list_.get());
// Case 2. Promote p->latest_pending_subchannel_list_ to p->subchannel_list_.
if (subchannel_list() == p->latest_pending_subchannel_list_.get()) {
if (subchannel_list_ == p->latest_pending_subchannel_list_.get()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO,
"Pick First %p promoting pending subchannel list %p to "
@ -549,19 +748,119 @@ void PickFirst::PickFirstSubchannelData::ProcessUnselectedReadyLocked() {
}
// Cases 1 and 2.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p, subchannel());
gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p,
subchannel_.get());
}
p->selected_ = this;
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_READY, absl::Status(),
MakeRefCounted<Picker>(subchannel()->Ref()));
for (size_t i = 0; i < subchannel_list()->num_subchannels(); ++i) {
// If health checking is enabled, start the health watch, but don't
// report a new picker -- we want to stay in CONNECTING while we wait
// for the health status notification.
// If health checking is NOT enabled, report READY.
if (subchannel_list_->enable_health_watch_) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
gpr_log(GPR_INFO, "[PF %p] starting health watch", p);
}
auto watcher = std::make_unique<HealthWatcher>(
p->Ref(DEBUG_LOCATION, "HealthWatcher"));
p->health_watcher_ = watcher.get();
auto health_data_watcher = MakeHealthCheckWatcher(
p->work_serializer(), subchannel_list_->args_, std::move(watcher));
p->health_data_watcher_ = health_data_watcher.get();
subchannel_->AddDataWatcher(std::move(health_data_watcher));
} else {
p->channel_control_helper()->UpdateState(
GRPC_CHANNEL_READY, absl::Status(),
MakeRefCounted<Picker>(subchannel_->Ref()));
}
// Unref all other subchannels in the list.
for (size_t i = 0; i < subchannel_list_->size(); ++i) {
if (i != Index()) {
subchannel_list()->subchannel(i)->ShutdownLocked();
subchannel_list_->subchannels_[i].ShutdownLocked();
}
}
}
//
// PickFirst::SubchannelList
//
// Builds the subchannel list for `addresses`.
//
// enable_health_watch_ is read from the channel args, after which the two
// pick_first-internal args are stripped from the copy stored in args_.  One
// subchannel is requested from the channel control helper per address;
// addresses for which no subchannel can be created are skipped.
PickFirst::SubchannelList::SubchannelList(RefCountedPtr<PickFirst> policy,
                                          ServerAddressList addresses,
                                          const ChannelArgs& args)
    : InternallyRefCounted<SubchannelList>(
          GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) ? "SubchannelList"
                                                            : nullptr),
      policy_(std::move(policy)),
      enable_health_watch_(
          args.GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING)
              .value_or(false)),
      args_(args.Remove(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING)
                .Remove(
                    GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX)) {
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
    gpr_log(GPR_INFO,
            "[PF %p] Creating subchannel list %p for %" PRIuPTR
            " subchannels - channel args: %s",
            policy_.get(), this, addresses.size(), args_.ToString().c_str());
  }
  // Reserve capacity for every address up front, before any entry is
  // appended.
  subchannels_.reserve(addresses.size());
  for (const ServerAddress& address : addresses) {
    RefCountedPtr<SubchannelInterface> subchannel =
        policy_->channel_control_helper()->CreateSubchannel(address, args_);
    if (subchannel != nullptr) {
      if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
        gpr_log(GPR_INFO,
                "[PF %p] subchannel list %p index %" PRIuPTR
                ": Created subchannel %p for address %s",
                policy_.get(), this, subchannels_.size(), subchannel.get(),
                address.ToString().c_str());
      }
      subchannels_.emplace_back(this, std::move(subchannel));
    } else if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
      // Subchannel could not be created; skip this address.
      gpr_log(GPR_INFO,
              "[PF %p] could not create subchannel for address %s, ignoring",
              policy_.get(), address.ToString().c_str());
    }
  }
}
// Destructor: has no work of its own beyond emitting a trace line when
// pick_first tracing is enabled.
PickFirst::SubchannelList::~SubchannelList() {
  if (!GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) return;
  gpr_log(GPR_INFO, "[PF %p] Destroying subchannel_list %p", policy_.get(),
          this);
}
// Shuts the list down: marks it as shutting down, shuts down every
// subchannel entry, and then releases one ref on the list via Unref().
// Asserts that the list was not already shutting down.
void PickFirst::SubchannelList::Orphan() {
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) {
    gpr_log(GPR_INFO, "[PF %p] Shutting down subchannel_list %p", policy_.get(),
            this);
  }
  // Orphan() must only run once per list.
  GPR_ASSERT(!shutting_down_);
  shutting_down_ = true;
  for (auto it = subchannels_.begin(); it != subchannels_.end(); ++it) {
    it->ShutdownLocked();
  }
  Unref();
}
// Forwards the backoff reset to every subchannel entry in the list.
void PickFirst::SubchannelList::ResetBackoffLocked() {
  for (auto it = subchannels_.begin(); it != subchannels_.end(); ++it) {
    it->ResetBackoffLocked();
  }
}
// Returns true iff every subchannel entry has a connectivity state set
// (an empty list therefore yields true).
bool PickFirst::SubchannelList::AllSubchannelsSeenInitialState() {
  bool all_seen = true;
  for (auto& entry : subchannels_) {
    if (!entry.connectivity_state().has_value()) {
      // One entry without a state is enough to answer "no".
      all_seen = false;
      break;
    }
  }
  return all_seen;
}
//
// factory
//

@ -0,0 +1,36 @@
//
// Copyright 2023 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_PICK_FIRST_PICK_FIRST_H
#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_PICK_FIRST_PICK_FIRST_H
#include <grpc/support/port_platform.h>
#include "src/core/lib/resolver/server_address.h"
// Internal channel arg to enable health checking in pick_first.
// Intended to be used by petiole policies (e.g., round_robin) that
// delegate to pick_first.
#define GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING \
GRPC_ARG_NO_SUBCHANNEL_PREFIX "pick_first_enable_health_checking"
// Internal channel arg to tell pick_first to omit the prefix it normally
// adds to error status messages. Intended to be used by petiole policies
// (e.g., round_robin) that want to add their own prefixes.
#define GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX \
GRPC_ARG_NO_SUBCHANNEL_PREFIX "pick_first_omit_status_message_prefix"
#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_PICK_FIRST_PICK_FIRST_H

@ -37,7 +37,7 @@
#include <grpc/impl/connectivity_state.h>
#include <grpc/support/log.h>
#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h"
#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/config/core_configuration.h"
#include "src/core/lib/debug/trace.h"
@ -48,7 +48,6 @@
#include "src/core/lib/json/json.h"
#include "src/core/lib/load_balancing/lb_policy.h"
#include "src/core/lib/load_balancing/lb_policy_factory.h"
#include "src/core/lib/load_balancing/subchannel_interface.h"
#include "src/core/lib/resolver/server_address.h"
#include "src/core/lib/transport/connectivity_state.h"
@ -74,93 +73,60 @@ class RoundRobin : public LoadBalancingPolicy {
void ResetBackoffLocked() override;
private:
~RoundRobin() override;
// Forward declaration.
class RoundRobinSubchannelList;
// Data for a particular subchannel in a subchannel list.
// This subclass adds the following functionality:
// - Tracks the previous connectivity state of the subchannel, so that
// we know how many subchannels are in each state.
class RoundRobinSubchannelData
: public SubchannelData<RoundRobinSubchannelList,
RoundRobinSubchannelData> {
class RoundRobinEndpointList : public EndpointList {
public:
RoundRobinSubchannelData(
SubchannelList<RoundRobinSubchannelList, RoundRobinSubchannelData>*
subchannel_list,
const ServerAddress& address,
RefCountedPtr<SubchannelInterface> subchannel)
: SubchannelData(subchannel_list, address, std::move(subchannel)) {}
absl::optional<grpc_connectivity_state> connectivity_state() const {
return logical_connectivity_state_;
RoundRobinEndpointList(RefCountedPtr<RoundRobin> round_robin,
const ServerAddressList& addresses,
const ChannelArgs& args)
: EndpointList(std::move(round_robin),
GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)
? "RoundRobinEndpointList"
: nullptr) {
Init(addresses, args,
[&](RefCountedPtr<RoundRobinEndpointList> endpoint_list,
const ServerAddress& address, const ChannelArgs& args) {
return MakeOrphanable<RoundRobinEndpoint>(
std::move(endpoint_list), address, args,
policy<RoundRobin>()->work_serializer());
});
}
private:
// Performs connectivity state updates that need to be done only
// after we have started watching.
void ProcessConnectivityChangeLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) override;
// Updates the logical connectivity state.
void UpdateLogicalConnectivityStateLocked(
grpc_connectivity_state connectivity_state);
// The logical connectivity state of the subchannel.
// Note that the logical connectivity state may differ from the
// actual reported state in some cases (e.g., after we see
// TRANSIENT_FAILURE, we ignore any subsequent state changes until
// we see READY).
absl::optional<grpc_connectivity_state> logical_connectivity_state_;
};
// A list of subchannels.
class RoundRobinSubchannelList
: public SubchannelList<RoundRobinSubchannelList,
RoundRobinSubchannelData> {
public:
RoundRobinSubchannelList(RoundRobin* policy, ServerAddressList addresses,
const ChannelArgs& args)
: SubchannelList(policy,
(GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)
? "RoundRobinSubchannelList"
: nullptr),
std::move(addresses), policy->channel_control_helper(),
args) {
// Need to maintain a ref to the LB policy as long as we maintain
// any references to subchannels, since the subchannels'
// pollset_sets will include the LB policy's pollset_set.
policy->Ref(DEBUG_LOCATION, "subchannel_list").release();
class RoundRobinEndpoint : public Endpoint {
public:
RoundRobinEndpoint(RefCountedPtr<RoundRobinEndpointList> endpoint_list,
const ServerAddress& address, const ChannelArgs& args,
std::shared_ptr<WorkSerializer> work_serializer)
: Endpoint(std::move(endpoint_list)) {
Init(address, args, std::move(work_serializer));
}
private:
// Called when the child policy reports a connectivity state update.
void OnStateUpdate(absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state,
const absl::Status& status) override;
};
LoadBalancingPolicy::ChannelControlHelper* channel_control_helper()
const override {
return policy<RoundRobin>()->channel_control_helper();
}
~RoundRobinSubchannelList() override {
RoundRobin* p = static_cast<RoundRobin*>(policy());
p->Unref(DEBUG_LOCATION, "subchannel_list");
}
// Updates the counters of subchannels in each state when a
// subchannel transitions from old_state to new_state.
// Updates the counters of children in each state when a
// child transitions from old_state to new_state.
void UpdateStateCountersLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state);
// Ensures that the right subchannel list is used and then updates
// the RR policy's connectivity state based on the subchannel list's
// Ensures that the right child list is used and then updates
// the RR policy's connectivity state based on the child list's
// state counters.
void MaybeUpdateRoundRobinConnectivityStateLocked(
absl::Status status_for_tf);
private:
std::shared_ptr<WorkSerializer> work_serializer() const override {
return static_cast<RoundRobin*>(policy())->work_serializer();
}
std::string CountersString() const {
return absl::StrCat("num_subchannels=", num_subchannels(),
" num_ready=", num_ready_,
return absl::StrCat("num_children=", size(), " num_ready=", num_ready_,
" num_connecting=", num_connecting_,
" num_transient_failure=", num_transient_failure_);
}
@ -174,7 +140,9 @@ class RoundRobin : public LoadBalancingPolicy {
class Picker : public SubchannelPicker {
public:
Picker(RoundRobin* parent, RoundRobinSubchannelList* subchannel_list);
Picker(RoundRobin* parent,
std::vector<RefCountedPtr<LoadBalancingPolicy::SubchannelPicker>>
pickers);
PickResult Pick(PickArgs args) override;
@ -183,18 +151,20 @@ class RoundRobin : public LoadBalancingPolicy {
RoundRobin* parent_;
std::atomic<size_t> last_picked_index_;
std::vector<RefCountedPtr<SubchannelInterface>> subchannels_;
std::vector<RefCountedPtr<LoadBalancingPolicy::SubchannelPicker>> pickers_;
};
~RoundRobin() override;
void ShutdownLocked() override;
// List of subchannels.
RefCountedPtr<RoundRobinSubchannelList> subchannel_list_;
// Latest pending subchannel list.
// When we get an updated address list, we create a new subchannel list
// for it here, and we wait to swap it into subchannel_list_ until the new
// Current child list.
OrphanablePtr<RoundRobinEndpointList> endpoint_list_;
// Latest pending child list.
// When we get an updated address list, we create a new child list
// for it here, and we wait to swap it into endpoint_list_ until the new
// list becomes READY.
RefCountedPtr<RoundRobinSubchannelList> latest_pending_subchannel_list_;
OrphanablePtr<RoundRobinEndpointList> latest_pending_endpoint_list_;
bool shutdown_ = false;
@ -205,38 +175,32 @@ class RoundRobin : public LoadBalancingPolicy {
// RoundRobin::Picker
//
RoundRobin::Picker::Picker(RoundRobin* parent,
RoundRobinSubchannelList* subchannel_list)
: parent_(parent) {
for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) {
RoundRobinSubchannelData* sd = subchannel_list->subchannel(i);
if (sd->connectivity_state().value_or(GRPC_CHANNEL_IDLE) ==
GRPC_CHANNEL_READY) {
subchannels_.push_back(sd->subchannel()->Ref());
}
}
RoundRobin::Picker::Picker(
RoundRobin* parent,
std::vector<RefCountedPtr<LoadBalancingPolicy::SubchannelPicker>> pickers)
: parent_(parent), pickers_(std::move(pickers)) {
// For discussion on why we generate a random starting index for
// the picker, see https://github.com/grpc/grpc-go/issues/2580.
size_t index =
absl::Uniform<size_t>(parent->bit_gen_, 0, subchannels_.size());
size_t index = absl::Uniform<size_t>(parent->bit_gen_, 0, pickers_.size());
last_picked_index_.store(index, std::memory_order_relaxed);
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO,
"[RR %p picker %p] created picker from subchannel_list=%p "
"with %" PRIuPTR " READY subchannels; last_picked_index_=%" PRIuPTR,
parent_, this, subchannel_list, subchannels_.size(), index);
"[RR %p picker %p] created picker from endpoint_list=%p "
"with %" PRIuPTR " READY children; last_picked_index_=%" PRIuPTR,
parent_, this, parent_->endpoint_list_.get(), pickers_.size(),
index);
}
}
RoundRobin::PickResult RoundRobin::Picker::Pick(PickArgs /*args*/) {
RoundRobin::PickResult RoundRobin::Picker::Pick(PickArgs args) {
size_t index = last_picked_index_.fetch_add(1, std::memory_order_relaxed) %
subchannels_.size();
pickers_.size();
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO,
"[RR %p picker %p] returning index %" PRIuPTR ", subchannel=%p",
parent_, this, index, subchannels_[index].get());
"[RR %p picker %p] using picker index %" PRIuPTR ", picker=%p",
parent_, this, index, pickers_[index].get());
}
return PickResult::Complete(subchannels_[index]);
return pickers_[index]->Pick(args);
}
//
@ -253,8 +217,8 @@ RoundRobin::~RoundRobin() {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO, "[RR %p] Destroying Round Robin policy", this);
}
GPR_ASSERT(subchannel_list_ == nullptr);
GPR_ASSERT(latest_pending_subchannel_list_ == nullptr);
GPR_ASSERT(endpoint_list_ == nullptr);
GPR_ASSERT(latest_pending_endpoint_list_ == nullptr);
}
void RoundRobin::ShutdownLocked() {
@ -262,14 +226,14 @@ void RoundRobin::ShutdownLocked() {
gpr_log(GPR_INFO, "[RR %p] Shutting down", this);
}
shutdown_ = true;
subchannel_list_.reset();
latest_pending_subchannel_list_.reset();
endpoint_list_.reset();
latest_pending_endpoint_list_.reset();
}
void RoundRobin::ResetBackoffLocked() {
subchannel_list_->ResetBackoffLocked();
if (latest_pending_subchannel_list_ != nullptr) {
latest_pending_subchannel_list_->ResetBackoffLocked();
endpoint_list_->ResetBackoffLocked();
if (latest_pending_endpoint_list_ != nullptr) {
latest_pending_endpoint_list_->ResetBackoffLocked();
}
}
@ -286,28 +250,31 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) {
gpr_log(GPR_INFO, "[RR %p] received update with address error: %s", this,
args.addresses.status().ToString().c_str());
}
// If we already have a subchannel list, then keep using the existing
// If we already have a child list, then keep using the existing
// list, but still report back that the update was not accepted.
if (subchannel_list_ != nullptr) return args.addresses.status();
if (endpoint_list_ != nullptr) return args.addresses.status();
}
// Create new subchannel list, replacing the previous pending list, if any.
// Create new child list, replacing the previous pending list, if any.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) &&
latest_pending_subchannel_list_ != nullptr) {
gpr_log(GPR_INFO, "[RR %p] replacing previous pending subchannel list %p",
this, latest_pending_subchannel_list_.get());
latest_pending_endpoint_list_ != nullptr) {
gpr_log(GPR_INFO, "[RR %p] replacing previous pending child list %p", this,
latest_pending_endpoint_list_.get());
}
latest_pending_subchannel_list_ = MakeRefCounted<RoundRobinSubchannelList>(
this, std::move(addresses), args.args);
latest_pending_subchannel_list_->StartWatchingLocked(args.args);
latest_pending_endpoint_list_ = MakeOrphanable<RoundRobinEndpointList>(
Ref(DEBUG_LOCATION, "RoundRobinEndpointList"), std::move(addresses),
args.args);
// If the new list is empty, immediately promote it to
// subchannel_list_ and report TRANSIENT_FAILURE.
if (latest_pending_subchannel_list_->num_subchannels() == 0) {
// endpoint_list_ and report TRANSIENT_FAILURE.
// TODO(roth): As part of adding dualstack backend support, we need to
// also handle the case where the list of addresses for a given
// endpoint is empty.
if (latest_pending_endpoint_list_->size() == 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) &&
subchannel_list_ != nullptr) {
gpr_log(GPR_INFO, "[RR %p] replacing previous subchannel list %p", this,
subchannel_list_.get());
endpoint_list_ != nullptr) {
gpr_log(GPR_INFO, "[RR %p] replacing previous child list %p", this,
endpoint_list_.get());
}
subchannel_list_ = std::move(latest_pending_subchannel_list_);
endpoint_list_ = std::move(latest_pending_endpoint_list_);
absl::Status status =
args.addresses.ok() ? absl::UnavailableError(absl::StrCat(
"empty address list: ", args.resolution_note))
@ -318,26 +285,64 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) {
return status;
}
// Otherwise, if this is the initial update, immediately promote it to
// subchannel_list_.
if (subchannel_list_.get() == nullptr) {
subchannel_list_ = std::move(latest_pending_subchannel_list_);
// endpoint_list_.
if (endpoint_list_ == nullptr) {
endpoint_list_ = std::move(latest_pending_endpoint_list_);
}
return absl::OkStatus();
}
//
// RoundRobinSubchannelList
// RoundRobin::RoundRobinEndpointList::RoundRobinEndpoint
//
// Handles a connectivity state update for this endpoint.
//
// old_state: the previous state, or unset if there was none.
// new_state: the newly reported state.
// status:    the status accompanying the update; it is passed on to
//            MaybeUpdateRoundRobinConnectivityStateLocked().
//
// An endpoint reporting IDLE is immediately asked to exit idle.  The
// list's state counters are updated only when the state actually
// changed; the policy-level state is re-evaluated on every call.
void RoundRobin::RoundRobinEndpointList::RoundRobinEndpoint::OnStateUpdate(
    absl::optional<grpc_connectivity_state> old_state,
    grpc_connectivity_state new_state, const absl::Status& status) {
  auto* list = endpoint_list<RoundRobinEndpointList>();
  auto* rr = policy<RoundRobin>();
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
    const char* prev_state_name =
        old_state.has_value() ? ConnectivityStateName(*old_state) : "N/A";
    gpr_log(GPR_INFO,
            "[RR %p] connectivity changed for child %p, endpoint_list %p "
            "(index %" PRIuPTR " of %" PRIuPTR
            "): prev_state=%s new_state=%s "
            "(%s)",
            rr, this, list, Index(), list->size(), prev_state_name,
            ConnectivityStateName(new_state), status.ToString().c_str());
  }
  // An IDLE child is asked to start connecting again right away.
  if (new_state == GRPC_CHANNEL_IDLE) {
    if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
      gpr_log(GPR_INFO, "[RR %p] child %p reported IDLE; requesting connection",
              rr, this);
    }
    ExitIdleLocked();
  }
  // Only touch the counters when the state is new or different.
  const bool unchanged = old_state.has_value() && *old_state == new_state;
  if (!unchanged) {
    list->UpdateStateCountersLocked(old_state, new_state);
  }
  // Re-evaluate the aggregated policy state.
  list->MaybeUpdateRoundRobinConnectivityStateLocked(status);
}
//
// RoundRobin::RoundRobinEndpointList
//
void RoundRobin::RoundRobinSubchannelList::UpdateStateCountersLocked(
void RoundRobin::RoundRobinEndpointList::UpdateStateCountersLocked(
absl::optional<grpc_connectivity_state> old_state,
grpc_connectivity_state new_state) {
// We treat IDLE the same as CONNECTING, since it will immediately
// transition into that state anyway.
if (old_state.has_value()) {
GPR_ASSERT(*old_state != GRPC_CHANNEL_SHUTDOWN);
if (*old_state == GRPC_CHANNEL_READY) {
GPR_ASSERT(num_ready_ > 0);
--num_ready_;
} else if (*old_state == GRPC_CHANNEL_CONNECTING) {
} else if (*old_state == GRPC_CHANNEL_CONNECTING ||
*old_state == GRPC_CHANNEL_IDLE) {
GPR_ASSERT(num_connecting_ > 0);
--num_connecting_;
} else if (*old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
@ -348,161 +353,90 @@ void RoundRobin::RoundRobinSubchannelList::UpdateStateCountersLocked(
GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN);
if (new_state == GRPC_CHANNEL_READY) {
++num_ready_;
} else if (new_state == GRPC_CHANNEL_CONNECTING) {
} else if (new_state == GRPC_CHANNEL_CONNECTING ||
new_state == GRPC_CHANNEL_IDLE) {
++num_connecting_;
} else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) {
++num_transient_failure_;
}
}
void RoundRobin::RoundRobinSubchannelList::
void RoundRobin::RoundRobinEndpointList::
MaybeUpdateRoundRobinConnectivityStateLocked(absl::Status status_for_tf) {
RoundRobin* p = static_cast<RoundRobin*>(policy());
// If this is latest_pending_subchannel_list_, then swap it into
// subchannel_list_ in the following cases:
// - subchannel_list_ has no READY subchannels.
// - This list has at least one READY subchannel and we have seen the
// initial connectivity state notification for all subchannels.
// - All of the subchannels in this list are in TRANSIENT_FAILURE.
auto* round_robin = policy<RoundRobin>();
// If this is latest_pending_endpoint_list_, then swap it into
// endpoint_list_ in the following cases:
// - endpoint_list_ has no READY children.
// - This list has at least one READY child and we have seen the
// initial connectivity state notification for all children.
// - All of the children in this list are in TRANSIENT_FAILURE.
// (This may cause the channel to go from READY to TRANSIENT_FAILURE,
// but we're doing what the control plane told us to do.)
if (p->latest_pending_subchannel_list_.get() == this &&
(p->subchannel_list_->num_ready_ == 0 ||
(num_ready_ > 0 && AllSubchannelsSeenInitialState()) ||
num_transient_failure_ == num_subchannels())) {
if (round_robin->latest_pending_endpoint_list_.get() == this &&
(round_robin->endpoint_list_->num_ready_ == 0 ||
(num_ready_ > 0 && AllEndpointsSeenInitialState()) ||
num_transient_failure_ == size())) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
const std::string old_counters_string =
p->subchannel_list_ != nullptr ? p->subchannel_list_->CountersString()
: "";
gpr_log(
GPR_INFO,
"[RR %p] swapping out subchannel list %p (%s) in favor of %p (%s)", p,
p->subchannel_list_.get(), old_counters_string.c_str(), this,
CountersString().c_str());
round_robin->endpoint_list_ != nullptr
? round_robin->endpoint_list_->CountersString()
: "";
gpr_log(GPR_INFO,
"[RR %p] swapping out child list %p (%s) in favor of %p (%s)",
round_robin, round_robin->endpoint_list_.get(),
old_counters_string.c_str(), this, CountersString().c_str());
}
p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_);
round_robin->endpoint_list_ =
std::move(round_robin->latest_pending_endpoint_list_);
}
// Only set connectivity state if this is the current subchannel list.
if (p->subchannel_list_.get() != this) return;
// Only set connectivity state if this is the current child list.
if (round_robin->endpoint_list_.get() != this) return;
// FIXME: scan children each time instead of keeping counters?
// First matching rule wins:
// 1) ANY subchannel is READY => policy is READY.
// 2) ANY subchannel is CONNECTING => policy is CONNECTING.
// 3) ALL subchannels are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE.
// 1) ANY child is READY => policy is READY.
// 2) ANY child is CONNECTING => policy is CONNECTING.
// 3) ALL children are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE.
if (num_ready_ > 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO, "[RR %p] reporting READY with subchannel list %p", p,
this);
gpr_log(GPR_INFO, "[RR %p] reporting READY with child list %p",
round_robin, this);
}
p->channel_control_helper()->UpdateState(GRPC_CHANNEL_READY, absl::Status(),
MakeRefCounted<Picker>(p, this));
std::vector<RefCountedPtr<LoadBalancingPolicy::SubchannelPicker>> pickers;
for (const auto& endpoint : endpoints()) {
auto state = endpoint->connectivity_state();
if (state.has_value() && *state == GRPC_CHANNEL_READY) {
pickers.push_back(endpoint->picker());
}
}
GPR_ASSERT(!pickers.empty());
round_robin->channel_control_helper()->UpdateState(
GRPC_CHANNEL_READY, absl::OkStatus(),
MakeRefCounted<Picker>(round_robin, std::move(pickers)));
} else if (num_connecting_ > 0) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO, "[RR %p] reporting CONNECTING with subchannel list %p",
p, this);
gpr_log(GPR_INFO, "[RR %p] reporting CONNECTING with child list %p",
round_robin, this);
}
p->channel_control_helper()->UpdateState(
round_robin->channel_control_helper()->UpdateState(
GRPC_CHANNEL_CONNECTING, absl::Status(),
MakeRefCounted<QueuePicker>(p->Ref(DEBUG_LOCATION, "QueuePicker")));
} else if (num_transient_failure_ == num_subchannels()) {
MakeRefCounted<QueuePicker>(nullptr));
} else if (num_transient_failure_ == size()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
gpr_log(GPR_INFO,
"[RR %p] reporting TRANSIENT_FAILURE with subchannel list %p: %s",
p, this, status_for_tf.ToString().c_str());
"[RR %p] reporting TRANSIENT_FAILURE with child list %p: %s",
round_robin, this, status_for_tf.ToString().c_str());
}
if (!status_for_tf.ok()) {
last_failure_ = absl::UnavailableError(
absl::StrCat("connections to all backends failing; last error: ",
status_for_tf.ToString()));
status_for_tf.message()));
}
p->channel_control_helper()->UpdateState(
round_robin->channel_control_helper()->UpdateState(
GRPC_CHANNEL_TRANSIENT_FAILURE, last_failure_,
MakeRefCounted<TransientFailurePicker>(last_failure_));
}
}
//
// RoundRobinSubchannelData
//
// Reacts to a connectivity state change on this subchannel.
//
// old_state: the previous state, or unset for the initial notification.
// new_state: the newly reported state.
//
// Steps, in order: optionally request re-resolution, optionally request
// a new connection, update the logical connectivity state, then let the
// subchannel list re-evaluate the policy's aggregated state.
void RoundRobin::RoundRobinSubchannelData::ProcessConnectivityChangeLocked(
    absl::optional<grpc_connectivity_state> old_state,
    grpc_connectivity_state new_state) {
  RoundRobin* rr = static_cast<RoundRobin*>(subchannel_list()->policy());
  GPR_ASSERT(subchannel() != nullptr);
  const bool reported_idle = new_state == GRPC_CHANNEL_IDLE;
  const bool reported_failure = new_state == GRPC_CHANNEL_TRANSIENT_FAILURE;
  // Re-resolve on TRANSIENT_FAILURE or IDLE, except on the initial state
  // notification: doing it there would result in an endless loop of
  // re-resolution.
  if (old_state.has_value() && (reported_failure || reported_idle)) {
    if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
      gpr_log(GPR_INFO,
              "[RR %p] Subchannel %p reported %s; requesting re-resolution", rr,
              subchannel(), ConnectivityStateName(new_state));
    }
    rr->channel_control_helper()->RequestReresolution();
  }
  // An IDLE subchannel is asked to connect again right away.
  if (reported_idle) {
    if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
      gpr_log(GPR_INFO,
              "[RR %p] Subchannel %p reported IDLE; requesting connection", rr,
              subchannel());
    }
    subchannel()->RequestConnection();
  }
  // Fold the new state into the logical state (and the list's counters).
  UpdateLogicalConnectivityStateLocked(new_state);
  // Let the list decide what the policy should now report.
  subchannel_list()->MaybeUpdateRoundRobinConnectivityStateLocked(
      connectivity_status());
}
// Updates logical_connectivity_state_ from a newly reported state and
// keeps the subchannel list's per-state counters in sync.
//
// Rules applied, in order:
//   1. Once the logical state is TRANSIENT_FAILURE, every report other
//      than READY is ignored.
//   2. IDLE is recorded as CONNECTING.
//   3. If the resulting state equals the current logical state, nothing
//      is updated.
void RoundRobin::RoundRobinSubchannelData::UpdateLogicalConnectivityStateLocked(
    grpc_connectivity_state connectivity_state) {
  RoundRobin* rr = static_cast<RoundRobin*>(subchannel_list()->policy());
  if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
    const char* prev_state_name =
        logical_connectivity_state_.has_value()
            ? ConnectivityStateName(*logical_connectivity_state_)
            : "N/A";
    gpr_log(
        GPR_INFO,
        "[RR %p] connectivity changed for subchannel %p, subchannel_list %p "
        "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s",
        rr, subchannel(), subchannel_list(), Index(),
        subchannel_list()->num_subchannels(), prev_state_name,
        ConnectivityStateName(connectivity_state));
  }
  // Rule 1: TRANSIENT_FAILURE is sticky until READY is seen.
  const bool in_transient_failure =
      logical_connectivity_state_.has_value() &&
      *logical_connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE;
  if (in_transient_failure && connectivity_state != GRPC_CHANNEL_READY) {
    return;
  }
  // Rule 2: IDLE is treated as CONNECTING, since the subchannel will
  // immediately transition into CONNECTING anyway.
  if (connectivity_state == GRPC_CHANNEL_IDLE) {
    if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) {
      gpr_log(GPR_INFO,
              "[RR %p] subchannel %p, subchannel_list %p (index %" PRIuPTR
              " of %" PRIuPTR "): treating IDLE as CONNECTING",
              rr, subchannel(), subchannel_list(), Index(),
              subchannel_list()->num_subchannels());
    }
    connectivity_state = GRPC_CHANNEL_CONNECTING;
  }
  // Rule 3: no change means there is nothing to update.
  const bool unchanged = logical_connectivity_state_.has_value() &&
                         *logical_connectivity_state_ == connectivity_state;
  if (unchanged) return;
  // Otherwise, update the counters first (they need the old logical
  // state), then record the new logical state.
  subchannel_list()->UpdateStateCountersLocked(logical_connectivity_state_,
                                               connectivity_state);
  logical_connectivity_state_ = connectivity_state;
}
//
// factory
//

@ -18,6 +18,7 @@
#include "src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h"
#include <inttypes.h>
#include <stddef.h>
#include <algorithm>
@ -430,7 +431,10 @@ void XdsOverrideHostLb::ResetBackoffLocked() {
absl::Status XdsOverrideHostLb::UpdateLocked(UpdateArgs args) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO, "[xds_override_host_lb %p] Received update", this);
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] Received update with %" PRIuPTR
" addresses",
this, args.addresses.ok() ? args.addresses->size() : 0);
}
auto old_config = std::move(config_);
// Update config.
@ -498,6 +502,10 @@ OrphanablePtr<LoadBalancingPolicy> XdsOverrideHostLb::CreateChildPolicyLocked(
absl::StatusOr<ServerAddressList> XdsOverrideHostLb::UpdateAddressMap(
absl::StatusOr<ServerAddressList> addresses) {
if (!addresses.ok()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO, "[xds_override_host_lb %p] address error: %s", this,
addresses.status().ToString().c_str());
}
return addresses;
}
ServerAddressList return_value;
@ -505,13 +513,30 @@ absl::StatusOr<ServerAddressList> XdsOverrideHostLb::UpdateAddressMap(
for (const auto& address : *addresses) {
XdsHealthStatus status = GetAddressHealthStatus(address);
if (status.status() != XdsHealthStatus::kDraining) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] address %s: not draining, "
"passing to child",
this, address.ToString().c_str());
}
return_value.push_back(address);
} else if (!config_->override_host_status_set().Contains(status)) {
// Skip draining hosts if not in the override status set.
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] address %s: draining but not in "
"override_host_status set -- ignoring",
this, address.ToString().c_str());
}
continue;
}
auto key = grpc_sockaddr_to_uri(&address.address());
if (key.ok()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] address %s: adding map key %s", this,
address.ToString().c_str(), key->c_str());
}
addresses_for_map.emplace(std::move(*key), status);
}
}
@ -519,6 +544,10 @@ absl::StatusOr<ServerAddressList> XdsOverrideHostLb::UpdateAddressMap(
MutexLock lock(&subchannel_map_mu_);
for (auto it = subchannel_map_.begin(); it != subchannel_map_.end();) {
if (addresses_for_map.find(it->first) == addresses_for_map.end()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO, "[xds_override_host_lb %p] removing map key %s",
this, it->first.c_str());
}
it = subchannel_map_.erase(it);
} else {
++it;
@ -527,10 +556,20 @@ absl::StatusOr<ServerAddressList> XdsOverrideHostLb::UpdateAddressMap(
for (const auto& key_status : addresses_for_map) {
auto it = subchannel_map_.find(key_status.first);
if (it == subchannel_map_.end()) {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO, "[xds_override_host_lb %p] adding map key %s", this,
key_status.first.c_str());
}
subchannel_map_.emplace(std::piecewise_construct,
std::forward_as_tuple(key_status.first),
std::forward_as_tuple(key_status.second));
} else {
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) {
gpr_log(GPR_INFO,
"[xds_override_host_lb %p] setting EDS health status for "
"%s to %s",
this, key_status.first.c_str(), key_status.second.ToString());
}
it->second.SetEdsHealthStatus(key_status.second);
}
}

@ -212,6 +212,8 @@ class Subchannel : public DualRefCounted<Subchannel> {
channelz::SubchannelNode* channelz_node();
const grpc_resolved_address& address() const { return key_.address(); }
// Starts watching the subchannel's connectivity state.
// The first callback to the watcher will be delivered ~immediately.
// Subsequent callbacks will be delivered as the subchannel's state

@ -33,6 +33,7 @@ CORE_SOURCE_FILES = [
'src/core/ext/filters/client_channel/http_proxy.cc',
'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc',
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc',
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc',
'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc',

@ -57,8 +57,10 @@
#include "src/core/ext/filters/client_channel/client_channel_internal.h"
#include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h"
#include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h"
#include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h"
#include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h"
#include "src/core/ext/filters/client_channel/subchannel_interface_internal.h"
#include "src/core/ext/filters/client_channel/subchannel_pool_interface.h"
#include "src/core/lib/address_utils/parse_address.h"
#include "src/core/lib/address_utils/sockaddr_utils.h"
@ -111,7 +113,10 @@ class LoadBalancingPolicyTest : public ::testing::Test {
~FakeSubchannel() override {
if (orca_watcher_ != nullptr) {
MutexLock lock(&state_->backend_metric_watcher_mu_);
state_->watchers_.erase(orca_watcher_.get());
state_->orca_watchers_.erase(orca_watcher_.get());
}
for (const auto& p : watcher_map_) {
state_->state_tracker_.RemoveWatcher(p.second);
}
}
@ -121,6 +126,11 @@ class LoadBalancingPolicyTest : public ::testing::Test {
// Converts between
// SubchannelInterface::ConnectivityStateWatcherInterface and
// ConnectivityStateWatcherInterface.
//
// We support both unique_ptr<> and shared_ptr<>, since raw
// connectivity watches use the latter but health watches use the
// former.
// TODO(roth): Clean this up.
class WatcherWrapper : public AsyncConnectivityStateWatcherInterface {
public:
WatcherWrapper(
@ -132,33 +142,59 @@ class LoadBalancingPolicyTest : public ::testing::Test {
std::move(work_serializer)),
watcher_(std::move(watcher)) {}
WatcherWrapper(
std::shared_ptr<WorkSerializer> work_serializer,
std::shared_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>
watcher)
: AsyncConnectivityStateWatcherInterface(
std::move(work_serializer)),
watcher_(std::move(watcher)) {}
void OnConnectivityStateChange(grpc_connectivity_state new_state,
const absl::Status& status) override {
watcher_->OnConnectivityStateChange(new_state, status);
watcher()->OnConnectivityStateChange(new_state, status);
}
private:
std::unique_ptr<SubchannelInterface::ConnectivityStateWatcherInterface>
SubchannelInterface::ConnectivityStateWatcherInterface* watcher()
const {
return Match(
watcher_,
[](const std::unique_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>&
watcher) { return watcher.get(); },
[](const std::shared_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>&
watcher) { return watcher.get(); });
}
absl::variant<
std::unique_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>,
std::shared_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>>
watcher_;
};
void WatchConnectivityState(
std::unique_ptr<
SubchannelInterface::ConnectivityStateWatcherInterface>
watcher) override {
watcher) override
ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) {
auto* watcher_ptr = watcher.get();
auto watcher_wrapper = MakeOrphanable<WatcherWrapper>(
work_serializer_, std::move(watcher));
watcher_map_[watcher.get()] = watcher_wrapper.get();
MutexLock lock(&state_->mu_);
watcher_map_[watcher_ptr] = watcher_wrapper.get();
state_->state_tracker_.AddWatcher(GRPC_CHANNEL_SHUTDOWN,
std::move(watcher_wrapper));
}
void CancelConnectivityStateWatch(
ConnectivityStateWatcherInterface* watcher) override {
ConnectivityStateWatcherInterface* watcher) override
ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) {
auto it = watcher_map_.find(watcher);
if (it == watcher_map_.end()) return;
MutexLock lock(&state_->mu_);
state_->state_tracker_.RemoveWatcher(it->second);
watcher_map_.erase(it);
}
@ -168,19 +204,56 @@ class LoadBalancingPolicyTest : public ::testing::Test {
state_->requested_connection_ = true;
}
void AddDataWatcher(
std::unique_ptr<DataWatcherInterface> watcher) override {
void AddDataWatcher(std::unique_ptr<DataWatcherInterface> watcher)
override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) {
MutexLock lock(&state_->backend_metric_watcher_mu_);
GPR_ASSERT(orca_watcher_ == nullptr);
orca_watcher_.reset(static_cast<OrcaWatcher*>(watcher.release()));
state_->watchers_.insert(orca_watcher_.get());
auto* w =
static_cast<InternalSubchannelDataWatcherInterface*>(watcher.get());
if (w->type() == OrcaProducer::Type()) {
GPR_ASSERT(orca_watcher_ == nullptr);
orca_watcher_.reset(static_cast<OrcaWatcher*>(watcher.release()));
state_->orca_watchers_.insert(orca_watcher_.get());
} else if (w->type() == HealthProducer::Type()) {
// TODO(roth): Support health checking in test framework.
// For now, we just hard-code this to the raw connectivity state.
GPR_ASSERT(health_watcher_ == nullptr);
GPR_ASSERT(health_watcher_wrapper_ == nullptr);
health_watcher_.reset(static_cast<HealthWatcher*>(watcher.release()));
auto connectivity_watcher = health_watcher_->TakeWatcher();
auto* connectivity_watcher_ptr = connectivity_watcher.get();
auto watcher_wrapper = MakeOrphanable<WatcherWrapper>(
work_serializer_, std::move(connectivity_watcher));
health_watcher_wrapper_ = watcher_wrapper.get();
state_->state_tracker_.AddWatcher(GRPC_CHANNEL_SHUTDOWN,
std::move(watcher_wrapper));
gpr_log(GPR_INFO,
"AddDataWatcher(): added HealthWatch=%p "
"connectivity_watcher=%p watcher_wrapper=%p",
health_watcher_.get(), connectivity_watcher_ptr,
health_watcher_wrapper_);
}
}
void CancelDataWatcher(DataWatcherInterface* watcher) override {
void CancelDataWatcher(DataWatcherInterface* watcher) override
ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) {
MutexLock lock(&state_->backend_metric_watcher_mu_);
if (orca_watcher_.get() != static_cast<OrcaWatcher*>(watcher)) return;
state_->watchers_.erase(orca_watcher_.get());
orca_watcher_.reset();
auto* w = static_cast<InternalSubchannelDataWatcherInterface*>(watcher);
if (w->type() == OrcaProducer::Type()) {
if (orca_watcher_.get() != static_cast<OrcaWatcher*>(watcher)) return;
state_->orca_watchers_.erase(orca_watcher_.get());
orca_watcher_.reset();
} else if (w->type() == HealthProducer::Type()) {
if (health_watcher_.get() != static_cast<HealthWatcher*>(watcher)) {
return;
}
gpr_log(GPR_INFO,
"CancelDataWatcher(): cancelling HealthWatch=%p "
"watcher_wrapper=%p",
health_watcher_.get(), health_watcher_wrapper_);
state_->state_tracker_.RemoveWatcher(health_watcher_wrapper_);
health_watcher_wrapper_ = nullptr;
health_watcher_.reset();
}
}
// Don't need this method, so it's a no-op.
@ -191,11 +264,16 @@ class LoadBalancingPolicyTest : public ::testing::Test {
std::map<SubchannelInterface::ConnectivityStateWatcherInterface*,
WatcherWrapper*>
watcher_map_;
std::unique_ptr<HealthWatcher> health_watcher_;
WatcherWrapper* health_watcher_wrapper_ = nullptr;
std::unique_ptr<OrcaWatcher> orca_watcher_;
};
explicit SubchannelState(absl::string_view address)
: address_(address), state_tracker_("LoadBalancingPolicyTest") {}
SubchannelState(absl::string_view address,
std::shared_ptr<WorkSerializer> work_serializer)
: address_(address),
work_serializer_(std::move(work_serializer)),
state_tracker_("LoadBalancingPolicyTest") {}
const std::string& address() const { return address_; }
@ -250,10 +328,14 @@ class LoadBalancingPolicyTest : public ::testing::Test {
<< "bug in test: " << ConnectivityStateName(state)
<< " must have OK status: " << status;
}
MutexLock lock(&mu_);
AssertValidConnectivityStateTransition(state_tracker_.state(), state,
location);
state_tracker_.SetState(state, status, "set from test");
work_serializer_->Run(
[this, state, status, location]()
ABSL_EXCLUSIVE_LOCKS_REQUIRED(*work_serializer_) {
AssertValidConnectivityStateTransition(state_tracker_.state(),
state, location);
state_tracker_.SetState(state, status, "set from test");
},
DEBUG_LOCATION);
}
// Indicates if any of the associated SubchannelInterface objects
@ -273,7 +355,7 @@ class LoadBalancingPolicyTest : public ::testing::Test {
// Sends an OOB backend metric report to all watchers.
void SendOobBackendMetricReport(const BackendMetricData& backend_metrics) {
MutexLock lock(&backend_metric_watcher_mu_);
for (const auto* watcher : watchers_) {
for (const auto* watcher : orca_watchers_) {
watcher->watcher()->OnBackendMetricReport(backend_metrics);
}
}
@ -282,7 +364,7 @@ class LoadBalancingPolicyTest : public ::testing::Test {
void CheckOobReportingPeriod(Duration expected,
SourceLocation location = SourceLocation()) {
MutexLock lock(&backend_metric_watcher_mu_);
for (const auto* watcher : watchers_) {
for (const auto* watcher : orca_watchers_) {
EXPECT_EQ(watcher->report_interval(), expected)
<< location.file() << ":" << location.line();
}
@ -290,16 +372,15 @@ class LoadBalancingPolicyTest : public ::testing::Test {
private:
const std::string address_;
Mutex mu_;
ConnectivityStateTracker state_tracker_ ABSL_GUARDED_BY(&mu_);
std::shared_ptr<WorkSerializer> work_serializer_;
ConnectivityStateTracker state_tracker_ ABSL_GUARDED_BY(*work_serializer_);
Mutex requested_connection_mu_;
bool requested_connection_ ABSL_GUARDED_BY(&requested_connection_mu_) =
false;
Mutex backend_metric_watcher_mu_;
std::set<OrcaWatcher*> watchers_
std::set<OrcaWatcher*> orca_watchers_
ABSL_GUARDED_BY(&backend_metric_watcher_mu_);
};
@ -417,7 +498,8 @@ class LoadBalancingPolicyTest : public ::testing::Test {
GPR_ASSERT(address_uri.ok());
it = test_->subchannel_pool_
.emplace(std::piecewise_construct, std::forward_as_tuple(key),
std::forward_as_tuple(std::move(*address_uri)))
std::forward_as_tuple(std::move(*address_uri),
work_serializer_))
.first;
}
return it->second.CreateSubchannel(work_serializer_);
@ -927,7 +1009,6 @@ class LoadBalancingPolicyTest : public ::testing::Test {
// Expect startup with RR with a set of addresses.
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> ExpectRoundRobinStartup(
absl::Span<const absl::string_view> addresses) {
ExpectConnectingUpdate();
RefCountedPtr<LoadBalancingPolicy::SubchannelPicker> picker;
for (size_t i = 0; i < addresses.size(); ++i) {
auto* subchannel = FindSubchannel(addresses[i]);
@ -935,6 +1016,7 @@ class LoadBalancingPolicyTest : public ::testing::Test {
if (subchannel == nullptr) return nullptr;
EXPECT_TRUE(subchannel->ConnectionRequested());
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
if (i == 0) ExpectConnectingUpdate();
subchannel->SetConnectivityState(GRPC_CHANNEL_READY);
if (i == 0) {
picker = WaitForConnected();
@ -1004,7 +1086,7 @@ class LoadBalancingPolicyTest : public ::testing::Test {
SubchannelKey key(MakeAddress(address), args);
auto it = subchannel_pool_
.emplace(std::piecewise_construct, std::forward_as_tuple(key),
std::forward_as_tuple(address))
std::forward_as_tuple(address, work_serializer_))
.first;
return &it->second;
}

@ -37,7 +37,6 @@
#include <grpc/support/log.h>
#include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h"
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
#include "src/core/lib/gprpp/time.h"
@ -184,8 +183,6 @@ TEST_F(OutlierDetectionTest, Basic) {
absl::Status status = ApplyUpdate(
BuildUpdate({kAddressUri}, ConfigBuilder().Build()), lb_policy_.get());
EXPECT_TRUE(status.ok()) << status;
// LB policy should have reported CONNECTING state.
ExpectConnectingUpdate();
// LB policy should have created a subchannel for the address.
auto* subchannel = FindSubchannel(kAddressUri);
ASSERT_NE(subchannel, nullptr);
@ -194,6 +191,8 @@ TEST_F(OutlierDetectionTest, Basic) {
EXPECT_TRUE(subchannel->ConnectionRequested());
// This causes the subchannel to start to connect, so it reports CONNECTING.
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
// LB policy should have reported CONNECTING state.
ExpectConnectingUpdate();
// When the subchannel becomes connected, it reports READY.
subchannel->SetConnectivityState(GRPC_CHANNEL_READY);
// The LB policy will report CONNECTING some number of times (doesn't
@ -230,8 +229,6 @@ TEST_F(OutlierDetectionTest, FailurePercentage) {
time_cache_.IncrementBy(Duration::Seconds(10));
RunTimerCallback();
gpr_log(GPR_INFO, "### ejection complete");
// Expect a re-resolution request.
ExpectReresolutionRequest();
// Expect a picker update.
std::vector<absl::string_view> remaining_addresses;
for (const auto& addr : kAddresses) {
@ -254,10 +251,8 @@ TEST_F(OutlierDetectionTest, DoesNotWorkWithPickFirst) {
.Build()),
lb_policy_.get());
EXPECT_TRUE(status.ok()) << status;
// LB policy should have created a subchannel for the first address with
// the GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg.
auto* subchannel = FindSubchannel(
kAddresses[0], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
// LB policy should have created a subchannel for the first address.
auto* subchannel = FindSubchannel(kAddresses[0]);
ASSERT_NE(subchannel, nullptr);
// When the LB policy receives the subchannel's initial connectivity
// state notification (IDLE), it will request a connection.

@ -33,7 +33,6 @@
#include <grpc/grpc.h>
#include <grpc/support/json.h>
#include "src/core/lib/channel/channel_args.h"
#include "src/core/lib/gprpp/debug_location.h"
#include "src/core/lib/gprpp/orphanable.h"
#include "src/core/lib/gprpp/ref_counted_ptr.h"
@ -74,8 +73,7 @@ class PickFirstTest : public LoadBalancingPolicyTest {
// We will remove entries as each subchannel starts to connect.
std::map<SubchannelState*, absl::string_view> subchannels;
for (auto address : addresses) {
auto* subchannel = FindSubchannel(
address, ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
auto* subchannel = FindSubchannel(address);
ASSERT_NE(subchannel, nullptr);
subchannels.emplace(subchannel, address);
}
@ -136,13 +134,10 @@ TEST_F(PickFirstTest, FirstAddressWorks) {
absl::Status status = ApplyUpdate(
BuildUpdate(kAddresses, MakePickFirstConfig(false)), lb_policy_.get());
EXPECT_TRUE(status.ok()) << status;
// LB policy should have created a subchannel for both addresses with
// the GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg.
auto* subchannel = FindSubchannel(
kAddresses[0], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
// LB policy should have created a subchannel for both addresses.
auto* subchannel = FindSubchannel(kAddresses[0]);
ASSERT_NE(subchannel, nullptr);
auto* subchannel2 = FindSubchannel(
kAddresses[1], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
auto* subchannel2 = FindSubchannel(kAddresses[1]);
ASSERT_NE(subchannel2, nullptr);
// When the LB policy receives the first subchannel's initial connectivity
// state notification (IDLE), it will request a connection.
@ -172,13 +167,10 @@ TEST_F(PickFirstTest, FirstAddressFails) {
absl::Status status = ApplyUpdate(
BuildUpdate(kAddresses, MakePickFirstConfig(false)), lb_policy_.get());
EXPECT_TRUE(status.ok()) << status;
// LB policy should have created a subchannel for both addresses with
// the GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg.
auto* subchannel = FindSubchannel(
kAddresses[0], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
// LB policy should have created a subchannel for both addresses.
auto* subchannel = FindSubchannel(kAddresses[0]);
ASSERT_NE(subchannel, nullptr);
auto* subchannel2 = FindSubchannel(
kAddresses[1], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
auto* subchannel2 = FindSubchannel(kAddresses[1]);
ASSERT_NE(subchannel2, nullptr);
// When the LB policy receives the first subchannel's initial connectivity
// state notification (IDLE), it will request a connection.
@ -217,13 +209,10 @@ TEST_F(PickFirstTest, GoesIdleWhenConnectionFailsThenCanReconnect) {
absl::Status status = ApplyUpdate(
BuildUpdate(kAddresses, MakePickFirstConfig(false)), lb_policy_.get());
EXPECT_TRUE(status.ok()) << status;
// LB policy should have created a subchannel for both addresses with
// the GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg.
auto* subchannel = FindSubchannel(
kAddresses[0], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
// LB policy should have created a subchannel for both addresses.
auto* subchannel = FindSubchannel(kAddresses[0]);
ASSERT_NE(subchannel, nullptr);
auto* subchannel2 = FindSubchannel(
kAddresses[1], ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true));
auto* subchannel2 = FindSubchannel(kAddresses[1]);
ASSERT_NE(subchannel2, nullptr);
// When the LB policy receives the first subchannel's initial connectivity
// state notification (IDLE), it will request a connection.

@ -42,8 +42,6 @@ class RoundRobinTest : public LoadBalancingPolicyTest {
void ExpectStartup(absl::Span<const absl::string_view> addresses) {
EXPECT_EQ(ApplyUpdate(BuildUpdate(addresses, nullptr), lb_policy_.get()),
absl::OkStatus());
// Expect the initial CONNECTING update with a picker that queues.
ExpectConnectingUpdate();
// RR should have created a subchannel for each address.
for (size_t i = 0; i < addresses.size(); ++i) {
auto* subchannel = FindSubchannel(addresses[i]);
@ -52,6 +50,8 @@ class RoundRobinTest : public LoadBalancingPolicyTest {
EXPECT_TRUE(subchannel->ConnectionRequested());
// The subchannel will connect successfully.
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
// Expect the initial CONNECTING update with a picker that queues.
if (i == 0) ExpectConnectingUpdate();
subchannel->SetConnectivityState(GRPC_CHANNEL_READY);
// As each subchannel becomes READY, we should get a new picker that
// includes the behavior. Note that there may be any number of

@ -29,6 +29,7 @@
#include <grpc/grpc.h>
#include <grpc/support/json.h>
#include <grpc/support/log.h>
#include "src/core/ext/filters/stateful_session/stateful_session_filter.h"
#include "src/core/ext/xds/xds_health_status.h"
@ -209,18 +210,31 @@ TEST_F(XdsOverrideHostTest, FailedSubchannelIsNotPicked) {
EXPECT_EQ(ExpectPickComplete(picker.get(),
MakeOverrideHostAttribute(kAddresses[1])),
kAddresses[1]);
// Subchannel for address 1 becomes disconnected.
gpr_log(GPR_INFO, "### subchannel 1 reporting IDLE");
auto subchannel = FindSubchannel(kAddresses[1]);
ASSERT_NE(subchannel, nullptr);
subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE);
gpr_log(GPR_INFO, "### expecting re-resolution request");
ExpectReresolutionRequest();
gpr_log(GPR_INFO,
"### expecting RR picks to exclude the disconnected subchannel");
ExpectRoundRobinPicks(ExpectState(GRPC_CHANNEL_READY).get(),
{kAddresses[0], kAddresses[2]});
// It starts trying to reconnect...
gpr_log(GPR_INFO, "### subchannel 1 reporting CONNECTING");
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
gpr_log(GPR_INFO, "### expecting RR picks again");
ExpectRoundRobinPicks(ExpectState(GRPC_CHANNEL_READY).get(),
{kAddresses[0], kAddresses[2]});
// ...but the connection attempt fails.
gpr_log(GPR_INFO, "### subchannel 1 reporting TRANSIENT_FAILURE");
subchannel->SetConnectivityState(GRPC_CHANNEL_TRANSIENT_FAILURE,
absl::ResourceExhaustedError("Hmmmm"));
gpr_log(GPR_INFO, "### expecting re-resolution request");
ExpectReresolutionRequest();
// The host override is not used.
gpr_log(GPR_INFO, "### checking that host override is not used");
picker = ExpectState(GRPC_CHANNEL_READY);
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]},
MakeOverrideHostAttribute(kAddresses[1]));
@ -292,6 +306,12 @@ TEST_F(XdsOverrideHostTest, DrainingSubchannelIsConnecting) {
EXPECT_EQ(ExpectPickComplete(picker.get(),
MakeOverrideHostAttribute(kAddresses[1])),
kAddresses[1]);
// Send an update that marks the endpoints with different EDS health
// states, but those states are present in override_host_status.
// The picker should use the DRAINING host when a call's override
// points to that host, but the host should not be used if there is no
// override pointing to it.
gpr_log(GPR_INFO, "### sending update with DRAINING host");
ApplyUpdateWithHealthStatuses(
{{kAddresses[0], XdsHealthStatus::HealthStatus::kUnknown},
{kAddresses[1], XdsHealthStatus::HealthStatus::kDraining},
@ -299,23 +319,35 @@ TEST_F(XdsOverrideHostTest, DrainingSubchannelIsConnecting) {
{"UNKNOWN", "HEALTHY", "DRAINING"});
auto subchannel = FindSubchannel(kAddresses[1]);
ASSERT_NE(subchannel, nullptr);
// There are two notifications - one from child policy and one from the parent
// policy due to draining channel update
picker = ExpectState(GRPC_CHANNEL_READY);
EXPECT_EQ(ExpectPickComplete(picker.get(),
MakeOverrideHostAttribute(kAddresses[1])),
kAddresses[1]);
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]});
// Now the connection to the draining host gets dropped.
// The picker should queue picks where the override host is IDLE.
// All picks without an override host should not use this host.
gpr_log(GPR_INFO, "### closing connection to DRAINING host");
subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE);
picker = ExpectState(GRPC_CHANNEL_READY);
ExpectPickQueued(picker.get(), MakeOverrideHostAttribute(kAddresses[1]));
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]});
// The subchannel should have been asked to reconnect as a result of the
// queued pick above. It will therefore transition into state CONNECTING.
// The pick behavior is the same as above: The picker should queue
// picks where the override host is CONNECTING. All picks without an
// override host should not use this host.
gpr_log(GPR_INFO, "### subchannel starts reconnecting");
EXPECT_TRUE(subchannel->ConnectionRequested());
ExpectQueueEmpty();
subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING);
picker = ExpectState(GRPC_CHANNEL_READY);
ExpectPickQueued(picker.get(), MakeOverrideHostAttribute(kAddresses[1]));
ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]});
// The subchannel now becomes connected again.
// Now picks with this override host can be completed again.
// Picks without an override host still don't use the draining host.
gpr_log(GPR_INFO, "### subchannel becomes reconnected");
subchannel->SetConnectivityState(GRPC_CHANNEL_READY);
picker = ExpectState(GRPC_CHANNEL_READY);
EXPECT_EQ(ExpectPickComplete(picker.get(),

@ -2080,7 +2080,8 @@ TEST_F(RoundRobinTest, HealthChecking) {
EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"UNAVAILABLE: backend unhealthy");
"(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"backend unhealthy");
// Clean up.
EnableDefaultHealthCheckService(false);
}
@ -2138,7 +2139,8 @@ TEST_F(RoundRobinTest, WithHealthCheckingInhibitPerChannel) {
EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1));
CheckRpcSendFailure(DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"UNAVAILABLE: backend unhealthy");
"(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"backend unhealthy");
// Second channel should be READY.
EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1));
CheckRpcSendOk(DEBUG_LOCATION, stub2);
@ -2183,7 +2185,8 @@ TEST_F(RoundRobinTest, HealthCheckingServiceNamePerChannel) {
EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1));
CheckRpcSendFailure(DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE,
"connections to all backends failing; last error: "
"UNAVAILABLE: backend unhealthy");
"(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: "
"backend unhealthy");
// Second channel should be READY.
EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1));
CheckRpcSendOk(DEBUG_LOCATION, stub2);

@ -1112,6 +1112,8 @@ src/core/ext/filters/client_channel/lb_policy/address_filtering.h \
src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.h \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \
@ -1131,6 +1133,7 @@ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h \
src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc \
src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h \
src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc \
src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h \
src/core/ext/filters/client_channel/lb_policy/priority/priority.cc \
src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \
src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \

@ -918,6 +918,8 @@ src/core/ext/filters/client_channel/lb_policy/address_filtering.h \
src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \
src/core/ext/filters/client_channel/lb_policy/endpoint_list.h \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h \
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \
@ -937,6 +939,7 @@ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h \
src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc \
src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h \
src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc \
src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h \
src/core/ext/filters/client_channel/lb_policy/priority/priority.cc \
src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \
src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \

Loading…
Cancel
Save