From 57ff0b890fbc95672c198d6840f863ef42e683b5 Mon Sep 17 00:00:00 2001 From: Eugene Ostroukhov Date: Tue, 12 Mar 2024 13:21:54 -0700 Subject: [PATCH 1/4] [xds] Support for multiple servers in the bootstrap.json (#36021) Closes #36021 COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/36021 from eugeneo:tasks/multiple-servers-bootstrap 8ebf7e1c064f2d5215f967d47cc1493cbbf12049 PiperOrigin-RevId: 615150672 --- src/core/ext/xds/xds_bootstrap.h | 6 +- src/core/ext/xds/xds_bootstrap_grpc.cc | 35 ++++---- src/core/ext/xds/xds_bootstrap_grpc.h | 19 ++++- src/core/ext/xds/xds_client.cc | 5 +- test/core/memory_usage/memory_usage_test.cc | 2 +- test/core/xds/xds_bootstrap_test.cc | 85 ++++++++++--------- test/core/xds/xds_client_fuzzer.cc | 8 +- test/core/xds/xds_client_test.cc | 50 ++++++----- .../xds/xds_cluster_resource_type_test.cc | 5 +- test/core/xds/xds_common_types_test.cc | 3 +- .../xds/xds_endpoint_resource_type_test.cc | 3 +- .../xds/xds_listener_resource_type_test.cc | 3 +- .../xds_route_config_resource_type_test.cc | 3 +- .../end2end/xds/xds_cluster_end2end_test.cc | 2 +- test/cpp/end2end/xds/xds_core_end2end_test.cc | 28 +++--- test/cpp/end2end/xds/xds_csds_end2end_test.cc | 2 +- test/cpp/end2end/xds/xds_end2end_test.cc | 9 +- test/cpp/end2end/xds/xds_end2end_test_lib.cc | 10 +-- test/cpp/end2end/xds/xds_end2end_test_lib.h | 7 +- .../end2end/xds/xds_routing_end2end_test.cc | 2 +- test/cpp/end2end/xds/xds_utils.cc | 25 +++--- test/cpp/end2end/xds/xds_utils.h | 11 +-- 22 files changed, 181 insertions(+), 142 deletions(-) diff --git a/src/core/ext/xds/xds_bootstrap.h b/src/core/ext/xds/xds_bootstrap.h index 8a1a8214f65..5754319ccfd 100644 --- a/src/core/ext/xds/xds_bootstrap.h +++ b/src/core/ext/xds/xds_bootstrap.h @@ -65,16 +65,14 @@ class XdsBootstrap { public: virtual ~Authority() = default; - virtual const XdsServer* server() const = 0; + virtual std::vector servers() const = 0; }; virtual ~XdsBootstrap() = default; virtual std::string ToString() const = 0; - // TODO(roth): We currently support only one server. Fix this when we - // add support for fallback for the xds channel. - virtual const XdsServer& server() const = 0; + virtual std::vector servers() const = 0; // Returns the node information, or null if not present in the bootstrap // config. diff --git a/src/core/ext/xds/xds_bootstrap_grpc.cc b/src/core/ext/xds/xds_bootstrap_grpc.cc index dd2b3da3c87..553866d667b 100644 --- a/src/core/ext/xds/xds_bootstrap_grpc.cc +++ b/src/core/ext/xds/xds_bootstrap_grpc.cc @@ -14,17 +14,25 @@ // limitations under the License. // -#include - #include "src/core/ext/xds/xds_bootstrap_grpc.h" +#include +#include #include #include #include +#include #include #include +#include "src/core/lib/config/core_configuration.h" +#include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/json/json.h" +#include "src/core/lib/json/json_object_loader.h" +#include "src/core/lib/json/json_reader.h" +#include "src/core/lib/json/json_writer.h" +#include "src/core/lib/security/credentials/channel_creds_registry.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/match.h" @@ -34,16 +42,6 @@ #include "absl/strings/string_view.h" #include "absl/types/optional.h" -#include - -#include "src/core/lib/config/core_configuration.h" -#include "src/core/lib/gprpp/ref_counted_ptr.h" -#include "src/core/lib/json/json.h" -#include "src/core/lib/json/json_object_loader.h" -#include "src/core/lib/json/json_reader.h" -#include "src/core/lib/json/json_writer.h" -#include "src/core/lib/security/credentials/channel_creds_registry.h" - namespace grpc_core { // @@ -330,11 +328,14 @@ std::string GrpcXdsBootstrap::ToString() const { parts.push_back( absl::StrFormat(" client_listener_resource_name_template=\"%s\",\n", entry.second.client_listener_resource_name_template())); - if (entry.second.server() != nullptr) { - parts.push_back(absl::StrFormat( - " servers=[\n%s\n],\n", - JsonDump(static_cast(entry.second.server()) - ->ToJson()))); + std::vector server_jsons; + for (const XdsServer* server : entry.second.servers()) { + server_jsons.emplace_back( + JsonDump(static_cast(server)->ToJson())); + } + if (!server_jsons.empty()) { + parts.push_back(absl::StrFormat(" servers=[\n%s\n],\n", + absl::StrJoin(server_jsons, ",\n"))); } parts.push_back(" },\n"); } diff --git a/src/core/ext/xds/xds_bootstrap_grpc.h b/src/core/ext/xds/xds_bootstrap_grpc.h index e3506833c65..13c6c8ac083 100644 --- a/src/core/ext/xds/xds_bootstrap_grpc.h +++ b/src/core/ext/xds/xds_bootstrap_grpc.h @@ -104,8 +104,13 @@ class GrpcXdsBootstrap : public XdsBootstrap { class GrpcAuthority : public Authority { public: - const XdsServer* server() const override { - return servers_.empty() ? nullptr : &servers_[0]; + std::vector servers() const override { + std::vector servers; + servers.reserve(servers_.size()); + for (const auto& server : servers_) { + servers.emplace_back(&server); + } + return servers; } const std::string& client_listener_resource_name_template() const { @@ -129,7 +134,15 @@ class GrpcXdsBootstrap : public XdsBootstrap { std::string ToString() const override; - const XdsServer& server() const override { return servers_[0]; } + std::vector servers() const override { + std::vector servers; + servers.reserve(servers_.size()); + for (const auto& server : servers_) { + servers.emplace_back(&server); + } + return servers; + } + const Node* node() const override { return node_.has_value() ? &*node_ : nullptr; } diff --git a/src/core/ext/xds/xds_client.cc b/src/core/ext/xds/xds_client.cc index 08493a414ac..c9c65ab3e51 100644 --- a/src/core/ext/xds/xds_client.cc +++ b/src/core/ext/xds/xds_client.cc @@ -1584,9 +1584,10 @@ void XdsClient::WatchResource(const XdsResourceType* type, "\" not present in bootstrap config"))); return; } - xds_server = authority->server(); + xds_server = + authority->servers().empty() ? nullptr : authority->servers().front(); } - if (xds_server == nullptr) xds_server = &bootstrap_->server(); + if (xds_server == nullptr) xds_server = bootstrap_->servers().front(); { MutexLock lock(&mu_); MaybeRegisterResourceTypeLocked(type); diff --git a/test/core/memory_usage/memory_usage_test.cc b/test/core/memory_usage/memory_usage_test.cc index 5b178b6d7f4..1ca0fc4aca5 100644 --- a/test/core/memory_usage/memory_usage_test.cc +++ b/test/core/memory_usage/memory_usage_test.cc @@ -210,7 +210,7 @@ XdsServer StartXdsServerAndConfigureBootstrap( // Generate xDS bootstrap and set the env var. std::string bootstrap = grpc::testing::XdsBootstrapBuilder() - .SetDefaultServer(absl::StrCat("localhost:", xds_server_port)) + .SetServers({absl::StrCat("localhost:", xds_server_port)}) .SetXdsChannelCredentials("insecure") .Build(); grpc_core::SetEnv("GRPC_XDS_BOOTSTRAP_CONFIG", bootstrap); diff --git a/test/core/xds/xds_bootstrap_test.cc b/test/core/xds/xds_bootstrap_test.cc index 749d44769e8..0749a9006e3 100644 --- a/test/core/xds/xds_bootstrap_test.cc +++ b/test/core/xds/xds_bootstrap_test.cc @@ -55,12 +55,30 @@ namespace grpc_core { namespace testing { namespace { +MATCHER_P2(EqXdsServer, name, creds_config_type, "equals XdsServer") { + auto* server = static_cast(arg); + if (!::testing::ExplainMatchResult(::testing::Ne(nullptr), server, + result_listener)) { + return false; + } + bool ok = ::testing::ExplainMatchResult(name, server->server_uri(), + result_listener); + auto creds_config = server->channel_creds_config(); + if (!::testing::ExplainMatchResult(::testing::Ne(nullptr), creds_config, + result_listener)) { + return false; + } + ok |= ::testing::ExplainMatchResult(creds_config_type, creds_config->type(), + result_listener); + return ok; +} + TEST(XdsBootstrapTest, Basic) { const char* json_str = "{" " \"xds_servers\": [" " {" - " \"server_uri\": \"fake:///lb\"," + " \"server_uri\": \"fake:///lb1\"," " \"channel_creds\": [" " {" " \"type\": \"fake\"," @@ -70,14 +88,11 @@ TEST(XdsBootstrapTest, Basic) { " \"ignore\": 0" " }," " {" - " \"server_uri\": \"ignored\"," + " \"server_uri\": \"fake:///lb2\"," " \"channel_creds\": [" " {" - " \"type\": \"ignored\"," + " \"type\": \"fake\"," " \"ignore\": 0" - " }," - " {" - " \"type\": \"fake\"" " }" " ]," " \"ignore\": 0" @@ -97,6 +112,15 @@ TEST(XdsBootstrapTest, Basic) { " }" " ]," " \"server_features\": [\"xds_v3\"]" + " }," + " {" + " \"server_uri\": \"fake:///xds_server2\"," + " \"channel_creds\": [" + " {" + " \"type\": \"fake\"" + " }" + " ]," + " \"server_features\": [\"xds_v3\"]" " }" " ]" " }," @@ -106,7 +130,7 @@ TEST(XdsBootstrapTest, Basic) { "server/%s\"," " \"xds_servers\": [" " {" - " \"server_uri\": \"fake:///xds_server2\"," + " \"server_uri\": \"fake:///xds_server3\"," " \"channel_creds\": [" " {" " \"type\": \"fake\"" @@ -138,11 +162,9 @@ TEST(XdsBootstrapTest, Basic) { auto bootstrap_or = GrpcXdsBootstrap::Create(json_str); ASSERT_TRUE(bootstrap_or.ok()) << bootstrap_or.status(); auto bootstrap = std::move(*bootstrap_or); - auto* server = - &static_cast(bootstrap->server()); - EXPECT_EQ(server->server_uri(), "fake:///lb"); - ASSERT_NE(server->channel_creds_config(), nullptr); - EXPECT_EQ(server->channel_creds_config()->type(), "fake"); + EXPECT_THAT(bootstrap->servers(), + ::testing::ElementsAre(EqXdsServer("fake:///lb1", "fake"), + EqXdsServer("fake:///lb2", "fake"))); EXPECT_EQ(bootstrap->authorities().size(), 2); auto* authority = static_cast( bootstrap->LookupAuthority("xds.example.com")); @@ -150,24 +172,18 @@ TEST(XdsBootstrapTest, Basic) { EXPECT_EQ(authority->client_listener_resource_name_template(), "xdstp://xds.example.com/envoy.config.listener.v3.Listener/grpc/" "server/%s"); - server = - static_cast(authority->server()); - ASSERT_NE(server, nullptr); - EXPECT_EQ(server->server_uri(), "fake:///xds_server"); - ASSERT_NE(server->channel_creds_config(), nullptr); - EXPECT_EQ(server->channel_creds_config()->type(), "fake"); + EXPECT_THAT( + authority->servers(), + ::testing::ElementsAre(EqXdsServer("fake:///xds_server", "fake"), + EqXdsServer("fake:///xds_server2", "fake"))); authority = static_cast( bootstrap->LookupAuthority("xds.example2.com")); ASSERT_NE(authority, nullptr); EXPECT_EQ(authority->client_listener_resource_name_template(), "xdstp://xds.example2.com/envoy.config.listener.v3.Listener/grpc/" "server/%s"); - server = - static_cast(authority->server()); - ASSERT_NE(server, nullptr); - EXPECT_EQ(server->server_uri(), "fake:///xds_server2"); - ASSERT_NE(server->channel_creds_config(), nullptr); - EXPECT_EQ(server->channel_creds_config()->type(), "fake"); + EXPECT_THAT(authority->servers(), ::testing::ElementsAre(EqXdsServer( + "fake:///xds_server3", "fake"))); ASSERT_NE(bootstrap->node(), nullptr); EXPECT_EQ(bootstrap->node()->id(), "foo"); EXPECT_EQ(bootstrap->node()->cluster(), "bar"); @@ -203,11 +219,8 @@ TEST(XdsBootstrapTest, ValidWithoutNode) { auto bootstrap_or = GrpcXdsBootstrap::Create(json_str); ASSERT_TRUE(bootstrap_or.ok()) << bootstrap_or.status(); auto bootstrap = std::move(*bootstrap_or); - auto* server = - &static_cast(bootstrap->server()); - EXPECT_EQ(server->server_uri(), "fake:///lb"); - ASSERT_NE(server->channel_creds_config(), nullptr); - EXPECT_EQ(server->channel_creds_config()->type(), "fake"); + EXPECT_THAT(bootstrap->servers(), + ::testing::ElementsAre(EqXdsServer("fake:///lb", "fake"))); EXPECT_EQ(bootstrap->node(), nullptr); } @@ -224,11 +237,8 @@ TEST(XdsBootstrapTest, InsecureCreds) { auto bootstrap_or = GrpcXdsBootstrap::Create(json_str); ASSERT_TRUE(bootstrap_or.ok()) << bootstrap_or.status(); auto bootstrap = std::move(*bootstrap_or); - auto* server = - &static_cast(bootstrap->server()); - EXPECT_EQ(server->server_uri(), "fake:///lb"); - ASSERT_NE(server->channel_creds_config(), nullptr); - EXPECT_EQ(server->channel_creds_config()->type(), "insecure"); + EXPECT_THAT(bootstrap->servers(), + ::testing::ElementsAre(EqXdsServer("fake:///lb", "insecure"))); EXPECT_EQ(bootstrap->node(), nullptr); } @@ -261,11 +271,8 @@ TEST(XdsBootstrapTest, GoogleDefaultCreds) { auto bootstrap_or = GrpcXdsBootstrap::Create(json_str); ASSERT_TRUE(bootstrap_or.ok()) << bootstrap_or.status(); auto bootstrap = std::move(*bootstrap_or); - auto* server = - &static_cast(bootstrap->server()); - EXPECT_EQ(server->server_uri(), "fake:///lb"); - ASSERT_NE(server->channel_creds_config(), nullptr); - EXPECT_EQ(server->channel_creds_config()->type(), "google_default"); + EXPECT_THAT(bootstrap->servers(), ::testing::ElementsAre(EqXdsServer( + "fake:///lb", "google_default"))); EXPECT_EQ(bootstrap->node(), nullptr); } diff --git a/test/core/xds/xds_client_fuzzer.cc b/test/core/xds/xds_client_fuzzer.cc index 3603e7d0d69..4d6994a1381 100644 --- a/test/core/xds/xds_client_fuzzer.cc +++ b/test/core/xds/xds_client_fuzzer.cc @@ -236,13 +236,15 @@ class Fuzzer { const XdsBootstrap::XdsServer* GetServer(const std::string& authority) { const GrpcXdsBootstrap& bootstrap = static_cast(xds_client_->bootstrap()); - if (authority.empty()) return &bootstrap.server(); + if (authority.empty()) return bootstrap.servers().front(); const auto* authority_entry = static_cast( bootstrap.LookupAuthority(authority)); if (authority_entry == nullptr) return nullptr; - if (authority_entry->server() != nullptr) return authority_entry->server(); - return &bootstrap.server(); + if (!authority_entry->servers().empty()) { + return authority_entry->servers().front(); + } + return bootstrap.servers().front(); } void TriggerConnectionFailure(const std::string& authority, diff --git a/test/core/xds/xds_client_test.cc b/test/core/xds/xds_client_test.cc index 7a699bcfcb0..c6365c03e73 100644 --- a/test/core/xds/xds_client_test.cc +++ b/test/core/xds/xds_client_test.cc @@ -20,6 +20,12 @@ #include "src/core/ext/xds/xds_client.h" +#include +#include +#include +#include +#include +#include #include #include @@ -27,23 +33,10 @@ #include #include #include +#include -#include -#include - -#include "absl/strings/str_cat.h" -#include "absl/time/time.h" -#include "absl/types/optional.h" -#include "absl/types/variant.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "upb/reflection/def.h" - -#include -#include -#include -#include - #include "src/core/ext/xds/xds_bootstrap.h" #include "src/core/ext/xds/xds_resource_type_impl.h" #include "src/core/lib/event_engine/default_event_engine.h" @@ -59,6 +52,11 @@ #include "test/core/util/scoped_env_var.h" #include "test/core/util/test_config.h" #include "test/core/xds/xds_transport_fake.h" +#include "absl/strings/str_cat.h" +#include "absl/time/time.h" +#include "absl/types/optional.h" +#include "absl/types/variant.h" +#include "upb/reflection/def.h" // IWYU pragma: no_include // IWYU pragma: no_include @@ -149,8 +147,12 @@ class XdsClientTest : public ::testing::Test { class FakeAuthority : public Authority { public: - const XdsServer* server() const override { - return server_.has_value() ? &*server_ : nullptr; + std::vector servers() const override { + if (server_.has_value()) { + return {&*server_}; + } else { + return {}; + }; } void set_server(absl::optional server) { @@ -194,7 +196,10 @@ class XdsClientTest : public ::testing::Test { std::string ToString() const override { return ""; } - const XdsServer& server() const override { return server_; } + std::vector servers() const override { + return {&server_}; + } + const Node* node() const override { return node_.has_value() ? &*node_ : nullptr; } @@ -659,7 +664,8 @@ class XdsClientTest : public ::testing::Test { RefCountedPtr WaitForAdsStream( absl::Duration timeout = absl::Seconds(5)) { - return WaitForAdsStream(xds_client_->bootstrap().server(), timeout); + return WaitForAdsStream(*xds_client_->bootstrap().servers().front(), + timeout); } // Gets the latest request sent to the fake xDS server. @@ -1809,7 +1815,7 @@ TEST_F(XdsClientTest, ConnectionFails) { /*resource_names=*/{"foo1"}); CheckRequestNode(*request); // Should be present on the first request. // Transport reports connection failure. - TriggerConnectionFailure(xds_client_->bootstrap().server(), + TriggerConnectionFailure(*xds_client_->bootstrap().servers().front(), absl::UnavailableError("connection failed")); // XdsClient should report an error to the watcher. auto error = watcher->WaitForNextError(); @@ -2364,7 +2370,7 @@ TEST_F(XdsClientTest, Federation) { // Watcher should initially not see any resource reported. EXPECT_FALSE(watcher->HasEvent()); // XdsClient should have created an ADS stream to the top-level xDS server. - auto stream = WaitForAdsStream(xds_client_->bootstrap().server()); + auto stream = WaitForAdsStream(*xds_client_->bootstrap().servers().front()); ASSERT_TRUE(stream != nullptr); // XdsClient should have sent a subscription request on the ADS stream. auto request = WaitForRequest(stream.get()); @@ -2450,7 +2456,7 @@ TEST_F(XdsClientTest, FederationAuthorityDefaultsToTopLevelXdsServer) { // Watcher should initially not see any resource reported. EXPECT_FALSE(watcher->HasEvent()); // XdsClient should have created an ADS stream to the top-level xDS server. - auto stream = WaitForAdsStream(xds_client_->bootstrap().server()); + auto stream = WaitForAdsStream(*xds_client_->bootstrap().servers().front()); ASSERT_TRUE(stream != nullptr); // XdsClient should have sent a subscription request on the ADS stream. auto request = WaitForRequest(stream.get()); @@ -2620,7 +2626,7 @@ TEST_F(XdsClientTest, FederationChannelFailureReportedToWatchers) { // Watcher should initially not see any resource reported. EXPECT_FALSE(watcher->HasEvent()); // XdsClient should have created an ADS stream to the top-level xDS server. - auto stream = WaitForAdsStream(xds_client_->bootstrap().server()); + auto stream = WaitForAdsStream(*xds_client_->bootstrap().servers().front()); ASSERT_TRUE(stream != nullptr); // XdsClient should have sent a subscription request on the ADS stream. auto request = WaitForRequest(stream.get()); diff --git a/test/core/xds/xds_cluster_resource_type_test.cc b/test/core/xds/xds_cluster_resource_type_test.cc index af8795e00fc..a30dd8bc253 100644 --- a/test/core/xds/xds_cluster_resource_type_test.cc +++ b/test/core/xds/xds_cluster_resource_type_test.cc @@ -85,7 +85,8 @@ class XdsClusterTest : public ::testing::Test { protected: XdsClusterTest() : xds_client_(MakeXdsClient()), - decode_context_{xds_client_.get(), xds_client_->bootstrap().server(), + decode_context_{xds_client_.get(), + *xds_client_->bootstrap().servers().front(), &xds_cluster_resource_type_test_trace, upb_def_pool_.ptr(), upb_arena_.ptr()} {} @@ -1106,7 +1107,7 @@ TEST_F(LrsTest, Valid) { static_cast(**decode_result.resource); ASSERT_TRUE(resource.lrs_load_reporting_server.has_value()); EXPECT_EQ(*resource.lrs_load_reporting_server, - xds_client_->bootstrap().server()); + *xds_client_->bootstrap().servers().front()); } TEST_F(LrsTest, NotSelfConfigSource) { diff --git a/test/core/xds/xds_common_types_test.cc b/test/core/xds/xds_common_types_test.cc index 4eaf702092f..ab049d954d6 100644 --- a/test/core/xds/xds_common_types_test.cc +++ b/test/core/xds/xds_common_types_test.cc @@ -72,7 +72,8 @@ class XdsCommonTypesTest : public ::testing::Test { protected: XdsCommonTypesTest() : xds_client_(MakeXdsClient()), - decode_context_{xds_client_.get(), xds_client_->bootstrap().server(), + decode_context_{xds_client_.get(), + *xds_client_->bootstrap().servers().front(), &xds_common_types_test_trace, upb_def_pool_.ptr(), upb_arena_.ptr()} {} diff --git a/test/core/xds/xds_endpoint_resource_type_test.cc b/test/core/xds/xds_endpoint_resource_type_test.cc index 08e95f9164f..8538dcd15c0 100644 --- a/test/core/xds/xds_endpoint_resource_type_test.cc +++ b/test/core/xds/xds_endpoint_resource_type_test.cc @@ -71,7 +71,8 @@ class XdsEndpointTest : public ::testing::Test { protected: XdsEndpointTest() : xds_client_(MakeXdsClient()), - decode_context_{xds_client_.get(), xds_client_->bootstrap().server(), + decode_context_{xds_client_.get(), + *xds_client_->bootstrap().servers().front(), &xds_endpoint_resource_type_test_trace, upb_def_pool_.ptr(), upb_arena_.ptr()} {} diff --git a/test/core/xds/xds_listener_resource_type_test.cc b/test/core/xds/xds_listener_resource_type_test.cc index 60f3996d7cb..eac9cb55df7 100644 --- a/test/core/xds/xds_listener_resource_type_test.cc +++ b/test/core/xds/xds_listener_resource_type_test.cc @@ -85,7 +85,8 @@ class XdsListenerTest : public ::testing::Test { protected: XdsListenerTest() : xds_client_(MakeXdsClient()), - decode_context_{xds_client_.get(), xds_client_->bootstrap().server(), + decode_context_{xds_client_.get(), + *xds_client_->bootstrap().servers().front(), &xds_listener_resource_type_test_trace, upb_def_pool_.ptr(), upb_arena_.ptr()} {} diff --git a/test/core/xds/xds_route_config_resource_type_test.cc b/test/core/xds/xds_route_config_resource_type_test.cc index 013b69b0b3e..8703efdfcb2 100644 --- a/test/core/xds/xds_route_config_resource_type_test.cc +++ b/test/core/xds/xds_route_config_resource_type_test.cc @@ -83,7 +83,8 @@ class XdsRouteConfigTest : public ::testing::Test { protected: XdsRouteConfigTest() : xds_client_(MakeXdsClient()), - decode_context_{xds_client_.get(), xds_client_->bootstrap().server(), + decode_context_{xds_client_.get(), + *xds_client_->bootstrap().servers().front(), &xds_route_config_resource_type_test_trace, upb_def_pool_.ptr(), upb_arena_.ptr()} {} diff --git a/test/cpp/end2end/xds/xds_cluster_end2end_test.cc b/test/cpp/end2end/xds/xds_cluster_end2end_test.cc index 2bef38e84b8..7322f06c9f6 100644 --- a/test/cpp/end2end/xds/xds_cluster_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_cluster_end2end_test.cc @@ -411,7 +411,7 @@ TEST_P(CdsDeletionTest, ClusterDeleted) { // Tests that we ignore Cluster deletions if configured to do so. TEST_P(CdsDeletionTest, ClusterDeletionIgnored) { - InitClient(XdsBootstrapBuilder().SetIgnoreResourceDeletion()); + InitClient(MakeBootstrapBuilder().SetIgnoreResourceDeletion()); CreateAndStartBackends(2); // Bring up client pointing to backend 0 and wait for it to connect. EdsResourceArgs args({{"locality0", CreateEndpointsForBackends(0, 1)}}); diff --git a/test/cpp/end2end/xds/xds_core_end2end_test.cc b/test/cpp/end2end/xds/xds_core_end2end_test.cc index a0afbaaec82..752ed01fe55 100644 --- a/test/cpp/end2end/xds/xds_core_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_core_end2end_test.cc @@ -324,7 +324,7 @@ TEST_P(GlobalXdsClientTest, InvalidListenerStillExistsIfPreviouslyCached) { class TimeoutTest : public XdsEnd2endTest { protected: void SetUp() override { - InitClient(XdsBootstrapBuilder(), /*lb_expected_authority=*/"", + InitClient(MakeBootstrapBuilder(), /*lb_expected_authority=*/"", /*xds_resource_does_not_exist_timeout_ms=*/2000); } }; @@ -644,7 +644,7 @@ TEST_P(XdsFederationTest, FederationTargetNoAuthorityWithResourceTemplate) { const char* kNewClusterName = "xdstp://xds.example.com/envoy.config.cluster.v3.Cluster/" "new_cluster_name"; - XdsBootstrapBuilder builder; + XdsBootstrapBuilder builder = MakeBootstrapBuilder(); builder.SetClientDefaultListenerResourceNameTemplate(kNewListenerTemplate); builder.AddAuthority( kAuthority, absl::StrCat("localhost:", authority_balancer_->port()), @@ -698,7 +698,7 @@ TEST_P(XdsFederationTest, FederationTargetAuthorityDefaultResourceTemplate) { const char* kNewClusterName = "xdstp://xds.example.com/envoy.config.cluster.v3.Cluster/" "cluster_name"; - XdsBootstrapBuilder builder; + XdsBootstrapBuilder builder = MakeBootstrapBuilder(); builder.AddAuthority(kAuthority, absl::StrCat("localhost:", authority_balancer_->port())); InitClient(builder); @@ -767,7 +767,7 @@ TEST_P(XdsFederationTest, FederationTargetAuthorityWithResourceTemplate) { const char* kNewClusterName = "xdstp://xds.example.com/envoy.config.cluster.v3.Cluster/" "cluster_name"; - XdsBootstrapBuilder builder; + XdsBootstrapBuilder builder = MakeBootstrapBuilder(); builder.AddAuthority(kAuthority, absl::StrCat("localhost:", authority_balancer_->port()), kNewListenerTemplate); @@ -823,7 +823,7 @@ TEST_P(XdsFederationTest, TargetUriAuthorityUnknown) { const char* kNewListenerTemplate = "xdstp://xds.example.com/envoy.config.listener.v3.Listener/" "client/%s?psm_project_id=1234"; - XdsBootstrapBuilder builder; + XdsBootstrapBuilder builder = MakeBootstrapBuilder(); builder.AddAuthority( kAuthority, absl::StrCat("localhost:", grpc_pick_unused_port_or_die()), kNewListenerTemplate); @@ -854,7 +854,7 @@ TEST_P(XdsFederationTest, RdsResourceNameAuthorityUnknown) { const char* kNewRouteConfigName = "xdstp://xds.unknown.com/envoy.config.route.v3.RouteConfiguration/" "new_route_config_name"; - XdsBootstrapBuilder builder; + XdsBootstrapBuilder builder = MakeBootstrapBuilder(); builder.AddAuthority(kAuthority, absl::StrCat("localhost:", authority_balancer_->port()), kNewListenerTemplate); @@ -900,7 +900,7 @@ TEST_P(XdsFederationTest, CdsResourceNameAuthorityUnknown) { const char* kNewClusterName = "xdstp://xds.unknown.com/envoy.config.cluster.v3.Cluster/" "cluster_name"; - XdsBootstrapBuilder builder; + XdsBootstrapBuilder builder = MakeBootstrapBuilder(); builder.AddAuthority(kAuthority, absl::StrCat("localhost:", authority_balancer_->port()), kNewListenerTemplate); @@ -952,7 +952,7 @@ TEST_P(XdsFederationTest, EdsResourceNameAuthorityUnknown) { const char* kNewClusterName = "xdstp://xds.example.com/envoy.config.cluster.v3.Cluster/" "cluster_name"; - XdsBootstrapBuilder builder; + XdsBootstrapBuilder builder = MakeBootstrapBuilder(); builder.AddAuthority(kAuthority, absl::StrCat("localhost:", authority_balancer_->port()), kNewListenerTemplate); @@ -1019,7 +1019,7 @@ TEST_P(XdsFederationTest, FederationServer) { const char* kNewClusterName = "xdstp://xds.example.com/envoy.config.cluster.v3.Cluster/" "new_cluster_name"; - XdsBootstrapBuilder builder; + XdsBootstrapBuilder builder = MakeBootstrapBuilder(); builder.SetClientDefaultListenerResourceNameTemplate(kNewListenerTemplate); builder.SetServerListenerResourceNameTemplate(kNewServerListenerTemplate); builder.AddAuthority( @@ -1165,7 +1165,7 @@ TEST_P(XdsFederationLoadReportingTest, FederationMultipleLoadReportingTest) { "cluster_name"; const size_t kNumRpcsToDefaultBalancer = 5; const size_t kNumRpcsToAuthorityBalancer = 10; - XdsBootstrapBuilder builder; + XdsBootstrapBuilder builder = MakeBootstrapBuilder(); builder.AddAuthority(kAuthority, absl::StrCat("localhost:", authority_balancer_->port()), kNewListenerTemplate); @@ -1276,11 +1276,11 @@ TEST_P(XdsFederationLoadReportingTest, SameServerInAuthorityAndTopLevel) { const char* kNewEdsServiceName = "xdstp://xds.example.com/envoy.config.endpoint.v3.ClusterLoadAssignment/" "edsservice_name"; - XdsBootstrapBuilder builder; std::string xds_server = absl::StrCat("localhost:", authority_balancer_->port()); + XdsBootstrapBuilder builder; + builder.SetServers({xds_server}); builder.AddAuthority(kAuthority, xds_server); - builder.SetDefaultServer(xds_server); InitClient(builder); CreateAndStartBackends(1); authority_balancer_->lrs_service()->set_send_all_clusters(true); @@ -1349,7 +1349,7 @@ INSTANTIATE_TEST_SUITE_P(XdsTest, SecureNamingTest, // Tests that secure naming check passes if target name is expected. TEST_P(SecureNamingTest, TargetNameIsExpected) { - InitClient(XdsBootstrapBuilder(), /*lb_expected_authority=*/"localhost:%d"); + InitClient(MakeBootstrapBuilder(), /*lb_expected_authority=*/"localhost:%d"); CreateAndStartBackends(4); EdsResourceArgs args({ {"locality0", CreateEndpointsForBackends()}, @@ -1361,7 +1361,7 @@ TEST_P(SecureNamingTest, TargetNameIsExpected) { // Tests that secure naming check fails if target name is unexpected. TEST_P(SecureNamingTest, TargetNameIsUnexpected) { GTEST_FLAG_SET(death_test_style, "threadsafe"); - InitClient(XdsBootstrapBuilder(), + InitClient(MakeBootstrapBuilder(), /*lb_expected_authority=*/"incorrect_server_name"); CreateAndStartBackends(4); EdsResourceArgs args({ diff --git a/test/cpp/end2end/xds/xds_csds_end2end_test.cc b/test/cpp/end2end/xds/xds_csds_end2end_test.cc index cde8148b9c5..2aa0c0645e4 100644 --- a/test/cpp/end2end/xds/xds_csds_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_csds_end2end_test.cc @@ -696,7 +696,7 @@ class CsdsShortAdsTimeoutTest : public ClientStatusDiscoveryServiceTest { protected: void SetUp() override { // Shorten the ADS subscription timeout to speed up the test run. - InitClient(XdsBootstrapBuilder(), /*lb_expected_authority=*/"", + InitClient(absl::nullopt, /*lb_expected_authority=*/"", /*xds_resource_does_not_exist_timeout_ms=*/2000); } }; diff --git a/test/cpp/end2end/xds/xds_end2end_test.cc b/test/cpp/end2end/xds/xds_end2end_test.cc index f48de70c7d1..1415742f198 100644 --- a/test/cpp/end2end/xds/xds_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_end2end_test.cc @@ -279,7 +279,7 @@ FakeCertificateProvider::CertDataMapWrapper* g_fake2_cert_data_map = nullptr; class XdsSecurityTest : public XdsEnd2endTest { protected: void SetUp() override { - XdsBootstrapBuilder builder; + XdsBootstrapBuilder builder = MakeBootstrapBuilder(); builder.AddCertificateProviderPlugin("fake_plugin1", "fake1"); builder.AddCertificateProviderPlugin("fake_plugin2", "fake2"); std::vector fields; @@ -799,7 +799,8 @@ class XdsEnabledServerTest : public XdsEnd2endTest { protected: void SetUp() override {} // No-op -- individual tests do this themselves. - void DoSetUp(XdsBootstrapBuilder builder = XdsBootstrapBuilder()) { + void DoSetUp( + const absl::optional& builder = absl::nullopt) { InitClient(builder); CreateBackends(1, /*xds_enabled=*/true); EdsResourceArgs args({{"locality0", CreateEndpointsForBackends(0, 1)}}); @@ -814,7 +815,7 @@ TEST_P(XdsEnabledServerTest, Basic) { } TEST_P(XdsEnabledServerTest, ListenerDeletionIgnored) { - DoSetUp(XdsBootstrapBuilder().SetIgnoreResourceDeletion()); + DoSetUp(MakeBootstrapBuilder().SetIgnoreResourceDeletion()); backends_[0]->Start(); WaitForBackend(DEBUG_LOCATION, 0); // Check that we ACKed. @@ -908,7 +909,7 @@ TEST_P(XdsEnabledServerTest, ListenerAddressMismatch) { class XdsServerSecurityTest : public XdsEnd2endTest { protected: void SetUp() override { - XdsBootstrapBuilder builder; + XdsBootstrapBuilder builder = MakeBootstrapBuilder(); builder.AddCertificateProviderPlugin("fake_plugin1", "fake1"); builder.AddCertificateProviderPlugin("fake_plugin2", "fake2"); std::vector fields; diff --git a/test/cpp/end2end/xds/xds_end2end_test_lib.cc b/test/cpp/end2end/xds/xds_end2end_test_lib.cc index 6f69e351cfb..121f92328fc 100644 --- a/test/cpp/end2end/xds/xds_end2end_test_lib.cc +++ b/test/cpp/end2end/xds/xds_end2end_test_lib.cc @@ -483,9 +483,12 @@ std::vector XdsEnd2endTest::GetBackendPorts(size_t start_index, return backend_ports; } -void XdsEnd2endTest::InitClient(XdsBootstrapBuilder builder, +void XdsEnd2endTest::InitClient(absl::optional builder, std::string lb_expected_authority, int xds_resource_does_not_exist_timeout_ms) { + if (!builder.has_value()) { + builder = MakeBootstrapBuilder(); + } if (xds_resource_does_not_exist_timeout_ms > 0) { xds_channel_args_to_add_.emplace_back(grpc_channel_arg_integer_create( const_cast(GRPC_ARG_XDS_RESOURCE_DOES_NOT_EXIST_TIMEOUT_MS), @@ -503,10 +506,7 @@ void XdsEnd2endTest::InitClient(XdsBootstrapBuilder builder, } xds_channel_args_.num_args = xds_channel_args_to_add_.size(); xds_channel_args_.args = xds_channel_args_to_add_.data(); - // Initialize XdsClient state. - builder.SetDefaultServer(absl::StrCat("localhost:", balancer_->port()), - /*ignore_if_set=*/true); - bootstrap_ = builder.Build(); + bootstrap_ = builder->Build(); if (GetParam().bootstrap_source() == XdsTestType::kBootstrapFromEnvVar) { grpc_core::SetEnv("GRPC_XDS_BOOTSTRAP_CONFIG", bootstrap_.c_str()); } else if (GetParam().bootstrap_source() == XdsTestType::kBootstrapFromFile) { diff --git a/test/cpp/end2end/xds/xds_end2end_test_lib.h b/test/cpp/end2end/xds/xds_end2end_test_lib.h index 86cc867d008..feff26b608a 100644 --- a/test/cpp/end2end/xds/xds_end2end_test_lib.h +++ b/test/cpp/end2end/xds/xds_end2end_test_lib.h @@ -566,10 +566,15 @@ class XdsEnd2endTest : public ::testing::TestWithParam, // Initializes global state for the client, such as the bootstrap file // and channel args for the XdsClient. Then calls ResetStub(). // All tests must call this exactly once at the start of the test. - void InitClient(XdsBootstrapBuilder builder = XdsBootstrapBuilder(), + void InitClient(absl::optional builder = absl::nullopt, std::string lb_expected_authority = "", int xds_resource_does_not_exist_timeout_ms = 0); + XdsBootstrapBuilder MakeBootstrapBuilder() { + return XdsBootstrapBuilder().SetServers( + {absl::StrCat("localhost:", balancer_->port())}); + } + // Sets channel_, stub_, stub1_, and stub2_. void ResetStub(int failover_timeout_ms = 0, ChannelArguments* args = nullptr); diff --git a/test/cpp/end2end/xds/xds_routing_end2end_test.cc b/test/cpp/end2end/xds/xds_routing_end2end_test.cc index fc05fc17b9c..e9f3fbd0ad7 100644 --- a/test/cpp/end2end/xds/xds_routing_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_routing_end2end_test.cc @@ -115,7 +115,7 @@ TEST_P(LdsDeletionTest, ListenerDeleted) { // Tests that we ignore Listener deletions if configured to do so. TEST_P(LdsDeletionTest, ListenerDeletionIgnored) { - InitClient(XdsBootstrapBuilder().SetIgnoreResourceDeletion()); + InitClient(MakeBootstrapBuilder().SetIgnoreResourceDeletion()); CreateAndStartBackends(2); // Bring up client pointing to backend 0 and wait for it to connect. EdsResourceArgs args({{"locality0", CreateEndpointsForBackends(0, 1)}}); diff --git a/test/cpp/end2end/xds/xds_utils.cc b/test/cpp/end2end/xds/xds_utils.cc index b1d42bdde79..c47302ec66b 100644 --- a/test/cpp/end2end/xds/xds_utils.cc +++ b/test/cpp/end2end/xds/xds_utils.cc @@ -60,7 +60,7 @@ using ::envoy::extensions::filters::network::http_connection_manager::v3:: std::string XdsBootstrapBuilder::Build() { std::vector fields; - fields.push_back(MakeXdsServersText(top_server_)); + fields.push_back(MakeXdsServersText(servers_)); if (!client_default_listener_resource_name_template_.empty()) { fields.push_back( absl::StrCat(" \"client_default_listener_resource_name_template\": \"", @@ -78,9 +78,8 @@ std::string XdsBootstrapBuilder::Build() { } std::string XdsBootstrapBuilder::MakeXdsServersText( - absl::string_view server_uri) { + absl::Span server_uris) { constexpr char kXdsServerTemplate[] = - " \"xds_servers\": [\n" " {\n" " \"server_uri\": \"\",\n" " \"channel_creds\": [\n" @@ -89,17 +88,21 @@ std::string XdsBootstrapBuilder::MakeXdsServersText( " }\n" " ],\n" " \"server_features\": []\n" - " }\n" - " ]"; + " }"; std::vector server_features; if (ignore_resource_deletion_) { server_features.push_back("\"ignore_resource_deletion\""); } - return absl::StrReplaceAll( - kXdsServerTemplate, - {{"", server_uri}, - {"", xds_channel_creds_type_}, - {"", absl::StrJoin(server_features, ", ")}}); + std::vector servers; + for (absl::string_view server_uri : server_uris) { + servers.emplace_back(absl::StrReplaceAll( + kXdsServerTemplate, + {{"", server_uri}, + {"", xds_channel_creds_type_}, + {"", absl::StrJoin(server_features, ", ")}})); + } + return absl::StrCat(" \"xds_servers\": [\n", + absl::StrJoin(servers, ",\n"), "\n ]"); } std::string XdsBootstrapBuilder::MakeNodeText() { @@ -148,7 +151,7 @@ std::string XdsBootstrapBuilder::MakeAuthorityText() { const std::string& name = p.first; const AuthorityInfo& authority_info = p.second; std::vector fields = { - MakeXdsServersText(authority_info.server)}; + MakeXdsServersText({authority_info.server})}; if (!authority_info.client_listener_resource_name_template.empty()) { fields.push_back(absl::StrCat( "\"client_listener_resource_name_template\": \"", diff --git a/test/cpp/end2end/xds/xds_utils.h b/test/cpp/end2end/xds/xds_utils.h index e12d36c2cfb..c3695125ae6 100644 --- a/test/cpp/end2end/xds/xds_utils.h +++ b/test/cpp/end2end/xds/xds_utils.h @@ -39,11 +39,8 @@ class XdsBootstrapBuilder { ignore_resource_deletion_ = true; return *this; } - // If ignore_if_set is true, sets the default server only if it has - // not already been set. - XdsBootstrapBuilder& SetDefaultServer(const std::string& server, - bool ignore_if_set = false) { - if (!ignore_if_set || top_server_.empty()) top_server_ = server; + XdsBootstrapBuilder& SetServers(absl::Span servers) { + servers_ = std::vector(servers.begin(), servers.end()); return *this; } XdsBootstrapBuilder& SetXdsChannelCredentials(const std::string& type) { @@ -87,13 +84,13 @@ class XdsBootstrapBuilder { std::string client_listener_resource_name_template; }; - std::string MakeXdsServersText(absl::string_view server_uri); + std::string MakeXdsServersText(absl::Span server_uris); std::string MakeNodeText(); std::string MakeCertificateProviderText(); std::string MakeAuthorityText(); bool ignore_resource_deletion_ = false; - std::string top_server_; + std::vector servers_; std::string xds_channel_creds_type_ = "fake"; std::string client_default_listener_resource_name_template_; std::map plugins_; From 6546fcd19640ead78749e135ab7514f2f5f93343 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 12 Mar 2024 13:42:52 -0700 Subject: [PATCH 2/4] [pick first] remove happy eyeballs experiment (#36092) Closes #36092 COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/36092 from markdroth:pf_happy_eyeballs_cleanup 24d49849bb1b33b4a967c1acc39196f541674999 PiperOrigin-RevId: 615157870 --- bazel/experiments.bzl | 10 -- src/core/BUILD | 1 - src/core/lib/experiments/experiments.cc | 15 -- src/core/lib/experiments/experiments.h | 11 -- src/core/lib/experiments/experiments.yaml | 6 - src/core/lib/experiments/rollouts.yaml | 2 - .../load_balancing/pick_first/pick_first.cc | 160 +++--------------- .../lb_policy/pick_first_test.cc | 6 - 8 files changed, 19 insertions(+), 192 deletions(-) diff --git a/bazel/experiments.bzl b/bazel/experiments.bzl index 7e58d5cb29c..7edd8cd75ea 100644 --- a/bazel/experiments.bzl +++ b/bazel/experiments.bzl @@ -33,7 +33,6 @@ EXPERIMENT_ENABLES = { "multiping": "multiping", "peer_state_based_framing": "peer_state_based_framing", "pending_queue_cap": "pending_queue_cap", - "pick_first_happy_eyeballs": "pick_first_happy_eyeballs", "promise_based_client_call": "event_engine_client,event_engine_listener,promise_based_client_call", "promise_based_server_call": "promise_based_server_call", "chaotic_good": "chaotic_good,event_engine_client,event_engine_listener,promise_based_client_call,promise_based_server_call", @@ -103,7 +102,6 @@ EXPERIMENTS = { "event_engine_listener", ], "cpp_lb_end2end_test": [ - "pick_first_happy_eyeballs", "round_robin_delegate_to_pick_first", "wrr_delegate_to_pick_first", ], @@ -114,7 +112,6 @@ EXPERIMENTS = { "event_engine_listener", ], "lb_unit_test": [ - "pick_first_happy_eyeballs", "round_robin_delegate_to_pick_first", "wrr_delegate_to_pick_first", ], @@ -122,7 +119,6 @@ EXPERIMENTS = { "registered_method_lookup_in_transport", ], "xds_end2end_test": [ - "pick_first_happy_eyeballs", "round_robin_delegate_to_pick_first", "wrr_delegate_to_pick_first", ], @@ -165,7 +161,6 @@ EXPERIMENTS = { }, "on": { "cpp_lb_end2end_test": [ - "pick_first_happy_eyeballs", "round_robin_delegate_to_pick_first", "wrr_delegate_to_pick_first", ], @@ -173,7 +168,6 @@ EXPERIMENTS = { "absl_base64", ], "lb_unit_test": [ - "pick_first_happy_eyeballs", "round_robin_delegate_to_pick_first", "wrr_delegate_to_pick_first", ], @@ -181,7 +175,6 @@ EXPERIMENTS = { "registered_method_lookup_in_transport", ], "xds_end2end_test": [ - "pick_first_happy_eyeballs", "round_robin_delegate_to_pick_first", "wrr_delegate_to_pick_first", ], @@ -243,7 +236,6 @@ EXPERIMENTS = { "work_serializer_dispatch", ], "cpp_lb_end2end_test": [ - "pick_first_happy_eyeballs", "round_robin_delegate_to_pick_first", "wrr_delegate_to_pick_first", ], @@ -254,7 +246,6 @@ EXPERIMENTS = { "event_engine_listener", ], "lb_unit_test": [ - "pick_first_happy_eyeballs", "round_robin_delegate_to_pick_first", "work_serializer_dispatch", "wrr_delegate_to_pick_first", @@ -266,7 +257,6 @@ EXPERIMENTS = { "registered_method_lookup_in_transport", ], "xds_end2end_test": [ - "pick_first_happy_eyeballs", "round_robin_delegate_to_pick_first", "work_serializer_dispatch", "wrr_delegate_to_pick_first", diff --git a/src/core/BUILD b/src/core/BUILD index c591f75aee0..c67173ba22b 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -5555,7 +5555,6 @@ grpc_cc_library( deps = [ "channel_args", "connectivity_state", - "experiments", "health_check_client", "iomgr_fwd", "json", diff --git a/src/core/lib/experiments/experiments.cc b/src/core/lib/experiments/experiments.cc index 784735cd00c..495570303b8 100644 --- a/src/core/lib/experiments/experiments.cc +++ b/src/core/lib/experiments/experiments.cc @@ -80,9 +80,6 @@ const char* const description_pending_queue_cap = "grpc_server_request_call or grpc_server_request_registered_call (or their " "wrappers in the C++ API)."; const char* const additional_constraints_pending_queue_cap = "{}"; -const char* const description_pick_first_happy_eyeballs = - "Use Happy Eyeballs in pick_first."; -const char* const additional_constraints_pick_first_happy_eyeballs = "{}"; const char* const description_promise_based_client_call = "If set, use the new gRPC promise based call code when it's appropriate " "(ie when all filters in a stack are promise based)"; @@ -215,8 +212,6 @@ const ExperimentMetadata g_experiment_metadata[] = { additional_constraints_peer_state_based_framing, nullptr, 0, false, true}, {"pending_queue_cap", description_pending_queue_cap, additional_constraints_pending_queue_cap, nullptr, 0, true, true}, - {"pick_first_happy_eyeballs", description_pick_first_happy_eyeballs, - additional_constraints_pick_first_happy_eyeballs, nullptr, 0, true, true}, {"promise_based_client_call", description_promise_based_client_call, additional_constraints_promise_based_client_call, required_experiments_promise_based_client_call, 2, false, true}, @@ -334,9 +329,6 @@ const char* const description_pending_queue_cap = "grpc_server_request_call or grpc_server_request_registered_call (or their " "wrappers in the C++ API)."; const char* const additional_constraints_pending_queue_cap = "{}"; -const char* const description_pick_first_happy_eyeballs = - "Use Happy Eyeballs in pick_first."; -const char* const additional_constraints_pick_first_happy_eyeballs = "{}"; const char* const description_promise_based_client_call = "If set, use the new gRPC promise based call code when it's appropriate " "(ie when all filters in a stack are promise based)"; @@ -469,8 +461,6 @@ const ExperimentMetadata g_experiment_metadata[] = { additional_constraints_peer_state_based_framing, nullptr, 0, false, true}, {"pending_queue_cap", description_pending_queue_cap, additional_constraints_pending_queue_cap, nullptr, 0, true, true}, - {"pick_first_happy_eyeballs", description_pick_first_happy_eyeballs, - additional_constraints_pick_first_happy_eyeballs, nullptr, 0, true, true}, {"promise_based_client_call", description_promise_based_client_call, additional_constraints_promise_based_client_call, required_experiments_promise_based_client_call, 2, false, true}, @@ -588,9 +578,6 @@ const char* const description_pending_queue_cap = "grpc_server_request_call or grpc_server_request_registered_call (or their " "wrappers in the C++ API)."; const char* const additional_constraints_pending_queue_cap = "{}"; -const char* const description_pick_first_happy_eyeballs = - "Use Happy Eyeballs in pick_first."; -const char* const additional_constraints_pick_first_happy_eyeballs = "{}"; const char* const description_promise_based_client_call = "If set, use the new gRPC promise based call code when it's appropriate " "(ie when all filters in a stack are promise based)"; @@ -723,8 +710,6 @@ const ExperimentMetadata g_experiment_metadata[] = { additional_constraints_peer_state_based_framing, nullptr, 0, false, true}, {"pending_queue_cap", description_pending_queue_cap, additional_constraints_pending_queue_cap, nullptr, 0, true, true}, - {"pick_first_happy_eyeballs", description_pick_first_happy_eyeballs, - additional_constraints_pick_first_happy_eyeballs, nullptr, 0, true, true}, {"promise_based_client_call", description_promise_based_client_call, additional_constraints_promise_based_client_call, required_experiments_promise_based_client_call, 2, false, true}, diff --git a/src/core/lib/experiments/experiments.h b/src/core/lib/experiments/experiments.h index 1a51ba7799b..70027ee9d99 100644 --- a/src/core/lib/experiments/experiments.h +++ b/src/core/lib/experiments/experiments.h @@ -86,8 +86,6 @@ inline bool IsMultipingEnabled() { return false; } inline bool IsPeerStateBasedFramingEnabled() { return false; } #define GRPC_EXPERIMENT_IS_INCLUDED_PENDING_QUEUE_CAP inline bool IsPendingQueueCapEnabled() { return true; } -#define GRPC_EXPERIMENT_IS_INCLUDED_PICK_FIRST_HAPPY_EYEBALLS -inline bool IsPickFirstHappyEyeballsEnabled() { return true; } inline bool IsPromiseBasedClientCallEnabled() { return false; } inline bool IsPromiseBasedServerCallEnabled() { return false; } inline bool IsChaoticGoodEnabled() { return false; } @@ -144,8 +142,6 @@ inline bool IsMultipingEnabled() { return false; } inline bool IsPeerStateBasedFramingEnabled() { return false; } #define GRPC_EXPERIMENT_IS_INCLUDED_PENDING_QUEUE_CAP inline bool IsPendingQueueCapEnabled() { return true; } -#define GRPC_EXPERIMENT_IS_INCLUDED_PICK_FIRST_HAPPY_EYEBALLS -inline bool IsPickFirstHappyEyeballsEnabled() { return true; } inline bool IsPromiseBasedClientCallEnabled() { return false; } inline bool IsPromiseBasedServerCallEnabled() { return false; } inline bool IsChaoticGoodEnabled() { return false; } @@ -203,8 +199,6 @@ inline bool IsMultipingEnabled() { return false; } inline bool IsPeerStateBasedFramingEnabled() { return false; } #define GRPC_EXPERIMENT_IS_INCLUDED_PENDING_QUEUE_CAP inline bool IsPendingQueueCapEnabled() { return true; } -#define GRPC_EXPERIMENT_IS_INCLUDED_PICK_FIRST_HAPPY_EYEBALLS -inline bool IsPickFirstHappyEyeballsEnabled() { return true; } inline bool IsPromiseBasedClientCallEnabled() { return false; } inline bool IsPromiseBasedServerCallEnabled() { return false; } inline bool IsChaoticGoodEnabled() { return false; } @@ -250,7 +244,6 @@ enum ExperimentIds { kExperimentIdMultiping, kExperimentIdPeerStateBasedFraming, kExperimentIdPendingQueueCap, - kExperimentIdPickFirstHappyEyeballs, kExperimentIdPromiseBasedClientCall, kExperimentIdPromiseBasedServerCall, kExperimentIdChaoticGood, @@ -337,10 +330,6 @@ inline bool IsPeerStateBasedFramingEnabled() { inline bool IsPendingQueueCapEnabled() { return IsExperimentEnabled(kExperimentIdPendingQueueCap); } -#define GRPC_EXPERIMENT_IS_INCLUDED_PICK_FIRST_HAPPY_EYEBALLS -inline bool IsPickFirstHappyEyeballsEnabled() { - return IsExperimentEnabled(kExperimentIdPickFirstHappyEyeballs); -} #define GRPC_EXPERIMENT_IS_INCLUDED_PROMISE_BASED_CLIENT_CALL inline bool IsPromiseBasedClientCallEnabled() { return IsExperimentEnabled(kExperimentIdPromiseBasedClientCall); diff --git a/src/core/lib/experiments/experiments.yaml b/src/core/lib/experiments/experiments.yaml index e67b4d4c92c..b86fde5cbdd 100644 --- a/src/core/lib/experiments/experiments.yaml +++ b/src/core/lib/experiments/experiments.yaml @@ -155,12 +155,6 @@ expiry: 2024/05/05 owner: ctiller@google.com test_tags: [] -- name: pick_first_happy_eyeballs - description: - Use Happy Eyeballs in pick_first. - expiry: 2024/03/15 - owner: roth@google.com - test_tags: ["lb_unit_test", "cpp_lb_end2end_test", "xds_end2end_test"] - name: promise_based_client_call description: If set, use the new gRPC promise based call code when it's appropriate diff --git a/src/core/lib/experiments/rollouts.yaml b/src/core/lib/experiments/rollouts.yaml index a971b0e4284..8bbd66af0ff 100644 --- a/src/core/lib/experiments/rollouts.yaml +++ b/src/core/lib/experiments/rollouts.yaml @@ -91,8 +91,6 @@ default: false - name: pending_queue_cap default: true -- name: pick_first_happy_eyeballs - default: true - name: promise_based_client_call default: ios: broken diff --git a/src/core/load_balancing/pick_first/pick_first.cc b/src/core/load_balancing/pick_first/pick_first.cc index 79c04248031..27c4de11db8 100644 --- a/src/core/load_balancing/pick_first/pick_first.cc +++ b/src/core/load_balancing/pick_first/pick_first.cc @@ -47,7 +47,6 @@ #include "src/core/lib/channel/metrics.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" -#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gpr/useful.h" #include "src/core/lib/gprpp/crash.h" #include "src/core/lib/gprpp/debug_location.h" @@ -198,10 +197,6 @@ class PickFirst : public LoadBalancingPolicy { // subchannel. void ProcessUnselectedReadyLocked(); - // Reacts to the current connectivity state while trying to connect. - // TODO(roth): Remove this when we remove the Happy Eyeballs experiment. - void ReactToConnectivityStateLocked(); - // Backpointer to owning subchannel list. Not owned. SubchannelList* subchannel_list_; const size_t index_; @@ -274,9 +269,6 @@ class PickFirst : public LoadBalancingPolicy { // finished processing. bool shutting_down_ = false; - // TODO(roth): Remove this when we remove the Happy Eyeballs experiment. - bool in_transient_failure_ = false; - size_t num_subchannels_seen_initial_notification_ = 0; // The index into subchannels_ to which we are currently attempting @@ -533,34 +525,30 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) { for (const auto& endpoint : endpoints) { for (const auto& address : endpoint.addresses()) { flattened_endpoints.emplace_back(address, endpoint.args()); - if (IsPickFirstHappyEyeballsEnabled()) { - absl::string_view scheme = GetAddressFamily(address); - bool inserted = address_families.insert(scheme).second; - if (inserted) { - address_family_order.emplace_back(scheme, - flattened_endpoints.size() - 1); - } + absl::string_view scheme = GetAddressFamily(address); + bool inserted = address_families.insert(scheme).second; + if (inserted) { + address_family_order.emplace_back(scheme, + flattened_endpoints.size() - 1); } } } endpoints = std::move(flattened_endpoints); // Interleave addresses as per RFC-8305 section 4. - if (IsPickFirstHappyEyeballsEnabled()) { - EndpointAddressesList interleaved_endpoints; - interleaved_endpoints.reserve(endpoints.size()); - std::vector endpoints_moved(endpoints.size()); - size_t scheme_index = 0; - for (size_t i = 0; i < endpoints.size(); ++i) { - EndpointAddresses* endpoint; - do { - auto& iterator = address_family_order[scheme_index++ % - address_family_order.size()]; - endpoint = iterator.Next(endpoints, &endpoints_moved); - } while (endpoint == nullptr); - interleaved_endpoints.emplace_back(std::move(*endpoint)); - } - endpoints = std::move(interleaved_endpoints); + EndpointAddressesList interleaved_endpoints; + interleaved_endpoints.reserve(endpoints.size()); + std::vector endpoints_moved(endpoints.size()); + size_t scheme_index = 0; + for (size_t i = 0; i < endpoints.size(); ++i) { + EndpointAddresses* endpoint; + do { + auto& iterator = address_family_order[scheme_index++ % + address_family_order.size()]; + endpoint = iterator.Next(endpoints, &endpoints_moved); + } while (endpoint == nullptr); + interleaved_endpoints.emplace_back(std::move(*endpoint)); } + endpoints = std::move(interleaved_endpoints); args.addresses = std::make_shared(std::move(endpoints)); } @@ -738,9 +726,7 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( p->UnsetSelectedSubchannel(); p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); // Set our state to that of the pending subchannel list. - if (IsPickFirstHappyEyeballsEnabled() - ? p->subchannel_list_->IsHappyEyeballsPassComplete() - : p->subchannel_list_->in_transient_failure_) { + if (p->subchannel_list_->IsHappyEyeballsPassComplete()) { status = absl::UnavailableError(absl::StrCat( "selected subchannel failed; switching to pending update; " "last failure: ", @@ -772,9 +758,6 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( // select in place of the current one. // If the subchannel is READY, use it. if (new_state == GRPC_CHANNEL_READY) { - if (!IsPickFirstHappyEyeballsEnabled()) { - subchannel_list_->in_transient_failure_ = false; - } // We consider it a successful connection attempt only if the // previous state was CONNECTING. In particular, we don't want to // increment this counter if we got a new address list and found the @@ -807,10 +790,6 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( // see its initial notification. Start trying to connect, starting // with the first subchannel. if (!old_state.has_value()) { - if (!IsPickFirstHappyEyeballsEnabled()) { - subchannel_list_->subchannels_.front().ReactToConnectivityStateLocked(); - return; - } subchannel_list_->StartConnectingNextSubchannel(); return; } @@ -821,14 +800,6 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( kMetricConnectionAttemptsFailed, 1, {subchannel_list_->policy_->channel_control_helper()->GetTarget()}, {}); } - if (!IsPickFirstHappyEyeballsEnabled()) { - // Ignore any other updates for subchannels we're not currently trying to - // connect to. - if (index_ != subchannel_list_->attempting_index_) return; - // React to the connectivity state. - ReactToConnectivityStateLocked(); - return; - } // Otherwise, process connectivity state change. switch (*connectivity_state_) { case GRPC_CHANNEL_TRANSIENT_FAILURE: { @@ -898,99 +869,6 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( } } -void PickFirst::SubchannelList::SubchannelData:: - ReactToConnectivityStateLocked() { - PickFirst* p = subchannel_list_->policy_.get(); - // Otherwise, process connectivity state. - switch (connectivity_state_.value()) { - case GRPC_CHANNEL_READY: - // Already handled this case above, so this should not happen. - GPR_UNREACHABLE_CODE(break); - case GRPC_CHANNEL_TRANSIENT_FAILURE: { - // Find the next subchannel not in state TRANSIENT_FAILURE. - // We skip subchannels in state TRANSIENT_FAILURE to avoid a - // large recursion that could overflow the stack. - SubchannelData* found_subchannel = nullptr; - for (size_t next_index = index_ + 1; - next_index < subchannel_list_->size(); ++next_index) { - SubchannelData* sc = &subchannel_list_->subchannels_[next_index]; - GPR_ASSERT(sc->connectivity_state_.has_value()); - if (sc->connectivity_state_ != GRPC_CHANNEL_TRANSIENT_FAILURE) { - subchannel_list_->attempting_index_ = next_index; - found_subchannel = sc; - break; - } - } - // If we found another subchannel in the list not in state - // TRANSIENT_FAILURE, trigger the right behavior for that subchannel. - if (found_subchannel != nullptr) { - found_subchannel->ReactToConnectivityStateLocked(); - break; - } - // We didn't find another subchannel not in state TRANSIENT_FAILURE, - // so report TRANSIENT_FAILURE and wait for the first subchannel - // in the list to report IDLE before continuing. - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { - gpr_log(GPR_INFO, - "Pick First %p subchannel list %p failed to connect to " - "all subchannels", - p, subchannel_list_); - } - subchannel_list_->attempting_index_ = 0; - subchannel_list_->in_transient_failure_ = true; - // In case 2, swap to the new subchannel list. This means reporting - // TRANSIENT_FAILURE and dropping the existing (working) connection, - // but we can't ignore what the control plane has told us. - if (subchannel_list_ == p->latest_pending_subchannel_list_.get()) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { - gpr_log(GPR_INFO, - "Pick First %p promoting pending subchannel list %p to " - "replace %p", - p, p->latest_pending_subchannel_list_.get(), - p->subchannel_list_.get()); - } - p->UnsetSelectedSubchannel(); - p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); - } - // If this is the current subchannel list (either because we were - // in case 1 or because we were in case 2 and just promoted it to - // be the current list), re-resolve and report new state. - if (subchannel_list_ == p->subchannel_list_.get()) { - p->channel_control_helper()->RequestReresolution(); - absl::Status status = absl::UnavailableError(absl::StrCat( - (p->omit_status_message_prefix_ - ? "" - : "failed to connect to all addresses; last error: "), - connectivity_status_.ToString())); - p->UpdateState(GRPC_CHANNEL_TRANSIENT_FAILURE, status, - MakeRefCounted(status)); - } - // If the first subchannel is already IDLE, trigger the next connection - // attempt immediately. Otherwise, we'll wait for it to report - // its own connectivity state change. - auto& subchannel0 = subchannel_list_->subchannels_.front(); - if (subchannel0.connectivity_state_ == GRPC_CHANNEL_IDLE) { - subchannel0.subchannel_->RequestConnection(); - } - break; - } - case GRPC_CHANNEL_IDLE: - subchannel_->RequestConnection(); - break; - case GRPC_CHANNEL_CONNECTING: - // Only update connectivity state in case 1, and only if we're not - // already in TRANSIENT_FAILURE. - if (subchannel_list_ == p->subchannel_list_.get() && - p->state_ != GRPC_CHANNEL_TRANSIENT_FAILURE) { - p->UpdateState(GRPC_CHANNEL_CONNECTING, absl::Status(), - MakeRefCounted(nullptr)); - } - break; - case GRPC_CHANNEL_SHUTDOWN: - GPR_UNREACHABLE_CODE(break); - } -} - void PickFirst::SubchannelList::SubchannelData::RequestConnectionWithTimer() { GPR_ASSERT(connectivity_state_.has_value()); if (connectivity_state_ == GRPC_CHANNEL_IDLE) { diff --git a/test/core/client_channel/lb_policy/pick_first_test.cc b/test/core/client_channel/lb_policy/pick_first_test.cc index 05330ec9337..609f6f31a2f 100644 --- a/test/core/client_channel/lb_policy/pick_first_test.cc +++ b/test/core/client_channel/lb_policy/pick_first_test.cc @@ -38,7 +38,6 @@ #include #include "src/core/lib/channel/metrics.h" -#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" @@ -513,7 +512,6 @@ TEST_F(PickFirstTest, ResolverUpdateBeforeLeavingIdle) { } TEST_F(PickFirstTest, HappyEyeballs) { - if (!IsPickFirstHappyEyeballsEnabled()) return; // Send an update containing three addresses. constexpr std::array kAddresses = { "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444", "ipv4:127.0.0.1:445"}; @@ -568,7 +566,6 @@ TEST_F(PickFirstTest, HappyEyeballs) { } TEST_F(PickFirstTest, HappyEyeballsCompletesWithoutSuccess) { - if (!IsPickFirstHappyEyeballsEnabled()) return; // Send an update containing three addresses. constexpr std::array kAddresses = { "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444", "ipv4:127.0.0.1:445"}; @@ -689,7 +686,6 @@ TEST_F(PickFirstTest, HappyEyeballsCompletesWithoutSuccess) { TEST_F(PickFirstTest, HappyEyeballsLastSubchannelFailsWhileAnotherIsStillPending) { - if (!IsPickFirstHappyEyeballsEnabled()) return; // Send an update containing three addresses. constexpr std::array kAddresses = { "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; @@ -758,7 +754,6 @@ TEST_F(PickFirstTest, } TEST_F(PickFirstTest, HappyEyeballsAddressInterleaving) { - if (!IsPickFirstHappyEyeballsEnabled()) return; // Send an update containing four IPv4 addresses followed by two // IPv6 addresses. constexpr std::array kAddresses = { @@ -851,7 +846,6 @@ TEST_F(PickFirstTest, HappyEyeballsAddressInterleaving) { TEST_F(PickFirstTest, HappyEyeballsAddressInterleavingSecondFamilyHasMoreAddresses) { - if (!IsPickFirstHappyEyeballsEnabled()) return; // Send an update containing two IPv6 addresses followed by four IPv4 // addresses. constexpr std::array kAddresses = { From a19e01c4fd947ed5f4cc4a91412c4cbf029d2526 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 12 Mar 2024 15:56:15 -0700 Subject: [PATCH 3/4] [sanity] fix sanity on master (#36102) Broken by the merge commit for https://github.com/grpc/grpc/pull/36021. Closes #36102 COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/36102 from markdroth:sanity 709aa26fe8b49632cb303c0c02cee424a97175e8 PiperOrigin-RevId: 615200273 --- src/core/ext/xds/xds_bootstrap_grpc.cc | 21 ++++++++++++--------- test/core/xds/xds_client_test.cc | 25 ++++++++++++++----------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/core/ext/xds/xds_bootstrap_grpc.cc b/src/core/ext/xds/xds_bootstrap_grpc.cc index 553866d667b..41749e510cd 100644 --- a/src/core/ext/xds/xds_bootstrap_grpc.cc +++ b/src/core/ext/xds/xds_bootstrap_grpc.cc @@ -14,10 +14,10 @@ // limitations under the License. // +#include + #include "src/core/ext/xds/xds_bootstrap_grpc.h" -#include -#include #include #include @@ -26,13 +26,6 @@ #include #include -#include "src/core/lib/config/core_configuration.h" -#include "src/core/lib/gprpp/ref_counted_ptr.h" -#include "src/core/lib/json/json.h" -#include "src/core/lib/json/json_object_loader.h" -#include "src/core/lib/json/json_reader.h" -#include "src/core/lib/json/json_writer.h" -#include "src/core/lib/security/credentials/channel_creds_registry.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/match.h" @@ -42,6 +35,16 @@ #include "absl/strings/string_view.h" #include "absl/types/optional.h" +#include + +#include "src/core/lib/config/core_configuration.h" +#include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/json/json.h" +#include "src/core/lib/json/json_object_loader.h" +#include "src/core/lib/json/json_reader.h" +#include "src/core/lib/json/json_writer.h" +#include "src/core/lib/security/credentials/channel_creds_registry.h" + namespace grpc_core { // diff --git a/test/core/xds/xds_client_test.cc b/test/core/xds/xds_client_test.cc index c6365c03e73..741e2022362 100644 --- a/test/core/xds/xds_client_test.cc +++ b/test/core/xds/xds_client_test.cc @@ -20,12 +20,6 @@ #include "src/core/ext/xds/xds_client.h" -#include -#include -#include -#include -#include -#include #include #include @@ -35,8 +29,22 @@ #include #include +#include +#include + +#include "absl/strings/str_cat.h" +#include "absl/time/time.h" +#include "absl/types/optional.h" +#include "absl/types/variant.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "upb/reflection/def.h" + +#include +#include +#include +#include + #include "src/core/ext/xds/xds_bootstrap.h" #include "src/core/ext/xds/xds_resource_type_impl.h" #include "src/core/lib/event_engine/default_event_engine.h" @@ -52,11 +60,6 @@ #include "test/core/util/scoped_env_var.h" #include "test/core/util/test_config.h" #include "test/core/xds/xds_transport_fake.h" -#include "absl/strings/str_cat.h" -#include "absl/time/time.h" -#include "absl/types/optional.h" -#include "absl/types/variant.h" -#include "upb/reflection/def.h" // IWYU pragma: no_include // IWYU pragma: no_include From 390fef05901b602f317cf28cad29e8b9c9d7f501 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 12 Mar 2024 16:24:17 -0700 Subject: [PATCH 4/4] [RR and WRR] clean up dualstack experiments (#35135) Closes #35135 COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/35135 from markdroth:dualstack_rr_wrr_cleanup 438b29df5ca83c0020f05cb437369720622ed752 PiperOrigin-RevId: 615208297 --- Package.swift | 1 - bazel/experiments.bzl | 34 - build_autogenerated.yaml | 2 - gRPC-C++.podspec | 2 - gRPC-Core.podspec | 2 - grpc.gemspec | 1 - package.xml | 1 - src/core/BUILD | 36 - src/core/lib/experiments/experiments.cc | 45 - src/core/lib/experiments/experiments.h | 22 - src/core/lib/experiments/experiments.yaml | 14 - src/core/lib/experiments/rollouts.yaml | 4 - .../load_balancing/round_robin/round_robin.cc | 457 +------ src/core/load_balancing/subchannel_list.h | 455 ------- .../weighted_round_robin.cc | 1051 ++--------------- .../lb_policy/outlier_detection_test.cc | 4 - .../lb_policy/round_robin_test.cc | 2 - .../lb_policy/weighted_round_robin_test.cc | 3 - .../lb_policy/xds_override_host_test.cc | 2 - test/cpp/end2end/client_lb_end2end_test.cc | 28 +- .../end2end/xds/xds_cluster_end2end_test.cc | 2 - .../xds/xds_override_host_end2end_test.cc | 2 - test/cpp/end2end/xds/xds_wrr_end2end_test.cc | 2 - tools/doxygen/Doxyfile.c++.internal | 1 - tools/doxygen/Doxyfile.core.internal | 1 - 25 files changed, 103 insertions(+), 2071 deletions(-) delete mode 100644 src/core/load_balancing/subchannel_list.h diff --git a/Package.swift b/Package.swift index 5e15d8e55af..0465d912aa5 100644 --- a/Package.swift +++ b/Package.swift @@ -1896,7 +1896,6 @@ let package = Package( "src/core/load_balancing/rls/rls.h", "src/core/load_balancing/round_robin/round_robin.cc", "src/core/load_balancing/subchannel_interface.h", - "src/core/load_balancing/subchannel_list.h", "src/core/load_balancing/weighted_round_robin/static_stride_scheduler.cc", "src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h", "src/core/load_balancing/weighted_round_robin/weighted_round_robin.cc", diff --git a/bazel/experiments.bzl b/bazel/experiments.bzl index 7edd8cd75ea..79de231736b 100644 --- a/bazel/experiments.bzl +++ b/bazel/experiments.bzl @@ -38,7 +38,6 @@ EXPERIMENT_ENABLES = { "chaotic_good": "chaotic_good,event_engine_client,event_engine_listener,promise_based_client_call,promise_based_server_call", "registered_method_lookup_in_transport": "registered_method_lookup_in_transport", "promise_based_inproc_transport": "event_engine_client,event_engine_listener,promise_based_client_call,promise_based_inproc_transport,promise_based_server_call,registered_method_lookup_in_transport", - "round_robin_delegate_to_pick_first": "round_robin_delegate_to_pick_first", "rstpit": "rstpit", "schedule_cancellation_over_write": "schedule_cancellation_over_write", "server_privacy": "server_privacy", @@ -52,7 +51,6 @@ EXPERIMENT_ENABLES = { "v3_server_auth_filter": "v3_server_auth_filter", "work_serializer_clears_time_cache": "work_serializer_clears_time_cache", "work_serializer_dispatch": "event_engine_client,work_serializer_dispatch", - "wrr_delegate_to_pick_first": "wrr_delegate_to_pick_first", } EXPERIMENT_POLLERS = [ @@ -101,27 +99,15 @@ EXPERIMENTS = { "core_end2end_test": [ "event_engine_listener", ], - "cpp_lb_end2end_test": [ - "round_robin_delegate_to_pick_first", - "wrr_delegate_to_pick_first", - ], "credential_token_tests": [ "absl_base64", ], "event_engine_listener_test": [ "event_engine_listener", ], - "lb_unit_test": [ - "round_robin_delegate_to_pick_first", - "wrr_delegate_to_pick_first", - ], "surface_registered_method_lookup": [ "registered_method_lookup_in_transport", ], - "xds_end2end_test": [ - "round_robin_delegate_to_pick_first", - "wrr_delegate_to_pick_first", - ], }, }, "ios": { @@ -160,24 +146,12 @@ EXPERIMENTS = { ], }, "on": { - "cpp_lb_end2end_test": [ - "round_robin_delegate_to_pick_first", - "wrr_delegate_to_pick_first", - ], "credential_token_tests": [ "absl_base64", ], - "lb_unit_test": [ - "round_robin_delegate_to_pick_first", - "wrr_delegate_to_pick_first", - ], "surface_registered_method_lookup": [ "registered_method_lookup_in_transport", ], - "xds_end2end_test": [ - "round_robin_delegate_to_pick_first", - "wrr_delegate_to_pick_first", - ], }, }, "posix": { @@ -235,10 +209,6 @@ EXPERIMENTS = { "cpp_end2end_test": [ "work_serializer_dispatch", ], - "cpp_lb_end2end_test": [ - "round_robin_delegate_to_pick_first", - "wrr_delegate_to_pick_first", - ], "credential_token_tests": [ "absl_base64", ], @@ -246,9 +216,7 @@ EXPERIMENTS = { "event_engine_listener", ], "lb_unit_test": [ - "round_robin_delegate_to_pick_first", "work_serializer_dispatch", - "wrr_delegate_to_pick_first", ], "resolver_component_tests_runner_invoker": [ "event_engine_dns", @@ -257,9 +225,7 @@ EXPERIMENTS = { "registered_method_lookup_in_transport", ], "xds_end2end_test": [ - "round_robin_delegate_to_pick_first", "work_serializer_dispatch", - "wrr_delegate_to_pick_first", ], }, }, diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index a3f46868f21..91e82652419 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -1183,7 +1183,6 @@ libs: - src/core/load_balancing/ring_hash/ring_hash.h - src/core/load_balancing/rls/rls.h - src/core/load_balancing/subchannel_interface.h - - src/core/load_balancing/subchannel_list.h - src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h - src/core/load_balancing/weighted_target/weighted_target.h - src/core/load_balancing/xds/xds_channel_args.h @@ -2655,7 +2654,6 @@ libs: - src/core/load_balancing/pick_first/pick_first.h - src/core/load_balancing/rls/rls.h - src/core/load_balancing/subchannel_interface.h - - src/core/load_balancing/subchannel_list.h - src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h - src/core/load_balancing/weighted_target/weighted_target.h - src/core/resolver/dns/c_ares/dns_resolver_ares.h diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index 898b33d3655..f459d1b7b85 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -1289,7 +1289,6 @@ Pod::Spec.new do |s| 'src/core/load_balancing/ring_hash/ring_hash.h', 'src/core/load_balancing/rls/rls.h', 'src/core/load_balancing/subchannel_interface.h', - 'src/core/load_balancing/subchannel_list.h', 'src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h', 'src/core/load_balancing/weighted_target/weighted_target.h', 'src/core/load_balancing/xds/xds_channel_args.h', @@ -2554,7 +2553,6 @@ Pod::Spec.new do |s| 'src/core/load_balancing/ring_hash/ring_hash.h', 'src/core/load_balancing/rls/rls.h', 'src/core/load_balancing/subchannel_interface.h', - 'src/core/load_balancing/subchannel_list.h', 'src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h', 'src/core/load_balancing/weighted_target/weighted_target.h', 'src/core/load_balancing/xds/xds_channel_args.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index 8326e6b78bd..dd30ea8829c 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -2006,7 +2006,6 @@ Pod::Spec.new do |s| 'src/core/load_balancing/rls/rls.h', 'src/core/load_balancing/round_robin/round_robin.cc', 'src/core/load_balancing/subchannel_interface.h', - 'src/core/load_balancing/subchannel_list.h', 'src/core/load_balancing/weighted_round_robin/static_stride_scheduler.cc', 'src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h', 'src/core/load_balancing/weighted_round_robin/weighted_round_robin.cc', @@ -3336,7 +3335,6 @@ Pod::Spec.new do |s| 'src/core/load_balancing/ring_hash/ring_hash.h', 'src/core/load_balancing/rls/rls.h', 'src/core/load_balancing/subchannel_interface.h', - 'src/core/load_balancing/subchannel_list.h', 'src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h', 'src/core/load_balancing/weighted_target/weighted_target.h', 'src/core/load_balancing/xds/xds_channel_args.h', diff --git a/grpc.gemspec b/grpc.gemspec index 8b33f403ed2..64b5cf62e4b 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -1898,7 +1898,6 @@ Gem::Specification.new do |s| s.files += %w( src/core/load_balancing/rls/rls.h ) s.files += %w( src/core/load_balancing/round_robin/round_robin.cc ) s.files += %w( src/core/load_balancing/subchannel_interface.h ) - s.files += %w( src/core/load_balancing/subchannel_list.h ) s.files += %w( src/core/load_balancing/weighted_round_robin/static_stride_scheduler.cc ) s.files += %w( src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h ) s.files += %w( src/core/load_balancing/weighted_round_robin/weighted_round_robin.cc ) diff --git a/package.xml b/package.xml index cfe611d7459..84621351c14 100644 --- a/package.xml +++ b/package.xml @@ -1880,7 +1880,6 @@ - diff --git a/src/core/BUILD b/src/core/BUILD index c67173ba22b..857484df4f1 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -5470,35 +5470,6 @@ grpc_cc_library( ], ) -grpc_cc_library( - name = "grpc_lb_subchannel_list", - hdrs = [ - "load_balancing/subchannel_list.h", - ], - external_deps = [ - "absl/status", - "absl/types:optional", - ], - language = "c++", - deps = [ - "channel_args", - "connectivity_state", - "dual_ref_counted", - "gpr_manual_constructor", - "health_check_client", - "iomgr_fwd", - "lb_policy", - "subchannel_interface", - "//:debug_location", - "//:endpoint_addresses", - "//:gpr", - "//:grpc_base", - "//:ref_counted_ptr", - "//:server_address", - "//:work_serializer", - ], -) - grpc_cc_library( name = "lb_endpoint_list", srcs = [ @@ -5721,13 +5692,10 @@ grpc_cc_library( deps = [ "channel_args", "connectivity_state", - "experiments", - "grpc_lb_subchannel_list", "json", "lb_endpoint_list", "lb_policy", "lb_policy_factory", - "subchannel_interface", "//:config", "//:debug_location", "//:endpoint_addresses", @@ -5736,7 +5704,6 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:work_serializer", ], ) @@ -5780,7 +5747,6 @@ grpc_cc_library( "experiments", "grpc_backend_metric_data", "grpc_lb_policy_weighted_target", - "grpc_lb_subchannel_list", "json", "json_args", "json_object_loader", @@ -5806,8 +5772,6 @@ grpc_cc_library( "//:oob_backend_metric", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", - "//:sockaddr_utils", "//:stats", "//:work_serializer", ], diff --git a/src/core/lib/experiments/experiments.cc b/src/core/lib/experiments/experiments.cc index 495570303b8..41307cf8843 100644 --- a/src/core/lib/experiments/experiments.cc +++ b/src/core/lib/experiments/experiments.cc @@ -110,11 +110,6 @@ const uint8_t required_experiments_promise_based_inproc_transport[] = { static_cast(grpc_core::kExperimentIdPromiseBasedServerCall), static_cast( grpc_core::kExperimentIdRegisteredMethodLookupInTransport)}; -const char* const description_round_robin_delegate_to_pick_first = - "Change round_robin code to delegate to pick_first as per dualstack " - "backend design."; -const char* const additional_constraints_round_robin_delegate_to_pick_first = - "{}"; const char* const description_rstpit = "On RST_STREAM on a server, reduce MAX_CONCURRENT_STREAMS for a short " "duration"; @@ -164,10 +159,6 @@ const char* const description_work_serializer_dispatch = const char* const additional_constraints_work_serializer_dispatch = "{}"; const uint8_t required_experiments_work_serializer_dispatch[] = { static_cast(grpc_core::kExperimentIdEventEngineClient)}; -const char* const description_wrr_delegate_to_pick_first = - "Change WRR code to delegate to pick_first as per dualstack backend " - "design."; -const char* const additional_constraints_wrr_delegate_to_pick_first = "{}"; #ifdef NDEBUG const bool kDefaultForDebugOnly = false; #else @@ -228,10 +219,6 @@ const ExperimentMetadata g_experiment_metadata[] = { description_promise_based_inproc_transport, additional_constraints_promise_based_inproc_transport, required_experiments_promise_based_inproc_transport, 3, false, false}, - {"round_robin_delegate_to_pick_first", - description_round_robin_delegate_to_pick_first, - additional_constraints_round_robin_delegate_to_pick_first, nullptr, 0, - true, true}, {"rstpit", description_rstpit, additional_constraints_rstpit, nullptr, 0, false, true}, {"schedule_cancellation_over_write", @@ -265,8 +252,6 @@ const ExperimentMetadata g_experiment_metadata[] = { {"work_serializer_dispatch", description_work_serializer_dispatch, additional_constraints_work_serializer_dispatch, required_experiments_work_serializer_dispatch, 1, false, true}, - {"wrr_delegate_to_pick_first", description_wrr_delegate_to_pick_first, - additional_constraints_wrr_delegate_to_pick_first, nullptr, 0, true, true}, }; } // namespace grpc_core @@ -359,11 +344,6 @@ const uint8_t required_experiments_promise_based_inproc_transport[] = { static_cast(grpc_core::kExperimentIdPromiseBasedServerCall), static_cast( grpc_core::kExperimentIdRegisteredMethodLookupInTransport)}; -const char* const description_round_robin_delegate_to_pick_first = - "Change round_robin code to delegate to pick_first as per dualstack " - "backend design."; -const char* const additional_constraints_round_robin_delegate_to_pick_first = - "{}"; const char* const description_rstpit = "On RST_STREAM on a server, reduce MAX_CONCURRENT_STREAMS for a short " "duration"; @@ -413,10 +393,6 @@ const char* const description_work_serializer_dispatch = const char* const additional_constraints_work_serializer_dispatch = "{}"; const uint8_t required_experiments_work_serializer_dispatch[] = { static_cast(grpc_core::kExperimentIdEventEngineClient)}; -const char* const description_wrr_delegate_to_pick_first = - "Change WRR code to delegate to pick_first as per dualstack backend " - "design."; -const char* const additional_constraints_wrr_delegate_to_pick_first = "{}"; #ifdef NDEBUG const bool kDefaultForDebugOnly = false; #else @@ -477,10 +453,6 @@ const ExperimentMetadata g_experiment_metadata[] = { description_promise_based_inproc_transport, additional_constraints_promise_based_inproc_transport, required_experiments_promise_based_inproc_transport, 3, false, false}, - {"round_robin_delegate_to_pick_first", - description_round_robin_delegate_to_pick_first, - additional_constraints_round_robin_delegate_to_pick_first, nullptr, 0, - true, true}, {"rstpit", description_rstpit, additional_constraints_rstpit, nullptr, 0, false, true}, {"schedule_cancellation_over_write", @@ -514,8 +486,6 @@ const ExperimentMetadata g_experiment_metadata[] = { {"work_serializer_dispatch", description_work_serializer_dispatch, additional_constraints_work_serializer_dispatch, required_experiments_work_serializer_dispatch, 1, false, true}, - {"wrr_delegate_to_pick_first", description_wrr_delegate_to_pick_first, - additional_constraints_wrr_delegate_to_pick_first, nullptr, 0, true, true}, }; } // namespace grpc_core @@ -608,11 +578,6 @@ const uint8_t required_experiments_promise_based_inproc_transport[] = { static_cast(grpc_core::kExperimentIdPromiseBasedServerCall), static_cast( grpc_core::kExperimentIdRegisteredMethodLookupInTransport)}; -const char* const description_round_robin_delegate_to_pick_first = - "Change round_robin code to delegate to pick_first as per dualstack " - "backend design."; -const char* const additional_constraints_round_robin_delegate_to_pick_first = - "{}"; const char* const description_rstpit = "On RST_STREAM on a server, reduce MAX_CONCURRENT_STREAMS for a short " "duration"; @@ -662,10 +627,6 @@ const char* const description_work_serializer_dispatch = const char* const additional_constraints_work_serializer_dispatch = "{}"; const uint8_t required_experiments_work_serializer_dispatch[] = { static_cast(grpc_core::kExperimentIdEventEngineClient)}; -const char* const description_wrr_delegate_to_pick_first = - "Change WRR code to delegate to pick_first as per dualstack backend " - "design."; -const char* const additional_constraints_wrr_delegate_to_pick_first = "{}"; #ifdef NDEBUG const bool kDefaultForDebugOnly = false; #else @@ -726,10 +687,6 @@ const ExperimentMetadata g_experiment_metadata[] = { description_promise_based_inproc_transport, additional_constraints_promise_based_inproc_transport, required_experiments_promise_based_inproc_transport, 3, false, false}, - {"round_robin_delegate_to_pick_first", - description_round_robin_delegate_to_pick_first, - additional_constraints_round_robin_delegate_to_pick_first, nullptr, 0, - true, true}, {"rstpit", description_rstpit, additional_constraints_rstpit, nullptr, 0, false, true}, {"schedule_cancellation_over_write", @@ -763,8 +720,6 @@ const ExperimentMetadata g_experiment_metadata[] = { {"work_serializer_dispatch", description_work_serializer_dispatch, additional_constraints_work_serializer_dispatch, required_experiments_work_serializer_dispatch, 1, true, true}, - {"wrr_delegate_to_pick_first", description_wrr_delegate_to_pick_first, - additional_constraints_wrr_delegate_to_pick_first, nullptr, 0, true, true}, }; } // namespace grpc_core diff --git a/src/core/lib/experiments/experiments.h b/src/core/lib/experiments/experiments.h index 70027ee9d99..90e66aee6f5 100644 --- a/src/core/lib/experiments/experiments.h +++ b/src/core/lib/experiments/experiments.h @@ -92,8 +92,6 @@ inline bool IsChaoticGoodEnabled() { return false; } #define GRPC_EXPERIMENT_IS_INCLUDED_REGISTERED_METHOD_LOOKUP_IN_TRANSPORT inline bool IsRegisteredMethodLookupInTransportEnabled() { return true; } inline bool IsPromiseBasedInprocTransportEnabled() { return false; } -#define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DELEGATE_TO_PICK_FIRST -inline bool IsRoundRobinDelegateToPickFirstEnabled() { return true; } inline bool IsRstpitEnabled() { return false; } inline bool IsScheduleCancellationOverWriteEnabled() { return false; } inline bool IsServerPrivacyEnabled() { return false; } @@ -108,8 +106,6 @@ inline bool IsV3ServerAuthFilterEnabled() { return false; } #define GRPC_EXPERIMENT_IS_INCLUDED_WORK_SERIALIZER_CLEARS_TIME_CACHE inline bool IsWorkSerializerClearsTimeCacheEnabled() { return true; } inline bool IsWorkSerializerDispatchEnabled() { return false; } -#define GRPC_EXPERIMENT_IS_INCLUDED_WRR_DELEGATE_TO_PICK_FIRST -inline bool IsWrrDelegateToPickFirstEnabled() { return true; } #elif defined(GPR_WINDOWS) #define GRPC_EXPERIMENT_IS_INCLUDED_ABSL_BASE64 @@ -148,8 +144,6 @@ inline bool IsChaoticGoodEnabled() { return false; } #define GRPC_EXPERIMENT_IS_INCLUDED_REGISTERED_METHOD_LOOKUP_IN_TRANSPORT inline bool IsRegisteredMethodLookupInTransportEnabled() { return true; } inline bool IsPromiseBasedInprocTransportEnabled() { return false; } -#define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DELEGATE_TO_PICK_FIRST -inline bool IsRoundRobinDelegateToPickFirstEnabled() { return true; } inline bool IsRstpitEnabled() { return false; } inline bool IsScheduleCancellationOverWriteEnabled() { return false; } inline bool IsServerPrivacyEnabled() { return false; } @@ -164,8 +158,6 @@ inline bool IsV3ServerAuthFilterEnabled() { return false; } #define GRPC_EXPERIMENT_IS_INCLUDED_WORK_SERIALIZER_CLEARS_TIME_CACHE inline bool IsWorkSerializerClearsTimeCacheEnabled() { return true; } inline bool IsWorkSerializerDispatchEnabled() { return false; } -#define GRPC_EXPERIMENT_IS_INCLUDED_WRR_DELEGATE_TO_PICK_FIRST -inline bool IsWrrDelegateToPickFirstEnabled() { return true; } #else #define GRPC_EXPERIMENT_IS_INCLUDED_ABSL_BASE64 @@ -205,8 +197,6 @@ inline bool IsChaoticGoodEnabled() { return false; } #define GRPC_EXPERIMENT_IS_INCLUDED_REGISTERED_METHOD_LOOKUP_IN_TRANSPORT inline bool IsRegisteredMethodLookupInTransportEnabled() { return true; } inline bool IsPromiseBasedInprocTransportEnabled() { return false; } -#define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DELEGATE_TO_PICK_FIRST -inline bool IsRoundRobinDelegateToPickFirstEnabled() { return true; } inline bool IsRstpitEnabled() { return false; } inline bool IsScheduleCancellationOverWriteEnabled() { return false; } inline bool IsServerPrivacyEnabled() { return false; } @@ -222,8 +212,6 @@ inline bool IsV3ServerAuthFilterEnabled() { return false; } inline bool IsWorkSerializerClearsTimeCacheEnabled() { return true; } #define GRPC_EXPERIMENT_IS_INCLUDED_WORK_SERIALIZER_DISPATCH inline bool IsWorkSerializerDispatchEnabled() { return true; } -#define GRPC_EXPERIMENT_IS_INCLUDED_WRR_DELEGATE_TO_PICK_FIRST -inline bool IsWrrDelegateToPickFirstEnabled() { return true; } #endif #else @@ -249,7 +237,6 @@ enum ExperimentIds { kExperimentIdChaoticGood, kExperimentIdRegisteredMethodLookupInTransport, kExperimentIdPromiseBasedInprocTransport, - kExperimentIdRoundRobinDelegateToPickFirst, kExperimentIdRstpit, kExperimentIdScheduleCancellationOverWrite, kExperimentIdServerPrivacy, @@ -263,7 +250,6 @@ enum ExperimentIds { kExperimentIdV3ServerAuthFilter, kExperimentIdWorkSerializerClearsTimeCache, kExperimentIdWorkSerializerDispatch, - kExperimentIdWrrDelegateToPickFirst, kNumExperiments }; #define GRPC_EXPERIMENT_IS_INCLUDED_ABSL_BASE64 @@ -350,10 +336,6 @@ inline bool IsRegisteredMethodLookupInTransportEnabled() { inline bool IsPromiseBasedInprocTransportEnabled() { return IsExperimentEnabled(kExperimentIdPromiseBasedInprocTransport); } -#define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DELEGATE_TO_PICK_FIRST -inline bool IsRoundRobinDelegateToPickFirstEnabled() { - return IsExperimentEnabled(kExperimentIdRoundRobinDelegateToPickFirst); -} #define GRPC_EXPERIMENT_IS_INCLUDED_RSTPIT inline bool IsRstpitEnabled() { return IsExperimentEnabled(kExperimentIdRstpit); @@ -406,10 +388,6 @@ inline bool IsWorkSerializerClearsTimeCacheEnabled() { inline bool IsWorkSerializerDispatchEnabled() { return IsExperimentEnabled(kExperimentIdWorkSerializerDispatch); } -#define GRPC_EXPERIMENT_IS_INCLUDED_WRR_DELEGATE_TO_PICK_FIRST -inline bool IsWrrDelegateToPickFirstEnabled() { - return IsExperimentEnabled(kExperimentIdWrrDelegateToPickFirst); -} extern const ExperimentMetadata g_experiment_metadata[kNumExperiments]; diff --git a/src/core/lib/experiments/experiments.yaml b/src/core/lib/experiments/experiments.yaml index b86fde5cbdd..f1c1f4a11fc 100644 --- a/src/core/lib/experiments/experiments.yaml +++ b/src/core/lib/experiments/experiments.yaml @@ -184,13 +184,6 @@ expiry: 2024/03/31 owner: yashkt@google.com test_tags: ["surface_registered_method_lookup"] -- name: round_robin_delegate_to_pick_first - description: - Change round_robin code to delegate to pick_first as per dualstack - backend design. - expiry: 2024/03/15 - owner: roth@google.com - test_tags: ["lb_unit_test", "cpp_lb_end2end_test", "xds_end2end_test"] - name: rstpit description: On RST_STREAM on a server, reduce MAX_CONCURRENT_STREAMS for a short duration @@ -271,10 +264,3 @@ expiry: 2024/03/31 owner: ysseung@google.com test_tags: ["core_end2end_test", "cpp_end2end_test", "xds_end2end_test", "lb_unit_test"] -- name: wrr_delegate_to_pick_first - description: - Change WRR code to delegate to pick_first as per dualstack - backend design. - expiry: 2024/03/15 - owner: roth@google.com - test_tags: ["lb_unit_test", "cpp_lb_end2end_test", "xds_end2end_test"] diff --git a/src/core/lib/experiments/rollouts.yaml b/src/core/lib/experiments/rollouts.yaml index 8bbd66af0ff..b6cbcd468a2 100644 --- a/src/core/lib/experiments/rollouts.yaml +++ b/src/core/lib/experiments/rollouts.yaml @@ -100,8 +100,6 @@ default: false - name: registered_method_lookup_in_transport default: true -- name: round_robin_delegate_to_pick_first - default: true - name: rstpit default: false - name: schedule_cancellation_over_write @@ -126,5 +124,3 @@ posix: true # TODO(ysseung): Test flakes not fully resolved. windows: broken -- name: wrr_delegate_to_pick_first - default: true diff --git a/src/core/load_balancing/round_robin/round_robin.cc b/src/core/load_balancing/round_robin/round_robin.cc index 66065aa06bf..1bfa55ccbaa 100644 --- a/src/core/load_balancing/round_robin/round_robin.cc +++ b/src/core/load_balancing/round_robin/round_robin.cc @@ -37,23 +37,19 @@ #include #include -#include "src/core/load_balancing/endpoint_list.h" -#include "src/core/load_balancing/subchannel_list.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" -#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/json/json.h" #include "src/core/lib/transport/connectivity_state.h" +#include "src/core/load_balancing/endpoint_list.h" #include "src/core/load_balancing/lb_policy.h" #include "src/core/load_balancing/lb_policy_factory.h" -#include "src/core/load_balancing/subchannel_interface.h" #include "src/core/resolver/endpoint_addresses.h" -#include "src/core/resolver/server_address.h" namespace grpc_core { @@ -61,456 +57,8 @@ TraceFlag grpc_lb_round_robin_trace(false, "round_robin"); namespace { -// -// legacy round_robin LB policy (before dualstack support) -// - constexpr absl::string_view kRoundRobin = "round_robin"; -class OldRoundRobin : public LoadBalancingPolicy { - public: - explicit OldRoundRobin(Args args); - - absl::string_view name() const override { return kRoundRobin; } - - absl::Status UpdateLocked(UpdateArgs args) override; - void ResetBackoffLocked() override; - - private: - ~OldRoundRobin() override; - - // Forward declaration. - class RoundRobinSubchannelList; - - // Data for a particular subchannel in a subchannel list. - // This subclass adds the following functionality: - // - Tracks the previous connectivity state of the subchannel, so that - // we know how many subchannels are in each state. - class RoundRobinSubchannelData - : public SubchannelData { - public: - RoundRobinSubchannelData( - SubchannelList* - subchannel_list, - const ServerAddress& address, - RefCountedPtr subchannel) - : SubchannelData(subchannel_list, address, std::move(subchannel)) {} - - absl::optional connectivity_state() const { - return logical_connectivity_state_; - } - - private: - // Performs connectivity state updates that need to be done only - // after we have started watching. - void ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) override; - - // Updates the logical connectivity state. - void UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state); - - // The logical connectivity state of the subchannel. - // Note that the logical connectivity state may differ from the - // actual reported state in some cases (e.g., after we see - // TRANSIENT_FAILURE, we ignore any subsequent state changes until - // we see READY). - absl::optional logical_connectivity_state_; - }; - - // A list of subchannels. - class RoundRobinSubchannelList - : public SubchannelList { - public: - RoundRobinSubchannelList(OldRoundRobin* policy, - EndpointAddressesIterator* addresses, - const ChannelArgs& args) - : SubchannelList(policy, - (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) - ? "RoundRobinSubchannelList" - : nullptr), - addresses, policy->channel_control_helper(), args) { - // Need to maintain a ref to the LB policy as long as we maintain - // any references to subchannels, since the subchannels' - // pollset_sets will include the LB policy's pollset_set. - policy->Ref(DEBUG_LOCATION, "subchannel_list").release(); - } - - ~RoundRobinSubchannelList() override { - OldRoundRobin* p = static_cast(policy()); - p->Unref(DEBUG_LOCATION, "subchannel_list"); - } - - // Updates the counters of subchannels in each state when a - // subchannel transitions from old_state to new_state. - void UpdateStateCountersLocked( - absl::optional old_state, - grpc_connectivity_state new_state); - - // Ensures that the right subchannel list is used and then updates - // the RR policy's connectivity state based on the subchannel list's - // state counters. - void MaybeUpdateRoundRobinConnectivityStateLocked( - absl::Status status_for_tf); - - private: - std::shared_ptr work_serializer() const override { - return static_cast(policy())->work_serializer(); - } - - std::string CountersString() const { - return absl::StrCat("num_subchannels=", num_subchannels(), - " num_ready=", num_ready_, - " num_connecting=", num_connecting_, - " num_transient_failure=", num_transient_failure_); - } - - size_t num_ready_ = 0; - size_t num_connecting_ = 0; - size_t num_transient_failure_ = 0; - - absl::Status last_failure_; - }; - - class Picker : public SubchannelPicker { - public: - Picker(OldRoundRobin* parent, RoundRobinSubchannelList* subchannel_list); - - PickResult Pick(PickArgs args) override; - - private: - // Using pointer value only, no ref held -- do not dereference! - OldRoundRobin* parent_; - - std::atomic last_picked_index_; - std::vector> subchannels_; - }; - - void ShutdownLocked() override; - - // List of subchannels. - RefCountedPtr subchannel_list_; - // Latest pending subchannel list. - // When we get an updated address list, we create a new subchannel list - // for it here, and we wait to swap it into subchannel_list_ until the new - // list becomes READY. - RefCountedPtr latest_pending_subchannel_list_; - - bool shutdown_ = false; - - absl::BitGen bit_gen_; -}; - -// -// OldRoundRobin::Picker -// - -OldRoundRobin::Picker::Picker(OldRoundRobin* parent, - RoundRobinSubchannelList* subchannel_list) - : parent_(parent) { - for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { - RoundRobinSubchannelData* sd = subchannel_list->subchannel(i); - if (sd->connectivity_state().value_or(GRPC_CHANNEL_IDLE) == - GRPC_CHANNEL_READY) { - subchannels_.push_back(sd->subchannel()->Ref()); - } - } - // For discussion on why we generate a random starting index for - // the picker, see https://github.com/grpc/grpc-go/issues/2580. - size_t index = - absl::Uniform(parent->bit_gen_, 0, subchannels_.size()); - last_picked_index_.store(index, std::memory_order_relaxed); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p picker %p] created picker from subchannel_list=%p " - "with %" PRIuPTR " READY subchannels; last_picked_index_=%" PRIuPTR, - parent_, this, subchannel_list, subchannels_.size(), index); - } -} - -OldRoundRobin::PickResult OldRoundRobin::Picker::Pick(PickArgs /*args*/) { - size_t index = last_picked_index_.fetch_add(1, std::memory_order_relaxed) % - subchannels_.size(); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p picker %p] returning index %" PRIuPTR ", subchannel=%p", - parent_, this, index, subchannels_[index].get()); - } - return PickResult::Complete(subchannels_[index]); -} - -// -// RoundRobin -// - -OldRoundRobin::OldRoundRobin(Args args) : LoadBalancingPolicy(std::move(args)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] Created", this); - } -} - -OldRoundRobin::~OldRoundRobin() { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] Destroying Round Robin policy", this); - } - GPR_ASSERT(subchannel_list_ == nullptr); - GPR_ASSERT(latest_pending_subchannel_list_ == nullptr); -} - -void OldRoundRobin::ShutdownLocked() { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] Shutting down", this); - } - shutdown_ = true; - subchannel_list_.reset(); - latest_pending_subchannel_list_.reset(); -} - -void OldRoundRobin::ResetBackoffLocked() { - subchannel_list_->ResetBackoffLocked(); - if (latest_pending_subchannel_list_ != nullptr) { - latest_pending_subchannel_list_->ResetBackoffLocked(); - } -} - -absl::Status OldRoundRobin::UpdateLocked(UpdateArgs args) { - EndpointAddressesIterator* addresses = nullptr; - if (args.addresses.ok()) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] received update", this); - } - addresses = args.addresses->get(); - } else { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] received update with address error: %s", this, - args.addresses.status().ToString().c_str()); - } - // If we already have a subchannel list, then keep using the existing - // list, but still report back that the update was not accepted. - if (subchannel_list_ != nullptr) return args.addresses.status(); - } - // Create new subchannel list, replacing the previous pending list, if any. - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) && - latest_pending_subchannel_list_ != nullptr) { - gpr_log(GPR_INFO, "[RR %p] replacing previous pending subchannel list %p", - this, latest_pending_subchannel_list_.get()); - } - latest_pending_subchannel_list_ = - MakeRefCounted(this, addresses, args.args); - latest_pending_subchannel_list_->StartWatchingLocked(args.args); - // If the new list is empty, immediately promote it to - // subchannel_list_ and report TRANSIENT_FAILURE. - if (latest_pending_subchannel_list_->num_subchannels() == 0) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) && - subchannel_list_ != nullptr) { - gpr_log(GPR_INFO, "[RR %p] replacing previous subchannel list %p", this, - subchannel_list_.get()); - } - subchannel_list_ = std::move(latest_pending_subchannel_list_); - absl::Status status = - args.addresses.ok() ? absl::UnavailableError(absl::StrCat( - "empty address list: ", args.resolution_note)) - : args.addresses.status(); - channel_control_helper()->UpdateState( - GRPC_CHANNEL_TRANSIENT_FAILURE, status, - MakeRefCounted(status)); - return status; - } - // Otherwise, if this is the initial update, immediately promote it to - // subchannel_list_. - if (subchannel_list_.get() == nullptr) { - subchannel_list_ = std::move(latest_pending_subchannel_list_); - } - return absl::OkStatus(); -} - -// -// RoundRobinSubchannelList -// - -void OldRoundRobin::RoundRobinSubchannelList::UpdateStateCountersLocked( - absl::optional old_state, - grpc_connectivity_state new_state) { - if (old_state.has_value()) { - GPR_ASSERT(*old_state != GRPC_CHANNEL_SHUTDOWN); - if (*old_state == GRPC_CHANNEL_READY) { - GPR_ASSERT(num_ready_ > 0); - --num_ready_; - } else if (*old_state == GRPC_CHANNEL_CONNECTING) { - GPR_ASSERT(num_connecting_ > 0); - --num_connecting_; - } else if (*old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - GPR_ASSERT(num_transient_failure_ > 0); - --num_transient_failure_; - } - } - GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN); - if (new_state == GRPC_CHANNEL_READY) { - ++num_ready_; - } else if (new_state == GRPC_CHANNEL_CONNECTING) { - ++num_connecting_; - } else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - ++num_transient_failure_; - } -} - -void OldRoundRobin::RoundRobinSubchannelList:: - MaybeUpdateRoundRobinConnectivityStateLocked(absl::Status status_for_tf) { - OldRoundRobin* p = static_cast(policy()); - // If this is latest_pending_subchannel_list_, then swap it into - // subchannel_list_ in the following cases: - // - subchannel_list_ has no READY subchannels. - // - This list has at least one READY subchannel and we have seen the - // initial connectivity state notification for all subchannels. - // - All of the subchannels in this list are in TRANSIENT_FAILURE. - // (This may cause the channel to go from READY to TRANSIENT_FAILURE, - // but we're doing what the control plane told us to do.) - if (p->latest_pending_subchannel_list_.get() == this && - (p->subchannel_list_->num_ready_ == 0 || - (num_ready_ > 0 && AllSubchannelsSeenInitialState()) || - num_transient_failure_ == num_subchannels())) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - const std::string old_counters_string = - p->subchannel_list_ != nullptr ? p->subchannel_list_->CountersString() - : ""; - gpr_log( - GPR_INFO, - "[RR %p] swapping out subchannel list %p (%s) in favor of %p (%s)", p, - p->subchannel_list_.get(), old_counters_string.c_str(), this, - CountersString().c_str()); - } - p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); - } - // Only set connectivity state if this is the current subchannel list. - if (p->subchannel_list_.get() != this) return; - // First matching rule wins: - // 1) ANY subchannel is READY => policy is READY. - // 2) ANY subchannel is CONNECTING => policy is CONNECTING. - // 3) ALL subchannels are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. - if (num_ready_ > 0) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] reporting READY with subchannel list %p", p, - this); - } - p->channel_control_helper()->UpdateState(GRPC_CHANNEL_READY, absl::Status(), - MakeRefCounted(p, this)); - } else if (num_connecting_ > 0) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] reporting CONNECTING with subchannel list %p", - p, this); - } - p->channel_control_helper()->UpdateState( - GRPC_CHANNEL_CONNECTING, absl::Status(), - MakeRefCounted( - p->RefAsSubclass(DEBUG_LOCATION, "QueuePicker"))); - } else if (num_transient_failure_ == num_subchannels()) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] reporting TRANSIENT_FAILURE with subchannel list %p: %s", - p, this, status_for_tf.ToString().c_str()); - } - if (!status_for_tf.ok()) { - last_failure_ = absl::UnavailableError( - absl::StrCat("connections to all backends failing; last error: ", - status_for_tf.ToString())); - } - p->channel_control_helper()->UpdateState( - GRPC_CHANNEL_TRANSIENT_FAILURE, last_failure_, - MakeRefCounted(last_failure_)); - } -} - -// -// RoundRobinSubchannelData -// - -void OldRoundRobin::RoundRobinSubchannelData::ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) { - OldRoundRobin* p = static_cast(subchannel_list()->policy()); - GPR_ASSERT(subchannel() != nullptr); - // If this is not the initial state notification and the new state is - // TRANSIENT_FAILURE or IDLE, re-resolve. - // Note that we don't want to do this on the initial state notification, - // because that would result in an endless loop of re-resolution. - if (old_state.has_value() && (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE || - new_state == GRPC_CHANNEL_IDLE)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] Subchannel %p reported %s; requesting re-resolution", p, - subchannel(), ConnectivityStateName(new_state)); - } - p->channel_control_helper()->RequestReresolution(); - } - if (new_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] Subchannel %p reported IDLE; requesting connection", p, - subchannel()); - } - subchannel()->RequestConnection(); - } - // Update logical connectivity state. - UpdateLogicalConnectivityStateLocked(new_state); - // Update the policy state. - subchannel_list()->MaybeUpdateRoundRobinConnectivityStateLocked( - connectivity_status()); -} - -void OldRoundRobin::RoundRobinSubchannelData:: - UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state) { - OldRoundRobin* p = static_cast(subchannel_list()->policy()); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log( - GPR_INFO, - "[RR %p] connectivity changed for subchannel %p, subchannel_list %p " - "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels(), - (logical_connectivity_state_.has_value() - ? ConnectivityStateName(*logical_connectivity_state_) - : "N/A"), - ConnectivityStateName(connectivity_state)); - } - // Decide what state to report for aggregation purposes. - // If the last logical state was TRANSIENT_FAILURE, then ignore the - // state change unless the new state is READY. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE && - connectivity_state != GRPC_CHANNEL_READY) { - return; - } - // If the new state is IDLE, treat it as CONNECTING, since it will - // immediately transition into CONNECTING anyway. - if (connectivity_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] subchannel %p, subchannel_list %p (index %" PRIuPTR - " of %" PRIuPTR "): treating IDLE as CONNECTING", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels()); - } - connectivity_state = GRPC_CHANNEL_CONNECTING; - } - // If no change, return false. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == connectivity_state) { - return; - } - // Otherwise, update counters and logical state. - subchannel_list()->UpdateStateCountersLocked(logical_connectivity_state_, - connectivity_state); - logical_connectivity_state_ = connectivity_state; -} - -// -// round_robin LB policy (with dualstack changes) -// - class RoundRobin : public LoadBalancingPolicy { public: explicit RoundRobin(Args args); @@ -892,9 +440,6 @@ class RoundRobinFactory : public LoadBalancingPolicyFactory { public: OrphanablePtr CreateLoadBalancingPolicy( LoadBalancingPolicy::Args args) const override { - if (!IsRoundRobinDelegateToPickFirstEnabled()) { - return MakeOrphanable(std::move(args)); - } return MakeOrphanable(std::move(args)); } diff --git a/src/core/load_balancing/subchannel_list.h b/src/core/load_balancing/subchannel_list.h deleted file mode 100644 index da9e35272a6..00000000000 --- a/src/core/load_balancing/subchannel_list.h +++ /dev/null @@ -1,455 +0,0 @@ -// -// Copyright 2015 gRPC authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#ifndef GRPC_SRC_CORE_LOAD_BALANCING_SUBCHANNEL_LIST_H -#define GRPC_SRC_CORE_LOAD_BALANCING_SUBCHANNEL_LIST_H - -#include - -#include -#include - -#include -#include -#include - -#include "absl/status/status.h" -#include "absl/types/optional.h" - -#include -#include - -#include "src/core/load_balancing/health_check_client.h" -#include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/gprpp/debug_location.h" -#include "src/core/lib/gprpp/dual_ref_counted.h" -#include "src/core/lib/gprpp/manual_constructor.h" -#include "src/core/lib/gprpp/ref_counted_ptr.h" -#include "src/core/lib/gprpp/work_serializer.h" -#include "src/core/lib/iomgr/iomgr_fwd.h" -#include "src/core/lib/transport/connectivity_state.h" -#include "src/core/load_balancing/lb_policy.h" -#include "src/core/load_balancing/subchannel_interface.h" -#include "src/core/resolver/endpoint_addresses.h" -#include "src/core/resolver/server_address.h" - -// Code for maintaining a list of subchannels within an LB policy. -// -// To use this, callers must create their own subclasses, like so: -// - -// class MySubchannelList; // Forward declaration. - -// class MySubchannelData -// : public SubchannelData { -// public: -// void ProcessConnectivityChangeLocked( -// absl::optional old_state, -// grpc_connectivity_state new_state) override { -// // ...code to handle connectivity changes... -// } -// }; - -// class MySubchannelList -// : public SubchannelList { -// }; - -// -// All methods will be called from within the client_channel work serializer. - -namespace grpc_core { - -// Forward declaration. -template -class SubchannelList; - -// Stores data for a particular subchannel in a subchannel list. -// Callers must create a subclass that implements the -// ProcessConnectivityChangeLocked() method. -template -class SubchannelData { - public: - // Returns a pointer to the subchannel list containing this object. - SubchannelListType* subchannel_list() const { - return static_cast(subchannel_list_); - } - - // Returns the index into the subchannel list of this object. - size_t Index() const { - return static_cast(static_cast(this) - - subchannel_list_->subchannel(0)); - } - - // Returns a pointer to the subchannel. - SubchannelInterface* subchannel() const { return subchannel_.get(); } - - // Returns the cached connectivity state, if any. - absl::optional connectivity_state() { - return connectivity_state_; - } - absl::Status connectivity_status() { return connectivity_status_; } - - // Resets the connection backoff. - void ResetBackoffLocked(); - - // Cancels any pending connectivity watch and unrefs the subchannel. - void ShutdownLocked(); - - protected: - SubchannelData( - SubchannelList* subchannel_list, - const ServerAddress& address, - RefCountedPtr subchannel); - - virtual ~SubchannelData(); - - // This method will be invoked once soon after instantiation to report - // the current connectivity state, and it will then be invoked again - // whenever the connectivity state changes. - virtual void ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) = 0; - - private: - // For accessing StartConnectivityWatchLocked(). - friend class SubchannelList; - - // Watcher for subchannel connectivity state. - class Watcher - : public SubchannelInterface::ConnectivityStateWatcherInterface { - public: - Watcher( - SubchannelData* subchannel_data, - WeakRefCountedPtr subchannel_list) - : subchannel_data_(subchannel_data), - subchannel_list_(std::move(subchannel_list)) {} - - ~Watcher() override { - subchannel_list_.reset(DEBUG_LOCATION, "Watcher dtor"); - } - - void OnConnectivityStateChange(grpc_connectivity_state new_state, - absl::Status status) override; - - grpc_pollset_set* interested_parties() override { - return subchannel_list_->policy()->interested_parties(); - } - - private: - SubchannelData* subchannel_data_; - WeakRefCountedPtr subchannel_list_; - }; - - // Starts watching the connectivity state of the subchannel. - // ProcessConnectivityChangeLocked() will be called whenever the - // connectivity state changes. - void StartConnectivityWatchLocked(const ChannelArgs& args); - - // Cancels watching the connectivity state of the subchannel. - void CancelConnectivityWatchLocked(const char* reason); - - // Unrefs the subchannel. - void UnrefSubchannelLocked(const char* reason); - - // Backpointer to owning subchannel list. Not owned. - SubchannelList* subchannel_list_; - // The subchannel. - RefCountedPtr subchannel_; - // Will be non-null when the subchannel's state is being watched. - SubchannelInterface::DataWatcherInterface* health_watcher_ = nullptr; - // Data updated by the watcher. - absl::optional connectivity_state_; - absl::Status connectivity_status_; -}; - -// A list of subchannels. -template -class SubchannelList : public DualRefCounted { - public: - // Starts watching the connectivity state of all subchannels. - // Must be called immediately after instantiation. - void StartWatchingLocked(const ChannelArgs& args); - - // The number of subchannels in the list. - size_t num_subchannels() const { return subchannels_.size(); } - - // The data for the subchannel at a particular index. - SubchannelDataType* subchannel(size_t index) { - return subchannels_[index].get(); - } - - // Returns true if the subchannel list is shutting down. - bool shutting_down() const { return shutting_down_; } - - // Accessors. - LoadBalancingPolicy* policy() const { return policy_; } - const char* tracer() const { return tracer_; } - - // Resets connection backoff of all subchannels. - void ResetBackoffLocked(); - - // Returns true if all subchannels have seen their initial - // connectivity state notifications. - bool AllSubchannelsSeenInitialState(); - - void Orphan() override; - - protected: - SubchannelList(LoadBalancingPolicy* policy, const char* tracer, - EndpointAddressesIterator* addresses, - LoadBalancingPolicy::ChannelControlHelper* helper, - const ChannelArgs& args); - - virtual ~SubchannelList(); - - private: - // For accessing Ref() and Unref(). - friend class SubchannelData; - - virtual std::shared_ptr work_serializer() const = 0; - - // Backpointer to owning policy. - LoadBalancingPolicy* policy_; - - const char* tracer_; - - // The list of subchannels. - // We use ManualConstructor here to support SubchannelDataType classes - // that are not copyable. - std::vector> subchannels_; - - // Is this list shutting down? This may be true due to the shutdown of the - // policy itself or because a newer update has arrived while this one hadn't - // finished processing. - bool shutting_down_ = false; -}; - -// -// implementation -- no user-servicable parts below -// - -// -// SubchannelData::Watcher -// - -template -void SubchannelData::Watcher:: - OnConnectivityStateChange(grpc_connectivity_state new_state, - absl::Status status) { - if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { - gpr_log( - GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): connectivity changed: old_state=%s, new_state=%s, " - "status=%s, shutting_down=%d, health_watcher=%p", - subchannel_list_->tracer(), subchannel_list_->policy(), - subchannel_list_.get(), subchannel_data_->Index(), - subchannel_list_->num_subchannels(), - subchannel_data_->subchannel_.get(), - (subchannel_data_->connectivity_state_.has_value() - ? ConnectivityStateName(*subchannel_data_->connectivity_state_) - : "N/A"), - ConnectivityStateName(new_state), status.ToString().c_str(), - subchannel_list_->shutting_down(), subchannel_data_->health_watcher_); - } - if (!subchannel_list_->shutting_down() && - subchannel_data_->health_watcher_ != nullptr) { - absl::optional old_state = - subchannel_data_->connectivity_state_; - subchannel_data_->connectivity_state_ = new_state; - subchannel_data_->connectivity_status_ = status; - // Call the subclass's ProcessConnectivityChangeLocked() method. - subchannel_data_->ProcessConnectivityChangeLocked(old_state, new_state); - } -} - -// -// SubchannelData -// - -template -SubchannelData::SubchannelData( - SubchannelList* subchannel_list, - const ServerAddress& /*address*/, - RefCountedPtr subchannel) - : subchannel_list_(subchannel_list), subchannel_(std::move(subchannel)) {} - -template -SubchannelData::~SubchannelData() { - GPR_ASSERT(subchannel_ == nullptr); -} - -template -void SubchannelData:: - UnrefSubchannelLocked(const char* reason) { - if (subchannel_ != nullptr) { - if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): unreffing subchannel (%s)", - subchannel_list_->tracer(), subchannel_list_->policy(), - subchannel_list_, Index(), subchannel_list_->num_subchannels(), - subchannel_.get(), reason); - } - subchannel_.reset(); - } -} - -template -void SubchannelData::ResetBackoffLocked() { - if (subchannel_ != nullptr) { - subchannel_->ResetBackoff(); - } -} - -template -void SubchannelData:: - StartConnectivityWatchLocked(const ChannelArgs& args) { - if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): starting watch", - subchannel_list_->tracer(), subchannel_list_->policy(), - subchannel_list_, Index(), subchannel_list_->num_subchannels(), - subchannel_.get()); - } - GPR_ASSERT(health_watcher_ == nullptr); - auto watcher = std::make_unique( - this, subchannel_list()->WeakRef(DEBUG_LOCATION, "Watcher")); - auto health_watcher = MakeHealthCheckWatcher( - subchannel_list_->work_serializer(), args, std::move(watcher)); - health_watcher_ = health_watcher.get(); - subchannel_->AddDataWatcher(std::move(health_watcher)); -} - -template -void SubchannelData:: - CancelConnectivityWatchLocked(const char* reason) { - if (health_watcher_ != nullptr) { - if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): canceling health watch (%s)", - subchannel_list_->tracer(), subchannel_list_->policy(), - subchannel_list_, Index(), subchannel_list_->num_subchannels(), - subchannel_.get(), reason); - } - subchannel_->CancelDataWatcher(health_watcher_); - health_watcher_ = nullptr; - } -} - -template -void SubchannelData::ShutdownLocked() { - CancelConnectivityWatchLocked("shutdown"); - UnrefSubchannelLocked("shutdown"); -} - -// -// SubchannelList -// - -template -SubchannelList::SubchannelList( - LoadBalancingPolicy* policy, const char* tracer, - EndpointAddressesIterator* addresses, - LoadBalancingPolicy::ChannelControlHelper* helper, const ChannelArgs& args) - : DualRefCounted(tracer), - policy_(policy), - tracer_(tracer) { - if (GPR_UNLIKELY(tracer_ != nullptr)) { - gpr_log(GPR_INFO, "[%s %p] Creating subchannel list %p", tracer_, policy, - this); - } - if (addresses == nullptr) return; - // Create a subchannel for each address. - addresses->ForEach([&](const EndpointAddresses& address) { - RefCountedPtr subchannel = - helper->CreateSubchannel(address.address(), address.args(), args); - if (subchannel == nullptr) { - // Subchannel could not be created. - if (GPR_UNLIKELY(tracer_ != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] could not create subchannel for address %s, ignoring", - tracer_, policy_, address.ToString().c_str()); - } - return; - } - if (GPR_UNLIKELY(tracer_ != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR - ": Created subchannel %p for address %s", - tracer_, policy_, this, subchannels_.size(), subchannel.get(), - address.ToString().c_str()); - } - subchannels_.emplace_back(); - subchannels_.back().Init(this, address, std::move(subchannel)); - }); -} - -template -SubchannelList::~SubchannelList() { - if (GPR_UNLIKELY(tracer_ != nullptr)) { - gpr_log(GPR_INFO, "[%s %p] Destroying subchannel_list %p", tracer_, policy_, - this); - } - for (auto& sd : subchannels_) { - sd.Destroy(); - } -} - -template -void SubchannelList:: - StartWatchingLocked(const ChannelArgs& args) { - for (auto& sd : subchannels_) { - sd->StartConnectivityWatchLocked(args); - } -} - -template -void SubchannelList::Orphan() { - if (GPR_UNLIKELY(tracer_ != nullptr)) { - gpr_log(GPR_INFO, "[%s %p] Shutting down subchannel_list %p", tracer_, - policy_, this); - } - GPR_ASSERT(!shutting_down_); - shutting_down_ = true; - for (auto& sd : subchannels_) { - sd->ShutdownLocked(); - } -} - -template -void SubchannelList::ResetBackoffLocked() { - for (auto& sd : subchannels_) { - sd->ResetBackoffLocked(); - } -} - -template -bool SubchannelList::AllSubchannelsSeenInitialState() { - for (size_t i = 0; i < num_subchannels(); ++i) { - if (!subchannel(i)->connectivity_state().has_value()) return false; - } - return true; -} - -} // namespace grpc_core - -#endif // GRPC_SRC_CORE_LOAD_BALANCING_SUBCHANNEL_LIST_H diff --git a/src/core/load_balancing/weighted_round_robin/weighted_round_robin.cc b/src/core/load_balancing/weighted_round_robin/weighted_round_robin.cc index 9a049a5a03e..14f01fb267e 100644 --- a/src/core/load_balancing/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/load_balancing/weighted_round_robin/weighted_round_robin.cc @@ -18,11 +18,9 @@ #include #include -#include #include #include -#include #include #include #include @@ -46,13 +44,6 @@ #include #include -#include "src/core/load_balancing/backend_metric_data.h" -#include "src/core/load_balancing/endpoint_list.h" -#include "src/core/load_balancing/oob_backend_metric.h" -#include "src/core/load_balancing/subchannel_list.h" -#include "src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h" -#include "src/core/load_balancing/weighted_target/weighted_target.h" -#include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/channel/metrics.h" #include "src/core/lib/config/core_configuration.h" @@ -72,970 +63,123 @@ #include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/json/json.h" #include "src/core/lib/json/json_args.h" -#include "src/core/lib/json/json_object_loader.h" -#include "src/core/lib/transport/connectivity_state.h" -#include "src/core/load_balancing/lb_policy.h" -#include "src/core/load_balancing/lb_policy_factory.h" -#include "src/core/load_balancing/subchannel_interface.h" -#include "src/core/resolver/endpoint_addresses.h" -#include "src/core/resolver/server_address.h" - -namespace grpc_core { - -TraceFlag grpc_lb_wrr_trace(false, "weighted_round_robin_lb"); - -namespace { - -constexpr absl::string_view kWeightedRoundRobin = "weighted_round_robin"; - -constexpr absl::string_view kMetricLabelLocality = "grpc.lb.locality"; - -const auto kMetricRrFallback = - GlobalInstrumentsRegistry::RegisterUInt64Counter( - "grpc.lb.wrr.rr_fallback", - "EXPERIMENTAL. Number of scheduler updates in which there were not " - "enough endpoints with valid weight, which caused the WRR policy to " - "fall back to RR behavior.", - "{update}", {kMetricLabelTarget}, {kMetricLabelLocality}, false); - -const auto kMetricEndpointWeightNotYetUsable = - GlobalInstrumentsRegistry::RegisterUInt64Counter( - "grpc.lb.wrr.endpoint_weight_not_yet_usable", - "EXPERIMENTAL. Number of endpoints from each scheduler update that " - "don't yet have usable weight information (i.e., either the load " - "report has not yet been received, or it is within the blackout " - "period).", - "{endpoint}", {kMetricLabelTarget}, {kMetricLabelLocality}, false); - -const auto kMetricEndpointWeightStale = - GlobalInstrumentsRegistry::RegisterUInt64Counter( - "grpc.lb.wrr.endpoint_weight_stale", - "EXPERIMENTAL. Number of endpoints from each scheduler update whose " - "latest weight is older than the expiration period.", - "{endpoint}", {kMetricLabelTarget}, {kMetricLabelLocality}, false); - -const auto kMetricEndpointWeights = - GlobalInstrumentsRegistry::RegisterDoubleHistogram( - "grpc.lb.wrr.endpoint_weights", - "EXPERIMENTAL. The histogram buckets will be endpoint weight ranges. " - "Each bucket will be a counter that is incremented once for every " - "endpoint whose weight is within that range. Note that endpoints " - "without usable weights will have weight 0.", - "{weight}", {kMetricLabelTarget}, {kMetricLabelLocality}, false); - -// Config for WRR policy. -class WeightedRoundRobinConfig : public LoadBalancingPolicy::Config { - public: - WeightedRoundRobinConfig() = default; - - WeightedRoundRobinConfig(const WeightedRoundRobinConfig&) = delete; - WeightedRoundRobinConfig& operator=(const WeightedRoundRobinConfig&) = delete; - - WeightedRoundRobinConfig(WeightedRoundRobinConfig&&) = delete; - WeightedRoundRobinConfig& operator=(WeightedRoundRobinConfig&&) = delete; - - absl::string_view name() const override { return kWeightedRoundRobin; } - - bool enable_oob_load_report() const { return enable_oob_load_report_; } - Duration oob_reporting_period() const { return oob_reporting_period_; } - Duration blackout_period() const { return blackout_period_; } - Duration weight_update_period() const { return weight_update_period_; } - Duration weight_expiration_period() const { - return weight_expiration_period_; - } - float error_utilization_penalty() const { return error_utilization_penalty_; } - - static const JsonLoaderInterface* JsonLoader(const JsonArgs&) { - static const auto* loader = - JsonObjectLoader() - .OptionalField("enableOobLoadReport", - &WeightedRoundRobinConfig::enable_oob_load_report_) - .OptionalField("oobReportingPeriod", - &WeightedRoundRobinConfig::oob_reporting_period_) - .OptionalField("blackoutPeriod", - &WeightedRoundRobinConfig::blackout_period_) - .OptionalField("weightUpdatePeriod", - &WeightedRoundRobinConfig::weight_update_period_) - .OptionalField("weightExpirationPeriod", - &WeightedRoundRobinConfig::weight_expiration_period_) - .OptionalField( - "errorUtilizationPenalty", - &WeightedRoundRobinConfig::error_utilization_penalty_) - .Finish(); - return loader; - } - - void JsonPostLoad(const Json&, const JsonArgs&, ValidationErrors* errors) { - // Impose lower bound of 100ms on weightUpdatePeriod. - weight_update_period_ = - std::max(weight_update_period_, Duration::Milliseconds(100)); - if (error_utilization_penalty_ < 0) { - ValidationErrors::ScopedField field(errors, ".errorUtilizationPenalty"); - errors->AddError("must be non-negative"); - } - } - - private: - bool enable_oob_load_report_ = false; - Duration oob_reporting_period_ = Duration::Seconds(10); - Duration blackout_period_ = Duration::Seconds(10); - Duration weight_update_period_ = Duration::Seconds(1); - Duration weight_expiration_period_ = Duration::Minutes(3); - float error_utilization_penalty_ = 1.0; -}; - -// Legacy WRR LB policy (not delegating to pick_first) -class OldWeightedRoundRobin : public LoadBalancingPolicy { - public: - explicit OldWeightedRoundRobin(Args args); - - absl::string_view name() const override { return kWeightedRoundRobin; } - - absl::Status UpdateLocked(UpdateArgs args) override; - void ResetBackoffLocked() override; - - private: - // Represents the weight for a given address. - class AddressWeight : public RefCounted { - public: - AddressWeight(RefCountedPtr wrr, std::string key) - : wrr_(std::move(wrr)), key_(std::move(key)) {} - ~AddressWeight() override; - - void MaybeUpdateWeight(double qps, double eps, double utilization, - float error_utilization_penalty); - - float GetWeight(Timestamp now, Duration weight_expiration_period, - Duration blackout_period); - - void ResetNonEmptySince(); - - private: - RefCountedPtr wrr_; - const std::string key_; - - Mutex mu_; - float weight_ ABSL_GUARDED_BY(&mu_) = 0; - Timestamp non_empty_since_ ABSL_GUARDED_BY(&mu_) = Timestamp::InfFuture(); - Timestamp last_update_time_ ABSL_GUARDED_BY(&mu_) = Timestamp::InfPast(); - }; - - // Forward declaration. - class WeightedRoundRobinSubchannelList; - - // Data for a particular subchannel in a subchannel list. - // This subclass adds the following functionality: - // - Tracks the previous connectivity state of the subchannel, so that - // we know how many subchannels are in each state. - class WeightedRoundRobinSubchannelData - : public SubchannelData { - public: - WeightedRoundRobinSubchannelData( - SubchannelList* subchannel_list, - const ServerAddress& address, RefCountedPtr sc); - - absl::optional connectivity_state() const { - return logical_connectivity_state_; - } - - RefCountedPtr weight() const { return weight_; } - - private: - class OobWatcher : public OobBackendMetricWatcher { - public: - OobWatcher(RefCountedPtr weight, - float error_utilization_penalty) - : weight_(std::move(weight)), - error_utilization_penalty_(error_utilization_penalty) {} - - void OnBackendMetricReport( - const BackendMetricData& backend_metric_data) override; - - private: - RefCountedPtr weight_; - const float error_utilization_penalty_; - }; - - // Performs connectivity state updates that need to be done only - // after we have started watching. - void ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) override; - - // Updates the logical connectivity state. - void UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state); - - // The logical connectivity state of the subchannel. - // Note that the logical connectivity state may differ from the - // actual reported state in some cases (e.g., after we see - // TRANSIENT_FAILURE, we ignore any subsequent state changes until - // we see READY). - absl::optional logical_connectivity_state_; - - RefCountedPtr weight_; - }; - - // A list of subchannels. - class WeightedRoundRobinSubchannelList - : public SubchannelList { - public: - WeightedRoundRobinSubchannelList(OldWeightedRoundRobin* policy, - EndpointAddressesIterator* addresses, - const ChannelArgs& args) - : SubchannelList(policy, - (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) - ? "WeightedRoundRobinSubchannelList" - : nullptr), - addresses, policy->channel_control_helper(), args) { - // Need to maintain a ref to the LB policy as long as we maintain - // any references to subchannels, since the subchannels' - // pollset_sets will include the LB policy's pollset_set. - policy->Ref(DEBUG_LOCATION, "subchannel_list").release(); - } - - ~WeightedRoundRobinSubchannelList() override { - OldWeightedRoundRobin* p = static_cast(policy()); - p->Unref(DEBUG_LOCATION, "subchannel_list"); - } - - // Updates the counters of subchannels in each state when a - // subchannel transitions from old_state to new_state. - void UpdateStateCountersLocked( - absl::optional old_state, - grpc_connectivity_state new_state); - - // Ensures that the right subchannel list is used and then updates - // the aggregated connectivity state based on the subchannel list's - // state counters. - void MaybeUpdateAggregatedConnectivityStateLocked( - absl::Status status_for_tf); - - private: - std::shared_ptr work_serializer() const override { - return static_cast(policy())->work_serializer(); - } - - std::string CountersString() const { - return absl::StrCat("num_subchannels=", num_subchannels(), - " num_ready=", num_ready_, - " num_connecting=", num_connecting_, - " num_transient_failure=", num_transient_failure_); - } - - size_t num_ready_ = 0; - size_t num_connecting_ = 0; - size_t num_transient_failure_ = 0; - - absl::Status last_failure_; - }; - - // A picker that performs WRR picks with weights based on - // endpoint-reported utilization and QPS. - class Picker : public SubchannelPicker { - public: - Picker(RefCountedPtr wrr, - WeightedRoundRobinSubchannelList* subchannel_list); - - ~Picker() override; - - PickResult Pick(PickArgs args) override; - - void Orphan() override; - - private: - // A call tracker that collects per-call endpoint utilization reports. - class SubchannelCallTracker : public SubchannelCallTrackerInterface { - public: - SubchannelCallTracker(RefCountedPtr weight, - float error_utilization_penalty) - : weight_(std::move(weight)), - error_utilization_penalty_(error_utilization_penalty) {} - - void Start() override {} - - void Finish(FinishArgs args) override; - - private: - RefCountedPtr weight_; - const float error_utilization_penalty_; - }; - - // Info stored about each subchannel. - struct SubchannelInfo { - SubchannelInfo(RefCountedPtr subchannel, - RefCountedPtr weight) - : subchannel(std::move(subchannel)), weight(std::move(weight)) {} - - RefCountedPtr subchannel; - RefCountedPtr weight; - }; - - // Returns the index into subchannels_ to be picked. - size_t PickIndex(); - - // Builds a new scheduler and swaps it into place, then starts a - // timer for the next update. - void BuildSchedulerAndStartTimerLocked() - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&timer_mu_); - - RefCountedPtr wrr_; - RefCountedPtr config_; - std::vector subchannels_; - - Mutex scheduler_mu_; - std::shared_ptr scheduler_ - ABSL_GUARDED_BY(&scheduler_mu_); - - Mutex timer_mu_ ABSL_ACQUIRED_BEFORE(&scheduler_mu_); - absl::optional - timer_handle_ ABSL_GUARDED_BY(&timer_mu_); - - // Used when falling back to RR. - std::atomic last_picked_index_; - }; - - ~OldWeightedRoundRobin() override; - - void ShutdownLocked() override; - - RefCountedPtr GetOrCreateWeight( - const grpc_resolved_address& address); - - RefCountedPtr config_; - - // List of subchannels. - RefCountedPtr subchannel_list_; - // Latest pending subchannel list. - // When we get an updated address list, we create a new subchannel list - // for it here, and we wait to swap it into subchannel_list_ until the new - // list becomes READY. - RefCountedPtr - latest_pending_subchannel_list_; - - Mutex address_weight_map_mu_; - std::map> address_weight_map_ - ABSL_GUARDED_BY(&address_weight_map_mu_); - - bool shutdown_ = false; - - absl::BitGen bit_gen_; - - // Accessed by picker. - std::atomic scheduler_state_{absl::Uniform(bit_gen_)}; -}; - -// -// OldWeightedRoundRobin::AddressWeight -// - -OldWeightedRoundRobin::AddressWeight::~AddressWeight() { - MutexLock lock(&wrr_->address_weight_map_mu_); - auto it = wrr_->address_weight_map_.find(key_); - if (it != wrr_->address_weight_map_.end() && it->second == this) { - wrr_->address_weight_map_.erase(it); - } -} - -void OldWeightedRoundRobin::AddressWeight::MaybeUpdateWeight( - double qps, double eps, double utilization, - float error_utilization_penalty) { - // Compute weight. - float weight = 0; - if (qps > 0 && utilization > 0) { - double penalty = 0.0; - if (eps > 0 && error_utilization_penalty > 0) { - penalty = eps / qps * error_utilization_penalty; - } - weight = qps / (utilization + penalty); - } - if (weight == 0) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] subchannel %s: qps=%f, eps=%f, utilization=%f: " - "error_util_penalty=%f, weight=%f (not updating)", - wrr_.get(), key_.c_str(), qps, eps, utilization, - error_utilization_penalty, weight); - } - return; - } - Timestamp now = Timestamp::Now(); - // Grab the lock and update the data. - MutexLock lock(&mu_); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] subchannel %s: qps=%f, eps=%f, utilization=%f " - "error_util_penalty=%f : setting weight=%f weight_=%f now=%s " - "last_update_time_=%s non_empty_since_=%s", - wrr_.get(), key_.c_str(), qps, eps, utilization, - error_utilization_penalty, weight, weight_, now.ToString().c_str(), - last_update_time_.ToString().c_str(), - non_empty_since_.ToString().c_str()); - } - if (non_empty_since_ == Timestamp::InfFuture()) non_empty_since_ = now; - weight_ = weight; - last_update_time_ = now; -} - -float OldWeightedRoundRobin::AddressWeight::GetWeight( - Timestamp now, Duration weight_expiration_period, - Duration blackout_period) { - MutexLock lock(&mu_); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] subchannel %s: getting weight: now=%s " - "weight_expiration_period=%s blackout_period=%s " - "last_update_time_=%s non_empty_since_=%s weight_=%f", - wrr_.get(), key_.c_str(), now.ToString().c_str(), - weight_expiration_period.ToString().c_str(), - blackout_period.ToString().c_str(), - last_update_time_.ToString().c_str(), - non_empty_since_.ToString().c_str(), weight_); - } - // If the most recent update was longer ago than the expiration - // period, reset non_empty_since_ so that we apply the blackout period - // again if we start getting data again in the future, and return 0. - if (now - last_update_time_ >= weight_expiration_period) { - non_empty_since_ = Timestamp::InfFuture(); - return 0; - } - // If we don't have at least blackout_period worth of data, return 0. - if (blackout_period > Duration::Zero() && - now - non_empty_since_ < blackout_period) { - return 0; - } - // Otherwise, return the weight. - return weight_; -} - -void OldWeightedRoundRobin::AddressWeight::ResetNonEmptySince() { - MutexLock lock(&mu_); - non_empty_since_ = Timestamp::InfFuture(); -} - -// -// OldWeightedRoundRobin::Picker::SubchannelCallTracker -// - -void OldWeightedRoundRobin::Picker::SubchannelCallTracker::Finish( - FinishArgs args) { - auto* backend_metric_data = - args.backend_metric_accessor->GetBackendMetricData(); - double qps = 0; - double eps = 0; - double utilization = 0; - if (backend_metric_data != nullptr) { - qps = backend_metric_data->qps; - eps = backend_metric_data->eps; - utilization = backend_metric_data->application_utilization; - if (utilization <= 0) { - utilization = backend_metric_data->cpu_utilization; - } - } - weight_->MaybeUpdateWeight(qps, eps, utilization, error_utilization_penalty_); -} - -// -// OldWeightedRoundRobin::Picker -// - -OldWeightedRoundRobin::Picker::Picker( - RefCountedPtr wrr, - WeightedRoundRobinSubchannelList* subchannel_list) - : wrr_(std::move(wrr)), - config_(wrr_->config_), - last_picked_index_(absl::Uniform(wrr_->bit_gen_)) { - for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { - WeightedRoundRobinSubchannelData* sd = subchannel_list->subchannel(i); - if (sd->connectivity_state() == GRPC_CHANNEL_READY) { - subchannels_.emplace_back(sd->subchannel()->Ref(), sd->weight()); - } - } - global_stats().IncrementWrrSubchannelListSize( - subchannel_list->num_subchannels()); - global_stats().IncrementWrrSubchannelReadySize(subchannels_.size()); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p picker %p] created picker from subchannel_list=%p " - "with %" PRIuPTR " subchannels", - wrr_.get(), this, subchannel_list, subchannels_.size()); - } - BuildSchedulerAndStartTimerLocked(); -} - -OldWeightedRoundRobin::Picker::~Picker() { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p picker %p] destroying picker", wrr_.get(), this); - } -} - -void OldWeightedRoundRobin::Picker::Orphan() { - MutexLock lock(&timer_mu_); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p picker %p] cancelling timer", wrr_.get(), this); - } - wrr_->channel_control_helper()->GetEventEngine()->Cancel(*timer_handle_); - timer_handle_.reset(); -} - -OldWeightedRoundRobin::PickResult OldWeightedRoundRobin::Picker::Pick( - PickArgs /*args*/) { - size_t index = PickIndex(); - GPR_ASSERT(index < subchannels_.size()); - auto& subchannel_info = subchannels_[index]; - // Collect per-call utilization data if needed. - std::unique_ptr subchannel_call_tracker; - if (!config_->enable_oob_load_report()) { - subchannel_call_tracker = std::make_unique( - subchannel_info.weight, config_->error_utilization_penalty()); - } - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p picker %p] returning index %" PRIuPTR ", subchannel=%p", - wrr_.get(), this, index, subchannel_info.subchannel.get()); - } - return PickResult::Complete(subchannel_info.subchannel, - std::move(subchannel_call_tracker)); -} - -size_t OldWeightedRoundRobin::Picker::PickIndex() { - // Grab a ref to the scheduler. - std::shared_ptr scheduler; - { - MutexLock lock(&scheduler_mu_); - scheduler = scheduler_; - } - // If we have a scheduler, use it to do a WRR pick. - if (scheduler != nullptr) return scheduler->Pick(); - // We don't have a scheduler (i.e., either all of the weights are 0 or - // there is only one subchannel), so fall back to RR. - return last_picked_index_.fetch_add(1) % subchannels_.size(); -} - -void OldWeightedRoundRobin::Picker::BuildSchedulerAndStartTimerLocked() { - // Build scheduler. - const Timestamp now = Timestamp::Now(); - std::vector weights; - weights.reserve(subchannels_.size()); - for (const auto& subchannel : subchannels_) { - weights.push_back(subchannel.weight->GetWeight( - now, config_->weight_expiration_period(), config_->blackout_period())); - } - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p picker %p] new weights: %s", wrr_.get(), this, - absl::StrJoin(weights, " ").c_str()); - } - auto scheduler_or = StaticStrideScheduler::Make( - weights, [this]() { return wrr_->scheduler_state_.fetch_add(1); }); - std::shared_ptr scheduler; - if (scheduler_or.has_value()) { - scheduler = - std::make_shared(std::move(*scheduler_or)); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p picker %p] new scheduler: %p", wrr_.get(), - this, scheduler.get()); - } - } else if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p picker %p] no scheduler, falling back to RR", - wrr_.get(), this); - } - { - MutexLock lock(&scheduler_mu_); - scheduler_ = std::move(scheduler); - } - // Start timer. - timer_handle_ = wrr_->channel_control_helper()->GetEventEngine()->RunAfter( - config_->weight_update_period(), - [self = WeakRefAsSubclass(), - work_serializer = wrr_->work_serializer()]() mutable { - ApplicationCallbackExecCtx callback_exec_ctx; - ExecCtx exec_ctx; - { - MutexLock lock(&self->timer_mu_); - if (self->timer_handle_.has_value()) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p picker %p] timer fired", - self->wrr_.get(), self.get()); - } - self->BuildSchedulerAndStartTimerLocked(); - } - } - if (!IsWorkSerializerDispatchEnabled()) { - // Release the picker ref inside the WorkSerializer. - work_serializer->Run([self = std::move(self)]() {}, DEBUG_LOCATION); - return; - } - self.reset(); - }); -} +#include "src/core/lib/json/json_object_loader.h" +#include "src/core/lib/transport/connectivity_state.h" +#include "src/core/load_balancing/backend_metric_data.h" +#include "src/core/load_balancing/endpoint_list.h" +#include "src/core/load_balancing/oob_backend_metric.h" +#include "src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h" +#include "src/core/load_balancing/weighted_target/weighted_target.h" +#include "src/core/load_balancing/lb_policy.h" +#include "src/core/load_balancing/lb_policy_factory.h" +#include "src/core/load_balancing/subchannel_interface.h" +#include "src/core/resolver/endpoint_addresses.h" -// -// WeightedRoundRobin -// +namespace grpc_core { -OldWeightedRoundRobin::OldWeightedRoundRobin(Args args) - : LoadBalancingPolicy(std::move(args)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p] Created", this); - } -} +TraceFlag grpc_lb_wrr_trace(false, "weighted_round_robin_lb"); -OldWeightedRoundRobin::~OldWeightedRoundRobin() { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p] Destroying Round Robin policy", this); - } - GPR_ASSERT(subchannel_list_ == nullptr); - GPR_ASSERT(latest_pending_subchannel_list_ == nullptr); -} +namespace { -void OldWeightedRoundRobin::ShutdownLocked() { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p] Shutting down", this); - } - shutdown_ = true; - subchannel_list_.reset(); - latest_pending_subchannel_list_.reset(); -} +constexpr absl::string_view kWeightedRoundRobin = "weighted_round_robin"; -void OldWeightedRoundRobin::ResetBackoffLocked() { - subchannel_list_->ResetBackoffLocked(); - if (latest_pending_subchannel_list_ != nullptr) { - latest_pending_subchannel_list_->ResetBackoffLocked(); - } -} +constexpr absl::string_view kMetricLabelLocality = "grpc.lb.locality"; -absl::Status OldWeightedRoundRobin::UpdateLocked(UpdateArgs args) { - global_stats().IncrementWrrUpdates(); - config_ = args.config.TakeAsSubclass(); - std::shared_ptr addresses; - if (args.addresses.ok()) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p] received update", this); - } - // Weed out duplicate addresses. Also sort the addresses so that if - // the set of the addresses don't change, their indexes in the - // subchannel list don't change, since this avoids unnecessary churn - // in the picker. Note that this does not ensure that if a given - // address remains present that it will have the same index; if, - // for example, an address at the end of the list is replaced with one - // that sorts much earlier in the list, then all of the addresses in - // between those two positions will have changed indexes. - struct AddressLessThan { - bool operator()(const ServerAddress& address1, - const ServerAddress& address2) const { - const grpc_resolved_address& addr1 = address1.address(); - const grpc_resolved_address& addr2 = address2.address(); - if (addr1.len != addr2.len) return addr1.len < addr2.len; - return memcmp(addr1.addr, addr2.addr, addr1.len) < 0; - } - }; - std::set ordered_addresses; - (*args.addresses)->ForEach([&](const EndpointAddresses& endpoint) { - ordered_addresses.insert(endpoint); - }); - addresses = std::make_shared( - ServerAddressList(ordered_addresses.begin(), ordered_addresses.end())); - } else { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p] received update with address error: %s", this, - args.addresses.status().ToString().c_str()); - } - // If we already have a subchannel list, then keep using the existing - // list, but still report back that the update was not accepted. - if (subchannel_list_ != nullptr) return args.addresses.status(); - } - // Create new subchannel list, replacing the previous pending list, if any. - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && - latest_pending_subchannel_list_ != nullptr) { - gpr_log(GPR_INFO, "[WRR %p] replacing previous pending subchannel list %p", - this, latest_pending_subchannel_list_.get()); - } - latest_pending_subchannel_list_ = - MakeRefCounted(this, addresses.get(), - args.args); - latest_pending_subchannel_list_->StartWatchingLocked(args.args); - // If the new list is empty, immediately promote it to - // subchannel_list_ and report TRANSIENT_FAILURE. - if (latest_pending_subchannel_list_->num_subchannels() == 0) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && - subchannel_list_ != nullptr) { - gpr_log(GPR_INFO, "[WRR %p] replacing previous subchannel list %p", this, - subchannel_list_.get()); - } - subchannel_list_ = std::move(latest_pending_subchannel_list_); - absl::Status status = - args.addresses.ok() ? absl::UnavailableError(absl::StrCat( - "empty address list: ", args.resolution_note)) - : args.addresses.status(); - channel_control_helper()->UpdateState( - GRPC_CHANNEL_TRANSIENT_FAILURE, status, - MakeRefCounted(status)); - return status; - } - // Otherwise, if this is the initial update, immediately promote it to - // subchannel_list_. - if (subchannel_list_.get() == nullptr) { - subchannel_list_ = std::move(latest_pending_subchannel_list_); - } - return absl::OkStatus(); -} +const auto kMetricRrFallback = + GlobalInstrumentsRegistry::RegisterUInt64Counter( + "grpc.lb.wrr.rr_fallback", + "EXPERIMENTAL. Number of scheduler updates in which there were not " + "enough endpoints with valid weight, which caused the WRR policy to " + "fall back to RR behavior.", + "{update}", {kMetricLabelTarget}, {kMetricLabelLocality}, false); -RefCountedPtr -OldWeightedRoundRobin::GetOrCreateWeight(const grpc_resolved_address& address) { - auto key = grpc_sockaddr_to_uri(&address); - if (!key.ok()) return nullptr; - MutexLock lock(&address_weight_map_mu_); - auto it = address_weight_map_.find(*key); - if (it != address_weight_map_.end()) { - auto weight = it->second->RefIfNonZero(); - if (weight != nullptr) return weight; - } - auto weight = MakeRefCounted( - RefAsSubclass(DEBUG_LOCATION, "AddressWeight"), - *key); - address_weight_map_.emplace(*key, weight.get()); - return weight; -} +const auto kMetricEndpointWeightNotYetUsable = + GlobalInstrumentsRegistry::RegisterUInt64Counter( + "grpc.lb.wrr.endpoint_weight_not_yet_usable", + "EXPERIMENTAL. Number of endpoints from each scheduler update that " + "don't yet have usable weight information (i.e., either the load " + "report has not yet been received, or it is within the blackout " + "period).", + "{endpoint}", {kMetricLabelTarget}, {kMetricLabelLocality}, false); -// -// OldWeightedRoundRobin::WeightedRoundRobinSubchannelList -// +const auto kMetricEndpointWeightStale = + GlobalInstrumentsRegistry::RegisterUInt64Counter( + "grpc.lb.wrr.endpoint_weight_stale", + "EXPERIMENTAL. Number of endpoints from each scheduler update whose " + "latest weight is older than the expiration period.", + "{endpoint}", {kMetricLabelTarget}, {kMetricLabelLocality}, false); -void OldWeightedRoundRobin::WeightedRoundRobinSubchannelList:: - UpdateStateCountersLocked(absl::optional old_state, - grpc_connectivity_state new_state) { - if (old_state.has_value()) { - GPR_ASSERT(*old_state != GRPC_CHANNEL_SHUTDOWN); - if (*old_state == GRPC_CHANNEL_READY) { - GPR_ASSERT(num_ready_ > 0); - --num_ready_; - } else if (*old_state == GRPC_CHANNEL_CONNECTING) { - GPR_ASSERT(num_connecting_ > 0); - --num_connecting_; - } else if (*old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - GPR_ASSERT(num_transient_failure_ > 0); - --num_transient_failure_; - } - } - GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN); - if (new_state == GRPC_CHANNEL_READY) { - ++num_ready_; - } else if (new_state == GRPC_CHANNEL_CONNECTING) { - ++num_connecting_; - } else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - ++num_transient_failure_; - } -} +const auto kMetricEndpointWeights = + GlobalInstrumentsRegistry::RegisterDoubleHistogram( + "grpc.lb.wrr.endpoint_weights", + "EXPERIMENTAL. The histogram buckets will be endpoint weight ranges. " + "Each bucket will be a counter that is incremented once for every " + "endpoint whose weight is within that range. Note that endpoints " + "without usable weights will have weight 0.", + "{weight}", {kMetricLabelTarget}, {kMetricLabelLocality}, false); -void OldWeightedRoundRobin::WeightedRoundRobinSubchannelList:: - MaybeUpdateAggregatedConnectivityStateLocked(absl::Status status_for_tf) { - OldWeightedRoundRobin* p = static_cast(policy()); - // If this is latest_pending_subchannel_list_, then swap it into - // subchannel_list_ in the following cases: - // - subchannel_list_ has no READY subchannels. - // - This list has at least one READY subchannel and we have seen the - // initial connectivity state notification for all subchannels. - // - All of the subchannels in this list are in TRANSIENT_FAILURE. - // (This may cause the channel to go from READY to TRANSIENT_FAILURE, - // but we're doing what the control plane told us to do.) - if (p->latest_pending_subchannel_list_.get() == this && - (p->subchannel_list_->num_ready_ == 0 || - (num_ready_ > 0 && AllSubchannelsSeenInitialState()) || - num_transient_failure_ == num_subchannels())) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - const std::string old_counters_string = - p->subchannel_list_ != nullptr ? p->subchannel_list_->CountersString() - : ""; - gpr_log( - GPR_INFO, - "[WRR %p] swapping out subchannel list %p (%s) in favor of %p (%s)", - p, p->subchannel_list_.get(), old_counters_string.c_str(), this, - CountersString().c_str()); - } - p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); - } - // Only set connectivity state if this is the current subchannel list. - if (p->subchannel_list_.get() != this) return; - // First matching rule wins: - // 1) ANY subchannel is READY => policy is READY. - // 2) ANY subchannel is CONNECTING => policy is CONNECTING. - // 3) ALL subchannels are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. - if (num_ready_ > 0) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p] reporting READY with subchannel list %p", p, - this); - } - p->channel_control_helper()->UpdateState( - GRPC_CHANNEL_READY, absl::Status(), - MakeRefCounted(p->RefAsSubclass(), - this)); - } else if (num_connecting_ > 0) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p] reporting CONNECTING with subchannel list %p", - p, this); - } - p->channel_control_helper()->UpdateState( - GRPC_CHANNEL_CONNECTING, absl::Status(), - MakeRefCounted(p->Ref(DEBUG_LOCATION, "QueuePicker"))); - } else if (num_transient_failure_ == num_subchannels()) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log( - GPR_INFO, - "[WRR %p] reporting TRANSIENT_FAILURE with subchannel list %p: %s", p, - this, status_for_tf.ToString().c_str()); - } - if (!status_for_tf.ok()) { - last_failure_ = absl::UnavailableError( - absl::StrCat("connections to all backends failing; last error: ", - status_for_tf.ToString())); - } - p->channel_control_helper()->UpdateState( - GRPC_CHANNEL_TRANSIENT_FAILURE, last_failure_, - MakeRefCounted(last_failure_)); - } -} +// Config for WRR policy. +class WeightedRoundRobinConfig : public LoadBalancingPolicy::Config { + public: + WeightedRoundRobinConfig() = default; -// -// OldWeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher -// + WeightedRoundRobinConfig(const WeightedRoundRobinConfig&) = delete; + WeightedRoundRobinConfig& operator=(const WeightedRoundRobinConfig&) = delete; -void OldWeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher:: - OnBackendMetricReport(const BackendMetricData& backend_metric_data) { - double utilization = backend_metric_data.application_utilization; - if (utilization <= 0) { - utilization = backend_metric_data.cpu_utilization; - } - weight_->MaybeUpdateWeight(backend_metric_data.qps, backend_metric_data.eps, - utilization, error_utilization_penalty_); -} + WeightedRoundRobinConfig(WeightedRoundRobinConfig&&) = delete; + WeightedRoundRobinConfig& operator=(WeightedRoundRobinConfig&&) = delete; -// -// OldWeightedRoundRobin::WeightedRoundRobinSubchannelData -// + absl::string_view name() const override { return kWeightedRoundRobin; } -OldWeightedRoundRobin::WeightedRoundRobinSubchannelData:: - WeightedRoundRobinSubchannelData( - SubchannelList* subchannel_list, - const ServerAddress& address, RefCountedPtr sc) - : SubchannelData(subchannel_list, address, std::move(sc)), - weight_(static_cast(subchannel_list->policy()) - ->GetOrCreateWeight(address.address())) { - // Start OOB watch if configured. - OldWeightedRoundRobin* p = - static_cast(subchannel_list->policy()); - if (p->config_->enable_oob_load_report()) { - subchannel()->AddDataWatcher(MakeOobBackendMetricWatcher( - p->config_->oob_reporting_period(), - std::make_unique(weight_, - p->config_->error_utilization_penalty()))); + bool enable_oob_load_report() const { return enable_oob_load_report_; } + Duration oob_reporting_period() const { return oob_reporting_period_; } + Duration blackout_period() const { return blackout_period_; } + Duration weight_update_period() const { return weight_update_period_; } + Duration weight_expiration_period() const { + return weight_expiration_period_; } -} + float error_utilization_penalty() const { return error_utilization_penalty_; } -void OldWeightedRoundRobin::WeightedRoundRobinSubchannelData:: - ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) { - OldWeightedRoundRobin* p = - static_cast(subchannel_list()->policy()); - GPR_ASSERT(subchannel() != nullptr); - // If this is not the initial state notification and the new state is - // TRANSIENT_FAILURE or IDLE, re-resolve. - // Note that we don't want to do this on the initial state notification, - // because that would result in an endless loop of re-resolution. - if (old_state.has_value() && (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE || - new_state == GRPC_CHANNEL_IDLE)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] Subchannel %p reported %s; requesting re-resolution", p, - subchannel(), ConnectivityStateName(new_state)); - } - p->channel_control_helper()->RequestReresolution(); - } - if (new_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] Subchannel %p reported IDLE; requesting connection", p, - subchannel()); - } - subchannel()->RequestConnection(); - } else if (new_state == GRPC_CHANNEL_READY) { - // If we transition back to READY state, restart the blackout period. - // Skip this if this is the initial notification for this - // subchannel (which happens whenever we get updated addresses and - // create a new endpoint list). Also skip it if the previous state - // was READY (which should never happen in practice, but we've seen - // at least one bug that caused this in the outlier_detection - // policy, so let's be defensive here). - // - // Note that we cannot guarantee that we will never receive - // lingering callbacks for backend metric reports from the previous - // connection after the new connection has been established, but they - // should be masked by new backend metric reports from the new - // connection by the time the blackout period ends. - if (old_state.has_value() && old_state != GRPC_CHANNEL_READY) { - weight_->ResetNonEmptySince(); - } + static const JsonLoaderInterface* JsonLoader(const JsonArgs&) { + static const auto* loader = + JsonObjectLoader() + .OptionalField("enableOobLoadReport", + &WeightedRoundRobinConfig::enable_oob_load_report_) + .OptionalField("oobReportingPeriod", + &WeightedRoundRobinConfig::oob_reporting_period_) + .OptionalField("blackoutPeriod", + &WeightedRoundRobinConfig::blackout_period_) + .OptionalField("weightUpdatePeriod", + &WeightedRoundRobinConfig::weight_update_period_) + .OptionalField("weightExpirationPeriod", + &WeightedRoundRobinConfig::weight_expiration_period_) + .OptionalField( + "errorUtilizationPenalty", + &WeightedRoundRobinConfig::error_utilization_penalty_) + .Finish(); + return loader; } - // Update logical connectivity state. - UpdateLogicalConnectivityStateLocked(new_state); - // Update the policy state. - subchannel_list()->MaybeUpdateAggregatedConnectivityStateLocked( - connectivity_status()); -} -void OldWeightedRoundRobin::WeightedRoundRobinSubchannelData:: - UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state) { - OldWeightedRoundRobin* p = - static_cast(subchannel_list()->policy()); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log( - GPR_INFO, - "[WRR %p] connectivity changed for subchannel %p, subchannel_list %p " - "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels(), - (logical_connectivity_state_.has_value() - ? ConnectivityStateName(*logical_connectivity_state_) - : "N/A"), - ConnectivityStateName(connectivity_state)); - } - // Decide what state to report for aggregation purposes. - // If the last logical state was TRANSIENT_FAILURE, then ignore the - // state change unless the new state is READY. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE && - connectivity_state != GRPC_CHANNEL_READY) { - return; - } - // If the new state is IDLE, treat it as CONNECTING, since it will - // immediately transition into CONNECTING anyway. - if (connectivity_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] subchannel %p, subchannel_list %p (index %" PRIuPTR - " of %" PRIuPTR "): treating IDLE as CONNECTING", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels()); + void JsonPostLoad(const Json&, const JsonArgs&, ValidationErrors* errors) { + // Impose lower bound of 100ms on weightUpdatePeriod. + weight_update_period_ = + std::max(weight_update_period_, Duration::Milliseconds(100)); + if (error_utilization_penalty_ < 0) { + ValidationErrors::ScopedField field(errors, ".errorUtilizationPenalty"); + errors->AddError("must be non-negative"); } - connectivity_state = GRPC_CHANNEL_CONNECTING; - } - // If no change, return false. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == connectivity_state) { - return; } - // Otherwise, update counters and logical state. - subchannel_list()->UpdateStateCountersLocked(logical_connectivity_state_, - connectivity_state); - logical_connectivity_state_ = connectivity_state; -} -// New WRR LB policy (with delegation to pick_first) + private: + bool enable_oob_load_report_ = false; + Duration oob_reporting_period_ = Duration::Seconds(10); + Duration blackout_period_ = Duration::Seconds(10); + Duration weight_update_period_ = Duration::Seconds(1); + Duration weight_expiration_period_ = Duration::Minutes(3); + float error_utilization_penalty_ = 1.0; +}; + +// WRR LB policy class WeightedRoundRobin : public LoadBalancingPolicy { public: explicit WeightedRoundRobin(Args args); @@ -1858,9 +1002,6 @@ class WeightedRoundRobinFactory : public LoadBalancingPolicyFactory { public: OrphanablePtr CreateLoadBalancingPolicy( LoadBalancingPolicy::Args args) const override { - if (!IsWrrDelegateToPickFirstEnabled()) { - return MakeOrphanable(std::move(args)); - } return MakeOrphanable(std::move(args)); } diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index 8e93de35f0f..1dbb5476037 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -35,7 +35,6 @@ #include #include -#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/time.h" @@ -224,7 +223,6 @@ TEST_F(OutlierDetectionTest, FailurePercentage) { // Advance time and run the timer callback to trigger ejection. IncrementTimeBy(Duration::Seconds(10)); gpr_log(GPR_INFO, "### ejection complete"); - if (!IsRoundRobinDelegateToPickFirstEnabled()) ExpectReresolutionRequest(); // Expect a picker update. std::vector remaining_addresses; for (const auto& addr : kAddresses) { @@ -239,7 +237,6 @@ TEST_F(OutlierDetectionTest, FailurePercentage) { } TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { - if (!IsRoundRobinDelegateToPickFirstEnabled()) return; // Can't use timer duration expectation here, because the Happy // Eyeballs timer inside pick_first will use a different duration than // the timer in outlier_detection. @@ -338,7 +335,6 @@ TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { } TEST_F(OutlierDetectionTest, EjectionStateResetsWhenEndpointAddressesChange) { - if (!IsRoundRobinDelegateToPickFirstEnabled()) return; // Can't use timer duration expectation here, because the Happy // Eyeballs timer inside pick_first will use a different duration than // the timer in outlier_detection. diff --git a/test/core/client_channel/lb_policy/round_robin_test.cc b/test/core/client_channel/lb_policy/round_robin_test.cc index ce49f7ef6b4..9af35ed951d 100644 --- a/test/core/client_channel/lb_policy/round_robin_test.cc +++ b/test/core/client_channel/lb_policy/round_robin_test.cc @@ -24,7 +24,6 @@ #include -#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/resolver/endpoint_addresses.h" @@ -70,7 +69,6 @@ TEST_F(RoundRobinTest, AddressUpdates) { } TEST_F(RoundRobinTest, MultipleAddressesPerEndpoint) { - if (!IsRoundRobinDelegateToPickFirstEnabled()) return; constexpr std::array kEndpoint1Addresses = { "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; constexpr std::array kEndpoint2Addresses = { diff --git a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc index 4572429513e..edc89309eca 100644 --- a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc +++ b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc @@ -40,7 +40,6 @@ #include #include -#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" @@ -858,7 +857,6 @@ TEST_F(WeightedRoundRobinTest, ZeroErrorUtilPenalty) { } TEST_F(WeightedRoundRobinTest, MultipleAddressesPerEndpoint) { - if (!IsWrrDelegateToPickFirstEnabled()) return; // Can't use timer duration expectation here, because the Happy // Eyeballs timer inside pick_first will use a different duration than // the timer in WRR. @@ -1066,7 +1064,6 @@ TEST_F(WeightedRoundRobinTest, MetricDefinitionEndpointWeights) { } TEST_F(WeightedRoundRobinTest, MetricValues) { - if (!IsWrrDelegateToPickFirstEnabled()) return; const auto kRrFallback = GlobalInstrumentsRegistryTestPeer::FindUInt64CounterHandleByName( "grpc.lb.wrr.rr_fallback") diff --git a/test/core/client_channel/lb_policy/xds_override_host_test.cc b/test/core/client_channel/lb_policy/xds_override_host_test.cc index d1ddeae05c7..e6c74b7b2f1 100644 --- a/test/core/client_channel/lb_policy/xds_override_host_test.cc +++ b/test/core/client_channel/lb_policy/xds_override_host_test.cc @@ -40,7 +40,6 @@ #include "src/core/ext/filters/stateful_session/stateful_session_filter.h" #include "src/core/ext/xds/xds_health_status.h" #include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/json/json.h" @@ -511,7 +510,6 @@ TEST_F(XdsOverrideHostTest, OverrideHostStatus) { } TEST_F(XdsOverrideHostTest, MultipleAddressesPerEndpoint) { - if (!IsRoundRobinDelegateToPickFirstEnabled()) return; constexpr std::array kEndpoint1Addresses = { "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; constexpr std::array kEndpoint2Addresses = { diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index 11bc2492cdd..d5ee65ad9d0 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -56,7 +56,6 @@ #include "src/core/lib/backoff/backoff.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/config_vars.h" -#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/crash.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/env.h" @@ -1982,13 +1981,8 @@ TEST_F(RoundRobinTest, HealthChecking) { EXPECT_THAT( status.error_message(), ::testing::MatchesRegex( - grpc_core::IsRoundRobinDelegateToPickFirstEnabled() - ? "connections to all backends failing; last error: " - "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " - "backend unhealthy" - : "connections to all backends failing; last error: " - "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " - "backend unhealthy")); + "connections to all backends failing; last error: " + "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: backend unhealthy")); return false; }); // Clean up. @@ -2048,13 +2042,8 @@ TEST_F(RoundRobinTest, WithHealthCheckingInhibitPerChannel) { EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1)); CheckRpcSendFailure( DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE, - grpc_core::IsRoundRobinDelegateToPickFirstEnabled() - ? "connections to all backends failing; last error: " - "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " - "backend unhealthy" - : "connections to all backends failing; last error: " - "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " - "backend unhealthy"); + "connections to all backends failing; last error: " + "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: backend unhealthy"); // Second channel should be READY. EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1)); CheckRpcSendOk(DEBUG_LOCATION, stub2); @@ -2099,13 +2088,8 @@ TEST_F(RoundRobinTest, HealthCheckingServiceNamePerChannel) { EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1)); CheckRpcSendFailure( DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE, - grpc_core::IsRoundRobinDelegateToPickFirstEnabled() - ? "connections to all backends failing; last error: " - "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " - "backend unhealthy" - : "connections to all backends failing; last error: " - "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " - "backend unhealthy"); + "connections to all backends failing; last error: " + "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: backend unhealthy"); // Second channel should be READY. EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1)); CheckRpcSendOk(DEBUG_LOCATION, stub2); diff --git a/test/cpp/end2end/xds/xds_cluster_end2end_test.cc b/test/cpp/end2end/xds/xds_cluster_end2end_test.cc index 7322f06c9f6..2772a2370cf 100644 --- a/test/cpp/end2end/xds/xds_cluster_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_cluster_end2end_test.cc @@ -27,7 +27,6 @@ #include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/channel/call_tracer.h" #include "src/core/lib/config/config_vars.h" -#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/surface/call.h" #include "src/proto/grpc/testing/xds/v3/orca_load_report.pb.h" #include "test/core/util/fake_stats_plugin.h" @@ -482,7 +481,6 @@ TEST_P(EdsTest, Vanilla) { } TEST_P(EdsTest, MultipleAddressesPerEndpoint) { - if (!grpc_core::IsRoundRobinDelegateToPickFirstEnabled()) return; grpc_core::testing::ScopedExperimentalEnvVar env( "GRPC_EXPERIMENTAL_XDS_DUALSTACK_ENDPOINTS"); const size_t kNumRpcsPerAddress = 10; diff --git a/test/cpp/end2end/xds/xds_override_host_end2end_test.cc b/test/cpp/end2end/xds/xds_override_host_end2end_test.cc index 65d5c43f9bb..fbd3793b206 100644 --- a/test/cpp/end2end/xds/xds_override_host_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_override_host_end2end_test.cc @@ -23,7 +23,6 @@ #include "absl/strings/str_split.h" #include "src/core/lib/config/config_vars.h" -#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/time.h" #include "src/proto/grpc/testing/xds/v3/stateful_session.pb.h" #include "src/proto/grpc/testing/xds/v3/stateful_session_cookie.pb.h" @@ -703,7 +702,6 @@ TEST_P(OverrideHostTest, TTLSetsMaxAge) { } TEST_P(OverrideHostTest, MultipleAddressesPerEndpoint) { - if (!grpc_core::IsRoundRobinDelegateToPickFirstEnabled()) return; grpc_core::testing::ScopedExperimentalEnvVar env( "GRPC_EXPERIMENTAL_XDS_DUALSTACK_ENDPOINTS"); // Create 3 backends, but leave backend 0 unstarted. diff --git a/test/cpp/end2end/xds/xds_wrr_end2end_test.cc b/test/cpp/end2end/xds/xds_wrr_end2end_test.cc index a89a3021e73..88f7513b532 100644 --- a/test/cpp/end2end/xds/xds_wrr_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_wrr_end2end_test.cc @@ -27,7 +27,6 @@ #include "src/core/client_channel/backup_poller.h" #include "src/core/lib/config/config_vars.h" -#include "src/core/lib/experiments/experiments.h" #include "src/proto/grpc/testing/xds/v3/client_side_weighted_round_robin.grpc.pb.h" #include "src/proto/grpc/testing/xds/v3/wrr_locality.grpc.pb.h" #include "test/core/util/fake_stats_plugin.h" @@ -102,7 +101,6 @@ TEST_P(WrrTest, Basic) { } TEST_P(WrrTest, MetricsHaveLocalityLabel) { - if (!grpc_core::IsWrrDelegateToPickFirstEnabled()) return; const auto kEndpointWeights = grpc_core::GlobalInstrumentsRegistryTestPeer:: FindDoubleHistogramHandleByName("grpc.lb.wrr.endpoint_weights") diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index 35da52b9935..ba4cbaec282 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -2897,7 +2897,6 @@ src/core/load_balancing/rls/rls.cc \ src/core/load_balancing/rls/rls.h \ src/core/load_balancing/round_robin/round_robin.cc \ src/core/load_balancing/subchannel_interface.h \ -src/core/load_balancing/subchannel_list.h \ src/core/load_balancing/weighted_round_robin/static_stride_scheduler.cc \ src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h \ src/core/load_balancing/weighted_round_robin/weighted_round_robin.cc \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index 1f4c698de8b..8796adfd4c6 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -2674,7 +2674,6 @@ src/core/load_balancing/rls/rls.cc \ src/core/load_balancing/rls/rls.h \ src/core/load_balancing/round_robin/round_robin.cc \ src/core/load_balancing/subchannel_interface.h \ -src/core/load_balancing/subchannel_list.h \ src/core/load_balancing/weighted_round_robin/static_stride_scheduler.cc \ src/core/load_balancing/weighted_round_robin/static_stride_scheduler.h \ src/core/load_balancing/weighted_round_robin/weighted_round_robin.cc \