From 76bbbb06b28f1064af49afb71e1cd60d370dae23 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 17 Sep 2024 11:20:18 -0700 Subject: [PATCH] [xds_cluster_e2e_test] fix flake in DropConfigUpdate test (#37735) Changed test to add a new backend, as a more reliable method for determining when the client has seen the update. This fixes flakes like the following: https://btx.cloud.google.com/invocations/d4267c2a-3557-4e09-8439-d7f24637a5ba/targets/%2F%2Ftest%2Fcpp%2Fend2end%2Fxds:xds_cluster_end2end_test@poller%3Dpoll;config=c4ae5af353698403518bd66f686ce4f7f10d865e4cdcccbb7036582cbc9fa7d6/tests Closes #37735 COPYBARA_INTEGRATE_REVIEW=https://github.com/grpc/grpc/pull/37735 from markdroth:xds_cluster_e2e_flake 68edb080daebd056e456037e8f792c66a22a99a1 PiperOrigin-RevId: 675640226 --- .../end2end/xds/xds_cluster_end2end_test.cc | 40 +++++++------------ 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/test/cpp/end2end/xds/xds_cluster_end2end_test.cc b/test/cpp/end2end/xds/xds_cluster_end2end_test.cc index afb41a287be..1190111fe1a 100644 --- a/test/cpp/end2end/xds/xds_cluster_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_cluster_end2end_test.cc @@ -1073,7 +1073,7 @@ TEST_P(EdsTest, DropPerTenThousand) { // Tests that drop is working correctly after update. TEST_P(EdsTest, DropConfigUpdate) { - CreateAndStartBackends(1); + CreateAndStartBackends(2); const uint32_t kDropPerMillionForLb = 100000; const uint32_t kDropPerMillionForThrottle = 200000; const double kErrorTolerance = 0.05; @@ -1085,8 +1085,8 @@ TEST_P(EdsTest, DropConfigUpdate) { ComputeIdealNumRpcs(kDropRateForLb, kErrorTolerance); const size_t kNumRpcsBoth = ComputeIdealNumRpcs(kDropRateForLbAndThrottle, kErrorTolerance); - // The first ADS response contains one drop category. - EdsResourceArgs args({{"locality0", CreateEndpointsForBackends()}}); + // The first EDS response contains backend 0 and one drop category. + EdsResourceArgs args({{"locality0", CreateEndpointsForBackends(0, 1)}}); args.drop_categories = {{kLbDropType, kDropPerMillionForLb}}; balancer_->ads_service()->SetEdsResource(BuildEdsResource(args)); // Send kNumRpcsLbOnly RPCs and count the drops. @@ -1100,32 +1100,20 @@ TEST_P(EdsTest, DropConfigUpdate) { LOG(INFO) << "First batch drop rate " << seen_drop_rate; EXPECT_THAT(seen_drop_rate, ::testing::DoubleNear(kDropRateForLb, kErrorTolerance)); - // The second ADS response contains two drop categories, send an update EDS - // response. + // The second EDS response contains both backends and two drop categories. + args = EdsResourceArgs({{"locality0", CreateEndpointsForBackends()}}); args.drop_categories = {{kLbDropType, kDropPerMillionForLb}, {kThrottleDropType, kDropPerMillionForThrottle}}; balancer_->ads_service()->SetEdsResource(BuildEdsResource(args)); - // Wait until the drop rate increases to the middle of the two configs, - // which implies that the update has been in effect. - const double kDropRateThreshold = - (kDropRateForLb + kDropRateForLbAndThrottle) / 2; - size_t num_rpcs = kNumRpcsBoth; - SendRpcsUntil( - DEBUG_LOCATION, - [&](const RpcResult& result) { - ++num_rpcs; - if (result.status.ok()) { - EXPECT_EQ(result.response.message(), kRequestMessage); - } else { - EXPECT_EQ(result.status.error_code(), StatusCode::UNAVAILABLE); - EXPECT_THAT(result.status.error_message(), - ::testing::StartsWith(kStatusMessageDropPrefix)); - ++num_drops; - } - seen_drop_rate = static_cast(num_drops) / num_rpcs; - return seen_drop_rate < kDropRateThreshold; - }, - /*timeout_ms=*/40000); + // Wait until backend 1 sees traffic, so that we know the client has + // seen the update. + WaitForBackend(DEBUG_LOCATION, 1, [&](const RpcResult& result) { + if (!result.status.ok()) { + EXPECT_EQ(result.status.error_code(), StatusCode::UNAVAILABLE); + EXPECT_THAT(result.status.error_message(), + ::testing::StartsWith(kStatusMessageDropPrefix)); + } + }); // Send kNumRpcsBoth RPCs and count the drops. LOG(INFO) << "========= BEFORE SECOND BATCH =========="; num_drops = SendRpcsAndCountFailuresWithMessage(DEBUG_LOCATION, kNumRpcsBoth,