From 63ec566f3e5c7524b4ae8ed3335ea78b1a97f5e2 Mon Sep 17 00:00:00 2001
From: AJ Heller
Date: Tue, 9 May 2023 16:04:01 -0700
Subject: [PATCH] [EventEngine] Reduce the size of some thread pool tests
 (#33055)

The `CanStartLotsOfClosures` test was sometimes taking over 60s to run
([example](https://source.cloud.google.com/results/invocations/d96c89f9-03f9-43fd-a729-d744d7499532/targets;query=thread_pool_test/%2F%2Ftest%2Fcore%2Fevent_engine:thread_pool_test@poller%3Depoll1/log)).
More often than not, though, the test would take < 5s
([example](https://source.cloud.google.com/results/invocations/95d32b32-5df7-4dd4-a82c-1024869b09c8/targets;query=thread_pool_test/%2F%2Ftest%2Fcore%2Fevent_engine:thread_pool_test/log)).
Both examples are from before the tests changed with the introduction of the
work-stealing thread pool
(https://github.com/grpc/grpc/commit/3fb738b9b1835b8b1faf08cfacfe0b516fb29e22).

This PR reduces the closure count to 500k for the `CanStartLotsOfClosures`
test, and changes the blocking-closure scale-test to exercise the work
stealing implementation alone.
---
 .../thread_pool/original_thread_pool.cc    |  3 ++-
 test/core/event_engine/thread_pool_test.cc | 18 ++++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/core/lib/event_engine/thread_pool/original_thread_pool.cc b/src/core/lib/event_engine/thread_pool/original_thread_pool.cc
index 3d2533aec47..95f8b6cfd5f 100644
--- a/src/core/lib/event_engine/thread_pool/original_thread_pool.cc
+++ b/src/core/lib/event_engine/thread_pool/original_thread_pool.cc
@@ -229,7 +229,8 @@ void OriginalThreadPool::ThreadCount::BlockUntilThreadCount(int threads,
     // fork.
     cv_.WaitWithTimeout(&thread_count_mu_, absl::Seconds(3));
     if (threads_ > threads && absl::Now() - last_log > absl::Seconds(1)) {
-      gpr_log(GPR_ERROR, "Waiting for thread pool to idle before %s", why);
+      gpr_log(GPR_ERROR, "Waiting for thread pool to idle before %s (%d to %d)",
+              why, threads_, threads);
       last_log = absl::Now();
     }
   }
diff --git a/test/core/event_engine/thread_pool_test.cc b/test/core/event_engine/thread_pool_test.cc
index d9e595be889..f2142854fc1 100644
--- a/test/core/event_engine/thread_pool_test.cc
+++ b/test/core/event_engine/thread_pool_test.cc
@@ -146,13 +146,18 @@ void ScheduleTwiceUntilZero(ThreadPool* p, std::atomic<int>& runcount, int n) {
 }
 
 TYPED_TEST(ThreadPoolTest, CanStartLotsOfClosures) {
+  // TODO(hork): this is nerfed due to the original thread pool taking eons to
+  // finish running 2M closures in some cases (usually < 10s, sometimes over
+  // 90s). Reset the branch factor to 20 when all thread pool runtimes
+  // stabilize.
   TypeParam p(8);
   std::atomic<int> runcount{0};
   // Our first thread pool implementation tried to create ~1M threads for this
   // test.
-  ScheduleTwiceUntilZero(&p, runcount, 20);
+  int branch_factor = 18;
+  ScheduleTwiceUntilZero(&p, runcount, branch_factor);
   p.Quiesce();
-  ASSERT_EQ(runcount.load(), pow(2, 21) - 1);
+  ASSERT_EQ(runcount.load(), pow(2, branch_factor + 1) - 1);
 }
 
 TYPED_TEST(ThreadPoolTest, ScalesWhenBackloggedFromSingleThreadLocalQueue) {
@@ -182,9 +187,14 @@ TYPED_TEST(ThreadPoolTest, ScalesWhenBackloggedFromSingleThreadLocalQueue) {
   p.Quiesce();
 }
 
-TYPED_TEST(ThreadPoolTest, ScalesWhenBackloggedFromGlobalQueue) {
+class WorkStealingThreadPoolTest : public ::testing::Test {};
+
+// TODO(hork): This is currently a pathological case for the original thread
+// pool, it gets wedged in ~3% of runs when new threads fail to start. When that
+// is fixed, or the implementation is deleted, make this a typed test again.
+TEST_F(WorkStealingThreadPoolTest, ScalesWhenBackloggedFromGlobalQueue) {
   int pool_thread_count = 8;
-  TypeParam p(pool_thread_count);
+  WorkStealingThreadPool p(pool_thread_count);
   grpc_core::Notification signal;
   // Ensures the pool is saturated before signaling closures to continue.
   std::atomic<int> waiters{0};
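
Note on the closure counts above: as the helper's name and the test's assertion
suggest, `ScheduleTwiceUntilZero(&p, runcount, n)` bumps the run count and
schedules two copies of itself with n - 1 until n reaches zero, so a branch
factor of n produces 2^(n+1) - 1 closures in total. A factor of 18 gives
2^19 - 1 = 524,287 (the "500k" in the description), while the previous factor
of 20 gave 2^21 - 1 = 2,097,151 (the "2M closures" in the TODO). The sketch
below reproduces only that counting; it runs the recursion inline rather than
on a thread pool, and its function name and structure are illustrative
assumptions, not the test's real helper.

// Illustrative, dependency-free sketch of the doubling recursion assumed to
// drive ScheduleTwiceUntilZero; the thread pool is replaced by direct
// recursion so the closure-count arithmetic can be checked in isolation.
#include <atomic>
#include <cstdio>

// Each "closure" bumps the counter and, while n > 0, runs two copies of
// itself with n - 1. Total count: T(n) = 1 + 2 * T(n - 1), T(0) = 1, which
// solves to T(n) = 2^(n+1) - 1.
void ScheduleTwiceUntilZeroSketch(std::atomic<int>& runcount, int n) {
  runcount.fetch_add(1, std::memory_order_relaxed);
  if (n == 0) return;
  ScheduleTwiceUntilZeroSketch(runcount, n - 1);
  ScheduleTwiceUntilZeroSketch(runcount, n - 1);
}

int main() {
  std::atomic<int> runcount{0};
  ScheduleTwiceUntilZeroSketch(runcount, 18);
  // Branch factor 18 -> 2^19 - 1 = 524,287 closures (~500k);
  // the previous factor of 20 -> 2^21 - 1 = 2,097,151 (~2M).
  std::printf("closures: %d, expected: %d\n", runcount.load(), (1 << 19) - 1);
  return 0;
}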