From b95772eeb926f78b8ac14e03b36ed3e73b2e1a2c Mon Sep 17 00:00:00 2001 From: Sree Kuchibhotla Date: Mon, 23 Jul 2018 23:38:13 -0700 Subject: [PATCH] Add Tests in Core and C++ and fix a few related bugs in thread_manager.cc --- src/core/lib/iomgr/resource_quota.cc | 7 + src/cpp/server/server_cc.cc | 10 +- src/cpp/thread_manager/thread_manager.cc | 70 +++++---- src/cpp/thread_manager/thread_manager.h | 42 +++-- test/core/iomgr/resource_quota_test.cc | 96 ++++++++++++ .../cpp/thread_manager/thread_manager_test.cc | 147 +++++++++++++----- 6 files changed, 288 insertions(+), 84 deletions(-) diff --git a/src/core/lib/iomgr/resource_quota.cc b/src/core/lib/iomgr/resource_quota.cc index a30688bd873..67d05aa202d 100644 --- a/src/core/lib/iomgr/resource_quota.cc +++ b/src/core/lib/iomgr/resource_quota.cc @@ -547,6 +547,11 @@ static void ru_shutdown(void* ru, grpc_error* error) { static void ru_destroy(void* ru, grpc_error* error) { grpc_resource_user* resource_user = static_cast(ru); GPR_ASSERT(gpr_atm_no_barrier_load(&resource_user->refs) == 0); + // Free all the remaining thread quota + grpc_resource_user_free_threads( + resource_user, + static_cast(gpr_atm_no_barrier_load(&resource_user->num_threads))); + for (int i = 0; i < GRPC_RULIST_COUNT; i++) { rulist_remove(resource_user, static_cast(i)); } @@ -642,6 +647,7 @@ grpc_resource_quota* grpc_resource_quota_create(const char* name) { void grpc_resource_quota_unref_internal(grpc_resource_quota* resource_quota) { if (gpr_unref(&resource_quota->refs)) { + GPR_ASSERT(resource_quota->num_threads == 0); // No outstanding thd quota GRPC_COMBINER_UNREF(resource_quota->combiner, "resource_quota"); gpr_free(resource_quota->name); gpr_free(resource_quota); @@ -846,6 +852,7 @@ void grpc_resource_user_free_threads(grpc_resource_user* resource_user, "Releasing more threads (%d) that currently allocated (rq threads: " "%d, ru threads: %d)", thd_count, old_cnt, rq->num_threads + thd_count); + abort(); } 
gpr_mu_unlock(&resource_user->resource_quota->thd_mu); } diff --git a/src/cpp/server/server_cc.cc b/src/cpp/server/server_cc.cc index 6e6e0bfffe3..786ef44e3ef 100644 --- a/src/cpp/server/server_cc.cc +++ b/src/cpp/server/server_cc.cc @@ -47,6 +47,12 @@ namespace grpc { namespace { +// The default value for maximum number of threads that can be created in the +// sync server. This value of 1500 is empirically chosen. To increase the max +// number of threads in a sync server, pass a custom ResourceQuota object (with +// the desired number of max-threads set) to the server builder +#define DEFAULT_MAX_SYNC_SERVER_THREADS 1500 + class DefaultGlobalCallbacks final : public Server::GlobalCallbacks { public: ~DefaultGlobalCallbacks() override {} @@ -395,7 +401,9 @@ Server::Server( if (sync_server_cqs_ != nullptr) { bool default_rq_created = false; if (server_rq == nullptr) { - server_rq = grpc_resource_quota_create("SyncServer-Default"); + server_rq = grpc_resource_quota_create("SyncServer-default-rq"); + grpc_resource_quota_set_max_threads(server_rq, + DEFAULT_MAX_SYNC_SERVER_THREADS); default_rq_created = true; } diff --git a/src/cpp/thread_manager/thread_manager.cc b/src/cpp/thread_manager/thread_manager.cc index c0fa98798a0..5d367511e2e 100644 --- a/src/cpp/thread_manager/thread_manager.cc +++ b/src/cpp/thread_manager/thread_manager.cc @@ -22,8 +22,8 @@ #include #include - #include "src/core/lib/gprpp/thd.h" +#include "src/core/lib/iomgr/exec_ctx.h" namespace grpc { @@ -55,7 +55,8 @@ ThreadManager::ThreadManager(const char* name, num_pollers_(0), min_pollers_(min_pollers), max_pollers_(max_pollers == -1 ? 
INT_MAX : max_pollers), - num_threads_(0) { + num_threads_(0), + max_active_threads_sofar_(0) { resource_user_ = grpc_resource_user_create(resource_quota, name); } @@ -65,6 +66,7 @@ ThreadManager::~ThreadManager() { GPR_ASSERT(num_threads_ == 0); } + grpc_core::ExecCtx exec_ctx; // grpc_resource_user_unref needs an exec_ctx grpc_resource_user_unref(resource_user_); CleanupCompletedThreads(); } @@ -86,17 +88,27 @@ bool ThreadManager::IsShutdown() { return shutdown_; } +int ThreadManager::GetMaxActiveThreadsSoFar() { + std::lock_guard<std::mutex> lock(mu_); // guards max_active_threads_sofar_ + return max_active_threads_sofar_; +} + void ThreadManager::MarkAsCompleted(WorkerThread* thd) { { std::lock_guard list_lock(list_mu_); completed_threads_.push_back(thd); } - std::lock_guard lock(mu_); - num_threads_--; - if (num_threads_ == 0) { - shutdown_cv_.notify_one(); + { + std::lock_guard lock(mu_); + num_threads_--; + if (num_threads_ == 0) { + shutdown_cv_.notify_one(); + } } + + // Give a thread back to the resource quota + grpc_resource_user_free_threads(resource_user_, 1); } void ThreadManager::CleanupCompletedThreads() { @@ -111,34 +123,24 @@ void ThreadManager::CleanupCompletedThreads() { } void ThreadManager::Initialize() { + if (!grpc_resource_user_alloc_threads(resource_user_, min_pollers_)) { + gpr_log(GPR_ERROR, + "No thread quota available to even create the minimum required " + "polling threads (i.e %d). Unable to start the thread manager", + min_pollers_); + abort(); + } + { std::unique_lock lock(mu_); num_pollers_ = min_pollers_; num_threads_ = min_pollers_; + max_active_threads_sofar_ = min_pollers_; } for (int i = 0; i < min_pollers_; i++) { - if (!CreateNewThread(this)) { - gpr_log(GPR_ERROR, - "No quota available to create additional threads. 
Created %d (of " - "%d) threads", - i, min_pollers_); - break; - } - } -} - -bool ThreadManager::CreateNewThread(ThreadManager* thd_mgr) { - if (!grpc_resource_user_alloc_threads(thd_mgr->resource_user_, 1)) { - return false; + new WorkerThread(this); } - // Create a new thread (which ends up calling the MainWorkLoop() function - new WorkerThread(thd_mgr); - return true; -} - -void ThreadManager::ReleaseThread(ThreadManager* thd_mgr) { - grpc_resource_user_free_threads(thd_mgr->resource_user_, 1); } void ThreadManager::MainWorkLoop() { @@ -162,14 +164,17 @@ void ThreadManager::MainWorkLoop() { done = true; break; case WORK_FOUND: - // If we got work and there are now insufficient pollers, start a new - // one - if (!shutdown_ && num_pollers_ < min_pollers_) { + // If we got work and there are now insufficient pollers and there is + // quota available to create a new thread, start a new poller thread + if (!shutdown_ && num_pollers_ < min_pollers_ && + grpc_resource_user_alloc_threads(resource_user_, 1)) { num_pollers_++; num_threads_++; + max_active_threads_sofar_ = + std::max(max_active_threads_sofar_, num_threads_); // Drop lock before spawning thread to avoid contention lock.unlock(); - CreateNewThread(this); + new WorkerThread(this); } else { // Drop lock for consistency with above branch lock.unlock(); @@ -219,10 +224,9 @@ void ThreadManager::MainWorkLoop() { } }; - // This thread is exiting. Do some cleanup work (i.e delete already completed - // worker threads and also release 1 thread back to the resource quota) + // This thread is exiting. Do some cleanup work i.e delete already completed + // worker threads CleanupCompletedThreads(); - ReleaseThread(this); // If we are here, either ThreadManager is shutting down or it already has // enough threads. 
diff --git a/src/cpp/thread_manager/thread_manager.h b/src/cpp/thread_manager/thread_manager.h index 23bd38ee4f1..8332befed09 100644 --- a/src/cpp/thread_manager/thread_manager.h +++ b/src/cpp/thread_manager/thread_manager.h @@ -86,6 +86,11 @@ class ThreadManager { // all the threads have drained all the outstanding work virtual void Wait(); + // Max number of concurrent threads that were ever active in this thread + // manager so far. This is useful for debugging purposes (and in unit tests) + // to check if resource_quota is properly being enforced. + int GetMaxActiveThreadsSoFar(); + private: // Helper wrapper class around grpc_core::Thread. Takes a ThreadManager object // and starts a new grpc_core::Thread to calls the Run() function. @@ -93,6 +98,23 @@ class ThreadManager { // The Run() function calls ThreadManager::MainWorkLoop() function and once // that completes, it marks the WorkerThread completed by calling // ThreadManager::MarkAsCompleted() + // + // WHY IS THIS NEEDED?: + // When a thread terminates, some other thread *must* call Join() on that + // thread so that the resources are released. Having a WorkerThread wrapper + // will make this easier. 
Once Run() completes, each thread calls the + // following two functions: + // ThreadManager::CleanupCompletedThreads() + // ThreadManager::MarkAsCompleted() + // + // - MarkAsCompleted() puts the WorkerThread object in the ThreadManager's + // completed_threads_ list + // - CleanupCompletedThreads() calls "Join()" on the threads that are already + // in the completed_threads_ list (since a thread cannot call Join() on + // itself, it calls CleanupCompletedThreads() *before* calling + // MarkAsCompleted()) + // TODO: sreek - consider creating the threads 'detached' so that Join() need + // not be called class WorkerThread { public: WorkerThread(ThreadManager* thd_mgr); @@ -113,15 +135,8 @@ class ThreadManager { void MarkAsCompleted(WorkerThread* thd); void CleanupCompletedThreads(); - // Checks the resource quota and if available, creates a thread and returns - // true. If quota is not available, returns false (and thread is not created) - static bool CreateNewThread(ThreadManager* thd_mgr); - - // Give back a thread to the resource quota - static void ReleaseThread(ThreadManager* thd_mgr); - - // Protects shutdown_, num_pollers_ and num_threads_ - // TODO: sreek - Change num_pollers and num_threads_ to atomics + // Protects shutdown_, num_pollers_, num_threads_ and + // max_active_threads_sofar_ std::mutex mu_; bool shutdown_; @@ -142,10 +157,15 @@ class ThreadManager { int min_pollers_; int max_pollers_; - // The total number of threads (includes threads includes the threads that are - // currently polling i.e num_pollers_) + // The total number of threads currently active (includes the + // threads that are currently polling i.e num_pollers_) int num_threads_; + // See GetMaxActiveThreadsSoFar()'s description. 
+ // To be more specific, this variable tracks the max value num_threads_ was + // ever set to so far + int max_active_threads_sofar_; + std::mutex list_mu_; std::list completed_threads_; }; diff --git a/test/core/iomgr/resource_quota_test.cc b/test/core/iomgr/resource_quota_test.cc index 059ff7b5f8b..573e4010fa4 100644 --- a/test/core/iomgr/resource_quota_test.cc +++ b/test/core/iomgr/resource_quota_test.cc @@ -798,6 +798,97 @@ static void test_negative_rq_free_pool(void) { } } +// Simple test to check resource quota thread limits +static void test_thread_limit() { + grpc_core::ExecCtx exec_ctx; + + grpc_resource_quota* rq = grpc_resource_quota_create("test_thread_limit"); + grpc_resource_user* ru1 = grpc_resource_user_create(rq, "ru1"); + grpc_resource_user* ru2 = grpc_resource_user_create(rq, "ru2"); + + // Max threads = 100 + grpc_resource_quota_set_max_threads(rq, 100); + + // Request quota for 100 threads (50 for ru1, 50 for ru2) + GPR_ASSERT(grpc_resource_user_alloc_threads(ru1, 10)); + GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 10)); + GPR_ASSERT(grpc_resource_user_alloc_threads(ru1, 40)); + GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 40)); + + // Threads exhausted. 
Next request must fail + GPR_ASSERT(!grpc_resource_user_alloc_threads(ru2, 20)); + + // Free 20 threads from two different users + grpc_resource_user_free_threads(ru1, 10); + grpc_resource_user_free_threads(ru2, 10); + + // Next request for 20 threads must succeed + GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 20)); + + // No more thread quota again + GPR_ASSERT(!grpc_resource_user_alloc_threads(ru1, 20)); + + // Free 10 more + grpc_resource_user_free_threads(ru1, 10); + + GPR_ASSERT(grpc_resource_user_alloc_threads(ru1, 5)); + GPR_ASSERT(!grpc_resource_user_alloc_threads(ru2, 10)); // Only 5 available + GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 5)); + + // Teardown (ru1 and ru2 release all the quota back to rq) + grpc_resource_user_unref(ru1); + grpc_resource_user_unref(ru2); + grpc_resource_quota_unref(rq); +} + +// Change max quota in either direction dynamically +static void test_thread_maxquota_change() { + grpc_core::ExecCtx exec_ctx; + + grpc_resource_quota* rq = + grpc_resource_quota_create("test_thread_maxquota_change"); + grpc_resource_user* ru1 = grpc_resource_user_create(rq, "ru1"); + grpc_resource_user* ru2 = grpc_resource_user_create(rq, "ru2"); + + // Max threads = 100 + grpc_resource_quota_set_max_threads(rq, 100); + + // Request quota for 100 threads (50 for ru1, 50 for ru2) + GPR_ASSERT(grpc_resource_user_alloc_threads(ru1, 50)); + GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 50)); + + // Threads exhausted. 
Next request must fail + GPR_ASSERT(!grpc_resource_user_alloc_threads(ru2, 20)); + + // Increase maxquota and retry + // Max threads = 150; + grpc_resource_quota_set_max_threads(rq, 150); + GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 20)); // ru2 = 70, ru1 = 50 + + // Decrease maxquota (Note: Quota already given to ru1 and ru2 is unaffected) + // Max threads = 10; + grpc_resource_quota_set_max_threads(rq, 10); + + // New requests will fail until quota is available + GPR_ASSERT(!grpc_resource_user_alloc_threads(ru1, 10)); + + // Make quota available + grpc_resource_user_free_threads(ru1, 50); // ru1 now has 0 + GPR_ASSERT(!grpc_resource_user_alloc_threads(ru1, 10)); // Still not enough + + grpc_resource_user_free_threads(ru2, 70); // ru2 now has 0 + + // Now we can get quota up-to 10, the current max + GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 10)); + // No more thread quota again + GPR_ASSERT(!grpc_resource_user_alloc_threads(ru1, 10)); + + // Teardown (ru1 and ru2 release all the quota back to rq) + grpc_resource_user_unref(ru1); + grpc_resource_user_unref(ru2); + grpc_resource_quota_unref(rq); +} + int main(int argc, char** argv) { grpc_test_init(argc, argv); grpc_init(); @@ -827,6 +918,11 @@ int main(int argc, char** argv) { test_negative_rq_free_pool(); gpr_mu_destroy(&g_mu); gpr_cv_destroy(&g_cv); + + // Resource quota thread related + test_thread_limit(); + test_thread_maxquota_change(); + grpc_shutdown(); return 0; } diff --git a/test/cpp/thread_manager/thread_manager_test.cc b/test/cpp/thread_manager/thread_manager_test.cc index cf2cf770e6b..a7ed2dd380e 100644 --- a/test/cpp/thread_manager/thread_manager_test.cc +++ b/test/cpp/thread_manager/thread_manager_test.cc @@ -30,30 +30,44 @@ #include "test/cpp/util/test_config.h" namespace grpc { + +struct ThreadManagerTestSettings { + // The min number of pollers that SHOULD be active in ThreadManager + int min_pollers; + // The max number of pollers that could be active in ThreadManager + int 
max_pollers; + // The sleep duration in PollForWork() function to simulate "polling" + int poll_duration_ms; + // The sleep duration in DoWork() function to simulate "work" + int work_duration_ms; + // Max number of times PollForWork() is called before shutting down + int max_poll_calls; +}; + class ThreadManagerTest final : public grpc::ThreadManager { public: - ThreadManagerTest(const char* name, grpc_resource_quota* rq) - : ThreadManager(name, rq, kMinPollers, kMaxPollers), + ThreadManagerTest(const char* name, grpc_resource_quota* rq, + const ThreadManagerTestSettings& settings) + : ThreadManager(name, rq, settings.min_pollers, settings.max_pollers), + settings_(settings), num_do_work_(0), num_poll_for_work_(0), num_work_found_(0) {} grpc::ThreadManager::WorkStatus PollForWork(void** tag, bool* ok) override; void DoWork(void* tag, bool ok) override; - void PerformTest(); + + // Get number of times PollForWork() returned WORK_FOUND + int GetNumWorkFound(); + // Get number of times DoWork() was called + int GetNumDoWork(); private: void SleepForMs(int sleep_time_ms); - static const int kMinPollers = 2; - static const int kMaxPollers = 10; - - static const int kPollingTimeoutMsec = 10; - static const int kDoWorkDurationMsec = 1; - - // PollForWork will return SHUTDOWN after these many number of invocations - static const int kMaxNumPollForWork = 50; + ThreadManagerTestSettings settings_; + // Counters gpr_atm num_do_work_; // Number of calls to DoWork gpr_atm num_poll_for_work_; // Number of calls to PollForWork gpr_atm num_work_found_; // Number of times WORK_FOUND was returned @@ -69,58 +83,113 @@ void ThreadManagerTest::SleepForMs(int duration_ms) { grpc::ThreadManager::WorkStatus ThreadManagerTest::PollForWork(void** tag, bool* ok) { int call_num = gpr_atm_no_barrier_fetch_add(&num_poll_for_work_, 1); - - if (call_num >= kMaxNumPollForWork) { + if (call_num >= settings_.max_poll_calls) { Shutdown(); return SHUTDOWN; } - // Simulate "polling for work" by 
sleeping for sometime - SleepForMs(kPollingTimeoutMsec); - + SleepForMs(settings_.poll_duration_ms); // Simulate "polling" duration *tag = nullptr; *ok = true; - // Return timeout roughly 1 out of every 3 calls + // Return timeout roughly 1 out of every 3 calls just to make the test a bit + // more interesting if (call_num % 3 == 0) { return TIMEOUT; - } else { - gpr_atm_no_barrier_fetch_add(&num_work_found_, 1); - return WORK_FOUND; } + + gpr_atm_no_barrier_fetch_add(&num_work_found_, 1); + return WORK_FOUND; } void ThreadManagerTest::DoWork(void* tag, bool ok) { gpr_atm_no_barrier_fetch_add(&num_do_work_, 1); - SleepForMs(kDoWorkDurationMsec); // Simulate doing work by sleeping + SleepForMs(settings_.work_duration_ms); // Simulate work by sleeping } -void ThreadManagerTest::PerformTest() { - // Initialize() starts the ThreadManager - Initialize(); - - // Wait for all the threads to gracefully terminate - Wait(); +int ThreadManagerTest::GetNumWorkFound() { + return static_cast(gpr_atm_no_barrier_load(&num_work_found_)); +} - // The number of times DoWork() was called is equal to the number of times - // WORK_FOUND was returned - gpr_log(GPR_DEBUG, "DoWork() called %" PRIdPTR " times", - gpr_atm_no_barrier_load(&num_do_work_)); - GPR_ASSERT(gpr_atm_no_barrier_load(&num_do_work_) == - gpr_atm_no_barrier_load(&num_work_found_)); +int ThreadManagerTest::GetNumDoWork() { + return static_cast(gpr_atm_no_barrier_load(&num_do_work_)); } } // namespace grpc -int main(int argc, char** argv) { - std::srand(std::time(nullptr)); +// Test that the number of times DoWork() is called is equal to the number of +// times PollForWork() returned WORK_FOUND +static void TestPollAndWork() { + grpc_resource_quota* rq = grpc_resource_quota_create("Test-poll-and-work"); + grpc::ThreadManagerTestSettings settings = { + 2 /* min_pollers */, 10 /* max_pollers */, 10 /* poll_duration_ms */, + 1 /* work_duration_ms */, 50 /* max_poll_calls */}; - grpc::testing::InitTest(&argc, &argv, true); + 
grpc::ThreadManagerTest test_thd_mgr("TestThreadManager", rq, settings); + grpc_resource_quota_unref(rq); + + test_thd_mgr.Initialize(); // Start the thread manager + test_thd_mgr.Wait(); // Wait for all threads to finish + + // Verify that the number of times DoWork() was called is equal to the number + // of times WORK_FOUND was returned + gpr_log(GPR_DEBUG, "DoWork() called %d times", test_thd_mgr.GetNumDoWork()); + GPR_ASSERT(test_thd_mgr.GetNumDoWork() == test_thd_mgr.GetNumWorkFound()); +} - grpc_resource_quota* rq = grpc_resource_quota_create("Test"); - grpc::ThreadManagerTest test_rpc_manager("TestThreadManager", rq); +static void TestThreadQuota() { + const int kMaxNumThreads = 3; + grpc_resource_quota* rq = grpc_resource_quota_create("Test-thread-quota"); + grpc_resource_quota_set_max_threads(rq, kMaxNumThreads); + + // Set work_duration_ms to be much greater than poll_duration_ms. This way, + // the thread manager will be forced to create more 'polling' threads to + // honor the min_pollers guarantee + grpc::ThreadManagerTestSettings settings = { + 1 /* min_pollers */, 1 /* max_pollers */, 1 /* poll_duration_ms */, + 10 /* work_duration_ms */, 50 /* max_poll_calls */}; + + // Create two thread managers (but with same resource quota). This means + // that the max number of active threads across BOTH the thread managers + // cannot be greater than kMaxNumThreads + grpc::ThreadManagerTest test_thd_mgr_1("TestThreadManager-1", rq, settings); + grpc::ThreadManagerTest test_thd_mgr_2("TestThreadManager-2", rq, settings); + // It is ok to unref resource quota before starting thread managers. 
+ grpc_resource_quota_unref(rq); - test_rpc_manager.PerformTest(); + // Start both thread managers + test_thd_mgr_1.Initialize(); + test_thd_mgr_2.Initialize(); + + // Wait for both to finish + test_thd_mgr_1.Wait(); + test_thd_mgr_2.Wait(); + + // Now verify that the total number of active threads in either thread manager + // never exceeds kMaxNumThreads + // + // NOTE: Actually the total active threads across *both* thread managers at + // any point of time never exceeds kMaxNumThreads but unfortunately there is + // no easy way to verify it (i.e we can't just do (max1 + max2 <= k)) + // It's okay to not test this case here. The resource quota c-core tests + // provide enough coverage to resource quota object with multiple resource + // users + int max1 = test_thd_mgr_1.GetMaxActiveThreadsSoFar(); + int max2 = test_thd_mgr_2.GetMaxActiveThreadsSoFar(); + gpr_log( + GPR_DEBUG, + "MaxActiveThreads in TestThreadManager_1: %d, TestThreadManager_2: %d", + max1, max2); + GPR_ASSERT(max1 <= kMaxNumThreads && max2 <= kMaxNumThreads); +} + +int main(int argc, char** argv) { + std::srand(std::time(nullptr)); + grpc::testing::InitTest(&argc, &argv, true); + grpc_init(); + + TestPollAndWork(); + TestThreadQuota(); + grpc_shutdown(); return 0; }