From 991c101de89ef8c3855c8503031e3b7423d59153 Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Tue, 18 Apr 2017 19:43:14 +0000 Subject: [PATCH 1/4] Initial thread manager fixes --- include/grpc++/server_builder.h | 4 +- src/cpp/server/server_cc.cc | 13 ++---- src/cpp/thread_manager/thread_manager.cc | 57 +++++++++++++++--------- src/cpp/thread_manager/thread_manager.h | 4 +- 4 files changed, 43 insertions(+), 35 deletions(-) diff --git a/include/grpc++/server_builder.h b/include/grpc++/server_builder.h index d707100a522..e15a5acbe1c 100644 --- a/include/grpc++/server_builder.h +++ b/include/grpc++/server_builder.h @@ -197,8 +197,8 @@ class ServerBuilder { SyncServerSettings() : num_cqs(1), min_pollers(1), - max_pollers(INT_MAX), - cq_timeout_msec(1000) {} + max_pollers(2), + cq_timeout_msec(10000) {} // Number of server completion queues to create to listen to incoming RPCs. int num_cqs; diff --git a/src/cpp/server/server_cc.cc b/src/cpp/server/server_cc.cc index 62ded0d239f..6fea908b66b 100644 --- a/src/cpp/server/server_cc.cc +++ b/src/cpp/server/server_cc.cc @@ -328,15 +328,9 @@ class Server::SyncRequestThreadManager : public ThreadManager { } } - void ShutdownAndDrainCompletionQueue() { + void Shutdown() override { server_cq_->Shutdown(); - - // Drain any pending items from the queue - void* tag; - bool ok; - while (server_cq_->Next(&tag, &ok)) { - // Nothing to be done here - } +ThreadManager::Shutdown(); } void Start() { @@ -415,7 +409,7 @@ Server::~Server() { } else if (!started_) { // Shutdown the completion queues for (auto it = sync_req_mgrs_.begin(); it != sync_req_mgrs_.end(); it++) { - (*it)->ShutdownAndDrainCompletionQueue(); + (*it)->Shutdown(); } } } @@ -579,7 +573,6 @@ void Server::ShutdownInternal(gpr_timespec deadline) { // Wait for threads in all ThreadManagers to terminate for (auto it = sync_req_mgrs_.begin(); it != sync_req_mgrs_.end(); it++) { (*it)->Wait(); - (*it)->ShutdownAndDrainCompletionQueue(); } // Drain the shutdown queue (if the previous call to AsyncNext() timed out diff --git a/src/cpp/thread_manager/thread_manager.cc b/src/cpp/thread_manager/thread_manager.cc index 1450d009e4f..b6d57545113 100644 --- a/src/cpp/thread_manager/thread_manager.cc +++ b/src/cpp/thread_manager/thread_manager.cc @@ -98,11 +98,12 @@ void ThreadManager::MarkAsCompleted(WorkerThread* thd) { } void ThreadManager::CleanupCompletedThreads() { - std::unique_lock lock(list_mu_); - for (auto thd = completed_threads_.begin(); thd != completed_threads_.end(); - thd = completed_threads_.erase(thd)) { - delete *thd; + std::list completed_threads; + { + std::unique_lock lock(list_mu_); + completed_threads.swap(completed_threads_); } + for (auto thd : completed_threads) delete thd; } void ThreadManager::Initialize() { @@ -114,9 +115,10 @@ void ThreadManager::Initialize() { // If the number of pollers (i.e threads currently blocked in PollForWork()) is // less than max threshold (i.e max_pollers_) and the total number of threads is // below the maximum threshold, we can let the current thread continue as poller -bool ThreadManager::MaybeContinueAsPoller() { +bool ThreadManager::MaybeContinueAsPoller(bool work_found) { std::unique_lock lock(mu_); - if (shutdown_ || num_pollers_ > max_pollers_) { + gpr_log(GPR_DEBUG, "s=%d wf=%d np=%d mp=%d", shutdown_, work_found, num_pollers_, max_pollers_); + if (shutdown_ || (!work_found && num_pollers_ > max_pollers_)) { return false; } @@ -133,6 +135,8 @@ void ThreadManager::MaybeCreatePoller() { num_pollers_++; num_threads_++; + lock.unlock(); + // Create a new thread (which ends up calling the MainWorkLoop() function new WorkerThread(this); } @@ -153,25 +157,36 @@ void ThreadManager::MainWorkLoop() { 4. Do the actual work (DoWork()) 5. After doing the work, see it this thread can resume polling work (i.e see MaybeContinueAsPoller() for more details) */ - do { - WorkStatus work_status = PollForWork(&tag, &ok); + WorkStatus work_status; + while (true) { + bool done = false; + work_status = PollForWork(&tag, &ok); - { - std::unique_lock lock(mu_); - num_pollers_--; - - if (work_status == TIMEOUT && num_pollers_ > min_pollers_) { - break; + std::unique_lock lock(mu_); + num_pollers_--; + gpr_log(GPR_DEBUG, "%p: work_status:%d num_pollers:%d min_pollers:%d max_pollers:%d num_threads:%d shutdown:%d", this, work_status, num_pollers_, min_pollers_, max_pollers_, num_threads_, shutdown_); + switch (work_status) { + case TIMEOUT: + if (shutdown_ || num_pollers_ >= max_pollers_) done = true; + break; + case SHUTDOWN: done = true; break; + case WORK_FOUND: + if (!shutdown_ && num_pollers_ < min_pollers_) { + num_pollers_++; + num_threads_++; + lock.unlock(); + new WorkerThread(this); + } else { + lock.unlock(); } - } - - // Note that MaybeCreatePoller does check for shutdown and creates a new - // thread only if ThreadManager is not shutdown - if (work_status == WORK_FOUND) { - MaybeCreatePoller(); DoWork(tag, ok); + lock.lock(); + if (shutdown_) done = true; + break; } - } while (MaybeContinueAsPoller()); +if (done) break; + num_pollers_++; + }; CleanupCompletedThreads(); diff --git a/src/cpp/thread_manager/thread_manager.h b/src/cpp/thread_manager/thread_manager.h index 9c0569c62c1..7d832ad16a6 100644 --- a/src/cpp/thread_manager/thread_manager.h +++ b/src/cpp/thread_manager/thread_manager.h @@ -89,7 +89,7 @@ class ThreadManager { // Mark the ThreadManager as shutdown and begin draining the work. This is a // non-blocking call and the caller should call Wait(), a blocking call which // returns only once the shutdown is complete - void Shutdown(); + virtual void Shutdown(); // Has Shutdown() been called bool IsShutdown(); @@ -128,7 +128,7 @@ class ThreadManager { // Returns true if the current thread can resume as a poller. i.e if the // current number of pollers is less than the max_pollers. - bool MaybeContinueAsPoller(); + bool MaybeContinueAsPoller(bool work_found); void MarkAsCompleted(WorkerThread* thd); void CleanupCompletedThreads(); From a3e87894f2cc921d2c5bbdab250fa23a1ba8b52d Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Tue, 18 Apr 2017 13:08:08 -0700 Subject: [PATCH 2/4] Fix, restore draining --- src/cpp/server/server_cc.cc | 12 +++- src/cpp/thread_manager/thread_manager.cc | 88 ++++++++++-------------- src/cpp/thread_manager/thread_manager.h | 6 +- 3 files changed, 50 insertions(+), 56 deletions(-) diff --git a/src/cpp/server/server_cc.cc b/src/cpp/server/server_cc.cc index 6fea908b66b..423f347acd2 100644 --- a/src/cpp/server/server_cc.cc +++ b/src/cpp/server/server_cc.cc @@ -330,7 +330,17 @@ class Server::SyncRequestThreadManager : public ThreadManager { void Shutdown() override { server_cq_->Shutdown(); -ThreadManager::Shutdown(); + ThreadManager::Shutdown(); + } + + void Wait() override { + ThreadManager::Wait(); + // Drain any pending items from the queue + void* tag; + bool ok; + while (server_cq_->Next(&tag, &ok)) { + // Do nothing + } } void Start() { diff --git a/src/cpp/thread_manager/thread_manager.cc b/src/cpp/thread_manager/thread_manager.cc index b6d57545113..39b9691b5f5 100644 --- a/src/cpp/thread_manager/thread_manager.cc +++ b/src/cpp/thread_manager/thread_manager.cc @@ -100,6 +100,8 @@ void ThreadManager::MarkAsCompleted(WorkerThread* thd) { void ThreadManager::CleanupCompletedThreads() { std::list completed_threads; { + // swap out the completed threads list: allows other threads to clean up + // more quickly std::unique_lock lock(list_mu_); completed_threads.swap(completed_threads_); } @@ -112,20 +114,6 @@ void ThreadManager::Initialize() { } } -// If the number of pollers (i.e threads currently blocked in PollForWork()) is -// less than max threshold (i.e max_pollers_) and the total number of threads is -// below the maximum threshold, we can let the current thread continue as poller -bool ThreadManager::MaybeContinueAsPoller(bool work_found) { - std::unique_lock lock(mu_); - gpr_log(GPR_DEBUG, "s=%d wf=%d np=%d mp=%d", shutdown_, work_found, num_pollers_, max_pollers_); - if (shutdown_ || (!work_found && num_pollers_ > max_pollers_)) { - return false; - } - - num_pollers_++; - return true; -} - // Create a new poller if the current number of pollers i.e num_pollers_ (i.e // threads currently blocked in PollForWork()) is below the threshold (i.e // min_pollers_) and the total number of threads is below the maximum threshold @@ -143,48 +131,48 @@ void ThreadManager::MaybeCreatePoller() { } void ThreadManager::MainWorkLoop() { - void* tag; - bool ok; - - /* - 1. Poll for work (i.e PollForWork()) - 2. After returning from PollForWork, reduce the number of pollers by 1. If - PollForWork() returned a TIMEOUT, then it may indicate that we have more - polling threads than needed. Check if the number of pollers is greater - than min_pollers and if so, terminate the thread. - 3. Since we are short of one poller now, see if a new poller has to be - created (i.e see MaybeCreatePoller() for more details) - 4. Do the actual work (DoWork()) - 5. After doing the work, see it this thread can resume polling work (i.e - see MaybeContinueAsPoller() for more details) */ - WorkStatus work_status; while (true) { - bool done = false; - work_status = PollForWork(&tag, &ok); + void* tag; + bool ok; + WorkStatus work_status = PollForWork(&tag, &ok); std::unique_lock lock(mu_); + // Reduce the number of pollers by 1 and check what happened with the poll num_pollers_--; - gpr_log(GPR_DEBUG, "%p: work_status:%d num_pollers:%d min_pollers:%d max_pollers:%d num_threads:%d shutdown:%d", this, work_status, num_pollers_, min_pollers_, max_pollers_, num_threads_, shutdown_); + bool done = false; switch (work_status) { - case TIMEOUT: - if (shutdown_ || num_pollers_ >= max_pollers_) done = true; - break; - case SHUTDOWN: done = true; break; - case WORK_FOUND: - if (!shutdown_ && num_pollers_ < min_pollers_) { - num_pollers_++; - num_threads_++; - lock.unlock(); - new WorkerThread(this); - } else { - lock.unlock(); - } - DoWork(tag, ok); - lock.lock(); - if (shutdown_) done = true; - break; + case TIMEOUT: + // If we timed out and we have more pollers than we need (or we are + // shutdown), finish this thread + if (shutdown_ || num_pollers_ > max_pollers_) done = true; + break; + case SHUTDOWN: + // If the thread manager is shutdown, finish this thread + done = true; + break; + case WORK_FOUND: + // If we got work and there are now insufficient pollers, start a new + // one + if (!shutdown_ && num_pollers_ < min_pollers_) { + num_pollers_++; + num_threads_++; + lock.unlock(); + new WorkerThread(this); + } else { + lock.unlock(); + } + DoWork(tag, ok); + lock.lock(); + // If we're shutdown, we should finish at this point. + // If not, there's a chance that we'll exceed the max poller count: that + // is explicitly ok - we'll decrease after one poll timeout, and prevent + // some thrashing starting up and shutting down threads + if (shutdown_) done = true; + break; } -if (done) break; + // If we decided to finish the thread, break out of the while loop + if (done) break; + // ... otherwise increase poller count and continue num_pollers_++; }; diff --git a/src/cpp/thread_manager/thread_manager.h b/src/cpp/thread_manager/thread_manager.h index 7d832ad16a6..c9435011f95 100644 --- a/src/cpp/thread_manager/thread_manager.h +++ b/src/cpp/thread_manager/thread_manager.h @@ -96,7 +96,7 @@ class ThreadManager { // A blocking call that returns only after the ThreadManager has shutdown and // all the threads have drained all the outstanding work - void Wait(); + virtual void Wait(); private: // Helper wrapper class around std::thread. This takes a ThreadManager object @@ -126,10 +126,6 @@ class ThreadManager { // minimum number of pollers needed (i.e min_pollers). void MaybeCreatePoller(); - // Returns true if the current thread can resume as a poller. i.e if the - // current number of pollers is less than the max_pollers. - bool MaybeContinueAsPoller(bool work_found); - void MarkAsCompleted(WorkerThread* thd); void CleanupCompletedThreads(); From 4818150728ba9edf84231a726048763cf555a81a Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Tue, 18 Apr 2017 13:09:54 -0700 Subject: [PATCH 3/4] Better commentary --- src/cpp/thread_manager/thread_manager.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cpp/thread_manager/thread_manager.cc b/src/cpp/thread_manager/thread_manager.cc index 39b9691b5f5..73c59eeff00 100644 --- a/src/cpp/thread_manager/thread_manager.cc +++ b/src/cpp/thread_manager/thread_manager.cc @@ -156,12 +156,16 @@ void ThreadManager::MainWorkLoop() { if (!shutdown_ && num_pollers_ < min_pollers_) { num_pollers_++; num_threads_++; + // Drop lock before spawning thread to avoid contention lock.unlock(); new WorkerThread(this); } else { + // Drop lock for consistency with above branch lock.unlock(); } + // Lock is always released at this point - do the application work DoWork(tag, ok); + // Take the lock again to check post conditions lock.lock(); // If we're shutdown, we should finish at this point. // If not, there's a chance that we'll exceed the max poller count: that From 35f27cd457207e6d51304a40d736dca53285e79f Mon Sep 17 00:00:00 2001 From: Craig Tiller Date: Tue, 18 Apr 2017 13:14:45 -0700 Subject: [PATCH 4/4] More cleanup --- src/cpp/thread_manager/thread_manager.cc | 19 +++++-------------- src/cpp/thread_manager/thread_manager.h | 4 ---- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/src/cpp/thread_manager/thread_manager.cc b/src/cpp/thread_manager/thread_manager.cc index 73c59eeff00..ebcc4dd3787 100644 --- a/src/cpp/thread_manager/thread_manager.cc +++ b/src/cpp/thread_manager/thread_manager.cc @@ -109,22 +109,13 @@ void ThreadManager::CleanupCompletedThreads() { } void ThreadManager::Initialize() { - for (int i = 0; i < min_pollers_; i++) { - MaybeCreatePoller(); + { + std::unique_lock lock(mu_); + num_pollers_ = min_pollers_; + num_threads_ = min_pollers_; } -} - -// Create a new poller if the current number of pollers i.e num_pollers_ (i.e -// threads currently blocked in PollForWork()) is below the threshold (i.e -// min_pollers_) and the total number of threads is below the maximum threshold -void ThreadManager::MaybeCreatePoller() { - std::unique_lock lock(mu_); - if (!shutdown_ && num_pollers_ < min_pollers_) { - num_pollers_++; - num_threads_++; - - lock.unlock(); + for (int i = 0; i < min_pollers_; i++) { // Create a new thread (which ends up calling the MainWorkLoop() function new WorkerThread(this); } diff --git a/src/cpp/thread_manager/thread_manager.h b/src/cpp/thread_manager/thread_manager.h index c9435011f95..d1050f6dede 100644 --- a/src/cpp/thread_manager/thread_manager.h +++ b/src/cpp/thread_manager/thread_manager.h @@ -122,10 +122,6 @@ class ThreadManager { // The main funtion in ThreadManager void MainWorkLoop(); - // Create a new poller if the number of current pollers is less than the - // minimum number of pollers needed (i.e min_pollers). - void MaybeCreatePoller(); - void MarkAsCompleted(WorkerThread* thd); void CleanupCompletedThreads();