/*
 *
 * Copyright 2016 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

#include "src/cpp/thread_manager/thread_manager.h"

#include <climits>
#include <mutex>

#include <grpc/support/log.h>

#include "src/core/lib/gprpp/thd.h"

namespace grpc {

ThreadManager::WorkerThread::WorkerThread(ThreadManager* thd_mgr)
    : thd_mgr_(thd_mgr) {
  // Make thread creation exclusive with respect to its join happening in
  // ~WorkerThread().
  thd_ = grpc_core::Thread(
      "grpcpp_sync_server",
      [](void* th) { static_cast<ThreadManager::WorkerThread*>(th)->Run(); },
      this);
  thd_.Start();
}

void ThreadManager::WorkerThread::Run() {
  thd_mgr_->MainWorkLoop();
  thd_mgr_->MarkAsCompleted(this);
}

ThreadManager::WorkerThread::~WorkerThread() {
  // Don't join until the thread is fully constructed.
  thd_.Join();
}

ThreadManager::ThreadManager(int min_pollers, int max_pollers)
    : shutdown_(false),
      num_pollers_(0),
      min_pollers_(min_pollers),
      max_pollers_(max_pollers == -1 ? INT_MAX : max_pollers),
      num_threads_(0) {}

ThreadManager::~ThreadManager() {
  {
    std::lock_guard<std::mutex> lock(mu_);
    GPR_ASSERT(num_threads_ == 0);
  }

  CleanupCompletedThreads();
}

void ThreadManager::Wait() {
  std::unique_lock<std::mutex> lock(mu_);
  while (num_threads_ != 0) {
    shutdown_cv_.wait(lock);
  }
}

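// Shutdown() only sets the shutdown_ flag; worker threads observe it in
// MainWorkLoop() and finish on their own. Call Wait() to block until all of
// them have completed.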
void ThreadManager::Shutdown() {
  std::lock_guard<std::mutex> lock(mu_);
  shutdown_ = true;
}

bool ThreadManager::IsShutdown() {
  std::lock_guard<std::mutex> lock(mu_);
  return shutdown_;
}

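// Called by each worker at the end of WorkerThread::Run(). A thread cannot
// join/delete itself, so it is queued on completed_threads_ for deferred
// cleanup, and shutdown_cv_ is signaled once the last worker has finished.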
void ThreadManager::MarkAsCompleted(WorkerThread* thd) {
  {
    std::lock_guard<std::mutex> list_lock(list_mu_);
    completed_threads_.push_back(thd);
  }

  std::lock_guard<std::mutex> lock(mu_);
  num_threads_--;
  if (num_threads_ == 0) {
    shutdown_cv_.notify_one();
  }
}

void ThreadManager::CleanupCompletedThreads() {
  std::list<WorkerThread*> completed_threads;
  {
    // Swap out the completed-threads list so other finishing threads can
    // queue themselves without waiting on this cleanup.
    std::unique_lock<std::mutex> lock(list_mu_);
    completed_threads.swap(completed_threads_);
  }
  for (auto thd : completed_threads) delete thd;
}

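// Initialize() sets the poller/thread counters under the lock first, then
// spawns the initial min_pollers_ worker threads, so that MainWorkLoop()
// observes consistent counts as soon as the workers start running.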
void ThreadManager::Initialize() {
  {
    std::unique_lock<std::mutex> lock(mu_);
    num_pollers_ = min_pollers_;
    num_threads_ = min_pollers_;
  }

  for (int i = 0; i < min_pollers_; i++) {
    // Create a new thread (which ends up calling the MainWorkLoop() function)
    new WorkerThread(this);
  }
}

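// The main loop run by every worker thread: poll for work, adjust the poller
// count, execute the work, and exit once the ThreadManager is shut down or
// there are already enough pollers.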
void ThreadManager::MainWorkLoop() {
  while (true) {
    void* tag;
    bool ok;
    WorkStatus work_status = PollForWork(&tag, &ok);

    std::unique_lock<std::mutex> lock(mu_);
    // Reduce the number of pollers by 1 and check what happened with the poll
    num_pollers_--;
    bool done = false;
    switch (work_status) {
      case TIMEOUT:
        // If we timed out and we have more pollers than we need (or we are
        // shut down), finish this thread
        if (shutdown_ || num_pollers_ > max_pollers_) done = true;
        break;
      case SHUTDOWN:
        // If the thread manager is shut down, finish this thread
        done = true;
        break;
      case WORK_FOUND:
        // If we got work and there are now insufficient pollers, start a new
        // one
        if (!shutdown_ && num_pollers_ < min_pollers_) {
          num_pollers_++;
          num_threads_++;
          // Drop lock before spawning thread to avoid contention
          lock.unlock();
          new WorkerThread(this);
        } else {
          // Drop lock for consistency with above branch
          lock.unlock();
        }
        // Lock is always released at this point - do the application work
        DoWork(tag, ok);
        // Take the lock again to check post conditions
        lock.lock();
        // If we're shut down, we should finish at this point.
        if (shutdown_) done = true;
        break;
    }

    // If we decided to finish the thread, break out of the while loop
    if (done) break;

    // Otherwise go back to polling as long as it doesn't exceed max_pollers_
    //
    // **WARNING**:
    // There is a possibility of threads thrashing here (i.e. more thread
    // shutdowns and creations than in the ideal case). This happens if
    // max_pollers_ is small and the rate of incoming requests is also small.
    // In such scenarios we can possibly configure max_pollers_ to a higher
    // value and/or increase the cq timeout.
    //
    // However, not doing this check here and unconditionally incrementing
    // num_pollers_ (and hoping that the system will eventually settle down)
    // has far worse consequences, i.e. a huge number of threads getting
    // created to the point of thread exhaustion. For example: if the
    // incoming request rate is very high, all the polling threads will
    // return very quickly from PollForWork() with WORK_FOUND. They all
    // briefly decrement the num_pollers_ counter, thereby possibly (and
    // briefly) making it go below min_pollers_; this will most likely result
    // in the creation of a new poller since num_pollers_ dipped below
    // min_pollers_.
    //
    // Now, if we didn't do the max_pollers_ check here, all these threads
    // would go back to doing PollForWork() and the whole cycle would repeat
    // (with a new thread being added in each cycle). Once the total number
    // of threads in the system crosses a certain threshold (around ~1500),
    // there is heavy contention on mutexes (the mu_ here or the mutexes in
    // gRPC core like the pollset mutex) that makes DoWork() take longer to
    // finish, thereby causing new poller threads to be created even faster.
    // This results in a thread avalanche.
    if (num_pollers_ < max_pollers_) {
      num_pollers_++;
    } else {
      break;
    }
  }

  CleanupCompletedThreads();

  // If we are here, either ThreadManager is shutting down or it already has
  // enough threads.
}

} // namespace grpc