Add Tests in Core and C++ and fix a few related bugs in thread_manager.cc

pull/16081/head
Sree Kuchibhotla 6 years ago
parent 7b8be4d6fd
commit b95772eeb9
  1. 7
      src/core/lib/iomgr/resource_quota.cc
  2. 10
      src/cpp/server/server_cc.cc
  3. 70
      src/cpp/thread_manager/thread_manager.cc
  4. 42
      src/cpp/thread_manager/thread_manager.h
  5. 96
      test/core/iomgr/resource_quota_test.cc
  6. 147
      test/cpp/thread_manager/thread_manager_test.cc

@ -547,6 +547,11 @@ static void ru_shutdown(void* ru, grpc_error* error) {
static void ru_destroy(void* ru, grpc_error* error) {
grpc_resource_user* resource_user = static_cast<grpc_resource_user*>(ru);
GPR_ASSERT(gpr_atm_no_barrier_load(&resource_user->refs) == 0);
// Free all the remaining thread quota
grpc_resource_user_free_threads(
resource_user,
static_cast<int>(gpr_atm_no_barrier_load(&resource_user->num_threads)));
for (int i = 0; i < GRPC_RULIST_COUNT; i++) {
rulist_remove(resource_user, static_cast<grpc_rulist>(i));
}
@ -642,6 +647,7 @@ grpc_resource_quota* grpc_resource_quota_create(const char* name) {
void grpc_resource_quota_unref_internal(grpc_resource_quota* resource_quota) {
if (gpr_unref(&resource_quota->refs)) {
GPR_ASSERT(resource_quota->num_threads == 0); // No outstanding thd quota
GRPC_COMBINER_UNREF(resource_quota->combiner, "resource_quota");
gpr_free(resource_quota->name);
gpr_free(resource_quota);
@ -846,6 +852,7 @@ void grpc_resource_user_free_threads(grpc_resource_user* resource_user,
"Releasing more threads (%d) that currently allocated (rq threads: "
"%d, ru threads: %d)",
thd_count, old_cnt, rq->num_threads + thd_count);
abort();
}
gpr_mu_unlock(&resource_user->resource_quota->thd_mu);
}

@ -47,6 +47,12 @@
namespace grpc {
namespace {
// The default value for maximum number of threads that can be created in the
// sync server. This value of 1500 is empirically chosen. To increase the max
// number of threads in a sync server, pass a custom ResourceQuota object (with
// the desired number of max-threads set) to the server builder
#define DEFAULT_MAX_SYNC_SERVER_THREADS 1500
class DefaultGlobalCallbacks final : public Server::GlobalCallbacks {
public:
~DefaultGlobalCallbacks() override {}
@ -395,7 +401,9 @@ Server::Server(
if (sync_server_cqs_ != nullptr) {
bool default_rq_created = false;
if (server_rq == nullptr) {
server_rq = grpc_resource_quota_create("SyncServer-Default");
server_rq = grpc_resource_quota_create("SyncServer-default-rq");
grpc_resource_quota_set_max_threads(server_rq,
DEFAULT_MAX_SYNC_SERVER_THREADS);
default_rq_created = true;
}

@ -22,8 +22,8 @@
#include <mutex>
#include <grpc/support/log.h>
#include "src/core/lib/gprpp/thd.h"
#include "src/core/lib/iomgr/exec_ctx.h"
namespace grpc {
@ -55,7 +55,8 @@ ThreadManager::ThreadManager(const char* name,
num_pollers_(0),
min_pollers_(min_pollers),
max_pollers_(max_pollers == -1 ? INT_MAX : max_pollers),
num_threads_(0) {
num_threads_(0),
max_active_threads_sofar_(0) {
resource_user_ = grpc_resource_user_create(resource_quota, name);
}
@ -65,6 +66,7 @@ ThreadManager::~ThreadManager() {
GPR_ASSERT(num_threads_ == 0);
}
grpc_core::ExecCtx exec_ctx; // grpc_resource_user_unref needs an exec_ctx
grpc_resource_user_unref(resource_user_);
CleanupCompletedThreads();
}
@ -86,17 +88,27 @@ bool ThreadManager::IsShutdown() {
return shutdown_;
}
int ThreadManager::GetMaxActiveThreadsSoFar() {
std::lock_guard<std::mutex> list_lock(list_mu_);
return max_active_threads_sofar_;
}
void ThreadManager::MarkAsCompleted(WorkerThread* thd) {
{
std::lock_guard<std::mutex> list_lock(list_mu_);
completed_threads_.push_back(thd);
}
std::lock_guard<std::mutex> lock(mu_);
num_threads_--;
if (num_threads_ == 0) {
shutdown_cv_.notify_one();
{
std::lock_guard<std::mutex> lock(mu_);
num_threads_--;
if (num_threads_ == 0) {
shutdown_cv_.notify_one();
}
}
// Give a thread back to the resource quota
grpc_resource_user_free_threads(resource_user_, 1);
}
void ThreadManager::CleanupCompletedThreads() {
@ -111,34 +123,24 @@ void ThreadManager::CleanupCompletedThreads() {
}
void ThreadManager::Initialize() {
if (!grpc_resource_user_alloc_threads(resource_user_, min_pollers_)) {
gpr_log(GPR_ERROR,
"No thread quota available to even create the minimum required "
"polling threads (i.e %d). Unable to start the thread manager",
min_pollers_);
abort();
}
{
std::unique_lock<std::mutex> lock(mu_);
num_pollers_ = min_pollers_;
num_threads_ = min_pollers_;
max_active_threads_sofar_ = min_pollers_;
}
for (int i = 0; i < min_pollers_; i++) {
if (!CreateNewThread(this)) {
gpr_log(GPR_ERROR,
"No quota available to create additional threads. Created %d (of "
"%d) threads",
i, min_pollers_);
break;
}
}
}
bool ThreadManager::CreateNewThread(ThreadManager* thd_mgr) {
if (!grpc_resource_user_alloc_threads(thd_mgr->resource_user_, 1)) {
return false;
new WorkerThread(this);
}
// Create a new thread (which ends up calling the MainWorkLoop() function
new WorkerThread(thd_mgr);
return true;
}
void ThreadManager::ReleaseThread(ThreadManager* thd_mgr) {
grpc_resource_user_free_threads(thd_mgr->resource_user_, 1);
}
void ThreadManager::MainWorkLoop() {
@ -162,14 +164,17 @@ void ThreadManager::MainWorkLoop() {
done = true;
break;
case WORK_FOUND:
// If we got work and there are now insufficient pollers, start a new
// one
if (!shutdown_ && num_pollers_ < min_pollers_) {
// If we got work and there are now insufficient pollers and there is
// quota available to create a new thread, start a new poller thread
if (!shutdown_ && num_pollers_ < min_pollers_ &&
grpc_resource_user_alloc_threads(resource_user_, 1)) {
num_pollers_++;
num_threads_++;
max_active_threads_sofar_ =
std::max(max_active_threads_sofar_, num_threads_);
// Drop lock before spawning thread to avoid contention
lock.unlock();
CreateNewThread(this);
new WorkerThread(this);
} else {
// Drop lock for consistency with above branch
lock.unlock();
@ -219,10 +224,9 @@ void ThreadManager::MainWorkLoop() {
}
};
// This thread is exiting. Do some cleanup work (i.e delete already completed
// worker threads and also release 1 thread back to the resource quota)
// This thread is exiting. Do some cleanup work i.e delete already completed
// worker threads
CleanupCompletedThreads();
ReleaseThread(this);
// If we are here, either ThreadManager is shutting down or it already has
// enough threads.

@ -86,6 +86,11 @@ class ThreadManager {
// all the threads have drained all the outstanding work
virtual void Wait();
// Max number of concurrent threads that were ever active in this thread
// manager so far. This is useful for debugging purposes (and in unit tests)
// to check if resource_quota is properly being enforced.
int GetMaxActiveThreadsSoFar();
private:
// Helper wrapper class around grpc_core::Thread. Takes a ThreadManager object
// and starts a new grpc_core::Thread to calls the Run() function.
@ -93,6 +98,23 @@ class ThreadManager {
// The Run() function calls ThreadManager::MainWorkLoop() function and once
// that completes, it marks the WorkerThread completed by calling
// ThreadManager::MarkAsCompleted()
//
// WHY IS THIS NEEDED?:
// When a thread terminates, some other thread *must* call Join() on that
// thread so that the resources are released. Having a WorkerThread wrapper
// will make this easier. Once Run() completes, each thread calls the
// following two functions:
// ThreadManager::CleanupCompletedThreads()
// ThreadManager::MarkAsCompleted()
//
// - MarkAsCompleted() puts the WorkerThread object in the ThreadManager's
// completed_threads_ list
// - CleanupCompletedThreads() calls "Join()" on the threads that are already
// in the completed_threads_ list (since a thread cannot call Join() on
// itself, it calls CleanupCompletedThreads() *before* calling
// MarkAsCompleted())
// TODO: sreek - consider creating the threads 'detached' so that Join() need
// not be called
class WorkerThread {
public:
WorkerThread(ThreadManager* thd_mgr);
@ -113,15 +135,8 @@ class ThreadManager {
void MarkAsCompleted(WorkerThread* thd);
void CleanupCompletedThreads();
// Checks the resource quota and if available, creates a thread and returns
// true. If quota is not available, returns false (and thread is not created)
static bool CreateNewThread(ThreadManager* thd_mgr);
// Give back a thread to the resource quota
static void ReleaseThread(ThreadManager* thd_mgr);
// Protects shutdown_, num_pollers_ and num_threads_
// TODO: sreek - Change num_pollers and num_threads_ to atomics
// Protects shutdown_, num_pollers_, num_threads_ and
// max_active_threads_sofar_
std::mutex mu_;
bool shutdown_;
@ -142,10 +157,15 @@ class ThreadManager {
int min_pollers_;
int max_pollers_;
// The total number of threads (includes threads includes the threads that are
// currently polling i.e num_pollers_)
// The total number of threads currently active (includes the threads that are
// currently polling i.e num_pollers_)
int num_threads_;
// See GetMaxActiveThreadsSoFar()'s description.
// To be more specific, this variable tracks the max value num_threads_ was
// ever set so far
int max_active_threads_sofar_;
std::mutex list_mu_;
std::list<WorkerThread*> completed_threads_;
};

@ -798,6 +798,97 @@ static void test_negative_rq_free_pool(void) {
}
}
// Simple test to check resource quota thread limits. Two resource users share
// one quota of 100 threads; the assertions below expect an allocation to fail
// whenever it would push the combined total past that maximum.
static void test_thread_limit() {
grpc_core::ExecCtx exec_ctx;
grpc_resource_quota* rq = grpc_resource_quota_create("test_thread_limit");
grpc_resource_user* ru1 = grpc_resource_user_create(rq, "ru1");
grpc_resource_user* ru2 = grpc_resource_user_create(rq, "ru2");
// Max threads = 100
grpc_resource_quota_set_max_threads(rq, 100);
// Request quota for 100 threads (50 for ru1, 50 for ru2), in two steps each
GPR_ASSERT(grpc_resource_user_alloc_threads(ru1, 10));
GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 10));
GPR_ASSERT(grpc_resource_user_alloc_threads(ru1, 40));
GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 40));
// Threads exhausted. Next request must fail
GPR_ASSERT(!grpc_resource_user_alloc_threads(ru2, 20));
// Free 20 threads from two different users (ru1 = 40, ru2 = 40)
grpc_resource_user_free_threads(ru1, 10);
grpc_resource_user_free_threads(ru2, 10);
// Next request for 20 threads must succeed (ru1 = 40, ru2 = 60)
GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 20));
// No more thread quota again
GPR_ASSERT(!grpc_resource_user_alloc_threads(ru1, 20));
// Free 10 more (ru1 = 30, ru2 = 60, so 10 are available)
grpc_resource_user_free_threads(ru1, 10);
GPR_ASSERT(grpc_resource_user_alloc_threads(ru1, 5));
GPR_ASSERT(!grpc_resource_user_alloc_threads(ru2, 10)); // Only 5 available
GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 5));
// Teardown (ru1 and ru2 release all the quota back to rq)
grpc_resource_user_unref(ru1);
grpc_resource_user_unref(ru2);
grpc_resource_quota_unref(rq);
}
// Change the max thread quota in either direction dynamically and check that
// new allocations are judged against the current maximum, while quota that was
// already handed out stays with its resource user until it is freed.
static void test_thread_maxquota_change() {
grpc_core::ExecCtx exec_ctx;
grpc_resource_quota* rq =
grpc_resource_quota_create("test_thread_maxquota_change");
grpc_resource_user* ru1 = grpc_resource_user_create(rq, "ru1");
grpc_resource_user* ru2 = grpc_resource_user_create(rq, "ru2");
// Max threads = 100
grpc_resource_quota_set_max_threads(rq, 100);
// Request quota for 100 threads (50 for ru1, 50 for ru2)
GPR_ASSERT(grpc_resource_user_alloc_threads(ru1, 50));
GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 50));
// Threads exhausted. Next request must fail
GPR_ASSERT(!grpc_resource_user_alloc_threads(ru2, 20));
// Increase maxquota and retry
// Max threads = 150;
grpc_resource_quota_set_max_threads(rq, 150);
GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 20)); // ru2 = 70, ru1 = 50
// Decrease maxquota (Note: Quota already given to ru1 and ru2 is unaffected)
// Max threads = 10;
grpc_resource_quota_set_max_threads(rq, 10);
// New requests will fail until quota is available
GPR_ASSERT(!grpc_resource_user_alloc_threads(ru1, 10));
// Make quota available
grpc_resource_user_free_threads(ru1, 50); // ru1 now has 0
// ru2 still holds 70, which is above the new max of 10
GPR_ASSERT(!grpc_resource_user_alloc_threads(ru1, 10)); // Still not enough
grpc_resource_user_free_threads(ru2, 70); // ru2 now has 0
// Now we can get quota up-to 10, the current max
GPR_ASSERT(grpc_resource_user_alloc_threads(ru2, 10));
// No more thread quota again
GPR_ASSERT(!grpc_resource_user_alloc_threads(ru1, 10));
// Teardown (ru1 and ru2 release all the quota back to rq)
grpc_resource_user_unref(ru1);
grpc_resource_user_unref(ru2);
grpc_resource_quota_unref(rq);
}
int main(int argc, char** argv) {
grpc_test_init(argc, argv);
grpc_init();
@ -827,6 +918,11 @@ int main(int argc, char** argv) {
test_negative_rq_free_pool();
gpr_mu_destroy(&g_mu);
gpr_cv_destroy(&g_cv);
// Resource quota thread related
test_thread_limit();
test_thread_maxquota_change();
grpc_shutdown();
return 0;
}

@ -30,30 +30,44 @@
#include "test/cpp/util/test_config.h"
namespace grpc {
// Knobs for one ThreadManagerTest run. The tests fill this struct with a
// positional brace initializer, so the order of the fields below must not
// change.
struct ThreadManagerTestSettings {
// The min number of pollers that SHOULD be active in ThreadManager
// (forwarded to the ThreadManager constructor)
int min_pollers;
// The max number of pollers that could be active in ThreadManager
// (forwarded to the ThreadManager constructor)
int max_pollers;
// The sleep duration in PollForWork() function to simulate "polling"
// (milliseconds)
int poll_duration_ms;
// The sleep duration in DoWork() function to simulate "work" (milliseconds)
int work_duration_ms;
// Max number of times PollForWork() is called before shutting down; once the
// call count reaches this value PollForWork() calls Shutdown() and returns
// SHUTDOWN
int max_poll_calls;
};
class ThreadManagerTest final : public grpc::ThreadManager {
public:
ThreadManagerTest(const char* name, grpc_resource_quota* rq)
: ThreadManager(name, rq, kMinPollers, kMaxPollers),
ThreadManagerTest(const char* name, grpc_resource_quota* rq,
const ThreadManagerTestSettings& settings)
: ThreadManager(name, rq, settings.min_pollers, settings.max_pollers),
settings_(settings),
num_do_work_(0),
num_poll_for_work_(0),
num_work_found_(0) {}
grpc::ThreadManager::WorkStatus PollForWork(void** tag, bool* ok) override;
void DoWork(void* tag, bool ok) override;
void PerformTest();
// Get number of times PollForWork() returned WORK_FOUND
int GetNumWorkFound();
// Get number of times DoWork() was called
int GetNumDoWork();
private:
void SleepForMs(int sleep_time_ms);
static const int kMinPollers = 2;
static const int kMaxPollers = 10;
static const int kPollingTimeoutMsec = 10;
static const int kDoWorkDurationMsec = 1;
// PollForWork will return SHUTDOWN after these many number of invocations
static const int kMaxNumPollForWork = 50;
ThreadManagerTestSettings settings_;
// Counters
gpr_atm num_do_work_; // Number of calls to DoWork
gpr_atm num_poll_for_work_; // Number of calls to PollForWork
gpr_atm num_work_found_; // Number of times WORK_FOUND was returned
@ -69,58 +83,113 @@ void ThreadManagerTest::SleepForMs(int duration_ms) {
grpc::ThreadManager::WorkStatus ThreadManagerTest::PollForWork(void** tag,
bool* ok) {
int call_num = gpr_atm_no_barrier_fetch_add(&num_poll_for_work_, 1);
if (call_num >= kMaxNumPollForWork) {
if (call_num >= settings_.max_poll_calls) {
Shutdown();
return SHUTDOWN;
}
// Simulate "polling for work" by sleeping for sometime
SleepForMs(kPollingTimeoutMsec);
SleepForMs(settings_.poll_duration_ms); // Simulate "polling" duration
*tag = nullptr;
*ok = true;
// Return timeout roughly 1 out of every 3 calls
// Return timeout roughly 1 out of every 3 calls just to make the test a bit
// more interesting
if (call_num % 3 == 0) {
return TIMEOUT;
} else {
gpr_atm_no_barrier_fetch_add(&num_work_found_, 1);
return WORK_FOUND;
}
gpr_atm_no_barrier_fetch_add(&num_work_found_, 1);
return WORK_FOUND;
}
void ThreadManagerTest::DoWork(void* tag, bool ok) {
gpr_atm_no_barrier_fetch_add(&num_do_work_, 1);
SleepForMs(kDoWorkDurationMsec); // Simulate doing work by sleeping
SleepForMs(settings_.work_duration_ms); // Simulate work by sleeping
}
void ThreadManagerTest::PerformTest() {
// Initialize() starts the ThreadManager
Initialize();
// Wait for all the threads to gracefully terminate
Wait();
int ThreadManagerTest::GetNumWorkFound() {
return static_cast<int>(gpr_atm_no_barrier_load(&num_work_found_));
}
// The number of times DoWork() was called is equal to the number of times
// WORK_FOUND was returned
gpr_log(GPR_DEBUG, "DoWork() called %" PRIdPTR " times",
gpr_atm_no_barrier_load(&num_do_work_));
GPR_ASSERT(gpr_atm_no_barrier_load(&num_do_work_) ==
gpr_atm_no_barrier_load(&num_work_found_));
int ThreadManagerTest::GetNumDoWork() {
return static_cast<int>(gpr_atm_no_barrier_load(&num_do_work_));
}
} // namespace grpc
int main(int argc, char** argv) {
std::srand(std::time(nullptr));
// Test that the number of times DoWork() is called is equal to the number of
// times PollForWork() returned WORK_FOUND
static void TestPollAndWork() {
grpc_resource_quota* rq = grpc_resource_quota_create("Test-poll-and-work");
grpc::ThreadManagerTestSettings settings = {
2 /* min_pollers */, 10 /* max_pollers */, 10 /* poll_duration_ms */,
1 /* work_duration_ms */, 50 /* max_poll_calls */};
grpc::testing::InitTest(&argc, &argv, true);
grpc::ThreadManagerTest test_thd_mgr("TestThreadManager", rq, settings);
grpc_resource_quota_unref(rq);
test_thd_mgr.Initialize(); // Start the thread manager
test_thd_mgr.Wait(); // Wait for all threads to finish
// Verify that the number of times DoWork() was called is equal to the number
// of times WORK_FOUND was returned
gpr_log(GPR_DEBUG, "DoWork() called %d times", test_thd_mgr.GetNumDoWork());
GPR_ASSERT(test_thd_mgr.GetNumDoWork() == test_thd_mgr.GetNumWorkFound());
}
grpc_resource_quota* rq = grpc_resource_quota_create("Test");
grpc::ThreadManagerTest test_rpc_manager("TestThreadManager", rq);
static void TestThreadQuota() {
const int kMaxNumThreads = 3;
grpc_resource_quota* rq = grpc_resource_quota_create("Test-thread-quota");
grpc_resource_quota_set_max_threads(rq, kMaxNumThreads);
// Set work_duration_ms to be much greater than poll_duration_ms. This way,
// the thread manager will be forced to create more 'polling' threads to
// honor the min_pollers guarantee
grpc::ThreadManagerTestSettings settings = {
1 /* min_pollers */, 1 /* max_pollers */, 1 /* poll_duration_ms */,
10 /* work_duration_ms */, 50 /* max_poll_calls */};
// Create two thread managers (but with the same resource quota). This means
// that the max number of active threads across BOTH the thread managers
// cannot be greater than kMaxNumThreads
grpc::ThreadManagerTest test_thd_mgr_1("TestThreadManager-1", rq, settings);
grpc::ThreadManagerTest test_thd_mgr_2("TestThreadManager-2", rq, settings);
// It is ok to unref resource quota before starting thread managers.
grpc_resource_quota_unref(rq);
test_rpc_manager.PerformTest();
// Start both thread managers
test_thd_mgr_1.Initialize();
test_thd_mgr_2.Initialize();
// Wait for both to finish
test_thd_mgr_1.Wait();
test_thd_mgr_2.Wait();
// Now verify that the total number of active threads in either thread manager
// never exceeds kMaxNumThreads
//
// NOTE: Actually the total active threads across *both* thread managers at
// any point of time never exceeds kMaxNumThreads but unfortunately there is
// no easy way to verify it (i.e we can't just do (max1 + max2 <= k))
// It's okay to not test this case here. The resource quota c-core tests
// provide enough coverage of the resource quota object with multiple resource
// users
int max1 = test_thd_mgr_1.GetMaxActiveThreadsSoFar();
int max2 = test_thd_mgr_2.GetMaxActiveThreadsSoFar();
gpr_log(
GPR_DEBUG,
"MaxActiveThreads in TestThreadManager_1: %d, TestThreadManager_2: %d",
max1, max2);
GPR_ASSERT(max1 <= kMaxNumThreads && max2 <= kMaxNumThreads);
}
// Test driver: seeds std::rand, calls grpc::testing::InitTest with the
// command-line arguments, then runs both thread manager tests between
// grpc_init() and grpc_shutdown(). Always returns 0; failures surface through
// the GPR_ASSERT checks inside the tests.
int main(int argc, char** argv) {
std::srand(std::time(nullptr));
grpc::testing::InitTest(&argc, &argv, true);
// grpc_init()/grpc_shutdown() bracket every test that touches resource quotas
// and thread managers.
grpc_init();
TestPollAndWork();
TestThreadQuota();
grpc_shutdown();
return 0;
}

Loading…
Cancel
Save