Merge pull request #16356 from vjpai/exhaustion

C++ sync server: Return status RESOURCE_EXHAUSTED if no thread quota available
pull/16362/head
Vijay Pai 7 years ago committed by GitHub
commit 8a86f5329e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      include/grpcpp/impl/codegen/byte_buffer.h
  2. 6
      include/grpcpp/impl/codegen/completion_queue.h
  3. 17
      include/grpcpp/impl/codegen/method_handler_impl.h
  4. 6
      include/grpcpp/impl/codegen/server_context.h
  5. 3
      include/grpcpp/server.h
  6. 25
      src/cpp/server/server_cc.cc
  7. 38
      src/cpp/thread_manager/thread_manager.cc
  8. 6
      src/cpp/thread_manager/thread_manager.h
  9. 117
      test/cpp/end2end/thread_stress_test.cc
  10. 4
      test/cpp/thread_manager/thread_manager_test.cc

@ -45,6 +45,8 @@ template <class ServiceType, class RequestType, class ResponseType>
class RpcMethodHandler;
template <class ServiceType, class RequestType, class ResponseType>
class ServerStreamingHandler;
template <StatusCode code>
class ErrorMethodHandler;
template <class R>
class DeserializeFuncType;
class GrpcByteBufferPeer;
@ -144,6 +146,8 @@ class ByteBuffer final {
friend class internal::RpcMethodHandler;
template <class ServiceType, class RequestType, class ResponseType>
friend class internal::ServerStreamingHandler;
template <StatusCode code>
friend class internal::ErrorMethodHandler;
template <class R>
friend class internal::DeserializeFuncType;
friend class ProtoBufferReader;

@ -78,9 +78,10 @@ template <class ServiceType, class RequestType, class ResponseType>
class ServerStreamingHandler;
template <class ServiceType, class RequestType, class ResponseType>
class BidiStreamingHandler;
class UnknownMethodHandler;
template <class Streamer, bool WriteNeeded>
class TemplatedBidiStreamingHandler;
template <StatusCode code>
class ErrorMethodHandler;
template <class InputMessage, class OutputMessage>
class BlockingUnaryCallImpl;
} // namespace internal
@ -265,7 +266,8 @@ class CompletionQueue : private GrpcLibraryCodegen {
friend class ::grpc::internal::ServerStreamingHandler;
template <class Streamer, bool WriteNeeded>
friend class ::grpc::internal::TemplatedBidiStreamingHandler;
friend class ::grpc::internal::UnknownMethodHandler;
template <StatusCode code>
friend class ::grpc::internal::ErrorMethodHandler;
friend class ::grpc::Server;
friend class ::grpc::ServerContext;
friend class ::grpc::ServerInterface;

@ -272,12 +272,14 @@ class SplitServerStreamingHandler
ServerSplitStreamer<RequestType, ResponseType>, false>(func) {}
};
/// Handle unknown method by returning UNIMPLEMENTED error.
class UnknownMethodHandler : public MethodHandler {
/// General method handler class for errors that prevent real method use
/// e.g., handle unknown method by returning UNIMPLEMENTED error.
template <StatusCode code>
class ErrorMethodHandler : public MethodHandler {
public:
template <class T>
static void FillOps(ServerContext* context, T* ops) {
Status status(StatusCode::UNIMPLEMENTED, "");
Status status(code, "");
if (!context->sent_initial_metadata_) {
ops->SendInitialMetadata(context->initial_metadata_,
context->initial_metadata_flags());
@ -294,9 +296,18 @@ class UnknownMethodHandler : public MethodHandler {
FillOps(param.server_context, &ops);
param.call->PerformOps(&ops);
param.call->cq()->Pluck(&ops);
// We also have to destroy any request payload in the handler parameter
ByteBuffer* payload = param.request.bbuf_ptr();
if (payload != nullptr) {
payload->Clear();
}
}
};
typedef ErrorMethodHandler<StatusCode::UNIMPLEMENTED> UnknownMethodHandler;
typedef ErrorMethodHandler<StatusCode::RESOURCE_EXHAUSTED>
ResourceExhaustedHandler;
} // namespace internal
} // namespace grpc

@ -63,9 +63,10 @@ template <class ServiceType, class RequestType, class ResponseType>
class ServerStreamingHandler;
template <class ServiceType, class RequestType, class ResponseType>
class BidiStreamingHandler;
class UnknownMethodHandler;
template <class Streamer, bool WriteNeeded>
class TemplatedBidiStreamingHandler;
template <StatusCode code>
class ErrorMethodHandler;
class Call;
} // namespace internal
@ -262,7 +263,8 @@ class ServerContext {
friend class ::grpc::internal::ServerStreamingHandler;
template <class Streamer, bool WriteNeeded>
friend class ::grpc::internal::TemplatedBidiStreamingHandler;
friend class ::grpc::internal::UnknownMethodHandler;
template <StatusCode code>
friend class internal::ErrorMethodHandler;
friend class ::grpc::ClientContext;
/// Prevent copying.

@ -223,6 +223,9 @@ class Server : public ServerInterface, private GrpcLibraryCodegen {
std::unique_ptr<HealthCheckServiceInterface> health_check_service_;
bool health_check_service_disabled_;
// A special handler for resource exhausted in sync case
std::unique_ptr<internal::MethodHandler> resource_exhausted_handler_;
};
} // namespace grpc

@ -211,8 +211,10 @@ class Server::SyncRequest final : public internal::CompletionQueueTag {
call_(mrd->call_, server, &cq_, server->max_receive_message_size()),
ctx_(mrd->deadline_, &mrd->request_metadata_),
has_request_payload_(mrd->has_request_payload_),
request_payload_(mrd->request_payload_),
method_(mrd->method_) {
request_payload_(has_request_payload_ ? mrd->request_payload_
: nullptr),
method_(mrd->method_),
server_(server) {
ctx_.set_call(mrd->call_);
ctx_.cq_ = &cq_;
GPR_ASSERT(mrd->in_flight_);
@ -226,10 +228,13 @@ class Server::SyncRequest final : public internal::CompletionQueueTag {
}
}
void Run(const std::shared_ptr<GlobalCallbacks>& global_callbacks) {
void Run(const std::shared_ptr<GlobalCallbacks>& global_callbacks,
bool resources) {
ctx_.BeginCompletionOp(&call_);
global_callbacks->PreSynchronousRequest(&ctx_);
method_->handler()->RunHandler(internal::MethodHandler::HandlerParameter(
auto* handler = resources ? method_->handler()
: server_->resource_exhausted_handler_.get();
handler->RunHandler(internal::MethodHandler::HandlerParameter(
&call_, &ctx_, request_payload_));
global_callbacks->PostSynchronousRequest(&ctx_);
request_payload_ = nullptr;
@ -251,6 +256,7 @@ class Server::SyncRequest final : public internal::CompletionQueueTag {
const bool has_request_payload_;
grpc_byte_buffer* request_payload_;
internal::RpcServiceMethod* const method_;
Server* server_;
};
private:
@ -301,7 +307,7 @@ class Server::SyncRequestThreadManager : public ThreadManager {
GPR_UNREACHABLE_CODE(return TIMEOUT);
}
void DoWork(void* tag, bool ok) override {
void DoWork(void* tag, bool ok, bool resources) override {
SyncRequest* sync_req = static_cast<SyncRequest*>(tag);
if (!sync_req) {
@ -321,7 +327,7 @@ class Server::SyncRequestThreadManager : public ThreadManager {
}
GPR_TIMER_SCOPE("cd.Run()", 0);
cd.Run(global_callbacks_);
cd.Run(global_callbacks_, resources);
}
// TODO (sreek) If ok is false here (which it isn't in case of
// grpc_request_registered_call), we should still re-queue the request
@ -579,6 +585,13 @@ void Server::Start(ServerCompletionQueue** cqs, size_t num_cqs) {
}
}
// If this server has any support for synchronous methods (has any sync
// server CQs), make sure that we have a ResourceExhausted handler
// to deal with the case of thread exhaustion
if (!sync_server_cqs_->empty()) {
resource_exhausted_handler_.reset(new internal::ResourceExhaustedHandler);
}
for (auto it = sync_req_mgrs_.begin(); it != sync_req_mgrs_.end(); it++) {
(*it)->Start();
}

@ -166,22 +166,38 @@ void ThreadManager::MainWorkLoop() {
case WORK_FOUND:
// If we got work and there are now insufficient pollers and there is
// quota available to create a new thread, start a new poller thread
if (!shutdown_ && num_pollers_ < min_pollers_ &&
grpc_resource_user_allocate_threads(resource_user_, 1)) {
num_pollers_++;
num_threads_++;
if (num_threads_ > max_active_threads_sofar_) {
max_active_threads_sofar_ = num_threads_;
bool resource_exhausted = false;
if (!shutdown_ && num_pollers_ < min_pollers_) {
if (grpc_resource_user_allocate_threads(resource_user_, 1)) {
// We can allocate a new poller thread
num_pollers_++;
num_threads_++;
if (num_threads_ > max_active_threads_sofar_) {
max_active_threads_sofar_ = num_threads_;
}
// Drop lock before spawning thread to avoid contention
lock.unlock();
new WorkerThread(this);
} else if (num_pollers_ > 0) {
// There is still at least some thread polling, so we can go on
// even though we are below the number of pollers that we would
// like to have (min_pollers_)
lock.unlock();
} else {
// There are no pollers to spare and we couldn't allocate
// a new thread, so resources are exhausted!
lock.unlock();
resource_exhausted = true;
}
// Drop lock before spawning thread to avoid contention
lock.unlock();
new WorkerThread(this);
} else {
// Drop lock for consistency with above branch
// There are a sufficient number of pollers available so we can do
// the work and continue polling with our existing poller threads
lock.unlock();
}
// Lock is always released at this point - do the application work
DoWork(tag, ok);
// or return resource exhausted if there is new work but we couldn't
// get a thread in which to do it.
DoWork(tag, ok, !resource_exhausted);
// Take the lock again to check post conditions
lock.lock();
// If we're shutdown, we should finish at this point.

@ -67,12 +67,14 @@ class ThreadManager {
// The implementation of DoWork() is supposed to perform the work found by
// PollForWork(). The tag and ok parameters are the same as returned by
// PollForWork()
// PollForWork(). The resources parameter indicates that the call actually
// has the resources available for performing the RPC's work. If it doesn't,
// the implementation should fail it appropriately.
//
// The implementation of DoWork() should also do any setup needed to ensure
// that the next call to PollForWork() (not necessarily by the current thread)
// actually finds some work
virtual void DoWork(void* tag, bool ok) = 0;
virtual void DoWork(void* tag, bool ok, bool resources) = 0;
// Mark the ThreadManager as shutdown and begin draining the work. This is a
// non-blocking call and the caller should call Wait(), a blocking call which

@ -16,6 +16,7 @@
*
*/
#include <cinttypes>
#include <mutex>
#include <thread>
@ -24,6 +25,7 @@
#include <grpcpp/channel.h>
#include <grpcpp/client_context.h>
#include <grpcpp/create_channel.h>
#include <grpcpp/resource_quota.h>
#include <grpcpp/server.h>
#include <grpcpp/server_builder.h>
#include <grpcpp/server_context.h>
@ -51,63 +53,13 @@ namespace testing {
class TestServiceImpl : public ::grpc::testing::EchoTestService::Service {
public:
TestServiceImpl() : signal_client_(false) {}
TestServiceImpl() {}
Status Echo(ServerContext* context, const EchoRequest* request,
EchoResponse* response) override {
response->set_message(request->message());
return Status::OK;
}
// Unimplemented is left unimplemented to test the returned error.
Status RequestStream(ServerContext* context,
ServerReader<EchoRequest>* reader,
EchoResponse* response) override {
EchoRequest request;
response->set_message("");
while (reader->Read(&request)) {
response->mutable_message()->append(request.message());
}
return Status::OK;
}
// Return 3 messages.
// TODO(yangg) make it generic by adding a parameter into EchoRequest
Status ResponseStream(ServerContext* context, const EchoRequest* request,
ServerWriter<EchoResponse>* writer) override {
EchoResponse response;
response.set_message(request->message() + "0");
writer->Write(response);
response.set_message(request->message() + "1");
writer->Write(response);
response.set_message(request->message() + "2");
writer->Write(response);
return Status::OK;
}
Status BidiStream(
ServerContext* context,
ServerReaderWriter<EchoResponse, EchoRequest>* stream) override {
EchoRequest request;
EchoResponse response;
while (stream->Read(&request)) {
gpr_log(GPR_INFO, "recv msg %s", request.message().c_str());
response.set_message(request.message());
stream->Write(response);
}
return Status::OK;
}
bool signal_client() {
std::unique_lock<std::mutex> lock(mu_);
return signal_client_;
}
private:
bool signal_client_;
std::mutex mu_;
};
template <class Service>
@ -118,6 +70,7 @@ class CommonStressTest {
virtual void SetUp() = 0;
virtual void TearDown() = 0;
virtual void ResetStub() = 0;
virtual bool AllowExhaustion() = 0;
grpc::testing::EchoTestService::Stub* GetStub() { return stub_.get(); }
protected:
@ -146,6 +99,7 @@ class CommonStressTestInsecure : public CommonStressTest<Service> {
CreateChannel(server_address_.str(), InsecureChannelCredentials());
this->stub_ = grpc::testing::EchoTestService::NewStub(channel);
}
bool AllowExhaustion() override { return false; }
protected:
void SetUpStart(ServerBuilder* builder, Service* service) override {
@ -161,7 +115,7 @@ class CommonStressTestInsecure : public CommonStressTest<Service> {
std::ostringstream server_address_;
};
template <class Service>
template <class Service, bool allow_resource_exhaustion>
class CommonStressTestInproc : public CommonStressTest<Service> {
public:
void ResetStub() override {
@ -169,6 +123,7 @@ class CommonStressTestInproc : public CommonStressTest<Service> {
std::shared_ptr<Channel> channel = this->server_->InProcessChannel(args);
this->stub_ = grpc::testing::EchoTestService::NewStub(channel);
}
bool AllowExhaustion() override { return allow_resource_exhaustion; }
protected:
void SetUpStart(ServerBuilder* builder, Service* service) override {
@ -193,6 +148,26 @@ class CommonStressTestSyncServer : public BaseClass {
TestServiceImpl service_;
};
template <class BaseClass>
class CommonStressTestSyncServerLowThreadCount : public BaseClass {
public:
void SetUp() override {
ServerBuilder builder;
ResourceQuota quota;
this->SetUpStart(&builder, &service_);
quota.SetMaxThreads(4);
builder.SetResourceQuota(quota);
this->SetUpEnd(&builder);
}
void TearDown() override {
this->TearDownStart();
this->TearDownEnd();
}
private:
TestServiceImpl service_;
};
template <class BaseClass>
class CommonStressTestAsyncServer : public BaseClass {
public:
@ -293,7 +268,8 @@ class End2endTest : public ::testing::Test {
Common common_;
};
static void SendRpc(grpc::testing::EchoTestService::Stub* stub, int num_rpcs) {
static void SendRpc(grpc::testing::EchoTestService::Stub* stub, int num_rpcs,
bool allow_exhaustion, gpr_atm* errors) {
EchoRequest request;
EchoResponse response;
request.set_message("Hello");
@ -301,34 +277,53 @@ static void SendRpc(grpc::testing::EchoTestService::Stub* stub, int num_rpcs) {
for (int i = 0; i < num_rpcs; ++i) {
ClientContext context;
Status s = stub->Echo(&context, request, &response);
EXPECT_EQ(response.message(), request.message());
EXPECT_TRUE(s.ok() || (allow_exhaustion &&
s.error_code() == StatusCode::RESOURCE_EXHAUSTED));
if (!s.ok()) {
gpr_log(GPR_ERROR, "RPC error: %d: %s", s.error_code(),
s.error_message().c_str());
if (!(allow_exhaustion &&
s.error_code() == StatusCode::RESOURCE_EXHAUSTED)) {
gpr_log(GPR_ERROR, "RPC error: %d: %s", s.error_code(),
s.error_message().c_str());
}
gpr_atm_no_barrier_fetch_add(errors, static_cast<gpr_atm>(1));
} else {
EXPECT_EQ(response.message(), request.message());
}
ASSERT_TRUE(s.ok());
}
}
typedef ::testing::Types<
CommonStressTestSyncServer<CommonStressTestInsecure<TestServiceImpl>>,
CommonStressTestSyncServer<CommonStressTestInproc<TestServiceImpl>>,
CommonStressTestSyncServer<CommonStressTestInproc<TestServiceImpl, false>>,
CommonStressTestSyncServerLowThreadCount<
CommonStressTestInproc<TestServiceImpl, true>>,
CommonStressTestAsyncServer<
CommonStressTestInsecure<grpc::testing::EchoTestService::AsyncService>>,
CommonStressTestAsyncServer<
CommonStressTestInproc<grpc::testing::EchoTestService::AsyncService>>>
CommonStressTestAsyncServer<CommonStressTestInproc<
grpc::testing::EchoTestService::AsyncService, false>>>
CommonTypes;
TYPED_TEST_CASE(End2endTest, CommonTypes);
TYPED_TEST(End2endTest, ThreadStress) {
this->common_.ResetStub();
std::vector<std::thread> threads;
gpr_atm errors;
gpr_atm_rel_store(&errors, static_cast<gpr_atm>(0));
threads.reserve(kNumThreads);
for (int i = 0; i < kNumThreads; ++i) {
threads.emplace_back(SendRpc, this->common_.GetStub(), kNumRpcs);
threads.emplace_back(SendRpc, this->common_.GetStub(), kNumRpcs,
this->common_.AllowExhaustion(), &errors);
}
for (int i = 0; i < kNumThreads; ++i) {
threads[i].join();
}
uint64_t error_cnt = static_cast<uint64_t>(gpr_atm_no_barrier_load(&errors));
if (error_cnt != 0) {
gpr_log(GPR_INFO, "RPC error count: %" PRIu64, error_cnt);
}
// If this test allows resource exhaustion, expect that it actually sees some
if (this->common_.AllowExhaustion()) {
EXPECT_GT(error_cnt, static_cast<uint64_t>(0));
}
}
template <class Common>

@ -55,7 +55,7 @@ class ThreadManagerTest final : public grpc::ThreadManager {
num_work_found_(0) {}
grpc::ThreadManager::WorkStatus PollForWork(void** tag, bool* ok) override;
void DoWork(void* tag, bool ok) override;
void DoWork(void* tag, bool ok, bool resources) override;
// Get number of times PollForWork() returned WORK_FOUND
int GetNumWorkFound();
@ -102,7 +102,7 @@ grpc::ThreadManager::WorkStatus ThreadManagerTest::PollForWork(void** tag,
return WORK_FOUND;
}
void ThreadManagerTest::DoWork(void* tag, bool ok) {
void ThreadManagerTest::DoWork(void* tag, bool ok, bool resources) {
gpr_atm_no_barrier_fetch_add(&num_do_work_, 1);
SleepForMs(settings_.work_duration_ms); // Simulate work by sleeping
}

Loading…
Cancel
Save