Support multiple threads sharing each completion queue, add tests

pull/9904/head
Vijay Pai 8 years ago
parent 275bc932d2
commit 4b07aab513
  1. src/proto/grpc/testing/control.proto (6)
  2. test/cpp/qps/client_async.cc (43)
  3. test/cpp/qps/server_async.cc (18)
  4. tools/run_tests/generated/tests.json (1252)
  5. tools/run_tests/performance/scenario_config.py (64)

@@ -117,6 +117,9 @@ message ClientConfig {
repeated ChannelArg channel_args = 16;
// Number of threads that share each completion queue
int32 threads_per_cq = 17;
// Number of messages on a stream before it gets finished/restarted
int32 messages_per_stream = 18;
}
@@ -157,6 +160,9 @@ message ServerConfig {
// If we use an OTHER_SERVER client_type, this string gives more detail
string other_server_api = 11;
// Number of threads that share each completion queue
int32 threads_per_cq = 12;
// c++-only options (for now) --------------------------------
// Buffer pool size (no buffer pool specified if unset)
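
Both ClientConfig and ServerConfig gain a threads_per_cq field. For reference, a minimal standalone sketch of how such a value can be turned into a completion-queue count and a per-thread queue index, mirroring the ceiling division and round-robin assignment in the client and server changes below (the AssignThreadsToCqs name and signature are illustrative, not part of the patch):

    #include <algorithm>
    #include <vector>

    // Illustrative only: derive the number of completion queues from
    // threads_per_cq and assign worker threads to them round-robin.
    // A value of 0 means "unspecified" and is treated as 1 thread per queue.
    std::vector<int> AssignThreadsToCqs(int num_threads, int threads_per_cq) {
      int tpc = std::max(1, threads_per_cq);        // 1 if unspecified
      int num_cqs = (num_threads + tpc - 1) / tpc;  // ceiling division
      std::vector<int> cq_index(num_threads);
      for (int i = 0; i < num_threads; i++) {
        cq_index[i] = i % num_cqs;                  // same mapping as cq_ below
      }
      return cq_index;
    }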

@@ -70,6 +70,11 @@ class ClientRpcContext {
}
virtual void Start(CompletionQueue* cq, const ClientConfig& config) = 0;
void lock() { mu_.lock(); }
void unlock() { mu_.unlock(); }
private:
std::mutex mu_;
};
template <class RequestType, class ResponseType>
@@ -121,6 +126,7 @@ class ClientRpcContextUnaryImpl : public ClientRpcContext {
void StartNewClone(CompletionQueue* cq) override {
auto* clone = new ClientRpcContextUnaryImpl(stub_, req_, next_issue_,
start_req_, callback_);
std::lock_guard<ClientRpcContext> lclone(*clone);
clone->StartInternal(cq);
}
@@ -178,8 +184,14 @@ class AsyncClient : public ClientImpl<StubType, RequestType> {
num_async_threads_(NumThreads(config)) {
SetupLoadTest(config, num_async_threads_);
for (int i = 0; i < num_async_threads_; i++) {
int tpc = std::max(1, config.threads_per_cq()); // 1 if unspecified
int num_cqs = (num_async_threads_ + tpc - 1) / tpc; // ceiling operator
for (int i = 0; i < num_cqs; i++) {
cli_cqs_.emplace_back(new CompletionQueue);
}
for (int i = 0; i < num_async_threads_; i++) {
cq_.emplace_back(i % cli_cqs_.size());
next_issuers_.emplace_back(NextIssuer(i));
shutdown_state_.emplace_back(new PerThreadShutdownState());
}
@@ -246,20 +258,36 @@ class AsyncClient : public ClientImpl<StubType, RequestType> {
void* got_tag;
bool ok;
if (cli_cqs_[thread_idx]->Next(&got_tag, &ok)) {
if (cli_cqs_[cq_[thread_idx]]->Next(&got_tag, &ok)) {
// Got a regular event, so process it
ClientRpcContext* ctx = ClientRpcContext::detag(got_tag);
// Proceed while holding a lock to make sure that
// this thread isn't supposed to shut down
std::lock_guard<std::mutex> l(shutdown_state_[thread_idx]->mutex);
if (shutdown_state_[thread_idx]->shutdown) {
// We want to delete the context. However, it is possible that
// another thread that just initiated an action on this
// context still has its lock even though the action on the
// context has completed. To delay for that, just grab the
// lock for serialization. Take a new scope.
{ std::lock_guard<ClientRpcContext> lctx(*ctx); }
delete ctx;
return true;
} else if (!ctx->RunNextState(ok, entry)) {
}
bool del = false;
// Create a new scope for a lock_guard'ed region
{
std::lock_guard<ClientRpcContext> lctx(*ctx);
if (!ctx->RunNextState(ok, entry)) {
// The RPC and callback are done, so clone the ctx
// and kickstart the new one
ctx->StartNewClone(cli_cqs_[thread_idx].get());
// delete the old version
ctx->StartNewClone(cli_cqs_[cq_[thread_idx]].get());
// set the old version to delete
del = true;
}
}
if (del) {
delete ctx;
}
return true;
@@ -270,6 +298,7 @@ class AsyncClient : public ClientImpl<StubType, RequestType> {
}
std::vector<std::unique_ptr<CompletionQueue>> cli_cqs_;
std::vector<int> cq_;
std::vector<std::function<gpr_timespec()>> next_issuers_;
std::vector<std::unique_ptr<PerThreadShutdownState>> shutdown_state_;
};
@@ -392,6 +421,7 @@ class ClientRpcContextStreamingPingPongImpl : public ClientRpcContext {
void StartNewClone(CompletionQueue* cq) override {
auto* clone = new ClientRpcContextStreamingPingPongImpl(
stub_, req_, next_issue_, start_req_, callback_);
std::lock_guard<ClientRpcContext> lclone(*clone);
clone->StartInternal(cq, messages_per_stream_);
}
@@ -530,6 +560,7 @@ class ClientRpcContextStreamingFromClientImpl : public ClientRpcContext {
void StartNewClone(CompletionQueue* cq) override {
auto* clone = new ClientRpcContextStreamingFromClientImpl(
stub_, req_, next_issue_, start_req_, callback_);
std::lock_guard<ClientRpcContext> lclone(*clone);
clone->StartInternal(cq);
}
@@ -647,6 +678,7 @@ class ClientRpcContextStreamingFromServerImpl : public ClientRpcContext {
void StartNewClone(CompletionQueue* cq) override {
auto* clone = new ClientRpcContextStreamingFromServerImpl(
stub_, req_, next_issue_, start_req_, callback_);
std::lock_guard<ClientRpcContext> lclone(*clone);
clone->StartInternal(cq);
}
@@ -789,6 +821,7 @@ class ClientRpcContextGenericStreamingImpl : public ClientRpcContext {
void StartNewClone(CompletionQueue* cq) override {
auto* clone = new ClientRpcContextGenericStreamingImpl(
stub_, req_, next_issue_, start_req_, callback_);
std::lock_guard<ClientRpcContext> lclone(*clone);
clone->StartInternal(cq, messages_per_stream_);
}
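
The lock()/unlock() members added to ClientRpcContext make the class satisfy BasicLockable, which is what lets std::lock_guard<ClientRpcContext> guard a context directly in the clone and polling paths above, and why shutdown briefly takes and releases the lock before deleting a context. A minimal standalone sketch of that pattern (the Context and DeleteSafely names are illustrative, not from the patch):

    #include <mutex>

    class Context {
     public:
      // Exposing lock()/unlock() makes Context BasicLockable, so
      // std::lock_guard<Context> can be used on it directly.
      void lock() { mu_.lock(); }
      void unlock() { mu_.unlock(); }

     private:
      std::mutex mu_;
    };

    void DeleteSafely(Context* ctx) {
      // Another thread that just initiated an action on ctx may still hold
      // its lock even though the action has already completed. Briefly
      // taking and releasing the lock serializes with that thread.
      { std::lock_guard<Context> lctx(*ctx); }
      delete ctx;
    }

Note that the polling path above uses a del flag so the context's own lock is released before the delete, since a mutex must not be destroyed while it is held.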

@@ -31,6 +31,7 @@
*
*/
#include <algorithm>
#include <forward_list>
#include <functional>
#include <memory>
@@ -104,9 +105,14 @@ class AsyncQpsServerTest final : public grpc::testing::Server {
gpr_log(GPR_INFO, "Sizing async server to %d threads", num_threads);
}
for (int i = 0; i < num_threads; i++) {
int tpc = std::max(1, config.threads_per_cq()); // 1 if unspecified
int num_cqs = (num_threads + tpc - 1) / tpc; // ceiling operator
for (int i = 0; i < num_cqs; i++) {
srv_cqs_.emplace_back(builder.AddCompletionQueue());
}
for (int i = 0; i < num_threads; i++) {
cq_.emplace_back(i % srv_cqs_.size());
}
if (config.resource_quota_size() > 0) {
builder.SetResourceQuota(ResourceQuota("AsyncQpsServerTest")
@@ -120,7 +126,7 @@ class AsyncQpsServerTest final : public grpc::testing::Server {
std::placeholders::_2);
for (int i = 0; i < 5000; i++) {
for (int j = 0; j < num_threads; j++) {
for (int j = 0; j < num_cqs; j++) {
if (request_unary_function) {
auto request_unary = std::bind(
request_unary_function, &async_service_, std::placeholders::_1,
@@ -205,7 +211,7 @@ class AsyncQpsServerTest final : public grpc::testing::Server {
// Wait until work is available or we are shutting down
bool ok;
void *got_tag;
while (srv_cqs_[thread_idx]->Next(&got_tag, &ok)) {
while (srv_cqs_[cq_[thread_idx]]->Next(&got_tag, &ok)) {
ServerRpcContext *ctx = detag(got_tag);
// The tag is a pointer to an RPC context to invoke
// Proceed while holding a lock to make sure that
@@ -214,6 +220,7 @@ class AsyncQpsServerTest final : public grpc::testing::Server {
if (shutdown_state_[thread_idx]->shutdown) {
return;
}
std::lock_guard<ServerRpcContext> l2(*ctx);
const bool still_going = ctx->RunNextState(ok);
// if this RPC context is done, refresh it
if (!still_going) {
@@ -226,9 +233,13 @@ class AsyncQpsServerTest final : public grpc::testing::Server {
class ServerRpcContext {
public:
ServerRpcContext() {}
void lock() { mu_.lock(); }
void unlock() { mu_.unlock(); }
virtual ~ServerRpcContext(){};
virtual bool RunNextState(bool) = 0; // next state, return false if done
virtual void Reset() = 0; // start this back at a clean state
private:
std::mutex mu_;
};
static void *tag(ServerRpcContext *func) {
return reinterpret_cast<void *>(func);
@@ -518,6 +529,7 @@ class AsyncQpsServerTest final : public grpc::testing::Server {
std::vector<std::thread> threads_;
std::unique_ptr<grpc::Server> server_;
std::vector<std::unique_ptr<grpc::ServerCompletionQueue>> srv_cqs_;
std::vector<int> cq_;
ServiceType async_service_;
std::vector<std::unique_ptr<ServerRpcContext>> contexts_;
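
For orientation, a condensed, self-contained sketch of the worker-loop shape that results on the server side: each thread polls the completion queue selected by its cq_ entry, checks its shutdown flag under the per-thread mutex, and only then drives the context under the context's own lock. ShutdownState, RpcContext, and WorkerLoop are simplified stand-ins for PerThreadShutdownState, ServerRpcContext, and ThreadFunc, not the actual implementation:

    #include <grpc++/grpc++.h>

    #include <memory>
    #include <mutex>
    #include <vector>

    // Simplified stand-in for the per-thread shutdown flag.
    struct ShutdownState {
      std::mutex mutex;
      bool shutdown = false;
    };

    // Stand-in for ServerRpcContext: same BasicLockable surface,
    // trivial state machine.
    class RpcContext {
     public:
      void lock() { mu_.lock(); }
      void unlock() { mu_.unlock(); }
      bool RunNextState(bool ok) { return ok; }  // placeholder state machine
      void Reset() {}                            // placeholder refresh

     private:
      std::mutex mu_;
    };

    // Shape of the per-thread polling loop after this change: the thread
    // polls the queue chosen by cq[thread_idx] instead of owning its own.
    void WorkerLoop(
        int thread_idx,
        std::vector<std::unique_ptr<grpc::ServerCompletionQueue>>& cqs,
        const std::vector<int>& cq,
        std::vector<std::unique_ptr<ShutdownState>>& shutdown_state) {
      void* got_tag;
      bool ok;
      while (cqs[cq[thread_idx]]->Next(&got_tag, &ok)) {
        RpcContext* ctx = static_cast<RpcContext*>(got_tag);
        std::lock_guard<std::mutex> l(shutdown_state[thread_idx]->mutex);
        if (shutdown_state[thread_idx]->shutdown) {
          return;
        }
        std::lock_guard<RpcContext> l2(*ctx);  // context is BasicLockable
        if (!ctx->RunNextState(ok)) {
          ctx->Reset();  // finished RPC: put it back to a clean state
        }
      }
    }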

tools/run_tests/generated/tests.json: file diff suppressed because it is too large

@@ -108,6 +108,8 @@ def _ping_pong_scenario(name, rpc_type,
client_language=None,
server_language=None,
async_server_threads=0,
server_threads_per_cq=0,
client_threads_per_cq=0,
warmup_seconds=WARMUP_SECONDS,
categories=DEFAULT_CATEGORIES,
channels=None,
@@ -127,6 +129,7 @@ def _ping_pong_scenario(name, rpc_type,
'outstanding_rpcs_per_channel': 1,
'client_channels': 1,
'async_client_threads': 1,
'threads_per_cq': client_threads_per_cq,
'rpc_type': rpc_type,
'load_params': {
'closed_loop': {}
@@ -137,6 +140,7 @@ def _ping_pong_scenario(name, rpc_type,
'server_type': server_type,
'security_params': _get_secargs(secure),
'async_server_threads': async_server_threads,
'threads_per_cq': server_threads_per_cq,
},
'warmup_seconds': warmup_seconds,
'benchmark_seconds': BENCHMARK_SECONDS
@@ -280,6 +284,66 @@ class CXXLanguage:
secure=secure,
categories=smoketest_categories+[SCALABLE])
yield _ping_pong_scenario(
'cpp_generic_async_streaming_qps_unconstrained_1cq_%s' % secstr,
rpc_type='STREAMING',
client_type='ASYNC_CLIENT',
server_type='ASYNC_GENERIC_SERVER',
unconstrained_client='async', use_generic_payload=True,
secure=secure,
client_threads_per_cq=1000000, server_threads_per_cq=1000000,
categories=smoketest_categories+[SCALABLE])
yield _ping_pong_scenario(
'cpp_generic_async_streaming_qps_unconstrained_2waysharedcq_%s' % secstr,
rpc_type='STREAMING',
client_type='ASYNC_CLIENT',
server_type='ASYNC_GENERIC_SERVER',
unconstrained_client='async', use_generic_payload=True,
secure=secure,
client_threads_per_cq=2, server_threads_per_cq=2,
categories=smoketest_categories+[SCALABLE])
yield _ping_pong_scenario(
'cpp_protobuf_async_streaming_qps_unconstrained_1cq_%s' % secstr,
rpc_type='STREAMING',
client_type='ASYNC_CLIENT',
server_type='ASYNC_SERVER',
unconstrained_client='async',
secure=secure,
client_threads_per_cq=1000000, server_threads_per_cq=1000000,
categories=smoketest_categories+[SCALABLE])
yield _ping_pong_scenario(
'cpp_protobuf_async_streaming_qps_unconstrained_2waysharedcq_%s' % secstr,
rpc_type='STREAMING',
client_type='ASYNC_CLIENT',
server_type='ASYNC_SERVER',
unconstrained_client='async',
secure=secure,
client_threads_per_cq=2, server_threads_per_cq=2,
categories=smoketest_categories+[SCALABLE])
yield _ping_pong_scenario(
'cpp_protobuf_async_unary_qps_unconstrained_1cq_%s' % secstr,
rpc_type='UNARY',
client_type='ASYNC_CLIENT',
server_type='ASYNC_SERVER',
unconstrained_client='async',
secure=secure,
client_threads_per_cq=1000000, server_threads_per_cq=1000000,
categories=smoketest_categories+[SCALABLE])
yield _ping_pong_scenario(
'cpp_protobuf_async_unary_qps_unconstrained_2waysharedcq_%s' % secstr,
rpc_type='UNARY',
client_type='ASYNC_CLIENT',
server_type='ASYNC_SERVER',
unconstrained_client='async',
secure=secure,
client_threads_per_cq=2, server_threads_per_cq=2,
categories=smoketest_categories+[SCALABLE])
yield _ping_pong_scenario(
'cpp_generic_async_streaming_qps_one_server_core_%s' % secstr,
rpc_type='STREAMING',

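The new *_1cq_* and *_2waysharedcq_* scenarios exercise the two extremes of the new setting: threads_per_cq=1000000 collapses all worker threads onto a single completion queue, while threads_per_cq=2 shares each queue between a pair of threads. A small worked check of the resulting queue counts under the ceiling-division rule above (the thread count of 8 is purely illustrative):

    #include <cassert>

    int main() {
      const int num_threads = 8;  // illustrative thread count

      // threads_per_cq = 1000000 (the "1cq" scenarios): one shared queue.
      assert((num_threads + 1000000 - 1) / 1000000 == 1);

      // threads_per_cq = 2 (the "2waysharedcq" scenarios): one queue per
      // pair of threads.
      assert((num_threads + 2 - 1) / 2 == 4);

      return 0;
    }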