Merge pull request #20597 from nanahpang/benchmark-correctness

Correct the benchmark calculation and add comments to clarify the meaning of each field in ScenarioResultSummary
pull/21290/head
nanahpang authored 5 years ago, committed by GitHub
commit 3240333334
  1. src/proto/grpc/testing/control.proto (17 changed lines)
  2. test/cpp/qps/driver.cc (60 changed lines)

@@ -220,17 +220,22 @@ message Scenarios {
 // once the scenario has finished.
 message ScenarioResultSummary
 {
-  // Total number of operations per second over all clients.
+  // Total number of operations per second over all clients. What is counted as 1 'operation' depends on the benchmark scenario:
+  // For unary benchmarks, an operation is the processing of a single unary RPC.
+  // For streaming benchmarks, an operation is the processing of a single ping-pong of request and response.
   double qps = 1;
-  // QPS per one server core.
+  // QPS per server core.
   double qps_per_server_core = 2;
-  // server load based on system_time (0.85 => 85%)
+  // The total server cpu load based on system time across all server processes, expressed as a percentage of a single cpu core.
+  // For example, 85 implies 85% of a cpu core, 125 implies 125% of a cpu core. Since the cpu load is accumulated across all server
+  // processes, the value can be > 100 when there are multiple servers or a single server using multiple threads and cores.
+  // The same explanation applies to the total client cpu load below.
   double server_system_time = 3;
-  // server load based on user_time (0.85 => 85%)
+  // The total server cpu load based on user time across all server processes, expressed as a percentage of a single cpu core. (85 => 85%, 125 => 125%)
   double server_user_time = 4;
-  // client load based on system_time (0.85 => 85%)
+  // The total client cpu load based on system time across all client processes, expressed as a percentage of a single cpu core. (85 => 85%, 125 => 125%)
   double client_system_time = 5;
-  // client load based on user_time (0.85 => 85%)
+  // The total client cpu load based on user time across all client processes, expressed as a percentage of a single cpu core. (85 => 85%, 125 => 125%)
   double client_user_time = 6;
   // X% latency percentiles (in nanoseconds)
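The accumulated cpu-load semantics described in the new comments are easiest to see with numbers. Below is a small standalone sketch (illustration only, not part of the change): time_system and time_elapsed are made-up per-process measurements, and the loop mirrors the accumulation described above.

#include <cstdio>

int main() {
  // Hypothetical measurements for two server processes over a 10 s run.
  const double time_system[] = {6.0, 5.5};     // cpu seconds spent in system mode
  const double time_elapsed[] = {10.0, 10.0};  // wall-clock seconds

  // Accumulate the load across processes, as the summary fields now describe.
  double server_system_cpu_load = 0;
  for (int i = 0; i < 2; ++i) {
    server_system_cpu_load += time_system[i] / time_elapsed[i];
  }

  // 100 * (0.60 + 0.55) = 115, i.e. 115% of one cpu core, so values above 100
  // are expected whenever several processes (or threads) keep cores busy.
  std::printf("server_system_time = %.0f\n", 100 * server_system_cpu_load);
  return 0;
}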

@@ -134,36 +134,46 @@ static bool IsSuccess(const Status& s) {
 // Postprocess ScenarioResult and populate result summary.
 static void postprocess_scenario_result(ScenarioResult* result) {
+  // Get latencies from ScenarioResult latencies histogram and populate to
+  // result summary.
   Histogram histogram;
   histogram.MergeProto(result->latencies());
-  auto time_estimate = average(result->client_stats(), WallTime);
-  auto qps = histogram.Count() / time_estimate;
-  auto qps_per_server_core = qps / sum(result->server_cores(), Cores);
-  result->mutable_summary()->set_qps(qps);
-  result->mutable_summary()->set_qps_per_server_core(qps_per_server_core);
   result->mutable_summary()->set_latency_50(histogram.Percentile(50));
   result->mutable_summary()->set_latency_90(histogram.Percentile(90));
   result->mutable_summary()->set_latency_95(histogram.Percentile(95));
   result->mutable_summary()->set_latency_99(histogram.Percentile(99));
   result->mutable_summary()->set_latency_999(histogram.Percentile(99.9));
-  auto server_system_time = 100.0 *
-                            sum(result->server_stats(), ServerSystemTime) /
-                            sum(result->server_stats(), ServerWallTime);
-  auto server_user_time = 100.0 * sum(result->server_stats(), ServerUserTime) /
-                          sum(result->server_stats(), ServerWallTime);
-  auto client_system_time = 100.0 * sum(result->client_stats(), SystemTime) /
-                            sum(result->client_stats(), WallTime);
-  auto client_user_time = 100.0 * sum(result->client_stats(), UserTime) /
-                          sum(result->client_stats(), WallTime);
-  result->mutable_summary()->set_server_system_time(server_system_time);
-  result->mutable_summary()->set_server_user_time(server_user_time);
-  result->mutable_summary()->set_client_system_time(client_system_time);
-  result->mutable_summary()->set_client_user_time(client_user_time);
+  // Calculate qps and cpu load for each client and then aggregate results for
+  // all clients
+  double qps = 0;
+  double client_system_cpu_load = 0, client_user_cpu_load = 0;
+  for (size_t i = 0; i < result->client_stats_size(); i++) {
+    auto client_stat = result->client_stats(i);
+    qps += client_stat.latencies().count() / client_stat.time_elapsed();
+    client_system_cpu_load +=
+        client_stat.time_system() / client_stat.time_elapsed();
+    client_user_cpu_load +=
+        client_stat.time_user() / client_stat.time_elapsed();
+  }
+  // Calculate cpu load for each server and then aggregate results for all
+  // servers
+  double server_system_cpu_load = 0, server_user_cpu_load = 0;
+  for (size_t i = 0; i < result->server_stats_size(); i++) {
+    auto server_stat = result->server_stats(i);
+    server_system_cpu_load +=
+        server_stat.time_system() / server_stat.time_elapsed();
+    server_user_cpu_load +=
+        server_stat.time_user() / server_stat.time_elapsed();
+  }
+  result->mutable_summary()->set_qps(qps);
+  // Populate the percentage of cpu load to result summary.
+  result->mutable_summary()->set_server_system_time(100 *
+                                                    server_system_cpu_load);
+  result->mutable_summary()->set_server_user_time(100 * server_user_cpu_load);
+  result->mutable_summary()->set_client_system_time(100 *
+                                                    client_system_cpu_load);
+  result->mutable_summary()->set_client_user_time(100 * client_user_cpu_load);
   // For Non-linux platform, get_cpu_usage() is not implemented. Thus,
   // ServerTotalCpuTime and ServerIdleCpuTime are both 0.
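Why the qps calculation changed is easier to see side by side. The sketch below is illustration only, not part of the diff: ClientStat is a stand-in struct with invented numbers rather than the generated proto message, but the two formulas mirror the old and new code paths above.

#include <cstdio>
#include <vector>

// Stand-in for the per-client stats carried in ScenarioResult.
struct ClientStat {
  double latency_count;  // number of operations recorded by this client
  double time_elapsed;   // wall-clock seconds for this client
};

int main() {
  // Two clients that ran for different lengths of time.
  std::vector<ClientStat> stats = {{1000.0, 10.0}, {3000.0, 20.0}};

  // Old approach: total operation count divided by the *average* wall time.
  double total_count = 0, total_time = 0;
  for (const auto& s : stats) {
    total_count += s.latency_count;
    total_time += s.time_elapsed;
  }
  double old_qps = total_count / (total_time / stats.size());

  // New approach: compute QPS per client, then sum across clients.
  double new_qps = 0;
  for (const auto& s : stats) {
    new_qps += s.latency_count / s.time_elapsed;
  }

  // old_qps = 4000 / 15 ≈ 266.7, new_qps = 100 + 150 = 250.
  std::printf("old qps = %.1f, new qps = %.1f\n", old_qps, new_qps);
  return 0;
}

Summing per-client rates keeps a client with an unusually short or long run from skewing the total through the averaged wall time.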
@@ -176,6 +186,9 @@ static void postprocess_scenario_result(ScenarioResult* result) {
     result->mutable_summary()->set_server_cpu_usage(server_cpu_usage);
   }
 
+  // Calculate and populate successful requests per second and failed requests
+  // per second to result summary.
+  auto time_estimate = average(result->client_stats(), WallTime);
   if (result->request_results_size() > 0) {
     int64_t successes = 0;
     int64_t failures = 0;
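The time_estimate added here feeds the per-second rates computed a few lines further down. A minimal sketch of that step, assuming status code 0 counts as a success; the request counts and the 10-second average wall time are invented:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical (status code, count) pairs, roughly what
  // ScenarioResult::request_results carries; status 0 is a success.
  struct RequestResultCount { int status_code; int64_t count; };
  const RequestResultCount request_results[] = {{0, 9900}, {14, 100}};

  int64_t successes = 0, failures = 0;
  for (const auto& rrc : request_results) {
    if (rrc.status_code == 0) {
      successes += rrc.count;
    } else {
      failures += rrc.count;
    }
  }

  // time_estimate is the average client wall time, as in the diff above.
  const double time_estimate = 10.0;  // seconds (made-up value)
  std::printf("successful/s = %.1f, failed/s = %.1f\n",
              successes / time_estimate, failures / time_estimate);
  return 0;
}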
@@ -193,6 +206,9 @@ static void postprocess_scenario_result(ScenarioResult* result) {
                                                               time_estimate);
   }
 
+  // Fill in data for other metrics required in result summary
+  auto qps_per_server_core = qps / sum(result->server_cores(), Cores);
+  result->mutable_summary()->set_qps_per_server_core(qps_per_server_core);
   result->mutable_summary()->set_client_polls_per_request(
       sum(result->client_stats(), CliPollCount) / histogram.Count());
   result->mutable_summary()->set_server_polls_per_request(