Add more counters in cq

pull/12808/head
Sree Kuchibhotla 8 years ago
parent 2f2175c207
commit 0d0fa06488
  1. 8
      src/core/lib/debug/stats_data.c
  2. 16
      src/core/lib/debug/stats_data.h
  3. 8
      src/core/lib/debug/stats_data.yaml
  4. 4
      src/core/lib/debug/stats_data_bq_schema.sql
  5. 17
      src/core/lib/surface/completion_queue.c
  6. 4
      tools/run_tests/performance/massage_qps_stats.py
  7. 24
      tools/run_tests/performance/scenario_result_schema.json

@ -111,7 +111,9 @@ const char *grpc_stats_counter_name[GRPC_STATS_COUNTER_COUNT] = {
"executor_push_retries", "executor_push_retries",
"server_requested_calls", "server_requested_calls",
"server_slowpath_requests_queued", "server_slowpath_requests_queued",
"cq_failed_queue_trylocks", "cq_ev_queue_trylock_failures",
"cq_ev_queue_trylock_successes",
"cq_ev_queue_transient_pop_failures",
}; };
const char *grpc_stats_counter_doc[GRPC_STATS_COUNTER_COUNT] = { const char *grpc_stats_counter_doc[GRPC_STATS_COUNTER_COUNT] = {
"Number of client side calls created by this process", "Number of client side calls created by this process",
@ -223,6 +225,10 @@ const char *grpc_stats_counter_doc[GRPC_STATS_COUNTER_COUNT] = {
"outstanding requests)", "outstanding requests)",
"Number of lock (trylock) acquisition failures on completion queue event " "Number of lock (trylock) acquisition failures on completion queue event "
"queue. High value here indicates high contention on completion queues", "queue. High value here indicates high contention on completion queues",
"Number of lock (trylock) acquisition successes on completion queue event "
"queue.",
"Number of times NULL was popped out of completion queue's event queue "
"even though the event queue was not empty",
}; };
const char *grpc_stats_histogram_name[GRPC_STATS_HISTOGRAM_COUNT] = { const char *grpc_stats_histogram_name[GRPC_STATS_HISTOGRAM_COUNT] = {
"call_initial_size", "call_initial_size",

@ -113,7 +113,9 @@ typedef enum {
GRPC_STATS_COUNTER_EXECUTOR_PUSH_RETRIES, GRPC_STATS_COUNTER_EXECUTOR_PUSH_RETRIES,
GRPC_STATS_COUNTER_SERVER_REQUESTED_CALLS, GRPC_STATS_COUNTER_SERVER_REQUESTED_CALLS,
GRPC_STATS_COUNTER_SERVER_SLOWPATH_REQUESTS_QUEUED, GRPC_STATS_COUNTER_SERVER_SLOWPATH_REQUESTS_QUEUED,
GRPC_STATS_COUNTER_CQ_FAILED_QUEUE_TRYLOCKS, GRPC_STATS_COUNTER_CQ_EV_QUEUE_TRYLOCK_FAILURES,
GRPC_STATS_COUNTER_CQ_EV_QUEUE_TRYLOCK_SUCCESSES,
GRPC_STATS_COUNTER_CQ_EV_QUEUE_TRANSIENT_POP_FAILURES,
GRPC_STATS_COUNTER_COUNT GRPC_STATS_COUNTER_COUNT
} grpc_stats_counters; } grpc_stats_counters;
extern const char *grpc_stats_counter_name[GRPC_STATS_COUNTER_COUNT]; extern const char *grpc_stats_counter_name[GRPC_STATS_COUNTER_COUNT];
@ -418,9 +420,15 @@ typedef enum {
#define GRPC_STATS_INC_SERVER_SLOWPATH_REQUESTS_QUEUED(exec_ctx) \ #define GRPC_STATS_INC_SERVER_SLOWPATH_REQUESTS_QUEUED(exec_ctx) \
GRPC_STATS_INC_COUNTER((exec_ctx), \ GRPC_STATS_INC_COUNTER((exec_ctx), \
GRPC_STATS_COUNTER_SERVER_SLOWPATH_REQUESTS_QUEUED) GRPC_STATS_COUNTER_SERVER_SLOWPATH_REQUESTS_QUEUED)
#define GRPC_STATS_INC_CQ_FAILED_QUEUE_TRYLOCKS(exec_ctx) \ #define GRPC_STATS_INC_CQ_EV_QUEUE_TRYLOCK_FAILURES(exec_ctx) \
GRPC_STATS_INC_COUNTER((exec_ctx), \ GRPC_STATS_INC_COUNTER((exec_ctx), \
GRPC_STATS_COUNTER_CQ_FAILED_QUEUE_TRYLOCKS) GRPC_STATS_COUNTER_CQ_EV_QUEUE_TRYLOCK_FAILURES)
#define GRPC_STATS_INC_CQ_EV_QUEUE_TRYLOCK_SUCCESSES(exec_ctx) \
GRPC_STATS_INC_COUNTER((exec_ctx), \
GRPC_STATS_COUNTER_CQ_EV_QUEUE_TRYLOCK_SUCCESSES)
#define GRPC_STATS_INC_CQ_EV_QUEUE_TRANSIENT_POP_FAILURES(exec_ctx) \
GRPC_STATS_INC_COUNTER( \
(exec_ctx), GRPC_STATS_COUNTER_CQ_EV_QUEUE_TRANSIENT_POP_FAILURES)
#define GRPC_STATS_INC_CALL_INITIAL_SIZE(exec_ctx, value) \ #define GRPC_STATS_INC_CALL_INITIAL_SIZE(exec_ctx, value) \
grpc_stats_inc_call_initial_size((exec_ctx), (int)(value)) grpc_stats_inc_call_initial_size((exec_ctx), (int)(value))
void grpc_stats_inc_call_initial_size(grpc_exec_ctx *exec_ctx, int x); void grpc_stats_inc_call_initial_size(grpc_exec_ctx *exec_ctx, int x);

@ -271,6 +271,12 @@
doc: How many times was the server slow path taken (indicates too few doc: How many times was the server slow path taken (indicates too few
outstanding requests) outstanding requests)
# cq # cq
- counter: cq_failed_queue_trylocks - counter: cq_ev_queue_trylock_failures
doc: Number of lock (trylock) acquisition failures on completion queue event doc: Number of lock (trylock) acquisition failures on completion queue event
queue. High value here indicates high contention on completion queues queue. High value here indicates high contention on completion queues
- counter: cq_ev_queue_trylock_successes
doc: Number of lock (trylock) acquisition successes on completion queue event
queue.
- counter: cq_ev_queue_transient_pop_failures
doc: Number of times NULL was popped out of completion queue's event queue
even though the event queue was not empty

@ -86,4 +86,6 @@ executor_queue_drained_per_iteration:FLOAT,
executor_push_retries_per_iteration:FLOAT, executor_push_retries_per_iteration:FLOAT,
server_requested_calls_per_iteration:FLOAT, server_requested_calls_per_iteration:FLOAT,
server_slowpath_requests_queued_per_iteration:FLOAT, server_slowpath_requests_queued_per_iteration:FLOAT,
cq_failed_queue_trylocks_per_iteration:FLOAT cq_ev_queue_trylock_failures_per_iteration:FLOAT,
cq_ev_queue_trylock_successes_per_iteration:FLOAT,
cq_ev_queue_transient_pop_failures_per_iteration:FLOAT

@ -375,15 +375,24 @@ static bool cq_event_queue_push(grpc_cq_event_queue *q, grpc_cq_completion *c) {
static grpc_cq_completion *cq_event_queue_pop(grpc_cq_event_queue *q) { static grpc_cq_completion *cq_event_queue_pop(grpc_cq_event_queue *q) {
grpc_cq_completion *c = NULL; grpc_cq_completion *c = NULL;
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT;
if (gpr_spinlock_trylock(&q->queue_lock)) { if (gpr_spinlock_trylock(&q->queue_lock)) {
c = (grpc_cq_completion *)gpr_mpscq_pop(&q->queue); GRPC_STATS_INC_CQ_EV_QUEUE_TRYLOCK_SUCCESSES(&exec_ctx);
bool is_empty = false;
c = (grpc_cq_completion *)gpr_mpscq_pop_and_check_end(&q->queue, &is_empty);
gpr_spinlock_unlock(&q->queue_lock); gpr_spinlock_unlock(&q->queue_lock);
if (c == NULL && !is_empty) {
GRPC_STATS_INC_CQ_EV_QUEUE_TRANSIENT_POP_FAILURES(&exec_ctx);
}
} else { } else {
grpc_exec_ctx exec_ctx = GRPC_EXEC_CTX_INIT; GRPC_STATS_INC_CQ_EV_QUEUE_TRYLOCK_FAILURES(&exec_ctx);
GRPC_STATS_INC_CQ_FAILED_QUEUE_TRYLOCKS(&exec_ctx);
grpc_exec_ctx_finish(&exec_ctx);
} }
grpc_exec_ctx_finish(&exec_ctx);
if (c) { if (c) {
gpr_atm_no_barrier_fetch_add(&q->num_queue_items, -1); gpr_atm_no_barrier_fetch_add(&q->num_queue_items, -1);
} }

@ -108,7 +108,9 @@ def massage_qps_stats(scenario_result):
stats["core_executor_push_retries"] = massage_qps_stats_helpers.counter(core_stats, "executor_push_retries") stats["core_executor_push_retries"] = massage_qps_stats_helpers.counter(core_stats, "executor_push_retries")
stats["core_server_requested_calls"] = massage_qps_stats_helpers.counter(core_stats, "server_requested_calls") stats["core_server_requested_calls"] = massage_qps_stats_helpers.counter(core_stats, "server_requested_calls")
stats["core_server_slowpath_requests_queued"] = massage_qps_stats_helpers.counter(core_stats, "server_slowpath_requests_queued") stats["core_server_slowpath_requests_queued"] = massage_qps_stats_helpers.counter(core_stats, "server_slowpath_requests_queued")
stats["core_cq_failed_queue_trylocks"] = massage_qps_stats_helpers.counter(core_stats, "cq_failed_queue_trylocks") stats["core_cq_ev_queue_trylock_failures"] = massage_qps_stats_helpers.counter(core_stats, "cq_ev_queue_trylock_failures")
stats["core_cq_ev_queue_trylock_successes"] = massage_qps_stats_helpers.counter(core_stats, "cq_ev_queue_trylock_successes")
stats["core_cq_ev_queue_transient_pop_failures"] = massage_qps_stats_helpers.counter(core_stats, "cq_ev_queue_transient_pop_failures")
h = massage_qps_stats_helpers.histogram(core_stats, "call_initial_size") h = massage_qps_stats_helpers.histogram(core_stats, "call_initial_size")
stats["core_call_initial_size"] = ",".join("%f" % x for x in h.buckets) stats["core_call_initial_size"] = ",".join("%f" % x for x in h.buckets)
stats["core_call_initial_size_bkts"] = ",".join("%f" % x for x in h.boundaries) stats["core_call_initial_size_bkts"] = ",".join("%f" % x for x in h.boundaries)

@ -552,7 +552,17 @@
}, },
{ {
"mode": "NULLABLE", "mode": "NULLABLE",
"name": "core_cq_failed_queue_trylocks", "name": "core_cq_ev_queue_trylock_failures",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "core_cq_ev_queue_trylock_successes",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "core_cq_ev_queue_transient_pop_failures",
"type": "INTEGER" "type": "INTEGER"
}, },
{ {
@ -1349,7 +1359,17 @@
}, },
{ {
"mode": "NULLABLE", "mode": "NULLABLE",
"name": "core_cq_failed_queue_trylocks", "name": "core_cq_ev_queue_trylock_failures",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "core_cq_ev_queue_trylock_successes",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "core_cq_ev_queue_transient_pop_failures",
"type": "INTEGER" "type": "INTEGER"
}, },
{ {

Loading…
Cancel
Save