More debug timers to record root cause

pull/17457/head
Muxi Yan 6 years ago
parent 3e79baf30d
commit d42c56788c
  1. 92
      src/core/lib/gpr/sync_posix.cc

@ -30,11 +30,16 @@
// For debug of the timer manager crash only.
// TODO (mxyan): remove after bug is fixed.
#ifdef GRPC_DEBUG_TIMER_MANAGER
#include <string.h>
void (*g_grpc_debug_timer_manager_stats)(
int64_t timer_manager_init_count, int64_t timer_manager_shutdown_count,
int64_t fork_count, int64_t timer_wait_err, int64_t timer_cv_value,
int64_t timer_mu_value, int64_t abstime_sec_value,
int64_t abstime_nsec_value) = nullptr;
int64_t abstime_nsec_value, int64_t abs_deadline_sec_value, int64_t abs_deadline_nsec_value, int64_t now1_sec_value,
int64_t now1_nsec_value, int64_t now2_sec_value,
int64_t now2_nsec_value, int64_t add_result_sec_value,
int64_t add_result_nsec_value, int64_t sub_result_sec_value,
int64_t sub_result_nsec_value) = nullptr;
int64_t g_timer_manager_init_count = 0;
int64_t g_timer_manager_shutdown_count = 0;
int64_t g_fork_count = 0;
@ -43,6 +48,16 @@ int64_t g_timer_cv_value = 0;
int64_t g_timer_mu_value = 0;
int64_t g_abstime_sec_value = -1;
int64_t g_abstime_nsec_value = -1;
// Extra diagnostics for the deadline conversion performed in gpr_cv_wait.
// All of them start at -1; gpr_cv_wait overwrites them immediately before it
// invokes g_grpc_debug_timer_manager_stats.

// tv_sec / tv_nsec of the abs_deadline argument as it was on entry to
// gpr_cv_wait, i.e. before any clock-type conversion is applied to it.
int64_t g_abs_deadline_sec_value = -1;
int64_t g_abs_deadline_nsec_value = -1;
// gpr_now() reading for the deadline's original clock type, taken by
// gpr_convert_clock_type_debug_timespec on its general conversion path.
int64_t g_now1_sec_value = -1;
int64_t g_now1_nsec_value = -1;
// gpr_now() reading for the target clock type, taken on the same path.
int64_t g_now2_sec_value = -1;
int64_t g_now2_nsec_value = -1;
// now2 + sub_result: the value the general conversion path returns.
int64_t g_add_result_sec_value = -1;
int64_t g_add_result_nsec_value = -1;
// deadline - now1: the intermediate difference on the general conversion path.
int64_t g_sub_result_sec_value = -1;
int64_t g_sub_result_nsec_value = -1;
#endif // GRPC_DEBUG_TIMER_MANAGER
#ifdef GPR_LOW_LEVEL_COUNTERS
@ -90,17 +105,70 @@ void gpr_cv_init(gpr_cv* cv) {
void gpr_cv_destroy(gpr_cv* cv) { GPR_ASSERT(pthread_cond_destroy(cv) == 0); }
// For debug of the timer manager crash only.
// TODO (mxyan): remove after bug is fixed.
#ifdef GRPC_DEBUG_TIMER_MANAGER
// Debug-only clock-type conversion that additionally hands the intermediate
// values of the general conversion path back to the caller.
//
// Returns `t` expressed against `clock_type`:
//   - `t` unchanged when it already uses `clock_type`;
//   - `t` with only its clock_type field rewritten when tv_sec is INT64_MAX
//     or INT64_MIN;
//   - t - gpr_now(t.clock_type) when the target is GPR_TIMESPAN;
//   - gpr_now(clock_type) + t when `t` itself is a GPR_TIMESPAN;
//   - otherwise gpr_now(clock_type) + (t - gpr_now(t.clock_type)).
//
// `now1`, `now2`, `add_result` and `sub_result` are written ONLY on the last
// path. On every early return they keep whatever the caller stored in them.
static gpr_timespec gpr_convert_clock_type_debug_timespec(
    gpr_timespec t, gpr_clock_type clock_type, gpr_timespec& now1,
    gpr_timespec& now2, gpr_timespec& add_result, gpr_timespec& sub_result) {
  const gpr_clock_type source_clock = t.clock_type;

  // Nothing to convert.
  if (source_clock == clock_type) {
    return t;
  }

  // Saturated seconds value: relabel the clock and leave the fields alone.
  const bool saturated = (t.tv_sec == INT64_MAX) || (t.tv_sec == INT64_MIN);
  if (saturated) {
    t.clock_type = clock_type;
    return t;
  }

  // Converting to a timespan: distance from "now" on the source clock.
  if (clock_type == GPR_TIMESPAN) {
    const gpr_timespec source_now = gpr_now(source_clock);
    return gpr_time_sub(t, source_now);
  }

  // Converting from a timespan: offset from "now" on the target clock.
  if (source_clock == GPR_TIMESPAN) {
    const gpr_timespec target_now = gpr_now(clock_type);
    return gpr_time_add(target_now, t);
  }

  // General path. The source clock is sampled before the target clock, and
  // every intermediate value is published through the out-parameters.
  now1 = gpr_now(source_clock);
  sub_result = gpr_time_sub(t, now1);
  now2 = gpr_now(clock_type);
  add_result = gpr_time_add(now2, sub_result);
  return add_result;
}
// Debug build: route the conversion through the instrumented helper so that
// now1/now2/add_result/sub_result receive the intermediate values.
#define gpr_convert_clock_type_debug(t, clock_type, now1, now2, add_result, sub_result) gpr_convert_clock_type_debug_timespec((t), (clock_type), (now1), (now2), (add_result), (sub_result))
#else
// Non-debug build: the four capture arguments are discarded by the
// preprocessor and never expanded, so call sites may name variables that are
// only declared when GRPC_DEBUG_TIMER_MANAGER is defined.
#define gpr_convert_clock_type_debug(t, clock_type, now1, now2, add_result, sub_result) gpr_convert_clock_type((t), (clock_type))
#endif
int gpr_cv_wait(gpr_cv* cv, gpr_mu* mu, gpr_timespec abs_deadline) {
int err = 0;
#ifdef GRPC_DEBUG_TIMER_MANAGER
// For debug of the timer manager crash only.
// TODO (mxyan): remove after bug is fixed.
gpr_timespec abs_deadline_copy;
abs_deadline_copy.tv_sec = abs_deadline.tv_sec;
abs_deadline_copy.tv_nsec = abs_deadline.tv_nsec;
gpr_timespec now1;
gpr_timespec now2;
gpr_timespec add_result;
gpr_timespec sub_result;
memset(&now1, 0, sizeof(now1));
memset(&now2, 0, sizeof(now2));
memset(&add_result, 0, sizeof(add_result));
memset(&sub_result, 0, sizeof(sub_result));
#endif
if (gpr_time_cmp(abs_deadline, gpr_inf_future(abs_deadline.clock_type)) ==
0) {
err = pthread_cond_wait(cv, mu);
} else {
struct timespec abs_deadline_ts;
#if GPR_LINUX
abs_deadline = gpr_convert_clock_type(abs_deadline, GPR_CLOCK_MONOTONIC);
abs_deadline = gpr_convert_clock_type_debug(abs_deadline, GPR_CLOCK_MONOTONIC, now1, now2, add_result, sub_result);
#else
abs_deadline = gpr_convert_clock_type(abs_deadline, GPR_CLOCK_REALTIME);
abs_deadline = gpr_convert_clock_type_debug(abs_deadline, GPR_CLOCK_REALTIME, now1, now2, add_result, sub_result);
#endif // GPR_LINUX
abs_deadline_ts.tv_sec = static_cast<time_t>(abs_deadline.tv_sec);
abs_deadline_ts.tv_nsec = abs_deadline.tv_nsec;
@ -123,11 +191,25 @@ int gpr_cv_wait(gpr_cv* cv, gpr_mu* mu, gpr_timespec abs_deadline) {
g_timer_wait_err = err;
g_timer_cv_value = (int64_t)cv;
g_timer_mu_value = (int64_t)mu;
g_abs_deadline_sec_value = abs_deadline_copy.tv_sec;
g_abs_deadline_nsec_value = abs_deadline_copy.tv_nsec;
g_now1_sec_value = now1.tv_sec;
g_now1_nsec_value = now1.tv_nsec;
g_now2_sec_value = now2.tv_sec;
g_now2_nsec_value = now2.tv_nsec;
g_add_result_sec_value = add_result.tv_sec;
g_add_result_nsec_value = add_result.tv_nsec;
g_sub_result_sec_value = sub_result.tv_sec;
g_sub_result_nsec_value = sub_result.tv_nsec;
g_grpc_debug_timer_manager_stats(
g_timer_manager_init_count, g_timer_manager_shutdown_count,
g_fork_count, g_timer_wait_err, g_timer_cv_value, g_timer_mu_value,
g_abstime_sec_value, g_abstime_nsec_value);
}
g_abstime_sec_value, g_abstime_nsec_value, g_abs_deadline_sec_value,
g_abs_deadline_nsec_value, g_now1_sec_value, g_now1_nsec_value,
g_now2_sec_value, g_now2_nsec_value, g_add_result_sec_value,
g_add_result_nsec_value, g_sub_result_sec_value,
g_sub_result_nsec_value);
}
}
#endif
GPR_ASSERT(err == 0 || err == ETIMEDOUT || err == EAGAIN);

Loading…
Cancel
Save