Merge pull request #1460 from ctiller/fast-timer

25ns overhead latency timers
pull/1432/head^2
Vijay Pai 10 years ago
commit 520ecb18f5
  1. 95
      src/core/profiling/basic_timers.c
  2. 31
      src/core/profiling/timers.h
  3. 23
      src/core/profiling/timers_preciseclock.h

@ -45,15 +45,10 @@
#include <grpc/support/thd.h>
#include <stdio.h>
typedef enum {
BEGIN = '{',
END = '}',
MARK = '.'
} marker_type;
typedef enum { BEGIN = '{', END = '}', MARK = '.' } marker_type;
typedef struct grpc_timer_entry {
grpc_precise_clock tm;
gpr_thd_id thd;
int tag;
marker_type type;
void* id;
@ -61,71 +56,35 @@ typedef struct grpc_timer_entry {
int line;
} grpc_timer_entry;
struct grpc_timers_log {
gpr_mu mu;
grpc_timer_entry* log;
int num_entries;
int capacity;
int capacity_limit;
FILE* fp;
};
#define MAX_COUNT (1024 * 1024 / sizeof(grpc_timer_entry))
grpc_timers_log* grpc_timers_log_global = NULL;
static __thread grpc_timer_entry log[MAX_COUNT];
static __thread int count;
static grpc_timers_log* grpc_timers_log_create(int capacity_limit, FILE* dump) {
grpc_timers_log* log = gpr_malloc(sizeof(*log));
/* TODO (vpai): Allow allocation below limit */
log->log = gpr_malloc(capacity_limit * sizeof(*log->log));
/* TODO (vpai): Improve concurrency, do per-thread logging? */
gpr_mu_init(&log->mu);
log->num_entries = 0;
log->capacity = log->capacity_limit = capacity_limit;
log->fp = dump;
return log;
}
static void log_report_locked(grpc_timers_log* log) {
FILE* fp = log->fp;
static void log_report() {
int i;
for (i = 0; i < log->num_entries; i++) {
grpc_timer_entry* entry = &(log->log[i]);
fprintf(fp, "GRPC_LAT_PROF ");
grpc_precise_clock_print(&entry->tm, fp);
fprintf(fp, " %p %c %d %p %s %d\n", (void*)(gpr_intptr)entry->thd, entry->type, entry->tag,
entry->id, entry->file, entry->line);
for (i = 0; i < count; i++) {
grpc_timer_entry* entry = &(log[i]);
printf("GRPC_LAT_PROF " GRPC_PRECISE_CLOCK_FORMAT " %p %c %d %p %s %d\n",
GRPC_PRECISE_CLOCK_PRINTF_ARGS(&entry->tm),
(void*)(gpr_intptr)gpr_thd_currentid(), entry->type, entry->tag,
entry->id, entry->file, entry->line);
}
/* Now clear out the log */
log->num_entries = 0;
count = 0;
}
static void grpc_timers_log_destroy(grpc_timers_log* log) {
gpr_mu_lock(&log->mu);
log_report_locked(log);
gpr_mu_unlock(&log->mu);
gpr_free(log->log);
gpr_mu_destroy(&log->mu);
gpr_free(log);
}
static void grpc_timers_log_add(grpc_timers_log* log, int tag, marker_type type, void* id,
static void grpc_timers_log_add(int tag, marker_type type, void* id,
const char* file, int line) {
grpc_timer_entry* entry;
/* TODO (vpai) : Improve concurrency */
gpr_mu_lock(&log->mu);
if (log->num_entries == log->capacity_limit) {
log_report_locked(log);
if (count == MAX_COUNT) {
log_report();
}
entry = &log->log[log->num_entries++];
entry = &log[count++];
grpc_precise_clock_now(&entry->tm);
entry->tag = tag;
@ -133,39 +92,31 @@ static void grpc_timers_log_add(grpc_timers_log* log, int tag, marker_type type,
entry->id = id;
entry->file = file;
entry->line = line;
entry->thd = gpr_thd_currentid();
gpr_mu_unlock(&log->mu);
}
/* Latency profiler API implementation. */
void grpc_timer_add_mark(int tag, void* id, const char* file, int line) {
if (tag < GRPC_PTAG_IGNORE_THRESHOLD) {
grpc_timers_log_add(grpc_timers_log_global, tag, MARK, id, file, line);
grpc_timers_log_add(tag, MARK, id, file, line);
}
}
void grpc_timer_begin(int tag, void* id, const char *file, int line) {
void grpc_timer_begin(int tag, void* id, const char* file, int line) {
if (tag < GRPC_PTAG_IGNORE_THRESHOLD) {
grpc_timers_log_add(grpc_timers_log_global, tag, BEGIN, id, file, line);
grpc_timers_log_add(tag, BEGIN, id, file, line);
}
}
void grpc_timer_end(int tag, void* id, const char *file, int line) {
void grpc_timer_end(int tag, void* id, const char* file, int line) {
if (tag < GRPC_PTAG_IGNORE_THRESHOLD) {
grpc_timers_log_add(grpc_timers_log_global, tag, END, id, file, line);
grpc_timers_log_add(tag, END, id, file, line);
}
}
/* Basic profiler specific API functions. */
void grpc_timers_global_init(void) {
grpc_timers_log_global = grpc_timers_log_create(100000, stdout);
}
void grpc_timers_global_destroy(void) {
grpc_timers_log_destroy(grpc_timers_log_global);
}
void grpc_timers_global_init(void) {}
void grpc_timers_global_destroy(void) {}
#else /* !GRPC_BASIC_PROFILER */
void grpc_timers_global_init(void) {}

@ -41,9 +41,9 @@ extern "C" {
void grpc_timers_global_init(void);
void grpc_timers_global_destroy(void);
void grpc_timer_add_mark(int tag, void* id, const char *file, int line);
void grpc_timer_begin(int tag, void* id, const char *file, int line);
void grpc_timer_end(int tag, void* id, const char *file, int line);
void grpc_timer_add_mark(int tag, void *id, const char *file, int line);
void grpc_timer_begin(int tag, void *id, const char *file, int line);
void grpc_timer_end(int tag, void *id, const char *file, int line);
enum grpc_profiling_tags {
/* Any GRPC_PTAG_* >= than the threshold won't generate any profiling mark. */
@ -74,13 +74,16 @@ enum grpc_profiling_tags {
#if !(defined(GRPC_STAP_PROFILER) + defined(GRPC_BASIC_PROFILER))
/* No profiling. No-op all the things. */
#define GRPC_TIMER_MARK(tag, id) \
do {} while(0)
do { \
} while (0)
#define GRPC_TIMER_BEGIN(tag, id) \
do {} while(0)
do { \
} while (0)
#define GRPC_TIMER_END(tag, id) \
do {} while(0)
do { \
} while (0)
#else /* at least one profiler requested... */
/* ... hopefully only one. */
@ -94,14 +97,14 @@ enum grpc_profiling_tags {
grpc_timer_add_mark(tag, ((void *)(gpr_intptr)(id)), __FILE__, __LINE__); \
}
#define GRPC_TIMER_BEGIN(tag, id) \
if (tag < GRPC_PTAG_IGNORE_THRESHOLD) { \
grpc_timer_begin(tag, ((void *)(gpr_intptr)(id)), __FILE__, __LINE__); \
#define GRPC_TIMER_BEGIN(tag, id) \
if (tag < GRPC_PTAG_IGNORE_THRESHOLD) { \
grpc_timer_begin(tag, ((void *)(gpr_intptr)(id)), __FILE__, __LINE__); \
}
#define GRPC_TIMER_END(tag, id) \
if (tag < GRPC_PTAG_IGNORE_THRESHOLD) { \
grpc_timer_end(tag, ((void *)(gpr_intptr)(id)), __FILE__, __LINE__); \
#define GRPC_TIMER_END(tag, id) \
if (tag < GRPC_PTAG_IGNORE_THRESHOLD) { \
grpc_timer_end(tag, ((void *)(gpr_intptr)(id)), __FILE__, __LINE__); \
}
#ifdef GRPC_STAP_PROFILER
@ -109,9 +112,7 @@ enum grpc_profiling_tags {
#endif /* GRPC_STAP_PROFILER */
#ifdef GRPC_BASIC_PROFILER
typedef struct grpc_timers_log grpc_timers_log;
extern grpc_timers_log *grpc_timers_log_global;
/* Empty placeholder for now. */
#endif /* GRPC_BASIC_PROFILER */
#endif /* at least one profiler requested. */

@ -43,7 +43,7 @@ typedef long long int grpc_precise_clock;
#if defined(__i386__)
static void grpc_precise_clock_now(grpc_precise_clock *clk) {
grpc_precise_clock ret;
__asm__ volatile("rdtsc" : "=A" (ret) );
__asm__ volatile("rdtsc" : "=A"(ret));
*clk = ret;
}
@ -51,7 +51,7 @@ static void grpc_precise_clock_now(grpc_precise_clock *clk) {
#elif defined(__x86_64__) || defined(__amd64__)
static void grpc_precise_clock_now(grpc_precise_clock *clk) {
unsigned long long low, high;
__asm__ volatile("rdtsc" : "=a" (low), "=d" (high));
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
*clk = (high << 32) | low;
}
#endif
@ -61,19 +61,21 @@ static void grpc_precise_clock_init() {
time_t start = time(NULL);
grpc_precise_clock start_time;
grpc_precise_clock end_time;
while (time(NULL) == start);
while (time(NULL) == start)
;
grpc_precise_clock_now(&start_time);
while (time(NULL) == start+1);
while (time(NULL) == start + 1)
;
grpc_precise_clock_now(&end_time);
cycles_per_second = end_time - start_time;
}
static double grpc_precise_clock_scaling_factor() {
gpr_once_init(&precise_clock_init, grpc_precise_clock_init);
return 1e6 / cycles_per_second;
}
static void grpc_precise_clock_print(const grpc_precise_clock* clk, FILE* fp) {
fprintf(fp, "%f", *clk * grpc_precise_clock_scaling_factor());
gpr_once_init(&precise_clock_init, grpc_precise_clock_init);
return 1e6 / cycles_per_second;
}
#define GRPC_PRECISE_CLOCK_FORMAT "%f"
#define GRPC_PRECISE_CLOCK_PRINTF_ARGS(clk) \
(*(clk)*grpc_precise_clock_scaling_factor())
#else
typedef struct grpc_precise_clock grpc_precise_clock;
struct grpc_precise_clock {
@ -82,6 +84,9 @@ struct grpc_precise_clock {
static void grpc_precise_clock_now(grpc_precise_clock* clk) {
clk->clock = gpr_now();
}
#define GRPC_PRECISE_CLOCK_FORMAT "%ld.%09d"
#define GRPC_PRECISE_CLOCK_PRINTF_ARGS(clk) \
(clk)->clock.tv_sec, (clk)->clock.tv_nsec
static void grpc_precise_clock_print(const grpc_precise_clock* clk, FILE* fp) {
fprintf(fp, "%ld.%09d", clk->clock.tv_sec, clk->clock.tv_nsec);
}

Loading…
Cancel
Save