|
|
@ -63,7 +63,7 @@ |
|
|
|
// a keepalive ping timeout issue. We may want to revert https://github
|
|
|
|
// a keepalive ping timeout issue. We may want to revert https://github
|
|
|
|
// .com/grpc/grpc/pull/14943 once we figure out the root cause.
|
|
|
|
// .com/grpc/grpc/pull/14943 once we figure out the root cause.
|
|
|
|
#define MAX_EPOLL_EVENTS_HANDLED_EACH_POLL_CALL 16 |
|
|
|
#define MAX_EPOLL_EVENTS_HANDLED_EACH_POLL_CALL 16 |
|
|
|
#define MAX_PROBE_EPOLL_FDS 32 |
|
|
|
#define MAX_FDS_IN_CACHE 32 |
|
|
|
|
|
|
|
|
|
|
|
grpc_core::DebugOnlyTraceFlag grpc_trace_pollable_refcount(false, |
|
|
|
grpc_core::DebugOnlyTraceFlag grpc_trace_pollable_refcount(false, |
|
|
|
"pollable_refcount"); |
|
|
|
"pollable_refcount"); |
|
|
@ -77,8 +77,14 @@ typedef enum { PO_MULTI, PO_FD, PO_EMPTY } pollable_type; |
|
|
|
typedef struct pollable pollable; |
|
|
|
typedef struct pollable pollable; |
|
|
|
|
|
|
|
|
|
|
|
typedef struct cached_fd { |
|
|
|
typedef struct cached_fd { |
|
|
|
|
|
|
|
// Set to the grpc_fd's salt value. See 'salt' variable' in grpc_fd for more
|
|
|
|
|
|
|
|
// details
|
|
|
|
intptr_t salt; |
|
|
|
intptr_t salt; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// The underlying fd
|
|
|
|
int fd; |
|
|
|
int fd; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// A recency time counter that helps to determine the LRU fd in the cache
|
|
|
|
uint64_t last_used; |
|
|
|
uint64_t last_used; |
|
|
|
} cached_fd; |
|
|
|
} cached_fd; |
|
|
|
|
|
|
|
|
|
|
@ -111,10 +117,32 @@ struct pollable { |
|
|
|
int event_count; |
|
|
|
int event_count; |
|
|
|
struct epoll_event events[MAX_EPOLL_EVENTS]; |
|
|
|
struct epoll_event events[MAX_EPOLL_EVENTS]; |
|
|
|
|
|
|
|
|
|
|
|
// Maintain a LRU-eviction cache of fds in this pollable
|
|
|
|
// We may be calling pollable_add_fd() on the same (pollable, fd) multiple
|
|
|
|
cached_fd fd_cache[MAX_PROBE_EPOLL_FDS]; |
|
|
|
// times. To prevent pollable_add_fd() from making multiple sys calls to
|
|
|
|
|
|
|
|
// epoll_ctl() to add the fd, we maintain a cache of what fds are already
|
|
|
|
|
|
|
|
// present in the underlying epoll-set.
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// Since this is not a correctness issue, we do not need to maintain all the
|
|
|
|
|
|
|
|
// fds in the cache. Hence we just use an LRU cache of size 'MAX_FDS_IN_CACHE'
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// NOTE: An ideal implementation of this should do the following:
|
|
|
|
|
|
|
|
// 1) Add fds to the cache in pollable_add_fd() function (i.e whenever the fd
|
|
|
|
|
|
|
|
// is added to the pollable's epoll set)
|
|
|
|
|
|
|
|
// 2) Remove the fd from the cache whenever the fd is removed from the
|
|
|
|
|
|
|
|
// underlying epoll set (i.e whenever fd_orphan() is called).
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// Implementing (2) above (i.e removing fds from cache on fd_orphan) adds a
|
|
|
|
|
|
|
|
// lot of complexity since an fd can be present in multiple pollalbles. So our
|
|
|
|
|
|
|
|
// implementation ONLY DOES (1) and NOT (2).
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// The cache_fd.salt variable helps here to maintain correctness (it serves as
|
|
|
|
|
|
|
|
// an epoch that differentiates one grpc_fd from the other even though both of
|
|
|
|
|
|
|
|
// them may have the same fd number)
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// The following implements LRU-eviction cache of fds in this pollable
|
|
|
|
|
|
|
|
cached_fd fd_cache[MAX_FDS_IN_CACHE]; |
|
|
|
int fd_cache_size; |
|
|
|
int fd_cache_size; |
|
|
|
uint64_t fd_cache_counter; |
|
|
|
uint64_t fd_cache_counter; // Recency timer tick counter
|
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
static const char* pollable_type_string(pollable_type t) { |
|
|
|
static const char* pollable_type_string(pollable_type t) { |
|
|
@ -157,15 +185,24 @@ static void pollable_unref(pollable* p, int line, const char* reason); |
|
|
|
* Fd Declarations |
|
|
|
* Fd Declarations |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Monotonically increasing Epoch counter that is assinged to each grpc_fd. See
|
|
|
|
|
|
|
|
// the description of 'salt' variable in 'grpc_fd' for more details
|
|
|
|
|
|
|
|
// TODO: (sreek/kpayson) gpr_atm is intptr_t which may not be wide-enough on
|
|
|
|
|
|
|
|
// 32-bit systems. Change this to int_64 - atleast on 32-bit systems
|
|
|
|
static gpr_atm g_fd_salt; |
|
|
|
static gpr_atm g_fd_salt; |
|
|
|
|
|
|
|
|
|
|
|
struct grpc_fd { |
|
|
|
struct grpc_fd { |
|
|
|
int fd; |
|
|
|
int fd; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Since fd numbers can be reused (after old fds are closed), this serves as
|
|
|
|
|
|
|
|
// an epoch that uniquely identifies this fd (i.e the pair (salt, fd) is
|
|
|
|
|
|
|
|
// unique (until the salt counter (i.e g_fd_salt) overflows)
|
|
|
|
intptr_t salt; |
|
|
|
intptr_t salt; |
|
|
|
/* refst format:
|
|
|
|
|
|
|
|
bit 0 : 1=Active / 0=Orphaned |
|
|
|
// refst format:
|
|
|
|
bits 1-n : refcount |
|
|
|
// bit 0 : 1=Active / 0=Orphaned
|
|
|
|
Ref/Unref by two to avoid altering the orphaned bit */ |
|
|
|
// bits 1-n : refcount
|
|
|
|
|
|
|
|
// Ref/Unref by two to avoid altering the orphaned bit
|
|
|
|
gpr_atm refst; |
|
|
|
gpr_atm refst; |
|
|
|
|
|
|
|
|
|
|
|
gpr_mu orphan_mu; |
|
|
|
gpr_mu orphan_mu; |
|
|
@ -180,13 +217,13 @@ struct grpc_fd { |
|
|
|
struct grpc_fd* freelist_next; |
|
|
|
struct grpc_fd* freelist_next; |
|
|
|
grpc_closure* on_done_closure; |
|
|
|
grpc_closure* on_done_closure; |
|
|
|
|
|
|
|
|
|
|
|
/* The pollset that last noticed that the fd is readable. The actual type
|
|
|
|
// The pollset that last noticed that the fd is readable. The actual type
|
|
|
|
* stored in this is (grpc_pollset *) */ |
|
|
|
// stored in this is (grpc_pollset *)
|
|
|
|
gpr_atm read_notifier_pollset; |
|
|
|
gpr_atm read_notifier_pollset; |
|
|
|
|
|
|
|
|
|
|
|
grpc_iomgr_object iomgr_object; |
|
|
|
grpc_iomgr_object iomgr_object; |
|
|
|
|
|
|
|
|
|
|
|
/* Do we need to track EPOLLERR events separately? */ |
|
|
|
// Do we need to track EPOLLERR events separately?
|
|
|
|
bool track_err; |
|
|
|
bool track_err; |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
@ -562,6 +599,7 @@ static grpc_error* pollable_add_fd(pollable* p, grpc_fd* fd) { |
|
|
|
const int epfd = p->epfd; |
|
|
|
const int epfd = p->epfd; |
|
|
|
gpr_mu_lock(&p->mu); |
|
|
|
gpr_mu_lock(&p->mu); |
|
|
|
p->fd_cache_counter++; |
|
|
|
p->fd_cache_counter++; |
|
|
|
|
|
|
|
|
|
|
|
// Handle the case of overflow for our cache counter by
|
|
|
|
// Handle the case of overflow for our cache counter by
|
|
|
|
// reseting the recency-counter on all cache objects
|
|
|
|
// reseting the recency-counter on all cache objects
|
|
|
|
if (p->fd_cache_counter == 0) { |
|
|
|
if (p->fd_cache_counter == 0) { |
|
|
@ -581,8 +619,9 @@ static grpc_error* pollable_add_fd(pollable* p, grpc_fd* fd) { |
|
|
|
lru_idx = i; |
|
|
|
lru_idx = i; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Add to cache
|
|
|
|
// Add to cache
|
|
|
|
if (p->fd_cache_size < MAX_PROBE_EPOLL_FDS) { |
|
|
|
if (p->fd_cache_size < MAX_FDS_IN_CACHE) { |
|
|
|
lru_idx = p->fd_cache_size; |
|
|
|
lru_idx = p->fd_cache_size; |
|
|
|
p->fd_cache_size++; |
|
|
|
p->fd_cache_size++; |
|
|
|
} |
|
|
|
} |
|
|
@ -590,6 +629,7 @@ static grpc_error* pollable_add_fd(pollable* p, grpc_fd* fd) { |
|
|
|
p->fd_cache[lru_idx].salt = fd->salt; |
|
|
|
p->fd_cache[lru_idx].salt = fd->salt; |
|
|
|
p->fd_cache[lru_idx].last_used = p->fd_cache_counter; |
|
|
|
p->fd_cache[lru_idx].last_used = p->fd_cache_counter; |
|
|
|
gpr_mu_unlock(&p->mu); |
|
|
|
gpr_mu_unlock(&p->mu); |
|
|
|
|
|
|
|
|
|
|
|
if (grpc_polling_trace.enabled()) { |
|
|
|
if (grpc_polling_trace.enabled()) { |
|
|
|
gpr_log(GPR_INFO, "add fd %p (%d) to pollable %p", fd, fd->fd, p); |
|
|
|
gpr_log(GPR_INFO, "add fd %p (%d) to pollable %p", fd, fd->fd, p); |
|
|
|
} |
|
|
|
} |
|
|
|