Collect OPT_STATS along with tx timestamps

pull/17757/head
Yash Tibrewal 6 years ago
parent 52e0153189
commit 23061cdfc2
  1. 123
      src/core/lib/iomgr/buffer_list.cc
  2. 81
      src/core/lib/iomgr/buffer_list.h
  3. 35
      src/core/lib/iomgr/internal_errqueue.h
  4. 24
      src/core/lib/iomgr/tcp_posix.cc
  5. 23
      test/core/iomgr/buffer_list_test.cc

@ -24,6 +24,7 @@
#include <grpc/support/log.h> #include <grpc/support/log.h>
#ifdef GRPC_LINUX_ERRQUEUE #ifdef GRPC_LINUX_ERRQUEUE
#include <string.h>
#include <time.h> #include <time.h>
#include "src/core/lib/gprpp/memory.h" #include "src/core/lib/gprpp/memory.h"
@ -34,10 +35,10 @@ void TracedBuffer::AddNewEntry(TracedBuffer** head, uint32_t seq_no,
GPR_DEBUG_ASSERT(head != nullptr); GPR_DEBUG_ASSERT(head != nullptr);
TracedBuffer* new_elem = New<TracedBuffer>(seq_no, arg); TracedBuffer* new_elem = New<TracedBuffer>(seq_no, arg);
/* Store the current time as the sendmsg time. */ /* Store the current time as the sendmsg time. */
new_elem->ts_.sendmsg_time = gpr_now(GPR_CLOCK_REALTIME); new_elem->ts_.sendmsg_time.time = gpr_now(GPR_CLOCK_REALTIME);
new_elem->ts_.scheduled_time = gpr_inf_past(GPR_CLOCK_REALTIME); new_elem->ts_.scheduled_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
new_elem->ts_.sent_time = gpr_inf_past(GPR_CLOCK_REALTIME); new_elem->ts_.sent_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
new_elem->ts_.acked_time = gpr_inf_past(GPR_CLOCK_REALTIME); new_elem->ts_.acked_time.time = gpr_inf_past(GPR_CLOCK_REALTIME);
if (*head == nullptr) { if (*head == nullptr) {
*head = new_elem; *head = new_elem;
return; return;
@ -68,10 +69,114 @@ void default_timestamps_callback(void* arg, grpc_core::Timestamps* ts,
void (*timestamps_callback)(void*, grpc_core::Timestamps*, void (*timestamps_callback)(void*, grpc_core::Timestamps*,
grpc_error* shutdown_err) = grpc_error* shutdown_err) =
default_timestamps_callback; default_timestamps_callback;
/* Used to extract individual opt stats from cmsg, so as to avoid troubles with
* unaligned reads */
template <typename T>
T read_unaligned(const void* ptr) {
T val;
memcpy(&val, ptr, sizeof(val));
return val;
}
/** Adds opt stats statistics from the given control message to the connection
* metrics. */
void ExtractOptStats(ConnectionMetrics* conn_metrics,
const cmsghdr* opt_stats) {
if (opt_stats == nullptr) {
return;
}
const auto* data = CMSG_DATA(opt_stats);
constexpr int64_t cmsg_hdr_len = CMSG_ALIGN(sizeof(struct cmsghdr));
const int64_t len = opt_stats->cmsg_len - cmsg_hdr_len;
int64_t offset = 0;
while (offset < len) {
const auto* attr = reinterpret_cast<const nlattr*>(data + offset);
const void* val = data + offset + NLA_HDRLEN;
switch (attr->nla_type) {
case TCP_NLA_BUSY: {
conn_metrics->busy_usec.set(read_unaligned<uint64_t>(val));
break;
}
case TCP_NLA_RWND_LIMITED: {
conn_metrics->rwnd_limited_usec.set(read_unaligned<uint64_t>(val));
break;
}
case TCP_NLA_SNDBUF_LIMITED: {
conn_metrics->sndbuf_limited_usec.set(read_unaligned<uint64_t>(val));
break;
}
case TCP_NLA_PACING_RATE: {
conn_metrics->pacing_rate.set(read_unaligned<uint64_t>(val));
break;
}
case TCP_NLA_DELIVERY_RATE: {
conn_metrics->delivery_rate.set(read_unaligned<uint64_t>(val));
break;
}
case TCP_NLA_DELIVERY_RATE_APP_LMT: {
conn_metrics->is_delivery_rate_app_limited =
read_unaligned<uint8_t>(val);
break;
}
case TCP_NLA_SND_CWND: {
conn_metrics->congestion_window.set(read_unaligned<uint32_t>(val));
break;
}
case TCP_NLA_MIN_RTT: {
conn_metrics->min_rtt.set(read_unaligned<uint32_t>(val));
break;
}
case TCP_NLA_SRTT: {
conn_metrics->srtt.set(read_unaligned<uint32_t>(val));
break;
}
case TCP_NLA_RECUR_RETRANS: {
conn_metrics->recurring_retrans.set(read_unaligned<uint8_t>(val));
break;
}
case TCP_NLA_BYTES_SENT: {
conn_metrics->data_sent.set(read_unaligned<uint64_t>(val));
break;
}
case TCP_NLA_DATA_SEGS_OUT: {
conn_metrics->packet_sent.set(read_unaligned<uint64_t>(val));
break;
}
case TCP_NLA_TOTAL_RETRANS: {
conn_metrics->packet_retx.set(read_unaligned<uint64_t>(val));
break;
}
case TCP_NLA_DELIVERED: {
conn_metrics->packet_delivered.set(read_unaligned<uint32_t>(val));
break;
}
case TCP_NLA_DELIVERED_CE: {
conn_metrics->packet_delivered_ce.set(read_unaligned<uint32_t>(val));
break;
}
case TCP_NLA_BYTES_RETRANS: {
conn_metrics->data_retx.set(read_unaligned<uint64_t>(val));
break;
}
case TCP_NLA_REORDERING: {
conn_metrics->reordering.set(read_unaligned<uint32_t>(val));
break;
}
case TCP_NLA_SND_SSTHRESH: {
conn_metrics->snd_ssthresh.set(read_unaligned<uint32_t>(val));
break;
}
}
offset += NLA_ALIGN(attr->nla_len);
}
}
} /* namespace */ } /* namespace */
void TracedBuffer::ProcessTimestamp(TracedBuffer** head, void TracedBuffer::ProcessTimestamp(TracedBuffer** head,
struct sock_extended_err* serr, struct sock_extended_err* serr,
struct cmsghdr* opt_stats,
struct scm_timestamping* tss) { struct scm_timestamping* tss) {
GPR_DEBUG_ASSERT(head != nullptr); GPR_DEBUG_ASSERT(head != nullptr);
TracedBuffer* elem = *head; TracedBuffer* elem = *head;
@ -82,15 +187,19 @@ void TracedBuffer::ProcessTimestamp(TracedBuffer** head,
if (serr->ee_data >= elem->seq_no_) { if (serr->ee_data >= elem->seq_no_) {
switch (serr->ee_info) { switch (serr->ee_info) {
case SCM_TSTAMP_SCHED: case SCM_TSTAMP_SCHED:
fill_gpr_from_timestamp(&(elem->ts_.scheduled_time), &(tss->ts[0])); fill_gpr_from_timestamp(&(elem->ts_.scheduled_time.time),
&(tss->ts[0]));
ExtractOptStats(&(elem->ts_.scheduled_time.metrics), opt_stats);
elem = elem->next_; elem = elem->next_;
break; break;
case SCM_TSTAMP_SND: case SCM_TSTAMP_SND:
fill_gpr_from_timestamp(&(elem->ts_.sent_time), &(tss->ts[0])); fill_gpr_from_timestamp(&(elem->ts_.sent_time.time), &(tss->ts[0]));
ExtractOptStats(&(elem->ts_.sent_time.metrics), opt_stats);
elem = elem->next_; elem = elem->next_;
break; break;
case SCM_TSTAMP_ACK: case SCM_TSTAMP_ACK:
fill_gpr_from_timestamp(&(elem->ts_.acked_time), &(tss->ts[0])); fill_gpr_from_timestamp(&(elem->ts_.acked_time.time), &(tss->ts[0]));
ExtractOptStats(&(elem->ts_.acked_time.metrics), opt_stats);
/* Got all timestamps. Do the callback and free this TracedBuffer. /* Got all timestamps. Do the callback and free this TracedBuffer.
* The thing below can be passed by value if we don't want the * The thing below can be passed by value if we don't want the
* restriction on the lifetime. */ * restriction on the lifetime. */

@ -30,13 +30,81 @@
#include "src/core/lib/iomgr/internal_errqueue.h" #include "src/core/lib/iomgr/internal_errqueue.h"
namespace grpc_core { namespace grpc_core {
/* A make-shift alternative for absl::Optional. This can be removed in favor of
* that once is absl dependencies can be introduced. */
template <typename T>
class Optional {
public:
void set(const T& val) {
value_ = val;
set_ = true;
}
bool has_value() { return set_; }
void reset() { set_ = false; }
T value() { return value_; }
T value_;
bool set_ = false;
};
struct ConnectionMetrics {
/* Delivery rate in Bps. */
Optional<uint64_t> delivery_rate;
/* If the delivery rate is limited by the application, this is set to true. */
bool is_delivery_rate_app_limited = true;
/* Total packets retransmitted. */
Optional<uint32_t> packet_retx;
/* Total packets sent. */
Optional<uint32_t> packet_sent;
/* Total packets delivered. */
Optional<uint32_t> packet_delivered;
/* Total packets delivered with ECE marked. This metric is smaller than or
equal to packet_delivered. */
Optional<uint32_t> packet_delivered_ce;
/* Total bytes lost so far. */
Optional<uint64_t> data_retx;
/* Total bytes sent so far. */
Optional<uint64_t> data_sent;
/* Pacing rate of the connection in Bps */
Optional<uint64_t> pacing_rate;
/* Minimum RTT observed in usec. */
Optional<uint32_t> min_rtt;
/* Smoothed RTT in usec */
Optional<uint32_t> srtt;
/* Send congestion window. */
Optional<uint32_t> congestion_window;
/* Slow start threshold in packets. */
Optional<uint32_t> snd_ssthresh;
/* Maximum degree of reordering (i.e., maximum number of packets reodered)
on the connection. */
Optional<uint32_t> reordering;
/* Represents the number of recurring retransmissions of the first sequence
that is not acknowledged yet. */
Optional<uint8_t> recurring_retrans;
/* The cumulative time (in usec) that the transport protocol was busy
sending data. */
Optional<uint64_t> busy_usec;
/* The cumulative time (in usec) that the transport protocol was limited by
the receive window size. */
Optional<uint64_t> rwnd_limited_usec;
/* The cumulative time (in usec) that the transport protocol was limited by
the send buffer size. */
Optional<uint64_t> sndbuf_limited_usec;
};
struct Timestamp {
gpr_timespec time;
ConnectionMetrics metrics; /* Metrics collected with this timestamp */
};
struct Timestamps { struct Timestamps {
/* TODO(yashykt): This would also need to store OPTSTAT once support is added Timestamp sendmsg_time;
*/ Timestamp scheduled_time;
gpr_timespec sendmsg_time; Timestamp sent_time;
gpr_timespec scheduled_time; Timestamp acked_time;
gpr_timespec sent_time;
gpr_timespec acked_time;
uint32_t byte_offset; /* byte offset relative to the start of the RPC */ uint32_t byte_offset; /* byte offset relative to the start of the RPC */
}; };
@ -65,6 +133,7 @@ class TracedBuffer {
* timestamp type is SCM_TSTAMP_ACK. */ * timestamp type is SCM_TSTAMP_ACK. */
static void ProcessTimestamp(grpc_core::TracedBuffer** head, static void ProcessTimestamp(grpc_core::TracedBuffer** head,
struct sock_extended_err* serr, struct sock_extended_err* serr,
struct cmsghdr* opt_stats,
struct scm_timestamping* tss); struct scm_timestamping* tss);
/** Cleans the list by calling the callback for each traced buffer in the list /** Cleans the list by calling the callback for each traced buffer in the list

@ -37,6 +37,7 @@
#ifdef GRPC_LINUX_ERRQUEUE #ifdef GRPC_LINUX_ERRQUEUE
#include <linux/errqueue.h> #include <linux/errqueue.h>
#include <linux/net_tstamp.h> #include <linux/net_tstamp.h>
#include <linux/netlink.h>
#include <sys/socket.h> #include <sys/socket.h>
#endif /* GRPC_LINUX_ERRQUEUE */ #endif /* GRPC_LINUX_ERRQUEUE */
@ -63,13 +64,41 @@ constexpr uint32_t SOF_TIMESTAMPING_OPT_ID = 1u << 7;
constexpr uint32_t SOF_TIMESTAMPING_TX_SCHED = 1u << 8; constexpr uint32_t SOF_TIMESTAMPING_TX_SCHED = 1u << 8;
constexpr uint32_t SOF_TIMESTAMPING_TX_ACK = 1u << 9; constexpr uint32_t SOF_TIMESTAMPING_TX_ACK = 1u << 9;
constexpr uint32_t SOF_TIMESTAMPING_OPT_TSONLY = 1u << 11; constexpr uint32_t SOF_TIMESTAMPING_OPT_TSONLY = 1u << 11;
constexpr uint32_t SOF_TIMESTAMPING_OPT_STATS = 1u << 12;
constexpr uint32_t kTimestampingSocketOptions = SOF_TIMESTAMPING_SOFTWARE | constexpr uint32_t kTimestampingSocketOptions =
SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID |
SOF_TIMESTAMPING_OPT_TSONLY; SOF_TIMESTAMPING_OPT_TSONLY | SOF_TIMESTAMPING_OPT_STATS;
constexpr uint32_t kTimestampingRecordingOptions = constexpr uint32_t kTimestampingRecordingOptions =
SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
SOF_TIMESTAMPING_TX_ACK; SOF_TIMESTAMPING_TX_ACK;
/* Netlink attribute types used for TCP opt stats. */
enum TCPOptStats {
TCP_NLA_PAD,
TCP_NLA_BUSY, /* Time (usec) busy sending data. */
TCP_NLA_RWND_LIMITED, /* Time (usec) limited by receive window. */
TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer. */
TCP_NLA_DATA_SEGS_OUT, // Data pkts sent including retransmission. */
TCP_NLA_TOTAL_RETRANS, // Data pkts retransmitted. */
TCP_NLA_PACING_RATE, // Pacing rate in Bps. */
TCP_NLA_DELIVERY_RATE, // Delivery rate in Bps. */
TCP_NLA_SND_CWND, // Sending congestion window. */
TCP_NLA_REORDERING, // Reordering metric. */
TCP_NLA_MIN_RTT, // minimum RTT. */
TCP_NLA_RECUR_RETRANS, // Recurring retransmits for the current pkt. */
TCP_NLA_DELIVERY_RATE_APP_LMT, // Delivery rate application limited? */
TCP_NLA_SNDQ_SIZE, // Data (bytes) pending in send queue */
TCP_NLA_CA_STATE, // ca_state of socket */
TCP_NLA_SND_SSTHRESH, // Slow start size threshold */
TCP_NLA_DELIVERED, // Data pkts delivered incl. out-of-order */
TCP_NLA_DELIVERED_CE, // Like above but only ones w/ CE marks */
TCP_NLA_BYTES_SENT, // Data bytes sent including retransmission */
TCP_NLA_BYTES_RETRANS, // Data bytes retransmitted */
TCP_NLA_DSACK_DUPS, // DSACK blocks received */
TCP_NLA_REORD_SEEN, // reordering events seen */
TCP_NLA_SRTT, // smoothed RTT in usecs */
};
#endif /* GRPC_LINUX_ERRQUEUE */ #endif /* GRPC_LINUX_ERRQUEUE */
/* Returns true if kernel is capable of supporting errqueue and timestamping. /* Returns true if kernel is capable of supporting errqueue and timestamping.

@ -648,6 +648,7 @@ static bool tcp_write_with_timestamps(grpc_tcp* tcp, struct msghdr* msg,
struct cmsghdr* process_timestamp(grpc_tcp* tcp, msghdr* msg, struct cmsghdr* process_timestamp(grpc_tcp* tcp, msghdr* msg,
struct cmsghdr* cmsg) { struct cmsghdr* cmsg) {
auto next_cmsg = CMSG_NXTHDR(msg, cmsg); auto next_cmsg = CMSG_NXTHDR(msg, cmsg);
cmsghdr* opt_stats = nullptr;
if (next_cmsg == nullptr) { if (next_cmsg == nullptr) {
if (grpc_tcp_trace.enabled()) { if (grpc_tcp_trace.enabled()) {
gpr_log(GPR_ERROR, "Received timestamp without extended error"); gpr_log(GPR_ERROR, "Received timestamp without extended error");
@ -655,6 +656,19 @@ struct cmsghdr* process_timestamp(grpc_tcp* tcp, msghdr* msg,
return cmsg; return cmsg;
} }
/* Check if next_cmsg is an OPT_STATS msg */
if (next_cmsg->cmsg_level == SOL_SOCKET &&
next_cmsg->cmsg_type == SCM_TIMESTAMPING_OPT_STATS) {
opt_stats = next_cmsg;
next_cmsg = CMSG_NXTHDR(msg, opt_stats);
if (next_cmsg == nullptr) {
if (grpc_tcp_trace.enabled()) {
gpr_log(GPR_ERROR, "Received timestamp without extended error");
}
}
return opt_stats;
}
if (!(next_cmsg->cmsg_level == SOL_IP || next_cmsg->cmsg_level == SOL_IPV6) || if (!(next_cmsg->cmsg_level == SOL_IP || next_cmsg->cmsg_level == SOL_IPV6) ||
!(next_cmsg->cmsg_type == IP_RECVERR || !(next_cmsg->cmsg_type == IP_RECVERR ||
next_cmsg->cmsg_type == IPV6_RECVERR)) { next_cmsg->cmsg_type == IPV6_RECVERR)) {
@ -676,7 +690,8 @@ struct cmsghdr* process_timestamp(grpc_tcp* tcp, msghdr* msg,
* to protect the traced buffer list. A lock free list might be better. Using * to protect the traced buffer list. A lock free list might be better. Using
* a simple mutex for now. */ * a simple mutex for now. */
gpr_mu_lock(&tcp->tb_mu); gpr_mu_lock(&tcp->tb_mu);
grpc_core::TracedBuffer::ProcessTimestamp(&tcp->tb_head, serr, tss); grpc_core::TracedBuffer::ProcessTimestamp(&tcp->tb_head, serr, opt_stats,
tss);
gpr_mu_unlock(&tcp->tb_mu); gpr_mu_unlock(&tcp->tb_mu);
return next_cmsg; return next_cmsg;
} }
@ -696,10 +711,11 @@ static void process_errors(grpc_tcp* tcp) {
msg.msg_iovlen = 0; msg.msg_iovlen = 0;
msg.msg_flags = 0; msg.msg_flags = 0;
// Allocate aligned space for cmsgs received along with a timestamps
union { union {
char rbuf[1024 /*CMSG_SPACE(sizeof(scm_timestamping)) + char rbuf[CMSG_SPACE(sizeof(scm_timestamping)) +
CMSG_SPACE(sizeof(sock_extended_err) + sizeof(sockaddr_in))*/ CMSG_SPACE(sizeof(sock_extended_err) + sizeof(sockaddr_in)) +
]; CMSG_SPACE(16 * NLA_ALIGN(NLA_HDRLEN + sizeof(uint64_t)))];
struct cmsghdr align; struct cmsghdr align;
} aligned_buf; } aligned_buf;
memset(&aligned_buf, 0, sizeof(aligned_buf)); memset(&aligned_buf, 0, sizeof(aligned_buf));

@ -63,9 +63,9 @@ static void TestVerifierCalledOnAckVerifier(void* arg,
grpc_error* error) { grpc_error* error) {
GPR_ASSERT(error == GRPC_ERROR_NONE); GPR_ASSERT(error == GRPC_ERROR_NONE);
GPR_ASSERT(arg != nullptr); GPR_ASSERT(arg != nullptr);
GPR_ASSERT(ts->acked_time.clock_type == GPR_CLOCK_REALTIME); GPR_ASSERT(ts->acked_time.time.clock_type == GPR_CLOCK_REALTIME);
GPR_ASSERT(ts->acked_time.tv_sec == 123); GPR_ASSERT(ts->acked_time.time.tv_sec == 123);
GPR_ASSERT(ts->acked_time.tv_nsec == 456); GPR_ASSERT(ts->acked_time.time.tv_nsec == 456);
gpr_atm* done = reinterpret_cast<gpr_atm*>(arg); gpr_atm* done = reinterpret_cast<gpr_atm*>(arg);
gpr_atm_rel_store(done, static_cast<gpr_atm>(1)); gpr_atm_rel_store(done, static_cast<gpr_atm>(1));
} }
@ -85,7 +85,7 @@ static void TestVerifierCalledOnAck() {
gpr_atm verifier_called; gpr_atm verifier_called;
gpr_atm_rel_store(&verifier_called, static_cast<gpr_atm>(0)); gpr_atm_rel_store(&verifier_called, static_cast<gpr_atm>(0));
grpc_core::TracedBuffer::AddNewEntry(&list, 213, &verifier_called); grpc_core::TracedBuffer::AddNewEntry(&list, 213, &verifier_called);
grpc_core::TracedBuffer::ProcessTimestamp(&list, &serr, &tss); grpc_core::TracedBuffer::ProcessTimestamp(&list, &serr, nullptr, &tss);
GPR_ASSERT(gpr_atm_acq_load(&verifier_called) == static_cast<gpr_atm>(1)); GPR_ASSERT(gpr_atm_acq_load(&verifier_called) == static_cast<gpr_atm>(1));
GPR_ASSERT(list == nullptr); GPR_ASSERT(list == nullptr);
grpc_core::TracedBuffer::Shutdown(&list, nullptr, GRPC_ERROR_NONE); grpc_core::TracedBuffer::Shutdown(&list, nullptr, GRPC_ERROR_NONE);
@ -96,10 +96,25 @@ static void TestTcpBufferList() {
TestShutdownFlushesList(); TestShutdownFlushesList();
} }
/* Tests grpc_core::Optional */
static void TestOptional() {
grpc_core::Optional<int> opt_val;
GPR_ASSERT(opt_val.has_value() == false);
const int kTestVal = 123;
opt_val.set(kTestVal);
GPR_ASSERT(opt_val.has_value());
GPR_ASSERT(opt_val.value() == 123);
opt_val.reset();
GPR_ASSERT(opt_val.has_value() == false);
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
grpc::testing::TestEnvironment env(argc, argv); grpc::testing::TestEnvironment env(argc, argv);
grpc_init(); grpc_init();
TestTcpBufferList(); TestTcpBufferList();
TestOptional();
grpc_shutdown(); grpc_shutdown();
return 0; return 0;
} }

Loading…
Cancel
Save