Inline more of md_unref for interned metadata.

In the common case, unref() for interned metadata was performing some
unnecessary branches. This patch reduces the instruction count from about 19 to
12 in the case where an unref() occured but it wasn't the last reference.

This has various speedups for chttp2_hpack, metadata and fullstack pingpong
benchmarks.

As a tradeoff, static/external metadata unref regresses slightly (fraction of a
nanosecond increase in CPU time) while interned improves (roughly 5 nanoseconds
reduction in CPU time).

Examples:

Fullstack:

BM_UnaryPingPong<InProcess, NoOpMutator,  NoOpMutator>/8/8  [polls/iter:0 ]
7.12µs  ± 4%  6.91µs  ± 3%  -3.00%  (p=0.000  n=20+19)
BM_UnaryPingPong<TCP, NoOpMutator,  NoOpMutator>/0/32768
[polls/iter:3.0001  ] 35.3µs  ± 2%  34.9µs  ± 0%  -1.15%  (p=0.048  n=8+4)
BM_UnaryPingPong<TCP, NoOpMutator,  NoOpMutator>/32768/32768
[polls/iter:3.00014 ] 49.6µs  ± 1%  49.0µs  ± 1%  -1.15%  (p=0.017  n=7+3)

HpackParser:

BM_HpackEncoderInitDestroy
274ns ± 0%              254ns ± 0%   -7.12%        (p=0.000 n=19+19)
BM_HpackParserParseHeader<RepresentativeClientInitialMetadata, UnrefHeader>
173ns ± 0%              140ns ± 0%  -19.04%        (p=0.000 n=18+16)
BM_HpackParserParseHeader<MoreRepresentativeClientInitialMetadata, UnrefHeader>
751ns ± 1%              702ns ± 2%   -6.50%        (p=0.000 n=20+20)
BM_HpackParserParseHeader<RepresentativeServerTrailingMetadata, UnrefHeader>
50.6ns ± 0%             46.9ns ± 0%   -7.29%        (p=0.000 n=14+19)
BM_HpackParserParseHeader<RepresentativeClientInitialMetadata, OnInitialHeader>
486ns ± 1%              455ns ± 0%   -6.42%        (p=0.000 n=20+19)
BM_HpackParserParseHeader<MoreRepresentativeClientInitialMetadata,
OnInitialHeader>
1.17µs ± 1%             1.12µs ± 1%   -4.18%        (p=0.000 n=18+20)
BM_HpackParserParseHeader<RepresentativeServerInitialMetadata, OnInitialHeader>
163ns ± 1%              155ns ± 0%   -4.52%        (p=0.000 n=18+17)

Metadata:

BM_MetadataFromInternedKeyWithBackingStore                   7.32ns ± 1%
6.67ns ± 0%   -8.89%        (p=0.000 n=20+17)
BM_MetadataFromStaticMetadataStringsNotIndexed               77.4ns ± 2%
79.6ns ± 0%   +2.86%        (p=0.000 n=18+19)
BM_MetadataRefUnrefExternal                                  1.74ns ± 0%
1.79ns ± 0%   +2.65%        (p=0.000 n=20+19)
BM_MetadataRefUnrefInterned                                  18.5ns ± 0%
13.3ns ± 0%  -28.00%        (p=0.000 n=17+17)
BM_MetadataRefUnrefAllocated                                 18.5ns ± 0%
13.3ns ± 0%  -28.00%        (p=0.000 n=17+18)
pull/19031/head
Arjun Roy 6 years ago
parent 42c93b5f45
commit 14c55dea4a
  1. 48
      src/core/lib/transport/metadata.cc
  2. 135
      src/core/lib/transport/metadata.h

@ -109,13 +109,12 @@ void StaticMetadata::HashInit() {
AllocatedMetadata::AllocatedMetadata(const grpc_slice& key,
const grpc_slice& value)
: key_(grpc_slice_ref_internal(key)),
value_(grpc_slice_ref_internal(value)),
refcnt_(1) {
: RefcountedMdBase(grpc_slice_ref_internal(key),
grpc_slice_ref_internal(value)) {
#ifndef NDEBUG
if (grpc_trace_metadata.enabled()) {
char* key_str = grpc_slice_to_c_string(key_);
char* value_str = grpc_slice_to_c_string(value_);
char* key_str = grpc_slice_to_c_string(key);
char* value_str = grpc_slice_to_c_string(value);
gpr_log(GPR_DEBUG, "ELM ALLOC:%p:%" PRIdPTR ": '%s' = '%s'", this,
RefValue(), key_str, value_str);
gpr_free(key_str);
@ -125,8 +124,8 @@ AllocatedMetadata::AllocatedMetadata(const grpc_slice& key,
}
AllocatedMetadata::~AllocatedMetadata() {
grpc_slice_unref_internal(key_);
grpc_slice_unref_internal(value_);
grpc_slice_unref_internal(key());
grpc_slice_unref_internal(value());
void* user_data = user_data_.data.Load(grpc_core::MemoryOrder::RELAXED);
if (user_data) {
destroy_user_data_func destroy_user_data =
@ -138,15 +137,13 @@ AllocatedMetadata::~AllocatedMetadata() {
InternedMetadata::InternedMetadata(const grpc_slice& key,
const grpc_slice& value, uint32_t hash,
InternedMetadata* next)
: key_(grpc_slice_ref_internal(key)),
value_(grpc_slice_ref_internal(value)),
hash_(hash),
refcnt_(1),
: RefcountedMdBase(grpc_slice_ref_internal(key),
grpc_slice_ref_internal(value), hash),
link_(next) {
#ifndef NDEBUG
if (grpc_trace_metadata.enabled()) {
char* key_str = grpc_slice_to_c_string(key_);
char* value_str = grpc_slice_to_c_string(value_);
char* key_str = grpc_slice_to_c_string(key);
char* value_str = grpc_slice_to_c_string(value);
gpr_log(GPR_DEBUG, "ELM NEW:%p:%" PRIdPTR ": '%s' = '%s'", this,
RefValue(), key_str, value_str);
gpr_free(key_str);
@ -156,8 +153,8 @@ InternedMetadata::InternedMetadata(const grpc_slice& key,
}
InternedMetadata::~InternedMetadata() {
grpc_slice_unref_internal(key_);
grpc_slice_unref_internal(value_);
grpc_slice_unref_internal(key());
grpc_slice_unref_internal(value());
void* user_data = user_data_.data.Load(grpc_core::MemoryOrder::RELAXED);
if (user_data) {
destroy_user_data_func destroy_user_data =
@ -242,8 +239,8 @@ static int is_mdelem_static(grpc_mdelem e) {
void InternedMetadata::RefWithShardLocked(mdtab_shard* shard) {
#ifndef NDEBUG
if (grpc_trace_metadata.enabled()) {
char* key_str = grpc_slice_to_c_string(key_);
char* value_str = grpc_slice_to_c_string(value_);
char* key_str = grpc_slice_to_c_string(key());
char* value_str = grpc_slice_to_c_string(value());
intptr_t value = RefValue();
gpr_log(__FILE__, __LINE__, GPR_LOG_SEVERITY_DEBUG,
"ELM REF:%p:%" PRIdPTR "->%" PRIdPTR ": '%s' = '%s'", this, value,
@ -498,3 +495,20 @@ void grpc_mdelem_do_unref(grpc_mdelem gmd DEBUG_ARGS) {
}
}
}
void grpc_mdelem_on_final_unref(grpc_mdelem_data_storage storage, void* ptr,
uint32_t hash DEBUG_ARGS) {
switch (storage) {
case GRPC_MDELEM_STORAGE_EXTERNAL:
case GRPC_MDELEM_STORAGE_STATIC:
return;
case GRPC_MDELEM_STORAGE_INTERNED: {
note_disposed_interned_metadata(hash);
break;
}
case GRPC_MDELEM_STORAGE_ALLOCATED: {
grpc_core::Delete(reinterpret_cast<AllocatedMetadata*>(ptr));
break;
}
}
}

@ -80,17 +80,22 @@ typedef struct grpc_mdelem_data {
this bit set in their integer value */
#define GRPC_MDELEM_STORAGE_INTERNED_BIT 1
/* External and static storage metadata has no refcount to ref/unref. Allocated
* and interned metadata do have a refcount. Metadata ref and unref methods use
* a switch statement on this enum to determine which behaviour to execute.
* Keeping the no-ref cases together and the ref-cases together leads to
* slightly better code generation (9 inlined instructions rather than 10). */
typedef enum {
/* memory pointed to by grpc_mdelem::payload is owned by an external system */
GRPC_MDELEM_STORAGE_EXTERNAL = 0,
/* memory pointed to by grpc_mdelem::payload is interned by the metadata
system */
GRPC_MDELEM_STORAGE_INTERNED = GRPC_MDELEM_STORAGE_INTERNED_BIT,
/* memory is in the static metadata table */
GRPC_MDELEM_STORAGE_STATIC = GRPC_MDELEM_STORAGE_INTERNED_BIT,
/* memory pointed to by grpc_mdelem::payload is allocated by the metadata
system */
GRPC_MDELEM_STORAGE_ALLOCATED = 2,
/* memory is in the static metadata table */
GRPC_MDELEM_STORAGE_STATIC = 2 | GRPC_MDELEM_STORAGE_INTERNED_BIT,
/* memory pointed to by grpc_mdelem::payload is interned by the metadata
system */
GRPC_MDELEM_STORAGE_INTERNED = 2 | GRPC_MDELEM_STORAGE_INTERNED_BIT,
} grpc_mdelem_data_storage;
struct grpc_mdelem {
@ -196,17 +201,17 @@ class StaticMetadata {
uint32_t hash_;
};
class InternedMetadata {
class RefcountedMdBase {
public:
struct BucketLink {
explicit BucketLink(InternedMetadata* md) : next(md) {}
InternedMetadata* next = nullptr;
};
RefcountedMdBase(const grpc_slice& key, const grpc_slice& value)
: key_(key), value_(value), refcnt_(1) {}
RefcountedMdBase(const grpc_slice& key, const grpc_slice& value,
uint32_t hash)
: key_(key), value_(value), refcnt_(1), hash_(hash) {}
InternedMetadata(const grpc_slice& key, const grpc_slice& value,
uint32_t hash, InternedMetadata* next);
~InternedMetadata();
const grpc_slice& key() const { return key_; }
const grpc_slice& value() const { return value_; }
uint32_t hash() { return hash_; }
#ifndef NDEBUG
void Ref(const char* file, int line) {
@ -218,92 +223,65 @@ class InternedMetadata {
grpc_mdelem_trace_unref(this, key_, value_, RefValue(), file, line);
return Unref();
}
#else
// We define a naked Ref() in the else-clause to make sure we don't
// inadvertently skip the assert on debug builds.
#endif
void Ref() {
/* we can assume the ref count is >= 1 as the application is calling
this function - meaning that no adjustment to mdtab_free is necessary,
simplifying the logic here to be just an atomic increment */
refcnt_.FetchAdd(1, MemoryOrder::RELAXED);
}
#endif // ifndef NDEBUG
bool Unref() {
const intptr_t prior = refcnt_.FetchSub(1, MemoryOrder::ACQ_REL);
GPR_DEBUG_ASSERT(prior > 0);
return prior == 1;
}
void RefWithShardLocked(mdtab_shard* shard);
const grpc_slice& key() const { return key_; }
const grpc_slice& value() const { return value_; }
UserData* user_data() { return &user_data_; }
uint32_t hash() { return hash_; }
InternedMetadata* bucket_next() { return link_.next; }
void set_bucket_next(InternedMetadata* md) { link_.next = md; }
static size_t CleanupLinkedMetadata(BucketLink* head);
private:
protected:
intptr_t RefValue() { return refcnt_.Load(MemoryOrder::RELAXED); }
bool AllRefsDropped() { return refcnt_.Load(MemoryOrder::ACQUIRE) == 0; }
bool FirstRef() { return refcnt_.FetchAdd(1, MemoryOrder::RELAXED) == 0; }
intptr_t RefValue() { return refcnt_.Load(MemoryOrder::RELAXED); }
private:
/* must be byte compatible with grpc_mdelem_data */
grpc_slice key_;
grpc_slice value_;
/* private only data */
uint32_t hash_;
grpc_core::Atomic<intptr_t> refcnt_;
uint32_t hash_ = 0;
};
UserData user_data_;
class InternedMetadata : public RefcountedMdBase {
public:
struct BucketLink {
explicit BucketLink(InternedMetadata* md) : next(md) {}
InternedMetadata* next = nullptr;
};
InternedMetadata(const grpc_slice& key, const grpc_slice& value,
uint32_t hash, InternedMetadata* next);
~InternedMetadata();
void RefWithShardLocked(mdtab_shard* shard);
UserData* user_data() { return &user_data_; }
InternedMetadata* bucket_next() { return link_.next; }
void set_bucket_next(InternedMetadata* md) { link_.next = md; }
static size_t CleanupLinkedMetadata(BucketLink* head);
private:
UserData user_data_;
BucketLink link_;
};
/* Shadow structure for grpc_mdelem_data for allocated elements */
class AllocatedMetadata {
class AllocatedMetadata : public RefcountedMdBase {
public:
AllocatedMetadata(const grpc_slice& key, const grpc_slice& value);
~AllocatedMetadata();
const grpc_slice& key() const { return key_; }
const grpc_slice& value() const { return value_; }
UserData* user_data() { return &user_data_; }
#ifndef NDEBUG
void Ref(const char* file, int line) {
grpc_mdelem_trace_ref(this, key_, value_, RefValue(), file, line);
Ref();
}
bool Unref(const char* file, int line) {
grpc_mdelem_trace_unref(this, key_, value_, RefValue(), file, line);
return Unref();
}
#endif // ifndef NDEBUG
void Ref() {
/* we can assume the ref count is >= 1 as the application is calling
this function - meaning that no adjustment to mdtab_free is necessary,
simplifying the logic here to be just an atomic increment */
refcnt_.FetchAdd(1, MemoryOrder::RELAXED);
}
bool Unref() {
const intptr_t prior = refcnt_.FetchSub(1, MemoryOrder::ACQ_REL);
GPR_DEBUG_ASSERT(prior > 0);
return prior == 1;
}
private:
intptr_t RefValue() { return refcnt_.Load(MemoryOrder::RELAXED); }
/* must be byte compatible with grpc_mdelem_data */
grpc_slice key_;
grpc_slice value_;
/* private only data */
grpc_core::Atomic<intptr_t> refcnt_;
UserData user_data_;
};
@ -348,24 +326,35 @@ inline grpc_mdelem grpc_mdelem_ref(grpc_mdelem gmd) {
#ifndef NDEBUG
#define GRPC_MDELEM_UNREF(s) grpc_mdelem_unref((s), __FILE__, __LINE__)
void grpc_mdelem_do_unref(grpc_mdelem gmd, const char* file, int line);
void grpc_mdelem_on_final_unref(grpc_mdelem_data_storage storage, void* ptr,
uint32_t hash, const char* file, int line);
inline void grpc_mdelem_unref(grpc_mdelem gmd, const char* file, int line) {
#else
#define GRPC_MDELEM_UNREF(s) grpc_mdelem_unref((s))
void grpc_mdelem_do_unref(grpc_mdelem gmd);
void grpc_mdelem_on_final_unref(grpc_mdelem_data_storage storage, void* ptr,
uint32_t hash);
inline void grpc_mdelem_unref(grpc_mdelem gmd) {
#endif
switch (GRPC_MDELEM_STORAGE(gmd)) {
const grpc_mdelem_data_storage storage = GRPC_MDELEM_STORAGE(gmd);
switch (storage) {
case GRPC_MDELEM_STORAGE_EXTERNAL:
case GRPC_MDELEM_STORAGE_STATIC:
return;
case GRPC_MDELEM_STORAGE_INTERNED:
case GRPC_MDELEM_STORAGE_ALLOCATED:
auto* md =
reinterpret_cast<grpc_core::RefcountedMdBase*> GRPC_MDELEM_DATA(gmd);
/* once the refcount hits zero, some other thread can come along and
free an interned md at any time: it's unsafe from this point on to
access it so we read the hash now. */
uint32_t hash = md->hash();
if (GPR_UNLIKELY(md->Unref())) {
#ifndef NDEBUG
grpc_mdelem_do_unref(gmd, file, line);
grpc_mdelem_on_final_unref(storage, md, hash, file, line);
#else
grpc_mdelem_do_unref(gmd);
grpc_mdelem_on_final_unref(storage, md, hash);
#endif
}
return;
}
}

Loading…
Cancel
Save