From 14c55dea4ab8a7a24003f0497c6b166079096be1 Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Tue, 14 May 2019 16:19:07 -0700 Subject: [PATCH] Inline more of md_unref for interned metadata. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the common case, unref() for interned metadata was performing some unnecessary branches. This patch reduces the instruction count from about 19 to 12 in the case where an unref() occured but it wasn't the last reference. This has various speedups for chttp2_hpack, metadata and fullstack pingpong benchmarks. As a tradeoff, static/external metadata unref regresses slightly (fraction of a nanosecond increase in CPU time) while interned improves (roughly 5 nanoseconds reduction in CPU time). Examples: Fullstack: BM_UnaryPingPong/8/8 [polls/iter:0 ] 7.12µs ± 4% 6.91µs ± 3% -3.00% (p=0.000 n=20+19) BM_UnaryPingPong/0/32768 [polls/iter:3.0001 ] 35.3µs ± 2% 34.9µs ± 0% -1.15% (p=0.048 n=8+4) BM_UnaryPingPong/32768/32768 [polls/iter:3.00014 ] 49.6µs ± 1% 49.0µs ± 1% -1.15% (p=0.017 n=7+3) HpackParser: BM_HpackEncoderInitDestroy 274ns ± 0% 254ns ± 0% -7.12% (p=0.000 n=19+19) BM_HpackParserParseHeader 173ns ± 0% 140ns ± 0% -19.04% (p=0.000 n=18+16) BM_HpackParserParseHeader 751ns ± 1% 702ns ± 2% -6.50% (p=0.000 n=20+20) BM_HpackParserParseHeader 50.6ns ± 0% 46.9ns ± 0% -7.29% (p=0.000 n=14+19) BM_HpackParserParseHeader 486ns ± 1% 455ns ± 0% -6.42% (p=0.000 n=20+19) BM_HpackParserParseHeader 1.17µs ± 1% 1.12µs ± 1% -4.18% (p=0.000 n=18+20) BM_HpackParserParseHeader 163ns ± 1% 155ns ± 0% -4.52% (p=0.000 n=18+17) Metadata: BM_MetadataFromInternedKeyWithBackingStore 7.32ns ± 1% 6.67ns ± 0% -8.89% (p=0.000 n=20+17) BM_MetadataFromStaticMetadataStringsNotIndexed 77.4ns ± 2% 79.6ns ± 0% +2.86% (p=0.000 n=18+19) BM_MetadataRefUnrefExternal 1.74ns ± 0% 1.79ns ± 0% +2.65% (p=0.000 n=20+19) BM_MetadataRefUnrefInterned 18.5ns ± 0% 13.3ns ± 0% -28.00% (p=0.000 n=17+17) BM_MetadataRefUnrefAllocated 18.5ns ± 0% 13.3ns ± 0% -28.00% (p=0.000 n=17+18) --- src/core/lib/transport/metadata.cc | 48 ++++++---- src/core/lib/transport/metadata.h | 135 +++++++++++++---------------- 2 files changed, 93 insertions(+), 90 deletions(-) diff --git a/src/core/lib/transport/metadata.cc b/src/core/lib/transport/metadata.cc index 2fdecc99d3e..1523ced16d8 100644 --- a/src/core/lib/transport/metadata.cc +++ b/src/core/lib/transport/metadata.cc @@ -109,13 +109,12 @@ void StaticMetadata::HashInit() { AllocatedMetadata::AllocatedMetadata(const grpc_slice& key, const grpc_slice& value) - : key_(grpc_slice_ref_internal(key)), - value_(grpc_slice_ref_internal(value)), - refcnt_(1) { + : RefcountedMdBase(grpc_slice_ref_internal(key), + grpc_slice_ref_internal(value)) { #ifndef NDEBUG if (grpc_trace_metadata.enabled()) { - char* key_str = grpc_slice_to_c_string(key_); - char* value_str = grpc_slice_to_c_string(value_); + char* key_str = grpc_slice_to_c_string(key); + char* value_str = grpc_slice_to_c_string(value); gpr_log(GPR_DEBUG, "ELM ALLOC:%p:%" PRIdPTR ": '%s' = '%s'", this, RefValue(), key_str, value_str); gpr_free(key_str); @@ -125,8 +124,8 @@ AllocatedMetadata::AllocatedMetadata(const grpc_slice& key, } AllocatedMetadata::~AllocatedMetadata() { - grpc_slice_unref_internal(key_); - grpc_slice_unref_internal(value_); + grpc_slice_unref_internal(key()); + grpc_slice_unref_internal(value()); void* user_data = user_data_.data.Load(grpc_core::MemoryOrder::RELAXED); if (user_data) { destroy_user_data_func destroy_user_data = @@ -138,15 +137,13 @@ AllocatedMetadata::~AllocatedMetadata() { InternedMetadata::InternedMetadata(const grpc_slice& key, const grpc_slice& value, uint32_t hash, InternedMetadata* next) - : key_(grpc_slice_ref_internal(key)), - value_(grpc_slice_ref_internal(value)), - hash_(hash), - refcnt_(1), + : RefcountedMdBase(grpc_slice_ref_internal(key), + grpc_slice_ref_internal(value), hash), link_(next) { #ifndef NDEBUG if (grpc_trace_metadata.enabled()) { - char* key_str = grpc_slice_to_c_string(key_); - char* value_str = grpc_slice_to_c_string(value_); + char* key_str = grpc_slice_to_c_string(key); + char* value_str = grpc_slice_to_c_string(value); gpr_log(GPR_DEBUG, "ELM NEW:%p:%" PRIdPTR ": '%s' = '%s'", this, RefValue(), key_str, value_str); gpr_free(key_str); @@ -156,8 +153,8 @@ InternedMetadata::InternedMetadata(const grpc_slice& key, } InternedMetadata::~InternedMetadata() { - grpc_slice_unref_internal(key_); - grpc_slice_unref_internal(value_); + grpc_slice_unref_internal(key()); + grpc_slice_unref_internal(value()); void* user_data = user_data_.data.Load(grpc_core::MemoryOrder::RELAXED); if (user_data) { destroy_user_data_func destroy_user_data = @@ -242,8 +239,8 @@ static int is_mdelem_static(grpc_mdelem e) { void InternedMetadata::RefWithShardLocked(mdtab_shard* shard) { #ifndef NDEBUG if (grpc_trace_metadata.enabled()) { - char* key_str = grpc_slice_to_c_string(key_); - char* value_str = grpc_slice_to_c_string(value_); + char* key_str = grpc_slice_to_c_string(key()); + char* value_str = grpc_slice_to_c_string(value()); intptr_t value = RefValue(); gpr_log(__FILE__, __LINE__, GPR_LOG_SEVERITY_DEBUG, "ELM REF:%p:%" PRIdPTR "->%" PRIdPTR ": '%s' = '%s'", this, value, @@ -498,3 +495,20 @@ void grpc_mdelem_do_unref(grpc_mdelem gmd DEBUG_ARGS) { } } } + +void grpc_mdelem_on_final_unref(grpc_mdelem_data_storage storage, void* ptr, + uint32_t hash DEBUG_ARGS) { + switch (storage) { + case GRPC_MDELEM_STORAGE_EXTERNAL: + case GRPC_MDELEM_STORAGE_STATIC: + return; + case GRPC_MDELEM_STORAGE_INTERNED: { + note_disposed_interned_metadata(hash); + break; + } + case GRPC_MDELEM_STORAGE_ALLOCATED: { + grpc_core::Delete(reinterpret_cast(ptr)); + break; + } + } +} diff --git a/src/core/lib/transport/metadata.h b/src/core/lib/transport/metadata.h index 74c69f97584..3cef031031d 100644 --- a/src/core/lib/transport/metadata.h +++ b/src/core/lib/transport/metadata.h @@ -80,17 +80,22 @@ typedef struct grpc_mdelem_data { this bit set in their integer value */ #define GRPC_MDELEM_STORAGE_INTERNED_BIT 1 +/* External and static storage metadata has no refcount to ref/unref. Allocated + * and interned metadata do have a refcount. Metadata ref and unref methods use + * a switch statement on this enum to determine which behaviour to execute. + * Keeping the no-ref cases together and the ref-cases together leads to + * slightly better code generation (9 inlined instructions rather than 10). */ typedef enum { /* memory pointed to by grpc_mdelem::payload is owned by an external system */ GRPC_MDELEM_STORAGE_EXTERNAL = 0, - /* memory pointed to by grpc_mdelem::payload is interned by the metadata - system */ - GRPC_MDELEM_STORAGE_INTERNED = GRPC_MDELEM_STORAGE_INTERNED_BIT, + /* memory is in the static metadata table */ + GRPC_MDELEM_STORAGE_STATIC = GRPC_MDELEM_STORAGE_INTERNED_BIT, /* memory pointed to by grpc_mdelem::payload is allocated by the metadata system */ GRPC_MDELEM_STORAGE_ALLOCATED = 2, - /* memory is in the static metadata table */ - GRPC_MDELEM_STORAGE_STATIC = 2 | GRPC_MDELEM_STORAGE_INTERNED_BIT, + /* memory pointed to by grpc_mdelem::payload is interned by the metadata + system */ + GRPC_MDELEM_STORAGE_INTERNED = 2 | GRPC_MDELEM_STORAGE_INTERNED_BIT, } grpc_mdelem_data_storage; struct grpc_mdelem { @@ -196,17 +201,17 @@ class StaticMetadata { uint32_t hash_; }; -class InternedMetadata { +class RefcountedMdBase { public: - struct BucketLink { - explicit BucketLink(InternedMetadata* md) : next(md) {} - - InternedMetadata* next = nullptr; - }; + RefcountedMdBase(const grpc_slice& key, const grpc_slice& value) + : key_(key), value_(value), refcnt_(1) {} + RefcountedMdBase(const grpc_slice& key, const grpc_slice& value, + uint32_t hash) + : key_(key), value_(value), refcnt_(1), hash_(hash) {} - InternedMetadata(const grpc_slice& key, const grpc_slice& value, - uint32_t hash, InternedMetadata* next); - ~InternedMetadata(); + const grpc_slice& key() const { return key_; } + const grpc_slice& value() const { return value_; } + uint32_t hash() { return hash_; } #ifndef NDEBUG void Ref(const char* file, int line) { @@ -218,92 +223,65 @@ class InternedMetadata { grpc_mdelem_trace_unref(this, key_, value_, RefValue(), file, line); return Unref(); } -#else - // We define a naked Ref() in the else-clause to make sure we don't - // inadvertently skip the assert on debug builds. +#endif void Ref() { /* we can assume the ref count is >= 1 as the application is calling this function - meaning that no adjustment to mdtab_free is necessary, simplifying the logic here to be just an atomic increment */ refcnt_.FetchAdd(1, MemoryOrder::RELAXED); } -#endif // ifndef NDEBUG bool Unref() { const intptr_t prior = refcnt_.FetchSub(1, MemoryOrder::ACQ_REL); GPR_DEBUG_ASSERT(prior > 0); return prior == 1; } - void RefWithShardLocked(mdtab_shard* shard); - const grpc_slice& key() const { return key_; } - const grpc_slice& value() const { return value_; } - UserData* user_data() { return &user_data_; } - uint32_t hash() { return hash_; } - InternedMetadata* bucket_next() { return link_.next; } - void set_bucket_next(InternedMetadata* md) { link_.next = md; } - - static size_t CleanupLinkedMetadata(BucketLink* head); - - private: + protected: + intptr_t RefValue() { return refcnt_.Load(MemoryOrder::RELAXED); } bool AllRefsDropped() { return refcnt_.Load(MemoryOrder::ACQUIRE) == 0; } bool FirstRef() { return refcnt_.FetchAdd(1, MemoryOrder::RELAXED) == 0; } - intptr_t RefValue() { return refcnt_.Load(MemoryOrder::RELAXED); } + private: /* must be byte compatible with grpc_mdelem_data */ grpc_slice key_; grpc_slice value_; - - /* private only data */ - uint32_t hash_; grpc_core::Atomic refcnt_; + uint32_t hash_ = 0; +}; - UserData user_data_; +class InternedMetadata : public RefcountedMdBase { + public: + struct BucketLink { + explicit BucketLink(InternedMetadata* md) : next(md) {} + + InternedMetadata* next = nullptr; + }; + + InternedMetadata(const grpc_slice& key, const grpc_slice& value, + uint32_t hash, InternedMetadata* next); + ~InternedMetadata(); + + void RefWithShardLocked(mdtab_shard* shard); + UserData* user_data() { return &user_data_; } + InternedMetadata* bucket_next() { return link_.next; } + void set_bucket_next(InternedMetadata* md) { link_.next = md; } + + static size_t CleanupLinkedMetadata(BucketLink* head); + private: + UserData user_data_; BucketLink link_; }; /* Shadow structure for grpc_mdelem_data for allocated elements */ -class AllocatedMetadata { +class AllocatedMetadata : public RefcountedMdBase { public: AllocatedMetadata(const grpc_slice& key, const grpc_slice& value); ~AllocatedMetadata(); - const grpc_slice& key() const { return key_; } - const grpc_slice& value() const { return value_; } UserData* user_data() { return &user_data_; } -#ifndef NDEBUG - void Ref(const char* file, int line) { - grpc_mdelem_trace_ref(this, key_, value_, RefValue(), file, line); - Ref(); - } - bool Unref(const char* file, int line) { - grpc_mdelem_trace_unref(this, key_, value_, RefValue(), file, line); - return Unref(); - } -#endif // ifndef NDEBUG - void Ref() { - /* we can assume the ref count is >= 1 as the application is calling - this function - meaning that no adjustment to mdtab_free is necessary, - simplifying the logic here to be just an atomic increment */ - refcnt_.FetchAdd(1, MemoryOrder::RELAXED); - } - bool Unref() { - const intptr_t prior = refcnt_.FetchSub(1, MemoryOrder::ACQ_REL); - GPR_DEBUG_ASSERT(prior > 0); - return prior == 1; - } - private: - intptr_t RefValue() { return refcnt_.Load(MemoryOrder::RELAXED); } - - /* must be byte compatible with grpc_mdelem_data */ - grpc_slice key_; - grpc_slice value_; - - /* private only data */ - grpc_core::Atomic refcnt_; - UserData user_data_; }; @@ -348,24 +326,35 @@ inline grpc_mdelem grpc_mdelem_ref(grpc_mdelem gmd) { #ifndef NDEBUG #define GRPC_MDELEM_UNREF(s) grpc_mdelem_unref((s), __FILE__, __LINE__) -void grpc_mdelem_do_unref(grpc_mdelem gmd, const char* file, int line); +void grpc_mdelem_on_final_unref(grpc_mdelem_data_storage storage, void* ptr, + uint32_t hash, const char* file, int line); inline void grpc_mdelem_unref(grpc_mdelem gmd, const char* file, int line) { #else #define GRPC_MDELEM_UNREF(s) grpc_mdelem_unref((s)) -void grpc_mdelem_do_unref(grpc_mdelem gmd); +void grpc_mdelem_on_final_unref(grpc_mdelem_data_storage storage, void* ptr, + uint32_t hash); inline void grpc_mdelem_unref(grpc_mdelem gmd) { #endif - switch (GRPC_MDELEM_STORAGE(gmd)) { + const grpc_mdelem_data_storage storage = GRPC_MDELEM_STORAGE(gmd); + switch (storage) { case GRPC_MDELEM_STORAGE_EXTERNAL: case GRPC_MDELEM_STORAGE_STATIC: return; case GRPC_MDELEM_STORAGE_INTERNED: case GRPC_MDELEM_STORAGE_ALLOCATED: + auto* md = + reinterpret_cast GRPC_MDELEM_DATA(gmd); + /* once the refcount hits zero, some other thread can come along and + free an interned md at any time: it's unsafe from this point on to + access it so we read the hash now. */ + uint32_t hash = md->hash(); + if (GPR_UNLIKELY(md->Unref())) { #ifndef NDEBUG - grpc_mdelem_do_unref(gmd, file, line); + grpc_mdelem_on_final_unref(storage, md, hash, file, line); #else - grpc_mdelem_do_unref(gmd); + grpc_mdelem_on_final_unref(storage, md, hash); #endif + } return; } }