diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c
index 121e80117f..b2d1b1a395 100644
--- a/php/ext/google/protobuf/php-upb.c
+++ b/php/ext/google/protobuf/php-upb.c
@@ -3065,28 +3065,39 @@ void upb_Arena_LogFree(const upb_Arena* arena) {
 }
 #endif  // UPB_TRACING_ENABLED
 
+// If the param a is already the root, no memory ordering of the refcount is
+// provided. If it has a parent, then acquire memory ordering is provided for
+// both the root and the refcount.
 static upb_ArenaRoot _upb_Arena_FindRoot(const upb_Arena* a) {
   upb_ArenaInternal* ai = upb_Arena_Internal(a);
-  uintptr_t poc = upb_Atomic_Load(&ai->parent_or_count, memory_order_acquire);
-  while (_upb_Arena_IsTaggedPointer(poc)) {
+  uintptr_t poc = upb_Atomic_Load(&ai->parent_or_count, memory_order_relaxed);
+  if (_upb_Arena_IsTaggedRefcount(poc)) {
+    // Fast, relaxed path - arenas that have never been fused to a parent only
+    // need relaxed memory order, since they're returning themselves and the
+    // refcount.
+    return (upb_ArenaRoot){.root = ai, .tagged_count = poc};
+  }
+  // Slow path needs acquire order; reloading is cheaper than a fence on ARM
+  // (LDA vs DMB ISH). Even though this is a reread, we know it must be a tagged
+  // pointer because if this Arena isn't a root, it can't ever become one.
+  poc = upb_Atomic_Load(&ai->parent_or_count, memory_order_acquire);
+  do {
     upb_ArenaInternal* next = _upb_Arena_PointerFromTagged(poc);
     UPB_TSAN_CHECK_PUBLISHED(next);
     UPB_ASSERT(ai != next);
-    uintptr_t next_poc =
-        upb_Atomic_Load(&next->parent_or_count, memory_order_acquire);
+    poc = upb_Atomic_Load(&next->parent_or_count, memory_order_acquire);
 
-    if (_upb_Arena_IsTaggedPointer(next_poc)) {
+    if (_upb_Arena_IsTaggedPointer(poc)) {
       // To keep complexity down, we lazily collapse levels of the tree. This
       // keeps it flat in the final case, but doesn't cost much incrementally.
       //
       // Path splitting keeps time complexity down, see:
       //   https://en.wikipedia.org/wiki/Disjoint-set_data_structure
-      UPB_ASSERT(ai != _upb_Arena_PointerFromTagged(next_poc));
-      upb_Atomic_Store(&ai->parent_or_count, next_poc, memory_order_release);
+      UPB_ASSERT(ai != _upb_Arena_PointerFromTagged(poc));
+      upb_Atomic_Store(&ai->parent_or_count, poc, memory_order_release);
     }
     ai = next;
-    poc = next_poc;
-  }
+  } while (_upb_Arena_IsTaggedPointer(poc));
   return (upb_ArenaRoot){.root = ai, .tagged_count = poc};
 }
 
@@ -3281,6 +3292,8 @@ static void _upb_Arena_DoFree(upb_ArenaInternal* ai) {
 
 void upb_Arena_Free(upb_Arena* a) {
   upb_ArenaInternal* ai = upb_Arena_Internal(a);
+  // Cannot be replaced with _upb_Arena_FindRoot, as that provides only a
+  // relaxed read of the refcount if ai is already the root.
   uintptr_t poc = upb_Atomic_Load(&ai->parent_or_count, memory_order_acquire);
 retry:
   while (_upb_Arena_IsTaggedPointer(poc)) {
@@ -3484,7 +3497,13 @@ retry:
             &r.root->parent_or_count, &r.tagged_count,
             _upb_Arena_TaggedFromRefcount(
                 _upb_Arena_RefCountFromTagged(r.tagged_count) + 1),
-            memory_order_release, memory_order_acquire)) {
+            // Relaxed order is safe on success; incrementing the refcount need
+            // not synchronize with the eventual free of the arena - that
+            // synchronization is provided by decrements.
+            memory_order_relaxed,
+            // Relaxed order is safe on failure, as r.tagged_count is
+            // immediately overwritten by retrying the find root operation.
+            memory_order_relaxed)) {
       // We incremented it successfully, so we are done.
       return true;
     }
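The _upb_Arena_FindRoot change above is a disjoint-set find with mixed memory orders: a relaxed load on the never-fused fast path, an acquire reload on the slow path, and path splitting as it walks toward the root. Stripped of upb specifics, the pattern looks roughly like the sketch below; Node, find_root, and the low-bit refcount tagging here are illustrative assumptions, not upb's actual definitions.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

// Hypothetical node: the low bit of parent_or_count marks a tagged
// refcount (this node is a root); an even value is a parent pointer.
typedef struct Node {
  _Atomic uintptr_t parent_or_count;
} Node;

static bool is_count(uintptr_t v) { return (v & 1) != 0; }

static Node* find_root(Node* n) {
  uintptr_t v =
      atomic_load_explicit(&n->parent_or_count, memory_order_relaxed);
  // Fast path: a node that was never fused is its own root. Relaxed is
  // enough because we return only the node the caller already holds.
  if (is_count(v)) return n;
  // Slow path: reload with acquire (on ARM an LDA, cheaper than a DMB ISH
  // fence) so the parent chain published by the fusing thread is visible.
  // A non-root can never become a root again, so v is still a pointer.
  v = atomic_load_explicit(&n->parent_or_count, memory_order_acquire);
  do {
    Node* next = (Node*)v;  // Even value: cast is a plain pointer here.
    v = atomic_load_explicit(&next->parent_or_count, memory_order_acquire);
    if (!is_count(v)) {
      // Path splitting: repoint n at its grandparent to flatten the tree.
      atomic_store_explicit(&n->parent_or_count, v, memory_order_release);
    }
    n = next;
  } while (!is_count(v));
  return n;
}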
diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h
index accb0a3a06..f3a7344aee 100644
--- a/php/ext/google/protobuf/php-upb.h
+++ b/php/ext/google/protobuf/php-upb.h
@@ -547,7 +547,8 @@ void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt,
  * to be freed. However the Arena does allow users to register cleanup
  * functions that will run when the arena is destroyed.
  *
- * A upb_Arena is *not* thread-safe.
+ * A upb_Arena is *not* thread-safe, although some functions related to
+ * managing its lifetime are, and are documented as such.
  *
  * You could write a thread-safe arena allocator that satisfies the
  * upb_alloc interface, but it would not be as efficient for the
@@ -773,6 +774,7 @@ void upb_Arena_DecRefFor(const upb_Arena* a, const void* owner);
 // This operation is safe to use concurrently from multiple threads.
 size_t upb_Arena_SpaceAllocated(upb_Arena* a, size_t* fused_count);
 
+// This operation is safe to use concurrently from multiple threads.
 uint32_t upb_Arena_DebugRefCount(upb_Arena* a);
 
 UPB_API_INLINE upb_Arena* upb_Arena_New(void) {
@@ -791,6 +793,7 @@ UPB_API_INLINE void* upb_Arena_Realloc(upb_Arena* a, void* ptr, size_t oldsize,
 //
 // This API is meant for experimentation only. It will likely be removed in
 // the future.
+// This operation is safe to use concurrently from multiple threads.
 void upb_Arena_SetMaxBlockSize(size_t max);
 
 // Shrinks the last alloc from arena.
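The relaxed success order on the refcount compare-exchange in the php-upb.c hunk above leans on a standard refcounting rule: increments need no ordering of their own, because all synchronization with the eventual free is supplied by the decrement side. A minimal sketch of that rule in C11 atomics follows; the Ref type and function names are hypothetical, not upb API.

#include <stdatomic.h>
#include <stdlib.h>

typedef struct {
  _Atomic long refs;
  // ... payload ...
} Ref;

// Incrementing can be relaxed: the caller already holds a valid
// reference, so the object cannot be freed out from under it, and the
// increment publishes nothing another thread needs to observe.
static void ref_inc(Ref* r) {
  atomic_fetch_add_explicit(&r->refs, 1, memory_order_relaxed);
}

// Decrementing releases this thread's writes; the thread that takes the
// count to zero acquires all prior decrements before freeing, so no
// access to the object can be reordered past the free.
static void ref_dec(Ref* r) {
  if (atomic_fetch_sub_explicit(&r->refs, 1, memory_order_release) == 1) {
    atomic_thread_fence(memory_order_acquire);
    free(r);
  }
}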
diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c
index ef35eb99c9..f0e65071b2 100644
--- a/ruby/ext/google/protobuf_c/ruby-upb.c
+++ b/ruby/ext/google/protobuf_c/ruby-upb.c
@@ -3065,28 +3065,39 @@ void upb_Arena_LogFree(const upb_Arena* arena) {
 }
 #endif  // UPB_TRACING_ENABLED
 
+// If the param a is already the root, no memory ordering of the refcount is
+// provided. If it has a parent, then acquire memory ordering is provided for
+// both the root and the refcount.
 static upb_ArenaRoot _upb_Arena_FindRoot(const upb_Arena* a) {
   upb_ArenaInternal* ai = upb_Arena_Internal(a);
-  uintptr_t poc = upb_Atomic_Load(&ai->parent_or_count, memory_order_acquire);
-  while (_upb_Arena_IsTaggedPointer(poc)) {
+  uintptr_t poc = upb_Atomic_Load(&ai->parent_or_count, memory_order_relaxed);
+  if (_upb_Arena_IsTaggedRefcount(poc)) {
+    // Fast, relaxed path - arenas that have never been fused to a parent only
+    // need relaxed memory order, since they're returning themselves and the
+    // refcount.
+    return (upb_ArenaRoot){.root = ai, .tagged_count = poc};
+  }
+  // Slow path needs acquire order; reloading is cheaper than a fence on ARM
+  // (LDA vs DMB ISH). Even though this is a reread, we know it must be a tagged
+  // pointer because if this Arena isn't a root, it can't ever become one.
+  poc = upb_Atomic_Load(&ai->parent_or_count, memory_order_acquire);
+  do {
     upb_ArenaInternal* next = _upb_Arena_PointerFromTagged(poc);
     UPB_TSAN_CHECK_PUBLISHED(next);
     UPB_ASSERT(ai != next);
-    uintptr_t next_poc =
-        upb_Atomic_Load(&next->parent_or_count, memory_order_acquire);
+    poc = upb_Atomic_Load(&next->parent_or_count, memory_order_acquire);
 
-    if (_upb_Arena_IsTaggedPointer(next_poc)) {
+    if (_upb_Arena_IsTaggedPointer(poc)) {
       // To keep complexity down, we lazily collapse levels of the tree. This
       // keeps it flat in the final case, but doesn't cost much incrementally.
       //
       // Path splitting keeps time complexity down, see:
       //   https://en.wikipedia.org/wiki/Disjoint-set_data_structure
-      UPB_ASSERT(ai != _upb_Arena_PointerFromTagged(next_poc));
-      upb_Atomic_Store(&ai->parent_or_count, next_poc, memory_order_release);
+      UPB_ASSERT(ai != _upb_Arena_PointerFromTagged(poc));
+      upb_Atomic_Store(&ai->parent_or_count, poc, memory_order_release);
     }
     ai = next;
-    poc = next_poc;
-  }
+  } while (_upb_Arena_IsTaggedPointer(poc));
   return (upb_ArenaRoot){.root = ai, .tagged_count = poc};
 }
 
@@ -3281,6 +3292,8 @@ static void _upb_Arena_DoFree(upb_ArenaInternal* ai) {
 
 void upb_Arena_Free(upb_Arena* a) {
   upb_ArenaInternal* ai = upb_Arena_Internal(a);
+  // Cannot be replaced with _upb_Arena_FindRoot, as that provides only a
+  // relaxed read of the refcount if ai is already the root.
   uintptr_t poc = upb_Atomic_Load(&ai->parent_or_count, memory_order_acquire);
 retry:
   while (_upb_Arena_IsTaggedPointer(poc)) {
@@ -3484,7 +3497,13 @@ retry:
             &r.root->parent_or_count, &r.tagged_count,
             _upb_Arena_TaggedFromRefcount(
                 _upb_Arena_RefCountFromTagged(r.tagged_count) + 1),
-            memory_order_release, memory_order_acquire)) {
+            // Relaxed order is safe on success; incrementing the refcount need
+            // not synchronize with the eventual free of the arena - that
+            // synchronization is provided by decrements.
+            memory_order_relaxed,
+            // Relaxed order is safe on failure, as r.tagged_count is
+            // immediately overwritten by retrying the find root operation.
+            memory_order_relaxed)) {
       // We incremented it successfully, so we are done.
       return true;
     }
diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h
index ca980f3d78..4baac53f0e 100755
--- a/ruby/ext/google/protobuf_c/ruby-upb.h
+++ b/ruby/ext/google/protobuf_c/ruby-upb.h
@@ -549,7 +549,8 @@ void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt,
  * to be freed. However the Arena does allow users to register cleanup
  * functions that will run when the arena is destroyed.
  *
- * A upb_Arena is *not* thread-safe.
+ * A upb_Arena is *not* thread-safe, although some functions related to
+ * managing its lifetime are, and are documented as such.
  *
  * You could write a thread-safe arena allocator that satisfies the
  * upb_alloc interface, but it would not be as efficient for the
@@ -775,6 +776,7 @@ void upb_Arena_DecRefFor(const upb_Arena* a, const void* owner);
 // This operation is safe to use concurrently from multiple threads.
 size_t upb_Arena_SpaceAllocated(upb_Arena* a, size_t* fused_count);
 
+// This operation is safe to use concurrently from multiple threads.
 uint32_t upb_Arena_DebugRefCount(upb_Arena* a);
 
 UPB_API_INLINE upb_Arena* upb_Arena_New(void) {
@@ -793,6 +795,7 @@ UPB_API_INLINE void* upb_Arena_Realloc(upb_Arena* a, void* ptr, size_t oldsize,
 //
 // This API is meant for experimentation only. It will likely be removed in
 // the future.
+// This operation is safe to use concurrently from multiple threads.
 void upb_Arena_SetMaxBlockSize(size_t max);
 
 // Shrinks the last alloc from arena.
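The relaxed failure order on the compare-exchange hunks above is safe for a different reason than the success order: a failed CAS leaves the observed count stale, and the caller re-runs the find-root before retrying, overwriting it. A sketch of that shape under the same hypothetical low-bit tagging as the earlier Node example (not upb's actual helpers):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct Node {
  _Atomic uintptr_t parent_or_count;
} Node;

// Illustrative encoding: odd values are tagged refcounts.
static uintptr_t tagged_from_count(uintptr_t n) { return (n << 1) | 1; }
static uintptr_t count_from_tagged(uintptr_t v) { return v >> 1; }

// Try to bump the refcount of a root whose tagged count we last observed
// as `expected`. Relaxed on success: increments need no ordering of their
// own (the decrement side synchronizes the eventual free). Relaxed on
// failure: `expected` is stale either way, and the caller discards it and
// repeats the find-root before trying again.
static bool try_inc(Node* root, uintptr_t expected) {
  return atomic_compare_exchange_strong_explicit(
      &root->parent_or_count, &expected,
      tagged_from_count(count_from_tagged(expected) + 1),
      memory_order_relaxed, memory_order_relaxed);
}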