diff --git a/BUILD b/BUILD index 9514854453..9c3aed78eb 100644 --- a/BUILD +++ b/BUILD @@ -104,6 +104,7 @@ package_group( cc_library( name = "port", hdrs = [ + "upb/port/atomic.h", "upb/port/vsnprintf_compat.h", ], copts = UPB_DEFAULT_COPTS, diff --git a/upb/mem/arena.c b/upb/mem/arena.c index 23124a6425..4161cb06dd 100644 --- a/upb/mem/arena.c +++ b/upb/mem/arena.c @@ -26,6 +26,7 @@ */ #include "upb/mem/arena_internal.h" +#include "upb/port/atomic.h" // Must be last. #include "upb/port/def.inc" @@ -58,19 +59,42 @@ typedef struct cleanup_ent { static const size_t memblock_reserve = UPB_ALIGN_UP(sizeof(_upb_MemBlock), UPB_MALLOC_ALIGN); -static upb_Arena* arena_findroot(upb_Arena* a) { - /* Path splitting keeps time complexity down, see: - * https://en.wikipedia.org/wiki/Disjoint-set_data_structure */ - while (a->parent != a) { - upb_Arena* next = a->parent; - a->parent = next->parent; +static upb_Arena* _upb_Arena_FindRoot(upb_Arena* a) { + uintptr_t poc = upb_Atomic_LoadAcquire(&a->parent_or_count); + while (_upb_Arena_IsTaggedPointer(poc)) { + upb_Arena* next = _upb_Arena_PointerFromTagged(poc); + uintptr_t next_poc = upb_Atomic_LoadAcquire(&next->parent_or_count); + + if (_upb_Arena_IsTaggedPointer(next_poc)) { + // To keep complexity down, we lazily collapse levels of the tree. This + // keeps it flat in the final case, but doesn't cost much incrementally. + // + // Path splitting keeps time complexity down, see: + // https://en.wikipedia.org/wiki/Disjoint-set_data_structure + // + // We can safely use a relaxed atomic here because all threads doing this + // will converge on the same value and we don't need memory orderings to + // be visible. + // + // This is true because: + // - If no fuses occur, this will eventually become the root. + // - If fuses are actively occurring, the root may change, but the + // invariant is that `parent_or_count` merely points to *a* parent. 
+ // + // In other words, it is moving towards "the" root, and that root may move + // further away over time, but the path towards that root will continue to + // be valid and the creation of the path carries all the memory orderings + // required. + upb_Atomic_StoreRelaxed(&a->parent_or_count, next_poc); + } a = next; + poc = next_poc; } return a; } size_t upb_Arena_SpaceAllocated(upb_Arena* arena) { - arena = arena_findroot(arena); + arena = _upb_Arena_FindRoot(arena); size_t memsize = 0; _upb_MemBlock* block = arena->freelist; @@ -83,8 +107,15 @@ size_t upb_Arena_SpaceAllocated(upb_Arena* arena) { return memsize; } -uint32_t upb_Arena_DebugRefCount(upb_Arena* arena) { - return arena_findroot(arena)->refcount; +uint32_t upb_Arena_DebugRefCount(upb_Arena* a) { + // These loads could probably be relaxed, but given that this is debug-only, + // it's not worth introducing a new variant for it. + uintptr_t poc = upb_Atomic_LoadAcquire(&a->parent_or_count); + while (_upb_Arena_IsTaggedPointer(poc)) { + a = _upb_Arena_PointerFromTagged(poc); + poc = upb_Atomic_LoadAcquire(&a->parent_or_count); + } + return _upb_Arena_RefCountFromTagged(poc); } static void upb_Arena_addblock(upb_Arena* a, upb_Arena* root, void* ptr, @@ -108,7 +139,7 @@ static void upb_Arena_addblock(upb_Arena* a, upb_Arena* root, void* ptr, } static bool upb_Arena_Allocblock(upb_Arena* a, size_t size) { - upb_Arena* root = arena_findroot(a); + upb_Arena* root = _upb_Arena_FindRoot(a); size_t block_size = UPB_MAX(size, a->last_size * 2) + memblock_reserve; _upb_MemBlock* block = upb_malloc(root->block_alloc, block_size); @@ -139,8 +170,7 @@ static upb_Arena* arena_initslow(void* mem, size_t n, upb_alloc* alloc) { n -= sizeof(*a); a->block_alloc = alloc; - a->parent = a; - a->refcount = 1; + upb_Atomic_Init(&a->parent_or_count, _upb_Arena_TaggedFromRefcount(1)); a->freelist = NULL; a->freelist_tail = NULL; a->cleanup_metadata = upb_cleanup_metadata(NULL, false); @@ -172,8 +202,7 @@ upb_Arena* 
upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc) { a = UPB_PTR_AT(mem, n - sizeof(*a), upb_Arena); a->block_alloc = alloc; - a->parent = a; - a->refcount = 1; + upb_Atomic_Init(&a->parent_or_count, _upb_Arena_TaggedFromRefcount(1)); a->last_size = UPB_MAX(128, n); a->head.ptr = mem; a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char); @@ -186,8 +215,7 @@ upb_Arena* upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc) { static void arena_dofree(upb_Arena* a) { _upb_MemBlock* block = a->freelist; - UPB_ASSERT(a->parent == a); - UPB_ASSERT(a->refcount == 0); + UPB_ASSERT(_upb_Arena_RefCountFromTagged(a->parent_or_count) == 1); while (block) { /* Load first since we are deleting block. */ @@ -208,8 +236,32 @@ static void arena_dofree(upb_Arena* a) { } void upb_Arena_Free(upb_Arena* a) { - a = arena_findroot(a); - if (--a->refcount == 0) arena_dofree(a); + uintptr_t poc = upb_Atomic_LoadAcquire(&a->parent_or_count); +retry: + while (_upb_Arena_IsTaggedPointer(poc)) { + a = _upb_Arena_PointerFromTagged(poc); + poc = upb_Atomic_LoadAcquire(&a->parent_or_count); + } + + // compare_exchange or fetch_sub are RMW operations, which are more + // expensive than direct loads. As an optimization, we only do RMW ops + // when we need to update things for other threads to see. + if (poc == _upb_Arena_TaggedFromRefcount(1)) { + arena_dofree(a); + return; + } + + if (upb_Atomic_CompareExchangeStrongAcqRel( + &a->parent_or_count, &poc, + _upb_Arena_TaggedFromRefcount(_upb_Arena_RefCountFromTagged(poc) - + 1))) { + // We were >1 and we decremented it successfully, so we are done. + return; + } + + // We failed our update, so someone has done something, retry the whole + // process, but the failed exchange reloaded `poc` for us. 
+ goto retry; } bool upb_Arena_AddCleanup(upb_Arena* a, void* ud, upb_CleanupFunc* func) { @@ -234,33 +286,80 @@ bool upb_Arena_AddCleanup(upb_Arena* a, void* ud, upb_CleanupFunc* func) { } bool upb_Arena_Fuse(upb_Arena* a1, upb_Arena* a2) { - upb_Arena* r1 = arena_findroot(a1); - upb_Arena* r2 = arena_findroot(a2); - - if (r1 == r2) return true; /* Already fused. */ - - /* Do not fuse initial blocks since we cannot lifetime extend them. */ + // SAFE IN THE PRESENCE OF FUSE/FREE RACES BUT NOT IN THE + // PRESENCE OF FUSE/FUSE RACES!!! + // + // `parent_or_count` has two distinct modes + // - parent pointer mode + // - refcount mode + // + // In parent pointer mode, it may change what pointer it refers to in the + // tree, but it will always approach a root. Any operation that walks the + // tree to the root may collapse levels of the tree concurrently. + // + // In refcount mode, any free operation may lower the refcount. + // + // Only a fuse operation may increase the refcount. + // Only a fuse operation may switch `parent_or_count` from parent mode to + // refcount mode. + // + // Given that we do not allow fuse/fuse races, we may rely on the invariant + // that only refcounts can change once we have found the root. Because the + // threads doing the fuse must hold references, we can guarantee that no + // refcounts will reach zero concurrently. + upb_Arena* r1 = _upb_Arena_FindRoot(a1); + upb_Arena* r2 = _upb_Arena_FindRoot(a2); + + if (r1 == r2) return true; // Already fused. + + // Do not fuse initial blocks since we cannot lifetime extend them. if (upb_cleanup_has_initial_block(r1->cleanup_metadata)) return false; if (upb_cleanup_has_initial_block(r2->cleanup_metadata)) return false; - /* Only allow fuse with a common allocator */ + // Only allow fuse with a common allocator if (r1->block_alloc != r2->block_alloc) return false; - /* We want to join the smaller tree to the larger tree. - * So swap first if they are backwards. 
*/ - if (r1->refcount < r2->refcount) { + uintptr_t r1_poc = upb_Atomic_LoadAcquire(&r1->parent_or_count); + uintptr_t r2_poc = upb_Atomic_LoadAcquire(&r2->parent_or_count); + UPB_ASSERT(_upb_Arena_IsTaggedRefcount(r1_poc)); + UPB_ASSERT(_upb_Arena_IsTaggedRefcount(r2_poc)); + + // Keep the tree shallow by joining the smaller tree to the larger. + if (_upb_Arena_RefCountFromTagged(r1_poc) < + _upb_Arena_RefCountFromTagged(r2_poc)) { upb_Arena* tmp = r1; r1 = r2; r2 = tmp; + + uintptr_t tmp_poc = r1_poc; + r1_poc = r2_poc; + r2_poc = tmp_poc; } - /* r1 takes over r2's freelist and refcount. */ - r1->refcount += r2->refcount; + // r1 takes over r2's freelist, this must happen before we update + // refcounts since the refcount carries the memory dependencies. if (r2->freelist_tail) { UPB_ASSERT(r2->freelist_tail->next == NULL); r2->freelist_tail->next = r1->freelist; r1->freelist = r2->freelist; } - r2->parent = r1; + + // The moment we install `r1` as the parent for `r2` all racing frees may + // immediately begin decrementing `r1`'s refcount. So we must install all the + // refcounts that we know about first to prevent a premature unref to zero. + uint32_t r2_refcount = _upb_Arena_RefCountFromTagged(r2_poc); + upb_Atomic_AddRelease(&r1->parent_or_count, ((uintptr_t)r2_refcount) << 1); + + // When installing `r1` as the parent for `r2` racing frees may have changed + // the refcount for `r2` so we need to capture the old value to fix up `r1`'s + // refcount based on the delta from what we saw the first time. 
+ r2_poc = upb_Atomic_ExchangeAcqRel(&r2->parent_or_count, + _upb_Arena_TaggedFromPointer(r1)); + UPB_ASSERT(_upb_Arena_IsTaggedRefcount(r2_poc)); + uint32_t delta_refcount = r2_refcount - _upb_Arena_RefCountFromTagged(r2_poc); + if (delta_refcount != 0) { + upb_Atomic_SubRelease(&r1->parent_or_count, ((uintptr_t)delta_refcount) + << 1); + } return true; } diff --git a/upb/mem/arena_internal.h b/upb/mem/arena_internal.h index 42b6f60b95..f0ea8f3489 100644 --- a/upb/mem/arena_internal.h +++ b/upb/mem/arena_internal.h @@ -49,21 +49,47 @@ struct upb_Arena { /* When multiple arenas are fused together, each arena points to a parent * arena (root points to itself). The root tracks how many live arenas - * reference it. */ - uint32_t refcount; /* Only used when a->parent == a */ - struct upb_Arena* parent; + * reference it. + * + * The low bit is tagged: + * 0: pointer to parent + * 1: count, left shifted by one + */ + UPB_ATOMIC uintptr_t parent_or_count; /* Linked list of blocks to free/cleanup. 
*/ _upb_MemBlock *freelist, *freelist_tail; }; -#ifdef __cplusplus -extern "C" { -#endif +UPB_INLINE bool _upb_Arena_IsTaggedRefcount(uintptr_t parent_or_count) { + return (parent_or_count & 1) == 1; +} -#ifdef __cplusplus -} /* extern "C" */ -#endif +UPB_INLINE bool _upb_Arena_IsTaggedPointer(uintptr_t parent_or_count) { + return (parent_or_count & 1) == 0; +} + +UPB_INLINE uint32_t _upb_Arena_RefCountFromTagged(uintptr_t parent_or_count) { + UPB_ASSERT(_upb_Arena_IsTaggedRefcount(parent_or_count)); + return parent_or_count >> 1; +} + +UPB_INLINE uintptr_t _upb_Arena_TaggedFromRefcount(uint32_t refcount) { + uintptr_t parent_or_count = (((uintptr_t)refcount) << 1) | 1; + UPB_ASSERT(_upb_Arena_IsTaggedRefcount(parent_or_count)); + return parent_or_count; +} + +UPB_INLINE upb_Arena* _upb_Arena_PointerFromTagged(uintptr_t parent_or_count) { + UPB_ASSERT(_upb_Arena_IsTaggedPointer(parent_or_count)); + return (upb_Arena*)parent_or_count; +} + +UPB_INLINE uintptr_t _upb_Arena_TaggedFromPointer(upb_Arena* a) { + uintptr_t parent_or_count = (uintptr_t)a; + UPB_ASSERT(_upb_Arena_IsTaggedPointer(parent_or_count)); + return parent_or_count; +} #include "upb/port/undef.inc" diff --git a/upb/mem/arena_test.cc b/upb/mem/arena_test.cc index 7cd4707746..8355d7babc 100644 --- a/upb/mem/arena_test.cc +++ b/upb/mem/arena_test.cc @@ -136,8 +136,9 @@ TEST(ArenaTest, FuzzSingleThreaded) { } } -// Disabled because this test currently fails. -TEST(ArenaTest, DISABLED_FuzzFuseFreeRace) { +#ifdef UPB_USE_C11_ATOMICS + +TEST(ArenaTest, FuzzFuseFreeRace) { Environment env; absl::Notification done; @@ -184,4 +185,6 @@ TEST(ArenaTest, DISABLED_FuzzFuseFuseRace) { for (auto& t : threads) t.join(); } +#endif + } // namespace diff --git a/upb/port/atomic.h b/upb/port/atomic.h new file mode 100644 index 0000000000..34065750f1 --- /dev/null +++ b/upb/port/atomic.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023, Google LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Google LLC nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ + +#ifndef UPB_PORT_ATOMIC_H_ +#define UPB_PORT_ATOMIC_H_ + +#include "upb/port/def.inc" + +#ifdef UPB_USE_C11_ATOMICS + +#include <stdatomic.h> +#include <stdbool.h> + +UPB_INLINE void upb_Atomic_Init(_Atomic uintptr_t* addr, uintptr_t val) { + atomic_init(addr, val); +} + +UPB_INLINE uintptr_t upb_Atomic_LoadAcquire(_Atomic uintptr_t* addr) { + return atomic_load_explicit(addr, memory_order_acquire); +} + +UPB_INLINE void upb_Atomic_StoreRelaxed(_Atomic uintptr_t* addr, + uintptr_t val) { + atomic_store_explicit(addr, val, memory_order_relaxed); +} + +UPB_INLINE void upb_Atomic_AddRelease(_Atomic uintptr_t* addr, uintptr_t val) { + atomic_fetch_add_explicit(addr, val, memory_order_release); +} + +UPB_INLINE void upb_Atomic_SubRelease(_Atomic uintptr_t* addr, uintptr_t val) { + atomic_fetch_sub_explicit(addr, val, memory_order_release); +} + +UPB_INLINE uintptr_t upb_Atomic_ExchangeAcqRel(_Atomic uintptr_t* addr, + uintptr_t val) { + return atomic_exchange_explicit(addr, val, memory_order_acq_rel); +} + +UPB_INLINE bool upb_Atomic_CompareExchangeStrongAcqRel(_Atomic uintptr_t* addr, + uintptr_t* expected, + uintptr_t desired) { + return atomic_compare_exchange_strong_explicit( + addr, expected, desired, memory_order_release, memory_order_acquire); +} + +#else // !UPB_USE_C11_ATOMICS + +UPB_INLINE void upb_Atomic_Init(uintptr_t* addr, uintptr_t val) { *addr = val; } + +UPB_INLINE uintptr_t upb_Atomic_LoadAcquire(uintptr_t* addr) { return *addr; } + +UPB_INLINE void upb_Atomic_StoreRelaxed(uintptr_t* addr, uintptr_t val) { + *addr = val; +} + +UPB_INLINE void upb_Atomic_AddRelease(uintptr_t* addr, uintptr_t val) { + *addr += val; +} + +UPB_INLINE void upb_Atomic_SubRelease(uintptr_t* addr, uintptr_t val) { + *addr -= val; +} + +UPB_INLINE uintptr_t upb_Atomic_ExchangeAcqRel(uintptr_t* addr, uintptr_t val) { + uintptr_t ret = *addr; + *addr = val; + return ret; +} + +UPB_INLINE bool upb_Atomic_CompareExchangeStrongAcqRel(uintptr_t* addr, + uintptr_t* expected, + uintptr_t desired) { + if 
(*addr == *expected) { + *addr = desired; + return true; + } else { + *expected = *addr; + return false; + } +} + +#endif + +#include "upb/port/undef.inc" + +#endif // UPB_PORT_ATOMIC_H_ diff --git a/upb/port/def.inc b/upb/port/def.inc index 978972ee74..3baa94b199 100644 --- a/upb/port/def.inc +++ b/upb/port/def.inc @@ -190,6 +190,13 @@ #define UPB_LONGJMP(buf, val) longjmp(buf, val) #endif +#ifdef __GNUC__ +#define UPB_USE_C11_ATOMICS +#define UPB_ATOMIC _Atomic +#else +#define UPB_ATOMIC +#endif + /* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB */ #define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr)) diff --git a/upb/port/undef.inc b/upb/port/undef.inc index a4782838ae..12e05d22ab 100644 --- a/upb/port/undef.inc +++ b/upb/port/undef.inc @@ -68,3 +68,5 @@ #undef UPB_DESCRIPTOR_UPB_H_FILENAME #undef UPB_DESC #undef UPB_IS_GOOGLE3 +#undef UPB_ATOMIC +#undef UPB_USE_C11_ATOMICS diff --git a/upb/wire/decode.c b/upb/wire/decode.c index fde4e4bbbf..0ee00ec5a0 100644 --- a/upb/wire/decode.c +++ b/upb/wire/decode.c @@ -31,8 +31,10 @@ #include "upb/collections/array_internal.h" #include "upb/collections/map_internal.h" +#include "upb/mem/arena_internal.h" #include "upb/mini_table/common.h" #include "upb/mini_table/enum_internal.h" +#include "upb/port/atomic.h" #include "upb/wire/common_internal.h" #include "upb/wire/decode_internal.h" #include "upb/wire/encode.h" @@ -1305,7 +1307,8 @@ upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg, state.arena.head = arena->head; state.arena.last_size = arena->last_size; state.arena.cleanup_metadata = arena->cleanup_metadata; - state.arena.parent = arena; + upb_Atomic_Init(&state.arena.parent_or_count, + _upb_Arena_TaggedFromPointer(arena)); state.status = kUpb_DecodeStatus_Ok; return upb_Decoder_Decode(&state, buf, msg, l, arena);