Allow fuse/fuse races, so that upb_Arena is fully thread-compatible.

Previously, upb_Arena was not thread-compatible: running `upb_Arena_Fuse(a, b)` and `upb_Arena_Fuse(c, d)` in parallel was unsafe if `b` and `c` had previously been fused. This CL fixes that by allowing `upb_Arena_Fuse()` calls to run in parallel without limitations.

Details on the design of the algorithm are captured in comments.

The CL slightly improves the performance of `upb_Arena_Fuse()`.

```
name                                           old cpu/op   new cpu/op   delta
BM_ArenaOneAlloc                                 20.0ns ±19%  17.5ns ± 4%  -12.30%  (p=0.000 n=19+17)
BM_ArenaInitialBlockOneAlloc                     6.65ns ± 4%  5.17ns ± 3%  -22.23%  (p=0.000 n=18+17)
BM_ArenaFuseUnbalanced/2                         69.1ns ± 7%  68.5ns ± 4%     ~     (p=0.327 n=18+19)
BM_ArenaFuseUnbalanced/8                          542ns ± 3%   513ns ± 4%   -5.25%  (p=0.000 n=18+18)
BM_ArenaFuseUnbalanced/64                        5.04µs ± 8%  4.74µs ± 4%   -5.93%  (p=0.000 n=17+17)
BM_ArenaFuseUnbalanced/128                       10.1µs ± 4%   9.6µs ± 4%   -4.80%  (p=0.000 n=18+17)
BM_ArenaFuseBalanced/2                           71.8ns ± 7%  68.4ns ± 6%   -4.75%  (p=0.000 n=17+17)
BM_ArenaFuseBalanced/8                            541ns ± 3%   519ns ± 3%   -4.21%  (p=0.000 n=18+17)
BM_ArenaFuseBalanced/64                          5.00µs ± 7%  4.86µs ± 4%   -2.78%  (p=0.003 n=17+18)
BM_ArenaFuseBalanced/128                         10.0µs ± 4%   9.7µs ± 4%   -2.68%  (p=0.001 n=16+18)
BM_LoadAdsDescriptor_Upb<NoLayout>               5.52ms ± 2%  5.54ms ± 4%     ~     (p=0.707 n=16+19)
BM_LoadAdsDescriptor_Upb<WithLayout>             6.18ms ± 3%  6.15ms ± 3%     ~     (p=0.501 n=18+18)
BM_LoadAdsDescriptor_Proto2<NoLayout>            11.8ms ± 7%  11.7ms ± 5%     ~     (p=0.330 n=16+18)
BM_LoadAdsDescriptor_Proto2<WithLayout>          11.9ms ± 3%  11.8ms ± 3%     ~     (p=0.303 n=18+17)
BM_Parse_Upb_FileDesc<UseArena, Copy>            12.2µs ± 4%  12.3µs ± 4%     ~     (p=0.935 n=17+18)
BM_Parse_Upb_FileDesc<UseArena, Alias>           11.3µs ± 6%  11.3µs ± 3%     ~     (p=0.873 n=16+17)
BM_Parse_Upb_FileDesc<InitBlock, Copy>           12.1µs ± 4%  12.1µs ± 3%     ~     (p=0.501 n=18+18)
BM_Parse_Upb_FileDesc<InitBlock, Alias>          11.1µs ± 4%  11.1µs ± 2%     ~     (p=0.297 n=18+16)
BM_Parse_Proto2<FileDesc, NoArena, Copy>         24.2µs ± 3%  25.6µs ±16%     ~     (p=0.177 n=17+20)
BM_Parse_Proto2<FileDesc, UseArena, Copy>        11.6µs ± 3%  11.7µs ± 4%     ~     (p=0.232 n=17+18)
BM_Parse_Proto2<FileDesc, InitBlock, Copy>       11.5µs ± 7%  11.4µs ± 4%     ~     (p=0.707 n=18+19)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias>    12.8µs ± 5%  13.0µs ±14%     ~     (p=0.782 n=18+17)
BM_SerializeDescriptor_Proto2                    5.69µs ± 5%  5.76µs ± 6%     ~     (p=0.143 n=18+18)
BM_SerializeDescriptor_Upb                       10.2µs ± 4%  10.2µs ± 3%     ~     (p=0.613 n=18+17)

name                                           old time/op             new time/op             delta
BM_ArenaOneAlloc                                 20.0ns ±19%             17.6ns ± 4%  -12.37%        (p=0.000 n=19+17)
BM_ArenaInitialBlockOneAlloc                     6.66ns ± 4%             5.18ns ± 3%  -22.24%        (p=0.000 n=18+17)
BM_ArenaFuseUnbalanced/2                         69.2ns ± 7%             68.6ns ± 4%     ~           (p=0.343 n=18+19)
BM_ArenaFuseUnbalanced/8                          543ns ± 3%              515ns ± 4%   -5.21%        (p=0.000 n=18+18)
BM_ArenaFuseUnbalanced/64                        5.05µs ± 8%             4.75µs ± 4%   -5.93%        (p=0.000 n=17+17)
BM_ArenaFuseUnbalanced/128                       10.1µs ± 4%              9.6µs ± 4%   -4.78%        (p=0.000 n=18+17)
BM_ArenaFuseBalanced/2                           72.0ns ± 7%             68.6ns ± 6%   -4.73%        (p=0.000 n=17+17)
BM_ArenaFuseBalanced/8                            543ns ± 3%              520ns ± 3%   -4.20%        (p=0.000 n=18+17)
BM_ArenaFuseBalanced/64                          5.01µs ± 7%             4.87µs ± 4%   -2.78%        (p=0.004 n=17+18)
BM_ArenaFuseBalanced/128                         10.0µs ± 3%              9.8µs ± 4%   -2.67%        (p=0.001 n=16+18)
BM_LoadAdsDescriptor_Upb<NoLayout>               5.53ms ± 2%             5.56ms ± 4%     ~           (p=0.707 n=16+19)
BM_LoadAdsDescriptor_Upb<WithLayout>             6.20ms ± 3%             6.17ms ± 2%     ~           (p=0.424 n=18+18)
BM_LoadAdsDescriptor_Proto2<NoLayout>            11.8ms ± 7%             11.7ms ± 5%     ~           (p=0.297 n=16+18)
BM_LoadAdsDescriptor_Proto2<WithLayout>          11.9ms ± 3%             11.9ms ± 3%     ~           (p=0.351 n=18+17)
BM_Parse_Upb_FileDesc<UseArena, Copy>            12.3µs ± 4%             12.3µs ± 4%     ~           (p=1.000 n=17+18)
BM_Parse_Upb_FileDesc<UseArena, Alias>           11.3µs ± 6%             11.3µs ± 3%     ~           (p=0.845 n=16+17)
BM_Parse_Upb_FileDesc<InitBlock, Copy>           12.1µs ± 4%             12.1µs ± 3%     ~           (p=0.542 n=18+18)
BM_Parse_Upb_FileDesc<InitBlock, Alias>          11.1µs ± 4%             11.2µs ± 2%     ~           (p=0.330 n=18+16)
BM_Parse_Proto2<FileDesc, NoArena, Copy>         24.2µs ± 3%             25.7µs ±17%     ~           (p=0.167 n=17+20)
BM_Parse_Proto2<FileDesc, UseArena, Copy>        11.6µs ± 3%             11.7µs ± 3%     ~           (p=0.232 n=17+18)
BM_Parse_Proto2<FileDesc, InitBlock, Copy>       11.5µs ± 7%             11.4µs ± 4%     ~           (p=0.799 n=18+19)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias>    12.8µs ± 5%             13.0µs ±14%     ~           (p=0.807 n=18+17)
BM_SerializeDescriptor_Proto2                    5.71µs ± 5%             5.78µs ± 6%     ~           (p=0.143 n=18+18)
BM_SerializeDescriptor_Upb                       10.2µs ± 4%             10.2µs ± 3%     ~           (p=0.613 n=18+17)

name                                           old allocs/op           new allocs/op           delta
BM_ArenaOneAlloc                                   1.00 ± 0%               1.00 ± 0%     ~     (all samples are equal)
BM_ArenaFuseUnbalanced/2                           2.00 ± 0%               2.00 ± 0%     ~     (all samples are equal)
BM_ArenaFuseUnbalanced/8                           8.00 ± 0%               8.00 ± 0%     ~     (all samples are equal)
BM_ArenaFuseUnbalanced/64                          64.0 ± 0%               64.0 ± 0%     ~     (all samples are equal)
BM_ArenaFuseUnbalanced/128                          128 ± 0%                128 ± 0%     ~     (all samples are equal)
BM_ArenaFuseBalanced/2                             2.00 ± 0%               2.00 ± 0%     ~     (all samples are equal)
BM_ArenaFuseBalanced/8                             8.00 ± 0%               8.00 ± 0%     ~     (all samples are equal)
BM_ArenaFuseBalanced/64                            64.0 ± 0%               64.0 ± 0%     ~     (all samples are equal)
BM_ArenaFuseBalanced/128                            128 ± 0%                128 ± 0%     ~     (all samples are equal)
BM_LoadAdsDescriptor_Upb<NoLayout>                6.05k ± 0%              6.05k ± 0%     ~     (all samples are equal)
BM_LoadAdsDescriptor_Upb<WithLayout>              6.36k ± 0%              6.36k ± 0%     ~     (all samples are equal)
BM_LoadAdsDescriptor_Proto2<NoLayout>             83.4k ± 0%              83.4k ± 0%     ~     (all samples are equal)
BM_LoadAdsDescriptor_Proto2<WithLayout>           84.4k ± 0%              84.4k ± 0%   -0.00%        (p=0.013 n=19+20)
BM_Parse_Upb_FileDesc<UseArena, Copy>              7.00 ± 0%               7.00 ± 0%     ~     (all samples are equal)
BM_Parse_Upb_FileDesc<UseArena, Alias>             7.00 ± 0%               7.00 ± 0%     ~     (all samples are equal)
BM_Parse_Proto2<FileDesc, NoArena, Copy>            765 ± 0%                765 ± 0%     ~     (all samples are equal)
BM_Parse_Proto2<FileDesc, UseArena, Copy>          8.00 ± 0%               8.00 ± 0%     ~     (all samples are equal)

name                                           old peak-mem(Bytes)/op  new peak-mem(Bytes)/op  delta
BM_ArenaOneAlloc                                    336 ± 0%                328 ± 0%   -2.38%        (p=0.000 n=20+20)
BM_ArenaFuseUnbalanced/2                            672 ± 0%                656 ± 0%   -2.38%        (p=0.000 n=20+20)
BM_ArenaFuseUnbalanced/8                          2.69k ± 0%              2.62k ± 0%   -2.38%        (p=0.000 n=20+20)
BM_ArenaFuseUnbalanced/64                         21.5k ± 0%              21.0k ± 0%   -2.38%        (p=0.000 n=20+20)
BM_ArenaFuseUnbalanced/128                        43.0k ± 0%              42.0k ± 0%   -2.38%        (p=0.000 n=20+20)
BM_ArenaFuseBalanced/2                              672 ± 0%                656 ± 0%   -2.38%        (p=0.000 n=20+20)
BM_ArenaFuseBalanced/8                            2.69k ± 0%              2.62k ± 0%   -2.38%        (p=0.000 n=20+20)
BM_ArenaFuseBalanced/64                           21.5k ± 0%              21.0k ± 0%   -2.38%        (p=0.000 n=20+20)
BM_ArenaFuseBalanced/128                          43.0k ± 0%              42.0k ± 0%   -2.38%        (p=0.000 n=20+20)
BM_LoadAdsDescriptor_Upb<NoLayout>                10.0M ± 0%               9.9M ± 0%   -0.05%        (p=0.000 n=20+20)
BM_LoadAdsDescriptor_Upb<WithLayout>              10.0M ± 0%              10.0M ± 0%   -0.05%        (p=0.000 n=20+20)
BM_LoadAdsDescriptor_Proto2<NoLayout>             6.62M ± 0%              6.62M ± 0%     ~     (all samples are equal)
BM_LoadAdsDescriptor_Proto2<WithLayout>           6.66M ± 0%              6.66M ± 0%   -0.01%        (p=0.013 n=19+20)
BM_Parse_Upb_FileDesc<UseArena, Copy>             36.5k ± 0%              36.5k ± 0%   -0.02%        (p=0.000 n=20+20)
BM_Parse_Upb_FileDesc<UseArena, Alias>            36.5k ± 0%              36.5k ± 0%   -0.02%        (p=0.000 n=20+20)
BM_Parse_Proto2<FileDesc, NoArena, Copy>          35.8k ± 0%              35.8k ± 0%     ~     (all samples are equal)
BM_Parse_Proto2<FileDesc, UseArena, Copy>         65.3k ± 0%              65.3k ± 0%     ~     (all samples are equal)

name                                           old speed               new speed               delta
BM_LoadAdsDescriptor_Upb<NoLayout>              137MB/s ± 2%            137MB/s ± 4%     ~           (p=0.707 n=16+19)
BM_LoadAdsDescriptor_Upb<WithLayout>            122MB/s ± 3%            123MB/s ± 3%     ~           (p=0.501 n=18+18)
BM_LoadAdsDescriptor_Proto2<NoLayout>          64.2MB/s ± 7%           64.7MB/s ± 5%     ~           (p=0.330 n=16+18)
BM_LoadAdsDescriptor_Proto2<WithLayout>        63.6MB/s ± 3%           63.9MB/s ± 3%     ~           (p=0.303 n=18+17)
BM_Parse_Upb_FileDesc<UseArena, Copy>           614MB/s ± 4%            613MB/s ± 4%     ~           (p=0.935 n=17+18)
BM_Parse_Upb_FileDesc<UseArena, Alias>          665MB/s ± 6%            667MB/s ± 3%     ~           (p=0.873 n=16+17)
BM_Parse_Upb_FileDesc<InitBlock, Copy>          624MB/s ± 4%            622MB/s ± 3%     ~           (p=0.501 n=18+18)
BM_Parse_Upb_FileDesc<InitBlock, Alias>         681MB/s ± 4%            675MB/s ± 2%     ~           (p=0.297 n=18+16)
BM_Parse_Proto2<FileDesc, NoArena, Copy>        311MB/s ± 3%            296MB/s ±15%     ~           (p=0.177 n=17+20)
BM_Parse_Proto2<FileDesc, UseArena, Copy>       649MB/s ± 3%            644MB/s ± 3%     ~           (p=0.232 n=17+18)
BM_Parse_Proto2<FileDesc, InitBlock, Copy>      656MB/s ± 7%            659MB/s ± 4%     ~           (p=0.707 n=18+19)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias>   587MB/s ± 5%            576MB/s ±16%     ~           (p=0.584 n=18+18)
BM_SerializeDescriptor_Proto2                  1.32GB/s ± 5%           1.31GB/s ± 7%     ~           (p=0.143 n=18+18)
BM_SerializeDescriptor_Upb                      737MB/s ± 4%            737MB/s ± 7%     ~           (p=0.839 n=18+18)
```

PiperOrigin-RevId: 520452349
pull/13171/head
Joshua Haberman 2 years ago committed by Copybara-Service
parent 3b0c9261c6
commit c642e43a5a
  1. 190
      upb/mem/arena.c
  2. 6
      upb/mem/arena_internal.h
  3. 7
      upb/mem/arena_test.cc
  4. 10
      upb/port/atomic.h

@ -31,11 +31,6 @@
// Must be last.
#include "upb/port/def.inc"
static uintptr_t upb_cleanup_metadata(uint32_t* cleanup,
bool has_initial_block) {
return (uintptr_t)cleanup | has_initial_block;
}
struct _upb_MemBlock {
// Atomic only for the benefit of SpaceAllocated().
UPB_ATOMIC(_upb_MemBlock*) next;
@ -46,10 +41,16 @@ struct _upb_MemBlock {
static const size_t memblock_reserve =
UPB_ALIGN_UP(sizeof(_upb_MemBlock), UPB_MALLOC_ALIGN);
static upb_Arena* _upb_Arena_FindRoot(upb_Arena* a) {
// Result of _upb_Arena_FindRoot(): the root arena that was found, paired with
// the value of that root's `parent_or_count` word as it was observed while
// locating the root.
typedef struct _upb_ArenaRoot {
// The arena at which the parent-pointer walk terminated.
upb_Arena* root;
// Snapshot of root->parent_or_count taken during the walk. Callers use it as
// the expected value for compare-exchange operations, so it may be stale by
// the time it is used.
uintptr_t tagged_count;
} _upb_ArenaRoot;
static _upb_ArenaRoot _upb_Arena_FindRoot(upb_Arena* a) {
uintptr_t poc = upb_Atomic_Load(&a->parent_or_count, memory_order_acquire);
while (_upb_Arena_IsTaggedPointer(poc)) {
upb_Arena* next = _upb_Arena_PointerFromTagged(poc);
UPB_ASSERT(a != next);
uintptr_t next_poc =
upb_Atomic_Load(&next->parent_or_count, memory_order_acquire);
@ -73,20 +74,22 @@ static upb_Arena* _upb_Arena_FindRoot(upb_Arena* a) {
// further away over time, but the path towards that root will continue to
// be valid and the creation of the path carries all the memory orderings
// required.
UPB_ASSERT(a != _upb_Arena_PointerFromTagged(next_poc));
upb_Atomic_Store(&a->parent_or_count, next_poc, memory_order_relaxed);
}
a = next;
poc = next_poc;
}
return a;
return (_upb_ArenaRoot){.root = a, .tagged_count = poc};
}
size_t upb_Arena_SpaceAllocated(upb_Arena* arena) {
arena = _upb_Arena_FindRoot(arena);
arena = _upb_Arena_FindRoot(arena).root;
size_t memsize = 0;
while (arena != NULL) {
_upb_MemBlock* block = arena->blocks;
_upb_MemBlock* block =
upb_Atomic_Load(&arena->blocks, memory_order_relaxed);
while (block != NULL) {
memsize += sizeof(_upb_MemBlock) + block->size;
block = upb_Atomic_Load(&block->next, memory_order_relaxed);
@ -112,9 +115,9 @@ static void upb_Arena_AddBlock(upb_Arena* a, void* ptr, size_t size) {
_upb_MemBlock* block = ptr;
// Insert into linked list.
upb_Atomic_Init(&block->next, a->blocks);
block->size = (uint32_t)size;
upb_Atomic_Store(&a->blocks, block, memory_order_relaxed);
upb_Atomic_Init(&block->next, a->blocks);
upb_Atomic_Store(&a->blocks, block, memory_order_release);
a->head.ptr = UPB_PTR_AT(block, memblock_reserve, char);
a->head.end = UPB_PTR_AT(block, size, char);
@ -124,7 +127,7 @@ static void upb_Arena_AddBlock(upb_Arena* a, void* ptr, size_t size) {
static bool upb_Arena_AllocBlock(upb_Arena* a, size_t size) {
if (!a->block_alloc) return false;
_upb_MemBlock* last_block = upb_Atomic_Load(&a->blocks, memory_order_relaxed);
_upb_MemBlock* last_block = upb_Atomic_Load(&a->blocks, memory_order_acquire);
size_t last_size = last_block != NULL ? last_block->size : 128;
size_t block_size = UPB_MAX(size, last_size * 2) + memblock_reserve;
_upb_MemBlock* block = upb_malloc(upb_Arena_BlockAlloc(a), block_size);
@ -207,11 +210,11 @@ static void arena_dofree(upb_Arena* a) {
upb_Arena* next_arena =
(upb_Arena*)upb_Atomic_Load(&a->next, memory_order_acquire);
upb_alloc* block_alloc = upb_Arena_BlockAlloc(a);
_upb_MemBlock* block = upb_Atomic_Load(&a->blocks, memory_order_relaxed);
_upb_MemBlock* block = upb_Atomic_Load(&a->blocks, memory_order_acquire);
while (block != NULL) {
// Load first since we are deleting block.
_upb_MemBlock* next_block =
upb_Atomic_Load(&block->next, memory_order_relaxed);
upb_Atomic_Load(&block->next, memory_order_acquire);
upb_free(block_alloc, block);
block = next_block;
}
@ -235,7 +238,7 @@ retry:
return;
}
if (upb_Atomic_CompareExchangeStrong(
if (upb_Atomic_CompareExchangeWeak(
&a->parent_or_count, &poc,
_upb_Arena_TaggedFromRefcount(_upb_Arena_RefCountFromTagged(poc) - 1),
memory_order_release, memory_order_acquire)) {
@ -248,10 +251,9 @@ retry:
goto retry;
}
bool upb_Arena_Fuse(upb_Arena* a1, upb_Arena* a2) {
// SAFE IN THE PRESENCE OF FUSE/FREE RACES BUT NOT IN THE
// PRESENCE OF FUSE/FUSE RACES!!!
//
#define kUpb_RefDelta_CannotFuse -1
upb_Arena* upb_Arena_DoFuse(upb_Arena* a1, upb_Arena* a2, intptr_t* ref_delta) {
// `parent_or_count` has two distinct modes
// - parent pointer mode
// - refcount mode
@ -259,86 +261,104 @@ bool upb_Arena_Fuse(upb_Arena* a1, upb_Arena* a2) {
// In parent pointer mode, it may change what pointer it refers to in the
// tree, but it will always approach a root. Any operation that walks the
// tree to the root may collapse levels of the tree concurrently.
//
// In refcount mode, any free operation may lower the refcount.
//
// Only a fuse operation may increase the refcount.
// Only a fuse operation may switch `parent_or_count` from parent mode to
// refcount mode.
//
// Given that we do not allow fuse/fuse races, we may rely on the invariant
// that only refcounts can change once we have found the root. Because the
// threads doing the fuse must hold references, we can guarantee that no
// refcounts will reach zero concurrently.
_upb_ArenaRoot r1 = _upb_Arena_FindRoot(a1);
_upb_ArenaRoot r2 = _upb_Arena_FindRoot(a2);
upb_Arena* r1 = _upb_Arena_FindRoot(a1);
upb_Arena* r2 = _upb_Arena_FindRoot(a2);
if (r1 == r2) return true; // Already fused.
if (r1.root == r2.root) return r1.root; // Already fused.
// Do not fuse initial blocks since we cannot lifetime extend them.
// Any other fuse scenario is allowed.
if (upb_Arena_HasInitialBlock(r1)) return false;
if (upb_Arena_HasInitialBlock(r2)) return false;
uintptr_t r1_poc =
upb_Atomic_Load(&r1->parent_or_count, memory_order_acquire);
uintptr_t r2_poc =
upb_Atomic_Load(&r2->parent_or_count, memory_order_acquire);
UPB_ASSERT(_upb_Arena_IsTaggedRefcount(r1_poc));
UPB_ASSERT(_upb_Arena_IsTaggedRefcount(r2_poc));
// Keep the tree shallow by joining the smaller tree to the larger.
if (_upb_Arena_RefCountFromTagged(r1_poc) <
_upb_Arena_RefCountFromTagged(r2_poc)) {
upb_Arena* tmp = r1;
if (upb_Arena_HasInitialBlock(r1.root) ||
upb_Arena_HasInitialBlock(r2.root)) {
*ref_delta = kUpb_RefDelta_CannotFuse;
return NULL;
}
// Avoid cycles by always fusing into the root with the lower address.
if ((uintptr_t)r1.root > (uintptr_t)r2.root) {
_upb_ArenaRoot tmp = r1;
r1 = r2;
r2 = tmp;
uintptr_t tmp_poc = r1_poc;
r1_poc = r2_poc;
r2_poc = tmp_poc;
}
// The moment we install `r1` as the parent for `r2` all racing frees may
// immediately begin decrementing `r1`'s refcount. So we must install all the
// refcounts that we know about first to prevent a premature unref to zero.
uint32_t r2_refcount = _upb_Arena_RefCountFromTagged(r2_poc);
upb_Atomic_Add(&r1->parent_or_count, ((uintptr_t)r2_refcount) << 1,
memory_order_release);
// When installing `r1` as the parent for `r2` racing frees may have changed
// the refcount for `r2` so we need to capture the old value to fix up `r1`'s
// refcount based on the delta from what we saw the first time.
r2_poc = upb_Atomic_Exchange(&r2->parent_or_count,
_upb_Arena_TaggedFromPointer(r1),
memory_order_acq_rel);
UPB_ASSERT(_upb_Arena_IsTaggedRefcount(r2_poc));
uint32_t delta_refcount = r2_refcount - _upb_Arena_RefCountFromTagged(r2_poc);
if (delta_refcount != 0) {
upb_Atomic_Sub(&r1->parent_or_count, ((uintptr_t)delta_refcount) << 1,
memory_order_release);
// immediately begin decrementing `r1`'s refcount (including pending
// increments to that refcount and their frees!). We need to add `r2`'s refs
// now, so that `r1` can withstand any unrefs that come from r2.
//
// Note that while it is possible for `r2`'s refcount to increase
// asynchronously, we will not actually do the reparenting operation below
// unless `r2`'s refcount is unchanged from when we read it.
//
// Note that we may have done this previously, either to this node or a
// different node, during a previous and failed DoFuse() attempt. But we will
// not lose track of these refs because we always add them to our overall
// delta.
uintptr_t r2_untagged_count = r2.tagged_count & ~1;
uintptr_t with_r2_refs = r1.tagged_count + r2_untagged_count;
if (!upb_Atomic_CompareExchangeStrong(
&r1.root->parent_or_count, &r1.tagged_count, with_r2_refs,
memory_order_release, memory_order_acquire)) {
return NULL;
}
// Now append r2's linked list of arenas to r1's.
upb_Arena* r2_tail = upb_Atomic_Load(&r2->tail, memory_order_relaxed);
upb_Arena* r1_tail = upb_Atomic_Load(&r1->tail, memory_order_relaxed);
upb_Arena* r1_next = upb_Atomic_Load(&r1_tail->next, memory_order_relaxed);
while (r1_next != NULL) {
// r1->tail was stale. This can happen, but tail should always converge on
// the true tail.
r1_tail = r1_next;
r1_next = upb_Atomic_Load(&r1_tail->next, memory_order_relaxed);
// Perform the actual fuse by removing the refs from `r2` and swapping in the
// parent pointer.
if (upb_Atomic_CompareExchangeWeak(
&r2.root->parent_or_count, &r2.tagged_count,
_upb_Arena_TaggedFromPointer(r1.root), memory_order_release,
memory_order_acquire)) {
} else {
// We'll need to remove the excess refs we added to r1 previously.
*ref_delta -= r2_untagged_count;
return NULL;
}
upb_Arena* old_next =
upb_Atomic_Exchange(&r1_tail->next, r2, memory_order_relaxed);
// Now that the fuse has been performed (and can no longer fail) we need to
// append `r2` to `r1`'s linked list. Find the region for `r2`'s linked list.
upb_Arena* r1_tail = upb_Atomic_Load(&r1.root->tail, memory_order_relaxed);
while (true) {
upb_Arena* r1_next = upb_Atomic_Load(&r1_tail->next, memory_order_relaxed);
while (r1_next != NULL) {
// r1->tail was stale. This can happen, but tail should always converge
// on the true tail.
r1_tail = r1_next;
r1_next = upb_Atomic_Load(&r1_tail->next, memory_order_relaxed);
}
if (upb_Atomic_CompareExchangeStrong(&r1_tail->next, &r1_next, r2.root,
memory_order_relaxed,
memory_order_relaxed)) {
break;
}
}
// Once fuse/fuse races are allowed, it will need to be a CAS instead that
// handles this mismatch gracefully.
UPB_ASSERT(old_next == NULL);
upb_Arena* r2_tail = upb_Atomic_Load(&r2.root->tail, memory_order_relaxed);
upb_Atomic_Store(&r1.root->tail, r2_tail, memory_order_relaxed);
return r1.root;
}
upb_Atomic_Store(&r1->tail, r2_tail, memory_order_relaxed);
// Applies the accumulated refcount adjustment `ref_delta` to `new_root`.
//
// `ref_delta` is expressed in the same units as the tagged `parent_or_count`
// word (it is added to the word directly, without shifting).
//
// Returns true if no adjustment was needed or the adjustment was installed.
// Returns false if `new_root` now holds a parent pointer instead of a
// refcount, or if the compare-exchange lost a race with a concurrent update;
// in either case the caller is expected to retry.
bool upb_Arena_FixupRefs(upb_Arena* new_root, intptr_t ref_delta) {
if (ref_delta == 0) return true; // No fixup required.
uintptr_t poc =
upb_Atomic_Load(&new_root->parent_or_count, memory_order_relaxed);
// A tagged pointer means `new_root` has a parent, so it does not carry a
// refcount that we could adjust.
if (_upb_Arena_IsTaggedPointer(poc)) return false;
uintptr_t with_refs = poc + ref_delta;
// The adjusted word must still be in refcount mode.
UPB_ASSERT(!_upb_Arena_IsTaggedPointer(with_refs));
// Install the adjusted count only if the word is unchanged since the load
// above; otherwise report failure rather than overwrite a concurrent update.
return upb_Atomic_CompareExchangeStrong(&new_root->parent_or_count, &poc,
with_refs, memory_order_relaxed,
memory_order_relaxed);
}
return true;
// Fuses the arenas `a1` and `a2`, retrying until the operation either
// completes or is determined to be impossible.
//
// Returns true once upb_Arena_DoFuse() has produced a root and any pending
// refcount adjustment has been applied to it. Returns false if
// upb_Arena_DoFuse() reports kUpb_RefDelta_CannotFuse.
bool upb_Arena_Fuse(upb_Arena* a1, upb_Arena* a2) {
// The number of refs we ultimately need to transfer to the new root.
// This is carried across retries so that adjustments recorded by failed
// attempts are not lost.
intptr_t ref_delta = 0;
while (true) {
upb_Arena* new_root = upb_Arena_DoFuse(a1, a2, &ref_delta);
if (new_root != NULL) {
// The fuse attempt yielded a root; finish by applying the outstanding
// delta. If that fails, loop and try again.
if (upb_Arena_FixupRefs(new_root, ref_delta)) return true;
} else {
// NULL means either "cannot fuse" (signalled through the sentinel value)
// or "lost a race"; only the former terminates the loop.
if (ref_delta == kUpb_RefDelta_CannotFuse) return false;
}
}
}

@ -72,13 +72,13 @@ UPB_INLINE bool _upb_Arena_IsTaggedPointer(uintptr_t parent_or_count) {
return (parent_or_count & 1) == 0;
}
UPB_INLINE uint32_t _upb_Arena_RefCountFromTagged(uintptr_t parent_or_count) {
UPB_INLINE uintptr_t _upb_Arena_RefCountFromTagged(uintptr_t parent_or_count) {
UPB_ASSERT(_upb_Arena_IsTaggedRefcount(parent_or_count));
return parent_or_count >> 1;
}
UPB_INLINE uintptr_t _upb_Arena_TaggedFromRefcount(uint32_t refcount) {
uintptr_t parent_or_count = (((uintptr_t)refcount) << 1) | 1;
UPB_INLINE uintptr_t _upb_Arena_TaggedFromRefcount(uintptr_t refcount) {
uintptr_t parent_or_count = (refcount << 1) | 1;
UPB_ASSERT(_upb_Arena_IsTaggedRefcount(parent_or_count));
return parent_or_count;
}

@ -139,8 +139,7 @@ TEST(ArenaTest, FuzzFuseFreeRace) {
for (auto& t : threads) t.join();
}
// Disabled because this operation is currently unsupported.
TEST(ArenaTest, DISABLED_FuzzFuseFuseRace) {
TEST(ArenaTest, FuzzFuseFuseRace) {
Environment env;
absl::Notification done;
@ -149,7 +148,7 @@ TEST(ArenaTest, DISABLED_FuzzFuseFuseRace) {
threads.emplace_back([&]() {
absl::BitGen gen;
while (!done.HasBeenNotified()) {
env.RandomPoke(gen);
env.RandomFuse(gen);
}
});
}
@ -157,7 +156,7 @@ TEST(ArenaTest, DISABLED_FuzzFuseFuseRace) {
absl::BitGen gen;
auto end = absl::Now() + absl::Seconds(2);
while (absl::Now() < end) {
env.RandomPoke(gen);
env.RandomFuse(gen);
}
done.Notify();
for (auto& t : threads) t.join();

@ -49,6 +49,10 @@
success_order, failure_order) \
atomic_compare_exchange_strong_explicit(addr, expected, desired, \
success_order, failure_order)
// Weak compare-exchange: forwards directly to C11
// atomic_compare_exchange_weak_explicit(). Per the C11 specification the weak
// form may fail spuriously, so callers should use it inside a retry loop.
#define upb_Atomic_CompareExchangeWeak(addr, expected, desired, success_order, \
failure_order) \
atomic_compare_exchange_weak_explicit(addr, expected, desired, \
success_order, failure_order)
#else // !UPB_USE_C11_ATOMICS
@ -108,6 +112,12 @@ UPB_INLINE bool _upb_NonAtomic_CompareExchangeStrongP(upb_Arena** addr,
uintptr_t: _upb_NonAtomic_CompareExchangeStrongU, \
upb_Arena *: _upb_NonAtomic_CompareExchangeStrongP)(addr, expected, \
desired)
// Fallback for builds without C11 atomics: the weak variant dispatches (by the
// type of `desired`) to the same non-atomic helpers used by the strong
// variant. `success_order` and `failure_order` are accepted for interface
// parity but are not used.
#define upb_Atomic_CompareExchangeWeak(addr, expected, desired, success_order, \
failure_order) \
_Generic((desired), \
uintptr_t: _upb_NonAtomic_CompareExchangeStrongU, \
upb_Arena *: _upb_NonAtomic_CompareExchangeStrongP)(addr, expected, \
desired)
#endif

Loading…
Cancel
Save