Restructure fuse a tiny bit for aesthetics

* make internal functions start with `_`
* make internal functions static
* pull invariant tests to the very top

name                                           old cpu/op   new cpu/op   delta
BM_ArenaOneAlloc                                 17.7ns ± 9%  17.9ns ±10%    ~     (p=0.790 n=16+17)
BM_ArenaInitialBlockOneAlloc                     5.22ns ± 3%  5.63ns ±13%    ~     (p=0.068 n=16+20)
BM_ArenaFuseUnbalanced/2                         68.4ns ± 3%  66.4ns ± 3%  -3.05%  (p=0.000 n=16+16)
BM_ArenaFuseUnbalanced/8                          515ns ± 3%   539ns ±16%    ~     (p=0.496 n=18+20)
BM_ArenaFuseUnbalanced/64                        4.73µs ± 4%  4.71µs ± 4%    ~     (p=0.444 n=16+17)
BM_ArenaFuseUnbalanced/128                       9.67µs ± 5%  9.59µs ± 6%    ~     (p=0.217 n=17+16)
BM_ArenaFuseBalanced/2                           68.3ns ± 4%  66.9ns ± 5%  -2.00%  (p=0.020 n=18+18)
BM_ArenaFuseBalanced/8                            525ns ± 4%   519ns ± 3%    ~     (p=0.128 n=16+16)
BM_ArenaFuseBalanced/64                          5.03µs ±17%  4.77µs ± 6%    ~     (p=0.109 n=20+16)
BM_ArenaFuseBalanced/128                         10.2µs ±17%   9.6µs ± 3%    ~     (p=0.058 n=20+16)

name                                           old time/op             new time/op             delta
BM_ArenaOneAlloc                                 17.7ns ± 8%             17.9ns ±10%    ~     (p=0.736 n=16+17)
BM_ArenaInitialBlockOneAlloc                     5.23ns ± 3%             5.66ns ±15%    ~     (p=0.077 n=16+20)
BM_ArenaFuseUnbalanced/2                         68.6ns ± 3%             66.5ns ± 3%  -3.07%  (p=0.000 n=16+16)
BM_ArenaFuseUnbalanced/8                          516ns ± 3%              542ns ±18%    ~     (p=0.496 n=18+20)
BM_ArenaFuseUnbalanced/64                        4.74µs ± 4%             4.72µs ± 4%    ~     (p=0.423 n=16+17)
BM_ArenaFuseUnbalanced/128                       9.69µs ± 5%             9.61µs ± 7%    ~     (p=0.191 n=17+16)
BM_ArenaFuseBalanced/2                           68.5ns ± 4%             67.1ns ± 5%  -1.99%  (p=0.022 n=18+18)
BM_ArenaFuseBalanced/8                            526ns ± 4%              520ns ± 3%    ~     (p=0.157 n=16+16)
BM_ArenaFuseBalanced/64                          5.04µs ±18%             4.79µs ± 6%    ~     (p=0.109 n=20+16)
BM_ArenaFuseBalanced/128                         10.2µs ±18%              9.7µs ± 3%    ~     (p=0.062 n=20+16)

PiperOrigin-RevId: 520541220
pull/13171/head
Matt Kulukundis 2 years ago committed by Copybara-Service
parent d260ab343e
commit 71d1e8c85a
  1. 82
      upb/mem/arena.c
  2. 2
      upb/mem/arena_test.cc

@ -251,9 +251,30 @@ retry:
goto retry;
}
#define kUpb_RefDelta_CannotFuse -1
static void _upb_Arena_DoFuseArenaLists(upb_Arena* r1, upb_Arena* r2) {
// Find the region for `r2`'s linked list.
upb_Arena* r1_tail = upb_Atomic_Load(&r1->tail, memory_order_relaxed);
while (true) {
upb_Arena* r1_next = upb_Atomic_Load(&r1_tail->next, memory_order_relaxed);
while (r1_next != NULL) {
// r1->tail was stale. This can happen, but tail should always converge
// on the true tail.
r1_tail = r1_next;
r1_next = upb_Atomic_Load(&r1_tail->next, memory_order_relaxed);
}
if (upb_Atomic_CompareExchangeStrong(&r1_tail->next, &r1_next, r2,
memory_order_relaxed,
memory_order_relaxed)) {
break;
}
}
upb_Arena* upb_Arena_DoFuse(upb_Arena* a1, upb_Arena* a2, intptr_t* ref_delta) {
upb_Arena* r2_tail = upb_Atomic_Load(&r2->tail, memory_order_relaxed);
upb_Atomic_Store(&r1->tail, r2_tail, memory_order_relaxed);
}
static upb_Arena* _upb_Arena_DoFuse(upb_Arena* a1, upb_Arena* a2,
uintptr_t* ref_delta) {
// `parent_or_count` has two disctint modes
// - parent pointer mode
// - refcount mode
@ -266,14 +287,6 @@ upb_Arena* upb_Arena_DoFuse(upb_Arena* a1, upb_Arena* a2, intptr_t* ref_delta) {
if (r1.root == r2.root) return r1.root; // Already fused.
// Do not fuse initial blocks since we cannot lifetime extend them.
// Any other fuse scenario is allowed.
if (upb_Arena_HasInitialBlock(r1.root) ||
upb_Arena_HasInitialBlock(r2.root)) {
*ref_delta = kUpb_RefDelta_CannotFuse;
return NULL;
}
// Avoid cycles by always fusing into the root with the lower address.
if ((uintptr_t)r1.root > (uintptr_t)r2.root) {
_upb_ArenaRoot tmp = r1;
@ -304,45 +317,27 @@ upb_Arena* upb_Arena_DoFuse(upb_Arena* a1, upb_Arena* a2, intptr_t* ref_delta) {
// Perform the actual fuse by removing the refs from `r2` and swapping in the
// parent pointer.
if (upb_Atomic_CompareExchangeWeak(
if (!upb_Atomic_CompareExchangeStrong(
&r2.root->parent_or_count, &r2.tagged_count,
_upb_Arena_TaggedFromPointer(r1.root), memory_order_release,
memory_order_acquire)) {
} else {
// We'll need to remove the excess refs we added to r1 previously.
*ref_delta -= r2_untagged_count;
*ref_delta += r2_untagged_count;
return NULL;
}
// Now that the fuse has been performed (and can no longer fail) we need to
// append `r2` to `r1`'s linked list. Find the region for `r2`'s linked list.
upb_Arena* r1_tail = upb_Atomic_Load(&r1.root->tail, memory_order_relaxed);
while (true) {
upb_Arena* r1_next = upb_Atomic_Load(&r1_tail->next, memory_order_relaxed);
while (r1_next != NULL) {
// r1->tail was stale. This can happen, but tail should always converge
// on the true tail.
r1_tail = r1_next;
r1_next = upb_Atomic_Load(&r1_tail->next, memory_order_relaxed);
}
if (upb_Atomic_CompareExchangeStrong(&r1_tail->next, &r1_next, r2.root,
memory_order_relaxed,
memory_order_relaxed)) {
break;
}
}
upb_Arena* r2_tail = upb_Atomic_Load(&r2.root->tail, memory_order_relaxed);
upb_Atomic_Store(&r1.root->tail, r2_tail, memory_order_relaxed);
// append `r2` to `r1`'s linked list.
_upb_Arena_DoFuseArenaLists(r1.root, r2.root);
return r1.root;
}
bool upb_Arena_FixupRefs(upb_Arena* new_root, intptr_t ref_delta) {
static bool _upb_Arena_FixupRefs(upb_Arena* new_root, uintptr_t ref_delta) {
if (ref_delta == 0) return true; // No fixup required.
uintptr_t poc =
upb_Atomic_Load(&new_root->parent_or_count, memory_order_relaxed);
if (_upb_Arena_IsTaggedPointer(poc)) return false;
uintptr_t with_refs = poc + ref_delta;
uintptr_t with_refs = poc - ref_delta;
UPB_ASSERT(!_upb_Arena_IsTaggedPointer(with_refs));
return upb_Atomic_CompareExchangeStrong(&new_root->parent_or_count, &poc,
with_refs, memory_order_relaxed,
@ -350,15 +345,20 @@ bool upb_Arena_FixupRefs(upb_Arena* new_root, intptr_t ref_delta) {
}
bool upb_Arena_Fuse(upb_Arena* a1, upb_Arena* a2) {
// The number of refs we ultimately need to transfer to the new root.
intptr_t ref_delta = 0;
if (a1 == a2) return true; // trivial fuse
// Do not fuse initial blocks since we cannot lifetime extend them.
// Any other fuse scenario is allowed.
if (upb_Arena_HasInitialBlock(a1) || upb_Arena_HasInitialBlock(a2)) {
return false;
}
// The number of refs we ultimately need to transfer to the new root.
uintptr_t ref_delta = 0;
while (true) {
upb_Arena* new_root = upb_Arena_DoFuse(a1, a2, &ref_delta);
if (new_root != NULL) {
if (upb_Arena_FixupRefs(new_root, ref_delta)) return true;
} else {
if (ref_delta == kUpb_RefDelta_CannotFuse) return false;
upb_Arena* new_root = _upb_Arena_DoFuse(a1, a2, &ref_delta);
if (new_root != NULL && _upb_Arena_FixupRefs(new_root, ref_delta)) {
return true;
}
}
}

@ -75,7 +75,7 @@ class Environment {
if (o == nullptr) o = upb_Arena_New();
}
upb_Arena_Fuse(old[0], old[1]);
ABSL_CHECK(upb_Arena_Fuse(old[0], old[1]));
for (auto& o : old) {
o = SwapRandomly(gen, o);
if (o != nullptr) upb_Arena_Free(o);

Loading…
Cancel
Save