// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

#include "upb/mem/arena.h"

#include <stddef.h>

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <thread>
#include <type_traits>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/base/thread_annotations.h"
#include "absl/cleanup/cleanup.h"
#include "absl/container/flat_hash_map.h"
#include "absl/random/distributions.h"
#include "absl/random/random.h"
#include "absl/synchronization/mutex.h"
#include "absl/synchronization/notification.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "upb/mem/alloc.h"
#include "upb/mem/arena.hpp"

// Must be last.
#include "upb/port/def.inc"

namespace {

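// A wrapper around upb_alloc that counts outstanding allocations, so tests
// can verify that every allocation made through it is eventually returned.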
struct CustomAlloc {
  upb_alloc alloc;
  int counter;
  bool ran_cleanup;
};

void* CustomAllocFunc(upb_alloc* alloc, void* ptr, size_t oldsize,
                      size_t size) {
  CustomAlloc* custom_alloc = reinterpret_cast<CustomAlloc*>(alloc);
  if (size == 0) {
    custom_alloc->counter--;
  } else {
    custom_alloc->counter++;
  }
  return upb_alloc_global.func(alloc, ptr, oldsize, size);
}

void CustomAllocCleanup(upb_alloc* alloc) {
  CustomAlloc* custom_alloc = reinterpret_cast<CustomAlloc*>(alloc);
  EXPECT_THAT(custom_alloc->counter, 0);
  custom_alloc->ran_cleanup = true;
}

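// An arena built on a custom allocator routes all of its allocations through
// that allocator and runs the registered cleanup hook when it is freed.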
TEST(ArenaTest, ArenaWithAllocCleanup) {
  CustomAlloc alloc = {{&CustomAllocFunc}, 0, false};
  upb_Arena* arena =
      upb_Arena_Init(nullptr, 0, reinterpret_cast<upb_alloc*>(&alloc));
  EXPECT_EQ(alloc.counter, 1);
  upb_Arena_SetAllocCleanup(arena, CustomAllocCleanup);
  upb_Arena_Free(arena);
  EXPECT_TRUE(alloc.ran_cleanup);
}

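// An allocator wrapper that records the size of each live allocation, so
// tests can check that frees and reallocs report the correct old size.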
struct SizeTracker {
  upb_alloc alloc;
  upb_alloc* delegate_alloc;
  absl::flat_hash_map<void*, size_t>* sizes;
};

static_assert(std::is_standard_layout<SizeTracker>());

static void* size_checking_allocfunc(upb_alloc* alloc, void* ptr,
                                     size_t oldsize, size_t size) {
  SizeTracker* size_alloc = reinterpret_cast<SizeTracker*>(alloc);
  void* result = size_alloc->delegate_alloc->func(alloc, ptr, oldsize, size);
  if (ptr != nullptr) {
    UPB_ASSERT(size_alloc->sizes->at(ptr) == oldsize);
    size_alloc->sizes->erase(ptr);
  }
  if (result != nullptr) {
    size_alloc->sizes->emplace(result, size);
  }
  return result;
}

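// The arena must pass the correct old size back to the allocator when blocks
// are freed, reallocated, or shrunk.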
TEST(ArenaTest, SizedFree) {
  absl::flat_hash_map<void*, size_t> sizes;
  SizeTracker alloc;
  alloc.alloc.func = size_checking_allocfunc;
  alloc.delegate_alloc = &upb_alloc_global;
  alloc.sizes = &sizes;

  upb_Arena* arena = upb_Arena_Init(nullptr, 0, &alloc.alloc);
  (void)upb_Arena_Malloc(arena, 500);
  void* to_resize = upb_Arena_Malloc(arena, 2000);
  void* resized = upb_Arena_Realloc(arena, to_resize, 2000, 4000);
  upb_Arena_ShrinkLast(arena, resized, 4000, 1);
  EXPECT_GT(sizes.size(), 0);
  upb_Arena_Free(arena);
  EXPECT_EQ(sizes.size(), 0);
}

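// A size hint passed to upb_Arena_Init should make the first block large
// enough that an allocation of the same size does not require a second
// request from the backing allocator.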
TEST(ArenaTest, SizeHint) {
  absl::flat_hash_map<void*, size_t> sizes;
  SizeTracker alloc;
  alloc.alloc.func = size_checking_allocfunc;
  alloc.delegate_alloc = &upb_alloc_global;
  alloc.sizes = &sizes;

  upb_Arena* arena = upb_Arena_Init(nullptr, 2459, &alloc.alloc);
  EXPECT_EQ(sizes.size(), 1);
  EXPECT_NE(upb_Arena_Malloc(arena, 2459), nullptr);
  EXPECT_EQ(sizes.size(), 1);
  EXPECT_NE(upb_Arena_Malloc(arena, 500), nullptr);
  EXPECT_EQ(sizes.size(), 2);
  upb_Arena_Free(arena);
  EXPECT_EQ(sizes.size(), 0);
}

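// Helper fixture that compares how much memory the arena requests from the
// backing allocator against how much the test actually asked for, so the
// overhead tests below can bound waste and the number of backing allocations
// per arena allocation.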
class OverheadTest {
 public:
  OverheadTest(const OverheadTest&) = delete;
  OverheadTest& operator=(const OverheadTest&) = delete;

  explicit OverheadTest(size_t first = 0, size_t max_block_size = 0) {
    if (max_block_size) {
      upb_Arena_SetMaxBlockSize(max_block_size);
    }
    alloc_.alloc.func = size_checking_allocfunc;
    alloc_.delegate_alloc = &upb_alloc_global;
    alloc_.sizes = &sizes_;
    arena_ = upb_Arena_Init(nullptr, first, &alloc_.alloc);
    arena_alloced_ = 0;
    arena_alloc_count_ = 0;
  }

  void Alloc(size_t size) {
    upb_Arena_Malloc(arena_, size);
    arena_alloced_ += size;
    arena_alloc_count_++;
  }

  uintptr_t SpaceAllocated() {
    return upb_Arena_SpaceAllocated(arena_, nullptr);
  }

  double WastePct() {
    uintptr_t backing_alloced = upb_Arena_SpaceAllocated(arena_, nullptr);
    double waste = backing_alloced - arena_alloced_;
    return waste / backing_alloced;
  }

  double AmortizedAlloc() {
    return ((double)sizes_.size()) / arena_alloc_count_;
  }

  ~OverheadTest() {
    upb_Arena_Free(arena_);
    upb_Arena_SetMaxBlockSize(32 << 10);
  }
  upb_Arena* arena_;

 protected:
  absl::flat_hash_map<void*, size_t> sizes_;
  SizeTracker alloc_;
  uintptr_t arena_alloced_;
  uintptr_t arena_alloc_count_;
};

TEST(OverheadTest, SingleMassiveBlockThenLittle) {
  OverheadTest test;
  // Little blocks
  for (int i = 0; i < 4; i++) {
    test.Alloc(32);
  }
  // Big block!
  test.Alloc(16000);
  for (int i = 0; i < 50; i++) {
    test.Alloc(64);
  }
  if (!UPB_ASAN) {
    EXPECT_NEAR(test.WastePct(), 0.6, 0.025);
    EXPECT_NEAR(test.AmortizedAlloc(), 0.05, 0.025);
  }
}

TEST(OverheadTest, Overhead_AlternatingSmallLargeBlocks) {
  OverheadTest test(512, 4096);
  for (int i = 0; i < 100; i++) {
    test.Alloc(5000);
    test.Alloc(64);
  }
  if (!UPB_ASAN) {
    EXPECT_NEAR(test.WastePct(), 0.45, 0.025);
    EXPECT_NEAR(test.AmortizedAlloc(), 1, 0.025);
  }
}

TEST(OverheadTest, PartialMaxBlocks) {
  OverheadTest test(512, 4096);
  for (int i = 0; i < 10; i++) {
    test.Alloc(2096 + i);
  }
  if (!UPB_ASAN) {
    EXPECT_NEAR(test.WastePct(), 0.47, 0.025);
    EXPECT_NEAR(test.AmortizedAlloc(), 1.1, 0.25);
  }
}

TEST(OverheadTest, SmallBlocksLargerThanInitial) {
  OverheadTest test;
  size_t initial_block_size = upb_Arena_SpaceAllocated(test.arena_, nullptr);
  for (int i = 0; i < 10; i++) {
    test.Alloc(initial_block_size * 2 + 1);
  }
  if (!UPB_ASAN && sizeof(void*) == 8) {
    EXPECT_NEAR(test.WastePct(), 0.37, 0.025);
    EXPECT_NEAR(test.AmortizedAlloc(), 0.5, 0.025);
  }
}

TEST(OverheadTest, SmallBlocksLargerThanInitial_many) {
  OverheadTest test;
  size_t initial_block_size = upb_Arena_SpaceAllocated(test.arena_, nullptr);
  for (int i = 0; i < 100; i++) {
    test.Alloc(initial_block_size * 2 + 1);
  }
  if (!UPB_ASAN) {
    EXPECT_NEAR(test.WastePct(), 0.14, 0.025);
    EXPECT_NEAR(test.AmortizedAlloc(), 0.08, 0.025);
  }
  for (int i = 0; i < 900; i++) {
    test.Alloc(initial_block_size * 2 + 1);
  }
  if (!UPB_ASAN) {
    EXPECT_NEAR(test.WastePct(), 0.03, 0.025);
    EXPECT_NEAR(test.AmortizedAlloc(), 0.05, 0.025);
  }
}

TEST(OverheadTest, DefaultMaxBlockSize) {
  OverheadTest test;
  // Perform 600 1k allocations (600k total) and ensure that the amount of
  // memory allocated does not exceed 700k.
  for (int i = 0; i < 600; ++i) {
    test.Alloc(1024);
  }
  EXPECT_LE(test.SpaceAllocated(), 700 * 1024);
}

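// Fusing two arenas joins their lifetimes: both handles must still be freed,
// and the shared memory is only reclaimed once every arena in the fused group
// has been freed.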
TEST(ArenaTest, ArenaFuse) {
  upb_Arena* arena1 = upb_Arena_New();
  upb_Arena* arena2 = upb_Arena_New();

  EXPECT_TRUE(upb_Arena_Fuse(arena1, arena2));

  upb_Arena_Free(arena1);
  upb_Arena_Free(arena2);
}

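// Arenas created with a user-provided initial block cannot be fused with any
// other arena; only the no-op fuse of such an arena with itself succeeds.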
TEST(ArenaTest, FuseWithInitialBlock) {
  char buf1[1024];
  char buf2[1024];
  upb_Arena* arenas[] = {upb_Arena_Init(buf1, 1024, &upb_alloc_global),
                         upb_Arena_Init(buf2, 1024, &upb_alloc_global),
                         upb_Arena_Init(nullptr, 0, &upb_alloc_global)};
  int size = sizeof(arenas) / sizeof(arenas[0]);
  for (int i = 0; i < size; ++i) {
    for (int j = 0; j < size; ++j) {
      if (i == j) {
        // Fuse to self is always allowed.
        EXPECT_TRUE(upb_Arena_Fuse(arenas[i], arenas[j]));
      } else {
        EXPECT_FALSE(upb_Arena_Fuse(arenas[i], arenas[j]));
      }
    }
  }

  for (int i = 0; i < size; ++i) upb_Arena_Free(arenas[i]);
}

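// Shared state for the fuzz tests below: a fixed-size pool of arenas that
// multiple threads create, free, fuse, and ref-count at random.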
class Environment {
 public:
  void RandomNewFree(absl::BitGen& gen, size_t min_index = 0) {
    auto a = std::make_shared<const upb::Arena>();
    SwapRandomArena(gen, a, min_index);
  }

  void RandomIncRefCount(absl::BitGen& gen) {
    std::shared_ptr<const upb::Arena> a = RandomNonNullArena(gen);
    upb_Arena_IncRefFor(a->ptr(), nullptr);
    upb_Arena_DecRefFor(a->ptr(), nullptr);
  }

  void RandomFuse(absl::BitGen& gen) {
    std::shared_ptr<const upb::Arena> a = RandomNonNullArena(gen);
    std::shared_ptr<const upb::Arena> b = RandomNonNullArena(gen);
    EXPECT_TRUE(upb_Arena_Fuse(a->ptr(), b->ptr()));
  }

  void RandomPoke(absl::BitGen& gen, size_t min_index = 0) {
    switch (absl::Uniform(gen, 0, 2)) {
      case 0:
        RandomNewFree(gen, min_index);
        break;
      case 1:
        RandomFuse(gen);
        break;
      default:
        break;
    }
  }

  std::shared_ptr<const upb::Arena> IndexedNonNullArena(size_t index) {
    absl::MutexLock lock(&mutex_);
    std::shared_ptr<const upb::Arena>& ret = arenas_[index];
    if (!ret) ret = std::make_shared<const upb::Arena>();
    return ret;
  }

 private:
  size_t RandomIndex(absl::BitGen& gen, size_t min_index = 0) {
    return absl::Uniform<size_t>(gen, min_index,
                                 std::tuple_size<ArenaArray>::value);
  }

  // Swaps a random arena from the set with the given arena.
  void SwapRandomArena(absl::BitGen& gen, std::shared_ptr<const upb::Arena>& a,
                       size_t min_index) {
    size_t i = RandomIndex(gen, min_index);
    absl::MutexLock lock(&mutex_);
    arenas_[i].swap(a);
  }

  // Returns a random arena from the set, ensuring that the returned arena is
  // non-null.
  //
  // Note that the returned arena is shared and may be accessed concurrently
  // by other threads.
  std::shared_ptr<const upb::Arena> RandomNonNullArena(absl::BitGen& gen) {
    return IndexedNonNullArena(RandomIndex(gen));
  }

  using ArenaArray = std::array<std::shared_ptr<const upb::Arena>, 100>;
  ArenaArray arenas_ ABSL_GUARDED_BY(mutex_);
  absl::Mutex mutex_;
};

TEST(ArenaTest, FuzzSingleThreaded) {
  Environment env;

  absl::BitGen gen;
  auto end = absl::Now() + absl::Seconds(0.5);
  while (absl::Now() < end) {
    env.RandomPoke(gen);
  }
}

TEST(ArenaTest, LargeAlloc) {
  // Tests an allocation larger than the max block size.
  upb_Arena* arena = upb_Arena_New();
  size_t size = 100000;
  char* mem = static_cast<char*>(upb_Arena_Malloc(arena, size));
  EXPECT_NE(mem, nullptr);
  for (size_t i = 0; i < size; ++i) {
    mem[i] = static_cast<char>(i);
  }
  for (size_t i = 0; i < size; ++i) {
    EXPECT_EQ(mem[i], static_cast<char>(i));
  }
  upb_Arena_Free(arena);
}

TEST(ArenaTest, MaxBlockSize) {
  upb_Arena* arena = upb_Arena_New();
  // Perform 600 1k allocations (600k total) and ensure that the amount of
  // memory allocated does not exceed 700k.
  for (int i = 0; i < 600; ++i) {
    upb_Arena_Malloc(arena, 1024);
  }
  EXPECT_LE(upb_Arena_SpaceAllocated(arena, nullptr), 700 * 1024);
  upb_Arena_Free(arena);
}

#ifndef UPB_SUPPRESS_MISSING_ATOMICS
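
// Fused arenas share lifetime state, so freeing one arena can race with
// fusing another arena from the same group. This test hammers that
// combination from multiple threads.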
TEST(ArenaTest, FuzzFuseFreeRace) {
  Environment env;

  absl::Notification done;
  std::vector<std::thread> threads;
  for (int i = 0; i < 10; ++i) {
    threads.emplace_back([&]() {
      absl::BitGen gen;
      while (!done.HasBeenNotified()) {
        env.RandomNewFree(gen);
      }
    });
  }

  absl::BitGen gen;
  auto end = absl::Now() + absl::Seconds(2);
  while (absl::Now() < end) {
    env.RandomFuse(gen);
  }
  done.Notify();
  for (auto& t : threads) t.join();
}

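// Concurrent fuses against overlapping fuse groups must also be safe: every
// thread fuses random arenas from the shared pool at the same time.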
TEST(ArenaTest, FuzzFuseFuseRace) {
  Environment env;

  absl::Notification done;
  std::vector<std::thread> threads;
  for (int i = 0; i < 10; ++i) {
    threads.emplace_back([&]() {
      absl::BitGen gen;
      while (!done.HasBeenNotified()) {
        env.RandomFuse(gen);
      }
    });
  }

  absl::BitGen gen;
  auto end = absl::Now() + absl::Seconds(2);
  while (absl::Now() < end) {
    env.RandomFuse(gen);
  }
  done.Notify();
  for (auto& t : threads) t.join();
}

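// A drop-in replacement for the global allocator that prefixes every
// allocation with a guard word and asserts on it when the block is freed or
// reallocated, so corruption from racing arena operations is caught early.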
static void* checking_global_allocfunc(upb_alloc* alloc, void* ptr,
                                       size_t oldsize, size_t size) {
  int header_size = std::max(alignof(max_align_t), sizeof(int));
  if (ptr) {
    ptr = UPB_PTR_AT(ptr, -header_size, void);
    UPB_ASSERT(*reinterpret_cast<int*>(ptr) == 0x5AFE);
  }
  if (size == 0) {
    free(ptr);
    return nullptr;
  }
  void* ret;
  if (oldsize == 0) {
    ret = malloc(size + header_size);
  } else {
    ret = realloc(ptr, size + header_size);
  }
  if (ret) {
    *reinterpret_cast<int*>(ret) = 0x5AFE;
    return UPB_PTR_AT(ret, header_size, void);
  }
  return ret;
}

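// Builds 10,000 chains of 11 fused arenas, then has each worker thread
// allocate from and free its own member of every chain while the main thread
// does the same, exercising the backing allocator during fuse/free races.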
TEST(ArenaTest, FuzzFuseFreeAllocatorRace) {
  upb_Arena_SetMaxBlockSize(128);
  upb_alloc_func* old = upb_alloc_global.func;
  upb_alloc_global.func = checking_global_allocfunc;
  absl::Cleanup reset_max_block_size = [old] {
    upb_Arena_SetMaxBlockSize(32 << 10);
    upb_alloc_global.func = old;
  };
  absl::Notification done;
  std::vector<std::thread> threads;
  size_t thread_count = 10;
  std::vector<std::array<upb_Arena*, 11>> arenas;
  for (size_t i = 0; i < 10000; ++i) {
    std::array<upb_Arena*, 11> arr;
    arr[0] = upb_Arena_New();
    for (size_t j = 1; j < thread_count + 1; ++j) {
      arr[j] = upb_Arena_New();
      upb_Arena_Fuse(arr[j - 1], arr[j]);
    }
    arenas.push_back(arr);
  }
  for (size_t i = 0; i < thread_count; ++i) {
    size_t tid = i;
    threads.emplace_back([&, tid]() {
      size_t arenaCtr = 0;
      while (!done.HasBeenNotified() && arenaCtr < arenas.size()) {
        upb_Arena* read = arenas[arenaCtr++][tid];
        (void)upb_Arena_Malloc(read, 128);
        (void)upb_Arena_Malloc(read, 128);
        upb_Arena_Free(read);
      }
      while (arenaCtr < arenas.size()) {
        upb_Arena_Free(arenas[arenaCtr++][tid]);
      }
    });
  }
  auto end = absl::Now() + absl::Seconds(2);
  size_t arenaCtr = 0;
  while (absl::Now() < end && arenaCtr < arenas.size()) {
    upb_Arena* read = arenas[arenaCtr++][thread_count];
    (void)upb_Arena_Malloc(read, 128);
    (void)upb_Arena_Malloc(read, 128);
    upb_Arena_Free(read);
  }
  done.Notify();
  while (arenaCtr < arenas.size()) {
    upb_Arena_Free(arenas[arenaCtr++][thread_count]);
  }
  for (auto& t : threads) t.join();
}

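// upb_Arena_SpaceAllocated must be safe to call while other threads are still
// fusing new arenas into the group; the main thread spins until the reported
// block count reflects every fuse performed by the workers.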
TEST(ArenaTest, FuzzFuseSpaceAllocatedRace) {
  upb_Arena_SetMaxBlockSize(128);
  absl::Cleanup reset_max_block_size = [] {
    upb_Arena_SetMaxBlockSize(32 << 10);
  };
  absl::Notification done;
  std::vector<std::thread> threads;
  std::vector<upb_Arena*> arenas;
  size_t thread_count = 10;
  size_t fuses_per_thread = 1000;
  size_t root_arenas_limit = 250;
  for (size_t i = 0; i < root_arenas_limit; ++i) {
    arenas.push_back(upb_Arena_New());
    for (size_t j = 0; j < thread_count; ++j) {
      upb_Arena_IncRefFor(arenas[i], nullptr);
    }
  }
  for (size_t i = 0; i < thread_count; ++i) {
    threads.emplace_back([&]() {
      size_t arenaCtr = 0;
      while (!done.HasBeenNotified() && arenaCtr < arenas.size()) {
        upb_Arena* read = arenas[arenaCtr++];
        for (size_t j = 0; j < fuses_per_thread; ++j) {
          upb_Arena* fuse = upb_Arena_New();
          upb_Arena_Fuse(read, fuse);
          upb_Arena_Free(read);
          read = fuse;
        }
        upb_Arena_Free(read);
      }
      while (arenaCtr < arenas.size()) {
        upb_Arena_Free(arenas[arenaCtr++]);
      }
    });
  }

  auto end = absl::Now() + absl::Seconds(2);
  size_t arenaCtr = 0;
  uintptr_t total_allocated = 0;
  while (absl::Now() < end && arenaCtr < arenas.size()) {
    upb_Arena* read = arenas[arenaCtr++];
    size_t count;
    size_t allocated;
    do {
      allocated = upb_Arena_SpaceAllocated(read, &count);
    } while (count < fuses_per_thread * thread_count);
    upb_Arena_Free(read);
    total_allocated += allocated;
  }
  done.Notify();
  for (auto& t : threads) t.join();
  while (arenaCtr < arenas.size()) {
    upb_Arena_Free(arenas[arenaCtr++]);
  }
  ASSERT_GT(total_allocated, arenaCtr);
}

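// upb_Arena_SpaceAllocated must also be safe to call concurrently with
// allocations on the same arena; a reader thread polls the space-allocated
// counters while the main thread keeps adding new blocks.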
TEST(ArenaTest, FuzzAllocSpaceAllocatedRace) {
  upb_Arena_SetMaxBlockSize(128);
  absl::Cleanup reset_max_block_size = [] {
    upb_Arena_SetMaxBlockSize(32 << 10);
  };
  upb_Arena* arena = upb_Arena_New();
  absl::Notification done;
  std::vector<std::thread> threads;
  for (int i = 0; i < 1; ++i) {
    threads.emplace_back([&]() {
      while (!done.HasBeenNotified()) {
        size_t count;
        upb_Arena_SpaceAllocated(arena, &count);
      }
    });
  }

  auto end = absl::Now() + absl::Seconds(2);
  uintptr_t total = 0;
  while (absl::Now() < end && total < 10000000) {
    if (upb_Arena_Malloc(arena, 128) == nullptr) {
      break;
    }
    total += 128;
|
|
|
  }
  done.Notify();
  for (auto& t : threads) t.join();
  upb_Arena_Free(arena);
}

TEST(ArenaTest, ArenaIncRef) {
  upb_Arena* arena1 = upb_Arena_New();
  EXPECT_EQ(upb_Arena_DebugRefCount(arena1), 1);
  upb_Arena_IncRefFor(arena1, nullptr);
  EXPECT_EQ(upb_Arena_DebugRefCount(arena1), 2);
  upb_Arena_DecRefFor(arena1, nullptr);
  EXPECT_EQ(upb_Arena_DebugRefCount(arena1), 1);
  upb_Arena_Free(arena1);
}

TEST(ArenaTest, FuzzFuseIncRefCountRace) {
  Environment env;

  absl::Notification done;
  std::vector<std::thread> threads;
  for (int i = 0; i < 10; ++i) {
    threads.emplace_back([&]() {
      absl::BitGen gen;
      while (!done.HasBeenNotified()) {
        env.RandomNewFree(gen);
      }
    });
  }

  absl::BitGen gen;
  auto end = absl::Now() + absl::Seconds(2);
  while (absl::Now() < end) {
    env.RandomFuse(gen);
    env.RandomIncRefCount(gen);
  }
  done.Notify();
  for (auto& t : threads) t.join();
}

TEST(ArenaTest, IncRefCountShouldFailForInitialBlock) {
  char buf1[1024];
  upb_Arena* arena = upb_Arena_Init(buf1, 1024, &upb_alloc_global);
  EXPECT_FALSE(upb_Arena_IncRefFor(arena, nullptr));
}

TEST(ArenaTest, FuzzFuseIsFusedRace) {
  Environment env;

  // Create two arenas and fuse them.
  std::shared_ptr<const upb::Arena> a = env.IndexedNonNullArena(0);
  std::shared_ptr<const upb::Arena> b = env.IndexedNonNullArena(1);
  upb_Arena_Fuse(a->ptr(), b->ptr());
  EXPECT_TRUE(upb_Arena_IsFused(a->ptr(), b->ptr()));

  absl::Notification done;
  std::vector<std::thread> threads;
  for (int i = 0; i < 10; ++i) {
    threads.emplace_back([&]() {
      absl::BitGen gen;
      while (!done.HasBeenNotified()) {
        env.RandomPoke(gen, 2);
      }
    });
  }

  absl::BitGen gen;
  auto end = absl::Now() + absl::Seconds(2);
  while (absl::Now() < end) {
    // Verify that the two arenas are still fused.
    EXPECT_TRUE(upb_Arena_IsFused(a->ptr(), b->ptr()));
  }
  done.Notify();
  for (auto& t : threads) t.join();
}
Allow for fuse/free races in `upb_Arena`.
The implementation is by kfm@; I only added the portability code around it.
`upb_Arena` was designed to be only thread-compatible. However, fusing of arenas muddies the waters somewhat, because two distinct `upb_Arena` objects end up sharing state once fused. This causes `upb_Arena_Free(a)` to interfere with `upb_Arena_Fuse(b, c)` if `a` and `b` were previously fused.
It turns out that we can use atomics to fix this with about a 35% regression in fuse performance (see below). Arena create+free does not regress, thanks to special-case logic in Free().
`upb_Arena` is still a thread-compatible type, and it is still never safe to call `upb_Arena_xxx(a)` and `upb_Arena_yyy(a)` in parallel. However, you can at least now call `upb_Arena_Free(a)` and `upb_Arena_Fuse(b, c)` in parallel, even if `a` and `b` were previously fused.
Note that calling `upb_Arena_Fuse(a, b)` and `upb_Arena_Fuse(c, d)` in parallel is still not allowed if `b` and `c` were previously fused. In practice this means that fuses must still be single-threaded within a single fused group.
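A minimal sketch of the calling pattern that this makes legal, assuming the includes already present in this test file (`<thread>`, `"upb/mem/arena.h"`); `FuseFreeRaceExample` is a hypothetical name for illustration and not part of upb:
```
// Sketch only: illustrates the new fuse/free concurrency rule.
void FuseFreeRaceExample() {
  upb_Arena* a = upb_Arena_New();
  upb_Arena* b = upb_Arena_New();
  upb_Arena* c = upb_Arena_New();
  upb_Arena_Fuse(a, b);  // Fusing itself happens on a single thread.

  // Newly allowed: freeing `a` may race with fusing `b` into `c`, even
  // though `a` and `b` share a fused group.
  std::thread free_thread([&] { upb_Arena_Free(a); });
  std::thread fuse_thread([&] { upb_Arena_Fuse(b, c); });
  free_thread.join();
  fuse_thread.join();

  // Still NOT allowed: two parallel upb_Arena_Fuse calls that touch the
  // same fused group; those must remain single-threaded.

  // Every arena created with upb_Arena_New still needs its own Free; the
  // shared state is released once all members of the group are freed.
  upb_Arena_Free(b);
  upb_Arena_Free(c);
}
```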
Performance results:
```
name old cpu/op new cpu/op delta
BM_ArenaOneAlloc 18.6ns ± 1% 18.6ns ± 1% ~ (p=0.726 n=18+17)
BM_ArenaInitialBlockOneAlloc 6.28ns ± 1% 5.73ns ± 1% -8.68% (p=0.000 n=17+20)
BM_ArenaFuseUnbalanced/2 44.1ns ± 2% 60.4ns ± 1% +37.05% (p=0.000 n=18+19)
BM_ArenaFuseUnbalanced/8 370ns ± 2% 500ns ± 1% +35.12% (p=0.000 n=19+20)
BM_ArenaFuseUnbalanced/64 3.52µs ± 1% 4.71µs ± 1% +33.80% (p=0.000 n=18+19)
BM_ArenaFuseUnbalanced/128 7.20µs ± 1% 9.72µs ± 2% +34.93% (p=0.000 n=16+19)
BM_ArenaFuseBalanced/2 44.4ns ± 2% 61.4ns ± 1% +38.23% (p=0.000 n=20+17)
BM_ArenaFuseBalanced/8 373ns ± 2% 509ns ± 1% +36.57% (p=0.000 n=19+17)
BM_ArenaFuseBalanced/64 3.55µs ± 2% 4.79µs ± 1% +34.80% (p=0.000 n=19+19)
BM_ArenaFuseBalanced/128 7.26µs ± 1% 9.76µs ± 1% +34.45% (p=0.000 n=17+19)
BM_LoadAdsDescriptor_Upb<NoLayout> 5.66ms ± 1% 5.69ms ± 1% +0.57% (p=0.013 n=18+20)
BM_LoadAdsDescriptor_Upb<WithLayout> 6.30ms ± 1% 6.36ms ± 1% +0.90% (p=0.000 n=19+18)
BM_LoadAdsDescriptor_Proto2<NoLayout> 12.1ms ± 1% 12.1ms ± 1% ~ (p=0.118 n=18+18)
BM_LoadAdsDescriptor_Proto2<WithLayout> 12.2ms ± 1% 12.3ms ± 1% +0.50% (p=0.006 n=18+18)
BM_Parse_Upb_FileDesc<UseArena, Copy> 12.7µs ± 1% 12.7µs ± 1% ~ (p=0.194 n=20+19)
BM_Parse_Upb_FileDesc<UseArena, Alias> 11.6µs ± 1% 11.6µs ± 1% ~ (p=0.192 n=20+20)
BM_Parse_Upb_FileDesc<InitBlock, Copy> 12.5µs ± 1% 12.5µs ± 0% ~ (p=0.750 n=18+14)
BM_Parse_Upb_FileDesc<InitBlock, Alias> 11.4µs ± 1% 11.3µs ± 1% -0.34% (p=0.046 n=19+19)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 25.4µs ± 1% 25.7µs ± 2% +1.37% (p=0.000 n=18+18)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 12.1µs ± 2% 12.1µs ± 1% ~ (p=0.143 n=18+18)
BM_Parse_Proto2<FileDesc, InitBlock, Copy> 11.9µs ± 3% 11.9µs ± 1% ~ (p=0.076 n=17+19)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 13.2µs ± 1% 13.2µs ± 1% ~ (p=0.053 n=19+19)
BM_SerializeDescriptor_Proto2 5.97µs ± 4% 5.90µs ± 4% ~ (p=0.093 n=17+19)
BM_SerializeDescriptor_Upb 10.4µs ± 1% 10.4µs ± 1% ~ (p=0.909 n=17+18)
name old time/op new time/op delta
BM_ArenaOneAlloc 18.7ns ± 2% 18.6ns ± 0% ~ (p=0.607 n=18+17)
BM_ArenaInitialBlockOneAlloc 6.29ns ± 1% 5.74ns ± 1% -8.71% (p=0.000 n=17+19)
BM_ArenaFuseUnbalanced/2 44.1ns ± 1% 60.6ns ± 1% +37.21% (p=0.000 n=17+19)
BM_ArenaFuseUnbalanced/8 371ns ± 2% 500ns ± 1% +35.02% (p=0.000 n=19+16)
BM_ArenaFuseUnbalanced/64 3.53µs ± 1% 4.72µs ± 1% +33.85% (p=0.000 n=18+19)
BM_ArenaFuseUnbalanced/128 7.22µs ± 1% 9.73µs ± 2% +34.87% (p=0.000 n=16+19)
BM_ArenaFuseBalanced/2 44.5ns ± 2% 61.5ns ± 1% +38.22% (p=0.000 n=20+17)
BM_ArenaFuseBalanced/8 373ns ± 2% 510ns ± 1% +36.58% (p=0.000 n=19+16)
BM_ArenaFuseBalanced/64 3.56µs ± 2% 4.80µs ± 1% +34.87% (p=0.000 n=19+19)
BM_ArenaFuseBalanced/128 7.27µs ± 1% 9.77µs ± 1% +34.40% (p=0.000 n=17+19)
BM_LoadAdsDescriptor_Upb<NoLayout> 5.67ms ± 1% 5.71ms ± 1% +0.60% (p=0.011 n=18+20)
BM_LoadAdsDescriptor_Upb<WithLayout> 6.32ms ± 1% 6.37ms ± 1% +0.87% (p=0.000 n=19+18)
BM_LoadAdsDescriptor_Proto2<NoLayout> 12.1ms ± 1% 12.2ms ± 1% ~ (p=0.126 n=18+19)
BM_LoadAdsDescriptor_Proto2<WithLayout> 12.2ms ± 1% 12.3ms ± 1% +0.51% (p=0.002 n=18+18)
BM_Parse_Upb_FileDesc<UseArena, Copy> 12.7µs ± 1% 12.7µs ± 1% ~ (p=0.149 n=20+19)
BM_Parse_Upb_FileDesc<UseArena, Alias> 11.6µs ± 1% 11.6µs ± 1% ~ (p=0.211 n=20+20)
BM_Parse_Upb_FileDesc<InitBlock, Copy> 12.5µs ± 1% 12.5µs ± 1% ~ (p=0.986 n=18+15)
BM_Parse_Upb_FileDesc<InitBlock, Alias> 11.4µs ± 1% 11.3µs ± 1% ~ (p=0.081 n=19+18)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 25.4µs ± 1% 25.8µs ± 2% +1.41% (p=0.000 n=18+18)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 12.1µs ± 2% 12.1µs ± 1% ~ (p=0.558 n=19+18)
BM_Parse_Proto2<FileDesc, InitBlock, Copy> 12.0µs ± 3% 11.9µs ± 1% ~ (p=0.165 n=17+19)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 13.2µs ± 1% 13.2µs ± 1% ~ (p=0.070 n=19+19)
BM_SerializeDescriptor_Proto2 5.98µs ± 4% 5.92µs ± 3% ~ (p=0.138 n=17+19)
BM_SerializeDescriptor_Upb 10.4µs ± 1% 10.4µs ± 1% ~ (p=0.858 n=17+18)
```
PiperOrigin-RevId: 518573683
#endif

}  // namespace