Added benchmarks for `upb_Arena_Fuse()`

I added two variants: one is unbalanced (always fusing a single arena into the group), while the other is balanced (always fusing two groups of equal size).

Unfortunately, it is difficult to benchmark `upb_Arena_Free()` separately from `upb_Arena_Fuse()`, since the only way to get a non-zero refcount is to fuse.

Results on my machine:

```
CPU: Intel Skylake Xeon with HyperThreading (48 cores) dL1:32KB dL2:1024KB dL3:38MB
Benchmark                    Time(ns)        CPU(ns)     Iterations
-------------------------------------------------------------------
BM_ArenaFuseUnbalanced/2           52.2           52.2      1000000  38.300M items/s
BM_ArenaFuseUnbalanced/8          416            416         142315  19.243M items/s
BM_ArenaFuseUnbalanced/64        4034           4033          14306  15.868M items/s
BM_ArenaFuseUnbalanced/128       8302           8301           6837  15.420M items/s
BM_ArenaFuseBalanced/2             54.7           54.7      1000000  36.581M items/s
BM_ArenaFuseBalanced/8            425            425         100000  18.845M items/s
BM_ArenaFuseBalanced/64          4029           4029          14632  15.886M items/s
BM_ArenaFuseBalanced/128         8050           8049           7176  15.902M items/s
```
PiperOrigin-RevId: 518292073
pull/13171/head
Joshua Haberman 2 years ago committed by Copybara-Service
parent 666a28e6ac
commit d520014cfa
  1. 2
      benchmarks/BUILD
  2. 48
      benchmarks/benchmark.cc

@ -87,7 +87,9 @@ cc_test(
":benchmark_descriptor_sv_cc_proto",
":benchmark_descriptor_upb_proto",
":benchmark_descriptor_upb_proto_reflection",
"//:base",
"//:descriptor_upb_proto",
"//:mem",
"//:reflection",
"@com_github_google_benchmark//:benchmark_main",
"@com_google_absl//absl/container:flat_hash_set",

@ -35,6 +35,8 @@
#include "benchmarks/descriptor.upb.h"
#include "benchmarks/descriptor.upbdefs.h"
#include "benchmarks/descriptor_sv.pb.h"
#include "upb/base/log2.h"
#include "upb/mem/arena.h"
#include "upb/reflection/def.hpp"
// File-scope string view copied from the `descriptor` field of
// `benchmarks_descriptor_proto_upbdefinit`.
// NOTE(review): presumably the serialized descriptor used as benchmark input;
// confirm against the generated descriptor.upbdefs.h.
upb_StringView descriptor = benchmarks_descriptor_proto_upbdefinit.descriptor;
@ -73,6 +75,52 @@ static void BM_ArenaInitialBlockOneAlloc(benchmark::State& state) {
}
BENCHMARK(BM_ArenaInitialBlockOneAlloc);
// Benchmarks an unbalanced fuse pattern: every arena in the batch is fused,
// one at a time, with the first arena of the batch.
//
// `state.range(0)` is the number of arenas per iteration. Each timed iteration
// creates that many arenas, fuses them, and frees them again, so the reported
// time covers creation and destruction as well as the fuse calls. One "item"
// is counted per arena per iteration.
static void BM_ArenaFuseUnbalanced(benchmark::State& state) {
  std::vector<upb_Arena*> arenas(state.range(0));
  size_t items_processed = 0;

  for (auto _ : state) {
    const size_t count = arenas.size();

    for (size_t i = 0; i < count; ++i) {
      arenas[i] = upb_Arena_New();
    }

    // The walk starts at index 0, so the first call fuses arenas[0] with
    // itself; every later call adds one more arena to the first arena's group.
    for (size_t i = 0; i < count; ++i) {
      upb_Arena_Fuse(arenas[0], arenas[i]);
    }

    for (size_t i = 0; i < count; ++i) {
      upb_Arena_Free(arenas[i]);
    }

    items_processed += count;
  }

  state.SetItemsProcessed(items_processed);
}
BENCHMARK(BM_ArenaFuseUnbalanced)->Range(2, 128);
// Benchmarks a balanced fuse pattern: arenas are fused pairwise, then the
// resulting pairs are fused with each other, and so on, so that each fuse
// joins two groups built by the same number of earlier rounds.
//
// `state.range(0)` is the number of arenas per iteration. Each timed iteration
// creates that many arenas, fuses them, and frees them again, so the reported
// time covers creation and destruction as well as the fuse calls. One "item"
// is counted per arena per iteration.
static void BM_ArenaFuseBalanced(benchmark::State& state) {
  std::vector<upb_Arena*> arenas(state.range(0));
  // Kept distinct from the per-round counter below; the two previously shared
  // the name `n`, with the inner one shadowing this one.
  size_t items_processed = 0;

  for (auto _ : state) {
    for (auto& arena : arenas) {
      arena = upb_Arena_New();
    }

    // Perform a series of fuses that keeps the halves balanced. In round
    // `level` the arenas at indices i and i + 2^level are fused, for i stepping
    // by 2^(level + 1).
    const size_t max_level = upb_Log2Ceiling(arenas.size());
    for (size_t level = 0; level <= max_level; level++) {
      // Shift a size_t rather than an int literal so the step is computed in
      // the same type it is compared and indexed with.
      const size_t step = size_t{1} << level;
      for (size_t i = 0; i + step < arenas.size(); i += (step * 2)) {
        upb_Arena_Fuse(arenas[i], arenas[i + step]);
      }
    }

    for (auto& arena : arenas) {
      upb_Arena_Free(arena);
    }

    items_processed += arenas.size();
  }

  state.SetItemsProcessed(items_processed);
}
BENCHMARK(BM_ArenaFuseBalanced)->Range(2, 128);
enum LoadDescriptorMode {
NoLayout,
WithLayout,

Loading…
Cancel
Save