|
|
|
// Protocol Buffers - Google's data interchange format
|
|
|
|
// Copyright 2023 Google LLC. All rights reserved.
|
|
|
|
//
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file or at
|
|
|
|
// https://developers.google.com/open-source/licenses/bsd
|
|
|
|
|
|
|
|
#ifndef UPB_MEM_INTERNAL_ARENA_H_
|
|
|
|
#define UPB_MEM_INTERNAL_ARENA_H_
|
|
|
|
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
// Must be last.
|
|
|
|
#include "upb/port/def.inc"
|
|
|
|
|
|
|
|
// This is QUITE an ugly hack, which specifies the number of pointers needed
|
|
|
|
// to equal (or exceed) the storage required for one upb_Arena.
|
|
|
|
//
|
|
|
|
// We need this because the decoder inlines a upb_Arena for performance but
|
|
|
|
// the full struct is not visible outside of arena.c. Yes, I know, it's awful.
|
|
|
|
#define UPB_ARENA_SIZE_HACK 7
|
|
|
|
|
|
|
|
// LINT.IfChange(upb_Array)
|
|
|
|
|
|
|
|
struct upb_Arena {
|
|
|
|
char* UPB_ONLYBITS(ptr);
|
|
|
|
char* UPB_ONLYBITS(end);
|
|
|
|
};
|
|
|
|
|
|
|
|
// LINT.ThenChange(//depot/google3/third_party/upb/bits/typescript/arena.ts:upb_Array)
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
void UPB_PRIVATE(_upb_Arena_SwapIn)(struct upb_Arena* des,
|
|
|
|
const struct upb_Arena* src);
|
|
|
|
void UPB_PRIVATE(_upb_Arena_SwapOut)(struct upb_Arena* des,
|
|
|
|
const struct upb_Arena* src);
|
|
|
|
|
|
|
|
UPB_INLINE size_t UPB_PRIVATE(_upb_ArenaHas)(const struct upb_Arena* a) {
|
|
|
|
return (size_t)(a->UPB_ONLYBITS(end) - a->UPB_ONLYBITS(ptr));
|
|
|
|
}
|
Changed Arena representation so that fusing links arenas together instead of blocks.
Previously when fusing, we would concatenate all blocks into a single list that lived in the arena root. From then on, all arenas would add their blocks to this single unified list.
After this CL, arenas keep their distinct list of blocks even after being fused. Instead of unifying the block list, fuse now puts the arenas themselves into a list, so all arenas in the fused group can be iterated over at any time.
This design makes it easier to keep each individual arena thread-compatible, because fuse and free are now the only mutating operations that touch state that is shared with the entire group. Read-only operations like `SpaceAllocated()` also iterate the list of arenas, but in a read-only fashion.
(Note: we need tests for SpaceAllocated(), both single-threaded for correctness and multi-threaded for resilience to crashes and data races).
Performance of fuse regresses by 5-20%. This is somewhat expected as we are performing more atomic operations during a fuse.
```
name old cpu/op new cpu/op delta
BM_ArenaOneAlloc 18.4ns ± 6% 18.7ns ± 4% +2.00% (p=0.016 n=18+18)
BM_ArenaInitialBlockOneAlloc 5.50ns ± 4% 6.57ns ± 4% +19.42% (p=0.000 n=16+17)
BM_ArenaFuseUnbalanced/2 59.3ns ±10% 68.7ns ± 4% +15.85% (p=0.000 n=19+19)
BM_ArenaFuseUnbalanced/8 479ns ± 5% 540ns ± 8% +12.57% (p=0.000 n=18+19)
BM_ArenaFuseUnbalanced/64 4.50µs ± 4% 4.93µs ± 8% +9.59% (p=0.000 n=17+17)
BM_ArenaFuseUnbalanced/128 9.24µs ± 3% 9.96µs ± 3% +7.81% (p=0.000 n=17+17)
BM_ArenaFuseBalanced/2 63.3ns ±18% 71.0ns ± 4% +12.14% (p=0.000 n=19+18)
BM_ArenaFuseBalanced/8 484ns ± 9% 543ns ±10% +12.11% (p=0.000 n=17+16)
BM_ArenaFuseBalanced/64 4.50µs ± 6% 4.94µs ± 4% +9.62% (p=0.000 n=19+17)
BM_ArenaFuseBalanced/128 9.20µs ± 4% 9.95µs ± 4% +8.12% (p=0.000 n=16+19)
BM_LoadAdsDescriptor_Upb<NoLayout> 5.50ms ± 8% 5.69ms ±17% ~ (p=0.189 n=18+19)
BM_LoadAdsDescriptor_Upb<WithLayout> 6.10ms ± 5% 6.05ms ± 4% ~ (p=0.258 n=17+18)
BM_LoadAdsDescriptor_Proto2<NoLayout> 11.9ms ±15% 11.6ms ± 5% ~ (p=0.589 n=19+16)
BM_LoadAdsDescriptor_Proto2<WithLayout> 11.8ms ± 5% 12.4ms ±17% ~ (p=0.604 n=16+20)
BM_Parse_Upb_FileDesc<UseArena, Copy> 12.1µs ± 8% 12.1µs ± 4% ~ (p=1.000 n=18+18)
BM_Parse_Upb_FileDesc<UseArena, Alias> 11.8µs ±17% 11.1µs ± 4% ~ (p=0.104 n=20+17)
BM_Parse_Upb_FileDesc<InitBlock, Copy> 12.0µs ± 5% 11.9µs ± 4% ~ (p=0.134 n=18+19)
BM_Parse_Upb_FileDesc<InitBlock, Alias> 10.9µs ± 7% 11.0µs ± 4% ~ (p=0.195 n=17+18)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 24.2µs ± 4% 24.4µs ± 7% ~ (p=0.767 n=18+18)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 11.6µs ± 5% 11.6µs ± 4% ~ (p=0.621 n=18+16)
BM_Parse_Proto2<FileDesc, InitBlock, Copy> 11.3µs ± 3% 11.3µs ± 3% ~ (p=0.743 n=18+18)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 12.7µs ± 8% 12.7µs ± 4% ~ (p=0.988 n=18+19)
BM_SerializeDescriptor_Proto2 5.77µs ± 5% 5.71µs ± 5% ~ (p=0.433 n=17+17)
BM_SerializeDescriptor_Upb 10.0µs ± 5% 10.1µs ± 7% ~ (p=0.102 n=19+16)
name old time/op new time/op delta
BM_ArenaOneAlloc 18.4ns ± 6% 18.8ns ± 4% +1.98% (p=0.019 n=18+18)
BM_ArenaInitialBlockOneAlloc 5.51ns ± 4% 6.58ns ± 4% +19.42% (p=0.000 n=16+17)
BM_ArenaFuseUnbalanced/2 59.5ns ±10% 68.9ns ± 4% +15.83% (p=0.000 n=19+19)
BM_ArenaFuseUnbalanced/8 481ns ± 5% 541ns ± 8% +12.54% (p=0.000 n=18+19)
BM_ArenaFuseUnbalanced/64 4.51µs ± 4% 4.94µs ± 8% +9.53% (p=0.000 n=17+17)
BM_ArenaFuseUnbalanced/128 9.26µs ± 3% 9.98µs ± 3% +7.79% (p=0.000 n=17+17)
BM_ArenaFuseBalanced/2 63.5ns ±19% 71.1ns ± 3% +12.07% (p=0.000 n=19+18)
BM_ArenaFuseBalanced/8 485ns ± 9% 551ns ±20% +13.47% (p=0.000 n=17+17)
BM_ArenaFuseBalanced/64 4.51µs ± 6% 4.95µs ± 4% +9.62% (p=0.000 n=19+17)
BM_ArenaFuseBalanced/128 9.22µs ± 4% 9.97µs ± 4% +8.12% (p=0.000 n=16+19)
BM_LoadAdsDescriptor_Upb<NoLayout> 5.52ms ± 8% 5.72ms ±18% ~ (p=0.199 n=18+19)
BM_LoadAdsDescriptor_Upb<WithLayout> 6.12ms ± 5% 6.07ms ± 4% ~ (p=0.273 n=17+18)
BM_LoadAdsDescriptor_Proto2<NoLayout> 11.9ms ±15% 11.6ms ± 5% ~ (p=0.589 n=19+16)
BM_LoadAdsDescriptor_Proto2<WithLayout> 11.9ms ± 5% 12.5ms ±18% ~ (p=0.582 n=16+20)
BM_Parse_Upb_FileDesc<UseArena, Copy> 12.2µs ± 8% 12.1µs ± 3% ~ (p=0.963 n=18+18)
BM_Parse_Upb_FileDesc<UseArena, Alias> 11.8µs ±17% 11.1µs ± 4% ~ (p=0.104 n=20+17)
BM_Parse_Upb_FileDesc<InitBlock, Copy> 12.0µs ± 5% 11.9µs ± 4% ~ (p=0.126 n=18+19)
BM_Parse_Upb_FileDesc<InitBlock, Alias> 11.0µs ± 6% 11.1µs ± 4% ~ (p=0.195 n=17+18)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 24.3µs ± 4% 24.5µs ± 6% ~ (p=0.743 n=18+18)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 11.7µs ± 5% 11.6µs ± 4% ~ (p=0.574 n=18+16)
BM_Parse_Proto2<FileDesc, InitBlock, Copy> 11.3µs ± 3% 11.3µs ± 3% ~ (p=0.743 n=18+18)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 12.7µs ± 8% 12.7µs ± 4% ~ (p=0.988 n=18+19)
BM_SerializeDescriptor_Proto2 5.78µs ± 5% 5.73µs ± 5% ~ (p=0.357 n=17+17)
BM_SerializeDescriptor_Upb 10.0µs ± 5% 10.1µs ± 7% ~ (p=0.117 n=19+16)
name old allocs/op new allocs/op delta
BM_ArenaOneAlloc 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/2 2.00 ± 0% 2.00 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/8 8.00 ± 0% 8.00 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/64 64.0 ± 0% 64.0 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/128 128 ± 0% 128 ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/2 2.00 ± 0% 2.00 ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/8 8.00 ± 0% 8.00 ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/64 64.0 ± 0% 64.0 ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/128 128 ± 0% 128 ± 0% ~ (all samples are equal)
BM_LoadAdsDescriptor_Upb<NoLayout> 6.08k ± 0% 6.05k ± 0% -0.54% (p=0.000 n=20+20)
BM_LoadAdsDescriptor_Upb<WithLayout> 6.39k ± 0% 6.36k ± 0% -0.55% (p=0.000 n=20+20)
BM_LoadAdsDescriptor_Proto2<NoLayout> 83.4k ± 0% 83.4k ± 0% ~ (p=0.800 n=20+20)
BM_LoadAdsDescriptor_Proto2<WithLayout> 84.4k ± 0% 84.4k ± 0% ~ (p=0.752 n=20+20)
BM_Parse_Upb_FileDesc<UseArena, Copy> 7.00 ± 0% 7.00 ± 0% ~ (all samples are equal)
BM_Parse_Upb_FileDesc<UseArena, Alias> 7.00 ± 0% 7.00 ± 0% ~ (all samples are equal)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 765 ± 0% 765 ± 0% ~ (all samples are equal)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 8.00 ± 0% 8.00 ± 0% ~ (all samples are equal)
name old peak-mem(Bytes)/op new peak-mem(Bytes)/op delta
BM_ArenaOneAlloc 336 ± 0% 336 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/2 672 ± 0% 672 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/8 2.69k ± 0% 2.69k ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/64 21.5k ± 0% 21.5k ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/128 43.0k ± 0% 43.0k ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/2 672 ± 0% 672 ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/8 2.69k ± 0% 2.69k ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/64 21.5k ± 0% 21.5k ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/128 43.0k ± 0% 43.0k ± 0% ~ (all samples are equal)
BM_LoadAdsDescriptor_Upb<NoLayout> 9.89M ± 0% 9.95M ± 0% +0.65% (p=0.000 n=20+20)
BM_LoadAdsDescriptor_Upb<WithLayout> 9.95M ± 0% 10.02M ± 0% +0.70% (p=0.000 n=20+20)
BM_LoadAdsDescriptor_Proto2<NoLayout> 6.62M ± 0% 6.62M ± 0% ~ (p=0.800 n=20+20)
BM_LoadAdsDescriptor_Proto2<WithLayout> 6.66M ± 0% 6.66M ± 0% ~ (p=0.752 n=20+20)
BM_Parse_Upb_FileDesc<UseArena, Copy> 36.5k ± 0% 36.5k ± 0% ~ (all samples are equal)
BM_Parse_Upb_FileDesc<UseArena, Alias> 36.5k ± 0% 36.5k ± 0% ~ (all samples are equal)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 35.8k ± 0% 35.8k ± 0% ~ (all samples are equal)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 65.3k ± 0% 65.3k ± 0% ~ (all samples are equal)
name old speed new speed delta
BM_LoadAdsDescriptor_Upb<NoLayout> 138MB/s ± 7% 132MB/s ±15% ~ (p=0.126 n=18+20)
BM_LoadAdsDescriptor_Upb<WithLayout> 124MB/s ± 5% 125MB/s ± 4% ~ (p=0.258 n=17+18)
BM_LoadAdsDescriptor_Proto2<NoLayout> 63.9MB/s ±13% 65.2MB/s ± 5% ~ (p=0.589 n=19+16)
BM_LoadAdsDescriptor_Proto2<WithLayout> 64.0MB/s ± 5% 61.3MB/s ±15% ~ (p=0.604 n=16+20)
BM_Parse_Upb_FileDesc<UseArena, Copy> 620MB/s ± 8% 622MB/s ± 4% ~ (p=1.000 n=18+18)
BM_Parse_Upb_FileDesc<UseArena, Alias> 644MB/s ±15% 679MB/s ± 4% ~ (p=0.104 n=20+17)
BM_Parse_Upb_FileDesc<InitBlock, Copy> 627MB/s ± 4% 633MB/s ± 4% ~ (p=0.134 n=18+19)
BM_Parse_Upb_FileDesc<InitBlock, Alias> 688MB/s ± 6% 682MB/s ± 4% ~ (p=0.195 n=17+18)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 310MB/s ± 4% 309MB/s ± 6% ~ (p=0.767 n=18+18)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 646MB/s ± 4% 649MB/s ± 4% ~ (p=0.621 n=18+16)
BM_Parse_Proto2<FileDesc, InitBlock, Copy> 666MB/s ± 3% 666MB/s ± 3% ~ (p=0.743 n=18+18)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 592MB/s ± 7% 593MB/s ± 4% ~ (p=0.988 n=18+19)
BM_SerializeDescriptor_Proto2 1.30GB/s ± 5% 1.32GB/s ± 5% ~ (p=0.433 n=17+17)
BM_SerializeDescriptor_Upb 756MB/s ± 5% 745MB/s ± 6% ~ (p=0.102 n=19+16)
```
PiperOrigin-RevId: 520144430
2 years ago
|
|
|
|
|
|
|
UPB_INLINE void* UPB_PRIVATE(_upb_Arena_Malloc)(struct upb_Arena* a,
|
|
|
|
size_t size) {
|
|
|
|
void* UPB_PRIVATE(_upb_Arena_SlowMalloc)(struct upb_Arena * a, size_t size);
|
Changed Arena representation so that fusing links arenas together instead of blocks.
Previously when fusing, we would concatenate all blocks into a single list that lived in the arena root. From then on, all arenas would add their blocks to this single unified list.
After this CL, arenas keep their distinct list of blocks even after being fused. Instead of unifying the block list, fuse now puts the arenas themselves into a list, so all arenas in the fused group can be iterated over at any time.
This design makes it easier to keep each individual arena thread-compatible, because fuse and free are now the only mutating operations that touch state that is shared with the entire group. Read-only operations like `SpaceAllocated()` also iterate the list of arenas, but in a read-only fashion.
(Note: we need tests for SpaceAllocated(), both single-threaded for correctness and multi-threaded for resilience to crashes and data races).
Performance of fuse regresses by 5-20%. This is somewhat expected as we are performing more atomic operations during a fuse.
```
name old cpu/op new cpu/op delta
BM_ArenaOneAlloc 18.4ns ± 6% 18.7ns ± 4% +2.00% (p=0.016 n=18+18)
BM_ArenaInitialBlockOneAlloc 5.50ns ± 4% 6.57ns ± 4% +19.42% (p=0.000 n=16+17)
BM_ArenaFuseUnbalanced/2 59.3ns ±10% 68.7ns ± 4% +15.85% (p=0.000 n=19+19)
BM_ArenaFuseUnbalanced/8 479ns ± 5% 540ns ± 8% +12.57% (p=0.000 n=18+19)
BM_ArenaFuseUnbalanced/64 4.50µs ± 4% 4.93µs ± 8% +9.59% (p=0.000 n=17+17)
BM_ArenaFuseUnbalanced/128 9.24µs ± 3% 9.96µs ± 3% +7.81% (p=0.000 n=17+17)
BM_ArenaFuseBalanced/2 63.3ns ±18% 71.0ns ± 4% +12.14% (p=0.000 n=19+18)
BM_ArenaFuseBalanced/8 484ns ± 9% 543ns ±10% +12.11% (p=0.000 n=17+16)
BM_ArenaFuseBalanced/64 4.50µs ± 6% 4.94µs ± 4% +9.62% (p=0.000 n=19+17)
BM_ArenaFuseBalanced/128 9.20µs ± 4% 9.95µs ± 4% +8.12% (p=0.000 n=16+19)
BM_LoadAdsDescriptor_Upb<NoLayout> 5.50ms ± 8% 5.69ms ±17% ~ (p=0.189 n=18+19)
BM_LoadAdsDescriptor_Upb<WithLayout> 6.10ms ± 5% 6.05ms ± 4% ~ (p=0.258 n=17+18)
BM_LoadAdsDescriptor_Proto2<NoLayout> 11.9ms ±15% 11.6ms ± 5% ~ (p=0.589 n=19+16)
BM_LoadAdsDescriptor_Proto2<WithLayout> 11.8ms ± 5% 12.4ms ±17% ~ (p=0.604 n=16+20)
BM_Parse_Upb_FileDesc<UseArena, Copy> 12.1µs ± 8% 12.1µs ± 4% ~ (p=1.000 n=18+18)
BM_Parse_Upb_FileDesc<UseArena, Alias> 11.8µs ±17% 11.1µs ± 4% ~ (p=0.104 n=20+17)
BM_Parse_Upb_FileDesc<InitBlock, Copy> 12.0µs ± 5% 11.9µs ± 4% ~ (p=0.134 n=18+19)
BM_Parse_Upb_FileDesc<InitBlock, Alias> 10.9µs ± 7% 11.0µs ± 4% ~ (p=0.195 n=17+18)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 24.2µs ± 4% 24.4µs ± 7% ~ (p=0.767 n=18+18)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 11.6µs ± 5% 11.6µs ± 4% ~ (p=0.621 n=18+16)
BM_Parse_Proto2<FileDesc, InitBlock, Copy> 11.3µs ± 3% 11.3µs ± 3% ~ (p=0.743 n=18+18)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 12.7µs ± 8% 12.7µs ± 4% ~ (p=0.988 n=18+19)
BM_SerializeDescriptor_Proto2 5.77µs ± 5% 5.71µs ± 5% ~ (p=0.433 n=17+17)
BM_SerializeDescriptor_Upb 10.0µs ± 5% 10.1µs ± 7% ~ (p=0.102 n=19+16)
name old time/op new time/op delta
BM_ArenaOneAlloc 18.4ns ± 6% 18.8ns ± 4% +1.98% (p=0.019 n=18+18)
BM_ArenaInitialBlockOneAlloc 5.51ns ± 4% 6.58ns ± 4% +19.42% (p=0.000 n=16+17)
BM_ArenaFuseUnbalanced/2 59.5ns ±10% 68.9ns ± 4% +15.83% (p=0.000 n=19+19)
BM_ArenaFuseUnbalanced/8 481ns ± 5% 541ns ± 8% +12.54% (p=0.000 n=18+19)
BM_ArenaFuseUnbalanced/64 4.51µs ± 4% 4.94µs ± 8% +9.53% (p=0.000 n=17+17)
BM_ArenaFuseUnbalanced/128 9.26µs ± 3% 9.98µs ± 3% +7.79% (p=0.000 n=17+17)
BM_ArenaFuseBalanced/2 63.5ns ±19% 71.1ns ± 3% +12.07% (p=0.000 n=19+18)
BM_ArenaFuseBalanced/8 485ns ± 9% 551ns ±20% +13.47% (p=0.000 n=17+17)
BM_ArenaFuseBalanced/64 4.51µs ± 6% 4.95µs ± 4% +9.62% (p=0.000 n=19+17)
BM_ArenaFuseBalanced/128 9.22µs ± 4% 9.97µs ± 4% +8.12% (p=0.000 n=16+19)
BM_LoadAdsDescriptor_Upb<NoLayout> 5.52ms ± 8% 5.72ms ±18% ~ (p=0.199 n=18+19)
BM_LoadAdsDescriptor_Upb<WithLayout> 6.12ms ± 5% 6.07ms ± 4% ~ (p=0.273 n=17+18)
BM_LoadAdsDescriptor_Proto2<NoLayout> 11.9ms ±15% 11.6ms ± 5% ~ (p=0.589 n=19+16)
BM_LoadAdsDescriptor_Proto2<WithLayout> 11.9ms ± 5% 12.5ms ±18% ~ (p=0.582 n=16+20)
BM_Parse_Upb_FileDesc<UseArena, Copy> 12.2µs ± 8% 12.1µs ± 3% ~ (p=0.963 n=18+18)
BM_Parse_Upb_FileDesc<UseArena, Alias> 11.8µs ±17% 11.1µs ± 4% ~ (p=0.104 n=20+17)
BM_Parse_Upb_FileDesc<InitBlock, Copy> 12.0µs ± 5% 11.9µs ± 4% ~ (p=0.126 n=18+19)
BM_Parse_Upb_FileDesc<InitBlock, Alias> 11.0µs ± 6% 11.1µs ± 4% ~ (p=0.195 n=17+18)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 24.3µs ± 4% 24.5µs ± 6% ~ (p=0.743 n=18+18)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 11.7µs ± 5% 11.6µs ± 4% ~ (p=0.574 n=18+16)
BM_Parse_Proto2<FileDesc, InitBlock, Copy> 11.3µs ± 3% 11.3µs ± 3% ~ (p=0.743 n=18+18)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 12.7µs ± 8% 12.7µs ± 4% ~ (p=0.988 n=18+19)
BM_SerializeDescriptor_Proto2 5.78µs ± 5% 5.73µs ± 5% ~ (p=0.357 n=17+17)
BM_SerializeDescriptor_Upb 10.0µs ± 5% 10.1µs ± 7% ~ (p=0.117 n=19+16)
name old allocs/op new allocs/op delta
BM_ArenaOneAlloc 1.00 ± 0% 1.00 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/2 2.00 ± 0% 2.00 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/8 8.00 ± 0% 8.00 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/64 64.0 ± 0% 64.0 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/128 128 ± 0% 128 ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/2 2.00 ± 0% 2.00 ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/8 8.00 ± 0% 8.00 ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/64 64.0 ± 0% 64.0 ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/128 128 ± 0% 128 ± 0% ~ (all samples are equal)
BM_LoadAdsDescriptor_Upb<NoLayout> 6.08k ± 0% 6.05k ± 0% -0.54% (p=0.000 n=20+20)
BM_LoadAdsDescriptor_Upb<WithLayout> 6.39k ± 0% 6.36k ± 0% -0.55% (p=0.000 n=20+20)
BM_LoadAdsDescriptor_Proto2<NoLayout> 83.4k ± 0% 83.4k ± 0% ~ (p=0.800 n=20+20)
BM_LoadAdsDescriptor_Proto2<WithLayout> 84.4k ± 0% 84.4k ± 0% ~ (p=0.752 n=20+20)
BM_Parse_Upb_FileDesc<UseArena, Copy> 7.00 ± 0% 7.00 ± 0% ~ (all samples are equal)
BM_Parse_Upb_FileDesc<UseArena, Alias> 7.00 ± 0% 7.00 ± 0% ~ (all samples are equal)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 765 ± 0% 765 ± 0% ~ (all samples are equal)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 8.00 ± 0% 8.00 ± 0% ~ (all samples are equal)
name old peak-mem(Bytes)/op new peak-mem(Bytes)/op delta
BM_ArenaOneAlloc 336 ± 0% 336 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/2 672 ± 0% 672 ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/8 2.69k ± 0% 2.69k ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/64 21.5k ± 0% 21.5k ± 0% ~ (all samples are equal)
BM_ArenaFuseUnbalanced/128 43.0k ± 0% 43.0k ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/2 672 ± 0% 672 ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/8 2.69k ± 0% 2.69k ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/64 21.5k ± 0% 21.5k ± 0% ~ (all samples are equal)
BM_ArenaFuseBalanced/128 43.0k ± 0% 43.0k ± 0% ~ (all samples are equal)
BM_LoadAdsDescriptor_Upb<NoLayout> 9.89M ± 0% 9.95M ± 0% +0.65% (p=0.000 n=20+20)
BM_LoadAdsDescriptor_Upb<WithLayout> 9.95M ± 0% 10.02M ± 0% +0.70% (p=0.000 n=20+20)
BM_LoadAdsDescriptor_Proto2<NoLayout> 6.62M ± 0% 6.62M ± 0% ~ (p=0.800 n=20+20)
BM_LoadAdsDescriptor_Proto2<WithLayout> 6.66M ± 0% 6.66M ± 0% ~ (p=0.752 n=20+20)
BM_Parse_Upb_FileDesc<UseArena, Copy> 36.5k ± 0% 36.5k ± 0% ~ (all samples are equal)
BM_Parse_Upb_FileDesc<UseArena, Alias> 36.5k ± 0% 36.5k ± 0% ~ (all samples are equal)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 35.8k ± 0% 35.8k ± 0% ~ (all samples are equal)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 65.3k ± 0% 65.3k ± 0% ~ (all samples are equal)
name old speed new speed delta
BM_LoadAdsDescriptor_Upb<NoLayout> 138MB/s ± 7% 132MB/s ±15% ~ (p=0.126 n=18+20)
BM_LoadAdsDescriptor_Upb<WithLayout> 124MB/s ± 5% 125MB/s ± 4% ~ (p=0.258 n=17+18)
BM_LoadAdsDescriptor_Proto2<NoLayout> 63.9MB/s ±13% 65.2MB/s ± 5% ~ (p=0.589 n=19+16)
BM_LoadAdsDescriptor_Proto2<WithLayout> 64.0MB/s ± 5% 61.3MB/s ±15% ~ (p=0.604 n=16+20)
BM_Parse_Upb_FileDesc<UseArena, Copy> 620MB/s ± 8% 622MB/s ± 4% ~ (p=1.000 n=18+18)
BM_Parse_Upb_FileDesc<UseArena, Alias> 644MB/s ±15% 679MB/s ± 4% ~ (p=0.104 n=20+17)
BM_Parse_Upb_FileDesc<InitBlock, Copy> 627MB/s ± 4% 633MB/s ± 4% ~ (p=0.134 n=18+19)
BM_Parse_Upb_FileDesc<InitBlock, Alias> 688MB/s ± 6% 682MB/s ± 4% ~ (p=0.195 n=17+18)
BM_Parse_Proto2<FileDesc, NoArena, Copy> 310MB/s ± 4% 309MB/s ± 6% ~ (p=0.767 n=18+18)
BM_Parse_Proto2<FileDesc, UseArena, Copy> 646MB/s ± 4% 649MB/s ± 4% ~ (p=0.621 n=18+16)
BM_Parse_Proto2<FileDesc, InitBlock, Copy> 666MB/s ± 3% 666MB/s ± 3% ~ (p=0.743 n=18+18)
BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 592MB/s ± 7% 593MB/s ± 4% ~ (p=0.988 n=18+19)
BM_SerializeDescriptor_Proto2 1.30GB/s ± 5% 1.32GB/s ± 5% ~ (p=0.433 n=17+17)
BM_SerializeDescriptor_Upb 756MB/s ± 5% 745MB/s ± 6% ~ (p=0.102 n=19+16)
```
PiperOrigin-RevId: 520144430
2 years ago
|
|
|
|
|
|
|
size = UPB_ALIGN_MALLOC(size);
|
|
|
|
const size_t span = size + UPB_ASAN_GUARD_SIZE;
|
|
|
|
if (UPB_UNLIKELY(UPB_PRIVATE(_upb_ArenaHas)(a) < span)) {
|
|
|
|
return UPB_PRIVATE(_upb_Arena_SlowMalloc)(a, span);
|
|
|
|
}
|
|
|
|
|
|
|
|
// We have enough space to do a fast malloc.
|
|
|
|
void* ret = a->UPB_ONLYBITS(ptr);
|
|
|
|
UPB_ASSERT(UPB_ALIGN_MALLOC((uintptr_t)ret) == (uintptr_t)ret);
|
|
|
|
UPB_ASSERT(UPB_ALIGN_MALLOC(size) == size);
|
|
|
|
UPB_UNPOISON_MEMORY_REGION(ret, size);
|
|
|
|
|
|
|
|
a->UPB_ONLYBITS(ptr) += span;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_INLINE void* UPB_PRIVATE(_upb_Arena_Realloc)(struct upb_Arena* a, void* ptr,
|
|
|
|
size_t oldsize, size_t size) {
|
|
|
|
oldsize = UPB_ALIGN_MALLOC(oldsize);
|
|
|
|
size = UPB_ALIGN_MALLOC(size);
|
|
|
|
bool is_most_recent_alloc =
|
|
|
|
(uintptr_t)ptr + oldsize == (uintptr_t)a->UPB_ONLYBITS(ptr);
|
|
|
|
|
|
|
|
if (is_most_recent_alloc) {
|
|
|
|
ptrdiff_t diff = size - oldsize;
|
|
|
|
if ((ptrdiff_t)UPB_PRIVATE(_upb_ArenaHas)(a) >= diff) {
|
|
|
|
a->UPB_ONLYBITS(ptr) += diff;
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
} else if (size <= oldsize) {
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
void* ret = UPB_PRIVATE(_upb_Arena_Malloc)(a, size);
|
|
|
|
|
|
|
|
if (ret && oldsize > 0) {
|
|
|
|
memcpy(ret, ptr, UPB_MIN(oldsize, size));
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_INLINE void UPB_PRIVATE(_upb_Arena_ShrinkLast)(struct upb_Arena* a,
|
|
|
|
void* ptr, size_t oldsize,
|
|
|
|
size_t size) {
|
|
|
|
oldsize = UPB_ALIGN_MALLOC(oldsize);
|
|
|
|
size = UPB_ALIGN_MALLOC(size);
|
|
|
|
// Must be the last alloc.
|
|
|
|
UPB_ASSERT((char*)ptr + oldsize ==
|
|
|
|
a->UPB_ONLYBITS(ptr) - UPB_ASAN_GUARD_SIZE);
|
|
|
|
UPB_ASSERT(size <= oldsize);
|
|
|
|
a->UPB_ONLYBITS(ptr) = (char*)ptr + size;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
} /* extern "C" */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "upb/port/undef.inc"
|
|
|
|
|
|
|
|
#endif /* UPB_MEM_INTERNAL_ARENA_H_ */
|