Changed upb's arena alignment from 16 to 8.

upb has traditionally returned 16-byte-aligned pointers from arena allocation.  This was out of an abundance of caution, since users could theoretically be using upb arenas to allocate memory that is then used for SSE/AVX values (e.g. [`__m128`](https://docs.microsoft.com/en-us/cpp/cpp/m128?view=msvc-170)), which require 16-byte alignment.

In practice, the protobuf C++ arena has used 8-byte alignment for 8 years, with no significant problems that I know of arising from SSE or similar instruction sets.

Reducing the alignment requirement to 8 will save memory.  It will also help with compatibility on 32-bit architectures where `malloc()` only returns 8-byte aligned memory.  The immediate motivation is to fix the win32 build for Python protobuf.

PiperOrigin-RevId: 448331777
pull/13171/head
Joshua Haberman 3 years ago committed by Copybara-Service
parent c3cfd09b01
commit 1cf8214e4d
  1. 1
      BUILD
  2. 18
      upb/mini_table.c
  3. 2
      upb/msg_internal.h
  4. 3
      upb/port_def.inc
  5. 1
      upb/port_undef.inc
  6. 10
      upb/test_generated_code.cc
  7. 7
      upb/upb.c

@ -360,6 +360,7 @@ cc_test(
srcs = ["upb/test_generated_code.cc"],
deps = [
":empty_upbdefs_proto",
":port",
":test_messages_proto2_proto_upb",
":test_messages_proto3_proto_upb",
":test_upb_proto",

@ -912,18 +912,12 @@ static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
}
}
if (d->platform == kUpb_MiniTablePlatform_64Bit) {
// For compatibility with fast table parsing, we have to align this up to a
// multiple of 16 + 8. This is because arena alloc size must be a multiple
// of 16, but we will add sizeof(upb_Message_Internal) at runtime, as the
// table size does not include this value.
//
// This is a bit convoluted and should probably be simplified.
d->table->size = UPB_ALIGN_UP(d->table->size, 8);
if (UPB_ALIGN_UP(d->table->size, 16) == d->table->size) {
d->table->size += 8;
}
}
// The fasttable parser (supported on 64-bit only) depends on this being a
// multiple of 8 in order to satisfy UPB_MALLOC_ALIGN, which is also 8.
//
// On 32-bit we could potentially make this smaller, but there is no
// compelling reason to optimize this right now.
d->table->size = UPB_ALIGN_UP(d->table->size, 8);
}
upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,

@ -275,7 +275,7 @@ UPB_INLINE size_t upb_msg_sizeof(const upb_MiniTable* l) {
UPB_INLINE upb_Message* _upb_Message_New_inl(const upb_MiniTable* l,
upb_Arena* a) {
size_t size = upb_msg_sizeof(l);
void* mem = upb_Arena_Malloc(a, size);
void* mem = upb_Arena_Malloc(a, size + sizeof(upb_Message_Internal));
upb_Message* msg;
if (UPB_UNLIKELY(!mem)) return NULL;
msg = UPB_PTR_AT(mem, sizeof(upb_Message_Internal), upb_Message);

@ -88,9 +88,10 @@
#define UPB_INLINE static
#endif
#define UPB_MALLOC_ALIGN 8
#define UPB_ALIGN_UP(size, align) (((size) + (align) - 1) / (align) * (align))
#define UPB_ALIGN_DOWN(size, align) ((size) / (align) * (align))
#define UPB_ALIGN_MALLOC(size) UPB_ALIGN_UP(size, 16)
#define UPB_ALIGN_MALLOC(size) UPB_ALIGN_UP(size, UPB_MALLOC_ALIGN)
#define UPB_ALIGN_OF(type) offsetof (struct { char c; type member; }, member)
/* Hints to the compiler about likely/unlikely branches. */

@ -37,6 +37,7 @@
#undef UPB_ALIGN_DOWN
#undef UPB_ALIGN_MALLOC
#undef UPB_ALIGN_OF
#undef UPB_MALLOC_ALIGN
#undef UPB_LIKELY
#undef UPB_UNLIKELY
#undef UPB_FORCEINLINE

@ -36,6 +36,9 @@
#include "upb/test.upb.h"
#include "upb/upb.hpp"
// Must be last.
#include "upb/port_def.inc"
#if !defined(MIN)
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
@ -962,16 +965,17 @@ TEST(GeneratedCode, ArenaDecode) {
TEST(GeneratedCode, ArenaUnaligned) {
char buf1[1024];
// Force the pointer to be unaligned.
char* unaligned_buf_ptr = (char*)((uintptr_t)buf1 | 7);
uintptr_t low_bits = UPB_MALLOC_ALIGN - 1;
char* unaligned_buf_ptr = (char*)((uintptr_t)buf1 | low_bits);
upb_Arena* arena = upb_Arena_Init(
unaligned_buf_ptr, &buf1[sizeof(buf1)] - unaligned_buf_ptr, NULL);
char* mem = static_cast<char*>(upb_Arena_Malloc(arena, 5));
EXPECT_EQ(0, reinterpret_cast<uintptr_t>(mem) & 15);
EXPECT_EQ(0, reinterpret_cast<uintptr_t>(mem) & low_bits);
upb_Arena_Free(arena);
// Try the same, but with a size so small that aligning up will overflow.
arena = upb_Arena_Init(unaligned_buf_ptr, 5, &upb_alloc_global);
mem = static_cast<char*>(upb_Arena_Malloc(arena, 5));
EXPECT_EQ(0, reinterpret_cast<uintptr_t>(mem) & 15);
EXPECT_EQ(0, reinterpret_cast<uintptr_t>(mem) & low_bits);
upb_Arena_Free(arena);
}

@ -116,8 +116,6 @@ upb_alloc upb_alloc_global = {&upb_global_allocfunc};
/* upb_Arena ******************************************************************/
/* Be conservative and choose 16 in case anyone is using SSE. */
struct mem_block {
struct mem_block* next;
uint32_t size;
@ -130,7 +128,8 @@ typedef struct cleanup_ent {
void* ud;
} cleanup_ent;
static const size_t memblock_reserve = UPB_ALIGN_UP(sizeof(mem_block), 16);
static const size_t memblock_reserve =
UPB_ALIGN_UP(sizeof(mem_block), UPB_MALLOC_ALIGN);
static upb_Arena* arena_findroot(upb_Arena* a) {
/* Path splitting keeps time complexity down, see:
@ -218,7 +217,7 @@ upb_Arena* upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc) {
if (n) {
/* Align initial pointer up so that we return properly-aligned pointers. */
void* aligned = (void*)UPB_ALIGN_UP((uintptr_t)mem, 16);
void* aligned = (void*)UPB_ALIGN_UP((uintptr_t)mem, UPB_MALLOC_ALIGN);
size_t delta = (uintptr_t)aligned - (uintptr_t)mem;
n = delta <= n ? n - delta : 0;
mem = aligned;

Loading…
Cancel
Save