diff --git a/generated_for_cmake/google/protobuf/descriptor.upb.c b/generated_for_cmake/google/protobuf/descriptor.upb.c
index 58137f58e4..ac95b2ec9e 100644
--- a/generated_for_cmake/google/protobuf/descriptor.upb.c
+++ b/generated_for_cmake/google/protobuf/descriptor.upb.c
@@ -91,7 +91,7 @@ const upb_msglayout google_protobuf_FileDescriptorSet_msginit = {
   },
   &google_protobuf_FileDescriptorSet_submsgs[0],
   &google_protobuf_FileDescriptorSet__fields[0],
-  UPB_SIZE(4, 8), 1, false,
+  UPB_SIZE(8, 8), 1, false,
 };
 
 static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = {
@@ -449,7 +449,7 @@ const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = {
   },
   NULL,
   &google_protobuf_DescriptorProto_ReservedRange__fields[0],
-  UPB_SIZE(12, 12), 2, false,
+  UPB_SIZE(16, 16), 2, false,
 };
 
 static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = {
@@ -531,7 +531,7 @@ const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = {
   },
   &google_protobuf_ExtensionRangeOptions_submsgs[0],
   &google_protobuf_ExtensionRangeOptions__fields[0],
-  UPB_SIZE(4, 8), 1, false,
+  UPB_SIZE(8, 8), 1, false,
 };
 
 static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = {
@@ -873,7 +873,7 @@ const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msgini
   },
   NULL,
   &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
-  UPB_SIZE(12, 12), 2, false,
+  UPB_SIZE(16, 16), 2, false,
 };
 
 static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
@@ -1317,7 +1317,7 @@ const upb_msglayout google_protobuf_MessageOptions_msginit = {
   },
   &google_protobuf_MessageOptions_submsgs[0],
   &google_protobuf_MessageOptions__fields[0],
-  UPB_SIZE(12, 16), 5, false,
+  UPB_SIZE(16, 16), 5, false,
 };
 
 static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = {
@@ -1487,7 +1487,7 @@ const upb_msglayout google_protobuf_OneofOptions_msginit = {
   },
   &google_protobuf_OneofOptions_submsgs[0],
   &google_protobuf_OneofOptions__fields[0],
-  UPB_SIZE(4, 8), 1, false,
+  UPB_SIZE(8, 8), 1, false,
 };
 
 static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = {
@@ -2070,7 +2070,7 @@ const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
   },
   &google_protobuf_SourceCodeInfo_submsgs[0],
   &google_protobuf_SourceCodeInfo__fields[0],
-  UPB_SIZE(4, 8), 1, false,
+  UPB_SIZE(8, 8), 1, false,
 };
 
 static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
@@ -2234,7 +2234,7 @@ const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
   },
   &google_protobuf_GeneratedCodeInfo_submsgs[0],
   &google_protobuf_GeneratedCodeInfo__fields[0],
-  UPB_SIZE(4, 8), 1, false,
+  UPB_SIZE(8, 8), 1, false,
 };
 
 static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
diff --git a/upb/decode.c b/upb/decode.c
index 8b87ed2a01..25b07e0a1a 100644
--- a/upb/decode.c
+++ b/upb/decode.c
@@ -185,12 +185,93 @@ void decode_verifyutf8(upb_decstate *d, const char *buf, int len) {
   if (i != len) decode_err(d);
 }
 
-static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
-  bool need_realloc = arr->size - arr->len < elem;
-  if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, d->arena)) {
+static void decode_stealmem(upb_decstate *d) {
+  _upb_arena_head *a = (_upb_arena_head*)d->arena;
+  d->arena_ptr = a->ptr;
+  d->arena_end = a->end;
+  a->ptr = a->end;
+}
+
+static void decode_donatemem(upb_decstate *d) {
+  _upb_arena_head *a = (_upb_arena_head*)d->arena;
+  UPB_ASSERT(a->end == d->arena_end);
+  a->ptr = d->arena_ptr;
+}
+
+UPB_NOINLINE
+static void *decode_mallocfallback(upb_decstate *d, size_t size) {
+  char *ptr = _upb_arena_slowmalloc(d->arena, size);
+  if (!ptr) decode_err(d);
+  decode_stealmem(d);
+  return ptr;
+}
+
+UPB_FORCEINLINE
+static void *decode_malloc(upb_decstate *d, size_t size) {
+  UPB_ASSERT((size & 7) == 0);
+  char *ptr = d->arena_ptr;
+  if (UPB_UNLIKELY((size_t)(d->arena_end - d->arena_ptr) < size)) {
+    return decode_mallocfallback(d, size);
+  }
+  d->arena_ptr += size;
+  return ptr;
+}
+
+static upb_msg *decode_newmsg(upb_decstate *d, const upb_msglayout *l) {
+  size_t size = l->size + sizeof(upb_msg_internal);
+  char *msg_data = decode_malloc(d, size);
+  memset(msg_data, 0, size);
+  return msg_data + sizeof(upb_msg_internal);
+}
+
+UPB_NOINLINE
+static void decode_realloc(upb_decstate *d, upb_array *arr, size_t need_elem) {
+  decode_donatemem(d);
+  bool ok = _upb_array_realloc(arr, arr->len + need_elem, d->arena);
+  decode_stealmem(d);
+  if (!ok) decode_err(d);
+}
+
+UPB_FORCEINLINE
+static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t need_elem) {
+  if (arr->size - arr->len < need_elem) {
+    decode_realloc(d, arr, need_elem);
+    return true;
+  }
+  return false;
+}
+
+static upb_array *decode_newarr(upb_decstate *d, upb_fieldtype_t type) {
+  size_t elem_size_lg2 = _upb_fieldtype_to_sizelg2[type];
+  size_t count = type == UPB_TYPE_BOOL ? 8 : 4;
+  size_t size = sizeof(upb_array) + (count * (1 << elem_size_lg2));
+  upb_array *arr = decode_malloc(d, size);
+
+  if (!arr) {
     decode_err(d);
   }
-  return need_realloc;
+
+  arr->data = _upb_array_tagptr(arr + 1, elem_size_lg2);
+  arr->len = 0;
+  arr->size = count;
+
+  return arr;
+}
+
+static void decode_addunknown(upb_decstate *d, upb_msg *msg, const char *ptr,
+                              size_t len) {
+  upb_msg_internal *in = upb_msg_getinternal(msg);
+  if (!in->unknown || in->unknown->size - in->unknown->len < len) {
+    bool ok;
+    decode_donatemem(d);
+    ok = _upb_msg_addunknown(msg, ptr, len, d->arena);
+    decode_stealmem(d);
+    if (!ok) decode_err(d);
+  } else {
+    char *dst = UPB_PTR_AT(in->unknown + 1, in->unknown->len, char);
+    memcpy(dst, ptr, len);
+    in->unknown->len += len;
+  }
 }
 
 UPB_NOINLINE
@@ -276,8 +357,7 @@ static const upb_msglayout_field *upb_find_field(const upb_msglayout *l,
 
 static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout *layout,
                                  const upb_msglayout_field *field) {
-  const upb_msglayout *subl = layout->submsgs[field->submsg_index];
-  return _upb_msg_new(subl, d->arena);
+  return decode_newmsg(d, layout->submsgs[field->submsg_index]);
 }
 
 static void decode_tosubmsg(upb_decstate *d, upb_msg *submsg,
@@ -324,8 +404,7 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
 
   if (!arr) {
     upb_fieldtype_t type = desctype_to_fieldtype[field->descriptortype];
-    arr = _upb_array_new(d->arena, type);
-    if (!arr) decode_err(d);
+    arr = decode_newarr(d, type);
     *arrp = arr;
   }
 
@@ -423,7 +502,9 @@ static void decode_tomap(upb_decstate *d, upb_msg *msg,
     char val_size = desctype_to_mapsize[val_field->descriptortype];
     UPB_ASSERT(key_field->offset == 0);
     UPB_ASSERT(val_field->offset == sizeof(upb_strview));
+    decode_donatemem(d);  /* We'll let map use the actual arena. */
     map = _upb_map_new(d->arena, key_size, val_size);
+    decode_stealmem(d);
     *map_p = map;
   }
 
@@ -433,13 +514,15 @@ static void decode_tomap(upb_decstate *d, upb_msg *msg,
   if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
       entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) {
     /* Create proactively to handle the case where it doesn't appear. */
-    ent.v.val = upb_value_ptr(_upb_msg_new(entry->submsgs[0], d->arena));
+    ent.v.val = upb_value_ptr(decode_newmsg(d, entry->submsgs[0]));
   }
 
   decode_tosubmsg(d, &ent.k, layout, field, val.str_val);
 
   /* Insert into map. */
+  decode_donatemem(d);  /* We'll let map use the actual arena. */
   _upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, d->arena);
+  decode_stealmem(d);
 }
 
 UPB_FORCEINLINE
@@ -587,10 +670,7 @@ static const char *decode_field(upb_decstate *d, const char *ptr, upb_msg *msg,
       ptr = decode_group(d, ptr, NULL, NULL, field_number);
     }
     if (msg) {
-      if (!_upb_msg_addunknown(msg, field_start, ptr - field_start,
-                               d->arena)) {
-        decode_err(d);
-      }
+      decode_addunknown(d, msg, field_start, ptr - field_start);
     }
   }
 
@@ -618,7 +698,9 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
 
 bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
                 upb_arena *arena) {
+  bool ret;
   upb_decstate state;
+
   state.limit = buf + size;
   state.fastend = buf + size - 16;
   state.fastlimit = state.fastend;
@@ -626,12 +708,19 @@ bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
   state.depth = 64;
   state.end_group = 0;
 
-  if (setjmp(state.err)) return false;
 
-  if (size == 0) return true;
-  decode_msg(&state, buf, msg, l);
 
-  return state.end_group == 0;
+  decode_stealmem(&state);
+
+  if (setjmp(state.err)) {
+    ret = false;
+  } else {
+    decode_msg(&state, buf, msg, l);
+    ret = state.end_group == 0;
+  }
+
+  decode_donatemem(&state);
+  return ret;
 }
 
 #undef OP_SCALAR_LG2
diff --git a/upb/decode.h b/upb/decode.h
index 9a8473e026..432604c71c 100644
--- a/upb/decode.h
+++ b/upb/decode.h
@@ -21,6 +21,8 @@ typedef struct upb_decstate {
   const char *limit;       /* End of delimited region or end of buffer. */
   const char *fastlimit;   /* End of delimited region or end of buffer. */
   const char *fastend;
+  char *arena_ptr;
+  char *arena_end;
   upb_array *arr;
   upb_arena *arena;
   int depth;
diff --git a/upb/msg.c b/upb/msg.c
index 25747c8481..b9310c9f7b 100644
--- a/upb/msg.c
+++ b/upb/msg.c
@@ -7,7 +7,7 @@
 
 /** upb_msg *******************************************************************/
 
-static const char _upb_fieldtype_to_sizelg2[12] = {
+const char _upb_fieldtype_to_sizelg2[12] = {
   0,
   0,  /* UPB_TYPE_BOOL */
   2,  /* UPB_TYPE_FLOAT */
@@ -22,17 +22,10 @@ static const char _upb_fieldtype_to_sizelg2[12] = {
   UPB_SIZE(3, 4),  /* UPB_TYPE_BYTES */
 };
 
-static uintptr_t tag_arrptr(void* ptr, int elem_size_lg2) {
-  UPB_ASSERT(elem_size_lg2 <= 4);
-  return (uintptr_t)ptr | elem_size_lg2;
-}
-
-static int upb_msg_internalsize(const upb_msglayout *l) {
-  return sizeof(upb_msg_internal) - l->extendable * sizeof(void *);
-}
+static const size_t overhead = sizeof(upb_msg_internal);
 
 static size_t upb_msg_sizeof(const upb_msglayout *l) {
-  return l->size + upb_msg_internalsize(l);
+  return l->size + overhead;
 }
 
 static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
@@ -40,14 +33,9 @@ static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
   return UPB_PTR_AT(msg, -size, upb_msg_internal);
 }
 
-static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
-  return (upb_msg_internal*)upb_msg_getinternal_const(msg);
-}
-
 void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l) {
-  ptrdiff_t internal = upb_msg_internalsize(l);
-  void *mem = UPB_PTR_AT(msg, -internal, char);
-  memset(mem, 0, l->size + internal);
+  void *mem = UPB_PTR_AT(msg, -overhead, char);
+  memset(mem, 0, l->size + overhead);
 }
 
 upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
@@ -58,37 +46,54 @@ upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
     return NULL;
   }
 
-  msg = UPB_PTR_AT(mem, upb_msg_internalsize(l), upb_msg);
+  msg = UPB_PTR_AT(mem, overhead, upb_msg);
   _upb_msg_clear(msg, l);
   return msg;
 }
 
 bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
                          upb_arena *arena) {
+
   upb_msg_internal *in = upb_msg_getinternal(msg);
-  if (len > in->unknown_size - in->unknown_len) {
-    upb_alloc *alloc = upb_arena_alloc(arena);
-    size_t need = in->unknown_size + len;
-    size_t newsize = UPB_MAX(in->unknown_size * 2, need);
-    void *mem = upb_realloc(alloc, in->unknown, in->unknown_size, newsize);
-    if (!mem) return false;
-    in->unknown = mem;
-    in->unknown_size = newsize;
+  if (!in->unknown) {
+    size_t size = 128;
+    while (size < len) size *= 2;
+    in->unknown = upb_arena_malloc(arena, size + overhead);
+    if (!in->unknown) return false;
+    in->unknown->size = size;
+    in->unknown->len = 0;
+  } else if (in->unknown->size - in->unknown->len < len) {
+    upb_msg_unknowndata *grown;
+    size_t need = in->unknown->len + len;
+    size_t size = in->unknown->size;
+    while (size < need) size *= 2;
+    grown = upb_arena_realloc(
+        arena, in->unknown, in->unknown->size + overhead, size + overhead);
+    if (!grown) return false;  /* Old buffer stays attached to the message. */
+    in->unknown = grown;
+    in->unknown->size = size;  /* The copied header still has the old size. */
   }
-  memcpy(in->unknown + in->unknown_len, data, len);
-  in->unknown_len += len;
+  memcpy(UPB_PTR_AT(in->unknown + 1, in->unknown->len, char), data, len);
+  in->unknown->len += len;
   return true;
 }
 
 void _upb_msg_discardunknown_shallow(upb_msg *msg) {
   upb_msg_internal *in = upb_msg_getinternal(msg);
-  in->unknown_len = 0;
+  if (in->unknown) {
+    in->unknown->len = 0;
+  }
 }
 
 const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
   const upb_msg_internal *in = upb_msg_getinternal_const(msg);
-  *len = in->unknown_len;
-  return in->unknown;
+  if (in->unknown) {
+    *len = in->unknown->len;
+    return (char*)(in->unknown + 1);
+  } else {
+    *len = 0;
+    return NULL;
+  }
 }
 
 /** upb_array *****************************************************************/
@@ -100,7 +105,7 @@ upb_array *_upb_array_new(upb_arena *a, upb_fieldtype_t type) {
     return NULL;
   }
 
-  arr->data = tag_arrptr(NULL, _upb_fieldtype_to_sizelg2[type]);
+  arr->data = _upb_array_tagptr(NULL, _upb_fieldtype_to_sizelg2[type]);
   arr->len = 0;
   arr->size = 0;
 
@@ -124,7 +129,7 @@ bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena) {
     return false;
   }
 
-  arr->data = tag_arrptr(ptr, elem_size_lg2);
+  arr->data = _upb_array_tagptr(ptr, elem_size_lg2);
   arr->size = new_size;
   return true;
 }
diff --git a/upb/msg.h b/upb/msg.h
index 7a1ec760ed..b132afe604 100644
--- a/upb/msg.h
+++ b/upb/msg.h
@@ -72,25 +72,28 @@ typedef struct upb_msglayout {
  * compatibility.  We put these before the user's data.  The user's upb_msg*
  * points after the upb_msg_internal. */
 
-/* Used when a message is not extendable. */
 typedef struct {
-  char *unknown;
-  size_t unknown_len;
-  size_t unknown_size;
-} upb_msg_internal;
+  uint32_t len;
+  uint32_t size;
+  /* Data follows. */
+} upb_msg_unknowndata;
 
-/* Used when a message is extendable. */
+/* Internal message header; unknown stays NULL until unknown data is added. */
 typedef struct {
-  upb_inttable *extdict;
-  upb_msg_internal base;
-} upb_msg_internal_withext;
+  upb_msg_unknowndata *unknown;
+} upb_msg_internal;
 
 /* Maps upb_fieldtype_t -> memory size. */
 extern char _upb_fieldtype_to_size[12];
+extern const char _upb_fieldtype_to_sizelg2[12];
 
 /* Creates a new messages with the given layout on the given arena. */
 upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a);
 
+UPB_INLINE upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
+  return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal);
+}
+
 /* Clears the given message. */
 void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l);
 
@@ -189,6 +192,11 @@ UPB_INLINE const void *_upb_array_constptr(const upb_array *arr) {
   return (void*)(arr->data & ~(uintptr_t)7);
 }
 
+UPB_INLINE uintptr_t _upb_array_tagptr(void* ptr, int elem_size_lg2) {
+  UPB_ASSERT(elem_size_lg2 <= 4);
+  return (uintptr_t)ptr | elem_size_lg2;
+}
+
 UPB_INLINE void *_upb_array_ptr(upb_array *arr) {
   return (void*)_upb_array_constptr(arr);
 }
diff --git a/upbc/message_layout.cc b/upbc/message_layout.cc
index cb7f7f9c43..14aa3e1a3e 100644
--- a/upbc/message_layout.cc
+++ b/upbc/message_layout.cc
@@ -105,7 +105,7 @@ int64_t MessageLayout::FieldLayoutRank(const protobuf::FieldDescriptor* field) {
 
 void MessageLayout::ComputeLayout(const protobuf::Descriptor* descriptor) {
   size_ = Size{0, 0};
-  maxalign_ = Size{0, 0};
+  maxalign_ = Size{8, 8};
 
   if (descriptor->options().map_entry()) {
     // Map entries aren't actually stored, they are only used during parsing.