diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c index 99d8c41a9f..e29858f994 100644 --- a/php/ext/google/protobuf/php-upb.c +++ b/php/ext/google/protobuf/php-upb.c @@ -3790,6 +3790,7 @@ upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) { #include +#include #include @@ -3920,14 +3921,19 @@ static int _upb_mapsorter_cmpext(const void* _a, const void* _b) { bool _upb_mapsorter_pushexts(_upb_mapsorter* s, const upb_Message_Internal* in, _upb_sortedmap* sorted) { - size_t count = (in->size - in->ext_begin) / sizeof(upb_Extension); + size_t count = 0; + for (size_t i = 0; i < in->size; i++) { + count += upb_TaggedAuxPtr_IsExtension(in->aux_data[i]); + } if (!_upb_mapsorter_resize(s, sorted, count)) return false; if (count == 0) return true; - const upb_Extension* exts = - UPB_PTR_AT(in, in->ext_begin, const upb_Extension); - - for (size_t i = 0; i < count; i++) { - s->entries[sorted->start + i] = &exts[i]; + const upb_Extension** entry = + (const upb_Extension**)&s->entries[sorted->start]; + for (size_t i = 0; i < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + *entry++ = upb_TaggedAuxPtr_Extension(tagged_ptr); + } } qsort(&s->entries[sorted->start], count, sizeof(*s->entries), _upb_mapsorter_cmpext); @@ -3953,10 +3959,17 @@ bool UPB_PRIVATE(_upb_Message_AddUnknown)(upb_Message* msg, const char* data, UPB_ASSERT(!upb_Message_IsFrozen(msg)); // TODO: b/376969853 - Add debug check that the unknown field is an overall // valid proto field - if (!UPB_PRIVATE(_upb_Message_EnsureAvailable)(msg, len, arena)) return false; + if (!UPB_PRIVATE(_upb_Message_ReserveSlot)(msg, arena)) { + return false; + } + upb_StringView* view = upb_Arena_Malloc(arena, sizeof(upb_StringView) + len); + if (!view) return false; + char* copy = UPB_PTR_AT(view, sizeof(upb_StringView), char); + memcpy(copy, data, len); + view->data = copy; + view->size = len; upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - memcpy(UPB_PTR_AT(in, in->unknown_end, char), data, len); - in->unknown_end += len; + in->aux_data[in->size++] = upb_TaggedAuxPtr_MakeUnknownData(view); return true; } @@ -3970,71 +3983,66 @@ bool UPB_PRIVATE(_upb_Message_AddUnknownV)(struct upb_Message* msg, for (size_t i = 0; i < count; i++) { total_len += data[i].size; } - if (!UPB_PRIVATE(_upb_Message_EnsureAvailable)(msg, total_len, arena)) - return false; + if (!UPB_PRIVATE(_upb_Message_ReserveSlot)(msg, arena)) return false; - upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + upb_StringView* view = + upb_Arena_Malloc(arena, sizeof(upb_StringView) + total_len); + if (!view) return false; + char* copy = UPB_PTR_AT(view, sizeof(upb_StringView), char); + view->data = copy; + view->size = total_len; for (size_t i = 0; i < count; i++) { - memcpy(UPB_PTR_AT(in, in->unknown_end, char), data[i].data, data[i].size); - in->unknown_end += data[i].size; + memcpy(copy, data[i].data, data[i].size); + copy += data[i].size; } // TODO: b/376969853 - Add debug check that the unknown field is an overall // valid proto field + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + in->aux_data[in->size++] = upb_TaggedAuxPtr_MakeUnknownData(view); return true; } void _upb_Message_DiscardUnknown_shallow(upb_Message* msg) { UPB_ASSERT(!upb_Message_IsFrozen(msg)); upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - if (in) { - in->unknown_end = sizeof(upb_Message_Internal); - } -} - -const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len) { - upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - if (in) { - *len = in->unknown_end - sizeof(upb_Message_Internal); - return (char*)(in + 1); - } else { - *len = 0; - return NULL; + if (!in) return; + size_t size = 0; + for (size_t i = 0; i < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + in->aux_data[size++] = tagged_ptr; + } } + in->size = size; } bool upb_Message_DeleteUnknown(upb_Message* msg, upb_StringView* data, uintptr_t* iter) { UPB_ASSERT(!upb_Message_IsFrozen(msg)); - UPB_ASSERT(*iter == kUpb_Message_UnknownBegin + 1); + UPB_ASSERT(*iter != kUpb_Message_UnknownBegin); upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - const char* internal_unknown_end = UPB_PTR_AT(in, in->unknown_end, char); - + UPB_ASSERT(in); + UPB_ASSERT(*iter <= in->size); #ifndef NDEBUG - size_t full_unknown_size; - const char* full_unknown = upb_Message_GetUnknown(msg, &full_unknown_size); - UPB_ASSERT((uintptr_t)data->data >= (uintptr_t)full_unknown); - UPB_ASSERT((uintptr_t)data->data < - (uintptr_t)(full_unknown + full_unknown_size)); - UPB_ASSERT((uintptr_t)(data->data + data->size) > (uintptr_t)data->data); - UPB_ASSERT((uintptr_t)(data->data + data->size) <= - (uintptr_t)internal_unknown_end); + upb_TaggedAuxPtr unknown_ptr = in->aux_data[*iter - 1]; + UPB_ASSERT(upb_TaggedAuxPtr_IsUnknown(unknown_ptr)); + upb_StringView* unknown = upb_TaggedAuxPtr_UnknownData(unknown_ptr); + UPB_ASSERT(unknown->data == data->data); + UPB_ASSERT(unknown->size == data->size); #endif - const char* end = data->data + data->size; - size_t offset = data->data - (const char*)in; - if (end != internal_unknown_end) { - memmove(UPB_PTR_AT(in, offset, char), end, internal_unknown_end - end); - } - in->unknown_end -= data->size; - data->size = in->unknown_end - offset; - return data->size != 0; + in->aux_data[*iter - 1] = upb_TaggedAuxPtr_Null(); + + return upb_Message_NextUnknown(msg, data, iter); } size_t upb_Message_ExtensionCount(const upb_Message* msg) { - const upb_MiniTableExtension* e; + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + if (!in) return 0; + const upb_MiniTableExtension* ext; upb_MessageValue val; - size_t iter = kUpb_Message_ExtensionBegin; + uintptr_t iter = kUpb_Message_ExtensionBegin; size_t count = 0; - while (upb_Message_NextExtension(msg, &e, &val, &iter)) { + while (upb_Message_NextExtension(msg, &ext, &val, &iter)) { count++; } return count; @@ -4077,13 +4085,22 @@ void upb_Message_Freeze(upb_Message* msg, const upb_MiniTable* m) { } // Extensions. - uintptr_t iter = kUpb_Message_ExtensionBegin; - const upb_MiniTableExtension* e; - upb_MessageValue val; - while (upb_Message_NextExtension(msg, &e, &val, &iter)) { + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + // TODO: b/376969853 - use iterator API + size_t size = in ? in->size : 0; + for (size_t i = 0; i < size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (!upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + continue; + } + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + const upb_MiniTableExtension* e = ext->ext; const upb_MiniTableField* f = &e->UPB_PRIVATE(field); const upb_MiniTable* m2 = upb_MiniTableExtension_GetSubMessage(e); + upb_MessageValue val; + memcpy(&val, &(ext->data), sizeof(upb_MessageValue)); + switch (UPB_PRIVATE(_upb_MiniTableField_Mode)(f)) { case kUpb_FieldMode_Array: { upb_Array* arr = (upb_Array*)val.array_val; @@ -4830,41 +4847,44 @@ upb_Message* _upb_Message_Copy(upb_Message* dst, const upb_Message* src, } } // Clone extensions. - size_t ext_count; - const upb_Extension* ext = UPB_PRIVATE(_upb_Message_Getexts)(src, &ext_count); - for (size_t i = 0; i < ext_count; ++i) { - const upb_Extension* msg_ext = &ext[i]; - const upb_MiniTableField* field = &msg_ext->ext->UPB_PRIVATE(field); - upb_Extension* dst_ext = UPB_PRIVATE(_upb_Message_GetOrCreateExtension)( - dst, msg_ext->ext, arena); - if (!dst_ext) return NULL; - if (upb_MiniTableField_IsScalar(field)) { - if (!upb_Clone_ExtensionValue(msg_ext->ext, msg_ext, dst_ext, arena)) { - return NULL; + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(src); + if (!in) return dst; + + for (size_t i = 0; i < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + // Clone extension + const upb_Extension* msg_ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + const upb_MiniTableField* field = &msg_ext->ext->UPB_PRIVATE(field); + upb_Extension* dst_ext = UPB_PRIVATE(_upb_Message_GetOrCreateExtension)( + dst, msg_ext->ext, arena); + if (!dst_ext) return NULL; + if (upb_MiniTableField_IsScalar(field)) { + if (!upb_Clone_ExtensionValue(msg_ext->ext, msg_ext, dst_ext, arena)) { + return NULL; + } + } else { + upb_Array* msg_array = (upb_Array*)msg_ext->data.array_val; + UPB_ASSERT(msg_array); + upb_Array* cloned_array = upb_Array_DeepClone( + msg_array, upb_MiniTableField_CType(field), + upb_MiniTableExtension_GetSubMessage(msg_ext->ext), arena); + if (!cloned_array) { + return NULL; + } + dst_ext->data.array_val = cloned_array; } - } else { - upb_Array* msg_array = (upb_Array*)msg_ext->data.array_val; - UPB_ASSERT(msg_array); - upb_Array* cloned_array = upb_Array_DeepClone( - msg_array, upb_MiniTableField_CType(field), - upb_MiniTableExtension_GetSubMessage(msg_ext->ext), arena); - if (!cloned_array) { + } else if (upb_TaggedAuxPtr_IsUnknown(tagged_ptr)) { + // Clone unknown + upb_StringView* unknown = upb_TaggedAuxPtr_UnknownData(tagged_ptr); + // Make a copy into destination arena. + if (!UPB_PRIVATE(_upb_Message_AddUnknown)(dst, unknown->data, + unknown->size, arena, false)) { return NULL; } - dst_ext->data.array_val = cloned_array; } } - // Clone unknowns. - uintptr_t iter = kUpb_Message_UnknownBegin; - upb_StringView unknowns; - while (upb_Message_NextUnknown(src, &unknowns, &iter)) { - // Make a copy into destination arena. - if (!UPB_PRIVATE(_upb_Message_AddUnknown)(dst, unknowns.data, unknowns.size, - arena, false)) { - return NULL; - } - } return dst; } @@ -8252,6 +8272,45 @@ static void encode_ext(upb_encstate* e, const upb_MiniTableExtension* ext, } } +static void encode_exts(upb_encstate* e, const upb_MiniTable* m, + const upb_Message* msg) { + if (m->UPB_PRIVATE(ext) == kUpb_ExtMode_NonExtendable) return; + + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + if (!in) return; + + /* Encode all extensions together. Unlike C++, we do not attempt to keep + * these in field number order relative to normal fields or even to each + * other. */ + uintptr_t iter = kUpb_Message_ExtensionBegin; + const upb_MiniTableExtension* ext; + upb_MessageValue ext_val; + if (!UPB_PRIVATE(_upb_Message_NextExtensionReverse)(msg, &ext, &ext_val, + &iter)) { + // Message has no extensions. + return; + } + + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + if (!_upb_mapsorter_pushexts(&e->sorter, in, &sorted)) { + // TODO: b/378744096 - handle alloc failure + } + const upb_Extension* ext; + while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { + encode_ext(e, ext->ext, ext->data, + m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + do { + encode_ext(e, ext, ext_val, + m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); + } while (UPB_PRIVATE(_upb_Message_NextExtensionReverse)(msg, &ext, &ext_val, + &iter)); + } +} + static void encode_message(upb_encstate* e, const upb_Message* msg, const upb_MiniTable* m, size_t* size) { size_t pre_len = e->limit - e->ptr; @@ -8284,35 +8343,7 @@ static void encode_message(upb_encstate* e, const upb_Message* msg, } } - if (m->UPB_PRIVATE(ext) != kUpb_ExtMode_NonExtendable) { - upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - if (in) { - /* Encode all extensions together. Unlike C++, we do not attempt to keep - * these in field number order relative to normal fields or even to each - * other. */ - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - if (!_upb_mapsorter_pushexts(&e->sorter, in, &sorted)) { - // TODO: b/378744096 - handle alloc failure - } - const upb_Extension* ext; - while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { - encode_ext(e, ext->ext, ext->data, - m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - const upb_MiniTableExtension* ext; - upb_MessageValue ext_val; - uintptr_t iter = kUpb_Message_ExtensionBegin; - while (UPB_PRIVATE(_upb_Message_NextExtensionReverse)( - msg, &ext, &ext_val, &iter)) { - encode_ext(e, ext, ext_val, - m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); - } - } - } - } + encode_exts(e, m, msg); if (upb_MiniTable_FieldCount(m)) { const upb_MiniTableField* f = @@ -12363,6 +12394,7 @@ int upb_Unicode_ToUTF8(uint32_t cp, char* out) { } +#include #include @@ -12370,50 +12402,42 @@ int upb_Unicode_ToUTF8(uint32_t cp, char* out) { const upb_Extension* UPB_PRIVATE(_upb_Message_Getext)( const struct upb_Message* msg, const upb_MiniTableExtension* e) { - size_t n; - const upb_Extension* ext = UPB_PRIVATE(_upb_Message_Getexts)(msg, &n); - - // For now we use linear search exclusively to find extensions. - // If this becomes an issue due to messages with lots of extensions, - // we can introduce a table of some sort. - for (size_t i = 0; i < n; i++) { - if (ext[i].ext == e) { - return &ext[i]; + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + if (!in) return NULL; + + for (size_t i = 0; i < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + if (ext->ext == e) { + return ext; + } } } return NULL; } -const upb_Extension* UPB_PRIVATE(_upb_Message_Getexts)( - const struct upb_Message* msg, size_t* count) { - upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - if (in) { - *count = (in->size - in->ext_begin) / sizeof(upb_Extension); - return UPB_PTR_AT(in, in->ext_begin, const upb_Extension); - } else { - *count = 0; - return NULL; - } -} - upb_Extension* UPB_PRIVATE(_upb_Message_GetOrCreateExtension)( struct upb_Message* msg, const upb_MiniTableExtension* e, upb_Arena* a) { UPB_ASSERT(!upb_Message_IsFrozen(msg)); upb_Extension* ext = (upb_Extension*)UPB_PRIVATE(_upb_Message_Getext)(msg, e); if (ext) return ext; - if (!UPB_PRIVATE(_upb_Message_EnsureAvailable)(msg, sizeof(upb_Extension), a)) - return NULL; + + if (!UPB_PRIVATE(_upb_Message_ReserveSlot)(msg, a)) return NULL; upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - in->ext_begin -= sizeof(upb_Extension); - ext = UPB_PTR_AT(in, in->ext_begin, void); + ext = upb_Arena_Malloc(a, sizeof(upb_Extension)); + if (!ext) return NULL; memset(ext, 0, sizeof(upb_Extension)); ext->ext = e; + in->aux_data[in->size++] = upb_TaggedAuxPtr_MakeExtension(ext); return ext; } #include +#include +#include #include @@ -12449,41 +12473,32 @@ const float kUpb_FltInfinity = UPB_INFINITY; const double kUpb_Infinity = UPB_INFINITY; const double kUpb_NaN = UPB_NAN; -bool UPB_PRIVATE(_upb_Message_EnsureAvailable)(struct upb_Message* msg, - size_t need, upb_Arena* a) { - UPB_ASSERT(!upb_Message_IsFrozen(msg)); - const size_t overhead = sizeof(upb_Message_Internal); +static size_t _upb_Message_SizeOfInternal(size_t count) { + return UPB_SIZEOF_FLEX(upb_Message_Internal, aux_data, count); +} +bool UPB_PRIVATE(_upb_Message_ReserveSlot)(struct upb_Message* msg, + upb_Arena* a) { + UPB_ASSERT(!upb_Message_IsFrozen(msg)); upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); if (!in) { // No internal data, allocate from scratch. - size_t size = UPB_MAX(128, upb_RoundUpToPowerOfTwo(need + overhead)); - in = upb_Arena_Malloc(a, size); + size_t capacity = 4; + in = upb_Arena_Malloc(a, _upb_Message_SizeOfInternal(capacity)); if (!in) return false; - - in->size = size; - in->unknown_end = overhead; - in->ext_begin = size; + in->size = 0; + in->capacity = capacity; UPB_PRIVATE(_upb_Message_SetInternal)(msg, in); - } else if (in->ext_begin - in->unknown_end < need) { + } else if (in->capacity == in->size) { // Internal data is too small, reallocate. - size_t new_size = upb_RoundUpToPowerOfTwo(in->size + need); - size_t ext_bytes = in->size - in->ext_begin; - size_t new_ext_begin = new_size - ext_bytes; - in = upb_Arena_Realloc(a, in, in->size, new_size); + size_t new_capacity = upb_RoundUpToPowerOfTwo(in->size + 1); + in = upb_Arena_Realloc(a, in, _upb_Message_SizeOfInternal(in->capacity), + _upb_Message_SizeOfInternal(new_capacity)); if (!in) return false; - - if (ext_bytes) { - // Need to move extension data to the end. - char* ptr = (char*)in; - memmove(ptr + new_ext_begin, ptr + in->ext_begin, ext_bytes); - } - in->ext_begin = new_ext_begin; - in->size = new_size; + in->capacity = new_capacity; UPB_PRIVATE(_upb_Message_SetInternal)(msg, in); } - - UPB_ASSERT(in->ext_begin - in->unknown_end >= need); + UPB_ASSERT(in->capacity - in->size >= 1); return true; } @@ -16024,15 +16039,18 @@ bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, } if (ext_pool) { - // Return any extensions that are set. - size_t count; - const upb_Extension* ext = UPB_PRIVATE(_upb_Message_Getexts)(msg, &count); - if (i - n < count) { - ext += count - 1 - (i - n); - memcpy(out_val, &ext->data, sizeof(*out_val)); - *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); - *iter = i; - return true; + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + if (!in) return false; + + for (; (i - n) < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i - n]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + memcpy(out_val, &ext->data, sizeof(*out_val)); + *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); + *iter = i; + return true; + } } } diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h index a937c9c38e..dc006b7119 100644 --- a/php/ext/google/protobuf/php-upb.h +++ b/php/ext/google/protobuf/php-upb.h @@ -2992,11 +2992,6 @@ upb_Extension* UPB_PRIVATE(_upb_Message_GetOrCreateExtension)( struct upb_Message* msg, const upb_MiniTableExtension* ext, upb_Arena* arena); -// Returns an array of extensions for this message. -// Note: the array is ordered in reverse relative to the order of creation. -const upb_Extension* UPB_PRIVATE(_upb_Message_Getexts)( - const struct upb_Message* msg, size_t* count); - // Returns an extension for a message with a given mini table, // or NULL if no extension exists with this mini table. const upb_Extension* UPB_PRIVATE(_upb_Message_Getext)( @@ -3050,27 +3045,58 @@ extern const double kUpb_NaN; // Internal members of a upb_Message that track unknown fields and/or // extensions. We can change this without breaking binary compatibility. -typedef struct upb_Message_Internal { - // Total size of this structure, including the data that follows. - // Must be aligned to 8, which is alignof(upb_Extension) - uint32_t size; +typedef struct upb_TaggedAuxPtr { + uintptr_t ptr; +} upb_TaggedAuxPtr; - /* Offsets relative to the beginning of this structure. - * - * Unknown data grows forward from the beginning to unknown_end. - * Extension data grows backward from size to ext_begin. - * When the two meet, we're out of data and have to realloc. - * - * If we imagine that the final member of this struct is: - * char data[size - overhead]; // overhead = sizeof(upb_Message_Internal) - * - * Then we have: - * unknown data: data[0 .. (unknown_end - overhead)] - * extensions data: data[(ext_begin - overhead) .. (size - overhead)] */ - uint32_t unknown_end; - uint32_t ext_begin; - // Data follows, as if there were an array: - // char data[size - sizeof(upb_Message_Internal)]; +UPB_INLINE bool upb_TaggedAuxPtr_IsNull(upb_TaggedAuxPtr ptr) { + return ptr.ptr == 0; +} + +UPB_INLINE bool upb_TaggedAuxPtr_IsExtension(upb_TaggedAuxPtr ptr) { + return ptr.ptr & 1; +} + +UPB_INLINE bool upb_TaggedAuxPtr_IsUnknown(upb_TaggedAuxPtr ptr) { + return (ptr.ptr != 0) && ((ptr.ptr & 1) == 0); +} + +UPB_INLINE upb_Extension* upb_TaggedAuxPtr_Extension(upb_TaggedAuxPtr ptr) { + UPB_ASSERT(upb_TaggedAuxPtr_IsExtension(ptr)); + return (upb_Extension*)(ptr.ptr & ~1ULL); +} + +UPB_INLINE upb_StringView* upb_TaggedAuxPtr_UnknownData(upb_TaggedAuxPtr ptr) { + UPB_ASSERT(!upb_TaggedAuxPtr_IsExtension(ptr)); + return (upb_StringView*)(ptr.ptr); +} + +UPB_INLINE upb_TaggedAuxPtr upb_TaggedAuxPtr_Null(void) { + upb_TaggedAuxPtr ptr; + ptr.ptr = 0; + return ptr; +} + +UPB_INLINE upb_TaggedAuxPtr +upb_TaggedAuxPtr_MakeExtension(const upb_Extension* e) { + upb_TaggedAuxPtr ptr; + ptr.ptr = (uintptr_t)e | 1; + return ptr; +} + +UPB_INLINE upb_TaggedAuxPtr +upb_TaggedAuxPtr_MakeUnknownData(const upb_StringView* sv) { + upb_TaggedAuxPtr ptr; + ptr.ptr = (uintptr_t)sv; + return ptr; +} + +typedef struct upb_Message_Internal { + // Total number of entries set in aux_data + size_t size; + size_t capacity; + // Tagged pointers to upb_StringView or upb_Extension + upb_TaggedAuxPtr aux_data[]; } upb_Message_Internal; #ifdef UPB_TRACING_ENABLED @@ -3112,11 +3138,10 @@ bool UPB_PRIVATE(_upb_Message_AddUnknownV)(struct upb_Message* msg, upb_Arena* arena, upb_StringView data[], size_t count); -// Ensure at least `need` unused bytes are available for unknown fields or -// extensions. Returns false if a reallocation is needed to satisfy the request, -// and fails. -bool UPB_PRIVATE(_upb_Message_EnsureAvailable)(struct upb_Message* msg, - size_t need, upb_Arena* arena); +// Ensures at least one slot is available in the aux_data of this message. +// Returns false if a reallocation is needed to satisfy the request, and fails. +bool UPB_PRIVATE(_upb_Message_ReserveSlot)(struct upb_Message* msg, + upb_Arena* arena); #define kUpb_Message_UnknownBegin 0 #define kUpb_Message_ExtensionBegin 0 @@ -3124,17 +3149,20 @@ bool UPB_PRIVATE(_upb_Message_EnsureAvailable)(struct upb_Message* msg, UPB_INLINE bool upb_Message_NextUnknown(const struct upb_Message* msg, upb_StringView* data, uintptr_t* iter) { const upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - if (in && *iter == kUpb_Message_UnknownBegin) { - size_t len = in->unknown_end - sizeof(upb_Message_Internal); - if (len != 0) { - data->size = len; - data->data = (const char*)(in + 1); - (*iter)++; - return true; + size_t i = *iter; + if (in) { + while (i < in->size) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i++]; + if (upb_TaggedAuxPtr_IsUnknown(tagged_ptr)) { + *data = *upb_TaggedAuxPtr_UnknownData(tagged_ptr); + *iter = i; + return true; + } } } data->size = 0; data->data = NULL; + *iter = i; return false; } @@ -3148,22 +3176,25 @@ UPB_INLINE bool upb_Message_NextExtension(const struct upb_Message* msg, const upb_MiniTableExtension** out_e, upb_MessageValue* out_v, uintptr_t* iter) { - size_t count; - const upb_Extension* exts = UPB_PRIVATE(_upb_Message_Getexts)(msg, &count); - size_t i = *iter; - while (i++ < count) { - // Extensions are stored in reverse wire order, so to iterate in wire order, - // we need to iterate backwards. - const upb_Extension* ext = &exts[count - i]; - - // Empty repeated fields or maps semantically don't exist. - if (UPB_PRIVATE(_upb_Extension_IsEmpty)(ext)) continue; - - *out_e = ext->ext; - *out_v = ext->data; - *iter = i; - return true; + const upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + uintptr_t i = *iter; + if (in) { + while (i < in->size) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i++]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + + // Empty repeated fields or maps semantically don't exist. + if (UPB_PRIVATE(_upb_Extension_IsEmpty)(ext)) continue; + + *out_e = ext->ext; + *out_v = ext->data; + *iter = i; + return true; + } + } } + *iter = i; return false; } @@ -3171,12 +3202,17 @@ UPB_INLINE bool upb_Message_NextExtension(const struct upb_Message* msg, UPB_INLINE bool UPB_PRIVATE(_upb_Message_NextExtensionReverse)( const struct upb_Message* msg, const upb_MiniTableExtension** out_e, upb_MessageValue* out_v, uintptr_t* iter) { - size_t count; - const upb_Extension* exts = UPB_PRIVATE(_upb_Message_Getexts)(msg, &count); - size_t i = *iter; - while (i++ < count) { - // Extensions are stored in reverse wire order - const upb_Extension* ext = &exts[i - 1]; + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + if (!in) return false; + uintptr_t i = *iter; + size_t size = in->size; + while (i < size) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[size - 1 - i]; + i++; + if (!upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + continue; + } + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); // Empty repeated fields or maps semantically don't exist. if (UPB_PRIVATE(_upb_Extension_IsEmpty)(ext)) continue; @@ -3186,7 +3222,7 @@ UPB_INLINE bool UPB_PRIVATE(_upb_Message_NextExtensionReverse)( *iter = i; return true; } - + *iter = i; return false; } @@ -4281,9 +4317,7 @@ UPB_API_INLINE void upb_Message_Clear(struct upb_Message* msg, memset(msg, 0, m->UPB_PRIVATE(size)); if (in) { // Reset the internal buffer to empty. - in->unknown_end = sizeof(upb_Message_Internal); - in->ext_begin = in->size; - UPB_PRIVATE(_upb_Message_SetInternal)(msg, in); + in->size = 0; } } @@ -4307,11 +4341,15 @@ UPB_API_INLINE void upb_Message_ClearExtension( UPB_ASSERT(!upb_Message_IsFrozen(msg)); upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); if (!in) return; - const upb_Extension* base = UPB_PTR_AT(in, in->ext_begin, upb_Extension); - upb_Extension* ext = (upb_Extension*)UPB_PRIVATE(_upb_Message_Getext)(msg, e); - if (ext) { - *ext = *base; - in->ext_begin += sizeof(upb_Extension); + for (size_t i = 0; i < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + if (ext->ext == e) { + in->aux_data[i] = upb_TaggedAuxPtr_Null(); + return; + } + } } } diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c index 0b60b42347..90d4dbfde1 100644 --- a/ruby/ext/google/protobuf_c/ruby-upb.c +++ b/ruby/ext/google/protobuf_c/ruby-upb.c @@ -3790,6 +3790,7 @@ upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) { #include +#include #include @@ -3920,14 +3921,19 @@ static int _upb_mapsorter_cmpext(const void* _a, const void* _b) { bool _upb_mapsorter_pushexts(_upb_mapsorter* s, const upb_Message_Internal* in, _upb_sortedmap* sorted) { - size_t count = (in->size - in->ext_begin) / sizeof(upb_Extension); + size_t count = 0; + for (size_t i = 0; i < in->size; i++) { + count += upb_TaggedAuxPtr_IsExtension(in->aux_data[i]); + } if (!_upb_mapsorter_resize(s, sorted, count)) return false; if (count == 0) return true; - const upb_Extension* exts = - UPB_PTR_AT(in, in->ext_begin, const upb_Extension); - - for (size_t i = 0; i < count; i++) { - s->entries[sorted->start + i] = &exts[i]; + const upb_Extension** entry = + (const upb_Extension**)&s->entries[sorted->start]; + for (size_t i = 0; i < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + *entry++ = upb_TaggedAuxPtr_Extension(tagged_ptr); + } } qsort(&s->entries[sorted->start], count, sizeof(*s->entries), _upb_mapsorter_cmpext); @@ -3953,10 +3959,17 @@ bool UPB_PRIVATE(_upb_Message_AddUnknown)(upb_Message* msg, const char* data, UPB_ASSERT(!upb_Message_IsFrozen(msg)); // TODO: b/376969853 - Add debug check that the unknown field is an overall // valid proto field - if (!UPB_PRIVATE(_upb_Message_EnsureAvailable)(msg, len, arena)) return false; + if (!UPB_PRIVATE(_upb_Message_ReserveSlot)(msg, arena)) { + return false; + } + upb_StringView* view = upb_Arena_Malloc(arena, sizeof(upb_StringView) + len); + if (!view) return false; + char* copy = UPB_PTR_AT(view, sizeof(upb_StringView), char); + memcpy(copy, data, len); + view->data = copy; + view->size = len; upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - memcpy(UPB_PTR_AT(in, in->unknown_end, char), data, len); - in->unknown_end += len; + in->aux_data[in->size++] = upb_TaggedAuxPtr_MakeUnknownData(view); return true; } @@ -3970,71 +3983,66 @@ bool UPB_PRIVATE(_upb_Message_AddUnknownV)(struct upb_Message* msg, for (size_t i = 0; i < count; i++) { total_len += data[i].size; } - if (!UPB_PRIVATE(_upb_Message_EnsureAvailable)(msg, total_len, arena)) - return false; + if (!UPB_PRIVATE(_upb_Message_ReserveSlot)(msg, arena)) return false; - upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + upb_StringView* view = + upb_Arena_Malloc(arena, sizeof(upb_StringView) + total_len); + if (!view) return false; + char* copy = UPB_PTR_AT(view, sizeof(upb_StringView), char); + view->data = copy; + view->size = total_len; for (size_t i = 0; i < count; i++) { - memcpy(UPB_PTR_AT(in, in->unknown_end, char), data[i].data, data[i].size); - in->unknown_end += data[i].size; + memcpy(copy, data[i].data, data[i].size); + copy += data[i].size; } // TODO: b/376969853 - Add debug check that the unknown field is an overall // valid proto field + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + in->aux_data[in->size++] = upb_TaggedAuxPtr_MakeUnknownData(view); return true; } void _upb_Message_DiscardUnknown_shallow(upb_Message* msg) { UPB_ASSERT(!upb_Message_IsFrozen(msg)); upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - if (in) { - in->unknown_end = sizeof(upb_Message_Internal); - } -} - -const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len) { - upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - if (in) { - *len = in->unknown_end - sizeof(upb_Message_Internal); - return (char*)(in + 1); - } else { - *len = 0; - return NULL; + if (!in) return; + size_t size = 0; + for (size_t i = 0; i < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + in->aux_data[size++] = tagged_ptr; + } } + in->size = size; } bool upb_Message_DeleteUnknown(upb_Message* msg, upb_StringView* data, uintptr_t* iter) { UPB_ASSERT(!upb_Message_IsFrozen(msg)); - UPB_ASSERT(*iter == kUpb_Message_UnknownBegin + 1); + UPB_ASSERT(*iter != kUpb_Message_UnknownBegin); upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - const char* internal_unknown_end = UPB_PTR_AT(in, in->unknown_end, char); - + UPB_ASSERT(in); + UPB_ASSERT(*iter <= in->size); #ifndef NDEBUG - size_t full_unknown_size; - const char* full_unknown = upb_Message_GetUnknown(msg, &full_unknown_size); - UPB_ASSERT((uintptr_t)data->data >= (uintptr_t)full_unknown); - UPB_ASSERT((uintptr_t)data->data < - (uintptr_t)(full_unknown + full_unknown_size)); - UPB_ASSERT((uintptr_t)(data->data + data->size) > (uintptr_t)data->data); - UPB_ASSERT((uintptr_t)(data->data + data->size) <= - (uintptr_t)internal_unknown_end); + upb_TaggedAuxPtr unknown_ptr = in->aux_data[*iter - 1]; + UPB_ASSERT(upb_TaggedAuxPtr_IsUnknown(unknown_ptr)); + upb_StringView* unknown = upb_TaggedAuxPtr_UnknownData(unknown_ptr); + UPB_ASSERT(unknown->data == data->data); + UPB_ASSERT(unknown->size == data->size); #endif - const char* end = data->data + data->size; - size_t offset = data->data - (const char*)in; - if (end != internal_unknown_end) { - memmove(UPB_PTR_AT(in, offset, char), end, internal_unknown_end - end); - } - in->unknown_end -= data->size; - data->size = in->unknown_end - offset; - return data->size != 0; + in->aux_data[*iter - 1] = upb_TaggedAuxPtr_Null(); + + return upb_Message_NextUnknown(msg, data, iter); } size_t upb_Message_ExtensionCount(const upb_Message* msg) { - const upb_MiniTableExtension* e; + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + if (!in) return 0; + const upb_MiniTableExtension* ext; upb_MessageValue val; - size_t iter = kUpb_Message_ExtensionBegin; + uintptr_t iter = kUpb_Message_ExtensionBegin; size_t count = 0; - while (upb_Message_NextExtension(msg, &e, &val, &iter)) { + while (upb_Message_NextExtension(msg, &ext, &val, &iter)) { count++; } return count; @@ -4077,13 +4085,22 @@ void upb_Message_Freeze(upb_Message* msg, const upb_MiniTable* m) { } // Extensions. - uintptr_t iter = kUpb_Message_ExtensionBegin; - const upb_MiniTableExtension* e; - upb_MessageValue val; - while (upb_Message_NextExtension(msg, &e, &val, &iter)) { + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + // TODO: b/376969853 - use iterator API + size_t size = in ? in->size : 0; + for (size_t i = 0; i < size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (!upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + continue; + } + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + const upb_MiniTableExtension* e = ext->ext; const upb_MiniTableField* f = &e->UPB_PRIVATE(field); const upb_MiniTable* m2 = upb_MiniTableExtension_GetSubMessage(e); + upb_MessageValue val; + memcpy(&val, &(ext->data), sizeof(upb_MessageValue)); + switch (UPB_PRIVATE(_upb_MiniTableField_Mode)(f)) { case kUpb_FieldMode_Array: { upb_Array* arr = (upb_Array*)val.array_val; @@ -4830,41 +4847,44 @@ upb_Message* _upb_Message_Copy(upb_Message* dst, const upb_Message* src, } } // Clone extensions. - size_t ext_count; - const upb_Extension* ext = UPB_PRIVATE(_upb_Message_Getexts)(src, &ext_count); - for (size_t i = 0; i < ext_count; ++i) { - const upb_Extension* msg_ext = &ext[i]; - const upb_MiniTableField* field = &msg_ext->ext->UPB_PRIVATE(field); - upb_Extension* dst_ext = UPB_PRIVATE(_upb_Message_GetOrCreateExtension)( - dst, msg_ext->ext, arena); - if (!dst_ext) return NULL; - if (upb_MiniTableField_IsScalar(field)) { - if (!upb_Clone_ExtensionValue(msg_ext->ext, msg_ext, dst_ext, arena)) { - return NULL; + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(src); + if (!in) return dst; + + for (size_t i = 0; i < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + // Clone extension + const upb_Extension* msg_ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + const upb_MiniTableField* field = &msg_ext->ext->UPB_PRIVATE(field); + upb_Extension* dst_ext = UPB_PRIVATE(_upb_Message_GetOrCreateExtension)( + dst, msg_ext->ext, arena); + if (!dst_ext) return NULL; + if (upb_MiniTableField_IsScalar(field)) { + if (!upb_Clone_ExtensionValue(msg_ext->ext, msg_ext, dst_ext, arena)) { + return NULL; + } + } else { + upb_Array* msg_array = (upb_Array*)msg_ext->data.array_val; + UPB_ASSERT(msg_array); + upb_Array* cloned_array = upb_Array_DeepClone( + msg_array, upb_MiniTableField_CType(field), + upb_MiniTableExtension_GetSubMessage(msg_ext->ext), arena); + if (!cloned_array) { + return NULL; + } + dst_ext->data.array_val = cloned_array; } - } else { - upb_Array* msg_array = (upb_Array*)msg_ext->data.array_val; - UPB_ASSERT(msg_array); - upb_Array* cloned_array = upb_Array_DeepClone( - msg_array, upb_MiniTableField_CType(field), - upb_MiniTableExtension_GetSubMessage(msg_ext->ext), arena); - if (!cloned_array) { + } else if (upb_TaggedAuxPtr_IsUnknown(tagged_ptr)) { + // Clone unknown + upb_StringView* unknown = upb_TaggedAuxPtr_UnknownData(tagged_ptr); + // Make a copy into destination arena. + if (!UPB_PRIVATE(_upb_Message_AddUnknown)(dst, unknown->data, + unknown->size, arena, false)) { return NULL; } - dst_ext->data.array_val = cloned_array; } } - // Clone unknowns. - uintptr_t iter = kUpb_Message_UnknownBegin; - upb_StringView unknowns; - while (upb_Message_NextUnknown(src, &unknowns, &iter)) { - // Make a copy into destination arena. - if (!UPB_PRIVATE(_upb_Message_AddUnknown)(dst, unknowns.data, unknowns.size, - arena, false)) { - return NULL; - } - } return dst; } @@ -8252,6 +8272,45 @@ static void encode_ext(upb_encstate* e, const upb_MiniTableExtension* ext, } } +static void encode_exts(upb_encstate* e, const upb_MiniTable* m, + const upb_Message* msg) { + if (m->UPB_PRIVATE(ext) == kUpb_ExtMode_NonExtendable) return; + + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + if (!in) return; + + /* Encode all extensions together. Unlike C++, we do not attempt to keep + * these in field number order relative to normal fields or even to each + * other. */ + uintptr_t iter = kUpb_Message_ExtensionBegin; + const upb_MiniTableExtension* ext; + upb_MessageValue ext_val; + if (!UPB_PRIVATE(_upb_Message_NextExtensionReverse)(msg, &ext, &ext_val, + &iter)) { + // Message has no extensions. + return; + } + + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + if (!_upb_mapsorter_pushexts(&e->sorter, in, &sorted)) { + // TODO: b/378744096 - handle alloc failure + } + const upb_Extension* ext; + while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { + encode_ext(e, ext->ext, ext->data, + m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + do { + encode_ext(e, ext, ext_val, + m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); + } while (UPB_PRIVATE(_upb_Message_NextExtensionReverse)(msg, &ext, &ext_val, + &iter)); + } +} + static void encode_message(upb_encstate* e, const upb_Message* msg, const upb_MiniTable* m, size_t* size) { size_t pre_len = e->limit - e->ptr; @@ -8284,35 +8343,7 @@ static void encode_message(upb_encstate* e, const upb_Message* msg, } } - if (m->UPB_PRIVATE(ext) != kUpb_ExtMode_NonExtendable) { - upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - if (in) { - /* Encode all extensions together. Unlike C++, we do not attempt to keep - * these in field number order relative to normal fields or even to each - * other. */ - if (e->options & kUpb_EncodeOption_Deterministic) { - _upb_sortedmap sorted; - if (!_upb_mapsorter_pushexts(&e->sorter, in, &sorted)) { - // TODO: b/378744096 - handle alloc failure - } - const upb_Extension* ext; - while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { - encode_ext(e, ext->ext, ext->data, - m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); - } - _upb_mapsorter_popmap(&e->sorter, &sorted); - } else { - const upb_MiniTableExtension* ext; - upb_MessageValue ext_val; - uintptr_t iter = kUpb_Message_ExtensionBegin; - while (UPB_PRIVATE(_upb_Message_NextExtensionReverse)( - msg, &ext, &ext_val, &iter)) { - encode_ext(e, ext, ext_val, - m->UPB_PRIVATE(ext) == kUpb_ExtMode_IsMessageSet); - } - } - } - } + encode_exts(e, m, msg); if (upb_MiniTable_FieldCount(m)) { const upb_MiniTableField* f = @@ -11850,6 +11881,7 @@ int upb_Unicode_ToUTF8(uint32_t cp, char* out) { } +#include #include @@ -11857,50 +11889,42 @@ int upb_Unicode_ToUTF8(uint32_t cp, char* out) { const upb_Extension* UPB_PRIVATE(_upb_Message_Getext)( const struct upb_Message* msg, const upb_MiniTableExtension* e) { - size_t n; - const upb_Extension* ext = UPB_PRIVATE(_upb_Message_Getexts)(msg, &n); - - // For now we use linear search exclusively to find extensions. - // If this becomes an issue due to messages with lots of extensions, - // we can introduce a table of some sort. - for (size_t i = 0; i < n; i++) { - if (ext[i].ext == e) { - return &ext[i]; + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + if (!in) return NULL; + + for (size_t i = 0; i < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + if (ext->ext == e) { + return ext; + } } } return NULL; } -const upb_Extension* UPB_PRIVATE(_upb_Message_Getexts)( - const struct upb_Message* msg, size_t* count) { - upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - if (in) { - *count = (in->size - in->ext_begin) / sizeof(upb_Extension); - return UPB_PTR_AT(in, in->ext_begin, const upb_Extension); - } else { - *count = 0; - return NULL; - } -} - upb_Extension* UPB_PRIVATE(_upb_Message_GetOrCreateExtension)( struct upb_Message* msg, const upb_MiniTableExtension* e, upb_Arena* a) { UPB_ASSERT(!upb_Message_IsFrozen(msg)); upb_Extension* ext = (upb_Extension*)UPB_PRIVATE(_upb_Message_Getext)(msg, e); if (ext) return ext; - if (!UPB_PRIVATE(_upb_Message_EnsureAvailable)(msg, sizeof(upb_Extension), a)) - return NULL; + + if (!UPB_PRIVATE(_upb_Message_ReserveSlot)(msg, a)) return NULL; upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - in->ext_begin -= sizeof(upb_Extension); - ext = UPB_PTR_AT(in, in->ext_begin, void); + ext = upb_Arena_Malloc(a, sizeof(upb_Extension)); + if (!ext) return NULL; memset(ext, 0, sizeof(upb_Extension)); ext->ext = e; + in->aux_data[in->size++] = upb_TaggedAuxPtr_MakeExtension(ext); return ext; } #include +#include +#include #include @@ -11936,41 +11960,32 @@ const float kUpb_FltInfinity = UPB_INFINITY; const double kUpb_Infinity = UPB_INFINITY; const double kUpb_NaN = UPB_NAN; -bool UPB_PRIVATE(_upb_Message_EnsureAvailable)(struct upb_Message* msg, - size_t need, upb_Arena* a) { - UPB_ASSERT(!upb_Message_IsFrozen(msg)); - const size_t overhead = sizeof(upb_Message_Internal); +static size_t _upb_Message_SizeOfInternal(size_t count) { + return UPB_SIZEOF_FLEX(upb_Message_Internal, aux_data, count); +} +bool UPB_PRIVATE(_upb_Message_ReserveSlot)(struct upb_Message* msg, + upb_Arena* a) { + UPB_ASSERT(!upb_Message_IsFrozen(msg)); upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); if (!in) { // No internal data, allocate from scratch. - size_t size = UPB_MAX(128, upb_RoundUpToPowerOfTwo(need + overhead)); - in = upb_Arena_Malloc(a, size); + size_t capacity = 4; + in = upb_Arena_Malloc(a, _upb_Message_SizeOfInternal(capacity)); if (!in) return false; - - in->size = size; - in->unknown_end = overhead; - in->ext_begin = size; + in->size = 0; + in->capacity = capacity; UPB_PRIVATE(_upb_Message_SetInternal)(msg, in); - } else if (in->ext_begin - in->unknown_end < need) { + } else if (in->capacity == in->size) { // Internal data is too small, reallocate. - size_t new_size = upb_RoundUpToPowerOfTwo(in->size + need); - size_t ext_bytes = in->size - in->ext_begin; - size_t new_ext_begin = new_size - ext_bytes; - in = upb_Arena_Realloc(a, in, in->size, new_size); + size_t new_capacity = upb_RoundUpToPowerOfTwo(in->size + 1); + in = upb_Arena_Realloc(a, in, _upb_Message_SizeOfInternal(in->capacity), + _upb_Message_SizeOfInternal(new_capacity)); if (!in) return false; - - if (ext_bytes) { - // Need to move extension data to the end. - char* ptr = (char*)in; - memmove(ptr + new_ext_begin, ptr + in->ext_begin, ext_bytes); - } - in->ext_begin = new_ext_begin; - in->size = new_size; + in->capacity = new_capacity; UPB_PRIVATE(_upb_Message_SetInternal)(msg, in); } - - UPB_ASSERT(in->ext_begin - in->unknown_end >= need); + UPB_ASSERT(in->capacity - in->size >= 1); return true; } @@ -15511,15 +15526,18 @@ bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m, } if (ext_pool) { - // Return any extensions that are set. - size_t count; - const upb_Extension* ext = UPB_PRIVATE(_upb_Message_Getexts)(msg, &count); - if (i - n < count) { - ext += count - 1 - (i - n); - memcpy(out_val, &ext->data, sizeof(*out_val)); - *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); - *iter = i; - return true; + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + if (!in) return false; + + for (; (i - n) < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i - n]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + memcpy(out_val, &ext->data, sizeof(*out_val)); + *out_f = upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext); + *iter = i; + return true; + } } } diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h index 45a6e1292c..8feb07cb3d 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.h +++ b/ruby/ext/google/protobuf_c/ruby-upb.h @@ -2994,11 +2994,6 @@ upb_Extension* UPB_PRIVATE(_upb_Message_GetOrCreateExtension)( struct upb_Message* msg, const upb_MiniTableExtension* ext, upb_Arena* arena); -// Returns an array of extensions for this message. -// Note: the array is ordered in reverse relative to the order of creation. -const upb_Extension* UPB_PRIVATE(_upb_Message_Getexts)( - const struct upb_Message* msg, size_t* count); - // Returns an extension for a message with a given mini table, // or NULL if no extension exists with this mini table. const upb_Extension* UPB_PRIVATE(_upb_Message_Getext)( @@ -3052,27 +3047,58 @@ extern const double kUpb_NaN; // Internal members of a upb_Message that track unknown fields and/or // extensions. We can change this without breaking binary compatibility. -typedef struct upb_Message_Internal { - // Total size of this structure, including the data that follows. - // Must be aligned to 8, which is alignof(upb_Extension) - uint32_t size; +typedef struct upb_TaggedAuxPtr { + uintptr_t ptr; +} upb_TaggedAuxPtr; - /* Offsets relative to the beginning of this structure. - * - * Unknown data grows forward from the beginning to unknown_end. - * Extension data grows backward from size to ext_begin. - * When the two meet, we're out of data and have to realloc. - * - * If we imagine that the final member of this struct is: - * char data[size - overhead]; // overhead = sizeof(upb_Message_Internal) - * - * Then we have: - * unknown data: data[0 .. (unknown_end - overhead)] - * extensions data: data[(ext_begin - overhead) .. (size - overhead)] */ - uint32_t unknown_end; - uint32_t ext_begin; - // Data follows, as if there were an array: - // char data[size - sizeof(upb_Message_Internal)]; +UPB_INLINE bool upb_TaggedAuxPtr_IsNull(upb_TaggedAuxPtr ptr) { + return ptr.ptr == 0; +} + +UPB_INLINE bool upb_TaggedAuxPtr_IsExtension(upb_TaggedAuxPtr ptr) { + return ptr.ptr & 1; +} + +UPB_INLINE bool upb_TaggedAuxPtr_IsUnknown(upb_TaggedAuxPtr ptr) { + return (ptr.ptr != 0) && ((ptr.ptr & 1) == 0); +} + +UPB_INLINE upb_Extension* upb_TaggedAuxPtr_Extension(upb_TaggedAuxPtr ptr) { + UPB_ASSERT(upb_TaggedAuxPtr_IsExtension(ptr)); + return (upb_Extension*)(ptr.ptr & ~1ULL); +} + +UPB_INLINE upb_StringView* upb_TaggedAuxPtr_UnknownData(upb_TaggedAuxPtr ptr) { + UPB_ASSERT(!upb_TaggedAuxPtr_IsExtension(ptr)); + return (upb_StringView*)(ptr.ptr); +} + +UPB_INLINE upb_TaggedAuxPtr upb_TaggedAuxPtr_Null(void) { + upb_TaggedAuxPtr ptr; + ptr.ptr = 0; + return ptr; +} + +UPB_INLINE upb_TaggedAuxPtr +upb_TaggedAuxPtr_MakeExtension(const upb_Extension* e) { + upb_TaggedAuxPtr ptr; + ptr.ptr = (uintptr_t)e | 1; + return ptr; +} + +UPB_INLINE upb_TaggedAuxPtr +upb_TaggedAuxPtr_MakeUnknownData(const upb_StringView* sv) { + upb_TaggedAuxPtr ptr; + ptr.ptr = (uintptr_t)sv; + return ptr; +} + +typedef struct upb_Message_Internal { + // Total number of entries set in aux_data + size_t size; + size_t capacity; + // Tagged pointers to upb_StringView or upb_Extension + upb_TaggedAuxPtr aux_data[]; } upb_Message_Internal; #ifdef UPB_TRACING_ENABLED @@ -3114,11 +3140,10 @@ bool UPB_PRIVATE(_upb_Message_AddUnknownV)(struct upb_Message* msg, upb_Arena* arena, upb_StringView data[], size_t count); -// Ensure at least `need` unused bytes are available for unknown fields or -// extensions. Returns false if a reallocation is needed to satisfy the request, -// and fails. -bool UPB_PRIVATE(_upb_Message_EnsureAvailable)(struct upb_Message* msg, - size_t need, upb_Arena* arena); +// Ensures at least one slot is available in the aux_data of this message. +// Returns false if a reallocation is needed to satisfy the request, and fails. +bool UPB_PRIVATE(_upb_Message_ReserveSlot)(struct upb_Message* msg, + upb_Arena* arena); #define kUpb_Message_UnknownBegin 0 #define kUpb_Message_ExtensionBegin 0 @@ -3126,17 +3151,20 @@ bool UPB_PRIVATE(_upb_Message_EnsureAvailable)(struct upb_Message* msg, UPB_INLINE bool upb_Message_NextUnknown(const struct upb_Message* msg, upb_StringView* data, uintptr_t* iter) { const upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); - if (in && *iter == kUpb_Message_UnknownBegin) { - size_t len = in->unknown_end - sizeof(upb_Message_Internal); - if (len != 0) { - data->size = len; - data->data = (const char*)(in + 1); - (*iter)++; - return true; + size_t i = *iter; + if (in) { + while (i < in->size) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i++]; + if (upb_TaggedAuxPtr_IsUnknown(tagged_ptr)) { + *data = *upb_TaggedAuxPtr_UnknownData(tagged_ptr); + *iter = i; + return true; + } } } data->size = 0; data->data = NULL; + *iter = i; return false; } @@ -3150,22 +3178,25 @@ UPB_INLINE bool upb_Message_NextExtension(const struct upb_Message* msg, const upb_MiniTableExtension** out_e, upb_MessageValue* out_v, uintptr_t* iter) { - size_t count; - const upb_Extension* exts = UPB_PRIVATE(_upb_Message_Getexts)(msg, &count); - size_t i = *iter; - while (i++ < count) { - // Extensions are stored in reverse wire order, so to iterate in wire order, - // we need to iterate backwards. - const upb_Extension* ext = &exts[count - i]; - - // Empty repeated fields or maps semantically don't exist. - if (UPB_PRIVATE(_upb_Extension_IsEmpty)(ext)) continue; - - *out_e = ext->ext; - *out_v = ext->data; - *iter = i; - return true; + const upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + uintptr_t i = *iter; + if (in) { + while (i < in->size) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i++]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + + // Empty repeated fields or maps semantically don't exist. + if (UPB_PRIVATE(_upb_Extension_IsEmpty)(ext)) continue; + + *out_e = ext->ext; + *out_v = ext->data; + *iter = i; + return true; + } + } } + *iter = i; return false; } @@ -3173,12 +3204,17 @@ UPB_INLINE bool upb_Message_NextExtension(const struct upb_Message* msg, UPB_INLINE bool UPB_PRIVATE(_upb_Message_NextExtensionReverse)( const struct upb_Message* msg, const upb_MiniTableExtension** out_e, upb_MessageValue* out_v, uintptr_t* iter) { - size_t count; - const upb_Extension* exts = UPB_PRIVATE(_upb_Message_Getexts)(msg, &count); - size_t i = *iter; - while (i++ < count) { - // Extensions are stored in reverse wire order - const upb_Extension* ext = &exts[i - 1]; + upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); + if (!in) return false; + uintptr_t i = *iter; + size_t size = in->size; + while (i < size) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[size - 1 - i]; + i++; + if (!upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + continue; + } + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); // Empty repeated fields or maps semantically don't exist. if (UPB_PRIVATE(_upb_Extension_IsEmpty)(ext)) continue; @@ -3188,7 +3224,7 @@ UPB_INLINE bool UPB_PRIVATE(_upb_Message_NextExtensionReverse)( *iter = i; return true; } - + *iter = i; return false; } @@ -4283,9 +4319,7 @@ UPB_API_INLINE void upb_Message_Clear(struct upb_Message* msg, memset(msg, 0, m->UPB_PRIVATE(size)); if (in) { // Reset the internal buffer to empty. - in->unknown_end = sizeof(upb_Message_Internal); - in->ext_begin = in->size; - UPB_PRIVATE(_upb_Message_SetInternal)(msg, in); + in->size = 0; } } @@ -4309,11 +4343,15 @@ UPB_API_INLINE void upb_Message_ClearExtension( UPB_ASSERT(!upb_Message_IsFrozen(msg)); upb_Message_Internal* in = UPB_PRIVATE(_upb_Message_GetInternal)(msg); if (!in) return; - const upb_Extension* base = UPB_PTR_AT(in, in->ext_begin, upb_Extension); - upb_Extension* ext = (upb_Extension*)UPB_PRIVATE(_upb_Message_Getext)(msg, e); - if (ext) { - *ext = *base; - in->ext_begin += sizeof(upb_Extension); + for (size_t i = 0; i < in->size; i++) { + upb_TaggedAuxPtr tagged_ptr = in->aux_data[i]; + if (upb_TaggedAuxPtr_IsExtension(tagged_ptr)) { + const upb_Extension* ext = upb_TaggedAuxPtr_Extension(tagged_ptr); + if (ext->ext == e) { + in->aux_data[i] = upb_TaggedAuxPtr_Null(); + return; + } + } } }