From 9e9d43b46278425da8ec3bb40f5fecfa2ce6e7cc Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Mon, 12 Dec 2022 13:54:20 -0800 Subject: [PATCH] Add support for promoting unlinked map fields from unknown to upb_Map. PiperOrigin-RevId: 494820200 --- upb/message/accessors.c | 89 ++++++++++++++++++++++++---------- upb/message/accessors.h | 41 ++++++++++++++++ upb/message/accessors_test.cc | 90 +++++++++++++++++++++++++++++++++++ upb/mini_table/common.c | 35 ++++++++++++++ upb/mini_table/common.h | 2 + upb/test/test.proto | 6 +++ 6 files changed, 238 insertions(+), 25 deletions(-) diff --git a/upb/message/accessors.c b/upb/message/accessors.c index cd6aa19117..beb5fe64ba 100644 --- a/upb/message/accessors.c +++ b/upb/message/accessors.c @@ -27,7 +27,9 @@ #include "upb/message/accessors.h" +#include "upb/collections/array.h" #include "upb/collections/array_internal.h" +#include "upb/collections/map.h" #include "upb/message/message.h" #include "upb/wire/decode.h" #include "upb/wire/encode.h" @@ -36,31 +38,6 @@ // Must be last. #include "upb/port/def.inc" -static size_t _upb_MiniTableField_Size(const upb_MiniTableField* f) { - static unsigned char sizes[] = { - 0, /* 0 */ - 8, /* kUpb_FieldType_Double */ - 4, /* kUpb_FieldType_Float */ - 8, /* kUpb_FieldType_Int64 */ - 8, /* kUpb_FieldType_UInt64 */ - 4, /* kUpb_FieldType_Int32 */ - 8, /* kUpb_FieldType_Fixed64 */ - 4, /* kUpb_FieldType_Fixed32 */ - 1, /* kUpb_FieldType_Bool */ - sizeof(upb_StringView), /* kUpb_FieldType_String */ - sizeof(void*), /* kUpb_FieldType_Group */ - sizeof(void*), /* kUpb_FieldType_Message */ - sizeof(upb_StringView), /* kUpb_FieldType_Bytes */ - 4, /* kUpb_FieldType_UInt32 */ - 4, /* kUpb_FieldType_Enum */ - 4, /* kUpb_FieldType_SFixed32 */ - 8, /* kUpb_FieldType_SFixed64 */ - 4, /* kUpb_FieldType_SInt32 */ - 8, /* kUpb_FieldType_SInt64 */ - }; - return upb_IsRepeatedOrMap(f) ? sizeof(void*) : sizes[f->descriptortype]; -} - // Maps descriptor type to elem_size_lg2. static int _upb_MiniTableField_CTypeLg2Size(const upb_MiniTableField* f) { static const uint8_t sizes[] = { @@ -391,6 +368,7 @@ upb_UnknownToMessageRet upb_MiniTable_PromoteUnknownToMessage( upb_Message* message = NULL; // Callers should check that message is not set first before calling // PromotoUnknownToMessage. + UPB_ASSERT(mini_table->subs[field->submsg_index].submsg == sub_mini_table); UPB_ASSERT(upb_MiniTable_GetMessage(msg, field, NULL) == NULL); upb_UnknownToMessageRet ret; ret.status = kUpb_UnknownToMessage_Ok; @@ -462,3 +440,64 @@ upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMessageArray( } while (unknown.status == kUpb_FindUnknown_Ok); return kUpb_UnknownToMessage_Ok; } + +upb_MapInsertStatus upb_Message_InsertMapEntry(upb_Map* map, + const upb_MiniTable* mini_table, + const upb_MiniTableField* field, + upb_Message* map_entry_message, + upb_Arena* arena) { + const upb_MiniTable* map_entry_mini_table = + mini_table->subs[field->submsg_index].submsg; + UPB_ASSERT(map_entry_mini_table); + UPB_ASSERT(map_entry_mini_table->field_count == 2); + const upb_MiniTableField* map_entry_key_field = + &map_entry_mini_table->fields[0]; + const upb_MiniTableField* map_entry_value_field = + &map_entry_mini_table->fields[1]; + // Map key/value cannot have explicit defaults, + // hence assuming a zero default is valid. + upb_MessageValue default_val; + memset(&default_val, 0, sizeof(upb_MessageValue)); + upb_MessageValue map_entry_key; + upb_MessageValue map_entry_value; + _upb_Message_GetField(map_entry_message, map_entry_key_field, &default_val, + &map_entry_key); + _upb_Message_GetField(map_entry_message, map_entry_value_field, &default_val, + &map_entry_value); + return upb_Map_Insert(map, map_entry_key, map_entry_value, arena); +} + +// Moves repeated messages in unknowns to a upb_Map. +upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMap( + upb_Message* msg, const upb_MiniTable* mini_table, + const upb_MiniTableField* field, int decode_options, upb_Arena* arena) { + const upb_MiniTable* map_entry_mini_table = + mini_table->subs[field->submsg_index].submsg; + UPB_ASSERT(map_entry_mini_table); + UPB_ASSERT(map_entry_mini_table); + UPB_ASSERT(map_entry_mini_table->field_count == 2); + UPB_ASSERT(upb_FieldMode_Get(field) == kUpb_FieldMode_Map); + // Find all unknowns with given field number and parse. + upb_FindUnknownRet unknown; + while (1) { + unknown = upb_MiniTable_FindUnknown(msg, field->number); + if (unknown.status != kUpb_FindUnknown_Ok) break; + upb_UnknownToMessageRet ret = upb_MiniTable_ParseUnknownMessage( + unknown.ptr, unknown.len, map_entry_mini_table, + /* base_message= */ NULL, decode_options, arena); + if (ret.status != kUpb_UnknownToMessage_Ok) return ret.status; + // Allocate map on demand before append. + upb_Map* map = + upb_MiniTable_GetMutableMap(msg, map_entry_mini_table, field, arena); + upb_Message* map_entry_message = ret.message; + upb_MapInsertStatus insert_status = upb_Message_InsertMapEntry( + map, mini_table, field, map_entry_message, arena); + if (insert_status == kUpb_MapInsertStatus_OutOfMemory) { + return kUpb_UnknownToMessage_OutOfMemory; + } + UPB_ASSUME(insert_status == kUpb_MapInsertStatus_Inserted || + insert_status == kUpb_MapInsertStatus_Replaced); + upb_Message_DeleteUnknown(msg, unknown.ptr, unknown.len); + } + return kUpb_UnknownToMessage_Ok; +} diff --git a/upb/message/accessors.h b/upb/message/accessors.h index aadab910e1..b77d1cdc4e 100644 --- a/upb/message/accessors.h +++ b/upb/message/accessors.h @@ -28,7 +28,10 @@ #ifndef UPB_MESSAGE_ACCESSORS_H_ #define UPB_MESSAGE_ACCESSORS_H_ +#include "upb/base/descriptor_constants.h" #include "upb/collections/array.h" +#include "upb/collections/map.h" +#include "upb/collections/map_internal.h" #include "upb/message/extension_internal.h" #include "upb/message/internal.h" #include "upb/mini_table/common.h" @@ -521,6 +524,35 @@ UPB_API_INLINE bool upb_MiniTableField_IsClosedEnum( return field->descriptortype == kUpb_FieldType_Enum; } +UPB_API_INLINE upb_Map* upb_MiniTable_GetMutableMap( + upb_Message* msg, const upb_MiniTable* map_entry_mini_table, + const upb_MiniTableField* field, upb_Arena* arena) { + UPB_ASSERT(map_entry_mini_table != NULL); + UPB_ASSUME(upb_IsRepeatedOrMap(field)); + upb_Map* map = NULL; + upb_Map* default_map_value = NULL; + _upb_Message_GetNonExtensionField(msg, field, &default_map_value, &map); + if (!map) { + // Allocate map. + UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message || + field->descriptortype == kUpb_FieldType_Group); + const upb_MiniTableField* map_entry_key_field = + &map_entry_mini_table->fields[0]; + const upb_MiniTableField* map_entry_value_field = + &map_entry_mini_table->fields[1]; + map = upb_Map_New(arena, upb_MiniTableField_CType(map_entry_key_field), + upb_MiniTableField_CType(map_entry_value_field)); + _upb_Message_SetNonExtensionField(msg, field, &map); + } + return map; +} + +// Updates a map entry given an entry message. +upb_MapInsertStatus upb_Message_InsertMapEntry( + upb_Map* map, const upb_MiniTable* map_entry_mini_table, + const upb_MiniTableField* field, upb_Message* map_entry_message, + upb_Arena* arena); + typedef enum { kUpb_GetExtension_Ok, kUpb_GetExtension_NotPresent, @@ -603,6 +635,15 @@ upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMessageArray( upb_Message* msg, const upb_MiniTableField* field, const upb_MiniTable* mini_table, int decode_options, upb_Arena* arena); +// Promotes all unknown data that matches field tag id to upb_Map. +// +// The unknown data is removed from message after upb_Map is populated. +// Since repeated messages can't be packed we remove each unknown that +// contains the target tag id. +upb_UnknownToMessage_Status upb_MiniTable_PromoteUnknownToMap( + upb_Message* msg, const upb_MiniTable* mini_table, + const upb_MiniTableField* field, int decode_options, upb_Arena* arena); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/upb/message/accessors_test.cc b/upb/message/accessors_test.cc index 4fdf8ea55f..d1a276f559 100644 --- a/upb/message/accessors_test.cc +++ b/upb/message/accessors_test.cc @@ -582,6 +582,42 @@ upb_MiniTable* CreateMiniTableWithEmptySubTables(upb_Arena* arena) { return table; } +// Create a minitable to mimic ModelWithMaps with unlinked subs +// to lazily promote unknowns after parsing. +upb_MiniTable* CreateMiniTableWithEmptySubTablesForMaps(upb_Arena* arena) { + upb::MtDataEncoder e; + e.StartMessage(0); + e.PutField(kUpb_FieldType_Int32, 1, 0); + e.PutField(kUpb_FieldType_Message, 3, kUpb_FieldModifier_IsRepeated); + e.PutField(kUpb_FieldType_Message, 4, kUpb_FieldModifier_IsRepeated); + + upb_Status status; + upb_Status_Clear(&status); + upb_MiniTable* table = + upb_MiniTable_Build(e.data().data(), e.data().size(), arena, &status); + EXPECT_EQ(status.ok, true); + // Initialize sub table to null. Not using upb_MiniTable_SetSubMessage + // since it checks ->ext on parameter. + upb_MiniTableSub* sub = const_cast( + &table->subs[table->fields[1].submsg_index]); + sub->submsg = nullptr; + sub = const_cast( + &table->subs[table->fields[2].submsg_index]); + sub->submsg = nullptr; + return table; +} + +upb_MiniTable* CreateMapEntryMiniTable(upb_Arena* arena) { + upb::MtDataEncoder e; + e.EncodeMap(kUpb_FieldType_String, kUpb_FieldType_String, 0, 0); + upb_Status status; + upb_Status_Clear(&status); + upb_MiniTable* table = + upb_MiniTable_Build(e.data().data(), e.data().size(), arena, &status); + EXPECT_EQ(status.ok, true); + return table; +} + TEST(GeneratedCode, PromoteUnknownMessage) { upb_Arena* arena = upb_Arena_New(); upb_test_ModelWithSubMessages* input_msg = @@ -678,6 +714,60 @@ TEST(GeneratedCode, PromoteUnknownRepeatedMessage) { upb_Arena_Free(arena); } +TEST(GeneratedCode, PromoteUnknownToMap) { + upb_Arena* arena = upb_Arena_New(); + upb_test_ModelWithMaps* input_msg = upb_test_ModelWithMaps_new(arena); + upb_test_ModelWithMaps_set_id(input_msg, 123); + + // Add 2 map entries. + upb_test_ModelWithMaps_map_ss_set(input_msg, + upb_StringView_FromString("key1"), + upb_StringView_FromString("value1"), arena); + upb_test_ModelWithMaps_map_ss_set(input_msg, + upb_StringView_FromString("key2"), + upb_StringView_FromString("value2"), arena); + + size_t serialized_size; + char* serialized = + upb_test_ModelWithMaps_serialize(input_msg, arena, &serialized_size); + + upb_MiniTable* mini_table = CreateMiniTableWithEmptySubTablesForMaps(arena); + upb_MiniTable* map_entry_mini_table = CreateMapEntryMiniTable(arena); + upb_Message* msg = _upb_Message_New(mini_table, arena); + upb_DecodeStatus decode_status = upb_Decode(serialized, serialized_size, msg, + mini_table, nullptr, 0, arena); + EXPECT_EQ(decode_status, kUpb_DecodeStatus_Ok); + int32_t val = upb_Message_GetInt32( + msg, upb_MiniTable_FindFieldByNumber(mini_table, 1), 0); + EXPECT_EQ(val, 123); + + // Check that we have map data in an unknown. + upb_FindUnknownRet unknown = upb_MiniTable_FindUnknown(msg, 3); + EXPECT_EQ(unknown.status, kUpb_FindUnknown_Ok); + + // Update mini table and promote unknown to a message. + upb_MiniTable_SetSubMessage(mini_table, + (upb_MiniTableField*)&mini_table->fields[1], + map_entry_mini_table); + const int decode_options = + UPB_DECODE_MAXDEPTH(100); // UPB_DECODE_ALIAS disabled. + upb_UnknownToMessage_Status promote_result = + upb_MiniTable_PromoteUnknownToMap(msg, mini_table, &mini_table->fields[1], + decode_options, arena); + EXPECT_EQ(promote_result, kUpb_UnknownToMessage_Ok); + + upb_Map* map = upb_MiniTable_GetMutableMap(msg, map_entry_mini_table, + &mini_table->fields[1], arena); + EXPECT_NE(map, nullptr); + // Lookup in map. + upb_MessageValue key; + key.str_val = upb_StringView_FromString("key2"); + upb_MessageValue value; + EXPECT_TRUE(upb_Map_Get(map, key, &value)); + EXPECT_EQ(0, strncmp(value.str_val.data, "value2", 5)); + upb_Arena_Free(arena); +} + TEST(GeneratedCode, EnumClosedCheck) { upb_Arena* arena = upb_Arena_New(); diff --git a/upb/mini_table/common.c b/upb/mini_table/common.c index 818bfa2a7c..2b84af1a7f 100644 --- a/upb/mini_table/common.c +++ b/upb/mini_table/common.c @@ -76,3 +76,38 @@ upb_FieldType upb_MiniTableField_Type(const upb_MiniTableField* field) { } return field->descriptortype; } + +upb_CType upb_MiniTableField_CType(const upb_MiniTableField* f) { + switch (f->descriptortype) { + case kUpb_FieldType_Double: + return kUpb_CType_Double; + case kUpb_FieldType_Float: + return kUpb_CType_Float; + case kUpb_FieldType_Int64: + case kUpb_FieldType_SInt64: + case kUpb_FieldType_SFixed64: + return kUpb_CType_Int64; + case kUpb_FieldType_Int32: + case kUpb_FieldType_SFixed32: + case kUpb_FieldType_SInt32: + return kUpb_CType_Int32; + case kUpb_FieldType_UInt64: + case kUpb_FieldType_Fixed64: + return kUpb_CType_UInt64; + case kUpb_FieldType_UInt32: + case kUpb_FieldType_Fixed32: + return kUpb_CType_UInt32; + case kUpb_FieldType_Enum: + return kUpb_CType_Enum; + case kUpb_FieldType_Bool: + return kUpb_CType_Bool; + case kUpb_FieldType_String: + return kUpb_CType_String; + case kUpb_FieldType_Bytes: + return kUpb_CType_Bytes; + case kUpb_FieldType_Group: + case kUpb_FieldType_Message: + return kUpb_CType_Message; + } + UPB_UNREACHABLE(); +} diff --git a/upb/mini_table/common.h b/upb/mini_table/common.h index b2b87628c5..0f94cbc2fd 100644 --- a/upb/mini_table/common.h +++ b/upb/mini_table/common.h @@ -58,6 +58,8 @@ UPB_API const upb_MiniTableField* upb_MiniTable_FindFieldByNumber( UPB_API upb_FieldType upb_MiniTableField_Type(const upb_MiniTableField* field); +UPB_API upb_CType upb_MiniTableField_CType(const upb_MiniTableField* field); + UPB_API_INLINE bool upb_MiniTableField_IsExtension( const upb_MiniTableField* field) { return field->mode & kUpb_LabelFlags_IsExtension; diff --git a/upb/test/test.proto b/upb/test/test.proto index 85270d0d47..2310f342d8 100644 --- a/upb/test/test.proto +++ b/upb/test/test.proto @@ -82,3 +82,9 @@ message ModelWithSubMessages { optional ModelWithExtensions optional_child = 5; repeated ModelWithExtensions items = 6; } + +message ModelWithMaps { + optional int32 id = 1; + map map_ss = 3; + map map_ii = 4; +}