diff --git a/upb/mini_table.c b/upb/mini_table.c index 78f188f948..868c4eb514 100644 --- a/upb/mini_table.c +++ b/upb/mini_table.c @@ -28,7 +28,6 @@ #include "upb/mini_table.h" #include -#include #include "upb/arena.h" #include "upb/msg_internal.h" @@ -86,6 +85,7 @@ enum { enum { kUpb_EncodedVersion_EnumV1 = '!', kUpb_EncodedVersion_ExtensionV1 = '#', + kUpb_EncodedVersion_MapV1 = '%', kUpb_EncodedVersion_MessageV1 = '$', }; @@ -213,6 +213,24 @@ char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod); } +char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, + upb_FieldType key_type, + upb_FieldType value_type, + uint64_t value_mod) { + upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); + in->state.msg_state.msg_modifiers = 0; + in->state.msg_state.last_field_num = 0; + in->state.msg_state.oneof_state = kUpb_OneofState_NotStarted; + + ptr = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1); + if (!ptr) return NULL; + + ptr = upb_MtDataEncoder_PutField(e, ptr, key_type, 1, 0); + if (!ptr) return NULL; + + return upb_MtDataEncoder_PutField(e, ptr, value_type, 2, value_mod); +} + char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, uint64_t msg_mod) { upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); @@ -537,6 +555,28 @@ static void upb_MiniTable_SetTypeAndSub(upb_MiniTable_Field* field, } } +static const char kUpb_EncodedToType[] = { + [kUpb_EncodedType_Double] = kUpb_FieldType_Double, + [kUpb_EncodedType_Float] = kUpb_FieldType_Float, + [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, + [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, + [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, + [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, + [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, + [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, + [kUpb_EncodedType_String] = kUpb_FieldType_String, + [kUpb_EncodedType_Group] = kUpb_FieldType_Group, + [kUpb_EncodedType_Message] = kUpb_FieldType_Message, + [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, + [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, + [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum, + [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, + [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, + [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, + [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64, + [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum, +}; + static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, upb_MiniTable_Field* field, uint64_t msg_modifiers, @@ -561,28 +601,6 @@ static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, [kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte, }; - static const char kUpb_EncodedToType[] = { - [kUpb_EncodedType_Double] = kUpb_FieldType_Double, - [kUpb_EncodedType_Float] = kUpb_FieldType_Float, - [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, - [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, - [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, - [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, - [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, - [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, - [kUpb_EncodedType_String] = kUpb_FieldType_String, - [kUpb_EncodedType_Group] = kUpb_FieldType_Group, - [kUpb_EncodedType_Message] = kUpb_FieldType_Message, - [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, - [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, - [kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum, - [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, - [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, - [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, - [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64, - [kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum, - }; - char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit ? kUpb_FieldRep_4Byte : kUpb_FieldRep_8Byte; @@ -1019,6 +1037,79 @@ static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) { d->table->size = UPB_ALIGN_UP(d->table->size, 8); } +static void upb_MiniTable_BuildMapEntry(upb_MtDecoder* d, + upb_FieldType key_type, + upb_FieldType value_type, + bool value_is_proto3_enum) { + upb_MiniTable_Field* fields = upb_Arena_Malloc(d->arena, sizeof(*fields) * 2); + if (!fields) { + upb_MtDecoder_ErrorFormat(d, "OOM while building map mini table field"); + UPB_UNREACHABLE(); + } + + upb_MiniTable_Sub* subs = NULL; + if (value_is_proto3_enum) { + UPB_ASSERT(value_type == kUpb_FieldType_Enum); + // No sub needed. + } else if (value_type == kUpb_FieldType_Message || + value_type == kUpb_FieldType_Group || + value_type == kUpb_FieldType_Enum) { + subs = upb_Arena_Malloc(d->arena, sizeof(*subs)); + if (!subs) { + upb_MtDecoder_ErrorFormat(d, "OOM while building map mini table sub"); + UPB_UNREACHABLE(); + } + } + + size_t field_size = + upb_MtDecoder_SizeOfRep(kUpb_FieldRep_StringView, d->platform); + + fields[0].number = 1; + fields[1].number = 2; + fields[0].mode = kUpb_FieldMode_Scalar; + fields[1].mode = kUpb_FieldMode_Scalar; + fields[0].presence = 0; + fields[1].presence = 0; + fields[0].offset = 0; + fields[1].offset = field_size; + + upb_MiniTable_SetTypeAndSub(&fields[0], key_type, NULL, 0, false); + upb_MiniTable_SetTypeAndSub(&fields[1], value_type, NULL, 0, + value_is_proto3_enum); + + upb_MiniTable* ret = d->table; + ret->size = UPB_ALIGN_UP(2 * field_size, 8); + ret->field_count = 2; + ret->ext = kUpb_ExtMode_NonExtendable | kUpb_ExtMode_IsMapEntry; + ret->dense_below = 2; + ret->table_mask = -1; + ret->required_count = 0; + ret->subs = subs; + ret->fields = fields; +} + +static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data, + size_t len) { + if (len < 2) { + upb_MtDecoder_ErrorFormat(d, "Invalid map encoding length: %zu", len); + UPB_UNREACHABLE(); + } + const int e0 = upb_FromBase92(data[0]); + const int e1 = upb_FromBase92(data[1]); + if (e0 >= sizeof(kUpb_EncodedToType)) { + upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", e0); + UPB_UNREACHABLE(); + } + if (e1 >= sizeof(kUpb_EncodedToType)) { + upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", e1); + UPB_UNREACHABLE(); + } + const upb_FieldType key_type = kUpb_EncodedToType[e0]; + const upb_FieldType val_type = kUpb_EncodedToType[e1]; + const bool value_is_proto3_enum = (e1 == kUpb_EncodedType_OpenEnum); + upb_MiniTable_BuildMapEntry(d, key_type, val_type, value_is_proto3_enum); +} + upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, upb_MiniTablePlatform platform, upb_Arena* arena, void** buf, @@ -1042,16 +1133,6 @@ upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, goto done; } - // If the string is non-empty then it must begin with a version tag. - if (len) { - if (*data != kUpb_EncodedVersion_MessageV1) { - upb_MtDecoder_ErrorFormat(&decoder, "Invalid message version: %c", *data); - UPB_UNREACHABLE(); - } - data++; - len--; - } - upb_MtDecoder_CheckOutOfMemory(&decoder, decoder.table); decoder.table->size = 0; @@ -1061,10 +1142,26 @@ upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, decoder.table->table_mask = -1; decoder.table->required_count = 0; - upb_MtDecoder_ParseMessage(&decoder, data, len); - upb_MtDecoder_AssignHasbits(decoder.table); - upb_MtDecoder_SortLayoutItems(&decoder); - upb_MtDecoder_AssignOffsets(&decoder); + // Strip off and verify the version tag. + if (!len--) goto done; + const char vers = *data++; + + switch (vers) { + case kUpb_EncodedVersion_MapV1: + upb_MtDecoder_ParseMap(&decoder, data, len); + break; + + case kUpb_EncodedVersion_MessageV1: + upb_MtDecoder_ParseMessage(&decoder, data, len); + upb_MtDecoder_AssignHasbits(decoder.table); + upb_MtDecoder_SortLayoutItems(&decoder); + upb_MtDecoder_AssignOffsets(&decoder); + break; + + default: + upb_MtDecoder_ErrorFormat(&decoder, "Invalid message version: %c", vers); + UPB_UNREACHABLE(); + } done: *buf = decoder.vec.data; @@ -1086,53 +1183,6 @@ upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform, return ret; } -upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type, - upb_FieldType value_type, - bool value_is_proto3_enum, - upb_MiniTablePlatform platform, - upb_Arena* arena) { - upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret)); - upb_MiniTable_Field* fields = upb_Arena_Malloc(arena, sizeof(*fields) * 2); - if (!ret || !fields) return NULL; - - upb_MiniTable_Sub* subs = NULL; - if (value_is_proto3_enum) { - UPB_ASSERT(value_type == kUpb_FieldType_Enum); - // No sub needed. - } else if (value_type == kUpb_FieldType_Message || - value_type == kUpb_FieldType_Group || - value_type == kUpb_FieldType_Enum) { - subs = upb_Arena_Malloc(arena, sizeof(*subs)); - if (!subs) return NULL; - } - - size_t field_size = - upb_MtDecoder_SizeOfRep(kUpb_FieldRep_StringView, platform); - - fields[0].number = 1; - fields[1].number = 2; - fields[0].mode = kUpb_FieldMode_Scalar; - fields[1].mode = kUpb_FieldMode_Scalar; - fields[0].presence = 0; - fields[1].presence = 0; - fields[0].offset = 0; - fields[1].offset = field_size; - - upb_MiniTable_SetTypeAndSub(&fields[0], key_type, NULL, 0, false); - upb_MiniTable_SetTypeAndSub(&fields[1], value_type, NULL, 0, - value_is_proto3_enum); - - ret->size = UPB_ALIGN_UP(2 * field_size, 8); - ret->field_count = 2; - ret->ext = kUpb_ExtMode_NonExtendable | kUpb_ExtMode_IsMapEntry; - ret->dense_below = 2; - ret->table_mask = -1; - ret->required_count = 0; - ret->subs = subs; - ret->fields = fields; - return ret; -} - static size_t upb_MiniTable_EnumSize(size_t count) { return sizeof(upb_MiniTable_Enum) + count * sizeof(uint32_t); } diff --git a/upb/mini_table.h b/upb/mini_table.h index 83fe3d75f7..27a7471a2f 100644 --- a/upb/mini_table.h +++ b/upb/mini_table.h @@ -121,11 +121,16 @@ char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr, uint32_t val); char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr); -// Encodes an entire mini descriptor for one extension. +// Encodes an entire mini descriptor for an extension. char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr, upb_FieldType type, uint32_t field_num, uint64_t field_mod); +// Encodes an entire mini descriptor for a map. +char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr, + upb_FieldType key_type, + upb_FieldType value_type, uint64_t value_mod); + /** upb_MiniTable *************************************************************/ typedef enum { @@ -167,11 +172,6 @@ const char* upb_MiniTable_BuildExtension(const char* data, size_t len, // Special-case functions for MessageSet layout and map entries. upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform, upb_Arena* arena); -upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type, - upb_FieldType value_type, - bool value_is_proto3_enum, - upb_MiniTablePlatform platform, - upb_Arena* arena); upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len, upb_Arena* arena, diff --git a/upb/mini_table.hpp b/upb/mini_table.hpp index fac241370c..c3bfb8bc17 100644 --- a/upb/mini_table.hpp +++ b/upb/mini_table.hpp @@ -81,8 +81,6 @@ class MtDataEncoder { [=](char* buf) { return upb_MtDataEncoder_EndEnum(&encoder_, buf); }); } - const std::string& data() const { return appender_.data(); } - bool EncodeExtension(upb_FieldType type, uint32_t field_num, uint64_t field_mod) { return appender_([=](char* buf) { @@ -91,6 +89,16 @@ class MtDataEncoder { }); } + bool EncodeMap(upb_FieldType key_type, upb_FieldType val_type, + uint64_t val_mod) { + return appender_([=](char* buf) { + return upb_MtDataEncoder_EncodeMap(&encoder_, buf, key_type, val_type, + val_mod); + }); + } + + const std::string& data() const { return appender_.data(); } + private: class StringAppender { public: diff --git a/upb/reflection/message_def.c b/upb/reflection/message_def.c index 4becf48155..7e0e9eb3b7 100644 --- a/upb/reflection/message_def.c +++ b/upb/reflection/message_def.c @@ -324,31 +324,6 @@ static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, ctx->arena); } - if (upb_MessageDef_IsMapEntry(m)) { - if (m->field_count != 2) { - _upb_DefBuilder_Errf(ctx, "invalid map (%s)", m->full_name); - } - - const upb_FieldDef* key_f = upb_MessageDef_Field(m, 0); - const upb_FieldDef* val_f = upb_MessageDef_Field(m, 1); - if (key_f == NULL || val_f == NULL) { - _upb_DefBuilder_Errf(ctx, "Malformed map entry from message: %s", - m->full_name); - } - - const upb_FieldType key_t = upb_FieldDef_Type(key_f); - const upb_FieldType val_t = upb_FieldDef_Type(val_f); - - const bool is_proto3_enum = - (val_t == kUpb_FieldType_Enum) && !_upb_FieldDef_IsClosedEnum(val_f); - UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_f) == 0); - UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_f) == 1); - - return upb_MiniTable_BuildMapEntry(key_t, val_t, is_proto3_enum, - kUpb_MiniTablePlatform_Native, - ctx->arena); - } - upb_StringView desc; bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc); if (!ok) _upb_DefBuilder_OomErr(ctx); @@ -486,20 +461,42 @@ static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { return out; } -bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a, - upb_StringView* out) { - upb_DescState s; - _upb_DescState_Init(&s); +static bool _upb_MessageDef_EncodeMap(upb_DescState* s, const upb_MessageDef* m, + upb_Arena* a) { + if (m->field_count != 2) return false; + + const upb_FieldDef* key_field = upb_MessageDef_Field(m, 0); + const upb_FieldDef* val_field = upb_MessageDef_Field(m, 1); + if (key_field == NULL || val_field == NULL) return false; + + UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_field) == 0); + UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_field) == 1); + + const upb_FieldType key_type = upb_FieldDef_Type(key_field); + const upb_FieldType val_type = upb_FieldDef_Type(val_field); + + const uint64_t val_mod = _upb_FieldDef_IsClosedEnum(val_field) + ? kUpb_FieldModifier_IsClosedEnum + : 0; + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = + upb_MtDataEncoder_EncodeMap(&s->e, s->ptr, key_type, val_type, val_mod); + return true; +} + +static bool _upb_MessageDef_EncodeMessage(upb_DescState* s, + const upb_MessageDef* m, + upb_Arena* a) { const upb_FieldDef** sorted = NULL; if (!m->is_sorted) { sorted = _upb_FieldDefs_Sorted(m->fields, m->field_count, a); if (!sorted) return false; } - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = - upb_MtDataEncoder_StartMessage(&s.e, s.ptr, _upb_MessageDef_Modifiers(m)); + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_StartMessage(&s->e, s->ptr, + _upb_MessageDef_Modifiers(m)); for (int i = 0; i < m->field_count; i++) { const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i); @@ -507,24 +504,38 @@ bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a, const int number = upb_FieldDef_Number(f); const uint64_t modifiers = _upb_FieldDef_Modifiers(f); - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifiers); + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_PutField(&s->e, s->ptr, type, number, modifiers); } for (int i = 0; i < m->oneof_count; i++) { - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_StartOneof(&s.e, s.ptr); + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_StartOneof(&s->e, s->ptr); const upb_OneofDef* o = upb_MessageDef_Oneof(m, i); const int field_count = upb_OneofDef_FieldCount(o); for (int j = 0; j < field_count; j++) { const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j)); - if (!_upb_DescState_Grow(&s, a)) return false; - s.ptr = upb_MtDataEncoder_PutOneofField(&s.e, s.ptr, number); + if (!_upb_DescState_Grow(s, a)) return false; + s->ptr = upb_MtDataEncoder_PutOneofField(&s->e, s->ptr, number); } } + return true; +} + +bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a, + upb_StringView* out) { + upb_DescState s; + _upb_DescState_Init(&s); + + if (upb_MessageDef_IsMapEntry(m)) { + if (!_upb_MessageDef_EncodeMap(&s, m, a)) return false; + } else { + if (!_upb_MessageDef_EncodeMessage(&s, m, a)) return false; + } + if (!_upb_DescState_Grow(&s, a)) return false; *s.ptr = '\0'; diff --git a/upbc/file_layout.cc b/upbc/file_layout.cc index f9b2a56196..f3c489c024 100644 --- a/upbc/file_layout.cc +++ b/upbc/file_layout.cc @@ -287,18 +287,36 @@ upb_MiniTable* FilePlatformLayout::MakeMiniTable( if (m->options().message_set_wire_format()) { return upb_MiniTable_BuildMessageSet(platform_, arena_.ptr()); } else if (m->options().map_entry()) { - return upb_MiniTable_BuildMapEntry( - static_cast(m->map_key()->type()), - static_cast(m->map_value()->type()), - m->map_value()->enum_type() && - m->map_value()->enum_type()->file()->syntax() == - protobuf::FileDescriptor::SYNTAX_PROTO3, - platform_, arena_.ptr()); + return MakeMapMiniTable(m); } else { return MakeRegularMiniTable(m); } } +upb_MiniTable* FilePlatformLayout::MakeMapMiniTable( + const protobuf::Descriptor* m) { + const auto key_type = static_cast(m->map_key()->type()); + const auto val_type = static_cast(m->map_value()->type()); + const uint64_t val_mod = (m->map_value()->enum_type() && + m->map_value()->enum_type()->file()->syntax() == + protobuf::FileDescriptor::SYNTAX_PROTO2) + ? kUpb_FieldModifier_IsClosedEnum + : 0; + + upb::MtDataEncoder e; + e.EncodeMap(key_type, val_type, val_mod); + + const absl::string_view str = e.data(); + upb::Status status; + upb_MiniTable* ret = upb_MiniTable_Build(str.data(), str.size(), platform_, + arena_.ptr(), status.ptr()); + if (!ret) { + fprintf(stderr, "Error building mini-table: %s\n", status.error_message()); + } + assert(ret); + return ret; +} + upb_MiniTable* FilePlatformLayout::MakeRegularMiniTable( const protobuf::Descriptor* m) { upb::MtDataEncoder e; diff --git a/upbc/file_layout.h b/upbc/file_layout.h index 271db45c39..b448df6447 100644 --- a/upbc/file_layout.h +++ b/upbc/file_layout.h @@ -90,6 +90,7 @@ class FilePlatformLayout { void BuildMiniTables(const protobuf::FileDescriptor* fd); void BuildExtensions(const protobuf::FileDescriptor* fd); upb_MiniTable* MakeMiniTable(const protobuf::Descriptor* m); + upb_MiniTable* MakeMapMiniTable(const protobuf::Descriptor* m); upb_MiniTable* MakeRegularMiniTable(const protobuf::Descriptor* m); upb_MiniTable_Enum* MakeMiniTableEnum(const protobuf::EnumDescriptor* d); uint64_t GetMessageModifiers(const protobuf::Descriptor* m);