implement mini descriptors for maps

PiperOrigin-RevId: 483474044
pull/13171/head
Eric Salo 2 years ago committed by Copybara-Service
parent 6f68ba1350
commit 20310e2f3a
  1. 218
      upb/mini_table.c
  2. 12
      upb/mini_table.h
  3. 12
      upb/mini_table.hpp
  4. 87
      upb/reflection/message_def.c
  5. 32
      upbc/file_layout.cc
  6. 1
      upbc/file_layout.h

@ -28,7 +28,6 @@
#include "upb/mini_table.h"
#include <inttypes.h>
#include <setjmp.h>
#include "upb/arena.h"
#include "upb/msg_internal.h"
@ -86,6 +85,7 @@ enum {
enum {
kUpb_EncodedVersion_EnumV1 = '!',
kUpb_EncodedVersion_ExtensionV1 = '#',
kUpb_EncodedVersion_MapV1 = '%',
kUpb_EncodedVersion_MessageV1 = '$',
};
@ -213,6 +213,24 @@ char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr,
return upb_MtDataEncoder_PutField(e, ptr, type, field_num, field_mod);
}
// Encodes an entire mini descriptor for a map.
//
// Resets the encoder's message state, then writes the map version tag
// followed by the key field (field number 1, no modifiers) and the value
// field (field number 2, with `value_mod`).
//
// Returns the pointer produced by the final write, or NULL as soon as any
// of the writes returns NULL.
char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr,
                                  upb_FieldType key_type,
                                  upb_FieldType value_type,
                                  uint64_t value_mod) {
  upb_MtDataEncoderInternal* internal = upb_MtDataEncoder_GetInternal(e, ptr);
  internal->state.msg_state.oneof_state = kUpb_OneofState_NotStarted;
  internal->state.msg_state.last_field_num = 0;
  internal->state.msg_state.msg_modifiers = 0;

  // Each step only runs if the previous one produced a non-NULL pointer.
  char* out = upb_MtDataEncoder_PutRaw(e, ptr, kUpb_EncodedVersion_MapV1);
  if (out) out = upb_MtDataEncoder_PutField(e, out, key_type, 1, 0);
  if (out) out = upb_MtDataEncoder_PutField(e, out, value_type, 2, value_mod);
  return out;
}
char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr,
uint64_t msg_mod) {
upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr);
@ -537,6 +555,28 @@ static void upb_MiniTable_SetTypeAndSub(upb_MiniTable_Field* field,
}
}
// Lookup table indexed by kUpb_EncodedType_* that yields the corresponding
// kUpb_FieldType_* value. Note that both kUpb_EncodedType_OpenEnum and
// kUpb_EncodedType_ClosedEnum map to kUpb_FieldType_Enum, so the open/closed
// distinction has to be recovered from the encoded type itself.
static const char kUpb_EncodedToType[] = {
[kUpb_EncodedType_Double] = kUpb_FieldType_Double,
[kUpb_EncodedType_Float] = kUpb_FieldType_Float,
[kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
[kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
[kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
[kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
[kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
[kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
[kUpb_EncodedType_String] = kUpb_FieldType_String,
[kUpb_EncodedType_Group] = kUpb_FieldType_Group,
[kUpb_EncodedType_Message] = kUpb_FieldType_Message,
[kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
[kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
[kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum,
[kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
[kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
[kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
[kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
[kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum,
};
static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
upb_MiniTable_Field* field,
uint64_t msg_modifiers,
@ -561,28 +601,6 @@ static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch,
[kUpb_EncodedType_ClosedEnum] = kUpb_FieldRep_4Byte,
};
static const char kUpb_EncodedToType[] = {
[kUpb_EncodedType_Double] = kUpb_FieldType_Double,
[kUpb_EncodedType_Float] = kUpb_FieldType_Float,
[kUpb_EncodedType_Int64] = kUpb_FieldType_Int64,
[kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64,
[kUpb_EncodedType_Int32] = kUpb_FieldType_Int32,
[kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64,
[kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32,
[kUpb_EncodedType_Bool] = kUpb_FieldType_Bool,
[kUpb_EncodedType_String] = kUpb_FieldType_String,
[kUpb_EncodedType_Group] = kUpb_FieldType_Group,
[kUpb_EncodedType_Message] = kUpb_FieldType_Message,
[kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes,
[kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32,
[kUpb_EncodedType_OpenEnum] = kUpb_FieldType_Enum,
[kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32,
[kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64,
[kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32,
[kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64,
[kUpb_EncodedType_ClosedEnum] = kUpb_FieldType_Enum,
};
char pointer_rep = d->platform == kUpb_MiniTablePlatform_32Bit
? kUpb_FieldRep_4Byte
: kUpb_FieldRep_8Byte;
@ -1019,6 +1037,79 @@ static void upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
d->table->size = UPB_ALIGN_UP(d->table->size, 8);
}
// Fills in d->table as a map-entry mini table with exactly two scalar fields:
// the key (field number 1, offset 0) and the value (field number 2, placed
// one string-view-sized slot after the key).
//
// When the value is a message, group, or closed enum, a single sub slot is
// allocated from the decoder's arena; it is not initialized here. A proto3
// (open) enum value needs no sub.
//
// Allocation failures are reported through upb_MtDecoder_ErrorFormat(), after
// which control is not expected to return here.
static void upb_MiniTable_BuildMapEntry(upb_MtDecoder* d,
                                        upb_FieldType key_type,
                                        upb_FieldType value_type,
                                        bool value_is_proto3_enum) {
  upb_MiniTable_Field* entry_fields =
      upb_Arena_Malloc(d->arena, sizeof(*entry_fields) * 2);
  if (entry_fields == NULL) {
    upb_MtDecoder_ErrorFormat(d, "OOM while building map mini table field");
    UPB_UNREACHABLE();
  }

  upb_MiniTable_Sub* value_sub = NULL;
  if (value_is_proto3_enum) {
    // No sub needed.
    UPB_ASSERT(value_type == kUpb_FieldType_Enum);
  } else {
    switch (value_type) {
      case kUpb_FieldType_Message:
      case kUpb_FieldType_Group:
      case kUpb_FieldType_Enum:
        value_sub = upb_Arena_Malloc(d->arena, sizeof(*value_sub));
        if (value_sub == NULL) {
          upb_MtDecoder_ErrorFormat(d, "OOM while building map mini table sub");
          UPB_UNREACHABLE();
        }
        break;
      default:
        break;
    }
  }

  // Both key and value occupy a slot large enough for a string view.
  const size_t slot_size =
      upb_MtDecoder_SizeOfRep(kUpb_FieldRep_StringView, d->platform);

  upb_MiniTable_Field* key_field = &entry_fields[0];
  key_field->number = 1;
  key_field->mode = kUpb_FieldMode_Scalar;
  key_field->presence = 0;
  key_field->offset = 0;

  upb_MiniTable_Field* value_field = &entry_fields[1];
  value_field->number = 2;
  value_field->mode = kUpb_FieldMode_Scalar;
  value_field->presence = 0;
  value_field->offset = slot_size;

  // The mode must be set before these calls, which finalize type info.
  upb_MiniTable_SetTypeAndSub(key_field, key_type, NULL, 0, false);
  upb_MiniTable_SetTypeAndSub(value_field, value_type, NULL, 0,
                              value_is_proto3_enum);

  upb_MiniTable* table = d->table;
  table->fields = entry_fields;
  table->subs = value_sub;
  table->field_count = 2;
  table->dense_below = 2;
  table->required_count = 0;
  table->table_mask = -1;
  table->ext = kUpb_ExtMode_NonExtendable | kUpb_ExtMode_IsMapEntry;
  table->size = UPB_ALIGN_UP(2 * slot_size, 8);
}
// Parses the body of a map mini descriptor (the version tag has already been
// stripped by the caller) and builds the map-entry mini table in d->table.
//
// The first two characters are decoded as the encoded key type and encoded
// value type respectively; anything after them is not examined here. Errors
// are reported through upb_MtDecoder_ErrorFormat().
static void upb_MtDecoder_ParseMap(upb_MtDecoder* d, const char* data,
                                   size_t len) {
  if (len < 2) {
    upb_MtDecoder_ErrorFormat(d, "Invalid map encoding length: %zu", len);
    UPB_UNREACHABLE();
  }

  // encoded[0] is the key type, encoded[1] is the value type.
  const int encoded[2] = {upb_FromBase92(data[0]), upb_FromBase92(data[1])};
  for (int i = 0; i < 2; i++) {
    // The conversion to size_t also rejects negative decode results, since
    // they become values far above the table size.
    if ((size_t)encoded[i] >= sizeof(kUpb_EncodedToType)) {
      upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", encoded[i]);
      UPB_UNREACHABLE();
    }
  }

  const upb_FieldType key_type = kUpb_EncodedToType[encoded[0]];
  const upb_FieldType val_type = kUpb_EncodedToType[encoded[1]];
  // Open enums are the "proto3 enum" case for the map-entry builder.
  const bool value_is_proto3_enum = (encoded[1] == kUpb_EncodedType_OpenEnum);

  upb_MiniTable_BuildMapEntry(d, key_type, val_type, value_is_proto3_enum);
}
upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
upb_MiniTablePlatform platform,
upb_Arena* arena, void** buf,
@ -1042,16 +1133,6 @@ upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
goto done;
}
// If the string is non-empty then it must begin with a version tag.
if (len) {
if (*data != kUpb_EncodedVersion_MessageV1) {
upb_MtDecoder_ErrorFormat(&decoder, "Invalid message version: %c", *data);
UPB_UNREACHABLE();
}
data++;
len--;
}
upb_MtDecoder_CheckOutOfMemory(&decoder, decoder.table);
decoder.table->size = 0;
@ -1061,10 +1142,26 @@ upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len,
decoder.table->table_mask = -1;
decoder.table->required_count = 0;
upb_MtDecoder_ParseMessage(&decoder, data, len);
upb_MtDecoder_AssignHasbits(decoder.table);
upb_MtDecoder_SortLayoutItems(&decoder);
upb_MtDecoder_AssignOffsets(&decoder);
// Strip off and verify the version tag.
if (!len--) goto done;
const char vers = *data++;
switch (vers) {
case kUpb_EncodedVersion_MapV1:
upb_MtDecoder_ParseMap(&decoder, data, len);
break;
case kUpb_EncodedVersion_MessageV1:
upb_MtDecoder_ParseMessage(&decoder, data, len);
upb_MtDecoder_AssignHasbits(decoder.table);
upb_MtDecoder_SortLayoutItems(&decoder);
upb_MtDecoder_AssignOffsets(&decoder);
break;
default:
upb_MtDecoder_ErrorFormat(&decoder, "Invalid message version: %c", vers);
UPB_UNREACHABLE();
}
done:
*buf = decoder.vec.data;
@ -1086,53 +1183,6 @@ upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform,
return ret;
}
// Builds a map-entry mini table directly from the key/value field types.
//
// The table and its two scalar fields (key = field 1 at offset 0, value =
// field 2 one string-view-sized slot later) are allocated from `arena`. A
// single, uninitialized sub slot is also allocated when the value is a
// message, group, or non-proto3 enum.
//
// Returns the new table, or NULL if any arena allocation fails.
upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type,
                                           upb_FieldType value_type,
                                           bool value_is_proto3_enum,
                                           upb_MiniTablePlatform platform,
                                           upb_Arena* arena) {
  // Both allocations are attempted before either result is checked.
  upb_MiniTable* table = upb_Arena_Malloc(arena, sizeof(*table));
  upb_MiniTable_Field* entry_fields =
      upb_Arena_Malloc(arena, sizeof(*entry_fields) * 2);
  if (table == NULL || entry_fields == NULL) return NULL;

  upb_MiniTable_Sub* value_sub = NULL;
  if (value_is_proto3_enum) {
    // No sub needed.
    UPB_ASSERT(value_type == kUpb_FieldType_Enum);
  } else {
    switch (value_type) {
      case kUpb_FieldType_Message:
      case kUpb_FieldType_Group:
      case kUpb_FieldType_Enum:
        value_sub = upb_Arena_Malloc(arena, sizeof(*value_sub));
        if (value_sub == NULL) return NULL;
        break;
      default:
        break;
    }
  }

  // Both key and value occupy a slot large enough for a string view.
  const size_t slot_size =
      upb_MtDecoder_SizeOfRep(kUpb_FieldRep_StringView, platform);

  upb_MiniTable_Field* key_field = &entry_fields[0];
  key_field->number = 1;
  key_field->mode = kUpb_FieldMode_Scalar;
  key_field->presence = 0;
  key_field->offset = 0;

  upb_MiniTable_Field* value_field = &entry_fields[1];
  value_field->number = 2;
  value_field->mode = kUpb_FieldMode_Scalar;
  value_field->presence = 0;
  value_field->offset = slot_size;

  // The mode must be set before these calls, which finalize type info.
  upb_MiniTable_SetTypeAndSub(key_field, key_type, NULL, 0, false);
  upb_MiniTable_SetTypeAndSub(value_field, value_type, NULL, 0,
                              value_is_proto3_enum);

  table->fields = entry_fields;
  table->subs = value_sub;
  table->field_count = 2;
  table->dense_below = 2;
  table->required_count = 0;
  table->table_mask = -1;
  table->ext = kUpb_ExtMode_NonExtendable | kUpb_ExtMode_IsMapEntry;
  table->size = UPB_ALIGN_UP(2 * slot_size, 8);
  return table;
}
// Returns sizeof(upb_MiniTable_Enum) plus room for `count` uint32_t values.
static size_t upb_MiniTable_EnumSize(size_t count) {
  const size_t values_bytes = count * sizeof(uint32_t);
  return sizeof(upb_MiniTable_Enum) + values_bytes;
}

@ -121,11 +121,16 @@ char* upb_MtDataEncoder_PutEnumValue(upb_MtDataEncoder* e, char* ptr,
uint32_t val);
char* upb_MtDataEncoder_EndEnum(upb_MtDataEncoder* e, char* ptr);
// Encodes an entire mini descriptor for one extension.
// Encodes an entire mini descriptor for an extension.
char* upb_MtDataEncoder_EncodeExtension(upb_MtDataEncoder* e, char* ptr,
upb_FieldType type, uint32_t field_num,
uint64_t field_mod);
// Encodes an entire mini descriptor for a map.
char* upb_MtDataEncoder_EncodeMap(upb_MtDataEncoder* e, char* ptr,
upb_FieldType key_type,
upb_FieldType value_type, uint64_t value_mod);
/** upb_MiniTable *************************************************************/
typedef enum {
@ -167,11 +172,6 @@ const char* upb_MiniTable_BuildExtension(const char* data, size_t len,
// Special-case functions for MessageSet layout and map entries.
upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform,
upb_Arena* arena);
upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type,
upb_FieldType value_type,
bool value_is_proto3_enum,
upb_MiniTablePlatform platform,
upb_Arena* arena);
upb_MiniTable_Enum* upb_MiniTable_BuildEnum(const char* data, size_t len,
upb_Arena* arena,

@ -81,8 +81,6 @@ class MtDataEncoder {
[=](char* buf) { return upb_MtDataEncoder_EndEnum(&encoder_, buf); });
}
const std::string& data() const { return appender_.data(); }
bool EncodeExtension(upb_FieldType type, uint32_t field_num,
uint64_t field_mod) {
return appender_([=](char* buf) {
@ -91,6 +89,16 @@ class MtDataEncoder {
});
}
// Encodes an entire mini descriptor for a map with the given key type, value
// type and value modifiers by running upb_MtDataEncoder_EncodeMap() through
// appender_. Returns whatever appender_ reports for that call (presumably
// false when the underlying encoder fails -- confirm against StringAppender).
bool EncodeMap(upb_FieldType key_type, upb_FieldType val_type,
               uint64_t val_mod) {
  auto encode_into = [this, key_type, val_type, val_mod](char* buf) {
    return upb_MtDataEncoder_EncodeMap(&encoder_, buf, key_type, val_type,
                                       val_mod);
  };
  return appender_(encode_into);
}
// Returns a reference to the string held by appender_.
const std::string& data() const { return appender_.data(); }
private:
class StringAppender {
public:

@ -324,31 +324,6 @@ static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx,
ctx->arena);
}
if (upb_MessageDef_IsMapEntry(m)) {
if (m->field_count != 2) {
_upb_DefBuilder_Errf(ctx, "invalid map (%s)", m->full_name);
}
const upb_FieldDef* key_f = upb_MessageDef_Field(m, 0);
const upb_FieldDef* val_f = upb_MessageDef_Field(m, 1);
if (key_f == NULL || val_f == NULL) {
_upb_DefBuilder_Errf(ctx, "Malformed map entry from message: %s",
m->full_name);
}
const upb_FieldType key_t = upb_FieldDef_Type(key_f);
const upb_FieldType val_t = upb_FieldDef_Type(val_f);
const bool is_proto3_enum =
(val_t == kUpb_FieldType_Enum) && !_upb_FieldDef_IsClosedEnum(val_f);
UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_f) == 0);
UPB_ASSERT(_upb_FieldDef_LayoutIndex(val_f) == 1);
return upb_MiniTable_BuildMapEntry(key_t, val_t, is_proto3_enum,
kUpb_MiniTablePlatform_Native,
ctx->arena);
}
upb_StringView desc;
bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc);
if (!ok) _upb_DefBuilder_OomErr(ctx);
@ -486,20 +461,42 @@ static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) {
return out;
}
bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a,
upb_StringView* out) {
upb_DescState s;
_upb_DescState_Init(&s);
// Encodes the mini descriptor for the map-entry message `m` into `s`.
//
// Returns false if `m` does not have exactly two fields, if either field
// cannot be fetched, or if the descriptor buffer cannot be grown. Otherwise
// stores the pointer returned by upb_MtDataEncoder_EncodeMap() in s->ptr
// (without inspecting it) and returns true.
static bool _upb_MessageDef_EncodeMap(upb_DescState* s, const upb_MessageDef* m,
                                      upb_Arena* a) {
  // A map entry consists of exactly a key field and a value field.
  if (m->field_count != 2) return false;

  const upb_FieldDef* key_def = upb_MessageDef_Field(m, 0);
  const upb_FieldDef* value_def = upb_MessageDef_Field(m, 1);
  if (!key_def || !value_def) return false;

  UPB_ASSERT(_upb_FieldDef_LayoutIndex(key_def) == 0);
  UPB_ASSERT(_upb_FieldDef_LayoutIndex(value_def) == 1);

  const upb_FieldType key_kind = upb_FieldDef_Type(key_def);
  const upb_FieldType value_kind = upb_FieldDef_Type(value_def);

  // Only a closed-enum value carries a modifier.
  uint64_t value_modifiers = 0;
  if (_upb_FieldDef_IsClosedEnum(value_def)) {
    value_modifiers = kUpb_FieldModifier_IsClosedEnum;
  }

  if (!_upb_DescState_Grow(s, a)) return false;
  s->ptr = upb_MtDataEncoder_EncodeMap(&s->e, s->ptr, key_kind, value_kind,
                                       value_modifiers);
  return true;
}
static bool _upb_MessageDef_EncodeMessage(upb_DescState* s,
const upb_MessageDef* m,
upb_Arena* a) {
const upb_FieldDef** sorted = NULL;
if (!m->is_sorted) {
sorted = _upb_FieldDefs_Sorted(m->fields, m->field_count, a);
if (!sorted) return false;
}
if (!_upb_DescState_Grow(&s, a)) return false;
s.ptr =
upb_MtDataEncoder_StartMessage(&s.e, s.ptr, _upb_MessageDef_Modifiers(m));
if (!_upb_DescState_Grow(s, a)) return false;
s->ptr = upb_MtDataEncoder_StartMessage(&s->e, s->ptr,
_upb_MessageDef_Modifiers(m));
for (int i = 0; i < m->field_count; i++) {
const upb_FieldDef* f = sorted ? sorted[i] : upb_MessageDef_Field(m, i);
@ -507,24 +504,38 @@ bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a,
const int number = upb_FieldDef_Number(f);
const uint64_t modifiers = _upb_FieldDef_Modifiers(f);
if (!_upb_DescState_Grow(&s, a)) return false;
s.ptr = upb_MtDataEncoder_PutField(&s.e, s.ptr, type, number, modifiers);
if (!_upb_DescState_Grow(s, a)) return false;
s->ptr = upb_MtDataEncoder_PutField(&s->e, s->ptr, type, number, modifiers);
}
for (int i = 0; i < m->oneof_count; i++) {
if (!_upb_DescState_Grow(&s, a)) return false;
s.ptr = upb_MtDataEncoder_StartOneof(&s.e, s.ptr);
if (!_upb_DescState_Grow(s, a)) return false;
s->ptr = upb_MtDataEncoder_StartOneof(&s->e, s->ptr);
const upb_OneofDef* o = upb_MessageDef_Oneof(m, i);
const int field_count = upb_OneofDef_FieldCount(o);
for (int j = 0; j < field_count; j++) {
const int number = upb_FieldDef_Number(upb_OneofDef_Field(o, j));
if (!_upb_DescState_Grow(&s, a)) return false;
s.ptr = upb_MtDataEncoder_PutOneofField(&s.e, s.ptr, number);
if (!_upb_DescState_Grow(s, a)) return false;
s->ptr = upb_MtDataEncoder_PutOneofField(&s->e, s->ptr, number);
}
}
return true;
}
bool upb_MessageDef_MiniDescriptorEncode(const upb_MessageDef* m, upb_Arena* a,
upb_StringView* out) {
upb_DescState s;
_upb_DescState_Init(&s);
if (upb_MessageDef_IsMapEntry(m)) {
if (!_upb_MessageDef_EncodeMap(&s, m, a)) return false;
} else {
if (!_upb_MessageDef_EncodeMessage(&s, m, a)) return false;
}
if (!_upb_DescState_Grow(&s, a)) return false;
*s.ptr = '\0';

@ -287,18 +287,36 @@ upb_MiniTable* FilePlatformLayout::MakeMiniTable(
if (m->options().message_set_wire_format()) {
return upb_MiniTable_BuildMessageSet(platform_, arena_.ptr());
} else if (m->options().map_entry()) {
return upb_MiniTable_BuildMapEntry(
static_cast<upb_FieldType>(m->map_key()->type()),
static_cast<upb_FieldType>(m->map_value()->type()),
m->map_value()->enum_type() &&
m->map_value()->enum_type()->file()->syntax() ==
protobuf::FileDescriptor::SYNTAX_PROTO3,
platform_, arena_.ptr());
return MakeMapMiniTable(m);
} else {
return MakeRegularMiniTable(m);
}
}
// Builds the mini table for the map-entry message `m`.
//
// The key and value field types are encoded into a map mini descriptor (the
// value is flagged as a closed enum when it is an enum declared in a proto2
// file), and that descriptor is handed to upb_MiniTable_Build() for
// platform_, allocating from arena_.
//
// On failure of either the encode or the build step, an error is printed to
// stderr, an assertion fires, and nullptr is returned if assertions are
// compiled out.
upb_MiniTable* FilePlatformLayout::MakeMapMiniTable(
    const protobuf::Descriptor* m) {
  const auto key_type = static_cast<upb_FieldType>(m->map_key()->type());
  const auto val_type = static_cast<upb_FieldType>(m->map_value()->type());
  const uint64_t val_mod = (m->map_value()->enum_type() &&
                            m->map_value()->enum_type()->file()->syntax() ==
                                protobuf::FileDescriptor::SYNTAX_PROTO2)
                               ? kUpb_FieldModifier_IsClosedEnum
                               : 0;

  upb::MtDataEncoder e;
  // Do not feed a partial or empty descriptor to the builder: an empty
  // descriptor would be accepted and silently produce the wrong layout.
  if (!e.EncodeMap(key_type, val_type, val_mod)) {
    fprintf(stderr, "Error encoding map mini descriptor\n");
    assert(false);
    return nullptr;
  }

  const absl::string_view str = e.data();
  upb::Status status;
  upb_MiniTable* ret = upb_MiniTable_Build(str.data(), str.size(), platform_,
                                           arena_.ptr(), status.ptr());
  if (!ret) {
    fprintf(stderr, "Error building mini-table: %s\n", status.error_message());
  }
  assert(ret);
  return ret;
}
upb_MiniTable* FilePlatformLayout::MakeRegularMiniTable(
const protobuf::Descriptor* m) {
upb::MtDataEncoder e;

@ -90,6 +90,7 @@ class FilePlatformLayout {
void BuildMiniTables(const protobuf::FileDescriptor* fd);
void BuildExtensions(const protobuf::FileDescriptor* fd);
upb_MiniTable* MakeMiniTable(const protobuf::Descriptor* m);
upb_MiniTable* MakeMapMiniTable(const protobuf::Descriptor* m);
upb_MiniTable* MakeRegularMiniTable(const protobuf::Descriptor* m);
upb_MiniTable_Enum* MakeMiniTableEnum(const protobuf::EnumDescriptor* d);
uint64_t GetMessageModifiers(const protobuf::Descriptor* m);

Loading…
Cancel
Save