Put submsg_index directly in table data. Drop oneof support for now to focus.

pull/13171/head
Joshua Haberman 4 years ago
parent d87179501d
commit 9938cf8f27
  1. 52
      generated_for_cmake/google/protobuf/descriptor.upb.c
  2. 2
      upb/decode.h
  3. 46
      upb/decode_fast.c
  4. 83
      upbc/generator.cc
  5. 4
      upbc/message_layout.cc

@ -158,12 +158,12 @@ const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
UPB_SIZE(1125899906973706, 2251799813816330),
UPB_SIZE(3377699720790034, 6755399441317906),
UPB_SIZE(0, 0),
UPB_SIZE(844424930132002, 844424930132002),
UPB_SIZE(1125899906842666, 1125899906842666),
UPB_SIZE(1407374883553330, 1407374883553330),
UPB_SIZE(1688849860263994, 1688849860263994),
UPB_SIZE(1970324838023234, 1970324838023234),
UPB_SIZE(2251799815782474, 2251799815782474),
UPB_SIZE(11258999068426274, 22517998136852514),
UPB_SIZE(12384898975268906, 24769797950537770),
UPB_SIZE(13510798882111538, 27021597764223026),
UPB_SIZE(14636698788954170, 29273397577908282),
UPB_SIZE(7881299348160578, 15762598696058946),
UPB_SIZE(9007199255068746, 18014398509809738),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
UPB_SIZE(5629499534737506, 11258999068950626),
@ -192,7 +192,7 @@ const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
UPB_SIZE(64, 128), 12, false,
};
static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = {
static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[7] = {
&google_protobuf_DescriptorProto_msginit,
&google_protobuf_DescriptorProto_ExtensionRange_msginit,
&google_protobuf_DescriptorProto_ReservedRange_msginit,
@ -253,14 +253,14 @@ const upb_msglayout google_protobuf_DescriptorProto_msginit = {
{
UPB_SIZE(0, 0),
UPB_SIZE(1125899906973706, 2251799813816330),
UPB_SIZE(281474976710674, 281474976710674),
UPB_SIZE(562949953421338, 562949953421338),
UPB_SIZE(844424930132002, 844424930132002),
UPB_SIZE(1125899906842666, 1125899906842666),
UPB_SIZE(1407374883553330, 1407374883553330),
UPB_SIZE(1688849860526138, 1688849860526138),
UPB_SIZE(1970324836974658, 1970324836974658),
UPB_SIZE(2251799813685322, 2251799813685322),
UPB_SIZE(4503599627370514, 9007199254741010),
UPB_SIZE(5629499534213146, 11258999068426266),
UPB_SIZE(6755399441055778, 13510798882111522),
UPB_SIZE(7881299347898410, 15762598695796778),
UPB_SIZE(9007199254741042, 18014398509482034),
UPB_SIZE(3377699720659002, 6755399441186874),
UPB_SIZE(10133099161583682, 20266198323167298),
UPB_SIZE(11258999068426314, 22517998136852554),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
@ -338,7 +338,7 @@ const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = {
UPB_SIZE(0, 0),
UPB_SIZE(1125899906973704, 1125899906973704),
UPB_SIZE(2251799813947408, 2251799813947408),
UPB_SIZE(562949953945626, 562949953945626),
UPB_SIZE(3377699720724506, 4503599627567130),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
@ -596,7 +596,7 @@ const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = {
UPB_SIZE(4503599627632680, 4503599627632680),
UPB_SIZE(14636698805731378, 20266198339944498),
UPB_SIZE(16888498636193850, 24769797984092218),
UPB_SIZE(1970324971192386, 1970324971192386),
UPB_SIZE(21392098230730818, 33776997205999682),
UPB_SIZE(7881299348947016, 7881299348947016),
UPB_SIZE(19140298483433554, 29273397645017170),
UPB_SIZE(0, 0),
@ -673,7 +673,7 @@ const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = {
{
UPB_SIZE(0, 0),
UPB_SIZE(1125899906973706, 2251799813816330),
UPB_SIZE(281474976972818, 281474976972818),
UPB_SIZE(3377699720658962, 6755399441186834),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
@ -761,9 +761,9 @@ const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
{
UPB_SIZE(0, 0),
UPB_SIZE(1125899906973706, 2251799813816330),
UPB_SIZE(281474976710674, 281474976710674),
UPB_SIZE(562949953683482, 562949953683482),
UPB_SIZE(844424930132002, 844424930132002),
UPB_SIZE(4503599627370514, 9007199254741010),
UPB_SIZE(3377699720658970, 6755399441186842),
UPB_SIZE(5629499534213154, 11258999068426274),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
@ -925,7 +925,7 @@ const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = {
UPB_SIZE(0, 0),
UPB_SIZE(2251799813947402, 2251799813947402),
UPB_SIZE(1125899906973712, 1125899906973712),
UPB_SIZE(562949953945626, 562949953945626),
UPB_SIZE(4503599627567130, 6755399441252378),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
@ -1009,8 +1009,8 @@ const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = {
{
UPB_SIZE(0, 0),
UPB_SIZE(1125899906973706, 2251799813816330),
UPB_SIZE(281474976710674, 281474976710674),
UPB_SIZE(562949953683482, 562949953683482),
UPB_SIZE(4503599627370514, 9007199254741010),
UPB_SIZE(3377699720658970, 6755399441186842),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
@ -1098,7 +1098,7 @@ const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = {
UPB_SIZE(1125899907366922, 2251799814209546),
UPB_SIZE(3377699721576466, 6755399442104338),
UPB_SIZE(5629499536310298, 11258999070523418),
UPB_SIZE(844424934326306, 844424934326306),
UPB_SIZE(7881299348291618, 15762598696189986),
UPB_SIZE(281474976841768, 281474976841768),
UPB_SIZE(562949953683504, 562949953683504),
UPB_SIZE(0, 0),
@ -1876,7 +1876,7 @@ const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
{
UPB_SIZE(0, 0),
UPB_SIZE(0, 0),
UPB_SIZE(18, 18),
UPB_SIZE(15762598695796754, 22517998136852498),
UPB_SIZE(9007199255789594, 9007199255789594),
UPB_SIZE(2251799813816352, 2251799813816352),
UPB_SIZE(4503599627632680, 4503599627632680),

@ -60,7 +60,7 @@ upb_msg *decode_newmsg_ceil(upb_decstate *d, const upb_msglayout *l,
int msg_ceil_bytes) {
size_t size = l->size + sizeof(upb_msg_internal);
char *msg_data;
if (msg_ceil_bytes > 0 && decode_arenahas(d, msg_ceil_bytes)) {
if (UPB_LIKELY(msg_ceil_bytes > 0 && decode_arenahas(d, msg_ceil_bytes))) {
UPB_ASSERT(size <= (size_t)msg_ceil_bytes);
msg_data = d->arena_ptr;
memset(msg_data, 0, msg_ceil_bytes);

@ -56,21 +56,21 @@ static uint16_t fastdecode_readtag(const char *ptr, int tagbytes) {
UPB_FORCEINLINE
static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
upb_msg *msg, size_t ofs, uint64_t *data,
upb_msg *msg, uint64_t *data,
uint64_t *hasbits, upb_array **outarr,
void **end, int tagbytes, int valbytes,
upb_card card) {
upb_card card, bool hasbit_is_idx) {
size_t ofs = *data >> 48;
void *field = (char *)msg + ofs;
switch (card) {
case CARD_s:
*hasbits |= *data;
return field;
case CARD_o: {
uint32_t *case_ptr = UPB_PTR_AT(msg, (*data >> 16) & 0xffff, uint32_t);
*case_ptr = (*data >> 32) & 0xffff;
if (hasbit_is_idx) {
*hasbits |= 1 << (uint16_t)(*data >> 16);
} else {
*hasbits |= *data;
}
return field;
}
case CARD_r: {
uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
upb_array **arr_p = field;
@ -115,8 +115,8 @@ UPB_FORCEINLINE
static void *fastdecode_getfield(upb_decstate *d, const char *ptr, upb_msg *msg,
uint64_t *data, uint64_t *hasbits,
int tagbytes, int valbytes, upb_card card) {
return fastdecode_getfield_ofs(d, ptr, msg, *data >> 48, data, hasbits, NULL,
NULL, tagbytes, valbytes, card);
return fastdecode_getfield_ofs(d, ptr, msg, data, hasbits, NULL, NULL,
tagbytes, valbytes, card, false);
}
/* varint fields **************************************************************/
@ -355,14 +355,6 @@ bool fastdecode_boundscheck2(const char *ptr, unsigned len, const char *end) {
UPB_FORCEINLINE
static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes,
int msg_ceil_bytes, upb_card card) {
const char *saved_limit = d->limit;
const char *saved_fastlimit = d->fastlimit;
const upb_msglayout_field *field = &table->fields[data >> 48];
size_t ofs = field->offset;
const upb_msglayout *subl = table->submsgs[field->submsg_index];
upb_array *arr;
upb_msg **submsg;
void *end;
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
RETURN_GENERIC("submessage field tag mismatch\n");
@ -370,13 +362,21 @@ static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes,
if (--d->depth < 0) return fastdecode_err(d);
submsg = fastdecode_getfield_ofs(d, ptr, msg, ofs, &data, &hasbits, &arr,
&end, tagbytes, sizeof(upb_msg *), card);
upb_msg **submsg;
upb_array *arr;
void *end;
uint16_t submsg_idx = data >> 32;
const upb_msglayout *subl = table->submsgs[submsg_idx];
submsg = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &arr, &end,
tagbytes, sizeof(upb_msg *), card, true);
const char *saved_limit = d->limit;
const char *saved_fastlimit = d->fastlimit;
again:
if (card == CARD_r) {
if (UPB_UNLIKELY(submsg == end)) {
if (arr) {
if (UPB_LIKELY(arr != NULL)) {
size_t old_size = arr->size;
size_t old_bytes = old_size * sizeof(upb_msg*);
size_t new_size = old_size * 2;
@ -428,13 +428,13 @@ again:
d->fastlimit = UPB_MIN(d->limit, d->fastend);
}
if (card == CARD_r || !*submsg) {
if (card == CARD_r || UPB_LIKELY(!*submsg)) {
*submsg = decode_newmsg_ceil(d, subl, msg_ceil_bytes);
}
ptr = fastdecode_dispatch(d, ptr, *submsg, subl, 0);
submsg++;
if (ptr != d->limit || d->end_group != 0) {
if (UPB_UNLIKELY(ptr != d->limit || d->end_group != 0)) {
return fastdecode_err(d);
}

@ -703,6 +703,27 @@ int TableDescriptorType(const protobuf::FieldDescriptor* field) {
}
}
// Deduplicated table of sub-message types plus a reverse lookup from each
// type to its slot in that table. Filled in by GetSubmsgArray().
struct SubmsgArray {
// Distinct sub-message descriptors, in the order they were first appended.
std::vector<const protobuf::Descriptor*> messages;
// Maps a descriptor to the index it was assigned when it was appended to
// `messages` (indices start at 0 and increase by one per distinct type).
absl::flat_hash_map<const protobuf::Descriptor*, int> indexes;
};
// Builds the deduplicated sub-message table for `message`.
//
// Walks the fields returned by SortedSubmessages(message) in order and records
// each distinct message type exactly once: the type is appended to
// `messages`, and its position in that vector is stored in `indexes`. Fields
// whose message type has already been seen are skipped, so every field of the
// same type resolves to the same index.
SubmsgArray GetSubmsgArray(const protobuf::Descriptor* message) {
  SubmsgArray ret;
  for (const protobuf::FieldDescriptor* field : SortedSubmessages(message)) {
    const protobuf::Descriptor* type = field->message_type();
    // The next free slot is the current table length. emplace() leaves the
    // map untouched (and reports `false`) when the type is already present,
    // which replaces the separate find() + operator[] lookups.
    const int next_index = static_cast<int>(ret.messages.size());
    if (ret.indexes.emplace(type, next_index).second) {
      ret.messages.push_back(type);
    }
  }
  return ret;
}
typedef std::pair<std::string, MessageLayout::Size> TableEntry;
void TryFillTableEntry(const protobuf::Descriptor* message,
@ -759,7 +780,7 @@ void TryFillTableEntry(const protobuf::Descriptor* message,
case protobuf::FieldDescriptor::LABEL_OPTIONAL:
case protobuf::FieldDescriptor::LABEL_REQUIRED:
if (field->real_containing_oneof()) {
cardinality = "o";
return; // Not supported yet.
} else {
cardinality = "s";
}
@ -769,40 +790,30 @@ void TryFillTableEntry(const protobuf::Descriptor* message,
uint16_t expected_tag = (num << 3) | wire_type;
if (num > 15) expected_tag |= 0x100;
MessageLayout::Size offset = layout.GetFieldOffset(field);
uint64_t hasbit_index = 0;
MessageLayout::Size data;
if (field->type() == protobuf::FieldDescriptor::TYPE_MESSAGE) {
// Message fields index into the field array instead of giving an offset.
std::vector<const protobuf::FieldDescriptor*> order =
FieldNumberOrder(message);
auto it = std::find(order.begin(), order.end(), field);
assert(it != order.end());
uint64_t idx = it - order.begin();
data.size32 = (idx << 48) | expected_tag;
data.size64 = (idx << 48) | expected_tag;
} else {
data.size32 = ((uint64_t)offset.size32 << 48) | expected_tag;
data.size64 = ((uint64_t)offset.size64 << 48) | expected_tag;
if (layout.HasHasbit(field)) {
hasbit_index = layout.GetHasbitIndex(field);
if (hasbit_index > 31) return;
}
if (field->real_containing_oneof()) {
MessageLayout::Size case_ofs =
layout.GetOneofCaseOffset(field->real_containing_oneof());
data.size32 |= ((uint64_t)num << 32) | (case_ofs.size32 << 16);
data.size64 |= ((uint64_t)num << 32) | (case_ofs.size64 << 16);
} else {
uint32_t hasbit_mask = 0;
MessageLayout::Size data;
if (layout.HasHasbit(field)) {
int index = layout.GetHasbitIndex(field);
if (index > 31) return;
hasbit_mask = 1 << index;
}
data.size32 = ((uint64_t)offset.size32 << 48) | expected_tag;
data.size64 = ((uint64_t)offset.size64 << 48) | expected_tag;
if (field->type() == protobuf::FieldDescriptor::TYPE_MESSAGE) {
SubmsgArray submsg_array = GetSubmsgArray(message);
uint64_t idx = submsg_array.indexes[field->message_type()];
data.size32 |= idx << 32 | hasbit_index << 16;
data.size64 |= idx << 32 | hasbit_index << 16;
} else {
uint32_t hasbit_mask = 1U << hasbit_index;
data.size32 |= (uint64_t)hasbit_mask << 16;
data.size64 |= (uint64_t)hasbit_mask << 16;
}
if (field->type() == protobuf::FieldDescriptor::TYPE_MESSAGE) {
std::string size_ceil = "max";
size_t size = SIZE_MAX;
@ -863,27 +874,19 @@ void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
std::string msgname = ToCIdent(message->full_name());
std::string fields_array_ref = "NULL";
std::string submsgs_array_ref = "NULL";
absl::flat_hash_map<const protobuf::Descriptor*, int> submsg_indexes;
MessageLayout layout(message);
std::vector<const protobuf::FieldDescriptor*> sorted_submsgs =
SortedSubmessages(message);
SubmsgArray submsg_array = GetSubmsgArray(message);
if (!sorted_submsgs.empty()) {
if (!submsg_array.messages.empty()) {
// TODO(haberman): could save a little bit of space by only generating a
// "submsgs" array for every strongly-connected component.
std::string submsgs_array_name = msgname + "_submsgs";
submsgs_array_ref = "&" + submsgs_array_name + "[0]";
output("static const upb_msglayout *const $0[$1] = {\n",
submsgs_array_name, sorted_submsgs.size());
submsgs_array_name, submsg_array.messages.size());
int i = 0;
for (auto submsg : sorted_submsgs) {
if (submsg_indexes.find(submsg->message_type()) !=
submsg_indexes.end()) {
continue;
}
output(" &$0,\n", MessageInit(submsg->message_type()));
submsg_indexes[submsg->message_type()] = i++;
for (auto submsg : submsg_array.messages) {
output(" &$0,\n", MessageInit(submsg));
}
output("};\n\n");
@ -901,7 +904,7 @@ void WriteSource(const protobuf::FileDescriptor* file, Output& output) {
std::string presence = "0";
if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
submsg_index = submsg_indexes[field->message_type()];
submsg_index = submsg_array.indexes[field->message_type()];
}
if (MessageLayout::HasHasbit(field)) {

@ -24,9 +24,7 @@ MessageLayout::Size MessageLayout::Place(
}
bool MessageLayout::HasHasbit(const protobuf::FieldDescriptor* field) {
return field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2 &&
field->label() != protobuf::FieldDescriptor::LABEL_REPEATED &&
!field->containing_oneof() &&
return field->has_presence() && !field->real_containing_oneof() &&
!field->containing_type()->options().map_entry();
}

Loading…
Cancel
Save