diff --git a/upb/decode.c b/upb/decode.c index 8ca88cd058..8611526d1c 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -125,25 +125,25 @@ static const int8_t varint_ops[19] = { static const int8_t delim_ops[37] = { /* For non-repeated field type. */ - OP_UNKNOWN, /* field not found */ - OP_UNKNOWN, /* DOUBLE */ - OP_UNKNOWN, /* FLOAT */ - OP_UNKNOWN, /* INT64 */ - OP_UNKNOWN, /* UINT64 */ - OP_UNKNOWN, /* INT32 */ - OP_UNKNOWN, /* FIXED64 */ - OP_UNKNOWN, /* FIXED32 */ - OP_UNKNOWN, /* BOOL */ - OP_STRING, /* STRING */ - OP_UNKNOWN, /* GROUP */ - OP_SUBMSG, /* MESSAGE */ - OP_BYTES, /* BYTES */ - OP_UNKNOWN, /* UINT32 */ - OP_UNKNOWN, /* ENUM */ - OP_UNKNOWN, /* SFIXED32 */ - OP_UNKNOWN, /* SFIXED64 */ - OP_UNKNOWN, /* SINT32 */ - OP_UNKNOWN, /* SINT64 */ + OP_UNKNOWN, /* field not found */ + OP_UNKNOWN, /* DOUBLE */ + OP_UNKNOWN, /* FLOAT */ + OP_UNKNOWN, /* INT64 */ + OP_UNKNOWN, /* UINT64 */ + OP_UNKNOWN, /* INT32 */ + OP_UNKNOWN, /* FIXED64 */ + OP_UNKNOWN, /* FIXED32 */ + OP_UNKNOWN, /* BOOL */ + OP_STRING, /* STRING */ + OP_UNKNOWN, /* GROUP */ + OP_SUBMSG, /* MESSAGE */ + OP_BYTES, /* BYTES */ + OP_UNKNOWN, /* UINT32 */ + OP_UNKNOWN, /* ENUM */ + OP_UNKNOWN, /* SFIXED32 */ + OP_UNKNOWN, /* SFIXED64 */ + OP_UNKNOWN, /* SINT32 */ + OP_UNKNOWN, /* SINT64 */ /* For repeated field type. */ OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */ OP_FIXPCK_LG2(2), /* REPEATED FLOAT */ @@ -299,50 +299,6 @@ static void decode_munge(int type, wireval *val) { } } -static const upb_msglayout_field *upb_find_field(upb_decstate *d, - const upb_msglayout *l, - uint32_t field_number, - int *last_field_index) { - static upb_msglayout_field none = {0, 0, 0, 0, 0, 0}; - - if (l == NULL) return &none; - - size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX - if (idx < l->dense_below) { - /* Fastest case: index into dense fields. */ - goto found; - } - - if (l->dense_below < l->field_count) { - /* Linear search non-dense fields. Resume scanning from last_field_index - * since fields are usually in order. */ - int last = *last_field_index; - for (idx = last; idx < l->field_count; idx++) { - if (l->fields[idx].number == field_number) { - goto found; - } - } - - for (idx = 0; idx < last; idx++) { - if (l->fields[idx].number == field_number) { - goto found; - } - } - } - - if (l->ext == _UPB_MSGEXT_EXTENDABLE && d->extreg) { - const upb_msglayout_ext *ext = _upb_extreg_get(d->extreg, l, field_number); - if (ext) return &ext->field; - } - - return &none; /* Unknown field. */ - - found: - UPB_ASSERT(l->fields[idx].number == field_number); - *last_field_index = idx; - return &l->fields[idx]; -} - static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout_sub *subs, const upb_msglayout_field *field) { const upb_msglayout *subl = subs[field->submsg_index].submsg; @@ -609,8 +565,9 @@ static bool decode_tryfastdispatch(upb_decstate *d, const char **ptr, return false; } -static const char *decode_mset_item(upb_decstate *d, const char *ptr, - upb_msg *msg, const upb_msglayout *layout) { +static const char *decode_msgset_item(upb_decstate *d, const char *ptr, + upb_msg *msg, + const upb_msglayout *layout) { // We create a temporary upb_msglayout here and abuse its fields as temporary // storage, to avoid creating lots of MessageSet-specific parsing code-paths: // 1. We store 'layout' in item_layout.subs. We will need this later as @@ -628,6 +585,183 @@ static const char *decode_mset_item(upb_decstate *d, const char *ptr, return decode_group(d, ptr, msg, &item_layout, 1); } +static const upb_msglayout_field *decode_findfield(upb_decstate *d, + const upb_msglayout *l, + uint32_t field_number, + int *last_field_index) { + static upb_msglayout_field none = {0, 0, 0, 0, 0, 0}; + if (l == NULL) return &none; + + size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX + if (idx < l->dense_below) { + /* Fastest case: index into dense fields. */ + goto found; + } + + if (l->dense_below < l->field_count) { + /* Linear search non-dense fields. Resume scanning from last_field_index + * since fields are usually in order. */ + int last = *last_field_index; + for (idx = last; idx < l->field_count; idx++) { + if (l->fields[idx].number == field_number) { + goto found; + } + } + + for (idx = 0; idx < last; idx++) { + if (l->fields[idx].number == field_number) { + goto found; + } + } + } + + if (l->ext == _UPB_MSGEXT_EXTENDABLE && d->extreg) { + const upb_msglayout_ext *ext = _upb_extreg_get(d->extreg, l, field_number); + if (ext) return &ext->field; + } + + return &none; /* Unknown field. */ + + found: + UPB_ASSERT(l->fields[idx].number == field_number); + *last_field_index = idx; + return &l->fields[idx]; + } + +UPB_FORCEINLINE +static const char *decode_wireval(upb_decstate *d, const char *ptr, + const upb_msglayout_field *field, + int wire_type, wireval *val, int *op) { + switch (wire_type) { + case UPB_WIRE_TYPE_VARINT: + ptr = decode_varint64(d, ptr, &val->uint64_val); + *op = varint_ops[field->descriptortype]; + decode_munge(field->descriptortype, val); + return ptr; + case UPB_WIRE_TYPE_32BIT: + memcpy(&val->uint32_val, ptr, 4); + val->uint32_val = _upb_be_swap32(val->uint32_val); + *op = OP_SCALAR_LG2(2); + if (((1 << field->descriptortype) & fixed32_ok) == 0) *op = OP_UNKNOWN; + return ptr + 4; + case UPB_WIRE_TYPE_64BIT: + memcpy(&val->uint64_val, ptr, 8); + val->uint64_val = _upb_be_swap64(val->uint64_val); + *op = OP_SCALAR_LG2(3); + if (((1 << field->descriptortype) & fixed64_ok) == 0) *op = OP_UNKNOWN; + return ptr + 8; + case UPB_WIRE_TYPE_DELIMITED: { + int ndx = field->descriptortype; + uint64_t size; + if (_upb_getmode(field) == _UPB_MODE_ARRAY) ndx += 18; + ptr = decode_varint64(d, ptr, &size); + if (size >= INT32_MAX || ptr - d->end + (int32_t)size > d->limit) { + decode_err(d); /* Length overflow. */ + } + *op = delim_ops[ndx]; + val->size = size; + return ptr; + } + case UPB_WIRE_TYPE_START_GROUP: + val->uint32_val = field->number; + if (field->descriptortype == UPB_DTYPE_GROUP) { + op = OP_SUBMSG; + } else { + if (layout->ext == _UPB_MSGEXT_MSET && + field_number == _UPB_MSET_ITEM && d->extreg) { + ptr = decode_mset_item(d, ptr, msg, layout); + goto success; + } + op = OP_UNKNOWN; + } + return ptr; + default: + decode_err(d); + } +} + +UPB_FORCEINLINE +static const char *decode_known(upb_decstate *d, const char *ptr, upb_msg *msg, + const upb_msglayout *layout, + const upb_msglayout_field *field, int op, + wireval *val) { + const upb_msglayout_sub *subs = layout->subs; + uint8_t mode = field->mode; + + if (UPB_UNLIKELY(mode & _UPB_MODE_IS_EXTENSION)) { + const upb_msglayout_ext *ext_layout = (const upb_msglayout_ext*)field; + upb_msg_ext *ext = _upb_msg_getorcreateext(msg, ext_layout, &d->arena); + if (UPB_UNLIKELY(!ext)) decode_err(d); + msg = &ext->data; + subs = &ext->ext->sub; + } + + switch (mode & _UPB_MODE_MASK) { + case _UPB_MODE_ARRAY: + return decode_toarray(d, ptr, msg, subs, field, val, op); + case _UPB_MODE_MAP: + return decode_tomap(d, ptr, msg, subs, field, val); + case _UPB_MODE_SCALAR: + return decode_tomsg(d, ptr, msg, subs, field, val, op); + default: + UPB_UNREACHABLE(); + } +} + +UPB_FORCEINLINE +static const char *decode_unknown(upb_decstate *d, const char *ptr, + upb_msg *msg, int field_number, int wire_type, + wireval val, const char **field_start) { + if (field_number == 0) decode_err(d); + + if (layout && UPB_UNLIKELY(layout->ext == _UPB_MSGEXT_MSET_ITEM)) { + /* MessageSet handling. */ + switch (field_number) { + case _UPB_MSET_TYPEID: { + if (wire_type != UPB_WIRE_TYPE_VARINT) break; + const upb_msglayout_ext *ext = _upb_extreg_get( + d->extreg, layout->subs[0].submsg, val.uint64_val); + if (!ext) break; + ((upb_msglayout*)layout)->fields = &ext->field; + goto success; + } + case _UPB_MSET_MESSAGE: + if (wire_type != UPB_WIRE_TYPE_DELIMITED) break; + if (layout->fields) { + // We saw type_id previously and succeeded in looking up msg. + field = layout->fields; + op = OP_SUBMSG; + goto have_field; + } else { + // TODO: out of order MessageSet. + // This is a very rare case: all serializers will emit in-order + // MessageSets. To hit this case there has to be some kind of + // re-ordering proxy. We should eventually handle this case, but + // not today. + } + break; + } + } + + if (wire_type == UPB_WIRE_TYPE_DELIMITED) ptr += val.size; + if (msg) { + if (wire_type == UPB_WIRE_TYPE_START_GROUP) { + d->unknown = *field_start; + d->unknown_msg = msg; + ptr = decode_group(d, ptr, NULL, NULL, field_number); + d->unknown_msg = NULL; + *field_start = d->unknown; + } + if (!_upb_msg_addunknown(msg, *field_start, ptr - *field_start, + &d->arena)) { + decode_err(d); + } + } else if (wire_type == UPB_WIRE_TYPE_START_GROUP) { + ptr = decode_group(d, ptr, NULL, NULL, field_number); + } + return ptr; +} + UPB_NOINLINE static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, const upb_msglayout *layout) { @@ -646,140 +780,20 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, field_number = tag >> 3; wire_type = tag & 7; - field = upb_find_field(d, layout, field_number, &last_field_index); + field = decode_findfield(d, layout, field_number, &last_field_index); - switch (wire_type) { - case UPB_WIRE_TYPE_VARINT: - ptr = decode_varint64(d, ptr, &val.uint64_val); - op = varint_ops[field->descriptortype]; - decode_munge(field->descriptortype, &val); - break; - case UPB_WIRE_TYPE_32BIT: - memcpy(&val.uint32_val, ptr, 4); - val.uint32_val = _upb_be_swap32(val.uint32_val); - ptr += 4; - op = OP_SCALAR_LG2(2); - if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown; - break; - case UPB_WIRE_TYPE_64BIT: - memcpy(&val.uint64_val, ptr, 8); - val.uint64_val = _upb_be_swap64(val.uint64_val); - ptr += 8; - op = OP_SCALAR_LG2(3); - if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown; - break; - case UPB_WIRE_TYPE_DELIMITED: { - int ndx = field->descriptortype; - uint64_t size; - if (_upb_getmode(field) == _UPB_MODE_ARRAY) ndx += 18; - ptr = decode_varint64(d, ptr, &size); - if (size >= INT32_MAX || - ptr - d->end + (int32_t)size > d->limit) { - decode_err(d); /* Length overflow. */ - } - op = delim_ops[ndx]; - val.size = size; - break; - } - case UPB_WIRE_TYPE_START_GROUP: - val.uint32_val = field_number; - if (field->descriptortype == UPB_DTYPE_GROUP) { - op = OP_SUBMSG; - } else { - if (layout->ext == _UPB_MSGEXT_MSET && - field_number == _UPB_MSET_ITEM && d->extreg) { - ptr = decode_mset_item(d, ptr, msg, layout); - goto success; - } - op = OP_UNKNOWN; - } - break; - case UPB_WIRE_TYPE_END_GROUP: - d->end_group = field_number; - return ptr; - default: - decode_err(d); + if (wire_type == UPB_WIRE_TYPE_END_GROUP) { + d->end_group = field_number; + return ptr; } - have_field: - if (op >= 0) { - /* Known field, possibly an extension. */ - upb_msg *field_msg = msg; - const upb_msglayout_sub *subs = layout->subs; - uint8_t mode = field->mode; - - if (UPB_UNLIKELY(mode & _UPB_MODE_IS_EXTENSION)) { - const upb_msglayout_ext *ext_layout = (const upb_msglayout_ext*)field; - upb_msg_ext *ext = _upb_msg_getorcreateext(msg, ext_layout, &d->arena); - if (UPB_UNLIKELY(!ext)) decode_err(d); - field_msg = &ext->data; - subs = &ext->ext->sub; - } + ptr = decode_wireval(d, ptr, field, wire_type, &val, &op); - /* Parse, using op for dispatch. */ - switch (mode & _UPB_MODE_MASK) { - case _UPB_MODE_ARRAY: - ptr = decode_toarray(d, ptr, field_msg, subs, field, &val, op); - break; - case _UPB_MODE_MAP: - ptr = decode_tomap(d, ptr, field_msg, subs, field, &val); - break; - case _UPB_MODE_SCALAR: - ptr = decode_tomsg(d, ptr, field_msg, subs, field, &val, op); - break; - default: - UPB_UNREACHABLE(); - } + if (op >= 0) { + ptr = decode_known(d, ptr, msg, layout, field, op, &val); } else { - unknown: - /* Skip unknown field. */ - if (field_number == 0) decode_err(d); - - if (layout && UPB_UNLIKELY(layout->ext == _UPB_MSGEXT_MSET_ITEM)) { - /* MessageSet handling. */ - switch (field_number) { - case _UPB_MSET_TYPEID: { - if (wire_type != UPB_WIRE_TYPE_VARINT) break; - const upb_msglayout_ext *ext = _upb_extreg_get( - d->extreg, layout->subs[0].submsg, val.uint64_val); - if (!ext) break; - ((upb_msglayout*)layout)->fields = &ext->field; - goto success; - } - case _UPB_MSET_MESSAGE: - if (wire_type != UPB_WIRE_TYPE_DELIMITED) break; - if (layout->fields) { - // We saw type_id previously and succeeded in looking up msg. - field = layout->fields; - op = OP_SUBMSG; - goto have_field; - } else { - // TODO: out of order MessageSet. - // This is a very rare case: all serializers will emit in-order - // MessageSets. To hit this case there has to be some kind of - // re-ordering proxy. We should eventually handle this case, but - // not today. - } - break; - } - } - - if (wire_type == UPB_WIRE_TYPE_DELIMITED) ptr += val.size; - if (msg) { - if (wire_type == UPB_WIRE_TYPE_START_GROUP) { - d->unknown = field_start; - d->unknown_msg = msg; - ptr = decode_group(d, ptr, NULL, NULL, field_number); - d->unknown_msg = NULL; - field_start = d->unknown; - } - if (!_upb_msg_addunknown(msg, field_start, ptr - field_start, - &d->arena)) { - decode_err(d); - } - } else if (wire_type == UPB_WIRE_TYPE_START_GROUP) { - ptr = decode_group(d, ptr, NULL, NULL, field_number); - } + ptr = decode_unknown(d, ptr, msg, field_number, wire_type, val, + &field_start); } success: