Merge branch 'decodefunc' into msgset

pull/13171/head
Joshua Haberman 3 years ago
commit a98454cd3f
  1. 402
      upb/decode.c

@ -125,25 +125,25 @@ static const int8_t varint_ops[19] = {
static const int8_t delim_ops[37] = {
/* For non-repeated field type. */
OP_UNKNOWN, /* field not found */
OP_UNKNOWN, /* DOUBLE */
OP_UNKNOWN, /* FLOAT */
OP_UNKNOWN, /* INT64 */
OP_UNKNOWN, /* UINT64 */
OP_UNKNOWN, /* INT32 */
OP_UNKNOWN, /* FIXED64 */
OP_UNKNOWN, /* FIXED32 */
OP_UNKNOWN, /* BOOL */
OP_STRING, /* STRING */
OP_UNKNOWN, /* GROUP */
OP_SUBMSG, /* MESSAGE */
OP_BYTES, /* BYTES */
OP_UNKNOWN, /* UINT32 */
OP_UNKNOWN, /* ENUM */
OP_UNKNOWN, /* SFIXED32 */
OP_UNKNOWN, /* SFIXED64 */
OP_UNKNOWN, /* SINT32 */
OP_UNKNOWN, /* SINT64 */
OP_UNKNOWN, /* field not found */
OP_UNKNOWN, /* DOUBLE */
OP_UNKNOWN, /* FLOAT */
OP_UNKNOWN, /* INT64 */
OP_UNKNOWN, /* UINT64 */
OP_UNKNOWN, /* INT32 */
OP_UNKNOWN, /* FIXED64 */
OP_UNKNOWN, /* FIXED32 */
OP_UNKNOWN, /* BOOL */
OP_STRING, /* STRING */
OP_UNKNOWN, /* GROUP */
OP_SUBMSG, /* MESSAGE */
OP_BYTES, /* BYTES */
OP_UNKNOWN, /* UINT32 */
OP_UNKNOWN, /* ENUM */
OP_UNKNOWN, /* SFIXED32 */
OP_UNKNOWN, /* SFIXED64 */
OP_UNKNOWN, /* SINT32 */
OP_UNKNOWN, /* SINT64 */
/* For repeated field type. */
OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */
OP_FIXPCK_LG2(2), /* REPEATED FLOAT */
@ -299,50 +299,6 @@ static void decode_munge(int type, wireval *val) {
}
}
static const upb_msglayout_field *upb_find_field(upb_decstate *d,
const upb_msglayout *l,
uint32_t field_number,
int *last_field_index) {
static upb_msglayout_field none = {0, 0, 0, 0, 0, 0};
if (l == NULL) return &none;
size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX
if (idx < l->dense_below) {
/* Fastest case: index into dense fields. */
goto found;
}
if (l->dense_below < l->field_count) {
/* Linear search non-dense fields. Resume scanning from last_field_index
* since fields are usually in order. */
int last = *last_field_index;
for (idx = last; idx < l->field_count; idx++) {
if (l->fields[idx].number == field_number) {
goto found;
}
}
for (idx = 0; idx < last; idx++) {
if (l->fields[idx].number == field_number) {
goto found;
}
}
}
if (l->ext == _UPB_MSGEXT_EXTENDABLE && d->extreg) {
const upb_msglayout_ext *ext = _upb_extreg_get(d->extreg, l, field_number);
if (ext) return &ext->field;
}
return &none; /* Unknown field. */
found:
UPB_ASSERT(l->fields[idx].number == field_number);
*last_field_index = idx;
return &l->fields[idx];
}
static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout_sub *subs,
const upb_msglayout_field *field) {
const upb_msglayout *subl = subs[field->submsg_index].submsg;
@ -609,8 +565,9 @@ static bool decode_tryfastdispatch(upb_decstate *d, const char **ptr,
return false;
}
static const char *decode_mset_item(upb_decstate *d, const char *ptr,
upb_msg *msg, const upb_msglayout *layout) {
static const char *decode_msgset_item(upb_decstate *d, const char *ptr,
upb_msg *msg,
const upb_msglayout *layout) {
// We create a temporary upb_msglayout here and abuse its fields as temporary
// storage, to avoid creating lots of MessageSet-specific parsing code-paths:
// 1. We store 'layout' in item_layout.subs. We will need this later as
@ -628,6 +585,183 @@ static const char *decode_mset_item(upb_decstate *d, const char *ptr,
return decode_group(d, ptr, msg, &item_layout, 1);
}
static const upb_msglayout_field *decode_findfield(upb_decstate *d,
const upb_msglayout *l,
uint32_t field_number,
int *last_field_index) {
static upb_msglayout_field none = {0, 0, 0, 0, 0, 0};
if (l == NULL) return &none;
size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX
if (idx < l->dense_below) {
/* Fastest case: index into dense fields. */
goto found;
}
if (l->dense_below < l->field_count) {
/* Linear search non-dense fields. Resume scanning from last_field_index
* since fields are usually in order. */
int last = *last_field_index;
for (idx = last; idx < l->field_count; idx++) {
if (l->fields[idx].number == field_number) {
goto found;
}
}
for (idx = 0; idx < last; idx++) {
if (l->fields[idx].number == field_number) {
goto found;
}
}
}
if (l->ext == _UPB_MSGEXT_EXTENDABLE && d->extreg) {
const upb_msglayout_ext *ext = _upb_extreg_get(d->extreg, l, field_number);
if (ext) return &ext->field;
}
return &none; /* Unknown field. */
found:
UPB_ASSERT(l->fields[idx].number == field_number);
*last_field_index = idx;
return &l->fields[idx];
}
UPB_FORCEINLINE
static const char *decode_wireval(upb_decstate *d, const char *ptr,
const upb_msglayout_field *field,
int wire_type, wireval *val, int *op) {
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT:
ptr = decode_varint64(d, ptr, &val->uint64_val);
*op = varint_ops[field->descriptortype];
decode_munge(field->descriptortype, val);
return ptr;
case UPB_WIRE_TYPE_32BIT:
memcpy(&val->uint32_val, ptr, 4);
val->uint32_val = _upb_be_swap32(val->uint32_val);
*op = OP_SCALAR_LG2(2);
if (((1 << field->descriptortype) & fixed32_ok) == 0) *op = OP_UNKNOWN;
return ptr + 4;
case UPB_WIRE_TYPE_64BIT:
memcpy(&val->uint64_val, ptr, 8);
val->uint64_val = _upb_be_swap64(val->uint64_val);
*op = OP_SCALAR_LG2(3);
if (((1 << field->descriptortype) & fixed64_ok) == 0) *op = OP_UNKNOWN;
return ptr + 8;
case UPB_WIRE_TYPE_DELIMITED: {
int ndx = field->descriptortype;
uint64_t size;
if (_upb_getmode(field) == _UPB_MODE_ARRAY) ndx += 18;
ptr = decode_varint64(d, ptr, &size);
if (size >= INT32_MAX || ptr - d->end + (int32_t)size > d->limit) {
decode_err(d); /* Length overflow. */
}
*op = delim_ops[ndx];
val->size = size;
return ptr;
}
case UPB_WIRE_TYPE_START_GROUP:
val->uint32_val = field->number;
if (field->descriptortype == UPB_DTYPE_GROUP) {
op = OP_SUBMSG;
} else {
if (layout->ext == _UPB_MSGEXT_MSET &&
field_number == _UPB_MSET_ITEM && d->extreg) {
ptr = decode_mset_item(d, ptr, msg, layout);
goto success;
}
op = OP_UNKNOWN;
}
return ptr;
default:
decode_err(d);
}
}
UPB_FORCEINLINE
static const char *decode_known(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *layout,
const upb_msglayout_field *field, int op,
wireval *val) {
const upb_msglayout_sub *subs = layout->subs;
uint8_t mode = field->mode;
if (UPB_UNLIKELY(mode & _UPB_MODE_IS_EXTENSION)) {
const upb_msglayout_ext *ext_layout = (const upb_msglayout_ext*)field;
upb_msg_ext *ext = _upb_msg_getorcreateext(msg, ext_layout, &d->arena);
if (UPB_UNLIKELY(!ext)) decode_err(d);
msg = &ext->data;
subs = &ext->ext->sub;
}
switch (mode & _UPB_MODE_MASK) {
case _UPB_MODE_ARRAY:
return decode_toarray(d, ptr, msg, subs, field, val, op);
case _UPB_MODE_MAP:
return decode_tomap(d, ptr, msg, subs, field, val);
case _UPB_MODE_SCALAR:
return decode_tomsg(d, ptr, msg, subs, field, val, op);
default:
UPB_UNREACHABLE();
}
}
UPB_FORCEINLINE
static const char *decode_unknown(upb_decstate *d, const char *ptr,
upb_msg *msg, int field_number, int wire_type,
wireval val, const char **field_start) {
if (field_number == 0) decode_err(d);
if (layout && UPB_UNLIKELY(layout->ext == _UPB_MSGEXT_MSET_ITEM)) {
/* MessageSet handling. */
switch (field_number) {
case _UPB_MSET_TYPEID: {
if (wire_type != UPB_WIRE_TYPE_VARINT) break;
const upb_msglayout_ext *ext = _upb_extreg_get(
d->extreg, layout->subs[0].submsg, val.uint64_val);
if (!ext) break;
((upb_msglayout*)layout)->fields = &ext->field;
goto success;
}
case _UPB_MSET_MESSAGE:
if (wire_type != UPB_WIRE_TYPE_DELIMITED) break;
if (layout->fields) {
// We saw type_id previously and succeeded in looking up msg.
field = layout->fields;
op = OP_SUBMSG;
goto have_field;
} else {
// TODO: out of order MessageSet.
// This is a very rare case: all serializers will emit in-order
// MessageSets. To hit this case there has to be some kind of
// re-ordering proxy. We should eventually handle this case, but
// not today.
}
break;
}
}
if (wire_type == UPB_WIRE_TYPE_DELIMITED) ptr += val.size;
if (msg) {
if (wire_type == UPB_WIRE_TYPE_START_GROUP) {
d->unknown = *field_start;
d->unknown_msg = msg;
ptr = decode_group(d, ptr, NULL, NULL, field_number);
d->unknown_msg = NULL;
*field_start = d->unknown;
}
if (!_upb_msg_addunknown(msg, *field_start, ptr - *field_start,
&d->arena)) {
decode_err(d);
}
} else if (wire_type == UPB_WIRE_TYPE_START_GROUP) {
ptr = decode_group(d, ptr, NULL, NULL, field_number);
}
return ptr;
}
UPB_NOINLINE
static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *layout) {
@ -646,140 +780,20 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
field_number = tag >> 3;
wire_type = tag & 7;
field = upb_find_field(d, layout, field_number, &last_field_index);
field = decode_findfield(d, layout, field_number, &last_field_index);
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT:
ptr = decode_varint64(d, ptr, &val.uint64_val);
op = varint_ops[field->descriptortype];
decode_munge(field->descriptortype, &val);
break;
case UPB_WIRE_TYPE_32BIT:
memcpy(&val.uint32_val, ptr, 4);
val.uint32_val = _upb_be_swap32(val.uint32_val);
ptr += 4;
op = OP_SCALAR_LG2(2);
if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown;
break;
case UPB_WIRE_TYPE_64BIT:
memcpy(&val.uint64_val, ptr, 8);
val.uint64_val = _upb_be_swap64(val.uint64_val);
ptr += 8;
op = OP_SCALAR_LG2(3);
if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown;
break;
case UPB_WIRE_TYPE_DELIMITED: {
int ndx = field->descriptortype;
uint64_t size;
if (_upb_getmode(field) == _UPB_MODE_ARRAY) ndx += 18;
ptr = decode_varint64(d, ptr, &size);
if (size >= INT32_MAX ||
ptr - d->end + (int32_t)size > d->limit) {
decode_err(d); /* Length overflow. */
}
op = delim_ops[ndx];
val.size = size;
break;
}
case UPB_WIRE_TYPE_START_GROUP:
val.uint32_val = field_number;
if (field->descriptortype == UPB_DTYPE_GROUP) {
op = OP_SUBMSG;
} else {
if (layout->ext == _UPB_MSGEXT_MSET &&
field_number == _UPB_MSET_ITEM && d->extreg) {
ptr = decode_mset_item(d, ptr, msg, layout);
goto success;
}
op = OP_UNKNOWN;
}
break;
case UPB_WIRE_TYPE_END_GROUP:
d->end_group = field_number;
return ptr;
default:
decode_err(d);
if (wire_type == UPB_WIRE_TYPE_END_GROUP) {
d->end_group = field_number;
return ptr;
}
have_field:
if (op >= 0) {
/* Known field, possibly an extension. */
upb_msg *field_msg = msg;
const upb_msglayout_sub *subs = layout->subs;
uint8_t mode = field->mode;
if (UPB_UNLIKELY(mode & _UPB_MODE_IS_EXTENSION)) {
const upb_msglayout_ext *ext_layout = (const upb_msglayout_ext*)field;
upb_msg_ext *ext = _upb_msg_getorcreateext(msg, ext_layout, &d->arena);
if (UPB_UNLIKELY(!ext)) decode_err(d);
field_msg = &ext->data;
subs = &ext->ext->sub;
}
ptr = decode_wireval(d, ptr, field, wire_type, &val, &op);
/* Parse, using op for dispatch. */
switch (mode & _UPB_MODE_MASK) {
case _UPB_MODE_ARRAY:
ptr = decode_toarray(d, ptr, field_msg, subs, field, &val, op);
break;
case _UPB_MODE_MAP:
ptr = decode_tomap(d, ptr, field_msg, subs, field, &val);
break;
case _UPB_MODE_SCALAR:
ptr = decode_tomsg(d, ptr, field_msg, subs, field, &val, op);
break;
default:
UPB_UNREACHABLE();
}
if (op >= 0) {
ptr = decode_known(d, ptr, msg, layout, field, op, &val);
} else {
unknown:
/* Skip unknown field. */
if (field_number == 0) decode_err(d);
if (layout && UPB_UNLIKELY(layout->ext == _UPB_MSGEXT_MSET_ITEM)) {
/* MessageSet handling. */
switch (field_number) {
case _UPB_MSET_TYPEID: {
if (wire_type != UPB_WIRE_TYPE_VARINT) break;
const upb_msglayout_ext *ext = _upb_extreg_get(
d->extreg, layout->subs[0].submsg, val.uint64_val);
if (!ext) break;
((upb_msglayout*)layout)->fields = &ext->field;
goto success;
}
case _UPB_MSET_MESSAGE:
if (wire_type != UPB_WIRE_TYPE_DELIMITED) break;
if (layout->fields) {
// We saw type_id previously and succeeded in looking up msg.
field = layout->fields;
op = OP_SUBMSG;
goto have_field;
} else {
// TODO: out of order MessageSet.
// This is a very rare case: all serializers will emit in-order
// MessageSets. To hit this case there has to be some kind of
// re-ordering proxy. We should eventually handle this case, but
// not today.
}
break;
}
}
if (wire_type == UPB_WIRE_TYPE_DELIMITED) ptr += val.size;
if (msg) {
if (wire_type == UPB_WIRE_TYPE_START_GROUP) {
d->unknown = field_start;
d->unknown_msg = msg;
ptr = decode_group(d, ptr, NULL, NULL, field_number);
d->unknown_msg = NULL;
field_start = d->unknown;
}
if (!_upb_msg_addunknown(msg, field_start, ptr - field_start,
&d->arena)) {
decode_err(d);
}
} else if (wire_type == UPB_WIRE_TYPE_START_GROUP) {
ptr = decode_group(d, ptr, NULL, NULL, field_number);
}
ptr = decode_unknown(d, ptr, msg, field_number, wire_type, val,
&field_start);
}
success:

Loading…
Cancel
Save