Expanded the test to cover packed fields also.

pull/13171/head
Joshua Haberman 3 years ago
parent c755099a89
commit 7907ed913b
  1. 2
      benchmarks/compare.py
  2. 190
      upb/decode.c
  3. 52
      upb/def.c
  4. 34
      upb/msg_test.cc
  5. 4
      upb/msg_test.proto

@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/python3
#
# Copyright (c) 2009-2021, Google LLC
# All rights reserved.

@ -101,12 +101,14 @@ static const unsigned FIXED64_OK_MASK = (1 << UPB_DTYPE_DOUBLE) |
#define OP_MSGSET_ITEM -2
#define OP_MSGSET_TYPEID -3
#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */
#define OP_ENUM 1
#define OP_STRING 4
#define OP_BYTES 5
#define OP_SUBMSG 6
/* Ops above are scalar-only. Repeated fields can use any op. */
/* Scalar fields use only ops above. Repeated fields can use any op. */
#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
#define OP_PACKED_ENUM 13
static const int8_t varint_ops[] = {
OP_UNKNOWN, /* field not found */
@ -123,7 +125,7 @@ static const int8_t varint_ops[] = {
OP_UNKNOWN, /* MESSAGE */
OP_UNKNOWN, /* BYTES */
OP_SCALAR_LG2(2), /* UINT32 */
OP_SCALAR_LG2(2), /* ENUM */
OP_ENUM, /* ENUM */
OP_UNKNOWN, /* SFIXED32 */
OP_UNKNOWN, /* SFIXED64 */
OP_SCALAR_LG2(2), /* SINT32 */
@ -169,7 +171,7 @@ static const int8_t delim_ops[] = {
OP_SUBMSG, /* REPEATED MESSAGE */
OP_BYTES, /* REPEATED BYTES */
OP_VARPCK_LG2(2), /* REPEATED UINT32 */
OP_VARPCK_LG2(2), /* REPEATED ENUM */
OP_PACKED_ENUM, /* REPEATED ENUM */
OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
OP_VARPCK_LG2(2), /* REPEATED SINT32 */
@ -384,6 +386,134 @@ static const char *decode_togroup(upb_decstate *d, const char *ptr,
return decode_group(d, ptr, submsg, subl, field->number);
}
static char *encode_varint32(uint32_t val, char *ptr) {
do {
uint8_t byte = val & 0x7fU;
val >>= 7;
if (val) byte |= 0x80U;
*(ptr++) = byte;
} while (val);
return ptr;
}
UPB_NOINLINE
static bool decode_checkenum_slow(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_enumlayout *e,
const upb_msglayout_field *field, uint32_t v) {
// OPT: binary search long lists?
int n = e->value_count;
for (int i = 0; i < n; i++) {
if ((uint32_t)e->values[i] == v) return true;
}
// Unrecognized enum goes into unknown fields.
// For packed fields the tag could be arbitrarily far in the past, so we
// just re-encode the tag here.
char buf[20];
char *end = buf;
uint32_t tag = ((uint32_t)field->number << 3) | UPB_WIRE_TYPE_VARINT;
end = encode_varint32(tag, end);
end = encode_varint32(v, end);
if (!_upb_msg_addunknown(msg, buf, end - buf, &d->arena)) {
decode_err(d);
}
return false;
}
UPB_FORCEINLINE
static bool decode_checkenum(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_enumlayout *e,
const upb_msglayout_field *field, wireval *val) {
uint32_t v = val->uint32_val;
if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true;
return decode_checkenum_slow(d, ptr, msg, e, field, v);
}
UPB_NOINLINE
static const char *decode_enum_toarray(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_array *arr,
const upb_msglayout_sub *subs,
const upb_msglayout_field *field,
wireval *val) {
const upb_enumlayout *e = subs[field->submsg_index].subenum;
if (!decode_checkenum(d, ptr, msg, e, field, val)) return ptr;
void *mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
arr->len++;
memcpy(mem, val, 4);
return ptr;
}
#include <stdio.h>
UPB_FORCEINLINE
static const char *decode_fixed_packed(upb_decstate *d, const char *ptr,
upb_array *arr, wireval *val,
const upb_msglayout_field *field,
int lg2) {
int mask = (1 << lg2) - 1;
size_t count = val->size >> lg2;
if ((val->size & mask) != 0) {
return decode_err(d); /* Length isn't a round multiple of elem size. */
}
decode_reserve(d, arr, count);
void *mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
arr->len += count;
memcpy(mem, ptr, val->size); /* XXX: ptr boundary. */
return ptr + val->size;
}
UPB_FORCEINLINE
static const char *decode_varint_packed(upb_decstate *d, const char *ptr,
upb_array *arr, wireval *val,
const upb_msglayout_field *field,
int lg2) {
int scale = 1 << lg2;
int saved_limit = decode_pushlimit(d, ptr, val->size);
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
while (!decode_isdone(d, &ptr)) {
wireval elem;
ptr = decode_varint64(d, ptr, &elem.uint64_val);
decode_munge(field->descriptortype, &elem);
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
}
arr->len++;
memcpy(out, &elem, scale);
out += scale;
}
decode_poplimit(d, ptr, saved_limit);
return ptr;
}
UPB_NOINLINE
static const char *decode_enum_packed(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_array *arr,
const upb_msglayout_sub *subs,
const upb_msglayout_field *field,
wireval *val) {
const upb_enumlayout *e = subs[field->submsg_index].subenum;
int saved_limit = decode_pushlimit(d, ptr, val->size);
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
while (!decode_isdone(d, &ptr)) {
wireval elem;
ptr = decode_varint64(d, ptr, &elem.uint64_val);
if (!decode_checkenum(d, ptr, msg, e, field, &elem)) {
continue;
}
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
}
arr->len++;
memcpy(out, &elem, 4);
out += 4;
}
decode_poplimit(d, ptr, saved_limit);
return ptr;
}
static const char *decode_toarray(upb_decstate *d, const char *ptr,
upb_msg *msg,
const upb_msglayout_sub *subs,
@ -433,42 +563,18 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
}
}
case OP_FIXPCK_LG2(2):
case OP_FIXPCK_LG2(3): {
/* Fixed packed. */
int lg2 = op - OP_FIXPCK_LG2(0);
int mask = (1 << lg2) - 1;
size_t count = val->size >> lg2;
if ((val->size & mask) != 0) {
return decode_err(d); /* Length isn't a round multiple of elem size. */
}
decode_reserve(d, arr, count);
mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
arr->len += count;
memcpy(mem, ptr, val->size); /* XXX: ptr boundary. */
return ptr + val->size;
}
case OP_FIXPCK_LG2(3):
return decode_fixed_packed(d, ptr, arr, val, field,
op - OP_FIXPCK_LG2(0));
case OP_VARPCK_LG2(0):
case OP_VARPCK_LG2(2):
case OP_VARPCK_LG2(3): {
/* Varint packed. */
int lg2 = op - OP_VARPCK_LG2(0);
int scale = 1 << lg2;
int saved_limit = decode_pushlimit(d, ptr, val->size);
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
while (!decode_isdone(d, &ptr)) {
wireval elem;
ptr = decode_varint64(d, ptr, &elem.uint64_val);
decode_munge(field->descriptortype, &elem);
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
}
arr->len++;
memcpy(out, &elem, scale);
out += scale;
}
decode_poplimit(d, ptr, saved_limit);
return ptr;
}
case OP_VARPCK_LG2(3):
return decode_varint_packed(d, ptr, arr, val, field,
op - OP_VARPCK_LG2(0));
case OP_ENUM:
return decode_enum_toarray(d, ptr, msg, arr, subs, field, val);
case OP_PACKED_ENUM:
return decode_enum_packed(d, ptr, msg, arr, subs, field, val);
default:
UPB_UNREACHABLE();
}
@ -516,6 +622,12 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
void *mem = UPB_PTR_AT(msg, field->offset, void);
int type = field->descriptortype;
if (UPB_UNLIKELY(op == OP_ENUM) &&
!decode_checkenum(d, ptr, msg, subs[field->submsg_index].subenum, field,
val)) {
return ptr;
}
/* Set presence if necessary. */
if (field->presence > 0) {
_upb_sethas_field(msg, field);
@ -552,6 +664,7 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
case OP_SCALAR_LG2(3):
memcpy(mem, val, 8);
break;
case OP_ENUM:
case OP_SCALAR_LG2(2):
memcpy(mem, val, 4);
break;
@ -798,13 +911,12 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
field_number = tag >> 3;
wire_type = tag & 7;
field = decode_findfield(d, layout, field_number, &last_field_index);
if (wire_type == UPB_WIRE_TYPE_END_GROUP) {
d->end_group = field_number;
return ptr;
}
field = decode_findfield(d, layout, field_number, &last_field_index);
ptr = decode_wireval(d, ptr, field, wire_type, &val, &op);
if (op >= 0) {

@ -120,6 +120,7 @@ struct upb_msgdef {
struct upb_enumdef {
const google_protobuf_EnumOptions *opts;
const upb_enumlayout *layout; // Only for proto2.
const upb_filedef *file;
const upb_msgdef *containing_type; // Could be merged with "file".
const char *full_name;
@ -1664,6 +1665,11 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) {
if (upb_fielddef_issubmsg(f)) {
field->submsg_index = sublayout_count++;
subs[field->submsg_index].submsg = upb_fielddef_msgsubdef(f)->layout;
} else if (upb_fielddef_type(f) == UPB_TYPE_ENUM &&
f->file->syntax == UPB_SYNTAX_PROTO2) {
field->submsg_index = sublayout_count++;
subs[field->submsg_index].subenum = upb_fielddef_enumsubdef(f)->layout;
UPB_ASSERT(subs[field->submsg_index].subenum);
}
if (upb_fielddef_label(f) == UPB_LABEL_REQUIRED) {
@ -2357,6 +2363,40 @@ static void create_service(
}
}
upb_enumlayout *create_enumlayout(symtab_addctx *ctx, const upb_enumdef *e) {
int n = 0;
uint64_t mask = 0;
for (int i = 0; i < e->value_count; i++) {
uint32_t val = (uint32_t)e->values[i].number;
if (val < 64) {
mask |= 1 << val;
} else {
n++;
}
}
int32_t *values = symtab_alloc(ctx, sizeof(*values) * n);
int32_t *p = values;
for (int i = 0; i < e->value_count; i++) {
int32_t val = e->values[i].number;
if ((uint32_t)val >= 64) {
*p++ = val;
}
}
UPB_ASSERT(p == values + n);
UPB_ASSERT(upb_inttable_count(&e->iton) == n + __builtin_popcountll(mask));
upb_enumlayout *layout = symtab_alloc(ctx, sizeof(*layout));
layout->value_count = n;
layout->mask = mask;
layout->values = values;
return layout;
}
static void create_enumdef(
symtab_addctx *ctx, const char *prefix,
const google_protobuf_EnumDescriptorProto *enum_proto,
@ -2418,6 +2458,18 @@ static void create_enumdef(
}
upb_inttable_compact(&e->iton, ctx->arena);
if (e->file->syntax == UPB_SYNTAX_PROTO2) {
if (ctx->layout) {
UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count);
e->layout = ctx->layout->enums[ctx->enum_count++];
UPB_ASSERT(n == e->layout->value_count + __builtin_popcountll(e->layout->mask));
} else {
e->layout = create_enumlayout(ctx, e);
}
} else {
e->layout = NULL;
}
}
static void create_msgdef(symtab_addctx *ctx, const char *prefix,

@ -100,7 +100,9 @@ TEST(MessageTest, Extensions) {
}
void VerifyMessageSet(const upb_test_TestMessageSet *mset_msg) {
EXPECT_TRUE(upb_test_MessageSetMember_has_message_set_extension(mset_msg));
bool has = upb_test_MessageSetMember_has_message_set_extension(mset_msg);
EXPECT_TRUE(has);
if (!has) return;
const upb_test_MessageSetMember *member =
upb_test_MessageSetMember_message_set_extension(mset_msg);
EXPECT_TRUE(member != nullptr);
@ -159,6 +161,7 @@ TEST(MessageTest, Proto2Enum) {
upb_test_Proto2FakeEnumMessage *fake_msg = upb_test_Proto2FakeEnumMessage_new(arena.ptr());
upb_test_Proto2FakeEnumMessage_set_optional_enum(fake_msg, 999);
int32_t *vals = upb_test_Proto2FakeEnumMessage_resize_repeated_enum(
fake_msg, 6, arena.ptr());
vals[0] = upb_test_Proto2EnumMessage_ZERO;
@ -167,13 +170,21 @@ TEST(MessageTest, Proto2Enum) {
vals[3] = 888; // Unknown large.
vals[4] = upb_test_Proto2EnumMessage_LARGE;
vals[5] = upb_test_Proto2EnumMessage_NEGATIVE;
upb_test_Proto2FakeEnumMessage_int32_enum_map_set(fake_msg, 5, 777,
arena.ptr());
vals = upb_test_Proto2FakeEnumMessage_resize_packed_enum(
fake_msg, 6, arena.ptr());
vals[0] = upb_test_Proto2EnumMessage_ZERO;
vals[1] = 7; // Unknown small.
vals[2] = upb_test_Proto2EnumMessage_SMALL;
vals[3] = 888; // Unknown large.
vals[4] = upb_test_Proto2EnumMessage_LARGE;
vals[5] = upb_test_Proto2EnumMessage_NEGATIVE;
size_t size;
char *pb =
upb_test_Proto2FakeEnumMessage_serialize(fake_msg, arena.ptr(), &size);
// Parsing as enums puts unknown values into unknown fields.
upb_test_Proto2EnumMessage *enum_msg =
upb_test_Proto2EnumMessage_parse(pb, size, arena.ptr());
ASSERT_TRUE(enum_msg != nullptr);
@ -183,14 +194,15 @@ TEST(MessageTest, Proto2Enum) {
upb_test_Proto2EnumMessage_repeated_enum(enum_msg, &size);
EXPECT_EQ(4, size); // Two unknown values moved to the unknown field set.
// Parsing back into the fake message shows the original data, except the
// repeated enum is rearranged.
pb = upb_test_Proto2EnumMessage_serialize(enum_msg, arena.ptr(), &size);
upb_test_Proto2FakeEnumMessage *fake_msg2 =
upb_test_Proto2FakeEnumMessage_parse(pb, size, arena.ptr());
EXPECT_EQ(true, upb_test_Proto2FakeEnumMessage_has_optional_enum(fake_msg2));
vals_const =
upb_test_Proto2FakeEnumMessage_repeated_enum(fake_msg2, &size);
EXPECT_EQ(6, size);
EXPECT_EQ(999, upb_test_Proto2FakeEnumMessage_optional_enum(fake_msg2));
int32_t expected[] = {
upb_test_Proto2EnumMessage_ZERO,
upb_test_Proto2EnumMessage_SMALL,
@ -199,6 +211,16 @@ TEST(MessageTest, Proto2Enum) {
7,
888,
};
vals_const =
upb_test_Proto2FakeEnumMessage_repeated_enum(fake_msg2, &size);
EXPECT_EQ(6, size);
EXPECT_THAT(std::vector<int32_t>(vals_const, vals_const + size),
::testing::ElementsAreArray(expected));
vals_const =
upb_test_Proto2FakeEnumMessage_packed_enum(fake_msg2, &size);
EXPECT_EQ(6, size);
EXPECT_THAT(std::vector<int32_t>(vals_const, vals_const + size),
::testing::ElementsAreArray(expected));
}

@ -69,7 +69,7 @@ message Proto2EnumMessage {
optional Proto2TestEnum optional_enum = 1;
repeated Proto2TestEnum repeated_enum = 2;
map<int32, Proto2TestEnum> int32_enum_map = 3;
repeated Proto2TestEnum packed_enum = 3 [packed = true];
}
// The same fields as Proto2EnumMessage, but with int32 fields so we can fake
@ -77,5 +77,5 @@ message Proto2EnumMessage {
message Proto2FakeEnumMessage {
optional int32 optional_enum = 1;
repeated int32 repeated_enum = 2;
map<int32, int32> int32_enum_map = 3;
repeated int32 packed_enum = 3 [packed = true];
}

Loading…
Cancel
Save