Expanded the test to cover packed fields also.

pull/13171/head
Joshua Haberman 3 years ago
parent c755099a89
commit 7907ed913b
  1. 2
      benchmarks/compare.py
  2. 190
      upb/decode.c
  3. 52
      upb/def.c
  4. 34
      upb/msg_test.cc
  5. 4
      upb/msg_test.proto

@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/python3
# #
# Copyright (c) 2009-2021, Google LLC # Copyright (c) 2009-2021, Google LLC
# All rights reserved. # All rights reserved.

@ -101,12 +101,14 @@ static const unsigned FIXED64_OK_MASK = (1 << UPB_DTYPE_DOUBLE) |
#define OP_MSGSET_ITEM -2 #define OP_MSGSET_ITEM -2
#define OP_MSGSET_TYPEID -3 #define OP_MSGSET_TYPEID -3
#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */ #define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */
#define OP_ENUM 1
#define OP_STRING 4 #define OP_STRING 4
#define OP_BYTES 5 #define OP_BYTES 5
#define OP_SUBMSG 6 #define OP_SUBMSG 6
/* Ops above are scalar-only. Repeated fields can use any op. */ /* Scalar fields use only ops above. Repeated fields can use any op. */
#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ #define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ #define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
#define OP_PACKED_ENUM 13
static const int8_t varint_ops[] = { static const int8_t varint_ops[] = {
OP_UNKNOWN, /* field not found */ OP_UNKNOWN, /* field not found */
@ -123,7 +125,7 @@ static const int8_t varint_ops[] = {
OP_UNKNOWN, /* MESSAGE */ OP_UNKNOWN, /* MESSAGE */
OP_UNKNOWN, /* BYTES */ OP_UNKNOWN, /* BYTES */
OP_SCALAR_LG2(2), /* UINT32 */ OP_SCALAR_LG2(2), /* UINT32 */
OP_SCALAR_LG2(2), /* ENUM */ OP_ENUM, /* ENUM */
OP_UNKNOWN, /* SFIXED32 */ OP_UNKNOWN, /* SFIXED32 */
OP_UNKNOWN, /* SFIXED64 */ OP_UNKNOWN, /* SFIXED64 */
OP_SCALAR_LG2(2), /* SINT32 */ OP_SCALAR_LG2(2), /* SINT32 */
@ -169,7 +171,7 @@ static const int8_t delim_ops[] = {
OP_SUBMSG, /* REPEATED MESSAGE */ OP_SUBMSG, /* REPEATED MESSAGE */
OP_BYTES, /* REPEATED BYTES */ OP_BYTES, /* REPEATED BYTES */
OP_VARPCK_LG2(2), /* REPEATED UINT32 */ OP_VARPCK_LG2(2), /* REPEATED UINT32 */
OP_VARPCK_LG2(2), /* REPEATED ENUM */ OP_PACKED_ENUM, /* REPEATED ENUM */
OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */ OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */ OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
OP_VARPCK_LG2(2), /* REPEATED SINT32 */ OP_VARPCK_LG2(2), /* REPEATED SINT32 */
@ -384,6 +386,134 @@ static const char *decode_togroup(upb_decstate *d, const char *ptr,
return decode_group(d, ptr, submsg, subl, field->number); return decode_group(d, ptr, submsg, subl, field->number);
} }
static char *encode_varint32(uint32_t val, char *ptr) {
do {
uint8_t byte = val & 0x7fU;
val >>= 7;
if (val) byte |= 0x80U;
*(ptr++) = byte;
} while (val);
return ptr;
}
UPB_NOINLINE
static bool decode_checkenum_slow(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_enumlayout *e,
const upb_msglayout_field *field, uint32_t v) {
// OPT: binary search long lists?
int n = e->value_count;
for (int i = 0; i < n; i++) {
if ((uint32_t)e->values[i] == v) return true;
}
// Unrecognized enum goes into unknown fields.
// For packed fields the tag could be arbitrarily far in the past, so we
// just re-encode the tag here.
char buf[20];
char *end = buf;
uint32_t tag = ((uint32_t)field->number << 3) | UPB_WIRE_TYPE_VARINT;
end = encode_varint32(tag, end);
end = encode_varint32(v, end);
if (!_upb_msg_addunknown(msg, buf, end - buf, &d->arena)) {
decode_err(d);
}
return false;
}
UPB_FORCEINLINE
static bool decode_checkenum(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_enumlayout *e,
const upb_msglayout_field *field, wireval *val) {
uint32_t v = val->uint32_val;
if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true;
return decode_checkenum_slow(d, ptr, msg, e, field, v);
}
UPB_NOINLINE
static const char *decode_enum_toarray(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_array *arr,
const upb_msglayout_sub *subs,
const upb_msglayout_field *field,
wireval *val) {
const upb_enumlayout *e = subs[field->submsg_index].subenum;
if (!decode_checkenum(d, ptr, msg, e, field, val)) return ptr;
void *mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
arr->len++;
memcpy(mem, val, 4);
return ptr;
}
#include <stdio.h>
UPB_FORCEINLINE
static const char *decode_fixed_packed(upb_decstate *d, const char *ptr,
upb_array *arr, wireval *val,
const upb_msglayout_field *field,
int lg2) {
int mask = (1 << lg2) - 1;
size_t count = val->size >> lg2;
if ((val->size & mask) != 0) {
return decode_err(d); /* Length isn't a round multiple of elem size. */
}
decode_reserve(d, arr, count);
void *mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
arr->len += count;
memcpy(mem, ptr, val->size); /* XXX: ptr boundary. */
return ptr + val->size;
}
UPB_FORCEINLINE
static const char *decode_varint_packed(upb_decstate *d, const char *ptr,
upb_array *arr, wireval *val,
const upb_msglayout_field *field,
int lg2) {
int scale = 1 << lg2;
int saved_limit = decode_pushlimit(d, ptr, val->size);
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
while (!decode_isdone(d, &ptr)) {
wireval elem;
ptr = decode_varint64(d, ptr, &elem.uint64_val);
decode_munge(field->descriptortype, &elem);
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
}
arr->len++;
memcpy(out, &elem, scale);
out += scale;
}
decode_poplimit(d, ptr, saved_limit);
return ptr;
}
UPB_NOINLINE
static const char *decode_enum_packed(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_array *arr,
const upb_msglayout_sub *subs,
const upb_msglayout_field *field,
wireval *val) {
const upb_enumlayout *e = subs[field->submsg_index].subenum;
int saved_limit = decode_pushlimit(d, ptr, val->size);
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
while (!decode_isdone(d, &ptr)) {
wireval elem;
ptr = decode_varint64(d, ptr, &elem.uint64_val);
if (!decode_checkenum(d, ptr, msg, e, field, &elem)) {
continue;
}
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
}
arr->len++;
memcpy(out, &elem, 4);
out += 4;
}
decode_poplimit(d, ptr, saved_limit);
return ptr;
}
static const char *decode_toarray(upb_decstate *d, const char *ptr, static const char *decode_toarray(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_msg *msg,
const upb_msglayout_sub *subs, const upb_msglayout_sub *subs,
@ -433,42 +563,18 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
} }
} }
case OP_FIXPCK_LG2(2): case OP_FIXPCK_LG2(2):
case OP_FIXPCK_LG2(3): { case OP_FIXPCK_LG2(3):
/* Fixed packed. */ return decode_fixed_packed(d, ptr, arr, val, field,
int lg2 = op - OP_FIXPCK_LG2(0); op - OP_FIXPCK_LG2(0));
int mask = (1 << lg2) - 1;
size_t count = val->size >> lg2;
if ((val->size & mask) != 0) {
return decode_err(d); /* Length isn't a round multiple of elem size. */
}
decode_reserve(d, arr, count);
mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
arr->len += count;
memcpy(mem, ptr, val->size); /* XXX: ptr boundary. */
return ptr + val->size;
}
case OP_VARPCK_LG2(0): case OP_VARPCK_LG2(0):
case OP_VARPCK_LG2(2): case OP_VARPCK_LG2(2):
case OP_VARPCK_LG2(3): { case OP_VARPCK_LG2(3):
/* Varint packed. */ return decode_varint_packed(d, ptr, arr, val, field,
int lg2 = op - OP_VARPCK_LG2(0); op - OP_VARPCK_LG2(0));
int scale = 1 << lg2; case OP_ENUM:
int saved_limit = decode_pushlimit(d, ptr, val->size); return decode_enum_toarray(d, ptr, msg, arr, subs, field, val);
char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); case OP_PACKED_ENUM:
while (!decode_isdone(d, &ptr)) { return decode_enum_packed(d, ptr, msg, arr, subs, field, val);
wireval elem;
ptr = decode_varint64(d, ptr, &elem.uint64_val);
decode_munge(field->descriptortype, &elem);
if (decode_reserve(d, arr, 1)) {
out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
}
arr->len++;
memcpy(out, &elem, scale);
out += scale;
}
decode_poplimit(d, ptr, saved_limit);
return ptr;
}
default: default:
UPB_UNREACHABLE(); UPB_UNREACHABLE();
} }
@ -516,6 +622,12 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
void *mem = UPB_PTR_AT(msg, field->offset, void); void *mem = UPB_PTR_AT(msg, field->offset, void);
int type = field->descriptortype; int type = field->descriptortype;
if (UPB_UNLIKELY(op == OP_ENUM) &&
!decode_checkenum(d, ptr, msg, subs[field->submsg_index].subenum, field,
val)) {
return ptr;
}
/* Set presence if necessary. */ /* Set presence if necessary. */
if (field->presence > 0) { if (field->presence > 0) {
_upb_sethas_field(msg, field); _upb_sethas_field(msg, field);
@ -552,6 +664,7 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
case OP_SCALAR_LG2(3): case OP_SCALAR_LG2(3):
memcpy(mem, val, 8); memcpy(mem, val, 8);
break; break;
case OP_ENUM:
case OP_SCALAR_LG2(2): case OP_SCALAR_LG2(2):
memcpy(mem, val, 4); memcpy(mem, val, 4);
break; break;
@ -798,13 +911,12 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
field_number = tag >> 3; field_number = tag >> 3;
wire_type = tag & 7; wire_type = tag & 7;
field = decode_findfield(d, layout, field_number, &last_field_index);
if (wire_type == UPB_WIRE_TYPE_END_GROUP) { if (wire_type == UPB_WIRE_TYPE_END_GROUP) {
d->end_group = field_number; d->end_group = field_number;
return ptr; return ptr;
} }
field = decode_findfield(d, layout, field_number, &last_field_index);
ptr = decode_wireval(d, ptr, field, wire_type, &val, &op); ptr = decode_wireval(d, ptr, field, wire_type, &val, &op);
if (op >= 0) { if (op >= 0) {

@ -120,6 +120,7 @@ struct upb_msgdef {
struct upb_enumdef { struct upb_enumdef {
const google_protobuf_EnumOptions *opts; const google_protobuf_EnumOptions *opts;
const upb_enumlayout *layout; // Only for proto2.
const upb_filedef *file; const upb_filedef *file;
const upb_msgdef *containing_type; // Could be merged with "file". const upb_msgdef *containing_type; // Could be merged with "file".
const char *full_name; const char *full_name;
@ -1664,6 +1665,11 @@ static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) {
if (upb_fielddef_issubmsg(f)) { if (upb_fielddef_issubmsg(f)) {
field->submsg_index = sublayout_count++; field->submsg_index = sublayout_count++;
subs[field->submsg_index].submsg = upb_fielddef_msgsubdef(f)->layout; subs[field->submsg_index].submsg = upb_fielddef_msgsubdef(f)->layout;
} else if (upb_fielddef_type(f) == UPB_TYPE_ENUM &&
f->file->syntax == UPB_SYNTAX_PROTO2) {
field->submsg_index = sublayout_count++;
subs[field->submsg_index].subenum = upb_fielddef_enumsubdef(f)->layout;
UPB_ASSERT(subs[field->submsg_index].subenum);
} }
if (upb_fielddef_label(f) == UPB_LABEL_REQUIRED) { if (upb_fielddef_label(f) == UPB_LABEL_REQUIRED) {
@ -2357,6 +2363,40 @@ static void create_service(
} }
} }
upb_enumlayout *create_enumlayout(symtab_addctx *ctx, const upb_enumdef *e) {
int n = 0;
uint64_t mask = 0;
for (int i = 0; i < e->value_count; i++) {
uint32_t val = (uint32_t)e->values[i].number;
if (val < 64) {
mask |= 1 << val;
} else {
n++;
}
}
int32_t *values = symtab_alloc(ctx, sizeof(*values) * n);
int32_t *p = values;
for (int i = 0; i < e->value_count; i++) {
int32_t val = e->values[i].number;
if ((uint32_t)val >= 64) {
*p++ = val;
}
}
UPB_ASSERT(p == values + n);
UPB_ASSERT(upb_inttable_count(&e->iton) == n + __builtin_popcountll(mask));
upb_enumlayout *layout = symtab_alloc(ctx, sizeof(*layout));
layout->value_count = n;
layout->mask = mask;
layout->values = values;
return layout;
}
static void create_enumdef( static void create_enumdef(
symtab_addctx *ctx, const char *prefix, symtab_addctx *ctx, const char *prefix,
const google_protobuf_EnumDescriptorProto *enum_proto, const google_protobuf_EnumDescriptorProto *enum_proto,
@ -2418,6 +2458,18 @@ static void create_enumdef(
} }
upb_inttable_compact(&e->iton, ctx->arena); upb_inttable_compact(&e->iton, ctx->arena);
if (e->file->syntax == UPB_SYNTAX_PROTO2) {
if (ctx->layout) {
UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count);
e->layout = ctx->layout->enums[ctx->enum_count++];
UPB_ASSERT(n == e->layout->value_count + __builtin_popcountll(e->layout->mask));
} else {
e->layout = create_enumlayout(ctx, e);
}
} else {
e->layout = NULL;
}
} }
static void create_msgdef(symtab_addctx *ctx, const char *prefix, static void create_msgdef(symtab_addctx *ctx, const char *prefix,

@ -100,7 +100,9 @@ TEST(MessageTest, Extensions) {
} }
void VerifyMessageSet(const upb_test_TestMessageSet *mset_msg) { void VerifyMessageSet(const upb_test_TestMessageSet *mset_msg) {
EXPECT_TRUE(upb_test_MessageSetMember_has_message_set_extension(mset_msg)); bool has = upb_test_MessageSetMember_has_message_set_extension(mset_msg);
EXPECT_TRUE(has);
if (!has) return;
const upb_test_MessageSetMember *member = const upb_test_MessageSetMember *member =
upb_test_MessageSetMember_message_set_extension(mset_msg); upb_test_MessageSetMember_message_set_extension(mset_msg);
EXPECT_TRUE(member != nullptr); EXPECT_TRUE(member != nullptr);
@ -159,6 +161,7 @@ TEST(MessageTest, Proto2Enum) {
upb_test_Proto2FakeEnumMessage *fake_msg = upb_test_Proto2FakeEnumMessage_new(arena.ptr()); upb_test_Proto2FakeEnumMessage *fake_msg = upb_test_Proto2FakeEnumMessage_new(arena.ptr());
upb_test_Proto2FakeEnumMessage_set_optional_enum(fake_msg, 999); upb_test_Proto2FakeEnumMessage_set_optional_enum(fake_msg, 999);
int32_t *vals = upb_test_Proto2FakeEnumMessage_resize_repeated_enum( int32_t *vals = upb_test_Proto2FakeEnumMessage_resize_repeated_enum(
fake_msg, 6, arena.ptr()); fake_msg, 6, arena.ptr());
vals[0] = upb_test_Proto2EnumMessage_ZERO; vals[0] = upb_test_Proto2EnumMessage_ZERO;
@ -167,13 +170,21 @@ TEST(MessageTest, Proto2Enum) {
vals[3] = 888; // Unknown large. vals[3] = 888; // Unknown large.
vals[4] = upb_test_Proto2EnumMessage_LARGE; vals[4] = upb_test_Proto2EnumMessage_LARGE;
vals[5] = upb_test_Proto2EnumMessage_NEGATIVE; vals[5] = upb_test_Proto2EnumMessage_NEGATIVE;
upb_test_Proto2FakeEnumMessage_int32_enum_map_set(fake_msg, 5, 777,
arena.ptr()); vals = upb_test_Proto2FakeEnumMessage_resize_packed_enum(
fake_msg, 6, arena.ptr());
vals[0] = upb_test_Proto2EnumMessage_ZERO;
vals[1] = 7; // Unknown small.
vals[2] = upb_test_Proto2EnumMessage_SMALL;
vals[3] = 888; // Unknown large.
vals[4] = upb_test_Proto2EnumMessage_LARGE;
vals[5] = upb_test_Proto2EnumMessage_NEGATIVE;
size_t size; size_t size;
char *pb = char *pb =
upb_test_Proto2FakeEnumMessage_serialize(fake_msg, arena.ptr(), &size); upb_test_Proto2FakeEnumMessage_serialize(fake_msg, arena.ptr(), &size);
// Parsing as enums puts unknown values into unknown fields.
upb_test_Proto2EnumMessage *enum_msg = upb_test_Proto2EnumMessage *enum_msg =
upb_test_Proto2EnumMessage_parse(pb, size, arena.ptr()); upb_test_Proto2EnumMessage_parse(pb, size, arena.ptr());
ASSERT_TRUE(enum_msg != nullptr); ASSERT_TRUE(enum_msg != nullptr);
@ -183,14 +194,15 @@ TEST(MessageTest, Proto2Enum) {
upb_test_Proto2EnumMessage_repeated_enum(enum_msg, &size); upb_test_Proto2EnumMessage_repeated_enum(enum_msg, &size);
EXPECT_EQ(4, size); // Two unknown values moved to the unknown field set. EXPECT_EQ(4, size); // Two unknown values moved to the unknown field set.
// Parsing back into the fake message shows the original data, except the
// repeated enum is rearranged.
pb = upb_test_Proto2EnumMessage_serialize(enum_msg, arena.ptr(), &size); pb = upb_test_Proto2EnumMessage_serialize(enum_msg, arena.ptr(), &size);
upb_test_Proto2FakeEnumMessage *fake_msg2 = upb_test_Proto2FakeEnumMessage *fake_msg2 =
upb_test_Proto2FakeEnumMessage_parse(pb, size, arena.ptr()); upb_test_Proto2FakeEnumMessage_parse(pb, size, arena.ptr());
EXPECT_EQ(true, upb_test_Proto2FakeEnumMessage_has_optional_enum(fake_msg2)); EXPECT_EQ(true, upb_test_Proto2FakeEnumMessage_has_optional_enum(fake_msg2));
vals_const = EXPECT_EQ(999, upb_test_Proto2FakeEnumMessage_optional_enum(fake_msg2));
upb_test_Proto2FakeEnumMessage_repeated_enum(fake_msg2, &size);
EXPECT_EQ(6, size);
int32_t expected[] = { int32_t expected[] = {
upb_test_Proto2EnumMessage_ZERO, upb_test_Proto2EnumMessage_ZERO,
upb_test_Proto2EnumMessage_SMALL, upb_test_Proto2EnumMessage_SMALL,
@ -199,6 +211,16 @@ TEST(MessageTest, Proto2Enum) {
7, 7,
888, 888,
}; };
vals_const =
upb_test_Proto2FakeEnumMessage_repeated_enum(fake_msg2, &size);
EXPECT_EQ(6, size);
EXPECT_THAT(std::vector<int32_t>(vals_const, vals_const + size),
::testing::ElementsAreArray(expected));
vals_const =
upb_test_Proto2FakeEnumMessage_packed_enum(fake_msg2, &size);
EXPECT_EQ(6, size);
EXPECT_THAT(std::vector<int32_t>(vals_const, vals_const + size), EXPECT_THAT(std::vector<int32_t>(vals_const, vals_const + size),
::testing::ElementsAreArray(expected)); ::testing::ElementsAreArray(expected));
} }

@ -69,7 +69,7 @@ message Proto2EnumMessage {
optional Proto2TestEnum optional_enum = 1; optional Proto2TestEnum optional_enum = 1;
repeated Proto2TestEnum repeated_enum = 2; repeated Proto2TestEnum repeated_enum = 2;
map<int32, Proto2TestEnum> int32_enum_map = 3; repeated Proto2TestEnum packed_enum = 3 [packed = true];
} }
// The same fields as Proto2EnumMessage, but with int32 fields so we can fake // The same fields as Proto2EnumMessage, but with int32 fields so we can fake
@ -77,5 +77,5 @@ message Proto2EnumMessage {
message Proto2FakeEnumMessage { message Proto2FakeEnumMessage {
optional int32 optional_enum = 1; optional int32 optional_enum = 1;
repeated int32 repeated_enum = 2; repeated int32 repeated_enum = 2;
map<int32, int32> int32_enum_map = 3; repeated int32 packed_enum = 3 [packed = true];
} }

Loading…
Cancel
Save