Restructured for simplicity and fixed fasttable parser.

pull/13171/head
Joshua Haberman 3 years ago
parent 62bae8fff9
commit f7980b7ed1
  1. 58
      upb/decode.c
  2. 5
      upb/decode_fast.c
  3. 2
      upb/def.c
  4. 89
      upb/msg_test.cc
  5. 14
      upb/msg_test.proto

@ -326,9 +326,7 @@ static const char *decode_tosubmsg2(upb_decstate *d, const char *ptr,
int size) {
int saved_delta = decode_pushlimit(d, ptr, size);
if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
if (!decode_isdone(d, &ptr)) {
ptr = decode_msg(d, ptr, submsg, subl);
}
ptr = decode_msg(d, ptr, submsg, subl);
if (d->end_group != DECODE_NOGROUP) return decode_err(d, kUpb_DecodeStatus_Malformed);
decode_poplimit(d, ptr, saved_delta);
d->depth++;
@ -662,15 +660,21 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
return ptr;
}
UPB_FORCEINLINE
static bool decode_checkrequired(upb_decstate *d, const upb_msg *msg,
const upb_msglayout *l) {
if (UPB_LIKELY(l->required_count == 0)) return true;
UPB_NOINLINE
const char *decode_checkrequired(upb_decstate *d, const char *ptr,
const upb_msg *msg, const upb_msglayout *l) {
assert(l->required_count);
if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) {
return ptr;
}
uint64_t required_mask = ((1 << l->required_count) - 1) << 1;
uint64_t msg_head;
memcpy(&msg_head, msg, 8);
msg_head = _upb_be_swap64(msg_head);
return (required_mask & ~msg_head) == 0;
if (required_mask & ~msg_head) {
d->missing_required = true;
}
return ptr;
}
UPB_FORCEINLINE
@ -878,7 +882,12 @@ static const char *decode_unknown(upb_decstate *d, const char *ptr,
wireval val) {
if (field_number == 0) return decode_err(d, kUpb_DecodeStatus_Malformed);
// Since unknown fields are the uncommon case, we do a little extra work here
// to walk backwards through the buffer to find the field start. This frees
// up a register in the fast paths (when the field is known), which leads to
// significant speedups in benchmarks.
const char *start = ptr;
if (wire_type == UPB_WIRE_TYPE_DELIMITED) ptr += val.size;
if (msg) {
switch (wire_type) {
@ -922,7 +931,14 @@ UPB_NOINLINE
static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *layout) {
int last_field_index = 0;
while (true) {
#if UPB_FASTTABLE
// The first time we want to skip fast dispatch, because we may have just been
// invoked by the fast parser to handle a case that it bailed on.
if (!decode_isdone(d, &ptr)) goto nofast;
#endif
while (!decode_isdone(d, &ptr)) {
uint32_t tag;
const upb_msglayout_field *field;
int field_number;
@ -930,6 +946,12 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
wireval val;
int op;
if (decode_tryfastdispatch(d, &ptr, msg, layout)) break;
#if UPB_FASTTABLE
nofast:
#endif
#ifndef NDEBUG
d->debug_tagstart = ptr;
#endif
@ -969,17 +991,11 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
}
}
}
if (decode_isdone(d, &ptr)) break;
if (decode_tryfastdispatch(d, &ptr, msg, layout)) break;
}
if (UPB_UNLIKELY(d->options & kUpb_DecodeOption_CheckRequired) &&
!decode_checkrequired(d, msg, layout)) {
d->missing_required = true;
}
return ptr;
return UPB_UNLIKELY(layout->required_count)
? decode_checkrequired(d, ptr, msg, layout)
: ptr;
}
const char *fastdecode_generic(struct upb_decstate *d, const char *ptr,
@ -1006,9 +1022,7 @@ upb_DecodeStatus _upb_decode(const char *buf, size_t size, void *msg,
upb_decstate state;
unsigned depth = (unsigned)options >> 16;
if (size == 0) {
return kUpb_DecodeStatus_Ok;
} else if (size <= 16) {
if (size <= 16) {
memset(&state.patch, 0, 32);
memcpy(&state.patch, buf, size);
buf = state.patch;
@ -1033,7 +1047,7 @@ upb_DecodeStatus _upb_decode(const char *buf, size_t size, void *msg,
state.arena.parent = arena;
upb_DecodeStatus status = UPB_SETJMP(state.err);
if (UPB_LIKELY(!status)) {
if (UPB_LIKELY(status == 0)) {
status = decode_top(&state, buf, msg, l);
}

@ -84,7 +84,10 @@ static const char *fastdecode_dispatch(UPB_PARSE_PARAMS) {
if (UPB_LIKELY(overrun == d->limit)) {
// Parse is finished.
*(uint32_t*)msg |= hasbits; // Sync hasbits.
return ptr;
const upb_msglayout *l = decode_totablep(table);
return UPB_UNLIKELY(l->required_count)
? decode_checkrequired(d, ptr, msg, l)
: ptr;
} else {
data = overrun;
UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS);

@ -2214,7 +2214,7 @@ static void create_fielddef(
if (ctx->symtab->allow_name_conflicts &&
deftype(existing_v) == UPB_DEFTYPE_FIELD_JSONNAME) {
// Field name takes precedence over json name.
upb_strtable_remove2(&m->ntof, shortname, strlen(shortname), NULL);
upb_strtable_remove(&m->ntof, shortname, NULL);
} else {
symtab_errf(ctx, "duplicate field name (%s)", shortname);
}

@ -158,7 +158,8 @@ TEST(MessageTest, MessageSet) {
TEST(MessageTest, Proto2Enum) {
upb::Arena arena;
upb_test_Proto2FakeEnumMessage *fake_msg = upb_test_Proto2FakeEnumMessage_new(arena.ptr());
upb_test_Proto2FakeEnumMessage *fake_msg =
upb_test_Proto2FakeEnumMessage_new(arena.ptr());
upb_test_Proto2FakeEnumMessage_set_optional_enum(fake_msg, 999);
@ -231,3 +232,89 @@ TEST(MessageTest, TestBadUTF8) {
EXPECT_EQ(nullptr, protobuf_test_messages_proto3_TestAllTypesProto3_parse(
serialized.data(), serialized.size(), arena.ptr()));
}
TEST(MessageTest, RequiredFieldsTopLevelMessage) {
upb::Arena arena;
upb_test_TestRequiredFields *test_msg;
upb_test_EmptyMessage *empty_msg;
// Succeeds, because we did not request required field checks.
test_msg = upb_test_TestRequiredFields_parse(NULL, 0, arena.ptr());
EXPECT_NE(nullptr, test_msg);
// Fails, because required fields are missing.
EXPECT_EQ(kUpb_DecodeStatus_MissingRequired,
_upb_decode(NULL, 0, test_msg, &upb_test_TestRequiredFields_msginit,
NULL, kUpb_DecodeOption_CheckRequired, arena.ptr()));
upb_test_TestRequiredFields_set_required_int32(test_msg, 1);
size_t size;
char *serialized =
upb_test_TestRequiredFields_serialize(test_msg, arena.ptr(), &size);
ASSERT_TRUE(serialized != nullptr);
EXPECT_NE(0, size);
// Fails, but the code path is slightly different because the serialized
// payload is not empty.
EXPECT_EQ(kUpb_DecodeStatus_MissingRequired,
_upb_decode(serialized, size, test_msg,
&upb_test_TestRequiredFields_msginit, NULL,
kUpb_DecodeOption_CheckRequired, arena.ptr()));
empty_msg = upb_test_EmptyMessage_new(arena.ptr());
upb_test_TestRequiredFields_set_required_int32(test_msg, 1);
upb_test_TestRequiredFields_set_required_int64(test_msg, 2);
upb_test_TestRequiredFields_set_required_message(test_msg, empty_msg);
// Succeeds, because required fields are present (though not in the input).
EXPECT_EQ(kUpb_DecodeStatus_Ok,
_upb_decode(NULL, 0, test_msg, &upb_test_TestRequiredFields_msginit,
NULL, kUpb_DecodeOption_CheckRequired, arena.ptr()));
// Serialize a complete payload.
serialized =
upb_test_TestRequiredFields_serialize(test_msg, arena.ptr(), &size);
ASSERT_TRUE(serialized != nullptr);
EXPECT_NE(0, size);
upb_test_TestRequiredFields *test_msg2 = upb_test_TestRequiredFields_parse_ex(
serialized, size, NULL, kUpb_DecodeOption_CheckRequired, arena.ptr());
EXPECT_NE(nullptr, test_msg2);
// When we add an incomplete sub-message, this is not flagged by the parser.
// This makes parser checking unsuitable for MergeFrom().
upb_test_TestRequiredFields_set_optional_message(
test_msg2, upb_test_TestRequiredFields_new(arena.ptr()));
EXPECT_EQ(kUpb_DecodeStatus_Ok,
_upb_decode(serialized, size, test_msg2,
&upb_test_TestRequiredFields_msginit, NULL,
kUpb_DecodeOption_CheckRequired, arena.ptr()));
}
TEST(MessageTest, RequiredFieldsSubMessage) {
upb::Arena arena;
upb_test_TestRequiredFields *test_msg =
upb_test_TestRequiredFields_new(arena.ptr());
upb_test_SubMessageHasRequired *sub_msg =
upb_test_SubMessageHasRequired_new(arena.ptr());
upb_test_EmptyMessage *empty_msg = upb_test_EmptyMessage_new(arena.ptr());
upb_test_SubMessageHasRequired_set_optional_message(sub_msg, test_msg);
size_t size;
char *serialized =
upb_test_SubMessageHasRequired_serialize(sub_msg, arena.ptr(), &size);
EXPECT_NE(0, size);
// No parse error when parsing normally.
EXPECT_NE(nullptr, upb_test_SubMessageHasRequired_parse(serialized, size, arena.ptr()));
fprintf(stderr, "YO, here goes!\n");
// Parse error when verifying required fields, due to incomplete sub-message.
EXPECT_EQ(nullptr, upb_test_SubMessageHasRequired_parse_ex(
serialized, size, NULL,
kUpb_DecodeOption_CheckRequired, arena.ptr()));
upb_test_TestRequiredFields_set_required_int32(test_msg, 1);
upb_test_TestRequiredFields_set_required_int64(test_msg, 2);
upb_test_TestRequiredFields_set_required_message(test_msg, empty_msg);
}

@ -79,3 +79,17 @@ message Proto2FakeEnumMessage {
repeated int32 repeated_enum = 2;
repeated int32 packed_enum = 3 [packed = true];
}
message EmptyMessage {}
message TestRequiredFields {
required int32 required_int32 = 1;
optional int32 optional_int32 = 2;
required int64 required_int64 = 3;
optional TestRequiredFields optional_message = 4;
required EmptyMessage required_message = 5;
}
message SubMessageHasRequired {
optional TestRequiredFields optional_message = 1;
}

Loading…
Cancel
Save