More decoder fixes, and slightly changed parse call semantics.

Prior to this change, if an error was returned, it would be
guaranteed to always return a short byte count.  Now the two
concepts are a bit more orthogonal.  There are cases where
the entire input is consumed even though an error was
encountered.
pull/13171/head
Josh Haberman 10 years ago
parent fe427341f2
commit 85440108e5
  1. 48
      tests/pb/test_decoder.cc
  2. 15
      tests/test_util.h
  3. 13
      upb/pb/compile_decoder_x64.dasc
  4. 595
      upb/pb/compile_decoder_x64.h
  5. 44
      upb/pb/decoder.c
  6. 1
      upb/pb/decoder.int.h

@ -202,6 +202,16 @@ string submsg(uint32_t fn, const string& buf) {
return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
}
// Like delim()/submsg(), but intentionally encodes an incorrect length.
// These help test when a delimited boundary doesn't land in the right place.
string badlen_delim(int err, const string& buf) {
return cat(varint(buf.size() + err), buf);
}
string badlen_submsg(int err, uint32_t fn, const string& buf) {
return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), badlen_delim(err, buf) );
}
/* A set of handlers that covers all .proto types *****************************/
@ -436,6 +446,21 @@ upb::reffed_ptr<const upb::MessageDef> NewMessageDef() {
ASSERT(f->set_message_subdef(md.get(), NULL));
ASSERT(md->AddField(f.get(), NULL));
f = upb::FieldDef::New();
ASSERT(f->set_name("f_group", NULL));
ASSERT(f->set_number(UPB_DESCRIPTOR_TYPE_GROUP, NULL));
f->set_descriptor_type(UPB_DESCRIPTOR_TYPE_GROUP);
ASSERT(f->set_message_subdef(md.get(), NULL));
ASSERT(md->AddField(f.get(), NULL));
f = upb::FieldDef::New();
ASSERT(f->set_name("r_group", NULL));
ASSERT(f->set_number(rep_fn(UPB_DESCRIPTOR_TYPE_GROUP), NULL));
f->set_label(UPB_LABEL_REPEATED);
f->set_descriptor_type(UPB_DESCRIPTOR_TYPE_GROUP);
ASSERT(f->set_message_subdef(md.get(), NULL));
ASSERT(md->AddField(f.get(), NULL));
upb::reffed_ptr<upb::EnumDef> e = upb::EnumDef::New();
ASSERT(e->AddValue("FOO", 1, NULL));
ASSERT(e->Freeze(NULL));
@ -492,6 +517,8 @@ upb::reffed_ptr<const upb::Handlers> NewHandlers(TestMode mode) {
// to this type, eg: message M { optional M m = 1; }
reg_subm(h.get(), UPB_DESCRIPTOR_TYPE_MESSAGE);
reg_subm(h.get(), rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE));
reg_subm(h.get(), UPB_DESCRIPTOR_TYPE_GROUP);
reg_subm(h.get(), rep_fn(UPB_DESCRIPTOR_TYPE_GROUP));
// For NOP_FIELD we register no handlers, so we can pad a proto freely without
// changing the output.
@ -645,6 +672,12 @@ void assert_successful_parse(const string& proto,
void assert_does_not_parse_at_eof(const string& proto) {
run_decoder(proto, NULL);
// Also test that we fail to parse at end-of-submessage, not just
// end-of-message.
run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), thirty_byte_nop),
NULL);
}
void assert_does_not_parse(const string& proto) {
@ -859,6 +892,18 @@ void test_invalid() {
tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_START_GROUP)),
tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_END_GROUP)));
// Unknown string extends past enclosing submessage.
assert_does_not_parse(
cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE,
submsg(12345, string(" "))),
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" "))));
// Unknown fixed-length field extends past enclosing submessage.
assert_does_not_parse(
cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE,
cat( tag(12345, UPB_WIRE_TYPE_64BIT), uint64(0))),
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" "))));
// Test exceeding the resource limit of stack depth.
string buf;
for (int i = 0; i <= MAX_NESTING; i++) {
@ -942,8 +987,7 @@ void test_valid() {
// Unknown field inside a known submessage.
assert_successful_parse(
cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE,
submsg(12345, string(" "))),
cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))),
tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT),
varint(5)),
LINE("<")

@ -117,21 +117,6 @@ class VerboseParserEnvironment {
}
}
if (status_.ok() != (parsed >= bytes)) {
if (status_.ok()) {
fprintf(stderr,
"Error: decode function returned short byte count but set no "
"error status\n");
} else {
fprintf(stderr,
"Error: decode function returned complete byte count but set "
"error status\n");
}
fprintf(stderr, "Status: %s, parsed=%u, bytes=%u\n",
status_.error_message(), (unsigned)parsed, (unsigned)bytes);
ASSERT(false);
}
if (!status_.ok())
return false;

@ -300,7 +300,7 @@ static void emit_static_asm(jitcompiler *jc) {
| mov rcx, DELIMEND
| sub rcx, PTR
| sub rcx, rdx
| jb ->err // Len is greater than enclosing message.
| jb >4 // Len is greater than enclosing message.
| mov FRAME->end_ofs, rcx
| cmp FRAME, DECODER->limit
| je >3 // Stack overflow
@ -319,7 +319,7 @@ static void emit_static_asm(jitcompiler *jc) {
|2:
| ret
|3:
| // Error -- call seterr.
| // Stack overflow error.
| mov PTR, DECODER->checkpoint // Rollback to before the delim len.
| // Prepare seterr args.
| mov ARG1_64, DECODER
@ -327,6 +327,15 @@ static void emit_static_asm(jitcompiler *jc) {
| callp upb_pbdecoder_seterr
| call ->suspend
| jmp <1
|4:
| // Overextended len.
| mov PTR, DECODER->checkpoint // Rollback to before the delim len.
| // Prepare seterr args.
| mov ARG1_64, DECODER
| ld64 kPbDecoderSubmessageTooLong
| callp upb_pbdecoder_seterr
| call ->suspend
| jmp <1
|
| // For getting a value that spans a buffer seam. Falls back to C.
|.macro getvalue_slow, func, bytes

File diff suppressed because it is too large Load Diff

@ -26,6 +26,8 @@
/* Error messages that are shared between the bytecode and JIT decoders. */
const char *kPbDecoderStackOverflow = "Nesting too deep.";
const char *kPbDecoderSubmessageTooLong =
"Submessage end extends past enclosing submessage.";
/* Error messages shared within this file. */
static const char *kUnterminatedVarint = "Unterminated varint.";
@ -185,8 +187,11 @@ static void checkpoint(upb_pbdecoder *d) {
*/
static int32_t skip(upb_pbdecoder *d, size_t bytes) {
assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
assert(bytes <= delim_remaining(d));
if (bufleft(d) > bytes) {
assert(d->skip == 0);
if (bytes > delim_remaining(d)) {
seterr(d, "Skipped value extended beyond enclosing submessage.");
return upb_pbdecoder_suspend(d);
} else if (bufleft(d) > bytes) {
/* Skipped data is all in current buffer, and more is still available. */
advance(d, bytes);
d->skip = 0;
@ -222,7 +227,9 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
d->checkpoint = d->ptr;
if (d->skip) {
CHECK_RETURN(skip(d, d->skip));
size_t skip_bytes = d->skip;
d->skip = 0;
CHECK_RETURN(skip(d, skip_bytes));
d->checkpoint = d->ptr;
}
@ -461,7 +468,7 @@ static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
upb_pbdecoder_frame *fr = d->top;
if (end > fr->end_ofs) {
seterr(d, "Submessage end extends past enclosing submessage.");
seterr(d, kPbDecoderSubmessageTooLong);
return false;
} else if (fr == d->limit) {
seterr(d, kPbDecoderStackOverflow);
@ -565,34 +572,7 @@ have_tag:
return DECODE_OK;
}
if (d->ptr == d->delim_end) {
seterr(d, "Enclosing submessage ended in the middle of value or group");
/* Unlike most errors we notice during parsing, right now we have consumed
* all of the user's input.
*
* There are three different options for how to handle this case:
*
* 1. decode() = short count, error = set
* 2. decode() = full count, error = set
* 3. decode() = full count, error NOT set, short count and error will
* be reported on next call to decode() (or end())
*
* (1) and (3) have the advantage that they preserve the invariant that an
* error occurs iff decode() returns a short count.
*
* (2) and (3) have the advantage of reflecting the fact that all of the
* bytes were in fact parsed (and possibly delivered to the unknown field
* handler, in the future when that is supported).
*
* (3) requires extra state in the decode (a place to store the "permanent
* error" that we should return for all subsequent attempts to decode).
* But we likely want this anyway.
*
* Right now we do (1), thanks to the fact that we checkpoint *after* this
* check. (3) may be a better choice long term; unclear at the moment. */
return upb_pbdecoder_suspend(d);
}
/* Unknown group -- continue looping over unknown fields. */
checkpoint(d);
}
}

@ -273,6 +273,7 @@ void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg);
/* Error messages that are shared between the bytecode and JIT decoders. */
extern const char *kPbDecoderStackOverflow;
extern const char *kPbDecoderSubmessageTooLong;
/* Access to decoderplan members needed by the decoder. */
const char *upb_pbdecoder_getopname(unsigned int op);

Loading…
Cancel
Save