Merge pull request #36 from haberman/decoderfix

Decoder fix: skipped data at end of submessage.
pull/13171/head
Joshua Haberman 9 years ago
commit d56339e6ff
  1. 61
      tests/pb/test_decoder.cc
  2. 21
      tests/test_util.h
  3. 13
      upb/pb/compile_decoder_x64.dasc
  4. 595
      upb/pb/compile_decoder_x64.h
  5. 88
      upb/pb/decoder.c
  6. 1
      upb/pb/decoder.int.h

@ -138,7 +138,7 @@ void PrintBinary(const string& str) {
if (isprint(str[i])) { if (isprint(str[i])) {
fprintf(stderr, "%c", str[i]); fprintf(stderr, "%c", str[i]);
} else { } else {
fprintf(stderr, "\\x%02x", str[i]); fprintf(stderr, "\\x%02x", (int)(uint8_t)str[i]);
} }
} }
} }
@ -202,6 +202,16 @@ string submsg(uint32_t fn, const string& buf) {
return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) ); return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
} }
// Like delim()/submsg(), but intentionally encodes an incorrect length.
// These help test when a delimited boundary doesn't land in the right place.
string badlen_delim(int err, const string& buf) {
return cat(varint(buf.size() + err), buf);
}
string badlen_submsg(int err, uint32_t fn, const string& buf) {
return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), badlen_delim(err, buf) );
}
/* A set of handlers that covers all .proto types *****************************/ /* A set of handlers that covers all .proto types *****************************/
@ -436,6 +446,21 @@ upb::reffed_ptr<const upb::MessageDef> NewMessageDef() {
ASSERT(f->set_message_subdef(md.get(), NULL)); ASSERT(f->set_message_subdef(md.get(), NULL));
ASSERT(md->AddField(f.get(), NULL)); ASSERT(md->AddField(f.get(), NULL));
f = upb::FieldDef::New();
ASSERT(f->set_name("f_group", NULL));
ASSERT(f->set_number(UPB_DESCRIPTOR_TYPE_GROUP, NULL));
f->set_descriptor_type(UPB_DESCRIPTOR_TYPE_GROUP);
ASSERT(f->set_message_subdef(md.get(), NULL));
ASSERT(md->AddField(f.get(), NULL));
f = upb::FieldDef::New();
ASSERT(f->set_name("r_group", NULL));
ASSERT(f->set_number(rep_fn(UPB_DESCRIPTOR_TYPE_GROUP), NULL));
f->set_label(UPB_LABEL_REPEATED);
f->set_descriptor_type(UPB_DESCRIPTOR_TYPE_GROUP);
ASSERT(f->set_message_subdef(md.get(), NULL));
ASSERT(md->AddField(f.get(), NULL));
upb::reffed_ptr<upb::EnumDef> e = upb::EnumDef::New(); upb::reffed_ptr<upb::EnumDef> e = upb::EnumDef::New();
ASSERT(e->AddValue("FOO", 1, NULL)); ASSERT(e->AddValue("FOO", 1, NULL));
ASSERT(e->Freeze(NULL)); ASSERT(e->Freeze(NULL));
@ -492,6 +517,8 @@ upb::reffed_ptr<const upb::Handlers> NewHandlers(TestMode mode) {
// to this type, eg: message M { optional M m = 1; } // to this type, eg: message M { optional M m = 1; }
reg_subm(h.get(), UPB_DESCRIPTOR_TYPE_MESSAGE); reg_subm(h.get(), UPB_DESCRIPTOR_TYPE_MESSAGE);
reg_subm(h.get(), rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE)); reg_subm(h.get(), rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE));
reg_subm(h.get(), UPB_DESCRIPTOR_TYPE_GROUP);
reg_subm(h.get(), rep_fn(UPB_DESCRIPTOR_TYPE_GROUP));
// For NOP_FIELD we register no handlers, so we can pad a proto freely without // For NOP_FIELD we register no handlers, so we can pad a proto freely without
// changing the output. // changing the output.
@ -575,7 +602,6 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
} else { } else {
fprintf(stderr, "Expected to FAIL\n"); fprintf(stderr, "Expected to FAIL\n");
} }
fprintf(stderr, "Calling start()\n");
} }
bool ok = env->Start() && bool ok = env->Start() &&
@ -646,6 +672,12 @@ void assert_successful_parse(const string& proto,
void assert_does_not_parse_at_eof(const string& proto) { void assert_does_not_parse_at_eof(const string& proto) {
run_decoder(proto, NULL); run_decoder(proto, NULL);
// Also test that we fail to parse at end-of-submessage, not just
// end-of-message.
run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), thirty_byte_nop),
NULL);
} }
void assert_does_not_parse(const string& proto) { void assert_does_not_parse(const string& proto) {
@ -860,6 +892,18 @@ void test_invalid() {
tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_START_GROUP)), tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_START_GROUP)),
tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_END_GROUP))); tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_END_GROUP)));
// Unknown string extends past enclosing submessage.
assert_does_not_parse(
cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE,
submsg(12345, string(" "))),
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" "))));
// Unknown fixed-length field extends past enclosing submessage.
assert_does_not_parse(
cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE,
cat( tag(12345, UPB_WIRE_TYPE_64BIT), uint64(0))),
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" "))));
// Test exceeding the resource limit of stack depth. // Test exceeding the resource limit of stack depth.
string buf; string buf;
for (int i = 0; i <= MAX_NESTING; i++) { for (int i = 0; i <= MAX_NESTING; i++) {
@ -941,6 +985,19 @@ void test_valid() {
submsg(12345, string(" ")), submsg(12345, string(" ")),
"<\n>\n"); "<\n>\n");
// Unknown field inside a known submessage.
assert_successful_parse(
cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))),
tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT),
varint(5)),
LINE("<")
LINE("%u:{")
LINE(" <")
LINE(" >")
LINE("}")
LINE("%u:5")
LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE, UPB_DESCRIPTOR_TYPE_INT32);
// This triggered a previous bug in the decoder. // This triggered a previous bug in the decoder.
assert_successful_parse( assert_successful_parse(
cat( tag(UPB_DESCRIPTOR_TYPE_SFIXED32, UPB_WIRE_TYPE_VARINT), cat( tag(UPB_DESCRIPTOR_TYPE_SFIXED32, UPB_WIRE_TYPE_VARINT),

@ -53,10 +53,16 @@ class VerboseParserEnvironment {
bool Start() { bool Start() {
if (verbose_) {
fprintf(stderr, "Calling start()\n");
}
return sink_->Start(len_, &subc_); return sink_->Start(len_, &subc_);
} }
bool End() { bool End() {
if (verbose_) {
fprintf(stderr, "Calling end()\n");
}
return sink_->End(); return sink_->End();
} }
@ -111,21 +117,6 @@ class VerboseParserEnvironment {
} }
} }
if (status_.ok() != (parsed >= bytes)) {
if (status_.ok()) {
fprintf(stderr,
"Error: decode function returned short byte count but set no "
"error status\n");
} else {
fprintf(stderr,
"Error: decode function returned complete byte count but set "
"error status\n");
}
fprintf(stderr, "Status: %s, parsed=%u, bytes=%u\n",
status_.error_message(), (unsigned)parsed, (unsigned)bytes);
ASSERT(false);
}
if (!status_.ok()) if (!status_.ok())
return false; return false;

@ -300,7 +300,7 @@ static void emit_static_asm(jitcompiler *jc) {
| mov rcx, DELIMEND | mov rcx, DELIMEND
| sub rcx, PTR | sub rcx, PTR
| sub rcx, rdx | sub rcx, rdx
| jb ->err // Len is greater than enclosing message. | jb >4 // Len is greater than enclosing message.
| mov FRAME->end_ofs, rcx | mov FRAME->end_ofs, rcx
| cmp FRAME, DECODER->limit | cmp FRAME, DECODER->limit
| je >3 // Stack overflow | je >3 // Stack overflow
@ -319,7 +319,7 @@ static void emit_static_asm(jitcompiler *jc) {
|2: |2:
| ret | ret
|3: |3:
| // Error -- call seterr. | // Stack overflow error.
| mov PTR, DECODER->checkpoint // Rollback to before the delim len. | mov PTR, DECODER->checkpoint // Rollback to before the delim len.
| // Prepare seterr args. | // Prepare seterr args.
| mov ARG1_64, DECODER | mov ARG1_64, DECODER
@ -327,6 +327,15 @@ static void emit_static_asm(jitcompiler *jc) {
| callp upb_pbdecoder_seterr | callp upb_pbdecoder_seterr
| call ->suspend | call ->suspend
| jmp <1 | jmp <1
|4:
| // Overextended len.
| mov PTR, DECODER->checkpoint // Rollback to before the delim len.
| // Prepare seterr args.
| mov ARG1_64, DECODER
| ld64 kPbDecoderSubmessageTooLong
| callp upb_pbdecoder_seterr
| call ->suspend
| jmp <1
| |
| // For getting a value that spans a buffer seam. Falls back to C. | // For getting a value that spans a buffer seam. Falls back to C.
|.macro getvalue_slow, func, bytes |.macro getvalue_slow, func, bytes

File diff suppressed because it is too large Load Diff

@ -26,6 +26,8 @@
/* Error messages that are shared between the bytecode and JIT decoders. */ /* Error messages that are shared between the bytecode and JIT decoders. */
const char *kPbDecoderStackOverflow = "Nesting too deep."; const char *kPbDecoderStackOverflow = "Nesting too deep.";
const char *kPbDecoderSubmessageTooLong =
"Submessage end extends past enclosing submessage.";
/* Error messages shared within this file. */ /* Error messages shared within this file. */
static const char *kUnterminatedVarint = "Unterminated varint."; static const char *kUnterminatedVarint = "Unterminated varint.";
@ -114,11 +116,21 @@ static size_t curbufleft(const upb_pbdecoder *d) {
return d->data_end - d->ptr; return d->data_end - d->ptr;
} }
/* How many bytes are available before end-of-buffer. */
static size_t bufleft(const upb_pbdecoder *d) {
return d->end - d->ptr;
}
/* Overall stream offset of d->ptr. */ /* Overall stream offset of d->ptr. */
uint64_t offset(const upb_pbdecoder *d) { uint64_t offset(const upb_pbdecoder *d) {
return d->bufstart_ofs + (d->ptr - d->buf); return d->bufstart_ofs + (d->ptr - d->buf);
} }
/* How many bytes are available before the end of this delimited region. */
size_t delim_remaining(const upb_pbdecoder *d) {
return d->top->end_ofs - offset(d);
}
/* Advances d->ptr. */ /* Advances d->ptr. */
static void advance(upb_pbdecoder *d, size_t len) { static void advance(upb_pbdecoder *d, size_t len) {
assert(curbufleft(d) >= len); assert(curbufleft(d) >= len);
@ -175,7 +187,11 @@ static void checkpoint(upb_pbdecoder *d) {
*/ */
static int32_t skip(upb_pbdecoder *d, size_t bytes) { static int32_t skip(upb_pbdecoder *d, size_t bytes) {
assert(!in_residual_buf(d, d->ptr) || d->size_param == 0); assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
if (curbufleft(d) > bytes) { assert(d->skip == 0);
if (bytes > delim_remaining(d)) {
seterr(d, "Skipped value extended beyond enclosing submessage.");
return upb_pbdecoder_suspend(d);
} else if (bufleft(d) > bytes) {
/* Skipped data is all in current buffer, and more is still available. */ /* Skipped data is all in current buffer, and more is still available. */
advance(d, bytes); advance(d, bytes);
d->skip = 0; d->skip = 0;
@ -211,7 +227,9 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
d->checkpoint = d->ptr; d->checkpoint = d->ptr;
if (d->skip) { if (d->skip) {
CHECK_RETURN(skip(d, d->skip)); size_t skip_bytes = d->skip;
d->skip = 0;
CHECK_RETURN(skip(d, skip_bytes));
d->checkpoint = d->ptr; d->checkpoint = d->ptr;
} }
@ -450,7 +468,7 @@ static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
upb_pbdecoder_frame *fr = d->top; upb_pbdecoder_frame *fr = d->top;
if (end > fr->end_ofs) { if (end > fr->end_ofs) {
seterr(d, "Submessage end extends past enclosing submessage."); seterr(d, kPbDecoderSubmessageTooLong);
return false; return false;
} else if (fr == d->limit) { } else if (fr == d->limit) {
seterr(d, kPbDecoderStackOverflow); seterr(d, kPbDecoderStackOverflow);
@ -554,34 +572,7 @@ have_tag:
return DECODE_OK; return DECODE_OK;
} }
if (d->ptr == d->delim_end) { /* Unknown group -- continue looping over unknown fields. */
seterr(d, "Enclosing submessage ended in the middle of value or group");
/* Unlike most errors we notice during parsing, right now we have consumed
* all of the user's input.
*
* There are three different options for how to handle this case:
*
* 1. decode() = short count, error = set
* 2. decode() = full count, error = set
* 3. decode() = full count, error NOT set, short count and error will
* be reported on next call to decode() (or end())
*
* (1) and (3) have the advantage that they preserve the invariant that an
* error occurs iff decode() returns a short count.
*
* (2) and (3) have the advantage of reflecting the fact that all of the
* bytes were in fact parsed (and possibly delivered to the unknown field
* handler, in the future when that is supported).
*
* (3) requires extra state in the decode (a place to store the "permanent
* error" that we should return for all subsequent attempts to decode).
* But we likely want this anyway.
*
* Right now we do (1), thanks to the fact that we checkpoint *after* this
* check. (3) may be a better choice long term; unclear at the moment. */
return upb_pbdecoder_suspend(d);
}
checkpoint(d); checkpoint(d);
} }
} }
@ -605,7 +596,7 @@ static int32_t dispatch(upb_pbdecoder *d) {
uint8_t wire_type; uint8_t wire_type;
uint32_t fieldnum; uint32_t fieldnum;
upb_value val; upb_value val;
int32_t ret; int32_t retval;
/* Decode tag. */ /* Decode tag. */
CHECK_RETURN(decode_v32(d, &tag)); CHECK_RETURN(decode_v32(d, &tag));
@ -629,23 +620,25 @@ static int32_t dispatch(upb_pbdecoder *d) {
} }
} }
/* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG
* bytecode that triggered this is preceded by a CHECKDELIM bytecode which
* we need to back up to, so that when we're done skipping unknown data we
* can re-check the delimited end. */
d->last--; /* Necessary if we get suspended */
d->pc = d->last;
assert(getop(*d->last) == OP_CHECKDELIM);
/* Unknown field or ENDGROUP. */ /* Unknown field or ENDGROUP. */
ret = upb_pbdecoder_skipunknown(d, fieldnum, wire_type); retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
CHECK_RETURN(retval);
if (ret == DECODE_ENDGROUP) { if (retval == DECODE_ENDGROUP) {
goto_endmsg(d); goto_endmsg(d);
return DECODE_OK; return DECODE_OK;
} else if (ret == DECODE_OK) {
/* We just consumed some input, so we might now have consumed all the data
* in the delmited region. Since every opcode that can trigger dispatch is
* directly preceded by OP_CHECKDELIM, rewind to it now to re-check the
* delimited end. */
d->pc = d->last - 1;
assert(getop(*d->pc) == OP_CHECKDELIM);
return DECODE_OK;
} }
return ret; return DECODE_OK;
} }
/* Callers know that the stack is more than one deep because the opcodes that /* Callers know that the stack is more than one deep because the opcodes that
@ -741,7 +734,7 @@ size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg)); CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
) )
VMCASE(OP_STARTSTR, VMCASE(OP_STARTSTR,
uint32_t len = d->top->end_ofs - offset(d); uint32_t len = delim_remaining(d);
upb_pbdecoder_frame *outer = outer_frame(d); upb_pbdecoder_frame *outer = outer_frame(d);
CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink)); CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
if (len == 0) { if (len == 0) {
@ -752,7 +745,7 @@ size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
uint32_t len = curbufleft(d); uint32_t len = curbufleft(d);
size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle); size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
if (n > len) { if (n > len) {
if (n > d->top->end_ofs - offset(d)) { if (n > delim_remaining(d)) {
seterr(d, "Tried to skip past end of string."); seterr(d, "Tried to skip past end of string.");
return upb_pbdecoder_suspend(d); return upb_pbdecoder_suspend(d);
} else { } else {
@ -903,6 +896,11 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
return false; return false;
} }
if (d->skip) {
seterr(d, "Unexpected EOF inside skipped data");
return false;
}
if (d->top->end_ofs != UINT64_MAX) { if (d->top->end_ofs != UINT64_MAX) {
seterr(d, "Unexpected EOF inside delimited string"); seterr(d, "Unexpected EOF inside delimited string");
return false; return false;

@ -273,6 +273,7 @@ void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg);
/* Error messages that are shared between the bytecode and JIT decoders. */ /* Error messages that are shared between the bytecode and JIT decoders. */
extern const char *kPbDecoderStackOverflow; extern const char *kPbDecoderStackOverflow;
extern const char *kPbDecoderSubmessageTooLong;
/* Access to decoderplan members needed by the decoder. */ /* Access to decoderplan members needed by the decoder. */
const char *upb_pbdecoder_getopname(unsigned int op); const char *upb_pbdecoder_getopname(unsigned int op);

Loading…
Cancel
Save