Merge pull request #38 from haberman/decoderfix2

Added lots of decoder tests and fixed lots of bugs.
pull/13171/head
Joshua Haberman 10 years ago
commit 77d45edfb3
  1. 6
      tests/json/test_json.cc
  2. 107
      tests/pb/test_decoder.cc
  3. 55
      tests/test_util.h
  4. 7
      upb/pb/compile_decoder.c
  5. 63
      upb/pb/decoder.c
  6. 7
      upb/pb/decoder.h
  7. 7
      upb/pb/decoder.int.h

@ -295,17 +295,15 @@ void test_json_roundtrip_message(const char* json_src,
upb::json::Parser* parser =
upb::json::Parser::Create(env.env(), printer->input());
env.ResetBytesSink(parser->input());
env.Reset(json_src, strlen(json_src), false);
env.Reset(json_src, strlen(json_src), false, false);
bool ok = env.Start() &&
env.ParseBuffer(seam) &&
env.ParseBuffer(-1) &&
env.End();
if (!ok) {
fprintf(stderr, "upb parse error: %s\n", env.status().error_message());
}
ASSERT(ok);
ASSERT(env.CheckConsistency());
if (memcmp(json_expected,
data_sink.Data().data(),

@ -32,6 +32,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sstream>
#include "tests/test_util.h"
#include "tests/upb_test.h"
@ -176,6 +177,13 @@ string cat(const string& a, const string& b,
return ret;
}
// Formats |num| as text by streaming it through operator<< into a string
// stream, so any type with a stream insertion operator is accepted.
template <typename T>
string num2string(T num) {
  std::ostringstream formatter;
  formatter << num;
  string text = formatter.str();
  return text;
}
string varint(uint64_t x) {
char buf[UPB_PB_VARINT_MAX_LEN];
size_t len = upb_vencode64(x, buf);
@ -202,6 +210,11 @@ string submsg(uint32_t fn, const string& buf) {
return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
}
// Encodes |buf| as the payload of a group field numbered |fn|: a START_GROUP
// tag, the payload bytes unchanged, then the matching END_GROUP tag.
string group(uint32_t fn, const string& buf) {
  const string open_tag = tag(fn, UPB_WIRE_TYPE_START_GROUP);
  const string close_tag = tag(fn, UPB_WIRE_TYPE_END_GROUP);
  return cat(open_tag, buf, close_tag);
}
// Like delim()/submsg(), but intentionally encodes an incorrect length.
// These help test when a delimited boundary doesn't land in the right place.
string badlen_delim(int err, const string& buf) {
@ -557,15 +570,14 @@ uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
}
void CheckBytesParsed(const upb::pb::Decoder& decoder, size_t ofs) {
// We could have parsed as many as 10 bytes fewer than what the decoder
// previously accepted, since we can buffer up to 10 partial bytes internally
// before accumulating an entire value.
const int MAX_BUFFERED = 10;
// We can't have parsed more data than the decoder callback is telling us it
// parsed.
ASSERT(decoder.BytesParsed() <= ofs);
ASSERT(ofs <= (decoder.BytesParsed() + MAX_BUFFERED));
// The difference between what we've decoded and what the decoder has accepted
// represents the internally buffered amount. This amount should not exceed
// this value which comes from decoder.int.h.
ASSERT(ofs <= (decoder.BytesParsed() + UPB_DECODER_MAX_RESIDUAL_BYTES));
}
static bool parse(VerboseParserEnvironment* env,
@ -582,7 +594,7 @@ static bool parse(VerboseParserEnvironment* env,
void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
const string& proto, const string* expected_output,
size_t i, size_t j, bool may_skip) {
env->Reset(proto.c_str(), proto.size(), may_skip);
env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL);
decoder->Reset();
testhash = Hash(proto, expected_output, i, j, may_skip);
@ -598,7 +610,14 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
PrintBinary(proto);
fprintf(stderr, "\n");
if (expected_output) {
fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
if (test_mode == ALL_HANDLERS) {
fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
} else if (test_mode == NO_HANDLERS) {
fprintf(stderr,
"No handlers are registered, BUT if they were "
"the expected output would be: %s\n",
expected_output->c_str());
}
} else {
fprintf(stderr, "Expected to FAIL\n");
}
@ -610,7 +629,7 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
parse(env, *decoder, -1) &&
env->End();
ASSERT(ok == env->status().ok());
ASSERT(env->CheckConsistency());
if (test_mode == ALL_HANDLERS) {
if (expected_output) {
@ -618,18 +637,12 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
output.c_str(), expected_output->c_str());
}
if (!ok) {
fprintf(stderr, "Failed: %s\n", env->status().error_message());
}
ASSERT(ok);
ASSERT(output == *expected_output);
} else {
if (ok) {
fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n",
output.c_str());
} else if (filter_hash) {
fprintf(stderr, "Failed as we expected, with message: %s\n",
env->status().error_message());
}
ASSERT(!ok);
}
@ -657,6 +670,25 @@ void run_decoder(const string& proto, const string* expected_output) {
const static string thirty_byte_nop = cat(
tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(string(30, 'X')) );
// Returns |text| re-indented and enclosed in the textual framing used for a
// submessage stored in field number |fn|.
//
// Every newline in |text| except one that is the very last character gets the
// indent appended after it, so the nested lines sit one level deeper. The
// result is then surrounded by the "<", "<fn>:{", "}", ">" framing lines.
string wrap_text(int32_t fn, const string& text) {
  const string indented_newline = "\n ";
  string body = text;

  // Walk newline to newline. Stop at a newline that ends the text so that no
  // dangling indent is emitted after it.
  for (size_t nl = body.find("\n", 0);
       nl != string::npos && nl != body.size() - 1;
       nl = body.find("\n", nl)) {
    body.replace(nl, 1, indented_newline);
    // Resume the search after the text we just inserted.
    nl += indented_newline.size();
  }

  return cat(LINE("<"),
             num2string(fn),
             LINE(":{") " ",
             body,
             LINE("}") LINE(">"));
}
void assert_successful_parse(const string& proto,
const char *expected_fmt, ...) {
string expected_text;
@ -668,16 +700,27 @@ void assert_successful_parse(const string& proto,
// repeat once with no-op padding data at the end of buffer.
run_decoder(proto, &expected_text);
run_decoder(cat( proto, thirty_byte_nop ), &expected_text);
// Test that this also works when wrapped in a submessage or group.
// Indent the expected text one level and wrap it.
string wrapped_text1 = wrap_text(UPB_DESCRIPTOR_TYPE_MESSAGE, expected_text);
string wrapped_text2 = wrap_text(UPB_DESCRIPTOR_TYPE_GROUP, expected_text);
run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), &wrapped_text1);
run_decoder(group(UPB_DESCRIPTOR_TYPE_GROUP, proto), &wrapped_text2);
}
void assert_does_not_parse_at_eof(const string& proto) {
run_decoder(proto, NULL);
// Also test that we fail to parse at end-of-submessage, not just
// end-of-message.
run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), thirty_byte_nop),
NULL);
// end-of-message. But skip this if we have no handlers, because in that
// case we won't descend into the submessage.
if (test_mode != NO_HANDLERS) {
run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto),
thirty_byte_nop), NULL);
}
}
void assert_does_not_parse(const string& proto) {
@ -905,11 +948,13 @@ void test_invalid() {
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" "))));
// Test exceeding the resource limit of stack depth.
string buf;
for (int i = 0; i <= MAX_NESTING; i++) {
buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
if (test_mode != NO_HANDLERS) {
string buf;
for (int i = 0; i <= MAX_NESTING; i++) {
buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
}
assert_does_not_parse(buf);
}
assert_does_not_parse(buf);
}
void test_valid() {
@ -986,6 +1031,15 @@ void test_valid() {
"<\n>\n");
// Unknown field inside a known submessage.
assert_successful_parse(
submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))),
LINE("<")
LINE("%u:{")
LINE(" <")
LINE(" >")
LINE("}")
LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE);
assert_successful_parse(
cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))),
tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT),
@ -1162,7 +1216,9 @@ void test_valid() {
indentbuf(&textbuf, total - i - 1);
textbuf.append(">\n");
}
assert_successful_parse(buf, "%s", textbuf.c_str());
// Have to use run_decoder directly, because we are at max nesting and can't
// afford the extra nesting that assert_successful_parse() will do.
run_decoder(buf, &textbuf);
}
upb::reffed_ptr<const upb::pb::DecoderMethod> NewMethod(
@ -1207,10 +1263,11 @@ upb::reffed_ptr<const upb::pb::DecoderMethod> method =
upb::Sink sink(method->dest_handlers(), &closures[0]);
upb::pb::Decoder* decoder = CreateDecoder(env.env(), method.get(), &sink);
env.ResetBytesSink(decoder->input());
env.Reset(testdata[i].data, testdata[i].length, true);
env.Reset(testdata[i].data, testdata[i].length, true, false);
ASSERT(env.Start());
ASSERT(env.ParseBuffer(-1));
ASSERT(env.End());
ASSERT(env.CheckConsistency());
}
}

@ -29,16 +29,35 @@ class VerboseParserEnvironment {
public:
// Pass verbose=true to print detailed diagnostics to stderr.
VerboseParserEnvironment(bool verbose) : verbose_(verbose) {
env_.ReportErrorsTo(&status_);
env_.SetErrorFunction(&VerboseParserEnvironment::OnError, this);
}
void Reset(const char *buf, size_t len, bool may_skip) {
// Error callback installed on the environment; |ud| is the owning
// VerboseParserEnvironment. Records that an error occurred and reports it to
// stderr: unexpected errors are always printed, expected ones only when
// verbose. Always returns false.
static bool OnError(void *ud, const upb::Status* status) {
  VerboseParserEnvironment* self = static_cast<VerboseParserEnvironment*>(ud);
  self->saw_error_ = true;

  // An anticipated error stays silent unless verbose output was requested.
  if (self->expect_error_ && !self->verbose_) {
    return false;
  }

  const char* prefix = self->expect_error_
                           ? "Encountered error, as expected: "
                           : "Encountered unexpected error: ";
  fprintf(stderr, "%s", prefix);
  fprintf(stderr, "%s\n", status->error_message());
  return false;
}
// Prepares the environment for a new parse over the |len| bytes at |buf|.
//   may_skip:     selects the initial skip_until_ value (0 when true, -1 when
//                 false).
//   expect_error: whether the caller anticipates that this input will produce
//                 an error; consulted by OnError() and CheckConsistency().
// All per-run bookkeeping (offset, error/end flags, status) is cleared.
void Reset(const char *buf, size_t len, bool may_skip, bool expect_error) {
  // Forget everything recorded during the previous run.
  status_.Clear();
  saw_error_ = false;
  end_ok_set_ = false;
  skipped_with_null_ = false;

  // Install the new input and start reading from its beginning.
  buf_ = buf;
  len_ = len;
  ofs_ = 0;

  expect_error_ = expect_error;
  if (may_skip) {
    skip_until_ = 0;
  } else {
    skip_until_ = -1;
  }
}
// The user should call a series of:
@ -63,7 +82,26 @@ class VerboseParserEnvironment {
if (verbose_) {
fprintf(stderr, "Calling end()\n");
}
return sink_->End();
end_ok_ = sink_->End();
end_ok_set_ = true;
return end_ok_;
}
// Cross-checks the flags recorded during the last run. Returns false, after
// printing the reason to stderr, when either:
//   - End() was called and its result disagrees with whether an error was
//     seen (End() is expected to return true iff no error was reported), or
//   - an error was expected but none was seen.
// Returns true otherwise.
bool CheckConsistency() {
  /* end_ok_ is only meaningful once End() has actually been called. With
   * bools, "end_ok_ equals saw_error_" is exactly the mismatch case. */
  const bool end_mismatch = end_ok_set_ && (end_ok_ == saw_error_);
  if (end_mismatch) {
    fprintf(stderr, "End() status and saw_error didn't match.\n");
    return false;
  }

  const bool missing_error = expect_error_ && !saw_error_;
  if (missing_error) {
    fprintf(stderr, "Expected error but saw none.\n");
    return false;
  }

  return true;
}
bool ParseBuffer(int bytes) {
@ -117,7 +155,7 @@ class VerboseParserEnvironment {
}
}
if (!status_.ok())
if (saw_error_)
return false;
if (parsed > bytes && skip_until_ >= 0) {
@ -133,8 +171,6 @@ class VerboseParserEnvironment {
sink_ = sink;
}
const upb::Status& status() { return status_; }
size_t ofs() { return ofs_; }
upb::Environment* env() { return &env_; }
@ -142,13 +178,16 @@ class VerboseParserEnvironment {
private:
upb::Environment env_;
upb::Status status_;
upb::BytesSink* sink_;
const char* buf_;
size_t len_;
bool verbose_;
size_t ofs_;
void *subc_;
bool expect_error_;
bool saw_error_;
bool end_ok_;
bool end_ok_set_;
// When our parse call returns a value greater than the number of bytes
// we passed in, the decoder is indicating to us that the next N bytes

@ -596,7 +596,12 @@ static void generate_msgfield(compiler *c, const upb_fielddef *f,
if (!sub_m) {
/* Don't emit any code for this field at all; it will be parsed as an
* unknown field. */
* unknown field.
*
* TODO(haberman): we should change this to parse it as a string field
* instead. It will probably be faster, but more importantly, once we
* start vending unknown fields, a field shouldn't be treated as unknown
* just because it doesn't have subhandlers registered. */
return;
}

@ -36,6 +36,11 @@ static const char *kUnterminatedVarint = "Unterminated varint.";
static opcode halt = OP_HALT;
/* A dummy character we can point to when the user passes us a NULL buffer.
* We need this because in C (NULL + 0) and (NULL - NULL) are undefined
* behavior, which would invalidate functions like curbufleft(). */
static const char dummy_char;
/* Whether an op consumes any of the input buffer. */
static bool consumes_input(opcode op) {
switch (op) {
@ -191,7 +196,7 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) {
if (bytes > delim_remaining(d)) {
seterr(d, "Skipped value extended beyond enclosing submessage.");
return upb_pbdecoder_suspend(d);
} else if (bufleft(d) > bytes) {
} else if (bufleft(d) >= bytes) {
/* Skipped data is all in current buffer, and more is still available. */
advance(d, bytes);
d->skip = 0;
@ -213,10 +218,39 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
size_t size, const upb_bufhandle *handle) {
UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */
d->buf_param = buf;
/* d->skip and d->residual_end could probably elegantly be represented
* as a single variable, to more easily represent this invariant. */
assert(!(d->skip && d->residual_end > d->residual));
/* We need to remember the original size_param, so that the value we return
* is relative to it, even if we do some skipping first. */
d->size_param = size;
d->handle = handle;
/* Have to handle this case specially (ie. not with skip()) because the user
* is allowed to pass a NULL buffer here, which won't allow us to safely
* calculate a d->end or use our normal functions like curbufleft(). */
if (d->skip && d->skip >= size) {
d->skip -= size;
d->bufstart_ofs += size;
buf = &dummy_char;
size = 0;
/* We can't just return now, because we might need to execute some ops
* like CHECKDELIM, which could call some callbacks and pop the stack. */
}
/* We need to pretend that this was the actual buffer param, since some of the
* calculations assume that d->ptr/d->buf is relative to this. */
d->buf_param = buf;
if (!buf) {
/* NULL buf is ok if its entire span is covered by the "skip" above, but
* by this point we know that "skip" doesn't cover the buffer. */
seterr(d, "Passed NULL buffer over non-skippable region.");
return upb_pbdecoder_suspend(d);
}
if (d->residual_end > d->residual) {
/* We have residual bytes from the last buffer. */
assert(d->ptr == d->residual);
@ -226,23 +260,18 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
d->checkpoint = d->ptr;
/* Handle skips that don't cover the whole buffer (as above). */
if (d->skip) {
size_t skip_bytes = d->skip;
d->skip = 0;
CHECK_RETURN(skip(d, skip_bytes));
d->checkpoint = d->ptr;
}
if (!buf) {
/* NULL buf is ok if its entire span is covered by the "skip" above, but
* by this point we know that "skip" doesn't cover the buffer. */
seterr(d, "Passed NULL buffer over non-skippable region.");
return upb_pbdecoder_suspend(d);
checkpoint(d);
}
/* If we're inside an unknown group, continue to parse unknown values. */
if (d->top->groupnum < 0) {
CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
d->checkpoint = d->ptr;
checkpoint(d);
}
return DECODE_OK;
@ -257,15 +286,14 @@ size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
d->ptr = d->residual;
return 0;
} else {
size_t consumed;
size_t ret = d->size_param - (d->end - d->checkpoint);
assert(!in_residual_buf(d, d->checkpoint));
assert(d->buf == d->buf_param);
assert(d->buf == d->buf_param || d->buf == &dummy_char);
consumed = d->checkpoint - d->buf;
d->bufstart_ofs += consumed;
d->bufstart_ofs += (d->checkpoint - d->buf);
d->residual_end = d->residual;
switchtobuf(d, d->residual, d->residual_end);
return consumed;
return ret;
}
}
@ -383,8 +411,7 @@ UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
int bitpos;
*u64 = 0;
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
int32_t ret = getbytes(d, &byte, 1);
if (ret >= 0) return ret;
CHECK_RETURN(getbytes(d, &byte, 1));
*u64 |= (uint64_t)(byte & 0x7F) << bitpos;
}
if(bitpos == 70 && (byte & 0x80)) {

@ -36,6 +36,13 @@ UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts)
UPB_DECLARE_DERIVED_TYPE(upb::pb::DecoderMethod, upb::RefCounted,
upb_pbdecodermethod, upb_refcounted)
/* The maximum number of bytes we are required to buffer internally between
* calls to the decoder. The value is 14: a five-byte unknown tag plus a ten-byte
* varint, less one because we are buffering an incomplete value.
*
* Should only be used by unit tests. */
#define UPB_DECODER_MAX_RESIDUAL_BYTES 14
#ifdef __cplusplus
/* The parameters one uses to construct a DecoderMethod.

@ -218,11 +218,8 @@ struct upb_pbdecoder {
/* Overall stream offset of "buf." */
uint64_t bufstart_ofs;
/* Buffer for residual bytes not parsed from the previous buffer.
* The maximum number of residual bytes we require is 12; a five-byte
* unknown tag plus an eight-byte value, less one because the value
* is only a partial value. */
char residual[12];
/* Buffer for residual bytes not parsed from the previous buffer. */
char residual[UPB_DECODER_MAX_RESIDUAL_BYTES];
char *residual_end;
/* Bytes of data that should be discarded from the input before we start

Loading…
Cancel
Save