Fixes to decoder, stdio, textprinter.

pull/13171/head
Joshua Haberman 14 years ago
parent 93099cccd1
commit 02a8cdfff2
  1. 6
      core/upb_stream_vtbl.h
  2. 68
      stream/upb_decoder.c
  3. 38
      stream/upb_stdio.c
  4. 4
      stream/upb_textprinter.c
  5. 10
      tests/test_decoder.c

@ -34,10 +34,10 @@ typedef bool (*upb_bytesrc_getstr_fptr)(
// upb_bytesink. // upb_bytesink.
typedef upb_strlen_t (*upb_bytesink_write_fptr)( typedef upb_strlen_t (*upb_bytesink_write_fptr)(
upb_bytesink *bytesink, void *buf, upb_strlen_t count); upb_bytesink *bytesink, void *buf, upb_strlen_t count);
typedef bool (*upb_bytesink_putstr_fptr)( typedef upb_strlen_t (*upb_bytesink_putstr_fptr)(
upb_bytesink *bytesink, upb_string *str, upb_status *status); upb_bytesink *bytesink, upb_string *str, upb_status *status);
typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)( typedef upb_strlen_t (*upb_bytesink_vprintf_fptr)(
upb_status *status, const char *fmt, va_list args); upb_bytesink *bytesink, upb_status *status, const char *fmt, va_list args);
// Vtables for the above interfaces. // Vtables for the above interfaces.
typedef struct { typedef struct {
@ -153,7 +153,7 @@ INLINE upb_status *upb_bytesink_status(upb_bytesink *sink) {
INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) { INLINE upb_strlen_t upb_bytesink_printf(upb_bytesink *sink, upb_status *status, const char *fmt, ...) {
va_list args; va_list args;
va_start(args, fmt); va_start(args, fmt);
upb_strlen_t ret = sink->vtbl->vprintf(status, fmt, args); upb_strlen_t ret = sink->vtbl->vprintf(sink, status, fmt, args);
va_end(args); va_end(args);
return ret; return ret;
} }

@ -18,22 +18,24 @@
// possibilities for optimization/experimentation here. // possibilities for optimization/experimentation here.
INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val, INLINE bool upb_decode_varint_fast(const char **ptr, uint64_t *val,
upb_status *status) { upb_status *status) {
const char *p = *ptr;
uint32_t low, high = 0; uint32_t low, high = 0;
uint32_t b; uint32_t b;
b = *(*ptr++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done; b = *(p++); low = (b & 0x7f) ; if(!(b & 0x80)) goto done;
b = *(*ptr++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; b = *(p++); low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done;
b = *(*ptr++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; b = *(p++); low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done;
b = *(*ptr++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; b = *(p++); low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done;
b = *(*ptr++); low |= (b & 0x7f) << 28; b = *(p++); low |= (b & 0x7f) << 28;
high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done;
b = *(*ptr++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; b = *(p++); high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done;
b = *(*ptr++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; b = *(p++); high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done;
b = *(*ptr++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; b = *(p++); high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done;
b = *(*ptr++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; b = *(p++); high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done;
upb_seterr(status, UPB_ERROR, "Unterminated varint"); upb_seterr(status, UPB_ERROR, "Unterminated varint");
return false; return false;
done: done:
*ptr = p;
*val = ((uint64_t)high << 32) | low; *val = ((uint64_t)high << 32) | low;
return true; return true;
} }
@ -50,7 +52,7 @@ INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
typedef struct { typedef struct {
upb_msgdef *msgdef; upb_msgdef *msgdef;
upb_fielddef *field; upb_fielddef *field;
ssize_t end_offset; // For groups, 0. size_t end_offset; // For groups, 0.
} upb_decoder_frame; } upb_decoder_frame;
struct upb_decoder { struct upb_decoder {
@ -73,7 +75,7 @@ struct upb_decoder {
upb_string *buf; upb_string *buf;
// The offset within the overall stream represented by the *beginning* of buf. // The offset within the overall stream represented by the *beginning* of buf.
upb_strlen_t buf_stream_offset; size_t buf_stream_offset;
}; };
typedef struct { typedef struct {
@ -98,7 +100,7 @@ static upb_flow_t upb_pop(upb_decoder *d);
// Constant used to signal that the submessage is a group and therefore we // Constant used to signal that the submessage is a group and therefore we
// don't know its end offset. This cannot be the offset of a real submessage // don't know its end offset. This cannot be the offset of a real submessage
// end because it takes at least one byte to begin a submessage. // end because it takes at least one byte to begin a submessage.
#define UPB_GROUP_END_OFFSET -1 #define UPB_GROUP_END_OFFSET 0
#define UPB_MAX_VARINT_ENCODED_SIZE 10 #define UPB_MAX_VARINT_ENCODED_SIZE 10
// Called only from the slow path, this function copies the next "len" bytes // Called only from the slow path, this function copies the next "len" bytes
@ -132,12 +134,12 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted,
} }
// Wait for end-of-submessage or end-of-buffer, whichever comes first. // Wait for end-of-submessage or end-of-buffer, whichever comes first.
ssize_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf); size_t offset_in_buf = s->ptr - upb_string_getrobuf(d->buf);
ssize_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr; size_t buf_remaining = upb_string_getbufend(d->buf) - s->ptr;
ssize_t submsg_remaining = size_t submsg_remaining =
d->top->end_offset - d->buf_stream_offset - offset_in_buf; d->top->end_offset - d->buf_stream_offset - offset_in_buf;
if (d->top->end_offset == UPB_GROUP_END_OFFSET || if (d->top->end_offset == UPB_GROUP_END_OFFSET ||
buf_remaining > submsg_remaining) { buf_remaining < submsg_remaining) {
s->len = buf_remaining; s->len = buf_remaining;
} else { } else {
// Check that non of our subtraction overflowed. // Check that non of our subtraction overflowed.
@ -165,13 +167,16 @@ static bool upb_decode_varint_slow(upb_decoder *d, upb_dstate *s,
upb_seterr(d->status, UPB_ERROR, upb_seterr(d->status, UPB_ERROR,
"Varint was unterminated after 10 bytes.\n"); "Varint was unterminated after 10 bytes.\n");
return false; return false;
} else if (d->status->code == UPB_EOF && bitpos == 0) {
// Regular EOF.
return false;
} else if (d->status->code == UPB_EOF && (byte & 0x80)) { } else if (d->status->code == UPB_EOF && (byte & 0x80)) {
upb_seterr(d->status, UPB_ERROR, upb_seterr(d->status, UPB_ERROR,
"Provided data ended in the middle of a varint.\n"); "Provided data ended in the middle of a varint.\n");
return false; return false;
} else { } else {
// Success. // Success.
upb_value_setint64(val, val64); upb_value_setraw(val, val64);
return true; return true;
} }
} }
@ -210,7 +215,7 @@ INLINE bool upb_decode_varint(upb_decoder *d, upb_dstate *s, upb_value *val) {
const char *p = s->ptr; const char *p = s->ptr;
if (!upb_decode_varint_fast(&p, &val64, d->status)) return false; if (!upb_decode_varint_fast(&p, &val64, d->status)) return false;
upb_dstate_advance(s, p - s->ptr); upb_dstate_advance(s, p - s->ptr);
upb_value_setint64(val, val64); upb_value_setraw(val, val64);
return true; return true;
} else { } else {
return upb_decode_varint_slow(d, s, val); return upb_decode_varint_slow(d, s, val);
@ -245,6 +250,7 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str,
if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen, s)) if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen, s))
return false; return false;
} }
upb_value_setstr(val, *str);
return true; return true;
} }
@ -259,7 +265,7 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) {
} }
static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f, static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f,
upb_strlen_t submsg_len, upb_fieldtype_t type) { upb_value submsg_len, upb_fieldtype_t type) {
d->top->field = f; d->top->field = f;
d->top++; d->top++;
if(d->top >= d->limit) { if(d->top >= d->limit) {
@ -268,7 +274,7 @@ static upb_flow_t upb_push(upb_decoder *d, upb_dstate *s, upb_fielddef *f,
} }
d->top->end_offset = (type == UPB_TYPE(GROUP)) ? d->top->end_offset = (type == UPB_TYPE(GROUP)) ?
UPB_GROUP_END_OFFSET : UPB_GROUP_END_OFFSET :
d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + submsg_len; d->buf_stream_offset + (s->ptr - upb_string_getrobuf(d->buf)) + upb_value_getint32(submsg_len);
d->top->msgdef = upb_downcast_msgdef(f->def); d->top->msgdef = upb_downcast_msgdef(f->def);
return upb_dispatch_startsubmsg(&d->dispatcher, f); return upb_dispatch_startsubmsg(&d->dispatcher, f);
} }
@ -280,6 +286,7 @@ static upb_flow_t upb_pop(upb_decoder *d) {
void upb_decoder_run(upb_src *src, upb_status *status) { void upb_decoder_run(upb_src *src, upb_status *status) {
upb_decoder *d = (upb_decoder*)src; upb_decoder *d = (upb_decoder*)src;
d->status = status;
// We put our dstate on the stack so the compiler knows they can't be changed // We put our dstate on the stack so the compiler knows they can't be changed
// by external code (like when we dispatch a callback). We must be sure not // by external code (like when we dispatch a callback). We must be sure not
// to let its address escape this source file. // to let its address escape this source file.
@ -299,9 +306,14 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
if (!upb_decode_tag(d, &state, &tag)) { if (!upb_decode_tag(d, &state, &tag)) {
if (status->code == UPB_EOF && d->top == d->stack) { if (status->code == UPB_EOF && d->top == d->stack) {
// Normal end-of-file. // Normal end-of-file.
upb_clearerr(status);
CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher));
return; return;
} else { } else {
if (status->code == UPB_EOF) {
upb_seterr(status, UPB_ERROR,
"Input ended in the middle of a submessage.");
}
goto err; goto err;
} }
} }
@ -352,7 +364,7 @@ void upb_decoder_run(upb_src *src, upb_status *status) {
switch (f->type) { switch (f->type) {
case UPB_TYPE(MESSAGE): case UPB_TYPE(MESSAGE):
case UPB_TYPE(GROUP): case UPB_TYPE(GROUP):
CHECK_FLOW(upb_push(d, &state, f, upb_value_getint32(val), f->type)); CHECK_FLOW(upb_push(d, &state, f, val, f->type));
continue; // We have no value to dispatch. continue; // We have no value to dispatch.
case UPB_TYPE(STRING): case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): case UPB_TYPE(BYTES):
@ -397,9 +409,21 @@ upb_decoder *upb_decoder_new(upb_msgdef *msgdef) {
upb_dispatcher_init(&d->dispatcher); upb_dispatcher_init(&d->dispatcher);
d->toplevel_msgdef = msgdef; d->toplevel_msgdef = msgdef;
d->limit = &d->stack[UPB_MAX_NESTING]; d->limit = &d->stack[UPB_MAX_NESTING];
d->buf = NULL;
return d; return d;
} }
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc) {
d->bytesrc = bytesrc;
d->top = &d->stack[0];
d->top->msgdef = d->toplevel_msgdef;
d->top->end_offset = SIZE_MAX; // never want to end top-level message.
upb_string_unref(d->buf);
d->buf = NULL;
}
void upb_decoder_free(upb_decoder *d) { void upb_decoder_free(upb_decoder *d) {
free(d); free(d);
} }
upb_src *upb_decoder_src(upb_decoder *d) { return &d->src; }

@ -23,6 +23,9 @@ void upb_stdio_reset(upb_stdio *stdio, FILE* file) {
stdio->file = file; stdio->file = file;
} }
/* upb_bytesrc methods ********************************************************/
static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf, static upb_strlen_t upb_stdio_read(upb_bytesrc *src, void *buf,
upb_strlen_t count, upb_status *status) { upb_strlen_t count, upb_status *status) {
upb_stdio *stdio = (upb_stdio*)src; upb_stdio *stdio = (upb_stdio*)src;
@ -50,18 +53,27 @@ static bool upb_stdio_getstr(upb_bytesrc *src, upb_string *str,
return true; return true;
} }
int32_t upb_stdio_put(upb_bytesink *sink, upb_string *str) {
/* upb_bytesink methods *******************************************************/
upb_strlen_t upb_stdio_putstr(upb_bytesink *sink, upb_string *str, upb_status *status) {
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink)); upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink));
upb_strlen_t len = upb_string_len(str); upb_strlen_t len = upb_string_len(str);
upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file); upb_strlen_t written = fwrite(upb_string_getrobuf(str), 1, len, stdio->file);
if(written < len) { if(written < len) {
// Error or EOF. upb_seterr(status, UPB_ERROR, "Error writing to stdio stream.");
stdio->bytesink.eof = feof(stdio->file); return -1;
if(ferror(stdio->file)) { }
upb_seterr(&stdio->bytesink.status, UPB_ERROR, return written;
"Error writing to stdio stream."); }
return 0;
} upb_strlen_t upb_stdio_vprintf(upb_bytesink *sink, upb_status *status,
const char *fmt, va_list args) {
upb_stdio *stdio = (upb_stdio*)((char*)sink - offsetof(upb_stdio, bytesink));
upb_strlen_t written = vfprintf(stdio->file, fmt, args);
if (written < 0) {
upb_seterr(status, UPB_ERROR, "Error writing to stdio stream.");
return -1;
} }
return written; return written;
} }
@ -72,13 +84,15 @@ upb_stdio *upb_stdio_new() {
upb_stdio_getstr, upb_stdio_getstr,
}; };
//static upb_bytesink_vtbl bytesink_vtbl = { static upb_bytesink_vtbl bytesink_vtbl = {
// upb_stdio_put NULL,
//}; upb_stdio_putstr,
upb_stdio_vprintf
};
upb_stdio *stdio = malloc(sizeof(*stdio)); upb_stdio *stdio = malloc(sizeof(*stdio));
upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl); upb_bytesrc_init(&stdio->bytesrc, &bytesrc_vtbl);
//upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl); upb_bytesink_init(&stdio->bytesink, &bytesink_vtbl);
return stdio; return stdio;
} }

@ -90,7 +90,7 @@ static upb_flow_t upb_textprinter_value(void *_p, upb_fielddef *f,
case UPB_TYPE(STRING): case UPB_TYPE(STRING):
case UPB_TYPE(BYTES): case UPB_TYPE(BYTES):
// TODO: escaping. // TODO: escaping.
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT(": \""), &p->status)); CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status));
CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status)) CHECK(upb_bytesink_putstr(p->bytesink, upb_value_getstr(val), &p->status))
CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status)); CHECK(upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\""), &p->status));
break; break;
@ -107,7 +107,7 @@ static upb_flow_t upb_textprinter_startsubmsg(void *_p, upb_fielddef *f,
upb_textprinter *p = _p; upb_textprinter *p = _p;
upb_textprinter_startfield(p, f); upb_textprinter_startfield(p, f);
p->indent_depth++; p->indent_depth++;
upb_bytesink_putstr(p->bytesink, UPB_STRLIT(" {"), &p->status); upb_bytesink_putstr(p->bytesink, UPB_STRLIT("{"), &p->status);
if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status); if(!p->single_line) upb_bytesink_putstr(p->bytesink, UPB_STRLIT("\n"), &p->status);
return UPB_CONTINUE; return UPB_CONTINUE;
} }

@ -16,13 +16,19 @@ int main() {
upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds)); upb_decoder *d = upb_decoder_new(upb_downcast_msgdef(fds));
upb_decoder_reset(d, upb_stdio_bytesrc(in)); upb_decoder_reset(d, upb_stdio_bytesrc(in));
upb_textprinter *p = upb_textprinter_new(); upb_textprinter *p = upb_textprinter_new();
upb_textprinter_reset(p, upb_stdio_bytesink(out), false); upb_handlers handlers;
upb_handlers_init(&handlers);
upb_textprinter_reset(p, &handlers, upb_stdio_bytesink(out), false);
upb_src *src = upb_decoder_src(d);
upb_src_sethandlers(src, &handlers);
upb_status status = UPB_STATUS_INIT; upb_status status = UPB_STATUS_INIT;
upb_streamdata(upb_decoder_src(d), upb_textprinter_sink(p), &status); upb_src_run(src, &status);
upb_printerr(&status);
assert(upb_ok(&status)); assert(upb_ok(&status));
upb_stdio_free(in); upb_stdio_free(in);
upb_stdio_free(out); upb_stdio_free(out);
upb_decoder_free(d); upb_decoder_free(d);

Loading…
Cancel
Save