Small semantic changes in the decoder.

Simplified some of the semantics around
the decoder's data structures, in anticipation
of sharing them between the regular C decoder
and a JIT-compiled decoder.
pull/13171/head
Josh Haberman 14 years ago
parent 484809c272
commit 2c86e7eddb
  1. src/upb_decoder.c (59 changes)
  2. src/upb_decoder.h (16 changes)
  3. src/upb_msg.c (5 changes)
  4. src/upb_stream.c (18 changes)
  5. src/upb_stream.h (10 changes)

@ -40,7 +40,7 @@ INLINE void upb_decoder_advance(upb_decoder *d, size_t len) {
INLINE size_t upb_decoder_offset(upb_decoder *d) {
size_t offset = d->buf_stream_offset;
if (d->buf) offset += (d->ptr - upb_string_getrobuf(d->buf));
if (d->buf) offset += (d->ptr - d->buf);
return offset;
}
@ -49,9 +49,9 @@ INLINE size_t upb_decoder_bufleft(upb_decoder *d) {
}
INLINE void upb_dstate_setmsgend(upb_decoder *d) {
size_t end_offset = d->dispatcher.top->end_offset;
d->submsg_end = (end_offset == UPB_GROUP_END_OFFSET) ? (void*)UINTPTR_MAX :
d->ptr + (end_offset - upb_decoder_offset(d));
uint32_t end_offset = d->dispatcher.top->end_offset;
d->submsg_end = (end_offset == UINT32_MAX) ?
(void*)UINTPTR_MAX : d->buf + end_offset;
}
// Called only from the slow path, this function copies the next "len" bytes
@ -68,11 +68,21 @@ static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) {
}
// Get next buffer.
if (d->buf) d->buf_stream_offset += upb_string_len(d->buf);
upb_string_recycle(&d->buf);
if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false;
d->ptr = upb_string_getrobuf(d->buf);
d->end = d->ptr + upb_string_len(d->buf);
int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1;
upb_string_recycle(&d->bufstr);
if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) {
d->buf = NULL;
return false;
}
if (last_buf_len != -1) {
d->buf_stream_offset += last_buf_len;
for (upb_dispatcher_frame *f = d->dispatcher.stack; f <= d->dispatcher.top; ++f)
if (f->end_offset != UINT32_MAX)
f->end_offset -= last_buf_len;
}
d->buf = upb_string_getrobuf(d->bufstr);
d->ptr = upb_string_getrobuf(d->bufstr);
d->end = d->buf + upb_string_len(d->bufstr);
}
}
@ -165,7 +175,7 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val,
uint32_t strlen = upb_value_getint32(*val);
if (upb_decoder_bufleft(d) >= strlen) {
// Common (fast) case.
upb_string_substr(*str, d->buf, d->ptr - upb_string_getrobuf(d->buf), strlen);
upb_string_substr(*str, d->bufstr, d->ptr - d->buf, strlen);
upb_decoder_advance(d, strlen);
} else {
if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen))
@ -196,17 +206,14 @@ static upb_flow_t upb_decoder_skipsubmsg(upb_decoder *d) {
fprintf(stderr, "upb_decoder: Can't skip groups yet.\n");
abort();
}
upb_decoder_advance(d, d->dispatcher.top->end_offset - d->buf_stream_offset -
(d->ptr - upb_string_getrobuf(d->buf)));
upb_decoder_advance(d, d->dispatcher.top->end_offset - (d->ptr - d->buf));
upb_pop(d);
return UPB_CONTINUE;
}
static upb_flow_t upb_push(upb_decoder *d, upb_handlers_fieldent *f,
upb_value submsg_len) {
upb_flow_t flow = upb_dispatch_startsubmsg(&d->dispatcher, f,
(f->type == UPB_TYPE(GROUP)) ? UPB_GROUP_END_OFFSET :
upb_decoder_offset(d) + upb_value_getint32(submsg_len));
uint32_t end_offset) {
upb_flow_t flow = upb_dispatch_startsubmsg(&d->dispatcher, f, end_offset);
upb_dstate_setmsgend(d);
return flow;
}
@ -222,7 +229,7 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
}
#define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; }
if (upb_dispatch_startmsg(&d->dispatcher, d->closure) != UPB_CONTINUE) goto err;
if (upb_dispatch_startmsg(&d->dispatcher) != UPB_CONTINUE) goto err;
// Main loop: executed once per tag/field pair.
while(1) {
@ -272,7 +279,8 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
case UPB_WIRE_TYPE_START_GROUP:
break; // Nothing to do now, below we will push appropriately.
case UPB_WIRE_TYPE_END_GROUP:
if(d->dispatcher.top->end_offset != UPB_GROUP_END_OFFSET) {
// Strictly speaking we should also check the field number here.
if(d->dispatcher.top->f->type != UPB_TYPE(GROUP)) {
upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag.");
goto err;
}
@ -311,9 +319,11 @@ void upb_decoder_decode(upb_decoder *d, upb_status *status) {
// If this is not true we are losing data. But the main protobuf library
// doesn't check this, and it would slow us down, so pass for now.
switch (f->type) {
case UPB_TYPE(MESSAGE):
case UPB_TYPE(GROUP):
CHECK_FLOW(upb_push(d, f, val));
CHECK_FLOW(upb_push(d, f, UINT32_MAX));
continue; // We have no value to dispatch.
case UPB_TYPE(MESSAGE):
CHECK_FLOW(upb_push(d, f, upb_value_getuint32(val) + (d->ptr - d->buf)));
continue; // We have no value to dispatch.
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES):
@ -343,15 +353,16 @@ err:
}
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
upb_dispatcher_init(&d->dispatcher, handlers, UPB_GROUP_END_OFFSET);
upb_dispatcher_init(&d->dispatcher, handlers);
d->bufstr = NULL;
d->buf = NULL;
d->tmp = NULL;
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
upb_dispatcher_reset(&d->dispatcher, closure, UINT32_MAX);
d->bytesrc = bytesrc;
d->closure = closure;
upb_dispatcher_reset(&d->dispatcher);
d->buf = NULL;
d->ptr = NULL;
d->end = NULL; // Force a buffer pull.
d->submsg_end = (void*)0x1; // But don't let end-of-message get triggered.
@ -360,6 +371,6 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
void upb_decoder_uninit(upb_decoder *d) {
upb_dispatcher_uninit(&d->dispatcher);
upb_string_unref(d->buf);
upb_string_unref(d->bufstr);
upb_string_unref(d->tmp);
}

@ -34,8 +34,8 @@ struct _upb_decoder {
// Dispatcher to which we push parsed data.
upb_dispatcher dispatcher;
// Current input buffer.
upb_string *buf;
// String to hold our input buffer; is only active if d->buf != NULL.
upb_string *bufstr;
// Temporary string for passing string data to callbacks.
upb_string *tmp;
@ -43,18 +43,20 @@ struct _upb_decoder {
// The offset within the overall stream represented by the *beginning* of buf.
size_t buf_stream_offset;
// Our current position in the data buffer.
const char *ptr;
// Pointer to the beginning of our current data buffer, or NULL if none.
const char *buf;
// End of this buffer, relative to *ptr.
const char *end;
// Members which may also be written by the JIT:
// Our current position in the data buffer.
const char *ptr;
// End of this submessage, relative to *ptr.
const char *submsg_end;
// The closure that was passed by the caller for the top-level message.
void *closure;
// Where we will store any errors that occur.
upb_status *status;
};

@ -233,9 +233,10 @@ static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h,
void *closure, upb_status *status) {
upb_dispatcher d;
upb_dispatcher_init(&d, h, 0);
upb_dispatcher_init(&d, h);
upb_dispatcher_reset(&d, closure, 0);
upb_dispatch_startmsg(&d, closure);
upb_dispatch_startmsg(&d);
upb_msg_dispatch(msg, md, &d);
upb_dispatch_endmsg(&d, status);

@ -222,7 +222,8 @@ void upb_handlers_pop(upb_handlers *h, upb_fielddef *f) {
/* upb_dispatcher *************************************************************/
static upb_handlers_fieldent toplevel_f = {
false, 0, 0, // The one value that is actually read
false, UPB_TYPE(GROUP),
0, // msgent_index
#ifdef NDEBUG
{{0}},
#else
@ -230,17 +231,15 @@ static upb_handlers_fieldent toplevel_f = {
#endif
{NULL}, NULL};
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
size_t top_end_offset) {
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h) {
d->handlers = h;
for (int i = 0; i < h->msgs_len; i++)
upb_inttable_compact(&h->msgs[i].fieldtab);
d->stack[0].end_offset = top_end_offset;
d->stack[0].f = &toplevel_f;
upb_status_init(&d->status);
}
void upb_dispatcher_reset(upb_dispatcher *d) {
void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end_offset) {
d->msgent = &d->handlers->msgs[0];
d->dispatch_table = &d->msgent->fieldtab;
d->current_depth = 0;
@ -248,6 +247,8 @@ void upb_dispatcher_reset(upb_dispatcher *d) {
d->noframe_depth = INT_MAX;
d->delegated_depth = 0;
d->top = d->stack;
d->top->closure = top_closure;
d->top->end_offset = top_end_offset;
d->limit = &d->stack[UPB_MAX_NESTING];
}
@ -261,9 +262,8 @@ void upb_dispatcher_break(upb_dispatcher *d) {
d->noframe_depth = d->current_depth;
}
upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d, void *closure) {
d->top->closure = closure;
upb_flow_t flow = d->msgent->startmsg(closure);
upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d) {
upb_flow_t flow = d->msgent->startmsg(d->top->closure);
if (flow != UPB_CONTINUE) {
d->noframe_depth = d->current_depth + 1;
d->skip_depth = (flow == UPB_BREAK) ? d->delegated_depth : d->current_depth;
@ -304,7 +304,7 @@ upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d,
d->top->closure = sflow.closure;
d->msgent = upb_handlers_getmsgent(d->handlers, f);
d->dispatch_table = &d->msgent->fieldtab;
return upb_dispatch_startmsg(d, d->top->closure);
return upb_dispatch_startmsg(d);
}
upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d) {

@ -280,7 +280,9 @@ upb_handlers_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_fie
typedef struct {
upb_handlers_fieldent *f;
void *closure;
size_t end_offset; // For groups, 0.
// Relative to the beginning of this buffer.
// For groups and the top-level: UINT32_MAX.
uint32_t end_offset;
} upb_dispatcher_frame;
typedef struct {
@ -322,11 +324,11 @@ INLINE bool upb_dispatcher_noframe(upb_dispatcher *d) {
typedef upb_handlers_fieldent upb_dispatcher_field;
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h, size_t top_end_offset);
void upb_dispatcher_reset(upb_dispatcher *d);
void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h);
void upb_dispatcher_reset(upb_dispatcher *d, void *top_closure, uint32_t top_end_offset);
void upb_dispatcher_uninit(upb_dispatcher *d);
upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d, void *closure);
upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d);
void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status);
// Looks up a field by number for the current message.

Loading…
Cancel
Save