Add packed field support (untested).

pull/13171/head
Joshua Haberman 13 years ago
parent c03802b0f1
commit 48fedab345
  1. 1
      tests/tests.c
  2. 13
      upb/bytestream.c
  3. 10
      upb/handlers.c
  4. 9
      upb/handlers.h
  5. 65
      upb/pb/decoder.c
  6. 3
      upb/pb/decoder.h

@ -19,6 +19,7 @@ static upb_symtab *load_test_proto() {
}
upb_status status = UPB_STATUS_INIT;
upb_read_descriptor(s, descriptor, len, &status);
upb_status_print(&status, stderr);
ASSERT(upb_ok(&status));
upb_status_uninit(&status);
free(descriptor);

@ -66,12 +66,12 @@ size_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) {
size_t read = fread(&buf->data, 1, BUF_SIZE, stdio->file);
if(read < (size_t)BUF_SIZE) {
// Error or EOF.
if(feof(stdio->file)) {
upb_status_setf(s, UPB_EOF, "");
} else if(ferror(stdio->file)) {
if(feof(stdio->file)) return 0;
if(ferror(stdio->file)) {
upb_status_fromerrno(s);
return 0;
return -1;
}
assert(false);
}
buf->len = read;
return buf->ofs + buf->len;
@ -190,9 +190,8 @@ upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }
size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) {
upb_stringsrc *src = _src;
size_t bytes = src->len - ofs;
if (bytes == 0) s->code = UPB_EOF;
return bytes;
(void)s; // No errors can occur.
return src->len - ofs;
}
void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) {

@ -182,6 +182,7 @@ void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
d->skip = skip;
d->exit = exit;
d->srcclosure = srcclosure;
d->top_is_implicit = false;
upb_status_init(&d->status);
}
@ -191,6 +192,7 @@ upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
d->top = d->stack;
d->top->closure = closure;
d->top->is_sequence = false;
d->top->is_packed = false;
return d->top;
}
@ -240,6 +242,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
++d->top;
d->top->f = f;
d->top->is_sequence = true;
d->top->is_packed = false;
d->top->closure = sflow.closure;
return d->top;
}
@ -283,6 +286,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
++d->top;
d->top->f = f;
d->top->is_sequence = false;
d->top->is_packed = false;
d->top->closure = sflow.closure;
d->msgent = f->submsg;
d->dispatch_table = &d->msgent->fieldtab;
@ -309,6 +313,12 @@ upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
bool upb_dispatcher_stackempty(upb_dispatcher *d) {
return d->top == d->stack;
}
bool upb_dispatcher_islegalend(upb_dispatcher *d) {
if (d->top == d->stack) return true;
if (d->top - 1 == d->stack &&
d->top->is_sequence && !d->top->is_packed) return true;
return false;
}
void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow) {
upb_dispatcher_frame *frame = d->top;

@ -303,6 +303,9 @@ INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, upb_msgdef *m,
/* upb_dispatcher *************************************************************/
// WARNING: upb_dispatcher should be considered INTERNAL-ONLY. The interface
// between it and upb_decoder is somewhat tightly coupled and may change.
//
// upb_dispatcher can be used by sources of data to invoke the appropriate
// handlers on a upb_handlers object. Besides maintaining the runtime stack of
// closures and handlers, the dispatcher checks the return status of user
@ -340,6 +343,7 @@ typedef struct {
upb_skip_handler *skip;
upb_exit_handler *exit;
void *srcclosure;
bool top_is_implicit;
// Stack.
upb_status status;
@ -352,8 +356,9 @@ void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure);
void upb_dispatcher_uninit(upb_dispatcher *d);
// Tests whether the runtime stack is in the base level message.
bool upb_dispatcher_stackempty(upb_dispatcher *d);
// Tests whether the message could legally end here (either the stack is empty
// or the only open stack frame is implicit).
bool upb_dispatcher_islegalend(upb_dispatcher *d);
// Looks up a field by number for the current message.
INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, uint32_t n) {

@ -68,18 +68,20 @@ static void upb_decoder_setmsgend(upb_decoder *d) {
size_t buflen = d->end - d->buf;
d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
d->buf + delimlen : NULL; // NULL if not in this buf.
d->top_is_packed = f->is_packed;
}
static bool upb_trypullbuf(upb_decoder *d) {
assert(upb_decoder_bufleft(d) == 0);
if (d->bufend_ofs == d->refend_ofs) {
d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status);
if (!upb_ok(d->status)) {
size_t read = upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status);
if (read <= 0) {
d->ptr = NULL;
d->end = NULL;
if (upb_iseof(d->status)) return false;
if (read == 0) return false; // EOF
upb_decoder_exit(d); // Non-EOF error.
}
d->refend_ofs += read;
}
d->bufstart_ofs = d->bufend_ofs;
size_t len;
@ -150,11 +152,10 @@ done:
}
FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
if (upb_decoder_bufleft(d) == 0) {
if (upb_decoder_bufleft(d) == 0 && upb_dispatcher_islegalend(&d->dispatcher)) {
// Check for our two successful end-of-message conditions
// (user-specified EOM and bytesrc EOF).
if (d->bufend_ofs == d->end_ofs) return false;
if (!upb_trypullbuf(d)) return false;
if (d->bufend_ofs == d->end_ofs || !upb_trypullbuf(d)) return false;
}
*val = upb_decode_varint32(d);
return true;
@ -195,6 +196,7 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
memcpy(buf + read, d->ptr, avail);
upb_decoder_advance(d, avail);
read += avail;
upb_pullbuf(d);
}
}
}
@ -232,7 +234,7 @@ INLINE upb_strref *upb_decode_string(upb_decoder *d) {
return &d->strref;
}
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) {
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d);
}
@ -281,7 +283,7 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
upb_decoder_setmsgend(d);
}
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, upb_decode_varint32(d) + (d->ptr - d->buf));
upb_push(d, f, upb_decode_varint32(d) + upb_decoder_offset(d));
}
@ -315,6 +317,7 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) {
uint32_t tag;
if (!upb_trydecode_varint32(d, &tag)) return NULL;
uint8_t wire_type = tag & 0x7;
upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
// There are no explicit "startseq" or "endseq" markers in protobuf
@ -325,17 +328,24 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
upb_decoder_setmsgend(d);
}
if (f && f->repeated && d->dispatcher.top->f != f) {
// TODO: support packed.
assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) ||
(tag & 0x7) != UPB_WIRE_TYPE_DELIMITED);
uint32_t end = d->dispatcher.top->end_ofs;
upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end;
uint64_t old_end = d->dispatcher.top->end_ofs;
upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f);
if (wire_type != UPB_WIRE_TYPE_DELIMITED ||
upb_issubmsgtype(f->type) || upb_isstringtype(f->type)) {
// Non-packed field -- this tag pertains to only a single message.
fr->end_ofs = old_end;
} else {
// Packed primitive field.
fr->end_ofs = upb_decoder_offset(d) + upb_decode_varint(d);
fr->is_packed = true;
}
upb_decoder_setmsgend(d);
}
if (f) return f;
// Unknown field.
switch (tag & 0x7) {
switch (wire_type) {
case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
@ -350,34 +360,23 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
}
}
void upb_decoder_onexit(upb_decoder *d) {
if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher);
if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
// Normal end-of-file.
upb_status_clear(d->status);
upb_dispatch_endmsg(&d->dispatcher, d->status);
} else {
if (d->status->code == UPB_EOF)
upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage.");
}
}
void upb_decoder_decode(upb_decoder *d, upb_status *status) {
if (sigsetjmp(d->exitjmp, 0)) {
upb_decoder_onexit(d);
return;
}
if (sigsetjmp(d->exitjmp, 0)) { assert(!upb_ok(status)); return; }
d->status = status;
upb_dispatch_startmsg(&d->dispatcher);
// Prime the buf so we can hit the JIT immediately.
upb_trypullbuf(d);
upb_fhandlers *f = d->dispatcher.top->f;
while(1) { // Main loop: executed once per tag/field pair.
upb_decoder_checkdelim(d);
upb_decoder_enterjit(d);
// if (!d->dispatcher.top->is_packed)
upb_fhandlers *f = upb_decode_tag(d);
if (!d->top_is_packed) f = upb_decode_tag(d);
if (!f) {
upb_decoder_onexit(d);
// Sucessful EOF. We may need to dispatch a top-level implicit frame.
if (d->dispatcher.top == d->dispatcher.stack + 1) {
assert(d->dispatcher.top->is_sequence);
upb_dispatch_endseq(&d->dispatcher);
}
return;
}
f->decode(d, f);

@ -52,6 +52,7 @@ typedef struct _upb_decoder {
// End of the delimited region, relative to ptr, or NULL if not in this buf.
const char *delim_end;
bool top_is_packed;
#ifdef UPB_USE_JIT_X64
// For JIT, which doesn't do bounds checks in the middle of parsing a field.
@ -70,7 +71,7 @@ typedef struct _upb_decoder {
// Used for frames that have no specific end offset: groups, repeated primitive
// fields inside groups, and the top-level message.
#define UPB_NONDELIMITED UINT32_MAX
#define UPB_NONDELIMITED UINT64_MAX
// Initializes/uninitializes a decoder for calling into the given handlers
// or to write into the given msgdef, given its accessors). Takes a ref

Loading…
Cancel
Save