Add packed field support (untested).

pull/13171/head
Joshua Haberman 13 years ago
parent c03802b0f1
commit 48fedab345
  1. 1
      tests/tests.c
  2. 13
      upb/bytestream.c
  3. 10
      upb/handlers.c
  4. 9
      upb/handlers.h
  5. 65
      upb/pb/decoder.c
  6. 3
      upb/pb/decoder.h

@ -19,6 +19,7 @@ static upb_symtab *load_test_proto() {
} }
upb_status status = UPB_STATUS_INIT; upb_status status = UPB_STATUS_INIT;
upb_read_descriptor(s, descriptor, len, &status); upb_read_descriptor(s, descriptor, len, &status);
upb_status_print(&status, stderr);
ASSERT(upb_ok(&status)); ASSERT(upb_ok(&status));
upb_status_uninit(&status); upb_status_uninit(&status);
free(descriptor); free(descriptor);

@ -66,12 +66,12 @@ size_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) {
size_t read = fread(&buf->data, 1, BUF_SIZE, stdio->file); size_t read = fread(&buf->data, 1, BUF_SIZE, stdio->file);
if(read < (size_t)BUF_SIZE) { if(read < (size_t)BUF_SIZE) {
// Error or EOF. // Error or EOF.
if(feof(stdio->file)) { if(feof(stdio->file)) return 0;
upb_status_setf(s, UPB_EOF, ""); if(ferror(stdio->file)) {
} else if(ferror(stdio->file)) {
upb_status_fromerrno(s); upb_status_fromerrno(s);
return 0; return -1;
} }
assert(false);
} }
buf->len = read; buf->len = read;
return buf->ofs + buf->len; return buf->ofs + buf->len;
@ -190,9 +190,8 @@ upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; }
size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) { size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) {
upb_stringsrc *src = _src; upb_stringsrc *src = _src;
size_t bytes = src->len - ofs; (void)s; // No errors can occur.
if (bytes == 0) s->code = UPB_EOF; return src->len - ofs;
return bytes;
} }
void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) { void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) {

@ -182,6 +182,7 @@ void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
d->skip = skip; d->skip = skip;
d->exit = exit; d->exit = exit;
d->srcclosure = srcclosure; d->srcclosure = srcclosure;
d->top_is_implicit = false;
upb_status_init(&d->status); upb_status_init(&d->status);
} }
@ -191,6 +192,7 @@ upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) {
d->top = d->stack; d->top = d->stack;
d->top->closure = closure; d->top->closure = closure;
d->top->is_sequence = false; d->top->is_sequence = false;
d->top->is_packed = false;
return d->top; return d->top;
} }
@ -240,6 +242,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d,
++d->top; ++d->top;
d->top->f = f; d->top->f = f;
d->top->is_sequence = true; d->top->is_sequence = true;
d->top->is_packed = false;
d->top->closure = sflow.closure; d->top->closure = sflow.closure;
return d->top; return d->top;
} }
@ -283,6 +286,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d,
++d->top; ++d->top;
d->top->f = f; d->top->f = f;
d->top->is_sequence = false; d->top->is_sequence = false;
d->top->is_packed = false;
d->top->closure = sflow.closure; d->top->closure = sflow.closure;
d->msgent = f->submsg; d->msgent = f->submsg;
d->dispatch_table = &d->msgent->fieldtab; d->dispatch_table = &d->msgent->fieldtab;
@ -309,6 +313,12 @@ upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) {
bool upb_dispatcher_stackempty(upb_dispatcher *d) { bool upb_dispatcher_stackempty(upb_dispatcher *d) {
return d->top == d->stack; return d->top == d->stack;
} }
bool upb_dispatcher_islegalend(upb_dispatcher *d) {
if (d->top == d->stack) return true;
if (d->top - 1 == d->stack &&
d->top->is_sequence && !d->top->is_packed) return true;
return false;
}
void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow) { void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow) {
upb_dispatcher_frame *frame = d->top; upb_dispatcher_frame *frame = d->top;

@ -303,6 +303,9 @@ INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, upb_msgdef *m,
/* upb_dispatcher *************************************************************/ /* upb_dispatcher *************************************************************/
// WARNING: upb_dispatcher should be considered INTERNAL-ONLY. The interface
// between it and upb_decoder is somewhat tightly coupled and may change.
//
// upb_dispatcher can be used by sources of data to invoke the appropriate // upb_dispatcher can be used by sources of data to invoke the appropriate
// handlers on a upb_handlers object. Besides maintaining the runtime stack of // handlers on a upb_handlers object. Besides maintaining the runtime stack of
// closures and handlers, the dispatcher checks the return status of user // closures and handlers, the dispatcher checks the return status of user
@ -340,6 +343,7 @@ typedef struct {
upb_skip_handler *skip; upb_skip_handler *skip;
upb_exit_handler *exit; upb_exit_handler *exit;
void *srcclosure; void *srcclosure;
bool top_is_implicit;
// Stack. // Stack.
upb_status status; upb_status status;
@ -352,8 +356,9 @@ void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h,
upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure); upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure);
void upb_dispatcher_uninit(upb_dispatcher *d); void upb_dispatcher_uninit(upb_dispatcher *d);
// Tests whether the runtime stack is in the base level message. // Tests whether the message could legally end here (either the stack is empty
bool upb_dispatcher_stackempty(upb_dispatcher *d); // or the only open stack frame is implicit).
bool upb_dispatcher_islegalend(upb_dispatcher *d);
// Looks up a field by number for the current message. // Looks up a field by number for the current message.
INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, uint32_t n) { INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, uint32_t n) {

@ -68,18 +68,20 @@ static void upb_decoder_setmsgend(upb_decoder *d) {
size_t buflen = d->end - d->buf; size_t buflen = d->end - d->buf;
d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ? d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ?
d->buf + delimlen : NULL; // NULL if not in this buf. d->buf + delimlen : NULL; // NULL if not in this buf.
d->top_is_packed = f->is_packed;
} }
static bool upb_trypullbuf(upb_decoder *d) { static bool upb_trypullbuf(upb_decoder *d) {
assert(upb_decoder_bufleft(d) == 0); assert(upb_decoder_bufleft(d) == 0);
if (d->bufend_ofs == d->refend_ofs) { if (d->bufend_ofs == d->refend_ofs) {
d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status); size_t read = upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status);
if (!upb_ok(d->status)) { if (read <= 0) {
d->ptr = NULL; d->ptr = NULL;
d->end = NULL; d->end = NULL;
if (upb_iseof(d->status)) return false; if (read == 0) return false; // EOF
upb_decoder_exit(d); // Non-EOF error. upb_decoder_exit(d); // Non-EOF error.
} }
d->refend_ofs += read;
} }
d->bufstart_ofs = d->bufend_ofs; d->bufstart_ofs = d->bufend_ofs;
size_t len; size_t len;
@ -150,11 +152,10 @@ done:
} }
FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) { FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) {
if (upb_decoder_bufleft(d) == 0) { if (upb_decoder_bufleft(d) == 0 && upb_dispatcher_islegalend(&d->dispatcher)) {
// Check for our two successful end-of-message conditions // Check for our two successful end-of-message conditions
// (user-specified EOM and bytesrc EOF). // (user-specified EOM and bytesrc EOF).
if (d->bufend_ofs == d->end_ofs) return false; if (d->bufend_ofs == d->end_ofs || !upb_trypullbuf(d)) return false;
if (!upb_trypullbuf(d)) return false;
} }
*val = upb_decode_varint32(d); *val = upb_decode_varint32(d);
return true; return true;
@ -195,6 +196,7 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) {
memcpy(buf + read, d->ptr, avail); memcpy(buf + read, d->ptr, avail);
upb_decoder_advance(d, avail); upb_decoder_advance(d, avail);
read += avail; read += avail;
upb_pullbuf(d);
} }
} }
} }
@ -232,7 +234,7 @@ INLINE upb_strref *upb_decode_string(upb_decoder *d) {
return &d->strref; return &d->strref;
} }
INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) { INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) {
upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end; upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end;
upb_decoder_setmsgend(d); upb_decoder_setmsgend(d);
} }
@ -281,7 +283,7 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) {
upb_decoder_setmsgend(d); upb_decoder_setmsgend(d);
} }
static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) {
upb_push(d, f, upb_decode_varint32(d) + (d->ptr - d->buf)); upb_push(d, f, upb_decode_varint32(d) + upb_decoder_offset(d));
} }
@ -315,6 +317,7 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
while (1) { while (1) {
uint32_t tag; uint32_t tag;
if (!upb_trydecode_varint32(d, &tag)) return NULL; if (!upb_trydecode_varint32(d, &tag)) return NULL;
uint8_t wire_type = tag & 0x7;
upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag); upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag);
// There are no explicit "startseq" or "endseq" markers in protobuf // There are no explicit "startseq" or "endseq" markers in protobuf
@ -325,17 +328,24 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
upb_decoder_setmsgend(d); upb_decoder_setmsgend(d);
} }
if (f && f->repeated && d->dispatcher.top->f != f) { if (f && f->repeated && d->dispatcher.top->f != f) {
// TODO: support packed. uint64_t old_end = d->dispatcher.top->end_ofs;
assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) || upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f);
(tag & 0x7) != UPB_WIRE_TYPE_DELIMITED); if (wire_type != UPB_WIRE_TYPE_DELIMITED ||
uint32_t end = d->dispatcher.top->end_ofs; upb_issubmsgtype(f->type) || upb_isstringtype(f->type)) {
upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end; // Non-packed field -- this tag pertains to only a single message.
fr->end_ofs = old_end;
} else {
// Packed primitive field.
fr->end_ofs = upb_decoder_offset(d) + upb_decode_varint(d);
fr->is_packed = true;
}
upb_decoder_setmsgend(d); upb_decoder_setmsgend(d);
} }
if (f) return f; if (f) return f;
// Unknown field. // Unknown field.
switch (tag & 0x7) { switch (wire_type) {
case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break; case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break;
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break; case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break;
case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break; case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break;
@ -350,34 +360,23 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) {
} }
} }
void upb_decoder_onexit(upb_decoder *d) {
if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher);
if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) {
// Normal end-of-file.
upb_status_clear(d->status);
upb_dispatch_endmsg(&d->dispatcher, d->status);
} else {
if (d->status->code == UPB_EOF)
upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage.");
}
}
void upb_decoder_decode(upb_decoder *d, upb_status *status) { void upb_decoder_decode(upb_decoder *d, upb_status *status) {
if (sigsetjmp(d->exitjmp, 0)) { if (sigsetjmp(d->exitjmp, 0)) { assert(!upb_ok(status)); return; }
upb_decoder_onexit(d);
return;
}
d->status = status; d->status = status;
upb_dispatch_startmsg(&d->dispatcher); upb_dispatch_startmsg(&d->dispatcher);
// Prime the buf so we can hit the JIT immediately. // Prime the buf so we can hit the JIT immediately.
upb_trypullbuf(d); upb_trypullbuf(d);
upb_fhandlers *f = d->dispatcher.top->f;
while(1) { // Main loop: executed once per tag/field pair. while(1) { // Main loop: executed once per tag/field pair.
upb_decoder_checkdelim(d); upb_decoder_checkdelim(d);
upb_decoder_enterjit(d); upb_decoder_enterjit(d);
// if (!d->dispatcher.top->is_packed) if (!d->top_is_packed) f = upb_decode_tag(d);
upb_fhandlers *f = upb_decode_tag(d);
if (!f) { if (!f) {
upb_decoder_onexit(d); // Sucessful EOF. We may need to dispatch a top-level implicit frame.
if (d->dispatcher.top == d->dispatcher.stack + 1) {
assert(d->dispatcher.top->is_sequence);
upb_dispatch_endseq(&d->dispatcher);
}
return; return;
} }
f->decode(d, f); f->decode(d, f);

@ -52,6 +52,7 @@ typedef struct _upb_decoder {
// End of the delimited region, relative to ptr, or NULL if not in this buf. // End of the delimited region, relative to ptr, or NULL if not in this buf.
const char *delim_end; const char *delim_end;
bool top_is_packed;
#ifdef UPB_USE_JIT_X64 #ifdef UPB_USE_JIT_X64
// For JIT, which doesn't do bounds checks in the middle of parsing a field. // For JIT, which doesn't do bounds checks in the middle of parsing a field.
@ -70,7 +71,7 @@ typedef struct _upb_decoder {
// Used for frames that have no specific end offset: groups, repeated primitive // Used for frames that have no specific end offset: groups, repeated primitive
// fields inside groups, and the top-level message. // fields inside groups, and the top-level message.
#define UPB_NONDELIMITED UINT32_MAX #define UPB_NONDELIMITED UINT64_MAX
// Initializes/uninitializes a decoder for calling into the given handlers // Initializes/uninitializes a decoder for calling into the given handlers
// or to write into the given msgdef, given its accessors). Takes a ref // or to write into the given msgdef, given its accessors). Takes a ref

Loading…
Cancel
Save