From 48fedab345674000dd2f8dd4d8356ee995d9263e Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 27 Aug 2011 18:44:37 -0700 Subject: [PATCH] Add packed field support (untested). --- tests/tests.c | 1 + upb/bytestream.c | 13 +++++----- upb/handlers.c | 10 ++++++++ upb/handlers.h | 9 +++++-- upb/pb/decoder.c | 65 ++++++++++++++++++++++++------------------------ upb/pb/decoder.h | 3 ++- 6 files changed, 58 insertions(+), 43 deletions(-) diff --git a/tests/tests.c b/tests/tests.c index fc8e11d476..7941c2f79c 100644 --- a/tests/tests.c +++ b/tests/tests.c @@ -19,6 +19,7 @@ static upb_symtab *load_test_proto() { } upb_status status = UPB_STATUS_INIT; upb_read_descriptor(s, descriptor, len, &status); + upb_status_print(&status, stderr); ASSERT(upb_ok(&status)); upb_status_uninit(&status); free(descriptor); diff --git a/upb/bytestream.c b/upb/bytestream.c index 976b139aee..dc2fa610e1 100644 --- a/upb/bytestream.c +++ b/upb/bytestream.c @@ -66,12 +66,12 @@ size_t upb_stdio_fetch(void *src, uint64_t ofs, upb_status *s) { size_t read = fread(&buf->data, 1, BUF_SIZE, stdio->file); if(read < (size_t)BUF_SIZE) { // Error or EOF. - if(feof(stdio->file)) { - upb_status_setf(s, UPB_EOF, ""); - } else if(ferror(stdio->file)) { + if(feof(stdio->file)) return 0; + if(ferror(stdio->file)) { upb_status_fromerrno(s); - return 0; + return -1; } + assert(false); } buf->len = read; return buf->ofs + buf->len; @@ -190,9 +190,8 @@ upb_bytesink* upb_stdio_bytesink(upb_stdio *stdio) { return &stdio->sink; } size_t upb_stringsrc_fetch(void *_src, uint64_t ofs, upb_status *s) { upb_stringsrc *src = _src; - size_t bytes = src->len - ofs; - if (bytes == 0) s->code = UPB_EOF; - return bytes; + (void)s; // No errors can occur. + return src->len - ofs; } void upb_stringsrc_read(void *_src, uint64_t src_ofs, size_t len, char *dst) { diff --git a/upb/handlers.c b/upb/handlers.c index b6d02442f2..28b72d5806 100644 --- a/upb/handlers.c +++ b/upb/handlers.c @@ -182,6 +182,7 @@ void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h, d->skip = skip; d->exit = exit; d->srcclosure = srcclosure; + d->top_is_implicit = false; upb_status_init(&d->status); } @@ -191,6 +192,7 @@ upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *closure) { d->top = d->stack; d->top->closure = closure; d->top->is_sequence = false; + d->top->is_packed = false; return d->top; } @@ -240,6 +242,7 @@ upb_dispatcher_frame *upb_dispatch_startseq(upb_dispatcher *d, ++d->top; d->top->f = f; d->top->is_sequence = true; + d->top->is_packed = false; d->top->closure = sflow.closure; return d->top; } @@ -283,6 +286,7 @@ upb_dispatcher_frame *upb_dispatch_startsubmsg(upb_dispatcher *d, ++d->top; d->top->f = f; d->top->is_sequence = false; + d->top->is_packed = false; d->top->closure = sflow.closure; d->msgent = f->submsg; d->dispatch_table = &d->msgent->fieldtab; @@ -309,6 +313,12 @@ upb_dispatcher_frame *upb_dispatch_endsubmsg(upb_dispatcher *d) { bool upb_dispatcher_stackempty(upb_dispatcher *d) { return d->top == d->stack; } +bool upb_dispatcher_islegalend(upb_dispatcher *d) { + if (d->top == d->stack) return true; + if (d->top - 1 == d->stack && + d->top->is_sequence && !d->top->is_packed) return true; + return false; +} void _upb_dispatcher_unwind(upb_dispatcher *d, upb_flow_t flow) { upb_dispatcher_frame *frame = d->top; diff --git a/upb/handlers.h b/upb/handlers.h index 3a9509a9a3..2e44318b8f 100644 --- a/upb/handlers.h +++ b/upb/handlers.h @@ -303,6 +303,9 @@ INLINE upb_mhandlers *upb_handlers_reghandlerset(upb_handlers *h, upb_msgdef *m, /* upb_dispatcher *************************************************************/ +// WARNING: upb_dispatcher should be considered INTERNAL-ONLY. The interface +// between it and upb_decoder is somewhat tightly coupled and may change. +// // upb_dispatcher can be used by sources of data to invoke the appropriate // handlers on a upb_handlers object. Besides maintaining the runtime stack of // closures and handlers, the dispatcher checks the return status of user @@ -340,6 +343,7 @@ typedef struct { upb_skip_handler *skip; upb_exit_handler *exit; void *srcclosure; + bool top_is_implicit; // Stack. upb_status status; @@ -352,8 +356,9 @@ void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h, upb_dispatcher_frame *upb_dispatcher_reset(upb_dispatcher *d, void *topclosure); void upb_dispatcher_uninit(upb_dispatcher *d); -// Tests whether the runtime stack is in the base level message. -bool upb_dispatcher_stackempty(upb_dispatcher *d); +// Tests whether the message could legally end here (either the stack is empty +// or the only open stack frame is implicit). +bool upb_dispatcher_islegalend(upb_dispatcher *d); // Looks up a field by number for the current message. INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, uint32_t n) { diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 4521ce25a0..5f2b6dcab3 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -68,18 +68,20 @@ static void upb_decoder_setmsgend(upb_decoder *d) { size_t buflen = d->end - d->buf; d->delim_end = (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) ? d->buf + delimlen : NULL; // NULL if not in this buf. + d->top_is_packed = f->is_packed; } static bool upb_trypullbuf(upb_decoder *d) { assert(upb_decoder_bufleft(d) == 0); if (d->bufend_ofs == d->refend_ofs) { - d->refend_ofs += upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status); - if (!upb_ok(d->status)) { + size_t read = upb_bytesrc_fetch(d->bytesrc, d->refend_ofs, d->status); + if (read <= 0) { d->ptr = NULL; d->end = NULL; - if (upb_iseof(d->status)) return false; + if (read == 0) return false; // EOF upb_decoder_exit(d); // Non-EOF error. } + d->refend_ofs += read; } d->bufstart_ofs = d->bufend_ofs; size_t len; @@ -150,11 +152,10 @@ done: } FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) { - if (upb_decoder_bufleft(d) == 0) { + if (upb_decoder_bufleft(d) == 0 && upb_dispatcher_islegalend(&d->dispatcher)) { // Check for our two successful end-of-message conditions // (user-specified EOM and bytesrc EOF). - if (d->bufend_ofs == d->end_ofs) return false; - if (!upb_trypullbuf(d)) return false; + if (d->bufend_ofs == d->end_ofs || !upb_trypullbuf(d)) return false; } *val = upb_decode_varint32(d); return true; @@ -195,6 +196,7 @@ FORCEINLINE void upb_decode_fixed(upb_decoder *d, char *buf, size_t bytes) { memcpy(buf + read, d->ptr, avail); upb_decoder_advance(d, avail); read += avail; + upb_pullbuf(d); } } } @@ -232,7 +234,7 @@ INLINE upb_strref *upb_decode_string(upb_decoder *d) { return &d->strref; } -INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint32_t end) { +INLINE void upb_push(upb_decoder *d, upb_fhandlers *f, uint64_t end) { upb_dispatch_startsubmsg(&d->dispatcher, f)->end_ofs = end; upb_decoder_setmsgend(d); } @@ -281,7 +283,7 @@ static void upb_endgroup(upb_decoder *d, upb_fhandlers *f) { upb_decoder_setmsgend(d); } static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { - upb_push(d, f, upb_decode_varint32(d) + (d->ptr - d->buf)); + upb_push(d, f, upb_decode_varint32(d) + upb_decoder_offset(d)); } @@ -315,6 +317,7 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { while (1) { uint32_t tag; if (!upb_trydecode_varint32(d, &tag)) return NULL; + uint8_t wire_type = tag & 0x7; upb_fhandlers *f = upb_dispatcher_lookup(&d->dispatcher, tag); // There are no explicit "startseq" or "endseq" markers in protobuf @@ -325,17 +328,24 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { upb_decoder_setmsgend(d); } if (f && f->repeated && d->dispatcher.top->f != f) { - // TODO: support packed. - assert(upb_issubmsgtype(f->type) || upb_isstringtype(f->type) || - (tag & 0x7) != UPB_WIRE_TYPE_DELIMITED); - uint32_t end = d->dispatcher.top->end_ofs; - upb_dispatch_startseq(&d->dispatcher, f)->end_ofs = end; + uint64_t old_end = d->dispatcher.top->end_ofs; + upb_dispatcher_frame *fr = upb_dispatch_startseq(&d->dispatcher, f); + if (wire_type != UPB_WIRE_TYPE_DELIMITED || + upb_issubmsgtype(f->type) || upb_isstringtype(f->type)) { + // Non-packed field -- this tag pertains to only a single message. + fr->end_ofs = old_end; + } else { + // Packed primitive field. + fr->end_ofs = upb_decoder_offset(d) + upb_decode_varint(d); + fr->is_packed = true; + } upb_decoder_setmsgend(d); } + if (f) return f; // Unknown field. - switch (tag & 0x7) { + switch (wire_type) { case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break; case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break; case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break; @@ -350,34 +360,23 @@ INLINE upb_fhandlers *upb_decode_tag(upb_decoder *d) { } } -void upb_decoder_onexit(upb_decoder *d) { - if (d->dispatcher.top->is_sequence) upb_dispatch_endseq(&d->dispatcher); - if (d->status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) { - // Normal end-of-file. - upb_status_clear(d->status); - upb_dispatch_endmsg(&d->dispatcher, d->status); - } else { - if (d->status->code == UPB_EOF) - upb_status_setf(d->status, UPB_ERROR, "Input ended mid-submessage."); - } -} - void upb_decoder_decode(upb_decoder *d, upb_status *status) { - if (sigsetjmp(d->exitjmp, 0)) { - upb_decoder_onexit(d); - return; - } + if (sigsetjmp(d->exitjmp, 0)) { assert(!upb_ok(status)); return; } d->status = status; upb_dispatch_startmsg(&d->dispatcher); // Prime the buf so we can hit the JIT immediately. upb_trypullbuf(d); + upb_fhandlers *f = d->dispatcher.top->f; while(1) { // Main loop: executed once per tag/field pair. upb_decoder_checkdelim(d); upb_decoder_enterjit(d); - // if (!d->dispatcher.top->is_packed) - upb_fhandlers *f = upb_decode_tag(d); + if (!d->top_is_packed) f = upb_decode_tag(d); if (!f) { - upb_decoder_onexit(d); + // Sucessful EOF. We may need to dispatch a top-level implicit frame. + if (d->dispatcher.top == d->dispatcher.stack + 1) { + assert(d->dispatcher.top->is_sequence); + upb_dispatch_endseq(&d->dispatcher); + } return; } f->decode(d, f); diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index 9a20d7664b..921697aacf 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -52,6 +52,7 @@ typedef struct _upb_decoder { // End of the delimited region, relative to ptr, or NULL if not in this buf. const char *delim_end; + bool top_is_packed; #ifdef UPB_USE_JIT_X64 // For JIT, which doesn't do bounds checks in the middle of parsing a field. @@ -70,7 +71,7 @@ typedef struct _upb_decoder { // Used for frames that have no specific end offset: groups, repeated primitive // fields inside groups, and the top-level message. -#define UPB_NONDELIMITED UINT32_MAX +#define UPB_NONDELIMITED UINT64_MAX // Initializes/uninitializes a decoder for calling into the given handlers // or to write into the given msgdef, given its accessors). Takes a ref