diff --git a/benchmarks/parsestream.upb.c b/benchmarks/parsestream.upb.c index 37ccb42dcb..80f94440d2 100644 --- a/benchmarks/parsestream.upb.c +++ b/benchmarks/parsestream.upb.c @@ -75,7 +75,8 @@ static size_t run(int i) (void)i; upb_status status = UPB_STATUS_INIT; upb_stringsrc_reset(&stringsrc, input_str, input_len); - upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc), 0, UINT64_MAX, NULL); + upb_decoder_reset(&decoder, upb_stringsrc_bytesrc(&stringsrc), + 0, UPB_NONDELIMITED, NULL); upb_decoder_decode(&decoder, &status); if(!upb_ok(&status)) goto err; return input_len; diff --git a/benchmarks/parsetoproto2.upb.cc b/benchmarks/parsetoproto2.upb.cc index af3e1f236a..74d068091e 100644 --- a/benchmarks/parsetoproto2.upb.cc +++ b/benchmarks/parsetoproto2.upb.cc @@ -283,7 +283,8 @@ static size_t run(int i) (void)i; upb_status status = UPB_STATUS_INIT; msg[i % NUM_MESSAGES].Clear(); - upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, &msg[i % NUM_MESSAGES]); + upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), + 0, UPB_NONDELIMITED, &msg[i % NUM_MESSAGES]); upb_decoder_decode(&d, &status); if(!upb_ok(&status)) goto err; return len; diff --git a/benchmarks/parsetostruct.upb.c b/benchmarks/parsetostruct.upb.c index 64a4d35c36..f44b6502ae 100644 --- a/benchmarks/parsetostruct.upb.c +++ b/benchmarks/parsetostruct.upb.c @@ -69,7 +69,8 @@ static size_t run(int i) upb_status status = UPB_STATUS_INIT; i %= NUM_MESSAGES; upb_msg_clear(msg[i], def); - upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg[i]); + upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), + 0, UPB_NONDELIMITED, msg[i]); upb_decoder_decode(&d, &status); if(!upb_ok(&status)) goto err; return len; diff --git a/tests/test_decoder.c b/tests/test_decoder.c index 88b87416f8..e3fc1ead68 100644 --- a/tests/test_decoder.c +++ b/tests/test_decoder.c @@ -53,7 +53,7 @@ int main(int argc, char *argv[]) { upb_decoder d; upb_decoder_initforhandlers(&d, handlers); - upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UINT64_MAX, p); + upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UPB_NONDELIMITED, p); upb_status_clear(&status); upb_decoder_decode(&d, &status); diff --git a/upb/handlers.c b/upb/handlers.c index 05300c0cc1..d02a32a05b 100644 --- a/upb/handlers.c +++ b/upb/handlers.c @@ -16,8 +16,10 @@ static upb_mhandlers *upb_mhandlers_new() { upb_inttable_init(&m->fieldtab, 8, sizeof(upb_fhandlers)); m->startmsg = NULL; m->endmsg = NULL; - m->tablearray = NULL; m->is_group = false; +#ifdef UPB_USE_JIT_X64 + m->tablearray = NULL; +#endif return m; } @@ -29,7 +31,11 @@ static upb_fhandlers *_upb_mhandlers_newfhandlers(upb_mhandlers *m, uint32_t n, if (f) abort(); upb_fhandlers new_f = {false, type, repeated, repeated && upb_isprimitivetype(type), UPB_ATOMIC_INIT(0), - n, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL}; + n, m, NULL, UPB_NO_VALUE, NULL, NULL, NULL, NULL, NULL, +#ifdef UPB_USE_JIT_X64 + 0, 0, 0, +#endif + NULL}; upb_inttable_insert(&m->fieldtab, tag, &new_f); f = upb_inttable_lookup(&m->fieldtab, tag); assert(f); @@ -77,7 +83,9 @@ void upb_handlers_unref(upb_handlers *h) { for (int i = 0; i < h->msgs_len; i++) { upb_mhandlers *mh = h->msgs[i]; upb_inttable_free(&mh->fieldtab); +#ifdef UPB_USE_JIT_X64 free(mh->tablearray); +#endif free(mh); } free(h->msgs); @@ -154,7 +162,11 @@ static upb_fhandlers toplevel_f = { #else {{0}, -1}, #endif - NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL}; + NULL, NULL, NULL, NULL, NULL, +#ifdef UPB_USE_JIT_X64 + 0, 0, 0, +#endif + NULL}; void upb_dispatcher_init(upb_dispatcher *d, upb_handlers *h, upb_skip_handler *skip, upb_exit_handler *exit, diff --git a/upb/handlers.h b/upb/handlers.h index b9e120aaa1..db28705874 100644 --- a/upb/handlers.h +++ b/upb/handlers.h @@ -149,9 +149,11 @@ typedef struct _upb_fieldent { upb_endfield_handler *endsubmsg; upb_startfield_handler *startseq; upb_endfield_handler *endseq; +#ifdef UPB_USE_JIT_X64 uint32_t jit_pclabel; uint32_t jit_pclabel_notypecheck; uint32_t jit_submsg_done_pclabel; +#endif void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f); } upb_fhandlers; @@ -184,16 +186,18 @@ typedef struct _upb_mhandlers { upb_startmsg_handler *startmsg; upb_endmsg_handler *endmsg; upb_inttable fieldtab; // Maps field number -> upb_fhandlers. + bool is_group; +#ifdef UPB_USE_JIT_X64 uint32_t jit_startmsg_pclabel; uint32_t jit_endofbuf_pclabel; uint32_t jit_endofmsg_pclabel; uint32_t jit_unknownfield_pclabel; - bool is_group; int32_t jit_parent_field_done_pclabel; uint32_t max_field_number; // Currently keyed on field number. Could also try keying it // on encoded or decoded tag, or on encoded field number. void **tablearray; +#endif } upb_mhandlers; // mhandlers are created as part of a upb_handlers instance, but can be ref'd diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index df74b4815f..20838498c1 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -13,10 +13,6 @@ #include "upb/pb/decoder.h" #include "upb/pb/varint.h" -// Used for frames that have no specific end offset: groups, repeated primitive -// fields inside groups, and the top-level message. -#define UPB_NONDELIMITED UINT32_MAX - #ifdef UPB_USE_JIT_X64 #define Dst_DECL upb_decoder *d #define Dst_REF (d->dynasm) @@ -33,7 +29,11 @@ #define FORCEINLINE static __attribute__((always_inline)) #define NOINLINE static __attribute__((noinline)) -static void upb_decoder_exit(upb_decoder *d) { siglongjmp(d->exitjmp, 1); } +static void upb_decoder_exit(upb_decoder *d) { + // If/when we support resumable decoding, we would want to back our progress + // up to completed_ptr and possibly get a previous buffer. + siglongjmp(d->exitjmp, 1); +} static void upb_decoder_exit2(void *_d) { upb_decoder *d = _d; upb_decoder_exit(d); @@ -43,7 +43,12 @@ static void upb_decoder_abort(upb_decoder *d, const char *msg) { upb_decoder_exit(d); } -/* Decoding/Buffering of wire types *******************************************/ +/* Buffering ******************************************************************/ + +// We operate on one buffer at a time, which may be a subset of the bytesrc +// region we have ref'd. When data for the buffer is gone we pull the next +// one. When we've committed our progress we release our ref on any previous +// buffers' regions. static size_t upb_decoder_bufleft(upb_decoder *d) { return d->end - d->ptr; } static void upb_decoder_advance(upb_decoder *d, size_t len) { @@ -61,12 +66,11 @@ static void upb_decoder_setmsgend(upb_decoder *d) { upb_dispatcher_frame *f = d->dispatcher.top; size_t delimlen = f->end_ofs - d->bufstart_ofs; size_t buflen = d->end - d->buf; - if (f->end_ofs != UINT64_MAX && delimlen <= buflen) { - d->delim_end = (uintptr_t)(d->buf + delimlen); + if (f->end_ofs != UPB_NONDELIMITED && delimlen <= buflen) { + // Delimited message ends in this buffer. + d->delim_end = d->buf + delimlen; } else { - // Buffers must not run up against the end of memory. - assert((uintptr_t)d->end < UINTPTR_MAX); - d->delim_end = UINTPTR_MAX; + d->delim_end = NULL; } } @@ -111,6 +115,9 @@ void upb_decoder_commit(upb_decoder *d) { } } + +/* Decoding of wire types *****************************************************/ + NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d) { uint8_t byte = 0x80; uint64_t u64 = 0; @@ -150,7 +157,8 @@ done: FORCEINLINE bool upb_trydecode_varint32(upb_decoder *d, uint32_t *val) { if (upb_decoder_bufleft(d) == 0) { - // Check for our two normal end-of-message conditions. + // Check for our two successful end-of-message conditions + // (user-specified EOM and bytesrc EOF). if (d->bufend_ofs == d->end_ofs) return false; if (!upb_trypullbuf(d)) return false; } @@ -286,8 +294,8 @@ static void upb_decode_MESSAGE(upb_decoder *d, upb_fhandlers *f) { /* The main decoding loop *****************************************************/ static void upb_decoder_checkdelim(upb_decoder *d) { - while ((uintptr_t)d->ptr >= d->delim_end) { - if ((uintptr_t)d->ptr > d->delim_end) + while (d->delim_end != NULL && d->ptr >= d->delim_end) { + if (d->ptr > d->delim_end) upb_decoder_abort(d, "Bad submessage end"); if (d->dispatcher.top->is_sequence) { @@ -460,7 +468,7 @@ void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs, #ifdef UPB_USE_JIT_X64 d->jit_end = NULL; #endif - d->delim_end = UINTPTR_MAX; // But don't let end-of-message get triggered. + d->delim_end = NULL; // But don't let end-of-message get triggered. d->strref.bytesrc = bytesrc; } diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index 398135903b..9a20d7664b 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -50,9 +50,8 @@ typedef struct _upb_decoder { // UPB_TRYAGAIN (or in the future, UPB_SUSPEND). const char *completed_ptr; - // End of the delimited region, relative to ptr, or UINTPTR_MAX if not in - // this buf. - uintptr_t delim_end; + // End of the delimited region, relative to ptr, or NULL if not in this buf. + const char *delim_end; #ifdef UPB_USE_JIT_X64 // For JIT, which doesn't do bounds checks in the middle of parsing a field. @@ -69,6 +68,10 @@ typedef struct _upb_decoder { sigjmp_buf exitjmp; } upb_decoder; +// Used for frames that have no specific end offset: groups, repeated primitive +// fields inside groups, and the top-level message. +#define UPB_NONDELIMITED UINT32_MAX + // Initializes/uninitializes a decoder for calling into the given handlers // or to write into the given msgdef, given its accessors). Takes a ref // on the handlers or msgdef.