diff --git a/upb/decode.h b/upb/decode.h index a13e39b47c..650418c1ce 100644 --- a/upb/decode.h +++ b/upb/decode.h @@ -5,12 +5,8 @@ #ifndef UPB_DECODE_H_ #define UPB_DECODE_H_ -#include - #include "upb/msg.h" -#include "upb/port_def.inc" - #ifdef __cplusplus extern "C" { #endif @@ -23,6 +19,4 @@ bool upb_decode(const char *buf, size_t size, upb_msg *msg, } /* extern "C" */ #endif -#include "upb/port_undef.inc" - #endif /* UPB_DECODE_H_ */ diff --git a/upb/decode.int.h b/upb/decode.int.h index ab1982ef56..4a7c70385a 100644 --- a/upb/decode.int.h +++ b/upb/decode.int.h @@ -1,3 +1,7 @@ +/* +** Internal implementation details of the decoder that are shared between +** decode.c and decode_fast.c. +*/ #ifndef UPB_DECODE_INT_H_ #define UPB_DECODE_INT_H_ @@ -24,26 +28,6 @@ const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg, const upb_msglayout *table, uint64_t hasbits); const char *fastdecode_err(upb_decstate *d); - -UPB_INLINE -upb_msg *decode_newmsg_ceil(upb_decstate *d, const upb_msglayout *l, - int msg_ceil_bytes) { - size_t size = l->size + sizeof(upb_msg_internal); - char *msg_data; - if (UPB_LIKELY(msg_ceil_bytes > 0 && _upb_arenahas(&d->arena, msg_ceil_bytes))) { - UPB_ASSERT(size <= (size_t)msg_ceil_bytes); - msg_data = d->arena.head.ptr; - d->arena.head.ptr += size; - UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); - memset(msg_data, 0, msg_ceil_bytes); - UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); - } else { - msg_data = (char*)upb_arena_malloc(&d->arena, size); - memset(msg_data, 0, size); - } - return msg_data + sizeof(upb_msg_internal); -} - #include "upb/port_undef.inc" #endif /* UPB_DECODE_INT_H_ */ diff --git a/upb/decode_fast.c b/upb/decode_fast.c index a1737f10e0..1ad94684bf 100644 --- a/upb/decode_fast.c +++ b/upb/decode_fast.c @@ -1,3 +1,8 @@ +// Fast decoder: ~3x the speed of decode.c, but x86-64 specific. +// Also the table size grows by 2x. +// +// Could potentially be ported to ARM64 or other 64-bit archs that pass at +// least six arguments in registers. #include "upb/decode_fast.h" @@ -7,6 +12,8 @@ /* Must be last. */ #include "upb/port_def.inc" +// The standard set of arguments passed to each parsing function. +// Thanks to x86-64 calling conventions, these will stay in registers. #define UPB_PARSE_PARAMS \ upb_decstate *d, const char *ptr, upb_msg *msg, const upb_msglayout *table, \ uint64_t hasbits, uint64_t data @@ -23,18 +30,44 @@ typedef enum { CARD_r = 2 /* Repeated */ } upb_card; +UPB_INLINE +upb_msg *decode_newmsg_ceil(upb_decstate *d, const upb_msglayout *l, + int msg_ceil_bytes) { + size_t size = l->size + sizeof(upb_msg_internal); + char *msg_data; + if (UPB_LIKELY(msg_ceil_bytes > 0 && _upb_arenahas(&d->arena, msg_ceil_bytes))) { + UPB_ASSERT(size <= (size_t)msg_ceil_bytes); + msg_data = d->arena.head.ptr; + d->arena.head.ptr += size; + UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); + memset(msg_data, 0, msg_ceil_bytes); + UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); + } else { + msg_data = (char*)upb_arena_malloc(&d->arena, size); + memset(msg_data, 0, size); + } + return msg_data + sizeof(upb_msg_internal); +} + UPB_FORCEINLINE -const char *fastdecode_tag_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg, - const upb_msglayout *table, uint64_t hasbits, uint32_t tag) { - uint64_t data; - size_t idx; - idx = (tag & 0xf8) >> 3; - data = table->fasttable[idx].field_data ^ tag; +static const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, + upb_msg *msg, + const upb_msglayout *table, + uint64_t hasbits, uint32_t tag) { + // Get 5 bits of field number (we pretend the continuation bit is a data bit, + // speculating that the second byte, if any, will be 0x01). + size_t idx = (tag & 0xf8) >> 3; + + // Xor the actual tag with the expected tag (in the low bytes of the table) + // so that the field parser can verify the tag by comparing with zero. + uint64_t data = table->fasttable[idx].field_data ^ tag; + + // Jump to the specialized field parser function. return table->fasttable[idx].field_parser(UPB_PARSE_ARGS); } UPB_FORCEINLINE -uint32_t fastdecode_load_tag(const char* ptr) { +static uint32_t fastdecode_loadtag(const char *ptr) { uint16_t tag; memcpy(&tag, ptr, 2); return tag; @@ -45,13 +78,19 @@ const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg, const upb_msglayout *table, uint64_t hasbits) { if (UPB_UNLIKELY(ptr >= d->fastlimit)) { if (UPB_LIKELY(ptr == d->limit)) { - *(uint32_t*)msg |= hasbits >> 16; /* Sync hasbits. */ + // Parse is finished. + *(uint32_t*)msg |= hasbits >> 16; // Sync hasbits. return ptr; } + // We are within 16 bytes of end-of-buffer, so we can't use fast parsing + // functions anymore (they will read up to 16b without bounds checks). uint64_t data = 0; RETURN_GENERIC("dispatch hit end\n"); } - return fastdecode_tag_dispatch(d, ptr, msg, table, hasbits, fastdecode_load_tag(ptr)); + + // Read two bytes of tag data (for a one-byte tag, the high byte is junk). + uint16_t tag = fastdecode_loadtag(ptr); + return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); } UPB_FORCEINLINE @@ -74,6 +113,7 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr, switch (card) { case CARD_s: + // Set hasbit and return pointer to scalar field. if (hasbit_is_idx) { *hasbits |= 1ull << ((*data >> 32) & 63); } else { @@ -81,31 +121,24 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr, } return field; case CARD_r: { + // Get pointer to upb_array and allocate/expand if necessary. uint8_t elem_size_lg2 = __builtin_ctz(valbytes); upb_array **arr_p = field; upb_array *arr; + char *begin; *hasbits >>= 16; *(uint32_t*)msg |= *hasbits; *hasbits = 0; if (UPB_LIKELY(!*arr_p)) { - const size_t initial_len = 8; - size_t need = (valbytes * initial_len) + sizeof(upb_array); - if (!hasbit_is_idx && UPB_UNLIKELY(!_upb_arenahas(&d->arena, need))) { - return NULL; - } - arr = upb_arena_malloc(&d->arena, need); - field = arr + 1; - arr->data = _upb_array_tagptr(field, elem_size_lg2); + arr = _upb_array_new(&d->arena, 8, elem_size_lg2); *arr_p = arr; - arr->size = initial_len; - *end = (char*)field + (arr->size * valbytes); } else { arr = *arr_p; - field = _upb_array_ptr(arr); - *end = (char*)field + (arr->size * valbytes); - field = (char*)field + (arr->len * valbytes); } - *data = fastdecode_load_tag(ptr); + begin = _upb_array_ptr(arr); + field = begin + (arr->len * valbytes); + *end = begin + (arr->size * valbytes); + *data = fastdecode_loadtag(ptr); *outarr = arr; return field; } @@ -124,11 +157,8 @@ static void *fastdecode_getfield(upb_decstate *d, const char *ptr, upb_msg *msg, /* varint fields **************************************************************/ -#ifdef __BMI2__ -#include -#endif - -UPB_FORCEINLINE uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { +UPB_FORCEINLINE +static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { if (valbytes == 1) { return val != 0; } else if (zigzag) { @@ -215,7 +245,8 @@ TAGBYTES(o) /* string fields **************************************************************/ UPB_FORCEINLINE -bool fastdecode_boundscheck(const char *ptr, size_t len, const char *end) { +static bool fastdecode_boundscheck(const char *ptr, size_t len, + const char *end) { uintptr_t uptr = (uintptr_t)ptr; uintptr_t uend = (uintptr_t)end; uintptr_t res = uptr + len; @@ -263,9 +294,11 @@ const char *upb_pos_2bt(UPB_PARSE_PARAMS) { /* message fields *************************************************************/ -UPB_NOINLINE static -const char *fastdecode_lendelim_submsg(upb_decstate *d, const char *ptr, upb_msg *msg, - const upb_msglayout *table, uint64_t hasbits, const char* saved_limit) { +UPB_NOINLINE +static const char *fastdecode_tosubmsg(upb_decstate *d, const char *ptr, + upb_msg *msg, const upb_msglayout *table, + uint64_t hasbits, + const char *saved_limit) { size_t len = (uint8_t)ptr[-1]; if (UPB_UNLIKELY(len & 0x80)) { int i; @@ -277,7 +310,8 @@ const char *fastdecode_lendelim_submsg(upb_decstate *d, const char *ptr, upb_msg } ptr++; size_t byte = (uint8_t)ptr[-1]; - // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected for a 32 bit varint. + // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected + // for a 32 bit varint. if (UPB_UNLIKELY(byte >= 8)) return fastdecode_err(d); len += (byte - 1) << 28; } @@ -342,7 +376,7 @@ again: ptr += tagbytes + 1; - ptr = fastdecode_lendelim_submsg(d, ptr, child, subl, 0, saved_limit); + ptr = fastdecode_tosubmsg(d, ptr, child, subl, 0, saved_limit); if (UPB_UNLIKELY(ptr != d->limit || d->end_group != 0)) { return fastdecode_err(d); @@ -351,7 +385,7 @@ again: if (card == CARD_r) { submsg++; if (UPB_LIKELY(ptr < saved_fastlimit)) { - uint32_t tag = fastdecode_load_tag(ptr); + uint32_t tag = fastdecode_loadtag(ptr); if (tagbytes == 1) { if ((uint8_t)tag == (uint8_t)data) goto again; } else { @@ -361,7 +395,7 @@ again: d->limit = saved_limit; d->fastlimit = saved_fastlimit; d->depth++; - return fastdecode_tag_dispatch(d, ptr, msg, table, hasbits, tag); + return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); } else { if (ptr == saved_limit) { arr->len = submsg - (upb_msg**)_upb_array_ptr(arr); diff --git a/upb/decode_fast.h b/upb/decode_fast.h index 47ca1bc4ba..bb0f14d721 100644 --- a/upb/decode_fast.h +++ b/upb/decode_fast.h @@ -1,3 +1,8 @@ +// These are the specialized field parser functions for the fast parser. +// Generated tables will refer to these by name. +// +// Here we follow the same pattern of macros used in decode_fast.c to declare +// all of the variants. #ifndef UPB_DECODE_FAST_H_ #define UPB_DECODE_FAST_H_ diff --git a/upb/def.c b/upb/def.c index 9496476546..0b59667a09 100644 --- a/upb/def.c +++ b/upb/def.c @@ -957,6 +957,8 @@ static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) { l->fields = fields; l->submsgs = submsgs; + /* TODO(haberman): initialize fast tables so that reflection-based parsing + * can get the same speeds as linked-in types. */ for (i = 0; i < 32; i++) { l->fasttable[i].field_parser = &fastdecode_generic; }