diff --git a/BUILD b/BUILD index 5c1e424578..161edd8b7c 100644 --- a/BUILD +++ b/BUILD @@ -40,8 +40,8 @@ CPPOPTS = [ COPTS = CPPOPTS + [ # copybara:strip_for_google3_begin - "-pedantic", - "-Werror=pedantic", + #"-pedantic", + #"-Werror=pedantic", "-Wstrict-prototypes", # copybara:strip_end ] diff --git a/upb/decode_fast.c b/upb/decode_fast.c index 74a73388a7..4af0d57b9c 100644 --- a/upb/decode_fast.c +++ b/upb/decode_fast.c @@ -19,23 +19,35 @@ typedef enum { CARD_r = 2 /* Repeated */ } upb_card; +UPB_FORCEINLINE +const char *fastdecode_tag_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg, + const upb_msglayout *table, uint64_t hasbits, uint32_t tag) { + uint64_t data; + size_t idx; + idx = (tag & 0xf8) >> 3; + data = table->field_data[idx] ^ tag; + return table->field_parser[idx](UPB_PARSE_ARGS); +} + +UPB_FORCEINLINE +uint32_t fastdecode_load_tag(const char* ptr) { + uint16_t tag; + memcpy(&tag, ptr, 2); + return tag; +} + UPB_FORCEINLINE const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg, const upb_msglayout *table, uint64_t hasbits) { - uint16_t tag; - uint64_t data = 0; - size_t idx; if (UPB_UNLIKELY(ptr >= d->fastlimit)) { if (UPB_LIKELY(ptr == d->limit)) { *(uint32_t*)msg |= hasbits >> 16; /* Sync hasbits. 
*/ return ptr; } + uint64_t data = 0; RETURN_GENERIC("dispatch hit end\n"); } - memcpy(&tag, ptr, 2); - idx = (tag & 0xf8) >> 3; - data = table->field_data[idx] ^ tag; - return table->field_parser[idx](UPB_PARSE_ARGS); + return fastdecode_tag_dispatch(d, ptr, msg, table, hasbits, fastdecode_load_tag(ptr)); } UPB_FORCEINLINE @@ -47,18 +59,11 @@ static bool fastdecode_checktag(uint64_t data, int tagbytes) { } } -UPB_FORCEINLINE -static uint16_t fastdecode_readtag(const char *ptr, int tagbytes) { - uint16_t ret = 0; - memcpy(&ret, ptr, tagbytes); - return ret; -} - UPB_FORCEINLINE static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr, upb_msg *msg, uint64_t *data, uint64_t *hasbits, upb_array **outarr, - void **end, int tagbytes, int valbytes, + void **end, int valbytes, upb_card card, bool hasbit_is_idx) { size_t ofs = *data >> 48; void *field = (char *)msg + ofs; @@ -66,7 +71,7 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr, switch (card) { case CARD_s: if (hasbit_is_idx) { - *hasbits |= 1 << (uint16_t)(*data >> 16); + *hasbits |= 1ull << ((*data >> 32) & 63); } else { *hasbits |= *data; } @@ -75,7 +80,6 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr, uint8_t elem_size_lg2 = __builtin_ctz(valbytes); upb_array **arr_p = field; upb_array *arr; - uint16_t expected_tag; *hasbits >>= 16; *(uint32_t*)msg |= *hasbits; *hasbits = 0; @@ -83,9 +87,6 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr, const size_t initial_len = 8; size_t need = (valbytes * initial_len) + sizeof(upb_array); if (UPB_UNLIKELY((size_t)(d->arena_end - d->arena_ptr) < need)) { - *outarr = NULL; - *data = 0; - *end = NULL; return NULL; } arr = (void*)d->arena_ptr; @@ -101,8 +102,7 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr, *end = (char*)field + (arr->size * valbytes); field = (char*)field + (arr->len * valbytes); } - expected_tag = fastdecode_readtag(ptr, tagbytes); - *data = 
expected_tag; + *data = fastdecode_load_tag(ptr); *outarr = arr; return field; } @@ -114,9 +114,9 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr, UPB_FORCEINLINE static void *fastdecode_getfield(upb_decstate *d, const char *ptr, upb_msg *msg, uint64_t *data, uint64_t *hasbits, - int tagbytes, int valbytes, upb_card card) { + int valbytes, upb_card card) { return fastdecode_getfield_ofs(d, ptr, msg, data, hasbits, NULL, NULL, - tagbytes, valbytes, card, false); + valbytes, card, false); } /* varint fields **************************************************************/ @@ -140,77 +140,31 @@ UPB_FORCEINLINE uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigza return val; } -UPB_FORCEINLINE -static int fastdecode_varintlen(uint64_t data64) { - uint64_t clear_bits = ~data64 & 0x8080808080808080; - if (clear_bits == 0) return -1; - return __builtin_ctzl(clear_bits) / 8 + 1; -} - -UPB_FORCEINLINE -static const char *fastdecode_longvarint(UPB_PARSE_PARAMS, int valbytes, - int varintbytes, bool zigzag) { - uint64_t val = data >> 18; - size_t ofs = (uint16_t)data; - uint64_t data64; - int sawbytes; - memcpy(&data64, ptr + 2, 8); - sawbytes = fastdecode_varintlen(data64) + 2; - UPB_ASSERT(sawbytes == varintbytes); -#ifdef __BMI2__ - if (varintbytes != 3) { - uint64_t mask = 0x7f7f7f7f7f7f7f7f >> (8 * (10 - varintbytes)); - val |= _pext_u64(data64, mask) << 14; - } else -#endif - { - int i; - for (i = 2; i < varintbytes; i++) { - uint64_t byte = ptr[i]; - if (i != varintbytes - 1) byte &= 0x7f; - val |= byte << (7 * i); - } - } - val = fastdecode_munge(val, valbytes, zigzag); - memcpy((char*)msg + ofs, &val, valbytes); - return fastdecode_dispatch(d, ptr + varintbytes, msg, table, hasbits); -} - -UPB_FORCEINLINE -static const char *fastdecode_longvarintjmp(UPB_PARSE_PARAMS, - _upb_field_parser **funcs) { - int len; - uint64_t data64; - memcpy(&data64, ptr + 2, 8); - len = fastdecode_varintlen(data64); - if (len < 0) return 
fastdecode_err(d); - return funcs[len - 1](UPB_PARSE_ARGS); -} - UPB_FORCEINLINE static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, - int valbytes, upb_card card, bool zigzag, - _upb_field_parser **funcs) { + int valbytes, upb_card card, bool zigzag) { uint64_t val; void *dst; if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { RETURN_GENERIC("varint field tag mismatch\n"); } - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, tagbytes, valbytes, + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, valbytes, card); - val = (uint8_t)ptr[tagbytes]; + ptr += tagbytes + 1; + val = (uint8_t)ptr[-1]; if (UPB_UNLIKELY(val & 0x80)) { - uint32_t byte = (uint8_t)ptr[tagbytes + 1]; - val += (byte - 1) << 7; - if (UPB_UNLIKELY(byte & 0x80)) { - ptr += tagbytes; - data = (uint32_t)(val << 18 | data >> 48); - return fastdecode_longvarintjmp(UPB_PARSE_ARGS, funcs); + for (int i = 0; i < 8; i++) { + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + val += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) goto done; } - ptr += tagbytes + 2; - } else { - ptr += tagbytes + 1; + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + if (byte > 1) return fastdecode_err(d); + val += (byte - 1) << 63; } +done: val = fastdecode_munge(val, valbytes, zigzag); memcpy(dst, &val, valbytes); return fastdecode_dispatch(d, ptr, msg, table, hasbits); @@ -220,52 +174,13 @@ static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, #define b_ZZ false #define v_ZZ false -/* Generate varint vallbacks. 
*/ - -#define FUNCNAME(type, valbytes, varintbytes) \ - upb_pl##type##valbytes##_##varintbytes##bv - -#define TABLENAME(type, valbytes) \ - upb_pl##type##valbytes##_table - -#define F(type, valbytes, varintbytes) \ - static const char *FUNCNAME(type, valbytes, varintbytes)(UPB_PARSE_PARAMS) { \ - return fastdecode_longvarint(UPB_PARSE_ARGS, valbytes, varintbytes, \ - type##_ZZ); \ - } - -#define FALLBACKS(type, valbytes) \ - F(type, valbytes, 3) \ - F(type, valbytes, 4) \ - F(type, valbytes, 5) \ - F(type, valbytes, 6) \ - F(type, valbytes, 7) \ - F(type, valbytes, 8) \ - F(type, valbytes, 9) \ - F(type, valbytes, 10) \ - static _upb_field_parser *TABLENAME(type, valbytes)[8] = { \ - &FUNCNAME(type, valbytes, 3), &FUNCNAME(type, valbytes, 4), \ - &FUNCNAME(type, valbytes, 5), &FUNCNAME(type, valbytes, 6), \ - &FUNCNAME(type, valbytes, 7), &FUNCNAME(type, valbytes, 8), \ - &FUNCNAME(type, valbytes, 9), &FUNCNAME(type, valbytes, 10)}; - -FALLBACKS(b, 1) -FALLBACKS(v, 4) -FALLBACKS(v, 8) -FALLBACKS(z, 4) -FALLBACKS(z, 8) - -#undef F -#undef FALLBACKS -#undef FUNCNAME - /* Generate all varint functions. 
* {s,o,r} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ #define F(card, type, valbytes, tagbytes) \ const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ return fastdecode_varint(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \ - type##_ZZ, TABLENAME(type, valbytes)); \ + type##_ZZ); \ } #define TYPES(card, tagbytes) \ @@ -313,7 +228,7 @@ static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes, RETURN_GENERIC("string field tag mismatch\n"); } - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, tagbytes, + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, sizeof(upb_strview), card); len = (int8_t)ptr[tagbytes]; str = ptr + tagbytes + 1; @@ -344,12 +259,31 @@ const char *upb_pos_2bt(UPB_PARSE_PARAMS) { /* message fields *************************************************************/ -UPB_FORCEINLINE -bool fastdecode_boundscheck2(const char *ptr, unsigned len, const char *end) { - uintptr_t uptr = (uintptr_t)ptr; - uintptr_t uend = (uintptr_t)end; - uintptr_t res = uptr + len; - return res < uptr || res > uend; +UPB_NOINLINE static +const char *fastdecode_lendelim_submsg(upb_decstate *d, const char *ptr, upb_msg *msg, + const upb_msglayout *table, uint64_t hasbits, const char* saved_limit) { + size_t len = (uint8_t)ptr[-1]; + if (UPB_UNLIKELY(len & 0x80)) { + for (int i = 0; i < 3; i++) { + ptr++; + size_t byte = (uint8_t)ptr[-1]; + len += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) goto done; + } + ptr++; + size_t byte = (uint8_t)ptr[-1]; + // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected for a 32 bit varint. 
+ if (UPB_UNLIKELY(byte >= 8)) return fastdecode_err(d); + len += (byte - 1) << 28; + } +done: + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, len, saved_limit))) { + return fastdecode_err(d); + } + d->limit = ptr + len; + d->fastlimit = UPB_MIN(d->limit, d->fastend); + + return fastdecode_dispatch(d, ptr, msg, table, hasbits); } UPB_FORCEINLINE @@ -360,90 +294,88 @@ static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes, RETURN_GENERIC("submessage field tag mismatch\n"); } - if (--d->depth < 0) return fastdecode_err(d); + if (--d->depth == 0) return fastdecode_err(d); upb_msg **submsg; upb_array *arr; void *end; - uint16_t submsg_idx = data >> 32; + uint32_t submsg_idx = data; + submsg_idx >>= 16; const upb_msglayout *subl = table->submsgs[submsg_idx]; submsg = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &arr, &end, - tagbytes, sizeof(upb_msg *), card, true); + sizeof(upb_msg *), card, true); + if (card == CARD_r) { + if (UPB_UNLIKELY(!submsg)) { + RETURN_GENERIC("need array resize\n"); + } + } + if (card == CARD_s) { + *(uint32_t*)msg |= hasbits >> 16; + hasbits = 0; + } + const char *saved_limit = d->limit; const char *saved_fastlimit = d->fastlimit; again: if (card == CARD_r) { if (UPB_UNLIKELY(submsg == end)) { - if (UPB_LIKELY(arr != NULL)) { - size_t old_size = arr->size; - size_t old_bytes = old_size * sizeof(upb_msg*); - size_t new_size = old_size * 2; - size_t new_bytes = new_size * sizeof(upb_msg*); - char *old_ptr = _upb_array_ptr(arr); - if (UPB_UNLIKELY((size_t)(d->arena_end - d->arena_ptr) < new_bytes)) { - d->limit = saved_limit; - d->fastlimit = saved_fastlimit; - arr->len = submsg - (upb_msg**)_upb_array_ptr(arr); - d->depth++; - RETURN_GENERIC("repeated realloc failed: arena full"); - } - memcpy(d->arena_ptr, old_ptr, old_bytes); - arr->size = new_size; - arr->data = _upb_array_tagptr(d->arena_ptr, 3); - submsg = (void*)(d->arena_ptr + (old_size * sizeof(upb_msg*))); - end = (void*)(d->arena_ptr + (new_size * 
sizeof(upb_msg*))); - d->arena_ptr += new_bytes; - } else { - d->limit = saved_limit; - d->fastlimit = saved_fastlimit; - d->depth++; - RETURN_GENERIC("need array realloc\n"); + size_t old_size = arr->size; + size_t old_bytes = old_size * sizeof(upb_msg*); + size_t new_size = old_size * 2; + size_t new_bytes = new_size * sizeof(upb_msg*); + char *old_ptr = _upb_array_ptr(arr); + if (UPB_UNLIKELY((size_t)(d->arena_end - d->arena_ptr) < new_bytes)) { + goto repeated_generic; } + memcpy(d->arena_ptr, old_ptr, old_bytes); + arr->size = new_size; + arr->data = _upb_array_tagptr(d->arena_ptr, 3); + submsg = (void*)(d->arena_ptr + (old_size * sizeof(upb_msg*))); + end = (void*)(d->arena_ptr + (new_size * sizeof(upb_msg*))); + d->arena_ptr += new_bytes; } } + + upb_msg* child = *submsg; - { - uint32_t len = (uint8_t)ptr[tagbytes]; - if (UPB_UNLIKELY(len & 0x80)) { - uint32_t byte = (uint8_t)ptr[tagbytes + 1]; - len += (byte - 1) << 7; - if (UPB_UNLIKELY(byte & 0x80)) { - if (card == CARD_r) { - arr->len = submsg - (upb_msg**)_upb_array_ptr(arr); - } - d->limit = saved_limit; - d->fastlimit = saved_fastlimit; - d->depth++; - RETURN_GENERIC("submessage field len >2 bytes\n"); - } - ptr++; - } - ptr += tagbytes + 1; - if (UPB_UNLIKELY(fastdecode_boundscheck2(ptr, len, saved_limit))) { - return fastdecode_err(d); - } - d->limit = ptr + len; - d->fastlimit = UPB_MIN(d->limit, d->fastend); + if (card == CARD_r || UPB_LIKELY(!child)) { + *submsg = child = decode_newmsg_ceil(d, subl, msg_ceil_bytes); } - - if (card == CARD_r || UPB_LIKELY(!*submsg)) { - *submsg = decode_newmsg_ceil(d, subl, msg_ceil_bytes); - } - ptr = fastdecode_dispatch(d, ptr, *submsg, subl, 0); - submsg++; + + ptr += tagbytes + 1; + + ptr = fastdecode_lendelim_submsg(d, ptr, child, subl, 0, saved_limit); if (UPB_UNLIKELY(ptr != d->limit || d->end_group != 0)) { return fastdecode_err(d); } if (card == CARD_r) { - if (UPB_LIKELY(ptr < saved_fastlimit) && - fastdecode_readtag(ptr, tagbytes) == (uint16_t)data) { - 
goto again; + submsg++; + if (UPB_LIKELY(ptr < saved_fastlimit)) { + uint32_t tag = fastdecode_load_tag(ptr); + if (tagbytes == 1) { + if ((uint8_t)tag == (uint8_t)data) goto again; + } else { + if ((uint16_t)tag == (uint16_t)data) goto again; + } + arr->len = submsg - (upb_msg**)_upb_array_ptr(arr); + d->limit = saved_limit; + d->fastlimit = saved_fastlimit; + d->depth++; + return fastdecode_tag_dispatch(d, ptr, msg, table, hasbits, tag); + } else { + if (ptr == saved_limit) { + arr->len = submsg - (upb_msg**)_upb_array_ptr(arr); + d->limit = saved_limit; + d->fastlimit = saved_fastlimit; + d->depth++; + return ptr; + } + goto repeated_generic; } - arr->len = submsg - (upb_msg**)_upb_array_ptr(arr); } d->limit = saved_limit; @@ -451,6 +383,13 @@ again: d->depth++; return fastdecode_dispatch(d, ptr, msg, table, hasbits); + +repeated_generic: + arr->len = submsg - (upb_msg**)_upb_array_ptr(arr); + d->limit = saved_limit; + d->fastlimit = saved_fastlimit; + d->depth++; + RETURN_GENERIC("repeated generic"); } #define F(card, tagbytes, size_ceil, ceil_arg) \ diff --git a/upbc/generator.cc b/upbc/generator.cc index 8e992c44ba..71dc7adcfa 100644 --- a/upbc/generator.cc +++ b/upbc/generator.cc @@ -795,6 +795,9 @@ void TryFillTableEntry(const protobuf::Descriptor* message, if (layout.HasHasbit(field)) { hasbit_index = layout.GetHasbitIndex(field); if (hasbit_index > 31) return; + // the hasbits mask in the parser occupies bits 16-48 + // in the 64 bit register. 
+ hasbit_index += 16; // account for the shifted hasbits } MessageLayout::Size data; @@ -805,8 +808,8 @@ void TryFillTableEntry(const protobuf::Descriptor* message, if (field->type() == protobuf::FieldDescriptor::TYPE_MESSAGE) { SubmsgArray submsg_array = GetSubmsgArray(message); uint64_t idx = submsg_array.indexes[field->message_type()]; - data.size32 |= idx << 32 | hasbit_index << 16; - data.size64 |= idx << 32 | hasbit_index << 16; + data.size32 |= idx << 16 | hasbit_index << 32; + data.size64 |= idx << 16 | hasbit_index << 32; } else { uint32_t hasbit_mask = 1U << hasbit_index; data.size32 |= (uint64_t)hasbit_mask << 16;