From 286441afa745a81c1ee710fa86bd6ce96fe8743a Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sat, 17 Apr 2021 16:23:30 -0700 Subject: [PATCH] Fixed a size regression due to inlining UTF-8 verification. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Overall size/speed impact on fasttable decoder is now: name old time/op new time/op delta ArenaOneAlloc 21.5ns ± 0% 21.5ns ± 0% ~ (p=0.060 n=12+12) ArenaInitialBlockOneAlloc 6.33ns ± 0% 6.33ns ± 0% ~ (p=0.413 n=11+12) LoadDescriptor_Upb 43.4µs ± 1% 45.5µs ± 1% +4.79% (p=0.000 n=12+12) LoadAdsDescriptor_Upb 2.50ms ± 0% 2.51ms ± 2% ~ (p=0.512 n=10+11) LoadDescriptor_Proto2 240µs ± 0% 240µs ± 0% -0.25% (p=0.000 n=12+12) LoadAdsDescriptor_Proto2 12.9ms ± 0% 12.9ms ± 0% +0.20% (p=0.014 n=10+12) Parse_Upb_FileDesc 4.99µs ± 0% 5.04µs ± 0% +0.98% (p=0.000 n=11+10) Parse_Upb_FileDesc 4.02µs ± 0% 4.18µs ± 0% +4.16% (p=0.000 n=10+12) Parse_Upb_FileDesc 4.49µs ± 0% 4.54µs ± 0% +1.16% (p=0.000 n=11+10) Parse_Upb_FileDesc 3.60µs ± 0% 3.80µs ± 0% +5.73% (p=0.000 n=12+11) Parse_Proto2 29.3µs ± 0% 29.3µs ± 0% ~ (p=0.069 n=11+12) Parse_Proto2 20.2µs ± 3% 20.3µs ± 2% ~ (p=0.880 n=12+11) Parse_Proto2 16.5µs ± 0% 16.5µs ± 0% ~ (p=1.000 n=12+12) Parse_Proto2 16.4µs ± 0% 16.4µs ± 1% ~ (p=0.590 n=12+12) SerializeDescriptor_Proto2 5.31µs ± 1% 6.65µs ±29% +25.07% (p=0.000 n=12+12) SerializeDescriptor_Upb 12.4µs ± 0% 12.5µs ± 0% +1.23% (p=0.000 n=12+12) FILE SIZE VM SIZE -------------- -------------- +16% +128 [ = ] 0 [Unmapped] -1.2% -4 -1.2% -4 [section .text] [NEW] +2 [NEW] +2 fastdecode_isdonefallback [DEL] -6 [DEL] -6 fastdecode_longstring_noutf8 -0.2% -124 -0.2% -124 upb/decode_fast.c +5.8% +64 +6.0% +64 upb_pom_1bt_max64b +2.7% +64 +2.7% +64 upb_ppv8_2bt +2.7% +32 +2.8% +32 upb_psm_1bt_max256b +2.8% +32 +3.0% +32 upb_psm_1bt_max64b +2.8% +32 +3.0% +32 upb_psm_2bt_max64b +4.0% +24 +4.2% +24 upb_psv8_1bt +2.0% +16 +2.1% +16 upb_prf4_2bt +1.3% +16 +1.4% +16 upb_prz8_2bt -0.3% -4 -0.3% -4 [3 Others] -1.6% -8 -1.7% -8 upb_cob_1bt -1.6% -8 -1.7% -8 upb_csb_1bt -2.5% -16 -2.6% -16 upb_pov4_1bt -1.3% -16 -1.3% -16 upb_prv8_2bt -2.5% -16 -2.7% -16 upb_psv4_1bt -2.5% -16 -2.6% -16 upb_psv4_2bt -3.0% -32 -3.1% -32 upb_prs_2bt -2.6% -32 -2.6% -32 upb_prv4_2bt -4.9% -48 -5.1% -48 upb_prb_2bt -3.9% -48 -4.0% -48 upb_prv4_1bt -7.2% -72 -7.5% -72 upb_prb_1bt -7.8% -88 -8.0% -88 upb_prs_1bt [ = ] 0 -0.1% -128 TOTAL There is a bit of speed regression, but it appears there were bigger CPU regressions prior to this. We probably need some separate optimization attention again to get back to the performance numbers we had when fasttable was first submitted. --- upb/decode_fast.c | 323 ++++++++++++++++++++++++---------------------- 1 file changed, 167 insertions(+), 156 deletions(-) diff --git a/upb/decode_fast.c b/upb/decode_fast.c index 16f0881664..974dd139ab 100644 --- a/upb/decode_fast.c +++ b/upb/decode_fast.c @@ -670,11 +670,16 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d, const upb_msglayout *table, uint64_t hasbits, upb_strview *dst); -#define FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst) \ - if (!decode_verifyutf8_inl(dst->data, dst->size)) { \ - return fastdecode_err(d); \ - } \ +UPB_NOINLINE +static const char *fastdecode_verifyutf8(upb_decstate *d, const char *ptr, + upb_msg *msg, intptr_t table, + uint64_t hasbits, uint64_t data) { + upb_strview *dst = (upb_strview*)data; + if (!decode_verifyutf8_inl(dst->data, dst->size)) { + return fastdecode_err(d); + } UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); +} #define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ @@ -703,7 +708,8 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d, \ ptr += size; \ if (validate_utf8) { \ - FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ } else { \ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ } @@ -737,158 +743,164 @@ static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size, UPB_POISON_MEMORY_REGION(data + size, copy - size); } -#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ - card, validate_utf8) \ - upb_strview *dst; \ - fastdecode_arr farr; \ - int64_t size; \ - size_t arena_has; \ - size_t common_has; \ - char *buf; \ - \ - UPB_ASSERT(!d->alias); \ - UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_strview), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ - } \ - \ - size = (uint8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - dst->size = size; \ - \ - buf = d->arena.head.ptr; \ - arena_has = _upb_arenahas(&d->arena); \ - common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \ - \ - if (UPB_LIKELY(size <= 15 - tagbytes)) { \ - if (arena_has < 16) goto longstr; \ - d->arena.head.ptr += 16; \ - memcpy(buf, ptr - tagbytes - 1, 16); \ - dst->data = buf + tagbytes + 1; \ - } else if (UPB_LIKELY(size <= 32)) { \ - if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 32, buf, dst); \ - } else if (UPB_LIKELY(size <= 64)) { \ - if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 64, buf, dst); \ - } else if (UPB_LIKELY(size < 128)) { \ - if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ - fastdecode_docopy(d, ptr, size, 128, buf, dst); \ - } else { \ - goto longstr; \ - } \ - \ - ptr += size; \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ - return fastdecode_err(d); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - /* return */ FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \ - } \ - \ - UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ - \ - longstr: \ - ptr--; \ - if (validate_utf8) { \ - UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ - } else { \ - UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ - hasbits, (uint64_t)dst); \ +#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ + card, validate_utf8) \ + upb_strview *dst; \ + fastdecode_arr farr; \ + int64_t size; \ + size_t arena_has; \ + size_t common_has; \ + char *buf; \ + \ + UPB_ASSERT(!d->alias); \ + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_strview), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ + } \ + \ + size = (uint8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->size = size; \ + \ + buf = d->arena.head.ptr; \ + arena_has = _upb_arenahas(&d->arena); \ + common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \ + \ + if (UPB_LIKELY(size <= 15 - tagbytes)) { \ + if (arena_has < 16) \ + goto longstr; \ + d->arena.head.ptr += 16; \ + memcpy(buf, ptr - tagbytes - 1, 16); \ + dst->data = buf + tagbytes + 1; \ + } else if (UPB_LIKELY(size <= 32)) { \ + if (UPB_UNLIKELY(common_has < 32)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 32, buf, dst); \ + } else if (UPB_LIKELY(size <= 64)) { \ + if (UPB_UNLIKELY(common_has < 64)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 64, buf, dst); \ + } else if (UPB_LIKELY(size < 128)) { \ + if (UPB_UNLIKELY(common_has < 128)) \ + goto longstr; \ + fastdecode_docopy(d, ptr, size, 128, buf, dst); \ + } else { \ + goto longstr; \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ + return fastdecode_err(d); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ + UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ + \ + longstr: \ + ptr--; \ + if (validate_utf8) { \ + UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ + } else { \ + UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ + hasbits, (uint64_t)dst); \ } -#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ - copyfunc, validate_utf8) \ - upb_strview *dst; \ - fastdecode_arr farr; \ - int64_t size; \ - \ - if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ - RETURN_GENERIC("string field tag mismatch\n"); \ - } \ - \ - if (UPB_UNLIKELY(!d->alias)) { \ - UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ - } \ - \ - dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ - sizeof(upb_strview), card); \ - \ - again: \ - if (card == CARD_r) { \ - dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ - } \ - \ - size = (int8_t)ptr[tagbytes]; \ - ptr += tagbytes + 1; \ - dst->data = ptr; \ - dst->size = size; \ - \ - if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \ - ptr--; \ - if (validate_utf8) { \ - return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } else { \ - return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ - (uint64_t)dst); \ - } \ - } \ - \ - ptr += size; \ - \ - if (card == CARD_r) { \ - if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ - return fastdecode_err(d); \ - } \ - fastdecode_nextret ret = fastdecode_nextrepeated( \ - d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ - switch (ret.next) { \ - case FD_NEXT_SAMEFIELD: \ - dst = ret.dst; \ - if (UPB_UNLIKELY(!d->alias)) { \ - /* Buffer flipped and we can't alias any more. Bounce to */ \ - /* copyfunc(), but via dispatch since we need to reload table */ \ - /* data also. */ \ - fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); \ - data = ret.tag; \ - UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ - } \ - goto again; \ - case FD_NEXT_OTHERFIELD: \ - data = ret.tag; \ - UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ - case FD_NEXT_ATLIMIT: \ - return ptr; \ - } \ - } \ - \ - if (card != CARD_r && validate_utf8) { \ - /* return */ FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \ - } \ - \ +#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ + copyfunc, validate_utf8) \ + upb_strview *dst; \ + fastdecode_arr farr; \ + int64_t size; \ + \ + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ + RETURN_GENERIC("string field tag mismatch\n"); \ + } \ + \ + if (UPB_UNLIKELY(!d->alias)) { \ + UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ + } \ + \ + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ + sizeof(upb_strview), card); \ + \ + again: \ + if (card == CARD_r) { \ + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ + } \ + \ + size = (int8_t)ptr[tagbytes]; \ + ptr += tagbytes + 1; \ + dst->data = ptr; \ + dst->size = size; \ + \ + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \ + ptr--; \ + if (validate_utf8) { \ + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } else { \ + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ + (uint64_t)dst); \ + } \ + } \ + \ + ptr += size; \ + \ + if (card == CARD_r) { \ + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ + return fastdecode_err(d); \ + } \ + fastdecode_nextret ret = fastdecode_nextrepeated( \ + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ + switch (ret.next) { \ + case FD_NEXT_SAMEFIELD: \ + dst = ret.dst; \ + if (UPB_UNLIKELY(!d->alias)) { \ + /* Buffer flipped and we can't alias any more. Bounce to */ \ + /* copyfunc(), but via dispatch since we need to reload table */ \ + /* data also. */ \ + fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + } \ + goto again; \ + case FD_NEXT_OTHERFIELD: \ + data = ret.tag; \ + UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ + case FD_NEXT_ATLIMIT: \ + return ptr; \ + } \ + } \ + \ + if (card != CARD_r && validate_utf8) { \ + data = (uint64_t)dst; \ + UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \ + } \ + \ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); /* Generate all combinations: @@ -925,7 +937,6 @@ TAGBYTES(r) #undef b_VALIDATE #undef F #undef TAGBYTES -#undef FASTDECODE_VERIFYUTF8 #undef FASTDECODE_LONGSTRING #undef FASTDECODE_COPYSTRING #undef FASTDECODE_STRING