Fixed a size regression due to inlining UTF-8 verification.

Overall size/speed impact on fasttable decoder is now:

name                                      old time/op  new time/op  delta
ArenaOneAlloc                             21.5ns ± 0%  21.5ns ± 0%     ~     (p=0.060 n=12+12)
ArenaInitialBlockOneAlloc                 6.33ns ± 0%  6.33ns ± 0%     ~     (p=0.413 n=11+12)
LoadDescriptor_Upb                        43.4µs ± 1%  45.5µs ± 1%   +4.79%  (p=0.000 n=12+12)
LoadAdsDescriptor_Upb                     2.50ms ± 0%  2.51ms ± 2%     ~     (p=0.512 n=10+11)
LoadDescriptor_Proto2                      240µs ± 0%   240µs ± 0%   -0.25%  (p=0.000 n=12+12)
LoadAdsDescriptor_Proto2                  12.9ms ± 0%  12.9ms ± 0%   +0.20%  (p=0.014 n=10+12)
Parse_Upb_FileDesc<UseArena,Copy>         4.99µs ± 0%  5.04µs ± 0%   +0.98%  (p=0.000 n=11+10)
Parse_Upb_FileDesc<UseArena,Alias>        4.02µs ± 0%  4.18µs ± 0%   +4.16%  (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Copy>        4.49µs ± 0%  4.54µs ± 0%   +1.16%  (p=0.000 n=11+10)
Parse_Upb_FileDesc<InitBlock,Alias>       3.60µs ± 0%  3.80µs ± 0%   +5.73%  (p=0.000 n=12+11)
Parse_Proto2<FileDesc,NoArena,Copy>       29.3µs ± 0%  29.3µs ± 0%     ~     (p=0.069 n=11+12)
Parse_Proto2<FileDesc,UseArena,Copy>      20.2µs ± 3%  20.3µs ± 2%     ~     (p=0.880 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy>     16.5µs ± 0%  16.5µs ± 0%     ~     (p=1.000 n=12+12)
Parse_Proto2<FileDescSV,InitBlock,Alias>  16.4µs ± 0%  16.4µs ± 1%     ~     (p=0.590 n=12+12)
SerializeDescriptor_Proto2                5.31µs ± 1%  6.65µs ±29%  +25.07%  (p=0.000 n=12+12)
SerializeDescriptor_Upb                   12.4µs ± 0%  12.5µs ± 0%   +1.23%  (p=0.000 n=12+12)

    FILE SIZE        VM SIZE
 --------------  --------------
   +16%    +128  [ = ]       0    [Unmapped]
  -1.2%      -4  -1.2%      -4    [section .text]
    [NEW]      +2  [NEW]      +2    fastdecode_isdonefallback
    [DEL]      -6  [DEL]      -6    fastdecode_longstring_noutf8
  -0.2%    -124  -0.2%    -124    upb/decode_fast.c
    +5.8%     +64  +6.0%     +64    upb_pom_1bt_max64b
    +2.7%     +64  +2.7%     +64    upb_ppv8_2bt
    +2.7%     +32  +2.8%     +32    upb_psm_1bt_max256b
    +2.8%     +32  +3.0%     +32    upb_psm_1bt_max64b
    +2.8%     +32  +3.0%     +32    upb_psm_2bt_max64b
    +4.0%     +24  +4.2%     +24    upb_psv8_1bt
    +2.0%     +16  +2.1%     +16    upb_prf4_2bt
    +1.3%     +16  +1.4%     +16    upb_prz8_2bt
    -0.3%      -4  -0.3%      -4    [3 Others]
    -1.6%      -8  -1.7%      -8    upb_cob_1bt
    -1.6%      -8  -1.7%      -8    upb_csb_1bt
    -2.5%     -16  -2.6%     -16    upb_pov4_1bt
    -1.3%     -16  -1.3%     -16    upb_prv8_2bt
    -2.5%     -16  -2.7%     -16    upb_psv4_1bt
    -2.5%     -16  -2.6%     -16    upb_psv4_2bt
    -3.0%     -32  -3.1%     -32    upb_prs_2bt
    -2.6%     -32  -2.6%     -32    upb_prv4_2bt
    -4.9%     -48  -5.1%     -48    upb_prb_2bt
    -3.9%     -48  -4.0%     -48    upb_prv4_1bt
    -7.2%     -72  -7.5%     -72    upb_prb_1bt
    -7.8%     -88  -8.0%     -88    upb_prs_1bt
  [ = ]       0  -0.1%    -128    TOTAL

There is a bit of speed regression, but it appears there were bigger
CPU regressions prior to this.  We probably need some separate
optimization attention again to get back to the performance numbers
we had when fasttable was first submitted.
pull/13171/head
Joshua Haberman 4 years ago
parent 75df4cdaa3
commit 286441afa7
  1. 323
      upb/decode_fast.c

@ -670,11 +670,16 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d,
const upb_msglayout *table, const upb_msglayout *table,
uint64_t hasbits, upb_strview *dst); uint64_t hasbits, upb_strview *dst);
#define FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst) \ UPB_NOINLINE
if (!decode_verifyutf8_inl(dst->data, dst->size)) { \ static const char *fastdecode_verifyutf8(upb_decstate *d, const char *ptr,
return fastdecode_err(d); \ upb_msg *msg, intptr_t table,
} \ uint64_t hasbits, uint64_t data) {
upb_strview *dst = (upb_strview*)data;
if (!decode_verifyutf8_inl(dst->data, dst->size)) {
return fastdecode_err(d);
}
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
}
#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ #define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \
int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \
@ -703,7 +708,8 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d,
\ \
ptr += size; \ ptr += size; \
if (validate_utf8) { \ if (validate_utf8) { \
FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \ data = (uint64_t)dst; \
UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
} else { \ } else { \
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
} }
@ -737,158 +743,164 @@ static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size,
UPB_POISON_MEMORY_REGION(data + size, copy - size); UPB_POISON_MEMORY_REGION(data + size, copy - size);
} }
#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \ #define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
card, validate_utf8) \ card, validate_utf8) \
upb_strview *dst; \ upb_strview *dst; \
fastdecode_arr farr; \ fastdecode_arr farr; \
int64_t size; \ int64_t size; \
size_t arena_has; \ size_t arena_has; \
size_t common_has; \ size_t common_has; \
char *buf; \ char *buf; \
\ \
UPB_ASSERT(!d->alias); \ UPB_ASSERT(!d->alias); \
UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \ UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \
\ \
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
sizeof(upb_strview), card); \ sizeof(upb_strview), card); \
\ \
again: \ again: \
if (card == CARD_r) { \ if (card == CARD_r) { \
dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \
} \ } \
\ \
size = (uint8_t)ptr[tagbytes]; \ size = (uint8_t)ptr[tagbytes]; \
ptr += tagbytes + 1; \ ptr += tagbytes + 1; \
dst->size = size; \ dst->size = size; \
\ \
buf = d->arena.head.ptr; \ buf = d->arena.head.ptr; \
arena_has = _upb_arenahas(&d->arena); \ arena_has = _upb_arenahas(&d->arena); \
common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \ common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \
\ \
if (UPB_LIKELY(size <= 15 - tagbytes)) { \ if (UPB_LIKELY(size <= 15 - tagbytes)) { \
if (arena_has < 16) goto longstr; \ if (arena_has < 16) \
d->arena.head.ptr += 16; \ goto longstr; \
memcpy(buf, ptr - tagbytes - 1, 16); \ d->arena.head.ptr += 16; \
dst->data = buf + tagbytes + 1; \ memcpy(buf, ptr - tagbytes - 1, 16); \
} else if (UPB_LIKELY(size <= 32)) { \ dst->data = buf + tagbytes + 1; \
if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ } else if (UPB_LIKELY(size <= 32)) { \
fastdecode_docopy(d, ptr, size, 32, buf, dst); \ if (UPB_UNLIKELY(common_has < 32)) \
} else if (UPB_LIKELY(size <= 64)) { \ goto longstr; \
if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ fastdecode_docopy(d, ptr, size, 32, buf, dst); \
fastdecode_docopy(d, ptr, size, 64, buf, dst); \ } else if (UPB_LIKELY(size <= 64)) { \
} else if (UPB_LIKELY(size < 128)) { \ if (UPB_UNLIKELY(common_has < 64)) \
if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ goto longstr; \
fastdecode_docopy(d, ptr, size, 128, buf, dst); \ fastdecode_docopy(d, ptr, size, 64, buf, dst); \
} else { \ } else if (UPB_LIKELY(size < 128)) { \
goto longstr; \ if (UPB_UNLIKELY(common_has < 128)) \
} \ goto longstr; \
\ fastdecode_docopy(d, ptr, size, 128, buf, dst); \
ptr += size; \ } else { \
\ goto longstr; \
if (card == CARD_r) { \ } \
if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ \
return fastdecode_err(d); \ ptr += size; \
} \ \
fastdecode_nextret ret = fastdecode_nextrepeated( \ if (card == CARD_r) { \
d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \
switch (ret.next) { \ return fastdecode_err(d); \
case FD_NEXT_SAMEFIELD: \ } \
dst = ret.dst; \ fastdecode_nextret ret = fastdecode_nextrepeated( \
goto again; \ d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \
case FD_NEXT_OTHERFIELD: \ switch (ret.next) { \
data = ret.tag; \ case FD_NEXT_SAMEFIELD: \
UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ dst = ret.dst; \
case FD_NEXT_ATLIMIT: \ goto again; \
return ptr; \ case FD_NEXT_OTHERFIELD: \
} \ data = ret.tag; \
} \ UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
\ case FD_NEXT_ATLIMIT: \
if (card != CARD_r && validate_utf8) { \ return ptr; \
/* return */ FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \ } \
} \ } \
\ \
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ if (card != CARD_r && validate_utf8) { \
\ data = (uint64_t)dst; \
longstr: \ UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
ptr--; \ } \
if (validate_utf8) { \ \
UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
hasbits, (uint64_t)dst); \ \
} else { \ longstr: \
UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \ ptr--; \
hasbits, (uint64_t)dst); \ if (validate_utf8) { \
UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \
hasbits, (uint64_t)dst); \
} else { \
UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \
hasbits, (uint64_t)dst); \
} }
#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \ #define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \
copyfunc, validate_utf8) \ copyfunc, validate_utf8) \
upb_strview *dst; \ upb_strview *dst; \
fastdecode_arr farr; \ fastdecode_arr farr; \
int64_t size; \ int64_t size; \
\ \
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \ if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
RETURN_GENERIC("string field tag mismatch\n"); \ RETURN_GENERIC("string field tag mismatch\n"); \
} \ } \
\ \
if (UPB_UNLIKELY(!d->alias)) { \ if (UPB_UNLIKELY(!d->alias)) { \
UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \ UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \
} \ } \
\ \
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \ dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
sizeof(upb_strview), card); \ sizeof(upb_strview), card); \
\ \
again: \ again: \
if (card == CARD_r) { \ if (card == CARD_r) { \
dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \ dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \
} \ } \
\ \
size = (int8_t)ptr[tagbytes]; \ size = (int8_t)ptr[tagbytes]; \
ptr += tagbytes + 1; \ ptr += tagbytes + 1; \
dst->data = ptr; \ dst->data = ptr; \
dst->size = size; \ dst->size = size; \
\ \
if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \ if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \
ptr--; \ ptr--; \
if (validate_utf8) { \ if (validate_utf8) { \
return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \ return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \
(uint64_t)dst); \ (uint64_t)dst); \
} else { \ } else { \
return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \ return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \
(uint64_t)dst); \ (uint64_t)dst); \
} \ } \
} \ } \
\ \
ptr += size; \ ptr += size; \
\ \
if (card == CARD_r) { \ if (card == CARD_r) { \
if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \ if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \
return fastdecode_err(d); \ return fastdecode_err(d); \
} \ } \
fastdecode_nextret ret = fastdecode_nextrepeated( \ fastdecode_nextret ret = fastdecode_nextrepeated( \
d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \ d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \
switch (ret.next) { \ switch (ret.next) { \
case FD_NEXT_SAMEFIELD: \ case FD_NEXT_SAMEFIELD: \
dst = ret.dst; \ dst = ret.dst; \
if (UPB_UNLIKELY(!d->alias)) { \ if (UPB_UNLIKELY(!d->alias)) { \
/* Buffer flipped and we can't alias any more. Bounce to */ \ /* Buffer flipped and we can't alias any more. Bounce to */ \
/* copyfunc(), but via dispatch since we need to reload table */ \ /* copyfunc(), but via dispatch since we need to reload table */ \
/* data also. */ \ /* data also. */ \
fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); \ fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); \
data = ret.tag; \ data = ret.tag; \
UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
} \ } \
goto again; \ goto again; \
case FD_NEXT_OTHERFIELD: \ case FD_NEXT_OTHERFIELD: \
data = ret.tag; \ data = ret.tag; \
UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \ UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
case FD_NEXT_ATLIMIT: \ case FD_NEXT_ATLIMIT: \
return ptr; \ return ptr; \
} \ } \
} \ } \
\ \
if (card != CARD_r && validate_utf8) { \ if (card != CARD_r && validate_utf8) { \
/* return */ FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \ data = (uint64_t)dst; \
} \ UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
\ } \
\
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
/* Generate all combinations: /* Generate all combinations:
@ -925,7 +937,6 @@ TAGBYTES(r)
#undef b_VALIDATE #undef b_VALIDATE
#undef F #undef F
#undef TAGBYTES #undef TAGBYTES
#undef FASTDECODE_VERIFYUTF8
#undef FASTDECODE_LONGSTRING #undef FASTDECODE_LONGSTRING
#undef FASTDECODE_COPYSTRING #undef FASTDECODE_COPYSTRING
#undef FASTDECODE_STRING #undef FASTDECODE_STRING

Loading…
Cancel
Save