Fixed a size regression due to inlining UTF-8 verification.

Overall size/speed impact on fasttable decoder is now:

name                                      old time/op  new time/op  delta
ArenaOneAlloc                             21.5ns ± 0%  21.5ns ± 0%     ~     (p=0.060 n=12+12)
ArenaInitialBlockOneAlloc                 6.33ns ± 0%  6.33ns ± 0%     ~     (p=0.413 n=11+12)
LoadDescriptor_Upb                        43.4µs ± 1%  45.5µs ± 1%   +4.79%  (p=0.000 n=12+12)
LoadAdsDescriptor_Upb                     2.50ms ± 0%  2.51ms ± 2%     ~     (p=0.512 n=10+11)
LoadDescriptor_Proto2                      240µs ± 0%   240µs ± 0%   -0.25%  (p=0.000 n=12+12)
LoadAdsDescriptor_Proto2                  12.9ms ± 0%  12.9ms ± 0%   +0.20%  (p=0.014 n=10+12)
Parse_Upb_FileDesc<UseArena,Copy>         4.99µs ± 0%  5.04µs ± 0%   +0.98%  (p=0.000 n=11+10)
Parse_Upb_FileDesc<UseArena,Alias>        4.02µs ± 0%  4.18µs ± 0%   +4.16%  (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Copy>        4.49µs ± 0%  4.54µs ± 0%   +1.16%  (p=0.000 n=11+10)
Parse_Upb_FileDesc<InitBlock,Alias>       3.60µs ± 0%  3.80µs ± 0%   +5.73%  (p=0.000 n=12+11)
Parse_Proto2<FileDesc,NoArena,Copy>       29.3µs ± 0%  29.3µs ± 0%     ~     (p=0.069 n=11+12)
Parse_Proto2<FileDesc,UseArena,Copy>      20.2µs ± 3%  20.3µs ± 2%     ~     (p=0.880 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy>     16.5µs ± 0%  16.5µs ± 0%     ~     (p=1.000 n=12+12)
Parse_Proto2<FileDescSV,InitBlock,Alias>  16.4µs ± 0%  16.4µs ± 1%     ~     (p=0.590 n=12+12)
SerializeDescriptor_Proto2                5.31µs ± 1%  6.65µs ±29%  +25.07%  (p=0.000 n=12+12)
SerializeDescriptor_Upb                   12.4µs ± 0%  12.5µs ± 0%   +1.23%  (p=0.000 n=12+12)

    FILE SIZE        VM SIZE
 --------------  --------------
   +16%    +128  [ = ]       0    [Unmapped]
  -1.2%      -4  -1.2%      -4    [section .text]
    [NEW]      +2  [NEW]      +2    fastdecode_isdonefallback
    [DEL]      -6  [DEL]      -6    fastdecode_longstring_noutf8
  -0.2%    -124  -0.2%    -124    upb/decode_fast.c
    +5.8%     +64  +6.0%     +64    upb_pom_1bt_max64b
    +2.7%     +64  +2.7%     +64    upb_ppv8_2bt
    +2.7%     +32  +2.8%     +32    upb_psm_1bt_max256b
    +2.8%     +32  +3.0%     +32    upb_psm_1bt_max64b
    +2.8%     +32  +3.0%     +32    upb_psm_2bt_max64b
    +4.0%     +24  +4.2%     +24    upb_psv8_1bt
    +2.0%     +16  +2.1%     +16    upb_prf4_2bt
    +1.3%     +16  +1.4%     +16    upb_prz8_2bt
    -0.3%      -4  -0.3%      -4    [3 Others]
    -1.6%      -8  -1.7%      -8    upb_cob_1bt
    -1.6%      -8  -1.7%      -8    upb_csb_1bt
    -2.5%     -16  -2.6%     -16    upb_pov4_1bt
    -1.3%     -16  -1.3%     -16    upb_prv8_2bt
    -2.5%     -16  -2.7%     -16    upb_psv4_1bt
    -2.5%     -16  -2.6%     -16    upb_psv4_2bt
    -3.0%     -32  -3.1%     -32    upb_prs_2bt
    -2.6%     -32  -2.6%     -32    upb_prv4_2bt
    -4.9%     -48  -5.1%     -48    upb_prb_2bt
    -3.9%     -48  -4.0%     -48    upb_prv4_1bt
    -7.2%     -72  -7.5%     -72    upb_prb_1bt
    -7.8%     -88  -8.0%     -88    upb_prs_1bt
  [ = ]       0  -0.1%    -128    TOTAL

There is a bit of speed regression, but it appears there were bigger
CPU regressions prior to this.  We probably need some separate
optimization attention again to get back to the performance numbers
we had when fasttable was first submitted.
pull/13171/head
Joshua Haberman 4 years ago
parent 75df4cdaa3
commit 286441afa7
  1. 323
      upb/decode_fast.c

@ -670,11 +670,16 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d,
const upb_msglayout *table,
uint64_t hasbits, upb_strview *dst);
#define FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst) \
if (!decode_verifyutf8_inl(dst->data, dst->size)) { \
return fastdecode_err(d); \
} \
UPB_NOINLINE
static const char *fastdecode_verifyutf8(upb_decstate *d, const char *ptr,
upb_msg *msg, intptr_t table,
uint64_t hasbits, uint64_t data) {
upb_strview *dst = (upb_strview*)data;
if (!decode_verifyutf8_inl(dst->data, dst->size)) {
return fastdecode_err(d);
}
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
}
#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \
int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \
@ -703,7 +708,8 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d,
\
ptr += size; \
if (validate_utf8) { \
FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \
data = (uint64_t)dst; \
UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
} else { \
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
}
@ -737,158 +743,164 @@ static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size,
UPB_POISON_MEMORY_REGION(data + size, copy - size);
}
#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
card, validate_utf8) \
upb_strview *dst; \
fastdecode_arr farr; \
int64_t size; \
size_t arena_has; \
size_t common_has; \
char *buf; \
\
UPB_ASSERT(!d->alias); \
UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \
\
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
sizeof(upb_strview), card); \
\
again: \
if (card == CARD_r) { \
dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \
} \
\
size = (uint8_t)ptr[tagbytes]; \
ptr += tagbytes + 1; \
dst->size = size; \
\
buf = d->arena.head.ptr; \
arena_has = _upb_arenahas(&d->arena); \
common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \
\
if (UPB_LIKELY(size <= 15 - tagbytes)) { \
if (arena_has < 16) goto longstr; \
d->arena.head.ptr += 16; \
memcpy(buf, ptr - tagbytes - 1, 16); \
dst->data = buf + tagbytes + 1; \
} else if (UPB_LIKELY(size <= 32)) { \
if (UPB_UNLIKELY(common_has < 32)) goto longstr; \
fastdecode_docopy(d, ptr, size, 32, buf, dst); \
} else if (UPB_LIKELY(size <= 64)) { \
if (UPB_UNLIKELY(common_has < 64)) goto longstr; \
fastdecode_docopy(d, ptr, size, 64, buf, dst); \
} else if (UPB_LIKELY(size < 128)) { \
if (UPB_UNLIKELY(common_has < 128)) goto longstr; \
fastdecode_docopy(d, ptr, size, 128, buf, dst); \
} else { \
goto longstr; \
} \
\
ptr += size; \
\
if (card == CARD_r) { \
if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \
return fastdecode_err(d); \
} \
fastdecode_nextret ret = fastdecode_nextrepeated( \
d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \
switch (ret.next) { \
case FD_NEXT_SAMEFIELD: \
dst = ret.dst; \
goto again; \
case FD_NEXT_OTHERFIELD: \
data = ret.tag; \
UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
case FD_NEXT_ATLIMIT: \
return ptr; \
} \
} \
\
if (card != CARD_r && validate_utf8) { \
/* return */ FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \
} \
\
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
\
longstr: \
ptr--; \
if (validate_utf8) { \
UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \
hasbits, (uint64_t)dst); \
} else { \
UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \
hasbits, (uint64_t)dst); \
#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
card, validate_utf8) \
upb_strview *dst; \
fastdecode_arr farr; \
int64_t size; \
size_t arena_has; \
size_t common_has; \
char *buf; \
\
UPB_ASSERT(!d->alias); \
UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \
\
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
sizeof(upb_strview), card); \
\
again: \
if (card == CARD_r) { \
dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \
} \
\
size = (uint8_t)ptr[tagbytes]; \
ptr += tagbytes + 1; \
dst->size = size; \
\
buf = d->arena.head.ptr; \
arena_has = _upb_arenahas(&d->arena); \
common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \
\
if (UPB_LIKELY(size <= 15 - tagbytes)) { \
if (arena_has < 16) \
goto longstr; \
d->arena.head.ptr += 16; \
memcpy(buf, ptr - tagbytes - 1, 16); \
dst->data = buf + tagbytes + 1; \
} else if (UPB_LIKELY(size <= 32)) { \
if (UPB_UNLIKELY(common_has < 32)) \
goto longstr; \
fastdecode_docopy(d, ptr, size, 32, buf, dst); \
} else if (UPB_LIKELY(size <= 64)) { \
if (UPB_UNLIKELY(common_has < 64)) \
goto longstr; \
fastdecode_docopy(d, ptr, size, 64, buf, dst); \
} else if (UPB_LIKELY(size < 128)) { \
if (UPB_UNLIKELY(common_has < 128)) \
goto longstr; \
fastdecode_docopy(d, ptr, size, 128, buf, dst); \
} else { \
goto longstr; \
} \
\
ptr += size; \
\
if (card == CARD_r) { \
if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \
return fastdecode_err(d); \
} \
fastdecode_nextret ret = fastdecode_nextrepeated( \
d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \
switch (ret.next) { \
case FD_NEXT_SAMEFIELD: \
dst = ret.dst; \
goto again; \
case FD_NEXT_OTHERFIELD: \
data = ret.tag; \
UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
case FD_NEXT_ATLIMIT: \
return ptr; \
} \
} \
\
if (card != CARD_r && validate_utf8) { \
data = (uint64_t)dst; \
UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
} \
\
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
\
longstr: \
ptr--; \
if (validate_utf8) { \
UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \
hasbits, (uint64_t)dst); \
} else { \
UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \
hasbits, (uint64_t)dst); \
}
#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \
copyfunc, validate_utf8) \
upb_strview *dst; \
fastdecode_arr farr; \
int64_t size; \
\
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
RETURN_GENERIC("string field tag mismatch\n"); \
} \
\
if (UPB_UNLIKELY(!d->alias)) { \
UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \
} \
\
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
sizeof(upb_strview), card); \
\
again: \
if (card == CARD_r) { \
dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \
} \
\
size = (int8_t)ptr[tagbytes]; \
ptr += tagbytes + 1; \
dst->data = ptr; \
dst->size = size; \
\
if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \
ptr--; \
if (validate_utf8) { \
return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \
(uint64_t)dst); \
} else { \
return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \
(uint64_t)dst); \
} \
} \
\
ptr += size; \
\
if (card == CARD_r) { \
if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \
return fastdecode_err(d); \
} \
fastdecode_nextret ret = fastdecode_nextrepeated( \
d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \
switch (ret.next) { \
case FD_NEXT_SAMEFIELD: \
dst = ret.dst; \
if (UPB_UNLIKELY(!d->alias)) { \
/* Buffer flipped and we can't alias any more. Bounce to */ \
/* copyfunc(), but via dispatch since we need to reload table */ \
/* data also. */ \
fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); \
data = ret.tag; \
UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
} \
goto again; \
case FD_NEXT_OTHERFIELD: \
data = ret.tag; \
UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
case FD_NEXT_ATLIMIT: \
return ptr; \
} \
} \
\
if (card != CARD_r && validate_utf8) { \
/* return */ FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \
} \
\
#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \
copyfunc, validate_utf8) \
upb_strview *dst; \
fastdecode_arr farr; \
int64_t size; \
\
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
RETURN_GENERIC("string field tag mismatch\n"); \
} \
\
if (UPB_UNLIKELY(!d->alias)) { \
UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \
} \
\
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
sizeof(upb_strview), card); \
\
again: \
if (card == CARD_r) { \
dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); \
} \
\
size = (int8_t)ptr[tagbytes]; \
ptr += tagbytes + 1; \
dst->data = ptr; \
dst->size = size; \
\
if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \
ptr--; \
if (validate_utf8) { \
return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \
(uint64_t)dst); \
} else { \
return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \
(uint64_t)dst); \
} \
} \
\
ptr += size; \
\
if (card == CARD_r) { \
if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \
return fastdecode_err(d); \
} \
fastdecode_nextret ret = fastdecode_nextrepeated( \
d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); \
switch (ret.next) { \
case FD_NEXT_SAMEFIELD: \
dst = ret.dst; \
if (UPB_UNLIKELY(!d->alias)) { \
/* Buffer flipped and we can't alias any more. Bounce to */ \
/* copyfunc(), but via dispatch since we need to reload table */ \
/* data also. */ \
fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); \
data = ret.tag; \
UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
} \
goto again; \
case FD_NEXT_OTHERFIELD: \
data = ret.tag; \
UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
case FD_NEXT_ATLIMIT: \
return ptr; \
} \
} \
\
if (card != CARD_r && validate_utf8) { \
data = (uint64_t)dst; \
UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
} \
\
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
/* Generate all combinations:
@ -925,7 +937,6 @@ TAGBYTES(r)
#undef b_VALIDATE
#undef F
#undef TAGBYTES
#undef FASTDECODE_VERIFYUTF8
#undef FASTDECODE_LONGSTRING
#undef FASTDECODE_COPYSTRING
#undef FASTDECODE_STRING

Loading…
Cancel
Save