Fixed a size regression due to inlining UTF-8 verification.

Overall size/speed impact on fasttable decoder is now:

name                                      old time/op  new time/op  delta
ArenaOneAlloc                             21.5ns ± 0%  21.5ns ± 0%     ~     (p=0.060 n=12+12)
ArenaInitialBlockOneAlloc                 6.33ns ± 0%  6.33ns ± 0%     ~     (p=0.413 n=11+12)
LoadDescriptor_Upb                        43.4µs ± 1%  45.5µs ± 1%   +4.79%  (p=0.000 n=12+12)
LoadAdsDescriptor_Upb                     2.50ms ± 0%  2.51ms ± 2%     ~     (p=0.512 n=10+11)
LoadDescriptor_Proto2                      240µs ± 0%   240µs ± 0%   -0.25%  (p=0.000 n=12+12)
LoadAdsDescriptor_Proto2                  12.9ms ± 0%  12.9ms ± 0%   +0.20%  (p=0.014 n=10+12)
Parse_Upb_FileDesc<UseArena,Copy>         4.99µs ± 0%  5.04µs ± 0%   +0.98%  (p=0.000 n=11+10)
Parse_Upb_FileDesc<UseArena,Alias>        4.02µs ± 0%  4.18µs ± 0%   +4.16%  (p=0.000 n=10+12)
Parse_Upb_FileDesc<InitBlock,Copy>        4.49µs ± 0%  4.54µs ± 0%   +1.16%  (p=0.000 n=11+10)
Parse_Upb_FileDesc<InitBlock,Alias>       3.60µs ± 0%  3.80µs ± 0%   +5.73%  (p=0.000 n=12+11)
Parse_Proto2<FileDesc,NoArena,Copy>       29.3µs ± 0%  29.3µs ± 0%     ~     (p=0.069 n=11+12)
Parse_Proto2<FileDesc,UseArena,Copy>      20.2µs ± 3%  20.3µs ± 2%     ~     (p=0.880 n=12+11)
Parse_Proto2<FileDesc,InitBlock,Copy>     16.5µs ± 0%  16.5µs ± 0%     ~     (p=1.000 n=12+12)
Parse_Proto2<FileDescSV,InitBlock,Alias>  16.4µs ± 0%  16.4µs ± 1%     ~     (p=0.590 n=12+12)
SerializeDescriptor_Proto2                5.31µs ± 1%  6.65µs ±29%  +25.07%  (p=0.000 n=12+12)
SerializeDescriptor_Upb                   12.4µs ± 0%  12.5µs ± 0%   +1.23%  (p=0.000 n=12+12)

    FILE SIZE        VM SIZE
 --------------  --------------
   +16%    +128  [ = ]       0    [Unmapped]
  -1.2%      -4  -1.2%      -4    [section .text]
    [NEW]      +2  [NEW]      +2    fastdecode_isdonefallback
    [DEL]      -6  [DEL]      -6    fastdecode_longstring_noutf8
  -0.2%    -124  -0.2%    -124    upb/decode_fast.c
    +5.8%     +64  +6.0%     +64    upb_pom_1bt_max64b
    +2.7%     +64  +2.7%     +64    upb_ppv8_2bt
    +2.7%     +32  +2.8%     +32    upb_psm_1bt_max256b
    +2.8%     +32  +3.0%     +32    upb_psm_1bt_max64b
    +2.8%     +32  +3.0%     +32    upb_psm_2bt_max64b
    +4.0%     +24  +4.2%     +24    upb_psv8_1bt
    +2.0%     +16  +2.1%     +16    upb_prf4_2bt
    +1.3%     +16  +1.4%     +16    upb_prz8_2bt
    -0.3%      -4  -0.3%      -4    [3 Others]
    -1.6%      -8  -1.7%      -8    upb_cob_1bt
    -1.6%      -8  -1.7%      -8    upb_csb_1bt
    -2.5%     -16  -2.6%     -16    upb_pov4_1bt
    -1.3%     -16  -1.3%     -16    upb_prv8_2bt
    -2.5%     -16  -2.7%     -16    upb_psv4_1bt
    -2.5%     -16  -2.6%     -16    upb_psv4_2bt
    -3.0%     -32  -3.1%     -32    upb_prs_2bt
    -2.6%     -32  -2.6%     -32    upb_prv4_2bt
    -4.9%     -48  -5.1%     -48    upb_prb_2bt
    -3.9%     -48  -4.0%     -48    upb_prv4_1bt
    -7.2%     -72  -7.5%     -72    upb_prb_1bt
    -7.8%     -88  -8.0%     -88    upb_prs_1bt
  [ = ]       0  -0.1%    -128    TOTAL

There is a bit of speed regression, but it appears there were bigger
CPU regressions prior to this.  We probably need some separate
optimization attention again to get back to the performance numbers
we had when fasttable was first submitted.
pull/13171/head
Joshua Haberman 4 years ago
parent 75df4cdaa3
commit 286441afa7
  1. 35
      upb/decode_fast.c

@ -670,11 +670,16 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d,
const upb_msglayout *table, const upb_msglayout *table,
uint64_t hasbits, upb_strview *dst); uint64_t hasbits, upb_strview *dst);
#define FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst) \ UPB_NOINLINE
if (!decode_verifyutf8_inl(dst->data, dst->size)) { \ static const char *fastdecode_verifyutf8(upb_decstate *d, const char *ptr,
return fastdecode_err(d); \ upb_msg *msg, intptr_t table,
} \ uint64_t hasbits, uint64_t data) {
upb_strview *dst = (upb_strview*)data;
if (!decode_verifyutf8_inl(dst->data, dst->size)) {
return fastdecode_err(d);
}
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
}
#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \ #define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \
int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \ int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \
@ -703,7 +708,8 @@ typedef const char *fastdecode_copystr_func(struct upb_decstate *d,
\ \
ptr += size; \ ptr += size; \
if (validate_utf8) { \ if (validate_utf8) { \
FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \ data = (uint64_t)dst; \
UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
} else { \ } else { \
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
} }
@ -766,18 +772,22 @@ static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size,
common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \ common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \
\ \
if (UPB_LIKELY(size <= 15 - tagbytes)) { \ if (UPB_LIKELY(size <= 15 - tagbytes)) { \
if (arena_has < 16) goto longstr; \ if (arena_has < 16) \
goto longstr; \
d->arena.head.ptr += 16; \ d->arena.head.ptr += 16; \
memcpy(buf, ptr - tagbytes - 1, 16); \ memcpy(buf, ptr - tagbytes - 1, 16); \
dst->data = buf + tagbytes + 1; \ dst->data = buf + tagbytes + 1; \
} else if (UPB_LIKELY(size <= 32)) { \ } else if (UPB_LIKELY(size <= 32)) { \
if (UPB_UNLIKELY(common_has < 32)) goto longstr; \ if (UPB_UNLIKELY(common_has < 32)) \
goto longstr; \
fastdecode_docopy(d, ptr, size, 32, buf, dst); \ fastdecode_docopy(d, ptr, size, 32, buf, dst); \
} else if (UPB_LIKELY(size <= 64)) { \ } else if (UPB_LIKELY(size <= 64)) { \
if (UPB_UNLIKELY(common_has < 64)) goto longstr; \ if (UPB_UNLIKELY(common_has < 64)) \
goto longstr; \
fastdecode_docopy(d, ptr, size, 64, buf, dst); \ fastdecode_docopy(d, ptr, size, 64, buf, dst); \
} else if (UPB_LIKELY(size < 128)) { \ } else if (UPB_LIKELY(size < 128)) { \
if (UPB_UNLIKELY(common_has < 128)) goto longstr; \ if (UPB_UNLIKELY(common_has < 128)) \
goto longstr; \
fastdecode_docopy(d, ptr, size, 128, buf, dst); \ fastdecode_docopy(d, ptr, size, 128, buf, dst); \
} else { \ } else { \
goto longstr; \ goto longstr; \
@ -804,7 +814,8 @@ static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size,
} \ } \
\ \
if (card != CARD_r && validate_utf8) { \ if (card != CARD_r && validate_utf8) { \
/* return */ FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \ data = (uint64_t)dst; \
UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
} \ } \
\ \
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \ UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
@ -886,7 +897,8 @@ static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size,
} \ } \
\ \
if (card != CARD_r && validate_utf8) { \ if (card != CARD_r && validate_utf8) { \
/* return */ FASTDECODE_VERIFYUTF8(d, ptr, msg, table, hasbits, dst); \ data = (uint64_t)dst; \
UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
} \ } \
\ \
UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
@ -925,7 +937,6 @@ TAGBYTES(r)
#undef b_VALIDATE #undef b_VALIDATE
#undef F #undef F
#undef TAGBYTES #undef TAGBYTES
#undef FASTDECODE_VERIFYUTF8
#undef FASTDECODE_LONGSTRING #undef FASTDECODE_LONGSTRING
#undef FASTDECODE_COPYSTRING #undef FASTDECODE_COPYSTRING
#undef FASTDECODE_STRING #undef FASTDECODE_STRING

Loading…
Cancel
Save