Optimized varint decoding from Gerben.

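This speeds up parsing (roughly 4.5–6.7% faster on the Parse_Upb_FileDesc benchmarks below) but costs some code size (about 1.06Ki of additional VM size in upb/decode.c).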

name                                 old time/op  new time/op  delta
ArenaOneAlloc                        21.1ns ± 0%  21.3ns ± 0%  +1.33%  (p=0.000 n=12+12)
ArenaInitialBlockOneAlloc            6.02ns ± 0%  6.02ns ± 0%    ~     (p=0.579 n=10+10)
LoadDescriptor_Upb                    111µs ± 1%   110µs ± 1%  -0.91%  (p=0.003 n=11+12)
LoadDescriptor_Proto2                 258µs ± 1%   258µs ± 1%    ~     (p=0.674 n=10+12)
Parse_Upb_FileDesc_WithArena         11.2µs ± 0%  10.4µs ± 0%  -6.67%  (p=0.000 n=12+12)
Parse_Upb_FileDesc_WithInitialBlock  10.6µs ± 0%  10.1µs ± 0%  -4.48%  (p=0.000 n=12+11)
SerializeDescriptor_Proto2           5.36µs ± 5%  5.36µs ± 3%    ~     (p=0.880 n=12+11)
SerializeDescriptor_Upb              11.9µs ± 0%  12.0µs ± 0%  +0.81%  (p=0.000 n=12+12)

    FILE SIZE        VM SIZE
 --------------  --------------
   +23% +1.11Ki   +24% +1.06Ki    upb/decode.c
     +15%    +560   +15%    +560    decode_msg
    +140%    +240  +188%    +240    decode_longvarint64
    [NEW]    +174  [NEW]    +128    decode_isdonefallback
     +56%    +160   +65%    +160    upb_decode
 -49.7% -1.06Ki  [ = ]       0    [Unmapped]
  +0.0%     +48  +0.9% +1.06Ki    TOTAL
pull/13171/head
Joshua Haberman 4 years ago
parent 48689df72e
commit f4adbe0698
  1. 47
      upb/decode.c

@ -204,33 +204,40 @@ static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
return need_realloc; return need_realloc;
} }
typedef struct {
const char *ptr;
uint64_t val;
} decode_vret;
UPB_NOINLINE UPB_NOINLINE
static const char *decode_longvarint64(upb_decstate *d, const char *ptr, static decode_vret decode_longvarint64(const char *ptr, uint64_t val) {
uint64_t *val) { decode_vret ret = {NULL, 0};
uint8_t byte; uint64_t byte;
int bitpos = 0; int i;
uint64_t out = 0; for (i = 1; i < 10; i++) {
byte = (uint8_t)ptr[i];
do { val += (byte - 1) << (i * 7);
if (bitpos >= 70) decode_err(d); if (!(byte & 0x80)) {
byte = *ptr; ret.ptr = ptr + i + 1;
out |= (uint64_t)(byte & 0x7F) << bitpos; ret.val = val;
ptr++; return ret;
bitpos += 7; }
} while (byte & 0x80); }
return ret;
*val = out;
return ptr;
} }
UPB_FORCEINLINE UPB_FORCEINLINE
static const char *decode_varint64(upb_decstate *d, const char *ptr, static const char *decode_varint64(upb_decstate *d, const char *ptr,
uint64_t *val) { uint64_t *val) {
if (UPB_LIKELY((*ptr & 0x80) == 0)) { uint64_t byte = (uint8_t)*ptr;
*val = (uint8_t)*ptr; if (UPB_LIKELY((byte & 0x80) == 0)) {
*val = byte;
return ptr + 1; return ptr + 1;
} else { } else {
return decode_longvarint64(d, ptr, val); decode_vret res = decode_longvarint64(ptr, byte);
if (!res.ptr) decode_err(d);
*val = res.val;
return res.ptr;
} }
} }
@ -313,7 +320,7 @@ UPB_NOINLINE
static const char *decode_isdonefallback(upb_decstate *d, const char *ptr, static const char *decode_isdonefallback(upb_decstate *d, const char *ptr,
int overrun) { int overrun) {
if (overrun < d->limit) { if (overrun < d->limit) {
// Need to copy remaining data into patch buffer. /* Need to copy remaining data into patch buffer. */
UPB_ASSERT(overrun < 16); UPB_ASSERT(overrun < 16);
memset(d->patch + 16, 0, 16); memset(d->patch + 16, 0, 16);
memcpy(d->patch, d->end, 16); memcpy(d->patch, d->end, 16);

Loading…
Cancel
Save