Fixed some small bugs and performance problems in string copying.

Before this CL, with alias=false:

------------------------------------------------------------------------------
Benchmark                                       Time           CPU Iterations
------------------------------------------------------------------------------
BM_Parse_Upb_FileDesc_WithInitialBlock       3715 ns       3715 ns     188916   1.88206GB/s

 Performance counter stats for 'bazel-bin/benchmarks/benchmark --benchmark_filter=BM_Parse_Upb_FileDesc_WithInitialBlock':

          1,122.92 msec task-clock                #    0.979 CPUs utilized
                 3      context-switches          #    0.003 K/sec
                 0      cpu-migrations            #    0.000 K/sec
               196      page-faults               #    0.175 K/sec
     4,144,746,717      cycles                    #    3.691 GHz
    15,351,966,804      instructions              #    3.70  insn per cycle
     2,590,281,905      branches                  # 2306.728 M/sec
         2,996,157      branch-misses             #    0.12% of all branches

       1.146615328 seconds time elapsed

       1.115578000 seconds user
       0.008025000 seconds sys

After this CL:

------------------------------------------------------------------------------
Benchmark                                       Time           CPU Iterations
------------------------------------------------------------------------------
BM_Parse_Upb_FileDesc_WithInitialBlock       3554 ns       3554 ns     197527    1.9674GB/s

 Performance counter stats for 'bazel-bin/benchmarks/benchmark --benchmark_filter=BM_Parse_Upb_FileDesc_WithInitialBlock':

          1,105.34 msec task-clock                #    0.982 CPUs utilized
                 3      context-switches          #    0.003 K/sec
                 0      cpu-migrations            #    0.000 K/sec
               197      page-faults               #    0.178 K/sec
     4,077,736,892      cycles                    #    3.689 GHz
    15,442,709,352      instructions              #    3.79  insn per cycle
     2,435,131,301      branches                  # 2203.068 M/sec
         2,643,775      branch-misses             #    0.11% of all branches

       1.125393845 seconds time elapsed

       1.097770000 seconds user
       0.008012000 seconds sys
pull/13171/head
Joshua Haberman 4 years ago
parent e2c709e047
commit 7ffa9c181a
1 changed file (17 changed lines):
      upb/decode_fast.c

@ -472,11 +472,11 @@ static const char *fastdecode_longstring(struct upb_decstate *d,
UPB_FORCEINLINE
static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size,
int copy, char *data, upb_strview *dst) {
d->arena.head.ptr += copy;
UPB_UNPOISON_MEMORY_REGION(data, copy);
memcpy(data, ptr, copy);
UPB_POISON_MEMORY_REGION(data + size, copy - size);
dst->data = data;
d->arena.head.ptr += copy;
}
UPB_FORCEINLINE
@ -504,35 +504,30 @@ again:
ptr += tagbytes + 1;
dst->size = size;
if (UPB_UNLIKELY(size == 0)) {
goto done;
} else if (UPB_UNLIKELY(size > 127)) {
goto longstr;
}
buf = d->arena.head.ptr;
arena_has = _upb_arenahas(&d->arena);
common_has = UPB_MIN(arena_has, (d->end - ptr) + 16);
if (UPB_LIKELY(size <= 15 - tagbytes)) {
if (arena_has < 16) goto longstr;
d->arena.head.ptr += 16;
memcpy(buf, ptr - tagbytes - 1, 16);
dst->data = buf + tagbytes + 1;
d->arena.head.ptr += 16;
} else if (UPB_LIKELY(size <= 32)) {
if (UPB_UNLIKELY(common_has < 32)) goto longstr;
fastdecode_docopy(d, ptr, size, 32, buf, dst);
} else if (UPB_LIKELY(size <= 64 && common_has >= 64)) {
} else if (UPB_LIKELY(size <= 64)) {
if (UPB_UNLIKELY(common_has < 64)) goto longstr;
fastdecode_docopy(d, ptr, size, 64, buf, dst);
} else {
} else if (UPB_LIKELY(size <= 128)) {
if (UPB_UNLIKELY(common_has < 128)) goto longstr;
fastdecode_docopy(d, ptr, size, 128, buf, dst);
} else {
goto longstr;
}
ptr += size;
done:
if (card == CARD_r) {
fastdecode_nextret ret = fastdecode_nextrepeated(
d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview));

Loading…
Cancel
Save