From 7ffa9c181a0e34b1038c146db80dc0020cfc21d3 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Fri, 23 Oct 2020 16:57:57 -0700 Subject: [PATCH] Fixed some small bugs and performance problems in string copying. Before this CL, with alias=false: ------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------ BM_Parse_Upb_FileDesc_WithInitialBlock 3715 ns 3715 ns 188916 1.88206GB/s Performance counter stats for 'bazel-bin/benchmarks/benchmark --benchmark_filter=BM_Parse_Upb_FileDesc_WithInitialBlock': 1,122.92 msec task-clock # 0.979 CPUs utilized 3 context-switches # 0.003 K/sec 0 cpu-migrations # 0.000 K/sec 196 page-faults # 0.175 K/sec 4,144,746,717 cycles # 3.691 GHz 15,351,966,804 instructions # 3.70 insn per cycle 2,590,281,905 branches # 2306.728 M/sec 2,996,157 branch-misses # 0.12% of all branches 1.146615328 seconds time elapsed 1.115578000 seconds user 0.008025000 seconds sys After this CL: ------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------ BM_Parse_Upb_FileDesc_WithInitialBlock 3554 ns 3554 ns 197527 1.9674GB/s Performance counter stats for 'bazel-bin/benchmarks/benchmark --benchmark_filter=BM_Parse_Upb_FileDesc_WithInitialBlock': 1,105.34 msec task-clock # 0.982 CPUs utilized 3 context-switches # 0.003 K/sec 0 cpu-migrations # 0.000 K/sec 197 page-faults # 0.178 K/sec 4,077,736,892 cycles # 3.689 GHz 15,442,709,352 instructions # 3.79 insn per cycle 2,435,131,301 branches # 2203.068 M/sec 2,643,775 branch-misses # 0.11% of all branches 1.125393845 seconds time elapsed 1.097770000 seconds user 0.008012000 seconds sys --- upb/decode_fast.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/upb/decode_fast.c b/upb/decode_fast.c index f831e9112c..82c030c4cd 100644 --- a/upb/decode_fast.c +++ b/upb/decode_fast.c @@ -472,11 +472,11 @@ static const char *fastdecode_longstring(struct upb_decstate *d, UPB_FORCEINLINE static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size, int copy, char *data, upb_strview *dst) { + d->arena.head.ptr += copy; UPB_UNPOISON_MEMORY_REGION(data, copy); memcpy(data, ptr, copy); UPB_POISON_MEMORY_REGION(data + size, copy - size); dst->data = data; - d->arena.head.ptr += copy; } UPB_FORCEINLINE @@ -504,35 +504,30 @@ again: ptr += tagbytes + 1; dst->size = size; - if (UPB_UNLIKELY(size == 0)) { - goto done; - } else if (UPB_UNLIKELY(size > 127)) { - goto longstr; - } - buf = d->arena.head.ptr; arena_has = _upb_arenahas(&d->arena); common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); if (UPB_LIKELY(size <= 15 - tagbytes)) { if (arena_has < 16) goto longstr; + d->arena.head.ptr += 16; memcpy(buf, ptr - tagbytes - 1, 16); dst->data = buf + tagbytes + 1; - d->arena.head.ptr += 16; } else if (UPB_LIKELY(size <= 32)) { if (UPB_UNLIKELY(common_has < 32)) goto longstr; fastdecode_docopy(d, ptr, size, 32, buf, dst); - } else if (UPB_LIKELY(size <= 64 && common_has >= 64)) { + } else if (UPB_LIKELY(size <= 64)) { if (UPB_UNLIKELY(common_has < 64)) goto longstr; fastdecode_docopy(d, ptr, size, 64, buf, dst); - } else { + } else if (UPB_LIKELY(size <= 128)) { if (UPB_UNLIKELY(common_has < 128)) goto longstr; fastdecode_docopy(d, ptr, size, 128, buf, dst); + } else { + goto longstr; } ptr += size; -done: if (card == CARD_r) { fastdecode_nextret ret = fastdecode_nextrepeated( d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview));