From b58d2a0ee68fad50e7eee616edfd7a90a9c30177 Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Sun, 11 Oct 2020 17:56:57 -0700 Subject: [PATCH] Shrink overhead of message representation. --- benchmark.py | 42 ++++++++++++++++------------- upb/decode.c | 2 +- upb/msg.c | 76 ++++++++++++++++++++++++---------------------------- upb/msg.h | 34 ++++++++++++++++------- 4 files changed, 84 insertions(+), 70 deletions(-) diff --git a/benchmark.py b/benchmark.py index b75dd4d5a8..44f4940f5f 100755 --- a/benchmark.py +++ b/benchmark.py @@ -27,30 +27,33 @@ def GitWorktree(commit): def Run(cmd): subprocess.check_call(cmd, shell=True) -def Benchmark(outbase, runs=12): +def Benchmark(outbase, bench_cpu=True, runs=12): tmpfile = "/tmp/bench-output.json" Run("rm -rf {}".format(tmpfile)) - Run("bazel test :all") - Run("bazel build -c opt :benchmark") + Run("CC=clang bazel test :all") - Run("./bazel-bin/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={}".format(tmpfile, runs)) + if bench_cpu: + Run("CC=clang bazel build -c opt :benchmark") - Run("bazel build -c opt --copt=-g :conformance_upb") - Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase)) + Run("./bazel-bin/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={}".format(tmpfile, runs)) + with open(tmpfile) as f: + bench_json = json.load(f) + + # Translate into the format expected by benchstat. + with open(outbase + ".txt", "w") as f: + for run in bench_json["benchmarks"]: + name = re.sub(r'^BM_', 'Benchmark', run["name"]) + if name.endswith("_mean") or name.endswith("_median") or name.endswith("_stddev"): + continue + values = (name, run["iterations"], run["cpu_time"]) + print("{} {} {} ns/op".format(*values), file=f) - with open(tmpfile) as f: - bench_json = json.load(f) + Run("CC=clang bazel build -c opt --copt=-g :conformance_upb") + Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase)) - # Translate into the format expected by benchstat. - with open(outbase + ".txt", "w") as f: - for run in bench_json["benchmarks"]: - name = re.sub(r'^BM_', 'Benchmark', run["name"]) - if name.endswith("_mean") or name.endswith("_median") or name.endswith("_stddev"): - continue - values = (name, run["iterations"], run["cpu_time"]) - print("{} {} {} ns/op".format(*values), file=f) baseline = "master" +bench_cpu = True if len(sys.argv) > 1: baseline = sys.argv[1] @@ -60,16 +63,17 @@ if len(sys.argv) > 1: pass # Benchmark our current directory first, since it's more likely to be broken. -Benchmark("/tmp/new") +Benchmark("/tmp/new", bench_cpu) # Benchmark the baseline. with GitWorktree(baseline): - Benchmark("/tmp/old") + Benchmark("/tmp/old", bench_cpu) print() print() -Run("~/go/bin/benchstat /tmp/old.txt /tmp/new.txt") +if bench_cpu: + Run("~/go/bin/benchstat /tmp/old.txt /tmp/new.txt") print() print() diff --git a/upb/decode.c b/upb/decode.c index 8dcd3df1eb..cedca1e374 100644 --- a/upb/decode.c +++ b/upb/decode.c @@ -616,7 +616,7 @@ bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l, state.arena.last_size = arena->last_size; state.arena.parent = arena; - if (setjmp(state.err)) { + if (UPB_UNLIKELY(setjmp(state.err))) { ok = false; } else { decode_msg(&state, buf, msg, l); diff --git a/upb/msg.c b/upb/msg.c index 25747c8481..ee4ee8b06b 100644 --- a/upb/msg.c +++ b/upb/msg.c @@ -22,73 +22,67 @@ static const char _upb_fieldtype_to_sizelg2[12] = { UPB_SIZE(3, 4), /* UPB_TYPE_BYTES */ }; -static uintptr_t tag_arrptr(void* ptr, int elem_size_lg2) { - UPB_ASSERT(elem_size_lg2 <= 4); - return (uintptr_t)ptr | elem_size_lg2; -} - -static int upb_msg_internalsize(const upb_msglayout *l) { - return sizeof(upb_msg_internal) - l->extendable * sizeof(void *); -} - -static size_t upb_msg_sizeof(const upb_msglayout *l) { - return l->size + upb_msg_internalsize(l); -} +static const size_t overhead = sizeof(upb_msg_internal); static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) { ptrdiff_t size = sizeof(upb_msg_internal); return UPB_PTR_AT(msg, -size, upb_msg_internal); } -static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { - return (upb_msg_internal*)upb_msg_getinternal_const(msg); +upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) { + return _upb_msg_new_inl(l, a); } void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l) { - ptrdiff_t internal = upb_msg_internalsize(l); - void *mem = UPB_PTR_AT(msg, -internal, char); - memset(mem, 0, l->size + internal); + void *mem = UPB_PTR_AT(msg, -sizeof(upb_msg_internal), char); + memset(mem, 0, upb_msg_sizeof(l)); } -upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) { - void *mem = upb_arena_malloc(a, upb_msg_sizeof(l)); - upb_msg *msg; - - if (!mem) { - return NULL; - } - - msg = UPB_PTR_AT(mem, upb_msg_internalsize(l), upb_msg); - _upb_msg_clear(msg, l); - return msg; +static uintptr_t tag_arrptr(void* ptr, int elem_size_lg2) { + UPB_ASSERT(elem_size_lg2 <= 4); + return (uintptr_t)ptr | elem_size_lg2; } bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, upb_arena *arena) { + upb_msg_internal *in = upb_msg_getinternal(msg); - if (len > in->unknown_size - in->unknown_len) { - upb_alloc *alloc = upb_arena_alloc(arena); - size_t need = in->unknown_size + len; - size_t newsize = UPB_MAX(in->unknown_size * 2, need); - void *mem = upb_realloc(alloc, in->unknown, in->unknown_size, newsize); - if (!mem) return false; - in->unknown = mem; - in->unknown_size = newsize; + if (!in->unknown) { + size_t size = 128; + while (size < len) size *= 2; + in->unknown = upb_arena_malloc(arena, size + overhead); + if (!in->unknown) return false; + in->unknown->size = size; + in->unknown->len = 0; + } else if (in->unknown->size - in->unknown->len < len) { + size_t need = in->unknown->len + len; + size_t size = in->unknown->size;; + while (size < need) size *= 2; + in->unknown = upb_arena_realloc( + arena, in->unknown, in->unknown->size + overhead, size + overhead); + if (!in->unknown) return false; } - memcpy(in->unknown + in->unknown_len, data, len); - in->unknown_len += len; + memcpy(UPB_PTR_AT(in->unknown + 1, in->unknown->len, char), data, len); + in->unknown->len += len; return true; } void _upb_msg_discardunknown_shallow(upb_msg *msg) { upb_msg_internal *in = upb_msg_getinternal(msg); - in->unknown_len = 0; + if (in->unknown) { + in->unknown->len = 0; + } } const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) { const upb_msg_internal *in = upb_msg_getinternal_const(msg); - *len = in->unknown_len; - return in->unknown; + if (in->unknown) { + *len = in->unknown->len; + return (char*)(in->unknown + 1); + } else { + *len = 0; + return NULL; + } } /** upb_array *****************************************************************/ diff --git a/upb/msg.h b/upb/msg.h index 695c278b21..e06d3c7a5c 100644 --- a/upb/msg.h +++ b/upb/msg.h @@ -62,25 +62,41 @@ typedef struct upb_msglayout { * compatibility. We put these before the user's data. The user's upb_msg* * points after the upb_msg_internal. */ -/* Used when a message is not extendable. */ typedef struct { - char *unknown; - size_t unknown_len; - size_t unknown_size; -} upb_msg_internal; + uint32_t len; + uint32_t size; + /* Data follows. */ +} upb_msg_unknowndata; -/* Used when a message is extendable. */ +/* Used when a message is not extendable. */ typedef struct { - upb_inttable *extdict; - upb_msg_internal base; -} upb_msg_internal_withext; + upb_msg_unknowndata *unknown; +} upb_msg_internal; /* Maps upb_fieldtype_t -> memory size. */ extern char _upb_fieldtype_to_size[12]; +UPB_INLINE size_t upb_msg_sizeof(const upb_msglayout *l) { + return l->size + sizeof(upb_msg_internal); +} + +UPB_INLINE upb_msg *_upb_msg_new_inl(const upb_msglayout *l, upb_arena *a) { + size_t size = upb_msg_sizeof(l); + void *mem = upb_arena_malloc(a, size); + upb_msg *msg; + if (UPB_UNLIKELY(!mem)) return NULL; + msg = UPB_PTR_AT(mem, sizeof(upb_msg_internal), upb_msg); + memset(mem, 0, size); + return msg; +} + /* Creates a new messages with the given layout on the given arena. */ upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a); +UPB_INLINE upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { + return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal); +} + /* Clears the given message. */ void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l);