Shrink overhead of message representation.

pull/13171/head
Joshua Haberman 5 years ago
parent d87ceeacab
commit b58d2a0ee6
  1. 42
      benchmark.py
  2. 2
      upb/decode.c
  3. 76
      upb/msg.c
  4. 34
      upb/msg.h

@ -27,30 +27,33 @@ def GitWorktree(commit):
def Run(cmd): def Run(cmd):
subprocess.check_call(cmd, shell=True) subprocess.check_call(cmd, shell=True)
def Benchmark(outbase, runs=12): def Benchmark(outbase, bench_cpu=True, runs=12):
tmpfile = "/tmp/bench-output.json" tmpfile = "/tmp/bench-output.json"
Run("rm -rf {}".format(tmpfile)) Run("rm -rf {}".format(tmpfile))
Run("bazel test :all") Run("CC=clang bazel test :all")
Run("bazel build -c opt :benchmark")
Run("./bazel-bin/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={}".format(tmpfile, runs)) if bench_cpu:
Run("CC=clang bazel build -c opt :benchmark")
Run("bazel build -c opt --copt=-g :conformance_upb") Run("./bazel-bin/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={}".format(tmpfile, runs))
Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase)) with open(tmpfile) as f:
bench_json = json.load(f)
# Translate into the format expected by benchstat.
with open(outbase + ".txt", "w") as f:
for run in bench_json["benchmarks"]:
name = re.sub(r'^BM_', 'Benchmark', run["name"])
if name.endswith("_mean") or name.endswith("_median") or name.endswith("_stddev"):
continue
values = (name, run["iterations"], run["cpu_time"])
print("{} {} {} ns/op".format(*values), file=f)
with open(tmpfile) as f: Run("CC=clang bazel build -c opt --copt=-g :conformance_upb")
bench_json = json.load(f) Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase))
# Translate into the format expected by benchstat.
with open(outbase + ".txt", "w") as f:
for run in bench_json["benchmarks"]:
name = re.sub(r'^BM_', 'Benchmark', run["name"])
if name.endswith("_mean") or name.endswith("_median") or name.endswith("_stddev"):
continue
values = (name, run["iterations"], run["cpu_time"])
print("{} {} {} ns/op".format(*values), file=f)
baseline = "master" baseline = "master"
bench_cpu = True
if len(sys.argv) > 1: if len(sys.argv) > 1:
baseline = sys.argv[1] baseline = sys.argv[1]
@ -60,16 +63,17 @@ if len(sys.argv) > 1:
pass pass
# Benchmark our current directory first, since it's more likely to be broken. # Benchmark our current directory first, since it's more likely to be broken.
Benchmark("/tmp/new") Benchmark("/tmp/new", bench_cpu)
# Benchmark the baseline. # Benchmark the baseline.
with GitWorktree(baseline): with GitWorktree(baseline):
Benchmark("/tmp/old") Benchmark("/tmp/old", bench_cpu)
print() print()
print() print()
Run("~/go/bin/benchstat /tmp/old.txt /tmp/new.txt") if bench_cpu:
Run("~/go/bin/benchstat /tmp/old.txt /tmp/new.txt")
print() print()
print() print()

@ -616,7 +616,7 @@ bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
state.arena.last_size = arena->last_size; state.arena.last_size = arena->last_size;
state.arena.parent = arena; state.arena.parent = arena;
if (setjmp(state.err)) { if (UPB_UNLIKELY(setjmp(state.err))) {
ok = false; ok = false;
} else { } else {
decode_msg(&state, buf, msg, l); decode_msg(&state, buf, msg, l);

@ -22,73 +22,67 @@ static const char _upb_fieldtype_to_sizelg2[12] = {
UPB_SIZE(3, 4), /* UPB_TYPE_BYTES */ UPB_SIZE(3, 4), /* UPB_TYPE_BYTES */
}; };
static uintptr_t tag_arrptr(void* ptr, int elem_size_lg2) { static const size_t overhead = sizeof(upb_msg_internal);
UPB_ASSERT(elem_size_lg2 <= 4);
return (uintptr_t)ptr | elem_size_lg2;
}
static int upb_msg_internalsize(const upb_msglayout *l) {
return sizeof(upb_msg_internal) - l->extendable * sizeof(void *);
}
static size_t upb_msg_sizeof(const upb_msglayout *l) {
return l->size + upb_msg_internalsize(l);
}
static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) { static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
ptrdiff_t size = sizeof(upb_msg_internal); ptrdiff_t size = sizeof(upb_msg_internal);
return UPB_PTR_AT(msg, -size, upb_msg_internal); return UPB_PTR_AT(msg, -size, upb_msg_internal);
} }
static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
return (upb_msg_internal*)upb_msg_getinternal_const(msg); return _upb_msg_new_inl(l, a);
} }
void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l) { void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l) {
ptrdiff_t internal = upb_msg_internalsize(l); void *mem = UPB_PTR_AT(msg, -sizeof(upb_msg_internal), char);
void *mem = UPB_PTR_AT(msg, -internal, char); memset(mem, 0, upb_msg_sizeof(l));
memset(mem, 0, l->size + internal);
} }
upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) { static uintptr_t tag_arrptr(void* ptr, int elem_size_lg2) {
void *mem = upb_arena_malloc(a, upb_msg_sizeof(l)); UPB_ASSERT(elem_size_lg2 <= 4);
upb_msg *msg; return (uintptr_t)ptr | elem_size_lg2;
if (!mem) {
return NULL;
}
msg = UPB_PTR_AT(mem, upb_msg_internalsize(l), upb_msg);
_upb_msg_clear(msg, l);
return msg;
} }
bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
upb_arena *arena) { upb_arena *arena) {
upb_msg_internal *in = upb_msg_getinternal(msg); upb_msg_internal *in = upb_msg_getinternal(msg);
if (len > in->unknown_size - in->unknown_len) { if (!in->unknown) {
upb_alloc *alloc = upb_arena_alloc(arena); size_t size = 128;
size_t need = in->unknown_size + len; while (size < len) size *= 2;
size_t newsize = UPB_MAX(in->unknown_size * 2, need); in->unknown = upb_arena_malloc(arena, size + overhead);
void *mem = upb_realloc(alloc, in->unknown, in->unknown_size, newsize); if (!in->unknown) return false;
if (!mem) return false; in->unknown->size = size;
in->unknown = mem; in->unknown->len = 0;
in->unknown_size = newsize; } else if (in->unknown->size - in->unknown->len < len) {
size_t need = in->unknown->len + len;
size_t size = in->unknown->size;;
while (size < need) size *= 2;
in->unknown = upb_arena_realloc(
arena, in->unknown, in->unknown->size + overhead, size + overhead);
if (!in->unknown) return false;
} }
memcpy(in->unknown + in->unknown_len, data, len); memcpy(UPB_PTR_AT(in->unknown + 1, in->unknown->len, char), data, len);
in->unknown_len += len; in->unknown->len += len;
return true; return true;
} }
void _upb_msg_discardunknown_shallow(upb_msg *msg) { void _upb_msg_discardunknown_shallow(upb_msg *msg) {
upb_msg_internal *in = upb_msg_getinternal(msg); upb_msg_internal *in = upb_msg_getinternal(msg);
in->unknown_len = 0; if (in->unknown) {
in->unknown->len = 0;
}
} }
const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) { const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
const upb_msg_internal *in = upb_msg_getinternal_const(msg); const upb_msg_internal *in = upb_msg_getinternal_const(msg);
*len = in->unknown_len; if (in->unknown) {
return in->unknown; *len = in->unknown->len;
return (char*)(in->unknown + 1);
} else {
*len = 0;
return NULL;
}
} }
/** upb_array *****************************************************************/ /** upb_array *****************************************************************/

@ -62,25 +62,41 @@ typedef struct upb_msglayout {
* compatibility. We put these before the user's data. The user's upb_msg* * compatibility. We put these before the user's data. The user's upb_msg*
* points after the upb_msg_internal. */ * points after the upb_msg_internal. */
/* Used when a message is not extendable. */
typedef struct { typedef struct {
char *unknown; uint32_t len;
size_t unknown_len; uint32_t size;
size_t unknown_size; /* Data follows. */
} upb_msg_internal; } upb_msg_unknowndata;
/* Used when a message is extendable. */ /* Used when a message is not extendable. */
typedef struct { typedef struct {
upb_inttable *extdict; upb_msg_unknowndata *unknown;
upb_msg_internal base; } upb_msg_internal;
} upb_msg_internal_withext;
/* Maps upb_fieldtype_t -> memory size. */ /* Maps upb_fieldtype_t -> memory size. */
extern char _upb_fieldtype_to_size[12]; extern char _upb_fieldtype_to_size[12];
UPB_INLINE size_t upb_msg_sizeof(const upb_msglayout *l) {
return l->size + sizeof(upb_msg_internal);
}
UPB_INLINE upb_msg *_upb_msg_new_inl(const upb_msglayout *l, upb_arena *a) {
size_t size = upb_msg_sizeof(l);
void *mem = upb_arena_malloc(a, size);
upb_msg *msg;
if (UPB_UNLIKELY(!mem)) return NULL;
msg = UPB_PTR_AT(mem, sizeof(upb_msg_internal), upb_msg);
memset(mem, 0, size);
return msg;
}
/* Creates a new messages with the given layout on the given arena. */ /* Creates a new messages with the given layout on the given arena. */
upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a); upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a);
UPB_INLINE upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal);
}
/* Clears the given message. */ /* Clears the given message. */
void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l); void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l);

Loading…
Cancel
Save