Donate/steal from arena to accelerate decoding.

pull/13171/head
Joshua Haberman 4 years ago
parent 02ff6fb996
commit 7ec2c52346
  1. 6
      benchmark.py
  2. 16
      generated_for_cmake/google/protobuf/descriptor.upb.c
  3. 123
      upb/decode.c
  4. 70
      upb/msg.c
  5. 26
      upb/msg.h
  6. 2
      upbc/message_layout.cc

@ -30,12 +30,12 @@ def Run(cmd):
def Benchmark(outbase, runs=12):
tmpfile = "/tmp/bench-output.json"
Run("rm -rf {}".format(tmpfile))
Run("bazel test :all")
Run("bazel build -c opt :benchmark")
Run("CC=clang bazel test :all")
Run("CC=clang bazel build -c opt :benchmark")
Run("./bazel-bin/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={}".format(tmpfile, runs))
Run("bazel build -c opt --copt=-g :conformance_upb")
Run("CC=clang bazel build -c opt --copt=-g :conformance_upb")
Run("cp -f bazel-bin/conformance_upb {}.bin".format(outbase))
with open(tmpfile) as f:

@ -23,7 +23,7 @@ static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] =
const upb_msglayout google_protobuf_FileDescriptorSet_msginit = {
&google_protobuf_FileDescriptorSet_submsgs[0],
&google_protobuf_FileDescriptorSet__fields[0],
UPB_SIZE(4, 8), 1, false,
UPB_SIZE(8, 8), 1, false,
};
static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = {
@ -109,7 +109,7 @@ static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__
const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = {
NULL,
&google_protobuf_DescriptorProto_ReservedRange__fields[0],
UPB_SIZE(12, 12), 2, false,
UPB_SIZE(16, 16), 2, false,
};
static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = {
@ -123,7 +123,7 @@ static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1
const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = {
&google_protobuf_ExtensionRangeOptions_submsgs[0],
&google_protobuf_ExtensionRangeOptions__fields[0],
UPB_SIZE(4, 8), 1, false,
UPB_SIZE(8, 8), 1, false,
};
static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = {
@ -193,7 +193,7 @@ static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReserve
const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
NULL,
&google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
UPB_SIZE(12, 12), 2, false,
UPB_SIZE(16, 16), 2, false,
};
static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
@ -297,7 +297,7 @@ static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = {
const upb_msglayout google_protobuf_MessageOptions_msginit = {
&google_protobuf_MessageOptions_submsgs[0],
&google_protobuf_MessageOptions__fields[0],
UPB_SIZE(12, 16), 5, false,
UPB_SIZE(16, 16), 5, false,
};
static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = {
@ -331,7 +331,7 @@ static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = {
const upb_msglayout google_protobuf_OneofOptions_msginit = {
&google_protobuf_OneofOptions_submsgs[0],
&google_protobuf_OneofOptions__fields[0],
UPB_SIZE(4, 8), 1, false,
UPB_SIZE(8, 8), 1, false,
};
static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = {
@ -438,7 +438,7 @@ static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = {
const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
&google_protobuf_SourceCodeInfo_submsgs[0],
&google_protobuf_SourceCodeInfo__fields[0],
UPB_SIZE(4, 8), 1, false,
UPB_SIZE(8, 8), 1, false,
};
static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
@ -466,7 +466,7 @@ static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] =
const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
&google_protobuf_GeneratedCodeInfo_submsgs[0],
&google_protobuf_GeneratedCodeInfo__fields[0],
UPB_SIZE(4, 8), 1, false,
UPB_SIZE(8, 8), 1, false,
};
static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {

@ -137,6 +137,8 @@ static const int8_t delim_ops[37] = {
/* Data pertaining to the parse. */
typedef struct {
const char *limit; /* End of delimited region or end of buffer. */
char *arena_ptr;
char *arena_end;
upb_arena *arena;
int depth;
uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
@ -189,12 +191,93 @@ void decode_verifyutf8(upb_decstate *d, const char *buf, int len) {
if (i != len) decode_err(d);
}
static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) {
bool need_realloc = arr->size - arr->len < elem;
if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, d->arena)) {
/* Claims the arena's current free span for the decoder.
 *
 * The arena's ptr/end pair is copied into the decode state, after which the
 * arena's ptr is set equal to its end.  Presumably this keeps allocations made
 * directly on the arena from handing out the bytes the decoder now owns --
 * TODO confirm against the arena implementation. */
static void decode_stealmem(upb_decstate *d) {
  _upb_arena_head *head = (_upb_arena_head*)d->arena;

  /* Record the span first; the next statement overwrites head->ptr. */
  d->arena_end = head->end;
  d->arena_ptr = head->ptr;

  head->ptr = head->end;
}
/* Returns the decoder's unused span to the arena.
 *
 * The arena's ptr is restored from the decode state's arena_ptr.  The arena's
 * end is asserted to still equal the end recorded in the decode state. */
static void decode_donatemem(upb_decstate *d) {
  _upb_arena_head *head = (_upb_arena_head*)d->arena;

  UPB_ASSERT(head->end == d->arena_end);
  head->ptr = d->arena_ptr;
}
/* Out-of-line path for decode_malloc(), taken when the decoder's span cannot
 * satisfy the request.
 *
 * Allocates `size` bytes through _upb_arena_slowmalloc() and then re-captures
 * the arena's ptr/end into the decode state.  On allocation failure,
 * decode_err() is invoked. */
UPB_NOINLINE
static void *decode_mallocfallback(upb_decstate *d, size_t size) {
  char *mem = _upb_arena_slowmalloc(d->arena, size);

  if (!mem) {
    decode_err(d);
  }

  /* Pick up whatever ptr/end the arena has after the slow allocation. */
  decode_stealmem(d);
  return mem;
}
/* Allocates `size` bytes by bumping the decoder's own arena_ptr.
 *
 * `size` must be a multiple of 8 (asserted).  If fewer than `size` bytes
 * remain before arena_end, the request is forwarded to
 * decode_mallocfallback(). */
UPB_FORCEINLINE
static void *decode_malloc(upb_decstate *d, size_t size) {
  UPB_ASSERT((size & 7) == 0);
  char *result = d->arena_ptr;
  size_t avail = (size_t)(d->arena_end - result);

  if (UPB_UNLIKELY(avail < size)) {
    return decode_mallocfallback(d, size);
  }

  d->arena_ptr = result + size;
  return result;
}
/* Creates a zero-filled message for layout `l` using decode_malloc().
 *
 * The allocation holds a upb_msg_internal followed by l->size bytes; the
 * returned pointer addresses the bytes just past the upb_msg_internal. */
static upb_msg *decode_newmsg(upb_decstate *d, const upb_msglayout *l) {
  const size_t header = sizeof(upb_msg_internal);
  const size_t total = l->size + header;
  char *block = decode_malloc(d, total);

  memset(block, 0, total);
  return block + header;
}
/* Grows `arr` so it can hold at least arr->len + need_elem elements.
 *
 * _upb_array_realloc() allocates from d->arena directly, so the decoder's span
 * is given back before the call and re-captured afterwards.  decode_err() is
 * invoked only after the span has been re-captured. */
UPB_NOINLINE
static void decode_realloc(upb_decstate *d, upb_array *arr, size_t need_elem) {
  bool grown;

  decode_donatemem(d);
  grown = _upb_array_realloc(arr, arr->len + need_elem, d->arena);
  decode_stealmem(d);

  if (!grown) {
    decode_err(d);
  }
}
/* Makes sure `arr` has room for `need_elem` more elements.
 *
 * Returns true if the array had to be grown via decode_realloc(), false if
 * the existing capacity was already sufficient. */
UPB_FORCEINLINE
static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t need_elem) {
  bool short_on_space = arr->size - arr->len < need_elem;

  if (!short_on_space) {
    return false;
  }

  decode_realloc(d, arr, need_elem);
  return true;
}
static upb_array *decode_newarr(upb_decstate *d, upb_fieldtype_t type) {
size_t elem_size_lg2 = _upb_fieldtype_to_sizelg2[type];
size_t count = type == UPB_TYPE_BOOL ? 8 : 4;
size_t size = sizeof(upb_array) + (count * (1 << elem_size_lg2));
upb_array *arr = decode_malloc(d, size);
if (!arr) {
decode_err(d);
}
return need_realloc;
arr->data = _upb_array_tagptr(arr + 1, elem_size_lg2);
arr->len = 0;
arr->size = count;
return arr;
}
/* Appends `len` bytes starting at `ptr` to the unknown-field data of `msg`.
 *
 * If the message already has an unknown-data buffer with at least `len` spare
 * bytes, the data is copied in place.  Otherwise the work is delegated to
 * _upb_msg_addunknown(), which allocates from d->arena; the decoder's span is
 * given back for the duration of that call.  decode_err() is invoked if that
 * call reports failure. */
static void decode_addunknown(upb_decstate *d, upb_msg *msg, const char *ptr,
                              size_t len) {
  upb_msg_internal *in = upb_msg_getinternal(msg);
  bool appended;

  if (in->unknown && in->unknown->size - in->unknown->len >= len) {
    /* Enough room: the payload lives directly after the buffer header. */
    char *dst = UPB_PTR_AT(in->unknown + 1, in->unknown->len, char);
    memcpy(dst, ptr, len);
    in->unknown->len += len;
    return;
  }

  decode_donatemem(d);
  appended = _upb_msg_addunknown(msg, ptr, len, d->arena);
  decode_stealmem(d);
  if (!appended) {
    decode_err(d);
  }
}
UPB_NOINLINE
@ -280,8 +363,7 @@ static const upb_msglayout_field *upb_find_field(const upb_msglayout *l,
static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout *layout,
const upb_msglayout_field *field) {
const upb_msglayout *subl = layout->submsgs[field->submsg_index];
return _upb_msg_new(subl, d->arena);
return decode_newmsg(d, layout->submsgs[field->submsg_index]);
}
static void decode_tosubmsg(upb_decstate *d, upb_msg *submsg,
@ -325,8 +407,7 @@ static const char *decode_toarray(upb_decstate *d, const char *ptr,
if (!arr) {
upb_fieldtype_t type = desctype_to_fieldtype[field->descriptortype];
arr = _upb_array_new(d->arena, type);
if (!arr) decode_err(d);
arr = decode_newarr(d, type);
*arrp = arr;
}
@ -424,7 +505,9 @@ static void decode_tomap(upb_decstate *d, upb_msg *msg,
char val_size = desctype_to_mapsize[val_field->descriptortype];
UPB_ASSERT(key_field->offset == 0);
UPB_ASSERT(val_field->offset == sizeof(upb_strview));
decode_donatemem(d); /* We'll let map use the actual arena. */
map = _upb_map_new(d->arena, key_size, val_size);
decode_stealmem(d);
*map_p = map;
}
@ -434,13 +517,15 @@ static void decode_tomap(upb_decstate *d, upb_msg *msg,
if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) {
/* Create proactively to handle the case where it doesn't appear. */
ent.v.val = upb_value_ptr(_upb_msg_new(entry->submsgs[0], d->arena));
ent.v.val = upb_value_ptr(decode_newmsg(d, entry->submsgs[0]));
}
decode_tosubmsg(d, &ent.k, layout, field, val.str_val);
/* Insert into map. */
decode_donatemem(d); /* We'll let map use the actual arena. */
_upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, d->arena);
decode_stealmem(d);
}
static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
@ -587,10 +672,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
ptr = decode_group(d, ptr, NULL, NULL, field_number);
}
if (msg) {
if (!_upb_msg_addunknown(msg, field_start, ptr - field_start,
d->arena)) {
decode_err(d);
}
decode_addunknown(d, msg, field_start, ptr - field_start);
}
}
}
@ -601,18 +683,27 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
upb_arena *arena) {
bool ret;
upb_decstate state;
state.limit = buf + size;
state.arena = arena;
state.depth = 64;
state.end_group = 0;
if (setjmp(state.err)) return false;
if (size == 0) return true;
decode_stealmem(&state);
if (setjmp(state.err)) {
ret = false;
} else {
decode_msg(&state, buf, msg, l);
ret = state.end_group == 0;
}
return state.end_group == 0;
decode_donatemem(&state);
return ret;
}
#undef OP_SCALAR_LG2

@ -7,7 +7,7 @@
/** upb_msg *******************************************************************/
static const char _upb_fieldtype_to_sizelg2[12] = {
const char _upb_fieldtype_to_sizelg2[12] = {
0,
0, /* UPB_TYPE_BOOL */
2, /* UPB_TYPE_FLOAT */
@ -22,17 +22,10 @@ static const char _upb_fieldtype_to_sizelg2[12] = {
UPB_SIZE(3, 4), /* UPB_TYPE_BYTES */
};
static uintptr_t tag_arrptr(void* ptr, int elem_size_lg2) {
UPB_ASSERT(elem_size_lg2 <= 4);
return (uintptr_t)ptr | elem_size_lg2;
}
static int upb_msg_internalsize(const upb_msglayout *l) {
return sizeof(upb_msg_internal) - l->extendable * sizeof(void *);
}
static const size_t overhead = sizeof(upb_msg_internal);
static size_t upb_msg_sizeof(const upb_msglayout *l) {
return l->size + upb_msg_internalsize(l);
return l->size + overhead;
}
static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
@ -40,14 +33,9 @@ static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
return UPB_PTR_AT(msg, -size, upb_msg_internal);
}
static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
return (upb_msg_internal*)upb_msg_getinternal_const(msg);
}
void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l) {
ptrdiff_t internal = upb_msg_internalsize(l);
void *mem = UPB_PTR_AT(msg, -internal, char);
memset(mem, 0, l->size + internal);
void *mem = UPB_PTR_AT(msg, -overhead, char);
memset(mem, 0, l->size + overhead);
}
upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
@ -58,37 +46,51 @@ upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
return NULL;
}
msg = UPB_PTR_AT(mem, upb_msg_internalsize(l), upb_msg);
msg = UPB_PTR_AT(mem, overhead, upb_msg);
_upb_msg_clear(msg, l);
return msg;
}
bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
upb_arena *arena) {
upb_msg_internal *in = upb_msg_getinternal(msg);
if (len > in->unknown_size - in->unknown_len) {
upb_alloc *alloc = upb_arena_alloc(arena);
size_t need = in->unknown_size + len;
size_t newsize = UPB_MAX(in->unknown_size * 2, need);
void *mem = upb_realloc(alloc, in->unknown, in->unknown_size, newsize);
if (!mem) return false;
in->unknown = mem;
in->unknown_size = newsize;
}
memcpy(in->unknown + in->unknown_len, data, len);
in->unknown_len += len;
if (!in->unknown) {
size_t size = 128;
while (size < len) size *= 2;
in->unknown = upb_arena_malloc(arena, size + overhead);
if (!in->unknown) return false;
in->unknown->size = size;
in->unknown->len = 0;
} else if (in->unknown->size - in->unknown->len < len) {
size_t need = in->unknown->len + len;
size_t size = in->unknown->size;;
while (size < need) size *= 2;
in->unknown = upb_arena_realloc(
arena, in->unknown, in->unknown->size + overhead, size + overhead);
if (!in->unknown) return false;
}
memcpy(UPB_PTR_AT(in->unknown + 1, in->unknown->len, char), data, len);
in->unknown->len += len;
return true;
}
void _upb_msg_discardunknown_shallow(upb_msg *msg) {
upb_msg_internal *in = upb_msg_getinternal(msg);
in->unknown_len = 0;
if (in->unknown) {
in->unknown->len = 0;
}
}
const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
const upb_msg_internal *in = upb_msg_getinternal_const(msg);
*len = in->unknown_len;
return in->unknown;
if (in->unknown) {
*len = in->unknown->len;
return (char*)(in->unknown + 1);
} else {
*len = 0;
return NULL;
}
}
/** upb_array *****************************************************************/
@ -100,7 +102,7 @@ upb_array *_upb_array_new(upb_arena *a, upb_fieldtype_t type) {
return NULL;
}
arr->data = tag_arrptr(NULL, _upb_fieldtype_to_sizelg2[type]);
arr->data = _upb_array_tagptr(NULL, _upb_fieldtype_to_sizelg2[type]);
arr->len = 0;
arr->size = 0;
@ -124,7 +126,7 @@ bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena) {
return false;
}
arr->data = tag_arrptr(ptr, elem_size_lg2);
arr->data = _upb_array_tagptr(ptr, elem_size_lg2);
arr->size = new_size;
return true;
}

@ -62,25 +62,28 @@ typedef struct upb_msglayout {
* compatibility. We put these before the user's data. The user's upb_msg*
* points after the upb_msg_internal. */
/* Used when a message is not extendable. */
typedef struct {
char *unknown;
size_t unknown_len;
size_t unknown_size;
} upb_msg_internal;
uint32_t len;
uint32_t size;
/* Data follows. */
} upb_msg_unknowndata;
/* Used when a message is extendable. */
/* Used when a message is not extendable. */
typedef struct {
upb_inttable *extdict;
upb_msg_internal base;
} upb_msg_internal_withext;
upb_msg_unknowndata *unknown;
} upb_msg_internal;
/* Maps upb_fieldtype_t -> memory size. */
extern char _upb_fieldtype_to_size[12];
extern const char _upb_fieldtype_to_sizelg2[12];
/* Creates a new messages with the given layout on the given arena. */
upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a);
/* Returns the upb_msg_internal that is stored immediately before the user's
 * message data (the upb_msg* points just past it). */
UPB_INLINE upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
  /* Subtract the header size from the byte pointer directly.  Writing
   * -sizeof(...) would negate an unsigned size_t, which wraps around to a huge
   * positive value instead of producing a negative offset. */
  return (upb_msg_internal*)((char*)msg - sizeof(upb_msg_internal));
}
/* Clears the given message. */
void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l);
@ -179,6 +182,11 @@ UPB_INLINE const void *_upb_array_constptr(const upb_array *arr) {
return (void*)(arr->data & ~(uintptr_t)7);
}
/* Builds the tagged value stored in upb_array.data: the address of `ptr`
 * OR-ed with `elem_size_lg2`.  Asserts that elem_size_lg2 is at most 4. */
UPB_INLINE uintptr_t _upb_array_tagptr(void* ptr, int elem_size_lg2) {
  uintptr_t addr = (uintptr_t)ptr;

  UPB_ASSERT(elem_size_lg2 <= 4);
  return addr | elem_size_lg2;
}
/* Mutable counterpart of _upb_array_constptr(): returns the same pointer with
 * the const qualifier cast away. */
UPB_INLINE void *_upb_array_ptr(upb_array *arr) {
  const void *data = _upb_array_constptr(arr);
  return (void*)data;
}

@ -105,7 +105,7 @@ int64_t MessageLayout::FieldLayoutRank(const protobuf::FieldDescriptor* field) {
void MessageLayout::ComputeLayout(const protobuf::Descriptor* descriptor) {
size_ = Size{0, 0};
maxalign_ = Size{0, 0};
maxalign_ = Size{8, 8};
if (descriptor->options().map_entry()) {
// Map entries aren't actually stored, they are only used during parsing.

Loading…
Cancel
Save