pull/13171/head
Joshua Haberman 4 years ago
parent 34b98bc030
commit 26abaa2345
  1. 11
      upb/decode.c
  2. 28
      upb/decode.h
  3. 482
      upb/decode_fast.c

@ -491,9 +491,8 @@ static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
return ptr;
}
static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
const char *decode_field(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *layout) {
while (ptr < d->limit) {
uint32_t tag;
const upb_msglayout_field *field;
int field_number;
@ -584,6 +583,14 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
}
}
}
return ptr;
}
static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *layout) {
while (ptr < d->limit) {
ptr = decode_field(d, ptr, msg, layout);
}
if (ptr != d->limit) decode_err(d);

@ -16,27 +16,33 @@ extern "C" {
bool upb_decode(const char *buf, size_t size, upb_msg *msg,
const upb_msglayout *l, upb_arena *arena);
/* Internal only: data pertaining to the parse. */
typedef struct {
const char *limit; /* End of delimited region or end of buffer. */
const char *fastlimit; /* End of delimited region or end of buffer. */
upb_arena *arena;
int depth;
uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
jmp_buf err;
} upb_decstate;
struct upb_fasttable;
struct upb_decstate;
typedef const char *_upb_field_parser(upb_decstate *d, const char *ptr,
typedef const char *_upb_field_parser(struct upb_decstate *d, const char *ptr,
upb_msg *msg, struct upb_fasttable *table,
uint64_t hasbits, uint64_t data);
/* Dispatch table used by the fast decoder.  fastdecode_dispatch() derives a
 * slot index from the leading tag bytes and uses that same index for both
 * arrays below. */
typedef struct upb_fasttable {
/* Parser to invoke for each slot. */
_upb_field_parser *field_parser[16];
/* Per-slot word; dispatch XORs it with the tag read from the wire and passes
 * the result to the parser as `data`. */
uint64_t field_data[16];
/* Called by fast parsers (e.g. the string parser) when the tag check fails. */
_upb_field_parser *fallback;
} upb_fasttable;
/* Internal only: data pertaining to the parse. */
typedef struct upb_decstate {
/* Bounds of the unused arena space; fastdecode_getfield() carves new arrays
 * out of [arena_ptr, arena_end) by advancing arena_ptr. */
char *arena_ptr, *arena_end;
/* Not referenced by the code visible here -- presumably the end of the
 * current repeated field's storage; TODO confirm. */
const void *rep_end;
const char *limit; /* End of delimited region or end of buffer. */
/* Fast-path cutoff: fastdecode_dispatch() returns to its caller once
 * ptr >= fastlimit.  NOTE(review): how this relates to `limit` is not shown
 * here; the previous comment was a copy of the one on `limit`. */
const char *fastlimit;
/* Array most recently selected by fastdecode_getfield() for a repeated
 * field; the fast parsers fix up its `len` when they stop appending. */
upb_array *arr;
/* Not referenced by the code visible here -- presumably the parser to
 * re-enter after leaving the fast path; TODO confirm. */
_upb_field_parser *resume;
upb_arena *arena;
int depth;
uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
jmp_buf err;
} upb_decstate;
#ifdef __cplusplus
} /* extern "C" */
#endif

@ -7,54 +7,411 @@
upb_decstate *d, const char *ptr, upb_msg *msg, upb_fasttable *table, \
uint64_t hasbits, uint64_t data
#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data
const char *fastdecode_err(upb_decstate *d);
const char *fastdecode_reallocarr(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_fasttable *table,
int elem_size);
/* Reads the next two bytes at `ptr` as a tag, selects a table slot from it and
 * tail-calls that slot's parser.  Returns `ptr` unchanged once it has reached
 * d->fastlimit.
 *
 * NOTE(review): this span is diff residue -- the old and new signature and the
 * old and new return statement both appear below.
 *
 * NOTE(review): `(tag & 0xf7) >> 3` clears bit 3 and keeps bit 7, so it yields
 * only even indices in the range 0..30, while field_parser[] and field_data[]
 * each hold 16 entries.  A mask of 0x78 would select bits 3..6 and give 0..15;
 * confirm the intended mask. */
UPB_NOINLINE
const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg,
upb_fasttable *table, uint64_t hasbits) {
static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_fasttable *table,
uint64_t hasbits) {
uint16_t tag;
uint64_t data;
if (UPB_UNLIKELY(ptr >= d->fastlimit)) return ptr;
/* Always reads two bytes, even when the tag is a single byte. */
memcpy(&tag, ptr, 2);
/* XOR with the wire tag: the parsers' tag check tests the low bytes of
 * `data` for zero. */
data = table->field_data[(tag & 0xf7) >> 3] ^ tag;
return table->field_parser[(tag & 0xf7) >> 3](d, ptr, msg, table, hasbits,
data);
return table->field_parser[(tag & 0xf7) >> 3](UPB_PARSE_ARGS);
}
UPB_FORCEINLINE bool fastdecode_checktag(uint64_t data, int tagbytes) {
const char zeros[2] = {0, 0};
return memcmp(&data, &zeros, tagbytes) == 0;
#if 0
UPB_NOINLINE
static const char *fastdecode_parseloop(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_fasttable *table) {
uint64_t hasbits = 0;
while (ptr < d->fastlimit) {
ptr = fastdecode_dispatch(d, ptr, msg, table, hasbits);
/*ptr = decode_field(d, ptr, msg, table->layout);*/
}
return ptr;
}
#endif
/* Returns true when the low `tagbytes` bytes of `data` are all zero.
 *
 * The dispatcher passes `data` as (table word XOR wire tag), so zero low
 * bytes mean the tag on the wire is the one this parser was registered for.
 * A `tagbytes` of 1 checks one byte; any other value checks two bytes. */
UPB_FORCEINLINE static bool fastdecode_checktag(uint64_t data, int tagbytes) {
  const uint64_t tag_mask = (tagbytes == 1) ? 0xff : 0xffff;
  return (data & tag_mask) == 0;
}
/* Loads the first `tagbytes` bytes at `ptr` into a zero-initialised 16-bit
 * value, in native byte order, and returns it.  Bytes not copied stay zero.
 * Callers in this file pass a `tagbytes` of 1 or 2. */
UPB_FORCEINLINE static uint16_t fastdecode_readtag(const char *ptr,
                                                   int tagbytes) {
  uint16_t tag_bits = 0;
  memcpy(&tag_bits, ptr, tagbytes);
  return tag_bits;
}
/* Field cardinality handled by a fast-path parser.  The suffix letter is the
 * one that appears in the generated parser names (upb_p<card>...). */
typedef enum {
  /* Plain field with a hasbit: the parser ORs `data` into the hasbits
   * accumulator. */
  CARD_s = 0,
  /* Oneof member: the parser also stores the oneof case value in the
   * message. */
  CARD_o = 1,
  /* Repeated field: values are appended to a upb_array. */
  CARD_r = 2,
  /* Presumably "packed repeated"; fastdecode_getfield() has no case for it
   * in the code visible here -- TODO confirm. */
  CARD_p = 3
} upb_card;
/* Locates the storage a fast parser should write the next value into.
 *
 * The field's byte offset inside `msg` is taken from bits 48..63 of `data`.
 * What else happens depends on `card`:
 *
 *   CARD_s  ORs `data` into *hasbits and returns the field address.
 *   CARD_o  Stores bits 32..47 of `data` into the 32-bit word located at the
 *           message offset given by bits 16..31, then returns the field
 *           address.
 *   CARD_r  Flushes *hasbits (shifted right by 16) into the first 32-bit word
 *           of `msg` and resets it to 0.  The field slot holds a upb_array*:
 *             - if it is NULL, a 4-element array is carved out of
 *               [d->arena_ptr, d->arena_end); when there is not enough room,
 *               *elem_avail is set to 0 and NULL is returned so the caller can
 *               take its reallocation path;
 *             - otherwise the existing array is reused.
 *           In both cases arr->len is temporarily set to arr->size and
 *           *elem_avail receives the number of free slots.  The caller must
 *           subtract the slots it did not use from arr->len when it stops
 *           appending.  *expected_tag receives the tag at `ptr`, d->arr is set
 *           to the array, and the address of the first free element is
 *           returned.
 *
 * *expected_tag and *elem_avail are written only for CARD_r.  Any other
 * cardinality reaches UPB_UNREACHABLE(). */
UPB_FORCEINLINE
static void *fastdecode_getfield(upb_decstate *d, const char *ptr, upb_msg *msg,
                                 uint64_t data, uint64_t *hasbits,
                                 uint16_t *expected_tag, int *elem_avail,
                                 upb_card card, int tagbytes, int valbytes) {
  void *field = (char *)msg + (data >> 48);

  switch (card) {
    case CARD_s:
      *hasbits |= data;
      return field;
    case CARD_o: {
      uint32_t *case_ptr = UPB_PTR_AT(msg, (data >> 16) & 0xffff, uint32_t);
      *case_ptr = (data >> 32) & 0xffff;
      return field;
    }
    case CARD_r: {
      upb_array **arr_p = field;
      upb_array *arr;

      /* Commit the hasbits gathered so far before switching to array mode. */
      *hasbits >>= 16;
      *(uint32_t*)msg |= *hasbits;
      *hasbits = 0;

      if (UPB_LIKELY(!*arr_p)) {
        /* Array header followed directly by room for 4 elements. */
        size_t need = (valbytes * 4) + sizeof(upb_array);
        if (UPB_UNLIKELY((size_t)(d->arena_end - d->arena_ptr) < need)) {
          *elem_avail = 0;
          return NULL;
        }
        arr = (void*)d->arena_ptr;
        field = arr + 1;
        arr->data = (uintptr_t)field;
        *arr_p = arr;
        arr->size = 4;
        /* Same convention as the reuse branch below: len is provisionally
         * "full" and the caller subtracts the unused slots.  This was 0,
         * which made the caller's `len -= elem_avail` wrap around. */
        arr->len = arr->size;
        *elem_avail = 4;
        d->arena_ptr += need;
      } else {
        arr = *arr_p;
        field = _upb_array_ptr(arr);
        *elem_avail = arr->size - arr->len;
        field = (char*)field + (arr->len * valbytes);
        arr->len = arr->size;
      }

      *expected_tag = fastdecode_readtag(ptr, tagbytes);
      d->arr = arr;
      return field;
    }
    default:
      UPB_UNREACHABLE();
  }
}
/* Fast-path parser for string/bytes fields whose length fits in one byte.
 *
 * On a tag mismatch the field is handed to table->fallback.  Otherwise the
 * destination upb_strview is obtained from fastdecode_getfield() and pointed
 * at the payload inside the input buffer (no copy is made).  For CARD_r the
 * parser keeps consuming elements while the next tag equals the first one,
 * the input is below d->fastlimit and the array has free slots.
 *
 * Returns:
 *   - the result of fastdecode_dispatch() on the following field,
 *   - `ptr` unchanged (still at the tag) when the length byte has its high
 *     bit set, i.e. the length needs more than one byte,
 *   - the result of fastdecode_reallocarr() when a repeated field has no free
 *     slot,
 *   - the result of fastdecode_err() when the payload would run past
 *     d->limit. */
UPB_FORCEINLINE
static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes,
                                     upb_card card) {
  upb_strview *dst;
  uint16_t expected_tag;
  int elem_avail;

  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
    return table->fallback(UPB_PARSE_ARGS);
  }

  dst = fastdecode_getfield(d, ptr, msg, data, &hasbits, &expected_tag,
                            &elem_avail, card, tagbytes, sizeof(upb_strview));

again:
  /* Invariant at this label (CARD_r): elem_avail == number of unused slots. */
  if (card == CARD_r) {
    if (UPB_UNLIKELY(elem_avail == 0)) {
      return fastdecode_reallocarr(d, ptr, msg, table, sizeof(upb_strview));
    }
  }

  {
    /* Read the length byte as signed explicitly: plain `char` is unsigned on
     * some targets, where the multi-byte-length check below would never
     * fire. */
    int64_t len = (int8_t)ptr[tagbytes];
    if (UPB_UNLIKELY(len < 0)) {
      if (card == CARD_r) {
        d->arr->len -= elem_avail;
      }
      return ptr;
    }
    ptr += tagbytes + 1;
    /* Compare lengths rather than forming `ptr + len` first, so no pointer
     * past the end of the input is created before the check. */
    if (UPB_UNLIKELY(len > d->limit - ptr)) {
      return fastdecode_err(d);
    }
    dst->data = ptr;
    dst->size = len;
    ptr += len;
  }

  if (card == CARD_r) {
    /* One slot has just been filled.  Count it before deciding whether to
     * continue; otherwise the final `len -= elem_avail` drops the last
     * element written. */
    elem_avail--;
    if (UPB_LIKELY(ptr < d->fastlimit) &&
        fastdecode_readtag(ptr, tagbytes) == expected_tag) {
      dst++;
      goto again;
    }
    d->arr->len -= elem_avail;
  }

  return fastdecode_dispatch(d, ptr, msg, table, hasbits);
}
/* Table entry points for string fields with a one-byte tag.  Each one
 * instantiates fastdecode_string() with constant arguments so the
 * force-inlined body can be specialised per cardinality. */

/* CARD_s: field tracked by a hasbit. */
const char *upb_pss_1bt(UPB_PARSE_PARAMS) {
return fastdecode_string(UPB_PARSE_ARGS, 1, CARD_s);
}
/* CARD_o: oneof member. */
const char *upb_pos_1bt(UPB_PARSE_PARAMS) {
return fastdecode_string(UPB_PARSE_ARGS, 1, CARD_o);
}
/* CARD_r: repeated field. */
const char *upb_prs_1bt(UPB_PARSE_PARAMS) {
return fastdecode_string(UPB_PARSE_ARGS, 1, CARD_r);
}
/* Fast-path parser for fixed-width fields of `valbytes` bytes.
 *
 * On a tag mismatch `ptr` is returned unchanged.  NOTE(review): the string
 * parser calls table->fallback in the same situation; confirm which behaviour
 * is intended here.
 *
 * Otherwise the value is copied verbatim (native byte order) into the slot
 * supplied by fastdecode_getfield().  For CARD_r the parser keeps consuming
 * elements while the next tag equals the first one, the input is below
 * d->fastlimit and the array has free slots.  When the array is full the
 * result of fastdecode_reallocarr() is returned.  In every other case
 * parsing continues through fastdecode_dispatch().
 *
 * No explicit check against d->limit is made before the copy.  Presumably
 * d->fastlimit leaves enough slack for one tag plus value -- TODO confirm. */
UPB_FORCEINLINE
static const char *fastdecode_fixed(UPB_PARSE_PARAMS, int tagbytes, int valbytes,
                                    upb_card card) {
  char *dst;
  uint16_t expected_tag;
  int elem_avail;

  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
    return ptr;
  }

  dst = fastdecode_getfield(d, ptr, msg, data, &hasbits, &expected_tag,
                            &elem_avail, card, tagbytes, valbytes);

again:
  /* Invariant at this label (CARD_r): elem_avail == number of unused slots. */
  if (card == CARD_r) {
    if (UPB_UNLIKELY(elem_avail == 0)) {
      return fastdecode_reallocarr(d, ptr, msg, table, valbytes);
    }
  }

  ptr += tagbytes;
  memcpy(dst, ptr, valbytes);
  ptr += valbytes;

  if (card == CARD_r) {
    /* One slot has just been filled.  Count it before deciding whether to
     * continue; otherwise the final `len -= elem_avail` drops the last
     * element written. */
    elem_avail--;
    if (UPB_LIKELY(ptr < d->fastlimit) &&
        fastdecode_readtag(ptr, tagbytes) == expected_tag) {
      dst += valbytes;
      goto again;
    }
    d->arr->len -= elem_avail;
  }

  return fastdecode_dispatch(d, ptr, msg, table, hasbits);
}
/* Table entry points for 8-byte fixed-width fields with a one-byte tag.  Each
 * one instantiates fastdecode_fixed() with constant arguments so the
 * force-inlined body can be specialised per cardinality. */

/* CARD_s: field tracked by a hasbit. */
const char *upb_psf8_1bt(UPB_PARSE_PARAMS) {
return fastdecode_fixed(UPB_PARSE_ARGS, 1, 8, CARD_s);
}
/* CARD_o: oneof member. */
const char *upb_pof8_1bt(UPB_PARSE_PARAMS) {
return fastdecode_fixed(UPB_PARSE_ARGS, 1, 8, CARD_o);
}
/* CARD_r: repeated field. */
const char *upb_prf8_1bt(UPB_PARSE_PARAMS) {
return fastdecode_fixed(UPB_PARSE_ARGS, 1, 8, CARD_r);
}
#if 0
/* Earlier, currently disabled (#if 0) parser for repeated fixed-width fields.
 *
 * On a tag mismatch the field is delegated: to `fallback` (after patching
 * `data` for the packed wire type) when one is supplied, else to
 * table->fallback.  Otherwise values are copied into the field's upb_array
 * for as long as the next tag equals the first one.
 *
 * NOTE(review): this code uses d->dstend and calls fastdecode_reallocarr()
 * with UPB_PARSE_ARGS.  Neither matches the upb_decstate layout nor the
 * five-argument prototype visible elsewhere in this change, so it needs
 * porting before it can be re-enabled.
 *
 * NOTE(review): arr->len is never updated after elements are written; the
 * only assignment is inside the commented-out block in the loop. */
UPB_FORCEINLINE
static const char *fastdecode_repeatedfixed(UPB_PARSE_PARAMS, int tagbytes,
                                            int valbytes, _upb_field_parser *fallback) {
  char *dst;
  uint16_t expected_tag;
  upb_array **arr_p;
  upb_array *arr;

  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
    if (fallback) {
      // Patch data to match packed wiretype.
      data ^= 0x2 ^ (valbytes == 4 ? 5 : 1);
      /* The fallback has consumed the field, so its result must be returned.
       * Previously the call's result was discarded and control fell through
       * into the unpacked loop below. */
      return fallback(UPB_PARSE_ARGS);
    } else {
      return table->fallback(UPB_PARSE_ARGS);
    }
  }

  arr_p = UPB_PTR_AT(msg, (data >> 48), upb_array*);
  arr = *arr_p;
  if (UPB_UNLIKELY(!arr || arr->size - arr->len < 4)) {
    return fastdecode_allocarr(UPB_PARSE_ARGS);
  }

  dst = _upb_array_ptr(arr);
  d->dstend = dst + (arr->size * valbytes);
  dst += (arr->len * valbytes);
  expected_tag = fastdecode_readtag(ptr, tagbytes);

  do {
    ptr += tagbytes;
    //fastdecode_reserve(d, arr, &dst, &dstend);
    if (UPB_UNLIKELY(dst == d->dstend)) {
      return fastdecode_reallocarr(UPB_PARSE_ARGS);
    }
    memcpy(dst, ptr, valbytes);
    dst += valbytes;
    ptr += valbytes;
    /*
    if (UPB_UNLIKELY(ptr >= d->fastlimit)) {
    arr->len = (dst - (char*)_upb_array_ptr(arr)) / valbytes;
    return ptr;
    }
    */
  } while (fastdecode_readtag(ptr, tagbytes) == expected_tag);

  return fastdecode_dispatch(d, ptr, msg, table, hasbits);
}
UPB_FORCEINLINE
static const char *fastdecode_scalarfixed(UPB_PARSE_PARAMS, int tagbytes,
int valbytes) {
int valbytes, upb_card card) {
char *field;
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) return ptr;
hasbits |= data;
field = (char*)msg + (data >> 48);
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
return table->fallback(UPB_PARSE_ARGS);
}
field = fastdecode_getfield(msg, data, &hasbits, card);
memcpy(field, ptr + tagbytes, valbytes);
return fastdecode_dispatch(d, ptr + tagbytes + valbytes, msg, table, hasbits);
ptr += tagbytes + valbytes;
return fastdecode_dispatch(d, ptr, msg, table, hasbits);
}
const char *upb_psf64_1bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarfixed(d, ptr, msg, table, hasbits, data, 1, 8);
arr_p = UPB_PTR_AT(msg, (data >> 48), upb_array*);
if (UPB_UNLIKELY(!arr_p)) goto alloc_arr;
arr = *arr_p;
dst = (char*)_upb_array_ptr(arr);
dstend = dst + arr->size;
dst += arr->len;
const char *fastdecode_allocarr(UPB_PARSE_PARAMS)
;
/* Disabled (#if 0) helper: looks up the upb_array stored in `msg` at the
 * offset held in bits 48..63 of `data`, then reports where appending may
 * proceed.
 *
 *   *dst       <- address of the first unused element (data + len * valbytes)
 *   d->dstend  <- address one past the array's capacity (data + size * valbytes)
 *
 * The array pointer is dereferenced without a NULL or capacity check; the
 * check that would have called fastdecode_allocarr() is commented out in the
 * original. */
UPB_FORCEINLINE
static void fastdecode_getarr(upb_decstate *d, upb_msg *msg, uint64_t data,
                              int valbytes, char **dst) {
  upb_array *arr = *UPB_PTR_AT(msg, (data >> 48), upb_array*);
  char *base = _upb_array_ptr(arr);

  d->dstend = base + (arr->size * valbytes);
  *dst = base + (arr->len * valbytes);
}
const char *upb_psf64_2bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarfixed(d, ptr, msg, table, hasbits, data, 2, 8);
/* Earlier, currently disabled (#if 0) parser for repeated fixed-width fields.
 *
 * On a tag mismatch the field is delegated: to `fallback` (after patching
 * `data` for the packed wire type) when one is supplied, else to
 * table->fallback.  Otherwise values are copied into the field's upb_array
 * for as long as the next tag equals the first one.
 *
 * NOTE(review): this code uses d->dstend and calls fastdecode_reallocarr()
 * with UPB_PARSE_ARGS.  Neither matches the upb_decstate layout nor the
 * five-argument prototype visible elsewhere in this change, so it needs
 * porting before it can be re-enabled.
 *
 * NOTE(review): arr->len is never updated after elements are written; the
 * only assignment is inside the commented-out block in the loop. */
UPB_FORCEINLINE
static const char *fastdecode_repeatedfixed(UPB_PARSE_PARAMS, int tagbytes,
                                            int valbytes, _upb_field_parser *fallback) {
  char *dst;
  uint16_t expected_tag;
  upb_array **arr_p;
  upb_array *arr;

  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
    if (fallback) {
      // Patch data to match packed wiretype.
      data ^= 0x2 ^ (valbytes == 4 ? 5 : 1);
      /* The fallback has consumed the field, so its result must be returned.
       * Previously the call's result was discarded and control fell through
       * into the unpacked loop below. */
      return fallback(UPB_PARSE_ARGS);
    } else {
      return table->fallback(UPB_PARSE_ARGS);
    }
  }

  arr_p = UPB_PTR_AT(msg, (data >> 48), upb_array*);
  arr = *arr_p;
  if (UPB_UNLIKELY(!arr || arr->size - arr->len < 4)) {
    return fastdecode_allocarr(UPB_PARSE_ARGS);
  }

  dst = _upb_array_ptr(arr);
  d->dstend = dst + (arr->size * valbytes);
  dst += (arr->len * valbytes);
  expected_tag = fastdecode_readtag(ptr, tagbytes);

  do {
    ptr += tagbytes;
    //fastdecode_reserve(d, arr, &dst, &dstend);
    if (UPB_UNLIKELY(dst == d->dstend)) {
      return fastdecode_reallocarr(UPB_PARSE_ARGS);
    }
    memcpy(dst, ptr, valbytes);
    dst += valbytes;
    ptr += valbytes;
    /*
    if (UPB_UNLIKELY(ptr >= d->fastlimit)) {
    arr->len = (dst - (char*)_upb_array_ptr(arr)) / valbytes;
    return ptr;
    }
    */
  } while (fastdecode_readtag(ptr, tagbytes) == expected_tag);

  return fastdecode_dispatch(d, ptr, msg, table, hasbits);
}
const char *upb_psf32_1bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarfixed(d, ptr, msg, table, hasbits, data, 1, 4);
/* Disabled (#if 0) entry point: repeated 8-byte fixed field with a one-byte
 * tag and no packed-format fallback parser.  The last argument is a function
 * pointer, so "none" is spelled NULL rather than the boolean `false`. */
UPB_NOINLINE
const char *upb_prf8_1bt(UPB_PARSE_PARAMS) {
  return fastdecode_repeatedfixed(UPB_PARSE_ARGS, 1, 8, NULL);
}
const char *upb_psf32_2bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarfixed(d, ptr, msg, table, hasbits, data, 2, 4);
// Generate all fixed functions.
// {s,o,r,p} x {f4,f8} x {1bt,2bt}
//
// Each F(card, valbytes, tagbytes) expands to one exported parser named
// upb_p<card>f<valbytes>_<tagbytes>bt that forwards to fastdecode_fixed()
// with those constants.
//
// NOTE(review): as shown here this sits inside a disabled `#if 0` region.  If
// it is enabled, TAGBYTES(s/o/r) generate upb_psf8_1bt, upb_pof8_1bt and
// upb_prf8_1bt, which are also defined by hand earlier in this file.
//
// NOTE(review): TAGBYTES(p) instantiates CARD_p, for which
// fastdecode_getfield() has no case in the code visible here.
//
// NOTE(review): F, TYPES and TAGBYTES are not #undef'd right after use; a
// later redefinition of F with a different parameter list would then be an
// error -- confirm.
#define F(card, valbytes, tagbytes) \
const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
return fastdecode_fixed(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card); \
}
// Both value widths (4 and 8 bytes) for one cardinality and tag size.
#define TYPES(card, tagbytes) \
F(card, 4, tagbytes) \
F(card, 8, tagbytes)
// Both tag sizes (1 and 2 bytes) for one cardinality.
#define TAGBYTES(card) \
TYPES(card, 1) \
TYPES(card, 2)
TAGBYTES(s)
TAGBYTES(o)
TAGBYTES(r)
TAGBYTES(p)
/* Post-processes a decoded varint according to the destination field.
 *
 *   valbytes == 1       -> normalised to 0 or 1, regardless of `zigzag`
 *   zigzag, 4 bytes     -> zigzag-decoded using 32-bit arithmetic
 *   zigzag, 8 bytes     -> zigzag-decoded using 64-bit arithmetic
 *   otherwise           -> returned unchanged
 *
 * With `zigzag` set, a `valbytes` other than 1, 4 or 8 reaches
 * UPB_UNREACHABLE(). */
UPB_FORCEINLINE uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) {
  if (valbytes == 1) return val != 0;
  if (!zigzag) return val;

  switch (valbytes) {
    case 4: {
      uint32_t n = val;
      return (n >> 1) ^ -(int32_t)(n & 1);
    }
    case 8:
      return (val >> 1) ^ -(int64_t)(val & 1);
    default:
      UPB_UNREACHABLE();
      break;
  }
  return val;
}
UPB_FORCEINLINE
static const char *fastdecode_longvarint_impl(UPB_PARSE_PARAMS, int valbytes) {
char *field = (char *)data;
static const char *fastdecode_longvarint_impl(const char *ptr, void *field,
int valbytes) {
// The algorithm relies on sign extension to set all high bits when the varint
// continues. This way it can use "and" to aggregate in to the result.
const int8_t *p = (const int8_t*)(ptr);
@ -135,13 +492,13 @@ static const char *fastdecode_longvarint_impl(UPB_PARSE_PARAMS, int valbytes) {
done##n : { \
uint64_t val = res1 & res2 & res3; \
memcpy(field, &val, valbytes); \
return fastdecode_dispatch(d, (const char *)p + n, msg, table, hasbits); \
return (const char *)p + n; \
};
done2 : {
uint64_t val = res1 & res2;
memcpy(field, &val, valbytes);
return fastdecode_dispatch(d, (const char*)p + 2, msg, table, hasbits);
return (const char*)p + 2;
}
DONE(3)
@ -152,59 +509,94 @@ done2 : {
DONE(8)
DONE(9)
DONE(10)
#undef DONE
#undef SHLD
#undef SHLD_SIGN
}
UPB_NOINLINE
static const char *fastdecode_longvarint32(UPB_PARSE_PARAMS) {
return fastdecode_longvarint_impl(d, ptr, msg, table, hasbits, data, 4);
(void)d;
(void)msg;
(void)table;
(void)hasbits;
return fastdecode_longvarint_impl(ptr, (void*)data, 4);
}
UPB_NOINLINE
static const char *fastdecode_longvarint64(UPB_PARSE_PARAMS) {
return fastdecode_longvarint_impl(d, ptr, msg, table, hasbits, data, 8);
(void)d;
(void)msg;
(void)table;
(void)hasbits;
return fastdecode_longvarint_impl(ptr, (void*)data, 8);
}
UPB_FORCEINLINE
static const char *fastdecode_longvarint(UPB_PARSE_PARAMS, int valbytes) {
if (valbytes == 4) {
return fastdecode_longvarint32(d, ptr, msg, table, hasbits, data);
return fastdecode_longvarint32(UPB_PARSE_ARGS);
} else if (valbytes == 8) {
return fastdecode_longvarint64(d, ptr, msg, table, hasbits, data);
return fastdecode_longvarint64(UPB_PARSE_ARGS);
}
UPB_UNREACHABLE();
}
UPB_FORCEINLINE
static const char *fastdecode_scalarvarint(UPB_PARSE_PARAMS, int tagbytes,
int valbytes) {
static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes,
int valbytes, bool zigzag, bool oneof) {
uint64_t val = 0;
void *field;
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) return ptr;
ptr += tagbytes;
hasbits |= data;
fastdecode_getfield(msg, data, &hasbits, oneof);
field = (char*)msg + (data >> 48);
if (UPB_UNLIKELY(*ptr < 0)) {
return fastdecode_longvarint(d, ptr, msg, table, hasbits, (uint64_t)field,
valbytes);
}
val = *ptr;
val = fastdecode_munge(*ptr, valbytes, zigzag);
memcpy(field, &val, valbytes);
return fastdecode_dispatch(d, ptr + 1, msg, table, hasbits);
}
const char *upb_psv32_1bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarvarint(d, ptr, msg, table, hasbits, data, 1, 4);
}
// Generate all varint functions.
// {s,o,r} x {b1,v4,z4,v8,z8} x {1bt,2bt}
const char *upb_psv32_2bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarvarint(d, ptr, msg, table, hasbits, data, 2, 4);
}
#define z_ZZ true
#define b_ZZ false
#define v_ZZ false
const char *upb_psv64_1bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarvarint(d, ptr, msg, table, hasbits, data, 1, 8);
}
#define F(card, type, valbytes, tagbytes) \
const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
return fastdecode_varint(UPB_PARSE_ARGS, tagbytes, valbytes, type##_ZZ, \
card##_ONEOF); \
}
const char *upb_psv64_2bt(UPB_PARSE_PARAMS) {
return fastdecode_scalarvarint(d, ptr, msg, table, hasbits, data, 2, 8);
}
#define TYPES(card, tagbytes) \
F(card, b, 1, tagbytes) \
F(card, v, 4, tagbytes) \
F(card, v, 8, tagbytes) \
F(card, z, 4, tagbytes) \
F(card, z, 8, tagbytes)
#define TAGBYTES(card) \
TYPES(card, 1) \
TYPES(card, 2)
TAGBYTES(s)
TAGBYTES(o)
TAGBYTES(r)
#undef z_ZZ
#undef b_ZZ
#undef v_ZZ
#undef o_ONEOF
#undef s_ONEOF
#undef r_ONEOF
#undef F
#undef TYPES
#undef TAGBYTES
#endif

Loading…
Cancel
Save