Refactor some code. I extracted some common code from all message field

parsers, to a tail recursive function. Removed the varint jmp table for
a simple varint parse loop, that removes the stack frames. Also careful
with not losing information in repeated message tag check. When written
mindful the checks and loads that happen can be reused for tag dispatch
if not the expected tag.
pull/13171/head
Gerben Stavenga 4 years ago
parent 9938cf8f27
commit 36662b3735
  1. 4
      BUILD
  2. 288
      upb/decode_fast.c
  3. 4
      upbc/generator.cc

@ -40,8 +40,8 @@ CPPOPTS = [
COPTS = CPPOPTS + [
# copybara:strip_for_google3_begin
"-pedantic",
"-Werror=pedantic",
#"-pedantic",
#"-Werror=pedantic",
"-Wstrict-prototypes",
# copybara:strip_end
]

@ -19,23 +19,35 @@ typedef enum {
CARD_r = 2 /* Repeated */
} upb_card;
UPB_FORCEINLINE
const char *fastdecode_tag_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *table, uint64_t hasbits, uint32_t tag) {
uint64_t data;
size_t idx;
idx = (tag & 0xf8) >> 3;
data = table->field_data[idx] ^ tag;
return table->field_parser[idx](UPB_PARSE_ARGS);
}
UPB_FORCEINLINE
uint32_t fastdecode_load_tag(const char* ptr) {
uint16_t tag;
memcpy(&tag, ptr, 2);
return tag;
}
UPB_FORCEINLINE
const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *table, uint64_t hasbits) {
uint16_t tag;
uint64_t data = 0;
size_t idx;
if (UPB_UNLIKELY(ptr >= d->fastlimit)) {
if (UPB_LIKELY(ptr == d->limit)) {
*(uint32_t*)msg |= hasbits >> 16; /* Sync hasbits. */
return ptr;
}
uint64_t data = 0;
RETURN_GENERIC("dispatch hit end\n");
}
memcpy(&tag, ptr, 2);
idx = (tag & 0xf8) >> 3;
data = table->field_data[idx] ^ tag;
return table->field_parser[idx](UPB_PARSE_ARGS);
return fastdecode_tag_dispatch(d, ptr, msg, table, hasbits, fastdecode_load_tag(ptr));
}
UPB_FORCEINLINE
@ -47,18 +59,11 @@ static bool fastdecode_checktag(uint64_t data, int tagbytes) {
}
}
UPB_FORCEINLINE
static uint16_t fastdecode_readtag(const char *ptr, int tagbytes) {
uint16_t ret = 0;
memcpy(&ret, ptr, tagbytes);
return ret;
}
UPB_FORCEINLINE
static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
upb_msg *msg, uint64_t *data,
uint64_t *hasbits, upb_array **outarr,
void **end, int tagbytes, int valbytes,
void **end, int valbytes,
upb_card card, bool hasbit_is_idx) {
size_t ofs = *data >> 48;
void *field = (char *)msg + ofs;
@ -66,7 +71,7 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
switch (card) {
case CARD_s:
if (hasbit_is_idx) {
*hasbits |= 1 << (uint16_t)(*data >> 16);
*hasbits |= 1ull << ((*data >> 32) & 63);
} else {
*hasbits |= *data;
}
@ -75,7 +80,6 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
upb_array **arr_p = field;
upb_array *arr;
uint16_t expected_tag;
*hasbits >>= 16;
*(uint32_t*)msg |= *hasbits;
*hasbits = 0;
@ -101,8 +105,7 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
*end = (char*)field + (arr->size * valbytes);
field = (char*)field + (arr->len * valbytes);
}
expected_tag = fastdecode_readtag(ptr, tagbytes);
*data = expected_tag;
*data = fastdecode_load_tag(ptr);
*outarr = arr;
return field;
}
@ -114,9 +117,9 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
UPB_FORCEINLINE
static void *fastdecode_getfield(upb_decstate *d, const char *ptr, upb_msg *msg,
uint64_t *data, uint64_t *hasbits,
int tagbytes, int valbytes, upb_card card) {
int valbytes, upb_card card) {
return fastdecode_getfield_ofs(d, ptr, msg, data, hasbits, NULL, NULL,
tagbytes, valbytes, card, false);
valbytes, card, false);
}
/* varint fields **************************************************************/
@ -140,77 +143,31 @@ UPB_FORCEINLINE uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigza
return val;
}
UPB_FORCEINLINE
static int fastdecode_varintlen(uint64_t data64) {
uint64_t clear_bits = ~data64 & 0x8080808080808080;
if (clear_bits == 0) return -1;
return __builtin_ctzl(clear_bits) / 8 + 1;
}
UPB_FORCEINLINE
static const char *fastdecode_longvarint(UPB_PARSE_PARAMS, int valbytes,
int varintbytes, bool zigzag) {
uint64_t val = data >> 18;
size_t ofs = (uint16_t)data;
uint64_t data64;
int sawbytes;
memcpy(&data64, ptr + 2, 8);
sawbytes = fastdecode_varintlen(data64) + 2;
UPB_ASSERT(sawbytes == varintbytes);
#ifdef __BMI2__
if (varintbytes != 3) {
uint64_t mask = 0x7f7f7f7f7f7f7f7f >> (8 * (10 - varintbytes));
val |= _pext_u64(data64, mask) << 14;
} else
#endif
{
int i;
for (i = 2; i < varintbytes; i++) {
uint64_t byte = ptr[i];
if (i != varintbytes - 1) byte &= 0x7f;
val |= byte << (7 * i);
}
}
val = fastdecode_munge(val, valbytes, zigzag);
memcpy((char*)msg + ofs, &val, valbytes);
return fastdecode_dispatch(d, ptr + varintbytes, msg, table, hasbits);
}
UPB_FORCEINLINE
static const char *fastdecode_longvarintjmp(UPB_PARSE_PARAMS,
_upb_field_parser **funcs) {
int len;
uint64_t data64;
memcpy(&data64, ptr + 2, 8);
len = fastdecode_varintlen(data64);
if (len < 0) return fastdecode_err(d);
return funcs[len - 1](UPB_PARSE_ARGS);
}
UPB_FORCEINLINE
static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes,
int valbytes, upb_card card, bool zigzag,
_upb_field_parser **funcs) {
int valbytes, upb_card card, bool zigzag) {
uint64_t val;
void *dst;
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
RETURN_GENERIC("varint field tag mismatch\n");
}
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, tagbytes, valbytes,
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, valbytes,
card);
val = (uint8_t)ptr[tagbytes];
ptr += tagbytes + 1;
val = (uint8_t)ptr[-1];
if (UPB_UNLIKELY(val & 0x80)) {
uint32_t byte = (uint8_t)ptr[tagbytes + 1];
val += (byte - 1) << 7;
if (UPB_UNLIKELY(byte & 0x80)) {
ptr += tagbytes;
data = (uint32_t)(val << 18 | data >> 48);
return fastdecode_longvarintjmp(UPB_PARSE_ARGS, funcs);
for (int i = 0; i < 8; i++) {
ptr++;
uint64_t byte = (uint8_t)ptr[-1];
val += (byte - 1) << (7 + 7 * i);
if (UPB_LIKELY((byte & 0x80) == 0)) goto done;
}
ptr += tagbytes + 2;
} else {
ptr += tagbytes + 1;
ptr++;
uint64_t byte = (uint8_t)ptr[-1];
if (byte > 1) return fastdecode_err(d);
val += (byte - 1) << 63;
}
done:
val = fastdecode_munge(val, valbytes, zigzag);
memcpy(dst, &val, valbytes);
return fastdecode_dispatch(d, ptr, msg, table, hasbits);
@ -220,52 +177,13 @@ static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes,
#define b_ZZ false
#define v_ZZ false
/* Generate varint vallbacks. */
#define FUNCNAME(type, valbytes, varintbytes) \
upb_pl##type##valbytes##_##varintbytes##bv
#define TABLENAME(type, valbytes) \
upb_pl##type##valbytes##_table
#define F(type, valbytes, varintbytes) \
static const char *FUNCNAME(type, valbytes, varintbytes)(UPB_PARSE_PARAMS) { \
return fastdecode_longvarint(UPB_PARSE_ARGS, valbytes, varintbytes, \
type##_ZZ); \
}
#define FALLBACKS(type, valbytes) \
F(type, valbytes, 3) \
F(type, valbytes, 4) \
F(type, valbytes, 5) \
F(type, valbytes, 6) \
F(type, valbytes, 7) \
F(type, valbytes, 8) \
F(type, valbytes, 9) \
F(type, valbytes, 10) \
static _upb_field_parser *TABLENAME(type, valbytes)[8] = { \
&FUNCNAME(type, valbytes, 3), &FUNCNAME(type, valbytes, 4), \
&FUNCNAME(type, valbytes, 5), &FUNCNAME(type, valbytes, 6), \
&FUNCNAME(type, valbytes, 7), &FUNCNAME(type, valbytes, 8), \
&FUNCNAME(type, valbytes, 9), &FUNCNAME(type, valbytes, 10)};
FALLBACKS(b, 1)
FALLBACKS(v, 4)
FALLBACKS(v, 8)
FALLBACKS(z, 4)
FALLBACKS(z, 8)
#undef F
#undef FALLBACKS
#undef FUNCNAME
/* Generate all varint functions.
* {s,o,r} x {b1,v4,z4,v8,z8} x {1bt,2bt} */
#define F(card, type, valbytes, tagbytes) \
const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
return fastdecode_varint(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \
type##_ZZ, TABLENAME(type, valbytes)); \
type##_ZZ); \
}
#define TYPES(card, tagbytes) \
@ -313,7 +231,7 @@ static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes,
RETURN_GENERIC("string field tag mismatch\n");
}
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, tagbytes,
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits,
sizeof(upb_strview), card);
len = (int8_t)ptr[tagbytes];
str = ptr + tagbytes + 1;
@ -352,6 +270,40 @@ bool fastdecode_boundscheck2(const char *ptr, unsigned len, const char *end) {
return res < uptr || res > uend;
}
UPB_NOINLINE static
const char *fastdecode_lendelim_submsg(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *table, uint64_t hasbits, const char* saved_limit) {
uint32_t len = (uint8_t)ptr[-1];
if (UPB_UNLIKELY(len & 0x80)) {
ptr++;
uint32_t byte = (uint8_t)ptr[-1];
len += (byte - 1) << 7;
if (UPB_UNLIKELY(byte & 0x80)) {
ptr++;
uint32_t byte = (uint8_t)ptr[-1];
len += (byte - 1) << 14;
if (UPB_UNLIKELY(byte & 0x80)) {
ptr++;
uint32_t byte = (uint8_t)ptr[-1];
len += (byte - 1) << 21;
if (UPB_UNLIKELY(byte & 0x80)) {
ptr++;
uint32_t byte = (uint8_t)ptr[-1];
len += (byte - 1) << 28;
if (UPB_UNLIKELY(byte >= 8)) return fastdecode_err(d);
}
}
}
}
if (UPB_UNLIKELY(fastdecode_boundscheck2(ptr, len, saved_limit))) {
return fastdecode_err(d);
}
d->limit = ptr + len;
d->fastlimit = UPB_MIN(d->limit, d->fastend);
return fastdecode_dispatch(d, ptr, msg, table, hasbits);
}
UPB_FORCEINLINE
static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes,
int msg_ceil_bytes, upb_card card) {
@ -360,16 +312,22 @@ static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes,
RETURN_GENERIC("submessage field tag mismatch\n");
}
if (--d->depth < 0) return fastdecode_err(d);
if (--d->depth == 0) return fastdecode_err(d);
upb_msg **submsg;
upb_array *arr;
void *end;
uint16_t submsg_idx = data >> 32;
uint32_t submsg_idx = data;
submsg_idx >>= 16; asm("" : "+r" (submsg_idx));
const upb_msglayout *subl = table->submsgs[submsg_idx];
submsg = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &arr, &end,
tagbytes, sizeof(upb_msg *), card, true);
sizeof(upb_msg *), card, true);
if (card == CARD_s) {
*(uint32_t*)msg |= hasbits >> 16;
hasbits = 0;
}
const char *saved_limit = d->limit;
const char *saved_fastlimit = d->fastlimit;
@ -383,11 +341,7 @@ again:
size_t new_bytes = new_size * sizeof(upb_msg*);
char *old_ptr = _upb_array_ptr(arr);
if (UPB_UNLIKELY((size_t)(d->arena_end - d->arena_ptr) < new_bytes)) {
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
d->depth++;
RETURN_GENERIC("repeated realloc failed: arena full");
goto repeated_generic;
}
memcpy(d->arena_ptr, old_ptr, old_bytes);
arr->size = new_size;
@ -396,54 +350,49 @@ again:
end = (void*)(d->arena_ptr + (new_size * sizeof(upb_msg*)));
d->arena_ptr += new_bytes;
} else {
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
d->depth++;
RETURN_GENERIC("need array realloc\n");
goto repeated_generic;
}
}
}
upb_msg* child = *submsg;
{
uint32_t len = (uint8_t)ptr[tagbytes];
if (UPB_UNLIKELY(len & 0x80)) {
uint32_t byte = (uint8_t)ptr[tagbytes + 1];
len += (byte - 1) << 7;
if (UPB_UNLIKELY(byte & 0x80)) {
if (card == CARD_r) {
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
}
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
d->depth++;
RETURN_GENERIC("submessage field len >2 bytes\n");
}
ptr++;
}
ptr += tagbytes + 1;
if (UPB_UNLIKELY(fastdecode_boundscheck2(ptr, len, saved_limit))) {
return fastdecode_err(d);
}
d->limit = ptr + len;
d->fastlimit = UPB_MIN(d->limit, d->fastend);
}
if (card == CARD_r || UPB_LIKELY(!*submsg)) {
*submsg = decode_newmsg_ceil(d, subl, msg_ceil_bytes);
if (card == CARD_r || UPB_LIKELY(!child)) {
*submsg = child = decode_newmsg_ceil(d, subl, msg_ceil_bytes);
}
ptr = fastdecode_dispatch(d, ptr, *submsg, subl, 0);
submsg++;
ptr += tagbytes + 1;
ptr = fastdecode_lendelim_submsg(d, ptr, child, subl, 0, saved_limit);
if (UPB_UNLIKELY(ptr != d->limit || d->end_group != 0)) {
return fastdecode_err(d);
}
if (card == CARD_r) {
if (UPB_LIKELY(ptr < saved_fastlimit) &&
fastdecode_readtag(ptr, tagbytes) == (uint16_t)data) {
goto again;
submsg++;
if (UPB_LIKELY(ptr < saved_fastlimit)) {
uint32_t tag = fastdecode_load_tag(ptr);
if (tagbytes == 1) {
if ((uint8_t)tag == (uint8_t)data) goto again;
} else {
if ((uint16_t)tag == (uint16_t)data) goto again;
}
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
d->depth++;
return fastdecode_tag_dispatch(d, ptr, msg, table, hasbits, tag);
} else {
if (ptr == saved_limit) {
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
d->depth++;
return ptr;
}
goto repeated_generic;
}
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
}
d->limit = saved_limit;
@ -451,6 +400,13 @@ again:
d->depth++;
return fastdecode_dispatch(d, ptr, msg, table, hasbits);
repeated_generic:
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
d->depth++;
RETURN_GENERIC("repeated generic");
}
#define F(card, tagbytes, size_ceil, ceil_arg) \

@ -805,8 +805,8 @@ void TryFillTableEntry(const protobuf::Descriptor* message,
if (field->type() == protobuf::FieldDescriptor::TYPE_MESSAGE) {
SubmsgArray submsg_array = GetSubmsgArray(message);
uint64_t idx = submsg_array.indexes[field->message_type()];
data.size32 |= idx << 32 | hasbit_index << 16;
data.size64 |= idx << 32 | hasbit_index << 16;
data.size32 |= idx << 16 | hasbit_index << 32;
data.size64 |= idx << 16 | hasbit_index << 32;
} else {
uint32_t hasbit_mask = 1U << hasbit_index;
data.size32 |= (uint64_t)hasbit_mask << 16;

Loading…
Cancel
Save