Merge pull request #2 from gerben-s/fast-table

Refactor some code. I extracted some common code from all message field
pull/13171/head
Joshua Haberman 4 years ago committed by GitHub
commit 4f77aaafd8
  1. 4
      BUILD
  2. 319
      upb/decode_fast.c
  3. 7
      upbc/generator.cc

@ -40,8 +40,8 @@ CPPOPTS = [
COPTS = CPPOPTS + [
# copybara:strip_for_google3_begin
"-pedantic",
"-Werror=pedantic",
#"-pedantic",
#"-Werror=pedantic",
"-Wstrict-prototypes",
# copybara:strip_end
]

@ -19,23 +19,35 @@ typedef enum {
CARD_r = 2 /* Repeated */
} upb_card;
UPB_FORCEINLINE
const char *fastdecode_tag_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *table, uint64_t hasbits, uint32_t tag) {
uint64_t data;
size_t idx;
idx = (tag & 0xf8) >> 3;
data = table->field_data[idx] ^ tag;
return table->field_parser[idx](UPB_PARSE_ARGS);
}
UPB_FORCEINLINE
uint32_t fastdecode_load_tag(const char* ptr) {
uint16_t tag;
memcpy(&tag, ptr, 2);
return tag;
}
UPB_FORCEINLINE
const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *table, uint64_t hasbits) {
uint16_t tag;
uint64_t data = 0;
size_t idx;
if (UPB_UNLIKELY(ptr >= d->fastlimit)) {
if (UPB_LIKELY(ptr == d->limit)) {
*(uint32_t*)msg |= hasbits >> 16; /* Sync hasbits. */
return ptr;
}
uint64_t data = 0;
RETURN_GENERIC("dispatch hit end\n");
}
memcpy(&tag, ptr, 2);
idx = (tag & 0xf8) >> 3;
data = table->field_data[idx] ^ tag;
return table->field_parser[idx](UPB_PARSE_ARGS);
return fastdecode_tag_dispatch(d, ptr, msg, table, hasbits, fastdecode_load_tag(ptr));
}
UPB_FORCEINLINE
@ -47,18 +59,11 @@ static bool fastdecode_checktag(uint64_t data, int tagbytes) {
}
}
/* Copies the first `tagbytes` bytes at `ptr` into a zero-initialized
 * uint16_t and returns it. Bytes of the result that are not overwritten by
 * the copy stay zero. */
UPB_FORCEINLINE
static uint16_t fastdecode_readtag(const char *ptr, int tagbytes) {
  uint16_t tag = 0;

  memcpy(&tag, ptr, tagbytes);
  return tag;
}
UPB_FORCEINLINE
static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
upb_msg *msg, uint64_t *data,
uint64_t *hasbits, upb_array **outarr,
void **end, int tagbytes, int valbytes,
void **end, int valbytes,
upb_card card, bool hasbit_is_idx) {
size_t ofs = *data >> 48;
void *field = (char *)msg + ofs;
@ -66,7 +71,7 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
switch (card) {
case CARD_s:
if (hasbit_is_idx) {
*hasbits |= 1 << (uint16_t)(*data >> 16);
*hasbits |= 1ull << ((*data >> 32) & 63);
} else {
*hasbits |= *data;
}
@ -75,7 +80,6 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
upb_array **arr_p = field;
upb_array *arr;
uint16_t expected_tag;
*hasbits >>= 16;
*(uint32_t*)msg |= *hasbits;
*hasbits = 0;
@ -83,9 +87,6 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
const size_t initial_len = 8;
size_t need = (valbytes * initial_len) + sizeof(upb_array);
if (UPB_UNLIKELY((size_t)(d->arena_end - d->arena_ptr) < need)) {
*outarr = NULL;
*data = 0;
*end = NULL;
return NULL;
}
arr = (void*)d->arena_ptr;
@ -101,8 +102,7 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
*end = (char*)field + (arr->size * valbytes);
field = (char*)field + (arr->len * valbytes);
}
expected_tag = fastdecode_readtag(ptr, tagbytes);
*data = expected_tag;
*data = fastdecode_load_tag(ptr);
*outarr = arr;
return field;
}
@ -114,9 +114,9 @@ static void *fastdecode_getfield_ofs(upb_decstate *d, const char *ptr,
UPB_FORCEINLINE
static void *fastdecode_getfield(upb_decstate *d, const char *ptr, upb_msg *msg,
uint64_t *data, uint64_t *hasbits,
int tagbytes, int valbytes, upb_card card) {
int valbytes, upb_card card) {
return fastdecode_getfield_ofs(d, ptr, msg, data, hasbits, NULL, NULL,
tagbytes, valbytes, card, false);
valbytes, card, false);
}
/* varint fields **************************************************************/
@ -140,77 +140,31 @@ UPB_FORCEINLINE uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigza
return val;
}
/* Returns the 1-based position (counting from the least-significant byte of
 * `data64`) of the first byte whose high bit (0x80) is clear, or -1 if all
 * eight bytes have their high bit set.
 *
 * Uses __builtin_ctzll rather than __builtin_ctzl: the operand is 64 bits
 * wide, while `unsigned long` is only 32 bits on LLP64 and 32-bit targets,
 * where the `l` variant would silently truncate the argument. */
UPB_FORCEINLINE
static int fastdecode_varintlen(uint64_t data64) {
  /* One bit per byte, set where that byte's high bit is clear. */
  uint64_t clear_bits = ~data64 & 0x8080808080808080ULL;
  if (clear_bits == 0) return -1;
  /* The lowest set bit sits at position 8*k + 7 for byte k; /8 yields k. */
  return __builtin_ctzll(clear_bits) / 8 + 1;
}
/* Finishes decoding a varint that is `varintbytes` bytes long, stores the
 * result into the message, and continues with the next field.
 *
 * Inputs, as read by this function:
 *   - `ptr` points at the first byte of the varint (bytes are read from
 *     ptr[2] onward, and dispatch resumes at ptr + varintbytes).
 *   - `data` bits 18 and up hold the partially decoded value; its low 16 bits
 *     hold the byte offset of the destination field inside `msg`.
 *   - `valbytes` is the number of bytes stored at the destination.
 *   - `zigzag` is forwarded to fastdecode_munge.
 *
 * The generated callers in this file instantiate it for varintbytes 3..10. */
UPB_FORCEINLINE
static const char *fastdecode_longvarint(UPB_PARSE_PARAMS, int valbytes,
int varintbytes, bool zigzag) {
uint64_t val = data >> 18;
size_t ofs = (uint16_t)data;
uint64_t data64;
int sawbytes;
/* Load the eight bytes that follow the first two bytes of the varint. */
memcpy(&data64, ptr + 2, 8);
/* fastdecode_varintlen counts from ptr + 2, so add 2 for the total length.
 * The result is only used to cross-check the caller-supplied length. */
sawbytes = fastdecode_varintlen(data64) + 2;
UPB_ASSERT(sawbytes == varintbytes);
#ifdef __BMI2__
if (varintbytes != 3) {
/* The shift leaves 0x7f in the low (varintbytes - 2) bytes of the mask, so
 * _pext_u64 gathers the low 7 bits of each remaining varint byte. The
 * gathered bits start at bit 14 because two bytes were already consumed. */
uint64_t mask = 0x7f7f7f7f7f7f7f7f >> (8 * (10 - varintbytes));
val |= _pext_u64(data64, mask) << 14;
} else
#endif
{
/* Portable path: fold in bytes 2..varintbytes-1 seven bits at a time. The
 * continuation bit is masked off on every byte except the last one, which
 * is used unmasked. */
int i;
for (i = 2; i < varintbytes; i++) {
uint64_t byte = ptr[i];
if (i != varintbytes - 1) byte &= 0x7f;
val |= byte << (7 * i);
}
}
val = fastdecode_munge(val, valbytes, zigzag);
/* Store the low `valbytes` bytes of the value at msg + ofs. */
memcpy((char*)msg + ofs, &val, valbytes);
return fastdecode_dispatch(d, ptr + varintbytes, msg, table, hasbits);
}
/* Measures the varint continuation that starts at ptr + 2 and tail-calls the
 * matching entry of `funcs`.
 *
 * fastdecode_varintlen reports how many of the eight bytes at ptr + 2 belong
 * to the varint; entry (count - 1) of `funcs` is invoked with the current
 * parse arguments. If no terminating byte is found among those eight bytes
 * the decode fails through fastdecode_err. */
UPB_FORCEINLINE
static const char *fastdecode_longvarintjmp(UPB_PARSE_PARAMS,
                                            _upb_field_parser **funcs) {
  uint64_t tail;
  int extra;

  memcpy(&tail, ptr + 2, 8);
  extra = fastdecode_varintlen(tail);
  if (extra >= 0) {
    return funcs[extra - 1](UPB_PARSE_ARGS);
  }
  return fastdecode_err(d);
}
UPB_FORCEINLINE
static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes,
int valbytes, upb_card card, bool zigzag,
_upb_field_parser **funcs) {
int valbytes, upb_card card, bool zigzag) {
uint64_t val;
void *dst;
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
RETURN_GENERIC("varint field tag mismatch\n");
}
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, tagbytes, valbytes,
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, valbytes,
card);
val = (uint8_t)ptr[tagbytes];
ptr += tagbytes + 1;
val = (uint8_t)ptr[-1];
if (UPB_UNLIKELY(val & 0x80)) {
uint32_t byte = (uint8_t)ptr[tagbytes + 1];
val += (byte - 1) << 7;
if (UPB_UNLIKELY(byte & 0x80)) {
ptr += tagbytes;
data = (uint32_t)(val << 18 | data >> 48);
return fastdecode_longvarintjmp(UPB_PARSE_ARGS, funcs);
for (int i = 0; i < 8; i++) {
ptr++;
uint64_t byte = (uint8_t)ptr[-1];
val += (byte - 1) << (7 + 7 * i);
if (UPB_LIKELY((byte & 0x80) == 0)) goto done;
}
ptr += tagbytes + 2;
} else {
ptr += tagbytes + 1;
ptr++;
uint64_t byte = (uint8_t)ptr[-1];
if (byte > 1) return fastdecode_err(d);
val += (byte - 1) << 63;
}
done:
val = fastdecode_munge(val, valbytes, zigzag);
memcpy(dst, &val, valbytes);
return fastdecode_dispatch(d, ptr, msg, table, hasbits);
@ -220,52 +174,13 @@ static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes,
#define b_ZZ false
#define v_ZZ false
/* Generate varint callbacks. */
/* FUNCNAME builds the identifier of one fallback parser:
 * upb_pl<type><valbytes>_<varintbytes>bv. */
#define FUNCNAME(type, valbytes, varintbytes) \
upb_pl##type##valbytes##_##varintbytes##bv
/* TABLENAME builds the identifier of the per-(type, valbytes) table of
 * fallback parsers: upb_pl<type><valbytes>_table. */
#define TABLENAME(type, valbytes) \
upb_pl##type##valbytes##_table
/* F defines one static fallback parser that forwards to
 * fastdecode_longvarint with a fixed valbytes, a fixed varintbytes, and the
 * zigzag flag taken from the <type>_ZZ macro. */
#define F(type, valbytes, varintbytes) \
static const char *FUNCNAME(type, valbytes, varintbytes)(UPB_PARSE_PARAMS) { \
return fastdecode_longvarint(UPB_PARSE_ARGS, valbytes, varintbytes, \
type##_ZZ); \
}
/* FALLBACKS defines the eight parsers for varint lengths 3..10 and an
 * 8-entry table of pointers to them, ordered so that index 0 is the 3-byte
 * parser and index 7 is the 10-byte parser. */
#define FALLBACKS(type, valbytes) \
F(type, valbytes, 3) \
F(type, valbytes, 4) \
F(type, valbytes, 5) \
F(type, valbytes, 6) \
F(type, valbytes, 7) \
F(type, valbytes, 8) \
F(type, valbytes, 9) \
F(type, valbytes, 10) \
static _upb_field_parser *TABLENAME(type, valbytes)[8] = { \
&FUNCNAME(type, valbytes, 3), &FUNCNAME(type, valbytes, 4), \
&FUNCNAME(type, valbytes, 5), &FUNCNAME(type, valbytes, 6), \
&FUNCNAME(type, valbytes, 7), &FUNCNAME(type, valbytes, 8), \
&FUNCNAME(type, valbytes, 9), &FUNCNAME(type, valbytes, 10)};
/* Instantiate the fallbacks for each (type, valbytes) combination in use. */
FALLBACKS(b, 1)
FALLBACKS(v, 4)
FALLBACKS(v, 8)
FALLBACKS(z, 4)
FALLBACKS(z, 8)
/* TABLENAME is not undefined here. */
#undef F
#undef FALLBACKS
#undef FUNCNAME
/* Generate all varint functions.
* {s,o,r} x {b1,v4,z4,v8,z8} x {1bt,2bt} */
#define F(card, type, valbytes, tagbytes) \
const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
return fastdecode_varint(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \
type##_ZZ, TABLENAME(type, valbytes)); \
type##_ZZ); \
}
#define TYPES(card, tagbytes) \
@ -313,7 +228,7 @@ static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes,
RETURN_GENERIC("string field tag mismatch\n");
}
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, tagbytes,
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits,
sizeof(upb_strview), card);
len = (int8_t)ptr[tagbytes];
str = ptr + tagbytes + 1;
@ -344,12 +259,31 @@ const char *upb_pos_2bt(UPB_PARSE_PARAMS) {
/* message fields *************************************************************/
UPB_FORCEINLINE
bool fastdecode_boundscheck2(const char *ptr, unsigned len, const char *end) {
uintptr_t uptr = (uintptr_t)ptr;
uintptr_t uend = (uintptr_t)end;
uintptr_t res = uptr + len;
return res < uptr || res > uend;
/* Decodes the length prefix of a length-delimited sub-message, narrows the
 * decoder limits to that sub-message, and dispatches into it.
 *
 * On entry `ptr` is one past the first length byte (that byte is read as
 * ptr[-1]). `msg` and `table` are the message and layout handed to
 * fastdecode_dispatch, and `saved_limit` is the limit the length is checked
 * against.
 *
 * Returns the result of fastdecode_dispatch, or the result of fastdecode_err
 * if the length is too large or fails the bounds check. */
UPB_NOINLINE static
const char *fastdecode_lendelim_submsg(upb_decstate *d, const char *ptr, upb_msg *msg,
const upb_msglayout *table, uint64_t hasbits, const char* saved_limit) {
size_t len = (uint8_t)ptr[-1];
if (UPB_UNLIKELY(len & 0x80)) {
/* Multi-byte length. `len` still contains the continuation bit of the
 * previous byte; adding (byte - 1) << shift adds the new byte's bits and
 * subtracts 1 << shift, which cancels that leftover continuation bit. */
for (int i = 0; i < 3; i++) {
ptr++;
size_t byte = (uint8_t)ptr[-1];
len += (byte - 1) << (7 + 7 * i);
if (UPB_LIKELY((byte & 0x80) == 0)) goto done;
}
/* Fifth and final length byte: it contributes bits 28 and up. */
ptr++;
size_t byte = (uint8_t)ptr[-1];
/* The length is limited to 2 GB, not 4 GB, so the last byte must be below 8
 * rather than below 16 as would normally be expected for a 32-bit varint. */
if (UPB_UNLIKELY(byte >= 8)) return fastdecode_err(d);
len += (byte - 1) << 28;
}
done:
/* NOTE(review): fastdecode_boundscheck is not defined in the visible part of
 * this file; a true result is treated as "out of bounds" here. */
if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, len, saved_limit))) {
return fastdecode_err(d);
}
/* Restrict parsing to the sub-message; the fast limit is capped at fastend. */
d->limit = ptr + len;
d->fastlimit = UPB_MIN(d->limit, d->fastend);
return fastdecode_dispatch(d, ptr, msg, table, hasbits);
}
UPB_FORCEINLINE
@ -360,90 +294,88 @@ static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes,
RETURN_GENERIC("submessage field tag mismatch\n");
}
if (--d->depth < 0) return fastdecode_err(d);
if (--d->depth == 0) return fastdecode_err(d);
upb_msg **submsg;
upb_array *arr;
void *end;
uint16_t submsg_idx = data >> 32;
uint32_t submsg_idx = data;
submsg_idx >>= 16;
const upb_msglayout *subl = table->submsgs[submsg_idx];
submsg = fastdecode_getfield_ofs(d, ptr, msg, &data, &hasbits, &arr, &end,
tagbytes, sizeof(upb_msg *), card, true);
sizeof(upb_msg *), card, true);
if (card == CARD_r) {
if (UPB_UNLIKELY(!submsg)) {
RETURN_GENERIC("need array resize\n");
}
}
if (card == CARD_s) {
*(uint32_t*)msg |= hasbits >> 16;
hasbits = 0;
}
const char *saved_limit = d->limit;
const char *saved_fastlimit = d->fastlimit;
again:
if (card == CARD_r) {
if (UPB_UNLIKELY(submsg == end)) {
if (UPB_LIKELY(arr != NULL)) {
size_t old_size = arr->size;
size_t old_bytes = old_size * sizeof(upb_msg*);
size_t new_size = old_size * 2;
size_t new_bytes = new_size * sizeof(upb_msg*);
char *old_ptr = _upb_array_ptr(arr);
if (UPB_UNLIKELY((size_t)(d->arena_end - d->arena_ptr) < new_bytes)) {
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
d->depth++;
RETURN_GENERIC("repeated realloc failed: arena full");
}
memcpy(d->arena_ptr, old_ptr, old_bytes);
arr->size = new_size;
arr->data = _upb_array_tagptr(d->arena_ptr, 3);
submsg = (void*)(d->arena_ptr + (old_size * sizeof(upb_msg*)));
end = (void*)(d->arena_ptr + (new_size * sizeof(upb_msg*)));
d->arena_ptr += new_bytes;
} else {
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
d->depth++;
RETURN_GENERIC("need array realloc\n");
size_t old_size = arr->size;
size_t old_bytes = old_size * sizeof(upb_msg*);
size_t new_size = old_size * 2;
size_t new_bytes = new_size * sizeof(upb_msg*);
char *old_ptr = _upb_array_ptr(arr);
if (UPB_UNLIKELY((size_t)(d->arena_end - d->arena_ptr) < new_bytes)) {
goto repeated_generic;
}
memcpy(d->arena_ptr, old_ptr, old_bytes);
arr->size = new_size;
arr->data = _upb_array_tagptr(d->arena_ptr, 3);
submsg = (void*)(d->arena_ptr + (old_size * sizeof(upb_msg*)));
end = (void*)(d->arena_ptr + (new_size * sizeof(upb_msg*)));
d->arena_ptr += new_bytes;
}
}
upb_msg* child = *submsg;
{
uint32_t len = (uint8_t)ptr[tagbytes];
if (UPB_UNLIKELY(len & 0x80)) {
uint32_t byte = (uint8_t)ptr[tagbytes + 1];
len += (byte - 1) << 7;
if (UPB_UNLIKELY(byte & 0x80)) {
if (card == CARD_r) {
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
}
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
d->depth++;
RETURN_GENERIC("submessage field len >2 bytes\n");
}
ptr++;
}
ptr += tagbytes + 1;
if (UPB_UNLIKELY(fastdecode_boundscheck2(ptr, len, saved_limit))) {
return fastdecode_err(d);
}
d->limit = ptr + len;
d->fastlimit = UPB_MIN(d->limit, d->fastend);
if (card == CARD_r || UPB_LIKELY(!child)) {
*submsg = child = decode_newmsg_ceil(d, subl, msg_ceil_bytes);
}
if (card == CARD_r || UPB_LIKELY(!*submsg)) {
*submsg = decode_newmsg_ceil(d, subl, msg_ceil_bytes);
}
ptr = fastdecode_dispatch(d, ptr, *submsg, subl, 0);
submsg++;
ptr += tagbytes + 1;
ptr = fastdecode_lendelim_submsg(d, ptr, child, subl, 0, saved_limit);
if (UPB_UNLIKELY(ptr != d->limit || d->end_group != 0)) {
return fastdecode_err(d);
}
if (card == CARD_r) {
if (UPB_LIKELY(ptr < saved_fastlimit) &&
fastdecode_readtag(ptr, tagbytes) == (uint16_t)data) {
goto again;
submsg++;
if (UPB_LIKELY(ptr < saved_fastlimit)) {
uint32_t tag = fastdecode_load_tag(ptr);
if (tagbytes == 1) {
if ((uint8_t)tag == (uint8_t)data) goto again;
} else {
if ((uint16_t)tag == (uint16_t)data) goto again;
}
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
d->depth++;
return fastdecode_tag_dispatch(d, ptr, msg, table, hasbits, tag);
} else {
if (ptr == saved_limit) {
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
d->depth++;
return ptr;
}
goto repeated_generic;
}
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
}
d->limit = saved_limit;
@ -451,6 +383,13 @@ again:
d->depth++;
return fastdecode_dispatch(d, ptr, msg, table, hasbits);
repeated_generic:
arr->len = submsg - (upb_msg**)_upb_array_ptr(arr);
d->limit = saved_limit;
d->fastlimit = saved_fastlimit;
d->depth++;
RETURN_GENERIC("repeated generic");
}
#define F(card, tagbytes, size_ceil, ceil_arg) \

@ -795,6 +795,9 @@ void TryFillTableEntry(const protobuf::Descriptor* message,
if (layout.HasHasbit(field)) {
hasbit_index = layout.GetHasbitIndex(field);
if (hasbit_index > 31) return;
// The hasbits mask in the parser occupies bits 16-47 (inclusive)
// of the 64-bit register.
hasbit_index += 16; // account for the shifted hasbits
}
MessageLayout::Size data;
@ -805,8 +808,8 @@ void TryFillTableEntry(const protobuf::Descriptor* message,
if (field->type() == protobuf::FieldDescriptor::TYPE_MESSAGE) {
SubmsgArray submsg_array = GetSubmsgArray(message);
uint64_t idx = submsg_array.indexes[field->message_type()];
data.size32 |= idx << 32 | hasbit_index << 16;
data.size64 |= idx << 32 | hasbit_index << 16;
data.size32 |= idx << 16 | hasbit_index << 32;
data.size64 |= idx << 16 | hasbit_index << 32;
} else {
uint32_t hasbit_mask = 1U << hasbit_index;
data.size32 |= (uint64_t)hasbit_mask << 16;

Loading…
Cancel
Save