/*
 * Protocol Buffers - Google's data interchange format (a gRPC dependency).
 * https://developers.google.com/protocol-buffers/
 */
#include "upb/decode.h"
#include "upb/port_def.inc"
/*
 * UPB_PARSE_PARAMS is the parameter list shared by every field parser in this
 * file; UPB_PARSE_ARGS forwards those same parameters, in the same order, to
 * another parser.
 */
#define UPB_PARSE_PARAMS                                                 \
  upb_decstate *d, const char *ptr, upb_msg *msg, upb_fasttable *table,  \
      uint64_t hasbits, uint64_t data

#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data
/*
 * Out-of-line helpers. Only their declarations are visible in this part of the
 * file.
 */

/* Called by the parsers below when they reject the input (a failed bounds
 * check, an over-long length varint, ...); its return value is handed back to
 * the caller unchanged. */
const char *fastdecode_err(upb_decstate *d);

/* Called by the repeated-field parsers when the destination array has no room
 * left. `needbytes` is what the caller needs room for: the element size for
 * the one-at-a-time parsers, the payload length for the packed parser. */
const char *fastdecode_reallocarr(upb_decstate *d, const char *ptr,
                                  upb_msg *msg, upb_fasttable *table,
                                  size_t needbytes);
4 years ago
4 years ago
UPB_NOINLINE
4 years ago
static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr,
upb_msg *msg, upb_fasttable *table,
uint64_t hasbits) {
4 years ago
uint16_t tag;
uint64_t data;
if (UPB_UNLIKELY(ptr >= d->fastlimit)) return ptr;
memcpy(&tag, ptr, 2);
data = table->field_data[(tag & 0xf7) >> 3] ^ tag;
4 years ago
return table->field_parser[(tag & 0xf7) >> 3](UPB_PARSE_ARGS);
}
4 years ago
/*
 * Returns true when the low `tagbytes` bytes of `data` are all zero.
 *
 * fastdecode_dispatch() XORs the wire tag into `data`, so a zero result means
 * the tag on the wire matched the one stored in the table slot. Any
 * `tagbytes` other than 1 is treated as 2.
 */
UPB_FORCEINLINE
static bool fastdecode_checktag(uint64_t data, int tagbytes) {
  const uint64_t tagmask = (tagbytes == 1) ? 0xff : 0xffff;
  return (data & tagmask) == 0;
}
4 years ago
/*
 * Returns true when the range [ptr, ptr + len) is NOT acceptable: either the
 * address computation wraps around, or the range extends past `end`.
 * Returns false when the range fits.
 */
UPB_FORCEINLINE
bool fastdecode_boundscheck(const char *ptr, unsigned len, const char *end) {
  const uintptr_t first = (uintptr_t)ptr;
  const uintptr_t past_last = first + len;

  if (past_last < first) return true; /* Address arithmetic wrapped. */
  return past_last > (uintptr_t)end;
}
/*
 * Decodes a multi-byte (2 to 5 byte) length varint.
 *
 * ptr:  first byte of the varint; its continuation bit (0x80) must be set.
 * size: receives the decoded value. It is written even when decoding fails.
 * out:  on success, set to the first byte after the varint.
 *
 * Returns false when the fifth byte still has its continuation bit set.
 *
 * The bytes are read as unsigned values. The accumulation relies on each
 * continuation byte contributing an extra 128 << (7 * i), which the
 * `(byte - 1) << (7 * (i + 1))` term of the following byte cancels. Reading
 * through a signed char would sign-extend those bytes and corrupt the sum.
 */
UPB_FORCEINLINE
static bool fastdecode_readlongsize(const char *ptr, uint32_t *size,
                                    const char **out) {
  const uint8_t *bytes = (const uint8_t *)ptr;
  int i;

  UPB_ASSERT((bytes[0] & 0x80) != 0);
  *size = bytes[0];

  for (i = 1; i < 5; i++) {
    uint32_t byte = bytes[i];
    *size += (byte - 1) << (7 * i);
    if (UPB_LIKELY(byte < 128)) {
      *out = ptr + i + 1;
      return true;
    }
  }
  return false;
}
/*
 * Copies the first `tagbytes` bytes at `ptr` into a zero-initialised uint16_t
 * and returns it, so a one-byte tag leaves the remaining byte zero.
 */
UPB_FORCEINLINE
static uint16_t fastdecode_readtag(const char *ptr, int tagbytes) {
  uint16_t tag = 0;

  memcpy(&tag, ptr, tagbytes);
  return tag;
}
4 years ago
/*
 * Field cardinality selector passed to the generic parsers below. The letter
 * suffix matches the one used in the generated parser names (upb_p<card>...).
 */
typedef enum {
  CARD_s = 0, /* fastdecode_getfield() records the field in the hasbits. */
  CARD_o,     /* fastdecode_getfield() stores a oneof case value. */
  CARD_r,     /* fastdecode_getfield() appends to a upb_array. */
  CARD_p      /* Presumably "packed"; not handled by fastdecode_getfield(). */
} upb_card;
/*
 * Locates the storage a field value should be written to and performs the
 * per-cardinality bookkeeping.
 *
 * Layout of *data on entry, as read by this function:
 *   bits 48..63  byte offset of the field inside `msg`
 *   bits 32..47  value stored into the oneof case slot (CARD_o only)
 *   bits 16..31  byte offset of the oneof case slot inside `msg` (CARD_o only)
 *
 * CARD_s: ORs *data into *hasbits and returns the field address.
 * CARD_o: writes the case value and returns the field address.
 * CARD_r: flushes *hasbits (shifted right by 16) into the first 32 bits of
 *         `msg`, clears *hasbits, and returns the address of the next free
 *         element of the array, creating a 4-element array from the decoder's
 *         arena block when the field has none yet. On return, the low 32 bits
 *         of *data hold the number of free elements and bits 32..47 hold the
 *         tag read at `ptr`; d->arr points at the array.
 *         When the arena block is too small, *data is set to 0 and NULL is
 *         returned, so the caller's "no free elements" check routes to
 *         fastdecode_reallocarr().
 *
 * store_len: when true, arr->len is provisionally set to size + 1. The
 *            element-at-a-time parsers later subtract the remaining free
 *            count from it, which yields the real length. When false,
 *            arr->len keeps the real element count and the caller adds to it,
 *            so a freshly created array must start at 0.
 *
 * Any other `card` value is unreachable.
 */
UPB_FORCEINLINE
static void *fastdecode_getfield(upb_decstate *d, const char *ptr, upb_msg *msg,
                                 uint64_t *data, uint64_t *hasbits,
                                 upb_card card, int tagbytes, int valbytes,
                                 bool store_len) {
  void *field = (char *)msg + (*data >> 48);

  switch (card) {
    case CARD_s:
      *hasbits |= *data;
      return field;
    case CARD_o: {
      uint32_t *case_ptr = UPB_PTR_AT(msg, (*data >> 16) & 0xffff, uint32_t);
      *case_ptr = (*data >> 32) & 0xffff;
      return field;
    }
    case CARD_r: {
      upb_array **arr_p = field;
      upb_array *arr;
      uint64_t elem_avail;
      uint16_t expected_tag;

      *hasbits >>= 16;
      *(uint32_t*)msg |= *hasbits;
      *hasbits = 0;

      if (UPB_LIKELY(!*arr_p)) {
        /* Array header followed directly by room for four elements. */
        size_t need = (valbytes * 4) + sizeof(upb_array);
        if (UPB_UNLIKELY((size_t)(d->arena_end - d->arena_ptr) < need)) {
          *data = 0;
          return NULL;
        }
        arr = (void*)d->arena_ptr;
        field = arr + 1;
        arr->data = (uintptr_t)field;
        *arr_p = arr;
        arr->size = 4;
        /* Never leave len unset: callers that pass store_len == false add the
         * parsed element count to it. */
        arr->len = store_len ? 5 : 0;
        elem_avail = 4;
        d->arena_ptr += need;
      } else {
        arr = *arr_p;
        field = _upb_array_ptr(arr);
        elem_avail = arr->size - arr->len;
        field = (char*)field + (arr->len * valbytes);
        if (store_len) arr->len = arr->size + 1;
      }

      expected_tag = fastdecode_readtag(ptr, tagbytes);
      *data = elem_avail | ((uint64_t)expected_tag << 32);
      d->arr = arr;
      return field;
    }
    default:
      UPB_UNREACHABLE();
  }
}
4 years ago
/* string fields **************************************************************/
/*
 * Slow path for a string whose length needs a multi-byte varint.
 *
 * `ptr` is handed straight to fastdecode_readlongsize(), so it must point at
 * the first byte of the length varint. The string view `dst` is filled in to
 * alias the input buffer (no copy) before the bounds check runs. On success
 * parsing continues through fastdecode_dispatch() just past the string;
 * otherwise the result of fastdecode_err() is returned.
 */
UPB_NOINLINE
static const char *fastdecode_longstring(upb_decstate *d, const char *ptr,
                                         upb_msg *msg, upb_fasttable *table,
                                         uint64_t hasbits, upb_strview *dst) {
  uint32_t size;
  const char *payload;

  if (!fastdecode_readlongsize(ptr, &size, &payload)) {
    return fastdecode_err(d);
  }

  dst->size = size;
  dst->data = payload;

  if (UPB_UNLIKELY(fastdecode_boundscheck(payload, size, d->limit))) {
    return fastdecode_err(d);
  }

  return fastdecode_dispatch(d, payload + size, msg, table, hasbits);
}
4 years ago
/*
 * Generic parser for string fields; specialised by the upb_p?s_?bt wrappers.
 *
 * tagbytes: number of bytes the field's tag occupies on the wire (1 or 2).
 * card:     CARD_s, CARD_o or CARD_r.
 *
 * Returns `ptr` unchanged when the tag does not match this field. Otherwise
 * the string view(s) are pointed at the input buffer (no copy) and parsing
 * continues through fastdecode_dispatch(). For CARD_r, consecutive elements
 * with the same tag are consumed in a loop while the array has free slots
 * and `ptr` stays below d->fastlimit.
 *
 * The low 32 bits of `data` count the free array slots (see
 * fastdecode_getfield()); subtracting them from the provisional array length
 * yields the real length.
 */
UPB_FORCEINLINE
static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes,
                                     upb_card card) {
  upb_strview *dst;

  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
    return ptr;
  }

  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, card, tagbytes,
                            sizeof(upb_strview), true);

again:
  if (card == CARD_r) {
    if (UPB_UNLIKELY((uint32_t)data == 0)) {
      /* No free slot left (or the array could not be created). */
      return fastdecode_reallocarr(d, ptr, msg, table, sizeof(upb_strview));
    }
  }

  {
    /* A negative value means the continuation bit is set, i.e. the length
     * does not fit in one byte. The explicit int8_t cast keeps this true
     * regardless of whether plain char is signed. */
    int64_t len = (int8_t)ptr[tagbytes];
    if (UPB_UNLIKELY(len < 0)) {
      if (card == CARD_r) {
        d->arr->len -= (uint32_t)data;
      }
      /* fastdecode_longstring() expects to start at the length varint, so
       * step over the tag first. */
      return fastdecode_longstring(d, ptr + tagbytes, msg, table, hasbits,
                                   dst);
    }
    ptr += tagbytes + 1;
    dst->data = ptr;
    dst->size = len;
    /* fastdecode_boundscheck() returns true when the range is out of bounds. */
    if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, len, d->limit))) {
      return fastdecode_err(d);
    }
    ptr += len;
  }

  if (card == CARD_r) {
    if (UPB_LIKELY(ptr < d->fastlimit) &&
        fastdecode_readtag(ptr, tagbytes) == (uint16_t)(data >> 32)) {
      data--;
      dst++;
      goto again;
    }
    d->arr->len -= (uint32_t)data;
  }

  return fastdecode_dispatch(d, ptr, msg, table, hasbits);
}
/*
 * String-field entry points. Each one is fastdecode_string() specialised for
 * a tag width (1 or 2 bytes) and a cardinality (CARD_s, CARD_o, CARD_r).
 */
#define FASTDECODE_STRING_ENTRY(name, tagbytes, card)          \
  const char *name(UPB_PARSE_PARAMS) {                         \
    return fastdecode_string(UPB_PARSE_ARGS, tagbytes, card);  \
  }

FASTDECODE_STRING_ENTRY(upb_pss_1bt, 1, CARD_s)
FASTDECODE_STRING_ENTRY(upb_pos_1bt, 1, CARD_o)
FASTDECODE_STRING_ENTRY(upb_prs_1bt, 1, CARD_r)

FASTDECODE_STRING_ENTRY(upb_pss_2bt, 2, CARD_s)
FASTDECODE_STRING_ENTRY(upb_pos_2bt, 2, CARD_o)
FASTDECODE_STRING_ENTRY(upb_prs_2bt, 2, CARD_r)

#undef FASTDECODE_STRING_ENTRY
/* fixed fields ***************************************************************/
4 years ago
/*
 * Generic parser for non-packed fixed-width fields.
 *
 * tagbytes: width of the field's tag on the wire (1 or 2).
 * valbytes: width of the value (4 or 8).
 * card:     CARD_s, CARD_o or CARD_r.
 * fallback: parser to try when the tag does not match, or NULL.
 *
 * On a tag mismatch without a fallback, `ptr` is returned unchanged. With a
 * fallback, the wire-type bits of `data` are re-targeted and the fallback is
 * invoked. Otherwise `valbytes` bytes are copied into the field. For CARD_r,
 * consecutive elements with the same tag are consumed while the array has
 * free slots and `ptr` stays below d->fastlimit. Parsing then continues
 * through fastdecode_dispatch().
 */
UPB_FORCEINLINE
static const char *fastdecode_fixed(UPB_PARSE_PARAMS, int tagbytes,
                                    int valbytes, upb_card card,
                                    _upb_field_parser *fallback) {
  char *out;

  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
    if (!fallback) return ptr;
    /* Patch data to match the other wire type: flip between wire type 2 and
     * the fixed-width wire type (5 for 4-byte values, 1 for 8-byte values). */
    data ^= 0x2 ^ (valbytes == 4 ? 5 : 1);
    return fallback(UPB_PARSE_ARGS);
  }

  out = fastdecode_getfield(d, ptr, msg, &data, &hasbits, card, tagbytes,
                            valbytes, true);

  for (;;) {
    if (card == CARD_r && UPB_UNLIKELY((uint32_t)data == 0)) {
      /* No free slot left (or the array could not be created). */
      return fastdecode_reallocarr(d, ptr, msg, table, valbytes);
    }

    ptr += tagbytes;
    memcpy(out, ptr, valbytes);
    ptr += valbytes;

    if (card != CARD_r) break;

    if (UPB_LIKELY(ptr < d->fastlimit) &&
        fastdecode_readtag(ptr, tagbytes) == (uint16_t)(data >> 32)) {
      /* Same field again: consume one free slot and keep going. */
      data--;
      out += valbytes;
      continue;
    }

    /* Turn the provisional length into the real one. */
    d->arr->len -= (uint32_t)data;
    break;
  }

  return fastdecode_dispatch(d, ptr, msg, table, hasbits);
}
4 years ago
/*
 * Slow path used by fastdecode_fixedpacked() when the packed payload length
 * does not fit in a single byte. Only declared here; the definition is not in
 * this part of the file.
 */
UPB_NOINLINE
const char *fastdecode_longfixedpacked(upb_decstate *d, const char *ptr,
                                       upb_msg *msg, upb_fasttable *table,
                                       uint64_t hasbits, void *dst);
4 years ago
UPB_FORCEINLINE
4 years ago
static const char *fastdecode_fixedpacked(UPB_PARSE_PARAMS, int tagbytes, int valbytes,
_upb_field_parser *fallback) {
4 years ago
char *dst;
4 years ago
char *end;
uint32_t len;
4 years ago
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {
if (fallback) {
// Patch data to amtch packed wiretype.
data ^= 0x2 ^ (valbytes == 4 ? 5 : 1);
4 years ago
return fallback(UPB_PARSE_ARGS);
4 years ago
} else {
return ptr;
}
}
4 years ago
4 years ago
dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, CARD_r, tagbytes,
valbytes, false);
4 years ago
4 years ago
len = ptr[tagbytes];
if (UPB_UNLIKELY(len < 0)) {
return fastdecode_longfixedpacked(d, ptr, msg, table, hasbits, dst);
}
4 years ago
4 years ago
if (UPB_UNLIKELY(len > (size_t)(uint32_t)data * valbytes)) {
return fastdecode_reallocarr(d, ptr, msg, table, len);
4 years ago
}
4 years ago
4 years ago
ptr += tagbytes;
4 years ago
4 years ago
if (UPB_UNLIKELY((len & (valbytes - 1)) != 0 || /* Non-multiple length. */
fastdecode_boundscheck(ptr, len, d->limit))) {
return fastdecode_err(d);
4 years ago
}
4 years ago
end = dst + len;
d->arr->len += len / valbytes;
4 years ago
4 years ago
while (dst < end) {
memcpy(dst, ptr, valbytes); /* Inline memcpy() loop */
4 years ago
dst += valbytes;
ptr += valbytes;
4 years ago
}
4 years ago
return fastdecode_dispatch(d, ptr, msg, table, hasbits);
4 years ago
}
4 years ago
UPB_NOINLINE
const char *upb_prf8_1bt(UPB_PARSE_PARAMS);
UPB_NOINLINE
const char *upb_ppf8_1bt(UPB_PARSE_PARAMS) {
return fastdecode_fixedpacked(UPB_PARSE_ARGS, 1, 8, &upb_prf8_1bt);
}
UPB_NOINLINE
const char *upb_psf8_1bt(UPB_PARSE_PARAMS) {
return fastdecode_fixed(UPB_PARSE_ARGS, 1, 8, CARD_s, NULL);
}
UPB_NOINLINE
const char *upb_pof8_1bt(UPB_PARSE_PARAMS) {
return fastdecode_fixed(UPB_PARSE_ARGS, 1, 8, CARD_o, NULL);
}
4 years ago
UPB_NOINLINE
const char *upb_prf8_1bt(UPB_PARSE_PARAMS) {
4 years ago
return fastdecode_fixed(UPB_PARSE_ARGS, 1, 8, CARD_r, &upb_ppf8_1bt);
4 years ago
}
4 years ago
#if 0
4 years ago
// Generate all fixed functions.
// {s,o,r,p} x {f4,f8} x {1bt,2bt}
//
// F(card, valbytes, tagbytes) defines upb_p<card>f<valbytes>_<tagbytes>bt as
// a call to fastdecode_fixed(); TYPES expands F for both value widths and
// TAGBYTES expands TYPES for both tag widths.
//
// NOTE(review): this section is compiled out (#if 0) and no longer matches
// the live code above:
//   - fastdecode_fixed() now takes a trailing `fallback` argument that F does
//     not pass;
//   - upb_ppf8_1bt, upb_psf8_1bt, upb_pof8_1bt and upb_prf8_1bt are already
//     defined by hand above, so enabling this would define them twice;
//   - F, TYPES and TAGBYTES are defined again further down without an
//     intervening #undef.
#define F(card, valbytes, tagbytes) \
const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
return fastdecode_fixed(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card); \
}
#define TYPES(card, tagbytes) \
F(card, 4, tagbytes) \
F(card, 8, tagbytes)
#define TAGBYTES(card) \
TYPES(card, 1) \
TYPES(card, 2)
TAGBYTES(s)
TAGBYTES(o)
TAGBYTES(r)
TAGBYTES(p)
/*
 * Post-processes a decoded varint according to the field type:
 *   valbytes == 1: collapse to 0/1;
 *   zigzag:        undo zigzag encoding in 32-bit (valbytes == 4) or 64-bit
 *                  (valbytes == 8) arithmetic; other widths are unreachable;
 *   otherwise:     return the value unchanged.
 */
UPB_FORCEINLINE uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) {
  if (valbytes == 1) return val != 0;
  if (!zigzag) return val;

  switch (valbytes) {
    case 4: {
      uint32_t n = val;
      return (n >> 1) ^ -(int32_t)(n & 1);
    }
    case 8:
      return (val >> 1) ^ -(int64_t)(val & 1);
    default:
      break;
  }
  UPB_UNREACHABLE();
}
UPB_FORCEINLINE
4 years ago
static const char *fastdecode_longvarint_impl(const char *ptr, void *field,
int valbytes) {
4 years ago
// The algorithm relies on sign extension to set all high bits when the varint
// continues. This way it can use "and" to aggregate in to the result.
const int8_t *p = (const int8_t*)(ptr);
int64_t res1 = *p;
uint64_t ones = res1; // save the useful high bit 1's in res1
uint64_t byte;
int64_t res2, res3;
int sign_bit;
4 years ago
// However this requires the low bits after shifting to be 1's as well. On
// x86_64 a shld from a single register filled with enough 1's in the high
// bits can accomplish all this in one instruction. It so happens that res1
// has 57 high bits of ones, which is enough for the largest shift done.
assert(res1 >> 7 == -1);
#define SHLD(n) byte = ((byte << (n * 7)) | (ones >> (64 - (n * 7))))
// Micro benchmarks show a substantial improvement to capture the sign
// of the result in the case of just assigning the result of the shift
// (ie first 2 steps).
#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
#define SHLD_SIGN(n) \
__asm__("shldq %3, %2, %1" \
: "=@ccs"(sign_bit), "+r"(byte) \
: "r"(ones), "i"(n * 7))
#else
#define SHLD_SIGN(n) \
do { \
SHLD(n); \
sign_bit = (int64_t)(byte) < 0; \
} while (0)
#endif
byte = p[1];
SHLD_SIGN(1);
res2 = byte;
if (!sign_bit) goto done2;
byte = p[2];
SHLD_SIGN(2);
res3 = byte;
if (!sign_bit) goto done3;
byte = p[3];
SHLD(3);
res1 &= byte;
if (res1 >= 0) goto done4;
byte = p[4];
SHLD(4);
res2 &= byte;
if (res2 >= 0) goto done5;
byte = p[5];
SHLD(5);
res3 &= byte;
if (res3 >= 0) goto done6;
byte = p[6];
SHLD(6);
res1 &= byte;
if (res1 >= 0) goto done7;
byte = p[7];
SHLD(7);
res2 &= byte;
if (res2 >= 0) goto done8;
byte = p[8];
SHLD(8);
res3 &= byte;
if (res3 >= 0) goto done9;
byte = p[9];
// Last byte only contains 0 or 1 for valid 64bit varints. If it's 0 it's
// a denormalized varint that shouldn't happen. The continuation bit of byte
// 9 has already the right value hence just expect byte to be 1.
if (UPB_LIKELY(byte == 1)) goto done10;
if (byte == 0) {
res3 ^= (uint64_t)(1) << 63;
goto done10;
}
return NULL; // Value is too long to be a varint64
4 years ago
#define DONE(n) \
done##n : { \
uint64_t val = res1 & res2 & res3; \
memcpy(field, &val, valbytes); \
return (const char *)p + n; \
4 years ago
};
done2 : {
uint64_t val = res1 & res2;
memcpy(field, &val, valbytes);
4 years ago
return (const char*)p + 2;
4 years ago
}
DONE(3)
DONE(4)
DONE(5)
DONE(6)
DONE(7)
DONE(8)
DONE(9)
DONE(10)
4 years ago
4 years ago
#undef DONE
4 years ago
#undef SHLD
#undef SHLD_SIGN
4 years ago
}
UPB_NOINLINE
static const char *fastdecode_longvarint32(UPB_PARSE_PARAMS) {
4 years ago
(void)d;
(void)msg;
(void)table;
(void)hasbits;
return fastdecode_longvarint_impl(ptr, (void*)data, 4);
4 years ago
}
UPB_NOINLINE
static const char *fastdecode_longvarint64(UPB_PARSE_PARAMS) {
4 years ago
(void)d;
(void)msg;
(void)table;
(void)hasbits;
return fastdecode_longvarint_impl(ptr, (void*)data, 8);
4 years ago
}
/*
 * Selects the out-of-line multi-byte varint decoder for the field width.
 * Only widths 4 and 8 are handled; anything else is unreachable.
 */
UPB_FORCEINLINE
static const char *fastdecode_longvarint(UPB_PARSE_PARAMS, int valbytes) {
  switch (valbytes) {
    case 4:
      return fastdecode_longvarint32(UPB_PARSE_ARGS);
    case 8:
      return fastdecode_longvarint64(UPB_PARSE_ARGS);
    default:
      break;
  }
  UPB_UNREACHABLE();
}
UPB_FORCEINLINE
4 years ago
static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes,
int valbytes, bool zigzag, bool oneof) {
uint64_t val = 0;
4 years ago
void *field;
if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) return ptr;
ptr += tagbytes;
4 years ago
fastdecode_getfield(msg, data, &hasbits, oneof);
4 years ago
field = (char*)msg + (data >> 48);
if (UPB_UNLIKELY(*ptr < 0)) {
4 years ago
return fastdecode_longvarint(d, ptr, msg, table, hasbits, (uint64_t)field,
valbytes);
4 years ago
}
4 years ago
val = fastdecode_munge(*ptr, valbytes, zigzag);
4 years ago
memcpy(field, &val, valbytes);
return fastdecode_dispatch(d, ptr + 1, msg, table, hasbits);
}
4 years ago
/*
 * Generate all varint functions.
 * {s,o,r} x {b1,v4,z4,v8,z8} x {1bt,2bt}
 *
 * <type>_ZZ says whether the type letter is zigzag-encoded.
 *
 * NOTE(review): F also expands card##_ONEOF (s_ONEOF, o_ONEOF, r_ONEOF).
 * Those names are #undef'd below, but no definition of them is visible in
 * this part of the file.
 */
#define z_ZZ true
#define b_ZZ false
#define v_ZZ false

#define F(card, type, valbytes, tagbytes)                                      \
  const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
    return fastdecode_varint(UPB_PARSE_ARGS, tagbytes, valbytes, type##_ZZ,    \
                             card##_ONEOF);                                    \
  }

#define TYPES(card, tagbytes) \
  F(card, b, 1, tagbytes)     \
  F(card, v, 4, tagbytes)     \
  F(card, v, 8, tagbytes)     \
  F(card, z, 4, tagbytes)     \
  F(card, z, 8, tagbytes)

#define TAGBYTES(card) \
  TYPES(card, 1)       \
  TYPES(card, 2)

TAGBYTES(s)
TAGBYTES(o)
TAGBYTES(r)

#undef z_ZZ
#undef b_ZZ
#undef v_ZZ
#undef o_ONEOF
#undef s_ONEOF
#undef r_ONEOF
#undef F
#undef TYPES
#undef TAGBYTES
#endif