Merge pull request #310 from haberman/fast-table
Fast table-driven parsing for upb (2+GB/s)pull/13171/head
commit
c9d2e58480
33 changed files with 2153 additions and 359 deletions
@ -0,0 +1,154 @@ |
||||
/*
|
||||
** Internal implementation details of the decoder that are shared between |
||||
** decode.c and decode_fast.c. |
||||
*/ |
||||
|
||||
#ifndef UPB_DECODE_INT_H_ |
||||
#define UPB_DECODE_INT_H_ |
||||
|
||||
#include <setjmp.h> |
||||
|
||||
#include "upb/msg.h" |
||||
#include "upb/upb.int.h" |
||||
|
||||
/* Must be last. */ |
||||
#include "upb/port_def.inc" |
||||
|
||||
typedef struct upb_decstate { |
||||
const char *end; /* Can read up to 16 bytes slop beyond this. */ |
||||
const char *limit_ptr; /* = end + UPB_MIN(limit, 0) */ |
||||
upb_msg *unknown_msg; /* If non-NULL, add unknown data at buffer flip. */ |
||||
const char *unknown; /* Start of unknown data. */ |
||||
int limit; /* Submessage limit relative to end. */ |
||||
int depth; |
||||
uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */ |
||||
bool alias; |
||||
char patch[32]; |
||||
upb_arena arena; |
||||
jmp_buf err; |
||||
} upb_decstate; |
||||
|
||||
/* Error function that will abort decoding with longjmp(). We can't declare this
|
||||
* UPB_NORETURN, even though it is appropriate, because if we do then compilers |
||||
* will "helpfully" refuse to tailcall to it |
||||
* (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
|
||||
* of our optimizations. That is also why we must declare it in a separate file, |
||||
* otherwise the compiler will see that it calls longjmp() and deduce that it is |
||||
* noreturn. */ |
||||
const char *fastdecode_err(upb_decstate *d); |
||||
|
||||
extern const uint8_t upb_utf8_offsets[]; |
||||
|
||||
UPB_INLINE |
||||
bool decode_verifyutf8_inl(const char *buf, int len) { |
||||
int i, j; |
||||
uint8_t offset; |
||||
|
||||
i = 0; |
||||
while (i < len) { |
||||
offset = upb_utf8_offsets[(uint8_t)buf[i]]; |
||||
if (offset == 0 || i + offset > len) { |
||||
return false; |
||||
} |
||||
for (j = i + 1; j < i + offset; j++) { |
||||
if ((buf[j] & 0xc0) != 0x80) { |
||||
return false; |
||||
} |
||||
} |
||||
i += offset; |
||||
} |
||||
return i == len; |
||||
} |
||||
|
||||
/* x86-64 pointers always have the high 16 bits matching. So we can shift
|
||||
* left 8 and right 8 without loss of information. */ |
||||
UPB_INLINE intptr_t decode_totable(const upb_msglayout *tablep) { |
||||
return ((intptr_t)tablep << 8) | tablep->table_mask; |
||||
} |
||||
|
||||
/* Recovers the layout pointer from a word built by decode_totable() by
 * shifting the mask bits back out. */
UPB_INLINE const upb_msglayout *decode_totablep(intptr_t table) {
  const intptr_t addr = table >> 8;
  return (const upb_msglayout *)addr;
}
||||
|
||||
UPB_INLINE |
||||
const char *decode_isdonefallback_inl(upb_decstate *d, const char *ptr, |
||||
int overrun) { |
||||
if (overrun < d->limit) { |
||||
/* Need to copy remaining data into patch buffer. */ |
||||
UPB_ASSERT(overrun < 16); |
||||
if (d->unknown_msg) { |
||||
if (!_upb_msg_addunknown(d->unknown_msg, d->unknown, ptr - d->unknown, |
||||
&d->arena)) { |
||||
return NULL; |
||||
} |
||||
d->unknown = &d->patch[0] + overrun; |
||||
} |
||||
memset(d->patch + 16, 0, 16); |
||||
memcpy(d->patch, d->end, 16); |
||||
ptr = &d->patch[0] + overrun; |
||||
d->end = &d->patch[16]; |
||||
d->limit -= 16; |
||||
d->limit_ptr = d->end + d->limit; |
||||
d->alias = false; |
||||
UPB_ASSERT(ptr < d->limit_ptr); |
||||
return ptr; |
||||
} else { |
||||
return NULL; |
||||
} |
||||
} |
||||
|
||||
const char *decode_isdonefallback(upb_decstate *d, const char *ptr, |
||||
int overrun); |
||||
|
||||
UPB_INLINE |
||||
bool decode_isdone(upb_decstate *d, const char **ptr) { |
||||
int overrun = *ptr - d->end; |
||||
if (UPB_LIKELY(*ptr < d->limit_ptr)) { |
||||
return false; |
||||
} else if (UPB_LIKELY(overrun == d->limit)) { |
||||
return true; |
||||
} else { |
||||
*ptr = decode_isdonefallback(d, *ptr, overrun); |
||||
return false; |
||||
} |
||||
} |
||||
|
||||
UPB_INLINE |
||||
const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, |
||||
upb_msg *msg, intptr_t table, |
||||
uint64_t hasbits, uint32_t tag) { |
||||
const upb_msglayout *table_p = decode_totablep(table); |
||||
uint8_t mask = table; |
||||
uint64_t data; |
||||
size_t idx = tag & mask; |
||||
UPB_ASSUME((idx & 7) == 0); |
||||
idx >>= 3; |
||||
data = table_p->fasttable[idx].field_data ^ tag; |
||||
return table_p->fasttable[idx].field_parser(d, ptr, msg, table, hasbits, data); |
||||
} |
||||
|
||||
/* Loads the two bytes at ptr as a uint16_t in host byte order and returns the
 * value widened to 32 bits.  memcpy() is used so ptr need not be aligned. */
UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) {
  uint16_t raw;
  memcpy(&raw, ptr, sizeof(raw));
  return raw;
}
||||
|
||||
/* Installs a new limit that ends size bytes after ptr.
 *
 * The limit is stored relative to d->end and d->limit_ptr is refreshed to
 * match.  Returns the difference between the previous limit and the new one;
 * pass it to decode_poplimit() to restore the previous limit. */
UPB_INLINE int decode_pushlimit(upb_decstate *d, const char *ptr, int size) {
  int new_limit = size + (int)(ptr - d->end);
  int saved_delta = d->limit - new_limit;

  d->limit = new_limit;
  d->limit_ptr = d->end + UPB_MIN(0, new_limit);
  return saved_delta;
}
||||
|
||||
/* Restores the limit that was in effect before the matching
 * decode_pushlimit() call.
 *
 * ptr must sit exactly at the current limit (asserted).  saved_delta is the
 * value returned by decode_pushlimit(). */
UPB_INLINE void decode_poplimit(upb_decstate *d, const char *ptr,
                                int saved_delta) {
  int restored;

  UPB_ASSERT(ptr - d->end == d->limit);

  restored = d->limit + saved_delta;
  d->limit = restored;
  d->limit_ptr = d->end + UPB_MIN(0, restored);

  UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
}
||||
|
||||
#include "upb/port_undef.inc" |
||||
|
||||
#endif /* UPB_DECODE_INT_H_ */ |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,126 @@ |
||||
// These are the specialized field parser functions for the fast parser.
|
||||
// Generated tables will refer to these by name.
|
||||
//
|
||||
// The function names are encoded with names like:
|
||||
//
|
||||
// // 123 4
|
||||
// upb_pss_1bt(); // Parse singular string, 1 byte tag.
|
||||
//
|
||||
// In position 1:
|
||||
// - 'p' for parse, most functions use this
|
||||
// - 'c' for copy, for when we are copying strings instead of aliasing
|
||||
//
|
||||
// In position 2 (cardinality):
|
||||
// - 's' for singular, with or without hasbit
|
||||
// - 'o' for oneof
|
||||
// - 'r' for non-packed repeated
|
||||
// - 'p' for packed repeated
|
||||
//
|
||||
// In position 3 (type):
|
||||
// - 'b1' for bool
|
||||
// - 'v4' for 4-byte varint
|
||||
// - 'v8' for 8-byte varint
|
||||
// - 'z4' for zig-zag-encoded 4-byte varint
|
||||
// - 'z8' for zig-zag-encoded 8-byte varint
|
||||
// - 'f4' for 4-byte fixed
|
||||
// - 'f8' for 8-byte fixed
|
||||
// - 'm' for sub-message
|
||||
// - 's' for string (validate UTF-8)
|
||||
// - 'b' for bytes
|
||||
//
|
||||
// In position 4 (tag length):
|
||||
// - '1' for one-byte tags (field numbers 1-15)
|
||||
// - '2' for two-byte tags (field numbers 16-2047)
|
||||
|
||||
#ifndef UPB_DECODE_FAST_H_ |
||||
#define UPB_DECODE_FAST_H_ |
||||
|
||||
#include "upb/msg.h" |
||||
|
||||
struct upb_decstate; |
||||
|
||||
// The fallback, generic parsing function that can handle any field type.
|
||||
// This just uses the regular (non-fast) parser to parse a single field.
|
||||
const char *fastdecode_generic(struct upb_decstate *d, const char *ptr, |
||||
upb_msg *msg, intptr_t table, uint64_t hasbits, |
||||
uint64_t data); |
||||
|
||||
#define UPB_PARSE_PARAMS \ |
||||
struct upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, \
|
||||
uint64_t hasbits, uint64_t data |
||||
|
||||
/* primitive fields ***********************************************************/ |
||||
|
||||
// Declares a single parser named upb_p<card><kind><width>_<taglen>bt.
#define F(card, kind, width, taglen) \
  const char *upb_p##card##kind##width##_##taglen##bt(UPB_PARSE_PARAMS);

// Every primitive value type for one cardinality and tag length.
#define TYPES(card, taglen) \
  F(card, b, 1, taglen)     \
  F(card, v, 4, taglen)     \
  F(card, v, 8, taglen)     \
  F(card, z, 4, taglen)     \
  F(card, z, 8, taglen)     \
  F(card, f, 4, taglen)     \
  F(card, f, 8, taglen)

// One-byte and two-byte tag variants for one cardinality.
#define TAGBYTES(card) \
  TYPES(card, 1)       \
  TYPES(card, 2)

TAGBYTES(s)  // singular
TAGBYTES(o)  // oneof
TAGBYTES(r)  // non-packed repeated
TAGBYTES(p)  // packed repeated

#undef F
#undef TYPES
#undef TAGBYTES
||||
|
||||
/* string fields **************************************************************/ |
||||
|
||||
// Declares the parse ('p') and copy ('c') variants of one string/bytes parser:
// upb_p<card><type>_<tagbytes>bt and upb_c<card><type>_<tagbytes>bt.
#define F(card, tagbytes, type)                                     \
  const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS); \
  const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS);

// Both 's' (string, validate UTF-8) and 'b' (bytes) variants.
#define UTF8(card, tagbytes) \
  F(card, tagbytes, s)       \
  F(card, tagbytes, b)

// One-byte and two-byte tag variants for one cardinality.
#define TAGBYTES(card) \
  UTF8(card, 1)        \
  UTF8(card, 2)

TAGBYTES(s)
TAGBYTES(o)
TAGBYTES(r)

// Undefine every helper macro, including UTF8, so that none of them leaks
// into files that include this header.
#undef F
#undef UTF8
#undef TAGBYTES
||||
|
||||
/* sub-message fields *********************************************************/ |
||||
|
||||
// Declares one sub-message parser named
// upb_p<card>m_<taglen>bt_max<ceil_name>b.  The fourth argument is not used
// in this expansion.
#define F(card, taglen, ceil_name, ceil_arg) \
  const char *upb_p##card##m_##taglen##bt_max##ceil_name##b(UPB_PARSE_PARAMS);

// One parser per size ceiling, plus the "max" variant.
#define SIZES(card, taglen)  \
  F(card, taglen, 64, 64)    \
  F(card, taglen, 128, 128)  \
  F(card, taglen, 192, 192)  \
  F(card, taglen, 256, 256)  \
  F(card, taglen, max, -1)

// One-byte and two-byte tag variants for one cardinality.
#define TAGBYTES(card) \
  SIZES(card, 1)       \
  SIZES(card, 2)

TAGBYTES(s)
TAGBYTES(o)
TAGBYTES(r)

#undef TAGBYTES
#undef SIZES
#undef F
||||
|
||||
#undef UPB_PARSE_PARAMS |
||||
|
||||
#endif /* UPB_DECODE_FAST_H_ */ |
Loading…
Reference in new issue