|
|
|
/*
|
|
|
|
** Internal implementation details of the decoder that are shared between
|
|
|
|
** decode.c and decode_fast.c.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef UPB_DECODE_INT_H_
|
|
|
|
#define UPB_DECODE_INT_H_
|
|
|
|
|
|
|
|
#include <setjmp.h>
|
|
|
|
|
|
|
|
#include "upb/msg.h"
|
|
|
|
#include "upb/upb.int.h"
|
|
|
|
#include "upb/decode_fast.h"
|
|
|
|
|
|
|
|
/* Must be last. */
|
|
|
|
#include "upb/port_def.inc"
|
|
|
|
|
|
|
|
typedef struct upb_decstate {
|
|
|
|
const char *end; /* Can read up to 16 bytes slop beyond this. */
|
|
|
|
const char *limit_ptr; /* = end + UPB_MIN(limit, 0) */
|
|
|
|
upb_msg *unknown_msg; /* If non-NULL, add unknown data at buffer flip. */
|
|
|
|
const char *unknown; /* Start of unknown data. */
|
|
|
|
int limit; /* Submessage limit relative to end. */
|
|
|
|
int depth;
|
|
|
|
uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
|
|
|
|
bool alias;
|
|
|
|
char patch[32];
|
|
|
|
upb_arena arena;
|
|
|
|
jmp_buf err;
|
|
|
|
} upb_decstate;
|
|
|
|
|
|
|
|
/* Error function that will abort decoding with longjmp(). We can't declare this
|
|
|
|
* UPB_NORETURN, even though it is appropriate, because if we do then compilers
|
|
|
|
* will "helpfully" refuse to tailcall to it
|
|
|
|
* (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
|
|
|
|
* of our optimizations. That is also why we must declare it in a separate file,
|
|
|
|
* otherwise the compiler will see that it calls longjmp() and deduce that it is
|
|
|
|
* noreturn. */
|
|
|
|
const char *fastdecode_err(upb_decstate *d);
|
|
|
|
|
|
|
|
/* Lookup table indexed by the value of a byte. decode_verifyutf8_inl() reads
 * it for each lead byte: 0 means the byte cannot begin a sequence, any other
 * value is the total length in bytes of the sequence that byte begins. Only
 * declared here; the definition is not in this header. */
extern const uint8_t upb_utf8_offsets[];
|
|
|
|
|
|
|
|
UPB_INLINE
|
|
|
|
bool decode_verifyutf8_inl(const char *buf, int len) {
|
|
|
|
int i, j;
|
|
|
|
uint8_t offset;
|
|
|
|
|
|
|
|
i = 0;
|
|
|
|
while (i < len) {
|
|
|
|
offset = upb_utf8_offsets[(uint8_t)buf[i]];
|
|
|
|
if (offset == 0 || i + offset > len) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
for (j = i + 1; j < i + offset; j++) {
|
|
|
|
if ((buf[j] & 0xc0) != 0x80) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
i += offset;
|
|
|
|
}
|
|
|
|
return i == len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* x86-64 pointers always have the high 16 bits matching. So we can shift
|
|
|
|
* left 8 and right 8 without loss of information. */
|
|
|
|
UPB_INLINE intptr_t decode_totable(const upb_msglayout *tablep) {
|
|
|
|
return ((intptr_t)tablep << 8) | tablep->table_mask;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Recovers the layout pointer from a packed table word by discarding its low
 * 8 bits with a right shift. */
UPB_INLINE const upb_msglayout *decode_totablep(intptr_t table) {
  const intptr_t ptr_bits = table >> 8;
  return (const upb_msglayout *)ptr_bits;
}
|
|
|
|
|
|
|
|
UPB_INLINE
|
|
|
|
const char *decode_isdonefallback_inl(upb_decstate *d, const char *ptr,
|
|
|
|
int overrun) {
|
|
|
|
if (overrun < d->limit) {
|
|
|
|
/* Need to copy remaining data into patch buffer. */
|
|
|
|
UPB_ASSERT(overrun < 16);
|
|
|
|
if (d->unknown_msg) {
|
|
|
|
if (!_upb_msg_addunknown(d->unknown_msg, d->unknown, ptr - d->unknown,
|
|
|
|
&d->arena)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
d->unknown = &d->patch[0] + overrun;
|
|
|
|
}
|
|
|
|
memset(d->patch + 16, 0, 16);
|
|
|
|
memcpy(d->patch, d->end, 16);
|
|
|
|
ptr = &d->patch[0] + overrun;
|
|
|
|
d->end = &d->patch[16];
|
|
|
|
d->limit -= 16;
|
|
|
|
d->limit_ptr = d->end + d->limit;
|
|
|
|
d->alias = false;
|
|
|
|
UPB_ASSERT(ptr < d->limit_ptr);
|
|
|
|
return ptr;
|
|
|
|
} else {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Out-of-line fallback called by decode_isdone() when ptr is at or past
 * limit_ptr but overrun does not equal the current limit.
 *
 * NOTE(review): presumably wraps decode_isdonefallback_inl() and reports
 * failure without returning NULL, since decode_isdone() stores the result
 * unchecked; confirm against the definition in decode.c. */
const char *decode_isdonefallback(upb_decstate *d, const char *ptr,
                                  int overrun);
|
|
|
|
|
|
|
|
UPB_INLINE
|
|
|
|
bool decode_isdone(upb_decstate *d, const char **ptr) {
|
|
|
|
int overrun = *ptr - d->end;
|
|
|
|
if (UPB_LIKELY(*ptr < d->limit_ptr)) {
|
|
|
|
return false;
|
|
|
|
} else if (UPB_LIKELY(overrun == d->limit)) {
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
*ptr = decode_isdonefallback(d, *ptr, overrun);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
UPB_INLINE
|
|
|
|
const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr,
|
|
|
|
upb_msg *msg, intptr_t table,
|
|
|
|
uint64_t hasbits, uint32_t tag) {
|
|
|
|
const upb_msglayout *table_p = decode_totablep(table);
|
|
|
|
uint8_t mask = table;
|
|
|
|
uint64_t data;
|
|
|
|
size_t idx = tag & mask;
|
|
|
|
UPB_ASSUME((idx & 7) == 0);
|
|
|
|
idx >>= 3;
|
|
|
|
data = table_p->fasttable[idx].field_data ^ tag;
|
|
|
|
return table_p->fasttable[idx].field_parser(d, ptr, msg, table, hasbits, data);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Loads the two bytes at ptr as a uint16_t in host byte order and widens the
 * result to 32 bits. memcpy() is used so ptr need not be aligned. */
UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) {
  uint16_t raw;
  memcpy(&raw, ptr, sizeof(raw));
  return (uint32_t)raw;
}
|
|
|
|
|
|
|
|
/* Installs a new limit that ends size bytes after ptr.
 *
 * d->limit is stored relative to d->end, and d->limit_ptr is recomputed from
 * it. Returns the difference between the previous limit and the new one; pass
 * that value to decode_poplimit() to restore the previous limit. */
UPB_INLINE int decode_pushlimit(upb_decstate *d, const char *ptr, int size) {
  const int new_limit = size + (int)(ptr - d->end);
  const int saved_delta = d->limit - new_limit;

  d->limit = new_limit;
  d->limit_ptr = d->end + UPB_MIN(0, new_limit);
  return saved_delta;
}
|
|
|
|
|
|
|
|
/* Restores the limit that was in effect before the matching
 * decode_pushlimit() call, whose return value is passed as saved_delta.
 *
 * Asserts that ptr sits exactly on the limit being popped, then adds
 * saved_delta back to d->limit and recomputes d->limit_ptr. */
UPB_INLINE void decode_poplimit(upb_decstate *d, const char *ptr,
                                int saved_delta) {
  int restored;

  UPB_ASSERT(ptr - d->end == d->limit);

  restored = d->limit + saved_delta;
  d->limit = restored;
  d->limit_ptr = d->end + UPB_MIN(0, restored);
  UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
}
|
|
|
|
|
|
|
|
#include "upb/port_undef.inc"
|
|
|
|
|
|
|
|
#endif /* UPB_DECODE_INT_H_ */
|