|
|
|
@ -19,44 +19,50 @@ |
|
|
|
|
#include "upb_decoder_x86.h" |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
/* Decoding/Buffering of individual values ************************************/ |
|
|
|
|
// A group continues until an END_GROUP tag is seen.
|
|
|
|
|
#define UPB_GROUPEND UINT32_MAX |
|
|
|
|
// A non-packed repeated field ends when a diff. field is seen (or submsg end).
|
|
|
|
|
#define UPB_REPEATEDEND (UINT32_MAX-1) |
|
|
|
|
|
|
|
|
|
// Performs zig-zag decoding, which is used by sint32 and sint64.
|
|
|
|
|
// Zig-zag decodes a 32-bit value: the low bit selects the sign and the
// remaining bits hold the magnitude (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...).
INLINE int32_t upb_zzdec_32(uint32_t n) {
  const uint32_t magnitude = n >> 1;
  const uint32_t sign_mask = (uint32_t)0 - (n & 1);  // All ones iff low bit set.
  return (int32_t)(magnitude ^ sign_mask);
}
|
|
|
|
// Zig-zag decodes a 64-bit value: the low bit selects the sign and the
// remaining bits hold the magnitude (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...).
INLINE int64_t upb_zzdec_64(uint64_t n) {
  const uint64_t magnitude = n >> 1;
  const uint64_t sign_mask = (uint64_t)0 - (n & 1);  // All ones iff low bit set.
  return (int64_t)(magnitude ^ sign_mask);
}
|
|
|
|
// It's unfortunate that we have to micro-manage the compiler this way,
|
|
|
|
|
// especially since this tuning is necessarily specific to one hardware
|
|
|
|
|
// configuration. But empirically on a Core i7, performance increases 30-50%
|
|
|
|
|
// with these annotations. Every instance where these appear, gcc 4.2.1 made
|
|
|
|
|
// the wrong decision and degraded performance in benchmarks.
|
|
|
|
|
// Forces inlining regardless of the optimizer's heuristics.  The `inline`
// keyword is included because GCC documents always_inline as applying to
// functions declared inline, and warns ("might not be inlinable") otherwise.
#define FORCEINLINE static inline __attribute__((always_inline))
|
|
|
|
#define NOINLINE static __attribute__((noinline)) |
|
|
|
|
|
|
|
|
|
// Abandons decoding by jumping to the context saved in d->exitjmp.
// Control does not come back to the caller.
static void upb_decoder_exit(upb_decoder *d) {
  siglongjmp(d->exitjmp, 1);
}
|
|
|
|
|
|
|
|
|
/* Decoding/Buffering of wire types *******************************************/ |
|
|
|
|
|
|
|
|
|
#define UPB_MAX_VARINT_ENCODED_SIZE 10 |
|
|
|
|
|
|
|
|
|
// Moves d->ptr forward by len bytes.  No bounds check is performed here.
INLINE void upb_decoder_advance(upb_decoder *d, size_t len) {
  d->ptr = d->ptr + len;
}
|
|
|
|
// Moves d->ptr forward by len bytes.  No bounds check is performed here.
static void upb_decoder_advance(upb_decoder *d, size_t len) {
  d->ptr = d->ptr + len;
}
|
|
|
|
// Returns the distance from d->ptr to d->end, i.e. the number of bytes that
// can still be consumed from the current buffer.
static size_t upb_decoder_bufleft(upb_decoder *d) {
  return (size_t)(d->end - d->ptr);
}
|
|
|
|
|
|
|
|
|
INLINE size_t upb_decoder_offset(upb_decoder *d) { |
|
|
|
|
size_t upb_decoder_offset(upb_decoder *d) { |
|
|
|
|
size_t offset = d->buf_stream_offset; |
|
|
|
|
if (d->buf) offset += (d->ptr - d->buf); |
|
|
|
|
return offset; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Returns the distance from d->ptr to d->end, i.e. the number of bytes that
// can still be consumed from the current buffer.
INLINE size_t upb_decoder_bufleft(upb_decoder *d) {
  return (size_t)(d->end - d->ptr);
}
|
|
|
|
|
|
|
|
|
INLINE void upb_dstate_setmsgend(upb_decoder *d) { |
|
|
|
|
uint32_t end_offset = d->dispatcher.top->end_offset; |
|
|
|
|
d->submsg_end = (end_offset == UINT32_MAX) ? |
|
|
|
|
(void*)UINTPTR_MAX : d->buf + end_offset; |
|
|
|
|
static void upb_decoder_setmsgend(upb_decoder *d) { |
|
|
|
|
uint32_t end = d->dispatcher.top->end_offset; |
|
|
|
|
d->submsg_end = (end == UINT32_MAX) ? (void*)UINTPTR_MAX : d->buf + end; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Pulls the next buffer from the bytesrc. Should be called only when the
|
|
|
|
|
// current buffer is completely empty.
|
|
|
|
|
static bool upb_pullbuf(upb_decoder *d) { |
|
|
|
|
static void upb_pullbuf(upb_decoder *d, bool need) { |
|
|
|
|
assert(upb_decoder_bufleft(d) == 0); |
|
|
|
|
int32_t last_buf_len = d->buf ? upb_string_len(d->bufstr) : -1; |
|
|
|
|
upb_string_recycle(&d->bufstr); |
|
|
|
|
if (!upb_bytesrc_getstr(d->bytesrc, d->bufstr, d->status)) { |
|
|
|
|
d->buf = NULL; |
|
|
|
|
d->end = NULL; |
|
|
|
|
return false; |
|
|
|
|
if (need) upb_seterr(d->status, UPB_ERROR, "Unexpected EOF."); |
|
|
|
|
upb_decoder_exit(d); |
|
|
|
|
} |
|
|
|
|
if (last_buf_len != -1) { |
|
|
|
|
d->buf_stream_offset += last_buf_len; |
|
|
|
@ -70,290 +76,256 @@ static bool upb_pullbuf(upb_decoder *d) { |
|
|
|
|
d->jit_end = d->end - 20; |
|
|
|
|
upb_string_recycle(&d->tmp); |
|
|
|
|
upb_string_substr(d->tmp, d->bufstr, 0, 0); |
|
|
|
|
upb_dstate_setmsgend(d); |
|
|
|
|
return true; |
|
|
|
|
upb_decoder_setmsgend(d); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Called only from the slow path, this function copies the next "len" bytes
|
|
|
|
|
// from the stream to "data", adjusting the dstate appropriately.
|
|
|
|
|
static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted) { |
|
|
|
|
// from the stream to "data", adjusting the decoder state appropriately.
|
|
|
|
|
static void upb_getbuf(upb_decoder *d, void *data, size_t bytes, bool need) { |
|
|
|
|
while (1) { |
|
|
|
|
size_t to_copy = UPB_MIN(bytes_wanted, upb_decoder_bufleft(d)); |
|
|
|
|
size_t to_copy = UPB_MIN(bytes, upb_decoder_bufleft(d)); |
|
|
|
|
memcpy(data, d->ptr, to_copy); |
|
|
|
|
upb_decoder_advance(d, to_copy); |
|
|
|
|
bytes_wanted -= to_copy; |
|
|
|
|
if (bytes_wanted == 0) return true; |
|
|
|
|
if (!upb_pullbuf(d)) return false; |
|
|
|
|
bytes -= to_copy; |
|
|
|
|
if (bytes == 0) return; |
|
|
|
|
upb_pullbuf(d, need); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// We use this path when we don't have UPB_MAX_VARINT_ENCODED_SIZE contiguous
|
|
|
|
|
// bytes available in our current buffer. We don't inline this because we
|
|
|
|
|
// accept that it will be slow and we don't want to pay for two copies of it.
|
|
|
|
|
static bool upb_decode_varint_slow(upb_decoder *d, upb_value *val) { |
|
|
|
|
char byte = 0x80; |
|
|
|
|
uint64_t val64 = 0; |
|
|
|
|
NOINLINE uint64_t upb_decode_varint_slow(upb_decoder *d, bool need) { |
|
|
|
|
uint8_t byte = 0x80; |
|
|
|
|
uint64_t u64 = 0; |
|
|
|
|
int bitpos; |
|
|
|
|
for(bitpos = 0; |
|
|
|
|
bitpos < 70 && (byte & 0x80) && upb_getbuf(d, &byte, 1); |
|
|
|
|
bitpos += 7) |
|
|
|
|
val64 |= ((uint64_t)byte & 0x7F) << bitpos; |
|
|
|
|
|
|
|
|
|
if(bitpos == 70) { |
|
|
|
|
upb_seterr(d->status, UPB_ERROR, |
|
|
|
|
"Varint was unterminated after 10 bytes.\n"); |
|
|
|
|
return false; |
|
|
|
|
} else if (d->status->code == UPB_EOF && bitpos == 0) { |
|
|
|
|
// Regular EOF.
|
|
|
|
|
return false; |
|
|
|
|
} else if (d->status->code == UPB_EOF && (byte & 0x80)) { |
|
|
|
|
upb_seterr(d->status, UPB_ERROR, |
|
|
|
|
"Provided data ended in the middle of a varint.\n"); |
|
|
|
|
return false; |
|
|
|
|
} else { |
|
|
|
|
// Success.
|
|
|
|
|
upb_value_setraw(val, val64); |
|
|
|
|
return true; |
|
|
|
|
for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { |
|
|
|
|
upb_getbuf(d, &byte, 1, need); |
|
|
|
|
u64 |= ((uint64_t)byte & 0x7F) << bitpos; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
typedef struct { |
|
|
|
|
upb_wire_type_t wire_type; |
|
|
|
|
upb_field_number_t field_number; |
|
|
|
|
} upb_tag; |
|
|
|
|
if(bitpos == 70 && (byte & 0x80)) { |
|
|
|
|
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n"); |
|
|
|
|
upb_decoder_exit(d); |
|
|
|
|
} |
|
|
|
|
return u64; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
INLINE bool upb_decode_tag(upb_decoder *d, uint32_t *tag) { |
|
|
|
|
// For tags and delimited lengths, which must be <=32bit and are usually small.
|
|
|
|
|
FORCEINLINE uint32_t upb_decode_varint32(upb_decoder *d, bool need) { |
|
|
|
|
const char *p = d->ptr; |
|
|
|
|
upb_value val; |
|
|
|
|
// Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048).
|
|
|
|
|
uint32_t ret; |
|
|
|
|
uint64_t u64; |
|
|
|
|
// Nearly all will be either 1 byte (1-16) or 2 bytes (17-2048).
|
|
|
|
|
if (upb_decoder_bufleft(d) < 2) goto slow; // unlikely.
|
|
|
|
|
*tag = *p & 0x7f; |
|
|
|
|
ret = *p & 0x7f; |
|
|
|
|
if ((*(p++) & 0x80) == 0) goto done; // predictable if fields are in order
|
|
|
|
|
*tag |= (*p & 0x7f) << 7; |
|
|
|
|
ret |= (*p & 0x7f) << 7; |
|
|
|
|
if ((*(p++) & 0x80) == 0) goto done; // likely
|
|
|
|
|
slow: |
|
|
|
|
// Decode a full varint starting over from ptr.
|
|
|
|
|
if (!upb_decode_varint_slow(d, &val)) return false; |
|
|
|
|
*tag = upb_value_getint64(val); |
|
|
|
|
p = d->ptr; // Trick the next line into not overwriting us.
|
|
|
|
|
u64 = upb_decode_varint_slow(d, need); |
|
|
|
|
if (u64 > 0xffffffff) { |
|
|
|
|
upb_seterr(d->status, UPB_ERROR, "Unterminated 32-bit varint.\n"); |
|
|
|
|
upb_decoder_exit(d); |
|
|
|
|
} |
|
|
|
|
ret = (uint32_t)u64; |
|
|
|
|
p = d->ptr; // Turn the next line into a nop.
|
|
|
|
|
done: |
|
|
|
|
upb_decoder_advance(d, p - d->ptr); |
|
|
|
|
return true; |
|
|
|
|
return ret; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
INLINE bool upb_decode_varint(upb_decoder *d, upb_value *val) { |
|
|
|
|
FORCEINLINE uint64_t upb_decode_varint(upb_decoder *d) { |
|
|
|
|
if (upb_decoder_bufleft(d) >= 16) { |
|
|
|
|
// Common (fast) case.
|
|
|
|
|
upb_decoderet r = upb_vdecode_fast(d->ptr); |
|
|
|
|
if (r.p == NULL) { |
|
|
|
|
upb_seterr(d->status, UPB_ERROR, "Unterminated varint.\n"); |
|
|
|
|
return false; |
|
|
|
|
upb_decoder_exit(d); |
|
|
|
|
} |
|
|
|
|
upb_value_setraw(val, r.val); |
|
|
|
|
upb_decoder_advance(d, r.p - d->ptr); |
|
|
|
|
return true; |
|
|
|
|
return r.val; |
|
|
|
|
} else { |
|
|
|
|
return upb_decode_varint_slow(d, val); |
|
|
|
|
return upb_decode_varint_slow(d, true); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
INLINE bool upb_decode_fixed(upb_decoder *d, size_t bytes, upb_value *val) { |
|
|
|
|
FORCEINLINE void upb_decode_fixed(upb_decoder *d, void *val, size_t bytes) { |
|
|
|
|
if (upb_decoder_bufleft(d) >= bytes) { |
|
|
|
|
// Common (fast) case.
|
|
|
|
|
memcpy(val, d->ptr, bytes); |
|
|
|
|
upb_decoder_advance(d, bytes); |
|
|
|
|
} else { |
|
|
|
|
if (!upb_getbuf(d, val, bytes)) return false; |
|
|
|
|
upb_getbuf(d, val, bytes, true); |
|
|
|
|
} |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// "val" initially holds the length of the string, this is replaced by the
|
|
|
|
|
// contents of the string.
|
|
|
|
|
INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, |
|
|
|
|
upb_string **str) { |
|
|
|
|
upb_string_recycle(str); |
|
|
|
|
uint32_t strlen = upb_value_getint32(*val); |
|
|
|
|
// Reads the next four bytes of input into a uint32_t via upb_decode_fixed().
// The bytes are copied as-is; no byte-order conversion happens in this wrapper.
FORCEINLINE uint32_t upb_decode_fixed32(upb_decoder *d) {
  uint32_t raw;
  upb_decode_fixed(d, &raw, sizeof(raw));
  return raw;
}
|
|
|
|
// Reads the next eight bytes of input into a uint64_t via upb_decode_fixed().
// The bytes are copied as-is; no byte-order conversion happens in this wrapper.
FORCEINLINE uint64_t upb_decode_fixed64(upb_decoder *d) {
  uint64_t raw;
  upb_decode_fixed(d, &raw, sizeof(raw));
  return raw;
}
|
|
|
|
|
|
|
|
|
INLINE upb_string *upb_decode_string(upb_decoder *d) { |
|
|
|
|
upb_string_recycle(&d->tmp); |
|
|
|
|
uint32_t strlen = upb_decode_varint32(d, true); |
|
|
|
|
if (upb_decoder_bufleft(d) >= strlen) { |
|
|
|
|
// Common (fast) case.
|
|
|
|
|
upb_string_substr(*str, d->bufstr, d->ptr - d->buf, strlen); |
|
|
|
|
upb_string_substr(d->tmp, d->bufstr, d->ptr - d->buf, strlen); |
|
|
|
|
upb_decoder_advance(d, strlen); |
|
|
|
|
} else { |
|
|
|
|
if (!upb_getbuf(d, upb_string_getrwbuf(*str, strlen), strlen)) |
|
|
|
|
return false; |
|
|
|
|
upb_getbuf(d, upb_string_getrwbuf(d->tmp, strlen), strlen, true); |
|
|
|
|
} |
|
|
|
|
upb_value_setstr(val, *str); |
|
|
|
|
return true; |
|
|
|
|
return d->tmp; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* The main decoding loop *****************************************************/ |
|
|
|
|
|
|
|
|
|
extern upb_wire_type_t upb_expected_wire_types[]; |
|
|
|
|
// Returns true if wt is the correct on-the-wire type for ft.
|
|
|
|
|
INLINE bool upb_check_type(upb_wire_type_t wt, upb_fieldtype_t ft) { |
|
|
|
|
// This doesn't currently support packed arrays.
|
|
|
|
|
return upb_types[ft].native_wire_type == wt; |
|
|
|
|
INLINE void upb_pop(upb_decoder *d) { |
|
|
|
|
//if (d->dispatcher.top->end_offset == UPB_REPEATEDEND)
|
|
|
|
|
// upb_dispatch_endseq(&d->dispatcher);
|
|
|
|
|
d->f = d->dispatcher.top->f; |
|
|
|
|
upb_dispatch_endsubmsg(&d->dispatcher); |
|
|
|
|
upb_decoder_setmsgend(d); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static upb_flow_t upb_pop(upb_decoder *d) { |
|
|
|
|
upb_flow_t ret = upb_dispatch_endsubmsg(&d->dispatcher); |
|
|
|
|
upb_dstate_setmsgend(d); |
|
|
|
|
return ret; |
|
|
|
|
INLINE void upb_push(upb_decoder *d, upb_fieldent *f, uint32_t end) { |
|
|
|
|
upb_dispatch_startsubmsg(&d->dispatcher, f, end); |
|
|
|
|
upb_decoder_setmsgend(d); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static upb_flow_t upb_decoder_skipsubmsg(upb_decoder *d) { |
|
|
|
|
if (d->dispatcher.top->f->type == UPB_TYPE(GROUP)) { |
|
|
|
|
fprintf(stderr, "upb_decoder: Can't skip groups yet.\n"); |
|
|
|
|
abort(); |
|
|
|
|
} |
|
|
|
|
upb_decoder_advance(d, d->dispatcher.top->end_offset - (d->ptr - d->buf)); |
|
|
|
|
|
|
|
|
|
/* Decoding of .proto types ***************************************************/ |
|
|
|
|
|
|
|
|
|
// Technically, we are losing data if we see a 32-bit varint that is not
|
|
|
|
|
// properly sign-extended. We could detect this and error about the data loss,
|
|
|
|
|
// but proto2 does not do this, so we pass.
|
|
|
|
|
|
|
|
|
|
#define T(type, wt, valtype, convfunc) \ |
|
|
|
|
INLINE void upb_decode_ ## type(upb_decoder *d, upb_fieldent *f) { \
|
|
|
|
|
upb_value val; \
|
|
|
|
|
upb_value_set ## valtype(&val, (convfunc)(upb_decode_ ## wt(d))); \
|
|
|
|
|
upb_dispatch_value(&d->dispatcher, f, val); \
|
|
|
|
|
} \
|
|
|
|
|
|
|
|
|
|
// Reinterprets the bits of n as a double (no numeric conversion).
// A union is used instead of a pointer cast: reading a uint64_t object through
// a double* violates the strict-aliasing rules and is undefined behavior.
static double upb_asdouble(uint64_t n) {
  union { uint64_t bits; double value; } pun;
  pun.bits = n;
  return pun.value;
}
|
|
|
|
// Reinterprets the bits of n as a float (no numeric conversion).
// A union is used instead of a pointer cast: reading a uint32_t object through
// a float* violates the strict-aliasing rules and is undefined behavior.
static float upb_asfloat(uint32_t n) {
  union { uint32_t bits; float value; } pun;
  pun.bits = n;
  return pun.value;
}
|
|
|
|
// Zig-zag decodes a 32-bit value: the low bit selects the sign and the
// remaining bits hold the magnitude (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...).
static int32_t upb_zzdec_32(uint32_t n) {
  const uint32_t magnitude = n >> 1;
  const uint32_t sign_mask = (uint32_t)0 - (n & 1);  // All ones iff low bit set.
  return (int32_t)(magnitude ^ sign_mask);
}
|
|
|
|
// Zig-zag decodes a 64-bit value: the low bit selects the sign and the
// remaining bits hold the magnitude (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...).
static int64_t upb_zzdec_64(uint64_t n) {
  const uint64_t magnitude = n >> 1;
  const uint64_t sign_mask = (uint64_t)0 - (n & 1);  // All ones iff low bit set.
  return (int64_t)(magnitude ^ sign_mask);
}
|
|
|
|
|
|
|
|
|
T(INT32, varint, int32, int32_t) |
|
|
|
|
T(INT64, varint, int64, int64_t) |
|
|
|
|
T(UINT32, varint, uint32, uint32_t) |
|
|
|
|
T(UINT64, varint, uint64, uint64_t) |
|
|
|
|
T(FIXED32, fixed32, uint32, uint32_t) |
|
|
|
|
T(FIXED64, fixed64, uint64, uint64_t) |
|
|
|
|
T(SFIXED32, fixed32, int32, int32_t) |
|
|
|
|
T(SFIXED64, fixed64, int64, int64_t) |
|
|
|
|
T(BOOL, varint, bool, bool) |
|
|
|
|
T(ENUM, varint, int32, int32_t) |
|
|
|
|
T(DOUBLE, fixed64, double, upb_asdouble) |
|
|
|
|
T(FLOAT, fixed32, float, upb_asfloat) |
|
|
|
|
T(SINT32, varint, int32, upb_zzdec_32) |
|
|
|
|
T(SINT64, varint, int64, upb_zzdec_64) |
|
|
|
|
T(STRING, string, str, upb_string*) |
|
|
|
|
|
|
|
|
|
// Decode handler for a group field: pushes a new frame whose end offset is the
// UPB_GROUPEND sentinel, since a group has no length prefix and instead
// continues until an END_GROUP tag is seen.
static void upb_decode_GROUP(upb_decoder *d, upb_fieldent *f) {
  const uint32_t end_marker = UPB_GROUPEND;
  upb_push(d, f, end_marker);
}
|
|
|
|
// Decode handler installed for UPB_TYPE_ENDGROUP: closes the current frame.
// Returns nothing; a value-returning `return` is not permitted in a void
// function, so none is present.
static void upb_endgroup(upb_decoder *d, upb_fieldent *f) {
  (void)f;  // Unused; present only to match the f->decode handler signature.
  upb_pop(d);
}
|
|
|
|
|
|
|
|
|
static upb_flow_t upb_push(upb_decoder *d, upb_handlers_fieldent *f, |
|
|
|
|
uint32_t end_offset) { |
|
|
|
|
upb_flow_t flow = upb_dispatch_startsubmsg(&d->dispatcher, f, end_offset); |
|
|
|
|
upb_dstate_setmsgend(d); |
|
|
|
|
return flow; |
|
|
|
|
// Decode handler for a length-delimited submessage: reads the varint length
// prefix and pushes a frame that ends that many bytes past the position
// immediately following the prefix.
static void upb_decode_MESSAGE(upb_decoder *d, upb_fieldent *f) {
  // The length is read in its own statement on purpose: decoding it advances
  // d->ptr, and C leaves the evaluation order of the operands of '+'
  // unspecified, so reading d->ptr in the same expression could observe the
  // position from before the prefix was consumed.
  uint32_t len = upb_decode_varint32(d, true);
  upb_push(d, f, len + (d->ptr - d->buf));
}
|
|
|
|
|
|
|
|
|
void upb_decoder_decode(upb_decoder *d, upb_status *status) { |
|
|
|
|
d->status = status; |
|
|
|
|
|
|
|
|
|
#define CHECK_FLOW(expr) \ |
|
|
|
|
switch (expr) { \
|
|
|
|
|
case UPB_BREAK: goto callback_err; \
|
|
|
|
|
case UPB_SKIPSUBMSG: upb_decoder_skipsubmsg(d); continue; \
|
|
|
|
|
default: break; /* continue normally. */ \
|
|
|
|
|
} |
|
|
|
|
#define CHECK(expr) if (!expr) { assert(!upb_ok(status)); goto err; } |
|
|
|
|
|
|
|
|
|
CHECK(upb_pullbuf(d)); |
|
|
|
|
if (upb_dispatch_startmsg(&d->dispatcher) != UPB_CONTINUE) goto err; |
|
|
|
|
|
|
|
|
|
// Main loop: executed once per tag/field pair.
|
|
|
|
|
while(1) { |
|
|
|
|
// Check for end-of-submessage.
|
|
|
|
|
while (d->ptr >= d->submsg_end) { |
|
|
|
|
if (d->ptr > d->submsg_end) { |
|
|
|
|
upb_seterr(d->status, UPB_ERROR, "Bad submessage end."); |
|
|
|
|
goto err; |
|
|
|
|
} |
|
|
|
|
CHECK_FLOW(upb_pop(d)); |
|
|
|
|
} |
|
|
|
|
/* The main decoding loop *****************************************************/ |
|
|
|
|
|
|
|
|
|
static void upb_unwind(upb_decoder *d) { |
|
|
|
|
// TODO.
|
|
|
|
|
(void)d; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Called when d->ptr has reached d->submsg_end.  Reports an error and exits
// the decoder if the pointer went past the end; otherwise pops the frame.
static void upb_delimend(upb_decoder *d) {
  const bool overran = d->ptr > d->submsg_end;
  if (overran) {
    upb_seterr(d->status, UPB_ERROR, "Bad submessage end.");
    upb_decoder_exit(d);
  }
  upb_pop(d);
}
|
|
|
|
|
|
|
|
|
static void upb_decoder_enterjit(upb_decoder *d) { |
|
|
|
|
(void)d; |
|
|
|
|
#ifdef UPB_USE_JIT_X64 |
|
|
|
|
if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) { |
|
|
|
|
// Decodes as many fields as possible, updating d->ptr appropriately,
|
|
|
|
|
// before falling through to the slow(er) path.
|
|
|
|
|
#ifdef UPB_USE_JIT_X64 |
|
|
|
|
void (*upb_jit_decode)(upb_decoder *d) = (void*)d->jit_code; |
|
|
|
|
if (d->jit_code && d->dispatcher.top == d->dispatcher.stack && d->ptr < d->jit_end) { |
|
|
|
|
//const char *before = d->ptr;
|
|
|
|
|
//fprintf(stderr, "Entering JIT, JIT bytes left: %zd\n", d->jit_end - d->ptr);
|
|
|
|
|
upb_jit_decode(d); |
|
|
|
|
//fprintf(stderr, "Exiting JIT, parsed %zd bytes\n", d->ptr - before);
|
|
|
|
|
//fprintf(stderr, "ptr: %p, effective_end: %p, jit_end: %p, effective_end-ptr=%d\n",
|
|
|
|
|
// d->ptr, d->effective_end, d->jit_end, d->effective_end - d->ptr);
|
|
|
|
|
} |
|
|
|
|
upb_jit_decode(d); |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Parse/handle tag.
|
|
|
|
|
uint32_t tag; |
|
|
|
|
if (!upb_decode_tag(d, &tag)) { |
|
|
|
|
if (status->code == UPB_EOF && upb_dispatcher_stackempty(&d->dispatcher)) { |
|
|
|
|
// Normal end-of-file.
|
|
|
|
|
upb_clearerr(status); |
|
|
|
|
upb_dispatch_endmsg(&d->dispatcher, status); |
|
|
|
|
return; |
|
|
|
|
} else { |
|
|
|
|
if (status->code == UPB_EOF) { |
|
|
|
|
upb_seterr(status, UPB_ERROR, |
|
|
|
|
"Input ended in the middle of a submessage."); |
|
|
|
|
} |
|
|
|
|
goto err; |
|
|
|
|
} |
|
|
|
|
INLINE upb_fieldent *upb_decode_tag(upb_decoder *d) { |
|
|
|
|
while (1) { |
|
|
|
|
uint32_t tag = upb_decode_varint32(d, false); |
|
|
|
|
upb_fieldent *f = upb_dispatcher_lookup(&d->dispatcher, tag); |
|
|
|
|
if (f) { |
|
|
|
|
d->f = f; |
|
|
|
|
return f; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Decode wire data. Hopefully this branch will predict pretty well
|
|
|
|
|
// since most types will read a varint here.
|
|
|
|
|
upb_value val; |
|
|
|
|
uint8_t wire_type = tag & 0x7; |
|
|
|
|
switch (wire_type) { |
|
|
|
|
case UPB_WIRE_TYPE_START_GROUP: |
|
|
|
|
break; // Nothing to do now, below we will push appropriately.
|
|
|
|
|
case UPB_WIRE_TYPE_END_GROUP: |
|
|
|
|
// Strictly speaking we should also check the field number here.
|
|
|
|
|
if(d->dispatcher.top->f->type != UPB_TYPE(GROUP)) { |
|
|
|
|
upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); |
|
|
|
|
goto err; |
|
|
|
|
} |
|
|
|
|
CHECK_FLOW(upb_pop(d)); |
|
|
|
|
continue; // We have no value to dispatch.
|
|
|
|
|
case UPB_WIRE_TYPE_VARINT: |
|
|
|
|
switch (tag & 0x7) { |
|
|
|
|
case UPB_WIRE_TYPE_VARINT: upb_decode_varint(d); break; |
|
|
|
|
case UPB_WIRE_TYPE_32BIT: upb_decoder_advance(d, 4); break; |
|
|
|
|
case UPB_WIRE_TYPE_64BIT: upb_decoder_advance(d, 8); break; |
|
|
|
|
case UPB_WIRE_TYPE_DELIMITED: |
|
|
|
|
// For the delimited case we are parsing the length.
|
|
|
|
|
CHECK(upb_decode_varint(d, &val)); |
|
|
|
|
break; |
|
|
|
|
case UPB_WIRE_TYPE_32BIT: |
|
|
|
|
CHECK(upb_decode_fixed(d, 4, &val)); |
|
|
|
|
break; |
|
|
|
|
case UPB_WIRE_TYPE_64BIT: |
|
|
|
|
CHECK(upb_decode_fixed(d, 8, &val)); |
|
|
|
|
upb_decoder_advance(d, upb_decode_varint32(d, true)); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
// TODO: deliver to unknown field callback.
|
|
|
|
|
while (d->ptr >= d->submsg_end) upb_delimend(d); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Look up field by tag number.
|
|
|
|
|
upb_dispatcher_field *f = upb_dispatcher_lookup(&d->dispatcher, tag); |
|
|
|
|
|
|
|
|
|
if (!f) { |
|
|
|
|
if (wire_type == UPB_WIRE_TYPE_DELIMITED) |
|
|
|
|
CHECK(upb_decode_string(d, &val, &d->tmp)); |
|
|
|
|
// TODO.
|
|
|
|
|
CHECK_FLOW(upb_dispatch_unknownval(&d->dispatcher, 0, UPB_NO_VALUE)); |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
// Have to handle both packed and non-packed sequences of primitives.
|
|
|
|
|
//if (d->dispatcher.top->end_offset == UPB_REPEATEDEND && d->f != f) {
|
|
|
|
|
// upb_dispatch_endseq(&d->dispatcher);
|
|
|
|
|
//} else if (f->is_repeated_primitive) {
|
|
|
|
|
// if ((tag & 0x7) == UPB_WIRE_TYPE_DELIMITED) {
|
|
|
|
|
// upb_pushseq(d, f, upb_decode_varint32(d, true) + (d->ptr - d->buf));
|
|
|
|
|
// } else if (d->f != f) {
|
|
|
|
|
// upb_dispatch_startseq(d, f, UPB_REPEATEDEND);
|
|
|
|
|
// }
|
|
|
|
|
//}
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Perform any further massaging of the data now that we have the field's
|
|
|
|
|
// type. Now we can distinguish strings from submessages, and we know
|
|
|
|
|
// about zig-zag-encoded types.
|
|
|
|
|
// TODO: handle packed encoding.
|
|
|
|
|
// TODO: if we were being paranoid, we could check for 32-bit-varint types
|
|
|
|
|
// that the top 32 bits all match the highest bit of the low 32 bits.
|
|
|
|
|
// If this is not true we are losing data. But the main protobuf library
|
|
|
|
|
// doesn't check this, and it would slow us down, so pass for now.
|
|
|
|
|
switch (f->type) { |
|
|
|
|
case UPB_TYPE(GROUP): |
|
|
|
|
CHECK_FLOW(upb_push(d, f, UINT32_MAX)); |
|
|
|
|
continue; // We have no value to dispatch.
|
|
|
|
|
case UPB_TYPE(MESSAGE): |
|
|
|
|
CHECK_FLOW(upb_push(d, f, upb_value_getuint32(val) + (d->ptr - d->buf))); |
|
|
|
|
continue; // We have no value to dispatch.
|
|
|
|
|
case UPB_TYPE(STRING): |
|
|
|
|
case UPB_TYPE(BYTES): |
|
|
|
|
CHECK(upb_decode_string(d, &val, &d->tmp)); |
|
|
|
|
break; |
|
|
|
|
case UPB_TYPE(SINT32): |
|
|
|
|
upb_value_setint32(&val, upb_zzdec_32(upb_value_getint32(val))); |
|
|
|
|
break; |
|
|
|
|
case UPB_TYPE(SINT64): |
|
|
|
|
upb_value_setint64(&val, upb_zzdec_64(upb_value_getint64(val))); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
#ifndef NDEBUG |
|
|
|
|
val.type = upb_types[f->type].inmemory_type; |
|
|
|
|
#endif |
|
|
|
|
break; // Other types need no further processing at this point.
|
|
|
|
|
} |
|
|
|
|
CHECK_FLOW(upb_dispatch_value(&d->dispatcher, f, val)); |
|
|
|
|
// Finalizes the status after the decoder has exited.
//  - EOF with an empty dispatcher stack is a normal end of input: the status is
//    cleared and the end-of-message event is dispatched.
//  - EOF with frames still open is turned into an error.
//  - Any other status is left untouched.
void upb_decoder_onexit(upb_decoder *d) {
  upb_status *st = d->status;
  if (st->code != UPB_EOF) return;

  if (upb_dispatcher_stackempty(&d->dispatcher)) {
    // Normal end-of-file.
    upb_clearerr(st);
    upb_dispatch_endmsg(&d->dispatcher, st);
  } else {
    upb_seterr(st, UPB_ERROR, "Input ended mid-submessage.");
  }
}
|
|
|
|
|
|
|
|
|
callback_err: |
|
|
|
|
if (upb_ok(status)) { |
|
|
|
|
upb_seterr(status, UPB_ERROR, "Callback returned UPB_BREAK"); |
|
|
|
|
void upb_decoder_decode(upb_decoder *d, upb_status *status) { |
|
|
|
|
if (sigsetjmp(d->exitjmp, 0)) { |
|
|
|
|
upb_decoder_onexit(d); |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
d->status = status; |
|
|
|
|
upb_pullbuf(d, true); |
|
|
|
|
upb_dispatch_startmsg(&d->dispatcher); |
|
|
|
|
while(1) { // Main loop: executed once per tag/field pair.
|
|
|
|
|
while (d->ptr >= d->submsg_end) upb_delimend(d); |
|
|
|
|
upb_decoder_enterjit(d); |
|
|
|
|
// if (!d->dispatcher.top->is_packed)
|
|
|
|
|
upb_fieldent *f = upb_decode_tag(d); |
|
|
|
|
f->decode(d, f); |
|
|
|
|
} |
|
|
|
|
err: |
|
|
|
|
assert(!upb_ok(status)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) { |
|
|
|
@ -363,9 +335,38 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) { |
|
|
|
|
if (d->dispatcher.handlers->should_jit) upb_decoder_makejit(d); |
|
|
|
|
#endif |
|
|
|
|
d->bufstr = NULL; |
|
|
|
|
d->buf = NULL; |
|
|
|
|
d->tmp = NULL; |
|
|
|
|
upb_string_recycle(&d->tmp); |
|
|
|
|
|
|
|
|
|
// Set function pointers for each field's decode function.
|
|
|
|
|
for (int i = 0; i < handlers->msgs_len; i++) { |
|
|
|
|
upb_msgent *m = &handlers->msgs[i]; |
|
|
|
|
for(upb_inttable_iter i = upb_inttable_begin(&m->fieldtab); !upb_inttable_done(i); |
|
|
|
|
i = upb_inttable_next(&m->fieldtab, i)) { |
|
|
|
|
upb_fieldent *f = upb_inttable_iter_value(i); |
|
|
|
|
switch (f->type) { |
|
|
|
|
case UPB_TYPE(INT32): f->decode = &upb_decode_INT32; break; |
|
|
|
|
case UPB_TYPE(INT64): f->decode = &upb_decode_INT64; break; |
|
|
|
|
case UPB_TYPE(UINT32): f->decode = &upb_decode_UINT32; break; |
|
|
|
|
case UPB_TYPE(UINT64): f->decode = &upb_decode_UINT64; break; |
|
|
|
|
case UPB_TYPE(FIXED32): f->decode = &upb_decode_FIXED32; break; |
|
|
|
|
case UPB_TYPE(FIXED64): f->decode = &upb_decode_FIXED64; break; |
|
|
|
|
case UPB_TYPE(SFIXED32): f->decode = &upb_decode_SFIXED32; break; |
|
|
|
|
case UPB_TYPE(SFIXED64): f->decode = &upb_decode_SFIXED64; break; |
|
|
|
|
case UPB_TYPE(BOOL): f->decode = &upb_decode_BOOL; break; |
|
|
|
|
case UPB_TYPE(ENUM): f->decode = &upb_decode_ENUM; break; |
|
|
|
|
case UPB_TYPE(DOUBLE): f->decode = &upb_decode_DOUBLE; break; |
|
|
|
|
case UPB_TYPE(FLOAT): f->decode = &upb_decode_FLOAT; break; |
|
|
|
|
case UPB_TYPE(SINT32): f->decode = &upb_decode_SINT32; break; |
|
|
|
|
case UPB_TYPE(SINT64): f->decode = &upb_decode_SINT64; break; |
|
|
|
|
case UPB_TYPE(STRING): f->decode = &upb_decode_STRING; break; |
|
|
|
|
case UPB_TYPE(BYTES): f->decode = &upb_decode_STRING; break; |
|
|
|
|
case UPB_TYPE(GROUP): f->decode = &upb_decode_GROUP; break; |
|
|
|
|
case UPB_TYPE(MESSAGE): f->decode = &upb_decode_MESSAGE; break; |
|
|
|
|
case UPB_TYPE_ENDGROUP: f->decode = &upb_endgroup; break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) { |
|
|
|
|