|
|
|
@ -13,23 +13,24 @@ |
|
|
|
|
|
|
|
|
|
/* Pure Decoding **************************************************************/ |
|
|
|
|
|
|
|
|
|
// The key fast-path varint-decoding routine. There are a lot of possibilities
|
|
|
|
|
// for optimization/experimentation here.
|
|
|
|
|
INLINE bool upb_decode_varint_fast(uint8_t **buf, uint8_t *end, uint64_t &val, |
|
|
|
|
// The key fast-path varint-decoding routine. Here we can assume we have at
|
|
|
|
|
// least UPB_MAX_ENCODED_SIZE bytes available. There are a lot of
|
|
|
|
|
// possibilities for optimization/experimentation here.
|
|
|
|
|
INLINE bool upb_decode_varint_fast(uint8_t **ptr, uint64_t &val, |
|
|
|
|
upb_status *status) { |
|
|
|
|
*high = 0; |
|
|
|
|
uint32_t b; |
|
|
|
|
uint8_t *ptr = p->ptr; |
|
|
|
|
b = *(*buf++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*buf++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*buf++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*buf++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*buf++); *low |= (b & 0x7f) << 28; |
|
|
|
|
b = *(*ptr++); *low = (b & 0x7f) ; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*ptr++); *low |= (b & 0x7f) << 7; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*ptr++); *low |= (b & 0x7f) << 14; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*ptr++); *low |= (b & 0x7f) << 21; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*ptr++); *low |= (b & 0x7f) << 28; |
|
|
|
|
*high = (b & 0x7f) >> 3; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*buf++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*buf++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*buf++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*buf++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*ptr++); *high |= (b & 0x7f) << 4; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*ptr++); *high |= (b & 0x7f) << 11; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*ptr++); *high |= (b & 0x7f) << 18; if(!(b & 0x80)) goto done; |
|
|
|
|
b = *(*ptr++); *high |= (b & 0x7f) << 25; if(!(b & 0x80)) goto done; |
|
|
|
|
|
|
|
|
|
upb_seterr(status, UPB_ERROR, "Unterminated varint"); |
|
|
|
|
return false; |
|
|
|
@ -71,23 +72,51 @@ struct upb_decoder { |
|
|
|
|
// Current input buffer.
|
|
|
|
|
upb_string *buf; |
|
|
|
|
|
|
|
|
|
// Our current offset *within* buf.
|
|
|
|
|
upb_strlen_t buf_offset; |
|
|
|
|
|
|
|
|
|
// The offset within the overall stream represented by the *beginning* of buf.
|
|
|
|
|
upb_strlen_t buf_stream_offset; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
// Called only from the slow path, this function copies the next "len" bytes
|
|
|
|
|
// from the stream to "data", adjusting "buf" and "end" appropriately.
|
|
|
|
|
INLINE bool upb_getbuf(upb_decoder *d, void *data, size_t len, |
|
|
|
|
uint8_t **buf, uint8_t **end) { |
|
|
|
|
while (len > 0) { |
|
|
|
|
memcpy(data, *buf, *end-*buf); |
|
|
|
|
len -= (*end-*buf); |
|
|
|
|
// from the stream to "data", adjusting "buf" and "len" appropriately.
|
|
|
|
|
static bool upb_getbuf(upb_decoder *d, void *data, size_t bytes_wanted, |
|
|
|
|
uint8_t **ptr, size_t *len) { |
|
|
|
|
while (1) { |
|
|
|
|
memcpy(data, *ptr, *len); |
|
|
|
|
bytes_wanted -= *len; |
|
|
|
|
*ptr += *len; |
|
|
|
|
if (bytes_wanted == 0) return true; |
|
|
|
|
|
|
|
|
|
// Did "len" indicate end-of-submessage or end-of-buffer?
|
|
|
|
|
size_t buf_offset = d->buf ? (*ptr - upb_string_getrobuf(d->buf)) : 0; |
|
|
|
|
if (d->top->end_offset > 0 && |
|
|
|
|
d->top->end_offset == d->buf_stream_offset + buf_offset) { |
|
|
|
|
// End-of-submessage.
|
|
|
|
|
if (bytes_wanted > 0) { |
|
|
|
|
upb_seterr(d->status, UPB_ERROR, "Bad submessage end.") |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
if (upb_pop(d) != UPB_CONTINUE) return false; |
|
|
|
|
} else { |
|
|
|
|
// End-of-buffer.
|
|
|
|
|
if (d->buf) d->buf_stream_offset += upb_string_len(d->buf); |
|
|
|
|
if (!upb_bytesrc_getstr(d->bytesrc, d->buf, d->status)) return false; |
|
|
|
|
*buf = upb_string_getrobuf(d->buf); |
|
|
|
|
*end = *buf + upb_string_len(d->buf); |
|
|
|
|
*ptr = upb_string_getrobuf(d->buf); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Wait for end-of-submessage or end-of-buffer, whichever comes first.
|
|
|
|
|
size_t offset_in_buf = *ptr - upb_string_getrobuf(d->buf); |
|
|
|
|
size_t buf_remaining = upb_string_getbufend(d->buf) - *ptr; |
|
|
|
|
size_t submsg_remaining = |
|
|
|
|
d->top->end_offset - d->buf_stream_offset - offset_in_buf; |
|
|
|
|
if (d->top->end_offset == UPB_GROUP_END_OFFSET || |
|
|
|
|
buf_remaining > submsg_remaining) { |
|
|
|
|
*len = buf_remaining; |
|
|
|
|
} else { |
|
|
|
|
// Check that non of our subtraction overflowed.
|
|
|
|
|
assert(d->top->end_offset > d->buf_stream_offset); |
|
|
|
|
assert(d->top->end_offset - d->buf_stream_offset > offset_in_buf); |
|
|
|
|
*len = submsg_remaining; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -112,21 +141,21 @@ static bool upb_decode_varint_slow(upb_decoder *d) { |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_buf, |
|
|
|
|
const uint8_t **end, upb_tag *tag) { |
|
|
|
|
const uint8_t *buf = *_buf, *end = *_end; |
|
|
|
|
INLINE bool upb_decode_tag(upb_decoder *d, const uint8_t **_ptr, |
|
|
|
|
const uint8_t **len, upb_tag *tag) { |
|
|
|
|
const uint8_t *ptr = *_ptr, *len = *_end; |
|
|
|
|
uint32_t tag_int; |
|
|
|
|
// Nearly all tag varints will be either 1 byte (1-16) or 2 bytes (17-2048).
|
|
|
|
|
if (end - buf < 2) goto slow; // unlikely.
|
|
|
|
|
tag_int = *buf & 0x7f; |
|
|
|
|
if ((*(buf++) & 0x80) == 0) goto done; // predictable if fields are in order
|
|
|
|
|
tag_int |= (*buf & 0x7f) << 7; |
|
|
|
|
if ((*(buf++) & 0x80) != 0) goto slow; // unlikely.
|
|
|
|
|
if (len - ptr < 2) goto slow; // unlikely.
|
|
|
|
|
tag_int = *ptr & 0x7f; |
|
|
|
|
if ((*(ptr++) & 0x80) == 0) goto done; // predictable if fields are in order
|
|
|
|
|
tag_int |= (*ptr & 0x7f) << 7; |
|
|
|
|
if ((*(ptr++) & 0x80) != 0) goto slow; // unlikely.
|
|
|
|
|
slow: |
|
|
|
|
if (!upb_decode_varint_slow(d, _buf, _end)) return false; |
|
|
|
|
buf = *_buf; // Trick the next line into not overwriting us.
|
|
|
|
|
if (!upb_decode_varint_slow(d, _ptr, _end)) return false; |
|
|
|
|
ptr = *_ptr; // Trick the next line into not overwriting us.
|
|
|
|
|
done: |
|
|
|
|
*_buf = buf; |
|
|
|
|
*_ptr = ptr; |
|
|
|
|
tag->wire_type = (upb_wire_type_t)(tag_int & 0x07); |
|
|
|
|
tag->field_number = tag_int >> 3; |
|
|
|
|
return true; |
|
|
|
@ -134,22 +163,22 @@ done: |
|
|
|
|
|
|
|
|
|
INLINE bool upb_decode_varint(upb_decoder *d, ptrs *p, |
|
|
|
|
uint32_t *low, uint32_t *high) { |
|
|
|
|
if (p->end - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) |
|
|
|
|
if (p->len - p->ptr >= UPB_MAX_VARINT_ENCODED_SIZE) |
|
|
|
|
return upb_decode_varint_fast(d); |
|
|
|
|
else |
|
|
|
|
return upb_decode_varint_slow(d); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, |
|
|
|
|
uint8_t **buf, uint8_t **end, upb_value *val) { |
|
|
|
|
uint8_t **ptr, uint8_t **len, upb_value *val) { |
|
|
|
|
static const char table = {0, 8, 0, 0, 0, 4}; |
|
|
|
|
size_t bytes = table[wt]; |
|
|
|
|
if (*end - *buf >= bytes) { |
|
|
|
|
if (*len - *ptr >= bytes) { |
|
|
|
|
// Common (fast) case.
|
|
|
|
|
memcpy(&val, *buf, bytes); |
|
|
|
|
*buf += bytes; |
|
|
|
|
memcpy(&val, *ptr, bytes); |
|
|
|
|
*ptr += bytes; |
|
|
|
|
} else { |
|
|
|
|
if (!upb_getbuf(d, &val, bytes, buf, end)) return false; |
|
|
|
|
if (!upb_getptr(d, &val, bytes, ptr, len)) return false; |
|
|
|
|
} |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
@ -159,12 +188,12 @@ INLINE bool upb_decode_fixed(upb_decoder *d, upb_wire_type_t wt, |
|
|
|
|
INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) { |
|
|
|
|
upb_string_recycle(str); |
|
|
|
|
upb_strlen_t len = upb_valu_getint32(*val); |
|
|
|
|
if (*end - *buf >= len) { |
|
|
|
|
if (*len - *ptr >= len) { |
|
|
|
|
// Common (fast) case.
|
|
|
|
|
upb_string_substr(*str, d->buf, *buf - upb_string_getrobuf(d->buf), len); |
|
|
|
|
*buf += len; |
|
|
|
|
upb_string_substr(*str, d->buf, *ptr - upb_string_getrobuf(d->buf), len); |
|
|
|
|
*ptr += len; |
|
|
|
|
} else { |
|
|
|
|
if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, buf, end)) |
|
|
|
|
if (!upb_getbuf(d, upb_string_getrwbuf(*str, len), len, ptr, len)) |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
return true; |
|
|
|
@ -173,19 +202,6 @@ INLINE bool upb_decode_string(upb_decoder *d, upb_value *val, upb_string **str) |
|
|
|
|
|
|
|
|
|
/* The main decoding loop *****************************************************/ |
|
|
|
|
|
|
|
|
|
static const void *get_msgend(upb_decoder *d) |
|
|
|
|
{ |
|
|
|
|
if(d->top->end_offset > 0) |
|
|
|
|
return upb_string_getrobuf(d->buf) + (d->top->end_offset - d->buf_stream_offset); |
|
|
|
|
else |
|
|
|
|
return (void*)UINTPTR_MAX; // group.
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static bool isgroup(const void *submsg_end) |
|
|
|
|
{ |
|
|
|
|
return submsg_end == (void*)UINTPTR_MAX; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
extern upb_wire_type_t upb_expected_wire_types[]; |
|
|
|
|
// Returns true if wt is the correct on-the-wire type for ft.
|
|
|
|
|
INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { |
|
|
|
@ -193,76 +209,78 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { |
|
|
|
|
return upb_types[ft].expected_wire_type == wt; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static bool upb_push(upb_decoder *d, const uint8_t *start, |
|
|
|
|
uint32_t submsg_len, upb_fielddef *f, |
|
|
|
|
upb_status *status) |
|
|
|
|
{ |
|
|
|
|
static upb_flow_t upb_push(upb_decoder *d, upb_fielddef *f, |
|
|
|
|
upb_strlen_t submsg_len, upb_field_type_t type) { |
|
|
|
|
d->top->field = f; |
|
|
|
|
d->top++; |
|
|
|
|
if(d->top >= d->limit) { |
|
|
|
|
upb_seterr(status, UPB_ERROR, "Nesting too deep."); |
|
|
|
|
return false; |
|
|
|
|
return UPB_ERROR; |
|
|
|
|
} |
|
|
|
|
d->top->end_offset = d->completed_offset + submsg_len; |
|
|
|
|
d->top->end_offset = type == UPB_TYPE(GROUP) ? |
|
|
|
|
UPB_GROUP_END_OFFSET : d->completed_offset + submsg_len; |
|
|
|
|
d->top->msgdef = upb_downcast_msgdef(f->def); |
|
|
|
|
*submsg_end = get_msgend(d); |
|
|
|
|
if (!upb_dispatch_startsubmsg(&d->dispatcher, f)) return false; |
|
|
|
|
return true; |
|
|
|
|
return upb_dispatch_startsubmsg(&d->dispatcher, f); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static bool upb_pop(upb_decoder *d, const uint8_t *start, upb_status *status) |
|
|
|
|
{ |
|
|
|
|
static upb_flow_t upb_pop(upb_decoder *d) { |
|
|
|
|
d->top--; |
|
|
|
|
upb_dispatch_endsubmsg(&d->dispatcher); |
|
|
|
|
*submsg_end = get_msgend(d); |
|
|
|
|
return true; |
|
|
|
|
return upb_dispatch_endsubmsg(&d->dispatcher); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void upb_decoder_run(upb_src *src, upb_status *status) { |
|
|
|
|
// buf is our current offset, moves from start to end.
|
|
|
|
|
const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str) + d->buf_offset; |
|
|
|
|
const uint8_t *end = (uint8_t*)upb_string_getrobuf(str) + upb_string_len(str); |
|
|
|
|
const uint8_t *submsg_end = get_msgend(d, start); |
|
|
|
|
upb_msgdef *msgdef = d->top->msgdef; |
|
|
|
|
// We use stack variables for our frequently used vars so the compiler knows
|
|
|
|
|
// they can't be changed by external code (like when we dispatch a callback).
|
|
|
|
|
|
|
|
|
|
// Our current position in the data buffer.
|
|
|
|
|
uint8_t *ptr = NULL; |
|
|
|
|
// Number of bytes available at ptr, until either end-of-buf or
|
|
|
|
|
// end-of-submessage (whichever is smaller).
|
|
|
|
|
size_t len = 0; |
|
|
|
|
|
|
|
|
|
upb_string *str = NULL; |
|
|
|
|
|
|
|
|
|
upb_dispatch_startmsg(&d->dispatcher); |
|
|
|
|
// TODO: handle UPB_SKIPSUBMSG
|
|
|
|
|
#define CHECK_FLOW(expr) if ((expr) != UPB_CONTINUE) goto err |
|
|
|
|
#define CHECK(expr) if (!expr) goto err; |
|
|
|
|
|
|
|
|
|
CHECK_FLOW(upb_dispatch_startmsg(&d->dispatcher)); |
|
|
|
|
|
|
|
|
|
// Main loop: executed once per tag/field pair.
|
|
|
|
|
while(1) { |
|
|
|
|
// Parse/handle tag.
|
|
|
|
|
upb_tag tag; |
|
|
|
|
CHECK(upb_decode_tag(d, &buf, &end, &tag)); |
|
|
|
|
CHECK(upb_decode_tag(d, &ptr, &len, &tag)); |
|
|
|
|
|
|
|
|
|
// Decode wire data. Hopefully this branch will predict pretty well
|
|
|
|
|
// since most types will read a varint here.
|
|
|
|
|
upb_value val; |
|
|
|
|
switch (tag.wire_type) { |
|
|
|
|
case UPB_WIRE_TYPE_END_GROUP: |
|
|
|
|
if(!isgroup(submsg_end)) { |
|
|
|
|
if(d->top->end_offset != UPB_GROUP_END_OFFSET) |
|
|
|
|
upb_seterr(status, UPB_ERROR, "Unexpected END_GROUP tag."); |
|
|
|
|
goto err; |
|
|
|
|
} |
|
|
|
|
CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); |
|
|
|
|
goto check_msgend; // We have no value to dispatch.
|
|
|
|
|
CHECK_FLOW(upb_pop(d)); |
|
|
|
|
continue; // We have no value to dispatch.
|
|
|
|
|
case UPB_WIRE_TYPE_VARINT: |
|
|
|
|
case UPB_WIRE_TYPE_DELIMITED: |
|
|
|
|
// For the delimited case we are parsing the length.
|
|
|
|
|
CHECK(upb_decode_varint(d, &buf, &end, &val)); |
|
|
|
|
CHECK(upb_decode_varint(d, &ptr, &len, &val)); |
|
|
|
|
break; |
|
|
|
|
case UPB_WIRE_TYPE_32BIT: |
|
|
|
|
case UPB_WIRE_TYPE_64BIT: |
|
|
|
|
CHECK(upb_decode_fixed(d, tag.wire_type, &buf, &end, &val)); |
|
|
|
|
CHECK(upb_decode_fixed(d, tag.wire_type, &ptr, &len, &val)); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Look up field by tag number.
|
|
|
|
|
upb_fielddef *f = upb_msg_itof(msgdef, tag.field_number); |
|
|
|
|
upb_fielddef *f = upb_msg_itof(d->top->msgdef, tag.field_number); |
|
|
|
|
|
|
|
|
|
if (!f) { |
|
|
|
|
if (tag.wire_type == UPB_WIRE_TYPE_DELIMITED) |
|
|
|
|
CHECK(upb_decode_string(d, &val, &str)); |
|
|
|
|
CHECK(upb_dispatch_unknownval(d, tag.field_number, val)); |
|
|
|
|
CHECK_FLOW(upb_dispatch_unknownval(d, tag.field_number, val)); |
|
|
|
|
} else if (!upb_check_type(tag.wire_type, f->type)) { |
|
|
|
|
// TODO: put more details in this error msg.
|
|
|
|
|
upb_seterr(status, UPB_ERROR, "Field had incorrect type."); |
|
|
|
@ -280,8 +298,8 @@ void upb_decoder_run(upb_src *src, upb_status *status) { |
|
|
|
|
switch (f->type) { |
|
|
|
|
case UPB_TYPE(MESSAGE): |
|
|
|
|
case UPB_TYPE(GROUP): |
|
|
|
|
CHECK(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); |
|
|
|
|
goto check_msgend; // We have no value to dispatch.
|
|
|
|
|
CHECK_FLOW(upb_push(d, start, upb_value_getint32(val), f, status, &msgdef)); |
|
|
|
|
continue; // We have no value to dispatch.
|
|
|
|
|
case UPB_TYPE(STRING): |
|
|
|
|
case UPB_TYPE(BYTES): |
|
|
|
|
CHECK(upb_decode_string(d, &val, &str)); |
|
|
|
@ -295,19 +313,10 @@ void upb_decoder_run(upb_src *src, upb_status *status) { |
|
|
|
|
default: |
|
|
|
|
break; // Other types need no further processing at this point.
|
|
|
|
|
} |
|
|
|
|
CHECK(upb_dispatch_value(d->sink, f, val, status)); |
|
|
|
|
|
|
|
|
|
check_msgend: |
|
|
|
|
while(buf >= submsg_end) { |
|
|
|
|
if(buf > submsg_end) { |
|
|
|
|
upb_seterr(status, UPB_ERROR, "Bad submessage end.") |
|
|
|
|
goto err; |
|
|
|
|
} |
|
|
|
|
CHECK(upb_pop(d, start, status, &msgdef, &submsg_end)); |
|
|
|
|
} |
|
|
|
|
CHECK_FLOW(upb_dispatch_value(d->sink, f, val, status)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
CHECK(upb_dispatch_endmsg(&d->dispatcher)); |
|
|
|
|
CHECK_FLOW(upb_dispatch_endmsg(&d->dispatcher)); |
|
|
|
|
return; |
|
|
|
|
|
|
|
|
|
err: |
|
|
|
|