Major revision to upb_parse.

pull/13171/head
Joshua Haberman 16 years ago
parent 9fcddef19f
commit f813688f3e
  1. 14
      tests.c
  2. 21
      upb.h
  3. 304
      upb_parse.c
  4. 66
      upb_parse.h

@ -10,7 +10,7 @@ void test_get_v_uint64_t()
uint8_t zero[] = {0x00};
void *zero_buf = zero;
uint64_t zero_val = 0;
status = get_v_uint64_t(&zero_buf, sizeof(zero), &zero_val);
status = get_v_uint64_t(&zero_buf, zero + sizeof(zero), &zero_val);
assert(status == UPB_STATUS_OK);
assert(zero_val == 0);
assert(zero_buf == zero + sizeof(zero));
@ -18,7 +18,7 @@ void test_get_v_uint64_t()
uint8_t one[] = {0x01};
void *one_buf = one;
uint64_t one_val = 0;
status = get_v_uint64_t(&one_buf, sizeof(one), &one_val);
status = get_v_uint64_t(&one_buf, one + sizeof(one), &one_val);
assert(status == UPB_STATUS_OK);
assert(one_val == 1);
assert(one_buf == one + sizeof(one));
@ -26,7 +26,7 @@ void test_get_v_uint64_t()
uint8_t twobyte[] = {0xAC, 0x02};
void *twobyte_buf = twobyte;
uint64_t twobyte_val = 0;
status = get_v_uint64_t(&twobyte_buf, sizeof(twobyte), &twobyte_val);
status = get_v_uint64_t(&twobyte_buf, twobyte + sizeof(twobyte), &twobyte_val);
assert(status == UPB_STATUS_OK);
assert(twobyte_val == 300);
assert(twobyte_buf == twobyte + sizeof(twobyte));
@ -34,7 +34,7 @@ void test_get_v_uint64_t()
uint8_t tenbyte[] = {0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x7F};
void *tenbyte_buf = tenbyte;
uint64_t tenbyte_val = 0;
status = get_v_uint64_t(&tenbyte_buf, sizeof(tenbyte), &tenbyte_val);
status = get_v_uint64_t(&tenbyte_buf, tenbyte + sizeof(tenbyte), &tenbyte_val);
assert(status == UPB_STATUS_OK);
assert(tenbyte_val == 0x89101c305080c101);
assert(tenbyte_buf == tenbyte + sizeof(tenbyte));
@ -42,12 +42,12 @@ void test_get_v_uint64_t()
uint8_t elevenbyte[] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01};
void *elevenbyte_buf = elevenbyte;
uint64_t elevenbyte_val = 0;
status = get_v_uint64_t(&elevenbyte_buf, sizeof(elevenbyte), &elevenbyte_val);
status = get_v_uint64_t(&elevenbyte_buf, elevenbyte + sizeof(elevenbyte), &elevenbyte_val);
assert(status == UPB_ERROR_UNTERMINATED_VARINT);
status = get_v_uint64_t(&elevenbyte_buf, sizeof(elevenbyte)-1, &elevenbyte_val);
status = get_v_uint64_t(&elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-1, &elevenbyte_val);
/* Byte 10 is 0x80, so we know it's unterminated. */
assert(status == UPB_ERROR_UNTERMINATED_VARINT);
status = get_v_uint64_t(&elevenbyte_buf, sizeof(elevenbyte)-2, &elevenbyte_val);
status = get_v_uint64_t(&elevenbyte_buf, elevenbyte + sizeof(elevenbyte)-2, &elevenbyte_val);
assert(status == UPB_STATUS_NEED_MORE_DATA);
}

21
upb.h

@ -16,6 +16,15 @@
extern "C" {
#endif
/* Branch-prediction hints.  When __GNUC__ is defined the condition is routed
 * through __builtin_expect together with its expected outcome (1 for likely,
 * 0 for unlikely); on other compilers both macros expand to the bare
 * expression, so the value of the condition is the same either way. */
#if defined(__GNUC__)
#  define likely(x)   __builtin_expect((x), 1)
#  define unlikely(x) __builtin_expect((x), 0)
#else
#  define likely(x)   (x)
#  define unlikely(x) (x)
#endif
/* inline if possible, emit standalone code if required. */
#define INLINE static inline
@ -113,6 +122,9 @@ typedef enum upb_status {
// The input byte stream ended in the middle of a record.
UPB_STATUS_NEED_MORE_DATA = 1,
// The user value callback opted to stop parsing.
UPB_STATUS_USER_CANCELLED = 2,
// A varint did not terminate before hitting 64 bits.
UPB_ERROR_UNTERMINATED_VARINT = -1,
@ -126,9 +138,16 @@ typedef enum upb_status {
UPB_ERROR_OVERFLOW = -4,
// An "end group" tag was encountered in an inappropriate place.
UPB_ERROR_SPURIOUS_END_GROUP = -5
UPB_ERROR_SPURIOUS_END_GROUP = -5,
UPB_ERROR_ILLEGAL = -6
} upb_status_t;
/* Evaluates `func` (an expression yielding a upb_status_t) exactly once and,
 * unless the result is UPB_STATUS_OK, returns that result from the enclosing
 * function.  The argument is parenthesized, and the temporary carries a
 * macro-private name so that an argument which itself mentions a caller
 * variable called `status` is not captured by the temporary. */
#define UPB_CHECK(func) do { \
  upb_status_t upb_check_status_ = (func); \
  if(upb_check_status_ != UPB_STATUS_OK) return upb_check_status_; \
} while (0)
#ifdef __cplusplus
} /* extern "C" */
#endif

@ -12,112 +12,92 @@
#include <string.h>
#include "descriptor.h"
/* Branch prediction hints for GCC. */
#ifdef __GNUC__
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif
#define CHECK(func) do { \
upb_status_t status = func; \
if(status != UPB_STATUS_OK) return status; \
} while (0)
/* Lowest-level functions -- these read integers from the input buffer. */
/* Returns the smaller of the two sizes (b when they compare equal). */
static size_t min(size_t a, size_t b)
{
  if(b > a) return a;
  return b;
}
/* Clamps a read limit so that a reader starting at buf consumes at most
 * maxlen bytes.
 *
 * buf:         current read position.
 * end:         one past the last byte available in the input buffer.
 * maxlen:      maximum number of bytes the value being read may occupy.
 * bound_error: receives the status the caller should report if it reaches the
 *              returned limit without finishing the value:
 *              UPB_STATUS_NEED_MORE_DATA when the buffer ran out first,
 *              UPB_ERROR_UNTERMINATED_VARINT when maxlen bytes were available.
 *
 * Returns end if fewer than maxlen bytes are available, otherwise
 * buf + maxlen.
 *
 * The remaining byte count is compared instead of forming buf + maxlen up
 * front, so no pointer beyond one-past-the-end of the buffer is ever created
 * when the buffer is shorter than maxlen. */
static void *check_end(uint8_t *buf, void *end, size_t maxlen,
                       upb_status_t *bound_error)
{
  uint8_t *limit = end;
  if(limit < buf || (size_t)(limit - buf) < maxlen) {
    *bound_error = UPB_STATUS_NEED_MORE_DATA;
    return end;
  }
  *bound_error = UPB_ERROR_UNTERMINATED_VARINT;
  return buf + maxlen;
}
static upb_status_t get_v_uint64_t(void *restrict *buf, size_t len,
static upb_status_t get_v_uint64_t(void *restrict *buf, void *end,
uint64_t *restrict val)
{
uint32_t bitpos, bytes = min(len, 10);
uint8_t *b = *buf;
uint8_t *end = b + bytes;
upb_status_t bound_error;
end = check_end(b, end, 10, &bound_error); /* 2**64 is a 10-byte varint. */
uint8_t last = 0x80;
*val = 0;
for(bitpos = 0; b < end && (last & 0x80); b++, bitpos += 7)
for(int bitpos = 0; b < (uint8_t*)end && (last & 0x80); b++, bitpos += 7)
*val |= ((uint64_t)((last = *b) & 0x7F)) << bitpos;
if(unlikely(last & 0x80)) {
return bytes < 10 ? UPB_STATUS_NEED_MORE_DATA : UPB_ERROR_UNTERMINATED_VARINT;
} else {
*buf = b;
return UPB_STATUS_OK;
}
if(unlikely(last & 0x80)) return bound_error;
*buf = b;
return UPB_STATUS_OK;
}
static upb_status_t skip_v_uint64_t(void **buf, size_t len)
static upb_status_t skip_v_uint64_t(void **buf, void *end)
{
uint32_t bytes = min(len, 10);
uint8_t *b = *buf;
uint8_t *end = b + bytes;
upb_status_t bound_error;
end = check_end(b, end, 10, &bound_error); /* 2**64 is a 10-byte varint. */
uint8_t last = 0x80;
for(; b < end && (last & 0x80); b++)
for(; b < (uint8_t*)end && (last & 0x80); b++)
last = *b;
if(unlikely(b == end)) {
return bytes < 10 ? UPB_STATUS_NEED_MORE_DATA : UPB_ERROR_UNTERMINATED_VARINT;
} else {
*buf = b;
return UPB_STATUS_OK;
}
if(unlikely(last & 0x80)) return bound_error;
*buf = b;
return UPB_STATUS_OK;
}
static upb_status_t get_v_uint32_t(void *restrict *buf, size_t len,
static upb_status_t get_v_uint32_t(void *restrict *buf, void *end,
uint32_t *restrict val)
{
uint32_t bitpos, bytes = min(len, 5);
uint8_t *b = *buf;
uint8_t *end = b + bytes;
upb_status_t bound_error;
end = check_end(b, end, 5, &bound_error); /* 2**32 is a 5-byte varint. */
uint8_t last = 0x80;
*val = 0;
for(bitpos = 0; b < end && (last & 0x80); b++, bitpos += 7)
for(int bitpos = 0; b < (uint8_t*)end && (last & 0x80); b++, bitpos += 7)
*val |= ((uint32_t)((last = *b) & 0x7F)) << bitpos;
if(unlikely(b == end)) {
return bytes < 5 ? UPB_STATUS_NEED_MORE_DATA : UPB_ERROR_UNTERMINATED_VARINT;
} else {
*buf = b;
return UPB_STATUS_OK;
}
if(unlikely(last & 0x80)) return bound_error;
*buf = b;
return UPB_STATUS_OK;
}
#define SHL(val, bits) ((uint32_t)val << bits)
static upb_status_t get_f_uint32_t(void *restrict *buf, size_t len,
static upb_status_t get_f_uint32_t(void *restrict *buf, void *end,
uint32_t *restrict val)
{
const uint8_t size = sizeof(uint32_t);
if(unlikely(len < size)) return UPB_STATUS_NEED_MORE_DATA;
uint8_t *b = *buf;
void *uint32_end = (uint8_t*)*buf + sizeof(uint32_t);
if(unlikely(uint32_end > end)) return UPB_STATUS_NEED_MORE_DATA;
#if UPB_UNALIGNED_READS_OK
*val = *(uint32_t*)b;
#else
#define SHL(val, bits) ((uint32_t)val << bits)
*val = SHL(b[0], 0) | SHL(b[1], 8) | SHL(b[2], 16) | SHL(b[3], 24);
#endif
b += size;
*buf = b;
return UPB_STATUS_OK;
}
#undef SHL
static upb_status_t skip_f_uint32_t(void **buf, size_t len)
{
const uint8_t size = sizeof(uint32_t);
if(unlikely(len < size)) return UPB_STATUS_NEED_MORE_DATA;
*buf = (char*)*buf + size;
#endif
*buf = uint32_end;
return UPB_STATUS_OK;
}
static upb_status_t get_f_uint64_t(void *restrict *buf, size_t len,
static upb_status_t get_f_uint64_t(void *restrict *buf, void *end,
uint64_t *restrict val)
{
if(unlikely(len < sizeof(uint64_t))) return UPB_STATUS_NEED_MORE_DATA;
void *uint64_end = (uint8_t*)*buf + sizeof(uint64_t);
if(unlikely(uint64_end > end)) return UPB_STATUS_NEED_MORE_DATA;
#if UPB_UNALIGNED_READS_OK
*val = *(uint64_t*)*buf;
*buf = (char*)*buf + sizeof(uint64_t);
*buf = uint64_end;
#else
uint32_t lo32, hi32;
get_f_uint32_t(buf, &lo32);
@ -127,11 +107,19 @@ static upb_status_t get_f_uint64_t(void *restrict *buf, size_t len,
return UPB_STATUS_OK;
}
static upb_status_t skip_f_uint64_t(void **buf, size_t len)
static upb_status_t skip_f_uint32_t(void **buf, void *end)
{
const uint8_t size = sizeof(uint64_t);
if(unlikely(len < size)) return UPB_STATUS_NEED_MORE_DATA;
*buf = (char*)*buf + size;
void *uint32_end = (uint8_t*)*buf + sizeof(uint32_t);
if(unlikely(uint32_end > end)) return UPB_STATUS_NEED_MORE_DATA;
*buf = uint32_end;
return UPB_STATUS_OK;
}
/* Skips a fixed-width 64-bit value.
 *
 * buf: in/out read position; advanced by sizeof(uint64_t) on success and left
 *      untouched otherwise.
 * end: one past the last byte available.
 *
 * Returns UPB_STATUS_OK, or UPB_STATUS_NEED_MORE_DATA when fewer than
 * sizeof(uint64_t) bytes remain.  The check is made on the remaining byte
 * count so that no pointer past the end of the buffer is formed before the
 * bounds test. */
static upb_status_t skip_f_uint64_t(void **buf, void *end)
{
  uint8_t *pos = *buf;
  uint8_t *limit = end;
  if(unlikely(limit < pos || (size_t)(limit - pos) < sizeof(uint64_t)))
    return UPB_STATUS_NEED_MORE_DATA;
  *buf = pos + sizeof(uint64_t);
  return UPB_STATUS_OK;
}
@ -145,9 +133,9 @@ static int64_t zz_decode_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
static void wvtov_ ## type(wire_t s, val_t *d)
#define GET(type, v_or_f, wire_t, val_t, member_name) \
static upb_status_t get_ ## type(void **buf, size_t len, val_t *d) { \
static upb_status_t get_ ## type(void **buf, void *end, val_t *d) { \
wire_t tmp; \
CHECK(get_ ## v_or_f ## _ ## wire_t(buf, len, &tmp)); \
UPB_CHECK(get_ ## v_or_f ## _ ## wire_t(buf, end, &tmp)); \
wvtov_ ## type(tmp, d); \
return UPB_STATUS_OK; \
}
@ -187,9 +175,6 @@ struct upb_type_info upb_type_info[] = {
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64] = {alignof(uint64_t), sizeof(uint64_t), UPB_WIRE_TYPE_64BIT},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_32BIT},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL] = {alignof(bool), sizeof(bool), UPB_WIRE_TYPE_VARINT},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = {0,0,0},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE] = {alignof(void*), sizeof(void*), UPB_WIRE_TYPE_DELIMITED},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM] = {alignof(uint32_t), sizeof(uint32_t), UPB_WIRE_TYPE_VARINT},
@ -197,69 +182,51 @@ struct upb_type_info upb_type_info[] = {
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64]= {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_64BIT},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32] = {alignof(int32_t), sizeof(int32_t), UPB_WIRE_TYPE_VARINT},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64] = {alignof(int64_t), sizeof(int64_t), UPB_WIRE_TYPE_VARINT},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING] = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES] = {alignof(struct upb_string), sizeof(struct upb_string), UPB_WIRE_TYPE_DELIMITED},
[GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP] = {0,0,0},
};
upb_status_t upb_parse_tag(void **buf, size_t len, struct upb_tag *tag)
upb_status_t upb_parse_tag(void **buf, void *end, struct upb_tag *tag)
{
uint32_t tag_int;
CHECK(get_v_uint32_t(buf, len, &tag_int));
UPB_CHECK(get_v_uint32_t(buf, end, &tag_int));
tag->wire_type = (upb_wire_type_t)(tag_int & 0x07);
tag->field_number = tag_int >> 3;
return UPB_STATUS_OK;
}
upb_status_t upb_parse_wire_value(void *buf, size_t len, size_t *offset,
upb_wire_type_t wt,
upb_status_t upb_parse_wire_value(void **buf, void *end, upb_wire_type_t wt,
union upb_wire_value *wv)
{
#define READ(expr) CHECK(expr); *offset += ((char*)b-(char*)buf)
void *b = buf;
switch(wt) {
case UPB_WIRE_TYPE_VARINT: READ(get_v_uint64_t(&b, len, &wv->varint)); break;
case UPB_WIRE_TYPE_64BIT: READ(get_f_uint64_t(&b, len, &wv->_64bit)); break;
case UPB_WIRE_TYPE_32BIT: READ(get_f_uint32_t(&b, len, &wv->_32bit)); break;
case UPB_WIRE_TYPE_DELIMITED:
READ(get_v_uint32_t(&b, len, &wv->_32bit));
size_t new_offset = *offset + wv->_32bit;
if (new_offset < *offset) return UPB_ERROR_OVERFLOW;
*offset = new_offset;
break;
case UPB_WIRE_TYPE_START_GROUP:
case UPB_WIRE_TYPE_END_GROUP: break;
case UPB_WIRE_TYPE_VARINT: UPB_CHECK(get_v_uint64_t(buf, end, &wv->varint)); break;
case UPB_WIRE_TYPE_64BIT: UPB_CHECK(get_f_uint64_t(buf, end, &wv->_64bit)); break;
case UPB_WIRE_TYPE_32BIT: UPB_CHECK(get_f_uint32_t(buf, end, &wv->_32bit)); break;
default: return UPB_ERROR_ILLEGAL; /* Doesn't handle delimited, groups. */
}
return UPB_STATUS_OK;
}
upb_status_t upb_skip_wire_value(void *buf, size_t len, size_t *offset,
upb_wire_type_t wt)
upb_status_t upb_skip_wire_value(void **buf, void *end, upb_wire_type_t wt)
{
void *b = buf;
switch(wt) {
case UPB_WIRE_TYPE_VARINT: READ(skip_v_uint64_t(&b, len)); break;
case UPB_WIRE_TYPE_64BIT: READ(skip_f_uint64_t(&b, len)); break;
case UPB_WIRE_TYPE_32BIT: READ(skip_f_uint32_t(&b, len)); break;
case UPB_WIRE_TYPE_DELIMITED: {
/* Have to get (not skip) the length to skip the bytes. */
uint32_t delim_len;
READ(get_v_uint32_t(&b, len, &delim_len));
size_t new_offset = *offset + delim_len;
if (new_offset < *offset) return UPB_ERROR_OVERFLOW;
*offset = new_offset;
break;
}
case UPB_WIRE_TYPE_VARINT: UPB_CHECK(skip_v_uint64_t(buf, end)); break;
case UPB_WIRE_TYPE_64BIT: UPB_CHECK(skip_f_uint64_t(buf, end)); break;
case UPB_WIRE_TYPE_32BIT: UPB_CHECK(skip_f_uint32_t(buf, end)); break;
case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */
case UPB_WIRE_TYPE_END_GROUP: break;
default: return UPB_ERROR_ILLEGAL;
}
return UPB_STATUS_OK;
#undef READ
}
upb_status_t upb_parse_value(void **b, size_t len, upb_field_type_t ft,
upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft,
union upb_value *v)
{
#define CASE(t, member_name) \
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
return get_ ## t(b, len, &v->member_name);
return get_ ## t(buf, end, &v->member_name);
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
@ -275,11 +242,7 @@ upb_status_t upb_parse_value(void **b, size_t len, upb_field_type_t ft,
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES:
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE:
return get_INT32(b, len, &v->int32);
default: return 0; /* Including GROUP -- groups have no value. */
default: return UPB_ERROR_ILLEGAL;
}
#undef CASE
}
@ -291,8 +254,6 @@ void upb_parse_state_init(struct upb_parse_state *state, size_t udata_size)
state->stack = state->top = malloc(stack_bytes);
state->limit = (struct upb_parse_stack_frame*)((char*)state->stack + stack_bytes);
state->udata_size = udata_size;
state->done = false;
state->packed_end_offset = 0;
}
void upb_parse_state_free(struct upb_parse_state *state)
@ -318,60 +279,89 @@ static upb_status_t push_stack_frame(struct upb_parse_state *s, size_t end,
return UPB_STATUS_OK;
}
#if 0
/* Handles a field whose wire type is "delimited": reads the length prefix,
 * asks the tag callback what the field is, and then either pushes a stack
 * frame (submessage), hands the bytes to the string callback (string/bytes),
 * skips them (tag callback returned 0), or feeds them value by value to the
 * value callback (packed array).
 *
 * s:           parse state holding the callbacks and the frame stack.
 * tag:         the tag that was just parsed for this field.
 * buf:         in/out read position; on entry points at the length prefix.
 *              For a submessage it is left at the first payload byte,
 *              otherwise it is advanced past the payload.
 * end:         one past the last byte available.
 * base_offset: stream offset corresponding to *buf on entry.
 *              NOTE(review): inferred from the visible call in upb_parse,
 *              which passes s->offset plus the bytes consumed by the tag --
 *              confirm.
 *
 * Returns UPB_STATUS_OK on success, UPB_ERROR_OVERFLOW for a negative length,
 * UPB_STATUS_NEED_MORE_DATA when a non-submessage payload is not fully
 * available, or whatever non-OK status a nested call or callback reports. */
static upb_status_t parse_delimited(struct upb_parse_state *s,
                                    struct upb_tag *tag,
                                    void **buf, void *end,
                                    size_t base_offset)
{
  int32_t delim_len;
  void *user_field_desc;
  void *bufstart = *buf;

  /* Whether we are parsing or skipping the field, we always need to parse
   * the length. */
  UPB_CHECK(get_INT32(buf, end, &delim_len));
  /* The length is read into a signed 32-bit value; a negative result cannot
   * be a byte count and would move the read position backwards. */
  if(unlikely(delim_len < 0)) return UPB_ERROR_OVERFLOW;

  /* Bytes taken by the length prefix, and bytes left after it. */
  size_t len_bytes = (size_t)((char*)*buf - (char*)bufstart);
  size_t avail = (size_t)((char*)end - (char*)*buf);

  upb_field_type_t ft = s->tag_cb(s, tag, &user_field_desc);

  if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
    /* Streaming submessages is ok, so no availability check here.  The frame
     * ends delim_len bytes after the payload start, which itself lies
     * len_bytes past base_offset. */
    UPB_CHECK(push_stack_frame(s, base_offset + len_bytes + (size_t)delim_len,
                               user_field_desc));
    return UPB_STATUS_OK;
  }

  /* For other delimited types (string, bytes, and packed arrays) we require
   * that all the delimited data is available.  This could be relaxed if
   * desired. */
  if(unlikely((size_t)delim_len > avail)) return UPB_STATUS_NEED_MORE_DATA;

  void *delim_end = (char*)*buf + delim_len;
  if(ft == 0) {
    /* The tag callback asked to skip this field. */
    *buf = delim_end;
  } else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING ||
            ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES) {
    struct upb_string str = {.data = *buf, .byte_len = delim_len};
    UPB_CHECK(s->str_cb(s, &str, user_field_desc));
    *buf = delim_end;
  } else {
    /* Packed Array.  Each value is bounded by the end of the packed region
     * so that a value cannot be read from the bytes that follow it. */
    while(*buf < delim_end)
      UPB_CHECK(s->value_cb(s, buf, delim_end, ft, user_field_desc));
  }
  return UPB_STATUS_OK;
}
/* Handles a field that is not length-delimited: a simple value or the start
 * of a group.  The tag callback decides what happens:
 *   - 0 means the value is skipped according to the tag's wire type;
 *   - the GROUP field type pushes a stack frame with end offset 0 (no length
 *     is specified; an "end group" tag will mark the end);
 *   - any other field type is handed to the value callback.
 * Returns the status of whichever of those operations was performed. */
static upb_status_t parse_nondelimited(struct upb_parse_state *s,
                                       struct upb_tag *tag,
                                       void **buf, void *end)
{
  void *field_desc;
  upb_field_type_t type = s->tag_cb(s, tag, &field_desc);

  if(type == 0)
    return upb_skip_wire_value(buf, end, tag->wire_type);

  if(type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP)
    return push_stack_frame(s, 0, field_desc);

  return s->value_cb(s, buf, end, type, field_desc);
}
upb_status_t upb_parse(struct upb_parse_state *s, void *buf, size_t len,
size_t *read)
{
size_t start_offset = s->offset;
size_t end_offset = start_offset + len;
while(!s->done && s->offset < end_offset) {
while(s->offset >= s->top->end_offset) pop_stack_frame(s);
while(s->packed_end_offset > s->offset) {
/* Parse a packed field entry. */
void *end = (char*)buf + len;
*read = 0;
while(buf < end) {
while(s->offset >= s->top->end_offset) {
if(s->offset != s->top->end_offset) return UPB_ERROR_BAD_SUBMESSAGE_END;
pop_stack_frame(s);
}
struct upb_tag tag;
void *b = buf;
CHECK(upb_parse_tag(&b, len, &tag));
int tag_bytes = ((char*)b - (char*)buf);
s->offset += tag_bytes;
buf = b;
void *bufstart = buf;
UPB_CHECK(upb_parse_tag(&buf, end, &tag));
if(unlikely(tag.wire_type == UPB_WIRE_TYPE_END_GROUP)) {
if(unlikely(s->top->end_offset != 0)) return UPB_ERROR_SPURIOUS_END_GROUP;
pop_stack_frame(s);
continue;
}
void *user_field_desc;
//upb_field_type_t ft = s->tag_cb(s, &tag, &user_field_desc);
if(ft == 0) {
CHECK(upb_skip_wire_value(b, &s->offset, tag.wire_type));
} else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP) {
/* No length specified, an "end group" tag will mark the end. */
push_stack_frame(s, 0, user_field_desc);
} else if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
parse_delimited(s, &tag, &buf, end, s->offset + (char*)buf - (char*)bufstart);
} else {
/* For all other cases we parse the next value. */
union upb_value v;
CHECK(upb_parse_value(&b, ft, &v));
if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
/* The value we parsed is the length of the submessage. */
push_stack_frame(s, s->offset + v.delim_len, user_field_desc);
} else if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING ||
ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES) {
s->value_cb(s, &v, b, user_field_desc);
b = (char*)b + v.delim_len;
} else if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
/* Delimited data which is not a string, bytes, or a submessage.
* It must be a packed array. */
s->packed_type = ft;
s->packed_end_offset = s->offset + v.delim_len;
} else {
/* The common case: a simple value. */
s->value_cb(s, &v, b, user_field_desc);
}
parse_nondelimited(s, &tag, &buf, end);
}
size_t bytes_read = ((char*)buf - (char*)bufstart);
*read += bytes_read;
s->offset += bytes_read;
}
*read = s->offset - start_offset;
return UPB_STATUS_OK;
}
#endif

@ -29,14 +29,32 @@ void upb_parse_state_init(struct upb_parse_state *state, size_t udata_size);
void upb_parse_state_free(struct upb_parse_state *state);
/* The callback that is called immediately after a tag has been parsed. The
* client must either advance the stream beyond the corresponding value or
* return an error to indicate that the stream should rewind to before the
* tag.
* client should determine whether it wants to parse or skip the corresponding
* value. If it wants to parse it, it must discover and return the correct
* .proto type (the tag only contains the wire type) and check that the wire
* type is appropriate for the .proto type. To skip the value (which means
* skipping all submessages, in the case of a submessage), the callback should
* return zero. */
typedef upb_field_type_t (*upb_tag_cb)(struct upb_parse_state *s,
struct upb_tag *tag,
void **user_field_desc);
/* The callback that is called when a regular value (ie. not a string or
* submessage) is encountered which the client has opted to parse (by not
* returning 0 from the tag_cb). The client must parse the value and update
* buf accordingly, returning success or failure.
*
* The client advances the stream beyond the corresponding value by either
* parsing the value or skipping it. */
typedef upb_field_type_t (*upb_tag_cb)(void **buf, struct upb_parse_state *s,
struct upb_tag *tag);
* Note that this callback can be called several times in a row for a single
* call to tag_cb in the case of packed arrays. */
typedef upb_status_t (*upb_value_cb)(struct upb_parse_state *s,
void **buf, void *end,
upb_field_type_t type,
void *user_field_desc);
/* The callback that is called when a string is parsed.
 *
 * s is the parse state the callback was registered on, str describes the
 * string's bytes and user_field_desc is the value the tag callback stored
 * through its user_field_desc out-parameter for this field.  In
 * parse_delimited this callback is invoked for both STRING and BYTES fields,
 * with str->data pointing into the input buffer and str->byte_len set to the
 * delimited length.  The callback returns a upb_status_t. */
typedef upb_status_t (*upb_str_cb)(struct upb_parse_state *s,
struct upb_string *str,
void *user_field_desc);
/* Callbacks that are called when a submessage begins and ends, respectively.
* Both are called with the submessage's stack frame at the top of the stack. */
@ -55,13 +73,11 @@ struct upb_parse_state {
size_t offset;
struct upb_parse_stack_frame *stack, *top, *limit;
size_t udata_size; /* How many bytes the user gets in each frame. */
bool done; /* Any callback can abort processing by setting done=true. */
/* These are only set if we're in the middle of a packed array. */
size_t packed_end_offset; /* 0 if not in a packed array. */
upb_field_type_t packed_type;
upb_tag_cb tag_cb;
upb_tag_cb tag_cb;
upb_value_cb value_cb;
upb_str_cb str_cb;
upb_submsg_start_cb submsg_start_cb;
upb_submsg_end_cb submsg_end_cb;
upb_submsg_end_cb submsg_end_cb;
};
/* Parses up to len bytes of protobuf data out of buf, calling cb as needed.
@ -71,37 +87,35 @@ struct upb_parse_state {
upb_status_t upb_parse(struct upb_parse_state *s, void *buf, size_t len,
size_t *read);
/* Low-level parsing functions. ***********************************************/
/* Parses a single tag from the character data starting at buf, and updates
* buf to point one past the bytes that were consumed. buf will be incremented
* by at most ten bytes. */
upb_status_t upb_parse_tag(void **buf, size_t len, struct upb_tag *tag);
extern upb_wire_type_t upb_expected_wire_types[];
/* Reports whether wire type wt matches the entry that the
 * upb_expected_wire_types table holds for field type ft. */
INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
  upb_wire_type_t expected = upb_expected_wire_types[ft];
  return wt == expected;
}
/* Data-consuming functions (to be called from value cb). *********************/
/* Parses a single tag from the character data starting at *buf, reading no
 * further than end, and updates *buf to point one past the bytes that were
 * consumed. A tag is read as a 32-bit varint, so *buf will be advanced by at
 * most five bytes. */
upb_status_t upb_parse_tag(void **buf, void *end, struct upb_tag *tag);
/* Parses and converts a value from the character data starting at *buf,
 * reading no further than end. The caller must have previously checked that
 * the wire type is appropriate for this field type. Field types that have no
 * simple scalar value (string, bytes, submessage, group) are not handled
 * here and yield UPB_ERROR_ILLEGAL. */
upb_status_t upb_parse_value(void **buf, size_t len, upb_field_type_t ft,
union upb_value *value);
upb_status_t upb_parse_value(void **buf, void *end, upb_field_type_t ft,
union upb_value *v);
/* Parses a wire value with the given type (which must have been obtained from
 * a tag that was just parsed) and advances *buf past the bytes that were
 * consumed, reading no further than end. Only the varint, 64-bit and 32-bit
 * wire types are handled; any other wire type (delimited, groups) yields
 * UPB_ERROR_ILLEGAL. */
upb_status_t upb_parse_wire_value(void *buf, size_t len, size_t *offset,
upb_wire_type_t wt,
upb_status_t upb_parse_wire_value(void **buf, void *end, upb_wire_type_t wt,
union upb_wire_value *wv);
/* Like the above, but discards the wire value instead of saving it. */
upb_status_t upb_skip_wire_value(void *buf, size_t len, size_t *offset,
upb_wire_type_t wt);
upb_status_t upb_skip_wire_value(void **buf, void *end, upb_wire_type_t wt);
#ifdef __cplusplus
} /* extern "C" */

Loading…
Cancel
Save