Renamed upb_stream_parser -> upb_cbparser, and API cleanup.

pull/13171/head
Joshua Haberman 16 years ago
parent 2bad79f721
commit f6ee945409
  1. 30
      src/upb_msg.c
  2. 60
      src/upb_parse.c
  3. 222
      src/upb_parse.h

@ -130,14 +130,14 @@ struct upb_msg_parser_frame {
}; };
struct upb_msg_parser { struct upb_msg_parser {
struct upb_stream_parser s; struct upb_cbparser *s;
bool merge; bool merge;
bool byref; bool byref;
struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top; struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top;
}; };
void upb_msg_parser_reset(struct upb_msg_parser *p, void upb_msgparser_init(struct upb_msg_parser *p,
struct upb_msg *msg, bool byref); struct upb_msg *msg, bool byref);
/* Parses protocol buffer data out of data which has length of len. The data /* Parses protocol buffer data out of data which has length of len. The data
* need not be a complete protocol buffer. The number of bytes parsed is * need not be a complete protocol buffer. The number of bytes parsed is
@ -223,7 +223,7 @@ static void str_cb(void *udata, uint8_t *str,
//} //}
} }
static void submsg_start_cb(void *udata, void *user_field_desc) static void start_cb(void *udata, void *user_field_desc)
{ {
struct upb_msg_parser *mp = udata; struct upb_msg_parser *mp = udata;
struct upb_msg_fielddef *f = user_field_desc; struct upb_msg_fielddef *f = user_field_desc;
@ -244,7 +244,7 @@ static void submsg_start_cb(void *udata, void *user_field_desc)
mp->top->msg = *p.msg; mp->top->msg = *p.msg;
} }
static void submsg_end_cb(void *udata) static void end_cb(void *udata)
{ {
struct upb_msg_parser *mp = udata; struct upb_msg_parser *mp = udata;
struct upb_msg *msg = mp->top->msg; struct upb_msg *msg = mp->top->msg;
@ -258,30 +258,32 @@ static void submsg_end_cb(void *udata)
upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len) upb_status_t upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len)
{ {
struct upb_msg_parser mp; struct upb_msg_parser mp;
upb_msg_parser_reset(&mp, msg, false); upb_msgparser_init(&mp, msg, false);
size_t read; size_t read;
upb_msg_clear(msg); upb_msg_clear(msg);
upb_status_t ret = upb_msg_parser_parse(&mp, buf, len, &read); upb_status_t ret = upb_msg_parser_parse(&mp, buf, len, &read);
upb_msgparser_free(&mp);
return ret; return ret;
} }
void upb_msg_parser_reset(struct upb_msg_parser *s, struct upb_msg *msg, bool byref) void upb_msgparser_init(struct upb_msg_parser *s, struct upb_msg *msg, bool byref)
{ {
upb_stream_parser_reset(&s->s, s); s->s = upb_cbparser_new();
upb_cbparser_reset(s->s, s, tag_cb, value_cb, str_cb, start_cb, end_cb);
s->byref = byref; s->byref = byref;
s->top = s->stack; s->top = s->stack;
s->top->msg = msg; s->top->msg = msg;
s->s.tag_cb = tag_cb; }
s->s.value_cb = value_cb;
s->s.str_cb = str_cb; void upb_msgparser_free(struct upb_msg_parser *s)
s->s.submsg_start_cb = submsg_start_cb; {
s->s.submsg_end_cb = submsg_end_cb; upb_cbparser_free(s->s);
} }
upb_status_t upb_msg_parser_parse(struct upb_msg_parser *s, upb_status_t upb_msg_parser_parse(struct upb_msg_parser *s,
void *data, size_t len, size_t *read) void *data, size_t len, size_t *read)
{ {
return upb_stream_parser_parse(&s->s, data, len, read); return upb_cbparser_parse(s->s, data, len, read);
} }
/* Serialization. ************************************************************/ /* Serialization. ************************************************************/

@ -98,23 +98,55 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
#undef CASE #undef CASE
} }
void upb_stream_parser_reset(struct upb_stream_parser *state, void *udata) struct upb_cbparser {
// Stack entries store the offset where the submsg ends (for groups, 0).
size_t stack[UPB_MAX_NESTING], *top, *limit;
size_t completed_offset;
void *udata;
upb_tag_cb tag_cb;
upb_value_cb value_cb;
upb_str_cb str_cb;
upb_start_cb start_cb;
upb_end_cb end_cb;
};
struct upb_cbparser *upb_cbparser_new(void)
{ {
state->top = state->stack; return malloc(sizeof(struct upb_cbparser));
state->limit = &state->stack[UPB_MAX_NESTING]; }
state->completed_offset = 0;
state->udata = udata; void upb_cbparser_free(struct upb_cbparser *p)
{
free(p);
}
void upb_cbparser_reset(struct upb_cbparser *p, void *udata,
upb_tag_cb tagcb,
upb_value_cb valuecb,
upb_str_cb strcb,
upb_start_cb startcb,
upb_end_cb endcb)
{
p->top = p->stack;
p->limit = &p->stack[UPB_MAX_NESTING];
p->completed_offset = 0;
p->udata = udata;
p->tag_cb = tagcb;
p->value_cb = valuecb;
p->str_cb = strcb;
p->start_cb = startcb;
p->end_cb = endcb;
// The top-level message is not delimited (we can keep receiving data for it // The top-level message is not delimited (we can keep receiving data for it
// indefinitely), so we treat it like a group. // indefinitely), so we treat it like a group.
*state->top = 0; *p->top = 0;
} }
/** /**
* Pushes a new stack frame for a submessage with the given len (which will * Pushes a new stack frame for a submessage with the given len (which will
* be zero if the submessage is a group). * be zero if the submessage is a group).
*/ */
static upb_status_t push(struct upb_stream_parser *s, uint8_t *start, static upb_status_t push(struct upb_cbparser *s, uint8_t *start,
uint32_t submsg_len, void *user_field_desc, uint32_t submsg_len, void *user_field_desc,
uint8_t **submsg_end) uint8_t **submsg_end)
{ {
@ -123,8 +155,8 @@ static upb_status_t push(struct upb_stream_parser *s, uint8_t *start,
return UPB_ERROR_STACK_OVERFLOW; return UPB_ERROR_STACK_OVERFLOW;
*s->top = s->completed_offset + submsg_len; *s->top = s->completed_offset + submsg_len;
if(s->submsg_start_cb) if(s->start_cb)
s->submsg_start_cb(s->udata, user_field_desc); s->start_cb(s->udata, user_field_desc);
*submsg_end = start + (*s->top > 0 ? (*s->top - s->completed_offset) : 0); *submsg_end = start + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
return UPB_STATUS_OK; return UPB_STATUS_OK;
@ -134,10 +166,10 @@ static upb_status_t push(struct upb_stream_parser *s, uint8_t *start,
* Pops a stack frame, returning a pointer for where the next submsg should * Pops a stack frame, returning a pointer for where the next submsg should
* end (or a pointer that is out of range for a group). * end (or a pointer that is out of range for a group).
*/ */
static void *pop(struct upb_stream_parser *s, uint8_t *start) static void *pop(struct upb_cbparser *s, uint8_t *start)
{ {
if(s->submsg_end_cb) if(s->end_cb)
s->submsg_end_cb(s->udata); s->end_cb(s->udata);
s->top--; s->top--;
@ -148,8 +180,8 @@ static void *pop(struct upb_stream_parser *s, uint8_t *start)
} }
upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s, upb_status_t upb_cbparser_parse(struct upb_cbparser *s, void *_buf, size_t len,
void *_buf, size_t len, size_t *read) size_t *read)
{ {
uint8_t *buf = _buf; uint8_t *buf = _buf;
uint8_t *completed = buf; uint8_t *completed = buf;

@ -21,125 +21,114 @@
extern "C" { extern "C" {
#endif #endif
/* High-level parsing interface. **********************************************/ /* Event Callbacks. ***********************************************************/
/* The general scheme is that the client registers callbacks that will be // The tag callback is called immediately after a tag has been parsed. The
* called at the appropriate times. These callbacks provide the client with // client should determine whether it wants to parse or skip the corresponding
* data and let the client make decisions (like whether to parse or to skip // value. If it wants to parse it, it must discover and return the correct
* a value). // .proto type (the tag only contains the wire type) and check that the wire
* // type is appropriate for the .proto type. To skip the value (which means
* After initializing the parse state, the client can repeatedly call upb_parse // skipping all submessages, in the case of a submessage), the callback should
* as data becomes available. The parser is fully streaming-capable, so the // return zero.
* data need not all be available at the same time. */ //
// The client can store a void* in *user_field_desc; this will be passed to
struct upb_stream_parser; // the value callback or the string callback.
typedef upb_field_type_t (*upb_tag_cb)(void *udata, struct upb_tag *tag,
/* Resets the internal state of an already-allocated parser. udata will be
* passed to callbacks as appropriate. */
void upb_stream_parser_reset(struct upb_stream_parser *p, void *udata);
/* The callback that is called immediately after a tag has been parsed. The
* client should determine whether it wants to parse or skip the corresponding
* value. If it wants to parse it, it must discover and return the correct
* .proto type (the tag only contains the wire type) and check that the wire
* type is appropriate for the .proto type. To skip the value (which means
* skipping all submessages, in the case of a submessage), the callback should
* return zero. */
typedef upb_field_type_t (*upb_tag_cb)(void *udata,
struct upb_tag *tag,
void **user_field_desc); void **user_field_desc);
/* The callback that is called when a regular value (ie. not a string or // The value callback is called when a regular value (ie. not a string or
* submessage) is encountered which the client has opted to parse (by not // submessage) is encountered which the client has opted to parse (by not
* returning 0 from the tag_cb). The client must parse the value and update // returning 0 from the tag_cb). The client must parse the value by calling
* buf accordingly, returning success or failure. // upb_parse_value(), returning success or failure accordingly.
* //
* Note that this callback can be called several times in a row for a single // Note that this callback can be called several times in a row for a single
* call to tag_cb in the case of packed arrays. */ // call to tag_cb in the case of packed arrays.
typedef upb_status_t (*upb_value_cb)(void *udata, uint8_t *buf, uint8_t *end, typedef upb_status_t (*upb_value_cb)(void *udata, uint8_t *buf, uint8_t *end,
void *user_field_desc, uint8_t **outbuf); void *user_field_desc, uint8_t **outbuf);
/* The callback that is called when a string is parsed. Note that the data // The string callback is called when a string is parsed. avail_len is the
* for the string might not all be available -- we could be streaming, and // number of bytes that are currently available at str. If the client is
* the current buffer might end right in the middle of the string. So we // streaming and the current buffer ends in the middle of the string, this
* pass both the available length and the total length. */ // number could be less than total_len.
typedef void (*upb_str_cb)(void *udata, uint8_t *str, typedef void (*upb_str_cb)(void *udata, uint8_t *str, size_t avail_len,
size_t avail_len, size_t total_len, size_t total_len, void *user_field_desc);
void *user_field_desc);
// The start and end callbacks are called when a submessage begins and ends,
/* Callbacks that are called when a submessage begins and ends, respectively. // respectively.
* Both are called with the submessage's stack frame at the top of the stack. */ typedef void (*upb_start_cb)(void *udata, void *user_field_desc);
typedef void (*upb_submsg_start_cb)(void *udata, typedef void (*upb_end_cb)(void *udata);
void *user_field_desc);
typedef void (*upb_submsg_end_cb)(void *udata); /* Callback parser interface. *************************************************/
struct upb_stream_parser { // Allocates and frees a upb_cbparser, respectively.
// Stack entries store the offset where the submsg ends (for groups, 0). struct upb_cbparser *upb_cbparser_new(void);
size_t stack[UPB_MAX_NESTING], *top, *limit; void upb_cbparser_free(struct upb_cbparser *p);
size_t completed_offset;
void *udata; // Resets the internal state of an already-allocated parser. Parsers must be
upb_tag_cb tag_cb; // reset before they can be used. A parser can be reset multiple times. udata
upb_value_cb value_cb; // will be passed as the first argument to callbacks.
upb_str_cb str_cb; void upb_cbparser_reset(struct upb_cbparser *p, void *udata,
upb_submsg_start_cb submsg_start_cb; upb_tag_cb tagcb,
upb_submsg_end_cb submsg_end_cb; upb_value_cb valuecb,
}; upb_str_cb strcb,
upb_start_cb startcb,
/* Parses up to len bytes of protobuf data out of buf, calling the appropriate upb_end_cb endcb);
* callbacks as values are parsed.
*
* The function returns a status indicating the success of the operation. Data // Parses up to len bytes of protobuf data out of buf, calling the appropriate
* is parsed until no more data can be read from buf, or the callback returns an // callbacks as values are parsed.
* error like UPB_STATUS_USER_CANCELLED, or an error occurs. //
* // The function returns a status indicating the success of the operation. Data
* *read is set to the number of bytes consumed. Note that this can be greater // is parsed until no more data can be read from buf, or the callback returns an
* than len in the case that a string was recognized that spans beyond the end // error like UPB_STATUS_USER_CANCELLED, or an error occurs.
* of the currently provided data. //
* // *read is set to the number of bytes consumed. Note that this can be greater
* The next call to upb_parse must be the first byte after buf + *read, even in // than len in the case that a string was recognized that spans beyond the end
* the case that *read > len. // of the currently provided data.
* //
* TODO: see if we can provide the following guarantee efficiently: // The next call to upb_parse must be passed data starting at the first byte after buf + *read, even in
* *read will always be >= len. */ // the case that *read > len.
upb_status_t upb_stream_parser_parse(struct upb_stream_parser *p, //
void *buf, size_t len, size_t *read); // TODO: see if we can provide the following guarantee efficiently:
// *read will always be >= len.
upb_status_t upb_cbparser_parse(struct upb_cbparser *p, void *buf, size_t len,
size_t *read);
extern upb_wire_type_t upb_expected_wire_types[]; extern upb_wire_type_t upb_expected_wire_types[];
/* Returns true if wt is the correct on-the-wire type for ft. */ // Returns true if wt is the correct on-the-wire type for ft.
INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) {
/* This doesn't currently support packed arrays. */ // This doesn't currently support packed arrays.
return upb_type_info[ft].expected_wire_type == wt; return upb_type_info[ft].expected_wire_type == wt;
} }
/* Data-consuming functions (to be called from value cb). *********************/ /* Data-consuming functions (to be called from value cb). *********************/
/* Parses and converts a value from the character data starting at buf. The // Parses and converts a value from the character data starting at buf (but not
* caller must have previously checked that the wire type is appropriate for // past end). *outbuf will be set to one past the data that was read. The
* this field type. */ // caller must have previously checked that the wire type is appropriate for
// this field type.
upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft, upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
union upb_value_ptr v, uint8_t **outbuf); union upb_value_ptr v, uint8_t **outbuf);
/* Parses a wire value with the given type (which must have been obtained from // Parses a wire value with the given type (which must have been obtained from
* a tag that was just parsed) and adds the number of bytes that were consumed // a tag that was just parsed) and sets *outbuf to one past the data that was
* to *offset. */ // read.
upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt, upb_status_t upb_parse_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt,
union upb_wire_value *wv, uint8_t **outbuf); union upb_wire_value *wv, uint8_t **outbuf);
/* Functions to read wire values. *********************************************/ /* Functions to read wire values. *********************************************/
/* In general, these should never be called directly from any code outside upb. // Most clients will not want to use these directly.
* They are included here only because we expect them to get inlined inside the
* value-reading functions below. */
upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val, upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
uint8_t **outbuf); uint8_t **outbuf);
/* Gets a varint (wire type: UPB_WIRE_TYPE_VARINT). */ // Gets a varint (wire type: UPB_WIRE_TYPE_VARINT).
INLINE upb_status_t upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val, INLINE upb_status_t upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val,
uint8_t **outbuf) uint8_t **outbuf)
{ {
/* We inline this common case (1-byte varints), if that fails we dispatch to // We inline this common case (1-byte varints), if that fails we dispatch to
* the full (non-inlined) version. */ // the full (non-inlined) version.
if((*buf & 0x80) == 0) { if((*buf & 0x80) == 0) {
*val = *buf & 0x7f; *val = *buf & 0x7f;
*outbuf = buf + 1; *outbuf = buf + 1;
@ -149,17 +138,17 @@ INLINE upb_status_t upb_get_v_uint64_t(uint8_t *buf, uint8_t *end, uint64_t *val
} }
} }
/* Gets a varint -- called when we only need 32 bits of it. */ // Gets a varint -- called when we only need 32 bits of it.
INLINE upb_status_t upb_get_v_uint32_t(uint8_t *buf, uint8_t *end, INLINE upb_status_t upb_get_v_uint32_t(uint8_t *buf, uint8_t *end,
uint32_t *val, uint8_t **outbuf) uint32_t *val, uint8_t **outbuf)
{ {
uint64_t val64; uint64_t val64;
UPB_CHECK(upb_get_v_uint64_t(buf, end, &val64, outbuf)); UPB_CHECK(upb_get_v_uint64_t(buf, end, &val64, outbuf));
*val = (uint32_t)val64; /* Discard the high bits. */ *val = (uint32_t)val64; // Discard the high bits.
return UPB_STATUS_OK; return UPB_STATUS_OK;
} }
/* Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT). */ // Gets a fixed-length 32-bit integer (wire type: UPB_WIRE_TYPE_32BIT).
INLINE upb_status_t upb_get_f_uint32_t(uint8_t *buf, uint8_t *end, INLINE upb_status_t upb_get_f_uint32_t(uint8_t *buf, uint8_t *end,
uint32_t *val, uint8_t **outbuf) uint32_t *val, uint8_t **outbuf)
{ {
@ -176,7 +165,7 @@ INLINE upb_status_t upb_get_f_uint32_t(uint8_t *buf, uint8_t *end,
return UPB_STATUS_OK; return UPB_STATUS_OK;
} }
/* Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT). */ // Gets a fixed-length 64-bit integer (wire type: UPB_WIRE_TYPE_64BIT).
INLINE upb_status_t upb_get_f_uint64_t(uint8_t *buf, uint8_t *end, INLINE upb_status_t upb_get_f_uint64_t(uint8_t *buf, uint8_t *end,
uint64_t *val, uint8_t **outbuf) uint64_t *val, uint8_t **outbuf)
{ {
@ -207,8 +196,8 @@ INLINE upb_status_t upb_skip_v_uint64_t(uint8_t *buf, uint8_t *end,
return UPB_STATUS_OK; return UPB_STATUS_OK;
} }
INLINE upb_status_t upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end, uint8_t INLINE upb_status_t upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end,
**outbuf) uint8_t **outbuf)
{ {
uint8_t *uint32_end = buf + sizeof(uint32_t); uint8_t *uint32_end = buf + sizeof(uint32_t);
if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA; if(uint32_end > end) return UPB_STATUS_NEED_MORE_DATA;
@ -216,8 +205,8 @@ INLINE upb_status_t upb_skip_f_uint32_t(uint8_t *buf, uint8_t *end, uint8_t
return UPB_STATUS_OK; return UPB_STATUS_OK;
} }
INLINE upb_status_t upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end, uint8_t INLINE upb_status_t upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end,
**outbuf) uint8_t **outbuf)
{ {
uint8_t *uint64_end = buf + sizeof(uint64_t); uint8_t *uint64_end = buf + sizeof(uint64_t);
if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA; if(uint64_end > end) return UPB_STATUS_NEED_MORE_DATA;
@ -228,27 +217,26 @@ INLINE upb_status_t upb_skip_f_uint64_t(uint8_t *buf, uint8_t *end, uint8_t
/* Functions to read .proto values. *******************************************/ /* Functions to read .proto values. *******************************************/
/* These functions read the appropriate wire value for a given .proto type
* and then convert it based on the .proto type. These are the most efficient
* functions to call if you want to decode a value for a known type. */
/* Performs zig-zag decoding, which is used by sint32 and sint64. */ // Performs zig-zag decoding, which is used by sint32 and sint64.
INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); } INLINE int32_t upb_zzdec_32(uint32_t n) { return (n >> 1) ^ -(int32_t)(n & 1); }
INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); } INLINE int64_t upb_zzdec_64(uint64_t n) { return (n >> 1) ^ -(int64_t)(n & 1); }
/* Use macros to define a set of two functions for each .proto type: // Use macros to define a set of two functions for each .proto type:
* //
* // Reads and converts a .proto value from buf, placing it in d. // // Reads and converts a .proto value from buf, placing it in d.
* // "end" indicates the end of the current buffer (if the buffer does // // "end" indicates the end of the current buffer (if the buffer does
* // not contain the entire value UPB_STATUS_NEED_MORE_DATA is returned). // // not contain the entire value UPB_STATUS_NEED_MORE_DATA is returned).
* // On success, *outbuf will point to the first byte that was not consumed. // // On success, *outbuf will point to the first byte that was not consumed.
* upb_status_t upb_get_INT32(uint8_t *buf, uint8_t *end, int32_t *d, // upb_status_t upb_get_INT32(uint8_t *buf, uint8_t *end, int32_t *d,
* uint8_t **outbuf); // uint8_t **outbuf);
* //
* // Given an already read wire value s (source), convert it to a .proto // // Given an already read wire value s (source), convert it to a .proto
* // value and return it. // // value and return it.
* int32_t upb_wvtov_INT32(uint32_t s); // int32_t upb_wvtov_INT32(uint32_t s);
*/ //
// These are the most efficient functions to call if you want to decode a value
// for a known type.
#define WVTOV(type, wire_t, val_t) \ #define WVTOV(type, wire_t, val_t) \
INLINE val_t upb_wvtov_ ## type(wire_t s) INLINE val_t upb_wvtov_ ## type(wire_t s)
@ -294,7 +282,7 @@ T(FLOAT, f, uint32_t, float, _float) {
#undef GET #undef GET
#undef T #undef T
/* Parses a tag, places the result in *tag. */ // Parses a tag, places the result in *tag.
INLINE upb_status_t parse_tag(uint8_t *buf, uint8_t *end, struct upb_tag *tag, INLINE upb_status_t parse_tag(uint8_t *buf, uint8_t *end, struct upb_tag *tag,
uint8_t **outbuf) uint8_t **outbuf)
{ {

Loading…
Cancel
Save