From b05205d224020d45d2a69978732758703f0eb7cf Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Mon, 11 Jan 2010 19:54:46 -0800 Subject: [PATCH] Defined upb_sink interface and ported the parser to it. --- benchmarks/parsetostruct.upb_table.c | 17 ++-- src/upb_data.c | 115 +++++++++++++++----------- src/upb_data.h | 44 ++++++++-- src/upb_parse.c | 57 +++++-------- src/upb_parse.h | 107 ++++-------------------- src/upb_sink.h | 117 +++++++++++++++++++++++++++ 6 files changed, 269 insertions(+), 188 deletions(-) create mode 100644 src/upb_sink.h diff --git a/benchmarks/parsetostruct.upb_table.c b/benchmarks/parsetostruct.upb_table.c index e387de65aa..7614b3088b 100644 --- a/benchmarks/parsetostruct.upb_table.c +++ b/benchmarks/parsetostruct.upb_table.c @@ -3,12 +3,14 @@ #include "upb_data.h" #include "upb_def.h" +#include "upb_parse.h" static struct upb_symtab *s; static upb_strptr str; static struct upb_msgdef *def; static upb_msg *msgs[NUM_MESSAGES]; -static struct upb_msgparser *mp; +static upb_parser *parser; +static upb_msgsink *sink; static bool initialize() { @@ -47,7 +49,8 @@ static bool initialize() fprintf(stderr, "Error reading " MESSAGE_FILE "\n"); return false; } - mp = upb_msgparser_new(def); + parser = upb_parser_new(def); + sink = upb_msgsink_new(def); return true; } @@ -57,17 +60,19 @@ static void cleanup() upb_msg_unref(msgs[i], def); upb_string_unref(str); upb_symtab_unref(s); - upb_msgparser_free(mp); + upb_parser_free(parser); + upb_msgsink_free(sink); } static size_t run(int i) { struct upb_status status = UPB_STATUS_INIT; upb_msg *msg = msgs[i%NUM_MESSAGES]; - upb_msgparser_reset(mp, msg); + upb_msgsink_reset(sink, msg); + upb_parser_reset(parser, upb_msgsink_sink(sink)); upb_msg_clear(msg, def); - upb_msgparser_parse(mp, str, &status); - if(!upb_ok(&status)) { + size_t parsed = upb_parser_parse(parser, str, &status); + if(!upb_ok(&status) || parsed != upb_strlen(str)) { fprintf(stderr, "Parse error: %s\n", status.msg); return 0; } diff --git 
a/src/upb_data.c b/src/upb_data.c index 3dc39f56d9..4fc1844bdc 100644 --- a/src/upb_data.c +++ b/src/upb_data.c @@ -282,18 +282,35 @@ void _upb_msg_free(upb_msg *msg, struct upb_msgdef *md) free(msg); } +void upb_msg_parsestr(upb_msg *msg, struct upb_msgdef *md, upb_strptr str, + struct upb_status *status) +{ + upb_parser *p = upb_parser_new(md); + upb_msgsink *s = upb_msgsink_new(md); + + upb_msgsink_reset(s, msg); + upb_parser_reset(p, upb_msgsink_sink(s)); + upb_msg_clear(msg, md); + upb_parser_parse(p, str, status); + + upb_parser_free(p); + upb_msgsink_free(s); +} + -/* Parsing. ******************************************************************/ +/* upb_msgsrc ****************************************************************/ -struct upb_msgparser_frame { +/* upb_msgsink ***************************************************************/ + +struct upb_msgsink_frame { upb_msg *msg; struct upb_msgdef *md; }; -struct upb_msgparser { - struct upb_cbparser *s; - bool merge; - struct upb_msgparser_frame stack[UPB_MAX_NESTING], *top; +struct upb_msgsink { + upb_sink base; + struct upb_msgdef *toplevel_msgdef; + struct upb_msgsink_frame stack[UPB_MAX_NESTING], *top; }; /* Helper function that returns a pointer to where the next value for field "f" @@ -321,24 +338,26 @@ static union upb_value_ptr get_value_ptr(upb_msg *msg, struct upb_fielddef *f) return p; } -// Callbacks for the stream parser. +// Callbacks for upb_sink. // TODO: implement these in terms of public interfaces. 
-static bool value_cb(void *udata, struct upb_fielddef *f, union upb_value val) +static upb_sink_status _upb_msgsink_valuecb(upb_sink *s, struct upb_fielddef *f, + union upb_value val) { - struct upb_msgparser *mp = udata; - upb_msg *msg = mp->top->msg; + upb_msgsink *ms = (upb_msgsink*)s; + upb_msg *msg = ms->top->msg; union upb_value_ptr p = get_value_ptr(msg, f); upb_msg_sethas(msg, f); upb_value_write(p, val, f->type); - return true; + return UPB_SINK_CONTINUE; } -static bool str_cb(void *udata, struct upb_fielddef *f, upb_strptr str, - int32_t start, uint32_t end) +static upb_sink_status _upb_msgsink_strcb(upb_sink *s, struct upb_fielddef *f, + upb_strptr str, + int32_t start, uint32_t end) { - struct upb_msgparser *mp = udata; - upb_msg *msg = mp->top->msg; + upb_msgsink *ms = (upb_msgsink*)s; + upb_msg *msg = ms->top->msg; union upb_value_ptr p = get_value_ptr(msg, f); upb_msg_sethas(msg, f); if(end > upb_strlen(str)) abort(); /* TODO: support streaming. */ @@ -348,13 +367,13 @@ static bool str_cb(void *udata, struct upb_fielddef *f, upb_strptr str, *p.str = upb_string_new(); } upb_strcpylen(*p.str, upb_string_getrobuf(str) + start, end - start); - return true; + return UPB_SINK_CONTINUE; } -static void start_cb(void *udata, struct upb_fielddef *f) +static upb_sink_status _upb_msgsink_startcb(upb_sink *s, struct upb_fielddef *f) { - struct upb_msgparser *mp = udata; - upb_msg *oldmsg = mp->top->msg; + upb_msgsink *ms = (upb_msgsink*)s; + upb_msg *oldmsg = ms->top->msg; union upb_value_ptr p = get_value_ptr(oldmsg, f); if(upb_isarray(f) || !upb_msg_has(oldmsg, f)) { @@ -368,50 +387,50 @@ static void start_cb(void *udata, struct upb_fielddef *f) upb_msg_sethas(oldmsg, f); } - mp->top++; - mp->top->msg = *p.msg; + ms->top++; + ms->top->msg = *p.msg; + return UPB_SINK_CONTINUE; } -static void end_cb(void *udata) +static upb_sink_status _upb_msgsink_endcb(upb_sink *s) { - struct upb_msgparser *mp = udata; - mp->top--; + upb_msgsink *ms = (upb_msgsink*)s; + 
ms->top--; + return UPB_SINK_CONTINUE; } -/* Externally-visible functions for the msg parser. */ +static upb_sink_callbacks _upb_msgsink_vtbl = { + _upb_msgsink_valuecb, + _upb_msgsink_strcb, + _upb_msgsink_startcb, + _upb_msgsink_endcb +}; -struct upb_msgparser *upb_msgparser_new(struct upb_msgdef *def) -{ - struct upb_msgparser *mp = malloc(sizeof(struct upb_msgparser)); - mp->s = upb_cbparser_new(def, value_cb, str_cb, start_cb, end_cb); - return mp; -} +// +// External upb_msgsink interface. +// -void upb_msgparser_reset(struct upb_msgparser *s, upb_msg *msg) +upb_msgsink *upb_msgsink_new(struct upb_msgdef *md) { - upb_cbparser_reset(s->s, s); - s->top = s->stack; - s->top->msg = msg; + upb_msgsink *ms = malloc(sizeof(*ms)); + upb_sink_init(&ms->base, &_upb_msgsink_vtbl); + ms->toplevel_msgdef = md; + return ms; } -void upb_msgparser_free(struct upb_msgparser *s) +void upb_msgsink_free(upb_msgsink *sink) { - upb_cbparser_free(s->s); - free(s); + free(sink); } -void upb_msg_parsestr(upb_msg *msg, struct upb_msgdef *md, upb_strptr str, - struct upb_status *status) +upb_sink *upb_msgsink_sink(upb_msgsink *sink) { - struct upb_msgparser *mp = upb_msgparser_new(md); - upb_msgparser_reset(mp, msg); - upb_msg_clear(msg, md); - upb_msgparser_parse(mp, str, status); - upb_msgparser_free(mp); + return &sink->base; } -size_t upb_msgparser_parse(struct upb_msgparser *s, upb_strptr str, - struct upb_status *status) +void upb_msgsink_reset(upb_msgsink *ms, upb_msg *msg) { - return upb_cbparser_parse(s->s, str, status); + ms->top = ms->stack; + ms->top->msg = msg; + ms->top->md = ms->toplevel_msgdef; } diff --git a/src/upb_data.h b/src/upb_data.h index 98243ce578..4fc9e8d5b8 100644 --- a/src/upb_data.h +++ b/src/upb_data.h @@ -26,6 +26,7 @@ #include "upb.h" #include "upb_atomic.h" #include "upb_def.h" +#include "upb_sink.h" #ifdef __cplusplus extern "C" { @@ -513,18 +514,47 @@ INLINE void upb_msg_clear(upb_msg *msg, struct upb_msgdef *md) { memset(msg->data, 0, 
md->set_flags_bytes); } -/* Parsing ********************************************************************/ - +// A convenience function for parsing an entire protobuf all at once, without +// having to worry about setting up the appropriate objects. void upb_msg_parsestr(upb_msg *msg, struct upb_msgdef *md, upb_strptr str, struct upb_status *status); -struct upb_msgparser *upb_msgparser_new(struct upb_msgdef *def); -void upb_msgparser_free(struct upb_msgparser *mp); -void upb_msgparser_reset(struct upb_msgparser *mp, upb_msg *m); +/* upb_msgsrc *****************************************************************/ + +// A upb_msgsrc can push the data of a upb_msg to a upb_sink. +struct upb_msgsrc; +typedef struct upb_msgsrc upb_msgsrc; + +// Allocate and free a msgsrc, respectively. +upb_msgsrc *upb_msgsrc_new(); +void upb_msgsrc_free(upb_msgsrc *src); + +// Resets the msgsrc for the given msg, msgdef, and sink. This must be +// called before upb_msgsrc_produce(). +void upb_msgsrc_reset(upb_msgsrc *src, upb_msg *msg, struct upb_msgdef *md, + upb_sink *sink); + +// Pushes data from the upb_msgsrc to the sink that was provided at the last +// reset. Returns true if the sink is finished, or false if it is suspended. +bool upb_msgsrc_produce(upb_msgsrc *src); + + +/* upb_msgsink ****************************************************************/ + +// A upb_msgsink can accept the data from a source and write it into a message. +struct upb_msgsink; +typedef struct upb_msgsink upb_msgsink; + +// Allocate and free a msgsink, respectively. +upb_msgsink *upb_msgsink_new(struct upb_msgdef *md); +void upb_msgsink_free(upb_msgsink *sink); + +// Returns the upb_sink (like an upcast). +upb_sink *upb_msgsink_sink(upb_msgsink *sink); -size_t upb_msgparser_parse(struct upb_msgparser *mp, upb_strptr str, - struct upb_status *status); +// Resets the msgsink for the given msg. 
+void upb_msgsink_reset(upb_msgsink *sink, upb_msg *msg); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_parse.c b/src/upb_parse.c index 101792a638..577fa5c37f 100644 --- a/src/upb_parse.c +++ b/src/upb_parse.c @@ -310,57 +310,48 @@ static const uint8_t *upb_parse_value(const uint8_t *buf, const uint8_t *end, #undef CASE } -struct upb_cbparser_frame { +struct upb_parser_frame { struct upb_msgdef *msgdef; size_t end_offset; // For groups, 0. }; -struct upb_cbparser { +struct upb_parser { // Immutable state of the parser. struct upb_msgdef *toplevel_msgdef; - upb_value_cb value_cb; - upb_str_cb str_cb; - upb_start_cb start_cb; - upb_end_cb end_cb; + upb_sink *sink; // State pertaining to a particular parse (resettable). // Stack entries store the offset where the submsg ends (for groups, 0). - struct upb_cbparser_frame stack[UPB_MAX_NESTING], *top, *limit; + struct upb_parser_frame stack[UPB_MAX_NESTING], *top, *limit; size_t completed_offset; void *udata; }; -struct upb_cbparser *upb_cbparser_new(struct upb_msgdef *msgdef, - upb_value_cb valuecb, upb_str_cb strcb, - upb_start_cb startcb, upb_end_cb endcb) +upb_parser *upb_parser_new(struct upb_msgdef *msgdef) { - struct upb_cbparser *p = malloc(sizeof(struct upb_cbparser)); + upb_parser *p = malloc(sizeof(*p)); p->toplevel_msgdef = msgdef; - p->value_cb = valuecb; - p->str_cb = strcb; - p->start_cb = startcb; - p->end_cb = endcb; p->limit = &p->stack[UPB_MAX_NESTING]; return p; } -void upb_cbparser_free(struct upb_cbparser *p) +void upb_parser_free(upb_parser *p) { free(p); } -void upb_cbparser_reset(struct upb_cbparser *p, void *udata) +void upb_parser_reset(upb_parser *p, upb_sink *sink) { p->top = p->stack; p->completed_offset = 0; - p->udata = udata; + p->sink = sink; p->top->msgdef = p->toplevel_msgdef; // The top-level message is not delimited (we can keep receiving data for it // indefinitely), so we treat it like a group. 
p->top->end_offset = 0; } -static const void *get_msgend(struct upb_cbparser *p, const uint8_t *start) +static const void *get_msgend(upb_parser *p, const uint8_t *start) { if(p->top->end_offset > 0) return start + (p->top->end_offset - p->completed_offset); @@ -385,7 +376,7 @@ INLINE bool upb_check_type(upb_wire_type_t wt, upb_field_type_t ft) { * Pushes a new stack frame for a submessage with the given len (which will * be zero if the submessage is a group). */ -static const uint8_t *push(struct upb_cbparser *p, const uint8_t *start, +static const uint8_t *push(upb_parser *p, const uint8_t *start, uint32_t submsg_len, struct upb_fielddef *f, struct upb_status *status) { @@ -396,11 +387,11 @@ static const uint8_t *push(struct upb_cbparser *p, const uint8_t *start, UPB_MAX_NESTING); return NULL; } - struct upb_cbparser_frame *frame = p->top; + struct upb_parser_frame *frame = p->top; frame->end_offset = p->completed_offset + submsg_len; frame->msgdef = upb_downcast_msgdef(f->def); - if(p->start_cb) p->start_cb(p->udata, f); + upb_sink_onstart(p->sink, f); return get_msgend(p, start); } @@ -408,16 +399,15 @@ static const uint8_t *push(struct upb_cbparser *p, const uint8_t *start, * Pops a stack frame, returning a pointer for where the next submsg should * end (or a pointer that is out of range for a group). */ -static const void *pop(struct upb_cbparser *p, const uint8_t *start) +static const void *pop(upb_parser *p, const uint8_t *start) { - if(p->end_cb) p->end_cb(p->udata); + upb_sink_onend(p->sink); p->top--; return get_msgend(p, start); } -size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str, - struct upb_status *status) +size_t upb_parser_parse(upb_parser *p, upb_strptr str, struct upb_status *status) { // buf is our current offset, moves from start to end. 
const uint8_t *buf = (uint8_t*)upb_string_getrobuf(str); @@ -429,12 +419,7 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str, const uint8_t *submsg_end = get_msgend(p, start); struct upb_msgdef *msgdef = p->top->msgdef; - bool keep_going = true; - - // Make local copies so optimizer knows they won't change. - const upb_str_cb str_cb = p->str_cb; - const upb_value_cb value_cb = p->value_cb; - void *const udata = p->udata; + upb_sink_status sink_status = UPB_SINK_CONTINUE; // We need to check the status of operations that can fail, but we do so as // late as possible to avoid introducing branches that have to wait on @@ -443,7 +428,7 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str, #define CHECK_STATUS() do { if(!upb_ok(status)) goto err; } while(0) // Main loop: executed once per tag/field pair. - while(keep_going && buf < end) { + while(sink_status == UPB_SINK_CONTINUE && buf < end) { // Parse/handle tag. struct upb_tag tag; buf = parse_tag(buf, end, &tag, status); @@ -476,8 +461,8 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str, } else { if(f && upb_isstringtype(f->type)) { int32_t str_start = buf - start; - keep_going = - str_cb(udata, f, str, str_start, str_start + delim_len); + sink_status = + upb_sink_onstr(p->sink, f, str, str_start, str_start + delim_len); } // else { TODO: packed arrays } // If field was not found, it is skipped silently. buf = delim_end; // Could be >end. @@ -493,7 +478,7 @@ size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str, buf = upb_parse_value(buf, end, f->type, upb_value_addrof(&val), status); CHECK_STATUS(); // Checking upb_parse_value(). - keep_going = value_cb(udata, f, val); + sink_status = upb_sink_onvalue(p->sink, f, val); } } CHECK_STATUS(); diff --git a/src/upb_parse.h b/src/upb_parse.h index 9cc997420c..9afa96ca27 100644 --- a/src/upb_parse.h +++ b/src/upb_parse.h @@ -24,106 +24,31 @@ extern "C" { #endif -/* Callback parser callbacks. 
*************************************************/ +/* upb_parser *****************************************************************/ -// The value callback is called when a regular value (ie. not a string or -// submessage) is encountered which was defined in the upb_msgdef. The client -// returns true to continue the parse or false to halt it. -// -// Note that this callback can be called several times in a row for a single -// call to tag_cb in the case of packed arrays. -typedef bool (*upb_value_cb)(void *udata, struct upb_fielddef *f, - union upb_value val); - -// The string callback is called when a string that was defined in the -// upb_msgdef is parsed. "str" is the protobuf data that is being parsed (NOT -// the string in question); "start" and "end" are the start and end offset of -// the string we parsed *within* str. The data is supplied this way to give -// you the opportunity to reference this data instead of copying it (perhaps -// using upb_strslice), or to minimize copying if it is unavoidable. -// -// Note that if you are parsing in a streaming fashion, start could be <0 and -// "end" could be >upb_strlen(str). -typedef bool (*upb_str_cb)(void *udata, struct upb_fielddef *f, upb_strptr str, - int32_t start, uint32_t end); - -// The start and end callbacks are called when a submessage begins and ends, -// respectively. -typedef void (*upb_start_cb)(void *udata, struct upb_fielddef *f); -typedef void (*upb_end_cb)(void *udata); +// A upb_parser parses the binary protocol buffer format, writing the data it +// parses to a upb_sink. +struct upb_parser; +typedef struct upb_parser upb_parser; -/* Callback parser interface. *************************************************/ - -// Allocates and frees a upb_cbparser, respectively. Callbacks may be NULL, -// in which case they will be skipped. 
-struct upb_cbparser *upb_cbparser_new(struct upb_msgdef *md, - upb_value_cb valuecb, upb_str_cb strcb, - upb_start_cb startcb, upb_end_cb endcb); -void upb_cbparser_free(struct upb_cbparser *p); +// Allocates and frees a upb_parser, respectively. +upb_parser *upb_parser_new(struct upb_msgdef *md); +void upb_parser_free(upb_parser *p); // Resets the internal state of an already-allocated parser. This puts it in a // state where it has not seen any data, and expects the next data to be from // the beginning of a new protobuf. Parsers must be reset before they can be -// used. A parser can be reset multiple times. udata will be passed as the -// first argument to callbacks. -void upb_cbparser_reset(struct upb_cbparser *p, void *udata); +// used. A parser can be reset multiple times. +void upb_parser_reset(upb_parser *p, upb_sink *sink); -// Parses up to len bytes of protobuf data out of buf, calling the appropriate -// callbacks as values are parsed. -// -// The function returns a status indicating the success of the operation. Data -// is parsed until no more data can be read from buf, or a user callback -// returns false, or an error occurs. -// -// The function returns the number of bytes consumed. Note that this can be -// greater than len in the case that a string was recognized that spans beyond -// the end of the currently provided data. +// Parses protobuf data out of str, returning how much data was parsed. The +// next call to upb_parser_parse should begin with the first byte that was +// not parsed. "status" indicates whether an error occurred. // -// The next call to upb_parse must be the first byte after buf + retval, even in -// the case that retval > len. -// -// TODO: see if we can provide the following guarantee efficiently: +// TODO: provide the following guarantee: // retval will always be >= len. */ -size_t upb_cbparser_parse(struct upb_cbparser *p, upb_strptr str, - struct upb_status *status); - -/* Pick parser interface. 
************************************************/ - -// The pick parser provides a convenient interface for extracting a given set -// of fields from a protobuf. This is especially useful in the case that you -// want only a few fields from a large protobuf, because the pick parser can be -// much more efficient by aggressively skipping data and stopping when it has -// all the fields you asked for. The requested fields may be nested -// submessages of the top-level message. -// -// The selection parser currently does not yet support repeated fields -- this -// would involve either letting the user specify an index of the record they -// wanted, or repeatedly delivering values for the same field number. The -// latter would make it impossible to bail out of processing a message early, -// because there could always be more values for that field. -// -// This parser is layered on top of the callback parser. - -// Callbacks for the pick parser. The semantics are the same as for the -// callback parser, excet that field numbers are provided instead of msgdefs -// and fieldefs. -typedef void (*upb_pp_value_cb)(void *udata, int fieldnum, union upb_value val); -typedef void (*upb_pp_str_cb)(void *udata, int fieldnum, uint8_t *str, - size_t avail_len, size_t total_len); - -// The pickparser methods all have the same semantics as the cbparser, except -// that there are no start or end callbacks and the constructor needs a list -// of fields. The fields are in dotted notation, so "foo.bar" expects that the -// top-level message contains a field foo, which contains a field bar. The -// new function will return NULL if any of the field names are invalid, or are -// repeated fields. 
-struct upb_pickparser *upb_pickparser_new(struct upb_msgdef *msgdef, - char *fields[]); -void upb_pickparser_free(struct upb_pickparser *p); -void upb_pickparser_reset(struct upb_pickparser *p, - bool found[], union upb_value vals[]); -size_t upb_pickparser_parse(struct upb_pickparser *p, upb_strptr str, - struct upb_status *status); +size_t upb_parser_parse(upb_parser *p, upb_strptr str, + struct upb_status *status); #ifdef __cplusplus } /* extern "C" */ diff --git a/src/upb_sink.h b/src/upb_sink.h new file mode 100644 index 0000000000..17e1e1d640 --- /dev/null +++ b/src/upb_sink.h @@ -0,0 +1,117 @@ +/* + * upb - a minimalist implementation of protocol buffers. + * + * Copyright (c) 2010 Joshua Haberman. See LICENSE for details. + * + * upb_sink is a general purpose interface for pushing the contents of a + * protobuf from one component to another in a streaming fashion. We call the + * component that calls a upb_sink a "source". By "pushing" we mean that the + * source calls into the sink; the opposite (where a sink calls into the + * source) is known as "pull". In the push model the source gets the main + * loop; in a pull model the sink does. + * + * This interface is used as general-purpose glue in upb. For example, the + * parser interface works by implementing a source. Likewise the serialization + * simply implements a sink. Copying one protobuf to another is just a matter + * of using one message as a source and another as a sink. + * + * In terms of efficiency, we would generally expect "push" to be faster if the + * source had more state to track, and "pull" to be faster if the sink had more + * state. The reason is that whoever has the main loop can keep state on the + * stack (and possibly even in callee-save registers), whereas the + * component that is "called into" always needs to reload its state from + * memory.
+ * + * In terms of programming complexity, it is easier and simpler to have the + * main loop, because you can store state in local variables. + * + * So the assumption inherent in using the push model is that sources are + * generally more complicated and stateful than consumers. For example, in the + * parser case, it has to deal with malformed input and associated errors; in + * comparison, the serializer deals with known-good input. + */ + +#ifndef UPB_SINK_H +#define UPB_SINK_H + +#ifdef __cplusplus +extern "C" { +#endif + +// Each of the upb_sink callbacks returns a status of this type. +typedef enum { + // The normal case, where the consumer wants to continue consuming. + UPB_SINK_CONTINUE, + + // The consumer has consumed the current value, but it wants to stop for now. + // When the producer is next invoked, it should resume at the next value. + UPB_SINK_SUSPEND, + + // The consumer wants to skip to the end of the current submessage and + // continue consuming. If we are at the top-level, the rest of the + // data is discarded. + UPB_SINK_SKIP +} upb_sink_status; + + +typedef struct { + struct upb_sink_callbacks *vtbl; +} upb_sink; + +/* upb_sink callbacks *********************************************************/ + +// The value callback is called when a regular value (ie. not a string or +// submessage) is pushed. +typedef upb_sink_status (*upb_value_cb)(upb_sink *s, struct upb_fielddef *f, + union upb_value val); + +// The string callback is called when a string is pushed. "str" is the string +// in which the data lives, but it may contain more data than the effective +// string. "start" and "end" indicate the substring of "str" that is the +// effective string. If "start" is <0, this string is a continuation of the +// previous string for this field. If end > upb_strlen(str) then there is more +// data to follow for this string. "end" can also be used as a hint for how +// much data follows, but this is only a hint and is not guaranteed. 
+// +// The data is supplied this way to give you the opportunity to reference this +// data instead of copying it (perhaps using upb_strslice), or to minimize +// copying if it is unavoidable. +typedef upb_sink_status (*upb_str_cb)(upb_sink *s, struct upb_fielddef *f, + upb_strptr str, + int32_t start, uint32_t end); + +// The start and end callbacks are called when a submessage begins and ends, +// respectively. +typedef upb_sink_status (*upb_start_cb)(upb_sink *s, struct upb_fielddef *f); +typedef upb_sink_status (*upb_end_cb)(upb_sink *s); + + +/* upb_sink implementation *************************************************/ + +typedef struct upb_sink_callbacks { + upb_value_cb value_cb; + upb_str_cb str_cb; + upb_start_cb start_cb; + upb_end_cb end_cb; +} upb_sink_callbacks; + +// We could potentially define these later to also be capable of calling a C++ +// virtual method instead of doing the virtual dispatch manually. This would +// make it possible to write C++ sinks in a more natural style. We could have +// a flag in upb_sink defining whether it is a C sink or a C++ one. +#define upb_sink_onvalue(s, f, val) s->vtbl->value_cb(s, f, val) +#define upb_sink_onstr(s, f, str, start, end) s->vtbl->str_cb(s, f, str, start, end) +#define upb_sink_onstart(s, f) s->vtbl->start_cb(s, f) +#define upb_sink_onend(s) s->vtbl->end_cb(s) + +// Initializes a plain C visitor with the given vtbl. The visitor must have +// been allocated separately. +INLINE void upb_sink_init(upb_sink *s, upb_sink_callbacks *vtbl) { + s->vtbl = vtbl; +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif