|
|
|
@ -4,29 +4,15 @@ |
|
|
|
|
* Copyright (c) 2010-2011 Google Inc. See LICENSE for details. |
|
|
|
|
* Author: Josh Haberman <jhaberman@gmail.com> |
|
|
|
|
* |
|
|
|
|
* This file defines general-purpose streaming data interfaces: |
|
|
|
|
* |
|
|
|
|
* - upb_handlers: represents a set of callbacks, very much like in XML's SAX |
|
|
|
|
* API, that a client can register to do a streaming tree traversal over a |
|
|
|
|
* stream of structured protobuf data, without knowing where that data is |
|
|
|
|
* coming from. |
|
|
|
|
* |
|
|
|
|
* - upb_bytesrc: a pull interface for streams of bytes, basically an |
|
|
|
|
* abstraction of read()/fread(), but it avoids copies where possible. |
|
|
|
|
* |
|
|
|
|
* - upb_bytesink: push interface for streams of bytes, basically an |
|
|
|
|
* abstraction of write()/fwrite(), but it avoids copies where possible. |
|
|
|
|
* |
|
|
|
|
* All of the encoders and decoders are based on these generic interfaces, |
|
|
|
|
* which lets you write streaming algorithms that do not depend on a specific |
|
|
|
|
* serialization format; for example, you can write a pretty printer that works |
|
|
|
|
* with input that came from protobuf binary format, protobuf text format, or |
|
|
|
|
* even an in-memory upb_msg -- the pretty printer will not know the |
|
|
|
|
* difference. |
|
|
|
|
* upb_handlers is a generic visitor-like interface for iterating over a stream |
|
|
|
|
* of protobuf data. You can register function pointers that will be called |
|
|
|
|
* for each message and/or field as the data is being parsed or iterated over, |
|
|
|
|
* without having to know the source format that we are parsing from. This |
|
|
|
|
* decouples the parsing logic from the processing logic. |
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
#ifndef UPB_STREAM_H |
|
|
|
|
#define UPB_STREAM_H |
|
|
|
|
#ifndef UPB_HANDLERS_H |
|
|
|
|
#define UPB_HANDLERS_H |
|
|
|
|
|
|
|
|
|
#include <limits.h> |
|
|
|
|
#include "upb.h" |
|
|
|
@ -38,11 +24,50 @@ extern "C" { |
|
|
|
|
|
|
|
|
|
/* upb_handlers ***************************************************************/ |
|
|
|
|
|
|
|
|
|
// A upb_handlers object is a table of callbacks that are bound to specific
|
|
|
|
|
// messages and fields. A consumer of data registers callbacks and then
|
|
|
|
|
// passes the upb_handlers object to the producer, which calls them at the
|
|
|
|
|
// appropriate times.
|
|
|
|
|
|
|
|
|
|
// A upb_handlers object represents a graph of handlers. Each message can have
|
|
|
|
|
// a set of handlers as well as a set of fields which themselves have handlers.
|
|
|
|
|
// Fields that represent submessages or groups are linked to other message
|
|
|
|
|
// handlers, so the overall set of handlers can form a graph structure (which
|
|
|
|
|
// may be cyclic).
|
|
|
|
|
//
|
|
|
|
|
// The upb_mhandlers (message handlers) object can have the following handlers:
|
|
|
|
|
//
|
|
|
|
|
// static upb_flow_t startmsg(void *closure) {
|
|
|
|
|
// // Called when the message begins. "closure" was supplied by our caller.
|
|
|
|
|
// return UPB_CONTINUE;
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// static void endmsg(void *closure, upb_status *status) {
|
|
|
|
|
// // Called when processing of this message ends, whether in success or
|
|
|
|
|
// // failure. "status" indicates the final status of processing, and can
|
|
|
|
|
// // also be modified in-place to update the final status.
|
|
|
|
|
// //
|
|
|
|
|
// // Since this callback is guaranteed to always be called eventually, it
|
|
|
|
|
// // can be used to free any resources that were allocated during processing.
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// TODO: unknown field handler.
|
|
|
|
|
//
|
|
|
|
|
// The upb_fhandlers (field handlers) object can have the following handlers:
|
|
|
|
|
//
|
|
|
|
|
// static upb_flow_t value(void *closure, upb_value fval, upb_value val) {
|
|
|
|
|
// // Called when the field's value is encountered. "fval" contains
|
|
|
|
|
// // whatever value was bound to this field at registration time
|
|
|
|
|
// // (for upb_register_all(), this will be the field's upb_fielddef*).
|
|
|
|
|
// return UPB_CONTINUE;
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// static upb_sflow_t startsubmsg(void *closure, upb_value fval) {
|
|
|
|
|
// // Called when a submessage begins. The second element of the return
|
|
|
|
|
// // value is the closure for the submessage.
|
|
|
|
|
// return UPB_CONTINUE_WITH(closure);
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// static upb_flow_t endsubmsg(void *closure, upb_value fval) {
|
|
|
|
|
// // Called when a submessage ends.
|
|
|
|
|
// return UPB_CONTINUE;
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// All handlers except the endmsg handler return a value from this enum, to
|
|
|
|
|
// control whether parsing will continue or not.
|
|
|
|
|
typedef enum { |
|
|
|
@ -51,10 +76,7 @@ typedef enum { |
|
|
|
|
|
|
|
|
|
// Halt processing permanently (in a non-resumable way). The endmsg handlers
|
|
|
|
|
// for any currently open messages will be called which can supply a more
|
|
|
|
|
// specific status message. If UPB_BREAK is returned from inside a delegated
|
|
|
|
|
// message, processing will continue normally in the containing message (though
|
|
|
|
|
// the containing message can inspect the returned status and choose to also
|
|
|
|
|
// return UPB_BREAK if it is not ok).
|
|
|
|
|
// specific status message.
|
|
|
|
|
UPB_BREAK, |
|
|
|
|
|
|
|
|
|
// Skips to the end of the current submessage (or if we are at the top
|
|
|
|
@ -70,24 +92,22 @@ typedef enum { |
|
|
|
|
// TODO: Add UPB_SUSPEND, for resumable producers/consumers.
|
|
|
|
|
} upb_flow_t; |
|
|
|
|
|
|
|
|
|
// Typedefs for all of the handler functions defined above.
|
|
|
|
|
typedef struct _upb_sflow upb_sflow_t; |
|
|
|
|
typedef upb_flow_t (*upb_startmsg_handler_t)(void *closure); |
|
|
|
|
typedef void (*upb_endmsg_handler_t)(void *closure, upb_status *status); |
|
|
|
|
typedef upb_flow_t (*upb_value_handler_t)( |
|
|
|
|
void *closure, upb_value fval, upb_value val); |
|
|
|
|
typedef upb_sflow_t (*upb_startsubmsg_handler_t)( |
|
|
|
|
void *closure, upb_value fval); |
|
|
|
|
typedef upb_flow_t (*upb_endsubmsg_handler_t)(void *closure, upb_value fval); |
|
|
|
|
typedef upb_flow_t (*upb_unknownval_handler_t)( |
|
|
|
|
void *closure, upb_field_number_t fieldnum, upb_value val); |
|
|
|
|
|
|
|
|
|
typedef upb_flow_t (upb_startmsg_handler)(void *c); |
|
|
|
|
typedef void (upb_endmsg_handler)(void *c, upb_status *status); |
|
|
|
|
typedef upb_flow_t (upb_value_handler)(void *c, upb_value fval, upb_value val); |
|
|
|
|
typedef upb_sflow_t (upb_startsubmsg_handler)(void *closure, upb_value fval); |
|
|
|
|
typedef upb_flow_t (upb_endsubmsg_handler)(void *closure, upb_value fval); |
|
|
|
|
|
|
|
|
|
// No-op implementations of all of the above handlers. Use these instead of
|
|
|
|
|
// rolling your own -- the JIT can recognize these and optimize away the call.
|
|
|
|
|
upb_flow_t upb_startmsg_nop(void *closure); |
|
|
|
|
void upb_endmsg_nop(void *closure, upb_status *status); |
|
|
|
|
upb_flow_t upb_value_nop(void *closure, upb_value fval, upb_value val); |
|
|
|
|
upb_sflow_t upb_startsubmsg_nop(void *closure, upb_value fval); |
|
|
|
|
upb_flow_t upb_endsubmsg_nop(void *closure, upb_value fval); |
|
|
|
|
upb_flow_t upb_unknownval_nop(void *closure, upb_field_number_t fieldnum, |
|
|
|
|
upb_value val); |
|
|
|
|
|
|
|
|
|
struct _upb_decoder; |
|
|
|
|
typedef struct _upb_fieldent { |
|
|
|
|
bool junk; |
|
|
|
@ -100,21 +120,20 @@ typedef struct _upb_fieldent { |
|
|
|
|
int32_t msgent_index; |
|
|
|
|
upb_value fval; |
|
|
|
|
union { |
|
|
|
|
upb_value_handler_t value; |
|
|
|
|
upb_startsubmsg_handler_t startsubmsg; |
|
|
|
|
upb_value_handler *value; |
|
|
|
|
upb_startsubmsg_handler *startsubmsg; |
|
|
|
|
} cb; |
|
|
|
|
upb_endsubmsg_handler_t endsubmsg; |
|
|
|
|
upb_endsubmsg_handler *endsubmsg; |
|
|
|
|
uint32_t jit_pclabel; |
|
|
|
|
uint32_t jit_pclabel_notypecheck; |
|
|
|
|
uint32_t jit_submsg_done_pclabel; |
|
|
|
|
void (*decode)(struct _upb_decoder *d, struct _upb_fieldent *f); |
|
|
|
|
} upb_fieldent; |
|
|
|
|
} upb_fhandlers; |
|
|
|
|
|
|
|
|
|
typedef struct _upb_msgent { |
|
|
|
|
upb_startmsg_handler_t startmsg; |
|
|
|
|
upb_endmsg_handler_t endmsg; |
|
|
|
|
upb_unknownval_handler_t unknownval; |
|
|
|
|
// Maps field number -> upb_fieldent.
|
|
|
|
|
upb_startmsg_handler *startmsg; |
|
|
|
|
upb_endmsg_handler *endmsg; |
|
|
|
|
// Maps field number -> upb_fhandlers.
|
|
|
|
|
upb_inttable fieldtab; |
|
|
|
|
uint32_t jit_startmsg_pclabel; |
|
|
|
|
uint32_t jit_endofbuf_pclabel; |
|
|
|
@ -126,7 +145,7 @@ typedef struct _upb_msgent { |
|
|
|
|
// Currently keyed on field number. Could also try keying it
|
|
|
|
|
// on encoded or decoded tag, or on encoded field number.
|
|
|
|
|
void **tablearray; |
|
|
|
|
} upb_msgent; |
|
|
|
|
} upb_mhandlers; |
|
|
|
|
|
|
|
|
|
typedef struct { |
|
|
|
|
upb_msgdef *msgdef; |
|
|
|
@ -135,10 +154,10 @@ typedef struct { |
|
|
|
|
|
|
|
|
|
struct _upb_handlers { |
|
|
|
|
// Array of msgdefs, [0]=toplevel.
|
|
|
|
|
upb_msgent *msgs; |
|
|
|
|
upb_mhandlers *msgs; |
|
|
|
|
int msgs_len, msgs_size; |
|
|
|
|
upb_msgdef *toplevel_msgdef; // We own a ref.
|
|
|
|
|
upb_msgent *msgent; |
|
|
|
|
upb_mhandlers *msgent; |
|
|
|
|
upb_handlers_frame stack[UPB_MAX_TYPE_DEPTH], *top, *limit; |
|
|
|
|
bool should_jit; |
|
|
|
|
}; |
|
|
|
@ -161,69 +180,19 @@ INLINE upb_sflow_t UPB_SFLOW(upb_flow_t flow, void *closure) { |
|
|
|
|
#define UPB_CONTINUE_WITH(c) UPB_SFLOW(UPB_CONTINUE, c) |
|
|
|
|
#define UPB_S_BREAK UPB_SFLOW(UPB_BREAK, NULL) |
|
|
|
|
|
|
|
|
|
// Each message can have its own set of handlers. Here are empty definitions
|
|
|
|
|
// of the handlers for convenient copy/paste.
|
|
|
|
|
// TODO: Should endsubmsg get a copy of the upb_status*, so it can decide what
|
|
|
|
|
// to do in the case of a delegated failure?
|
|
|
|
|
//
|
|
|
|
|
// static upb_flow_t startmsg(void *closure) {
|
|
|
|
|
// // Called when the message begins. "closure" was supplied by our caller.
|
|
|
|
|
// // "mval" is whatever was bound to this message at registration time (for
|
|
|
|
|
// // upb_register_all() it will be its upb_msgdef*).
|
|
|
|
|
// return UPB_CONTINUE;
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// static void endmsg(void *closure, upb_status *status) {
|
|
|
|
|
// // Called when processing of this top-level message ends, whether in
|
|
|
|
|
// // success or failure. "status" indicates the final status of processing,
|
|
|
|
|
// // and can also be modified in-place to update the final status.
|
|
|
|
|
// //
|
|
|
|
|
// // Since this callback is guaranteed to always be called eventually, it
|
|
|
|
|
// // can be used to free any resources that were allocated during processing.
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// static upb_flow_t value(void *closure, upb_value fval, upb_value val) {
|
|
|
|
|
// // Called for every non-submessage value in the stream. "fval" contains
|
|
|
|
|
// // whatever value was bound to this field at registration time
|
|
|
|
|
// // (for upb_register_all(), this will be the field's upb_fielddef*).
|
|
|
|
|
// return UPB_CONTINUE;
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// static upb_sflow_t startsubmsg(void *closure, upb_value fval) {
|
|
|
|
|
// // Called when a submessage begins. The second element of the return
|
|
|
|
|
// // value is the closure for the submessage.
|
|
|
|
|
// return UPB_CONTINUE_WITH(closure);
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// static upb_flow_t endsubmsg(void *closure, upb_value fval) {
|
|
|
|
|
// // Called when a submessage ends.
|
|
|
|
|
// return UPB_CONTINUE;
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// static upb_flow_t unknownval(void *closure, upb_field_number_t fieldnum,
|
|
|
|
|
// upb_value val) {
|
|
|
|
|
// // Called when an unknown value is encountered.
|
|
|
|
|
// return UPB_CONTINUE;
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// Functions to register the above handlers.
|
|
|
|
|
// TODO: as an optimization, we could special-case handlers that don't
|
|
|
|
|
// need fval, to avoid even generating the code that sets the argument.
|
|
|
|
|
// If a value does not have a handler registered and there is no unknownval
|
|
|
|
|
// handler, the value will be skipped.
|
|
|
|
|
void upb_register_startend(upb_handlers *h, upb_startmsg_handler_t startmsg, |
|
|
|
|
upb_endmsg_handler_t endmsg); |
|
|
|
|
void upb_register_startend(upb_handlers *h, upb_startmsg_handler *startmsg, |
|
|
|
|
upb_endmsg_handler *endmsg); |
|
|
|
|
void upb_register_value(upb_handlers *h, upb_fielddef *f, |
|
|
|
|
upb_value_handler_t value, upb_value fval); |
|
|
|
|
void upb_register_unknownval(upb_handlers *h, upb_unknownval_handler_t unknown); |
|
|
|
|
upb_value_handler *value, upb_value fval); |
|
|
|
|
|
|
|
|
|
// To register handlers for a submessage, push the fielddef and pop it
|
|
|
|
|
// when you're done. This can be used to delegate a submessage to a
|
|
|
|
|
// different processing component which does not need to be aware whether
|
|
|
|
|
// it is at the top level or not.
|
|
|
|
|
void upb_handlers_push(upb_handlers *h, upb_fielddef *f, |
|
|
|
|
upb_startsubmsg_handler_t start, |
|
|
|
|
upb_endsubmsg_handler_t end, upb_value fval, |
|
|
|
|
upb_startsubmsg_handler *start, |
|
|
|
|
upb_endsubmsg_handler *end, upb_value fval, |
|
|
|
|
bool delegate); |
|
|
|
|
void upb_handlers_pop(upb_handlers *h, upb_fielddef *f); |
|
|
|
|
|
|
|
|
@ -245,17 +214,16 @@ bool upb_handlers_link(upb_handlers *h, upb_fielddef *f); |
|
|
|
|
// later if desired.
|
|
|
|
|
// TODO: upb_register_path_submsg()?
|
|
|
|
|
void upb_register_path_value(upb_handlers *h, const char *path, |
|
|
|
|
upb_value_handler_t value, upb_value fval); |
|
|
|
|
upb_value_handler *value, upb_value fval); |
|
|
|
|
|
|
|
|
|
// Convenience function for registering a single set of handlers on every
|
|
|
|
|
// message in our hierarchy. mvals are bound to upb_msgdef* and fvals are
|
|
|
|
|
// bound to upb_fielddef*. Any of the handlers can be NULL.
|
|
|
|
|
void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start, |
|
|
|
|
upb_endmsg_handler_t end, |
|
|
|
|
upb_value_handler_t value, |
|
|
|
|
upb_startsubmsg_handler_t startsubmsg, |
|
|
|
|
upb_endsubmsg_handler_t endsubmsg, |
|
|
|
|
upb_unknownval_handler_t unknown); |
|
|
|
|
void upb_register_all(upb_handlers *h, upb_startmsg_handler *start, |
|
|
|
|
upb_endmsg_handler *end, |
|
|
|
|
upb_value_handler *value, |
|
|
|
|
upb_startsubmsg_handler *startsubmsg, |
|
|
|
|
upb_endsubmsg_handler *endsubmsg); |
|
|
|
|
|
|
|
|
|
// TODO: for clients that want to increase efficiency by preventing bytesrcs
|
|
|
|
|
// from automatically being converted to strings in the value callback.
|
|
|
|
@ -264,11 +232,11 @@ void upb_register_all(upb_handlers *h, upb_startmsg_handler_t start, |
|
|
|
|
// Low-level functions -- internal-only.
|
|
|
|
|
void upb_register_typed_value(upb_handlers *h, upb_field_number_t fieldnum, |
|
|
|
|
upb_fieldtype_t type, bool repeated, |
|
|
|
|
upb_value_handler_t value, upb_value fval); |
|
|
|
|
upb_value_handler *value, upb_value fval); |
|
|
|
|
void upb_register_typed_submsg(upb_handlers *h, upb_field_number_t fieldnum, |
|
|
|
|
upb_fieldtype_t type, bool repeated, |
|
|
|
|
upb_startsubmsg_handler_t start, |
|
|
|
|
upb_endsubmsg_handler_t end, |
|
|
|
|
upb_startsubmsg_handler *start, |
|
|
|
|
upb_endsubmsg_handler *end, |
|
|
|
|
upb_value fval); |
|
|
|
|
void upb_handlers_typed_link(upb_handlers *h, upb_field_number_t fieldnum, |
|
|
|
|
upb_fieldtype_t type, bool repeated, int frames); |
|
|
|
@ -276,11 +244,11 @@ void upb_handlers_typed_push(upb_handlers *h, upb_field_number_t fieldnum, |
|
|
|
|
upb_fieldtype_t type, bool repeated); |
|
|
|
|
void upb_handlers_typed_pop(upb_handlers *h); |
|
|
|
|
|
|
|
|
|
INLINE upb_msgent *upb_handlers_getmsgent(upb_handlers *h, upb_fieldent *f) { |
|
|
|
|
INLINE upb_mhandlers *upb_handlers_getmsgent(upb_handlers *h, upb_fhandlers *f) { |
|
|
|
|
assert(f->msgent_index != -1); |
|
|
|
|
return &h->msgs[f->msgent_index]; |
|
|
|
|
} |
|
|
|
|
upb_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number_t fieldnum); |
|
|
|
|
upb_fhandlers *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number_t fieldnum); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* upb_dispatcher *************************************************************/ |
|
|
|
@ -301,7 +269,7 @@ upb_fieldent *upb_handlers_lookup(upb_inttable *dispatch_table, upb_field_number |
|
|
|
|
// consumed, like if this is a submessage of a larger stream.
|
|
|
|
|
|
|
|
|
|
typedef struct { |
|
|
|
|
upb_fieldent *f; |
|
|
|
|
upb_fhandlers *f; |
|
|
|
|
void *closure; |
|
|
|
|
// Relative to the beginning of this buffer.
|
|
|
|
|
// For groups and the top-level: UINT32_MAX.
|
|
|
|
@ -315,7 +283,7 @@ typedef struct { |
|
|
|
|
upb_handlers *handlers; |
|
|
|
|
|
|
|
|
|
// Msg and dispatch table for the current level.
|
|
|
|
|
upb_msgent *msgent; |
|
|
|
|
upb_mhandlers *msgent; |
|
|
|
|
upb_inttable *dispatch_table; |
|
|
|
|
|
|
|
|
|
// The number of startsubmsg calls without a corresponding endsubmsg call.
|
|
|
|
@ -354,20 +322,20 @@ upb_flow_t upb_dispatch_startmsg(upb_dispatcher *d); |
|
|
|
|
void upb_dispatch_endmsg(upb_dispatcher *d, upb_status *status); |
|
|
|
|
|
|
|
|
|
// Looks up a field by number for the current message.
|
|
|
|
|
INLINE upb_fieldent *upb_dispatcher_lookup(upb_dispatcher *d, |
|
|
|
|
INLINE upb_fhandlers *upb_dispatcher_lookup(upb_dispatcher *d, |
|
|
|
|
upb_field_number_t n) { |
|
|
|
|
return (upb_fieldent*)upb_inttable_fastlookup( |
|
|
|
|
d->dispatch_table, n, sizeof(upb_fieldent)); |
|
|
|
|
return (upb_fhandlers*)upb_inttable_fastlookup( |
|
|
|
|
d->dispatch_table, n, sizeof(upb_fhandlers)); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Dispatches values or submessages -- the client is responsible for having
|
|
|
|
|
// previously looked up the field.
|
|
|
|
|
upb_flow_t upb_dispatch_startsubmsg(upb_dispatcher *d, |
|
|
|
|
upb_fieldent *f, |
|
|
|
|
upb_fhandlers *f, |
|
|
|
|
size_t userval); |
|
|
|
|
upb_flow_t upb_dispatch_endsubmsg(upb_dispatcher *d); |
|
|
|
|
|
|
|
|
|
INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, upb_fieldent *f, |
|
|
|
|
INLINE upb_flow_t upb_dispatch_value(upb_dispatcher *d, upb_fhandlers *f, |
|
|
|
|
upb_value val) { |
|
|
|
|
if (upb_dispatcher_skipping(d)) return UPB_SKIPSUBMSG; |
|
|
|
|
upb_flow_t flow = f->cb.value(d->top->closure, f->fval, val); |
|
|
|
|