Header cleanup, clarify/correct comments for interfaces.

pull/13171/head
Joshua Haberman 13 years ago
parent d1d126acf4
commit 4a8b9be46c
  1. 2
      Makefile
  2. 2
      benchmarks/parsestream.upb.c
  3. 2
      benchmarks/parsetoproto2.upb.cc
  4. 2
      benchmarks/parsetostruct.upb.c
  5. 2
      tests/test_decoder.c
  6. 2
      tests/tests.c
  7. 4
      tools/upbc.c
  8. 2
      upb/atomic.h
  9. 68
      upb/bytestream.h
  10. 9
      upb/def.c
  11. 63
      upb/def.h
  12. 6
      upb/descriptor.c
  13. 8
      upb/descriptor.h
  14. 9
      upb/pb/decoder.c
  15. 28
      upb/pb/decoder.h
  16. 7
      upb/pb/glue.c
  17. 8
      upb/table.h

@ -179,6 +179,8 @@ descriptorgen: upb/descriptor.pb tools/upbc
./tools/upbc -o upb/descriptor upb/descriptor.pb
tools/upbc: tools/upbc.c $(LIBUPB)
$(E) CC $<
$(Q) $(CC) $(CFLAGS) $(CPPFLAGS) $(DEF_OPT) -o $@ $< $(LIBUPB)
# Tests. #######################################################################

@ -57,7 +57,7 @@ static bool initialize()
// Cause all messages to be read, but do nothing when they are.
upb_handlerset hset = {NULL, NULL, value, startsubmsg, NULL, NULL, NULL};
upb_handlers_reghandlerset(handlers, def, &hset);
upb_decoder_initforhandlers(&decoder, handlers);
upb_decoder_init(&decoder, handlers);
upb_handlers_unref(handlers);
upb_stringsrc_init(&stringsrc);
return true;

@ -279,7 +279,7 @@ static bool initialize()
upb_handlers *h = upb_handlers_new();
upb_accessors_reghandlers(h, def);
if (!JIT) h->should_jit = false;
upb_decoder_initforhandlers(&d, h);
upb_decoder_init(&d, h);
upb_handlers_unref(h);
return true;

@ -47,7 +47,7 @@ static bool initialize()
upb_handlers *h = upb_handlers_new();
upb_accessors_reghandlers(h, def);
if (!JIT) h->should_jit = false;
upb_decoder_initforhandlers(&d, h);
upb_decoder_init(&d, h);
upb_handlers_unref(h);
if (!BYREF) {

@ -51,7 +51,7 @@ int main(int argc, char *argv[]) {
upb_textprinter_reghandlers(handlers, m);
upb_decoder d;
upb_decoder_initforhandlers(&d, handlers);
upb_decoder_init(&d, handlers);
upb_decoder_reset(&d, upb_stdio_bytesrc(&in), 0, UPB_NONDELIMITED, p);
upb_status_clear(&status);

@ -41,7 +41,7 @@ static void test_upb_jit() {
upb_handlerset hset = {NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL, NULL};
upb_handlers_reghandlerset(h, upb_downcast_msgdef(def), &hset);
upb_decoder d;
upb_decoder_initforhandlers(&d, h);
upb_decoder_init(&d, h);
upb_decoder_uninit(&d);
upb_symtab_unref(s);
upb_def_unref(def);

@ -169,8 +169,8 @@ int main(int argc, char *argv[]) {
upb_status status = UPB_STATUS_INIT;
upb_read_descriptor(s, descriptor, len, &status);
if(!upb_ok(&status)) {
upb_status_print(&status, stderr);
error("Failed to parse input file descriptor\n");
error("Failed to parse input file descriptor: %s\n",
upb_status_getstr(&status));
}
upb_status_uninit(&status);

@ -7,7 +7,7 @@
* Only a very small part of upb is thread-safe. Notably, individual
* messages, arrays, and strings are *not* thread safe for mutating.
* However, we do make message *metadata* such as upb_msgdef and
* upb_context thread-safe, and their ownership is tracked via atomic
* upb_symtab thread-safe, and their ownership is tracked via atomic
* refcounting. This header implements the small number of atomic
* primitives required to support this. The primitives we implement
* are:

@ -10,6 +10,7 @@
* can get the data from a fd, a string, a cord, etc.
*
* Byte streams are NOT thread-safe! (Like f{read,write}_unlocked())
* This may change (in particular, bytesrc objects may be better thread-safe).
*/
#ifndef UPB_BYTESTREAM_H
@ -28,7 +29,12 @@ extern "C" {
/* upb_bytesrc ****************************************************************/
// A upb_bytesrc allows the consumer of a stream of bytes to obtain buffers as
// they become available, and to preserve some trailing amount of data.
// they become available, and to preserve some trailing amount of data, which
// is useful for lazy parsing (among other things). If there is a submessage
// that we want to parse later we can take a reference on that region of the
// input buffer. This will guarantee that the bytesrc keeps the submessage
// data around for later use, without requiring a copy out of the input
// buffers.
typedef size_t upb_bytesrc_fetch_func(void*, uint64_t, upb_status*);
typedef void upb_bytesrc_read_func(void*, uint64_t, size_t, char*);
typedef const char *upb_bytesrc_getptr_func(void*, uint64_t, size_t*);
@ -53,10 +59,15 @@ INLINE void upb_bytesrc_init(upb_bytesrc *src, upb_bytesrc_vtbl *vtbl) {
}
// Fetches at least one byte starting at ofs, returning the actual number of
// bytes fetched (or 0 on error: see "s" for details). Gives caller a ref on
// the fetched region. It is safe to re-fetch existing regions but only if
// they are ref'd. "ofs" may not greater than the end of the region that was
// previously fetched.
// bytes fetched (or 0 on error: see "s" for details). A successful return
// gives caller a ref on the fetched region.
//
// If "ofs" may be greater or equal than the end of the already-fetched region.
// It may also be less than the end of the already-fetch region *if* either of
// the following is true:
//
// * the region is ref'd (this implies that the data is still in-memory)
// * the bytesrc is seekable (this implies that the data can be fetched again).
INLINE size_t upb_bytesrc_fetch(upb_bytesrc *src, uint64_t ofs, upb_status *s) {
return src->vtbl->fetch(src, ofs, s);
}
@ -68,20 +79,21 @@ INLINE void upb_bytesrc_read(upb_bytesrc *src, uint64_t src_ofs, size_t len,
src->vtbl->read(src, src_ofs, len, dst);
}
// Returns a pointer to the bytesrc's internal buffer, returning how much data
// was actually returned (which may be less than "len" if the given region is
// not contiguous). The caller must own refs on the entire region from [ofs,
// ofs+len]. The returned buffer is valid for as long as the region remains
// ref'd.
// Returns a pointer to the bytesrc's internal buffer, storing in *len how much
// data is available. The caller must own refs on the given region. The
// returned buffer is valid for as long as the region remains ref'd.
//
// TODO: is "len" really required here?
// TODO: if more data is available than the caller has ref'd is it ok for the
// caller to read *len bytes?
INLINE const char *upb_bytesrc_getptr(upb_bytesrc *src, uint64_t ofs,
size_t *len) {
return src->vtbl->getptr(src, ofs, len);
}
// Gives the caller a ref on the given region. The caller must know that the
// given region is already ref'd.
// given region is already ref'd (for example, inside a upb_handlers callback
// that receives a upb_strref, the region is guaranteed to be ref'd -- this
// function allows that handler to take its own ref).
INLINE void upb_bytesrc_refregion(upb_bytesrc *src, uint64_t ofs, size_t len) {
src->vtbl->refregion(src, ofs, len);
}
@ -110,16 +122,17 @@ INLINE void upb_bytesrc_unref(upb_bytesrc *src) {
src->vtbl->unref(src);
}
/* upb_strref *****************************************************************/
// The structure we pass for a string.
// The structure we pass to upb_handlers for a string value.
typedef struct _upb_strref {
// Pointer to the string data. NULL if the string spans multiple input
// buffers (in which case upb_bytesrc_getptr() must be called to obtain
// the actual pointers).
const char *ptr;
// Length of the string.
// Total length of the string.
uint32_t len;
// Offset in the bytesrc that represents the beginning of this string.
@ -139,12 +152,21 @@ typedef struct _upb_strref {
char *upb_strref_dup(struct _upb_strref *r);
INLINE void upb_strref_read(struct _upb_strref *r, char *buf) {
upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, buf);
if (r->ptr) {
memcpy(buf, r->ptr, r->len);
} else {
assert(r->bytesrc);
upb_bytesrc_read(r->bytesrc, r->stream_offset, r->len, buf);
}
}
/* upb_bytesink ***************************************************************/
// A bytesink is an interface that allows the caller to push byte-wise data.
// It is very simple -- the only special capability is the ability to "rewind"
// the stream, which is really only a mechanism of having the bytesink ignore
// some subsequent calls.
typedef int upb_bytesink_write_func(void*, const void*, int);
typedef int upb_bytesink_vprintf_func(void*, const char *fmt, va_list args);
@ -194,6 +216,22 @@ INLINE uint64_t upb_bytesink_getoffset(upb_bytesink *sink) {
return sink->offset;
}
// Rewinds the stream to the given offset. This cannot actually "unput" any
// data, it is for situations like:
//
// // If false is returned (because of error), call again later to resume.
// bool write_some_data(upb_bytesink *sink, int indent) {
// uint64_t start_offset = upb_bytesink_getoffset(sink);
// if (upb_bytesink_writestr(sink, "Some data") < 0) goto err;
// if (upb_bytesink_putrepeated(sink, ' ', indent) < 0) goto err;
// return true;
// err:
// upb_bytesink_rewind(sink, start_offset);
// return false;
// }
//
// The subsequent bytesink writes *must* be identical to the writes that were
// rewinded past.
INLINE void upb_bytesink_rewind(upb_bytesink *sink, uint64_t offset) {
// TODO
(void)sink;

@ -523,6 +523,13 @@ upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter) {
/* upb_symtab *****************************************************************/
struct _upb_symtab {
upb_atomic_t refcount;
upb_rwlock_t lock; // Protects all members except the refcount.
upb_strtable symtab; // The symbol table.
upb_deflist olddefs;
};
typedef struct {
upb_def *def;
} upb_symtab_ent;
@ -568,6 +575,8 @@ static void upb_symtab_free(upb_symtab *s) {
free(s);
}
void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); }
void upb_symtab_unref(upb_symtab *s) {
if(s && upb_atomic_unref(&s->refcount)) {
upb_symtab_free(s);

@ -123,12 +123,12 @@ INLINE const char *upb_fielddef_typename(upb_fielddef *f) {
return f->def ? f->def->fqname : NULL;
}
// Only meaningful once the def is in a symtab (returns NULL otherwise, or for
// a fielddef where !upb_hassubdef(f)).
// The enum or submessage def for this field, if any. Only meaningful for
// submessage, group, and enum fields (ie. when upb_hassubdef(f) is true).
// Since defs are not linked together until they are in a symtab, this
// will return NULL until the msgdef is in a symtab.
upb_def *upb_fielddef_subdef(upb_fielddef *f);
// NULL until the fielddef has been added to a msgdef.
// Write accessors. "Number" and "name" must be set before the fielddef is
// added to a msgdef. For the moment we do not allow these to be set once
// the fielddef is added to a msgdef -- this could be relaxed in the future.
@ -214,11 +214,12 @@ void upb_msgdef_setsize(upb_msgdef *m, uint16_t size);
void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes);
bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end);
// Adds a fielddef to a msgdef. Caller retains its ref on the fielddef.
// May only be done before the msgdef is in a symtab. The fielddef's name and
// number must be set, and the message may not already contain any field with
// this name or number, and this fielddef may not be part of another message,
// otherwise false is returned and no action is performed.
// Adds a fielddef to a msgdef. Caller retains its ref on the fielddef. May
// only be done before the msgdef is in a symtab (requires upb_def_ismutable(m)
// for the msgdef). The fielddef's name and number must be set, and the
// message may not already contain any field with this name or number, and this
// fielddef may not be part of another message, otherwise false is returned and
// no action is performed.
bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef **f, int n);
INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
return upb_msgdef_addfields(m, &f, 1);
@ -252,6 +253,7 @@ INLINE int upb_msgdef_numfields(upb_msgdef *m) {
}
// Iteration over fields. The order is undefined.
// TODO: the iteration should be in field order.
// Iterators are invalidated when a field is added or removed.
// upb_msg_iter i;
// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
@ -296,7 +298,7 @@ upb_enumdef *upb_enumdef_dup(upb_enumdef *e);
INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; }
// May only be set before the enumdef is in a symtab.
// May only be set if upb_def_ismutable(e).
void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
// Adds a value to the enumdef. Requires that no existing val has this
@ -334,30 +336,12 @@ INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
/* upb_symtab *****************************************************************/
// A SymbolTable is where upb_defs live. It is empty when first constructed.
// Clients add definitions to the symtab (or replace existing definitions) by
// using a upb_symtab_commit() or calling upb_symtab_add().
// upb_deflist: A little dynamic array for storing a growing list of upb_defs.
typedef struct {
upb_def **defs;
uint32_t len;
uint32_t size;
} upb_deflist;
void upb_deflist_init(upb_deflist *l);
void upb_deflist_uninit(upb_deflist *l);
void upb_deflist_push(upb_deflist *l, upb_def *d);
struct _upb_symtab {
upb_atomic_t refcount;
upb_rwlock_t lock; // Protects all members except the refcount.
upb_strtable symtab; // The symbol table.
upb_deflist olddefs;
};
// A symtab (symbol table) is where upb_defs live. It is empty when first
// constructed. Clients add definitions to the symtab (or replace existing
// definitions) by calling upb_symtab_add().
upb_symtab *upb_symtab_new(void);
INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); }
void upb_symtab_ref(upb_symtab *s);
void upb_symtab_unref(upb_symtab *s);
// Resolves the given symbol using the rules described in descriptor.proto,
@ -427,6 +411,21 @@ UPB_DOWNCAST_DEF(svcdef, SERVICE);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF
/* upb_deflist ****************************************************************/
// upb_deflist is an internal-only dynamic array for storing a growing list of
// upb_defs.
typedef struct {
upb_def **defs;
uint32_t len;
uint32_t size;
} upb_deflist;
void upb_deflist_init(upb_deflist *l);
void upb_deflist_uninit(upb_deflist *l);
void upb_deflist_push(upb_deflist *l, upb_def *d);
#ifdef __cplusplus
} /* extern "C" */
#endif

@ -29,10 +29,6 @@ static char *upb_join(char *base, char *name) {
/* upb_descreader ************************************************************/
// A upb_descreader builds a list of defs by handling a parse of a protobuf in
// the format defined in descriptor.proto. The output of a upb_descreader is
// a upb_symtabtxn.
static upb_def *upb_deflist_last(upb_deflist *l) {
return l->defs[l->len-1];
}
@ -480,8 +476,6 @@ static void upb_msgdef_endmsg(void *_r, upb_status *status) {
upb_status_seterrliteral(status, "Encountered message with no name.");
return;
}
upb_msgdef_layout(m);
upb_descreader_endcontainer(r);
}

@ -21,8 +21,6 @@ extern "C" {
/* upb_descreader ************************************************************/
// upb_descreader reads a descriptor and puts defs in a upb_symtabtxn.
// We keep a stack of all the messages scopes we are currently in, as well as
// the top-level file scope. This is necessary to correctly qualify the
// definitions that are contained inside. "name" tracks the name of the
@ -50,13 +48,11 @@ typedef struct {
upb_fielddef *f;
} upb_descreader;
// Creates a new descriptor builder that will add defs to the given txn.
void upb_descreader_init(upb_descreader *r);
void upb_descreader_uninit(upb_descreader *r);
// Registers handlers that will load descriptor data into a symtabtxn.
// Pass the descreader as the closure. The messages will have
// upb_msgdef_layout() called on them before adding to the txn.
// Registers handlers that will build the defs. Pass the descreader as the
// closure.
upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h);
// Gets the array of defs that have been parsed and removes them from the

@ -400,7 +400,7 @@ static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
#endif
}
void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
upb_dispatcher_init(
&d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
#ifdef UPB_USE_JIT_X64
@ -423,13 +423,6 @@ void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
}
}
void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) {
upb_handlers *h = upb_handlers_new();
upb_accessors_reghandlers(h, m);
upb_decoder_initforhandlers(d, h);
upb_handlers_unref(h);
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, uint64_t start_ofs,
uint64_t end_ofs, void *closure) {
upb_dispatcher_frame *f = upb_dispatcher_reset(&d->dispatcher, closure);

@ -5,13 +5,8 @@
* Author: Josh Haberman <jhaberman@gmail.com>
*
* upb_decoder implements a high performance, streaming decoder for protobuf
* data that works by implementing upb_src and getting its data from a
* upb_bytesrc.
*
* The decoder does not currently support non-blocking I/O, in the sense that
* if the bytesrc returns UPB_STATUS_TRYAGAIN it is not possible to resume the
* decoder when data becomes available again. Support for this could be added,
* but it would add complexity and perhaps cost efficiency also.
* data that works by getting its input data from a upb_bytesrc and calling
* into a upb_handlers.
*/
#ifndef UPB_DECODER_H_
@ -36,18 +31,18 @@ typedef struct _upb_decoder {
upb_status *status; // Where we will store any errors that occur.
upb_strref strref; // For passing string data to callbacks.
// Offsets for the region we currently have ref'd.
// Offsets for the bytesrc region we currently have ref'd.
uint64_t refstart_ofs, refend_ofs;
// Current buffer and its stream offset.
// Current input buffer and its stream offset.
const char *buf, *ptr, *end;
uint64_t bufstart_ofs, bufend_ofs;
// Stream offset for the end of the top-level message, if any.
uint64_t end_ofs;
// Buf offset as of which we've delivered calbacks; needed for rollback on
// UPB_TRYAGAIN (or in the future, UPB_SUSPEND).
// Buf offset as of which we've delivered calbacks; needed for rollback if
// a callback returns UPB_BREAK.
const char *completed_ptr;
// End of the delimited region, relative to ptr, or NULL if not in this buf.
@ -66,6 +61,7 @@ typedef struct _upb_decoder {
struct dasm_State *dynasm;
#endif
// For exiting the decoder on error.
sigjmp_buf exitjmp;
} upb_decoder;
@ -75,14 +71,8 @@ typedef struct _upb_decoder {
// Initializes/uninitializes a decoder for calling into the given handlers
// or to write into the given msgdef, given its accessors). Takes a ref
// on the handlers or msgdef.
void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *h);
// Equivalent to:
// upb_accessors_reghandlers(m, h);
// upb_decoder_initforhandlers(d, h);
// except possibly more efficient, by using cached state in the msgdef.
void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m);
// on the handlers.
void upb_decoder_init(upb_decoder *d, upb_handlers *h);
void upb_decoder_uninit(upb_decoder *d);
// Resets the internal state of an already-allocated decoder. This puts it in a

@ -19,7 +19,10 @@ void upb_strtomsg(const char *str, size_t len, void *msg, upb_msgdef *md,
upb_stringsrc_reset(&strsrc, str, len);
upb_decoder d;
upb_decoder_initformsgdef(&d, md);
upb_handlers *h = upb_handlers_new();
upb_accessors_reghandlers(h, md);
upb_decoder_init(&d, h);
upb_handlers_unref(h);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), 0, UINT64_MAX, msg);
upb_decoder_decode(&d, status);
@ -63,7 +66,7 @@ upb_def **upb_load_descriptor(const char *str, size_t len, int *n,
upb_descreader_reghandlers(h);
upb_decoder d;
upb_decoder_initforhandlers(&d, h);
upb_decoder_init(&d, h);
upb_handlers_unref(h);
upb_descreader r;
upb_descreader_init(&r);

@ -11,6 +11,9 @@
* The table uses internal chaining with Brent's variation (inspired by the
* Lua implementation of hash tables). The hash function for strings is
* Austin Appleby's "MurmurHash."
*
* This header is internal to upb; its interface should not be considered
* public or stable.
*/
#ifndef UPB_TABLE_H_
@ -128,21 +131,16 @@ INLINE void *_upb_inttable_fastlookup(upb_inttable *t, uint32_t key,
upb_inttable_value *arrval =
(upb_inttable_value*)UPB_INDEX(t->array, key, value_size);
if (_upb_inttable_isarrkey(t, key)) {
//DEBUGPRINTF("array lookup for key %d, &val=%p, has_entry=%d\n", key, val, val->has_entry);
return (arrval->has_entry) ? arrval : NULL;
}
uint32_t bucket = _upb_inttable_bucket(t, key);
upb_inttable_entry *e =
(upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
//DEBUGPRINTF("looking in first bucket %d, entry size=%zd, addr=%p\n", bucket, entry_size, e);
while (1) {
//DEBUGPRINTF("%d, %d, %d\n", e->val.has_entry, e->hdr.key, key);
if (e->hdr.key == key) {
//DEBUGPRINTF("returning val from hash part\n");
return &e->val;
}
if ((bucket = e->hdr.next) == UPB_END_OF_CHAIN) return NULL;
//DEBUGPRINTF("looking in bucket %d\n", bucket);
e = (upb_inttable_entry*)UPB_INDEX(t->t.entries, bucket, entry_size);
}
}

Loading…
Cancel
Save