Major refactoring: abandon upb_msg, add upb_accessors.

Next on the chopping block is upb_string.
pull/13171/head
Joshua Haberman 14 years ago
parent a503b8859c
commit 559e23c796
  1. 5
      Makefile
  2. 4
      benchmarks/parsestream.upb_table.c
  3. 13
      benchmarks/parsetostruct.upb_table.c
  4. 7
      src/upb.h
  5. 10
      src/upb_decoder.c
  6. 12
      src/upb_decoder.h
  7. 1458
      src/upb_def.c
  8. 371
      src/upb_def.h
  9. 548
      src/upb_descriptor.c
  10. 67
      src/upb_descriptor.h
  11. 49
      src/upb_glue.c
  12. 8
      src/upb_glue.h
  13. 4
      src/upb_handlers.c
  14. 611
      src/upb_msg.c
  15. 429
      src/upb_msg.h
  16. 6
      src/upb_string.h
  17. 6
      src/upb_table.h
  18. 9
      tests/test_decoder.c
  19. 42
      tests/test_vs_proto2.cc
  20. 4
      tests/tests.c

@ -73,12 +73,14 @@ $(ALLSRC): perf-cppflags
CORE= \
src/upb.c \
src/upb_handlers.c \
src/upb_descriptor.c \
src/upb_table.c \
src/upb_string.c \
src/upb_def.c \
src/upb_msg.c \
src/upb_varint.c \
# Common encoders/decoders -- you're almost certain to want these.
STREAM= \
src/upb_decoder.c \
@ -101,7 +103,8 @@ TESTS_SRC= \
tests/test_string.c \
tests/tests.c \
tests/tests_varint.c \
tests/test_vs_proto2.cc
#tests/test_vs_proto2.cc
#tests/test_stream.c \

@ -36,7 +36,7 @@ static bool initialize()
upb_printerr(&status);
return false;
}
upb_parsedesc(s, fds_str, &status);
upb_read_descriptor(s, fds_str, &status);
upb_string_unref(fds_str);
if(!upb_ok(&status)) {
@ -64,7 +64,7 @@ static bool initialize()
// Cause all messages to be read, but do nothing when they are.
upb_handlerset hset = {NULL, NULL, value, startsubmsg, NULL, NULL, NULL};
upb_handlers_reghandlerset(handlers, def, &hset);
upb_decoder_init(&decoder, handlers);
upb_decoder_initforhandlers(&decoder, handlers);
upb_handlers_unref(handlers);
upb_stringsrc_init(&stringsrc);
return true;

@ -9,7 +9,7 @@
static upb_string *input_str;
static upb_msgdef *def;
static upb_msg *msg;
static void *msg;
static upb_stringsrc strsrc;
static upb_decoder d;
@ -25,7 +25,7 @@ static bool initialize()
upb_printerr(&status);
return false;
}
upb_parsedesc(s, fds_str, &status);
upb_read_descriptor(s, fds_str, &status);
upb_string_unref(fds_str);
if(!upb_ok(&status)) {
@ -49,13 +49,10 @@ static bool initialize()
return false;
}
upb_status_uninit(&status);
msg = upb_msg_new(def);
msg = upb_stdmsg_new(def);
upb_stringsrc_init(&strsrc);
upb_handlers *handlers = upb_handlers_new();
upb_msg_reghandlers(handlers, def);
upb_decoder_init(&d, handlers);
upb_handlers_unref(handlers);
upb_decoder_initformsgdef(&d, def);
if (!BYREF) {
// Pretend the input string is stack-allocated, which will force its data
@ -74,7 +71,7 @@ static void cleanup()
input_str->refcount.v = 1;
}
upb_string_unref(input_str);
upb_msg_unref(msg, def);
upb_stdmsg_free(msg, def);
upb_def_unref(UPB_UPCAST(def));
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);

@ -146,6 +146,7 @@ typedef uint8_t upb_valuetype_t;
#define UPB_VALUETYPE_BYTESRC 32
#define UPB_VALUETYPE_RAW 33
#define UPB_VALUETYPE_FIELDDEF 34
#define UPB_VALUETYPE_PTR 35
// A single .proto value. The owner must have an out-of-band way of knowing
// the type, so that it knows which union member to use.
@ -196,11 +197,9 @@ UPB_VALUE_ACCESSORS(int64, int64, int64_t, UPB_TYPE(INT64));
UPB_VALUE_ACCESSORS(uint32, uint32, uint32_t, UPB_TYPE(UINT32));
UPB_VALUE_ACCESSORS(uint64, uint64, uint64_t, UPB_TYPE(UINT64));
UPB_VALUE_ACCESSORS(bool, _bool, bool, UPB_TYPE(BOOL));
UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING));
UPB_VALUE_ACCESSORS(msg, msg, upb_msg*, UPB_TYPE(MESSAGE));
UPB_VALUE_ACCESSORS(arr, arr, upb_array*, UPB_VALUETYPE_ARRAY);
UPB_VALUE_ACCESSORS(bytesrc, bytesrc, upb_bytesrc*, UPB_VALUETYPE_BYTESRC);
UPB_VALUE_ACCESSORS(str, str, upb_string*, UPB_TYPE(STRING)); // Marked for destruction.
UPB_VALUE_ACCESSORS(fielddef, fielddef, upb_fielddef*, UPB_VALUETYPE_FIELDDEF);
UPB_VALUE_ACCESSORS(ptr, _void, void*, UPB_VALUETYPE_PTR);
extern upb_value UPB_NO_VALUE;

@ -11,6 +11,7 @@
#include "upb_bytestream.h"
#include "upb_decoder.h"
#include "upb_varint.h"
#include "upb_msg.h"
// Used for frames that have no specific end offset: groups, repeated primitive
// fields inside groups, and the top-level message.
@ -346,7 +347,7 @@ static void upb_decoder_skip(void *_d, upb_dispatcher_frame *top,
d->ptr = d->buf + bottom->end_offset;
}
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *handlers) {
upb_dispatcher_init(
&d->dispatcher, handlers, upb_decoder_skip, upb_decoder_exit2, d);
#ifdef UPB_USE_JIT_X64
@ -388,6 +389,13 @@ void upb_decoder_init(upb_decoder *d, upb_handlers *handlers) {
}
}
// Initializes decoder "d" from msgdef "m": creates a fresh handlers object,
// registers the accessor-based handlers for "m" on it, initializes the
// decoder for those handlers, and then drops this function's own ref on the
// handlers object.
void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m) {
  upb_handlers *accessor_handlers = upb_handlers_new();
  upb_accessors_reghandlers(accessor_handlers, m);
  upb_decoder_initforhandlers(d, accessor_handlers);
  // Release the ref obtained from upb_handlers_new() above.
  upb_handlers_unref(accessor_handlers);
}
void upb_decoder_reset(upb_decoder *d, upb_bytesrc *bytesrc, void *closure) {
upb_dispatcher_reset(&d->dispatcher, closure)->end_offset = UPB_NONDELIMITED;
d->bytesrc = bytesrc;

@ -91,8 +91,16 @@ typedef struct {
struct _upb_decoder;
typedef struct _upb_decoder upb_decoder;
// Allocates and frees a upb_decoder, respectively.
void upb_decoder_init(upb_decoder *d, upb_handlers *handlers);
// Initializes/uninitializes a decoder for calling into the given handlers
// (or to write into the given msgdef, given its accessors). Takes a ref
// on the handlers or msgdef.
void upb_decoder_initforhandlers(upb_decoder *d, upb_handlers *h);
// Equivalent to:
// upb_accessors_reghandlers(h, m);
// upb_decoder_initforhandlers(d, h);
// except possibly more efficient, by using cached state in the msgdef.
void upb_decoder_initformsgdef(upb_decoder *d, upb_msgdef *m);
void upb_decoder_uninit(upb_decoder *d);
// Resets the internal state of an already-allocated decoder. This puts it in a

File diff suppressed because it is too large Load Diff

@ -4,25 +4,16 @@
* Copyright (c) 2009-2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* Provides a mechanism for loading proto definitions from descriptors, and
* data structures to represent those definitions. These form the protobuf
* schema, and are used extensively throughout upb:
* Provides a mechanism for creating and linking proto definitions.
* These form the protobuf schema, and are used extensively throughout upb:
* - upb_msgdef: describes a "message" construct.
* - upb_fielddef: describes a message field.
* - upb_enumdef: describes an enum.
* (TODO: definitions of extensions and services).
* (TODO: definitions of services).
*
* Defs are obtained from a upb_symtab object. A upb_symtab is empty when
* constructed, and definitions can be added by supplying descriptors.
*
* Defs are immutable and reference-counted. Symbol tables reference any defs
* that are the "current" definitions. If an extension is loaded that adds a
* field to an existing message, a new msgdef is constructed that includes the
* new field and the old msgdef is unref'd. The old msgdef will still be ref'd
* by messages (if any) that were constructed with that msgdef.
*
* This file contains routines for creating and manipulating the definitions
* themselves. To create and manipulate actual messages, see upb_msg.h.
* These defs are mutable (and not thread-safe) when first created.
* Once they are added to a defbuilder (and later its symtab) they become
* immutable.
*/
#ifndef UPB_DEF_H_
@ -35,51 +26,37 @@
extern "C" {
#endif
/* upb_def: base class for defs **********************************************/
struct _upb_symtab;
typedef struct _upb_symtab upb_symtab;
// All the different kind of defs we support. These correspond 1:1 with
// declarations in a .proto file.
typedef enum {
UPB_DEF_MSG = 0,
UPB_DEF_ENUM,
UPB_DEF_SVC,
UPB_DEF_EXT,
// Internal-only, placeholder for a def that hasn't been resolved yet.
UPB_DEF_UNRESOLVED,
UPB_DEF_SERVICE, // Not yet implemented.
// For specifying that defs of any type are requested from getdefs.
UPB_DEF_ANY = -1
} upb_deftype;
UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*()
UPB_DEF_UNRESOLVED = 99, // Internal-only.
} upb_deftype_t;
// This typedef is more space-efficient than declaring an enum var directly.
typedef int8_t upb_deftype_t;
/* upb_def: base class for defs **********************************************/
typedef struct {
upb_string *fqname; // Fully qualified.
upb_atomic_t refcount;
upb_string *fqname; // Fully qualified.
upb_symtab *symtab; // Def is mutable iff symtab == NULL.
upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0).
upb_deftype_t type;
// The is_cyclic flag could go in upb_msgdef instead of here, because only
// messages can be involved in cycles. However, putting them here is free
// from a space perspective because structure alignment will otherwise leave
// three bytes empty after type. It also makes ref and unref more
// efficient, because we don't have to downcast to msgdef before checking the
// is_cyclic flag.
bool is_cyclic;
uint16_t search_depth; // Used during initialization dfs.
} upb_def;
// These must not be called directly!
void _upb_def_cyclic_ref(upb_def *def);
void _upb_def_reftozero(upb_def *def);
// Call to ref/deref a def.
INLINE void upb_def_ref(upb_def *def) {
if(upb_atomic_ref(&def->refcount) && def->is_cyclic) _upb_def_cyclic_ref(def);
}
INLINE void upb_def_unref(upb_def *def) {
if(def && upb_atomic_unref(&def->refcount)) _upb_def_reftozero(def);
}
// Call to ref/unref a def. Can be used at any time, but is not thread-safe
// until the def is in a symtab. While a def is in a symtab, everything
// reachable from that def (the symtab and all defs in the symtab) are
// guaranteed to be alive.
void upb_def_ref(upb_def *def);
void upb_def_unref(upb_def *def);
upb_def *upb_def_dup(upb_def *def);
#define UPB_UPCAST(ptr) (&(ptr)->base)
@ -88,30 +65,66 @@ INLINE void upb_def_unref(upb_def *def) {
// A upb_fielddef describes a single field in a message. It isn't a full def
// in the sense that it derives from upb_def. It cannot stand on its own; it
// is either a field of a upb_msgdef or contained inside a upb_extensiondef.
// It is also reference-counted.
// must be part of a upb_msgdef. It is also reference-counted.
struct _upb_fielddef {
uint8_t type;
uint8_t label;
// True if we own a ref on "def" (above). This is true unless this edge is
// part of a cycle.
bool owned;
uint8_t set_bit_mask;
struct _upb_msgdef *msgdef;
upb_def *def; // if upb_hasdef(f)
upb_atomic_t refcount;
bool finalized;
// The following fields may be modified until the def is finalized.
uint8_t type; // Use UPB_TYPE() constants.
uint8_t label; // Use UPB_LABEL() constants.
int16_t hasbit;
uint16_t offset;
int32_t number;
int16_t field_index; // Indicates set bit.
upb_string *name;
upb_value defaultval; // Only meaningful for non-repeated scalars and strings.
upb_value fval;
struct _upb_accessor_vtbl *accessor;
};
uint16_t set_bit_offset;
uint32_t byte_offset; // Where in a upb_msg to find the data.
upb_fielddef *upb_fielddef_new();
void upb_fielddef_ref(upb_fielddef *f);
void upb_fielddef_unref(upb_fielddef *f);
upb_fielddef *upb_fielddef_dup(upb_fielddef *f);
// Read accessors. May be called any time.
INLINE uint8_t upb_fielddef_type(upb_fielddef *f) { return f->type; }
INLINE uint8_t upb_fielddef_label(upb_fielddef *f) { return f->label; }
INLINE int32_t upb_fielddef_number(upb_fielddef *f) { return f->number; }
INLINE upb_string *upb_fielddef_name(upb_fielddef *f) { return f->name; }
INLINE upb_value upb_fielddef_default(upb_fielddef *f) { return f->defaultval; }
INLINE upb_value upb_fielddef_fval(upb_fielddef *f) { return f->fval; }
INLINE bool upb_fielddef_finalized(upb_fielddef *f) { return f->finalized; }
INLINE struct _upb_msgdef *upb_fielddef_msgdef(upb_fielddef *f) {
return f->msgdef;
}
INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(upb_fielddef *f) {
return f->accessor;
}
upb_value default_value;
upb_string *name;
struct _upb_msgdef *msgdef;
// Only meaningful once the def is in a symtab (returns NULL otherwise, or for
// a fielddef where !upb_hassubdef(f)).
upb_def *upb_fielddef_subdef(upb_fielddef *f);
// For the case of an enum or a submessage, points to the def for that type.
upb_def *def;
upb_atomic_t refcount;
};
// NULL until the fielddef has been added to a msgdef.
// Write accessors. "Number" and "name" must be set before the fielddef is
// added to a msgdef. For the moment we do not allow these to be set once
// the fielddef is added to a msgdef -- this could be relaxed in the future.
void upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
void upb_fielddef_setname(upb_fielddef *f, upb_string *name);
// These writers may be called at any time prior to being put in a symtab.
void upb_fielddef_settype(upb_fielddef *f, uint8_t type);
void upb_fielddef_setlabel(upb_fielddef *f, uint8_t label);
void upb_fielddef_setdefault(upb_fielddef *f, upb_value value);
void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
// The name of the message or enum this field is referring to. Must be found
// at name resolution time (when the symtabtxn is committed to the symtab).
void upb_fielddef_settypename(upb_fielddef *f, upb_string *name);
// A variety of tests about the type of a field.
INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
@ -125,58 +138,35 @@ INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
}
INLINE bool upb_issubmsg(upb_fielddef *f) { return upb_issubmsgtype(f->type); }
INLINE bool upb_isstring(upb_fielddef *f) { return upb_isstringtype(f->type); }
INLINE bool upb_isarray(upb_fielddef *f) {
return f->label == UPB_LABEL(REPEATED);
}
INLINE bool upb_isseq(upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); }
// Does the type of this field imply that it should contain an associated def?
INLINE bool upb_hasdef(upb_fielddef *f) {
return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
}
INLINE upb_valuetype_t upb_field_valuetype(upb_fielddef *f) {
if (upb_isarray(f)) {
return UPB_VALUETYPE_ARRAY;
} else {
return f->type;
}
}
INLINE upb_valuetype_t upb_elem_valuetype(upb_fielddef *f) {
assert(upb_isarray(f));
return f->type;
}
INLINE bool upb_field_ismm(upb_fielddef *f) {
return upb_isarray(f) || upb_isstring(f) || upb_issubmsg(f);
}
INLINE bool upb_elem_ismm(upb_fielddef *f) {
return upb_isstring(f) || upb_issubmsg(f);
}
/* upb_msgdef *****************************************************************/
// Structure that describes a single .proto message type.
typedef struct _upb_msgdef {
upb_def base;
upb_atomic_t cycle_refcount;
uint32_t size;
uint32_t set_flags_bytes;
// Tables for looking up fields by number and name.
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
// Immutable msg instance that has all default values set.
// TODO: need a way of making this immutable!
struct _upb_msg *default_message;
// The following fields may be modified until finalized.
uint16_t size;
uint8_t hasbit_bytes;
// The range of tag numbers used to store extensions.
uint32_t extension_start;
uint32_t extension_end;
} upb_msgdef;
// Hash table entries for looking up fields by name or number.
typedef struct {
bool junk;
uint8_t field_type;
uint8_t native_wire_type;
upb_fielddef *f;
} upb_itof_ent;
typedef struct {
@ -184,23 +174,56 @@ typedef struct {
upb_fielddef *f;
} upb_ntof_ent;
INLINE void upb_msgdef_unref(upb_msgdef *md) {
upb_def_unref(UPB_UPCAST(md));
upb_msgdef *upb_msgdef_new();
INLINE void upb_msgdef_unref(upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); }
INLINE void upb_msgdef_ref(upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); }
// Returns a new msgdef that is a copy of the given msgdef (and a copy of all
// the fields) but with any references to submessages broken and replaced with
// just the name of the submessage. This can be put back into another symtab
// and the names will be re-resolved in the new context.
upb_msgdef *upb_msgdef_dup(upb_msgdef *m);
// Read accessors. May be called at any time.
INLINE uint16_t upb_msgdef_size(upb_msgdef *m) { return m->size; }
INLINE uint8_t upb_msgdef_hasbit_bytes(upb_msgdef *m) {
return m->hasbit_bytes;
}
INLINE uint32_t upb_msgdef_extension_start(upb_msgdef *m) {
return m->extension_start;
}
INLINE void upb_msgdef_ref(upb_msgdef *md) {
upb_def_ref(UPB_UPCAST(md));
INLINE uint32_t upb_msgdef_extension_end(upb_msgdef *m) {
return m->extension_end;
}
// Write accessors. May only be called before the msgdef is in a symtab.
void upb_msgdef_setsize(upb_msgdef *m, uint16_t size);
void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes);
void upb_msgdef_setextension_start(upb_msgdef *m, uint32_t start);
void upb_msgdef_setextension_end(upb_msgdef *m, uint32_t end);
// Adds a fielddef to a msgdef, and passes a ref on the field to the msgdef.
// May only be done before the msgdef is in a symtab. The fielddef's name and
// number must be set, and the message may not already contain any field with
// this name or number -- if it does, the fielddef is unref'd and false is
// returned. The fielddef may not already belong to another message.
bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f);
// Sets the layout of all fields according to default rules:
// 1. Hasbits for required fields come first, then optional fields.
// 2. Values are laid out in a way that respects alignment rules.
// 3. The order is chosen to minimize memory usage.
// This should only be called once all fielddefs have been added.
// TODO: will likely want the ability to exclude strings/submessages/arrays.
// TODO: will likely want the ability to define a header size.
void upb_msgdef_layout(upb_msgdef *m);
// Looks up a field by name or number. While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible. These return NULL if no such field is found.
INLINE upb_itof_ent *upb_msgdef_itofent(upb_msgdef *m, uint32_t num) {
return (upb_itof_ent*)upb_inttable_fastlookup(
&m->itof, num, sizeof(upb_itof_ent));
}
INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t num) {
upb_itof_ent *e = upb_msgdef_itofent(m, num);
INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) {
upb_itof_ent *e = (upb_itof_ent*)
upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent));
return e ? e->f : NULL;
}
@ -214,6 +237,7 @@ INLINE int upb_msgdef_numfields(upb_msgdef *m) {
}
// Iteration over fields. The order is undefined.
// Iterators are invalidated when a field is added or removed.
// upb_msg_iter i;
// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
// upb_fielddef *f = upb_msg_iter_field(i);
@ -225,6 +249,7 @@ upb_msg_iter upb_msg_begin(upb_msgdef *m);
upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter);
INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); }
// Iterator accessor.
INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter);
return ent->f;
@ -233,13 +258,11 @@ INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
/* upb_enumdef ****************************************************************/
typedef int32_t upb_enumval_t;
typedef struct _upb_enumdef {
upb_def base;
upb_strtable ntoi;
upb_inttable iton;
upb_enumval_t default_value; // The first value listed in the enum.
int32_t defaultval;
} upb_enumdef;
typedef struct {
@ -252,12 +275,28 @@ typedef struct {
upb_string *string;
} upb_iton_ent;
upb_enumdef *upb_enumdef_new();
INLINE void upb_enumdef_ref(upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); }
INLINE void upb_enumdef_unref(upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); }
upb_enumdef *upb_enumdef_dup(upb_enumdef *e);
INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; }
// May only be set before the enumdef is in a symtab.
void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
// Adds a value to the enumdef. Requires that no existing val has this
// name or number (returns false and does not add if there is). May only
// be called before the enumdef is in a symtab.
bool upb_enumdef_addval(upb_enumdef *e, upb_string *name, int32_t num);
// Lookups from name to integer and vice-versa.
bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, upb_enumval_t *num);
bool upb_enumdef_ntoi(upb_enumdef *e, upb_string *name, int32_t *num);
// Caller does not own a ref on the returned string.
upb_string *upb_enumdef_iton(upb_enumdef *e, upb_enumval_t num);
upb_string *upb_enumdef_iton(upb_enumdef *e, int32_t num);
// Iteration over name/value pairs. The order is undefined.
// Adding an enum val invalidates any iterators.
// upb_enum_iter i;
// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
// // ...
@ -268,6 +307,7 @@ upb_enum_iter upb_enum_begin(upb_enumdef *e);
upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); }
// Iterator accessors.
INLINE upb_string *upb_enum_iter_name(upb_enum_iter iter) {
upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter);
return e->string;
@ -277,28 +317,74 @@ INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
}
/* upb_symtabtxn **************************************************************/
// A symbol table transaction is a map of defs that can be added to a symtab
// in one single atomic operation that either succeeds or fails. Mutable defs
// can be added to this map (and perhaps removed, in the future).
//
// A symtabtxn is not thread-safe.
typedef struct {
upb_strtable deftab;
} upb_symtabtxn;
void upb_symtabtxn_init(upb_symtabtxn *t);
void upb_symtabtxn_uninit(upb_symtabtxn *t);
// Adds a def to the symtab. Caller passes a ref on the def to the symtabtxn.
// The def's name must be set and there must not be any existing defs in the
// symtabtxn with this name, otherwise false will be returned and no operation
// will be performed (and the ref on the def will be released).
bool upb_symtabtxn_add(upb_symtabtxn *t, upb_def *def);
// Gets the def (if any) that is associated with this name in the symtab.
// Caller does *not* inherit a ref on the def.
upb_def *upb_symtabtxn_get(upb_symtabtxn *t, upb_string *name);
// Iterate over the defs that are part of the transaction.
// The order is undefined.
// The iterator is invalidated by upb_symtabtxn_add().
// upb_symtabtxn_iter i;
// for(i = upb_symtabtxn_begin(t); !upb_symtabtxn_done(i);
// i = upb_symtabtxn_next(t, i)) {
// upb_def *def = upb_symtabtxn_iter_def(i);
// }
typedef void* upb_symtabtxn_iter;
upb_symtabtxn_iter upb_symtabtxn_begin(upb_symtabtxn *t);
upb_symtabtxn_iter upb_symtabtxn_next(upb_symtabtxn *t, upb_symtabtxn_iter i);
bool upb_symtabtxn_done(upb_symtabtxn_iter i);
upb_def *upb_symtabtxn_iter_def(upb_symtabtxn_iter iter);
/* upb_symtab *****************************************************************/
// A SymbolTable is where upb_defs live. It is empty when first constructed.
// Clients add definitions to the symtab by supplying descriptors (as defined
// in descriptor.proto) via the upb_stream interface.
// Clients add definitions to the symtab (or replace existing definitions) by
// calling upb_symtab_commit() or upb_symtab_add().
// upb_deflist: A little dynamic array for storing a growing list of upb_defs.
typedef struct {
upb_def **defs;
uint32_t len;
uint32_t size;
} upb_deflist;
void upb_deflist_init(upb_deflist *l);
void upb_deflist_uninit(upb_deflist *l);
void upb_deflist_push(upb_deflist *l, upb_def *d);
struct _upb_symtab {
upb_atomic_t refcount;
upb_rwlock_t lock; // Protects all members except the refcount.
upb_strtable symtab; // The symbol table.
upb_msgdef *fds_msgdef; // Msgdef for google.protobuf.FileDescriptorSet.
upb_deflist olddefs;
};
typedef struct _upb_symtab upb_symtab;
// Initializes a upb_symtab. Symtabs are not freed explicitly, but unref'd
// when the caller is done with them.
upb_symtab *upb_symtab_new(void);
void _upb_symtab_free(upb_symtab *s); // Must not be called directly!
INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); }
INLINE void upb_symtab_unref(upb_symtab *s) {
if(s && upb_atomic_unref(&s->refcount)) _upb_symtab_free(s);
}
void upb_symtab_unref(upb_symtab *s);
// Resolves the given symbol using the rules described in descriptor.proto,
// namely:
@ -310,35 +396,36 @@ INLINE void upb_symtab_unref(upb_symtab *s) {
//
// If a def is found, the caller owns one ref on the returned def. Otherwise
// returns NULL.
// TODO: make return const
upb_def *upb_symtab_resolve(upb_symtab *s, upb_string *base, upb_string *sym);
// Find an entry in the symbol table with this exact name. If a def is found,
// the caller owns one ref on the returned def. Otherwise returns NULL.
// TODO: make return const
upb_def *upb_symtab_lookup(upb_symtab *s, upb_string *sym);
// Gets an array of pointers to all currently active defs in this symtab. The
// caller owns the returned array (which is of length *count) as well as a ref
// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are
// returned, otherwise only defs of the required type are returned.
upb_def **upb_symtab_getdefs(upb_symtab *s, int *count, upb_deftype_t type);
// TODO: make return const
upb_def **upb_symtab_getdefs(upb_symtab *s, int *n, upb_deftype_t type);
// upb_defbuilder: For adding defs to the symtab.
// You allocate the defbuilder, which can handle a single descriptor.
// It will be freed automatically when the parse completes.
struct _upb_defbuilder;
typedef struct _upb_defbuilder upb_defbuilder;
struct _upb_handlers;
struct _upb_handlers;
// Adds a single upb_def into the symtab. A ref on the def is passed to the
// symtab. If any references cannot be resolved, false is returned and the
// symtab is unchanged. The error (if any) is saved to status if non-NULL.
bool upb_symtab_add(upb_symtab *s, upb_def *d, upb_status *status);
// Allocates a new defbuilder that will add defs to the given symtab.
upb_defbuilder *upb_defbuilder_new(upb_symtab *s);
// Adds the set of defs contained in the transaction to the symtab, clearing
// the txn. The entire operation either succeeds or fails. If the operation
// fails, the symtab is unchanged, false is returned, and status indicates
// the error.
bool upb_symtab_commit(upb_symtab *s, upb_symtabtxn *t, upb_status *status);
// Registers handlers that will operate on a defbuilder to add the defs
// to the defbuilder's symtab. Will free itself when the parse finishes.
//
// TODO: should this allow redefinition? Either is possible, but which is
// more useful? Maybe it should be an option.
struct _upb_mhandlers *upb_defbuilder_reghandlers(struct _upb_handlers *h);
// Frees defs that are no longer active in the symtab and are no longer
// reachable. Such defs are not freed when they are replaced in the symtab
// if they are still reachable from defs that are still referenced.
void upb_symtab_gc(upb_symtab *s);
/* upb_def casts **************************************************************/
@ -352,8 +439,7 @@ struct _upb_mhandlers *upb_defbuilder_reghandlers(struct _upb_handlers *h);
}
UPB_DYNAMIC_CAST_DEF(msgdef, MSG);
UPB_DYNAMIC_CAST_DEF(enumdef, ENUM);
UPB_DYNAMIC_CAST_DEF(svcdef, SVC);
UPB_DYNAMIC_CAST_DEF(extdef, EXT);
UPB_DYNAMIC_CAST_DEF(svcdef, SERVICE);
UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DYNAMIC_CAST_DEF
@ -367,8 +453,7 @@ UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
}
UPB_DOWNCAST_DEF(msgdef, MSG);
UPB_DOWNCAST_DEF(enumdef, ENUM);
UPB_DOWNCAST_DEF(svcdef, SVC);
UPB_DOWNCAST_DEF(extdef, EXT);
UPB_DOWNCAST_DEF(svcdef, SERVICE);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF

@ -0,0 +1,548 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2008-2009 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include "upb_descriptor.h"
#include <stdlib.h>
#include <errno.h>
#include "upb_string.h"
#include "upb_def.h"
/* Builds a dotted name from a scope ("base") and a member ("name"):
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz") -> "Baz"
 * When base is NULL or has length zero, the result is a new ref on "name"
 * itself rather than a formatted copy.
 * Caller owns a ref on the returned string. */
static upb_string *upb_join(upb_string *base, upb_string *name) {
  if (base && upb_string_len(base) != 0) {
    // Non-empty scope: format "<base>.<name>" into a new string.
    return upb_string_asprintf(UPB_STRFMT "." UPB_STRFMT,
                               UPB_STRARG(base), UPB_STRARG(name));
  }
  // No scope to prepend.
  return upb_string_getref(name);
}
/* upb_descreader ************************************************************/
// A upb_descreader builds a list of defs by handling a parse of a protobuf in
// the format defined in descriptor.proto. The output of a upb_descreader is
// a upb_symtabtxn.
// Returns the def stored in the final occupied slot of the list (index
// len-1). No emptiness check is performed here, so the list must hold at
// least one def when this is called.
static upb_def *upb_deflist_last(upb_deflist *l) {
  upb_def **tail = &l->defs[l->len-1];
  return *tail;
}
// Prefixes the name of every def at index >= "start" with the scope "str".
// Each def's fqname is replaced by upb_join(str, old_fqname), and the ref on
// the old name string is released afterwards.
static void upb_deflist_qualify(upb_deflist *l, upb_string *str, int32_t start) {
  uint32_t idx = start;
  while (idx < l->len) {
    upb_def *d = l->defs[idx++];
    upb_string *unqualified = d->fqname;
    d->fqname = upb_join(str, unqualified);
    // The def now points at the joined name; drop the ref on the old one.
    upb_string_unref(unqualified);
  }
}
// Forward declares for top-level file descriptors.
static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h);
static upb_mhandlers * upb_enumdef_register_EnumDescriptorProto(upb_handlers *h);
// Prepares a descreader for use: initializes its def list and status, records
// "txn" as the transaction that will receive the defs, and resets the frame
// stack and the pending name/default strings to empty.
void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn) {
  // Sub-objects with their own init routines.
  upb_deflist_init(&r->defs);
  upb_status_init(&r->status);

  // Plain fields: nothing pending, no open containers.
  r->default_string = NULL;
  r->name = NULL;
  r->stack_len = 0;
  r->txn = txn;
}
// Releases everything a descreader holds: the pending name and default
// strings, the status, the def list, and the scope name of every frame that is
// still on the stack. The stack is left empty (stack_len == 0).
void upb_descreader_uninit(upb_descreader *r) {
  upb_string_unref(r->name);
  upb_status_uninit(&r->status);
  upb_deflist_uninit(&r->defs);
  upb_string_unref(r->default_string);

  // Pop any frames left open and drop their scope names.
  for (; r->stack_len > 0; ) {
    r->stack_len--;
    upb_string_unref(r->stack[r->stack_len].name);
  }
}
// Returns the msgdef stored immediately before the top frame's "start" index
// in the def list, or NULL when the stack holds one frame or fewer.
// NOTE(review): presumably this is the message whose DescriptorProto opened
// the innermost container -- confirm against the DescriptorProto handlers.
static upb_msgdef *upb_descreader_top(upb_descreader *r) {
  if (r->stack_len <= 1) {
    return NULL;
  }
  int owner_idx = r->stack[r->stack_len-1].start - 1;
  assert(owner_idx >= 0);
  upb_def *owner = r->defs.defs[owner_idx];
  return upb_downcast_msgdef(owner);
}
// Returns the last def in the reader's def list (see upb_deflist_last(); the
// list must not be empty).
static upb_def *upb_descreader_last(upb_descreader *r) {
  upb_deflist *pending = &r->defs;
  return upb_deflist_last(pending);
}
// Start/end handlers for FileDescriptorProto and DescriptorProto (the two
// entities that have names and can contain sub-definitions).
// Pushes a new frame onto the reader's stack. The frame remembers the current
// length of the def list (so defs added from here on belong to this
// container) and starts out with no scope name.
// NOTE(review): no bound check on the stack is performed here; the capacity of
// r->stack is declared elsewhere -- confirm nesting depth is limited upstream.
void upb_descreader_startcontainer(upb_descreader *r) {
  upb_descreader_frame *frame = &r->stack[r->stack_len];
  r->stack_len++;
  frame->start = r->defs.len;
  frame->name = NULL;
}
// Pops the top frame, qualifies every def added since that frame was pushed
// with the frame's scope name, and then releases the frame's ref on that name.
void upb_descreader_endcontainer(upb_descreader *r) {
  r->stack_len--;
  upb_descreader_frame *frame = &r->stack[r->stack_len];
  upb_string *scope = frame->name;
  upb_deflist_qualify(&r->defs, scope, frame->start);
  upb_string_unref(scope);
}
// Sets the scope name of the frame on top of the stack to "str": the ref on
// the previous name is released first, then a new ref on "str" is stored.
void upb_descreader_setscopename(upb_descreader *r, upb_string *str) {
  upb_descreader_frame *top = &r->stack[r->stack_len-1];
  upb_string *previous = top->name;
  upb_string_unref(previous);
  top->name = upb_string_getref(str);
}
// Handlers for google.protobuf.FileDescriptorProto.
// Start-of-message handler for FileDescriptorProto: opens a new container
// frame on the reader passed as the closure and lets parsing continue.
static upb_flow_t upb_descreader_FileDescriptorProto_startmsg(void *_r) {
  upb_descreader *reader = _r;
  upb_descreader_startcontainer(reader);
  return UPB_CONTINUE;
}
// End-of-message handler for FileDescriptorProto: closes the container frame
// opened by the matching start handler. The status argument is not consulted.
static void upb_descreader_FileDescriptorProto_endmsg(void *_r,
                                                      upb_status *status) {
  upb_descreader *reader = _r;
  (void)status;
  upb_descreader_endcontainer(reader);
}
// Value handler for FileDescriptorProto.package: uses the string carried in
// "val" as the scope name of the current container frame. "fval" is unused.
static upb_flow_t upb_descreader_FileDescriptorProto_package(void *_r,
                                                             upb_value fval,
                                                             upb_value val) {
  upb_descreader *reader = _r;
  upb_string *package = upb_value_getstr(val);
  (void)fval;
  upb_descreader_setscopename(reader, package);
  return UPB_CONTINUE;
}
// Creates the message handlers for google.protobuf.FileDescriptorProto on "h"
// and returns them. Registers:
//   - start/end message handlers (open/close a container frame),
//   - a value handler for the "package" field,
//   - sub-message handlers for "message_type" (DescriptorProto) and
//     "enum_type" (EnumDescriptorProto).
static upb_mhandlers *upb_descreader_register_FileDescriptorProto(
    upb_handlers *h) {
  upb_mhandlers *file_mh = upb_handlers_newmhandlers(h);
  upb_mhandlers_setstartmsg(file_mh,
                            &upb_descreader_FileDescriptorProto_startmsg);
  upb_mhandlers_setendmsg(file_mh, &upb_descreader_FileDescriptorProto_endmsg);

#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDNUM
#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ ## field ## __FIELDTYPE
  // package: plain value field.
  upb_fhandlers *package_fh =
      upb_mhandlers_newfhandlers(file_mh, FNUM(PACKAGE), FTYPE(PACKAGE), false);
  upb_fhandlers_setvalue(package_fh,
                         &upb_descreader_FileDescriptorProto_package);

  // message_type: sub-messages handled by the DescriptorProto handlers.
  upb_mhandlers *msg_mh = upb_msgdef_register_DescriptorProto(h);
  upb_mhandlers_newfhandlers_subm(file_mh, FNUM(MESSAGE_TYPE),
                                  FTYPE(MESSAGE_TYPE), true, msg_mh);

  // enum_type: sub-messages handled by the EnumDescriptorProto handlers.
  upb_mhandlers *enum_mh = upb_enumdef_register_EnumDescriptorProto(h);
  upb_mhandlers_newfhandlers_subm(file_mh, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE),
                                  true, enum_mh);
  // TODO: services, extensions
  return file_mh;
}
#undef FNUM
#undef FTYPE
// Handlers for google.protobuf.FileDescriptorSet.
// End-of-message handler for the whole FileDescriptorSet.  When the status is
// OK, every def collected so far (now guaranteed to be fully-qualified) is
// added to the txn and the def list is emptied; otherwise nothing happens.
static void upb_descreader_FileDescriptorSet_onendmsg(void *_r,
                                                      upb_status *status) {
  upb_descreader *reader = _r;
  if (!upb_ok(status)) return;

  unsigned int idx = 0;
  while (idx < reader->defs.len) {
    // TODO: check return for duplicate def.
    upb_symtabtxn_add(reader->txn, reader->defs.defs[idx]);
    idx++;
  }
  reader->defs.len = 0;
}
// Creates the message handlers for FileDescriptorSet: an end-of-message
// handler plus a repeated sub-message handler for "file".
static upb_mhandlers *upb_descreader_register_FileDescriptorSet(upb_handlers *h) {
#define FNUM(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDNUM
#define FTYPE(field) GOOGLE_PROTOBUF_FILEDESCRIPTORSET_ ## field ## __FIELDTYPE
  upb_mhandlers *set_mh = upb_handlers_newmhandlers(h);
  upb_mhandlers_setendmsg(set_mh, upb_descreader_FileDescriptorSet_onendmsg);

  upb_mhandlers *file_mh = upb_descreader_register_FileDescriptorProto(h);
  upb_mhandlers_newfhandlers_subm(set_mh, FNUM(FILE), FTYPE(FILE), true,
                                  file_mh);
  return set_mh;
}
#undef FNUM
#undef FTYPE
// Public entry point: clears h->should_jit and registers the
// FileDescriptorSet handler tree on h, returning its top-level mhandlers.
upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h) {
  h->should_jit = false;
  upb_mhandlers *root = upb_descreader_register_FileDescriptorSet(h);
  return root;
}
// google.protobuf.EnumValueDescriptorProto.
// Start-of-message handler: forgets whether a name/number was seen for the
// previous enum value.
static upb_flow_t upb_enumdef_EnumValueDescriptorProto_startmsg(void *_r) {
  upb_descreader *reader = _r;
  reader->saw_name = false;
  reader->saw_number = false;
  return UPB_CONTINUE;
}
// Value handler for "name": swaps the reader's pending enum-value name for a
// ref on the new string and records that a name was seen.
static upb_flow_t upb_enumdef_EnumValueDescriptorProto_name(void *_r,
                                                            upb_value fval,
                                                            upb_value val) {
  upb_descreader *reader = _r;
  (void)fval;
  upb_string_unref(reader->name);
  reader->name = upb_string_getref(upb_value_getstr(val));
  reader->saw_name = true;
  return UPB_CONTINUE;
}
// Value handler for "number": stores the pending enum value's number and
// records that a number was seen.
static upb_flow_t upb_enumdef_EnumValueDescriptorProto_number(void *_r,
                                                              upb_value fval,
                                                              upb_value val) {
  upb_descreader *reader = _r;
  (void)fval;
  reader->number = upb_value_getint32(val);
  reader->saw_number = true;
  return UPB_CONTINUE;
}
// End-of-message handler: adds the pending (name, number) pair to the enumdef
// most recently pushed on the def list.  Reports an error if either part is
// missing.
static void upb_enumdef_EnumValueDescriptorProto_endmsg(void *_r,
                                                        upb_status *status) {
  upb_descreader *reader = _r;
  bool complete = reader->saw_number && reader->saw_name;
  if (!complete) {
    upb_seterr(status, UPB_ERROR, "Enum value missing name or number.");
    return;
  }

  upb_enumdef *enumdef = upb_downcast_enumdef(upb_descreader_last(reader));
  bool is_first_value = (upb_inttable_count(&enumdef->iton) == 0);
  if (is_first_value) {
    // The default value of an enum (in the absence of an explicit default) is
    // its first listed value.
    upb_enumdef_setdefault(enumdef, reader->number);
  }
  upb_enumdef_addval(enumdef, reader->name, reader->number);

  // The pending name has been consumed; drop our ref.
  upb_string_unref(reader->name);
  reader->name = NULL;
}
// Creates the message handlers for EnumValueDescriptorProto: start/end
// handlers plus value handlers for "name" and "number".
static upb_mhandlers *upb_enumdef_register_EnumValueDescriptorProto(
    upb_handlers *h) {
#define FNUM(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDNUM
#define FTYPE(f) GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_ ## f ## __FIELDTYPE
  upb_mhandlers *value_mh = upb_handlers_newmhandlers(h);
  upb_mhandlers_setstartmsg(value_mh,
                            &upb_enumdef_EnumValueDescriptorProto_startmsg);
  upb_mhandlers_setendmsg(value_mh,
                          &upb_enumdef_EnumValueDescriptorProto_endmsg);

  upb_fhandlers *name_fh =
      upb_mhandlers_newfhandlers(value_mh, FNUM(NAME), FTYPE(NAME), false);
  upb_fhandlers_setvalue(name_fh, &upb_enumdef_EnumValueDescriptorProto_name);

  upb_fhandlers *number_fh =
      upb_mhandlers_newfhandlers(value_mh, FNUM(NUMBER), FTYPE(NUMBER), false);
  upb_fhandlers_setvalue(number_fh,
                         &upb_enumdef_EnumValueDescriptorProto_number);
  return value_mh;
}
#undef FNUM
#undef FTYPE
// google.protobuf.EnumDescriptorProto.
static upb_flow_t upb_enumdef_EnumDescriptorProto_startmsg(void *_r) {
upb_descreader *r = _r;
upb_deflist_push(&r->defs, UPB_UPCAST(upb_enumdef_new()));
return UPB_CONTINUE;
}
// End-of-message handler: validates the enumdef most recently pushed on the
// def list.  An enum with no name or with no values is reported as an error.
static void upb_enumdef_EnumDescriptorProto_endmsg(void *_r, upb_status *status) {
  upb_descreader *reader = _r;
  upb_def *def = upb_descreader_last(reader);
  upb_enumdef *enumdef = upb_downcast_enumdef(def);

  if (def->fqname == NULL) {
    upb_seterr(status, UPB_ERROR, "Enum had no name.");
  } else if (upb_inttable_count(&enumdef->iton) == 0) {
    upb_seterr(status, UPB_ERROR, "Enum had no values.");
  }
}
// Value handler for "name": replaces the fqname of the enumdef most recently
// pushed on the def list with a ref on the new string.
static upb_flow_t upb_enumdef_EnumDescriptorProto_name(void *_r,
                                                       upb_value fval,
                                                       upb_value val) {
  upb_descreader *reader = _r;
  upb_enumdef *enumdef = upb_downcast_enumdef(upb_descreader_last(reader));
  (void)fval;
  upb_string_unref(enumdef->base.fqname);
  enumdef->base.fqname = upb_string_getref(upb_value_getstr(val));
  return UPB_CONTINUE;
}
// Creates the message handlers for EnumDescriptorProto: start/end handlers,
// a value handler for "name", and a repeated sub-message handler for "value".
static upb_mhandlers *upb_enumdef_register_EnumDescriptorProto(upb_handlers *h) {
#define FNUM(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDNUM
#define FTYPE(f) GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_ ## f ## __FIELDTYPE
  upb_mhandlers *enum_mh = upb_handlers_newmhandlers(h);
  upb_mhandlers_setstartmsg(enum_mh, &upb_enumdef_EnumDescriptorProto_startmsg);
  upb_mhandlers_setendmsg(enum_mh, &upb_enumdef_EnumDescriptorProto_endmsg);

  upb_fhandlers *name_fh =
      upb_mhandlers_newfhandlers(enum_mh, FNUM(NAME), FTYPE(NAME), false);
  upb_fhandlers_setvalue(name_fh, &upb_enumdef_EnumDescriptorProto_name);

  upb_mhandlers *value_mh = upb_enumdef_register_EnumValueDescriptorProto(h);
  upb_mhandlers_newfhandlers_subm(enum_mh, FNUM(VALUE), FTYPE(VALUE), true,
                                  value_mh);
  return enum_mh;
}
#undef FNUM
#undef FTYPE
static upb_flow_t upb_fielddef_startmsg(void *_r) {
upb_descreader *r = _r;
r->f = upb_fielddef_new();
return UPB_CONTINUE;
}
// Converts the default value in string "dstr" into "d".  Passes a ref on dstr.
// Returns true on success.
//
// - STRING/BYTES/ENUM: the string itself (or the empty string when dstr is
//   NULL) is stored in "d" and the ref is kept.
// - MESSAGE/GROUP: no default is expected; any non-NULL dstr is an error.
// - Numeric/bool types: the text is parsed with the strto* family (or matched
//   against "true"/"false"); an empty or NULL dstr yields zero/false.
// - Any other type value: "d" is left untouched and true is returned.
static bool upb_fielddef_parsedefault(upb_string *dstr, upb_value *d, int type) {
  bool success = true;
  if (type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES) || type == UPB_TYPE(ENUM)) {
    // We'll keep the ref we had on it.  We include enums in this case because
    // we need the enumdef to resolve the name, but we may not have it yet.
    // We'll resolve it later.
    if (dstr) {
      upb_value_setstr(d, dstr);
    } else {
      upb_value_setstr(d, upb_emptystring());
    }
  } else if (type == UPB_TYPE(MESSAGE) || type == UPB_TYPE(GROUP)) {
    // We don't expect to get a default value.  Test the pointer before
    // releasing it rather than after.
    if (dstr != NULL) success = false;
    upb_string_unref(dstr);
  } else {
    // The strto* functions need the string to be NULL-terminated.
    char *strz = upb_string_isempty(dstr) ? NULL : upb_string_newcstr(dstr);
    char *end;
    upb_string_unref(dstr);
    // The strto* functions only ever set errno on failure and never clear it,
    // so a stale ERANGE from earlier code would make a valid default look
    // out of range.  Reset it before converting.
    errno = 0;
    switch (type) {
      case UPB_TYPE(INT32):
      case UPB_TYPE(SINT32):
      case UPB_TYPE(SFIXED32):
        if (strz) {
          long val = strtol(strz, &end, 0);
          if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
            success = false;
          else
            upb_value_setint32(d, val);
        } else {
          upb_value_setint32(d, 0);
        }
        break;
      case UPB_TYPE(INT64):
      case UPB_TYPE(SINT64):
      case UPB_TYPE(SFIXED64):
        if (strz) {
          upb_value_setint64(d, strtoll(strz, &end, 0));
          if (errno == ERANGE || *end) success = false;
        } else {
          upb_value_setint64(d, 0);
        }
        break;
      case UPB_TYPE(UINT32):
      case UPB_TYPE(FIXED32):
        if (strz) {
          unsigned long val = strtoul(strz, &end, 0);
          if (val > UINT32_MAX || errno == ERANGE || *end)
            success = false;
          else
            upb_value_setuint32(d, val);
        } else {
          upb_value_setuint32(d, 0);
        }
        break;
      case UPB_TYPE(UINT64):
      case UPB_TYPE(FIXED64):
        if (strz) {
          upb_value_setuint64(d, strtoull(strz, &end, 0));
          if (errno == ERANGE || *end) success = false;
        } else {
          upb_value_setuint64(d, 0);
        }
        break;
      case UPB_TYPE(DOUBLE):
        if (strz) {
          upb_value_setdouble(d, strtod(strz, &end));
          if (errno == ERANGE || *end) success = false;
        } else {
          upb_value_setdouble(d, 0.0);
        }
        break;
      case UPB_TYPE(FLOAT):
        if (strz) {
          upb_value_setfloat(d, strtof(strz, &end));
          if (errno == ERANGE || *end) success = false;
        } else {
          upb_value_setfloat(d, 0.0);
        }
        break;
      case UPB_TYPE(BOOL):
        if (!strz || strcmp(strz, "false") == 0)
          upb_value_setbool(d, false);
        else if (strcmp(strz, "true") == 0)
          upb_value_setbool(d, true);
        else
          success = false;
        break;
      default:
        // Unrecognized type: "d" is left untouched, as before.
        break;
    }
    free(strz);
  }
  return success;
}
// End-of-message handler for a field descriptor: adds the field-in-progress
// to the msgdef returned by upb_descreader_top(), then converts the pending
// default-value string (if any) and installs it as the field's default.
static void upb_fielddef_endmsg(void *_r, upb_status *status) {
  upb_descreader *reader = _r;
  upb_fielddef *field = reader->f;
  // TODO: verify that all required fields were present.
  assert(field->number != -1 && field->name != NULL);
  assert((field->def != NULL) == upb_hasdef(field));

  // Field was successfully read, add it as a field of the msgdef.
  upb_msgdef_addfield(upb_descreader_top(reader), field);

  // Hand the pending default string (and the reader's ref on it) to the
  // parser.
  upb_string *pending_default = reader->default_string;
  reader->default_string = NULL;

  upb_value default_val;
  if (upb_fielddef_parsedefault(pending_default, &default_val, field->type)) {
    upb_fielddef_setdefault(field, default_val);
  } else {
    // We don't worry too much about giving a great error message since the
    // compiler should have ensured this was correct.
    upb_seterr(status, UPB_ERROR, "Error converting default value.");
  }
}
// Value handler for "type": forwards the int32 value to the field-in-progress.
static upb_flow_t upb_fielddef_ontype(void *_r, upb_value fval, upb_value val) {
  upb_descreader *reader = _r;
  (void)fval;
  upb_fielddef_settype(reader->f, upb_value_getint32(val));
  return UPB_CONTINUE;
}
// Value handler for "label": forwards the int32 value to the
// field-in-progress.
static upb_flow_t upb_fielddef_onlabel(void *_r, upb_value fval, upb_value val) {
  upb_descreader *reader = _r;
  (void)fval;
  upb_fielddef_setlabel(reader->f, upb_value_getint32(val));
  return UPB_CONTINUE;
}
// Value handler for "number": forwards the int32 value to the
// field-in-progress.
static upb_flow_t upb_fielddef_onnumber(void *_r, upb_value fval, upb_value val) {
  upb_descreader *reader = _r;
  (void)fval;
  upb_fielddef_setnumber(reader->f, upb_value_getint32(val));
  return UPB_CONTINUE;
}
// Value handler for "name": forwards the string value to the
// field-in-progress.
static upb_flow_t upb_fielddef_onname(void *_r, upb_value fval, upb_value val) {
  upb_descreader *reader = _r;
  (void)fval;
  upb_fielddef_setname(reader->f, upb_value_getstr(val));
  return UPB_CONTINUE;
}
// Value handler for "type_name": forwards the string value to the
// field-in-progress.
static upb_flow_t upb_fielddef_ontypename(void *_r, upb_value fval,
                                          upb_value val) {
  upb_descreader *reader = _r;
  (void)fval;
  upb_fielddef_settypename(reader->f, upb_value_getstr(val));
  return UPB_CONTINUE;
}
// Value handler for "default_value".  The text cannot be converted yet
// because the field's type may not have been seen, so we just hold a ref on
// the string (releasing any earlier one).
static upb_flow_t upb_fielddef_ondefaultval(void *_r, upb_value fval,
                                            upb_value val) {
  upb_descreader *reader = _r;
  (void)fval;
  upb_string_unref(reader->default_string);
  reader->default_string = upb_string_getref(upb_value_getstr(val));
  return UPB_CONTINUE;
}
// Creates the message handlers for FieldDescriptorProto: start/end handlers
// plus one value handler per field we care about (type, label, number, name,
// type_name, default_value).
static upb_mhandlers *upb_fielddef_register_FieldDescriptorProto(
    upb_handlers *h) {
  upb_mhandlers *m = upb_handlers_newmhandlers(h);
  upb_mhandlers_setstartmsg(m, &upb_fielddef_startmsg);
  upb_mhandlers_setendmsg(m, &upb_fielddef_endmsg);
// Registers a non-repeated field handler for descriptor field "name".  The
// macro expands to a single expression; the caller supplies the semicolon.
#define FIELD(name, handler) \
  upb_fhandlers_setvalue( \
      upb_mhandlers_newfhandlers(m, \
          GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDNUM, \
          GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_ ## name ## __FIELDTYPE, \
          false), \
      handler)
  FIELD(TYPE, &upb_fielddef_ontype);
  FIELD(LABEL, &upb_fielddef_onlabel);
  FIELD(NUMBER, &upb_fielddef_onnumber);
  FIELD(NAME, &upb_fielddef_onname);
  FIELD(TYPE_NAME, &upb_fielddef_ontypename);
  FIELD(DEFAULT_VALUE, &upb_fielddef_ondefaultval);
  return m;
}
// FIELD is the only macro this function defines, so it is the one to undefine.
#undef FIELD
// google.protobuf.DescriptorProto.
static upb_flow_t upb_msgdef_startmsg(void *_r) {
upb_descreader *r = _r;
upb_deflist_push(&r->defs, UPB_UPCAST(upb_msgdef_new()));
upb_descreader_startcontainer(r);
return UPB_CONTINUE;
}
// End-of-message handler for DescriptorProto: lays out the current msgdef and
// closes its scope frame.  A message without a name is reported as an error,
// in which case the frame is left on the stack.
static void upb_msgdef_endmsg(void *_r, upb_status *status) {
  upb_descreader *reader = _r;
  upb_msgdef *msg = upb_descreader_top(reader);
  if (msg->base.fqname) {
    upb_msgdef_layout(msg);
    upb_descreader_endcontainer(reader);
  } else {
    upb_seterr(status, UPB_ERROR, "Encountered message with no name.");
  }
}
// Value handler for "name": the string becomes both the current msgdef's
// fqname and the name of the current scope frame.
static upb_flow_t upb_msgdef_onname(void *_r, upb_value fval, upb_value val) {
  upb_descreader *reader = _r;
  (void)fval;
  assert(val.type == UPB_TYPE(STRING));
  upb_string *name = upb_value_getstr(val);

  upb_msgdef *msg = upb_descreader_top(reader);
  upb_string_unref(msg->base.fqname);
  msg->base.fqname = upb_string_getref(name);

  upb_descreader_setscopename(reader, name);
  return UPB_CONTINUE;
}
// Creates the message handlers for DescriptorProto: start/end handlers, a
// value handler for "name", and repeated sub-message handlers for "field",
// "enum_type" and "nested_type".
static upb_mhandlers *upb_msgdef_register_DescriptorProto(upb_handlers *h) {
#define FNUM(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDNUM
#define FTYPE(f) GOOGLE_PROTOBUF_DESCRIPTORPROTO_ ## f ## __FIELDTYPE
  upb_mhandlers *msg_mh = upb_handlers_newmhandlers(h);
  upb_mhandlers_setstartmsg(msg_mh, &upb_msgdef_startmsg);
  upb_mhandlers_setendmsg(msg_mh, &upb_msgdef_endmsg);

  upb_fhandlers *name_fh =
      upb_mhandlers_newfhandlers(msg_mh, FNUM(NAME), FTYPE(NAME), false);
  upb_fhandlers_setvalue(name_fh, &upb_msgdef_onname);

  upb_mhandlers *field_mh = upb_fielddef_register_FieldDescriptorProto(h);
  upb_mhandlers_newfhandlers_subm(msg_mh, FNUM(FIELD), FTYPE(FIELD), true,
                                  field_mh);

  upb_mhandlers *enum_mh = upb_enumdef_register_EnumDescriptorProto(h);
  upb_mhandlers_newfhandlers_subm(msg_mh, FNUM(ENUM_TYPE), FTYPE(ENUM_TYPE),
                                  true, enum_mh);

  // DescriptorProto is self-recursive, so we must link the definition.
  upb_mhandlers_newfhandlers_subm(
      msg_mh, FNUM(NESTED_TYPE), FTYPE(NESTED_TYPE), true, msg_mh);
  // TODO: extensions.
  return msg_mh;
}
#undef FNUM
#undef FTYPE

@ -0,0 +1,67 @@
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* Routines for building defs by parsing descriptors in descriptor.proto format.
* This only needs to use the public API of upb_symtab. Later we may also
* add routines for dumping a symtab to a descriptor.
*/
#ifndef UPB_DESCRIPTOR_H
#define UPB_DESCRIPTOR_H
#include "upb_handlers.h"
#ifdef __cplusplus
extern "C" {
#endif
/* upb_descreader ************************************************************/
// upb_descreader reads a descriptor and puts defs in a upb_symtabtxn.
// We keep a stack of all the message scopes we are currently in, as well as
// the top-level file scope. This is necessary to correctly qualify the
// definitions that are contained inside. "name" tracks the name of the
// message or package (a bare name -- not qualified by any enclosing scopes).
typedef struct {
// Bare name of this scope; NULL until a name/package field has been seen.
upb_string *name;
// Index of the first def that is under this scope. For msgdefs, the
// msgdef itself is at start-1.
int start;
} upb_descreader_frame;
// State carried through a descriptor parse; it is passed to every handler as
// the closure.
typedef struct {
// Defs built so far; they are moved to "txn" when the FileDescriptorSet ends
// without error.
upb_deflist defs;
// Transaction that receives the finished defs.
upb_symtabtxn *txn;
// Scope frames for the file and the messages currently being read.
upb_descreader_frame stack[UPB_MAX_TYPE_DEPTH];
// Number of frames currently in use in "stack".
int stack_len;
upb_status status;
// Enum value currently being read: its number and name, plus flags recording
// whether each has been seen.
// NOTE(review): the number is read with upb_value_getint32() but stored in a
// uint32_t -- confirm negative enum values round-trip as intended.
uint32_t number;
upb_string *name;
bool saw_number;
bool saw_name;
// Textual default value of the field currently being read, held until the
// end of the field message, where it is converted using the field's type.
upb_string *default_string;
// Field currently being read.
upb_fielddef *f;
} upb_descreader;
// Creates a new descriptor builder that will add defs to the given txn.
void upb_descreader_init(upb_descreader *r, upb_symtabtxn *txn);
void upb_descreader_uninit(upb_descreader *r);
// Registers handlers that will load descriptor data into a symtabtxn.
// Pass the descreader as the closure. The messages will have
// upb_msgdef_layout() called on them before adding to the txn.
upb_mhandlers *upb_descreader_reghandlers(upb_handlers *h);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

@ -5,32 +5,29 @@
* Author: Josh Haberman <jhaberman@gmail.com>
*/
#include "upb_decoder.h"
#include "upb_descriptor.h"
#include "upb_glue.h"
#include "upb_msg.h"
#include "upb_decoder.h"
#include "upb_strstream.h"
#include "upb_textprinter.h"
void upb_strtomsg(upb_string *str, upb_msg *msg, upb_msgdef *md,
void upb_strtomsg(upb_string *str, void *msg, upb_msgdef *md,
upb_status *status) {
upb_stringsrc strsrc;
upb_stringsrc_init(&strsrc);
upb_stringsrc_reset(&strsrc, str);
upb_handlers *h = upb_handlers_new();
upb_msg_reghandlers(h, md);
upb_decoder d;
upb_decoder_init(&d, h);
upb_decoder_initformsgdef(&d, md);
upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), msg);
upb_handlers_unref(h);
upb_decoder_decode(&d, status);
upb_stringsrc_uninit(&strsrc);
upb_decoder_uninit(&d);
}
#if 0
void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
bool single_line) {
upb_stringsink strsink;
@ -53,23 +50,49 @@ void upb_msgtotext(upb_string *str, upb_msg *msg, upb_msgdef *md,
upb_textprinter_free(p);
upb_handlers_unref(h);
}
#endif
void upb_parsedesc(upb_symtab *symtab, upb_string *str, upb_status *status) {
// TODO: read->load.
// Decodes "str" as a serialized FileDescriptorSet, builds defs from it into a
// transaction, assigns the standard accessors and a layout to every message
// def, and commits the transaction to "symtab" if no error was reported.
// Errors are returned through "status".
void upb_read_descriptor(upb_symtab *symtab, upb_string *str, upb_status *status) {
  upb_stringsrc strsrc;
  upb_stringsrc_init(&strsrc);
  upb_stringsrc_reset(&strsrc, str);

  upb_handlers *h = upb_handlers_new();
  upb_descreader_reghandlers(h);

  upb_decoder d;
  upb_decoder_initforhandlers(&d, h);
  upb_handlers_unref(h);

  upb_descreader r;
  upb_symtabtxn txn;
  upb_symtabtxn_init(&txn);
  upb_descreader_init(&r, &txn);
  upb_decoder_reset(&d, upb_stringsrc_bytesrc(&strsrc), &r);

  upb_decoder_decode(&d, status);

  // Set default accessors and layouts on all messages.
  upb_symtabtxn_iter i;
  for(i = upb_symtabtxn_begin(&txn); !upb_symtabtxn_done(i);
      i = upb_symtabtxn_next(&txn, i)) {
    upb_def *def = upb_symtabtxn_iter_def(i);
    upb_msgdef *md = upb_dyncast_msgdef(def);
    // Non-message defs (e.g. enums) need no accessors; skip them.  Returning
    // here would abandon the commit and leak everything initialized above.
    if (!md) continue;
    // For each field in the msgdef (separate iterator; do not shadow "i"):
    upb_msg_iter fi;
    for(fi = upb_msg_begin(md); !upb_msg_done(fi); fi = upb_msg_next(md, fi)) {
      upb_fielddef *f = upb_msg_iter_field(fi);
      upb_fielddef_setaccessor(f, upb_stdmsg_accessor(f));
    }
    upb_msgdef_layout(md);
  }

  if (upb_ok(status)) upb_symtab_commit(symtab, &txn, status);

  upb_symtabtxn_uninit(&txn);
  upb_descreader_uninit(&r);
  upb_stringsrc_uninit(&strsrc);
  upb_decoder_uninit(&d);
}

@ -42,14 +42,14 @@ struct _upb_symtab;
// Decodes the given string, which must be in protobuf binary format, to the
// given upb_msg with msgdef "md", storing the status of the operation in "s".
void upb_strtomsg(struct _upb_string *str, struct _upb_msg *msg,
void upb_strtomsg(struct _upb_string *str, void *msg,
struct _upb_msgdef *md, struct _upb_status *s);
void upb_msgtotext(struct _upb_string *str, struct _upb_msg *msg,
void upb_msgtotext(struct _upb_string *str, void *msg,
struct _upb_msgdef *md, bool single_line);
void upb_parsedesc(struct _upb_symtab *symtab, struct _upb_string *str,
struct _upb_status *status);
void upb_read_descriptor(struct _upb_symtab *symtab, struct _upb_string *str,
struct _upb_status *status);
#ifdef __cplusplus
} /* extern "C" */

@ -123,9 +123,9 @@ static upb_mhandlers *upb_regmsg_dfs(upb_handlers *h, upb_msgdef *m,
fieldreg_cb, closure, mtab);
}
fh = upb_mhandlers_newfhandlers_subm(
mh, f->number, f->type, upb_isarray(f), sub_mh);
mh, f->number, f->type, upb_isseq(f), sub_mh);
} else {
fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isarray(f));
fh = upb_mhandlers_newfhandlers(mh, f->number, f->type, upb_isseq(f));
}
if (fieldreg_cb) fieldreg_cb(closure, fh, f);
}

@ -9,201 +9,23 @@
#include "upb_msg.h"
// Rounds v up to the next power of two (a power of two maps to itself).
// Inputs of 0 and inputs above 2^31 wrap around to 0.
static uint32_t upb_round_up_pow2(uint32_t v) {
  // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
  // Smear the highest set bit of (v - 1) into every lower position, then add
  // one to carry into the next power of two.
  uint32_t bits = v - 1;
  for (unsigned shift = 1; shift < 32; shift <<= 1) {
    bits |= bits >> shift;
  }
  return bits + 1;
}
static void upb_elem_free(upb_value v, upb_fielddef *f) {
switch(f->type) {
case UPB_TYPE(MESSAGE):
case UPB_TYPE(GROUP):
_upb_msg_free(upb_value_getmsg(v), upb_downcast_msgdef(f->def));
break;
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES):
_upb_string_free(upb_value_getstr(v));
break;
default:
abort();
}
}
static void upb_elem_unref(upb_value v, upb_fielddef *f) {
assert(upb_elem_ismm(f));
upb_atomic_t *refcount = upb_value_getrefcount(v);
if (refcount && upb_atomic_unref(refcount))
upb_elem_free(v, f);
}
static void upb_field_free(upb_value v, upb_fielddef *f) {
if (upb_isarray(f)) {
_upb_array_free(upb_value_getarr(v), f);
} else {
upb_elem_free(v, f);
}
}
static void upb_field_unref(upb_value v, upb_fielddef *f) {
assert(upb_field_ismm(f));
upb_atomic_t *refcount = upb_value_getrefcount(v);
if (refcount && upb_atomic_unref(refcount))
upb_field_free(v, f);
}
/* upb_array ******************************************************************/
upb_array *upb_array_new(void) {
upb_array *arr = malloc(sizeof(*arr));
upb_atomic_init(&arr->refcount, 1);
arr->size = 0;
arr->len = 0;
arr->ptr = NULL;
return arr;
}
void __attribute__((noinline)) upb_array_dorecycle(upb_array **_arr) {
upb_array *arr = *_arr;
if(arr && upb_atomic_only(&arr->refcount)) {
arr->len = 0;
} else {
if (arr) {
bool was_lastref = upb_atomic_unref(&arr->refcount);
(void)was_lastref;
assert(!was_lastref); // If it was, we would have just recycled.
}
*_arr = upb_array_new();
}
}
void upb_array_recycle(upb_array **_arr) {
upb_array *arr = *_arr;
if(arr && upb_atomic_only(&arr->refcount)) {
arr->len = 0;
} else {
upb_array_dorecycle(_arr);
}
}
void _upb_array_free(upb_array *arr, upb_fielddef *f) {
if (upb_elem_ismm(f)) {
// Need to release refs on sub-objects.
upb_valuetype_t type = upb_elem_valuetype(f);
for (int32_t i = 0; i < arr->size; i++) {
upb_valueptr p = _upb_array_getptr(arr, f, i);
upb_elem_unref(upb_value_read(p, type), f);
}
}
free(arr->ptr);
free(arr);
}
void __attribute__((noinline)) upb_array_doresize(
upb_array *arr, size_t type_size, upb_arraylen_t len) {
upb_arraylen_t old_size = arr->size;
size_t new_size = upb_round_up_pow2(len);
arr->ptr = realloc(arr->ptr, new_size * type_size);
arr->size = new_size;
memset(arr->ptr + (old_size * type_size), 0,
(new_size - old_size) * type_size);
}
void upb_array_resizefortypesize(upb_array *arr, size_t type_size,
int32_t len) {
assert(len >= 0);
if (arr->size < len) upb_array_doresize(arr, type_size, len);
arr->len = len;
}
void upb_array_resize(upb_array *arr, upb_fielddef *f, upb_arraylen_t len) {
upb_array_resizefortypesize(arr, upb_types[f->type].size, len);
}
/* upb_msg ********************************************************************/
upb_msg *upb_msg_new(upb_msgdef *md) {
upb_msg *msg = malloc(md->size);
// Clear all set bits and cached pointers.
memset(msg, 0, md->size);
upb_atomic_init(&msg->refcount, 1);
return msg;
}
void _upb_msg_free(upb_msg *msg, upb_msgdef *md) {
// Need to release refs on all sub-objects.
upb_msg_iter i;
for(i = upb_msg_begin(md); !upb_msg_done(i); i = upb_msg_next(md, i)) {
upb_fielddef *f = upb_msg_iter_field(i);
upb_valueptr p = _upb_msg_getptr(msg, f);
upb_valuetype_t type = upb_field_valuetype(f);
if (upb_field_ismm(f)) upb_field_unref(upb_value_read(p, type), f);
}
free(msg);
}
void upb_msg_recycle(upb_msg **_msg, upb_msgdef *msgdef) {
upb_msg *msg = *_msg;
if(msg && upb_atomic_only(&msg->refcount)) {
upb_msg_clear(msg, msgdef);
} else {
upb_msg_unref(msg, msgdef);
if (msg) {
bool was_lastref = upb_atomic_unref(&msg->refcount);
(void)was_lastref;
assert(!was_lastref);
}
*_msg = upb_msg_new(msgdef);
}
}
INLINE void upb_msg_sethas(upb_msg *msg, upb_fielddef *f) {
msg->data[f->set_bit_offset] |= f->set_bit_mask;
}
void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val) {
assert(val.type == upb_types[upb_field_valuetype(f)].inmemory_type);
upb_valueptr ptr = _upb_msg_getptr(msg, f);
if (upb_field_ismm(f)) {
// Unref any previous value we may have had there.
upb_value oldval = upb_value_read(ptr, upb_field_valuetype(f));
upb_field_unref(oldval, f);
// Ref the new value.
upb_atomic_t *refcount = upb_value_getrefcount(val);
if (refcount) upb_atomic_ref(refcount);
}
upb_msg_sethas(msg, f);
return upb_value_write(ptr, val, upb_field_valuetype(f));
}
upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f) {
if (!upb_msg_has(msg, f)) {
upb_value val = f->default_value;
if (upb_issubmsg(f)) {
// TODO: handle arrays also, which must be treated similarly.
upb_msgdef *md = upb_downcast_msgdef(f->def);
upb_msg *m = upb_msg_new(md);
// Copy all set bits and values, except the refcount.
memcpy(m , upb_value_getmsg(val), md->size);
upb_atomic_init(&m->refcount, 0); // The msg will take a ref.
upb_value_setmsg(&val, m);
}
upb_msg_set(msg, f, val);
return val;
} else {
return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
// Zeroes the first md->hasbit_bytes bytes of msg; nothing else is touched.
void upb_msg_clear(void *msg, upb_msgdef *md) {
  // TODO: set primitive fields to defaults?
  size_t hasbit_len = md->hasbit_bytes;
  memset(msg, 0, hasbit_len);
}
// Appends one element slot of type_size bytes to the array and returns a
// pointer to it.  When the array is full its capacity becomes 8 (if it was
// empty) or doubles, and the newly added capacity is zero-filled.
void *upb_stdarray_append(upb_stdarray *a, size_t type_size) {
  assert(a->len <= a->size);
  if (a->len == a->size) {
    size_t prev_cap = a->size;
    if (prev_cap == 0) {
      a->size = 8;
    } else {
      a->size = prev_cap * 2;
    }
    a->ptr = realloc(a->ptr, a->size * type_size);
    // Only the freshly added tail is cleared; existing elements are kept.
    size_t added_bytes = (a->size - prev_cap) * type_size;
    memset(&a->ptr[prev_cap * type_size], 0, added_bytes);
  }
  void *slot = &a->ptr[a->len * type_size];
  a->len++;
  return slot;
}
#if 0
static upb_flow_t upb_msg_dispatch(upb_msg *msg, upb_msgdef *md,
upb_dispatcher *d);
@ -253,110 +75,64 @@ void upb_msg_runhandlers(upb_msg *msg, upb_msgdef *md, upb_handlers *h,
upb_dispatcher_uninit(&d);
}
#endif
static upb_valueptr upb_msg_getappendptr(upb_msg *msg, upb_fielddef *f) {
upb_valueptr p = _upb_msg_getptr(msg, f);
if (upb_isarray(f)) {
// Create/recycle/resize the array if necessary, and find a pointer to
// a newly-appended element.
if (!upb_msg_has(msg, f)) {
upb_array_recycle(p.arr);
upb_msg_sethas(msg, f);
}
assert(*p.arr != NULL);
upb_arraylen_t oldlen = upb_array_len(*p.arr);
upb_array_resize(*p.arr, f, oldlen + 1);
p = _upb_array_getptr(*p.arr, f, oldlen);
}
return p;
}
upb_msg *upb_msg_appendmsg(upb_msg *msg, upb_fielddef *f, upb_msgdef *msgdef) {
upb_valueptr p = upb_msg_getappendptr(msg, f);
if (upb_isarray(f) || !upb_msg_has(msg, f)) {
upb_msg_recycle(p.msg, msgdef);
upb_msg_sethas(msg, f);
}
return *p.msg;
}
/* upb_msg handlers ***********************************************************/
/* Standard writers. **********************************************************/
#if UPB_MAX_FIELDS > 2048
#error "We're using an 8-bit integer to store a has_offset."
#endif
typedef struct {
uint8_t has_offset;
uint8_t has_mask;
uint16_t val_offset;
uint16_t msg_size;
uint8_t set_flags_bytes;
uint8_t padding;
} upb_msgsink_fval;
static upb_msgsink_fval upb_msgsink_unpackfval(upb_value fval) {
assert(sizeof(upb_msgsink_fval) == 8);
upb_msgsink_fval ret;
uint64_t fval_u64 = upb_value_getuint64(fval);
memcpy(&ret, &fval_u64, 8);
return ret;
// Sets the field's hasbit in the message.  Fields with a negative hasbit
// index have no hasbit and are left alone.
void upb_stdmsg_sethas(void *_m, upb_value fval) {
  upb_fielddef *field = upb_value_getfielddef(fval);
  if (field->hasbit < 0) return;
  char *bytes = _m;
  bytes[field->hasbit / 8] |= (1 << (field->hasbit % 8));
}
static uint64_t upb_msgsink_packfval(uint8_t has_offset, uint8_t has_mask,
uint16_t val_offset, uint16_t msg_size,
uint8_t set_flags_bytes) {
upb_msgsink_fval fval = {
has_offset, has_mask, val_offset, msg_size, set_flags_bytes, 0};
uint64_t ret = 0;
memcpy(&ret, &fval, sizeof(fval));
return ret;
// Reports whether the field's hasbit is set in the message.  Fields with a
// negative hasbit index have no hasbit and always report true.
bool upb_stdmsg_has(void *_m, upb_value fval) {
  upb_fielddef *field = upb_value_getfielddef(fval);
  if (field->hasbit < 0) return true;
  char *bytes = _m;
  return (bytes[field->hasbit / 8] & (1 << (field->hasbit % 8))) != 0;
}
#define SCALAR_VALUE_CB_PAIR(type, ctype) \
upb_flow_t upb_msgsink_ ## type ## value(void *_m, upb_value _fval, \
upb_value val) { \
upb_msg *m = _m; \
upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval); \
m->data[fval.has_offset] |= fval.has_mask; \
*(ctype*)&m->data[fval.val_offset] = upb_value_get ## type(val); \
#define UPB_ACCESSORS(type, ctype) \
upb_flow_t upb_stdmsg_set ## type (void *_m, upb_value fval, \
upb_value val) { \
upb_fielddef *f = upb_value_getfielddef(fval); \
uint8_t *m = _m; \
upb_stdmsg_sethas(_m, fval); \
*(ctype*)&m[f->offset] = upb_value_get ## type(val); \
return UPB_CONTINUE; \
} \
\
upb_flow_t upb_msgsink_ ## type ## value_r(void *_a, upb_value _fval, \
upb_value val) { \
upb_flow_t upb_stdmsg_set ## type ## _r(void *a, upb_value _fval, \
upb_value val) { \
(void)_fval; \
upb_array *arr = _a; \
upb_array_resizefortypesize(arr, sizeof(ctype), arr->len+1); \
upb_valueptr p = _upb_array_getptrforsize(arr, sizeof(ctype), \
arr->len-1); \
*(ctype*)p._void = upb_value_get ## type(val); \
ctype *p = upb_stdarray_append((upb_stdarray*)a, sizeof(ctype)); \
*p = upb_value_get ## type(val); \
return UPB_CONTINUE; \
} \
SCALAR_VALUE_CB_PAIR(double, double)
SCALAR_VALUE_CB_PAIR(float, float)
SCALAR_VALUE_CB_PAIR(int32, int32_t)
SCALAR_VALUE_CB_PAIR(int64, int64_t)
SCALAR_VALUE_CB_PAIR(uint32, uint32_t)
SCALAR_VALUE_CB_PAIR(uint64, uint64_t)
SCALAR_VALUE_CB_PAIR(bool, bool)
upb_sflow_t upb_msgsink_startseq(void *_m, upb_value _fval) {
upb_msg *m = _m;
upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval);
upb_array **arr = (upb_array**)&m->data[fval.val_offset];
if (!(m->data[fval.has_offset] & fval.has_mask)) {
upb_array_recycle(arr);
m->data[fval.has_offset] |= fval.has_mask;
\
upb_value upb_stdmsg_get ## type(void *_m, upb_value fval) { \
uint8_t *m = _m; \
upb_fielddef *f = upb_value_getfielddef(fval); \
upb_value ret; \
upb_value_set ## type(&ret, *(ctype*)&m[f->offset]); \
return ret; \
} \
upb_value upb_stdmsg_seqget ## type(void *i) { \
upb_value val; \
upb_value_set ## type(&val, *(ctype*)i); \
return val; \
}
return UPB_CONTINUE_WITH(*arr);
}
upb_flow_t upb_msgsink_strvalue(void *_m, upb_value _fval, upb_value val) {
upb_msg *m = _m;
upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval);
m->data[fval.has_offset] |= fval.has_mask;
UPB_ACCESSORS(double, double)
UPB_ACCESSORS(float, float)
UPB_ACCESSORS(int32, int32_t)
UPB_ACCESSORS(int64, int64_t)
UPB_ACCESSORS(uint32, uint32_t)
UPB_ACCESSORS(uint64, uint64_t)
UPB_ACCESSORS(bool, bool)
UPB_ACCESSORS(ptr, void*)
#undef UPB_ACCESSORS
static void _upb_stdmsg_setstr(void *_dst, upb_value _src) {
// We do:
// - upb_string_recycle(), upb_string_substr() instead of
// - upb_string_unref(), upb_string_getref()
@ -369,115 +145,204 @@ upb_flow_t upb_msgsink_strvalue(void *_m, upb_value _fval, upb_value val) {
// allocate string objects whereas a upb_string_getref could have avoided
// those allocations completely; if this is an issue, we could make it an
// option of the upb_msgsink which behavior is desired.
upb_string *src = upb_value_getstr(val);
upb_string **dst = (void*)&m->data[fval.val_offset];
upb_string **dst = _dst;
upb_string *src = upb_value_getstr(_src);
upb_string_recycle(dst);
upb_string_substr(*dst, src, 0, upb_string_len(src));
}
// Value handler for a non-repeated string field: marks the field as present
// and stores the string into the slot at the field's offset in the message.
upb_flow_t upb_stdmsg_setstr(void *_m, upb_value fval, upb_value val) {
  upb_fielddef *field = upb_value_getfielddef(fval);
  char *base = _m;
  upb_stdmsg_sethas(_m, fval);
  _upb_stdmsg_setstr(base + field->offset, val);
  return UPB_CONTINUE;
}
upb_flow_t upb_msgsink_strvalue_r(void *_a, upb_value _fval,
upb_value val) {
upb_array *arr = _a;
(void)_fval;
upb_array_resizefortypesize(arr, sizeof(void*), arr->len+1);
upb_valueptr p = _upb_array_getptrforsize(arr, sizeof(void*),
upb_array_len(arr)-1);
upb_string *src = upb_value_getstr(val);
upb_string_recycle(p.str);
upb_string_substr(*p.str, src, 0, upb_string_len(src));
// Value handler for a repeated string field: appends a pointer-sized slot to
// the array and stores the string into it.
upb_flow_t upb_stdmsg_setstr_r(void *a, upb_value fval, upb_value val) {
  (void)fval;
  void *slot = upb_stdarray_append((upb_stdarray*)a, sizeof(void*));
  _upb_stdmsg_setstr(slot, val);
  return UPB_CONTINUE;
}
// Reads the field with the pointer getter, then re-tags the same value as a
// string.
upb_value upb_stdmsg_getstr(void *m, upb_value fval) {
  upb_value result = upb_stdmsg_getptr(m, fval);
  void *str = upb_value_getptr(result);
  upb_value_setstr(&result, str);
  return result;
}
// Reads the element at iterator "i" with the pointer getter, then re-tags the
// same value as a string.
upb_value upb_stdmsg_seqgetstr(void *i) {
  upb_value result = upb_stdmsg_seqgetptr(i);
  void *str = upb_value_getptr(result);
  upb_value_setstr(&result, str);
  return result;
}
// Allocates a message of md->size bytes, zero-fills it, and then runs
// upb_msg_clear() on it.
void *upb_stdmsg_new(upb_msgdef *md) {
  void *msg = malloc(md->size);
  memset(msg, 0, md->size);
  upb_msg_clear(msg, md);
  return msg;
}
upb_sflow_t upb_msgsink_startsubmsg(void *_m, upb_value _fval) {
upb_msg *msg = _m;
upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval);
upb_msgdef md;
md.size = fval.msg_size;
md.set_flags_bytes = fval.set_flags_bytes;
upb_fielddef f;
f.set_bit_mask = fval.has_mask;
f.set_bit_offset = fval.has_offset;
f.label = UPB_LABEL(OPTIONAL); // Just not repeated.
f.type = UPB_TYPE(MESSAGE);
f.byte_offset = fval.val_offset;
upb_msg **subm = _upb_msg_getptr(msg, &f).msg;
if (!upb_msg_has(msg, &f)) {
upb_msg_recycle(subm, &md);
upb_msg_sethas(msg, &f);
// Frees a sequence.  For sub-message and string fields every slot up to the
// array's capacity ("size", not "len") is released first, then the element
// storage and the array header are freed.
void upb_stdseq_free(void *s, upb_fielddef *f) {
  upb_stdarray *seq = s;
  bool elems_are_msgs = upb_issubmsg(f);
  if (elems_are_msgs || upb_isstring(f)) {
    void **elems = (void**)seq->ptr;
    for (int i = 0; i < seq->size; i++) {
      if (elems_are_msgs) {
        upb_stdmsg_free(elems[i], upb_downcast_msgdef(f->def));
      } else {
        upb_string_unref(elems[i]);
      }
    }
  }
  free(seq->ptr);
  free(seq);
}
// Frees a message and everything it owns.  Each sequence, sub-message and
// string field whose stored pointer is non-NULL is released according to its
// kind; other fields own nothing.  A NULL message is ignored.
void upb_stdmsg_free(void *m, upb_msgdef *md) {
  if (m == NULL) return;
  upb_msg_iter it;
  for (it = upb_msg_begin(md); !upb_msg_done(it); it = upb_msg_next(md, it)) {
    upb_fielddef *field = upb_msg_iter_field(it);
    bool owns_pointer =
        upb_isseq(field) || upb_issubmsg(field) || upb_isstring(field);
    if (!owns_pointer) continue;

    void *owned = upb_value_getptr(upb_stdmsg_getptr(m, field->fval));
    if (owned == NULL) continue;

    // Sequences are checked first: a repeated sub-message or string field is
    // freed as a sequence.
    if (upb_isseq(field)) {
      upb_stdseq_free(owned, field);
    } else if (upb_issubmsg(field)) {
      upb_stdmsg_free(owned, upb_downcast_msgdef(field->def));
    } else {
      upb_string_unref(owned);
    }
  }
  free(m);
}
// Start-of-sequence handler.  If the field is not yet marked present, the
// array is created when the slot is NULL, its length is reset to zero, and
// the field is marked present.  The array becomes the closure for the
// sequence's elements.
upb_sflow_t upb_stdmsg_startseq(void *_m, upb_value fval) {
  upb_fielddef *field = upb_value_getfielddef(fval);
  char *base = _m;
  upb_stdarray **slot = (void*)&base[field->offset];
  if (!upb_stdmsg_has(_m, fval)) {
    upb_stdarray *seq = *slot;
    if (seq == NULL) {
      seq = malloc(sizeof(*seq));
      seq->size = 0;
      seq->ptr = NULL;
      *slot = seq;
    }
    // An array that already exists keeps its storage; only the length resets.
    seq->len = 0;
    upb_stdmsg_sethas(_m, fval);
  }
  return UPB_CONTINUE_WITH(*slot);
}
// Makes *m refer to a cleared message of type "md": allocates a new one with
// upb_stdmsg_new() when *m is NULL, otherwise clears the existing message in
// place with upb_msg_clear().
void upb_stdmsg_recycle(void **m, upb_msgdef *md) {
  if (*m == NULL) {
    *m = upb_stdmsg_new(md);
    return;
  }
  upb_msg_clear(*m, md);
}
// Start-submessage handler for a non-repeated submessage field.  The
// submessage pointer lives at f->offset inside the parent message.
//
// If the field's hasbit is already set the existing submessage is used
// as-is; otherwise it is recycled (cleared, or allocated if NULL) and the
// hasbit is set.  The submessage becomes the closure for its own fields.
upb_sflow_t upb_stdmsg_startsubmsg(void *_m, upb_value fval) {
  char *base = _m;
  upb_fielddef *field = upb_value_getfielddef(fval);
  void **slot = (void*)(base + field->offset);
  if (upb_stdmsg_has(base, fval)) return UPB_CONTINUE_WITH(*slot);

  upb_stdmsg_recycle(slot, upb_downcast_msgdef(field->def));
  upb_stdmsg_sethas(base, fval);
  return UPB_CONTINUE_WITH(*slot);
}
upb_sflow_t upb_msgsink_startsubmsg_r(void *_a, upb_value _fval) {
upb_array *a = _a;
upb_sflow_t upb_stdmsg_startsubmsg_r(void *a, upb_value fval) {
assert(a != NULL);
upb_msgsink_fval fval = upb_msgsink_unpackfval(_fval);
upb_msgdef md;
md.size = fval.msg_size;
md.set_flags_bytes = fval.set_flags_bytes;
upb_fielddef f;
f.set_bit_mask = fval.has_mask;
f.set_bit_offset = fval.has_offset;
f.label = UPB_LABEL(REPEATED);
f.type = UPB_TYPE(MESSAGE);
f.byte_offset = fval.val_offset;
upb_arraylen_t oldlen = upb_array_len(a);
upb_array_resize(a, &f, oldlen + 1);
upb_valueptr p = _upb_array_getptr(a, &f, oldlen);
upb_msg_recycle(p.msg, &md);
return UPB_CONTINUE_WITH(*p.msg);
upb_fielddef *f = upb_value_getfielddef(fval);
void **subm = upb_stdarray_append((upb_stdarray*)a, sizeof(void*));
upb_stdmsg_recycle(subm, upb_downcast_msgdef(f->def));
return UPB_CONTINUE_WITH(*subm);
}
INLINE void upb_msg_onfreg(void *c, upb_fhandlers *fh, upb_fielddef *f) {
(void)c;
uint16_t msg_size = 0;
uint8_t set_flags_bytes = 0;
if (upb_issubmsg(f)) {
upb_msgdef *md = upb_downcast_msgdef(f->def);
msg_size = md->size;
set_flags_bytes = md->set_flags_bytes;
// Returns an iterator to the first element of the upb_stdarray "_a", or NULL
// when the array holds no elements (len <= 0).
void *upb_stdmsg_seqbegin(void *_a) {
  upb_stdarray *seq = _a;
  if (seq->len <= 0) return NULL;
  return seq->ptr;
}
// Defines upb_stdmsg_<size>byte_seqnext(), which advances iterator "iter" by
// "size" bytes within the upb_stdarray "_a".  The generated function returns
// the advanced iterator, or NULL once it reaches the end of the first "len"
// elements.  "size" is pasted into the function name, so it must be a plain
// integer literal.
#define NEXTFUNC(size) \
  void *upb_stdmsg_ ## size ## byte_seqnext(void *_a, void *iter) { \
    upb_stdarray *seq = _a; \
    char *candidate = (char*)iter + size; \
    char *end = (char*)seq->ptr + (seq->len * size); \
    return candidate < end ? (void*)candidate : NULL; \
  }
upb_value_setuint64(&fh->fval,
upb_msgsink_packfval(f->set_bit_offset, f->set_bit_mask,
f->byte_offset, msg_size, set_flags_bytes));
if (fh->repeated) upb_fhandlers_setstartseq(fh, upb_msgsink_startseq);
#define CASE(upb_type, type) \
case UPB_TYPE(upb_type): \
upb_fhandlers_setvalue(fh, upb_isarray(f) ? \
upb_msgsink_ ## type ## value_r : upb_msgsink_ ## type ## value); \
break;
switch (f->type) {
CASE(DOUBLE, double)
CASE(FLOAT, float)
CASE(INT32, int32)
CASE(INT64, int64)
CASE(UINT32, uint32)
CASE(UINT64, uint64)
CASE(SINT32, int32)
CASE(SINT64, int64)
CASE(FIXED32, uint32)
CASE(FIXED64, uint64)
CASE(SFIXED32, int32)
CASE(SFIXED64, int64)
CASE(BOOL, bool)
CASE(ENUM, int32)
CASE(STRING, str)
CASE(BYTES, str)
#undef CASE
case UPB_TYPE(MESSAGE):
case UPB_TYPE(GROUP):
upb_fhandlers_setstartsubmsg(fh,
upb_isarray(f) ? upb_msgsink_startsubmsg_r : upb_msgsink_startsubmsg);
break;
// Instantiate the sequence-advance functions for 8-, 4- and 1-byte elements:
// upb_stdmsg_8byte_seqnext, upb_stdmsg_4byte_seqnext, upb_stdmsg_1byte_seqnext.
NEXTFUNC(8)
NEXTFUNC(4)
NEXTFUNC(1)
// STDMSG(type): body of a switch case for a non-repeated field.  Expands to
// a block that defines a function-local static upb_accessor_vtbl and returns
// its address.  Slots, in vtable order: appendseq = NULL, appendsubmsg =
// upb_stdmsg_startsubmsg, set = upb_stdmsg_set<type>, has = upb_stdmsg_has,
// get = upb_stdmsg_get<type>, and no sequence readers.
#define STDMSG(type) { static upb_accessor_vtbl vtbl = {NULL, &upb_stdmsg_startsubmsg, \
&upb_stdmsg_set ## type, &upb_stdmsg_has, &upb_stdmsg_get ## type, \
NULL, NULL, NULL}; return &vtbl; }
// STDMSG_R(type, size): same as STDMSG but for a repeated field.  Slots, in
// vtable order: appendseq = upb_stdmsg_startseq, appendsubmsg =
// upb_stdmsg_startsubmsg_r, set = upb_stdmsg_set<type>_r, has =
// upb_stdmsg_has, get = upb_stdmsg_getptr (yields the array pointer),
// seqbegin = upb_stdmsg_seqbegin, seqnext = upb_stdmsg_<size>byte_seqnext,
// seqget = upb_stdmsg_seqget<type>.  "size" must be one of the element sizes
// instantiated with NEXTFUNC above.
#define STDMSG_R(type, size) { static upb_accessor_vtbl vtbl = { \
&upb_stdmsg_startseq, &upb_stdmsg_startsubmsg_r, &upb_stdmsg_set ## type ## _r, \
&upb_stdmsg_has, &upb_stdmsg_getptr, &upb_stdmsg_seqbegin, \
&upb_stdmsg_ ## size ## byte_seqnext, &upb_stdmsg_seqget ## type}; \
return &vtbl; }
// Returns the standard-message accessor vtable for fielddef "f", selected by
// whether the field is a sequence and by f->type.  Each case expands (via
// STDMSG / STDMSG_R) to a function-local static vtable whose address is
// returned, so the result must not be freed.  Returns NULL if f->type
// matches none of the cases.
upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f) {
if (upb_isseq(f)) {
// Repeated fields: the second macro argument is the element size in bytes.
switch (f->type) {
case UPB_TYPE(DOUBLE): STDMSG_R(double, 8)
case UPB_TYPE(FLOAT): STDMSG_R(float, 4)
case UPB_TYPE(UINT64):
case UPB_TYPE(FIXED64): STDMSG_R(uint64, 8)
case UPB_TYPE(INT64):
case UPB_TYPE(SFIXED64):
case UPB_TYPE(SINT64): STDMSG_R(int64, 8)
case UPB_TYPE(INT32):
case UPB_TYPE(SINT32):
case UPB_TYPE(ENUM):
case UPB_TYPE(SFIXED32): STDMSG_R(int32, 4)
case UPB_TYPE(UINT32):
case UPB_TYPE(FIXED32): STDMSG_R(uint32, 4)
case UPB_TYPE(BOOL): STDMSG_R(bool, 1)
// Strings and submessages share the "str" accessors; their array elements
// are pointers.  The element size is hard-coded to 8 here, which the TODO
// below flags as not yet handling 32-bit targets.
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES):
case UPB_TYPE(GROUP):
case UPB_TYPE(MESSAGE): STDMSG_R(str, 8) // TODO: 32-bit
}
} else {
// Non-repeated fields.
switch (f->type) {
case UPB_TYPE(DOUBLE): STDMSG(double)
case UPB_TYPE(FLOAT): STDMSG(float)
case UPB_TYPE(UINT64):
case UPB_TYPE(FIXED64): STDMSG(uint64)
case UPB_TYPE(INT64):
case UPB_TYPE(SFIXED64):
case UPB_TYPE(SINT64): STDMSG(int64)
case UPB_TYPE(INT32):
case UPB_TYPE(SINT32):
case UPB_TYPE(ENUM):
case UPB_TYPE(SFIXED32): STDMSG(int32)
case UPB_TYPE(UINT32):
case UPB_TYPE(FIXED32): STDMSG(uint32)
case UPB_TYPE(BOOL): STDMSG(bool)
// Strings and submessages share the "str" accessors.
case UPB_TYPE(STRING):
case UPB_TYPE(BYTES):
case UPB_TYPE(GROUP):
case UPB_TYPE(MESSAGE): STDMSG(str)
}
}
// Reached only when f->type matched no case above.
return NULL;
}
// Per-field registration callback: copies the writer callbacks from the
// field's accessor vtable (appendseq, set, appendsubmsg) and the field's
// fval into the field handlers "fh".  Fields without an accessor are left
// untouched.  The closure "c" is unused.
static void upb_accessors_onfreg(void *c, upb_fhandlers *fh, upb_fielddef *f) {
  (void)c;
  if (!f->accessor) return;

  upb_fhandlers_setstartseq(fh, f->accessor->appendseq);
  upb_fhandlers_setvalue(fh, f->accessor->set);
  upb_fhandlers_setstartsubmsg(fh, f->accessor->appendsubmsg);
  upb_fhandlers_setfval(fh, f->fval);
}
upb_mhandlers *upb_msg_reghandlers(upb_handlers *h, upb_msgdef *m) {
return upb_handlers_regmsgdef(h, m, NULL, &upb_msg_onfreg, NULL);
// Registers handlers for msgdef "m" on "h" by calling
// upb_handlers_regmsgdef() with upb_accessors_onfreg as the per-field
// callback (the third and fifth arguments are NULL), so each field's handlers
// come from that field's accessor vtable.  Returns whatever
// upb_handlers_regmsgdef() returns.
upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m) {
return upb_handlers_regmsgdef(h, m, NULL, &upb_accessors_onfreg, NULL);
}

@ -4,285 +4,122 @@
* Copyright (c) 2010-2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* Data structure for storing a message of protobuf data. Unlike Google's
* protobuf, upb_msg and upb_array are reference counted instead of having
* exclusive ownership of their fields. This is a better match for dynamic
* languages where statements like a.b = other_b are normal.
* Routines for reading and writing message data to an in-memory structure,
* similar to a C struct.
*
* upb's parsers and serializers could also be used to populate and serialize
* other kinds of message objects (even one generated by Google's protobuf).
* upb does not define one single message object that everyone must use.
* Rather it defines an abstract interface for reading and writing members
* of a message object, and all of the parsers and serializers use this
* abstract interface. This allows upb's parsers and serializers to be used
* regardless of what memory management scheme or synchronization model the
* application is using.
*
* TODO: consider properly supporting const instances.
* A standard set of accessors is provided for doing simple reads and writes at
* a known offset into the message. These accessors should be used when
* possible, because they are specially optimized -- for example, the JIT can
* recognize them and emit specialized code instead of having to call the
* function at all. The application can substitute its own accessors when the
* standard accessors are not suitable.
*/
#ifndef UPB_MSG_H
#define UPB_MSG_H
#include <stdlib.h>
#include "upb_def.h"
#include "upb_handlers.h"
#ifdef __cplusplus
extern "C" {
#endif
// A pointer to a .proto value. The owner must have an out-of-band way of
// knowing the type, so it knows which union member to use.
typedef union {
double *_double;
float *_float;
int32_t *int32;
int64_t *int64;
uint8_t *uint8;
uint32_t *uint32;
uint64_t *uint64;
bool *_bool;
upb_string **str;
upb_msg **msg;
upb_array **arr;
void *_void;
} upb_valueptr;
INLINE upb_valueptr upb_value_addrof(upb_value *val) {
upb_valueptr ptr = {&val->val._double};
return ptr;
}
// Reads or writes a upb_value from an address represented by a upb_value_ptr.
// We need to know the value type to perform this operation, because we need to
// know how much memory to copy (and for big-endian machines, we need to know
// where in the upb_value the data goes).
//
// For little endian-machines where we didn't mind overreading, we could make
// upb_value_read simply use memcpy().
INLINE upb_value upb_value_read(upb_valueptr ptr, upb_fieldtype_t ft) {
upb_value val;
#ifdef NDEBUG
#define CASE(t, member_name) \
case UPB_TYPE(t): val.val.member_name = *ptr.member_name; break;
#else
#define CASE(t, member_name) \
case UPB_TYPE(t): val.val.member_name = *ptr.member_name; val.type = upb_types[ft].inmemory_type; break;
#endif
/* upb_accessor ***************************************************************/
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
CASE(INT32, int32)
CASE(INT64, int64)
CASE(UINT32, uint32)
CASE(UINT64, uint64)
CASE(SINT32, int32)
CASE(SINT64, int64)
CASE(FIXED32, uint32)
CASE(FIXED64, uint64)
CASE(SFIXED32, int32)
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
CASE(STRING, str)
CASE(BYTES, str)
CASE(MESSAGE, msg)
CASE(GROUP, msg)
case UPB_VALUETYPE_ARRAY:
val.val.arr = *ptr.arr;
#ifndef NDEBUG
val.type = UPB_VALUETYPE_ARRAY;
#endif
break;
default: assert(false);
}
return val;
// A upb_accessor is a table of function pointers for doing reads and writes
// for one specific upb_fielddef. Each field has a separate accessor, which
// lives in the fielddef.
#undef CASE
}
typedef bool upb_has_reader(void *m, upb_value fval);
typedef upb_value upb_value_reader(void *m, upb_value fval);
INLINE void upb_value_write(upb_valueptr ptr, upb_value val,
upb_fieldtype_t ft) {
#ifndef NDEBUG
if (ft == UPB_VALUETYPE_ARRAY) {
assert(val.type == UPB_VALUETYPE_ARRAY);
} else if (val.type != UPB_VALUETYPE_RAW) {
assert(val.type == upb_types[ft].inmemory_type);
}
#endif
#define CASE(t, member_name) \
case UPB_TYPE(t): *ptr.member_name = val.val.member_name; break;
switch(ft) {
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
CASE(INT32, int32)
CASE(INT64, int64)
CASE(UINT32, uint32)
CASE(UINT64, uint64)
CASE(SINT32, int32)
CASE(SINT64, int64)
CASE(FIXED32, uint32)
CASE(FIXED64, uint64)
CASE(SFIXED32, int32)
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
CASE(STRING, str)
CASE(BYTES, str)
CASE(MESSAGE, msg)
CASE(GROUP, msg)
case UPB_VALUETYPE_ARRAY:
*ptr.arr = val.val.arr;
break;
default: assert(false);
}
#undef CASE
}
// Sequence-reader callback types.  Iterators are opaque void* values:
//   seqbegin: takes the sequence "s" and returns an iterator.
//   seqnext:  takes the sequence and an iterator, returns the next iterator.
//   seqget:   returns the value at an iterator.
typedef void *upb_seqbegin_handler(void *s);
typedef void *upb_seqnext_handler(void *s, void *iter);
typedef upb_value upb_seqget_handler(void *iter);
// A NULL iterator marks the end of a sequence.
INLINE bool upb_seq_done(void *iter) { return iter == NULL; }
// Table of read/write callbacks for one field.  Writers are installed as
// upb_handlers callbacks; readers are invoked through upb_msg_has(),
// upb_msg_get() and the upb_seq_*() helpers.
typedef struct _upb_accessor_vtbl {
// Writers. These take an fval as a parameter because the callbacks are used
// as upb_handlers, but the fval is always the fielddef for that field.
upb_startfield_handler *appendseq; // Repeated fields only.
upb_startfield_handler *appendsubmsg; // Submsg fields (repeated or no).
upb_value_handler *set; // Scalar fields (repeated or no).
/* upb_array ******************************************************************/
// Readers.
upb_has_reader *has; // Called by upb_msg_has().
upb_value_reader *get; // Called by upb_msg_get().
upb_seqbegin_handler *seqbegin; // Called by upb_seq_begin().
upb_seqnext_handler *seqnext; // Called by upb_seq_next().
upb_seqget_handler *seqget; // Called by upb_seq_get().
} upb_accessor_vtbl;
typedef uint32_t upb_arraylen_t;
struct _upb_array {
upb_atomic_t refcount;
// "len" and "size" are measured in elements, not bytes.
int32_t len;
int32_t size;
char *ptr;
};
void _upb_array_free(upb_array *a, upb_fielddef *f);
INLINE upb_valueptr _upb_array_getptrforsize(upb_array *a, size_t type_size,
int32_t elem) {
assert(elem >= 0);
upb_valueptr p;
p._void = &a->ptr[elem * type_size];
return p;
}
// Registers handlers for writing into a message of the given type.
upb_mhandlers *upb_accessors_reghandlers(upb_handlers *h, upb_msgdef *m);
INLINE upb_valueptr _upb_array_getptr(upb_array *a, upb_fielddef *f,
uint32_t elem) {
return _upb_array_getptrforsize(a, upb_types[f->type].size, elem);
}
// Returns an stdmsg accessor for the given fielddef.
upb_accessor_vtbl *upb_stdmsg_accessor(upb_fielddef *f);
upb_array *upb_array_new(void);
INLINE void upb_array_unref(upb_array *a, upb_fielddef *f) {
if (a && upb_atomic_unref(&a->refcount)) _upb_array_free(a, f);
}
/* upb_msg/upb_seq ************************************************************/
void upb_array_recycle(upb_array **arr);
INLINE uint32_t upb_array_len(upb_array *a) {
return a->len;
}
// upb_msg and upb_seq allow for generic access to a message through its
// accessor vtable. Note that these do *not* allow you to create, destroy, or
// take references on the objects -- these operations are specifically outside
// the scope of what the accessors define.
INLINE upb_value upb_array_get(upb_array *arr, upb_fielddef *f,
upb_arraylen_t i) {
assert(i < upb_array_len(arr));
return upb_value_read(_upb_array_getptr(arr, f, i), f->type);
}
// Clears all hasbits.
// TODO: Add a separate function for setting primitive values back to their
// defaults (but not strings, submessages, or arrays).
void upb_msg_clear(void *msg, upb_msgdef *md);
// Could add a method that recursively clears submessages, strings, and
// arrays if desired. This could be a win if you wanted to merge without
// needing hasbits, because during parsing you would never clear submessages
// or arrays. Also this could be desired to provide proto2 operations on
// generated messages.
/* upb_msg ********************************************************************/
// upb_msg is not self-describing; the upb_msg does not contain a pointer to the
// upb_msgdef. While this makes the API a bit more cumbersome to use, this
// choice was made for a few important reasons:
//
// 1. it would make every message 8 bytes larger on 64-bit platforms. This is
// a high overhead for small messages.
// 2. you would want the msg to own a ref on its msgdef, but this would require
// an atomic operation for every message create or destroy!
struct _upb_msg {
upb_atomic_t refcount;
uint8_t data[4]; // We allocate the appropriate amount per message.
};
void _upb_msg_free(upb_msg *msg, upb_msgdef *md);
INLINE upb_valueptr _upb_msg_getptr(upb_msg *msg, upb_fielddef *f) {
upb_valueptr p;
p._void = &msg->data[f->byte_offset];
return p;
// Reports whether field "f" is set in message "m" by asking the field's
// accessor.  A field with no accessor is always reported as not set.
INLINE bool upb_msg_has(void *m, upb_fielddef *f) {
  if (!f->accessor) return false;
  return f->accessor->has(m, f->fval);
}
// Creates a new msg of the given type.
upb_msg *upb_msg_new(upb_msgdef *md);
// Unrefs the given message.
INLINE void upb_msg_unref(upb_msg *msg, upb_msgdef *md) {
if (msg && upb_atomic_unref(&msg->refcount)) _upb_msg_free(msg, md);
}
INLINE upb_msg *upb_msg_getref(upb_msg *msg) {
assert(msg);
upb_atomic_ref(&msg->refcount);
return msg;
// Returns the value of field "f" in message "m" via the field's accessor.
// May only be called for fields that are known to be set.
// This precondition is only checked by the assert; with asserts compiled out
// a field without an accessor dereferences a NULL vtable pointer.
INLINE upb_value upb_msg_get(void *m, upb_fielddef *f) {
assert(upb_msg_has(m, f));
return f->accessor->get(m, f->fval);
}
// Modifies *msg to point to a newly initialized msg instance. If the msg had
// no other referents, reuses the same msg, otherwise allocates a new one.
// The caller *must* own a ref on the msg prior to calling this method!
void upb_msg_recycle(upb_msg **msg, upb_msgdef *msgdef);
// Tests whether the given field is explicitly set, or whether it will return a
// default.
INLINE bool upb_msg_has(upb_msg *msg, upb_fielddef *f) {
return (msg->data[f->set_bit_offset] & f->set_bit_mask) != 0;
// Returns an iterator for sequence "s" of field "f", obtained from the
// field's seqbegin accessor.  Test the result with upb_seq_done() before
// using it.  The field must have an accessor (asserted).
INLINE void *upb_seq_begin(void *s, upb_fielddef *f) {
assert(f->accessor);
return f->accessor->seqbegin(s);
}
// We have several options for handling default values:
// 1. inside upb_msg_clear(), overwrite all values to be their defaults,
// overwriting submessage pointers to point to the default instance again.
// 2. inside upb_msg_get(), test upb_msg_has() and return md->default_value
// if it is not set. upb_msg_clear() only clears the set bits.
// We lazily clear objects if/when we reuse them.
// 3. inside upb_msg_clear(), overwrite all values to be their default,
// and recurse into submessages to set all their values to defaults also.
// 4. as a hybrid of (1) and (3), clear all set bits in upb_msg_clear()
// but also overwrite all primitive values to be their defaults. Only
// accessors for non-primitive values (submessage, strings, and arrays)
// need to check the has-bits in their accessors -- primitive values can
// always be returned straight from the msg.
//
// (1) is undesirable, because it prevents us from caching sub-objects.
// (2) makes clear() cheaper, but makes get() branchier.
// (3) makes get() less branchy, but makes clear() traverse the message graph.
// (4) is probably the best bang for the buck.
//
// For the moment upb does (2), but we should implement (4). Google's protobuf
// does (3), which is likely part of the reason that even our table-based
// decoder beats it in some benchmarks.
// For submessages and strings, the returned value is not owned.
upb_value upb_msg_get(upb_msg *msg, upb_fielddef *f);
// A specialized version of the previous that is cheaper because it doesn't
// support submessages or arrays.
INLINE upb_value upb_msg_getscalar(upb_msg *msg, upb_fielddef *f) {
if (upb_msg_has(msg, f)) {
return upb_value_read(_upb_msg_getptr(msg, f), upb_field_valuetype(f));
} else {
return f->default_value;
}
// Advances iterator "iter" over sequence "s" of field "f" using the field's
// seqnext accessor and returns the new iterator.  "iter" must not already be
// done, and the field must have an accessor (both asserted).
INLINE void *upb_seq_next(void *s, void *iter, upb_fielddef *f) {
assert(f->accessor);
assert(!upb_seq_done(iter));
return f->accessor->seqnext(s, iter);
}
// Sets the given field to the given value. If the field is a string, array,
// or submessage, releases the ref on any object we may have been referencing
// and takes a ref on the new object (if any).
void upb_msg_set(upb_msg *msg, upb_fielddef *f, upb_value val);
// Unsets all field values back to their defaults.
INLINE void upb_msg_clear(upb_msg *msg, upb_msgdef *md) {
memset(msg->data, 0, md->set_flags_bytes);
// Returns the element at iterator "iter" using the seqget accessor of field
// "f".  "iter" must not be done, and the field must have an accessor (both
// asserted).
INLINE upb_value upb_seq_get(void *iter, upb_fielddef *f) {
assert(f->accessor);
assert(!upb_seq_done(iter));
return f->accessor->seqget(iter);
}
// Registers handlers for populating a msg for the given upb_msgdef.
// The upb_msg itself must be passed as the param to the src.
upb_mhandlers *upb_msg_reghandlers(upb_handlers *h, upb_msgdef *md);
/* upb_msgvisitor *************************************************************/
// Calls a set of upb_handlers with the contents of a upb_msg.
// A upb_msgvisitor reads data from an in-memory structure using its accessors,
// pushing the results to a given set of upb_handlers.
// TODO: not yet implemented.
typedef struct {
upb_fhandlers *fh;
upb_fielddef *f;
@ -314,6 +151,118 @@ void upb_msgvisitor_uninit(upb_msgvisitor *v);
void upb_msgvisitor_reset(upb_msgvisitor *v, upb_msg *m);
void upb_msgvisitor_visit(upb_msgvisitor *v, upb_status *status);
/* Standard writers. **********************************************************/
// Allocates a new stdmsg.
void *upb_stdmsg_new(upb_msgdef *md);
// Recursively frees any strings or submessages that the message refers to.
void upb_stdmsg_free(void *m, upb_msgdef *md);
// "hasbit" must be <= UPB_MAX_FIELDS. If it is <0, this field has no hasbit.
upb_value upb_stdmsg_packfval(int16_t hasbit, uint16_t value_offset);
upb_value upb_stdmsg_packfval_subm(int16_t hasbit, uint16_t value_offset,
uint16_t subm_size, uint8_t subm_setbytes);
// Value writers for every in-memory type: write the data to a known offset
// from the closure "c" and set the hasbit (if any).
// TODO: can we get away with having only one for int64, uint64, double, etc?
// The main thing in the way atm is that the upb_value is strongly typed
// in debug mode.
upb_flow_t upb_stdmsg_setint64(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setint32(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setuint64(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setuint32(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setdouble(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setfloat(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setbool(void *c, upb_value fval, upb_value val);
// Value writers for repeated fields: the closure points to a standard array
// struct, appends the value to the end of the array, resizing with realloc()
// if necessary.
// Standard array used as the backing store for repeated fields.  "len" and
// "size" count elements, not bytes; the element width depends on the field
// type.
typedef struct {
char *ptr; // Element storage; NULL until storage has been allocated.
int32_t len; // Number of elements present.
int32_t size; // Number of elements allocated.
} upb_stdarray;
upb_flow_t upb_stdmsg_setint64_r(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setint32_r(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setuint64_r(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setuint32_r(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setdouble_r(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setfloat_r(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setbool_r(void *c, upb_value fval, upb_value val);
// Writers for C strings (NUL-terminated): we can find a char* at a known
// offset from the closure "c". Calls realloc() on the pointer to allocate
// the memory (TODO: investigate whether checking malloc_usable_size() would
// be cheaper than realloc()). Also sets the hasbit, if any.
//
// Since the string is NUL-terminated and does not store an explicit length,
// these are not suitable for binary data that can contain NUL bytes.
upb_flow_t upb_stdmsg_setcstr(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setcstr_r(void *c, upb_value fval, upb_value val);
// Writers for length-delimited strings: we explicitly store the length, so
// the data can contain NUL bytes. Stores the data using upb_stdarray
// which is located at a known offset from the closure "c" (note that it
// is included inline rather than pointed to). Also sets the hasbit, if any.
upb_flow_t upb_stdmsg_setstr(void *c, upb_value fval, upb_value val);
upb_flow_t upb_stdmsg_setstr_r(void *c, upb_value fval, upb_value val);
// Writers for startseq and startmsg which allocate (or reuse, if possible)
// a sub data structure (upb_stdarray or a submessage, respectively),
// setting the hasbit. If the hasbit is already set, the existing data
// structure is used verbatim. If the hasbit is not already set, the pointer
// is checked for NULL. If it is NULL, a new substructure is allocated,
// cleared, and used. If it is not NULL, the existing substructure is
// cleared and reused.
//
// If there is no hasbit, we always behave as if the hasbit was not set,
// so any existing data for this array or submessage is cleared. In most
// cases this will be fine since each array or non-repeated submessage should
// occur at most once in the stream. But if the client is using "concatenation
// as merging", it will want to make sure hasbits are allocated so merges can
// happen appropriately.
//
// If there was a demand for the behavior that absence of a hasbit acts as if
// the bit was always set, we could provide that also. But Clear() would need
// to act recursively, which is less efficient since it requires an extra pass
// over the tree.
upb_sflow_t upb_stdmsg_startseq(void *c, upb_value fval);
upb_sflow_t upb_stdmsg_startsubmsg(void *c, upb_value fval);
upb_sflow_t upb_stdmsg_startsubmsg_r(void *c, upb_value fval);
/* Standard readers. **********************************************************/
bool upb_stdmsg_has(void *c, upb_value fval);
void *upb_stdmsg_seqbegin(void *c);
upb_value upb_stdmsg_getint64(void *c, upb_value fval);
upb_value upb_stdmsg_getint32(void *c, upb_value fval);
upb_value upb_stdmsg_getuint64(void *c, upb_value fval);
upb_value upb_stdmsg_getuint32(void *c, upb_value fval);
upb_value upb_stdmsg_getdouble(void *c, upb_value fval);
upb_value upb_stdmsg_getfloat(void *c, upb_value fval);
upb_value upb_stdmsg_getbool(void *c, upb_value fval);
upb_value upb_stdmsg_getptr(void *c, upb_value fval);
void *upb_stdmsg_8byte_seqnext(void *c, void *iter);
void *upb_stdmsg_4byte_seqnext(void *c, void *iter);
void *upb_stdmsg_1byte_seqnext(void *c, void *iter);
upb_value upb_stdmsg_seqgetint64(void *c);
upb_value upb_stdmsg_seqgetint32(void *c);
upb_value upb_stdmsg_seqgetuint64(void *c);
upb_value upb_stdmsg_seqgetuint32(void *c);
upb_value upb_stdmsg_seqgetdouble(void *c);
upb_value upb_stdmsg_seqgetfloat(void *c);
upb_value upb_stdmsg_seqgetbool(void *c);
upb_value upb_stdmsg_seqgetptr(void *c);
#ifdef __cplusplus
} /* extern "C" */
#endif

@ -107,6 +107,8 @@ void _upb_string_free(upb_string *str);
// can be NULL, in which case this is a no-op. WARNING: NOT THREAD_SAFE
// UNLESS THE STRING IS SYNCHRONIZED.
INLINE void upb_string_unref(upb_string *str) {
if (str) {
}
if (str && upb_atomic_read(&str->refcount) > 0 &&
upb_atomic_unref(&str->refcount)) {
_upb_string_free(str);
@ -129,7 +131,9 @@ INLINE upb_string *upb_string_getref(upb_string *str) {
int refcount = upb_atomic_read(&str->refcount);
if (refcount == _UPB_STRING_REFCOUNT_STACK) return upb_strdup(str);
// We don't ref the special <0 refcount for static strings.
if (refcount > 0) upb_atomic_ref(&str->refcount);
if (refcount > 0) {
upb_atomic_ref(&str->refcount);
}
return str;
}

@ -103,6 +103,12 @@ INLINE uint32_t upb_strtable_count(upb_strtable *t) {
void upb_inttable_insert(upb_inttable *t, upb_inttable_key_t key, void *val);
void upb_strtable_insert(upb_strtable *t, upb_strtable_entry *ent); // TODO: update
void upb_inttable_compact(upb_inttable *t);
// Empties the string table by freeing it and re-initializing it in place
// with an initial size argument of 8 and the same entry size as before.
INLINE void upb_strtable_clear(upb_strtable *t) {
// TODO: improve.
// Read the entry size before upb_strtable_free() runs -- presumably the
// table's fields are not reliable afterwards; confirm against
// upb_strtable_free().
uint16_t entry_size = t->t.entry_size;
upb_strtable_free(t);
upb_strtable_init(t, 8, entry_size);
}
INLINE uint32_t _upb_inttable_bucket(upb_inttable *t, upb_inttable_key_t k) {
uint32_t bucket = k & t->t.mask; // Identity hash for ints.

@ -18,7 +18,7 @@ int main(int argc, char *argv[]) {
}
upb_status status = UPB_STATUS_INIT;
upb_parsedesc(symtab, desc, &status);
upb_read_descriptor(symtab, desc, &status);
if (!upb_ok(&status)) {
fprintf(stderr, "Error parsing descriptor: ");
upb_printerr(&status);
@ -45,14 +45,13 @@ int main(int argc, char *argv[]) {
upb_stdio *out = upb_stdio_new();
upb_stdio_reset(out, stdout);
upb_handlers handlers;
upb_handlers_init(&handlers);
upb_handlers *handlers = upb_handlers_new();
upb_textprinter *p = upb_textprinter_new();
upb_textprinter_reset(p, upb_stdio_bytesink(out), false);
upb_textprinter_reghandlers(&handlers, m);
upb_textprinter_reghandlers(handlers, m);
upb_decoder d;
upb_decoder_init(&d, &handlers);
upb_decoder_initforhandlers(&d, handlers);
upb_decoder_reset(&d, upb_stdio_bytesrc(in), p);
upb_clearerr(&status);

@ -21,19 +21,22 @@
size_t string_size;
void compare(const google::protobuf::Message& proto2_msg,
upb_msg *upb_msg, upb_msgdef *upb_md);
void *upb_msg, upb_msgdef *upb_md);
void compare_arrays(const google::protobuf::Reflection *r,
const google::protobuf::Message& proto2_msg,
const google::protobuf::FieldDescriptor *proto2_f,
upb_msg *upb_msg, upb_fielddef *upb_f)
void *upb_msg, upb_fielddef *upb_f)
{
ASSERT(upb_msg_has(upb_msg, upb_f));
ASSERT(upb_isarray(upb_f));
upb_array *arr = upb_value_getarr(upb_msg_get(upb_msg, upb_f));
ASSERT(upb_array_len(arr) == (upb_arraylen_t)r->FieldSize(proto2_msg, proto2_f));
for(upb_arraylen_t i = 0; i < upb_array_len(arr); i++) {
upb_value v = upb_array_get(arr, upb_f, i);
ASSERT(upb_isseq(upb_f));
void *arr = upb_value_getptr(upb_msg_get(upb_msg, upb_f));
void *iter = upb_seq_begin(arr, upb_f);
for(int i = 0;
i < r->FieldSize(proto2_msg, proto2_f);
i++, iter = upb_seq_next(arr, iter, upb_f)) {
ASSERT(!upb_seq_done(iter));
upb_value v = upb_seq_get(iter, upb_f);
switch(upb_f->type) {
default:
ASSERT(false);
@ -76,18 +79,20 @@ void compare_arrays(const google::protobuf::Reflection *r,
}
case UPB_TYPE(GROUP):
case UPB_TYPE(MESSAGE):
// XXX: getstr
ASSERT(upb_dyncast_msgdef(upb_f->def) != NULL);
compare(r->GetRepeatedMessage(proto2_msg, proto2_f, i),
upb_value_getmsg(v), upb_downcast_msgdef(upb_f->def));
upb_value_getstr(v), upb_downcast_msgdef(upb_f->def));
}
}
ASSERT(upb_seq_done(iter));
}
#include <inttypes.h>
void compare_values(const google::protobuf::Reflection *r,
const google::protobuf::Message& proto2_msg,
const google::protobuf::FieldDescriptor *proto2_f,
upb_msg *upb_msg, upb_fielddef *upb_f)
void *upb_msg, upb_fielddef *upb_f)
{
upb_value v = upb_msg_get(upb_msg, upb_f);
switch(upb_f->type) {
@ -132,13 +137,14 @@ void compare_values(const google::protobuf::Reflection *r,
}
case UPB_TYPE(GROUP):
case UPB_TYPE(MESSAGE):
// XXX: getstr
compare(r->GetMessage(proto2_msg, proto2_f),
upb_value_getmsg(v), upb_downcast_msgdef(upb_f->def));
upb_value_getstr(v), upb_downcast_msgdef(upb_f->def));
}
}
void compare(const google::protobuf::Message& proto2_msg,
upb_msg *upb_msg, upb_msgdef *upb_md)
void *upb_msg, upb_msgdef *upb_md)
{
const google::protobuf::Reflection *r = proto2_msg.GetReflection();
const google::protobuf::Descriptor *d = proto2_msg.GetDescriptor();
@ -157,15 +163,15 @@ void compare(const google::protobuf::Message& proto2_msg,
upb_string_len(upb_f->name)) ==
proto2_f->name());
ASSERT(upb_f->type == proto2_f->type());
ASSERT(upb_isarray(upb_f) == proto2_f->is_repeated());
ASSERT(upb_isseq(upb_f) == proto2_f->is_repeated());
if(!upb_msg_has(upb_msg, upb_f)) {
if(upb_isarray(upb_f))
if(upb_isseq(upb_f))
ASSERT(r->FieldSize(proto2_msg, proto2_f) == 0);
else
ASSERT(r->HasField(proto2_msg, proto2_f) == false);
} else {
if(upb_isarray(upb_f)) {
if(upb_isseq(upb_f)) {
compare_arrays(r, proto2_msg, proto2_f, upb_msg, upb_f);
} else {
ASSERT(r->HasField(proto2_msg, proto2_f) == true);
@ -176,7 +182,7 @@ void compare(const google::protobuf::Message& proto2_msg,
}
void parse_and_compare(MESSAGE_CIDENT *proto2_msg,
upb_msg *upb_msg, upb_msgdef *upb_md,
void *upb_msg, upb_msgdef *upb_md,
upb_string *str)
{
// Parse to both proto2 and upb.
@ -220,7 +226,7 @@ int main(int argc, char *argv[])
fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n");
return 1;
}
upb_parsedesc(symtab, fds, &status);
upb_read_descriptor(symtab, fds, &status);
if(!upb_ok(&status)) {
fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ": ");
upb_printerr(&status);
@ -247,12 +253,12 @@ int main(int argc, char *argv[])
// Run twice to test proper object reuse.
MESSAGE_CIDENT proto2_msg;
upb_msg *upb_msg = upb_msg_new(msgdef);
void *upb_msg = upb_stdmsg_new(msgdef);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str);
parse_and_compare(&proto2_msg, upb_msg, msgdef, str);
printf("All tests passed, %d assertions.\n", num_assertions);
upb_msg_unref(upb_msg, msgdef);
upb_stdmsg_free(upb_msg, msgdef);
upb_def_unref(UPB_UPCAST(msgdef));
upb_string_unref(str);
upb_symtab_unref(symtab);

@ -17,7 +17,7 @@ static upb_symtab *load_test_proto() {
exit(1);
}
upb_status status = UPB_STATUS_INIT;
upb_parsedesc(s, descriptor, &status);
upb_read_descriptor(s, descriptor, &status);
ASSERT(upb_ok(&status));
upb_status_uninit(&status);
upb_string_unref(descriptor);
@ -42,7 +42,7 @@ static void test_upb_jit() {
upb_handlerset hset = {NULL, NULL, &upb_test_onvalue, NULL, NULL, NULL, NULL};
upb_handlers_reghandlerset(h, upb_downcast_msgdef(def), &hset);
upb_decoder d;
upb_decoder_init(&d, h);
upb_decoder_initforhandlers(&d, h);
upb_decoder_uninit(&d);
upb_symtab_unref(s);
upb_def_unref(def);

Loading…
Cancel
Save