Protocol Buffers - Google's data interchange format (grpc依赖) https://developers.google.com/protocol-buffers/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

430 lines
16 KiB

/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009-2011 Google Inc. See LICENSE for details.
* Author: Josh Haberman <jhaberman@gmail.com>
*
* Provides a mechanism for creating and linking proto definitions.
* These form the protobuf schema, and are used extensively throughout upb:
* - upb_msgdef: describes a "message" construct.
* - upb_fielddef: describes a message field.
* - upb_enumdef: describes an enum.
* (TODO: definitions of services).
*
* These defs are mutable (and not thread-safe) when first created.
* Once they are added to a defbuilder (and later its symtab) they become
* immutable.
*
* TODO: consider making thread-safe even when first created by using mutexes
* internally. Would also have to change any methods returning pointers to
* return copies instead.
*/
#ifndef UPB_DEF_H_
#define UPB_DEF_H_
#include "upb/atomic.h"
#include "upb/table.h"
#ifdef __cplusplus
extern "C" {
#endif
struct _upb_symtab;
typedef struct _upb_symtab upb_symtab;
// All the different kind of defs we support. These correspond 1:1 with
// declarations in a .proto file.
typedef enum {
UPB_DEF_MSG = 1,
UPB_DEF_ENUM,
UPB_DEF_SERVICE, // Not yet implemented.
UPB_DEF_ANY = -1, // Wildcard for upb_symtab_get*()
UPB_DEF_UNRESOLVED = 99, // Internal-only.
} upb_deftype_t;
/* upb_def: base class for defs **********************************************/
typedef struct {
char *fqname; // Fully qualified.
upb_symtab *symtab; // Def is mutable iff symtab == NULL.
upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0).
upb_deftype_t type;
} upb_def;
// Call to ref/unref a def. Can be used at any time, but is not thread-safe
// until the def is in a symtab. While a def is in a symtab, everything
// reachable from that def (the symtab and all defs in the symtab) are
// guaranteed to be alive.
void upb_def_ref(upb_def *def);
void upb_def_unref(upb_def *def);
upb_def *upb_def_dup(upb_def *def);
// A def is mutable until it has been added to a symtab.
bool upb_def_ismutable(upb_def *def);
INLINE const char *upb_def_fqname(upb_def *def) { return def->fqname; }
bool upb_def_setfqname(upb_def *def, const char *fqname); // Only if mutable.
#define UPB_UPCAST(ptr) (&(ptr)->base)
/* upb_fielddef ***************************************************************/
// A upb_fielddef describes a single field in a message. It isn't a full def
// in the sense that it derives from upb_def. It cannot stand on its own; it
// must be part of a upb_msgdef. It is also reference-counted.
typedef struct _upb_fielddef {
struct _upb_msgdef *msgdef;
upb_def *def; // if upb_hasdef(f)
upb_atomic_t refcount;
bool finalized;
// The following fields may be modified until the def is finalized.
uint8_t type; // Use UPB_TYPE() constants.
uint8_t label; // Use UPB_LABEL() constants.
int16_t hasbit;
uint16_t offset;
bool hasdefault;
bool active;
int32_t number;
char *name;
upb_value defaultval; // Only meaningful for non-repeated scalars and strings.
upb_value fval;
struct _upb_accessor_vtbl *accessor;
} upb_fielddef;
upb_fielddef *upb_fielddef_new(void);
void upb_fielddef_ref(upb_fielddef *f);
void upb_fielddef_unref(upb_fielddef *f);
upb_fielddef *upb_fielddef_dup(upb_fielddef *f);
// A fielddef is mutable until its msgdef has been added to a symtab.
bool upb_fielddef_ismutable(upb_fielddef *f);
// Read accessors. May be called any time.
INLINE uint8_t upb_fielddef_type(upb_fielddef *f) { return f->type; }
INLINE uint8_t upb_fielddef_label(upb_fielddef *f) { return f->label; }
INLINE int32_t upb_fielddef_number(upb_fielddef *f) { return f->number; }
INLINE char *upb_fielddef_name(upb_fielddef *f) { return f->name; }
INLINE upb_value upb_fielddef_default(upb_fielddef *f) { return f->defaultval; }
INLINE upb_value upb_fielddef_fval(upb_fielddef *f) { return f->fval; }
INLINE bool upb_fielddef_finalized(upb_fielddef *f) { return f->finalized; }
INLINE struct _upb_msgdef *upb_fielddef_msgdef(upb_fielddef *f) {
return f->msgdef;
}
INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(upb_fielddef *f) {
return f->accessor;
}
// Only meaningful once the def is in a symtab (returns NULL otherwise, or for
// a fielddef where !upb_hassubdef(f)).
upb_def *upb_fielddef_subdef(upb_fielddef *f);
// NULL until the fielddef has been added to a msgdef.
// Write accessors. "Number" and "name" must be set before the fielddef is
// added to a msgdef. For the moment we do not allow these to be set once
// the fielddef is added to a msgdef -- this could be relaxed in the future.
bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number);
bool upb_fielddef_setname(upb_fielddef *f, const char *name);
// These writers may be called at any time prior to being put in a symtab.
bool upb_fielddef_settype(upb_fielddef *f, uint8_t type);
bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label);
void upb_fielddef_setdefault(upb_fielddef *f, upb_value value);
void upb_fielddef_setfval(upb_fielddef *f, upb_value fval);
void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl);
// The name of the message or enum this field is referring to. Must be found
// at name resolution time (when upb_symtab_add() is called).
bool upb_fielddef_settypename(upb_fielddef *f, const char *name);
// A variety of tests about the type of a field.
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
INLINE bool upb_issubmsgtype(upb_fieldtype_t type) {
return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE);
}
INLINE bool upb_isstringtype(upb_fieldtype_t type) {
return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES);
}
INLINE bool upb_isprimitivetype(upb_fieldtype_t type) {
return !upb_issubmsgtype(type) && !upb_isstringtype(type);
}
INLINE bool upb_issubmsg(upb_fielddef *f) { return upb_issubmsgtype(f->type); }
INLINE bool upb_isstring(upb_fielddef *f) { return upb_isstringtype(f->type); }
INLINE bool upb_isseq(upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); }
// Does the type of this field imply that it should contain an associated def?
INLINE bool upb_hassubdef(upb_fielddef *f) {
return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM);
}
/* upb_msgdef *****************************************************************/
// Structure that describes a single .proto message type.
typedef struct _upb_msgdef {
upb_def base;
// Tables for looking up fields by number and name.
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
// The following fields may be modified until finalized.
uint16_t size;
uint8_t hasbit_bytes;
// The range of tag numbers used to store extensions.
uint32_t extstart, extend;
} upb_msgdef;
// Hash table entries for looking up fields by name or number.
typedef struct {
bool junk;
upb_fielddef *f;
} upb_itof_ent;
typedef struct {
upb_fielddef *f;
} upb_ntof_ent;
upb_msgdef *upb_msgdef_new(void);
INLINE void upb_msgdef_unref(upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); }
INLINE void upb_msgdef_ref(upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); }
// Returns a new msgdef that is a copy of the given msgdef (and a copy of all
// the fields) but with any references to submessages broken and replaced with
// just the name of the submessage. This can be put back into another symtab
// and the names will be re-resolved in the new context.
upb_msgdef *upb_msgdef_dup(upb_msgdef *m);
// Read accessors. May be called at any time.
INLINE uint16_t upb_msgdef_size(upb_msgdef *m) { return m->size; }
INLINE uint8_t upb_msgdef_hasbit_bytes(upb_msgdef *m) {
return m->hasbit_bytes;
}
INLINE uint32_t upb_msgdef_extstart(upb_msgdef *m) { return m->extstart; }
INLINE uint32_t upb_msgdef_extend(upb_msgdef *m) { return m->extend; }
// Write accessors. May only be called before the msgdef is in a symtab.
void upb_msgdef_setsize(upb_msgdef *m, uint16_t size);
void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes);
bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end);
// Adds a fielddef to a msgdef. Caller retains its ref on the fielddef.
// May only be done before the msgdef is in a symtab. The fielddef's name and
// number must be set, and the message may not already contain any field with
// this name or number, and this fielddef may not be part of another message,
// otherwise false is returned and no action is performed.
bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef **f, int n);
INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) {
return upb_msgdef_addfields(m, &f, 1);
}
// Sets the layout of all fields according to default rules:
// 1. Hasbits for required fields come first, then optional fields.
// 2. Values are laid out in a way that respects alignment rules.
// 3. The order is chosen to minimize memory usage.
// This should only be called once all fielddefs have been added.
// TODO: will likely want the ability to exclude strings/submessages/arrays.
// TODO: will likely want the ability to define a header size.
void upb_msgdef_layout(upb_msgdef *m);
// Looks up a field by name or number. While these are written to be as fast
// as possible, it will still be faster to cache the results of this lookup if
// possible. These return NULL if no such field is found.
INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) {
upb_itof_ent *e = (upb_itof_ent*)
upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent));
return e ? e->f : NULL;
}
INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, const char *name) {
upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name);
return e ? e->f : NULL;
}
INLINE int upb_msgdef_numfields(upb_msgdef *m) {
return upb_strtable_count(&m->ntof);
}
// Iteration over fields. The order is undefined.
// Iterators are invalidated when a field is added or removed.
// upb_msg_iter i;
// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) {
// upb_fielddef *f = upb_msg_iter_field(i);
// // ...
// }
typedef upb_inttable_iter upb_msg_iter;
upb_msg_iter upb_msg_begin(upb_msgdef *m);
upb_msg_iter upb_msg_next(upb_msgdef *m, upb_msg_iter iter);
INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); }
// Iterator accessor.
INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) {
upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter);
return ent->f;
}
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
/* upb_enumdef ****************************************************************/
typedef struct _upb_enumdef {
upb_def base;
upb_strtable ntoi;
upb_inttable iton;
int32_t defaultval;
} upb_enumdef;
typedef struct {
uint32_t value;
} upb_ntoi_ent;
typedef struct {
bool junk;
char *str;
} upb_iton_ent;
upb_enumdef *upb_enumdef_new(void);
INLINE void upb_enumdef_ref(upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); }
INLINE void upb_enumdef_unref(upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); }
upb_enumdef *upb_enumdef_dup(upb_enumdef *e);
INLINE int32_t upb_enumdef_default(upb_enumdef *e) { return e->defaultval; }
// May only be set before the enumdef is in a symtab.
void upb_enumdef_setdefault(upb_enumdef *e, int32_t val);
// Adds a value to the enumdef. Requires that no existing val has this
// name or number (returns false and does not add if there is). May only
// be called before the enumdef is in a symtab.
bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num);
// Lookups from name to integer and vice-versa.
bool upb_enumdef_ntoil(upb_enumdef *e, char *name, size_t len, int32_t *num);
bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num);
// Caller does not own the returned string.
const char *upb_enumdef_iton(upb_enumdef *e, int32_t num);
// Iteration over name/value pairs. The order is undefined.
// Adding an enum val invalidates any iterators.
// upb_enum_iter i;
// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) {
// // ...
// }
typedef upb_inttable_iter upb_enum_iter;
upb_enum_iter upb_enum_begin(upb_enumdef *e);
upb_enum_iter upb_enum_next(upb_enumdef *e, upb_enum_iter iter);
INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); }
// Iterator accessors.
INLINE char *upb_enum_iter_name(upb_enum_iter iter) {
upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter);
return e->str;
}
INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) {
return upb_inttable_iter_key(iter);
}
/* upb_symtab *****************************************************************/
// A SymbolTable is where upb_defs live. It is empty when first constructed.
// Clients add definitions to the symtab (or replace existing definitions) by
// using a upb_symtab_commit() or calling upb_symtab_add().
// upb_deflist: A little dynamic array for storing a growing list of upb_defs.
typedef struct {
upb_def **defs;
uint32_t len;
uint32_t size;
} upb_deflist;
void upb_deflist_init(upb_deflist *l);
void upb_deflist_uninit(upb_deflist *l);
void upb_deflist_push(upb_deflist *l, upb_def *d);
struct _upb_symtab {
upb_atomic_t refcount;
upb_rwlock_t lock; // Protects all members except the refcount.
upb_strtable symtab; // The symbol table.
upb_deflist olddefs;
};
upb_symtab *upb_symtab_new(void);
INLINE void upb_symtab_ref(upb_symtab *s) { upb_atomic_ref(&s->refcount); }
void upb_symtab_unref(upb_symtab *s);
// Resolves the given symbol using the rules described in descriptor.proto,
// namely:
//
// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like
// scoping rules are used to find the type (i.e. first the nested types
// within this message are searched, then within the parent, on up to the
// root namespace).
//
// If a def is found, the caller owns one ref on the returned def. Otherwise
// returns NULL.
// TODO: make return const
upb_def *upb_symtab_resolve(upb_symtab *s, const char *base, const char *sym);
// Find an entry in the symbol table with this exact name. If a def is found,
// the caller owns one ref on the returned def. Otherwise returns NULL.
// TODO: make return const
upb_def *upb_symtab_lookup(upb_symtab *s, const char *sym);
// Gets an array of pointers to all currently active defs in this symtab. The
// caller owns the returned array (which is of length *count) as well as a ref
// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are
// returned, otherwise only defs of the required type are returned.
// TODO: make return const
upb_def **upb_symtab_getdefs(upb_symtab *s, int *n, upb_deftype_t type);
// Adds the given defs to the symtab, resolving all symbols. Only one def per
// name may be in the list, but defs can replace existing defs in the symtab.
// The entire operation either succeeds or fails. If the operation fails, the
// symtab is unchanged, false is returned, and status indicates the error. The
// caller retains its ref on all defs in all cases.
bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status);
upb_stream: all callbacks registered ahead-of-time. This is a significant change to the upb_stream protocol, and should hopefully be the last significant change. All callbacks are now registered ahead-of-time instead of having delegated callbacks registered at runtime, which makes it much easier to aggressively optimize ahead-of-time (like with a JIT). Other impacts of this change: - You no longer need to have loaded descriptor.proto as a upb_def to load other descriptors! This means the special-case code we used for bootstrapping is no longer necessary, and we no longer need to link the descriptor for descriptor.proto into upb. - A client can now register any upb_value as what will be delivered to their value callback, not just a upb_fielddef*. This should allow for other clients to get more bang out of the streaming decoder. This change unfortunately causes a bit of a performance regression -- I think largely due to highly suboptimal code that GCC generates when structs are returned by value. See: http://blog.reverberate.org/2011/03/19/when-a-compilers-slow-code-actually-bites-you/ On the other hand, once we have a JIT this should no longer matter. Performance numbers: plain.parsestream_googlemessage1.upb_table: 374 -> 396 (5.88) plain.parsestream_googlemessage2.upb_table: 616 -> 449 (-27.11) plain.parsetostruct_googlemessage1.upb_table_byref: 268 -> 269 (0.37) plain.parsetostruct_googlemessage1.upb_table_byval: 215 -> 204 (-5.12) plain.parsetostruct_googlemessage2.upb_table_byref: 307 -> 281 (-8.47) plain.parsetostruct_googlemessage2.upb_table_byval: 297 -> 272 (-8.42) omitfp.parsestream_googlemessage1.upb_table: 423 -> 410 (-3.07) omitfp.parsestream_googlemessage2.upb_table: 679 -> 483 (-28.87) omitfp.parsetostruct_googlemessage1.upb_table_byref: 287 -> 282 (-1.74) omitfp.parsetostruct_googlemessage1.upb_table_byval: 226 -> 219 (-3.10) omitfp.parsetostruct_googlemessage2.upb_table_byref: 315 -> 298 (-5.40) omitfp.parsetostruct_googlemessage2.upb_table_byval: 297 -> 287 (-3.37)
14 years ago
// Frees defs that are no longer active in the symtab and are no longer
// reachable. Such defs are not freed when they are replaced in the symtab
// if they are still reachable from defs that are still referenced.
void upb_symtab_gc(upb_symtab *s);
/* upb_def casts **************************************************************/
// Dynamic casts, for determining if a def is of a particular type at runtime.
#define UPB_DYNAMIC_CAST_DEF(lower, upper) \
struct _upb_ ## lower; /* Forward-declare. */ \
INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \
if(def->type != UPB_DEF_ ## upper) return NULL; \
return (struct _upb_ ## lower*)def; \
}
UPB_DYNAMIC_CAST_DEF(msgdef, MSG);
UPB_DYNAMIC_CAST_DEF(enumdef, ENUM);
UPB_DYNAMIC_CAST_DEF(svcdef, SERVICE);
UPB_DYNAMIC_CAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DYNAMIC_CAST_DEF
// Downcasts, for when some wants to assert that a def is of a particular type.
// These are only checked if we are building debug.
#define UPB_DOWNCAST_DEF(lower, upper) \
struct _upb_ ## lower; /* Forward-declare. */ \
INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \
assert(def->type == UPB_DEF_ ## upper); \
return (struct _upb_ ## lower*)def; \
}
UPB_DOWNCAST_DEF(msgdef, MSG);
UPB_DOWNCAST_DEF(enumdef, ENUM);
UPB_DOWNCAST_DEF(svcdef, SERVICE);
UPB_DOWNCAST_DEF(unresolveddef, UNRESOLVED);
#undef UPB_DOWNCAST_DEF
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UPB_DEF_H_ */