Protocol Buffers - Google's data interchange format (grpc依赖)
https://developers.google.com/protocol-buffers/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
461 lines
18 KiB
461 lines
18 KiB
/* |
|
* upb - a minimalist implementation of protocol buffers. |
|
* |
|
* Copyright (c) 2009-2011 Google Inc. See LICENSE for details. |
|
* Author: Josh Haberman <jhaberman@gmail.com> |
|
* |
|
* Provides a mechanism for creating and linking proto definitions. |
|
* These form the protobuf schema, and are used extensively throughout upb: |
|
* - upb_msgdef: describes a "message" construct. |
|
* - upb_fielddef: describes a message field. |
|
* - upb_enumdef: describes an enum. |
|
* (TODO: definitions of services). |
|
* |
|
* |
|
* Defs go through two distinct phases of life: |
|
* |
|
* 1. MUTABLE: when first created, the properties of the def can be set freely |
|
* (for example a message's name, its list of fields, the name/number of |
|
* fields, etc). During this phase the def is *not* thread-safe, and may |
|
* not be used for any purpose except to set its properties (it can't be |
|
* used to parse anything, create any messages in memory, etc). |
|
* |
|
* 2. IMMUTABLE: after being added to a symtab (which links the defs together) |
|
* the defs become thread-safe and immutable. Programs may only access defs |
|
* through a CONST POINTER during this stage -- upb_symtab will help you out |
|
* with this requirement by only vending const pointers, but you need to |
|
* make sure not to use any non-const pointers you still have sitting |
|
* around. In practice this means that you may not call any setters on the |
|
* defs (or functions that themselves call the setters). If you want to |
|
* modify an existing immutable def, copy it with upb_*_dup(), modify the |
|
* copy, and add the modified def to the symtab (replacing the existing |
|
* def). |
|
* |
|
* You can test for which stage of life a def is in by calling |
|
* upb_def_ismutable(). This is particularly useful for dynamic language |
|
* bindings, which must properly guarantee that the dynamic language cannot |
|
* break the rules laid out above. |
|
* |
|
* It would be possible to make the defs thread-safe during stage 1 by using |
|
* mutexes internally and changing any methods returning pointers to return |
|
* copies instead. This could be important if we are integrating with a VM or |
|
* interpreter that does not naturally serialize access to wrapped objects (for |
|
* example, in the case of Python this is not necessary because of the GIL). |
|
*/ |
|
|
|
#ifndef UPB_DEF_H_ |
|
#define UPB_DEF_H_ |
|
|
|
#include "upb/atomic.h" |
|
#include "upb/table.h" |
|
|
|
#ifdef __cplusplus |
|
extern "C" { |
|
#endif |
|
|
|
// Forward declaration of the symbol table.  The struct itself is defined
// further down in this header; upb_def only needs to hold a pointer to it.
struct _upb_symtab;
typedef struct _upb_symtab upb_symtab;
|
|
// All the different kinds of defs we support.  These correspond 1:1 with
// declarations in a .proto file.
typedef enum {
  UPB_DEF_MSG = 1,
  UPB_DEF_ENUM,
  UPB_DEF_SERVICE,          // Not yet implemented.

  UPB_DEF_ANY = -1,         // Wildcard for upb_symtab_get*()
  UPB_DEF_UNRESOLVED = 99   // Internal-only.
} upb_deftype_t;
|
|
|
|
/* upb_def: base class for defs **********************************************/ |
|
|
|
typedef struct { |
|
char *fqname; // Fully qualified. |
|
upb_symtab *symtab; // Def is mutable iff symtab == NULL. |
|
upb_atomic_t refcount; // Owns a ref on symtab iff (symtab && refcount > 0). |
|
upb_deftype_t type; |
|
} upb_def; |
|
|
|
// Call to ref/unref a def. Can be used at any time, but is not thread-safe |
|
// until the def is in a symtab. While a def is in a symtab, everything |
|
// reachable from that def (the symtab and all defs in the symtab) are |
|
// guaranteed to be alive. |
|
void upb_def_ref(const upb_def *def); |
|
void upb_def_unref(const upb_def *def); |
|
upb_def *upb_def_dup(const upb_def *def); |
|
|
|
// A def is mutable until it has been added to a symtab. |
|
bool upb_def_ismutable(const upb_def *def); |
|
INLINE const char *upb_def_fqname(const upb_def *def) { return def->fqname; } |
|
bool upb_def_setfqname(upb_def *def, const char *fqname); // Only if mutable. |
|
|
|
// Converts a pointer to a derived def (any struct with an embedded member
// named "base") into a pointer to that base member.
#define UPB_UPCAST(ptr) (&(ptr)->base)
|
|
|
|
/* upb_fielddef ***************************************************************/ |
|
|
|
// A upb_fielddef describes a single field in a message. It isn't a full def |
|
// in the sense that it derives from upb_def. It cannot stand on its own; it |
|
// must be part of a upb_msgdef. It is also reference-counted. |
|
typedef struct _upb_fielddef { |
|
struct _upb_msgdef *msgdef; |
|
upb_def *def; // if upb_hasdef(f) |
|
upb_atomic_t refcount; |
|
bool finalized; |
|
|
|
// The following fields may be modified until the def is finalized. |
|
uint8_t type; // Use UPB_TYPE() constants. |
|
uint8_t label; // Use UPB_LABEL() constants. |
|
int16_t hasbit; |
|
uint16_t offset; |
|
bool hasdefault; |
|
bool active; |
|
int32_t number; |
|
char *name; |
|
upb_value defaultval; // Only meaningful for non-repeated scalars and strings. |
|
upb_value fval; |
|
struct _upb_accessor_vtbl *accessor; |
|
const void *default_ptr; |
|
const void *prototype; |
|
} upb_fielddef; |
|
|
|
// Creation, reference counting and copying of fielddefs.
upb_fielddef *upb_fielddef_new(void);
void upb_fielddef_ref(upb_fielddef *f);
void upb_fielddef_unref(upb_fielddef *f);
upb_fielddef *upb_fielddef_dup(upb_fielddef *f);

// A fielddef is mutable until its msgdef has been added to a symtab.
bool upb_fielddef_ismutable(const upb_fielddef *f);
|
|
// Read accessors. May be called any time. |
|
INLINE uint8_t upb_fielddef_type(const upb_fielddef *f) { return f->type; } |
|
INLINE uint8_t upb_fielddef_label(const upb_fielddef *f) { return f->label; } |
|
INLINE int32_t upb_fielddef_number(const upb_fielddef *f) { return f->number; } |
|
INLINE char *upb_fielddef_name(const upb_fielddef *f) { return f->name; } |
|
INLINE upb_value upb_fielddef_default(const upb_fielddef *f) { return f->defaultval; } |
|
INLINE upb_value upb_fielddef_fval(const upb_fielddef *f) { return f->fval; } |
|
INLINE bool upb_fielddef_finalized(const upb_fielddef *f) { return f->finalized; } |
|
INLINE struct _upb_msgdef *upb_fielddef_msgdef(const upb_fielddef *f) { |
|
return f->msgdef; |
|
} |
|
INLINE struct _upb_accessor_vtbl *upb_fielddef_accessor(const upb_fielddef *f) { |
|
return f->accessor; |
|
} |
|
INLINE const char *upb_fielddef_typename(const upb_fielddef *f) { |
|
return f->def ? f->def->fqname : NULL; |
|
} |
|
|
|
// The enum or submessage def for this field, if any. Only meaningful for |
|
// submessage, group, and enum fields (ie. when upb_hassubdef(f) is true). |
|
// Since defs are not linked together until they are in a symtab, this |
|
// will return NULL until the msgdef is in a symtab. |
|
upb_def *upb_fielddef_subdef(const upb_fielddef *f); |
|
|
|
// Write accessors. "Number" and "name" must be set before the fielddef is |
|
// added to a msgdef. For the moment we do not allow these to be set once |
|
// the fielddef is added to a msgdef -- this could be relaxed in the future. |
|
bool upb_fielddef_setnumber(upb_fielddef *f, int32_t number); |
|
bool upb_fielddef_setname(upb_fielddef *f, const char *name); |
|
|
|
// These writers may be called at any time prior to being put in a symtab. |
|
bool upb_fielddef_settype(upb_fielddef *f, uint8_t type); |
|
bool upb_fielddef_setlabel(upb_fielddef *f, uint8_t label); |
|
void upb_fielddef_setdefault(upb_fielddef *f, upb_value value); |
|
void upb_fielddef_setfval(upb_fielddef *f, upb_value fval); |
|
void upb_fielddef_setaccessor(upb_fielddef *f, struct _upb_accessor_vtbl *vtbl); |
|
// The name of the message or enum this field is referring to. Must be found |
|
// at name resolution time (when upb_symtab_add() is called). |
|
bool upb_fielddef_settypename(upb_fielddef *f, const char *name); |
|
|
|
// A variety of tests about the type of a field. |
|
INLINE bool upb_issubmsgtype(upb_fieldtype_t type) { |
|
return type == UPB_TYPE(GROUP) || type == UPB_TYPE(MESSAGE); |
|
} |
|
INLINE bool upb_isstringtype(upb_fieldtype_t type) { |
|
return type == UPB_TYPE(STRING) || type == UPB_TYPE(BYTES); |
|
} |
|
INLINE bool upb_isprimitivetype(upb_fieldtype_t type) { |
|
return !upb_issubmsgtype(type) && !upb_isstringtype(type); |
|
} |
|
INLINE bool upb_issubmsg(const upb_fielddef *f) { return upb_issubmsgtype(f->type); } |
|
INLINE bool upb_isstring(const upb_fielddef *f) { return upb_isstringtype(f->type); } |
|
INLINE bool upb_isseq(const upb_fielddef *f) { return f->label == UPB_LABEL(REPEATED); } |
|
|
|
// Does the type of this field imply that it should contain an associated def? |
|
INLINE bool upb_hassubdef(const upb_fielddef *f) { |
|
return upb_issubmsg(f) || f->type == UPB_TYPE(ENUM); |
|
} |
|
|
|
|
|
/* upb_msgdef *****************************************************************/ |
|
|
|
// Structure that describes a single .proto message type. |
|
typedef struct _upb_msgdef { |
|
upb_def base; |
|
|
|
// Tables for looking up fields by number and name. |
|
upb_inttable itof; // int to field |
|
upb_strtable ntof; // name to field |
|
|
|
// The following fields may be modified until finalized. |
|
uint16_t size; |
|
uint8_t hasbit_bytes; |
|
// The range of tag numbers used to store extensions. |
|
uint32_t extstart, extend; |
|
} upb_msgdef; |
|
|
|
// Hash table entries for looking up fields by name or number. |
|
typedef struct { |
|
bool junk; |
|
upb_fielddef *f; |
|
} upb_itof_ent; |
|
typedef struct { |
|
upb_fielddef *f; |
|
} upb_ntof_ent; |
|
|
|
upb_msgdef *upb_msgdef_new(void); |
|
INLINE void upb_msgdef_unref(const upb_msgdef *md) { upb_def_unref(UPB_UPCAST(md)); } |
|
INLINE void upb_msgdef_ref(const upb_msgdef *md) { upb_def_ref(UPB_UPCAST(md)); } |
|
|
|
// Returns a new msgdef that is a copy of the given msgdef (and a copy of all |
|
// the fields) but with any references to submessages broken and replaced with |
|
// just the name of the submessage. This can be put back into another symtab |
|
// and the names will be re-resolved in the new context. |
|
upb_msgdef *upb_msgdef_dup(const upb_msgdef *m); |
|
|
|
// Read accessors. May be called at any time. |
|
INLINE size_t upb_msgdef_size(const upb_msgdef *m) { return m->size; } |
|
INLINE uint8_t upb_msgdef_hasbit_bytes(const upb_msgdef *m) { |
|
return m->hasbit_bytes; |
|
} |
|
INLINE uint32_t upb_msgdef_extstart(const upb_msgdef *m) { return m->extstart; } |
|
INLINE uint32_t upb_msgdef_extend(const upb_msgdef *m) { return m->extend; } |
|
|
|
// Write accessors. May only be called before the msgdef is in a symtab. |
|
void upb_msgdef_setsize(upb_msgdef *m, uint16_t size); |
|
void upb_msgdef_sethasbit_bytes(upb_msgdef *m, uint16_t bytes); |
|
bool upb_msgdef_setextrange(upb_msgdef *m, uint32_t start, uint32_t end); |
|
|
|
// Adds a set of fields (upb_fielddef objects) to a msgdef. Caller retains its |
|
// ref on the fielddef. May only be done before the msgdef is in a symtab |
|
// (requires upb_def_ismutable(m) for the msgdef). The fielddef's name and |
|
// number must be set, and the message may not already contain any field with |
|
// this name or number, and this fielddef may not be part of another message, |
|
// otherwise false is returned and no action is performed. |
|
bool upb_msgdef_addfields(upb_msgdef *m, upb_fielddef **f, int n); |
|
INLINE bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f) { |
|
return upb_msgdef_addfields(m, &f, 1); |
|
} |
|
|
|
// Sets the layout of all fields according to default rules: |
|
// 1. Hasbits for required fields come first, then optional fields. |
|
// 2. Values are laid out in a way that respects alignment rules. |
|
// 3. The order is chosen to minimize memory usage. |
|
// This should only be called once all fielddefs have been added. |
|
// TODO: will likely want the ability to exclude strings/submessages/arrays. |
|
// TODO: will likely want the ability to define a header size. |
|
void upb_msgdef_layout(upb_msgdef *m); |
|
|
|
// Looks up a field by name or number. While these are written to be as fast |
|
// as possible, it will still be faster to cache the results of this lookup if |
|
// possible. These return NULL if no such field is found. |
|
INLINE upb_fielddef *upb_msgdef_itof(upb_msgdef *m, uint32_t i) { |
|
upb_itof_ent *e = (upb_itof_ent*) |
|
upb_inttable_fastlookup(&m->itof, i, sizeof(upb_itof_ent)); |
|
return e ? e->f : NULL; |
|
} |
|
|
|
INLINE upb_fielddef *upb_msgdef_ntof(upb_msgdef *m, const char *name) { |
|
upb_ntof_ent *e = (upb_ntof_ent*)upb_strtable_lookup(&m->ntof, name); |
|
return e ? e->f : NULL; |
|
} |
|
|
|
INLINE int upb_msgdef_numfields(const upb_msgdef *m) { |
|
return upb_strtable_count(&m->ntof); |
|
} |
|
|
|
// Iteration over fields. The order is undefined. |
|
// TODO: the iteration should be in field order. |
|
// Iterators are invalidated when a field is added or removed. |
|
// upb_msg_iter i; |
|
// for(i = upb_msg_begin(m); !upb_msg_done(i); i = upb_msg_next(m, i)) { |
|
// upb_fielddef *f = upb_msg_iter_field(i); |
|
// // ... |
|
// } |
|
typedef upb_inttable_iter upb_msg_iter; |
|
|
|
upb_msg_iter upb_msg_begin(const upb_msgdef *m); |
|
upb_msg_iter upb_msg_next(const upb_msgdef *m, upb_msg_iter iter); |
|
INLINE bool upb_msg_done(upb_msg_iter iter) { return upb_inttable_done(iter); } |
|
|
|
// Iterator accessor. |
|
INLINE upb_fielddef *upb_msg_iter_field(upb_msg_iter iter) { |
|
upb_itof_ent *ent = (upb_itof_ent*)upb_inttable_iter_value(iter); |
|
return ent->f; |
|
} |
|
|
|
|
|
/* upb_enumdef ****************************************************************/ |
|
|
|
typedef struct _upb_enumdef { |
|
upb_def base; |
|
upb_strtable ntoi; |
|
upb_inttable iton; |
|
int32_t defaultval; |
|
} upb_enumdef; |
|
|
|
// Table entry for the name-to-number direction of an enumdef.
// NOTE(review): the value is stored unsigned although the enum API in this
// header uses int32_t numbers -- presumably converted on the way in and out;
// confirm against the implementation.
typedef struct {
  uint32_t value;
} upb_ntoi_ent;

// Table entry for the number-to-name direction of an enumdef.
typedef struct {
  bool junk;  // NOTE(review): purpose is not evident from this header.
  char *str;
} upb_iton_ent;
|
|
upb_enumdef *upb_enumdef_new(void); |
|
INLINE void upb_enumdef_ref(const upb_enumdef *e) { upb_def_ref(UPB_UPCAST(e)); } |
|
INLINE void upb_enumdef_unref(const upb_enumdef *e) { upb_def_unref(UPB_UPCAST(e)); } |
|
upb_enumdef *upb_enumdef_dup(const upb_enumdef *e); |
|
|
|
// Returns the enum's default value.  Takes a const pointer so that it can be
// called on the const defs handed out by a symtab; it only reads the def.
INLINE int32_t upb_enumdef_default(const upb_enumdef *e) {
  return e->defaultval;
}
|
|
// May only be set if upb_def_ismutable(e). |
|
void upb_enumdef_setdefault(upb_enumdef *e, int32_t val); |
|
|
|
// Adds a value to the enumdef. Requires that no existing val has this |
|
// name or number (returns false and does not add if there is). May only |
|
// be called before the enumdef is in a symtab. |
|
bool upb_enumdef_addval(upb_enumdef *e, char *name, int32_t num); |
|
|
|
// Lookups from name to integer and vice-versa. |
|
bool upb_enumdef_ntoil(upb_enumdef *e, char *name, size_t len, int32_t *num); |
|
bool upb_enumdef_ntoi(upb_enumdef *e, char *name, int32_t *num); |
|
// Caller does not own the returned string. |
|
const char *upb_enumdef_iton(upb_enumdef *e, int32_t num); |
|
|
|
// Iteration over name/value pairs. The order is undefined. |
|
// Adding an enum val invalidates any iterators. |
|
// upb_enum_iter i; |
|
// for(i = upb_enum_begin(e); !upb_enum_done(i); i = upb_enum_next(e, i)) { |
|
// // ... |
|
// } |
|
typedef upb_inttable_iter upb_enum_iter; |
|
|
|
upb_enum_iter upb_enum_begin(const upb_enumdef *e); |
|
upb_enum_iter upb_enum_next(const upb_enumdef *e, upb_enum_iter iter); |
|
INLINE bool upb_enum_done(upb_enum_iter iter) { return upb_inttable_done(iter); } |
|
|
|
// Iterator accessors. |
|
INLINE char *upb_enum_iter_name(upb_enum_iter iter) { |
|
upb_iton_ent *e = (upb_iton_ent*)upb_inttable_iter_value(iter); |
|
return e->str; |
|
} |
|
INLINE int32_t upb_enum_iter_number(upb_enum_iter iter) { |
|
return upb_inttable_iter_key(iter); |
|
} |
|
|
|
|
|
/* upb_deflist ****************************************************************/ |
|
|
|
// upb_deflist is an internal-only dynamic array for storing a growing list of |
|
// upb_defs. |
|
typedef struct { |
|
upb_def **defs; |
|
uint32_t len; |
|
uint32_t size; |
|
} upb_deflist; |
|
|
|
void upb_deflist_init(upb_deflist *l); |
|
void upb_deflist_uninit(upb_deflist *l); |
|
void upb_deflist_push(upb_deflist *l, upb_def *d); |
|
|
|
|
|
/* upb_symtab *****************************************************************/ |
|
|
|
// A symtab (symbol table) is where upb_defs live. It is empty when first |
|
// constructed. Clients add definitions to the symtab (or replace existing |
|
// definitions) by calling upb_symtab_add(). |
|
struct _upb_symtab { |
|
upb_atomic_t refcount; |
|
upb_rwlock_t lock; // Protects all members except the refcount. |
|
upb_strtable symtab; // The symbol table. |
|
upb_deflist olddefs; |
|
}; |
|
|
|
// Creation and reference counting of symtabs.
upb_symtab *upb_symtab_new(void);
void upb_symtab_ref(const upb_symtab *s);
void upb_symtab_unref(const upb_symtab *s);
|
|
// Resolves the given symbol using the rules described in descriptor.proto, |
|
// namely: |
|
// |
|
// If the name starts with a '.', it is fully-qualified. Otherwise, C++-like |
|
// scoping rules are used to find the type (i.e. first the nested types |
|
// within this message are searched, then within the parent, on up to the |
|
// root namespace). |
|
// |
|
// If a def is found, the caller owns one ref on the returned def. Otherwise |
|
// returns NULL. |
|
const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, |
|
const char *sym); |
|
|
|
// Find an entry in the symbol table with this exact name. If a def is found, |
|
// the caller owns one ref on the returned def. Otherwise returns NULL. |
|
const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym); |
|
const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym); |
|
|
|
// Gets an array of pointers to all currently active defs in this symtab. The |
|
// caller owns the returned array (which is of length *count) as well as a ref |
|
// to each symbol inside. If type is UPB_DEF_ANY then defs of all types are |
|
// returned, otherwise only defs of the required type are returned. |
|
const upb_def **upb_symtab_getdefs(const upb_symtab *s, int *n, upb_deftype_t type); |
|
|
|
// Adds the given defs to the symtab, resolving all symbols. Only one def per |
|
// name may be in the list, but defs can replace existing defs in the symtab. |
|
// The entire operation either succeeds or fails. If the operation fails, the |
|
// symtab is unchanged, false is returned, and status indicates the error. The |
|
// caller retains its ref on all defs in all cases. |
|
bool upb_symtab_add(upb_symtab *s, upb_def **defs, int n, upb_status *status); |
|
|
|
// Frees defs that are no longer active in the symtab and are no longer |
|
// reachable. Such defs are not freed when they are replaced in the symtab |
|
// if they are still reachable from defs that are still referenced. |
|
void upb_symtab_gc(upb_symtab *s); |
|
|
|
|
|
/* upb_def casts **************************************************************/ |
|
|
|
// Dynamic casts, for determining if a def is of a particular type at runtime. |
|
// Downcasts, for when some wants to assert that a def is of a particular type. |
|
// These are only checked if we are building debug. |
|
#define UPB_DEF_CASTS(lower, upper) \ |
|
struct _upb_ ## lower; /* Forward-declare. */ \ |
|
INLINE struct _upb_ ## lower *upb_dyncast_ ## lower(upb_def *def) { \ |
|
if(def->type != UPB_DEF_ ## upper) return NULL; \ |
|
return (struct _upb_ ## lower*)def; \ |
|
} \ |
|
INLINE const struct _upb_ ## lower *upb_dyncast_ ## lower ## _const(const upb_def *def) { \ |
|
if(def->type != UPB_DEF_ ## upper) return NULL; \ |
|
return (const struct _upb_ ## lower*)def; \ |
|
} \ |
|
INLINE struct _upb_ ## lower *upb_downcast_ ## lower(upb_def *def) { \ |
|
assert(def->type == UPB_DEF_ ## upper); \ |
|
return (struct _upb_ ## lower*)def; \ |
|
} \ |
|
INLINE const struct _upb_ ## lower *upb_downcast_ ## lower ## _const(const upb_def *def) { \ |
|
assert(def->type == UPB_DEF_ ## upper); \ |
|
return (const struct _upb_ ## lower*)def; \ |
|
} |
|
UPB_DEF_CASTS(msgdef, MSG); |
|
UPB_DEF_CASTS(enumdef, ENUM); |
|
UPB_DEF_CASTS(svcdef, SERVICE); |
|
UPB_DEF_CASTS(unresolveddef, UNRESOLVED); |
|
#undef UPB_DEF_CASTS |
|
|
|
#ifdef __cplusplus |
|
} /* extern "C" */ |
|
#endif |
|
|
|
#endif /* UPB_DEF_H_ */
|
|
|