Major refactoring of upb_msg. Temporary functionality regression.

There is significant refactoring here, as well as some more trivial
name changes.  upb_msg has become upb_msgdef, to reflect the fact
that a upb_msg is not *itself* a message, it describes a message.
There are other renamings, such as upb_parse_state -> upb_stream_parser.

More significantly, the upb_msg class and parser have been refactored
to reflect my recent realization about how memory management should
work.  upb_msg now has no memory management, and a memory management
scheme (that works beautifully with multiple language runtimes) will
be layered on top of it.

This iteration has the new, read-only upb_msg.  upb_mm_msg (a
memory-managed message class) will come in the next change.
pull/13171/head
Joshua Haberman 16 years ago
parent 952ea88db2
commit 8fa6a92f53
  1. 2
      src/upb.h
  2. 92
      src/upb_array.h
  3. 14
      src/upb_context.c
  4. 2
      src/upb_context.h
  5. 422
      src/upb_msg.c
  6. 457
      src/upb_msg.h
  7. 21
      src/upb_parse.c
  8. 17
      src/upb_parse.h
  9. 4
      src/upb_table.c
  10. 36
      tools/upbc.c

@ -133,7 +133,7 @@ INLINE union upb_value upb_deref(union upb_value_ptr ptr, upb_field_type_t t) {
}
union upb_symbol_ref {
struct upb_msg *msg;
struct upb_msgdef *msg;
struct upb_enum *_enum;
struct upb_svc *svc;
};

@ -2,32 +2,63 @@
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
* Defines an in-memory array type. TODO: more documentation.
*
* Defines an in-memory, polymorphic array type. The array does not know its
* own type -- its owner must know that information out-of-band.
*
* upb_arrays are memory-managed in the sense that they contain a pointer
* ("mem") to memory that is "owned" by the array (which may be NULL if the
* array owns no memory). There is a separate pointer ("elements") that points
* to the array's currently "effective" memory, which is either equal to
* mem (if the array's current value is memory we own) or not (if the array is
* referencing other memory).
*
* If the array is referencing other memory, it is up to the array's owner to
* ensure that the other memory remains valid for as long as the array is
* referencing it.
*
*/
#ifndef UPB_ARRAY_H_
#define UPB_ARRAY_H_
#include <stdlib.h>
#include "upb.h"
#ifdef __cplusplus
extern "C" {
#endif
struct upb_string;
#include "upb.h"
/* upb_arrays can be at most 2**32 elements long. */
typedef uint32_t upb_arraylen_t;
/* Represents an array (a repeated field) of any type. The interpretation of
* the data in the array depends on the type. */
struct upb_array {
union upb_value_ptr elements;
uint32_t len; /* Measured in elements. */
void *mem;
upb_arraylen_t len; /* Number of elements in "elements". */
upb_arraylen_t size; /* Memory allocated in "mem" (measured in elements) */
};
/* Returns a pointer to an array element. */
/* Puts "arr" into the empty state: it owns no memory ("mem" is NULL),
 * references no elements ("elements" is NULL), and both its length and its
 * allocated capacity are zero.  Does not free anything "arr" held before. */
INLINE void upb_array_init(struct upb_array *arr)
{
  arr->len = 0;
  arr->size = 0;
  arr->mem = NULL;
  arr->elements._void = NULL;
}
/* Releases the memory owned by the array ("mem").  The upb_array struct
 * itself is not freed, and "elements" is left untouched -- if it was equal to
 * "mem" it is dangling after this call. */
INLINE void upb_array_free(struct upb_array *arr)
{
free(arr->mem);
}
/* Returns a pointer to an array element. Does not perform a bounds check! */
INLINE union upb_value_ptr upb_array_getelementptr(
struct upb_array *arr, uint32_t n, upb_field_type_t type)
struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
{
union upb_value_ptr ptr;
ptr._void = (void*)((char*)arr->elements._void + n*upb_type_info[type].size);
@ -35,16 +66,55 @@ INLINE union upb_value_ptr upb_array_getelementptr(
}
INLINE union upb_value upb_array_getelement(
struct upb_array *arr, uint32_t n, upb_field_type_t type)
struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
{
return upb_deref(upb_array_getelementptr(arr, n, type), type);
}
/* Rounds "v" up to the nearest power of two (a value that is already a power
 * of two is returned unchanged).  Because the arithmetic is modulo 2**32, the
 * result is 0 both for v == 0 and for any v greater than 2**31. */
INLINE uint32_t upb_round_up_to_pow2(uint32_t v)
{
  /* cf. http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
  uint32_t smeared = v - 1;
  /* Copy the highest set bit into every lower bit position. */
  for(unsigned int shift = 1; shift <= 16; shift <<= 1)
    smeared |= smeared >> shift;
  return smeared + 1;
}
/* Resizes array to be "newlen" elements long and ensures we have write access
 * to the array (reallocating if necessary).  "type" selects the element size
 * via upb_type_info.
 *
 * If the array was referencing memory it does not own, the first
 * min(old len, newlen) elements are copied into owned memory and the
 * reference is dropped.  Newly exposed elements are NOT initialized.
 *
 * Returns true iff we were referencing memory for the array and dropped the
 * reference.  Aborts the process if memory cannot be allocated. */
INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen,
                             upb_field_type_t type)
{
  size_t type_size = upb_type_info[type].size;
  /* True when "elements" points at memory other than our own "mem". */
  bool ref = arr->elements._void != arr->mem;
  if(arr->size < newlen) {
    /* Grow to the next power of two, with a floor of 4 elements.  The
     * rounding wraps to 0 once newlen exceeds 2**31; in that case fall back
     * to the exact length so the capacity is never smaller than newlen. */
    upb_arraylen_t newsize = upb_round_up_to_pow2(newlen);
    if(newsize < newlen) newsize = newlen;
    if(newsize < 4) newsize = 4;
    /* Keep the old pointer until realloc is known to have succeeded. */
    void *newmem = realloc(arr->mem, newsize * type_size);
    if(!newmem) abort();
    arr->mem = newmem;
    arr->size = newsize;
  }
  if(ref) {
    /* Need to take referenced data and copy it to memory we own. */
    size_t ncopy = (arr->len < newlen ? arr->len : newlen) * type_size;
    /* Skip zero-length copies: either pointer may be NULL in that case. */
    if(ncopy > 0) memcpy(arr->mem, arr->elements._void, ncopy);
  }
  arr->elements._void = arr->mem;
  arr->len = newlen;
  return ref;
}
/* These are all overlays on upb_array, pointers between them can be cast. */
#define UPB_DEFINE_ARRAY_TYPE(name, type) \
struct name ## _array { \
type *elements; \
uint32_t len; \
type *mem; \
upb_arraylen_t len; \
upb_arraylen_t size; \
};
UPB_DEFINE_ARRAY_TYPE(upb_double, double)
@ -57,12 +127,14 @@ UPB_DEFINE_ARRAY_TYPE(upb_bool, bool)
UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
UPB_DEFINE_ARRAY_TYPE(upb_msg, void*)
/* Defines an array of a specific message type. */
/* Defines an array of a specific message type (an overlay of upb_array). */
#define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
#define UPB_DEFINE_MSG_ARRAY(msg_type) \
UPB_MSG_ARRAY(msg_type) { \
msg_type **elements; \
uint32_t len; \
msg_type **mem; \
upb_arraylen_t len; \
upb_arraylen_t size; \
};
#ifdef __cplusplus

@ -46,7 +46,7 @@ static void free_symtab(struct upb_strtable *t)
struct upb_symtab_entry *e = upb_strtable_begin(t);
for(; e; e = upb_strtable_next(t, &e->e)) {
switch(e->type) {
case UPB_SYM_MESSAGE: upb_msg_free(e->ref.msg); break;
case UPB_SYM_MESSAGE: upb_msgdef_free(e->ref.msg); break;
case UPB_SYM_ENUM: upb_enum_free(e->ref._enum); break;
default: break; /* TODO */
}
@ -60,7 +60,7 @@ void upb_context_free(struct upb_context *c)
{
free_symtab(&c->symtab);
for(size_t i = 0; i < c->fds_len; i++)
upb_msgdata_free(c->fds[i], c->fds_msg, true);
upb_msg_free(c->fds[i], c->fds_msg);
free_symtab(&c->psymtab);
free(c->fds);
}
@ -188,7 +188,7 @@ static bool insert_message(struct upb_strtable *t,
e.e.key = fqname;
e.type = UPB_SYM_MESSAGE;
e.ref.msg = malloc(sizeof(*e.ref.msg));
if(!upb_msg_init(e.ref.msg, d, fqname, sort)) {
if(!upb_msgdef_init(e.ref.msg, d, fqname, sort)) {
free(fqname.ptr);
return false;
}
@ -232,9 +232,9 @@ bool addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs,
if(upb_strtable_lookup(existingdefs, &e->e.key))
return false; /* Redefinition prohibited. */
if(e->type == UPB_SYM_MESSAGE) {
struct upb_msg *m = e->ref.msg;
struct upb_msgdef *m = e->ref.msg;
for(unsigned int i = 0; i < m->num_fields; i++) {
struct upb_msg_field *f = &m->fields[i];
struct upb_msg_fielddef *f = &m->fields[i];
google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i];
union upb_symbol_ref ref;
if(fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE ||
@ -247,7 +247,7 @@ bool addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs,
else
continue; /* No resolving necessary. */
if(!ref.msg) return false; /* Ref. to undefined symbol. */
upb_msg_ref(m, f, ref);
upb_msgdef_ref(m, f, ref);
}
}
}
@ -280,7 +280,7 @@ bool upb_context_addfds(struct upb_context *c,
bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) {
google_protobuf_FileDescriptorSet *fds =
upb_alloc_and_parse(c->fds_msg, fds_str, false);
upb_msg_parsenew(c->fds_msg, fds_str);
if(!fds) return false;
if(!upb_context_addfds(c, fds)) return false;

@ -40,7 +40,7 @@ struct upb_symtab_entry {
struct upb_context {
struct upb_strtable symtab; /* The context's symbol table. */
struct upb_strtable psymtab; /* Private symbols, for internal use. */
struct upb_msg *fds_msg; /* This is in psymtab, ptr here for convenience. */
struct upb_msgdef *fds_msg; /* In psymtab, ptr here for convenience. */
/* A list of the FileDescriptorProtos we own (from having parsed them
* ourselves) and must free on destruction. */

@ -10,6 +10,7 @@
#include "upb_msg.h"
#include "upb_parse.h"
#include "upb_serialize.h"
#include "upb_text.h"
/* Rounds p up to the next multiple of t. */
#define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t))))
@ -35,13 +36,13 @@ static int compare_fields(const void *e1, const void *e2) {
}
}
void upb_msg_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num)
void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num)
{
qsort(fds, num, sizeof(void*), compare_fields);
}
bool upb_msg_init(struct upb_msg *m, google_protobuf_DescriptorProto *d,
struct upb_string fqname, bool sort)
bool upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d,
struct upb_string fqname, bool sort)
{
/* TODO: more complete validation. */
if(!d->set_flags.has.field) return false;
@ -65,11 +66,11 @@ bool upb_msg_init(struct upb_msg *m, google_protobuf_DescriptorProto *d,
/* We count on the caller to keep this pointer alive. */
m->field_descriptors[i] = d->field->elements[i];
}
if(sort) upb_msg_sortfds(m->field_descriptors, m->num_fields);
if(sort) upb_msgdef_sortfds(m->field_descriptors, m->num_fields);
size_t max_align = 0;
for(unsigned int i = 0; i < m->num_fields; i++) {
struct upb_msg_field *f = &m->fields[i];
struct upb_msg_fielddef *f = &m->fields[i];
google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i];
struct upb_type_info *type_info = &upb_type_info[fd->type];
@ -98,7 +99,7 @@ bool upb_msg_init(struct upb_msg *m, google_protobuf_DescriptorProto *d,
return true;
}
void upb_msg_free(struct upb_msg *m)
void upb_msgdef_free(struct upb_msgdef *m)
{
upb_inttable_free(&m->fields_by_num);
upb_strtable_free(&m->fields_by_name);
@ -106,8 +107,8 @@ void upb_msg_free(struct upb_msg *m)
free(m->field_descriptors);
}
void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f,
union upb_symbol_ref ref) {
void upb_msgdef_ref(struct upb_msgdef *m, struct upb_msg_fielddef *f,
union upb_symbol_ref ref) {
struct google_protobuf_FieldDescriptorProto *d =
upb_msg_field_descriptor(f, m);
struct upb_fieldsbynum_entry *int_e = upb_inttable_fast_lookup(
@ -120,175 +121,146 @@ void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f,
str_e->f.ref = ref;
}
/* Memory management *********************************************************/
/* Simple, one-shot parsing ***************************************************/
/* Our memory management scheme is as follows:
*
* All pointers to dynamic memory (strings, arrays, and submessages) are
* expected to be good pointers if they are non-zero, *regardless* of whether
* that field's bit is set! That way we can reuse the memory even if the field
* is unset and then set later. */
/* For our memory-managed strings and arrays we store extra information
* (compared to a plain upb_string or upb_array). But the data starts with
* a upb_string and upb_array, so we can overlay onto the regular types. */
struct mm_upb_string {
struct upb_string s;
/* Track the allocated size, so we know when we need to reallocate. */
uint32_t size;
/* Our allocated data. Stored separately so that clients can point s.ptr to
* a referenced string, but we can reuse this data later. */
char *data;
};
/* Allocates a new message of type "md" (md->size bytes) with every byte
 * zeroed, which clears all pointers, values, and set bits.  Returns NULL if
 * the allocation fails. */
void *upb_msg_new(struct upb_msgdef *md)
{
  /* calloc zero-fills and, unlike malloc followed by memset, never writes
   * through a NULL pointer when the allocation fails. */
  return calloc(1, md->size);
}
struct mm_upb_array {
struct upb_array a;
/* Track the allocated size, so we know when we need to reallocate. */
uint32_t size;
};
/* Allocation callbacks. */
/* Array callback used by one-shot parsing.  Reuses "existingval" when it is
 * non-NULL, otherwise allocates and initializes a fresh upb_array; either way
 * the array is then resized to "len" elements of f's type and returned. */
static struct upb_array *getarray_cb(void *msg, struct upb_msgdef *md,
                                     struct upb_array *existingval,
                                     struct upb_msg_fielddef *f,
                                     upb_arraylen_t len)
{
  /* The enclosing message and its definition are not needed here. */
  (void)md;
  (void)msg;
  struct upb_array *result = existingval;
  if(result == NULL) {
    result = malloc(sizeof(*result));
    upb_array_init(result);
  }
  upb_array_resize(result, len, f->type);
  return result;
}
static uint32_t round_up_to_pow2(uint32_t v)
static struct upb_string *getstring_cb(void *msg, struct upb_msgdef *md,
struct upb_string *existingval,
struct upb_msg_fielddef *f, size_t len)
{
/* cf. http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
v--;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
v++;
return v;
(void)msg;
(void)md;
(void)existingval; /* Don't care -- always zero. */
(void)f;
struct upb_string *str = malloc(sizeof(*str));
str->ptr = malloc(len);
return str;
}
void *upb_msgdata_new(struct upb_msg *m)
static void *getmsg_cb(void *msg, struct upb_msgdef *md,
void *existingval, struct upb_msg_fielddef *f)
{
void *msg = malloc(m->size);
memset(msg, 0, m->size); /* Clear all pointers, values, and set bits. */
return msg;
(void)msg;
(void)md;
(void)existingval; /* Don't care -- always zero. */
return upb_msg_new(f->ref.msg);
}
static void free_value(union upb_value_ptr p, struct upb_msg_field *f,
bool free_submsgs)
void *upb_msg_parsenew(struct upb_msgdef *md, struct upb_string *s)
{
switch(f->type) {
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: {
struct mm_upb_string *mm_str = (void*)*p.str;
if(mm_str) {
free(mm_str->data);
free(mm_str);
}
break;
}
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE:
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP:
if(free_submsgs) upb_msgdata_free(*p.msg, f->ref.msg, free_submsgs);
break;
default: break; /* For non-dynamic types, do nothing. */
struct upb_msg_parser mp;
void *msg = upb_msg_new(md);
upb_msg_parser_reset(&mp, msg, md, false);
mp.getarray_cb = getarray_cb;
mp.getstring_cb = getstring_cb;
mp.getmsg_cb = getmsg_cb;
size_t read;
upb_status_t status = upb_msg_parser_parse(&mp, s->ptr, s->byte_len, &read);
if(status == UPB_STATUS_OK && read == s->byte_len) {
return msg;
} else {
upb_msg_free(msg, md);
return NULL;
}
}
void upb_msgdata_free(void *data, struct upb_msg *m, bool free_submsgs)
/* For simple, one-shot parsing we assume that a dynamic field exists (and
* needs to be freed) iff its set bit is set. */
static void free_value(union upb_value_ptr p, struct upb_msg_fielddef *f)
{
if(upb_isarray(f)) {
free((*p.str)->ptr);
free(*p.str);
} else if(upb_issubmsg(f)) {
upb_msg_free(*p.msg, f->ref.msg);
}
}
void upb_msg_free(void *data, struct upb_msgdef *m)
{
if(!data) return; /* A very free-like thing to do. */
for(unsigned int i = 0; i < m->num_fields; i++) {
struct upb_msg_field *f = &m->fields[i];
struct upb_msg_fielddef *f = &m->fields[i];
if(!upb_msg_isset(data, f)) continue;
union upb_value_ptr p = upb_msg_getptr(data, f);
if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
if(*p.arr) {
for(uint32_t j = 0; j < (*p.arr)->len; j++)
free_value(upb_array_getelementptr(*p.arr, j, f->type),
f, free_submsgs);
free((*p.arr)->elements._void);
free(*p.arr);
}
if(upb_isarray(f)) {
assert(*p.arr);
for(upb_arraylen_t j = 0; j < (*p.arr)->len; j++)
free_value(upb_array_getelementptr(*p.arr, j, f->type), f);
free((*p.arr)->elements._void);
free(*p.arr);
} else {
free_value(p, f, free_submsgs);
free_value(p, f);
}
}
free(data);
}
void upb_msg_reuse_str(struct upb_string **str, uint32_t size)
{
if(!*str) {
*str = malloc(sizeof(struct mm_upb_string));
memset(*str, 0, sizeof(struct mm_upb_string));
}
struct mm_upb_string *s = (void*)*str;
if(s->size < size) {
size = max(16, round_up_to_pow2(size));
s->data = realloc(s->data, size);
s->size = size;
}
s->s.ptr = s->data;
}
/* Parsing. ******************************************************************/
void upb_msg_reuse_array(struct upb_array **arr, uint32_t size, upb_field_type_t t)
/* Helper function that returns a pointer to where the next value for field "f"
* should be stored, taking into account whether f is an array that may need to
* be allocated or resized. */
static union upb_value_ptr get_value_ptr(void *data, struct upb_msgdef *m,
struct upb_msg_fielddef *f,
upb_msg_getarray_cb_t getarray_cb)
{
if(!*arr) {
*arr = malloc(sizeof(struct mm_upb_array));
memset(*arr, 0, sizeof(struct mm_upb_array));
}
struct mm_upb_array *a = (void*)*arr;
if(a->size < size) {
size = max(4, round_up_to_pow2(size));
size_t type_size = upb_type_info[t].size;
a->a.elements._void = realloc(a->a.elements._void, size * type_size);
/* Zero any newly initialized memory. */
memset(UPB_INDEX(a->a.elements._void, a->size, type_size), 0,
(size - a->size) * type_size);
a->size = size;
union upb_value_ptr p = upb_msg_getptr(data, f);
if(upb_isarray(f)) {
size_t len = upb_msg_isset(data, f) ? (*p.arr)->len : 0;
*p.arr = getarray_cb(data, m, *p.arr, f, len + 1);
p = upb_array_getelementptr(*p.arr, len, f->type);
}
return p;
}
void upb_msg_reuse_strref(struct upb_string **str) { upb_msg_reuse_str(str, 0); }
void upb_msg_reuse_submsg(void **msg, struct upb_msg *m)
{
if(!*msg) *msg = upb_msgdata_new(m);
}
/* Parsing. ******************************************************************/
/* Callbacks for the stream parser. */
static upb_field_type_t tag_cb(void *udata, struct upb_tag *tag,
void **user_field_desc)
{
struct upb_msg_parse_state *s = udata;
struct upb_msg_field *f = upb_msg_fieldbynum(s->top->m, tag->field_number);
struct upb_msg_parser *mp = udata;
struct upb_msg_fielddef *f = upb_msg_fieldbynum(mp->top->m, tag->field_number);
if(!f || !upb_check_type(tag->wire_type, f->type))
return 0; /* Skip unknown or fields of the wrong type. */
*user_field_desc = f;
return f->type;
}
/* Returns a pointer to where the next value for field "f" should be stored,
* taking into account whether f is an array that may need to be reallocatd. */
static union upb_value_ptr get_value_ptr(void *data, struct upb_msg_field *f)
{
union upb_value_ptr p = upb_msg_getptr(data, f);
if(upb_isarray(f)) {
size_t len = upb_msg_isset(data, f) ? (*p.arr)->len : 0;
upb_msg_reuse_array(p.arr, len+1, f->type);
(*p.arr)->len = len + 1;
assert(p._void);
p = upb_array_getelementptr(*p.arr, len, f->type);
assert(p._void);
}
assert(p._void);
return p;
}
static upb_status_t value_cb(void *udata, uint8_t *buf, uint8_t *end,
void *user_field_desc, uint8_t **outbuf)
{
struct upb_msg_parse_state *s = udata;
struct upb_msg_field *f = user_field_desc;
union upb_value_ptr p = get_value_ptr(s->top->data, f);
upb_msg_set(s->top->data, f);
struct upb_msg_parser *mp = udata;
struct upb_msg_fielddef *f = user_field_desc;
void *msg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(msg, mp->top->m, f, mp->getarray_cb);
upb_msg_set(msg, f);
UPB_CHECK(upb_parse_value(buf, end, f->type, p, outbuf));
//google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, s->top->m);
//upb_text_printfield(&s->p, *fd->name, f->type, upb_deref(p, f->type), stdout);
return UPB_STATUS_OK;
}
@ -296,60 +268,53 @@ static void str_cb(void *udata, uint8_t *str,
size_t avail_len, size_t total_len,
void *udesc)
{
struct upb_msg_parse_state *s = udata;
struct upb_msg_field *f = udesc;
union upb_value_ptr p = get_value_ptr(s->top->data, f);
upb_msg_set(s->top->data, f);
struct upb_msg_parser *mp = udata;
struct upb_msg_fielddef *f = udesc;
void *msg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(msg, mp->top->m, f, mp->getarray_cb);
upb_msg_set(msg, f);
if(avail_len != total_len) abort(); /* TODO: support streaming. */
if(s->byref) {
upb_msg_reuse_strref(p.str);
if(avail_len == total_len && mp->byref) {
*p.str = mp->getstring_cb(msg, mp->top->m, *p.str, f, 0);
(*p.str)->ptr = (char*)str;
(*p.str)->byte_len = avail_len;
} else {
upb_msg_reuse_str(p.str, avail_len);
*p.str = mp->getstring_cb(msg, mp->top->m, *p.str, f, total_len);
memcpy((*p.str)->ptr, str, avail_len);
(*p.str)->byte_len = avail_len;
}
//google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, s->top->m);
//upb_text_printfield(&s->p, *fd->name, f->type, upb_deref(p, fd->type), stdout);
}
static void submsg_start_cb(void *udata, void *user_field_desc)
{
struct upb_msg_parse_state *s = udata;
struct upb_msg_field *f = user_field_desc;
struct upb_msg *m = f->ref.msg;
void *data = s->top->data; /* The message from the existing frame. */
union upb_value_ptr p = get_value_ptr(data, f);
upb_msg_reuse_submsg(p.msg, m);
if(!upb_msg_isset(data, f) || !s->merge)
upb_msg_clear(*p.msg, m);
upb_msg_set(data, f);
s->top++;
s->top->m = m;
s->top->data = *p.msg;
//upb_text_push(&s->p, *s->top->m->descriptor->name, stdout);
struct upb_msg_parser *mp = udata;
struct upb_msg_fielddef *f = user_field_desc;
struct upb_msgdef *oldmsgdef = mp->top->m;
void *oldmsg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(oldmsg, oldmsgdef, f, mp->getarray_cb);
upb_msg_set(oldmsg, f);
*p.msg = mp->getmsg_cb(oldmsg, oldmsgdef, *p.msg, f);
mp->top++;
mp->top->m = f->ref.msg;
mp->top->msg = *p.msg;
}
static void submsg_end_cb(void *udata)
{
struct upb_msg_parse_state *s = udata;
s->top--;
//upb_text_pop(&s->p, stdout);
struct upb_msg_parser *mp = udata;
mp->top--;
}
void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *msg,
struct upb_msg *m, bool merge, bool byref)
/* Externally-visible functions for the msg parser. */
void upb_msg_parser_reset(struct upb_msg_parser *s, void *msg,
struct upb_msgdef *m, bool byref)
{
upb_parse_reset(&s->s, s);
upb_text_printer_init(&s->p, false);
s->merge = merge;
upb_stream_parser_reset(&s->s, s);
s->byref = byref;
if(!merge && msg == NULL) msg = upb_msgdata_new(m);
upb_msg_clear(msg, m);
s->top = s->stack;
s->top->m = m;
s->top->data = msg;
s->top->msg = msg;
s->s.tag_cb = tag_cb;
s->s.value_cb = value_cb;
s->s.str_cb = str_cb;
@ -357,38 +322,10 @@ void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *msg,
s->s.submsg_end_cb = submsg_end_cb;
}
void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg,
struct upb_msg *m, bool merge, bool byref)
{
upb_parse_init(&s->s, s);
upb_msg_parse_reset(s, msg, m, merge, byref);
}
void upb_msg_parse_free(struct upb_msg_parse_state *s)
{
upb_parse_free(&s->s);
}
upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
void *data, size_t len, size_t *read)
upb_status_t upb_msg_parser_parse(struct upb_msg_parser *s,
void *data, size_t len, size_t *read)
{
return upb_parse(&s->s, data, len, read);
}
void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *str, bool byref)
{
struct upb_msg_parse_state s;
void *msg = upb_msgdata_new(m);
upb_msg_parse_init(&s, msg, m, false, byref);
size_t read;
upb_status_t status = upb_msg_parse(&s, str->ptr, str->byte_len, &read);
upb_msg_parse_free(&s);
if(status == UPB_STATUS_OK && read == str->byte_len) {
return msg;
} else {
upb_msg_free(msg);
return NULL;
}
return upb_stream_parser_parse(&s->s, data, len, read);
}
/* Serialization. ************************************************************/
@ -405,12 +342,12 @@ struct upb_msgsizes {
/* Declared below -- this and get_valuesize are mutually recursive. */
static size_t get_msgsize(struct upb_msgsizes *sizes, void *data,
struct upb_msg *m);
struct upb_msgdef *m);
/* Returns a size of a value as it will be serialized. Does *not* include
* the size of the tag -- that is already accounted for. */
static size_t get_valuesize(struct upb_msgsizes *sizes, union upb_value_ptr p,
struct upb_msg_field *f,
struct upb_msg_fielddef *f,
google_protobuf_FieldDescriptorProto *fd)
{
switch(f->type) {
@ -448,12 +385,12 @@ static size_t get_valuesize(struct upb_msgsizes *sizes, union upb_value_ptr p,
* message. However it also stores the results of each level of the recursion
* in sizes, because we need all of this intermediate information later. */
static size_t get_msgsize(struct upb_msgsizes *sizes, void *data,
struct upb_msg *m)
struct upb_msgdef *m)
{
size_t size = 0;
/* We iterate over fields and arrays in reverse order. */
for(int32_t i = m->num_fields - 1; i >= 0; i--) {
struct upb_msg_field *f = &m->fields[i];
struct upb_msg_fielddef *f = &m->fields[i];
google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
if(!upb_msg_isset(data, f)) continue;
union upb_value_ptr p = upb_msg_getptr(data, f);
@ -480,7 +417,7 @@ static size_t get_msgsize(struct upb_msgsizes *sizes, void *data,
return size;
}
void upb_msgsizes_read(struct upb_msgsizes *sizes, void *data, struct upb_msg *m)
void upb_msgsizes_read(struct upb_msgsizes *sizes, void *data, struct upb_msgdef *m)
{
get_msgsize(sizes, data, m);
}
@ -507,7 +444,7 @@ struct upb_msg_serialize_state {
struct {
int field_iter;
int elem_iter;
struct upb_msg *m;
struct upb_msgdef *m;
void *msg;
} stack[UPB_MAX_NESTING], *top, *limit;
};
@ -523,7 +460,7 @@ void upb_msg_serialize_free(struct upb_msg_serialize_state *s)
}
void upb_msg_serialize_init(struct upb_msg_serialize_state *s, void *data,
struct upb_msg *m, struct upb_msgsizes *sizes)
struct upb_msgdef *m, struct upb_msgsizes *sizes)
{
(void)s;
(void)data;
@ -532,7 +469,7 @@ void upb_msg_serialize_init(struct upb_msg_serialize_state *s, void *data,
}
static upb_status_t serialize_tag(uint8_t *buf, uint8_t *end,
struct upb_msg_field *f, uint8_t **outptr)
struct upb_msg_fielddef *f, uint8_t **outptr)
{
/* TODO: need to have the field number also. */
UPB_CHECK(upb_put_UINT32(buf, end, f->type, outptr));
@ -554,10 +491,10 @@ upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s,
int i = s->top->field_iter;
//int j = s->top->elem_iter;
void *msg = s->top->msg;
struct upb_msg *m = s->top->m;
struct upb_msgdef *m = s->top->m;
while(buf < end) {
struct upb_msg_field *f = &m->fields[i];
struct upb_msg_fielddef *f = &m->fields[i];
union upb_value_ptr p = upb_msg_getptr(msg, f);
serialize_tag(buf, end, f, &buf);
if(f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
@ -571,6 +508,7 @@ upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s,
return UPB_STATUS_OK;
}
/* Comparison. ***************************************************************/
bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2,
@ -607,7 +545,7 @@ bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2,
}
bool upb_array_eql(struct upb_array *arr1, struct upb_array *arr2,
struct upb_msg_field *f, bool recursive)
struct upb_msg_fielddef *f, bool recursive)
{
if(arr1->len != arr2->len) return false;
if(upb_issubmsg(f)) {
@ -628,7 +566,7 @@ bool upb_array_eql(struct upb_array *arr1, struct upb_array *arr2,
return true;
}
bool upb_msg_eql(void *data1, void *data2, struct upb_msg *m, bool recursive)
bool upb_msg_eql(void *data1, void *data2, struct upb_msgdef *m, bool recursive)
{
/* Must have the same fields set. TODO: is this wrong? Should we also
* consider absent defaults equal to explicitly set defaults? */
@ -640,20 +578,66 @@ bool upb_msg_eql(void *data1, void *data2, struct upb_msg *m, bool recursive)
* padding) and memcmp the masked messages. */
for(uint32_t i = 0; i < m->num_fields; i++) {
struct upb_msg_field *f = &m->fields[i];
struct upb_msg_fielddef *f = &m->fields[i];
if(!upb_msg_isset(data1, f)) continue;
union upb_value_ptr p1 = upb_msg_getptr(data1, f);
union upb_value_ptr p2 = upb_msg_getptr(data2, f);
if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
if(upb_isarray(f)) {
if(!upb_array_eql(*p1.arr, *p2.arr, f, recursive)) return false;
} else {
if(upb_issubmsg(f)) {
if(recursive && !upb_msg_eql(p1.msg, p2.msg, f->ref.msg, recursive))
return false;
} else if(!upb_value_eql(p1, p2, f->type)) {
} else if(upb_issubmsg(f)) {
if(recursive && !upb_msg_eql(p1.msg, p2.msg, f->ref.msg, recursive))
return false;
}
} else if(!upb_value_eql(p1, p2, f->type)) {
return false;
}
}
return true;
}
static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
struct upb_msg_fielddef *f,
google_protobuf_FieldDescriptorProto *fd,
FILE *stream);
static void printmsg(struct upb_text_printer *printer, void *msg,
struct upb_msgdef *m, FILE *stream)
{
for(uint32_t i = 0; i < m->num_fields; i++) {
struct upb_msg_fielddef *f = &m->fields[i];
google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
if(!upb_msg_isset(msg, f)) continue;
union upb_value_ptr p = upb_msg_getptr(msg, f);
if(upb_isarray(f)) {
struct upb_array *arr = *p.arr;
for(uint32_t j = 0; j < arr->len; j++) {
union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type);
printval(printer, elem_p, f, fd, stream);
}
} else {
printval(printer, p, f, fd, stream);
}
}
}
/* Prints the single value at "p" belonging to field "f" (descriptor "fd").
 * Scalar and string values are printed as one field; a submessage is printed
 * recursively between a push and a pop labelled with the field name. */
static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
                     struct upb_msg_fielddef *f,
                     google_protobuf_FieldDescriptorProto *fd,
                     FILE *stream)
{
  if(!upb_issubmsg(f)) {
    upb_text_printfield(printer, *fd->name, f->type, upb_deref(p, f->type), stream);
    return;
  }
  upb_text_push(printer, *fd->name, stream);
  printmsg(printer, *p.msg, f->ref.msg, stream);
  upb_text_pop(printer, stream);
}
void upb_msg_print(void *data, struct upb_msgdef *m, bool single_line,
FILE *stream)
{
struct upb_text_printer printer;
upb_text_printer_init(&printer, single_line);
printmsg(&printer, data, m, stream);
}

@ -3,93 +3,48 @@
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*
* A upb_msg provides a full description of a message as defined in a .proto
* file. It supports many features and operations for dealing with proto
* A upb_msgdef provides a full description of a message type as defined in a
* .proto file. Using a upb_msgdef, it is possible to treat an arbitrary hunk
* of memory (a void*) as a protobuf of the given type. We will call this
* void* a upb_msg in the context of this interface.
*
* Clients generally do not construct or destruct upb_msgdef objects directly.
* They are managed by upb_contexts, and clients can obtain upb_msgdef pointers
* directly from a upb_context.
*
* A upb_msg is READ-ONLY, and the upb_msgdef functions in this file provide
* read-only access. For a mutable message, or for a message that you can take
* a reference to (preventing its destruction), see upb_mm_msg.h, which is a
* layer on top of upb_msg that adds memory management semantics.
*
* upb_msgdef supports many features and operations for dealing with proto
* messages:
* - reflection over .proto types at runtime (list fields, get names, etc).
* - an in-memory byte-level format for efficiently storing and accessing msgs.
* - serializing and deserializing from the in-memory format to a protobuf.
* - optional memory management for handling strings, arrays, and submessages.
*
* Throughout this file, the following convention is used:
* - "struct upb_msg *m" describes a message type (name, list of fields, etc).
* - "void *data" is an actual message stored using the in-memory format.
* - serializing from the in-memory format to a protobuf.
* - parsing from a protobuf to an in-memory data structure (you either
* supply callbacks for allocating/repurposing memory or use a simplified
* version that parses into newly-allocated memory).
*
* The in-memory format is very much like a C struct that you can define at
* run-time, but also supports reflection. Like C structs it supports
* offset-based access, as opposed to the much slower name-based lookup. The
* format stores both the values themselves and bits describing whether each
* field is set or not. For example:
*
* parsed message Foo {
* optional bool a = 1;
* repeated uint32 b = 2;
* optional Bar c = 3;
* }
* field is set or not.
*
* The in-memory layout for this message on a 32-bit machine will be something
* like:
*
* Foo
* +------------------------+
* | set_flags a:1, b:1, c:1|
* +------------------------+
* | bool a (1 byte) |
* +------------------------+
* | padding (3 bytes) |
* +------------------------+ upb_array
* | upb_array* b (4 bytes) | ----> +----------------------------+
* +------------------------+ | uint32* elements (4 bytes) | ---+
* | Bar* c (4 bytes) | +----------------------------+ |
* +------------------------+ | uint32 size (4 bytes) | |
* +----------------------------+ |
* |
* -----------------------------------------------------------------+
* |
* V
* uint32 array
* +----+----+----+----+----+----+
* | e1 | e2 | e3 | e4 | e5 | e6 |
* +----+----+----+----+----+----+
*
* And the corresponding C structure (as emitted by the proto compiler) would be:
*
* struct Foo {
* union {
* uint8_t bytes[1];
* struct {
* bool a:1;
* bool b:1;
* bool c:1;
* } has;
* } set_flags;
* bool a;
* upb_uint32_array *b;
* Bar *c;
* }
* For a more in-depth description of the in-memory format, see:
* http://wiki.github.com/haberman/upb/inmemoryformat
*
* Because the C struct emitted by the upb compiler uses exactly the same
* byte-level format as the reflection interface, you can access the same hunk
* of memory either way. The C struct provides maximum performance and static
* type safety; upb_msg provides flexibility.
* type safety; upb_msgdef provides flexibility.
*
* The in-memory format has no interoperability guarantees whatsoever, except
* that a single version of upb will interoperate with itself. Don't even
* think about persisting the in-memory format or sending it anywhere. That's
* what serialized protobufs are for! The in-memory format is just that -- an
* in-memory representation that allows for fast access.
*
* The in-memory format is carefully designed to *not* mandate any particular
* memory management scheme. This should make it easier to integrate with
* existing memory management schemes, or to perform advanced techniques like
* reference counting, garbage collection, and string references. Different
* clients can read each others messages regardless of what memory management
* scheme each is using.
*
* A memory management scheme is provided for convenience, and it is used by
* default by the stock message parser. Clients can substitute their own
* memory management scheme into this parser without any loss of generality
* or performance.
*/
#ifndef UPB_MSG_H_
@ -108,21 +63,9 @@ extern "C" {
/* Message definition. ********************************************************/
/* Structure that describes a single field in a message. This structure is very
* consciously designed to fit into 12/16 bytes (32/64 bit, respectively),
* because copies of this struct are in the hash table that is read in the
* critical path of parsing. Minimizing the size of this struct increases
* cache-friendliness. */
struct upb_msg_field {
union upb_symbol_ref ref;
uint32_t byte_offset; /* Where to find the data. */
uint16_t field_index; /* Indexes upb_msg.fields. Also indicates set bit */
upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
upb_label_t label;
};
struct upb_msg_fielddef;
/* Structure that describes a single .proto message type. */
struct upb_msg {
struct upb_msgdef {
struct google_protobuf_DescriptorProto *descriptor;
struct upb_string fqname; /* Fully qualified. */
size_t size;
@ -131,93 +74,65 @@ struct upb_msg {
uint32_t num_required_fields; /* Required fields have the lowest set bytemasks. */
struct upb_inttable fields_by_num;
struct upb_strtable fields_by_name;
struct upb_msg_field *fields;
struct upb_msg_fielddef *fields;
struct google_protobuf_FieldDescriptorProto **field_descriptors;
};
/* The num->field and name->field maps in upb_msg allow fast lookup of fields
* by number or name. These lookups are in the critical path of parsing and
* field lookup, so they must be as fast as possible. To make these more
* cache-friendly, we put the data in the table by value. */
struct upb_fieldsbynum_entry {
struct upb_inttable_entry e;
struct upb_msg_field f;
/* Structure that describes a single field in a message. This structure is very
* consciously designed to fit into 12/16 bytes (32/64 bit, respectively),
* because copies of this struct are in the hash table that is read in the
* critical path of parsing. Minimizing the size of this struct increases
* cache-friendliness. */
struct upb_msg_fielddef {
union upb_symbol_ref ref;
uint32_t byte_offset; /* Where to find the data. */
uint16_t field_index; /* Indexes upb_msgdef.fields and indicates set bit */
upb_field_type_t type; /* Copied from descriptor for cache-friendliness. */
upb_label_t label;
};
struct upb_fieldsbyname_entry {
struct upb_strtable_entry e;
struct upb_msg_field f;
};
INLINE bool upb_issubmsg(struct upb_msg_fielddef *f) {
return upb_issubmsgtype(f->type);
}
INLINE bool upb_isstring(struct upb_msg_fielddef *f) {
return upb_isstringtype(f->type);
}
/* Reports whether field f is an array, i.e. whether its label is "repeated". */
INLINE bool upb_isarray(struct upb_msg_fielddef *f) {
  if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) return true;
  return false;
}
/* Can be used to retrieve a field descriptor given the upb_msg_field ref. */
/* Can be used to retrieve a field descriptor given the upb_msg_fielddef. */
INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
struct upb_msg_field *f, struct upb_msg *m) {
struct upb_msg_fielddef *f, struct upb_msgdef *m) {
return m->field_descriptors[f->field_index];
}
/* Initializes/frees a upb_msg. Usually this will be called by upb_context, and
* clients will not have to construct one directly.
*
* Caller retains ownership of d, but the msg will contain references to it, so
* it must outlive the msg. Note that init does not resolve upb_msg_field.ref
* the caller should do that post-initialization by calling upb_msg_ref()
* below.
*
* fqname indicates the fully-qualified name of this message. Ownership of
* fqname passes to the msg, but the msg will contain references to it, so it
* must outlive the msg.
*
* sort indicates whether or not it is safe to reorder the fields from the order
* they appear in d. This should be false if code has been compiled against a
* header for this type that expects the given order. */
bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d,
struct upb_string fqname, bool sort);
void upb_msg_free(struct upb_msg *m);
/* Sort the given field descriptors in-place, according to what we think is an
* optimal ordering of fields. This can change from upb release to upb release.
* This is meant for internal use. */
void upb_msg_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num);
/* Field access. **************************************************************/
/* Clients use this function on a previously initialized upb_msg to resolve the
* "ref" field in the upb_msg_field. Since messages can refer to each other in
* mutually-recursive ways, this step must be separated from initialization. */
void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f, union upb_symbol_ref ref);
/* Note that these only provide access to fields that are directly in the msg
* itself. For dynamic fields (strings, arrays, and submessages) it will be
* necessary to dereference the returned values. */
/* Looks up a field by name or number. While these are written to be as fast
* as possible, it will still be faster to cache the results of this lookup if
* possible. These return NULL if no such field is found. */
INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
uint32_t number) {
struct upb_fieldsbynum_entry *e =
(struct upb_fieldsbynum_entry*)upb_inttable_fast_lookup(
&m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
return e ? &e->f : NULL;
}
INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
struct upb_string *name) {
struct upb_fieldsbyname_entry *e =
(struct upb_fieldsbyname_entry*)upb_strtable_lookup(
&m->fields_by_name, name);
return e ? &e->f : NULL;
/* Returns a pointer to a specific field in a message. */
INLINE union upb_value_ptr upb_msg_getptr(void *msg, struct upb_msg_fielddef *f) {
union upb_value_ptr p;
p._void = ((char*)msg + f->byte_offset);
return p;
}
INLINE bool upb_issubmsg(struct upb_msg_field *f) {
return upb_issubmsgtype(f->type);
}
INLINE bool upb_isstring(struct upb_msg_field *f) {
return upb_isstringtype(f->type);
}
INLINE bool upb_isarray(struct upb_msg_field *f) {
return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED;
/* Returns the value of a specific field in a message. */
INLINE union upb_value upb_msg_get(void *msg, struct upb_msg_fielddef *f) {
return upb_deref(upb_msg_getptr(msg, f), f->type);
}
/* "Set" flag reading and writing. *******************************************/
/* Please note that these functions do not perform any memory management or in
* any way ensure that the fields are valid. They *only* test/set/clear a bit
* that indicates whether the field is set or not. */
/* All upb code and code using upb should guarantee that the set flags are
* always valid. In particular, whenever the set flag for a dynamic field is
* set, that field's pointer must be valid.
*
* Clients should never set fields on a plain upb_msg, only on a upb_mm_msg. */
/* Returns the byte offset where we store whether this field is set. */
INLINE size_t upb_isset_offset(uint32_t field_index) {
@ -230,135 +145,162 @@ INLINE uint8_t upb_isset_mask(uint32_t field_index) {
}
/* Sets the set bit for this field in the given message. */
INLINE void upb_msg_set(void *s, struct upb_msg_field *f)
INLINE void upb_msg_set(void *msg, struct upb_msg_fielddef *f)
{
((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
((char*)msg)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
}
/* Clears the set bit for this field in the given message. */
INLINE void upb_msg_unset(void *s, struct upb_msg_field *f)
INLINE void upb_msg_unset(void *msg, struct upb_msg_fielddef *f)
{
((char*)s)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index);
((char*)msg)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index);
}
/* Tests whether the given field is set. */
INLINE bool upb_msg_isset(void *s, struct upb_msg_field *f)
INLINE bool upb_msg_isset(void *msg, struct upb_msg_fielddef *f)
{
return ((char*)s)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index);
return ((char*)msg)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index);
}
/* Returns true if *all* required fields are set, false otherwise. */
INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
INLINE bool upb_msg_all_required_fields_set(void *msg, struct upb_msgdef *m)
{
int num_fields = m->num_required_fields;
int i = 0;
while(num_fields > 8) {
if(((uint8_t*)s)[i++] != 0xFF) return false;
if(((uint8_t*)msg)[i++] != 0xFF) return false;
num_fields -= 8;
}
if(((uint8_t*)s)[i] != (1 << num_fields) - 1) return false;
if(((uint8_t*)msg)[i] != (1 << num_fields) - 1) return false;
return true;
}
/* Clears the set bit for all fields. */
INLINE void upb_msg_clear(void *s, struct upb_msg *m)
INLINE void upb_msg_clear(void *msg, struct upb_msgdef *m)
{
memset(s, 0, m->set_flags_bytes);
memset(msg, 0, m->set_flags_bytes);
}
/* Scalar (non-array) data access. ********************************************/
/* Number->field and name->field lookup. *************************************/
/* Returns a pointer to a specific field in a message. */
INLINE union upb_value_ptr upb_msg_getptr(void *data, struct upb_msg_field *f) {
union upb_value_ptr p;
p._void = ((char*)data + f->byte_offset);
return p;
}
/* The num->field and name->field maps in upb_msgdef allow fast lookup of fields
* by number or name. These lookups are in the critical path of parsing and
* field lookup, so they must be as fast as possible. To make these more
* cache-friendly, we put the data in the table by value. */
/* Returns the value of a specific field in a message. */
INLINE union upb_value upb_msg_get(void *data, struct upb_msg_field *f) {
return upb_deref(upb_msg_getptr(data, f), f->type);
}
/* Entry type for upb_msgdef.fields_by_num.  The field definition is held by
 * value alongside the table's own entry data, so a successful lookup yields
 * the definition without a further pointer chase. */
struct upb_fieldsbynum_entry {
struct upb_inttable_entry e;  /* Table's own entry data; presumably must stay first -- TODO confirm in upb_table.h. */
struct upb_msg_fielddef f;    /* The field definition handed back by upb_msg_fieldbynum(). */
};
/* Memory management *********************************************************/
/* Entry type for upb_msgdef.fields_by_name.  The field definition is held by
 * value alongside the table's own entry data, so a successful lookup yields
 * the definition without a further pointer chase. */
struct upb_fieldsbyname_entry {
struct upb_strtable_entry e;  /* Table's own entry data; presumably must stay first -- TODO confirm in upb_table.h. */
struct upb_msg_fielddef f;    /* The field definition handed back by upb_msg_fieldbyname(). */
};
/* One important note about these memory management routines: they must be used
* completely or not at all (for each message). In other words, you can't
* allocate your own message and then free it with upb_msgdata_free. As
* another example, you can't point a field to your own string and then call
* upb_msg_reuse_str. */
/* Looks up a field by name or number. While these are written to be as fast
* as possible, it will still be faster to cache the results of this lookup if
* possible. These return NULL if no such field is found. */
/* Finds the definition of the field with the given field number in m.
 * Returns a pointer to the definition stored inside m's number table, or
 * NULL if m has no field with that number. */
INLINE struct upb_msg_fielddef *upb_msg_fieldbynum(struct upb_msgdef *m,
                                                   uint32_t number) {
  struct upb_fieldsbynum_entry *entry =
      (struct upb_fieldsbynum_entry*)upb_inttable_fast_lookup(
          &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
  if(entry == NULL) return NULL;
  return &entry->f;
}
/* Finds the definition of the field with the given name in m.  Returns a
 * pointer to the definition stored inside m's name table, or NULL if m has no
 * field with that name. */
INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m,
                                                    struct upb_string *name) {
  struct upb_fieldsbyname_entry *entry =
      (struct upb_fieldsbyname_entry*)upb_strtable_lookup(
          &m->fields_by_name, name);
  if(entry == NULL) return NULL;
  return &entry->f;
}
/* Allocates and frees message data, respectively. Newly allocated data is
* initialized to empty. Freeing a message always frees string data, but
* the client can decide whether or not submessages should be deleted. */
void *upb_msgdata_new(struct upb_msg *m);
void upb_msgdata_free(void *data, struct upb_msg *m, bool free_submsgs);
/* Given a pointer to the appropriate field of the message or array, these
* functions will lazily allocate memory for a string, array, or submessage.
* If the previously allocated memory is big enough, it will reuse it without
* re-allocating. See upb_msg.c for example usage. */
/* Simple, one-shot parsing ***************************************************/
/* Reuse a string of at least the given size. */
void upb_msg_reuse_str(struct upb_string **str, uint32_t size);
/* Like the previous, but assumes that the string will be by reference, so
* doesn't allocate memory for the string itself. */
void upb_msg_reuse_strref(struct upb_string **str);
/* A simple interface for parsing into a newly-allocated message. This
* interface should only be used when the message will be read-only with
* respect to memory management (eg. won't add or remove internal references to
* dynamic memory). For more flexible (but also more complicated) interfaces,
* see below and in upb_mm_msg.h. */
/* Reuse an array of at least the given size, with the given type. */
void upb_msg_reuse_array(struct upb_array **arr, uint32_t size,
upb_field_type_t t);
/* Parses the protobuf in s (which is expected to be complete) and allocates
* new message data to hold it. If byref is set, strings in the returned
* upb_msg will reference s instead of copying from it, but this requires that
* s will live for as long as the returned message does. */
void *upb_msg_parsenew(struct upb_msgdef *m, struct upb_string *s);
/* Reuse a submessage of the given type. */
void upb_msg_reuse_submsg(void **msg, struct upb_msg *m);
/* This function should be used to free messages that were parsed with
* upb_msg_parsenew. It will free the message appropriately (including all
* submessages). */
void upb_msg_free(void *msg, struct upb_msgdef *m);
/* Parsing. ******************************************************************/
/* This is all just a layer on top of the stream-oriented facility in
* upb_parse.h. */
/* Parsing with (re)allocation callbacks. *************************************/
struct upb_msg_parse_frame {
struct upb_msg *m;
void *data;
/* This interface parses protocol buffers into upb_msgs, but allows the client
* to supply allocation callbacks whenever the parser needs to obtain a string,
* array, or submsg (a "dynamic field"). If the parser sees that a dynamic
* field is already present (its "set bit" is set) it will use that, otherwise
* it will call the allocation callback to obtain one.
*
* This may seem trivial (since nearly all clients will use malloc and free for
* memory management), but the allocation callback can be used for more than
* just allocation. If we are parsing data into an existing upb_msg, the
* allocation callback can examine any existing memory that is allocated for
* the dynamic field and determine whether it can reuse it. It can also
* perform memory management like unrefing the existing field or refing the new.
*
* This parser is layered on top of the event-based parser in upb_parse.h. The
* parser in upb_mm_msg.h is layered on top of this parser.
*
* This parser is fully streaming-capable. */
/* Callback through which the parser obtains the array for an array field.
 * Parameter roles are inferred from their names -- TODO confirm against
 * upb_msg.c: msg and m are the message data and its definition, f is the
 * field being parsed, existingval is the array the field currently refers to
 * (if any), and size is the number of elements the parser needs room for.
 * Returns the array the parser should use. */
typedef struct upb_array *(*upb_msg_getarray_cb_t)(
void *msg, struct upb_msgdef *m,
struct upb_array *existingval, struct upb_msg_fielddef *f,
upb_arraylen_t size);
/* Callback to allocate a string of size >=len. If len==0 then the client can
* assume that the parser intends to reference the memory instead of copying
* it. */
typedef struct upb_string *(*upb_msg_getstring_cb_t)(
void *msg, struct upb_msgdef *m,
struct upb_string *existingval, struct upb_msg_fielddef *f, size_t len);
/* Callback through which the parser obtains the message data for a submessage
 * field.  Parameter roles are inferred from their names -- TODO confirm
 * against upb_msg.c: msg and m are the enclosing message data and its
 * definition, f is the submessage field being parsed, and existingval is the
 * submessage the field currently refers to (if any).  Returns the message
 * data the parser should write the submessage into. */
typedef void *(*upb_msg_getmsg_cb_t)(
void *msg, struct upb_msgdef *m,
void *existingval, struct upb_msg_fielddef *f);
/* One element of upb_msg_parser.stack: pairs a message definition with the
 * message data it describes.  Presumably one frame is pushed per nesting level
 * of submessage being parsed -- TODO confirm against upb_msg.c. */
struct upb_msg_parser_frame {
struct upb_msgdef *m;  /* Definition of the message at this level. */
void *msg;             /* Message data at this level. */
};
#include "upb_text.h"
struct upb_msg_parse_state {
struct upb_parse_state s;
struct upb_msg_parser {
struct upb_stream_parser s;
bool merge;
bool byref;
struct upb_msg *m;
struct upb_msg_parse_frame stack[UPB_MAX_NESTING], *top;
struct upb_text_printer p;
struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top;
upb_msg_getarray_cb_t getarray_cb;
upb_msg_getstring_cb_t getstring_cb;
upb_msg_getmsg_cb_t getmsg_cb;
};
/* Initializes/frees a message parser. The parser will write the data to the
* message data "data", which the caller must have previously allocated (the
* parser will allocate submsgs, strings, and arrays as needed, however).
*
* "Merge" controls whether the parser will append to data instead of
* overwriting. Merging concatenates arrays and merges submessages instead
* of clearing both.
*
* "Byref" controls whether the new message data copies or references strings
* it encounters. If byref == true, then all strings supplied to upb_msg_parse
* must remain unchanged and must outlive data. */
void upb_msg_parse_init(struct upb_msg_parse_state *s, void *data,
struct upb_msg *m, bool merge, bool byref);
void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *data,
struct upb_msg *m, bool merge, bool byref);
void upb_msg_parse_free(struct upb_msg_parse_state *s);
/* Parses a protobuf fragment, writing the data to the message that was passed
* to upb_msg_parse_init. This function can be called multiple times as more
* data becomes available. */
upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
void *data, size_t len, size_t *read);
void upb_msg_parser_reset(struct upb_msg_parser *p,
void *msg, struct upb_msgdef *m,
bool byref);
/* Parses protocol buffer data out of data which has length of len. The data
* need not be a complete protocol buffer. The number of bytes parsed is
* returned in *read, and the next call to upb_msg_parse must supply data that
* is *read bytes past data in the logical stream. */
upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p,
void *data, size_t len, size_t *read);
/* Parses the protobuf in s (which is expected to be complete) and allocates
* new message data to hold it. This is an alternative to the streaming API
* above. "byref" works as in upb_msg_parse_init(). */
void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s, bool byref);
/* Serialization *************************************************************/
@ -377,8 +319,8 @@ void upb_msgsizes_free(struct upb_msgsizes *sizes);
/* Given a previously initialized sizes, recurse over the message and store its
* sizes in 'sizes'. */
void upb_msgsizes_read(struct upb_msgsizes *sizes, void *data,
struct upb_msg *m);
void upb_msgsizes_read(struct upb_msgsizes *sizes, void *msg,
struct upb_msgdef *m);
/* Returns the total size of the serialized message given in sizes. Must be
* preceded by a call to upb_msgsizes_read. */
@ -391,8 +333,8 @@ struct upb_msg_serialize_state;
* "sizes" and the parse being fully completed. */
void upb_msg_serialize_alloc(struct upb_msg_serialize_state *s);
void upb_msg_serialize_free(struct upb_msg_serialize_state *s);
void upb_msg_serialize_init(struct upb_msg_serialize_state *s, void *data,
struct upb_msg *m, struct upb_msgsizes *sizes);
void upb_msg_serialize_init(struct upb_msg_serialize_state *s, void *msg,
struct upb_msgdef *m, struct upb_msgsizes *sizes);
/* Serializes the next set of bytes into buf (which has size len). Returns
* UPB_STATUS_OK if serialization is complete, or UPB_STATUS_NEED_MORE_DATA
@ -405,8 +347,43 @@ upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s,
/* Text dump *****************************************************************/
bool upb_msg_eql(void *data1, void *data2, struct upb_msg *m, bool recursive);
void upb_msg_print(void *data, struct upb_msg *m, FILE *stream);
bool upb_msg_eql(void *data1, void *data2, struct upb_msgdef *m, bool recursive);
void upb_msg_print(void *data, struct upb_msgdef *m, bool single_line,
FILE *stream);
/* Internal functions. ********************************************************/
/* Initializes/frees a upb_msgdef. Usually this will be called by upb_context,
* and clients will not have to construct one directly.
*
* Caller retains ownership of d, but the msg will contain references to it, so
* it must outlive the msg. Note that init does not resolve
* upb_msg_fielddef.ref; the caller should do that post-initialization by
* calling upb_msgdef_ref() below.
*
* fqname indicates the fully-qualified name of this message. Ownership of
* fqname passes to the msg, but the msg will contain references to it, so it
* must outlive the msg.
*
* sort indicates whether or not it is safe to reorder the fields from the order
* they appear in d. This should be false if code has been compiled against a
* header for this type that expects the given order. */
bool upb_msgdef_init(struct upb_msgdef *m,
struct google_protobuf_DescriptorProto *d,
struct upb_string fqname, bool sort);
void upb_msgdef_free(struct upb_msgdef *m);
/* Sort the given field descriptors in-place, according to what we think is an
* optimal ordering of fields. This can change from upb release to upb
* release. */
void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num);
/* Clients use this function on a previously initialized upb_msgdef to resolve
* the "ref" field in the upb_msg_fielddef. Since messages can refer to each
* other in mutually-recursive ways, this step must be separated from
* initialization. */
void upb_msgdef_ref(struct upb_msgdef *m, struct upb_msg_fielddef *f,
union upb_symbol_ref ref);
#ifdef __cplusplus
} /* extern "C" */

@ -100,7 +100,7 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
#undef CASE
}
void upb_parse_reset(struct upb_parse_state *state, void *udata)
void upb_stream_parser_reset(struct upb_stream_parser *state, void *udata)
{
state->top = state->stack;
state->limit = &state->stack[UPB_MAX_NESTING];
@ -111,18 +111,7 @@ void upb_parse_reset(struct upb_parse_state *state, void *udata)
state->udata = udata;
}
void upb_parse_init(struct upb_parse_state *state, void *udata)
{
memset(state, 0, sizeof(struct upb_parse_state)); /* Clear all callbacks. */
upb_parse_reset(state, udata);
}
void upb_parse_free(struct upb_parse_state *state)
{
(void)state;
}
static void *pop_stack_frame(struct upb_parse_state *s, uint8_t *buf)
static void *pop_stack_frame(struct upb_stream_parser *s, uint8_t *buf)
{
if(s->submsg_end_cb) s->submsg_end_cb(s->udata);
s->top--;
@ -130,7 +119,7 @@ static void *pop_stack_frame(struct upb_parse_state *s, uint8_t *buf)
}
/* Returns the next end offset. */
static upb_status_t push_stack_frame(struct upb_parse_state *s,
static upb_status_t push_stack_frame(struct upb_stream_parser *s,
uint8_t *buf, uint32_t len,
void *user_field_desc, uint8_t **submsg_end)
{
@ -142,8 +131,8 @@ static upb_status_t push_stack_frame(struct upb_parse_state *s,
return UPB_STATUS_OK;
}
upb_status_t upb_parse(struct upb_parse_state *s, void *_buf, size_t len,
size_t *read)
upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s,
void *_buf, size_t len, size_t *read)
{
uint8_t *buf = _buf;
uint8_t *completed = buf;

@ -42,14 +42,11 @@ INLINE bool upb_isstringtype(upb_field_type_t type) {
* as data becomes available. The parser is fully streaming-capable, so the
* data need not all be available at the same time. */
struct upb_parse_state;
struct upb_stream_parser;
/* Initialize and free (respectively) the given parse state, which must have
* been previously allocated. udata_size specifies how much space will be
* available at parse_stack_frame.user_data in each frame for user data. */
void upb_parse_init(struct upb_parse_state *state, void *udata);
void upb_parse_reset(struct upb_parse_state *state, void *udata);
void upb_parse_free(struct upb_parse_state *state);
/* Resets the internal state of an already-allocated parser. udata will be
* passed to callbacks as appropriate. */
void upb_stream_parser_reset(struct upb_stream_parser *p, void *udata);
/* The callback that is called immediately after a tag has been parsed. The
* client should determine whether it wants to parse or skip the corresponding
@ -86,7 +83,7 @@ typedef void (*upb_submsg_start_cb)(void *udata,
void *user_field_desc);
typedef void (*upb_submsg_end_cb)(void *udata);
struct upb_parse_state {
struct upb_stream_parser {
/* For delimited submsgs, counts from the submsg len down to zero.
* For group submsgs, counts from zero down to the negative len. */
uint32_t stack[UPB_MAX_NESTING], *top, *limit;
@ -115,8 +112,8 @@ struct upb_parse_state {
*
* TODO: see if we can provide the following guarantee efficiently:
* *read will always be >= len. */
upb_status_t upb_parse(struct upb_parse_state *s, void *buf, size_t len,
size_t *read);
upb_status_t upb_stream_parser_parse(struct upb_stream_parser *p,
void *buf, size_t len, size_t *read);
extern upb_wire_type_t upb_expected_wire_types[];
/* Returns true if wt is the correct on-the-wire type for ft. */

@ -13,7 +13,7 @@
static const upb_inttable_key_t EMPTYENT = 0;
static const double MAX_LOAD = 0.85;
static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
/* We use 1-based indexes into the table so that 0 can be "NULL". */
static struct upb_inttable_entry *intent(struct upb_inttable *t, int32_t i) {
@ -238,7 +238,7 @@ void *upb_strtable_next(struct upb_strtable *t, struct upb_strtable_entry *cur)
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.

@ -141,7 +141,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
for(int i = 0; i < num_entries; i++) { /* Foreach message */
if(entries[i]->type != UPB_SYM_MESSAGE) continue;
struct upb_symtab_entry *entry = entries[i];
struct upb_msg *m = entry->ref.msg;
struct upb_msgdef *m = entry->ref.msg;
/* We use entry->e.key (the fully qualified name). */
struct upb_string msg_name = upb_strdup(entry->e.key);
to_cident(msg_name);
@ -158,7 +158,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
fputs(" } has;\n", stream);
fputs(" } set_flags;\n", stream);
for(uint32_t j = 0; j < m->num_fields; j++) {
struct upb_msg_field *f = &m->fields[j];
struct upb_msg_fielddef *f = &m->fields[j];
struct google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j];
if(f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP ||
f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
@ -229,7 +229,7 @@ struct strtable_entry {
struct typetable_entry {
struct upb_strtable_entry e;
struct upb_msg_field *field;
struct upb_msg_fielddef *field;
struct upb_string cident; /* Type name converted with to_cident(). */
/* A list of all values of this type, in an established order. */
union upb_value *values;
@ -259,11 +259,11 @@ int compare_entries(const void *_e1, const void *_e2)
*
* TODO: make these use a generic msg visitor. */
static void add_strings_from_msg(void *data, struct upb_msg *m,
static void add_strings_from_msg(void *data, struct upb_msgdef *m,
struct upb_strtable *t);
static void add_strings_from_value(union upb_value_ptr p,
struct upb_msg_field *f,
struct upb_msg_fielddef *f,
struct upb_strtable *t)
{
if(upb_isstringtype(f->type)) {
@ -275,11 +275,11 @@ static void add_strings_from_value(union upb_value_ptr p,
}
}
static void add_strings_from_msg(void *data, struct upb_msg *m,
static void add_strings_from_msg(void *data, struct upb_msgdef *m,
struct upb_strtable *t)
{
for(uint32_t i = 0; i < m->num_fields; i++) {
struct upb_msg_field *f = &m->fields[i];
struct upb_msg_fielddef *f = &m->fields[i];
if(!upb_msg_isset(data, f)) continue;
union upb_value_ptr p = upb_msg_getptr(data, f);
if(upb_isarray(f)) {
@ -299,7 +299,7 @@ static void add_strings_from_msg(void *data, struct upb_msg *m,
struct typetable_entry *get_or_insert_typeentry(struct upb_strtable *t,
struct upb_msg_field *f)
struct upb_msg_fielddef *f)
{
struct upb_string type_name = upb_issubmsg(f) ? f->ref.msg->fqname :
upb_type_info[f->type].ctype;
@ -318,7 +318,7 @@ struct typetable_entry *get_or_insert_typeentry(struct upb_strtable *t,
return type_e;
}
static void add_value(union upb_value value, struct upb_msg_field *f,
static void add_value(union upb_value value, struct upb_msg_fielddef *f,
struct upb_strtable *t)
{
struct typetable_entry *type_e = get_or_insert_typeentry(t, f);
@ -329,10 +329,10 @@ static void add_value(union upb_value value, struct upb_msg_field *f,
type_e->values[type_e->values_len++] = value;
}
static void add_submsgs(void *data, struct upb_msg *m, struct upb_strtable *t)
static void add_submsgs(void *data, struct upb_msgdef *m, struct upb_strtable *t)
{
for(uint32_t i = 0; i < m->num_fields; i++) {
struct upb_msg_field *f = &m->fields[i];
struct upb_msg_fielddef *f = &m->fields[i];
if(!upb_msg_isset(data, f)) continue;
union upb_value_ptr p = upb_msg_getptr(data, f);
if(upb_isarray(f)) {
@ -371,7 +371,7 @@ static void add_submsgs(void *data, struct upb_msg *m, struct upb_strtable *t)
/* write_messages_c emits a .c file that contains the data of a protobuf,
* serialized as C structures. */
static void write_message_c(void *data, struct upb_msg *m,
static void write_message_c(void *data, struct upb_msgdef *m,
char *cident, char *hfile_name,
int argc, char *argv[], char *infile_name,
FILE *stream)
@ -441,7 +441,7 @@ static void write_message_c(void *data, struct upb_msg *m,
upb_strtable_init(&types, 16, sizeof(struct typetable_entry));
union upb_value val = {.msg = data};
/* A fake field to get the recursion going. */
struct upb_msg_field fake_field = {
struct upb_msg_fielddef fake_field = {
.type = GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE,
.ref = {.msg = m}
};
@ -484,12 +484,12 @@ static void write_message_c(void *data, struct upb_msg *m,
for(int i = 0; i < e->values_len; i++) {
union upb_value val = e->values[i];
if(upb_issubmsg(e->field)) {
struct upb_msg *m = e->field->ref.msg;
struct upb_msgdef *m = e->field->ref.msg;
void *msgdata = val.msg;
/* Print set flags. */
fputs(" {.set_flags = {.has = {\n", stream);
for(unsigned int j = 0; j < m->num_fields; j++) {
struct upb_msg_field *f = &m->fields[j];
struct upb_msg_fielddef *f = &m->fields[j];
google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j];
fprintf(stream, " ." UPB_STRFMT " = ", UPB_STRARG(*fd->name));
if(upb_msg_isset(msgdata, f))
@ -501,7 +501,7 @@ static void write_message_c(void *data, struct upb_msg *m,
fputs(" }},\n", stream);
/* Print msg data. */
for(unsigned int j = 0; j < m->num_fields; j++) {
struct upb_msg_field *f = &m->fields[j];
struct upb_msg_fielddef *f = &m->fields[j];
google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j];
union upb_value val = upb_msg_get(msgdata, f);
fprintf(stream, " ." UPB_STRFMT " = ", UPB_STRARG(*fd->name));
@ -599,7 +599,7 @@ void error(char *err)
void sort_fields_in_descriptor(google_protobuf_DescriptorProto *d)
{
if(d->set_flags.has.field) upb_msg_sortfds(d->field->elements, d->field->len);
if(d->set_flags.has.field) upb_msgdef_sortfds(d->field->elements, d->field->len);
if(d->set_flags.has.nested_type)
for(uint32_t i = 0; i < d->nested_type->len; i++)
sort_fields_in_descriptor(d->nested_type->elements[i]);
@ -640,7 +640,7 @@ int main(int argc, char *argv[])
struct upb_context c;
upb_context_init(&c);
google_protobuf_FileDescriptorSet *fds =
upb_alloc_and_parse(c.fds_msg, &descriptor, false);
upb_msg_parsenew(c.fds_msg, &descriptor);
if(!fds)
error("Failed to parse input file descriptor.");
if(!upb_context_addfds(&c, fds))

Loading…
Cancel
Save