Major refactoring of upb_msg. Temporary functionality regression.

There is significant refactoring here, as well as some more trivial name changes. upb_msg has become upb_msgdef, to reflect the fact that a upb_msg is not *itself* a message, it describes a message. There are other renamings, such as upb_parse_state -> upb_stream_parser. More significantly, the upb_msg class and parser have been refactored to reflect my recent realization about how memory management should work. upb_msg now has no memory management, and a memory management scheme (that works beautifully with multiple language runtimes) will be layered on top of it. This iteration has the new, read-only upb_msg. upb_mm_msg (a memory-managed message class) will come in the next change.
16 years ago · 8fa6a92f53
parent 952ea88db2
commit 8fa6a92f53
10 changed files with 543 additions and 524 deletions
--- a/src/upb.h
+++ b/src/upb.h
@ -133,7 +133,7 @@ INLINE union upb_value upb_deref(union upb_value_ptr ptr, upb_field_type_t t) {
 }

 union upb_symbol_ref {
-  struct upb_msg *msg;
+  struct upb_msgdef *msg;
  struct upb_enum *_enum;
  struct upb_svc *svc;
 };
--- a/src/upb_array.h
+++ b/src/upb_array.h
@ -2,32 +2,63 @@
 * upb - a minimalist implementation of protocol buffers.
 *
 * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
-
- * Defines an in-memory array type.  TODO: more documentation.
+ *
+ * Defines an in-memory, polymorphic array type.  The array does not know its
+ * own type -- its owner must know that information out-of-band.
+ *
+ * upb_arrays are memory-managed in the sense that they contain a pointer
+ * ("mem") to memory that is "owned" by the array (which may be NULL if the
+ * array owns no memory).  There is a separate pointer ("elements") that points
+ * to the array's currently "effective" memory, which is either equal to
+ * mem (if the array's current value is memory we own) or not (if the array is
+ * referencing other memory).
+ *
+ * If the array is referencing other memory, it is up to the array's owner to
+ * ensure that the other memory remains valid for as long as the array is
+ * referencing it.
 *
 */

 #ifndef UPB_ARRAY_H_
 #define UPB_ARRAY_H_

+#include <stdlib.h>
+#include "upb.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif

 struct upb_string;

-#include "upb.h"
+/* upb_arrays can be at most 2**32 - 1 elements long. */
+typedef uint32_t upb_arraylen_t;

 /* Represents an array (a repeated field) of any type.  The interpretation of
 * the data in the array depends on the type. */
 struct upb_array {
  union upb_value_ptr elements;
-  uint32_t len;     /* Measured in elements. */
+  void *mem;
+  upb_arraylen_t len;     /* Number of elements in "elements". */
+  upb_arraylen_t size;    /* Memory allocated in "mem" (measured in elements) */
 };

-/* Returns a pointer to an array element. */
+INLINE void upb_array_init(struct upb_array *arr)
+{
+  arr->elements._void = NULL;
+  arr->mem = NULL;
+  arr->len = 0;
+  arr->size = 0;
+}
+
+INLINE void upb_array_free(struct upb_array *arr)
+{
+  free(arr->mem);
+}
+
+/* Returns a pointer to an array element.  Does not perform a bounds check! */
 INLINE union upb_value_ptr upb_array_getelementptr(
-    struct upb_array *arr, uint32_t n, upb_field_type_t type)
+    struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
 {
  union upb_value_ptr ptr;
  ptr._void = (void*)((char*)arr->elements._void + n*upb_type_info[type].size);
@ -35,16 +66,55 @@ INLINE union upb_value_ptr upb_array_getelementptr(
 }

 INLINE union upb_value upb_array_getelement(
-    struct upb_array *arr, uint32_t n, upb_field_type_t type)
+    struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
 {
  return upb_deref(upb_array_getelementptr(arr, n, type), type);
 }

+INLINE uint32_t upb_round_up_to_pow2(uint32_t v)
+{
+  /* cf. http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
+  v--;
+  v |= v >> 1;
+  v |= v >> 2;
+  v |= v >> 4;
+  v |= v >> 8;
+  v |= v >> 16;
+  v++;
+  return v;
+}
+
+/* Resizes array to be "len" elements long and ensures we have write access
+ * to the array (reallocating if necessary).  Returns true iff we were
+ * referencing memory for the array and dropped the reference. */
+INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen,
+                             upb_field_type_t type)
+{
+  size_t type_size = upb_type_info[type].size;
+  bool dropped = false;
+  bool ref = arr->elements._void != arr->mem;  /* Ref'ing external memory. */
+  if(arr->size < newlen) {
+    /* Need to resize. */
+    arr->size = max(4, upb_round_up_to_pow2(newlen));
+    arr->mem = realloc(arr->mem, arr->size * type_size);
+  }
+  if(ref) {
+    /* Need to take referenced data and copy it to memory we own. */
+    memcpy(arr->mem, arr->elements._void, UPB_MIN(arr->len, newlen) * type_size);
+    dropped = true;
+  }
+  arr->elements._void = arr->mem;
+  arr->len = newlen;
+  return dropped;
+}
+
 /* These are all overlays on upb_array, pointers between them can be cast. */
 #define UPB_DEFINE_ARRAY_TYPE(name, type) \
  struct name ## _array { \
    type *elements; \
-    uint32_t len; \
+    type *mem; \
+    upb_arraylen_t len; \
+    upb_arraylen_t size; \
  };

 UPB_DEFINE_ARRAY_TYPE(upb_double, double)
@ -57,12 +127,14 @@ UPB_DEFINE_ARRAY_TYPE(upb_bool,   bool)
 UPB_DEFINE_ARRAY_TYPE(upb_string, struct upb_string*)
 UPB_DEFINE_ARRAY_TYPE(upb_msg,    void*)

-/* Defines an array of a specific message type. */
+/* Defines an array of a specific message type (an overlay of upb_array). */
 #define UPB_MSG_ARRAY(msg_type) struct msg_type ## _array
 #define UPB_DEFINE_MSG_ARRAY(msg_type) \
  UPB_MSG_ARRAY(msg_type) { \
    msg_type **elements; \
-    uint32_t len; \
+    msg_type **mem; \
+    upb_arraylen_t len; \
+    upb_arraylen_t size; \
  };

 #ifdef __cplusplus
--- a/src/upb_context.c
+++ b/src/upb_context.c
@ -46,7 +46,7 @@ static void free_symtab(struct upb_strtable *t)
  struct upb_symtab_entry *e = upb_strtable_begin(t);
  for(; e; e = upb_strtable_next(t, &e->e)) {
    switch(e->type) {
-      case UPB_SYM_MESSAGE: upb_msg_free(e->ref.msg); break;
+      case UPB_SYM_MESSAGE: upb_msgdef_free(e->ref.msg); break;
      case UPB_SYM_ENUM: upb_enum_free(e->ref._enum); break;
      default: break;  /* TODO */
    }
@ -60,7 +60,7 @@ void upb_context_free(struct upb_context *c)
 {
  free_symtab(&c->symtab);
  for(size_t i = 0; i < c->fds_len; i++)
-    upb_msgdata_free(c->fds[i], c->fds_msg, true);
+    upb_msg_free(c->fds[i], c->fds_msg);
  free_symtab(&c->psymtab);
  free(c->fds);
 }
@ -188,7 +188,7 @@ static bool insert_message(struct upb_strtable *t,
  e.e.key = fqname;
  e.type = UPB_SYM_MESSAGE;
  e.ref.msg = malloc(sizeof(*e.ref.msg));
-  if(!upb_msg_init(e.ref.msg, d, fqname, sort)) {
+  if(!upb_msgdef_init(e.ref.msg, d, fqname, sort)) {
    free(fqname.ptr);
    return false;
  }
@ -232,9 +232,9 @@ bool addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs,
    if(upb_strtable_lookup(existingdefs, &e->e.key))
      return false;  /* Redefinition prohibited. */
    if(e->type == UPB_SYM_MESSAGE) {
-      struct upb_msg *m = e->ref.msg;
+      struct upb_msgdef *m = e->ref.msg;
      for(unsigned int i = 0; i < m->num_fields; i++) {
-        struct upb_msg_field *f = &m->fields[i];
+        struct upb_msg_fielddef *f = &m->fields[i];
        google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i];
        union upb_symbol_ref ref;
        if(fd->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE ||
@ -247,7 +247,7 @@ bool addfd(struct upb_strtable *addto, struct upb_strtable *existingdefs,
        else
          continue;  /* No resolving necessary. */
        if(!ref.msg) return false;  /* Ref. to undefined symbol. */
-        upb_msg_ref(m, f, ref);
+        upb_msgdef_ref(m, f, ref);
      }
    }
  }
@ -280,7 +280,7 @@ bool upb_context_addfds(struct upb_context *c,

 bool upb_context_parsefds(struct upb_context *c, struct upb_string *fds_str) {
  google_protobuf_FileDescriptorSet *fds =
-      upb_alloc_and_parse(c->fds_msg, fds_str, false);
+      upb_msg_parsenew(c->fds_msg, fds_str);
  if(!fds) return false;
  if(!upb_context_addfds(c, fds)) return false;

--- a/src/upb_context.h
+++ b/src/upb_context.h
@ -40,7 +40,7 @@ struct upb_symtab_entry {
 struct upb_context {
  struct upb_strtable symtab;   /* The context's symbol table. */
  struct upb_strtable psymtab;  /* Private symbols, for internal use. */
-  struct upb_msg *fds_msg;   /* This is in psymtab, ptr here for convenience. */
+  struct upb_msgdef *fds_msg;   /* In psymtab, ptr here for convenience. */

  /* A list of the FileDescriptorProtos we own (from having parsed them
   * ourselves) and must free on destruction. */
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@ -10,6 +10,7 @@
 #include "upb_msg.h"
 #include "upb_parse.h"
 #include "upb_serialize.h"
+#include "upb_text.h"

 /* Rounds p up to the next multiple of t. */
 #define ALIGN_UP(p, t) ((p) % (t) == 0 ? (p) : (p) + ((t) - ((p) % (t))))
@ -35,13 +36,13 @@ static int compare_fields(const void *e1, const void *e2) {
  }
 }

-void upb_msg_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num)
+void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num)
 {
  qsort(fds, num, sizeof(void*), compare_fields);
 }

-bool upb_msg_init(struct upb_msg *m, google_protobuf_DescriptorProto *d,
-                  struct upb_string fqname, bool sort)
+bool upb_msgdef_init(struct upb_msgdef *m, google_protobuf_DescriptorProto *d,
+                     struct upb_string fqname, bool sort)
 {
  /* TODO: more complete validation. */
  if(!d->set_flags.has.field) return false;
@ -65,11 +66,11 @@ bool upb_msg_init(struct upb_msg *m, google_protobuf_DescriptorProto *d,
    /* We count on the caller to keep this pointer alive. */
    m->field_descriptors[i] = d->field->elements[i];
  }
-  if(sort) upb_msg_sortfds(m->field_descriptors, m->num_fields);
+  if(sort) upb_msgdef_sortfds(m->field_descriptors, m->num_fields);

  size_t max_align = 0;
  for(unsigned int i = 0; i < m->num_fields; i++) {
-    struct upb_msg_field *f = &m->fields[i];
+    struct upb_msg_fielddef *f = &m->fields[i];
    google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[i];
    struct upb_type_info *type_info = &upb_type_info[fd->type];

@ -98,7 +99,7 @@ bool upb_msg_init(struct upb_msg *m, google_protobuf_DescriptorProto *d,
  return true;
 }

-void upb_msg_free(struct upb_msg *m)
+void upb_msgdef_free(struct upb_msgdef *m)
 {
  upb_inttable_free(&m->fields_by_num);
  upb_strtable_free(&m->fields_by_name);
@ -106,8 +107,8 @@ void upb_msg_free(struct upb_msg *m)
  free(m->field_descriptors);
 }

-void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f,
-                 union upb_symbol_ref ref) {
+void upb_msgdef_ref(struct upb_msgdef *m, struct upb_msg_fielddef *f,
+                    union upb_symbol_ref ref) {
  struct google_protobuf_FieldDescriptorProto *d =
      upb_msg_field_descriptor(f, m);
  struct upb_fieldsbynum_entry *int_e = upb_inttable_fast_lookup(
@ -120,175 +121,146 @@ void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f,
  str_e->f.ref = ref;
 }

-/* Memory management  *********************************************************/
+/* Simple, one-shot parsing ***************************************************/

-/* Our memory management scheme is as follows:
- *
- * All pointers to dynamic memory (strings, arrays, and submessages) are
- * expected to be good pointers if they are non-zero, *regardless* of whether
- * that field's bit is set!  That way we can reuse the memory even if the field
- * is unset and then set later. */
-
-/* For our memory-managed strings and arrays we store extra information
- * (compared to a plain upb_string or upb_array).  But the data starts with
- * a upb_string and upb_array, so we can overlay onto the regular types. */
-struct mm_upb_string {
-  struct upb_string s;
-  /* Track the allocated size, so we know when we need to reallocate. */
-  uint32_t size;
-  /* Our allocated data.  Stored separately so that clients can point s.ptr to
-   * a referenced string, but we can reuse this data later. */
-  char *data;
-};
+void *upb_msg_new(struct upb_msgdef *md)
+{
+  void *msg = malloc(md->size);
+  memset(msg, 0, md->size);
+  return msg;
+}

-struct mm_upb_array {
-  struct upb_array a;
-  /* Track the allocated size, so we know when we need to reallocate. */
-  uint32_t size;
-};
+/* Allocation callbacks. */
+static struct upb_array *getarray_cb(void *msg, struct upb_msgdef *md,
+                                     struct upb_array *existingval,
+                                     struct upb_msg_fielddef *f,
+                                     upb_arraylen_t len)
+{
+  (void)msg;
+  (void)md;
+  (void)existingval;  /* Don't care -- always zero. */
+  (void)len;
+  struct upb_array *arr = existingval;
+  if(!arr) {
+    arr = malloc(sizeof(*arr));
+    upb_array_init(arr);
+  }
+  upb_array_resize(arr, len, f->type);
+  return arr;
+}

-static uint32_t round_up_to_pow2(uint32_t v)
+static struct upb_string *getstring_cb(void *msg, struct upb_msgdef *md,
+                                       struct upb_string *existingval,
+                                       struct upb_msg_fielddef *f, size_t len)
 {
-  /* cf. http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
-  v--;
-  v |= v >> 1;
-  v |= v >> 2;
-  v |= v >> 4;
-  v |= v >> 8;
-  v |= v >> 16;
-  v++;
-  return v;
+  (void)msg;
+  (void)md;
+  (void)existingval;  /* Don't care -- always zero. */
+  (void)f;
+  struct upb_string *str = malloc(sizeof(*str));
+  str->ptr = malloc(len);
+  return str;
 }

-void *upb_msgdata_new(struct upb_msg *m)
+static void *getmsg_cb(void *msg, struct upb_msgdef *md,
+                       void *existingval, struct upb_msg_fielddef *f)
 {
-  void *msg = malloc(m->size);
-  memset(msg, 0, m->size);  /* Clear all pointers, values, and set bits. */
-  return msg;
+  (void)msg;
+  (void)md;
+  (void)existingval;  /* Don't care -- always zero. */
+  return upb_msg_new(f->ref.msg);
 }

-static void free_value(union upb_value_ptr p, struct upb_msg_field *f,
-                       bool free_submsgs)
+void *upb_msg_parsenew(struct upb_msgdef *md, struct upb_string *s)
 {
-  switch(f->type) {
-    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING:
-    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES: {
-      struct mm_upb_string *mm_str = (void*)*p.str;
-      if(mm_str) {
-        free(mm_str->data);
-        free(mm_str);
-      }
-      break;
-    }
-    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE:
-    case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP:
-      if(free_submsgs) upb_msgdata_free(*p.msg, f->ref.msg, free_submsgs);
-      break;
-    default: break;  /* For non-dynamic types, do nothing. */
+  struct upb_msg_parser mp;
+  void *msg = upb_msg_new(md);
+  upb_msg_parser_reset(&mp, msg, md, false);
+  mp.getarray_cb = getarray_cb;
+  mp.getstring_cb = getstring_cb;
+  mp.getmsg_cb = getmsg_cb;
+  size_t read;
+  upb_status_t status = upb_msg_parser_parse(&mp, s->ptr, s->byte_len, &read);
+  if(status == UPB_STATUS_OK && read == s->byte_len) {
+    return msg;
+  } else {
+    upb_msg_free(msg, md);
+    return NULL;
  }
 }

-void upb_msgdata_free(void *data, struct upb_msg *m, bool free_submsgs)
+/* For simple, one-shot parsing we assume that a dynamic field exists (and
+ * needs to be freed) iff its set bit is set. */
+static void free_value(union upb_value_ptr p, struct upb_msg_fielddef *f)
+{
+  if(upb_isarray(f)) {
+    free((*p.str)->ptr);
+    free(*p.str);
+  } else if(upb_issubmsg(f)) {
+    upb_msg_free(*p.msg, f->ref.msg);
+  }
+}
+
+void upb_msg_free(void *data, struct upb_msgdef *m)
 {
  if(!data) return;  /* A very free-like thing to do. */
  for(unsigned int i = 0; i < m->num_fields; i++) {
-    struct upb_msg_field *f = &m->fields[i];
+    struct upb_msg_fielddef *f = &m->fields[i];
+    if(!upb_msg_isset(data, f)) continue;
    union upb_value_ptr p = upb_msg_getptr(data, f);
-    if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
-      if(*p.arr) {
-        for(uint32_t j = 0; j < (*p.arr)->len; j++)
-          free_value(upb_array_getelementptr(*p.arr, j, f->type),
-                     f, free_submsgs);
-        free((*p.arr)->elements._void);
-        free(*p.arr);
-      }
+    if(upb_isarray(f)) {
+      assert(*p.arr);
+      for(upb_arraylen_t j = 0; j < (*p.arr)->len; j++)
+        free_value(upb_array_getelementptr(*p.arr, j, f->type), f);
+      free((*p.arr)->elements._void);
+      free(*p.arr);
    } else {
-      free_value(p, f, free_submsgs);
+      free_value(p, f);
    }
  }
  free(data);
 }

-void upb_msg_reuse_str(struct upb_string **str, uint32_t size)
-{
-  if(!*str) {
-    *str = malloc(sizeof(struct mm_upb_string));
-    memset(*str, 0, sizeof(struct mm_upb_string));
-  }
-  struct mm_upb_string *s = (void*)*str;
-  if(s->size < size) {
-    size = max(16, round_up_to_pow2(size));
-    s->data = realloc(s->data, size);
-    s->size = size;
-  }
-  s->s.ptr = s->data;
-}
+/* Parsing.  ******************************************************************/

-void upb_msg_reuse_array(struct upb_array **arr, uint32_t size, upb_field_type_t t)
+/* Helper function that returns a pointer to where the next value for field "f"
+ * should be stored, taking into account whether f is an array that may need to
+ * be allocated or resized. */
+static union upb_value_ptr get_value_ptr(void *data, struct upb_msgdef *m,
+                                         struct upb_msg_fielddef *f,
+                                         upb_msg_getarray_cb_t getarray_cb)
 {
-  if(!*arr) {
-    *arr = malloc(sizeof(struct mm_upb_array));
-    memset(*arr, 0, sizeof(struct mm_upb_array));
-  }
-  struct mm_upb_array *a = (void*)*arr;
-  if(a->size < size) {
-    size = max(4, round_up_to_pow2(size));
-    size_t type_size = upb_type_info[t].size;
-    a->a.elements._void = realloc(a->a.elements._void, size * type_size);
-    /* Zero any newly initialized memory. */
-    memset(UPB_INDEX(a->a.elements._void, a->size, type_size), 0,
-           (size - a->size) * type_size);
-    a->size = size;
+  union upb_value_ptr p = upb_msg_getptr(data, f);
+  if(upb_isarray(f)) {
+    size_t len = upb_msg_isset(data, f) ? (*p.arr)->len : 0;
+    *p.arr = getarray_cb(data, m, *p.arr, f, len + 1);
+    p = upb_array_getelementptr(*p.arr, len, f->type);
  }
+  return p;
 }

-void upb_msg_reuse_strref(struct upb_string **str) { upb_msg_reuse_str(str, 0); }
-
-void upb_msg_reuse_submsg(void **msg, struct upb_msg *m)
-{
-  if(!*msg) *msg = upb_msgdata_new(m);
-}
-
-/* Parsing.  ******************************************************************/
+/* Callbacks for the stream parser. */

 static upb_field_type_t tag_cb(void *udata, struct upb_tag *tag,
                               void **user_field_desc)
 {
-  struct upb_msg_parse_state *s = udata;
-  struct upb_msg_field *f = upb_msg_fieldbynum(s->top->m, tag->field_number);
+  struct upb_msg_parser *mp = udata;
+  struct upb_msg_fielddef *f = upb_msg_fieldbynum(mp->top->m, tag->field_number);
  if(!f || !upb_check_type(tag->wire_type, f->type))
    return 0;  /* Skip unknown or fields of the wrong type. */
  *user_field_desc = f;
  return f->type;
 }

-/* Returns a pointer to where the next value for field "f" should be stored,
- * taking into account whether f is an array that may need to be reallocatd. */
-static union upb_value_ptr get_value_ptr(void *data, struct upb_msg_field *f)
-{
-  union upb_value_ptr p = upb_msg_getptr(data, f);
-  if(upb_isarray(f)) {
-    size_t len = upb_msg_isset(data, f) ? (*p.arr)->len : 0;
-    upb_msg_reuse_array(p.arr, len+1, f->type);
-    (*p.arr)->len = len + 1;
-    assert(p._void);
-    p = upb_array_getelementptr(*p.arr, len, f->type);
-    assert(p._void);
-  }
-  assert(p._void);
-  return p;
-}
-
 static upb_status_t value_cb(void *udata, uint8_t *buf, uint8_t *end,
                             void *user_field_desc, uint8_t **outbuf)
 {
-  struct upb_msg_parse_state *s = udata;
-  struct upb_msg_field *f = user_field_desc;
-  union upb_value_ptr p = get_value_ptr(s->top->data, f);
-  upb_msg_set(s->top->data, f);
+  struct upb_msg_parser *mp = udata;
+  struct upb_msg_fielddef *f = user_field_desc;
+  void *msg = mp->top->msg;
+  union upb_value_ptr p = get_value_ptr(msg, mp->top->m, f, mp->getarray_cb);
+  upb_msg_set(msg, f);
  UPB_CHECK(upb_parse_value(buf, end, f->type, p, outbuf));
-  //google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, s->top->m);
-  //upb_text_printfield(&s->p, *fd->name, f->type, upb_deref(p, f->type), stdout);
  return UPB_STATUS_OK;
 }

@ -296,60 +268,53 @@ static void str_cb(void *udata, uint8_t *str,
                   size_t avail_len, size_t total_len,
                   void *udesc)
 {
-  struct upb_msg_parse_state *s = udata;
-  struct upb_msg_field *f = udesc;
-  union upb_value_ptr p = get_value_ptr(s->top->data, f);
-  upb_msg_set(s->top->data, f);
+  struct upb_msg_parser *mp = udata;
+  struct upb_msg_fielddef *f = udesc;
+  void *msg = mp->top->msg;
+  union upb_value_ptr p = get_value_ptr(msg, mp->top->m, f, mp->getarray_cb);
+  upb_msg_set(msg, f);
  if(avail_len != total_len) abort();  /* TODO: support streaming. */
-  if(s->byref) {
-    upb_msg_reuse_strref(p.str);
+  if(avail_len == total_len && mp->byref) {
+    *p.str = mp->getstring_cb(msg, mp->top->m, *p.str, f, 0);
    (*p.str)->ptr = (char*)str;
    (*p.str)->byte_len = avail_len;
  } else {
-    upb_msg_reuse_str(p.str, avail_len);
+    *p.str = mp->getstring_cb(msg, mp->top->m, *p.str, f, total_len);
    memcpy((*p.str)->ptr, str, avail_len);
    (*p.str)->byte_len = avail_len;
  }
-  //google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, s->top->m);
-  //upb_text_printfield(&s->p, *fd->name, f->type, upb_deref(p, fd->type), stdout);
 }

 static void submsg_start_cb(void *udata, void *user_field_desc)
 {
-  struct upb_msg_parse_state *s = udata;
-  struct upb_msg_field *f = user_field_desc;
-  struct upb_msg *m = f->ref.msg;
-  void *data = s->top->data;  /* The message from the existing frame. */
-  union upb_value_ptr p = get_value_ptr(data, f);
-  upb_msg_reuse_submsg(p.msg, m);
-  if(!upb_msg_isset(data, f) || !s->merge)
-    upb_msg_clear(*p.msg, m);
-  upb_msg_set(data, f);
-  s->top++;
-  s->top->m = m;
-  s->top->data = *p.msg;
-  //upb_text_push(&s->p, *s->top->m->descriptor->name, stdout);
+  struct upb_msg_parser *mp = udata;
+  struct upb_msg_fielddef *f = user_field_desc;
+  struct upb_msgdef *oldmsgdef = mp->top->m;
+  void *oldmsg = mp->top->msg;
+  union upb_value_ptr p = get_value_ptr(oldmsg, oldmsgdef, f, mp->getarray_cb);
+  upb_msg_set(oldmsg, f);
+  *p.msg = mp->getmsg_cb(oldmsg, oldmsgdef, *p.msg, f);
+  mp->top++;
+  mp->top->m = f->ref.msg;
+  mp->top->msg = *p.msg;
 }

 static void submsg_end_cb(void *udata)
 {
-  struct upb_msg_parse_state *s = udata;
-  s->top--;
-  //upb_text_pop(&s->p, stdout);
+  struct upb_msg_parser *mp = udata;
+  mp->top--;
 }

-void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *msg,
-                         struct upb_msg *m, bool merge, bool byref)
+/* Externally-visible functions for the msg parser. */
+
+void upb_msg_parser_reset(struct upb_msg_parser *s, void *msg,
+                          struct upb_msgdef *m, bool byref)
 {
-  upb_parse_reset(&s->s, s);
-  upb_text_printer_init(&s->p, false);
-  s->merge = merge;
+  upb_stream_parser_reset(&s->s, s);
  s->byref = byref;
-  if(!merge && msg == NULL) msg = upb_msgdata_new(m);
-  upb_msg_clear(msg, m);
  s->top = s->stack;
  s->top->m = m;
-  s->top->data = msg;
+  s->top->msg = msg;
  s->s.tag_cb = tag_cb;
  s->s.value_cb = value_cb;
  s->s.str_cb = str_cb;
@ -357,38 +322,10 @@ void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *msg,
  s->s.submsg_end_cb = submsg_end_cb;
 }

-void upb_msg_parse_init(struct upb_msg_parse_state *s, void *msg,
-                        struct upb_msg *m, bool merge, bool byref)
-{
-  upb_parse_init(&s->s, s);
-  upb_msg_parse_reset(s, msg, m, merge, byref);
-}
-
-void upb_msg_parse_free(struct upb_msg_parse_state *s)
-{
-  upb_parse_free(&s->s);
-}
-
-upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
-                           void *data, size_t len, size_t *read)
+upb_status_t upb_msg_parser_parse(struct upb_msg_parser *s,
+                                  void *data, size_t len, size_t *read)
 {
-  return upb_parse(&s->s, data, len, read);
-}
-
-void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *str, bool byref)
-{
-  struct upb_msg_parse_state s;
-  void *msg = upb_msgdata_new(m);
-  upb_msg_parse_init(&s, msg, m, false, byref);
-  size_t read;
-  upb_status_t status = upb_msg_parse(&s, str->ptr, str->byte_len, &read);
-  upb_msg_parse_free(&s);
-  if(status == UPB_STATUS_OK && read == str->byte_len) {
-    return msg;
-  } else {
-    upb_msg_free(msg);
-    return NULL;
-  }
+  return upb_stream_parser_parse(&s->s, data, len, read);
 }

 /* Serialization.  ************************************************************/
@ -405,12 +342,12 @@ struct upb_msgsizes {

 /* Declared below -- this and get_valuesize are mutually recursive. */
 static size_t get_msgsize(struct upb_msgsizes *sizes, void *data,
-                          struct upb_msg *m);
+                          struct upb_msgdef *m);

 /* Returns a size of a value as it will be serialized.  Does *not* include
 * the size of the tag -- that is already accounted for. */
 static size_t get_valuesize(struct upb_msgsizes *sizes, union upb_value_ptr p,
-                            struct upb_msg_field *f,
+                            struct upb_msg_fielddef *f,
                            google_protobuf_FieldDescriptorProto *fd)
 {
  switch(f->type) {
@ -448,12 +385,12 @@ static size_t get_valuesize(struct upb_msgsizes *sizes, union upb_value_ptr p,
 * message.  However it also stores the results of each level of the recursion
 * in sizes, because we need all of this intermediate information later. */
 static size_t get_msgsize(struct upb_msgsizes *sizes, void *data,
-                          struct upb_msg *m)
+                          struct upb_msgdef *m)
 {
  size_t size = 0;
  /* We iterate over fields and arrays in reverse order. */
  for(int32_t i = m->num_fields - 1; i >= 0; i--) {
-    struct upb_msg_field *f = &m->fields[i];
+    struct upb_msg_fielddef *f = &m->fields[i];
    google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
    if(!upb_msg_isset(data, f)) continue;
    union upb_value_ptr p = upb_msg_getptr(data, f);
@ -480,7 +417,7 @@ static size_t get_msgsize(struct upb_msgsizes *sizes, void *data,
  return size;
 }

-void upb_msgsizes_read(struct upb_msgsizes *sizes, void *data, struct upb_msg *m)
+void upb_msgsizes_read(struct upb_msgsizes *sizes, void *data, struct upb_msgdef *m)
 {
  get_msgsize(sizes, data, m);
 }
@ -507,7 +444,7 @@ struct upb_msg_serialize_state {
  struct {
    int field_iter;
    int elem_iter;
-    struct upb_msg *m;
+    struct upb_msgdef *m;
    void *msg;
  } stack[UPB_MAX_NESTING], *top, *limit;
 };
@ -523,7 +460,7 @@ void upb_msg_serialize_free(struct upb_msg_serialize_state *s)
 }

 void upb_msg_serialize_init(struct upb_msg_serialize_state *s, void *data,
-                            struct upb_msg *m, struct upb_msgsizes *sizes)
+                            struct upb_msgdef *m, struct upb_msgsizes *sizes)
 {
  (void)s;
  (void)data;
@ -532,7 +469,7 @@ void upb_msg_serialize_init(struct upb_msg_serialize_state *s, void *data,
 }

 static upb_status_t serialize_tag(uint8_t *buf, uint8_t *end,
-                                  struct upb_msg_field *f, uint8_t **outptr)
+                                  struct upb_msg_fielddef *f, uint8_t **outptr)
 {
  /* TODO: need to have the field number also. */
  UPB_CHECK(upb_put_UINT32(buf, end, f->type, outptr));
@ -554,10 +491,10 @@ upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s,
  int i = s->top->field_iter;
  //int j = s->top->elem_iter;
  void *msg = s->top->msg;
-  struct upb_msg *m = s->top->m;
+  struct upb_msgdef *m = s->top->m;

  while(buf < end) {
-    struct upb_msg_field *f = &m->fields[i];
+    struct upb_msg_fielddef *f = &m->fields[i];
    union upb_value_ptr p = upb_msg_getptr(msg, f);
    serialize_tag(buf, end, f, &buf);
    if(f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
@ -571,6 +508,7 @@ upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s,
  return UPB_STATUS_OK;
 }

+
 /* Comparison.  ***************************************************************/

 bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2,
@ -607,7 +545,7 @@ bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2,
 }

 bool upb_array_eql(struct upb_array *arr1, struct upb_array *arr2,
-                   struct upb_msg_field *f, bool recursive)
+                   struct upb_msg_fielddef *f, bool recursive)
 {
  if(arr1->len != arr2->len) return false;
  if(upb_issubmsg(f)) {
@ -628,7 +566,7 @@ bool upb_array_eql(struct upb_array *arr1, struct upb_array *arr2,
  return true;
 }

-bool upb_msg_eql(void *data1, void *data2, struct upb_msg *m, bool recursive)
+bool upb_msg_eql(void *data1, void *data2, struct upb_msgdef *m, bool recursive)
 {
  /* Must have the same fields set.  TODO: is this wrong?  Should we also
   * consider absent defaults equal to explicitly set defaults? */
@ -640,20 +578,66 @@ bool upb_msg_eql(void *data1, void *data2, struct upb_msg *m, bool recursive)
   * padding) and memcmp the masked messages. */

  for(uint32_t i = 0; i < m->num_fields; i++) {
-    struct upb_msg_field *f = &m->fields[i];
+    struct upb_msg_fielddef *f = &m->fields[i];
    if(!upb_msg_isset(data1, f)) continue;
    union upb_value_ptr p1 = upb_msg_getptr(data1, f);
    union upb_value_ptr p2 = upb_msg_getptr(data2, f);
-    if(f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED) {
+    if(upb_isarray(f)) {
      if(!upb_array_eql(*p1.arr, *p2.arr, f, recursive)) return false;
-    } else {
-      if(upb_issubmsg(f)) {
-        if(recursive && !upb_msg_eql(p1.msg, p2.msg, f->ref.msg, recursive))
-          return false;
-      } else if(!upb_value_eql(p1, p2, f->type)) {
+    } else if(upb_issubmsg(f)) {
+      if(recursive && !upb_msg_eql(p1.msg, p2.msg, f->ref.msg, recursive))
        return false;
-      }
+    } else if(!upb_value_eql(p1, p2, f->type)) {
+      return false;
    }
  }
  return true;
 }
+
+
+static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
+                     struct upb_msg_fielddef *f,
+                     google_protobuf_FieldDescriptorProto *fd,
+                     FILE *stream);
+
+static void printmsg(struct upb_text_printer *printer, void *msg,
+                     struct upb_msgdef *m, FILE *stream)
+{
+  for(uint32_t i = 0; i < m->num_fields; i++) {
+    struct upb_msg_fielddef *f = &m->fields[i];
+    google_protobuf_FieldDescriptorProto *fd = upb_msg_field_descriptor(f, m);
+    if(!upb_msg_isset(msg, f)) continue;
+    union upb_value_ptr p = upb_msg_getptr(msg, f);
+    if(upb_isarray(f)) {
+      struct upb_array *arr = *p.arr;
+      for(uint32_t j = 0; j < arr->len; j++) {
+        union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type);
+        printval(printer, elem_p, f, fd, stream);
+      }
+    } else {
+      printval(printer, p, f, fd, stream);
+    }
+  }
+}
+
+static void printval(struct upb_text_printer *printer, union upb_value_ptr p,
+                     struct upb_msg_fielddef *f,
+                     google_protobuf_FieldDescriptorProto *fd,
+                     FILE *stream)
+{
+  if(upb_issubmsg(f)) {
+    upb_text_push(printer, *fd->name, stream);
+    printmsg(printer, *p.msg, f->ref.msg, stream);
+    upb_text_pop(printer, stream);
+  } else {
+    upb_text_printfield(printer, *fd->name, f->type, upb_deref(p, f->type), stream);
+  }
+}
+
+void upb_msg_print(void *data, struct upb_msgdef *m, bool single_line,
+                   FILE *stream)
+{
+  struct upb_text_printer printer;
+  upb_text_printer_init(&printer, single_line);
+  printmsg(&printer, data, m, stream);
+}
--- a/src/upb_msg.h
+++ b/src/upb_msg.h
@ -3,93 +3,48 @@
 *
 * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
 *
- * A upb_msg provides a full description of a message as defined in a .proto
- * file.  It supports many features and operations for dealing with proto
+ * A upb_msgdef provides a full description of a message type as defined in a
+ * .proto file.  Using a upb_msgdef, it is possible to treat an arbitrary hunk
+ * of memory (a void*) as a protobuf of the given type.  We will call this
+ * void* a upb_msg in the context of this interface.
+ *
+ * Clients generally do not construct or destruct upb_msgdef objects directly.
+ * They are managed by upb_contexts, and clients can obtain upb_msgdef pointers
+ * directly from a upb_context.
+ *
+ * A upb_msg is READ-ONLY, and the upb_msgdef functions in this file provide
+ * read-only access.  For a mutable message, or for a message that you can
+ * take a reference to (to prevent its destruction), see upb_mm_msg.h, which is
+ * a layer on top of upb_msg that adds memory management semantics.
+ *
+ * upb_msgdef supports many features and operations for dealing with proto
 * messages:
 * - reflection over .proto types at runtime (list fields, get names, etc).
 * - an in-memory byte-level format for efficiently storing and accessing msgs.
- * - serializing and deserializing from the in-memory format to a protobuf.
- * - optional memory management for handling strings, arrays, and submessages.
- *
- * Throughout this file, the following convention is used:
- * - "struct upb_msg *m" describes a message type (name, list of fields, etc).
- * - "void *data" is an actual message stored using the in-memory format.
+ * - serializing from the in-memory format to a protobuf.
+ * - parsing from a protobuf to an in-memory data structure (you either
+ *   supply callbacks for allocating/repurposing memory or use a simplified
+ *   version that parses into newly-allocated memory).
 *
 * The in-memory format is very much like a C struct that you can define at
 * run-time, but also supports reflection.  Like C structs it supports
 * offset-based access, as opposed to the much slower name-based lookup.  The
 * format stores both the values themselves and bits describing whether each
- * field is set or not.  For example:
- *
- * parsed message Foo {
- *   optional bool a = 1;
- *   repeated uint32 b = 2;
- *   optional Bar c = 3;
- * }
+ * field is set or not.
 *
- * The in-memory layout for this message on a 32-bit machine will be something
- * like:
- *
- *  Foo
- * +------------------------+
- * | set_flags a:1, b:1, c:1|
- * +------------------------+
- * | bool a (1 byte)        |
- * +------------------------+
- * | padding (3 bytes)      |
- * +------------------------+         upb_array
- * | upb_array* b (4 bytes) | ---->  +----------------------------+
- * +------------------------+        | uint32* elements (4 bytes) | ---+
- * | Bar* c (4 bytes)       |        +----------------------------+    |
- * +------------------------+        | uint32 size (4 bytes)      |    |
- *                                   +----------------------------+    |
- *                                                                     |
- *    -----------------------------------------------------------------+
- *    |
- *    V
- *  uint32 array
- * +----+----+----+----+----+----+
- * | e1 | e2 | e3 | e4 | e5 | e6 |
- * +----+----+----+----+----+----+
- *
- * And the corresponding C structure (as emitted by the proto compiler) would be:
- *
- * struct Foo {
- *   union {
- *     uint8_t bytes[1];
- *     struct {
- *       bool a:1;
- *       bool b:1;
- *       bool c:1;
- *     } has;
- *   } set_flags;
- *   bool a;
- *   upb_uint32_array *b;
- *   Bar *c;
- * }
+ * For a more in-depth description of the in-memory format, see:
+ *   http://wiki.github.com/haberman/upb/inmemoryformat
 *
 * Because the C struct emitted by the upb compiler uses exactly the same
 * byte-level format as the reflection interface, you can access the same hunk
 * of memory either way.  The C struct provides maximum performance and static
- * type safety; upb_msg provides flexibility.
+ * type safety; upb_msgdef provides flexibility.
 *
 * The in-memory format has no interoperability guarantees whatsoever, except
 * that a single version of upb will interoperate with itself.  Don't even
 * think about persisting the in-memory format or sending it anywhere.  That's
 * what serialized protobufs are for!  The in-memory format is just that -- an
 * in-memory representation that allows for fast access.
- *
- * The in-memory format is carefully designed to *not* mandate any particular
- * memory management scheme.  This should make it easier to integrate with
- * existing memory management schemes, or to perform advanced techniques like
- * reference counting, garbage collection, and string references.  Different
- * clients can read each others messages regardless of what memory management
- * scheme each is using.
- *
- * A memory management scheme is provided for convenience, and it is used by
- * default by the stock message parser.  Clients can substitute their own
- * memory management scheme into this parser without any loss of generality
- * or performance.
 */

 #ifndef UPB_MSG_H_
@ -108,21 +63,9 @@ extern "C" {

 /* Message definition. ********************************************************/

-/* Structure that describes a single field in a message.  This structure is very
- * consciously designed to fit into 12/16 bytes (32/64 bit, respectively),
- * because copies of this struct are in the hash table that is read in the
- * critical path of parsing.  Minimizing the size of this struct increases
- * cache-friendliness. */
-struct upb_msg_field {
-  union upb_symbol_ref ref;
-  uint32_t byte_offset;     /* Where to find the data. */
-  uint16_t field_index;     /* Indexes upb_msg.fields. Also indicates set bit */
-  upb_field_type_t type;    /* Copied from descriptor for cache-friendliness. */
-  upb_label_t label;
-};
-
+struct upb_msg_fielddef;
 /* Structure that describes a single .proto message type. */
-struct upb_msg {
+struct upb_msgdef {
  struct google_protobuf_DescriptorProto *descriptor;
  struct upb_string fqname;      /* Fully qualified. */
  size_t size;
@ -131,93 +74,65 @@ struct upb_msg {
  uint32_t num_required_fields;  /* Required fields have the lowest set bytemasks. */
  struct upb_inttable fields_by_num;
  struct upb_strtable fields_by_name;
-  struct upb_msg_field *fields;
+  struct upb_msg_fielddef *fields;
  struct google_protobuf_FieldDescriptorProto **field_descriptors;
 };

-/* The num->field and name->field maps in upb_msg allow fast lookup of fields
- * by number or name.  These lookups are in the critical path of parsing and
- * field lookup, so they must be as fast as possible.  To make these more
- * cache-friendly, we put the data in the table by value. */

-struct upb_fieldsbynum_entry {
-  struct upb_inttable_entry e;
-  struct upb_msg_field f;
+/* Structure that describes a single field in a message.  This structure is very
+ * consciously designed to fit into 12/16 bytes (32/64 bit, respectively),
+ * because copies of this struct are in the hash table that is read in the
+ * critical path of parsing.  Minimizing the size of this struct increases
+ * cache-friendliness. */
+struct upb_msg_fielddef {
+  union upb_symbol_ref ref;
+  uint32_t byte_offset;     /* Where to find the data. */
+  uint16_t field_index;     /* Indexes upb_msgdef.fields and indicates set bit */
+  upb_field_type_t type;    /* Copied from descriptor for cache-friendliness. */
+  upb_label_t label;
 };

-struct upb_fieldsbyname_entry {
-  struct upb_strtable_entry e;
-  struct upb_msg_field f;
-};
+INLINE bool upb_issubmsg(struct upb_msg_fielddef *f) {
+  return upb_issubmsgtype(f->type);
+}
+INLINE bool upb_isstring(struct upb_msg_fielddef *f) {
+  return upb_isstringtype(f->type);
+}
+INLINE bool upb_isarray(struct upb_msg_fielddef *f) {
+  return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED;
+}

-/* Can be used to retrieve a field descriptor given the upb_msg_field ref. */
+/* Can be used to retrieve a field descriptor given the upb_msg_fielddef. */
 INLINE struct google_protobuf_FieldDescriptorProto *upb_msg_field_descriptor(
-    struct upb_msg_field *f, struct upb_msg *m) {
+    struct upb_msg_fielddef *f, struct upb_msgdef *m) {
  return m->field_descriptors[f->field_index];
 }

-/* Initializes/frees a upb_msg.  Usually this will be called by upb_context, and
- * clients will not have to construct one directly.
- *
- * Caller retains ownership of d, but the msg will contain references to it, so
- * it must outlive the msg.  Note that init does not resolve upb_msg_field.ref
- * the caller should do that post-initialization by calling upb_msg_ref()
- * below.
- *
- * fqname indicates the fully-qualified name of this message.  Ownership of
- * fqname passes to the msg, but the msg will contain references to it, so it
- * must outlive the msg.
- *
- * sort indicates whether or not it is safe to reorder the fields from the order
- * they appear in d.  This should be false if code has been compiled against a
- * header for this type that expects the given order. */
-bool upb_msg_init(struct upb_msg *m, struct google_protobuf_DescriptorProto *d,
-                  struct upb_string fqname, bool sort);
-void upb_msg_free(struct upb_msg *m);
-
-/* Sort the given field descriptors in-place, according to what we think is an
- * optimal ordering of fields.  This can change from upb release to upb release.
- * This is meant for internal use. */
-void upb_msg_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num);
+/* Field access. **************************************************************/

-/* Clients use this function on a previously initialized upb_msg to resolve the
- * "ref" field in the upb_msg_field.  Since messages can refer to each other in
- * mutually-recursive ways, this step must be separated from initialization. */
-void upb_msg_ref(struct upb_msg *m, struct upb_msg_field *f, union upb_symbol_ref ref);
+/* Note that these only provide access to fields that are directly in the msg
+ * itself.  For dynamic fields (strings, arrays, and submessages) it will be
+ * necessary to dereference the returned values. */

-/* Looks up a field by name or number.  While these are written to be as fast
- * as possible, it will still be faster to cache the results of this lookup if
- * possible.  These return NULL if no such field is found. */
-INLINE struct upb_msg_field *upb_msg_fieldbynum(struct upb_msg *m,
-                                                uint32_t number) {
-  struct upb_fieldsbynum_entry *e =
-      (struct upb_fieldsbynum_entry*)upb_inttable_fast_lookup(
-          &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
-  return e ? &e->f : NULL;
-}
-INLINE struct upb_msg_field *upb_msg_fieldbyname(struct upb_msg *m,
-                                                 struct upb_string *name) {
-  struct upb_fieldsbyname_entry *e =
-      (struct upb_fieldsbyname_entry*)upb_strtable_lookup(
-          &m->fields_by_name, name);
-  return e ? &e->f : NULL;
+/* Returns a pointer to a specific field in a message. */
+INLINE union upb_value_ptr upb_msg_getptr(void *msg, struct upb_msg_fielddef *f) {
+  union upb_value_ptr p;
+  p._void = ((char*)msg + f->byte_offset);
+  return p;
 }

-INLINE bool upb_issubmsg(struct upb_msg_field *f) {
-  return upb_issubmsgtype(f->type);
-}
-INLINE bool upb_isstring(struct upb_msg_field *f) {
-  return upb_isstringtype(f->type);
-}
-INLINE bool upb_isarray(struct upb_msg_field *f) {
-  return f->label == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED;
+/* Returns the value of a specific field in a message. */
+INLINE union upb_value upb_msg_get(void *msg, struct upb_msg_fielddef *f) {
+  return upb_deref(upb_msg_getptr(msg, f), f->type);
 }

 /* "Set" flag reading and writing.  *******************************************/

-/* Please note that these functions do not perform any memory management or in
- * any way ensure that the fields are valid.  They *only* test/set/clear a bit
- * that indicates whether the field is set or not. */
+/* All upb code and code using upb should guarantee that the set flags are
+ * always valid.  In particular, whenever the set flag of a dynamic field
+ * (string, array, or submessage) is set, that field's pointer must be valid.
+ *
+ * Clients should never set fields on a plain upb_msg, only on a upb_mm_msg. */

 /* Returns the byte offset where we store whether this field is set. */
 INLINE size_t upb_isset_offset(uint32_t field_index) {
@ -230,135 +145,162 @@ INLINE uint8_t upb_isset_mask(uint32_t field_index) {
 }

 /* Sets the set bit for this field in the given message. */
-INLINE void upb_msg_set(void *s, struct upb_msg_field *f)
+INLINE void upb_msg_set(void *msg, struct upb_msg_fielddef *f)
 {
-  ((char*)s)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
+  ((char*)msg)[upb_isset_offset(f->field_index)] |= upb_isset_mask(f->field_index);
 }

 /* Clears the set bit for this field in the given message. */
-INLINE void upb_msg_unset(void *s, struct upb_msg_field *f)
+INLINE void upb_msg_unset(void *msg, struct upb_msg_fielddef *f)
 {
-  ((char*)s)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index);
+  ((char*)msg)[upb_isset_offset(f->field_index)] &= ~upb_isset_mask(f->field_index);
 }

 /* Tests whether the given field is set. */
-INLINE bool upb_msg_isset(void *s, struct upb_msg_field *f)
+INLINE bool upb_msg_isset(void *msg, struct upb_msg_fielddef *f)
 {
-  return ((char*)s)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index);
+  return ((char*)msg)[upb_isset_offset(f->field_index)] & upb_isset_mask(f->field_index);
 }

 /* Returns true if *all* required fields are set, false otherwise. */
-INLINE bool upb_msg_all_required_fields_set(void *s, struct upb_msg *m)
+INLINE bool upb_msg_all_required_fields_set(void *msg, struct upb_msgdef *m)
 {
  int num_fields = m->num_required_fields;
  int i = 0;
  while(num_fields > 8) {
-    if(((uint8_t*)s)[i++] != 0xFF) return false;
+    if(((uint8_t*)msg)[i++] != 0xFF) return false;
    num_fields -= 8;
  }
-  if(((uint8_t*)s)[i] != (1 << num_fields) - 1) return false;
+  if(((uint8_t*)msg)[i] != (1 << num_fields) - 1) return false;
  return true;
 }

 /* Clears the set bit for all fields. */
-INLINE void upb_msg_clear(void *s, struct upb_msg *m)
+INLINE void upb_msg_clear(void *msg, struct upb_msgdef *m)
 {
-  memset(s, 0, m->set_flags_bytes);
+  memset(msg, 0, m->set_flags_bytes);
 }

-/* Scalar (non-array) data access. ********************************************/
+/* Number->field and name->field lookup.  *************************************/

-/* Returns a pointer to a specific field in a message. */
-INLINE union upb_value_ptr upb_msg_getptr(void *data, struct upb_msg_field *f) {
-  union upb_value_ptr p;
-  p._void = ((char*)data + f->byte_offset);
-  return p;
-}
+/* The num->field and name->field maps in upb_msgdef allow fast lookup of fields
+ * by number or name.  These lookups are in the critical path of parsing and
+ * field lookup, so they must be as fast as possible.  To make these more
+ * cache-friendly, we put the data in the table by value. */

-/* Returns a a specific field in a message. */
-INLINE union upb_value upb_msg_get(void *data, struct upb_msg_field *f) {
-  return upb_deref(upb_msg_getptr(data, f), f->type);
-}
+struct upb_fieldsbynum_entry {
+  struct upb_inttable_entry e;
+  struct upb_msg_fielddef f;
+};

-/* Memory management  *********************************************************/
+struct upb_fieldsbyname_entry {
+  struct upb_strtable_entry e;
+  struct upb_msg_fielddef f;
+};

-/* One important note about these memory management routines: they must be used
- * completely or not at all (for each message).  In other words, you can't
- * allocate your own message and then free it with upb_msgdata_free.  As
- * another example, you can't point a field to your own string and then call
- * upb_msg_reuse_str. */
+/* Looks up a field by name or number.  While these are written to be as fast
+ * as possible, it will still be faster to cache the results of this lookup if
+ * possible.  These return NULL if no such field is found. */
+INLINE struct upb_msg_fielddef *upb_msg_fieldbynum(struct upb_msgdef *m,
+                                                   uint32_t number) {
+  struct upb_fieldsbynum_entry *e =
+      (struct upb_fieldsbynum_entry*)upb_inttable_fast_lookup(
+          &m->fields_by_num, number, sizeof(struct upb_fieldsbynum_entry));
+  return e ? &e->f : NULL;
+}
+
+INLINE struct upb_msg_fielddef *upb_msg_fieldbyname(struct upb_msgdef *m,
+                                                    struct upb_string *name) {
+  struct upb_fieldsbyname_entry *e =
+      (struct upb_fieldsbyname_entry*)upb_strtable_lookup(
+          &m->fields_by_name, name);
+  return e ? &e->f : NULL;
+}

-/* Allocates and frees message data, respectively.  Newly allocated data is
- * initialized to empty.  Freeing a message always frees string data, but
- * the client can decide whether or not submessages should be deleted. */
-void *upb_msgdata_new(struct upb_msg *m);
-void upb_msgdata_free(void *data, struct upb_msg *m, bool free_submsgs);

-/* Given a pointer to the appropriate field of the message or array, these
- * functions will lazily allocate memory for a string, array, or submessage.
- * If the previously allocated memory is big enough, it will reuse it without
- * re-allocating.  See upb_msg.c for example usage. */
+/* Simple, one-shot parsing ***************************************************/

-/* Reuse a string of at least the given size. */
-void upb_msg_reuse_str(struct upb_string **str, uint32_t size);
-/* Like the previous, but assumes that the string will be by reference, so
- * doesn't allocate memory for the string itself. */
-void upb_msg_reuse_strref(struct upb_string **str);
+/* A simple interface for parsing into a newly-allocated message.  This
+ * interface should only be used when the message will be read-only with
+ * respect to memory management (eg. won't add or remove internal references to
+ * dynamic memory).  For more flexible (but also more complicated) interfaces,
+ * see below and in upb_mm_msg.h. */

-/* Reuse an array of at least the given size, with the given type. */
-void upb_msg_reuse_array(struct upb_array **arr, uint32_t size,
-                         upb_field_type_t t);
+/* Parses the protobuf in s (which is expected to be complete) and allocates
+ * new message data to hold it.  If byref is set, strings in the returned
+ * upb_msg will reference s instead of copying from it, but this requires that
+ * s will live for as long as the returned message does. */
+void *upb_msg_parsenew(struct upb_msgdef *m, struct upb_string *s);

-/* Reuse a submessage of the given type. */
-void upb_msg_reuse_submsg(void **msg, struct upb_msg *m);
+/* This function should be used to free messages that were parsed with
+ * upb_msg_parsenew.  It will free the message appropriately (including all
+ * submessages). */
+void upb_msg_free(void *msg, struct upb_msgdef *m);

-/* Parsing.  ******************************************************************/

-/* This is all just a layer on top of the stream-oriented facility in
- * upb_parse.h. */
+/* Parsing with (re)allocation callbacks. *************************************/

-struct upb_msg_parse_frame {
-  struct upb_msg *m;
-  void *data;
+/* This interface parses protocol buffers into upb_msgs, but allows the client
+ * to supply allocation callbacks whenever the parser needs to obtain a string,
+ * array, or submsg (a "dynamic field").  If the parser sees that a dynamic
+ * field is already present (its "set bit" is set) it will use that, otherwise
+ * it will call the allocation callback to obtain one.
+ *
+ * This may seem trivial (since nearly all clients will use malloc and free for
+ * memory management), but the allocation callback can be used for more than
+ * just allocation.  If we are parsing data into an existing upb_msg, the
+ * allocation callback can examine any existing memory that is allocated for
+ * the dynamic field and determine whether it can reuse it.  It can also
+ * perform memory management like unrefing the existing field or refing the new.
+ *
+ * This parser is layered on top of the event-based parser in upb_parse.h.  The
+ * parser in upb_mm_msg.h is layered on top of this parser.
+ *
+ * This parser is fully streaming-capable. */
+
+typedef struct upb_array *(*upb_msg_getarray_cb_t)(
+    void *msg, struct upb_msgdef *m,
+    struct upb_array *existingval, struct upb_msg_fielddef *f,
+    upb_arraylen_t size);
+
+/* Callback to allocate a string of size >=len.  If len==0 then the client can
+ * assume that the parser intends to reference the memory instead of copying
+ * it. */
+typedef struct upb_string *(*upb_msg_getstring_cb_t)(
+    void *msg, struct upb_msgdef *m,
+    struct upb_string *existingval, struct upb_msg_fielddef *f, size_t len);
+
+typedef void *(*upb_msg_getmsg_cb_t)(
+    void *msg, struct upb_msgdef *m,
+    void *existingval, struct upb_msg_fielddef *f);
+
+struct upb_msg_parser_frame {
+  struct upb_msgdef *m;
+  void *msg;
 };

-#include "upb_text.h"
-struct upb_msg_parse_state {
-  struct upb_parse_state s;
+struct upb_msg_parser {
+  struct upb_stream_parser s;
  bool merge;
  bool byref;
  struct upb_msg *m;
-  struct upb_msg_parse_frame stack[UPB_MAX_NESTING], *top;
-  struct upb_text_printer p;
+  struct upb_msg_parser_frame stack[UPB_MAX_NESTING], *top;
+  upb_msg_getarray_cb_t getarray_cb;
+  upb_msg_getstring_cb_t getstring_cb;
+  upb_msg_getmsg_cb_t getmsg_cb;
 };

-/* Initializes/frees a message parser.  The parser will write the data to the
- * message data "data", which the caller must have previously allocated (the
- * parser will allocate submsgs, strings, and arrays as needed, however).
- *
- * "Merge" controls whether the parser will append to data instead of
- * overwriting.  Merging concatenates arrays and merges submessages instead
- * of clearing both.
- *
- * "Byref" controls whether the new message data copies or references strings
- * it encounters.  If byref == true, then all strings supplied to upb_msg_parse
- * must remain unchanged and must outlive data. */
-void upb_msg_parse_init(struct upb_msg_parse_state *s, void *data,
-                        struct upb_msg *m, bool merge, bool byref);
-void upb_msg_parse_reset(struct upb_msg_parse_state *s, void *data,
-                         struct upb_msg *m, bool merge, bool byref);
-void upb_msg_parse_free(struct upb_msg_parse_state *s);
-
-/* Parses a protobuf fragment, writing the data to the message that was passed
- * to upb_msg_parse_init.  This function can be called multiple times as more
- * data becomes available. */
-upb_status_t upb_msg_parse(struct upb_msg_parse_state *s,
-                           void *data, size_t len, size_t *read);
+void upb_msg_parser_reset(struct upb_msg_parser *p,
+                          void *msg, struct upb_msgdef *m,
+                          bool byref);
+
+/* Parses protocol buffer data out of data, which has a length of len.  The
+ * data need not be a complete protocol buffer.  The number of bytes parsed is
+ * returned in *read, and the next call to upb_msg_parser_parse must supply
+ * data that is *read bytes past data in the logical stream. */
+upb_status_t upb_msg_parser_parse(struct upb_msg_parser *p,
+                                  void *data, size_t len, size_t *read);

-/* Parses the protobuf in s (which is expected to be complete) and allocates
- * new message data to hold it.  This is an alternative to the streaming API
- * above.  "byref" works as in upb_msg_parse_init(). */
-void *upb_alloc_and_parse(struct upb_msg *m, struct upb_string *s, bool byref);

 /* Serialization  *************************************************************/

@ -377,8 +319,8 @@ void upb_msgsizes_free(struct upb_msgsizes *sizes);

 /* Given a previously initialized sizes, recurse over the message and store its
 * sizes in 'sizes'. */
-void upb_msgsizes_read(struct upb_msgsizes *sizes, void *data,
-                       struct upb_msg *m);
+void upb_msgsizes_read(struct upb_msgsizes *sizes, void *msg,
+                       struct upb_msgdef *m);

 /* Returns the total size of the serialized message given in sizes.  Must be
 * preceded by a call to upb_msgsizes_read. */
@ -391,8 +333,8 @@ struct upb_msg_serialize_state;
 * "sizes" and the parse being fully completed. */
 void upb_msg_serialize_alloc(struct upb_msg_serialize_state *s);
 void upb_msg_serialize_free(struct upb_msg_serialize_state *s);
-void upb_msg_serialize_init(struct upb_msg_serialize_state *s, void *data,
-                            struct upb_msg *m, struct upb_msgsizes *sizes);
+void upb_msg_serialize_init(struct upb_msg_serialize_state *s, void *msg,
+                            struct upb_msgdef *m, struct upb_msgsizes *sizes);

 /* Serializes the next set of bytes into buf (which has size len).  Returns
 * UPB_STATUS_OK if serialization is complete, or UPB_STATUS_NEED_MORE_DATA
@ -405,8 +347,43 @@ upb_status_t upb_msg_serialize(struct upb_msg_serialize_state *s,

 /* Text dump  *****************************************************************/

-bool upb_msg_eql(void *data1, void *data2, struct upb_msg *m, bool recursive);
-void upb_msg_print(void *data, struct upb_msg *m, FILE *stream);
+bool upb_msg_eql(void *data1, void *data2, struct upb_msgdef *m, bool recursive);
+void upb_msg_print(void *data, struct upb_msgdef *m, bool single_line,
+                   FILE *stream);
+
+/* Internal functions. ********************************************************/
+
+/* Initializes/frees a upb_msgdef.  Usually this will be called by upb_context,
+ * and clients will not have to construct one directly.
+ *
+ * Caller retains ownership of d, but the msg will contain references to it, so
+ * it must outlive the msg.  Note that init does not resolve
+ * upb_msg_fielddef.ref; the caller should do that post-initialization by
+ * calling upb_msgdef_ref() below.
+ *
+ * fqname indicates the fully-qualified name of this message.  Ownership of
+ * fqname passes to the msg, but the msg will contain references to it, so it
+ * must outlive the msg.
+ *
+ * sort indicates whether or not it is safe to reorder the fields from the order
+ * they appear in d.  This should be false if code has been compiled against a
+ * header for this type that expects the given order. */
+bool upb_msgdef_init(struct upb_msgdef *m,
+                     struct google_protobuf_DescriptorProto *d,
+                     struct upb_string fqname, bool sort);
+void upb_msgdef_free(struct upb_msgdef *m);
+
+/* Sort the given field descriptors in-place, according to what we think is an
+ * optimal ordering of fields.  This can change from upb release to upb
+ * release. */
+void upb_msgdef_sortfds(google_protobuf_FieldDescriptorProto **fds, size_t num);
+
+/* Clients use this function on a previously initialized upb_msgdef to resolve
+ * the "ref" field in the upb_msg_fielddef.  Since messages can refer to each
+ * other in mutually-recursive ways, this step must be separated from
+ * initialization. */
+void upb_msgdef_ref(struct upb_msgdef *m, struct upb_msg_fielddef *f,
+                    union upb_symbol_ref ref);

 #ifdef __cplusplus
 }  /* extern "C" */
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@ -100,7 +100,7 @@ upb_status_t upb_parse_value(uint8_t *buf, uint8_t *end, upb_field_type_t ft,
 #undef CASE
 }

-void upb_parse_reset(struct upb_parse_state *state, void *udata)
+void upb_stream_parser_reset(struct upb_stream_parser *state, void *udata)
 {
  state->top = state->stack;
  state->limit = &state->stack[UPB_MAX_NESTING];
@ -111,18 +111,7 @@ void upb_parse_reset(struct upb_parse_state *state, void *udata)
  state->udata = udata;
 }

-void upb_parse_init(struct upb_parse_state *state, void *udata)
-{
-  memset(state, 0, sizeof(struct upb_parse_state));  /* Clear all callbacks. */
-  upb_parse_reset(state, udata);
-}
-
-void upb_parse_free(struct upb_parse_state *state)
-{
-  (void)state;
-}
-
-static void *pop_stack_frame(struct upb_parse_state *s, uint8_t *buf)
+static void *pop_stack_frame(struct upb_stream_parser *s, uint8_t *buf)
 {
  if(s->submsg_end_cb) s->submsg_end_cb(s->udata);
  s->top--;
@ -130,7 +119,7 @@ static void *pop_stack_frame(struct upb_parse_state *s, uint8_t *buf)
 }

 /* Returns the next end offset. */
-static upb_status_t push_stack_frame(struct upb_parse_state *s,
+static upb_status_t push_stack_frame(struct upb_stream_parser *s,
                                     uint8_t *buf, uint32_t len,
                                     void *user_field_desc, uint8_t **submsg_end)
 {
@ -142,8 +131,8 @@ static upb_status_t push_stack_frame(struct upb_parse_state *s,
  return UPB_STATUS_OK;
 }

-upb_status_t upb_parse(struct upb_parse_state *s, void *_buf, size_t len,
-                       size_t *read)
+upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s,
+                                     void *_buf, size_t len, size_t *read)
 {
  uint8_t *buf = _buf;
  uint8_t *completed = buf;
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@ -42,14 +42,11 @@ INLINE bool upb_isstringtype(upb_field_type_t type) {
 * as data becomes available.  The parser is fully streaming-capable, so the
 * data need not all be available at the same time. */

-struct upb_parse_state;
+struct upb_stream_parser;

-/* Initialize and free (respectively) the given parse state, which must have
- * been previously allocated.  udata_size specifies how much space will be
- * available at parse_stack_frame.user_data in each frame for user data. */
-void upb_parse_init(struct upb_parse_state *state, void *udata);
-void upb_parse_reset(struct upb_parse_state *state, void *udata);
-void upb_parse_free(struct upb_parse_state *state);
+/* Resets the internal state of an already-allocated parser.   udata will be
+ * passed to callbacks as appropriate. */
+void upb_stream_parser_reset(struct upb_stream_parser *p, void *udata);

 /* The callback that is called immediately after a tag has been parsed.  The
 * client should determine whether it wants to parse or skip the corresponding
@ -86,7 +83,7 @@ typedef void (*upb_submsg_start_cb)(void *udata,
                                    void *user_field_desc);
 typedef void (*upb_submsg_end_cb)(void *udata);

-struct upb_parse_state {
+struct upb_stream_parser {
  /* For delimited submsgs, counts from the submsg len down to zero.
   * For group submsgs, counts from zero down to the negative len. */
  uint32_t stack[UPB_MAX_NESTING], *top, *limit;
@ -115,8 +112,8 @@ struct upb_parse_state {
 *
 * TODO: see if we can provide the following guarantee efficiently:
 *   *read will always be >= len. */
-upb_status_t upb_parse(struct upb_parse_state *s, void *buf, size_t len,
-                       size_t *read);
+upb_status_t upb_stream_parser_parse(struct upb_stream_parser *p,
+                                     void *buf, size_t len, size_t *read);

 extern upb_wire_type_t upb_expected_wire_types[];
 /* Returns true if wt is the correct on-the-wire type for ft. */
--- a/src/upb_table.c
+++ b/src/upb_table.c
@ -13,7 +13,7 @@
 static const upb_inttable_key_t EMPTYENT = 0;
 static const double MAX_LOAD = 0.85;

-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);
+uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed);

 /* We use 1-based indexes into the table so that 0 can be "NULL". */
 static struct upb_inttable_entry *intent(struct upb_inttable *t, int32_t i) {
@ -238,7 +238,7 @@ void *upb_strtable_next(struct upb_strtable *t, struct upb_strtable_entry *cur)
 //   1. It will not work incrementally.
 //   2. It will not produce the same results on little-endian and big-endian
 //      machines.
-static uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
+uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed)
 {
  // 'm' and 'r' are mixing constants generated offline.
  // They're not really 'magic', they just happen to work well.
--- a/tools/upbc.c
+++ b/tools/upbc.c
@ -141,7 +141,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
  for(int i = 0; i < num_entries; i++) {  /* Foreach message */
    if(entries[i]->type != UPB_SYM_MESSAGE) continue;
    struct upb_symtab_entry *entry = entries[i];
-    struct upb_msg *m = entry->ref.msg;
+    struct upb_msgdef *m = entry->ref.msg;
    /* We use entry->e.key (the fully qualified name). */
    struct upb_string msg_name = upb_strdup(entry->e.key);
    to_cident(msg_name);
@ -158,7 +158,7 @@ static void write_h(struct upb_symtab_entry *entries[], int num_entries,
    fputs("    } has;\n", stream);
    fputs("  } set_flags;\n", stream);
    for(uint32_t j = 0; j < m->num_fields; j++) {
-      struct upb_msg_field *f = &m->fields[j];
+      struct upb_msg_fielddef *f = &m->fields[j];
      struct google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j];
      if(f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP ||
         f->type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
@ -229,7 +229,7 @@ struct strtable_entry {

 struct typetable_entry {
  struct upb_strtable_entry e;
-  struct upb_msg_field *field;
+  struct upb_msg_fielddef *field;
  struct upb_string cident;  /* Type name converted with to_cident(). */
  /* A list of all values of this type, in an established order. */
  union upb_value *values;
@ -259,11 +259,11 @@ int compare_entries(const void *_e1, const void *_e2)
 *
 * TODO: make these use a generic msg visitor. */

-static void add_strings_from_msg(void *data, struct upb_msg *m,
+static void add_strings_from_msg(void *data, struct upb_msgdef *m,
                                 struct upb_strtable *t);

 static void add_strings_from_value(union upb_value_ptr p,
-                                   struct upb_msg_field *f,
+                                   struct upb_msg_fielddef *f,
                                   struct upb_strtable *t)
 {
  if(upb_isstringtype(f->type)) {
@ -275,11 +275,11 @@ static void add_strings_from_value(union upb_value_ptr p,
  }
 }

-static void add_strings_from_msg(void *data, struct upb_msg *m,
+static void add_strings_from_msg(void *data, struct upb_msgdef *m,
                                 struct upb_strtable *t)
 {
  for(uint32_t i = 0; i < m->num_fields; i++) {
-    struct upb_msg_field *f = &m->fields[i];
+    struct upb_msg_fielddef *f = &m->fields[i];
    if(!upb_msg_isset(data, f)) continue;
    union upb_value_ptr p = upb_msg_getptr(data, f);
    if(upb_isarray(f)) {
@ -299,7 +299,7 @@ static void add_strings_from_msg(void *data, struct upb_msg *m,


 struct typetable_entry *get_or_insert_typeentry(struct upb_strtable *t,
-                                                struct upb_msg_field *f)
+                                                struct upb_msg_fielddef *f)
 {
  struct upb_string type_name = upb_issubmsg(f) ? f->ref.msg->fqname :
                                                  upb_type_info[f->type].ctype;
@ -318,7 +318,7 @@ struct typetable_entry *get_or_insert_typeentry(struct upb_strtable *t,
  return type_e;
 }

-static void add_value(union upb_value value, struct upb_msg_field *f,
+static void add_value(union upb_value value, struct upb_msg_fielddef *f,
                      struct upb_strtable *t)
 {
  struct typetable_entry *type_e = get_or_insert_typeentry(t, f);
@ -329,10 +329,10 @@ static void add_value(union upb_value value, struct upb_msg_field *f,
  type_e->values[type_e->values_len++] = value;
 }

-static void add_submsgs(void *data, struct upb_msg *m, struct upb_strtable *t)
+static void add_submsgs(void *data, struct upb_msgdef *m, struct upb_strtable *t)
 {
  for(uint32_t i = 0; i < m->num_fields; i++) {
-    struct upb_msg_field *f = &m->fields[i];
+    struct upb_msg_fielddef *f = &m->fields[i];
    if(!upb_msg_isset(data, f)) continue;
    union upb_value_ptr p = upb_msg_getptr(data, f);
    if(upb_isarray(f)) {
@ -371,7 +371,7 @@ static void add_submsgs(void *data, struct upb_msg *m, struct upb_strtable *t)

 /* write_messages_c emits a .c file that contains the data of a protobuf,
 * serialized as C structures. */
-static void write_message_c(void *data, struct upb_msg *m,
+static void write_message_c(void *data, struct upb_msgdef *m,
                            char *cident, char *hfile_name,
                            int argc, char *argv[], char *infile_name,
                            FILE *stream)
@ -441,7 +441,7 @@ static void write_message_c(void *data, struct upb_msg *m,
  upb_strtable_init(&types, 16, sizeof(struct typetable_entry));
  union upb_value val = {.msg = data};
  /* A fake field to get the recursion going. */
-  struct upb_msg_field fake_field = {
+  struct upb_msg_fielddef fake_field = {
      .type = GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE,
      .ref = {.msg = m}
  };
@ -484,12 +484,12 @@ static void write_message_c(void *data, struct upb_msg *m,
    for(int i = 0; i < e->values_len; i++) {
      union upb_value val = e->values[i];
      if(upb_issubmsg(e->field)) {
-        struct upb_msg *m = e->field->ref.msg;
+        struct upb_msgdef *m = e->field->ref.msg;
        void *msgdata = val.msg;
        /* Print set flags. */
        fputs("  {.set_flags = {.has = {\n", stream);
        for(unsigned int j = 0; j < m->num_fields; j++) {
-          struct upb_msg_field *f = &m->fields[j];
+          struct upb_msg_fielddef *f = &m->fields[j];
          google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j];
          fprintf(stream, "    ." UPB_STRFMT " = ", UPB_STRARG(*fd->name));
          if(upb_msg_isset(msgdata, f))
@ -501,7 +501,7 @@ static void write_message_c(void *data, struct upb_msg *m,
        fputs("  }},\n", stream);
        /* Print msg data. */
        for(unsigned int j = 0; j < m->num_fields; j++) {
-          struct upb_msg_field *f = &m->fields[j];
+          struct upb_msg_fielddef *f = &m->fields[j];
          google_protobuf_FieldDescriptorProto *fd = m->field_descriptors[j];
          union upb_value val = upb_msg_get(msgdata, f);
          fprintf(stream, "    ." UPB_STRFMT " = ", UPB_STRARG(*fd->name));
@ -599,7 +599,7 @@ void error(char *err)

 void sort_fields_in_descriptor(google_protobuf_DescriptorProto *d)
 {
-  if(d->set_flags.has.field) upb_msg_sortfds(d->field->elements, d->field->len);
+  if(d->set_flags.has.field) upb_msgdef_sortfds(d->field->elements, d->field->len);
  if(d->set_flags.has.nested_type)
    for(uint32_t i = 0; i < d->nested_type->len; i++)
      sort_fields_in_descriptor(d->nested_type->elements[i]);
@ -640,7 +640,7 @@ int main(int argc, char *argv[])
  struct upb_context c;
  upb_context_init(&c);
  google_protobuf_FileDescriptorSet *fds =
-      upb_alloc_and_parse(c.fds_msg, &descriptor, false);
+      upb_msg_parsenew(c.fds_msg, &descriptor);
  if(!fds)
    error("Failed to parse input file descriptor.");
  if(!upb_context_addfds(&c, fds))